// Review notes for this patch. These lines sit ahead of the first "diff --git" header and are
// not part of any hunk.
//
// desmume/src/GPU.h
//   * The removed bodies of ConvertColor555To8888Opaque / ConvertColor555To6665Opaque had one
//     branch for SWAP_RB and one for !SWAP_RB, masked every channel separately, and OR'ed in a
//     constant alpha (0xFF000000 / 0x1F000000).
//   * The added ConvertColor555To8888 / ConvertColor555To6665 take srcColor plus two extra
//     vectors, srcAlphaBits32Lo and srcAlphaBits32Hi, which are OR'ed into dstLo / dstHi as the
//     final step of each half.
//   * In the added bodies SWAP_RB only selects the pair of shifts that position the two outer
//     channels: (<<19 | >>7) vs (<<3 | <<9) for 8888, and (<<17 | >>9) vs (<<1 | <<7) for 6665.
//     Both outer channels are then kept with one mask (0x00F800F8 / 0x003E003E), the middle
//     channel is added with (<<6 & 0x0000F800) / (<<4 & 0x00003E00), and a single
//     (dst >> 5) & 0x00070707 / 0x00010101 step fills the low bits of all three channels from
//     their high bits, i.e. the "(srcRGB5 >> 2) & 0x07" / "(srcRGB5 >> 4) & 0x01" term of the
//     formula comments.
//   * ConvertColor555To8888Opaque / ConvertColor555To6665Opaque are re-added as wrappers that
//     pass _mm_set1_epi32(0xFF000000) / _mm_set1_epi32(0x1F000000) for both alpha arguments.
//
// desmume/src/texcache.cpp
//   * tmpColor and the single tmpAlpha are replaced by tmpAlpha[2]; alpha, alphaLo and alphaHi
//     become const.
//   * In the TEXFORMAT == TexFormat_15bpp branch the four hand-expanded conversions are replaced
//     by two ConvertColor555To6665 calls; in the else branch by two ConvertColor555To8888 calls.
//     The removed expansions use the same shift/mask constants as the removed !SWAP_RB branches
//     in GPU.h. tmpAlpha[0] / tmpAlpha[1] are built with _mm_unpacklo_epi16 / _mm_unpackhi_epi16
//     of alphaLo (then alphaHi), matching the per-quarter tmpAlpha values of the removed code.
//
// NOTE(review): "template" is followed directly by FORCEINLINE and SWAP_RB is used without a
//   visible declaration; the calls also carry no explicit template arguments. The angle-bracket
//   text looks stripped in this copy -- confirm against the original patch.
// NOTE(review): the hunk line structure appears collapsed onto a few long lines, and the visible
//   text stops at the first _mm_store_si128 context line; any remainder of the texcache.cpp hunk
//   is not shown here.
diff --git a/desmume/src/GPU.h b/desmume/src/GPU.h index 59fd7a2bb..dd9da900a 100644 --- a/desmume/src/GPU.h +++ b/desmume/src/GPU.h @@ -1783,77 +1783,61 @@ FORCEINLINE u16 ConvertColor6665To5551(u32 srcColor) #ifdef ENABLE_SSE2 template -FORCEINLINE void ConvertColor555To8888Opaque(const __m128i &src, __m128i &dstLo, __m128i &dstHi) +FORCEINLINE void ConvertColor555To8888(const __m128i &srcColor, const __m128i &srcAlphaBits32Lo, const __m128i &srcAlphaBits32Hi, __m128i &dstLo, __m128i &dstHi) { __m128i src32; // Conversion algorithm: // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) - if (SWAP_RB) - { - src32 = _mm_unpacklo_epi16(src, _mm_setzero_si128()); - dstLo = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 19), _mm_set1_epi32(0x00F80000)), _mm_and_si128(_mm_slli_epi32(src32, 14), _mm_set1_epi32(0x00070000))); - dstLo = _mm_or_si128( dstLo, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 6), _mm_set1_epi32(0x0000F800)), _mm_and_si128(_mm_slli_epi32(src32, 1), _mm_set1_epi32(0x00000700))) ); - dstLo = _mm_or_si128( dstLo, _mm_or_si128(_mm_and_si128(_mm_srli_epi32(src32, 7), _mm_set1_epi32(0x000000F8)), _mm_and_si128(_mm_srli_epi32(src32, 12), _mm_set1_epi32(0x00000007))) ); - dstLo = _mm_or_si128( dstLo, _mm_set1_epi32(0xFF000000) ); - - src32 = _mm_unpackhi_epi16(src, _mm_setzero_si128()); - dstHi = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 19), _mm_set1_epi32(0x00F80000)), _mm_and_si128(_mm_slli_epi32(src32, 14), _mm_set1_epi32(0x00070000))); - dstHi = _mm_or_si128( dstHi, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 6), _mm_set1_epi32(0x0000F800)), _mm_and_si128(_mm_slli_epi32(src32, 1), _mm_set1_epi32(0x00000700))) ); - dstHi = _mm_or_si128( dstHi, _mm_or_si128(_mm_and_si128(_mm_srli_epi32(src32, 7), _mm_set1_epi32(0x000000F8)), _mm_and_si128(_mm_srli_epi32(src32, 12), _mm_set1_epi32(0x00000007))) ); - dstHi = _mm_or_si128( dstHi, _mm_set1_epi32(0xFF000000) ); - } - else - { - src32 = _mm_unpacklo_epi16(src, 
_mm_setzero_si128()); - dstLo = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 3), _mm_set1_epi32(0x000000F8)), _mm_and_si128(_mm_srli_epi32(src32, 2), _mm_set1_epi32(0x00000007))); - dstLo = _mm_or_si128( dstLo, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 6), _mm_set1_epi32(0x0000F800)), _mm_and_si128(_mm_slli_epi32(src32, 1), _mm_set1_epi32(0x00000700))) ); - dstLo = _mm_or_si128( dstLo, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 9), _mm_set1_epi32(0x00F80000)), _mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00070000))) ); - dstLo = _mm_or_si128( dstLo, _mm_set1_epi32(0xFF000000) ); - - src32 = _mm_unpackhi_epi16(src, _mm_setzero_si128()); - dstHi = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 3), _mm_set1_epi32(0x000000F8)), _mm_and_si128(_mm_srli_epi32(src32, 2), _mm_set1_epi32(0x00000007))); - dstHi = _mm_or_si128( dstHi, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 6), _mm_set1_epi32(0x0000F800)), _mm_and_si128(_mm_slli_epi32(src32, 1), _mm_set1_epi32(0x00000700))) ); - dstHi = _mm_or_si128( dstHi, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 9), _mm_set1_epi32(0x00F80000)), _mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00070000))) ); - dstHi = _mm_or_si128( dstHi, _mm_set1_epi32(0xFF000000) ); - } + src32 = _mm_unpacklo_epi16(srcColor, _mm_setzero_si128()); + dstLo = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 19), _mm_srli_epi32(src32, 7)) : _mm_or_si128(_mm_slli_epi32(src32, 3), _mm_slli_epi32(src32, 9)); + dstLo = _mm_and_si128( dstLo, _mm_set1_epi32(0x00F800F8) ); + dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_slli_epi32(src32, 6), _mm_set1_epi32(0x0000F800)) ); + dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_srli_epi32(dstLo, 5), _mm_set1_epi32(0x00070707)) ); + dstLo = _mm_or_si128( dstLo, srcAlphaBits32Lo ); + + src32 = _mm_unpackhi_epi16(srcColor, _mm_setzero_si128()); + dstHi = (SWAP_RB) ? 
_mm_or_si128(_mm_slli_epi32(src32, 19), _mm_srli_epi32(src32, 7)) : _mm_or_si128(_mm_slli_epi32(src32, 3), _mm_slli_epi32(src32, 9)); + dstHi = _mm_and_si128( dstHi, _mm_set1_epi32(0x00F800F8) ); + dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_slli_epi32(src32, 6), _mm_set1_epi32(0x0000F800)) ); + dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_srli_epi32(dstHi, 5), _mm_set1_epi32(0x00070707)) ); + dstHi = _mm_or_si128( dstHi, srcAlphaBits32Hi ); } template -FORCEINLINE void ConvertColor555To6665Opaque(const __m128i &src, __m128i &dstLo, __m128i &dstHi) +FORCEINLINE void ConvertColor555To6665(const __m128i &srcColor, const __m128i &srcAlphaBits32Lo, const __m128i &srcAlphaBits32Hi, __m128i &dstLo, __m128i &dstHi) { __m128i src32; // Conversion algorithm: // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) - if (SWAP_RB) - { - src32 = _mm_unpacklo_epi16(src, _mm_setzero_si128()); - dstLo = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 17), _mm_set1_epi32(0x003E0000)), _mm_and_si128(_mm_slli_epi32(src32, 12), _mm_set1_epi32(0x00010000))); - dstLo = _mm_or_si128( dstLo, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00003E00)), _mm_and_si128(_mm_srli_epi32(src32, 1), _mm_set1_epi32(0x00000100))) ); - dstLo = _mm_or_si128( dstLo, _mm_or_si128(_mm_and_si128(_mm_srli_epi32(src32, 9), _mm_set1_epi32(0x0000003E)), _mm_and_si128(_mm_srli_epi32(src32, 14), _mm_set1_epi32(0x00000001))) ); - dstLo = _mm_or_si128( dstLo, _mm_set1_epi32(0x1F000000) ); - - src32 = _mm_unpackhi_epi16(src, _mm_setzero_si128()); - dstHi = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 17), _mm_set1_epi32(0x003E0000)), _mm_and_si128(_mm_slli_epi32(src32, 12), _mm_set1_epi32(0x00010000))); - dstHi = _mm_or_si128( dstHi, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00003E00)), _mm_and_si128(_mm_srli_epi32(src32, 1), _mm_set1_epi32(0x00000100))) ); - dstHi = _mm_or_si128( dstHi, 
_mm_or_si128(_mm_and_si128(_mm_srli_epi32(src32, 9), _mm_set1_epi32(0x0000003E)), _mm_and_si128(_mm_srli_epi32(src32, 14), _mm_set1_epi32(0x00000001))) ); - dstHi = _mm_or_si128( dstHi, _mm_set1_epi32(0x1F000000) ); - } - else - { - src32 = _mm_unpacklo_epi16(src, _mm_setzero_si128()); - dstLo = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 1), _mm_set1_epi32(0x0000003E)), _mm_and_si128(_mm_srli_epi32(src32, 4), _mm_set1_epi32(0x00000001))); - dstLo = _mm_or_si128( dstLo, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00003E00)), _mm_and_si128(_mm_srli_epi32(src32, 1), _mm_set1_epi32(0x00000100))) ); - dstLo = _mm_or_si128( dstLo, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 7), _mm_set1_epi32(0x003E0000)), _mm_and_si128(_mm_slli_epi32(src32, 2), _mm_set1_epi32(0x00010000))) ); - dstLo = _mm_or_si128( dstLo, _mm_set1_epi32(0x1F000000) ); - - src32 = _mm_unpackhi_epi16(src, _mm_setzero_si128()); - dstHi = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 1), _mm_set1_epi32(0x0000003E)), _mm_and_si128(_mm_srli_epi32(src32, 4), _mm_set1_epi32(0x00000001))); - dstHi = _mm_or_si128( dstHi, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00003E00)), _mm_and_si128(_mm_srli_epi32(src32, 1), _mm_set1_epi32(0x00000100))) ); - dstHi = _mm_or_si128( dstHi, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 7), _mm_set1_epi32(0x003E0000)), _mm_and_si128(_mm_slli_epi32(src32, 2), _mm_set1_epi32(0x00010000))) ); - dstHi = _mm_or_si128( dstHi, _mm_set1_epi32(0x1F000000) ); - } + src32 = _mm_unpacklo_epi16(srcColor, _mm_setzero_si128()); + dstLo = (SWAP_RB) ? 
_mm_or_si128(_mm_slli_epi32(src32, 17), _mm_srli_epi32(src32, 9)) : _mm_or_si128(_mm_slli_epi32(src32, 1), _mm_slli_epi32(src32, 7)); + dstLo = _mm_and_si128( dstLo, _mm_set1_epi32(0x003E003E) ); + dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00003E00)) ); + dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_srli_epi32(dstLo, 5), _mm_set1_epi32(0x00010101)) ); + dstLo = _mm_or_si128( dstLo, srcAlphaBits32Lo ); + + src32 = _mm_unpackhi_epi16(srcColor, _mm_setzero_si128()); + dstHi = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 17), _mm_srli_epi32(src32, 9)) : _mm_or_si128(_mm_slli_epi32(src32, 1), _mm_slli_epi32(src32, 7)); + dstHi = _mm_and_si128( dstHi, _mm_set1_epi32(0x003E003E) ); + dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00003E00)) ); + dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_srli_epi32(dstHi, 5), _mm_set1_epi32(0x00010101)) ); + dstHi = _mm_or_si128( dstHi, srcAlphaBits32Hi ); +} + +template +FORCEINLINE void ConvertColor555To8888Opaque(const __m128i &srcColor, __m128i &dstLo, __m128i &dstHi) +{ + const __m128i srcAlphaBits32 = _mm_set1_epi32(0xFF000000); + ConvertColor555To8888(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi); +} + +template +FORCEINLINE void ConvertColor555To6665Opaque(const __m128i &srcColor, __m128i &dstLo, __m128i &dstHi) +{ + const __m128i srcAlphaBits32 = _mm_set1_epi32(0x1F000000); + ConvertColor555To6665(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi); } template diff --git a/desmume/src/texcache.cpp b/desmume/src/texcache.cpp index 5cfca2162..f0a63a135 100644 --- a/desmume/src/texcache.cpp +++ b/desmume/src/texcache.cpp @@ -870,77 +870,36 @@ public: const __m128i palColor0 = _mm_shuffle_epi8(pal_vec128, idx0); const __m128i palColor1 = _mm_shuffle_epi8(pal_vec128, idx1); - __m128i tmpColor; - __m128i tmpAlpha; + __m128i tmpAlpha[2]; __m128i convertedColor[4]; if (TEXFORMAT == TexFormat_15bpp) { - __m128i alpha = _mm_srli_epi16( 
_mm_and_si128(bits, _mm_set1_epi8(0xF8)), 3 ); - __m128i alphaLo = _mm_unpacklo_epi8(_mm_setzero_si128(), alpha); - __m128i alphaHi = _mm_unpackhi_epi8(_mm_setzero_si128(), alpha); + const __m128i alpha = _mm_srli_epi16( _mm_and_si128(bits, _mm_set1_epi8(0xF8)), 3 ); + const __m128i alphaLo = _mm_unpacklo_epi8(_mm_setzero_si128(), alpha); + const __m128i alphaHi = _mm_unpackhi_epi8(_mm_setzero_si128(), alpha); - tmpColor = _mm_unpacklo_epi16(palColor0, _mm_setzero_si128()); - tmpAlpha = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo); - convertedColor[0] = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 1), _mm_set1_epi32(0x0000003E)), _mm_and_si128(_mm_srli_epi32(tmpColor, 4), _mm_set1_epi32(0x00000001))); - convertedColor[0] = _mm_or_si128( convertedColor[0], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 4), _mm_set1_epi32(0x00003E00)), _mm_and_si128(_mm_srli_epi32(tmpColor, 1), _mm_set1_epi32(0x00000100))) ); - convertedColor[0] = _mm_or_si128( convertedColor[0], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 7), _mm_set1_epi32(0x003E0000)), _mm_and_si128(_mm_slli_epi32(tmpColor, 2), _mm_set1_epi32(0x00010000))) ); - convertedColor[0] = _mm_or_si128( convertedColor[0], tmpAlpha); + tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo); + tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo); + ConvertColor555To6665(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]); - tmpColor = _mm_unpackhi_epi16(palColor0, _mm_setzero_si128()); - tmpAlpha = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo); - convertedColor[1] = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 1), _mm_set1_epi32(0x0000003E)), _mm_and_si128(_mm_srli_epi32(tmpColor, 4), _mm_set1_epi32(0x00000001))); - convertedColor[1] = _mm_or_si128( convertedColor[1], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 4), _mm_set1_epi32(0x00003E00)), _mm_and_si128(_mm_srli_epi32(tmpColor, 1), _mm_set1_epi32(0x00000100))) ); - convertedColor[1] = 
_mm_or_si128( convertedColor[1], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 7), _mm_set1_epi32(0x003E0000)), _mm_and_si128(_mm_slli_epi32(tmpColor, 2), _mm_set1_epi32(0x00010000))) ); - convertedColor[1] = _mm_or_si128( convertedColor[1], tmpAlpha); - - tmpColor = _mm_unpacklo_epi16(palColor1, _mm_setzero_si128()); - tmpAlpha = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi); - convertedColor[2] = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 1), _mm_set1_epi32(0x0000003E)), _mm_and_si128(_mm_srli_epi32(tmpColor, 4), _mm_set1_epi32(0x00000001))); - convertedColor[2] = _mm_or_si128( convertedColor[2], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 4), _mm_set1_epi32(0x00003E00)), _mm_and_si128(_mm_srli_epi32(tmpColor, 1), _mm_set1_epi32(0x00000100))) ); - convertedColor[2] = _mm_or_si128( convertedColor[2], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 7), _mm_set1_epi32(0x003E0000)), _mm_and_si128(_mm_slli_epi32(tmpColor, 2), _mm_set1_epi32(0x00010000))) ); - convertedColor[2] = _mm_or_si128( convertedColor[2], tmpAlpha); - - tmpColor = _mm_unpackhi_epi16(palColor1, _mm_setzero_si128()); - tmpAlpha = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi); - convertedColor[3] = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 1), _mm_set1_epi32(0x0000003E)), _mm_and_si128(_mm_srli_epi32(tmpColor, 4), _mm_set1_epi32(0x00000001))); - convertedColor[3] = _mm_or_si128( convertedColor[3], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 4), _mm_set1_epi32(0x00003E00)), _mm_and_si128(_mm_srli_epi32(tmpColor, 1), _mm_set1_epi32(0x00000100))) ); - convertedColor[3] = _mm_or_si128( convertedColor[3], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 7), _mm_set1_epi32(0x003E0000)), _mm_and_si128(_mm_slli_epi32(tmpColor, 2), _mm_set1_epi32(0x00010000))) ); - convertedColor[3] = _mm_or_si128( convertedColor[3], tmpAlpha); + tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi); + tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), 
alphaHi); + ConvertColor555To6665(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]); } else { - __m128i alpha = _mm_or_si128( _mm_and_si128(bits, _mm_set1_epi8(0xF8)), _mm_srli_epi16(_mm_and_si128(bits, _mm_set1_epi8(0xE0)), 5) ); - __m128i alphaLo = _mm_unpacklo_epi8(_mm_setzero_si128(), alpha); - __m128i alphaHi = _mm_unpackhi_epi8(_mm_setzero_si128(), alpha); + const __m128i alpha = _mm_or_si128( _mm_and_si128(bits, _mm_set1_epi8(0xF8)), _mm_srli_epi16(_mm_and_si128(bits, _mm_set1_epi8(0xE0)), 5) ); + const __m128i alphaLo = _mm_unpacklo_epi8(_mm_setzero_si128(), alpha); + const __m128i alphaHi = _mm_unpackhi_epi8(_mm_setzero_si128(), alpha); - tmpColor = _mm_unpacklo_epi16(palColor0, _mm_setzero_si128()); - tmpAlpha = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo); - convertedColor[0] = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 3), _mm_set1_epi32(0x000000F8)), _mm_and_si128(_mm_srli_epi32(tmpColor, 2), _mm_set1_epi32(0x00000007))); - convertedColor[0] = _mm_or_si128( convertedColor[0], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 6), _mm_set1_epi32(0x0000F800)), _mm_and_si128(_mm_slli_epi32(tmpColor, 1), _mm_set1_epi32(0x00000700))) ); - convertedColor[0] = _mm_or_si128( convertedColor[0], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 9), _mm_set1_epi32(0x00F80000)), _mm_and_si128(_mm_slli_epi32(tmpColor, 4), _mm_set1_epi32(0x00070000))) ); - convertedColor[0] = _mm_or_si128( convertedColor[0], tmpAlpha); + tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo); + tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo); + ConvertColor555To8888(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]); - tmpColor = _mm_unpackhi_epi16(palColor0, _mm_setzero_si128()); - tmpAlpha = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo); - convertedColor[1] = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 3), _mm_set1_epi32(0x000000F8)), _mm_and_si128(_mm_srli_epi32(tmpColor, 2), 
_mm_set1_epi32(0x00000007))); - convertedColor[1] = _mm_or_si128( convertedColor[1], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 6), _mm_set1_epi32(0x0000F800)), _mm_and_si128(_mm_slli_epi32(tmpColor, 1), _mm_set1_epi32(0x00000700))) ); - convertedColor[1] = _mm_or_si128( convertedColor[1], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 9), _mm_set1_epi32(0x00F80000)), _mm_and_si128(_mm_slli_epi32(tmpColor, 4), _mm_set1_epi32(0x00070000))) ); - convertedColor[1] = _mm_or_si128( convertedColor[1], tmpAlpha); - - tmpColor = _mm_unpacklo_epi16(palColor1, _mm_setzero_si128()); - tmpAlpha = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi); - convertedColor[2] = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 3), _mm_set1_epi32(0x000000F8)), _mm_and_si128(_mm_srli_epi32(tmpColor, 2), _mm_set1_epi32(0x00000007))); - convertedColor[2] = _mm_or_si128( convertedColor[2], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 6), _mm_set1_epi32(0x0000F800)), _mm_and_si128(_mm_slli_epi32(tmpColor, 1), _mm_set1_epi32(0x00000700))) ); - convertedColor[2] = _mm_or_si128( convertedColor[2], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 9), _mm_set1_epi32(0x00F80000)), _mm_and_si128(_mm_slli_epi32(tmpColor, 4), _mm_set1_epi32(0x00070000))) ); - convertedColor[2] = _mm_or_si128( convertedColor[2], tmpAlpha); - - tmpColor = _mm_unpackhi_epi16(palColor1, _mm_setzero_si128()); - tmpAlpha = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi); - convertedColor[3] = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 3), _mm_set1_epi32(0x000000F8)), _mm_and_si128(_mm_srli_epi32(tmpColor, 2), _mm_set1_epi32(0x00000007))); - convertedColor[3] = _mm_or_si128( convertedColor[3], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 6), _mm_set1_epi32(0x0000F800)), _mm_and_si128(_mm_slli_epi32(tmpColor, 1), _mm_set1_epi32(0x00000700))) ); - convertedColor[3] = _mm_or_si128( convertedColor[3], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 9), _mm_set1_epi32(0x00F80000)), 
_mm_and_si128(_mm_slli_epi32(tmpColor, 4), _mm_set1_epi32(0x00070000))) ); - convertedColor[3] = _mm_or_si128( convertedColor[3], tmpAlpha); + tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi); + tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi); + ConvertColor555To8888(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]); } _mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]);