GPU:
- Further optimize the SSE2 versions of ConvertColor555To6665Opaque() and ConvertColor555To8888Opaque().
This commit is contained in:
parent a05ddab710
commit e6dac5ec96
|
@@ -1783,77 +1783,61 @@ FORCEINLINE u16 ConvertColor6665To5551(u32 srcColor)
|
||||||
#ifdef ENABLE_SSE2
|
#ifdef ENABLE_SSE2
|
||||||
|
|
||||||
template <bool SWAP_RB>
|
template <bool SWAP_RB>
|
||||||
FORCEINLINE void ConvertColor555To8888Opaque(const __m128i &src, __m128i &dstLo, __m128i &dstHi)
|
FORCEINLINE void ConvertColor555To8888(const __m128i &srcColor, const __m128i &srcAlphaBits32Lo, const __m128i &srcAlphaBits32Hi, __m128i &dstLo, __m128i &dstHi)
|
||||||
{
|
{
|
||||||
__m128i src32;
|
__m128i src32;
|
||||||
|
|
||||||
// Conversion algorithm:
|
// Conversion algorithm:
|
||||||
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
|
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
|
||||||
if (SWAP_RB)
|
src32 = _mm_unpacklo_epi16(srcColor, _mm_setzero_si128());
|
||||||
{
|
dstLo = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 19), _mm_srli_epi32(src32, 7)) : _mm_or_si128(_mm_slli_epi32(src32, 3), _mm_slli_epi32(src32, 9));
|
||||||
src32 = _mm_unpacklo_epi16(src, _mm_setzero_si128());
|
dstLo = _mm_and_si128( dstLo, _mm_set1_epi32(0x00F800F8) );
|
||||||
dstLo = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 19), _mm_set1_epi32(0x00F80000)), _mm_and_si128(_mm_slli_epi32(src32, 14), _mm_set1_epi32(0x00070000)));
|
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_slli_epi32(src32, 6), _mm_set1_epi32(0x0000F800)) );
|
||||||
dstLo = _mm_or_si128( dstLo, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 6), _mm_set1_epi32(0x0000F800)), _mm_and_si128(_mm_slli_epi32(src32, 1), _mm_set1_epi32(0x00000700))) );
|
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_srli_epi32(dstLo, 5), _mm_set1_epi32(0x00070707)) );
|
||||||
dstLo = _mm_or_si128( dstLo, _mm_or_si128(_mm_and_si128(_mm_srli_epi32(src32, 7), _mm_set1_epi32(0x000000F8)), _mm_and_si128(_mm_srli_epi32(src32, 12), _mm_set1_epi32(0x00000007))) );
|
dstLo = _mm_or_si128( dstLo, srcAlphaBits32Lo );
|
||||||
dstLo = _mm_or_si128( dstLo, _mm_set1_epi32(0xFF000000) );
|
|
||||||
|
|
||||||
src32 = _mm_unpackhi_epi16(src, _mm_setzero_si128());
|
src32 = _mm_unpackhi_epi16(srcColor, _mm_setzero_si128());
|
||||||
dstHi = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 19), _mm_set1_epi32(0x00F80000)), _mm_and_si128(_mm_slli_epi32(src32, 14), _mm_set1_epi32(0x00070000)));
|
dstHi = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 19), _mm_srli_epi32(src32, 7)) : _mm_or_si128(_mm_slli_epi32(src32, 3), _mm_slli_epi32(src32, 9));
|
||||||
dstHi = _mm_or_si128( dstHi, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 6), _mm_set1_epi32(0x0000F800)), _mm_and_si128(_mm_slli_epi32(src32, 1), _mm_set1_epi32(0x00000700))) );
|
dstHi = _mm_and_si128( dstHi, _mm_set1_epi32(0x00F800F8) );
|
||||||
dstHi = _mm_or_si128( dstHi, _mm_or_si128(_mm_and_si128(_mm_srli_epi32(src32, 7), _mm_set1_epi32(0x000000F8)), _mm_and_si128(_mm_srli_epi32(src32, 12), _mm_set1_epi32(0x00000007))) );
|
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_slli_epi32(src32, 6), _mm_set1_epi32(0x0000F800)) );
|
||||||
dstHi = _mm_or_si128( dstHi, _mm_set1_epi32(0xFF000000) );
|
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_srli_epi32(dstHi, 5), _mm_set1_epi32(0x00070707)) );
|
||||||
}
|
dstHi = _mm_or_si128( dstHi, srcAlphaBits32Hi );
|
||||||
else
|
|
||||||
{
|
|
||||||
src32 = _mm_unpacklo_epi16(src, _mm_setzero_si128());
|
|
||||||
dstLo = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 3), _mm_set1_epi32(0x000000F8)), _mm_and_si128(_mm_srli_epi32(src32, 2), _mm_set1_epi32(0x00000007)));
|
|
||||||
dstLo = _mm_or_si128( dstLo, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 6), _mm_set1_epi32(0x0000F800)), _mm_and_si128(_mm_slli_epi32(src32, 1), _mm_set1_epi32(0x00000700))) );
|
|
||||||
dstLo = _mm_or_si128( dstLo, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 9), _mm_set1_epi32(0x00F80000)), _mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00070000))) );
|
|
||||||
dstLo = _mm_or_si128( dstLo, _mm_set1_epi32(0xFF000000) );
|
|
||||||
|
|
||||||
src32 = _mm_unpackhi_epi16(src, _mm_setzero_si128());
|
|
||||||
dstHi = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 3), _mm_set1_epi32(0x000000F8)), _mm_and_si128(_mm_srli_epi32(src32, 2), _mm_set1_epi32(0x00000007)));
|
|
||||||
dstHi = _mm_or_si128( dstHi, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 6), _mm_set1_epi32(0x0000F800)), _mm_and_si128(_mm_slli_epi32(src32, 1), _mm_set1_epi32(0x00000700))) );
|
|
||||||
dstHi = _mm_or_si128( dstHi, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 9), _mm_set1_epi32(0x00F80000)), _mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00070000))) );
|
|
||||||
dstHi = _mm_or_si128( dstHi, _mm_set1_epi32(0xFF000000) );
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <bool SWAP_RB>
|
template <bool SWAP_RB>
|
||||||
FORCEINLINE void ConvertColor555To6665Opaque(const __m128i &src, __m128i &dstLo, __m128i &dstHi)
|
FORCEINLINE void ConvertColor555To6665(const __m128i &srcColor, const __m128i &srcAlphaBits32Lo, const __m128i &srcAlphaBits32Hi, __m128i &dstLo, __m128i &dstHi)
|
||||||
{
|
{
|
||||||
__m128i src32;
|
__m128i src32;
|
||||||
|
|
||||||
// Conversion algorithm:
|
// Conversion algorithm:
|
||||||
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
|
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
|
||||||
if (SWAP_RB)
|
src32 = _mm_unpacklo_epi16(srcColor, _mm_setzero_si128());
|
||||||
{
|
dstLo = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 17), _mm_srli_epi32(src32, 9)) : _mm_or_si128(_mm_slli_epi32(src32, 1), _mm_slli_epi32(src32, 7));
|
||||||
src32 = _mm_unpacklo_epi16(src, _mm_setzero_si128());
|
dstLo = _mm_and_si128( dstLo, _mm_set1_epi32(0x003E003E) );
|
||||||
dstLo = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 17), _mm_set1_epi32(0x003E0000)), _mm_and_si128(_mm_slli_epi32(src32, 12), _mm_set1_epi32(0x00010000)));
|
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00003E00)) );
|
||||||
dstLo = _mm_or_si128( dstLo, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00003E00)), _mm_and_si128(_mm_srli_epi32(src32, 1), _mm_set1_epi32(0x00000100))) );
|
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_srli_epi32(dstLo, 5), _mm_set1_epi32(0x00010101)) );
|
||||||
dstLo = _mm_or_si128( dstLo, _mm_or_si128(_mm_and_si128(_mm_srli_epi32(src32, 9), _mm_set1_epi32(0x0000003E)), _mm_and_si128(_mm_srli_epi32(src32, 14), _mm_set1_epi32(0x00000001))) );
|
dstLo = _mm_or_si128( dstLo, srcAlphaBits32Lo );
|
||||||
dstLo = _mm_or_si128( dstLo, _mm_set1_epi32(0x1F000000) );
|
|
||||||
|
|
||||||
src32 = _mm_unpackhi_epi16(src, _mm_setzero_si128());
|
src32 = _mm_unpackhi_epi16(srcColor, _mm_setzero_si128());
|
||||||
dstHi = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 17), _mm_set1_epi32(0x003E0000)), _mm_and_si128(_mm_slli_epi32(src32, 12), _mm_set1_epi32(0x00010000)));
|
dstHi = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 17), _mm_srli_epi32(src32, 9)) : _mm_or_si128(_mm_slli_epi32(src32, 1), _mm_slli_epi32(src32, 7));
|
||||||
dstHi = _mm_or_si128( dstHi, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00003E00)), _mm_and_si128(_mm_srli_epi32(src32, 1), _mm_set1_epi32(0x00000100))) );
|
dstHi = _mm_and_si128( dstHi, _mm_set1_epi32(0x003E003E) );
|
||||||
dstHi = _mm_or_si128( dstHi, _mm_or_si128(_mm_and_si128(_mm_srli_epi32(src32, 9), _mm_set1_epi32(0x0000003E)), _mm_and_si128(_mm_srli_epi32(src32, 14), _mm_set1_epi32(0x00000001))) );
|
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00003E00)) );
|
||||||
dstHi = _mm_or_si128( dstHi, _mm_set1_epi32(0x1F000000) );
|
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_srli_epi32(dstHi, 5), _mm_set1_epi32(0x00010101)) );
|
||||||
}
|
dstHi = _mm_or_si128( dstHi, srcAlphaBits32Hi );
|
||||||
else
|
}
|
||||||
{
|
|
||||||
src32 = _mm_unpacklo_epi16(src, _mm_setzero_si128());
|
|
||||||
dstLo = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 1), _mm_set1_epi32(0x0000003E)), _mm_and_si128(_mm_srli_epi32(src32, 4), _mm_set1_epi32(0x00000001)));
|
|
||||||
dstLo = _mm_or_si128( dstLo, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00003E00)), _mm_and_si128(_mm_srli_epi32(src32, 1), _mm_set1_epi32(0x00000100))) );
|
|
||||||
dstLo = _mm_or_si128( dstLo, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 7), _mm_set1_epi32(0x003E0000)), _mm_and_si128(_mm_slli_epi32(src32, 2), _mm_set1_epi32(0x00010000))) );
|
|
||||||
dstLo = _mm_or_si128( dstLo, _mm_set1_epi32(0x1F000000) );
|
|
||||||
|
|
||||||
src32 = _mm_unpackhi_epi16(src, _mm_setzero_si128());
|
template <bool SWAP_RB>
|
||||||
dstHi = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 1), _mm_set1_epi32(0x0000003E)), _mm_and_si128(_mm_srli_epi32(src32, 4), _mm_set1_epi32(0x00000001)));
|
FORCEINLINE void ConvertColor555To8888Opaque(const __m128i &srcColor, __m128i &dstLo, __m128i &dstHi)
|
||||||
dstHi = _mm_or_si128( dstHi, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00003E00)), _mm_and_si128(_mm_srli_epi32(src32, 1), _mm_set1_epi32(0x00000100))) );
|
{
|
||||||
dstHi = _mm_or_si128( dstHi, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src32, 7), _mm_set1_epi32(0x003E0000)), _mm_and_si128(_mm_slli_epi32(src32, 2), _mm_set1_epi32(0x00010000))) );
|
const __m128i srcAlphaBits32 = _mm_set1_epi32(0xFF000000);
|
||||||
dstHi = _mm_or_si128( dstHi, _mm_set1_epi32(0x1F000000) );
|
ConvertColor555To8888<SWAP_RB>(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <bool SWAP_RB>
|
||||||
|
FORCEINLINE void ConvertColor555To6665Opaque(const __m128i &srcColor, __m128i &dstLo, __m128i &dstHi)
|
||||||
|
{
|
||||||
|
const __m128i srcAlphaBits32 = _mm_set1_epi32(0x1F000000);
|
||||||
|
ConvertColor555To6665<SWAP_RB>(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <bool SWAP_RB>
|
template <bool SWAP_RB>
|
||||||
|
|
|
@@ -870,77 +870,36 @@ public:
|
||||||
const __m128i palColor0 = _mm_shuffle_epi8(pal_vec128, idx0);
|
const __m128i palColor0 = _mm_shuffle_epi8(pal_vec128, idx0);
|
||||||
const __m128i palColor1 = _mm_shuffle_epi8(pal_vec128, idx1);
|
const __m128i palColor1 = _mm_shuffle_epi8(pal_vec128, idx1);
|
||||||
|
|
||||||
__m128i tmpColor;
|
__m128i tmpAlpha[2];
|
||||||
__m128i tmpAlpha;
|
|
||||||
__m128i convertedColor[4];
|
__m128i convertedColor[4];
|
||||||
|
|
||||||
if (TEXFORMAT == TexFormat_15bpp)
|
if (TEXFORMAT == TexFormat_15bpp)
|
||||||
{
|
{
|
||||||
__m128i alpha = _mm_srli_epi16( _mm_and_si128(bits, _mm_set1_epi8(0xF8)), 3 );
|
const __m128i alpha = _mm_srli_epi16( _mm_and_si128(bits, _mm_set1_epi8(0xF8)), 3 );
|
||||||
__m128i alphaLo = _mm_unpacklo_epi8(_mm_setzero_si128(), alpha);
|
const __m128i alphaLo = _mm_unpacklo_epi8(_mm_setzero_si128(), alpha);
|
||||||
__m128i alphaHi = _mm_unpackhi_epi8(_mm_setzero_si128(), alpha);
|
const __m128i alphaHi = _mm_unpackhi_epi8(_mm_setzero_si128(), alpha);
|
||||||
|
|
||||||
tmpColor = _mm_unpacklo_epi16(palColor0, _mm_setzero_si128());
|
tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo);
|
||||||
tmpAlpha = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo);
|
tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo);
|
||||||
convertedColor[0] = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 1), _mm_set1_epi32(0x0000003E)), _mm_and_si128(_mm_srli_epi32(tmpColor, 4), _mm_set1_epi32(0x00000001)));
|
ConvertColor555To6665<false>(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]);
|
||||||
convertedColor[0] = _mm_or_si128( convertedColor[0], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 4), _mm_set1_epi32(0x00003E00)), _mm_and_si128(_mm_srli_epi32(tmpColor, 1), _mm_set1_epi32(0x00000100))) );
|
|
||||||
convertedColor[0] = _mm_or_si128( convertedColor[0], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 7), _mm_set1_epi32(0x003E0000)), _mm_and_si128(_mm_slli_epi32(tmpColor, 2), _mm_set1_epi32(0x00010000))) );
|
|
||||||
convertedColor[0] = _mm_or_si128( convertedColor[0], tmpAlpha);
|
|
||||||
|
|
||||||
tmpColor = _mm_unpackhi_epi16(palColor0, _mm_setzero_si128());
|
tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi);
|
||||||
tmpAlpha = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo);
|
tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi);
|
||||||
convertedColor[1] = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 1), _mm_set1_epi32(0x0000003E)), _mm_and_si128(_mm_srli_epi32(tmpColor, 4), _mm_set1_epi32(0x00000001)));
|
ConvertColor555To6665<false>(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]);
|
||||||
convertedColor[1] = _mm_or_si128( convertedColor[1], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 4), _mm_set1_epi32(0x00003E00)), _mm_and_si128(_mm_srli_epi32(tmpColor, 1), _mm_set1_epi32(0x00000100))) );
|
|
||||||
convertedColor[1] = _mm_or_si128( convertedColor[1], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 7), _mm_set1_epi32(0x003E0000)), _mm_and_si128(_mm_slli_epi32(tmpColor, 2), _mm_set1_epi32(0x00010000))) );
|
|
||||||
convertedColor[1] = _mm_or_si128( convertedColor[1], tmpAlpha);
|
|
||||||
|
|
||||||
tmpColor = _mm_unpacklo_epi16(palColor1, _mm_setzero_si128());
|
|
||||||
tmpAlpha = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi);
|
|
||||||
convertedColor[2] = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 1), _mm_set1_epi32(0x0000003E)), _mm_and_si128(_mm_srli_epi32(tmpColor, 4), _mm_set1_epi32(0x00000001)));
|
|
||||||
convertedColor[2] = _mm_or_si128( convertedColor[2], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 4), _mm_set1_epi32(0x00003E00)), _mm_and_si128(_mm_srli_epi32(tmpColor, 1), _mm_set1_epi32(0x00000100))) );
|
|
||||||
convertedColor[2] = _mm_or_si128( convertedColor[2], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 7), _mm_set1_epi32(0x003E0000)), _mm_and_si128(_mm_slli_epi32(tmpColor, 2), _mm_set1_epi32(0x00010000))) );
|
|
||||||
convertedColor[2] = _mm_or_si128( convertedColor[2], tmpAlpha);
|
|
||||||
|
|
||||||
tmpColor = _mm_unpackhi_epi16(palColor1, _mm_setzero_si128());
|
|
||||||
tmpAlpha = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi);
|
|
||||||
convertedColor[3] = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 1), _mm_set1_epi32(0x0000003E)), _mm_and_si128(_mm_srli_epi32(tmpColor, 4), _mm_set1_epi32(0x00000001)));
|
|
||||||
convertedColor[3] = _mm_or_si128( convertedColor[3], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 4), _mm_set1_epi32(0x00003E00)), _mm_and_si128(_mm_srli_epi32(tmpColor, 1), _mm_set1_epi32(0x00000100))) );
|
|
||||||
convertedColor[3] = _mm_or_si128( convertedColor[3], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 7), _mm_set1_epi32(0x003E0000)), _mm_and_si128(_mm_slli_epi32(tmpColor, 2), _mm_set1_epi32(0x00010000))) );
|
|
||||||
convertedColor[3] = _mm_or_si128( convertedColor[3], tmpAlpha);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
__m128i alpha = _mm_or_si128( _mm_and_si128(bits, _mm_set1_epi8(0xF8)), _mm_srli_epi16(_mm_and_si128(bits, _mm_set1_epi8(0xE0)), 5) );
|
const __m128i alpha = _mm_or_si128( _mm_and_si128(bits, _mm_set1_epi8(0xF8)), _mm_srli_epi16(_mm_and_si128(bits, _mm_set1_epi8(0xE0)), 5) );
|
||||||
__m128i alphaLo = _mm_unpacklo_epi8(_mm_setzero_si128(), alpha);
|
const __m128i alphaLo = _mm_unpacklo_epi8(_mm_setzero_si128(), alpha);
|
||||||
__m128i alphaHi = _mm_unpackhi_epi8(_mm_setzero_si128(), alpha);
|
const __m128i alphaHi = _mm_unpackhi_epi8(_mm_setzero_si128(), alpha);
|
||||||
|
|
||||||
tmpColor = _mm_unpacklo_epi16(palColor0, _mm_setzero_si128());
|
tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo);
|
||||||
tmpAlpha = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo);
|
tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo);
|
||||||
convertedColor[0] = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 3), _mm_set1_epi32(0x000000F8)), _mm_and_si128(_mm_srli_epi32(tmpColor, 2), _mm_set1_epi32(0x00000007)));
|
ConvertColor555To8888<false>(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]);
|
||||||
convertedColor[0] = _mm_or_si128( convertedColor[0], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 6), _mm_set1_epi32(0x0000F800)), _mm_and_si128(_mm_slli_epi32(tmpColor, 1), _mm_set1_epi32(0x00000700))) );
|
|
||||||
convertedColor[0] = _mm_or_si128( convertedColor[0], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 9), _mm_set1_epi32(0x00F80000)), _mm_and_si128(_mm_slli_epi32(tmpColor, 4), _mm_set1_epi32(0x00070000))) );
|
|
||||||
convertedColor[0] = _mm_or_si128( convertedColor[0], tmpAlpha);
|
|
||||||
|
|
||||||
tmpColor = _mm_unpackhi_epi16(palColor0, _mm_setzero_si128());
|
tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi);
|
||||||
tmpAlpha = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo);
|
tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi);
|
||||||
convertedColor[1] = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 3), _mm_set1_epi32(0x000000F8)), _mm_and_si128(_mm_srli_epi32(tmpColor, 2), _mm_set1_epi32(0x00000007)));
|
ConvertColor555To8888<false>(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]);
|
||||||
convertedColor[1] = _mm_or_si128( convertedColor[1], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 6), _mm_set1_epi32(0x0000F800)), _mm_and_si128(_mm_slli_epi32(tmpColor, 1), _mm_set1_epi32(0x00000700))) );
|
|
||||||
convertedColor[1] = _mm_or_si128( convertedColor[1], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 9), _mm_set1_epi32(0x00F80000)), _mm_and_si128(_mm_slli_epi32(tmpColor, 4), _mm_set1_epi32(0x00070000))) );
|
|
||||||
convertedColor[1] = _mm_or_si128( convertedColor[1], tmpAlpha);
|
|
||||||
|
|
||||||
tmpColor = _mm_unpacklo_epi16(palColor1, _mm_setzero_si128());
|
|
||||||
tmpAlpha = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi);
|
|
||||||
convertedColor[2] = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 3), _mm_set1_epi32(0x000000F8)), _mm_and_si128(_mm_srli_epi32(tmpColor, 2), _mm_set1_epi32(0x00000007)));
|
|
||||||
convertedColor[2] = _mm_or_si128( convertedColor[2], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 6), _mm_set1_epi32(0x0000F800)), _mm_and_si128(_mm_slli_epi32(tmpColor, 1), _mm_set1_epi32(0x00000700))) );
|
|
||||||
convertedColor[2] = _mm_or_si128( convertedColor[2], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 9), _mm_set1_epi32(0x00F80000)), _mm_and_si128(_mm_slli_epi32(tmpColor, 4), _mm_set1_epi32(0x00070000))) );
|
|
||||||
convertedColor[2] = _mm_or_si128( convertedColor[2], tmpAlpha);
|
|
||||||
|
|
||||||
tmpColor = _mm_unpackhi_epi16(palColor1, _mm_setzero_si128());
|
|
||||||
tmpAlpha = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi);
|
|
||||||
convertedColor[3] = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 3), _mm_set1_epi32(0x000000F8)), _mm_and_si128(_mm_srli_epi32(tmpColor, 2), _mm_set1_epi32(0x00000007)));
|
|
||||||
convertedColor[3] = _mm_or_si128( convertedColor[3], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 6), _mm_set1_epi32(0x0000F800)), _mm_and_si128(_mm_slli_epi32(tmpColor, 1), _mm_set1_epi32(0x00000700))) );
|
|
||||||
convertedColor[3] = _mm_or_si128( convertedColor[3], _mm_or_si128(_mm_and_si128(_mm_slli_epi32(tmpColor, 9), _mm_set1_epi32(0x00F80000)), _mm_and_si128(_mm_slli_epi32(tmpColor, 4), _mm_set1_epi32(0x00070000))) );
|
|
||||||
convertedColor[3] = _mm_or_si128( convertedColor[3], tmpAlpha);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
_mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]);
|
_mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]);
|
||||||
|
|
Loading…
Reference in New Issue