Colorspace Handler: Add dedicated functions ColorspaceApplyIntensityToBuffer16() and ColorspaceApplyIntensityToBuffer32() for applying an RGB intensity value to a framebuffer.

This commit is contained in:
rogerman 2017-10-03 13:54:51 -07:00
parent 174dcc11bb
commit 74ba49e168
6 changed files with 976 additions and 0 deletions

View File

@ -563,6 +563,181 @@ void ColorspaceCopyBuffer32(const u32 *src, u32 *dst, size_t pixCount)
}
}
// Scales the three 5-bit color fields of every 16-bit pixel in dst by intensity, in place.
//
// Template parameters:
//   SWAP_RB      - when true, the fields at bits 0-4 and bits 10-14 are exchanged before
//                  the intensity is applied.
//   IS_UNALIGNED - selects the *_IsUnaligned handler methods for the vectorized portion.
//
// Parameters:
//   dst       - pixel buffer, modified in place. Bit 15 of each pixel is preserved.
//   pixCount  - number of pixels in dst.
//   intensity - scale factor. Above 0.999 the colors are kept (only the optional swap is
//               performed); below 0.001 every bit except bit 15 is cleared.
template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceApplyIntensityToBuffer16(u16 *dst, size_t pixCount, float intensity)
{
	size_t i = 0;
	
#ifdef USEMANUALVECTORIZATION
	
	// Round the pixel count down to a whole number of vectors. The handler processes that
	// many pixels and returns the index at which the scalar code below must resume.
#if defined(USEVECTORSIZE_512)
	const size_t pixCountVector = pixCount - (pixCount % 32);
#elif defined(USEVECTORSIZE_256)
	const size_t pixCountVector = pixCount - (pixCount % 16);
#elif defined(USEVECTORSIZE_128)
	const size_t pixCountVector = pixCount - (pixCount % 8);
#endif
	
	if (SWAP_RB)
	{
		if (IS_UNALIGNED)
		{
			// Must be the swapping variant; the plain _IsUnaligned method would leave the
			// vectorized pixels unswapped while the scalar remainder below gets swapped.
			i = csh.ApplyIntensityToBuffer16_SwapRB_IsUnaligned(dst, pixCountVector, intensity);
		}
		else
		{
			i = csh.ApplyIntensityToBuffer16_SwapRB(dst, pixCountVector, intensity);
		}
	}
	else
	{
		if (IS_UNALIGNED)
		{
			i = csh.ApplyIntensityToBuffer16_IsUnaligned(dst, pixCountVector, intensity);
		}
		else
		{
			i = csh.ApplyIntensityToBuffer16(dst, pixCountVector, intensity);
		}
	}
	
#pragma LOOPVECTORIZE_DISABLE
	
#endif // USEMANUALVECTORIZATION
	
	if (intensity > 0.999f)
	{
		// Full intensity: the colors are unchanged, so only the optional swap remains.
		if (SWAP_RB)
		{
			for (; i < pixCount; i++)
			{
				dst[i] = COLOR5551_SWAP_RB(dst[i]);
			}
		}
		
		return;
	}
	else if (intensity < 0.001f)
	{
		// Zero intensity: all color bits become 0, only bit 15 survives.
		for (; i < pixCount; i++)
		{
			dst[i] = dst[i] & 0x8000;
		}
		
		return;
	}
	
	// Intensity as a 0.16 fixed-point factor; (field * factor) >> 16 scales a field.
	const u16 intensity_u16 = (u16)(intensity * (float)(0xFFFF));
	
	for (; i < pixCount; i++)
	{
		const u16 outColor = (SWAP_RB) ? COLOR5551_SWAP_RB(dst[i]) : dst[i];
		
		const u8 r = (u8)( (((outColor >>  0) & 0x1F) * intensity_u16) >> 16 );
		const u8 g = (u8)( (((outColor >>  5) & 0x1F) * intensity_u16) >> 16 );
		const u8 b = (u8)( (((outColor >> 10) & 0x1F) * intensity_u16) >> 16 );
		
		// Bit 15 does not fit in 8 bits, so it has to be kept in a 16-bit variable.
		const u16 a = outColor & 0x8000;
		
		dst[i] = (u16)( (r << 0) | (g << 5) | (b << 10) | a );
	}
}
// Scales the r, g and b components of every 32-bit pixel in dst by intensity, in place.
//
// Template parameters:
//   SWAP_RB      - when true, the r and b components are exchanged before the intensity
//                  is applied.
//   IS_UNALIGNED - selects the *_IsUnaligned handler methods for the vectorized portion.
//
// Parameters:
//   dst       - pixel buffer, modified in place. The scaling paths never write the `a`
//               component; the zero-intensity path keeps only the bits in 0xFF000000.
//   pixCount  - number of pixels in dst.
//   intensity - scale factor. Above 0.999 the colors are kept (only the optional swap is
//               performed); below 0.001 the color bits are cleared.
template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceApplyIntensityToBuffer32(u32 *dst, size_t pixCount, float intensity)
{
	size_t i = 0;
	
#ifdef USEMANUALVECTORIZATION
	
	// Round the pixel count down to a whole number of vectors. The handler processes that
	// many pixels and returns the index at which the scalar code below must resume.
#if defined(USEVECTORSIZE_512)
	const size_t pixCountVector = pixCount - (pixCount % 32);
#elif defined(USEVECTORSIZE_256)
	const size_t pixCountVector = pixCount - (pixCount % 16);
#elif defined(USEVECTORSIZE_128)
	const size_t pixCountVector = pixCount - (pixCount % 8);
#endif
	
	if (SWAP_RB)
	{
		if (IS_UNALIGNED)
		{
			// Must be the swapping variant; the plain _IsUnaligned method would leave the
			// vectorized pixels unswapped while the scalar remainder below gets swapped.
			i = csh.ApplyIntensityToBuffer32_SwapRB_IsUnaligned(dst, pixCountVector, intensity);
		}
		else
		{
			i = csh.ApplyIntensityToBuffer32_SwapRB(dst, pixCountVector, intensity);
		}
	}
	else
	{
		if (IS_UNALIGNED)
		{
			i = csh.ApplyIntensityToBuffer32_IsUnaligned(dst, pixCountVector, intensity);
		}
		else
		{
			i = csh.ApplyIntensityToBuffer32(dst, pixCountVector, intensity);
		}
	}
	
#pragma LOOPVECTORIZE_DISABLE
	
#endif // USEMANUALVECTORIZATION
	
	if (intensity > 0.999f)
	{
		// Full intensity: the colors are unchanged, so only the optional swap remains.
		if (SWAP_RB)
		{
			for (; i < pixCount; i++)
			{
				// Work from a copy so that r and b can be exchanged through the reference.
				FragmentColor dstColor;
				dstColor.color = dst[i];
				
				FragmentColor &outColor = (FragmentColor &)dst[i];
				outColor.r = dstColor.b;
				outColor.b = dstColor.r;
			}
		}
		
		return;
	}
	else if (intensity < 0.001f)
	{
		// Zero intensity: keep only the bits in 0xFF000000.
		for (; i < pixCount; i++)
		{
			dst[i] = dst[i] & 0xFF000000;
		}
		
		return;
	}
	
	// Intensity as a 0.16 fixed-point factor; (component * factor) >> 16 scales a component.
	const u16 intensity_u16 = (u16)(intensity * (float)(0xFFFF));
	
	if (SWAP_RB)
	{
		for (; i < pixCount; i++)
		{
			FragmentColor dstColor;
			dstColor.color = dst[i];
			
			FragmentColor &outColor = (FragmentColor &)dst[i];
			outColor.r = (u8)( ((u16)dstColor.b * intensity_u16) >> 16 );
			outColor.g = (u8)( ((u16)dstColor.g * intensity_u16) >> 16 );
			outColor.b = (u8)( ((u16)dstColor.r * intensity_u16) >> 16 );
		}
	}
	else
	{
		for (; i < pixCount; i++)
		{
			FragmentColor &outColor = (FragmentColor &)dst[i];
			outColor.r = (u8)( ((u16)outColor.r * intensity_u16) >> 16 );
			outColor.g = (u8)( ((u16)outColor.g * intensity_u16) >> 16 );
			outColor.b = (u8)( ((u16)outColor.b * intensity_u16) >> 16 );
		}
	}
}
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
size_t i = 0;
@ -835,6 +1010,176 @@ size_t ColorspaceHandler::CopyBuffer32_SwapRB_IsUnaligned(const u32 *src, u32 *d
return this->CopyBuffer32_SwapRB(src, dst, pixCount);
}
// Scalar implementation: scales the three 5-bit color fields of each 16-bit pixel in dst
// by intensity, in place, preserving bit 15.
//
// Returns the number of pixels handled, which is always pixCount.
size_t ColorspaceHandler::ApplyIntensityToBuffer16(u16 *dst, size_t pixCount, float intensity) const
{
	size_t i = 0;
	
	if (intensity > 0.999f)
	{
		// Full intensity without a swap leaves every pixel as it is.
		return pixCount;
	}
	else if (intensity < 0.001f)
	{
		// Zero intensity: all color bits become 0, only bit 15 survives.
		for (; i < pixCount; i++)
		{
			dst[i] = dst[i] & 0x8000;
		}
		
		return i;
	}
	
	// Intensity as a 0.16 fixed-point factor; (field * factor) >> 16 scales a field.
	const u16 intensity_u16 = (u16)(intensity * (float)(0xFFFF));
	
	for (; i < pixCount; i++)
	{
		const u16 outColor = dst[i];
		
		const u8 r = (u8)( (((outColor >>  0) & 0x1F) * intensity_u16) >> 16 );
		const u8 g = (u8)( (((outColor >>  5) & 0x1F) * intensity_u16) >> 16 );
		const u8 b = (u8)( (((outColor >> 10) & 0x1F) * intensity_u16) >> 16 );
		
		// Bit 15 does not fit in 8 bits, so it has to be kept in a 16-bit variable.
		const u16 a = outColor & 0x8000;
		
		dst[i] = (u16)( (r << 0) | (g << 5) | (b << 10) | a );
	}
	
	return i;
}
// Scalar implementation: applies COLOR5551_SWAP_RB to each 16-bit pixel in dst and then
// scales its three 5-bit color fields by intensity, in place, preserving bit 15.
//
// Returns the number of pixels handled, which is always pixCount.
size_t ColorspaceHandler::ApplyIntensityToBuffer16_SwapRB(u16 *dst, size_t pixCount, float intensity) const
{
	size_t i = 0;
	
	if (intensity > 0.999f)
	{
		// Full intensity: only the swap has to be performed.
		for (; i < pixCount; i++)
		{
			dst[i] = COLOR5551_SWAP_RB(dst[i]);
		}
		
		return i;
	}
	else if (intensity < 0.001f)
	{
		// Zero intensity: all color bits become 0, so the swap is irrelevant.
		for (; i < pixCount; i++)
		{
			dst[i] = dst[i] & 0x8000;
		}
		
		return i;
	}
	
	// Intensity as a 0.16 fixed-point factor; (field * factor) >> 16 scales a field.
	const u16 intensity_u16 = (u16)(intensity * (float)(0xFFFF));
	
	for (; i < pixCount; i++)
	{
		const u16 outColor = COLOR5551_SWAP_RB(dst[i]);
		
		const u8 r = (u8)( (((outColor >>  0) & 0x1F) * intensity_u16) >> 16 );
		const u8 g = (u8)( (((outColor >>  5) & 0x1F) * intensity_u16) >> 16 );
		const u8 b = (u8)( (((outColor >> 10) & 0x1F) * intensity_u16) >> 16 );
		
		// Bit 15 does not fit in 8 bits, so it has to be kept in a 16-bit variable.
		const u16 a = outColor & 0x8000;
		
		dst[i] = (u16)( (r << 0) | (g << 5) | (b << 10) | a );
	}
	
	return i;
}
// Unaligned-buffer entry point of the base handler. It simply forwards to
// ApplyIntensityToBuffer16() and returns that method's result.
size_t ColorspaceHandler::ApplyIntensityToBuffer16_IsUnaligned(u16 *dst, size_t pixCount, float intensity) const
{
return this->ApplyIntensityToBuffer16(dst, pixCount, intensity);
}
// Unaligned-buffer, channel-swapping entry point of the base handler. It simply forwards
// to ApplyIntensityToBuffer16_SwapRB() and returns that method's result.
size_t ColorspaceHandler::ApplyIntensityToBuffer16_SwapRB_IsUnaligned(u16 *dst, size_t pixCount, float intensity) const
{
return this->ApplyIntensityToBuffer16_SwapRB(dst, pixCount, intensity);
}
// Scalar implementation: multiplies the r, g and b components of each 32-bit pixel in dst
// by intensity, in place. The `a` component is never written by the scaling path, and the
// zero-intensity path keeps only the bits in 0xFF000000.
//
// Returns the number of pixels handled, which is always pixCount.
size_t ColorspaceHandler::ApplyIntensityToBuffer32(u32 *dst, size_t pixCount, float intensity) const
{
	// Full intensity without a swap leaves every pixel as it is.
	if (intensity > 0.999f)
	{
		return pixCount;
	}
	
	size_t pix = 0;
	
	// Zero intensity: wipe the color bits.
	if (intensity < 0.001f)
	{
		while (pix < pixCount)
		{
			dst[pix] &= 0xFF000000;
			pix++;
		}
		
		return pix;
	}
	
	// 0.16 fixed-point scale factor; (component * scale) >> 16 yields the scaled component.
	const u16 scale = (u16)(intensity * (float)(0xFFFF));
	
	while (pix < pixCount)
	{
		FragmentColor &px = (FragmentColor &)dst[pix];
		px.r = (u8)( ((u16)px.r * scale) >> 16 );
		px.g = (u8)( ((u16)px.g * scale) >> 16 );
		px.b = (u8)( ((u16)px.b * scale) >> 16 );
		pix++;
	}
	
	return pix;
}
// Scalar implementation: exchanges the r and b components of each 32-bit pixel in dst and
// multiplies r, g and b by intensity, in place. The `a` component is never written by the
// swapping/scaling paths, and the zero-intensity path keeps only the bits in 0xFF000000.
//
// Returns the number of pixels handled, which is always pixCount.
size_t ColorspaceHandler::ApplyIntensityToBuffer32_SwapRB(u32 *dst, size_t pixCount, float intensity) const
{
	size_t pix = 0;
	
	// Full intensity: only the r/b exchange is required.
	if (intensity > 0.999f)
	{
		while (pix < pixCount)
		{
			// Snapshot the pixel first so both components can be read after one is overwritten.
			FragmentColor before;
			before.color = dst[pix];
			
			FragmentColor &px = (FragmentColor &)dst[pix];
			px.r = before.b;
			px.b = before.r;
			pix++;
		}
		
		return pix;
	}
	
	// Zero intensity: the colors vanish, so the swap is irrelevant.
	if (intensity < 0.001f)
	{
		while (pix < pixCount)
		{
			dst[pix] &= 0xFF000000;
			pix++;
		}
		
		return pix;
	}
	
	// 0.16 fixed-point scale factor; (component * scale) >> 16 yields the scaled component.
	const u16 scale = (u16)(intensity * (float)(0xFFFF));
	
	while (pix < pixCount)
	{
		FragmentColor before;
		before.color = dst[pix];
		
		FragmentColor &px = (FragmentColor &)dst[pix];
		px.r = (u8)( ((u16)before.b * scale) >> 16 );
		px.g = (u8)( ((u16)before.g * scale) >> 16 );
		px.b = (u8)( ((u16)before.r * scale) >> 16 );
		pix++;
	}
	
	return pix;
}
// Unaligned-buffer entry point of the base handler. It simply forwards to
// ApplyIntensityToBuffer32() and returns that method's result.
size_t ColorspaceHandler::ApplyIntensityToBuffer32_IsUnaligned(u32 *dst, size_t pixCount, float intensity) const
{
return this->ApplyIntensityToBuffer32(dst, pixCount, intensity);
}
// Unaligned-buffer, channel-swapping entry point of the base handler. It simply forwards
// to ApplyIntensityToBuffer32_SwapRB() and returns that method's result.
size_t ColorspaceHandler::ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 *dst, size_t pixCount, float intensity) const
{
return this->ApplyIntensityToBuffer32_SwapRB(dst, pixCount, intensity);
}
template void ColorspaceConvertBuffer555To8888Opaque<true, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<true, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<false, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
@ -879,3 +1224,13 @@ template void ColorspaceCopyBuffer32<true, true>(const u32 *src, u32 *dst, size_
template void ColorspaceCopyBuffer32<true, false>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceCopyBuffer32<false, true>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceCopyBuffer32<false, false>(const u32 *src, u32 *dst, size_t pixCount);
// Explicit instantiations of the intensity functions for every
// <SWAP_RB, IS_UNALIGNED> combination, for both the 16-bit and the 32-bit buffer variants.
template void ColorspaceApplyIntensityToBuffer16<true, true>(u16 *dst, size_t pixCount, float intensity);
template void ColorspaceApplyIntensityToBuffer16<true, false>(u16 *dst, size_t pixCount, float intensity);
template void ColorspaceApplyIntensityToBuffer16<false, true>(u16 *dst, size_t pixCount, float intensity);
template void ColorspaceApplyIntensityToBuffer16<false, false>(u16 *dst, size_t pixCount, float intensity);
template void ColorspaceApplyIntensityToBuffer32<true, true>(u32 *dst, size_t pixCount, float intensity);
template void ColorspaceApplyIntensityToBuffer32<true, false>(u32 *dst, size_t pixCount, float intensity);
template void ColorspaceApplyIntensityToBuffer32<false, true>(u32 *dst, size_t pixCount, float intensity);
template void ColorspaceApplyIntensityToBuffer32<false, false>(u32 *dst, size_t pixCount, float intensity);

View File

@ -265,6 +265,65 @@ FORCEINLINE u32 ColorspaceCopy32(u32 srcColor)
return ColorspaceCopy32<SWAP_RB>(srcColorComponent);
}
// Scales the three 5-bit color fields of a single 16-bit color by intensity.
//
// Template parameters:
//   SWAP_RB - when true, COLOR5551_SWAP_RB is applied to srcColor first.
//
// Parameters:
//   srcColor  - source color; bit 15 is carried over to the result unchanged.
//   intensity - scale factor. Above 0.999 the (optionally swapped) color is returned as
//               is; below 0.001 only bit 15 is returned.
//
// Returns the resulting 16-bit color.
template <bool SWAP_RB>
FORCEINLINE u16 ColorspaceApplyIntensity16(u16 srcColor, float intensity)
{
	const u16 outColor = (SWAP_RB) ? COLOR5551_SWAP_RB(srcColor) : srcColor;
	
	if (intensity > 0.999f)
	{
		return outColor;
	}
	else if (intensity < 0.001f)
	{
		return (outColor & 0x8000);
	}
	
	// Intensity as a 0.16 fixed-point factor; (field * factor) >> 16 scales a field.
	const u16 intensity_u16 = (u16)(intensity * (float)(0xFFFF));
	
	const u8 r = (u8)( (((outColor >>  0) & 0x1F) * intensity_u16) >> 16 );
	const u8 g = (u8)( (((outColor >>  5) & 0x1F) * intensity_u16) >> 16 );
	const u8 b = (u8)( (((outColor >> 10) & 0x1F) * intensity_u16) >> 16 );
	
	// Bit 15 does not fit in 8 bits, so it has to be kept in a 16-bit variable.
	const u16 a = outColor & 0x8000;
	
	return (u16)( (r << 0) | (g << 5) | (b << 10) | a );
}
// Scales the r, g and b components of a single color by intensity and returns the packed
// 32-bit result. The `a` component is copied from srcColor; the zero-intensity path returns
// only the bits in 0xFF000000.
//
// Template parameters:
//   SWAP_RB - when true, the r and b components of srcColor are exchanged first.
//
// Parameters:
//   srcColor  - source color.
//   intensity - scale factor. Above 0.999 the (optionally swapped) color is returned as
//               is; below 0.001 the color bits are cleared.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceApplyIntensity32(FragmentColor srcColor, float intensity)
{
	// Build the (optionally swapped) working copy.
	FragmentColor result;
	if (SWAP_RB)
	{
		result.r = srcColor.b;
		result.b = srcColor.r;
	}
	else
	{
		result.r = srcColor.r;
		result.b = srcColor.b;
	}
	result.g = srcColor.g;
	result.a = srcColor.a;
	
	if (intensity > 0.999f)
	{
		return result.color;
	}
	
	if (intensity < 0.001f)
	{
		return (result.color & 0xFF000000);
	}
	
	// 0.16 fixed-point scale factor; (component * scale) >> 16 yields the scaled component.
	const u16 scale = (u16)(intensity * (float)(0xFFFF));
	
	result.r = (u8)( ((u16)result.r * scale) >> 16 );
	result.g = (u8)( ((u16)result.g * scale) >> 16 );
	result.b = (u8)( ((u16)result.b * scale) >> 16 );
	
	return result.color;
}
// Convenience overload taking a packed 32-bit color. The value is unpacked into a
// FragmentColor and handed to the FragmentColor overload together with intensity.
//
// Returns the packed 32-bit result of that overload.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceApplyIntensity32(u32 srcColor, float intensity)
{
	FragmentColor srcColorComponent;
	srcColorComponent.color = srcColor;
	
	// intensity must be forwarded: both overloads take two arguments, so a one-argument
	// call matches neither of them.
	return ColorspaceApplyIntensity32<SWAP_RB>(srcColorComponent, intensity);
}
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount);
@ -276,6 +335,9 @@ template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer888XTo8888
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceCopyBuffer16(const u16 *src, u16 *dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceCopyBuffer32(const u32 *src, u32 *dst, size_t pixCount);
// In-place intensity scaling of a whole pixel buffer (16-bit and 32-bit pixel variants).
// SWAP_RB requests a red/blue exchange; IS_UNALIGNED selects the unaligned handler methods.
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceApplyIntensityToBuffer16(u16 *dst, size_t pixCount, float intensity);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceApplyIntensityToBuffer32(u32 *dst, size_t pixCount, float intensity);
class ColorspaceHandler
{
public:
@ -321,6 +383,16 @@ public:
size_t CopyBuffer32_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t CopyBuffer32_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
// In-place intensity scaling of a pixel buffer, for 16-bit and 32-bit pixels.
// The _SwapRB variants also exchange the red and blue channels; the _IsUnaligned variants
// are the entry points used for buffers flagged as unaligned.
// Each method returns a pixel count: the ColorspaceApplyIntensityToBuffer16/32() templates
// call methods with these names on their handler object and resume their scalar loop at
// the returned index.
size_t ApplyIntensityToBuffer16(u16 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer16_SwapRB(u16 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer16_IsUnaligned(u16 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer16_SwapRB_IsUnaligned(u16 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer32(u32 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer32_SwapRB(u32 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer32_IsUnaligned(u32 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 *dst, size_t pixCount, float intensity) const;
};
FORCEINLINE FragmentColor MakeFragmentColor(const u8 r, const u8 g, const u8 b, const u8 a)

View File

@ -248,6 +248,62 @@ FORCEINLINE v256u32 ColorspaceCopy32_AVX2(const v256u32 &src)
return src;
}
// Scales the three 5-bit color fields of sixteen 16-bit pixels by intensity.
//
// Template parameters:
//   SWAP_RB - when true, the fields at bits 0-4 and bits 10-14 of every pixel are
//             exchanged first.
//
// Parameters:
//   src       - sixteen packed 16-bit pixels; bit 15 of each pixel is carried over.
//   intensity - scale factor. Above 0.999 the (optionally swapped) pixels are returned as
//               is; below 0.001 only bit 15 of each pixel is returned.
template<bool SWAP_RB>
FORCEINLINE v256u16 ColorspaceApplyIntensity16_AVX2(const v256u16 &src, float intensity)
{
	v256u16 tempSrc = src;
	
	if (SWAP_RB)
	{
		const v256u16 highToLow = _mm256_srli_epi16( _mm256_and_si256(src, _mm256_set1_epi16(0x7C00)), 10 );
		// The middle field occupies bits 5-9, i.e. mask 0x1F << 5 = 0x03E0.
		const v256u16 middle    = _mm256_and_si256(src, _mm256_set1_epi16(0x03E0));
		const v256u16 lowToHigh = _mm256_slli_epi16( _mm256_and_si256(src, _mm256_set1_epi16(0x001F)), 10 );
		const v256u16 topBit    = _mm256_and_si256(src, _mm256_set1_epi16(0x8000));
		
		tempSrc = _mm256_or_si256( _mm256_or_si256(highToLow, _mm256_or_si256(middle, lowToHigh)), topBit );
	}
	
	if (intensity > 0.999f)
	{
		return tempSrc;
	}
	else if (intensity < 0.001f)
	{
		return _mm256_and_si256(tempSrc, _mm256_set1_epi16(0x8000));
	}
	
	v256u16 r = _mm256_and_si256(                   tempSrc,      _mm256_set1_epi16(0x001F) );
	v256u16 g = _mm256_and_si256( _mm256_srli_epi16(tempSrc,  5), _mm256_set1_epi16(0x001F) );
	v256u16 b = _mm256_and_si256( _mm256_srli_epi16(tempSrc, 10), _mm256_set1_epi16(0x001F) );
	v256u16 a = _mm256_and_si256(                   tempSrc,      _mm256_set1_epi16(0x8000) );
	
	// Intensity as a 0.16 fixed-point factor; the multiply-high keeps (field * factor) >> 16.
	const v256u16 intensity_v256 = _mm256_set1_epi16( (u16)(intensity * (float)(0xFFFF)) );
	
	r =                    _mm256_mulhi_epu16(r, intensity_v256);
	g = _mm256_slli_epi16( _mm256_mulhi_epu16(g, intensity_v256),  5 );
	b = _mm256_slli_epi16( _mm256_mulhi_epu16(b, intensity_v256), 10 );
	
	return _mm256_or_si256( _mm256_or_si256( _mm256_or_si256(r, g), b), a);
}
// Scales the three low bytes of eight 32-bit pixels by intensity; the bits in 0xFF000000
// of every pixel are carried over unchanged.
//
// Template parameters:
//   SWAP_RB - when true, bytes 0 and 2 of every pixel are exchanged first.
//
// Parameters:
//   src       - eight packed 32-bit pixels.
//   intensity - scale factor. Above 0.999 the (optionally swapped) pixels are returned as
//               is; below 0.001 only the bits in 0xFF000000 are returned.
template<bool SWAP_RB>
FORCEINLINE v256u32 ColorspaceApplyIntensity32_AVX2(const v256u32 &src, float intensity)
{
	v256u32 pixels = src;
	
	if (SWAP_RB)
	{
		// Byte shuffle that exchanges bytes 0 and 2 of each 32-bit element.
		pixels = _mm256_shuffle_epi8(src, _mm256_set_epi8(31,28,29,30, 27,24,25,26, 23,20,21,22, 19,16,17,18, 15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2));
	}
	
	if (intensity > 0.999f)
	{
		return pixels;
	}
	
	if (intensity < 0.001f)
	{
		return _mm256_and_si256(pixels, _mm256_set1_epi32(0xFF000000));
	}
	
	// 0.16 fixed-point scale factor; the multiply-high keeps (component * scale) >> 16.
	const v256u16 scale = _mm256_set1_epi16( (u16)(intensity * (float)(0xFFFF)) );
	
	// Isolate each component in the low byte of its 32-bit element.
	const v256u16 rIn = _mm256_and_si256( pixels, _mm256_set1_epi32(0x000000FF) );
	const v256u16 gIn = _mm256_and_si256( _mm256_srli_epi32(pixels, 8), _mm256_set1_epi32(0x000000FF) );
	const v256u16 bIn = _mm256_and_si256( _mm256_srli_epi32(pixels, 16), _mm256_set1_epi32(0x000000FF) );
	const v256u32 aOut = _mm256_and_si256( pixels, _mm256_set1_epi32(0xFF000000) );
	
	// Scale, then move each component back to its byte position.
	const v256u16 rOut = _mm256_mulhi_epu16(rIn, scale);
	const v256u16 gOut = _mm256_slli_epi32( _mm256_mulhi_epu16(gIn, scale), 8 );
	const v256u16 bOut = _mm256_slli_epi32( _mm256_mulhi_epu16(bIn, scale), 16 );
	
	return _mm256_or_si256( _mm256_or_si256( _mm256_or_si256(rOut, gOut), bOut), aOut);
}
template <bool SWAP_RB, bool IS_UNALIGNED>
static size_t ColorspaceConvertBuffer555To8888Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec256)
{
@ -456,6 +512,160 @@ size_t ColorspaceCopyBuffer32_AVX2(const u32 *src, u32 *dst, size_t pixCountVec2
return i;
}
// AVX2 kernel: scales the three 5-bit color fields of the 16-bit pixels in dst by
// intensity, in place, sixteen pixels per iteration. Bit 15 of each pixel is preserved.
//
// Template parameters:
//   SWAP_RB      - when true, the fields at bits 0-4 and bits 10-14 are exchanged before
//                  the intensity is applied.
//   IS_UNALIGNED - when true, unaligned loads/stores are used; otherwise the aligned
//                  load/store intrinsics are used on dst.
//
// Parameters:
//   dst            - pixel buffer, modified in place.
//   pixCountVec256 - number of pixels to process; the loops advance 16 pixels at a time.
//   intensity      - scale factor. Above 0.999 colors are kept (only the optional swap is
//                    performed); below 0.001 every bit except bit 15 is cleared.
//
// Returns the index of the first pixel that was not processed.
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceApplyIntensityToBuffer16_AVX2(u16 *dst, size_t pixCountVec256, float intensity)
{
	size_t i = 0;
	
	if (intensity > 0.999f)
	{
		if (SWAP_RB)
		{
			for (; i < pixCountVec256; i+=16)
			{
				const v256u16 dst_v256 = (IS_UNALIGNED) ? _mm256_loadu_si256((v256u16 *)(dst+i)) : _mm256_load_si256((v256u16 *)(dst+i));
				
				const v256u16 highToLow = _mm256_srli_epi16( _mm256_and_si256(dst_v256, _mm256_set1_epi16(0x7C00)), 10 );
				// The middle field occupies bits 5-9, i.e. mask 0x1F << 5 = 0x03E0.
				const v256u16 middle    = _mm256_and_si256(dst_v256, _mm256_set1_epi16(0x03E0));
				const v256u16 lowToHigh = _mm256_slli_epi16( _mm256_and_si256(dst_v256, _mm256_set1_epi16(0x001F)), 10 );
				const v256u16 topBit    = _mm256_and_si256(dst_v256, _mm256_set1_epi16(0x8000));
				const v256u16 tempDst   = _mm256_or_si256( _mm256_or_si256(highToLow, _mm256_or_si256(middle, lowToHigh)), topBit );
				
				if (IS_UNALIGNED)
				{
					_mm256_storeu_si256( (v256u16 *)(dst+i), tempDst);
				}
				else
				{
					_mm256_store_si256( (v256u16 *)(dst+i), tempDst);
				}
			}
		}
		else
		{
			// Full intensity without a swap leaves every pixel as it is.
			return pixCountVec256;
		}
	}
	else if (intensity < 0.001f)
	{
		// Zero intensity: all color bits become 0, only bit 15 survives.
		for (; i < pixCountVec256; i+=16)
		{
			if (IS_UNALIGNED)
			{
				_mm256_storeu_si256( (v256u16 *)(dst+i), _mm256_and_si256(_mm256_loadu_si256((v256u16 *)(dst+i)), _mm256_set1_epi16(0x8000)) );
			}
			else
			{
				_mm256_store_si256( (v256u16 *)(dst+i), _mm256_and_si256(_mm256_load_si256((v256u16 *)(dst+i)), _mm256_set1_epi16(0x8000)) );
			}
		}
	}
	else
	{
		// Intensity as a 0.16 fixed-point factor; the multiply-high keeps (field * factor) >> 16.
		const v256u16 intensity_v256 = _mm256_set1_epi16( (u16)(intensity * (float)(0xFFFF)) );
		
		for (; i < pixCountVec256; i+=16)
		{
			const v256u16 dst_v256 = (IS_UNALIGNED) ? _mm256_loadu_si256((v256u16 *)(dst+i)) : _mm256_load_si256((v256u16 *)(dst+i));
			v256u16 tempDst = dst_v256;
			
			if (SWAP_RB)
			{
				const v256u16 highToLow = _mm256_srli_epi16( _mm256_and_si256(dst_v256, _mm256_set1_epi16(0x7C00)), 10 );
				const v256u16 middle    = _mm256_and_si256(dst_v256, _mm256_set1_epi16(0x03E0));
				const v256u16 lowToHigh = _mm256_slli_epi16( _mm256_and_si256(dst_v256, _mm256_set1_epi16(0x001F)), 10 );
				const v256u16 topBit    = _mm256_and_si256(dst_v256, _mm256_set1_epi16(0x8000));
				
				tempDst = _mm256_or_si256( _mm256_or_si256(highToLow, _mm256_or_si256(middle, lowToHigh)), topBit );
			}
			
			v256u16 r = _mm256_and_si256(                   tempDst,      _mm256_set1_epi16(0x001F) );
			v256u16 g = _mm256_and_si256( _mm256_srli_epi16(tempDst,  5), _mm256_set1_epi16(0x001F) );
			v256u16 b = _mm256_and_si256( _mm256_srli_epi16(tempDst, 10), _mm256_set1_epi16(0x001F) );
			v256u16 a = _mm256_and_si256(                   tempDst,      _mm256_set1_epi16(0x8000) );
			
			// The pixels are 16-bit lanes, so the fields are repositioned with 16-bit shifts.
			r =                    _mm256_mulhi_epu16(r, intensity_v256);
			g = _mm256_slli_epi16( _mm256_mulhi_epu16(g, intensity_v256),  5 );
			b = _mm256_slli_epi16( _mm256_mulhi_epu16(b, intensity_v256), 10 );
			
			tempDst = _mm256_or_si256( _mm256_or_si256( _mm256_or_si256(r, g), b), a);
			
			if (IS_UNALIGNED)
			{
				_mm256_storeu_si256((v256u16 *)(dst+i), tempDst);
			}
			else
			{
				_mm256_store_si256((v256u16 *)(dst+i), tempDst);
			}
		}
	}
	
	return i;
}
// AVX2 kernel: scales the three low bytes of the 32-bit pixels in dst by intensity, in
// place, eight pixels per iteration. The bits in 0xFF000000 of each pixel are preserved.
//
// Template parameters:
//   SWAP_RB      - when true, bytes 0 and 2 of every pixel are exchanged before the
//                  intensity is applied.
//   IS_UNALIGNED - when true, unaligned loads/stores are used; otherwise the aligned
//                  load/store intrinsics are used on dst.
//
// Parameters:
//   dst            - pixel buffer, modified in place.
//   pixCountVec256 - number of pixels to process; the loops advance 8 pixels at a time.
//   intensity      - scale factor. Above 0.999 colors are kept (only the optional swap is
//                    performed); below 0.001 the color bits are cleared.
//
// Returns the index of the first pixel that was not processed.
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceApplyIntensityToBuffer32_AVX2(u32 *dst, size_t pixCountVec256, float intensity)
{
	size_t pix = 0;
	
	if (intensity > 0.999f)
	{
		// Full intensity without a swap leaves every pixel as it is.
		if (!SWAP_RB)
		{
			return pixCountVec256;
		}
		
		while (pix < pixCountVec256)
		{
			v256u32 *const vecPtr = (v256u32 *)(dst+pix);
			const v256u32 loaded = (IS_UNALIGNED) ? _mm256_loadu_si256(vecPtr) : _mm256_load_si256(vecPtr);
			// Byte shuffle that exchanges bytes 0 and 2 of each 32-bit element.
			const v256u32 swapped = _mm256_shuffle_epi8(loaded, _mm256_set_epi8(31,28,29,30, 27,24,25,26, 23,20,21,22, 19,16,17,18, 15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2));
			
			if (IS_UNALIGNED)
			{
				_mm256_storeu_si256(vecPtr, swapped);
			}
			else
			{
				_mm256_store_si256(vecPtr, swapped);
			}
			
			pix += 8;
		}
		
		return pix;
	}
	
	if (intensity < 0.001f)
	{
		// Zero intensity: keep only the bits in 0xFF000000.
		while (pix < pixCountVec256)
		{
			v256u32 *const vecPtr = (v256u32 *)(dst+pix);
			
			if (IS_UNALIGNED)
			{
				_mm256_storeu_si256( vecPtr, _mm256_and_si256(_mm256_loadu_si256(vecPtr), _mm256_set1_epi32(0xFF000000)) );
			}
			else
			{
				_mm256_store_si256( vecPtr, _mm256_and_si256(_mm256_load_si256(vecPtr), _mm256_set1_epi32(0xFF000000)) );
			}
			
			pix += 8;
		}
		
		return pix;
	}
	
	// 0.16 fixed-point scale factor; the multiply-high keeps (component * scale) >> 16.
	const v256u16 scale = _mm256_set1_epi16( (u16)(intensity * (float)(0xFFFF)) );
	
	while (pix < pixCountVec256)
	{
		v256u32 *const vecPtr = (v256u32 *)(dst+pix);
		const v256u32 loaded = (IS_UNALIGNED) ? _mm256_loadu_si256(vecPtr) : _mm256_load_si256(vecPtr);
		
		v256u32 work = loaded;
		if (SWAP_RB)
		{
			work = _mm256_shuffle_epi8(loaded, _mm256_set_epi8(31,28,29,30, 27,24,25,26, 23,20,21,22, 19,16,17,18, 15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2));
		}
		
		// Isolate each component in the low byte of its 32-bit element.
		const v256u16 rIn = _mm256_and_si256( work, _mm256_set1_epi32(0x000000FF) );
		const v256u16 gIn = _mm256_and_si256( _mm256_srli_epi32(work, 8), _mm256_set1_epi32(0x000000FF) );
		const v256u16 bIn = _mm256_and_si256( _mm256_srli_epi32(work, 16), _mm256_set1_epi32(0x000000FF) );
		const v256u32 aOut = _mm256_and_si256( work, _mm256_set1_epi32(0xFF000000) );
		
		// Scale, then move each component back to its byte position.
		const v256u16 rOut = _mm256_mulhi_epu16(rIn, scale);
		const v256u16 gOut = _mm256_slli_epi32( _mm256_mulhi_epu16(gIn, scale), 8 );
		const v256u16 bOut = _mm256_slli_epi32( _mm256_mulhi_epu16(bIn, scale), 16 );
		
		work = _mm256_or_si256( _mm256_or_si256( _mm256_or_si256(rOut, gOut), bOut), aOut);
		
		if (IS_UNALIGNED)
		{
			_mm256_storeu_si256(vecPtr, work);
		}
		else
		{
			_mm256_store_si256(vecPtr, work);
		}
		
		pix += 8;
	}
	
	return pix;
}
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_AVX2<false, false>(src, dst, pixCount);
@ -616,6 +826,46 @@ size_t ColorspaceHandler_AVX2::CopyBuffer32_SwapRB_IsUnaligned(const u32 *src, u
return ColorspaceCopyBuffer32_AVX2<true, true>(src, dst, pixCount);
}
// 16-bit pixels, no channel swap, aligned access: runs the AVX2 kernel with
// SWAP_RB=false, IS_UNALIGNED=false and returns its result.
size_t ColorspaceHandler_AVX2::ApplyIntensityToBuffer16(u16 *dst, size_t pixCount, float intensity) const
{
return ColorspaceApplyIntensityToBuffer16_AVX2<false, false>(dst, pixCount, intensity);
}
// 16-bit pixels, with channel swap, aligned access: runs the AVX2 kernel with
// SWAP_RB=true, IS_UNALIGNED=false and returns its result.
size_t ColorspaceHandler_AVX2::ApplyIntensityToBuffer16_SwapRB(u16 *dst, size_t pixCount, float intensity) const
{
return ColorspaceApplyIntensityToBuffer16_AVX2<true, false>(dst, pixCount, intensity);
}
// 16-bit pixels, no channel swap, unaligned access: runs the AVX2 kernel with
// SWAP_RB=false, IS_UNALIGNED=true and returns its result.
size_t ColorspaceHandler_AVX2::ApplyIntensityToBuffer16_IsUnaligned(u16 *dst, size_t pixCount, float intensity) const
{
return ColorspaceApplyIntensityToBuffer16_AVX2<false, true>(dst, pixCount, intensity);
}
// 16-bit pixels, with channel swap, unaligned access: runs the AVX2 kernel with
// SWAP_RB=true, IS_UNALIGNED=true and returns its result.
size_t ColorspaceHandler_AVX2::ApplyIntensityToBuffer16_SwapRB_IsUnaligned(u16 *dst, size_t pixCount, float intensity) const
{
return ColorspaceApplyIntensityToBuffer16_AVX2<true, true>(dst, pixCount, intensity);
}
// 32-bit pixels, no channel swap, aligned access: runs the AVX2 kernel with
// SWAP_RB=false, IS_UNALIGNED=false and returns its result.
size_t ColorspaceHandler_AVX2::ApplyIntensityToBuffer32(u32 *dst, size_t pixCount, float intensity) const
{
return ColorspaceApplyIntensityToBuffer32_AVX2<false, false>(dst, pixCount, intensity);
}
// 32-bit pixels, with channel swap, aligned access: runs the AVX2 kernel with
// SWAP_RB=true, IS_UNALIGNED=false and returns its result.
size_t ColorspaceHandler_AVX2::ApplyIntensityToBuffer32_SwapRB(u32 *dst, size_t pixCount, float intensity) const
{
return ColorspaceApplyIntensityToBuffer32_AVX2<true, false>(dst, pixCount, intensity);
}
// 32-bit pixels, no channel swap, unaligned access: runs the AVX2 kernel with
// SWAP_RB=false, IS_UNALIGNED=true and returns its result.
size_t ColorspaceHandler_AVX2::ApplyIntensityToBuffer32_IsUnaligned(u32 *dst, size_t pixCount, float intensity) const
{
return ColorspaceApplyIntensityToBuffer32_AVX2<false, true>(dst, pixCount, intensity);
}
// 32-bit pixels, with channel swap, unaligned access: runs the AVX2 kernel with
// SWAP_RB=true, IS_UNALIGNED=true and returns its result.
size_t ColorspaceHandler_AVX2::ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 *dst, size_t pixCount, float intensity) const
{
return ColorspaceApplyIntensityToBuffer32_AVX2<true, true>(dst, pixCount, intensity);
}
template void ColorspaceConvert555To8888_AVX2<true>(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To8888_AVX2<false>(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
@ -649,4 +899,10 @@ template v256u16 ColorspaceCopy16_AVX2<false>(const v256u16 &src);
template v256u32 ColorspaceCopy32_AVX2<true>(const v256u32 &src);
template v256u32 ColorspaceCopy32_AVX2<false>(const v256u32 &src);
// Explicit instantiations of the per-vector intensity helpers for both SWAP_RB settings.
template v256u16 ColorspaceApplyIntensity16_AVX2<true>(const v256u16 &src, float intensity);
template v256u16 ColorspaceApplyIntensity16_AVX2<false>(const v256u16 &src, float intensity);
template v256u32 ColorspaceApplyIntensity32_AVX2<true>(const v256u32 &src, float intensity);
template v256u32 ColorspaceApplyIntensity32_AVX2<false>(const v256u32 &src, float intensity);
#endif // ENABLE_AVX2

View File

@ -37,6 +37,9 @@ template<bool SWAP_RB> v256u32 ColorspaceConvert888XTo8888Opaque_AVX2(const v256
template<bool SWAP_RB> v256u16 ColorspaceCopy16_AVX2(const v256u16 &src);
template<bool SWAP_RB> v256u32 ColorspaceCopy32_AVX2(const v256u32 &src);
// Per-vector intensity scaling of packed 16-bit / 32-bit pixels. SWAP_RB requests a
// red/blue exchange before the intensity is applied.
template<bool SWAP_RB> v256u16 ColorspaceApplyIntensity16_AVX2(const v256u16 &src, float intensity);
template<bool SWAP_RB> v256u32 ColorspaceApplyIntensity32_AVX2(const v256u32 &src, float intensity);
class ColorspaceHandler_AVX2 : public ColorspaceHandler
{
public:
@ -82,6 +85,16 @@ public:
size_t CopyBuffer32_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t CopyBuffer32_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
// AVX2 versions of the in-place buffer intensity methods, for 16-bit and 32-bit pixels.
// The _SwapRB variants also exchange the red and blue channels; the _IsUnaligned variants
// use unaligned loads and stores. Each method returns the value of the AVX2 kernel it calls.
size_t ApplyIntensityToBuffer16(u16 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer16_SwapRB(u16 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer16_IsUnaligned(u16 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer16_SwapRB_IsUnaligned(u16 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer32(u32 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer32_SwapRB(u32 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer32_IsUnaligned(u32 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 *dst, size_t pixCount, float intensity) const;
};
#endif // ENABLE_AVX2

View File

@ -292,6 +292,66 @@ FORCEINLINE v128u32 ColorspaceCopy32_SSE2(const v128u32 &src)
return src;
}
// Scales the three 5-bit color fields of eight 16-bit pixels by intensity.
//
// Template parameters:
//   SWAP_RB - when true, the fields at bits 0-4 and bits 10-14 of every pixel are
//             exchanged first.
//
// Parameters:
//   src       - eight packed 16-bit pixels; bit 15 of each pixel is carried over.
//   intensity - scale factor. Above 0.999 the (optionally swapped) pixels are returned as
//               is; below 0.001 only bit 15 of each pixel is returned.
template<bool SWAP_RB>
FORCEINLINE v128u16 ColorspaceApplyIntensity16_SSE2(const v128u16 &src, float intensity)
{
	v128u16 tempSrc = src;
	
	if (SWAP_RB)
	{
		const v128u16 highToLow = _mm_srli_epi16( _mm_and_si128(src, _mm_set1_epi16(0x7C00)), 10 );
		// The middle field occupies bits 5-9, i.e. mask 0x1F << 5 = 0x03E0.
		const v128u16 middle    = _mm_and_si128(src, _mm_set1_epi16(0x03E0));
		const v128u16 lowToHigh = _mm_slli_epi16( _mm_and_si128(src, _mm_set1_epi16(0x001F)), 10 );
		const v128u16 topBit    = _mm_and_si128(src, _mm_set1_epi16(0x8000));
		
		tempSrc = _mm_or_si128( _mm_or_si128(highToLow, _mm_or_si128(middle, lowToHigh)), topBit );
	}
	
	if (intensity > 0.999f)
	{
		return tempSrc;
	}
	else if (intensity < 0.001f)
	{
		return _mm_and_si128(tempSrc, _mm_set1_epi16(0x8000));
	}
	
	v128u16 r = _mm_and_si128(                tempSrc,      _mm_set1_epi16(0x001F) );
	v128u16 g = _mm_and_si128( _mm_srli_epi16(tempSrc,  5), _mm_set1_epi16(0x001F) );
	v128u16 b = _mm_and_si128( _mm_srli_epi16(tempSrc, 10), _mm_set1_epi16(0x001F) );
	v128u16 a = _mm_and_si128(                tempSrc,      _mm_set1_epi16(0x8000) );
	
	// Intensity as a 0.16 fixed-point factor; the multiply-high keeps (field * factor) >> 16.
	const v128u16 intensity_v128 = _mm_set1_epi16( (u16)(intensity * (float)(0xFFFF)) );
	
	r =                 _mm_mulhi_epu16(r, intensity_v128);
	g = _mm_slli_epi16( _mm_mulhi_epu16(g, intensity_v128),  5 );
	b = _mm_slli_epi16( _mm_mulhi_epu16(b, intensity_v128), 10 );
	
	return _mm_or_si128( _mm_or_si128( _mm_or_si128(r, g), b), a);
}
// Scales the three low bytes of four 32-bit pixels by intensity; the bits in 0xFF000000
// of every pixel are carried over unchanged.
//
// Template parameters:
//   SWAP_RB - when true, bytes 0 and 2 of every pixel are exchanged first (with a byte
//             shuffle when ENABLE_SSSE3 is defined, with masks and shifts otherwise).
//
// Parameters:
//   src       - four packed 32-bit pixels.
//   intensity - scale factor. Above 0.999 the (optionally swapped) pixels are returned as
//               is; below 0.001 only the bits in 0xFF000000 are returned.
template<bool SWAP_RB>
FORCEINLINE v128u32 ColorspaceApplyIntensity32_SSE2(const v128u32 &src, float intensity)
{
	v128u32 pixels = src;
	
	if (SWAP_RB)
	{
#ifdef ENABLE_SSSE3
		pixels = _mm_shuffle_epi8(src, _mm_set_epi8(15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2));
#else
		const v128u32 byte2To0 = _mm_srli_epi32( _mm_and_si128(src, _mm_set1_epi32(0x00FF0000)), 16 );
		const v128u32 byte1    = _mm_and_si128(src, _mm_set1_epi32(0x0000FF00));
		const v128u32 byte0To2 = _mm_slli_epi32( _mm_and_si128(src, _mm_set1_epi32(0x000000FF)), 16 );
		const v128u32 byte3    = _mm_and_si128(src, _mm_set1_epi32(0xFF000000));
		
		pixels = _mm_or_si128( _mm_or_si128(byte2To0, _mm_or_si128(byte1, byte0To2)), byte3 );
#endif
	}
	
	if (intensity > 0.999f)
	{
		return pixels;
	}
	
	if (intensity < 0.001f)
	{
		return _mm_and_si128(pixels, _mm_set1_epi32(0xFF000000));
	}
	
	// 0.16 fixed-point scale factor; the multiply-high keeps (component * scale) >> 16.
	const v128u16 scale = _mm_set1_epi16( (u16)(intensity * (float)(0xFFFF)) );
	
	// Isolate each component in the low byte of its 32-bit element.
	const v128u16 rIn = _mm_and_si128( pixels, _mm_set1_epi32(0x000000FF) );
	const v128u16 gIn = _mm_and_si128( _mm_srli_epi32(pixels, 8), _mm_set1_epi32(0x000000FF) );
	const v128u16 bIn = _mm_and_si128( _mm_srli_epi32(pixels, 16), _mm_set1_epi32(0x000000FF) );
	const v128u32 aOut = _mm_and_si128( pixels, _mm_set1_epi32(0xFF000000) );
	
	// Scale, then move each component back to its byte position.
	const v128u16 rOut = _mm_mulhi_epu16(rIn, scale);
	const v128u16 gOut = _mm_slli_epi32( _mm_mulhi_epu16(gIn, scale), 8 );
	const v128u16 bOut = _mm_slli_epi32( _mm_mulhi_epu16(bIn, scale), 16 );
	
	return _mm_or_si128( _mm_or_si128( _mm_or_si128(rOut, gOut), bOut), aOut);
}
template <bool SWAP_RB, bool IS_UNALIGNED>
static size_t ColorspaceConvertBuffer555To8888Opaque_SSE2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
{
@ -500,6 +560,167 @@ size_t ColorspaceCopyBuffer32_SSE2(const u32 *src, u32 *dst, size_t pixCountVec1
return i;
}
// SSE2 kernel: scales the three 5-bit color fields of the 16-bit pixels in dst by
// intensity, in place, eight pixels per iteration. Bit 15 of each pixel is preserved.
//
// Template parameters:
//   SWAP_RB      - when true, the fields at bits 0-4 and bits 10-14 are exchanged before
//                  the intensity is applied.
//   IS_UNALIGNED - when true, unaligned loads/stores are used; otherwise the aligned
//                  load/store intrinsics are used on dst.
//
// Parameters:
//   dst            - pixel buffer, modified in place.
//   pixCountVec128 - number of pixels to process; the loops advance 8 pixels at a time.
//   intensity      - scale factor. Above 0.999 colors are kept (only the optional swap is
//                    performed); below 0.001 every bit except bit 15 is cleared.
//
// Returns the index of the first pixel that was not processed.
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceApplyIntensityToBuffer16_SSE2(u16 *dst, size_t pixCountVec128, float intensity)
{
	size_t i = 0;
	
	if (intensity > 0.999f)
	{
		if (SWAP_RB)
		{
			for (; i < pixCountVec128; i+=8)
			{
				const v128u16 dst_v128 = (IS_UNALIGNED) ? _mm_loadu_si128((v128u16 *)(dst+i)) : _mm_load_si128((v128u16 *)(dst+i));
				
				const v128u16 highToLow = _mm_srli_epi16( _mm_and_si128(dst_v128, _mm_set1_epi16(0x7C00)), 10 );
				// The middle field occupies bits 5-9, i.e. mask 0x1F << 5 = 0x03E0.
				const v128u16 middle    = _mm_and_si128(dst_v128, _mm_set1_epi16(0x03E0));
				const v128u16 lowToHigh = _mm_slli_epi16( _mm_and_si128(dst_v128, _mm_set1_epi16(0x001F)), 10 );
				const v128u16 topBit    = _mm_and_si128(dst_v128, _mm_set1_epi16(0x8000));
				const v128u16 tempDst   = _mm_or_si128( _mm_or_si128(highToLow, _mm_or_si128(middle, lowToHigh)), topBit );
				
				if (IS_UNALIGNED)
				{
					_mm_storeu_si128( (v128u16 *)(dst+i), tempDst);
				}
				else
				{
					_mm_store_si128( (v128u16 *)(dst+i), tempDst);
				}
			}
		}
		else
		{
			// Full intensity without a swap leaves every pixel as it is.
			return pixCountVec128;
		}
	}
	else if (intensity < 0.001f)
	{
		// Zero intensity: all color bits become 0, only bit 15 survives.
		for (; i < pixCountVec128; i+=8)
		{
			if (IS_UNALIGNED)
			{
				_mm_storeu_si128( (v128u16 *)(dst+i), _mm_and_si128(_mm_loadu_si128((v128u16 *)(dst+i)), _mm_set1_epi16(0x8000)) );
			}
			else
			{
				_mm_store_si128( (v128u16 *)(dst+i), _mm_and_si128(_mm_load_si128((v128u16 *)(dst+i)), _mm_set1_epi16(0x8000)) );
			}
		}
	}
	else
	{
		// Intensity as a 0.16 fixed-point factor; the multiply-high keeps (field * factor) >> 16.
		const v128u16 intensity_v128 = _mm_set1_epi16( (u16)(intensity * (float)(0xFFFF)) );
		
		for (; i < pixCountVec128; i+=8)
		{
			const v128u16 dst_v128 = (IS_UNALIGNED) ? _mm_loadu_si128((v128u16 *)(dst+i)) : _mm_load_si128((v128u16 *)(dst+i));
			v128u16 tempDst = dst_v128;
			
			if (SWAP_RB)
			{
				const v128u16 highToLow = _mm_srli_epi16( _mm_and_si128(dst_v128, _mm_set1_epi16(0x7C00)), 10 );
				const v128u16 middle    = _mm_and_si128(dst_v128, _mm_set1_epi16(0x03E0));
				const v128u16 lowToHigh = _mm_slli_epi16( _mm_and_si128(dst_v128, _mm_set1_epi16(0x001F)), 10 );
				const v128u16 topBit    = _mm_and_si128(dst_v128, _mm_set1_epi16(0x8000));
				
				tempDst = _mm_or_si128( _mm_or_si128(highToLow, _mm_or_si128(middle, lowToHigh)), topBit );
			}
			
			v128u16 r = _mm_and_si128(                tempDst,      _mm_set1_epi16(0x001F) );
			v128u16 g = _mm_and_si128( _mm_srli_epi16(tempDst,  5), _mm_set1_epi16(0x001F) );
			v128u16 b = _mm_and_si128( _mm_srli_epi16(tempDst, 10), _mm_set1_epi16(0x001F) );
			v128u16 a = _mm_and_si128(                tempDst,      _mm_set1_epi16(0x8000) );
			
			r =                 _mm_mulhi_epu16(r, intensity_v128);
			g = _mm_slli_epi16( _mm_mulhi_epu16(g, intensity_v128),  5 );
			b = _mm_slli_epi16( _mm_mulhi_epu16(b, intensity_v128), 10 );
			
			tempDst = _mm_or_si128( _mm_or_si128( _mm_or_si128(r, g), b), a);
			
			if (IS_UNALIGNED)
			{
				_mm_storeu_si128((v128u16 *)(dst+i), tempDst);
			}
			else
			{
				_mm_store_si128((v128u16 *)(dst+i), tempDst);
			}
		}
	}
	
	return i;
}
// SSE2 kernel: scales the three low bytes of the 32-bit pixels in dst by intensity, in
// place, four pixels per iteration. The bits in 0xFF000000 of each pixel are preserved.
//
// Template parameters:
//   SWAP_RB      - when true, bytes 0 and 2 of every pixel are exchanged before the
//                  intensity is applied (byte shuffle when ENABLE_SSSE3 is defined, masks
//                  and shifts otherwise).
//   IS_UNALIGNED - when true, unaligned loads/stores are used; otherwise the aligned
//                  load/store intrinsics are used on dst.
//
// Parameters:
//   dst            - pixel buffer, modified in place.
//   pixCountVec128 - number of pixels to process; the loops advance 4 pixels at a time.
//   intensity      - scale factor. Above 0.999 colors are kept (only the optional swap is
//                    performed); below 0.001 the color bits are cleared.
//
// Returns the index of the first pixel that was not processed.
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceApplyIntensityToBuffer32_SSE2(u32 *dst, size_t pixCountVec128, float intensity)
{
	size_t pix = 0;
	
	if (intensity > 0.999f)
	{
		// Full intensity without a swap leaves every pixel as it is.
		if (!SWAP_RB)
		{
			return pixCountVec128;
		}
		
		while (pix < pixCountVec128)
		{
			v128u32 *const vecPtr = (v128u32 *)(dst+pix);
			const v128u32 loaded = (IS_UNALIGNED) ? _mm_loadu_si128(vecPtr) : _mm_load_si128(vecPtr);
			
#ifdef ENABLE_SSSE3
			const v128u32 swapped = _mm_shuffle_epi8(loaded, _mm_set_epi8(15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2));
#else
			const v128u32 byte2To0 = _mm_srli_epi32( _mm_and_si128(loaded, _mm_set1_epi32(0x00FF0000)), 16 );
			const v128u32 byte1    = _mm_and_si128(loaded, _mm_set1_epi32(0x0000FF00));
			const v128u32 byte0To2 = _mm_slli_epi32( _mm_and_si128(loaded, _mm_set1_epi32(0x000000FF)), 16 );
			const v128u32 byte3    = _mm_and_si128(loaded, _mm_set1_epi32(0xFF000000));
			const v128u32 swapped  = _mm_or_si128( _mm_or_si128(byte2To0, _mm_or_si128(byte1, byte0To2)), byte3 );
#endif
			
			if (IS_UNALIGNED)
			{
				_mm_storeu_si128(vecPtr, swapped);
			}
			else
			{
				_mm_store_si128(vecPtr, swapped);
			}
			
			pix += 4;
		}
		
		return pix;
	}
	
	if (intensity < 0.001f)
	{
		// Zero intensity: keep only the bits in 0xFF000000.
		while (pix < pixCountVec128)
		{
			v128u32 *const vecPtr = (v128u32 *)(dst+pix);
			
			if (IS_UNALIGNED)
			{
				_mm_storeu_si128( vecPtr, _mm_and_si128(_mm_loadu_si128(vecPtr), _mm_set1_epi32(0xFF000000)) );
			}
			else
			{
				_mm_store_si128( vecPtr, _mm_and_si128(_mm_load_si128(vecPtr), _mm_set1_epi32(0xFF000000)) );
			}
			
			pix += 4;
		}
		
		return pix;
	}
	
	// 0.16 fixed-point scale factor; the multiply-high keeps (component * scale) >> 16.
	const v128u16 scale = _mm_set1_epi16( (u16)(intensity * (float)(0xFFFF)) );
	
	while (pix < pixCountVec128)
	{
		v128u32 *const vecPtr = (v128u32 *)(dst+pix);
		const v128u32 loaded = (IS_UNALIGNED) ? _mm_loadu_si128(vecPtr) : _mm_load_si128(vecPtr);
		
		v128u32 work = loaded;
		if (SWAP_RB)
		{
#ifdef ENABLE_SSSE3
			work = _mm_shuffle_epi8(loaded, _mm_set_epi8(15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2));
#else
			const v128u32 byte2To0 = _mm_srli_epi32( _mm_and_si128(loaded, _mm_set1_epi32(0x00FF0000)), 16 );
			const v128u32 byte1    = _mm_and_si128(loaded, _mm_set1_epi32(0x0000FF00));
			const v128u32 byte0To2 = _mm_slli_epi32( _mm_and_si128(loaded, _mm_set1_epi32(0x000000FF)), 16 );
			const v128u32 byte3    = _mm_and_si128(loaded, _mm_set1_epi32(0xFF000000));
			
			work = _mm_or_si128( _mm_or_si128(byte2To0, _mm_or_si128(byte1, byte0To2)), byte3 );
#endif
		}
		
		// Isolate each component in the low byte of its 32-bit element.
		const v128u16 rIn = _mm_and_si128( work, _mm_set1_epi32(0x000000FF) );
		const v128u16 gIn = _mm_and_si128( _mm_srli_epi32(work, 8), _mm_set1_epi32(0x000000FF) );
		const v128u16 bIn = _mm_and_si128( _mm_srli_epi32(work, 16), _mm_set1_epi32(0x000000FF) );
		const v128u32 aOut = _mm_and_si128( work, _mm_set1_epi32(0xFF000000) );
		
		// Scale, then move each component back to its byte position.
		const v128u16 rOut = _mm_mulhi_epu16(rIn, scale);
		const v128u16 gOut = _mm_slli_epi32( _mm_mulhi_epu16(gIn, scale), 8 );
		const v128u16 bOut = _mm_slli_epi32( _mm_mulhi_epu16(bIn, scale), 16 );
		
		work = _mm_or_si128( _mm_or_si128( _mm_or_si128(rOut, gOut), bOut), aOut);
		
		if (IS_UNALIGNED)
		{
			_mm_storeu_si128(vecPtr, work);
		}
		else
		{
			_mm_store_si128(vecPtr, work);
		}
		
		pix += 4;
	}
	
	return pix;
}
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_SSE2<false, false>(src, dst, pixCount);
@ -660,6 +881,46 @@ size_t ColorspaceHandler_SSE2::CopyBuffer32_SwapRB_IsUnaligned(const u32 *src, u
return ColorspaceCopyBuffer32_SSE2<true, true>(src, dst, pixCount);
}
// SSE2 handler entry point for applying `intensity` in place to a 16-bit pixel buffer.
// Forwards to ColorspaceApplyIntensityToBuffer16_SSE2<false, false> and returns its result.
// NOTE(review): the template arguments are presumably <SWAP_RB, IS_UNALIGNED>, mirroring
// the 32-bit routine, i.e. no R/B swap and aligned access -- confirm against its definition.
size_t ColorspaceHandler_SSE2::ApplyIntensityToBuffer16(u16 *dst, size_t pixCount, float intensity) const
{
return ColorspaceApplyIntensityToBuffer16_SSE2<false, false>(dst, pixCount, intensity);
}
// SSE2 handler entry point for applying `intensity` in place to a 16-bit pixel buffer.
// Forwards to ColorspaceApplyIntensityToBuffer16_SSE2<true, false> and returns its result.
// NOTE(review): the template arguments are presumably <SWAP_RB, IS_UNALIGNED>, mirroring
// the 32-bit routine, i.e. R/B swap with aligned access -- confirm against its definition.
size_t ColorspaceHandler_SSE2::ApplyIntensityToBuffer16_SwapRB(u16 *dst, size_t pixCount, float intensity) const
{
return ColorspaceApplyIntensityToBuffer16_SSE2<true, false>(dst, pixCount, intensity);
}
// SSE2 handler entry point for applying `intensity` in place to a 16-bit pixel buffer.
// Forwards to ColorspaceApplyIntensityToBuffer16_SSE2<false, true> and returns its result.
// NOTE(review): the template arguments are presumably <SWAP_RB, IS_UNALIGNED>, mirroring
// the 32-bit routine, i.e. no R/B swap with unaligned access -- confirm against its definition.
size_t ColorspaceHandler_SSE2::ApplyIntensityToBuffer16_IsUnaligned(u16 *dst, size_t pixCount, float intensity) const
{
return ColorspaceApplyIntensityToBuffer16_SSE2<false, true>(dst, pixCount, intensity);
}
// SSE2 handler entry point for applying `intensity` in place to a 16-bit pixel buffer.
// Forwards to ColorspaceApplyIntensityToBuffer16_SSE2<true, true> and returns its result.
// NOTE(review): the template arguments are presumably <SWAP_RB, IS_UNALIGNED>, mirroring
// the 32-bit routine, i.e. R/B swap with unaligned access -- confirm against its definition.
size_t ColorspaceHandler_SSE2::ApplyIntensityToBuffer16_SwapRB_IsUnaligned(u16 *dst, size_t pixCount, float intensity) const
{
return ColorspaceApplyIntensityToBuffer16_SSE2<true, true>(dst, pixCount, intensity);
}
// SSE2 handler entry point for applying `intensity` in place to a 32-bit pixel buffer.
// Forwards to ColorspaceApplyIntensityToBuffer32_SSE2 with SWAP_RB = false and
// IS_UNALIGNED = false (no R/B swap, aligned loads/stores) and returns its result.
size_t ColorspaceHandler_SSE2::ApplyIntensityToBuffer32(u32 *dst, size_t pixCount, float intensity) const
{
return ColorspaceApplyIntensityToBuffer32_SSE2<false, false>(dst, pixCount, intensity);
}
// SSE2 handler entry point for applying `intensity` in place to a 32-bit pixel buffer.
// Forwards to ColorspaceApplyIntensityToBuffer32_SSE2 with SWAP_RB = true and
// IS_UNALIGNED = false (R/B swap, aligned loads/stores) and returns its result.
size_t ColorspaceHandler_SSE2::ApplyIntensityToBuffer32_SwapRB(u32 *dst, size_t pixCount, float intensity) const
{
return ColorspaceApplyIntensityToBuffer32_SSE2<true, false>(dst, pixCount, intensity);
}
// SSE2 handler entry point for applying `intensity` in place to a 32-bit pixel buffer.
// Forwards to ColorspaceApplyIntensityToBuffer32_SSE2 with SWAP_RB = false and
// IS_UNALIGNED = true (no R/B swap, unaligned loads/stores) and returns its result.
size_t ColorspaceHandler_SSE2::ApplyIntensityToBuffer32_IsUnaligned(u32 *dst, size_t pixCount, float intensity) const
{
return ColorspaceApplyIntensityToBuffer32_SSE2<false, true>(dst, pixCount, intensity);
}
// SSE2 handler entry point for applying `intensity` in place to a 32-bit pixel buffer.
// Forwards to ColorspaceApplyIntensityToBuffer32_SSE2 with SWAP_RB = true and
// IS_UNALIGNED = true (R/B swap, unaligned loads/stores) and returns its result.
size_t ColorspaceHandler_SSE2::ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 *dst, size_t pixCount, float intensity) const
{
return ColorspaceApplyIntensityToBuffer32_SSE2<true, true>(dst, pixCount, intensity);
}
// Explicit instantiations of ColorspaceConvert555To8888_SSE2 for both values of its
// bool template parameter.
template void ColorspaceConvert555To8888_SSE2<true>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_SSE2<false>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
@ -693,4 +954,10 @@ template v128u16 ColorspaceCopy16_SSE2<false>(const v128u16 &src);
// Explicit instantiations of the 32-bit copy helper for both values of its bool
// template parameter.
template v128u32 ColorspaceCopy32_SSE2<true>(const v128u32 &src);
template v128u32 ColorspaceCopy32_SSE2<false>(const v128u32 &src);
// Explicit instantiations of the single-vector intensity helpers (16-bit and 32-bit
// pixel vectors) for both values of their bool template parameter.
template v128u16 ColorspaceApplyIntensity16_SSE2<true>(const v128u16 &src, float intensity);
template v128u16 ColorspaceApplyIntensity16_SSE2<false>(const v128u16 &src, float intensity);
template v128u32 ColorspaceApplyIntensity32_SSE2<true>(const v128u32 &src, float intensity);
template v128u32 ColorspaceApplyIntensity32_SSE2<false>(const v128u32 &src, float intensity);
#endif // ENABLE_SSE2

View File

@ -37,6 +37,9 @@ template<bool SWAP_RB> v128u32 ColorspaceConvert888XTo8888Opaque_SSE2(const v128
// Single-vector copy helpers: take one 128-bit vector of pixels and return one,
// parameterized on SWAP_RB.
template<bool SWAP_RB> v128u16 ColorspaceCopy16_SSE2(const v128u16 &src);
template<bool SWAP_RB> v128u32 ColorspaceCopy32_SSE2(const v128u32 &src);
// Single-vector intensity helpers: take one 128-bit vector of pixels plus an
// intensity factor and return the resulting vector, parameterized on SWAP_RB.
// NOTE(review): the definitions are not visible here; presumably they apply the same
// per-channel scaling as the ApplyIntensityToBuffer routines -- confirm.
template<bool SWAP_RB> v128u16 ColorspaceApplyIntensity16_SSE2(const v128u16 &src, float intensity);
template<bool SWAP_RB> v128u32 ColorspaceApplyIntensity32_SSE2(const v128u32 &src, float intensity);
class ColorspaceHandler_SSE2 : public ColorspaceHandler
{
public:
@ -82,6 +85,16 @@ public:
size_t CopyBuffer32_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t CopyBuffer32_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
// In-place intensity application for 16-bit pixel buffers. The suffixes select the
// variant: _SwapRB for the R/B-swapping instantiation, _IsUnaligned for the one that
// uses unaligned memory access. Each returns the size_t result of the underlying
// SSE2 routine.
size_t ApplyIntensityToBuffer16(u16 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer16_SwapRB(u16 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer16_IsUnaligned(u16 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer16_SwapRB_IsUnaligned(u16 *dst, size_t pixCount, float intensity) const;
// In-place intensity application for 32-bit pixel buffers, with the same variant
// suffixes as the 16-bit methods.
size_t ApplyIntensityToBuffer32(u32 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer32_SwapRB(u32 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer32_IsUnaligned(u32 *dst, size_t pixCount, float intensity) const;
size_t ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 *dst, size_t pixCount, float intensity) const;
};
#endif // ENABLE_SSE2