- Avoid doing scalar lookups in the SSE2 portion of GPUEngineBase::ApplyMasterBrightness().
This commit is contained in:
rogerman 2016-01-05 22:06:55 +00:00
parent 76ba4e164d
commit a1280789db
1 changed file with 6 additions and 20 deletions

View File

@ -2815,20 +2815,13 @@ void GPUEngineBase::ApplyMasterBrightness()
size_t i = 0; size_t i = 0;
#ifdef ENABLE_SSE2 #ifdef ENABLE_SSE2
__m128i intensity_vec128 = _mm_set1_epi16(intensity);
const size_t ssePixCount = pixCount - (pixCount % 8); const size_t ssePixCount = pixCount - (pixCount % 8);
for (; i < ssePixCount; i += 8) for (; i < ssePixCount; i += 8)
{ {
__m128i dstColor_vec128 = _mm_load_si128((__m128i *)(dst + i)); __m128i dstColor_vec128 = _mm_load_si128((__m128i *)(dst + i));
dstColor_vec128 = _mm_and_si128(dstColor_vec128, _mm_set1_epi16(0x7FFF)); _mm_store_si128( (__m128i *)(dst + i), this->_ColorEffectIncreaseBrightness(dstColor_vec128, intensity_vec128) );
dst[i+7] = GPUEngineBase::_fadeInColors[intensity][ _mm_extract_epi16(dstColor_vec128, 7) ];
dst[i+6] = GPUEngineBase::_fadeInColors[intensity][ _mm_extract_epi16(dstColor_vec128, 6) ];
dst[i+5] = GPUEngineBase::_fadeInColors[intensity][ _mm_extract_epi16(dstColor_vec128, 5) ];
dst[i+4] = GPUEngineBase::_fadeInColors[intensity][ _mm_extract_epi16(dstColor_vec128, 4) ];
dst[i+3] = GPUEngineBase::_fadeInColors[intensity][ _mm_extract_epi16(dstColor_vec128, 3) ];
dst[i+2] = GPUEngineBase::_fadeInColors[intensity][ _mm_extract_epi16(dstColor_vec128, 2) ];
dst[i+1] = GPUEngineBase::_fadeInColors[intensity][ _mm_extract_epi16(dstColor_vec128, 1) ];
dst[i+0] = GPUEngineBase::_fadeInColors[intensity][ _mm_extract_epi16(dstColor_vec128, 0) ];
} }
#endif #endif
for (; i < pixCount; i++) for (; i < pixCount; i++)
@ -2851,20 +2844,13 @@ void GPUEngineBase::ApplyMasterBrightness()
size_t i = 0; size_t i = 0;
#ifdef ENABLE_SSE2 #ifdef ENABLE_SSE2
__m128i intensity_vec128 = _mm_set1_epi16(intensity);
const size_t ssePixCount = pixCount - (pixCount % 8); const size_t ssePixCount = pixCount - (pixCount % 8);
for (; i < ssePixCount; i += 8) for (; i < ssePixCount; i += 8)
{ {
__m128i dstColor_vec128 = _mm_load_si128((__m128i *)(dst + i)); __m128i dstColor_vec128 = _mm_load_si128((__m128i *)(dst + i));
dstColor_vec128 = _mm_and_si128(dstColor_vec128, _mm_set1_epi16(0x7FFF)); _mm_store_si128( (__m128i *)(dst + i), this->_ColorEffectDecreaseBrightness(dstColor_vec128, intensity_vec128) );
dst[i+7] = GPUEngineBase::_fadeOutColors[intensity][ _mm_extract_epi16(dstColor_vec128, 7) ];
dst[i+6] = GPUEngineBase::_fadeOutColors[intensity][ _mm_extract_epi16(dstColor_vec128, 6) ];
dst[i+5] = GPUEngineBase::_fadeOutColors[intensity][ _mm_extract_epi16(dstColor_vec128, 5) ];
dst[i+4] = GPUEngineBase::_fadeOutColors[intensity][ _mm_extract_epi16(dstColor_vec128, 4) ];
dst[i+3] = GPUEngineBase::_fadeOutColors[intensity][ _mm_extract_epi16(dstColor_vec128, 3) ];
dst[i+2] = GPUEngineBase::_fadeOutColors[intensity][ _mm_extract_epi16(dstColor_vec128, 2) ];
dst[i+1] = GPUEngineBase::_fadeOutColors[intensity][ _mm_extract_epi16(dstColor_vec128, 1) ];
dst[i+0] = GPUEngineBase::_fadeOutColors[intensity][ _mm_extract_epi16(dstColor_vec128, 0) ];
} }
#endif #endif
for (; i < pixCount; i++) for (; i < pixCount; i++)