GPU:
- Add 555-to-6665 opaque color conversion.
- Add UNALIGNED switch to the 555-to-8888, 555-to-6665, 8888-to-5551, and 6665-to-5551 color buffer conversion functions, allowing clients to inform these functions that the incoming buffer pointers may not be 16-byte aligned.
- Rendered lines from GPUEngineBase::_HandleDisplayModeOff(), GPUEngineA::_HandleDisplayModeVRAM(), and GPUEngineA::_HandleDisplayModeMainMemory() now output colors with the alpha bits filled in. This is working towards a time when clients that work directly in 16-bit and 32-bit colorspaces don’t have to fill in the alpha bits themselves.
- Unify more color conversion code.
This commit is contained in:
parent
d1a8663acb
commit
4d2307538d
|
@ -50,6 +50,7 @@
|
|||
u32 Render3DFramesPerSecond;
|
||||
|
||||
CACHE_ALIGN u32 color_555_to_6665_opaque[32768];
|
||||
CACHE_ALIGN u32 color_555_to_6665_opaque_swap_rb[32768];
|
||||
CACHE_ALIGN u32 color_555_to_666[32768];
|
||||
CACHE_ALIGN u32 color_555_to_8888_opaque[32768];
|
||||
CACHE_ALIGN u32 color_555_to_8888_opaque_swap_rb[32768];
|
||||
|
@ -4387,15 +4388,15 @@ void GPUEngineBase::_HandleDisplayModeOff(const size_t l)
|
|||
switch (GPU->GetDisplayInfo().colorFormat)
|
||||
{
|
||||
case NDSColorFormat_BGR555_Rev:
|
||||
memset_u16_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>((u16 *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH), 0x7FFF);
|
||||
memset_u16_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>((u16 *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH), 0xFFFF);
|
||||
break;
|
||||
|
||||
case NDSColorFormat_BGR666_Rev:
|
||||
memset_u32_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>((u32 *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH), 0x003F3F3F);
|
||||
memset_u32_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>((u32 *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH), 0x1F3F3F3F);
|
||||
break;
|
||||
|
||||
case NDSColorFormat_BGR888_Rev:
|
||||
memset_u32_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>((u32 *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH), 0x00FFFFFF);
|
||||
memset_u32_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>((u32 *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH), 0xFFFFFFFF);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -5915,11 +5916,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
|
|||
{
|
||||
const u16 *src = this->_VRAMNativeBlockPtr[DISPCNT.VRAM_Block] + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
FragmentColor *dst = (FragmentColor *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
|
||||
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH; i++)
|
||||
{
|
||||
dst[i].color = COLOR555TO6665_OPAQUE(src[i] & 0x7FFF);
|
||||
}
|
||||
ConvertColorBuffer555To6665Opaque<false, false>(src, (u32 *)dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -5927,11 +5924,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
|
|||
{
|
||||
const u16 *src = this->_VRAMNativeBlockPtr[DISPCNT.VRAM_Block] + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
FragmentColor *dst = (FragmentColor *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
|
||||
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH; i++)
|
||||
{
|
||||
dst[i].color = COLOR555TO8888_OPAQUE(src[i] & 0x7FFF);
|
||||
}
|
||||
ConvertColorBuffer555To8888Opaque<false, false>(src, (u32 *)dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -5951,11 +5944,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
|
|||
{
|
||||
const u16 *src = this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + (_gpuDstLineIndex[l] * customWidth);
|
||||
FragmentColor *dst = (FragmentColor *)this->customBuffer + (_gpuDstLineIndex[l] * customWidth);
|
||||
|
||||
for (size_t i = 0; i < customPixCount; i++)
|
||||
{
|
||||
dst[i].color = COLOR555TO6665_OPAQUE(src[i] & 0x7FFF);
|
||||
}
|
||||
ConvertColorBuffer555To6665Opaque<false, false>(src, (u32 *)dst, customPixCount);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -5963,11 +5952,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
|
|||
{
|
||||
const u16 *src = this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + (_gpuDstLineIndex[l] * customWidth);
|
||||
FragmentColor *dst = (FragmentColor *)this->customBuffer + (_gpuDstLineIndex[l] * customWidth);
|
||||
|
||||
for (size_t i = 0; i < customPixCount; i++)
|
||||
{
|
||||
dst[i].color = COLOR555TO8888_OPAQUE(src[i] & 0x7FFF);
|
||||
}
|
||||
ConvertColorBuffer555To8888Opaque<false, false>(src, (u32 *)dst, customPixCount);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -5993,17 +5978,17 @@ void GPUEngineA::_HandleDisplayModeMainMemory(const size_t l)
|
|||
u32 *dst = dstColorLine;
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
const __m128i fifoMask = _mm_set1_epi32(0x7FFF7FFF);
|
||||
const __m128i alphaBit = _mm_set1_epi16(0x8000);
|
||||
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16) / sizeof(__m128i); i++)
|
||||
{
|
||||
__m128i fifoColor = _mm_set_epi32(DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv());
|
||||
fifoColor = _mm_shuffle_epi32(fifoColor, 0x1B); // We need to shuffle the four FIFO values back into the correct order, since they were originally loaded in reverse order.
|
||||
_mm_store_si128((__m128i *)dst + i, _mm_and_si128(fifoColor, fifoMask));
|
||||
_mm_store_si128((__m128i *)dst + i, _mm_or_si128(fifoColor, alphaBit));
|
||||
}
|
||||
#else
|
||||
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16) / sizeof(u32); i++)
|
||||
{
|
||||
dst[i] = DISP_FIFOrecv() & 0x7FFF7FFF;
|
||||
dst[i] = DISP_FIFOrecv() | 0x80008000;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
|
@ -6323,14 +6308,17 @@ GPUSubsystem::GPUSubsystem()
|
|||
|
||||
if (needInitTables)
|
||||
{
|
||||
#define RGB15TO18_BITLOGIC(col) ( (material_5bit_to_6bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | material_5bit_to_6bit[(col)&0x1F] )
|
||||
#define RGB15TO24_BITLOGIC(col) ( (material_5bit_to_8bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | material_5bit_to_8bit[(col)&0x1F] )
|
||||
#define RGB15TO24_SWAP_RB_BITLOGIC(col) ( material_5bit_to_8bit[((col)>>10)&0x1F] | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_8bit[(col)&0x1F]<<16) )
|
||||
#define RGB15TO18_BITLOGIC(col) ( (material_5bit_to_6bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | material_5bit_to_6bit[(col)&0x1F] )
|
||||
#define RGB15TO18_SWAP_RB_BITLOGIC(col) ( material_5bit_to_6bit[((col)>>10)&0x1F] | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_6bit[(col)&0x1F]<<16) )
|
||||
#define RGB15TO24_BITLOGIC(col) ( (material_5bit_to_8bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | material_5bit_to_8bit[(col)&0x1F] )
|
||||
#define RGB15TO24_SWAP_RB_BITLOGIC(col) ( material_5bit_to_8bit[((col)>>10)&0x1F] | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_8bit[(col)&0x1F]<<16) )
|
||||
|
||||
for (size_t i = 0; i < 32768; i++)
|
||||
{
|
||||
color_555_to_666[i] = LE_TO_LOCAL_32( RGB15TO18_BITLOGIC(i) );
|
||||
color_555_to_6665_opaque[i] = LE_TO_LOCAL_32( RGB15TO18_BITLOGIC(i) | 0x1F000000 );
|
||||
color_555_to_6665_opaque_swap_rb[i] = LE_TO_LOCAL_32( RGB15TO18_SWAP_RB_BITLOGIC(i) | 0x1F000000 );
|
||||
|
||||
color_555_to_888[i] = LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) );
|
||||
color_555_to_8888_opaque[i] = LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) | 0xFF000000 );
|
||||
color_555_to_8888_opaque_swap_rb[i] = LE_TO_LOCAL_32( RGB15TO24_SWAP_RB_BITLOGIC(i) | 0xFF000000 );
|
||||
|
@ -7081,8 +7069,8 @@ void NDSDisplay::SetEngineByID(const GPUEngineID theID)
|
|||
this->_gpu->SetDisplayByID(this->_ID);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
void ConvertColorBuffer555To8888Opaque(const u16 *__restrict src, u32 *dst, size_t pixCount)
|
||||
template <bool SWAP_RB, bool UNALIGNED>
|
||||
void ConvertColorBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
|
@ -7090,12 +7078,20 @@ void ConvertColorBuffer555To8888Opaque(const u16 *__restrict src, u32 *dst, size
|
|||
const size_t ssePixCount = pixCount - (pixCount % 8);
|
||||
for (; i < ssePixCount; i += 8)
|
||||
{
|
||||
__m128i src_vec128 = _mm_load_si128((__m128i *)(src + i));
|
||||
__m128i src_vec128 = (UNALIGNED) ? _mm_loadu_si128((__m128i *)(src + i)) : _mm_load_si128((__m128i *)(src + i));
|
||||
__m128i dstConvertedLo, dstConvertedHi;
|
||||
ConvertColor555To8888Opaque<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
|
||||
|
||||
_mm_store_si128((__m128i *)(dst + i + 0), dstConvertedLo);
|
||||
_mm_store_si128((__m128i *)(dst + i + 4), dstConvertedHi);
|
||||
if (UNALIGNED)
|
||||
{
|
||||
_mm_storeu_si128((__m128i *)(dst + i + 0), dstConvertedLo);
|
||||
_mm_storeu_si128((__m128i *)(dst + i + 4), dstConvertedHi);
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_store_si128((__m128i *)(dst + i + 0), dstConvertedLo);
|
||||
_mm_store_si128((__m128i *)(dst + i + 4), dstConvertedHi);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -7105,6 +7101,38 @@ void ConvertColorBuffer555To8888Opaque(const u16 *__restrict src, u32 *dst, size
|
|||
}
|
||||
}
|
||||
|
||||
// Converts a buffer of 16-bit 555 colors into 32-bit opaque 6665 colors.
//
// Template parameters:
//   SWAP_RB   - forwarded to ConvertColor555To6665Opaque(); selects the variant that swaps
//               the first and third color components in the output.
//   UNALIGNED - when true, the SSE2 path uses unaligned loads/stores (_mm_loadu_si128 /
//               _mm_storeu_si128) so that src and dst need not be 16-byte aligned. When
//               false, the aligned intrinsics are used and both pointers must be 16-byte
//               aligned.
//
// Parameters:
//   src      - source buffer of pixCount 16-bit colors.
//   dst      - destination buffer receiving pixCount 32-bit colors.
//   pixCount - number of pixels to convert.
template <bool SWAP_RB, bool UNALIGNED>
|
||||
void ConvertColorBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
// Vectorized path: handle the largest multiple of 8 pixels, 8 pixels per iteration.
const size_t ssePixCount = pixCount - (pixCount % 8);
|
||||
for (; i < ssePixCount; i += 8)
|
||||
{
|
||||
// One 128-bit load brings in eight 16-bit source colors.
__m128i src_vec128 = (UNALIGNED) ? _mm_loadu_si128((__m128i *)(src + i)) : _mm_load_si128((__m128i *)(src + i));
|
||||
__m128i dstConvertedLo, dstConvertedHi;
|
||||
ConvertColor555To6665Opaque<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
|
||||
|
||||
// Eight 32-bit results span two 128-bit vectors: pixels i..i+3 and i+4..i+7.
if (UNALIGNED)
|
||||
{
|
||||
_mm_storeu_si128((__m128i *)(dst + i + 0), dstConvertedLo);
|
||||
_mm_storeu_si128((__m128i *)(dst + i + 4), dstConvertedHi);
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_store_si128((__m128i *)(dst + i + 0), dstConvertedLo);
|
||||
_mm_store_si128((__m128i *)(dst + i + 4), dstConvertedHi);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Scalar path: converts the leftover pixels after the SSE2 loop, or every pixel when
// ENABLE_SSE2 is not defined (i is still 0 in that case).
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ConvertColor555To6665Opaque<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
void ConvertColorBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount)
|
||||
{
|
||||
|
@ -7143,7 +7171,7 @@ void ConvertColorBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount)
|
|||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
template <bool SWAP_RB, bool UNALIGNED>
|
||||
void ConvertColorBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
@ -7152,7 +7180,14 @@ void ConvertColorBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst
|
|||
const size_t ssePixCount = pixCount - (pixCount % 8);
|
||||
for (; i < ssePixCount; i += 8)
|
||||
{
|
||||
_mm_store_si128( (__m128i *)(dst + i), ConvertColor8888To5551<SWAP_RB>(_mm_load_si128((__m128i *)(src + i)), _mm_load_si128((__m128i *)(src + i + 4))) );
|
||||
if (UNALIGNED)
|
||||
{
|
||||
_mm_storeu_si128( (__m128i *)(dst + i), ConvertColor8888To5551<SWAP_RB>(_mm_loadu_si128((__m128i *)(src + i)), _mm_loadu_si128((__m128i *)(src + i + 4))) );
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_store_si128( (__m128i *)(dst + i), ConvertColor8888To5551<SWAP_RB>(_mm_load_si128((__m128i *)(src + i)), _mm_load_si128((__m128i *)(src + i + 4))) );
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -7162,7 +7197,7 @@ void ConvertColorBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst
|
|||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
template <bool SWAP_RB, bool UNALIGNED>
|
||||
void ConvertColorBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
@ -7171,7 +7206,14 @@ void ConvertColorBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst
|
|||
const size_t ssePixCount = pixCount - (pixCount % 8);
|
||||
for (; i < ssePixCount; i += 8)
|
||||
{
|
||||
_mm_store_si128( (__m128i *)(dst + i), ConvertColor6665To5551<SWAP_RB>(_mm_load_si128((__m128i *)(src + i)), _mm_load_si128((__m128i *)(src + i + 4))) );
|
||||
if (UNALIGNED)
|
||||
{
|
||||
_mm_storeu_si128( (__m128i *)(dst + i), ConvertColor6665To5551<SWAP_RB>(_mm_loadu_si128((__m128i *)(src + i)), _mm_loadu_si128((__m128i *)(src + i + 4))) );
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_store_si128( (__m128i *)(dst + i), ConvertColor6665To5551<SWAP_RB>(_mm_load_si128((__m128i *)(src + i)), _mm_load_si128((__m128i *)(src + i + 4))) );
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -7217,8 +7259,15 @@ template void GPUEngineBase::RenderLayerBG<GPULayerID_BG1>(u16 *dstColorBuffer);
|
|||
template void GPUEngineBase::RenderLayerBG<GPULayerID_BG2>(u16 *dstColorBuffer);
|
||||
template void GPUEngineBase::RenderLayerBG<GPULayerID_BG3>(u16 *dstColorBuffer);
|
||||
|
||||
template void ConvertColorBuffer555To8888Opaque<true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer555To8888Opaque<false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer555To8888Opaque<true, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer555To8888Opaque<true, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer555To8888Opaque<false, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer555To8888Opaque<false, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
|
||||
template void ConvertColorBuffer555To6665Opaque<true, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer555To6665Opaque<true, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer555To6665Opaque<false, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer555To6665Opaque<false, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
|
||||
template void ConvertColorBuffer8888To6665<true>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
template void ConvertColorBuffer8888To6665<false>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
|
@ -7226,8 +7275,12 @@ template void ConvertColorBuffer8888To6665<false>(const u32 *src, u32 *dst, size
|
|||
template void ConvertColorBuffer6665To8888<true>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
template void ConvertColorBuffer6665To8888<false>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
|
||||
template void ConvertColorBuffer8888To5551<true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer8888To5551<false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer8888To5551<true, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer8888To5551<true, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer8888To5551<false, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer8888To5551<false, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
|
||||
template void ConvertColorBuffer6665To5551<true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer6665To5551<false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer6665To5551<true, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer6665To5551<true, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer6665To5551<false, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer6665To5551<false, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
|
|
|
@ -1638,12 +1638,14 @@ extern CACHE_ALIGN const u8 material_3bit_to_6bit[8];
|
|||
extern CACHE_ALIGN const u8 material_3bit_to_8bit[8];
|
||||
|
||||
extern CACHE_ALIGN u32 color_555_to_6665_opaque[32768];
|
||||
extern CACHE_ALIGN u32 color_555_to_6665_opaque_swap_rb[32768];
|
||||
extern CACHE_ALIGN u32 color_555_to_666[32768];
|
||||
extern CACHE_ALIGN u32 color_555_to_8888_opaque[32768];
|
||||
extern CACHE_ALIGN u32 color_555_to_8888_opaque_swap_rb[32768];
|
||||
extern CACHE_ALIGN u32 color_555_to_888[32768];
|
||||
|
||||
#define COLOR555TO6665_OPAQUE(col) (color_555_to_6665_opaque[(col)]) // Convert a 15-bit color to an opaque sparsely packed 32-bit color containing an RGBA6665 color
|
||||
#define COLOR555TO6665_OPAQUE_SWAP_RB(col) (color_555_to_6665_opaque_swap_rb[(col)]) // Convert a 15-bit color to an opaque sparsely packed 32-bit color containing an RGBA6665 color with R and B components swapped
|
||||
#define COLOR555TO666(col) (color_555_to_666[(col)]) // Convert a 15-bit color to a fully transparent sparsely packed 32-bit color containing an RGBA6665 color
|
||||
|
||||
#ifdef LOCAL_LE
|
||||
|
@ -1681,6 +1683,12 @@ FORCEINLINE u32 ConvertColor555To8888Opaque(const u16 src)
|
|||
return (SWAP_RB) ? COLOR555TO8888_OPAQUE_SWAP_RB(src & 0x7FFF) : COLOR555TO8888_OPAQUE(src & 0x7FFF);
|
||||
}
|
||||
|
||||
// Converts a single 16-bit 555 color to an opaque 32-bit 6665 color using the precomputed
// lookup tables behind COLOR555TO6665_OPAQUE / COLOR555TO6665_OPAQUE_SWAP_RB.
// SWAP_RB selects the table whose entries have the R and B components swapped.
// Returns the 32-bit table entry for the color.
template <bool SWAP_RB>
|
||||
FORCEINLINE u32 ConvertColor555To6665Opaque(const u16 src)
|
||||
{
|
||||
// Bit 15 is masked off so the index always stays inside the 32768-entry tables.
return (SWAP_RB) ? COLOR555TO6665_OPAQUE_SWAP_RB(src & 0x7FFF) : COLOR555TO6665_OPAQUE(src & 0x7FFF);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u32 ConvertColor8888To6665(u32 srcColor)
|
||||
{
|
||||
|
@ -1790,6 +1798,41 @@ FORCEINLINE void ConvertColor555To8888Opaque(const __m128i src, __m128i &dstLo,
|
|||
dstHi = _mm_or_si128( _mm_and_si128(_mm_shuffle_epi32(tmpDstLo, 0x72), _mm_set_epi32(0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF)), _mm_and_si128(_mm_shuffle_epi32(dstHi, 0xD8), _mm_set_epi32(0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000)) );
|
||||
}
|
||||
|
||||
// SSE2 version: converts eight packed 16-bit 555 colors into eight opaque 32-bit 6665 colors.
//
// Parameters:
//   src   - eight 16-bit source colors; bit 15 of each color is never selected by the masks.
//   dstLo - receives the converted colors for source pixels 0..3, in order.
//   dstHi - receives the converted colors for source pixels 4..7, in order.
//
// Each 5-bit field (source bits 0-4, 5-9, 10-14) is widened to 6 bits and written to output
// bytes 0, 1 and 2 respectively, or to bytes 2, 1 and 0 when SWAP_RB is true. The top byte of
// every output color is set to 0x1F.
template <bool SWAP_RB>
|
||||
FORCEINLINE void ConvertColor555To6665Opaque(const __m128i src, __m128i &dstLo, __m128i &dstHi)
|
||||
{
|
||||
// Conversion algorithm:
|
||||
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
|
||||
// The source is processed as four 32-bit lanes, each holding two 16-bit colors. Until the
// final interleave, dstLo holds the conversions of the colors in the low halves of the lanes
// (pixels 0, 2, 4, 6) and dstHi those in the high halves (pixels 1, 3, 5, 7).
if (SWAP_RB)
|
||||
{
|
||||
dstLo = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src, 17), _mm_set1_epi32(0x003E0000)), _mm_and_si128(_mm_slli_epi32(src, 12), _mm_set1_epi32(0x00010000)));
|
||||
dstLo = _mm_or_si128(dstLo, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src, 4), _mm_set1_epi32(0x00003E00)), _mm_and_si128(_mm_srli_epi32(src, 1), _mm_set1_epi32(0x00000100))) );
|
||||
dstLo = _mm_or_si128(dstLo, _mm_or_si128(_mm_and_si128(_mm_srli_epi32(src, 9), _mm_set1_epi32(0x0000003E)), _mm_and_si128(_mm_srli_epi32(src, 14), _mm_set1_epi32(0x00000001))) );
|
||||
dstLo = _mm_or_si128(dstLo, _mm_set1_epi32(0x1F000000));
|
||||
|
||||
dstHi = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src, 1), _mm_set1_epi32(0x003E0000)), _mm_and_si128(_mm_srli_epi32(src, 4), _mm_set1_epi32(0x00010000)));
|
||||
dstHi = _mm_or_si128(dstHi, _mm_or_si128(_mm_and_si128(_mm_srli_epi32(src, 12), _mm_set1_epi32(0x00003E00)), _mm_and_si128(_mm_srli_epi32(src, 17), _mm_set1_epi32(0x00000100))) );
|
||||
dstHi = _mm_or_si128(dstHi, _mm_or_si128(_mm_and_si128(_mm_srli_epi32(src, 25), _mm_set1_epi32(0x0000003E)), _mm_and_si128(_mm_srli_epi32(src, 30), _mm_set1_epi32(0x00000001))) );
|
||||
dstHi = _mm_or_si128(dstHi, _mm_set1_epi32(0x1F000000));
|
||||
}
|
||||
else
|
||||
{
|
||||
dstLo = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src, 1), _mm_set1_epi32(0x0000003E)), _mm_and_si128(_mm_srli_epi32(src, 4), _mm_set1_epi32(0x00000001)));
|
||||
dstLo = _mm_or_si128(dstLo, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src, 4), _mm_set1_epi32(0x00003E00)), _mm_and_si128(_mm_srli_epi32(src, 1), _mm_set1_epi32(0x00000100))) );
|
||||
dstLo = _mm_or_si128(dstLo, _mm_or_si128(_mm_and_si128(_mm_slli_epi32(src, 7), _mm_set1_epi32(0x003E0000)), _mm_and_si128(_mm_slli_epi32(src, 2), _mm_set1_epi32(0x00010000))) );
|
||||
dstLo = _mm_or_si128(dstLo, _mm_set1_epi32(0x1F000000));
|
||||
|
||||
dstHi = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(src, 15), _mm_set1_epi32(0x0000003E)), _mm_and_si128(_mm_srli_epi32(src, 20), _mm_set1_epi32(0x00000001)));
|
||||
dstHi = _mm_or_si128(dstHi, _mm_or_si128(_mm_and_si128(_mm_srli_epi32(src, 12), _mm_set1_epi32(0x00003E00)), _mm_and_si128(_mm_srli_epi32(src, 17), _mm_set1_epi32(0x00000100))) );
|
||||
dstHi = _mm_or_si128(dstHi, _mm_or_si128(_mm_and_si128(_mm_srli_epi32(src, 9), _mm_set1_epi32(0x003E0000)), _mm_and_si128(_mm_srli_epi32(src, 14), _mm_set1_epi32(0x00010000))) );
|
||||
dstHi = _mm_or_si128(dstHi, _mm_set1_epi32(0x1F000000));
|
||||
}
|
||||
|
||||
// Interleave the even-pixel and odd-pixel results back into source order:
// dstLo becomes pixels 0,1,2,3 and dstHi becomes pixels 4,5,6,7. The original dstLo is
// saved first because it is still needed to build the new dstHi.
__m128i tmpDstLo = dstLo;
|
||||
dstLo = _mm_or_si128( _mm_and_si128(_mm_shuffle_epi32(tmpDstLo, 0xD8), _mm_set_epi32(0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF)), _mm_and_si128(_mm_shuffle_epi32(dstHi, 0x72), _mm_set_epi32(0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000)) );
|
||||
dstHi = _mm_or_si128( _mm_and_si128(_mm_shuffle_epi32(tmpDstLo, 0x72), _mm_set_epi32(0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF)), _mm_and_si128(_mm_shuffle_epi32(dstHi, 0xD8), _mm_set_epi32(0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000)) );
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE __m128i ConvertColor8888To6665(const __m128i src)
|
||||
{
|
||||
|
@ -1957,10 +2000,13 @@ FORCEINLINE __m128i ConvertColor6665To5551(const __m128i srcLo, const __m128i sr
|
|||
|
||||
#endif
|
||||
|
||||
template<bool SWAP_RB> void ConvertColorBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template<bool SWAP_RB, bool UNALIGNED> void ConvertColorBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template<bool SWAP_RB, bool UNALIGNED> void ConvertColorBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
|
||||
template<bool SWAP_RB> void ConvertColorBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount);
|
||||
template<bool SWAP_RB> void ConvertColorBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount);
|
||||
template<bool SWAP_RB> void ConvertColorBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template<bool SWAP_RB> void ConvertColorBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
|
||||
template<bool SWAP_RB, bool UNALIGNED> void ConvertColorBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template<bool SWAP_RB, bool UNALIGNED> void ConvertColorBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1007,7 +1007,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
}
|
||||
else
|
||||
{
|
||||
ConvertColorBuffer8888To5551<true>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
|
||||
ConvertColorBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
|
||||
}
|
||||
}
|
||||
else if (this->_outputFormat == NDSColorFormat_BGR888_Rev)
|
||||
|
@ -1038,7 +1038,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
}
|
||||
else
|
||||
{
|
||||
ConvertColorBuffer8888To5551<true>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
|
||||
ConvertColorBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1083,7 +1083,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
{
|
||||
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, iw -= (this->_framebufferWidth * 2))
|
||||
{
|
||||
ConvertColorBuffer8888To5551<true>((u32 *)srcFramebuffer + ir, dstRGBA5551 + iw, pixCount);
|
||||
ConvertColorBuffer8888To5551<true, false>((u32 *)srcFramebuffer + ir, dstRGBA5551 + iw, pixCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1130,7 +1130,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
{
|
||||
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, iw -= (this->_framebufferWidth * 2))
|
||||
{
|
||||
ConvertColorBuffer8888To5551<true>((u32 *)srcFramebuffer + ir, dstRGBA5551 + iw, pixCount);
|
||||
ConvertColorBuffer8888To5551<true, false>((u32 *)srcFramebuffer + ir, dstRGBA5551 + iw, pixCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -754,7 +754,7 @@
|
|||
|
||||
if (dispInfo.pixelBytes == 2)
|
||||
{
|
||||
RGB555ToRGBA8888Buffer((u16 *)displayBuffer, bitmapData, (w * h));
|
||||
ConvertColorBuffer555To8888Opaque<false, false>((u16 *)displayBuffer, bitmapData, (w * h));
|
||||
}
|
||||
else if (dispInfo.pixelBytes == 4)
|
||||
{
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#import "cocoa_util.h"
|
||||
|
||||
#include "../NDSSystem.h"
|
||||
#include "../GPU.h"
|
||||
#include "../common.h"
|
||||
#include "../mc.h"
|
||||
#undef BOOL
|
||||
|
@ -691,7 +692,7 @@ void RomIconToRGBA8888(uint32_t *bitmapData)
|
|||
//
|
||||
// The first entry always represents the alpha, so we can just ignore it.
|
||||
clut[0] = 0x00000000;
|
||||
RGB555ToRGBA8888Buffer(iconClutPtr, &clut[1], 15);
|
||||
ConvertColorBuffer555To8888Opaque<false, true>((u16 *)iconClutPtr, &clut[1], 15);
|
||||
|
||||
// Load the image from the icon pixel data.
|
||||
//
|
||||
|
|
|
@ -625,11 +625,11 @@ Render3DError Render3D::FlushFramebuffer(const FragmentColor *__restrict srcFram
|
|||
{
|
||||
if (this->_outputFormat == NDSColorFormat_BGR666_Rev)
|
||||
{
|
||||
ConvertColorBuffer6665To5551<false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
|
||||
ConvertColorBuffer6665To5551<false, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
|
||||
}
|
||||
else if (this ->_outputFormat == NDSColorFormat_BGR888_Rev)
|
||||
{
|
||||
ConvertColorBuffer8888To5551<false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
|
||||
ConvertColorBuffer8888To5551<false, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1920,7 +1920,7 @@ static void DoDisplay(bool firstTime)
|
|||
//convert pixel format to 32bpp for compositing
|
||||
//why do we do this over and over? well, we are compositing to
|
||||
//filteredbuffer32bpp, and it needs to get refreshed each frame.
|
||||
ConvertColorBuffer555To8888Opaque<true>((u16 *)video.srcBuffer, video.buffer, video.srcBufferSize / sizeof(u16));
|
||||
ConvertColorBuffer555To8888Opaque<true, false>((u16 *)video.srcBuffer, video.buffer, video.srcBufferSize / sizeof(u16));
|
||||
|
||||
if(firstTime)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue