Colorspace Handler:

- Factor out the generic colorspace handling routines out of GPU.cpp/GPU.h into their own separate files.
- Add vectorized routines using AVX2 and AltiVec.
This commit is contained in:
rogerman 2016-08-16 06:47:22 +00:00
parent d837653b5f
commit d8735a803b
25 changed files with 2877 additions and 876 deletions

View File

@ -18,6 +18,14 @@
along with the this software. If not, see <http://www.gnu.org/licenses/>. along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifdef FASTBUILD
#undef FORCEINLINE
#define FORCEINLINE
//compilation speed hack (cuts time exactly in half by cutting out permutations)
#define DISABLE_MOSAIC
#define DISABLE_COLOREFFECTDISABLEHINT
#endif
#include "GPU.h" #include "GPU.h"
#include <assert.h> #include <assert.h>
@ -40,75 +48,8 @@
#include "matrix.h" #include "matrix.h"
#include "emufile.h" #include "emufile.h"
#ifdef FASTBUILD
#undef FORCEINLINE
#define FORCEINLINE
//compilation speed hack (cuts time exactly in half by cutting out permutations)
#define DISABLE_MOSAIC
#endif
u32 Render3DFramesPerSecond; u32 Render3DFramesPerSecond;
CACHE_ALIGN u32 color_555_to_6665_opaque[32768];
CACHE_ALIGN u32 color_555_to_6665_opaque_swap_rb[32768];
CACHE_ALIGN u32 color_555_to_666[32768];
CACHE_ALIGN u32 color_555_to_8888_opaque[32768];
CACHE_ALIGN u32 color_555_to_8888_opaque_swap_rb[32768];
CACHE_ALIGN u32 color_555_to_888[32768];
//is this a crazy idea? this table spreads 5 bits evenly over 31 from exactly 0 to INT_MAX
CACHE_ALIGN const u32 material_5bit_to_31bit[] = {
0x00000000, 0x04210842, 0x08421084, 0x0C6318C6,
0x10842108, 0x14A5294A, 0x18C6318C, 0x1CE739CE,
0x21084210, 0x25294A52, 0x294A5294, 0x2D6B5AD6,
0x318C6318, 0x35AD6B5A, 0x39CE739C, 0x3DEF7BDE,
0x42108421, 0x46318C63, 0x4A5294A5, 0x4E739CE7,
0x5294A529, 0x56B5AD6B, 0x5AD6B5AD, 0x5EF7BDEF,
0x6318C631, 0x6739CE73, 0x6B5AD6B5, 0x6F7BDEF7,
0x739CE739, 0x77BDEF7B, 0x7BDEF7BD, 0x7FFFFFFF
};
// 5-bit to 6-bit conversions use this formula -- dst = (src == 0) ? 0 : (2*src) + 1
// Reference GBATEK: http://problemkaputt.de/gbatek.htm#ds3dtextureblending
CACHE_ALIGN const u8 material_5bit_to_6bit[] = {
0x00, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F,
0x21, 0x23, 0x25, 0x27, 0x29, 0x2B, 0x2D, 0x2F,
0x31, 0x33, 0x35, 0x37, 0x39, 0x3B, 0x3D, 0x3F
};
CACHE_ALIGN const u8 material_5bit_to_8bit[] = {
0x00, 0x08, 0x10, 0x18, 0x21, 0x29, 0x31, 0x39,
0x42, 0x4A, 0x52, 0x5A, 0x63, 0x6B, 0x73, 0x7B,
0x84, 0x8C, 0x94, 0x9C, 0xA5, 0xAD, 0xB5, 0xBD,
0xC6, 0xCE, 0xD6, 0xDE, 0xE7, 0xEF, 0xF7, 0xFF
};
CACHE_ALIGN const u8 material_6bit_to_8bit[] = {
0x00, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C,
0x20, 0x24, 0x28, 0x2C, 0x30, 0x34, 0x38, 0x3C,
0x41, 0x45, 0x49, 0x4D, 0x51, 0x55, 0x59, 0x5D,
0x61, 0x65, 0x69, 0x6D, 0x71, 0x75, 0x79, 0x7D,
0x82, 0x86, 0x8A, 0x8E, 0x92, 0x96, 0x9A, 0x9E,
0xA2, 0xA6, 0xAA, 0xAE, 0xB2, 0xB6, 0xBA, 0xBE,
0xC3, 0xC7, 0xCB, 0xCF, 0xD3, 0xD7, 0xDB, 0xDF,
0xE3, 0xE7, 0xEB, 0xEF, 0xF3, 0xF7, 0xFB, 0xFF
};
CACHE_ALIGN const u8 material_3bit_to_8bit[] = {
0x00, 0x24, 0x49, 0x6D, 0x92, 0xB6, 0xDB, 0xFF
};
//maybe not very precise
CACHE_ALIGN const u8 material_3bit_to_5bit[] = {
0, 4, 8, 13, 17, 22, 26, 31
};
//TODO - generate this in the static init method more accurately
CACHE_ALIGN const u8 material_3bit_to_6bit[] = {
0, 8, 16, 26, 34, 44, 52, 63
};
//instantiate static instance //instantiate static instance
u16 GPUEngineBase::_brightnessUpTable555[17][0x8000]; u16 GPUEngineBase::_brightnessUpTable555[17][0x8000];
FragmentColor GPUEngineBase::_brightnessUpTable666[17][0x8000]; FragmentColor GPUEngineBase::_brightnessUpTable666[17][0x8000];
@ -167,7 +108,7 @@ const CACHE_ALIGN BGLayerSize GPUEngineBase::_BGLayerSizeLUT[8][4] = {
{{128,128}, {256,256}, {512,256}, {512,512}}, //affine ext direct {{128,128}, {256,256}, {512,256}, {512,512}}, //affine ext direct
}; };
static void ExpandLine8(u8 *__restrict dst, const u8 *__restrict src, size_t dstLength) static FORCEINLINE void ExpandLine8(u8 *__restrict dst, const u8 *__restrict src, size_t dstLength)
{ {
#ifdef ENABLE_SSSE3 #ifdef ENABLE_SSSE3
const bool isIntegerScale = ((dstLength % GPU_FRAMEBUFFER_NATIVE_WIDTH) == 0); const bool isIntegerScale = ((dstLength % GPU_FRAMEBUFFER_NATIVE_WIDTH) == 0);
@ -1655,11 +1596,11 @@ FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compInfo,
break; break;
case NDSColorFormat_BGR666_Rev: case NDSColorFormat_BGR666_Rev:
dstColor32.color = ConvertColor555To6665Opaque<false>(srcColor16); dstColor32.color = ColorspaceConvert555To6665Opaque<false>(srcColor16);
break; break;
case NDSColorFormat_BGR888_Rev: case NDSColorFormat_BGR888_Rev:
dstColor32.color = ConvertColor555To8888Opaque<false>(srcColor16); dstColor32.color = ColorspaceConvert555To8888Opaque<false>(srcColor16);
break; break;
} }
@ -1682,11 +1623,11 @@ FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compInfo,
break; break;
case NDSColorFormat_BGR666_Rev: case NDSColorFormat_BGR666_Rev:
dstColor32.color = ConvertColor555To6665Opaque<false>(srcColor16); dstColor32.color = ColorspaceConvert555To6665Opaque<false>(srcColor16);
break; break;
case NDSColorFormat_BGR888_Rev: case NDSColorFormat_BGR888_Rev:
dstColor32.color = ConvertColor555To8888Opaque<false>(srcColor16); dstColor32.color = ColorspaceConvert555To8888Opaque<false>(srcColor16);
break; break;
} }
@ -1767,11 +1708,11 @@ FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compInfo,
break; break;
case NDSColorFormat_BGR666_Rev: case NDSColorFormat_BGR666_Rev:
dstColor32.color = ConvertColor555To6665Opaque<false>(srcColor16); dstColor32.color = ColorspaceConvert555To6665Opaque<false>(srcColor16);
break; break;
case NDSColorFormat_BGR888_Rev: case NDSColorFormat_BGR888_Rev:
dstColor32.color = ConvertColor555To8888Opaque<false>(srcColor16); dstColor32.color = ColorspaceConvert555To8888Opaque<false>(srcColor16);
break; break;
} }
break; break;
@ -1833,13 +1774,13 @@ FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compInfo,
break; break;
case NDSColorFormat_BGR666_Rev: case NDSColorFormat_BGR666_Rev:
srcColor32.color = ConvertColor555To6665Opaque<false>(srcColor16); srcColor32.color = ColorspaceConvert555To6665Opaque<false>(srcColor16);
dstColor32 = this->_ColorEffectBlend<OUTPUTFORMAT>(srcColor32, dstColor32, blendEVA, blendEVB); dstColor32 = this->_ColorEffectBlend<OUTPUTFORMAT>(srcColor32, dstColor32, blendEVA, blendEVB);
dstColor32.a = 0x1F; dstColor32.a = 0x1F;
break; break;
case NDSColorFormat_BGR888_Rev: case NDSColorFormat_BGR888_Rev:
srcColor32.color = ConvertColor555To8888Opaque<false>(srcColor16); srcColor32.color = ColorspaceConvert555To8888Opaque<false>(srcColor16);
dstColor32 = this->_ColorEffectBlend<OUTPUTFORMAT>(srcColor32, dstColor32, blendEVA, blendEVB); dstColor32 = this->_ColorEffectBlend<OUTPUTFORMAT>(srcColor32, dstColor32, blendEVA, blendEVB);
dstColor32.a = 0xFF; dstColor32.a = 0xFF;
break; break;
@ -2132,7 +2073,7 @@ FORCEINLINE void GPUEngineBase::_RenderPixel3D(GPUEngineCompositorInfo &compInfo
// Render the pixel using the selected color effect. // Render the pixel using the selected color effect.
if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev)
{ {
const u16 srcColor16 = ConvertColor6665To5551<false>(srcColor32); const u16 srcColor16 = ColorspaceConvert6665To5551<false>(srcColor32);
switch (selectedEffect) switch (selectedEffect)
{ {
@ -2695,13 +2636,13 @@ void GPUEngineBase::_RenderPixelsCustom(GPUEngineCompositorInfo &compInfo)
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{ {
ConvertColor555To6665Opaque<false>(src16[0], src[0], src[1]); ColorspaceConvert555To6665Opaque_SSE2<false>(src16[0], src[0], src[1]);
ConvertColor555To6665Opaque<false>(src16[1], src[2], src[3]); ColorspaceConvert555To6665Opaque_SSE2<false>(src16[1], src[2], src[3]);
} }
else else
{ {
ConvertColor555To8888Opaque<false>(src16[0], src[0], src[1]); ColorspaceConvert555To8888Opaque_SSE2<false>(src16[0], src[0], src[1]);
ConvertColor555To8888Opaque<false>(src16[1], src[2], src[3]); ColorspaceConvert555To8888Opaque_SSE2<false>(src16[1], src[2], src[3]);
} }
} }
@ -2796,13 +2737,13 @@ void GPUEngineBase::_RenderPixelsCustomVRAM(GPUEngineCompositorInfo &compInfo)
{ {
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{ {
ConvertColor555To6665Opaque<false>(src16[0], src[0], src[1]); ColorspaceConvert555To6665Opaque_SSE2<false>(src16[0], src[0], src[1]);
ConvertColor555To6665Opaque<false>(src16[1], src[2], src[3]); ColorspaceConvert555To6665Opaque_SSE2<false>(src16[1], src[2], src[3]);
} }
else else
{ {
ConvertColor555To8888Opaque<false>(src16[0], src[0], src[1]); ColorspaceConvert555To8888Opaque_SSE2<false>(src16[0], src[0], src[1]);
ConvertColor555To8888Opaque<false>(src16[1], src[2], src[3]); ColorspaceConvert555To8888Opaque_SSE2<false>(src16[1], src[2], src[3]);
} }
} }
@ -4502,7 +4443,7 @@ void GPUEngineBase::UpdateVRAM3DUsageProperties_OBJLayer(const size_t bankIndex)
} }
template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED>
void GPUEngineBase::_RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compInfo) FORCEINLINE void GPUEngineBase::_RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compInfo)
{ {
bool useCustomVRAM = false; bool useCustomVRAM = false;
@ -4538,26 +4479,28 @@ void GPUEngineBase::_RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compInfo)
} }
template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED>
void GPUEngineBase::_RenderLine_LayerBG_ApplyColorEffectDisabledHint(GPUEngineCompositorInfo &compInfo) FORCEINLINE void GPUEngineBase::_RenderLine_LayerBG_ApplyColorEffectDisabledHint(GPUEngineCompositorInfo &compInfo)
{ {
this->_RenderLine_LayerBG_Final<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compInfo); this->_RenderLine_LayerBG_Final<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compInfo);
} }
template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool ISCUSTOMRENDERINGNEEDED> template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool ISCUSTOMRENDERINGNEEDED>
void GPUEngineBase::_RenderLine_LayerBG_ApplyMosaic(GPUEngineCompositorInfo &compInfo) FORCEINLINE void GPUEngineBase::_RenderLine_LayerBG_ApplyMosaic(GPUEngineCompositorInfo &compInfo)
{ {
#ifndef DISABLE_COLOREFFECTDISABLEHINT
if (compInfo.renderState.colorEffect == ColorEffect_Disable) if (compInfo.renderState.colorEffect == ColorEffect_Disable)
{ {
this->_RenderLine_LayerBG_ApplyColorEffectDisabledHint<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, true, ISCUSTOMRENDERINGNEEDED>(compInfo); this->_RenderLine_LayerBG_ApplyColorEffectDisabledHint<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, true, ISCUSTOMRENDERINGNEEDED>(compInfo);
} }
else else
#endif
{ {
this->_RenderLine_LayerBG_ApplyColorEffectDisabledHint<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, false, ISCUSTOMRENDERINGNEEDED>(compInfo); this->_RenderLine_LayerBG_ApplyColorEffectDisabledHint<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, false, ISCUSTOMRENDERINGNEEDED>(compInfo);
} }
} }
template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool WILLPERFORMWINDOWTEST, bool ISCUSTOMRENDERINGNEEDED> template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool WILLPERFORMWINDOWTEST, bool ISCUSTOMRENDERINGNEEDED>
void GPUEngineBase::_RenderLine_LayerBG(GPUEngineCompositorInfo &compInfo) FORCEINLINE void GPUEngineBase::_RenderLine_LayerBG(GPUEngineCompositorInfo &compInfo)
{ {
if (ISDEBUGRENDER) if (ISDEBUGRENDER)
{ {
@ -4951,7 +4894,7 @@ void GPUEngineBase::ResolveCustomRendering()
void GPUEngineBase::ResolveRGB666ToRGB888() void GPUEngineBase::ResolveRGB666ToRGB888()
{ {
ConvertColorBuffer6665To8888<false>((u32 *)this->renderedBuffer, (u32 *)this->renderedBuffer, this->renderedWidth * this->renderedHeight); ColorspaceConvertBuffer6665To8888<false, false>((u32 *)this->renderedBuffer, (u32 *)this->renderedBuffer, this->renderedWidth * this->renderedHeight);
} }
void GPUEngineBase::ResolveToCustomFramebuffer() void GPUEngineBase::ResolveToCustomFramebuffer()
@ -5575,12 +5518,12 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
case NDSColorFormat_BGR666_Rev: case NDSColorFormat_BGR666_Rev:
renderedLineSrcA16 = (u16 *)malloc_alignedCacheLine(compInfo.line.pixelCount * sizeof(u16)); renderedLineSrcA16 = (u16 *)malloc_alignedCacheLine(compInfo.line.pixelCount * sizeof(u16));
ConvertColorBuffer6665To5551<false, false>((u32 *)compInfo.target.lineColorHead, renderedLineSrcA16, compInfo.line.pixelCount); ColorspaceConvertBuffer6665To5551<false, false>((u32 *)compInfo.target.lineColorHead, renderedLineSrcA16, compInfo.line.pixelCount);
break; break;
case NDSColorFormat_BGR888_Rev: case NDSColorFormat_BGR888_Rev:
renderedLineSrcA16 = (u16 *)malloc_alignedCacheLine(compInfo.line.pixelCount * sizeof(u16)); renderedLineSrcA16 = (u16 *)malloc_alignedCacheLine(compInfo.line.pixelCount * sizeof(u16));
ConvertColorBuffer8888To5551<false, false>((u32 *)compInfo.target.lineColorHead, renderedLineSrcA16, compInfo.line.pixelCount); ColorspaceConvertBuffer8888To5551<false, false>((u32 *)compInfo.target.lineColorHead, renderedLineSrcA16, compInfo.line.pixelCount);
break; break;
} }
} }
@ -6570,7 +6513,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
{ {
const u16 *src = this->_VRAMNativeBlockPtr[DISPCNT.VRAM_Block] + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH); const u16 *src = this->_VRAMNativeBlockPtr[DISPCNT.VRAM_Block] + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH);
FragmentColor *dst = (FragmentColor *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH); FragmentColor *dst = (FragmentColor *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH);
ConvertColorBuffer555To6665Opaque<false, false>(src, (u32 *)dst, GPU_FRAMEBUFFER_NATIVE_WIDTH); ColorspaceConvertBuffer555To6665Opaque<false, false>(src, (u32 *)dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
break; break;
} }
@ -6578,7 +6521,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
{ {
const u16 *src = this->_VRAMNativeBlockPtr[DISPCNT.VRAM_Block] + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH); const u16 *src = this->_VRAMNativeBlockPtr[DISPCNT.VRAM_Block] + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH);
FragmentColor *dst = (FragmentColor *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH); FragmentColor *dst = (FragmentColor *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH);
ConvertColorBuffer555To8888Opaque<false, false>(src, (u32 *)dst, GPU_FRAMEBUFFER_NATIVE_WIDTH); ColorspaceConvertBuffer555To8888Opaque<false, false>(src, (u32 *)dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
break; break;
} }
} }
@ -6598,7 +6541,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
{ {
const u16 *src = this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + (_gpuDstLineIndex[l] * customWidth); const u16 *src = this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + (_gpuDstLineIndex[l] * customWidth);
FragmentColor *dst = (FragmentColor *)this->customBuffer + (_gpuDstLineIndex[l] * customWidth); FragmentColor *dst = (FragmentColor *)this->customBuffer + (_gpuDstLineIndex[l] * customWidth);
ConvertColorBuffer555To6665Opaque<false, false>(src, (u32 *)dst, customPixCount); ColorspaceConvertBuffer555To6665Opaque<false, false>(src, (u32 *)dst, customPixCount);
break; break;
} }
@ -6606,7 +6549,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
{ {
const u16 *src = this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + (_gpuDstLineIndex[l] * customWidth); const u16 *src = this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + (_gpuDstLineIndex[l] * customWidth);
FragmentColor *dst = (FragmentColor *)this->customBuffer + (_gpuDstLineIndex[l] * customWidth); FragmentColor *dst = (FragmentColor *)this->customBuffer + (_gpuDstLineIndex[l] * customWidth);
ConvertColorBuffer555To8888Opaque<false, false>(src, (u32 *)dst, customPixCount); ColorspaceConvertBuffer555To8888Opaque<false, false>(src, (u32 *)dst, customPixCount);
break; break;
} }
} }
@ -6802,28 +6745,7 @@ void GPUEngineB::RenderLine(const u16 l)
GPUSubsystem::GPUSubsystem() GPUSubsystem::GPUSubsystem()
{ {
static bool needInitTables = true; ColorspaceHandlerInit();
if (needInitTables)
{
#define RGB15TO18_BITLOGIC(col) ( (material_5bit_to_6bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | material_5bit_to_6bit[(col)&0x1F] )
#define RGB15TO18_SWAP_RB_BITLOGIC(col) ( material_5bit_to_6bit[((col)>>10)&0x1F] | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_6bit[(col)&0x1F]<<16) )
#define RGB15TO24_BITLOGIC(col) ( (material_5bit_to_8bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | material_5bit_to_8bit[(col)&0x1F] )
#define RGB15TO24_SWAP_RB_BITLOGIC(col) ( material_5bit_to_8bit[((col)>>10)&0x1F] | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_8bit[(col)&0x1F]<<16) )
for (size_t i = 0; i < 32768; i++)
{
color_555_to_666[i] = LE_TO_LOCAL_32( RGB15TO18_BITLOGIC(i) );
color_555_to_6665_opaque[i] = LE_TO_LOCAL_32( RGB15TO18_BITLOGIC(i) | 0x1F000000 );
color_555_to_6665_opaque_swap_rb[i] = LE_TO_LOCAL_32( RGB15TO18_SWAP_RB_BITLOGIC(i) | 0x1F000000 );
color_555_to_888[i] = LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) );
color_555_to_8888_opaque[i] = LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) | 0xFF000000 );
color_555_to_8888_opaque_swap_rb[i] = LE_TO_LOCAL_32( RGB15TO24_SWAP_RB_BITLOGIC(i) | 0xFF000000 );
}
needInitTables = false;
}
_defaultEventHandler = new GPUEventHandlerDefault; _defaultEventHandler = new GPUEventHandlerDefault;
_event = _defaultEventHandler; _event = _defaultEventHandler;
@ -7581,178 +7503,6 @@ void NDSDisplay::SetEngineByID(const GPUEngineID theID)
this->_gpu->SetDisplayByID(this->_ID); this->_gpu->SetDisplayByID(this->_ID);
} }
template <bool SWAP_RB, bool IS_UNALIGNED>
void ConvertColorBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
{
size_t i = 0;
#ifdef ENABLE_SSE2
const size_t ssePixCount = pixCount - (pixCount % 8);
for (; i < ssePixCount; i += 8)
{
__m128i src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((__m128i *)(src + i)) : _mm_load_si128((__m128i *)(src + i));
__m128i dstConvertedLo, dstConvertedHi;
ConvertColor555To8888Opaque<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
_mm_storeu_si128((__m128i *)(dst + i + 0), dstConvertedLo);
_mm_storeu_si128((__m128i *)(dst + i + 4), dstConvertedHi);
}
else
{
_mm_store_si128((__m128i *)(dst + i + 0), dstConvertedLo);
_mm_store_si128((__m128i *)(dst + i + 4), dstConvertedHi);
}
}
#endif
#ifdef ENABLE_SSE2
#pragma LOOPVECTORIZE_DISABLE
#endif
for (; i < pixCount; i++)
{
dst[i] = ConvertColor555To8888Opaque<SWAP_RB>(src[i]);
}
}
template <bool SWAP_RB, bool IS_UNALIGNED>
void ConvertColorBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
{
size_t i = 0;
#ifdef ENABLE_SSE2
const size_t ssePixCount = pixCount - (pixCount % 8);
for (; i < ssePixCount; i += 8)
{
__m128i src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((__m128i *)(src + i)) : _mm_load_si128((__m128i *)(src + i));
__m128i dstConvertedLo, dstConvertedHi;
ConvertColor555To6665Opaque<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
_mm_storeu_si128((__m128i *)(dst + i + 0), dstConvertedLo);
_mm_storeu_si128((__m128i *)(dst + i + 4), dstConvertedHi);
}
else
{
_mm_store_si128((__m128i *)(dst + i + 0), dstConvertedLo);
_mm_store_si128((__m128i *)(dst + i + 4), dstConvertedHi);
}
}
#endif
#ifdef ENABLE_SSE2
#pragma LOOPVECTORIZE_DISABLE
#endif
for (; i < pixCount; i++)
{
dst[i] = ConvertColor555To6665Opaque<SWAP_RB>(src[i]);
}
}
template <bool SWAP_RB>
void ConvertColorBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount)
{
size_t i = 0;
#ifdef ENABLE_SSE2
const size_t ssePixCount = pixCount - (pixCount % 4);
for (; i < ssePixCount; i += 4)
{
_mm_store_si128( (__m128i *)(dst + i), ConvertColor8888To6665<SWAP_RB>(_mm_load_si128((__m128i *)(src + i))) );
}
#endif
#ifdef ENABLE_SSE2
#pragma LOOPVECTORIZE_DISABLE
#endif
for (; i < pixCount; i++)
{
dst[i] = ConvertColor8888To6665<SWAP_RB>(src[i]);
}
}
template <bool SWAP_RB>
void ConvertColorBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount)
{
size_t i = 0;
#ifdef ENABLE_SSE2
const size_t ssePixCount = pixCount - (pixCount % 4);
for (; i < ssePixCount; i += 4)
{
_mm_store_si128( (__m128i *)(dst + i), ConvertColor6665To8888<SWAP_RB>(_mm_load_si128((__m128i *)(src + i))) );
}
#endif
#ifdef ENABLE_SSE2
#pragma LOOPVECTORIZE_DISABLE
#endif
for (; i < pixCount; i++)
{
dst[i] = ConvertColor6665To8888<SWAP_RB>(src[i]);
}
}
template <bool SWAP_RB, bool IS_UNALIGNED>
void ConvertColorBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount)
{
size_t i = 0;
#ifdef ENABLE_SSE2
const size_t ssePixCount = pixCount - (pixCount % 8);
for (; i < ssePixCount; i += 8)
{
if (IS_UNALIGNED)
{
_mm_storeu_si128( (__m128i *)(dst + i), ConvertColor8888To5551<SWAP_RB>(_mm_loadu_si128((__m128i *)(src + i)), _mm_loadu_si128((__m128i *)(src + i + 4))) );
}
else
{
_mm_store_si128( (__m128i *)(dst + i), ConvertColor8888To5551<SWAP_RB>(_mm_load_si128((__m128i *)(src + i)), _mm_load_si128((__m128i *)(src + i + 4))) );
}
}
#endif
#ifdef ENABLE_SSE2
#pragma LOOPVECTORIZE_DISABLE
#endif
for (; i < pixCount; i++)
{
dst[i] = ConvertColor8888To5551<SWAP_RB>(src[i]);
}
}
template <bool SWAP_RB, bool IS_UNALIGNED>
void ConvertColorBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount)
{
size_t i = 0;
#ifdef ENABLE_SSE2
const size_t ssePixCount = pixCount - (pixCount % 8);
for (; i < ssePixCount; i += 8)
{
if (IS_UNALIGNED)
{
_mm_storeu_si128( (__m128i *)(dst + i), ConvertColor6665To5551<SWAP_RB>(_mm_loadu_si128((__m128i *)(src + i)), _mm_loadu_si128((__m128i *)(src + i + 4))) );
}
else
{
_mm_store_si128( (__m128i *)(dst + i), ConvertColor6665To5551<SWAP_RB>(_mm_load_si128((__m128i *)(src + i)), _mm_load_si128((__m128i *)(src + i + 4))) );
}
}
#endif
#ifdef ENABLE_SSE2
#pragma LOOPVECTORIZE_DISABLE
#endif
for (; i < pixCount; i++)
{
dst[i] = ConvertColor6665To5551<SWAP_RB>(src[i]);
}
}
template void GPUEngineBase::ParseReg_BGnHOFS<GPULayerID_BG0>(); template void GPUEngineBase::ParseReg_BGnHOFS<GPULayerID_BG0>();
template void GPUEngineBase::ParseReg_BGnHOFS<GPULayerID_BG1>(); template void GPUEngineBase::ParseReg_BGnHOFS<GPULayerID_BG1>();
template void GPUEngineBase::ParseReg_BGnHOFS<GPULayerID_BG2>(); template void GPUEngineBase::ParseReg_BGnHOFS<GPULayerID_BG2>();
@ -7774,29 +7524,3 @@ template void GPUEngineBase::ParseReg_BGnY<GPULayerID_BG3>();
template void GPUSubsystem::RenderLine<NDSColorFormat_BGR555_Rev>(const u16 l, bool skip); template void GPUSubsystem::RenderLine<NDSColorFormat_BGR555_Rev>(const u16 l, bool skip);
template void GPUSubsystem::RenderLine<NDSColorFormat_BGR666_Rev>(const u16 l, bool skip); template void GPUSubsystem::RenderLine<NDSColorFormat_BGR666_Rev>(const u16 l, bool skip);
template void GPUSubsystem::RenderLine<NDSColorFormat_BGR888_Rev>(const u16 l, bool skip); template void GPUSubsystem::RenderLine<NDSColorFormat_BGR888_Rev>(const u16 l, bool skip);
template void ConvertColorBuffer555To8888Opaque<true, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer555To8888Opaque<true, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer555To8888Opaque<false, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer555To8888Opaque<false, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer555To6665Opaque<true, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer555To6665Opaque<true, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer555To6665Opaque<false, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer555To6665Opaque<false, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer8888To6665<true>(const u32 *src, u32 *dst, size_t pixCount);
template void ConvertColorBuffer8888To6665<false>(const u32 *src, u32 *dst, size_t pixCount);
template void ConvertColorBuffer6665To8888<true>(const u32 *src, u32 *dst, size_t pixCount);
template void ConvertColorBuffer6665To8888<false>(const u32 *src, u32 *dst, size_t pixCount);
template void ConvertColorBuffer8888To5551<true, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer8888To5551<true, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer8888To5551<false, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer8888To5551<false, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer6665To5551<true, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer6665To5551<true, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer6665To5551<false, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer6665To5551<false, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);

View File

@ -25,9 +25,11 @@
#include <iosfwd> #include <iosfwd>
#include "types.h" #include "types.h"
#include "./utils/colorspacehandler/colorspacehandler.h"
#ifdef ENABLE_SSE2 #ifdef ENABLE_SSE2
#include <emmintrin.h> #include <emmintrin.h>
#include "./utils/colorspacehandler/colorspacehandler_SSE2.h"
#endif #endif
#ifdef ENABLE_SSSE3 #ifdef ENABLE_SSSE3
@ -101,15 +103,6 @@ enum DisplayCaptureSize
DisplayCaptureSize_256x192 = 3, DisplayCaptureSize_256x192 = 3,
}; };
union FragmentColor
{
u32 color;
struct
{
u8 r,g,b,a;
};
};
typedef union typedef union
{ {
u32 value; u32 value;
@ -1052,61 +1045,6 @@ enum NDSDisplayID
NDSDisplayID_Touch = 1 NDSDisplayID_Touch = 1
}; };
enum NDSColorFormat
{
// The color format information is packed in a 32-bit value.
// The bits are as follows:
// FFFOOOOO AAAAAABB BBBBGGGG GGRRRRRR
//
// F = Flags (see below)
// O = Color order (see below)
// A = Bit count for alpha [0-63]
// B = Bit count for blue [0-63]
// G = Bit count for green [0-63]
// R = Bit count for red [0-63]
//
// Flags:
// Bit 29: Reverse order flag.
// Set = Bits are in reverse order, usually for little-endian usage.
// Cleared = Bits are in normal order, usually for big-endian usage.
//
// Color order bits, 24-28:
// 0x00 = RGBA, common format
// 0x01 = RGAB
// 0x02 = RBGA
// 0x03 = RBAG
// 0x04 = RAGB
// 0x05 = RABG
// 0x06 = GRBA
// 0x07 = GRAB
// 0x08 = GBRA
// 0x09 = GBAR
// 0x0A = GARB
// 0x0B = GABR
// 0x0C = BRGA
// 0x0D = BRAG
// 0x0E = BGRA, common format
// 0x0F = BGAR
// 0x10 = BARG
// 0x11 = BAGR
// 0x12 = ARGB
// 0x13 = ARBG
// 0x14 = AGRB
// 0x15 = AGBR
// 0x16 = ABRG
// 0x17 = ABGR
// Color formats used for internal processing.
//NDSColorFormat_ABGR1555_Rev = 0x20045145,
//NDSColorFormat_ABGR5666_Rev = 0x20186186,
//NDSColorFormat_ABGR8888_Rev = 0x20208208,
// Color formats used by the output framebuffers.
NDSColorFormat_BGR555_Rev = 0x20005145,
NDSColorFormat_BGR666_Rev = 0x20006186,
NDSColorFormat_BGR888_Rev = 0x20008208
};
struct DISPCAPCNT_parsed struct DISPCAPCNT_parsed
{ {
u8 EVA; u8 EVA;
@ -1410,9 +1348,9 @@ protected:
template<size_t WIN_NUM> bool _IsWindowInsideVerticalRange(GPUEngineCompositorInfo &compInfo); template<size_t WIN_NUM> bool _IsWindowInsideVerticalRange(GPUEngineCompositorInfo &compInfo);
void _PerformWindowTesting(GPUEngineCompositorInfo &compInfo); void _PerformWindowTesting(GPUEngineCompositorInfo &compInfo);
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compInfo); template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> FORCEINLINE void _RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compInfo);
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_LayerBG_ApplyColorEffectDisabledHint(GPUEngineCompositorInfo &compInfo); template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> FORCEINLINE void _RenderLine_LayerBG_ApplyColorEffectDisabledHint(GPUEngineCompositorInfo &compInfo);
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_LayerBG_ApplyMosaic(GPUEngineCompositorInfo &compInfo); template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool ISCUSTOMRENDERINGNEEDED> FORCEINLINE void _RenderLine_LayerBG_ApplyMosaic(GPUEngineCompositorInfo &compInfo);
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool WILLPERFORMWINDOWTEST, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_LayerBG(GPUEngineCompositorInfo &compInfo); template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool WILLPERFORMWINDOWTEST, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_LayerBG(GPUEngineCompositorInfo &compInfo);
template<NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> void _RenderLine_LayerOBJ(GPUEngineCompositorInfo &compInfo, itemsForPriority_t *__restrict item); template<NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> void _RenderLine_LayerOBJ(GPUEngineCompositorInfo &compInfo, itemsForPriority_t *__restrict item);
@ -1733,346 +1671,4 @@ public:
extern GPUSubsystem *GPU; extern GPUSubsystem *GPU;
extern MMU_struct MMU; extern MMU_struct MMU;
extern CACHE_ALIGN const u32 material_5bit_to_31bit[32];
extern CACHE_ALIGN const u8 material_5bit_to_6bit[32];
extern CACHE_ALIGN const u8 material_5bit_to_8bit[32];
extern CACHE_ALIGN const u8 material_6bit_to_8bit[64];
extern CACHE_ALIGN const u8 material_3bit_to_5bit[8];
extern CACHE_ALIGN const u8 material_3bit_to_6bit[8];
extern CACHE_ALIGN const u8 material_3bit_to_8bit[8];
extern CACHE_ALIGN u32 color_555_to_6665_opaque[32768];
extern CACHE_ALIGN u32 color_555_to_6665_opaque_swap_rb[32768];
extern CACHE_ALIGN u32 color_555_to_666[32768];
extern CACHE_ALIGN u32 color_555_to_8888_opaque[32768];
extern CACHE_ALIGN u32 color_555_to_8888_opaque_swap_rb[32768];
extern CACHE_ALIGN u32 color_555_to_888[32768];
#define COLOR555TO6665_OPAQUE(col) (color_555_to_6665_opaque[(col)]) // Convert a 15-bit color to an opaque sparsely packed 32-bit color containing an RGBA6665 color
#define COLOR555TO6665_OPAQUE_SWAP_RB(col) (color_555_to_6665_opaque_swap_rb[(col)]) // Convert a 15-bit color to an opaque sparsely packed 32-bit color containing an RGBA6665 color with R and B components swapped
#define COLOR555TO666(col) (color_555_to_666[(col)]) // Convert a 15-bit color to a fully transparent sparsely packed 32-bit color containing an RGBA6665 color
#ifdef LOCAL_LE
#define COLOR555TO6665(col,alpha5) (((alpha5)<<24) | color_555_to_666[(col)]) // Convert a 15-bit color to a sparsely packed 32-bit color containing an RGBA6665 color with user-defined alpha, little-endian
#else
#define COLOR555TO6665(col,alpha5) ((alpha5) | color_555_to_666[(col)]) // Convert a 15-bit color to a sparsely packed 32-bit color containing an RGBA6665 color with user-defined alpha, big-endian
#endif
#define COLOR555TO8888_OPAQUE(col) (color_555_to_8888_opaque[(col)]) // Convert a 15-bit color to an opaque 32-bit color
#define COLOR555TO8888_OPAQUE_SWAP_RB(col) (color_555_to_8888_opaque_swap_rb[(col)]) // Convert a 15-bit color to an opaque 32-bit color with R and B components swapped
#define COLOR555TO888(col) (color_555_to_888[(col)]) // Convert a 15-bit color to an opaque 24-bit color or a fully transparent 32-bit color
#ifdef LOCAL_LE
#define COLOR555TO8888(col,alpha8) (((alpha8)<<24) | color_555_to_888[(col)]) // Convert a 15-bit color to a 32-bit color with user-defined alpha, little-endian
#else
#define COLOR555TO8888(col,alpha8) ((alpha8) | color_555_to_888[(col)]) // Convert a 15-bit color to a 32-bit color with user-defined alpha, big-endian
#endif
//produce a 15bpp color from individual 5bit components
#define R5G5B5TORGB15(r,g,b) ( (r) | ((g)<<5) | ((b)<<10) )
//produce a 16bpp color from individual 5bit components
#define R6G6B6TORGB15(r,g,b) ( ((r)>>1) | (((g)&0x3E)<<4) | (((b)&0x3E)<<9) )
inline FragmentColor MakeFragmentColor(const u8 r, const u8 g, const u8 b, const u8 a)
{
FragmentColor ret;
ret.r = r; ret.g = g; ret.b = b; ret.a = a;
return ret;
}
template <bool SWAP_RB>
FORCEINLINE u32 ConvertColor555To8888Opaque(const u16 src)
{
return (SWAP_RB) ? COLOR555TO8888_OPAQUE_SWAP_RB(src & 0x7FFF) : COLOR555TO8888_OPAQUE(src & 0x7FFF);
}
template <bool SWAP_RB>
FORCEINLINE u32 ConvertColor555To6665Opaque(const u16 src)
{
return (SWAP_RB) ? COLOR555TO6665_OPAQUE_SWAP_RB(src & 0x7FFF) : COLOR555TO6665_OPAQUE(src & 0x7FFF);
}
template <bool SWAP_RB>
FORCEINLINE u32 ConvertColor8888To6665(FragmentColor srcColor)
{
FragmentColor outColor;
outColor.r = ((SWAP_RB) ? srcColor.b : srcColor.r) >> 2;
outColor.g = srcColor.g >> 2;
outColor.b = ((SWAP_RB) ? srcColor.r : srcColor.b) >> 2;
outColor.a = srcColor.a >> 3;
return outColor.color;
}
template <bool SWAP_RB>
FORCEINLINE u32 ConvertColor8888To6665(u32 srcColor)
{
FragmentColor srcColorComponent;
srcColorComponent.color = srcColor;
return ConvertColor8888To6665<SWAP_RB>(srcColorComponent);
}
template <bool SWAP_RB>
FORCEINLINE u32 ConvertColor6665To8888(FragmentColor srcColor)
{
FragmentColor outColor;
outColor.r = material_6bit_to_8bit[((SWAP_RB) ? srcColor.b : srcColor.r)];
outColor.g = material_6bit_to_8bit[srcColor.g];
outColor.b = material_6bit_to_8bit[((SWAP_RB) ? srcColor.r : srcColor.b)];
outColor.a = material_5bit_to_8bit[srcColor.a];
return outColor.color;
}
template <bool SWAP_RB>
FORCEINLINE u32 ConvertColor6665To8888(u32 srcColor)
{
FragmentColor srcColorComponent;
srcColorComponent.color = srcColor;
return ConvertColor6665To8888<SWAP_RB>(srcColorComponent);
}
template <bool SWAP_RB>
FORCEINLINE u16 ConvertColor8888To5551(FragmentColor srcColor)
{
return R5G5B5TORGB15( ((SWAP_RB) ? srcColor.b : srcColor.r) >> 3, srcColor.g >> 3, ((SWAP_RB) ? srcColor.r : srcColor.b) >> 3) | ((srcColor.a == 0) ? 0x0000 : 0x8000 );
}
template <bool SWAP_RB>
FORCEINLINE u16 ConvertColor8888To5551(u32 srcColor)
{
FragmentColor srcColorComponent;
srcColorComponent.color = srcColor;
return ConvertColor8888To5551<SWAP_RB>(srcColorComponent);
}
template <bool SWAP_RB>
FORCEINLINE u16 ConvertColor6665To5551(FragmentColor srcColor)
{
return R6G6B6TORGB15( ((SWAP_RB) ? srcColor.b : srcColor.r), srcColor.g, ((SWAP_RB) ? srcColor.r : srcColor.b)) | ((srcColor.a == 0) ? 0x0000 : 0x8000);
}
template <bool SWAP_RB>
FORCEINLINE u16 ConvertColor6665To5551(u32 srcColor)
{
FragmentColor srcColorComponent;
srcColorComponent.color = srcColor;
return ConvertColor6665To5551<SWAP_RB>(srcColorComponent);
}
#ifdef ENABLE_SSE2
template <bool SWAP_RB>
FORCEINLINE void ConvertColor555To8888(const __m128i &srcColor, const __m128i &srcAlphaBits32Lo, const __m128i &srcAlphaBits32Hi, __m128i &dstLo, __m128i &dstHi)
{
__m128i src32;
// Conversion algorithm:
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
src32 = _mm_unpacklo_epi16(srcColor, _mm_setzero_si128());
dstLo = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 19), _mm_srli_epi32(src32, 7)) : _mm_or_si128(_mm_slli_epi32(src32, 3), _mm_slli_epi32(src32, 9));
dstLo = _mm_and_si128( dstLo, _mm_set1_epi32(0x00F800F8) );
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_slli_epi32(src32, 6), _mm_set1_epi32(0x0000F800)) );
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_srli_epi32(dstLo, 5), _mm_set1_epi32(0x00070707)) );
dstLo = _mm_or_si128( dstLo, srcAlphaBits32Lo );
src32 = _mm_unpackhi_epi16(srcColor, _mm_setzero_si128());
dstHi = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 19), _mm_srli_epi32(src32, 7)) : _mm_or_si128(_mm_slli_epi32(src32, 3), _mm_slli_epi32(src32, 9));
dstHi = _mm_and_si128( dstHi, _mm_set1_epi32(0x00F800F8) );
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_slli_epi32(src32, 6), _mm_set1_epi32(0x0000F800)) );
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_srli_epi32(dstHi, 5), _mm_set1_epi32(0x00070707)) );
dstHi = _mm_or_si128( dstHi, srcAlphaBits32Hi );
}
template <bool SWAP_RB>
FORCEINLINE void ConvertColor555To6665(const __m128i &srcColor, const __m128i &srcAlphaBits32Lo, const __m128i &srcAlphaBits32Hi, __m128i &dstLo, __m128i &dstHi)
{
__m128i src32;
// Conversion algorithm:
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
src32 = _mm_unpacklo_epi16(srcColor, _mm_setzero_si128());
dstLo = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 17), _mm_srli_epi32(src32, 9)) : _mm_or_si128(_mm_slli_epi32(src32, 1), _mm_slli_epi32(src32, 7));
dstLo = _mm_and_si128( dstLo, _mm_set1_epi32(0x003E003E) );
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00003E00)) );
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_srli_epi32(dstLo, 5), _mm_set1_epi32(0x00010101)) );
dstLo = _mm_or_si128( dstLo, srcAlphaBits32Lo );
src32 = _mm_unpackhi_epi16(srcColor, _mm_setzero_si128());
dstHi = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 17), _mm_srli_epi32(src32, 9)) : _mm_or_si128(_mm_slli_epi32(src32, 1), _mm_slli_epi32(src32, 7));
dstHi = _mm_and_si128( dstHi, _mm_set1_epi32(0x003E003E) );
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00003E00)) );
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_srli_epi32(dstHi, 5), _mm_set1_epi32(0x00010101)) );
dstHi = _mm_or_si128( dstHi, srcAlphaBits32Hi );
}
template <bool SWAP_RB>
FORCEINLINE void ConvertColor555To8888Opaque(const __m128i &srcColor, __m128i &dstLo, __m128i &dstHi)
{
const __m128i srcAlphaBits32 = _mm_set1_epi32(0xFF000000);
ConvertColor555To8888<SWAP_RB>(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi);
}
template <bool SWAP_RB>
FORCEINLINE void ConvertColor555To6665Opaque(const __m128i &srcColor, __m128i &dstLo, __m128i &dstHi)
{
const __m128i srcAlphaBits32 = _mm_set1_epi32(0x1F000000);
ConvertColor555To6665<SWAP_RB>(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi);
}
template <bool SWAP_RB>
FORCEINLINE __m128i ConvertColor8888To6665(const __m128i &src)
{
// Conversion algorithm:
// RGB 8-bit to 6-bit formula: dstRGB6 = (srcRGB8 >> 2)
// Alpha 8-bit to 6-bit formula: dstA5 = (srcA8 >> 3)
__m128i rgb;
const __m128i a = _mm_and_si128( _mm_srli_epi32(src, 3), _mm_set1_epi32(0x1F000000) );
if (SWAP_RB)
{
#ifdef ENABLE_SSSE3
rgb = _mm_and_si128( _mm_srli_epi32(src, 2), _mm_set1_epi32(0x003F3F3F) );
rgb = _mm_shuffle_epi8( rgb, _mm_set_epi8(15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2) );
#else
rgb = _mm_or_si128( _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x003F0000)), 18), _mm_or_si128(_mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x00003F00)), 2), _mm_slli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x0000003F)), 14)) );
#endif
}
else
{
rgb = _mm_and_si128( _mm_srli_epi32(src, 2), _mm_set1_epi32(0x003F3F3F) );
}
return _mm_or_si128(rgb, a);
}
template <bool SWAP_RB>
FORCEINLINE __m128i ConvertColor6665To8888(const __m128i &src)
{
// Conversion algorithm:
// RGB 6-bit to 8-bit formula: dstRGB8 = (srcRGB6 << 2) | ((srcRGB6 >> 4) & 0x03)
// Alpha 5-bit to 8-bit formula: dstA8 = (srcA5 << 3) | ((srcA5 >> 2) & 0x07)
__m128i rgb = _mm_or_si128( _mm_and_si128(_mm_slli_epi32(src, 2), _mm_set1_epi32(0x00FCFCFC)), _mm_and_si128(_mm_srli_epi32(src, 4), _mm_set1_epi32(0x00030303)) );
const __m128i a = _mm_or_si128( _mm_and_si128(_mm_slli_epi32(src, 3), _mm_set1_epi32(0xF8000000)), _mm_and_si128(_mm_srli_epi32(src, 2), _mm_set1_epi32(0x07000000)) );
if (SWAP_RB)
{
#ifdef ENABLE_SSSE3
rgb = _mm_shuffle_epi8( rgb, _mm_set_epi8(15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2) );
#else
rgb = _mm_or_si128( _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x00FF0000)), 16), _mm_or_si128(_mm_and_si128(src, _mm_set1_epi32(0x0000FF00)), _mm_slli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x000000FF)), 16)) );
#endif
}
return _mm_or_si128(rgb, a);
}
template <NDSColorFormat COLORFORMAT, bool SWAP_RB>
FORCEINLINE __m128i _ConvertColorBaseTo5551(const __m128i &srcLo, const __m128i &srcHi)
{
if (COLORFORMAT == NDSColorFormat_BGR555_Rev)
{
return srcLo;
}
__m128i rgbLo;
__m128i rgbHi;
__m128i alpha;
if (COLORFORMAT == NDSColorFormat_BGR666_Rev)
{
if (SWAP_RB)
{
// Convert color from low bits
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 17), _mm_set1_epi32(0x0000001F));
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 4), _mm_set1_epi32(0x000003E0)) );
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_slli_epi32(srcLo, 9), _mm_set1_epi32(0x00007C00)) );
// Convert color from high bits
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 17), _mm_set1_epi32(0x0000001F));
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 4), _mm_set1_epi32(0x000003E0)) );
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_slli_epi32(srcHi, 9), _mm_set1_epi32(0x00007C00)) );
}
else
{
// Convert color from low bits
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 1), _mm_set1_epi32(0x0000001F));
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 4), _mm_set1_epi32(0x000003E0)) );
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 7), _mm_set1_epi32(0x00007C00)) );
// Convert color from high bits
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 1), _mm_set1_epi32(0x0000001F));
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 4), _mm_set1_epi32(0x000003E0)) );
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 7), _mm_set1_epi32(0x00007C00)) );
}
// Convert alpha
alpha = _mm_packs_epi32( _mm_and_si128(_mm_srli_epi32(srcLo, 24), _mm_set1_epi32(0x0000001F)), _mm_and_si128(_mm_srli_epi32(srcHi, 24), _mm_set1_epi32(0x0000001F)) );
alpha = _mm_cmpgt_epi16(alpha, _mm_setzero_si128());
alpha = _mm_and_si128(alpha, _mm_set1_epi16(0x8000));
}
else if (COLORFORMAT == NDSColorFormat_BGR888_Rev)
{
if (SWAP_RB)
{
// Convert color from low bits
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 19), _mm_set1_epi32(0x0000001F));
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 6), _mm_set1_epi32(0x000003E0)) );
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_slli_epi32(srcLo, 7), _mm_set1_epi32(0x00007C00)) );
// Convert color from high bits
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 19), _mm_set1_epi32(0x0000001F));
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 6), _mm_set1_epi32(0x000003E0)) );
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_slli_epi32(srcHi, 7), _mm_set1_epi32(0x00007C00)) );
}
else
{
// Convert color from low bits
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 3), _mm_set1_epi32(0x0000001F));
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 6), _mm_set1_epi32(0x000003E0)) );
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 9), _mm_set1_epi32(0x00007C00)) );
// Convert color from high bits
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 3), _mm_set1_epi32(0x0000001F));
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 6), _mm_set1_epi32(0x000003E0)) );
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 9), _mm_set1_epi32(0x00007C00)) );
}
// Convert alpha
alpha = _mm_packs_epi32( _mm_and_si128(_mm_srli_epi32(srcLo, 24), _mm_set1_epi32(0x000000FF)), _mm_and_si128(_mm_srli_epi32(srcHi, 24), _mm_set1_epi32(0x000000FF)) );
alpha = _mm_cmpgt_epi16(alpha, _mm_setzero_si128());
alpha = _mm_and_si128(alpha, _mm_set1_epi16(0x8000));
}
return _mm_or_si128(_mm_packs_epi32(rgbLo, rgbHi), alpha);
}
template <bool SWAP_RB>
FORCEINLINE __m128i ConvertColor8888To5551(const __m128i &srcLo, const __m128i &srcHi)
{
return _ConvertColorBaseTo5551<NDSColorFormat_BGR888_Rev, SWAP_RB>(srcLo, srcHi);
}
template <bool SWAP_RB>
FORCEINLINE __m128i ConvertColor6665To5551(const __m128i &srcLo, const __m128i &srcHi)
{
return _ConvertColorBaseTo5551<NDSColorFormat_BGR666_Rev, SWAP_RB>(srcLo, srcHi);
}
#endif
template<bool SWAP_RB, bool UNALIGNED> void ConvertColorBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool UNALIGNED> void ConvertColorBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template<bool SWAP_RB> void ConvertColorBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount);
template<bool SWAP_RB> void ConvertColorBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount);
template<bool SWAP_RB, bool UNALIGNED> void ConvertColorBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool UNALIGNED> void ConvertColorBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
#endif #endif

View File

@ -52,6 +52,7 @@ libdesmume_a_SOURCES = \
utils/decrypt/decrypt.h utils/decrypt/header.cpp utils/decrypt/header.h \ utils/decrypt/decrypt.h utils/decrypt/header.cpp utils/decrypt/header.h \
utils/task.cpp utils/task.h \ utils/task.cpp utils/task.h \
utils/vfat.h utils/vfat.cpp \ utils/vfat.h utils/vfat.cpp \
utils/colorspacehandler/colorspacehandler.cpp \
utils/dlditool.cpp \ utils/dlditool.cpp \
utils/libfat/bit_ops.h \ utils/libfat/bit_ops.h \
utils/libfat/cache.cpp \ utils/libfat/cache.cpp \
@ -107,6 +108,21 @@ libdesmume_a_SOURCES = \
libretro-common/rthreads/async_job.c \ libretro-common/rthreads/async_job.c \
libretro-common/rthreads/rsemaphore.c \ libretro-common/rthreads/rsemaphore.c \
libretro-common/rthreads/rthreads.c libretro-common/rthreads/rthreads.c
if SUPPORT_SSE2 += \
libdesmume_a_SOURCES += \
utils/colorspacehandler/colorspacehandler_SSE2.cpp
endif
if SUPPORT_AVX2 += \
libdesmume_a_SOURCES += \
utils/colorspacehandler/colorspacehandler_AVX2.cpp
endif
if SUPPORT_ALTIVEC += \
libdesmume_a_SOURCES += \
utils/colorspacehandler/colorspacehandler_AltiVec.cpp
endif
if HAVE_JIT if HAVE_JIT
libdesmume_a_SOURCES += \ libdesmume_a_SOURCES += \

View File

@ -32,6 +32,7 @@
#ifdef ENABLE_SSE2 #ifdef ENABLE_SSE2
#include <emmintrin.h> #include <emmintrin.h>
#include "./utils/colorspacehandler/colorspacehandler_SSE2.h"
#endif #endif
typedef struct typedef struct
@ -990,9 +991,9 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0)); const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0));
const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + i + 4)); const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + i + 4));
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 0), ConvertColor8888To6665<true>(srcColorLo) ); _mm_store_si128( (__m128i *)(dstFramebuffer + i + 0), ColorspaceConvert8888To6665_SSE2<true>(srcColorLo) );
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 4), ConvertColor8888To6665<true>(srcColorHi) ); _mm_store_si128( (__m128i *)(dstFramebuffer + i + 4), ColorspaceConvert8888To6665_SSE2<true>(srcColorHi) );
_mm_store_si128( (__m128i *)(dstRGBA5551 + i), ConvertColor8888To5551<true>(srcColorLo, srcColorHi) ); _mm_store_si128( (__m128i *)(dstRGBA5551 + i), ColorspaceConvert8888To5551_SSE2<true>(srcColorLo, srcColorHi) );
} }
#endif #endif
@ -1001,17 +1002,17 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
#endif #endif
for (; i < pixCount; i++) for (; i < pixCount; i++)
{ {
dstFramebuffer[i].color = ConvertColor8888To6665<true>(srcFramebuffer[i]); dstFramebuffer[i].color = ColorspaceConvert8888To6665<true>(srcFramebuffer[i]);
dstRGBA5551[i] = ConvertColor8888To5551<true>(srcFramebuffer[i]); dstRGBA5551[i] = ColorspaceConvert8888To5551<true>(srcFramebuffer[i]);
} }
} }
else if (dstFramebuffer != NULL) else if (dstFramebuffer != NULL)
{ {
ConvertColorBuffer8888To6665<true>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount); ColorspaceConvertBuffer8888To6665<true, false>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
} }
else else
{ {
ConvertColorBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount); ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
} }
} }
else if (this->_outputFormat == NDSColorFormat_BGR888_Rev) else if (this->_outputFormat == NDSColorFormat_BGR888_Rev)
@ -1027,7 +1028,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 0), srcColorLo ); _mm_store_si128( (__m128i *)(dstFramebuffer + i + 0), srcColorLo );
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 4), srcColorHi ); _mm_store_si128( (__m128i *)(dstFramebuffer + i + 4), srcColorHi );
_mm_store_si128( (__m128i *)(dstRGBA5551 + i), ConvertColor8888To5551<true>(srcColorLo, srcColorHi) ); _mm_store_si128( (__m128i *)(dstRGBA5551 + i), ColorspaceConvert8888To5551_SSE2<true>(srcColorLo, srcColorHi) );
} }
#endif #endif
@ -1036,8 +1037,8 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
#endif #endif
for (; i < pixCount; i++) for (; i < pixCount; i++)
{ {
dstFramebuffer[i].color = ConvertColor8888To6665<true>(srcFramebuffer[i]); dstFramebuffer[i].color = ColorspaceConvert8888To6665<true>(srcFramebuffer[i]);
dstRGBA5551[i] = ConvertColor8888To5551<true>(srcFramebuffer[i]); dstRGBA5551[i] = ColorspaceConvert8888To5551<true>(srcFramebuffer[i]);
} }
} }
else if (dstFramebuffer != NULL) else if (dstFramebuffer != NULL)
@ -1046,7 +1047,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
} }
else else
{ {
ConvertColorBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount); ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
} }
} }
} }
@ -1068,9 +1069,9 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 0)); const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 0));
const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 4)); const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 4));
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 0), ConvertColor8888To6665<true>(srcColorLo) ); _mm_store_si128( (__m128i *)(dstFramebuffer + iw + 0), ColorspaceConvert8888To6665_SSE2<true>(srcColorLo) );
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 4), ConvertColor8888To6665<true>(srcColorHi) ); _mm_store_si128( (__m128i *)(dstFramebuffer + iw + 4), ColorspaceConvert8888To6665_SSE2<true>(srcColorHi) );
_mm_store_si128( (__m128i *)(dstRGBA5551 + iw), ConvertColor8888To5551<true>(srcColorLo, srcColorHi) ); _mm_store_si128( (__m128i *)(dstRGBA5551 + iw), ColorspaceConvert8888To5551_SSE2<true>(srcColorLo, srcColorHi) );
} }
#endif #endif
@ -1079,8 +1080,8 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
#endif #endif
for (; x < pixCount; x++, ir++, iw++) for (; x < pixCount; x++, ir++, iw++)
{ {
dstFramebuffer[iw].color = ConvertColor8888To6665<true>(srcFramebuffer[ir]); dstFramebuffer[iw].color = ColorspaceConvert8888To6665<true>(srcFramebuffer[ir]);
dstRGBA5551[iw] = ConvertColor8888To5551<true>(srcFramebuffer[ir]); dstRGBA5551[iw] = ColorspaceConvert8888To5551<true>(srcFramebuffer[ir]);
} }
} }
} }
@ -1088,14 +1089,14 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
{ {
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth) for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
{ {
ConvertColorBuffer8888To6665<true>((u32 *)srcFramebuffer + ir, (u32 *)dstFramebuffer + iw, pixCount); ColorspaceConvertBuffer8888To6665<true, false>((u32 *)srcFramebuffer + ir, (u32 *)dstFramebuffer + iw, pixCount);
} }
} }
else else
{ {
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth) for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
{ {
ConvertColorBuffer8888To5551<true, false>((u32 *)srcFramebuffer + ir, dstRGBA5551 + iw, pixCount); ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer + ir, dstRGBA5551 + iw, pixCount);
} }
} }
} }
@ -1115,7 +1116,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 0), srcColorLo ); _mm_store_si128( (__m128i *)(dstFramebuffer + iw + 0), srcColorLo );
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 4), srcColorHi ); _mm_store_si128( (__m128i *)(dstFramebuffer + iw + 4), srcColorHi );
_mm_store_si128( (__m128i *)(dstRGBA5551 + iw), ConvertColor8888To5551<true>(srcColorLo, srcColorHi) ); _mm_store_si128( (__m128i *)(dstRGBA5551 + iw), ColorspaceConvert8888To5551_SSE2<true>(srcColorLo, srcColorHi) );
} }
#endif #endif
@ -1125,7 +1126,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
for (; x < pixCount; x++, ir++, iw++) for (; x < pixCount; x++, ir++, iw++)
{ {
dstFramebuffer[iw] = srcFramebuffer[ir]; dstFramebuffer[iw] = srcFramebuffer[ir];
dstRGBA5551[iw] = ConvertColor8888To5551<true>(srcFramebuffer[ir]); dstRGBA5551[iw] = ColorspaceConvert8888To5551<true>(srcFramebuffer[ir]);
} }
} }
} }
@ -1146,7 +1147,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
{ {
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth) for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
{ {
ConvertColorBuffer8888To5551<true, false>((u32 *)srcFramebuffer + ir, dstRGBA5551 + iw, pixCount); ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer + ir, dstRGBA5551 + iw, pixCount);
} }
} }
} }

View File

@ -243,6 +243,8 @@
AB564915186E6F67002740F4 /* Image_Piano.png in Resources */ = {isa = PBXBuildFile; fileRef = AB56490B186E6F67002740F4 /* Image_Piano.png */; }; AB564915186E6F67002740F4 /* Image_Piano.png in Resources */ = {isa = PBXBuildFile; fileRef = AB56490B186E6F67002740F4 /* Image_Piano.png */; };
AB5785FD17176AFC002C5FC7 /* OpenEmuBase.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = AB5785FC17176AFC002C5FC7 /* OpenEmuBase.framework */; }; AB5785FD17176AFC002C5FC7 /* OpenEmuBase.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = AB5785FC17176AFC002C5FC7 /* OpenEmuBase.framework */; };
AB58F32D1364F44B0074C376 /* cocoa_file.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB58F32C1364F44B0074C376 /* cocoa_file.mm */; }; AB58F32D1364F44B0074C376 /* cocoa_file.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB58F32C1364F44B0074C376 /* cocoa_file.mm */; };
AB5FDDAC1D62C89E0094617C /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */; };
AB5FDDAD1D62C8A00094617C /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */; };
AB64987C13ECC73800EE7DD2 /* FileTypeInfo.plist in Resources */ = {isa = PBXBuildFile; fileRef = AB64987B13ECC73800EE7DD2 /* FileTypeInfo.plist */; }; AB64987C13ECC73800EE7DD2 /* FileTypeInfo.plist in Resources */ = {isa = PBXBuildFile; fileRef = AB64987B13ECC73800EE7DD2 /* FileTypeInfo.plist */; };
AB68101B187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png in Resources */ = {isa = PBXBuildFile; fileRef = AB681013187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png */; }; AB68101B187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png in Resources */ = {isa = PBXBuildFile; fileRef = AB681013187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png */; };
AB68101C187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png in Resources */ = {isa = PBXBuildFile; fileRef = AB681013187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png */; }; AB68101C187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png in Resources */ = {isa = PBXBuildFile; fileRef = AB681013187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png */; };
@ -974,6 +976,12 @@
ABB97878144E89CC00793FA3 /* Icon_DeSmuME_32x32.png in Resources */ = {isa = PBXBuildFile; fileRef = ABB97875144E89CC00793FA3 /* Icon_DeSmuME_32x32.png */; }; ABB97878144E89CC00793FA3 /* Icon_DeSmuME_32x32.png in Resources */ = {isa = PBXBuildFile; fileRef = ABB97875144E89CC00793FA3 /* Icon_DeSmuME_32x32.png */; };
ABBC0F8D1394B1AA0028B6BD /* DefaultUserPrefs.plist in Resources */ = {isa = PBXBuildFile; fileRef = ABBC0F8C1394B1AA0028B6BD /* DefaultUserPrefs.plist */; }; ABBC0F8D1394B1AA0028B6BD /* DefaultUserPrefs.plist in Resources */ = {isa = PBXBuildFile; fileRef = ABBC0F8C1394B1AA0028B6BD /* DefaultUserPrefs.plist */; };
ABBF04A514B515F300E505A0 /* AppIcon_ROMCheats.icns in Resources */ = {isa = PBXBuildFile; fileRef = ABBF04A414B515F300E505A0 /* AppIcon_ROMCheats.icns */; }; ABBF04A514B515F300E505A0 /* AppIcon_ROMCheats.icns in Resources */ = {isa = PBXBuildFile; fileRef = ABBF04A414B515F300E505A0 /* AppIcon_ROMCheats.icns */; };
ABBFFF851D6283C0003CD598 /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */; };
ABBFFF861D6283C1003CD598 /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */; };
ABBFFF871D6283C1003CD598 /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */; };
ABBFFF891D6283D2003CD598 /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */; };
ABBFFF8A1D6283D3003CD598 /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */; };
ABBFFF8B1D6283D3003CD598 /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */; };
ABC3AF2F14B7F06900D5B13D /* Icon_VolumeFull_16x16.png in Resources */ = {isa = PBXBuildFile; fileRef = ABC3AF2B14B7F06900D5B13D /* Icon_VolumeFull_16x16.png */; }; ABC3AF2F14B7F06900D5B13D /* Icon_VolumeFull_16x16.png in Resources */ = {isa = PBXBuildFile; fileRef = ABC3AF2B14B7F06900D5B13D /* Icon_VolumeFull_16x16.png */; };
ABC3AF3014B7F06900D5B13D /* Icon_VolumeMute_16x16.png in Resources */ = {isa = PBXBuildFile; fileRef = ABC3AF2C14B7F06900D5B13D /* Icon_VolumeMute_16x16.png */; }; ABC3AF3014B7F06900D5B13D /* Icon_VolumeMute_16x16.png in Resources */ = {isa = PBXBuildFile; fileRef = ABC3AF2C14B7F06900D5B13D /* Icon_VolumeMute_16x16.png */; };
ABC3AF3114B7F06900D5B13D /* Icon_VolumeOneThird_16x16.png in Resources */ = {isa = PBXBuildFile; fileRef = ABC3AF2D14B7F06900D5B13D /* Icon_VolumeOneThird_16x16.png */; }; ABC3AF3114B7F06900D5B13D /* Icon_VolumeOneThird_16x16.png in Resources */ = {isa = PBXBuildFile; fileRef = ABC3AF2D14B7F06900D5B13D /* Icon_VolumeOneThird_16x16.png */; };
@ -1534,6 +1542,14 @@
ABBB421516B4A5F30012E5AB /* OGLRender_3_2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = OGLRender_3_2.h; path = ../OGLRender_3_2.h; sourceTree = "<group>"; }; ABBB421516B4A5F30012E5AB /* OGLRender_3_2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = OGLRender_3_2.h; path = ../OGLRender_3_2.h; sourceTree = "<group>"; };
ABBC0F8C1394B1AA0028B6BD /* DefaultUserPrefs.plist */ = {isa = PBXFileReference; lastKnownFileType = file.bplist; path = DefaultUserPrefs.plist; sourceTree = "<group>"; }; ABBC0F8C1394B1AA0028B6BD /* DefaultUserPrefs.plist */ = {isa = PBXFileReference; lastKnownFileType = file.bplist; path = DefaultUserPrefs.plist; sourceTree = "<group>"; };
ABBF04A414B515F300E505A0 /* AppIcon_ROMCheats.icns */ = {isa = PBXFileReference; lastKnownFileType = image.icns; path = AppIcon_ROMCheats.icns; sourceTree = "<group>"; }; ABBF04A414B515F300E505A0 /* AppIcon_ROMCheats.icns */ = {isa = PBXFileReference; lastKnownFileType = image.icns; path = AppIcon_ROMCheats.icns; sourceTree = "<group>"; };
ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler.cpp; sourceTree = "<group>"; };
ABBFFF701D5F9C52003CD598 /* colorspacehandler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colorspacehandler.h; sourceTree = "<group>"; };
ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler_SSE2.cpp; sourceTree = "<group>"; };
ABBFFF761D5FD2ED003CD598 /* colorspacehandler_SSE2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colorspacehandler_SSE2.h; sourceTree = "<group>"; };
ABBFFF7B1D610457003CD598 /* colorspacehandler_AVX2.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler_AVX2.cpp; sourceTree = "<group>"; };
ABBFFF7C1D610457003CD598 /* colorspacehandler_AVX2.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = colorspacehandler_AVX2.h; sourceTree = "<group>"; };
ABBFFF811D611A36003CD598 /* colorspacehandler_AltiVec.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler_AltiVec.cpp; sourceTree = "<group>"; };
ABBFFF821D611A36003CD598 /* colorspacehandler_AltiVec.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = colorspacehandler_AltiVec.h; sourceTree = "<group>"; };
ABC3AF2B14B7F06900D5B13D /* Icon_VolumeFull_16x16.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_VolumeFull_16x16.png; path = images/Icon_VolumeFull_16x16.png; sourceTree = "<group>"; }; ABC3AF2B14B7F06900D5B13D /* Icon_VolumeFull_16x16.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_VolumeFull_16x16.png; path = images/Icon_VolumeFull_16x16.png; sourceTree = "<group>"; };
ABC3AF2C14B7F06900D5B13D /* Icon_VolumeMute_16x16.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_VolumeMute_16x16.png; path = images/Icon_VolumeMute_16x16.png; sourceTree = "<group>"; }; ABC3AF2C14B7F06900D5B13D /* Icon_VolumeMute_16x16.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_VolumeMute_16x16.png; path = images/Icon_VolumeMute_16x16.png; sourceTree = "<group>"; };
ABC3AF2D14B7F06900D5B13D /* Icon_VolumeOneThird_16x16.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_VolumeOneThird_16x16.png; path = images/Icon_VolumeOneThird_16x16.png; sourceTree = "<group>"; }; ABC3AF2D14B7F06900D5B13D /* Icon_VolumeOneThird_16x16.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_VolumeOneThird_16x16.png; path = images/Icon_VolumeOneThird_16x16.png; sourceTree = "<group>"; };
@ -2508,6 +2524,21 @@
path = openemu; path = openemu;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
ABBFFF6E1D5F9C10003CD598 /* colorspacehandler */ = {
isa = PBXGroup;
children = (
ABBFFF811D611A36003CD598 /* colorspacehandler_AltiVec.cpp */,
ABBFFF7B1D610457003CD598 /* colorspacehandler_AVX2.cpp */,
ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */,
ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */,
ABBFFF821D611A36003CD598 /* colorspacehandler_AltiVec.h */,
ABBFFF7C1D610457003CD598 /* colorspacehandler_AVX2.h */,
ABBFFF761D5FD2ED003CD598 /* colorspacehandler_SSE2.h */,
ABBFFF701D5F9C52003CD598 /* colorspacehandler.h */,
);
path = colorspacehandler;
sourceTree = "<group>";
};
ABC2ECD613B1C87000FAAA2A /* Images */ = { ABC2ECD613B1C87000FAAA2A /* Images */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
@ -2759,6 +2790,7 @@
ABD1FF211345ACBF00AF11D1 /* decrypt */, ABD1FF211345ACBF00AF11D1 /* decrypt */,
ABD1FF2E1345ACBF00AF11D1 /* libfat */, ABD1FF2E1345ACBF00AF11D1 /* libfat */,
ABE670241415DE6C00E8E4C9 /* tinyxml */, ABE670241415DE6C00E8E4C9 /* tinyxml */,
ABBFFF6E1D5F9C10003CD598 /* colorspacehandler */,
ABD1FF1D1345ACBF00AF11D1 /* ConvertUTF.c */, ABD1FF1D1345ACBF00AF11D1 /* ConvertUTF.c */,
AB9038A517C5ECFD00F410BD /* advanscene.cpp */, AB9038A517C5ECFD00F410BD /* advanscene.cpp */,
ABD1FF1F1345ACBF00AF11D1 /* datetime.cpp */, ABD1FF1F1345ACBF00AF11D1 /* datetime.cpp */,
@ -3770,6 +3802,7 @@
ABE6840D189E33BC007FD69C /* OGLDisplayOutput.cpp in Sources */, ABE6840D189E33BC007FD69C /* OGLDisplayOutput.cpp in Sources */,
ABD1FF121345AC9C00AF11D1 /* slot2_none.cpp in Sources */, ABD1FF121345AC9C00AF11D1 /* slot2_none.cpp in Sources */,
ABD1FF131345AC9C00AF11D1 /* slot2_paddle.cpp in Sources */, ABD1FF131345AC9C00AF11D1 /* slot2_paddle.cpp in Sources */,
ABBFFF8A1D6283D3003CD598 /* colorspacehandler_SSE2.cpp in Sources */,
ABD1FF141345AC9C00AF11D1 /* slot2_piano.cpp in Sources */, ABD1FF141345AC9C00AF11D1 /* slot2_piano.cpp in Sources */,
ABD1FF151345AC9C00AF11D1 /* slot2_rumblepak.cpp in Sources */, ABD1FF151345AC9C00AF11D1 /* slot2_rumblepak.cpp in Sources */,
ABD1041F1346652500AF11D1 /* sndOSX.cpp in Sources */, ABD1041F1346652500AF11D1 /* sndOSX.cpp in Sources */,
@ -3864,6 +3897,7 @@
AB40565E169F5DBB0016AC3E /* virtualmemory.cpp in Sources */, AB40565E169F5DBB0016AC3E /* virtualmemory.cpp in Sources */,
AB405661169F5DBB0016AC3E /* zonememory.cpp in Sources */, AB405661169F5DBB0016AC3E /* zonememory.cpp in Sources */,
AB405679169F5DCC0016AC3E /* x86assembler.cpp in Sources */, AB405679169F5DCC0016AC3E /* x86assembler.cpp in Sources */,
ABBFFF861D6283C1003CD598 /* colorspacehandler.cpp in Sources */,
AB40567C169F5DCC0016AC3E /* x86compiler.cpp in Sources */, AB40567C169F5DCC0016AC3E /* x86compiler.cpp in Sources */,
ABFEA8A41BB4EC1100B08C25 /* sfnt.c in Sources */, ABFEA8A41BB4EC1100B08C25 /* sfnt.c in Sources */,
ABA731691BB51FDC00B26147 /* type1cid.c in Sources */, ABA731691BB51FDC00B26147 /* type1cid.c in Sources */,
@ -4017,6 +4051,7 @@
AB796D4315CDCBA200C59155 /* version.cpp in Sources */, AB796D4315CDCBA200C59155 /* version.cpp in Sources */,
ABFEA82B1BB4EC1100B08C25 /* ftinit.c in Sources */, ABFEA82B1BB4EC1100B08C25 /* ftinit.c in Sources */,
AB796D4415CDCBA200C59155 /* vfat.cpp in Sources */, AB796D4415CDCBA200C59155 /* vfat.cpp in Sources */,
AB5FDDAC1D62C89E0094617C /* colorspacehandler.cpp in Sources */,
AB796D4515CDCBA200C59155 /* videofilter.cpp in Sources */, AB796D4515CDCBA200C59155 /* videofilter.cpp in Sources */,
AB796D4615CDCBA200C59155 /* WavFile.cpp in Sources */, AB796D4615CDCBA200C59155 /* WavFile.cpp in Sources */,
AB796D4715CDCBA200C59155 /* wifi.cpp in Sources */, AB796D4715CDCBA200C59155 /* wifi.cpp in Sources */,
@ -4096,6 +4131,7 @@
AB26D87C16B5253D00A2305C /* OGLRender_3_2.cpp in Sources */, AB26D87C16B5253D00A2305C /* OGLRender_3_2.cpp in Sources */,
AB3A655E16CC5421001F5D4A /* EmuControllerDelegate.mm in Sources */, AB3A655E16CC5421001F5D4A /* EmuControllerDelegate.mm in Sources */,
AB3A656116CC5438001F5D4A /* cocoa_GPU.mm in Sources */, AB3A656116CC5438001F5D4A /* cocoa_GPU.mm in Sources */,
AB5FDDAD1D62C8A00094617C /* colorspacehandler_SSE2.cpp in Sources */,
AB8967D916D2ED0700F826F1 /* DisplayWindowController.mm in Sources */, AB8967D916D2ED0700F826F1 /* DisplayWindowController.mm in Sources */,
AB29B33116D4BEBF000EF671 /* InputManager.mm in Sources */, AB29B33116D4BEBF000EF671 /* InputManager.mm in Sources */,
AB8B7AAC17CE8C440051CEBF /* slot1comp_protocol.cpp in Sources */, AB8B7AAC17CE8C440051CEBF /* slot1comp_protocol.cpp in Sources */,
@ -4272,6 +4308,7 @@
AB2ABA401C9F9CFA00173B15 /* rsemaphore.c in Sources */, AB2ABA401C9F9CFA00173B15 /* rsemaphore.c in Sources */,
AB8F3CF01A53AC2600A80BF6 /* ringbuffer.cpp in Sources */, AB8F3CF01A53AC2600A80BF6 /* ringbuffer.cpp in Sources */,
AB8F3CF11A53AC2600A80BF6 /* arm_jit.cpp in Sources */, AB8F3CF11A53AC2600A80BF6 /* arm_jit.cpp in Sources */,
ABBFFF891D6283D2003CD598 /* colorspacehandler_SSE2.cpp in Sources */,
AB8F3CF21A53AC2600A80BF6 /* troubleshootingWindowDelegate.mm in Sources */, AB8F3CF21A53AC2600A80BF6 /* troubleshootingWindowDelegate.mm in Sources */,
AB8F3CF31A53AC2600A80BF6 /* assembler.cpp in Sources */, AB8F3CF31A53AC2600A80BF6 /* assembler.cpp in Sources */,
AB8F3CF41A53AC2600A80BF6 /* assert.cpp in Sources */, AB8F3CF41A53AC2600A80BF6 /* assert.cpp in Sources */,
@ -4295,6 +4332,7 @@
AB8F3D041A53AC2600A80BF6 /* virtualmemory.cpp in Sources */, AB8F3D041A53AC2600A80BF6 /* virtualmemory.cpp in Sources */,
AB8F3D051A53AC2600A80BF6 /* zonememory.cpp in Sources */, AB8F3D051A53AC2600A80BF6 /* zonememory.cpp in Sources */,
AB8F3D061A53AC2600A80BF6 /* x86assembler.cpp in Sources */, AB8F3D061A53AC2600A80BF6 /* x86assembler.cpp in Sources */,
ABBFFF851D6283C0003CD598 /* colorspacehandler.cpp in Sources */,
AB8F3D071A53AC2600A80BF6 /* x86compiler.cpp in Sources */, AB8F3D071A53AC2600A80BF6 /* x86compiler.cpp in Sources */,
AB8F3D081A53AC2600A80BF6 /* x86compilercontext.cpp in Sources */, AB8F3D081A53AC2600A80BF6 /* x86compilercontext.cpp in Sources */,
AB8F3D091A53AC2600A80BF6 /* x86compilerfunc.cpp in Sources */, AB8F3D091A53AC2600A80BF6 /* x86compilerfunc.cpp in Sources */,
@ -4367,6 +4405,7 @@
ABB3C6911501C04F00E0C22E /* SoundTouch.cpp in Sources */, ABB3C6911501C04F00E0C22E /* SoundTouch.cpp in Sources */,
ABB3C6921501C04F00E0C22E /* sse_optimized.cpp in Sources */, ABB3C6921501C04F00E0C22E /* sse_optimized.cpp in Sources */,
ABB3C6931501C04F00E0C22E /* TDStretch.cpp in Sources */, ABB3C6931501C04F00E0C22E /* TDStretch.cpp in Sources */,
ABBFFF871D6283C1003CD598 /* colorspacehandler.cpp in Sources */,
ABB3C6941501C04F00E0C22E /* WavFile.cpp in Sources */, ABB3C6941501C04F00E0C22E /* WavFile.cpp in Sources */,
ABB3C6951501C04F00E0C22E /* metaspu.cpp in Sources */, ABB3C6951501C04F00E0C22E /* metaspu.cpp in Sources */,
ABB3C6961501C04F00E0C22E /* SndOut.cpp in Sources */, ABB3C6961501C04F00E0C22E /* SndOut.cpp in Sources */,
@ -4436,6 +4475,7 @@
ABB3C6D11501C04F00E0C22E /* slot1.cpp in Sources */, ABB3C6D11501C04F00E0C22E /* slot1.cpp in Sources */,
ABB3C6D31501C04F00E0C22E /* SPU.cpp in Sources */, ABB3C6D31501C04F00E0C22E /* SPU.cpp in Sources */,
ABB3C6D41501C04F00E0C22E /* texcache.cpp in Sources */, ABB3C6D41501C04F00E0C22E /* texcache.cpp in Sources */,
ABBFFF8B1D6283D3003CD598 /* colorspacehandler_SSE2.cpp in Sources */,
AB9038BA17C5ED2200F410BD /* slot1comp_rom.cpp in Sources */, AB9038BA17C5ED2200F410BD /* slot1comp_rom.cpp in Sources */,
ABB3C6D51501C04F00E0C22E /* thumb_instructions.cpp in Sources */, ABB3C6D51501C04F00E0C22E /* thumb_instructions.cpp in Sources */,
AB2EE13317D57F5000F68622 /* fsnitro.cpp in Sources */, AB2EE13317D57F5000F68622 /* fsnitro.cpp in Sources */,

View File

@ -740,6 +740,14 @@
AB2F56F11704C86900E28885 /* utilities.c in Sources */ = {isa = PBXBuildFile; fileRef = AB2F56EF1704C86900E28885 /* utilities.c */; }; AB2F56F11704C86900E28885 /* utilities.c in Sources */ = {isa = PBXBuildFile; fileRef = AB2F56EF1704C86900E28885 /* utilities.c */; };
AB2F56F21704C86900E28885 /* utilities.c in Sources */ = {isa = PBXBuildFile; fileRef = AB2F56EF1704C86900E28885 /* utilities.c */; }; AB2F56F21704C86900E28885 /* utilities.c in Sources */ = {isa = PBXBuildFile; fileRef = AB2F56EF1704C86900E28885 /* utilities.c */; };
AB2F56F31704C86900E28885 /* utilities.c in Sources */ = {isa = PBXBuildFile; fileRef = AB2F56EF1704C86900E28885 /* utilities.c */; }; AB2F56F31704C86900E28885 /* utilities.c in Sources */ = {isa = PBXBuildFile; fileRef = AB2F56EF1704C86900E28885 /* utilities.c */; };
AB37E3741D6188BC004A2C0D /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */; };
AB37E3771D6188BC004A2C0D /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E3721D6188BC004A2C0D /* colorspacehandler_SSE2.cpp */; };
AB37E3781D6188BC004A2C0D /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */; };
AB37E37B1D6188BC004A2C0D /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E3721D6188BC004A2C0D /* colorspacehandler_SSE2.cpp */; };
AB37E37C1D6188BC004A2C0D /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */; };
AB37E37D1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36E1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp */; };
AB37E3801D6188BC004A2C0D /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */; };
AB37E38A1D61895F004A2C0D /* colorspacehandler_AltiVec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36E1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp */; };
AB3ACB7814C2361100D7D192 /* appDelegate.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB3ACB6714C2361100D7D192 /* appDelegate.mm */; }; AB3ACB7814C2361100D7D192 /* appDelegate.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB3ACB6714C2361100D7D192 /* appDelegate.mm */; };
AB3ACB7914C2361100D7D192 /* cheatWindowDelegate.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB3ACB6914C2361100D7D192 /* cheatWindowDelegate.mm */; }; AB3ACB7914C2361100D7D192 /* cheatWindowDelegate.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB3ACB6914C2361100D7D192 /* cheatWindowDelegate.mm */; };
AB3ACB7C14C2361100D7D192 /* inputPrefsView.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB3ACB6F14C2361100D7D192 /* inputPrefsView.mm */; }; AB3ACB7C14C2361100D7D192 /* inputPrefsView.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB3ACB6F14C2361100D7D192 /* inputPrefsView.mm */; };
@ -1156,6 +1164,8 @@
AB73AA2E1507C9F500A310C8 /* OpenGL.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = ABC570D4134431DA00E7B0B1 /* OpenGL.framework */; }; AB73AA2E1507C9F500A310C8 /* OpenGL.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = ABC570D4134431DA00E7B0B1 /* OpenGL.framework */; };
AB73AA2F1507C9F500A310C8 /* libz.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = AB0A0D1914AACA9600E83E91 /* libz.dylib */; }; AB73AA2F1507C9F500A310C8 /* libz.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = AB0A0D1914AACA9600E83E91 /* libz.dylib */; };
AB75226F14C7BB51009B97B3 /* AppIcon_FirmwareConfig.icns in Resources */ = {isa = PBXBuildFile; fileRef = AB75226D14C7BB51009B97B3 /* AppIcon_FirmwareConfig.icns */; }; AB75226F14C7BB51009B97B3 /* AppIcon_FirmwareConfig.icns in Resources */ = {isa = PBXBuildFile; fileRef = AB75226D14C7BB51009B97B3 /* AppIcon_FirmwareConfig.icns */; };
AB7BB17F1D62C8CC00A7A6E2 /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */; };
AB7BB1801D62C8CF00A7A6E2 /* colorspacehandler_AltiVec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36E1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp */; };
AB7DDA6D173DC38F004F3D07 /* Carbon.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = ABB6AD5C173A3F2B00EC2E8D /* Carbon.framework */; }; AB7DDA6D173DC38F004F3D07 /* Carbon.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = ABB6AD5C173A3F2B00EC2E8D /* Carbon.framework */; };
AB7DDA6E173DC399004F3D07 /* Carbon.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = ABB6AD5C173A3F2B00EC2E8D /* Carbon.framework */; }; AB7DDA6E173DC399004F3D07 /* Carbon.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = ABB6AD5C173A3F2B00EC2E8D /* Carbon.framework */; };
AB7DDA6F173DC39E004F3D07 /* Carbon.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = ABB6AD5C173A3F2B00EC2E8D /* Carbon.framework */; }; AB7DDA6F173DC39E004F3D07 /* Carbon.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = ABB6AD5C173A3F2B00EC2E8D /* Carbon.framework */; };
@ -1835,6 +1845,12 @@
AB2F56EF1704C86900E28885 /* utilities.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = utilities.c; sourceTree = "<group>"; }; AB2F56EF1704C86900E28885 /* utilities.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = utilities.c; sourceTree = "<group>"; };
AB350BA41478AC96007165AC /* IOKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = IOKit.framework; path = System/Library/Frameworks/IOKit.framework; sourceTree = SDKROOT; }; AB350BA41478AC96007165AC /* IOKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = IOKit.framework; path = System/Library/Frameworks/IOKit.framework; sourceTree = SDKROOT; };
AB350D38147A1D8D007165AC /* English */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; name = English; path = translations/English.lproj/HID_usage_strings.plist; sourceTree = "<group>"; }; AB350D38147A1D8D007165AC /* English */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; name = English; path = translations/English.lproj/HID_usage_strings.plist; sourceTree = "<group>"; };
AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler.cpp; sourceTree = "<group>"; };
AB37E36D1D6188BC004A2C0D /* colorspacehandler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colorspacehandler.h; sourceTree = "<group>"; };
AB37E36E1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler_AltiVec.cpp; sourceTree = "<group>"; };
AB37E36F1D6188BC004A2C0D /* colorspacehandler_AltiVec.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colorspacehandler_AltiVec.h; sourceTree = "<group>"; };
AB37E3721D6188BC004A2C0D /* colorspacehandler_SSE2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler_SSE2.cpp; sourceTree = "<group>"; };
AB37E3731D6188BC004A2C0D /* colorspacehandler_SSE2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colorspacehandler_SSE2.h; sourceTree = "<group>"; };
AB3ACB6614C2361100D7D192 /* appDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = appDelegate.h; sourceTree = "<group>"; }; AB3ACB6614C2361100D7D192 /* appDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = appDelegate.h; sourceTree = "<group>"; };
AB3ACB6714C2361100D7D192 /* appDelegate.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = appDelegate.mm; sourceTree = "<group>"; }; AB3ACB6714C2361100D7D192 /* appDelegate.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = appDelegate.mm; sourceTree = "<group>"; };
AB3ACB6814C2361100D7D192 /* cheatWindowDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cheatWindowDelegate.h; sourceTree = "<group>"; }; AB3ACB6814C2361100D7D192 /* cheatWindowDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cheatWindowDelegate.h; sourceTree = "<group>"; };
@ -2894,6 +2910,19 @@
path = src; path = src;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
AB37E36B1D6188BC004A2C0D /* colorspacehandler */ = {
isa = PBXGroup;
children = (
AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */,
AB37E36D1D6188BC004A2C0D /* colorspacehandler.h */,
AB37E36E1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp */,
AB37E36F1D6188BC004A2C0D /* colorspacehandler_AltiVec.h */,
AB37E3721D6188BC004A2C0D /* colorspacehandler_SSE2.cpp */,
AB37E3731D6188BC004A2C0D /* colorspacehandler_SSE2.h */,
);
path = colorspacehandler;
sourceTree = "<group>";
};
AB3ACB6514C2361100D7D192 /* userinterface */ = { AB3ACB6514C2361100D7D192 /* userinterface */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
@ -3207,6 +3236,7 @@
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
ABBCE2A115ACB29100A2C965 /* AsmJit */, ABBCE2A115ACB29100A2C965 /* AsmJit */,
AB37E36B1D6188BC004A2C0D /* colorspacehandler */,
ABD1FF211345ACBF00AF11D1 /* decrypt */, ABD1FF211345ACBF00AF11D1 /* decrypt */,
ABD1FF2E1345ACBF00AF11D1 /* libfat */, ABD1FF2E1345ACBF00AF11D1 /* libfat */,
ABE670241415DE6C00E8E4C9 /* tinyxml */, ABE670241415DE6C00E8E4C9 /* tinyxml */,
@ -4508,6 +4538,8 @@
AB50200A1D09E712002FA150 /* file_path.c in Sources */, AB50200A1D09E712002FA150 /* file_path.c in Sources */,
AB50200B1D09E712002FA150 /* retro_dirent.c in Sources */, AB50200B1D09E712002FA150 /* retro_dirent.c in Sources */,
AB50200C1D09E712002FA150 /* retro_stat.c in Sources */, AB50200C1D09E712002FA150 /* retro_stat.c in Sources */,
AB7BB17F1D62C8CC00A7A6E2 /* colorspacehandler.cpp in Sources */,
AB7BB1801D62C8CF00A7A6E2 /* colorspacehandler_AltiVec.cpp in Sources */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
@ -4687,6 +4719,8 @@
AB5020161D09E712002FA150 /* file_path.c in Sources */, AB5020161D09E712002FA150 /* file_path.c in Sources */,
AB5020171D09E712002FA150 /* retro_dirent.c in Sources */, AB5020171D09E712002FA150 /* retro_dirent.c in Sources */,
AB5020181D09E712002FA150 /* retro_stat.c in Sources */, AB5020181D09E712002FA150 /* retro_stat.c in Sources */,
AB37E3801D6188BC004A2C0D /* colorspacehandler.cpp in Sources */,
AB37E38A1D61895F004A2C0D /* colorspacehandler_AltiVec.cpp in Sources */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
@ -4896,6 +4930,8 @@
AB50200D1D09E712002FA150 /* file_path.c in Sources */, AB50200D1D09E712002FA150 /* file_path.c in Sources */,
AB50200E1D09E712002FA150 /* retro_dirent.c in Sources */, AB50200E1D09E712002FA150 /* retro_dirent.c in Sources */,
AB50200F1D09E712002FA150 /* retro_stat.c in Sources */, AB50200F1D09E712002FA150 /* retro_stat.c in Sources */,
AB37E3741D6188BC004A2C0D /* colorspacehandler.cpp in Sources */,
AB37E3771D6188BC004A2C0D /* colorspacehandler_SSE2.cpp in Sources */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
@ -5105,6 +5141,8 @@
AB5020101D09E712002FA150 /* file_path.c in Sources */, AB5020101D09E712002FA150 /* file_path.c in Sources */,
AB5020111D09E712002FA150 /* retro_dirent.c in Sources */, AB5020111D09E712002FA150 /* retro_dirent.c in Sources */,
AB5020121D09E712002FA150 /* retro_stat.c in Sources */, AB5020121D09E712002FA150 /* retro_stat.c in Sources */,
AB37E3781D6188BC004A2C0D /* colorspacehandler.cpp in Sources */,
AB37E37B1D6188BC004A2C0D /* colorspacehandler_SSE2.cpp in Sources */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
@ -5284,6 +5322,8 @@
AB5020131D09E712002FA150 /* file_path.c in Sources */, AB5020131D09E712002FA150 /* file_path.c in Sources */,
AB5020141D09E712002FA150 /* retro_dirent.c in Sources */, AB5020141D09E712002FA150 /* retro_dirent.c in Sources */,
AB5020151D09E712002FA150 /* retro_stat.c in Sources */, AB5020151D09E712002FA150 /* retro_stat.c in Sources */,
AB37E37C1D6188BC004A2C0D /* colorspacehandler.cpp in Sources */,
AB37E37D1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp in Sources */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };

View File

@ -754,7 +754,7 @@
if (dispInfo.pixelBytes == 2) if (dispInfo.pixelBytes == 2)
{ {
ConvertColorBuffer555To8888Opaque<false, false>((u16 *)displayBuffer, bitmapData, (w * h)); ColorspaceConvertBuffer555To8888Opaque<false, false>((u16 *)displayBuffer, bitmapData, (w * h));
} }
else if (dispInfo.pixelBytes == 4) else if (dispInfo.pixelBytes == 4)
{ {

View File

@ -692,7 +692,7 @@ void RomIconToRGBA8888(uint32_t *bitmapData)
// //
// The first entry always represents the alpha, so we can just ignore it. // The first entry always represents the alpha, so we can just ignore it.
clut[0] = 0x00000000; clut[0] = 0x00000000;
ConvertColorBuffer555To8888Opaque<false, true>((u16 *)iconClutPtr, &clut[1], 15); ColorspaceConvertBuffer555To8888Opaque<false, true>((u16 *)iconClutPtr, &clut[1], 15);
// Load the image from the icon pixel data. // Load the image from the icon pixel data.
// //

View File

@ -1,65 +1,63 @@
/* /*
Copyright (C) 2008-2015 DeSmuME team Copyright (C) 2008-2015 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or the Free Software Foundation, either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This file is distributed in the hope that it will be useful, This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with the this software. If not, see <http://www.gnu.org/licenses/>. along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include <stdio.h> #include <stdio.h>
#include <zlib.h> #include <zlib.h>
#include "types.h" #include "types.h"
#include "ImageOut.h" #include "ImageOut.h"
#include "formats/rpng.h" #include "formats/rpng.h"
#include "formats/rbmp.h" #include "formats/rbmp.h"
#include "GPU.h" #include "GPU.h"
static u8* Convert15To24(const u16* src, int width, int height) static u8* Convert15To24(const u16* src, int width, int height)
{ {
u8 *tmp_buffer; u8 *tmp_buffer;
u8 *tmp_inc; u8 *tmp_inc;
tmp_inc = tmp_buffer = (u8 *)malloc(width * height * 3); tmp_inc = tmp_buffer = (u8 *)malloc(width * height * 3);
for(int y=0;y<height;y++) for (int i = 0; i < width*height; i++)
{ {
for(int x=0;x<width;x++) u32 dst = ColorspaceConvert555To8888Opaque<true>(*src++);
{ *tmp_inc++ = dst & 0xFF;
u32 dst = ConvertColor555To8888Opaque<true>(*src++); *tmp_inc++ = (dst >> 8) & 0xFF;
*tmp_inc++ = dst&0xFF; *tmp_inc++ = (dst >> 16) & 0xFF;
*tmp_inc++ = (dst>>8)&0xFF; }
*tmp_inc++ = (dst>>16)&0xFF;
} return tmp_buffer;
} }
return tmp_buffer;
} int NDS_WritePNG_15bpp(int width, int height, const u16 *data, const char *filename)
{
int NDS_WritePNG_15bpp(int width, int height, const u16 *data, const char *filename) u8* tmp = Convert15To24(data,width,height);
{ bool ok = rpng_save_image_bgr24(filename,tmp,width,height,width*3);
u8* tmp = Convert15To24(data,width,height); free(tmp);
bool ok = rpng_save_image_bgr24(filename,tmp,width,height,width*3); return ok?1:0;
free(tmp); }
return ok?1:0;
} int NDS_WriteBMP_15bpp(int width, int height, const u16 *data, const char *filename)
{
int NDS_WriteBMP_15bpp(int width, int height, const u16 *data, const char *filename) u8* tmp = Convert15To24(data,width,height);
{ bool ok = rbmp_save_image(filename,tmp,width,height,width*3,RBMP_SOURCE_TYPE_BGR24);
u8* tmp = Convert15To24(data,width,height); free(tmp);
bool ok = rbmp_save_image(filename,tmp,width,height,width*3,RBMP_SOURCE_TYPE_BGR24); return ok?1:0;
free(tmp); }
return ok?1:0;
} int NDS_WriteBMP_32bppBuffer(int width, int height, const void* buf, const char *filename)
{
int NDS_WriteBMP_32bppBuffer(int width, int height, const void* buf, const char *filename) bool ok = rbmp_save_image(filename,buf,width,height,width*4,RBMP_SOURCE_TYPE_ARGB8888);
{ return ok?1:0;
bool ok = rbmp_save_image(filename,buf,width,height,width*4,RBMP_SOURCE_TYPE_ARGB8888);
return ok?1:0;
} }

View File

@ -605,11 +605,11 @@ Render3DError Render3D::FlushFramebuffer(const FragmentColor *__restrict srcFram
{ {
if ( (this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev) ) if ( (this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev) )
{ {
ConvertColorBuffer8888To6665<false>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount); ColorspaceConvertBuffer8888To6665<false, false>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
} }
else if ( (this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev) ) else if ( (this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev) )
{ {
ConvertColorBuffer6665To8888<false>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount); ColorspaceConvertBuffer6665To8888<false, false>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
} }
else if ( ((this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev)) || else if ( ((this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev)) ||
((this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev)) ) ((this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev)) )
@ -622,11 +622,11 @@ Render3DError Render3D::FlushFramebuffer(const FragmentColor *__restrict srcFram
{ {
if (this->_outputFormat == NDSColorFormat_BGR666_Rev) if (this->_outputFormat == NDSColorFormat_BGR666_Rev)
{ {
ConvertColorBuffer6665To5551<false, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount); ColorspaceConvertBuffer6665To5551<false, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
} }
else if (this ->_outputFormat == NDSColorFormat_BGR888_Rev) else if (this ->_outputFormat == NDSColorFormat_BGR888_Rev)
{ {
ConvertColorBuffer8888To5551<false, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount); ColorspaceConvertBuffer8888To5551<false, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
} }
} }

View File

@ -31,6 +31,10 @@
#include "MMU.h" #include "MMU.h"
#include "NDSSystem.h" #include "NDSSystem.h"
#ifdef ENABLE_SSE2
#include "./utils/colorspacehandler/colorspacehandler_SSE2.h"
#endif
using std::min; using std::min;
using std::max; using std::max;
@ -452,13 +456,13 @@ public:
if (TEXFORMAT == TexFormat_15bpp) if (TEXFORMAT == TexFormat_15bpp)
{ {
ConvertColor555To6665Opaque<false>(palColor0, convertedColor[0], convertedColor[1]); ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ConvertColor555To6665Opaque<false>(palColor1, convertedColor[2], convertedColor[3]); ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
} }
else else
{ {
ConvertColor555To8888Opaque<false>(palColor0, convertedColor[0], convertedColor[1]); ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ConvertColor555To8888Opaque<false>(palColor1, convertedColor[2], convertedColor[3]); ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
} }
// Set converted colors to 0 if the palette index is 0. // Set converted colors to 0 if the palette index is 0.
@ -518,13 +522,13 @@ public:
if (TEXFORMAT == TexFormat_15bpp) if (TEXFORMAT == TexFormat_15bpp)
{ {
ConvertColor555To6665Opaque<false>(palColor0, convertedColor[0], convertedColor[1]); ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ConvertColor555To6665Opaque<false>(palColor1, convertedColor[2], convertedColor[3]); ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
} }
else else
{ {
ConvertColor555To8888Opaque<false>(palColor0, convertedColor[0], convertedColor[1]); ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ConvertColor555To8888Opaque<false>(palColor1, convertedColor[2], convertedColor[3]); ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
} }
_mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]); _mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]);
@ -581,13 +585,13 @@ public:
if (TEXFORMAT == TexFormat_15bpp) if (TEXFORMAT == TexFormat_15bpp)
{ {
ConvertColor555To6665Opaque<false>(palColor0, convertedColor[0], convertedColor[1]); ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ConvertColor555To6665Opaque<false>(palColor1, convertedColor[2], convertedColor[3]); ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
} }
else else
{ {
ConvertColor555To8888Opaque<false>(palColor0, convertedColor[0], convertedColor[1]); ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ConvertColor555To8888Opaque<false>(palColor1, convertedColor[2], convertedColor[3]); ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
} }
// Set converted colors to 0 if the palette index is 0. // Set converted colors to 0 if the palette index is 0.
@ -647,13 +651,13 @@ public:
if (TEXFORMAT == TexFormat_15bpp) if (TEXFORMAT == TexFormat_15bpp)
{ {
ConvertColor555To6665Opaque<false>(palColor0, convertedColor[0], convertedColor[1]); ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ConvertColor555To6665Opaque<false>(palColor1, convertedColor[2], convertedColor[3]); ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
} }
else else
{ {
ConvertColor555To8888Opaque<false>(palColor0, convertedColor[0], convertedColor[1]); ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ConvertColor555To8888Opaque<false>(palColor1, convertedColor[2], convertedColor[3]); ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
} }
_mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]); _mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]);
@ -882,11 +886,11 @@ public:
tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo); tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo);
tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo); tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo);
ConvertColor555To6665<false>(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]); ColorspaceConvert555To6665_SSE2<false>(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]);
tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi); tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi);
tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi); tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi);
ConvertColor555To6665<false>(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]); ColorspaceConvert555To6665_SSE2<false>(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]);
} }
else else
{ {
@ -896,11 +900,11 @@ public:
tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo); tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo);
tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo); tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo);
ConvertColor555To8888<false>(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]); ColorspaceConvert555To8888_SSE2<false>(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]);
tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi); tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi);
tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi); tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi);
ConvertColor555To8888<false>(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]); ColorspaceConvert555To8888_SSE2<false>(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]);
} }
_mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]); _mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]);

View File

@ -76,6 +76,18 @@
#ifdef __SSE4_2__ #ifdef __SSE4_2__
#define ENABLE_SSE4_2 #define ENABLE_SSE4_2
#endif #endif
#ifdef __AVX__
#define ENABLE_AVX
#endif
#ifdef __AVX2__
#define ENABLE_AVX2
#endif
#ifdef __ALTIVEC__
#define ENABLE_ALTIVEC
#endif
#endif #endif
#ifdef _MSC_VER #ifdef _MSC_VER
@ -223,6 +235,38 @@ typedef u32 uint32;
#define uint32 u32 //uint32 is defined in Leopard somewhere, avoid conflicts #define uint32 u32 //uint32 is defined in Leopard somewhere, avoid conflicts
#endif #endif
#ifdef ENABLE_ALTIVEC
#ifndef __APPLE_ALTIVEC__
#include <altivec.h>
#endif
typedef vector unsigned char v128u8;
typedef vector signed char v128s8;
typedef vector unsigned short v128u16;
typedef vector signed short v128s16;
typedef vector unsigned int v128u32;
typedef vector signed int v128s32;
#endif
#ifdef ENABLE_SSE2
#include <emmintrin.h>
typedef __m128i v128u8;
typedef __m128i v128s8;
typedef __m128i v128u16;
typedef __m128i v128s16;
typedef __m128i v128u32;
typedef __m128i v128s32;
#endif
#ifdef ENABLE_AVX2
#include <immintrin.h>
typedef __m256i v256u8;
typedef __m256i v256s8;
typedef __m256i v256u16;
typedef __m256i v256s16;
typedef __m256i v256u32;
typedef __m256i v256s32;
#endif
/*---------- GPU3D fixed-points types -----------*/ /*---------- GPU3D fixed-points types -----------*/
typedef s32 f32; typedef s32 f32;

View File

@ -0,0 +1,776 @@
/*
Copyright (C) 2016 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/
#include "colorspacehandler.h"
#if defined(ENABLE_AVX2)
#include "colorspacehandler_AVX2.h"
#elif defined(ENABLE_SSE2)
#include "colorspacehandler_SSE2.h"
#elif defined(ENABLE_ALTIVEC)
#include "colorspacehandler_AltiVec.h"
#endif
#if defined(ENABLE_SSE2) || defined(ENABLE_ALTIVEC)
#define USEVECTORSIZE_128
#endif
#if defined(ENABLE_AVX2)
#define USEVECTORSIZE_256
#endif
// By default, the hand-coded vectorized code will be used instead of a compiler's built-in
// autovectorization (if supported). However, if USEMANUALVECTORIZATION is not defined, then
// the compiler will use autovectorization (if supported).
#if defined(USEVECTORSIZE_128) || defined(USEVECTORSIZE_256) || defined(USEVECTORSIZE_512)
// Comment out USEMANUALVECTORIZATION to disable the hand-coded vectorized code.
#define USEMANUALVECTORIZATION
#endif
#ifdef USEMANUALVECTORIZATION
#if defined(ENABLE_AVX2)
static const ColorspaceHandler_AVX2 csh;
#elif defined(ENABLE_SSE2)
static const ColorspaceHandler_SSE2 csh;
#elif defined(ENABLE_ALTIVEC)
static const ColorspaceHandler_AltiVec csh;
#else
static const ColorspaceHandler csh;
#endif
#else
static const ColorspaceHandler csh;
#endif
CACHE_ALIGN u32 color_555_to_6665_opaque[32768];
CACHE_ALIGN u32 color_555_to_6665_opaque_swap_rb[32768];
CACHE_ALIGN u32 color_555_to_666[32768];
CACHE_ALIGN u32 color_555_to_8888_opaque[32768];
CACHE_ALIGN u32 color_555_to_8888_opaque_swap_rb[32768];
CACHE_ALIGN u32 color_555_to_888[32768];
//is this a crazy idea? this table spreads 5 bits evenly over 31 from exactly 0 to INT_MAX
CACHE_ALIGN const u32 material_5bit_to_31bit[] = {
0x00000000, 0x04210842, 0x08421084, 0x0C6318C6,
0x10842108, 0x14A5294A, 0x18C6318C, 0x1CE739CE,
0x21084210, 0x25294A52, 0x294A5294, 0x2D6B5AD6,
0x318C6318, 0x35AD6B5A, 0x39CE739C, 0x3DEF7BDE,
0x42108421, 0x46318C63, 0x4A5294A5, 0x4E739CE7,
0x5294A529, 0x56B5AD6B, 0x5AD6B5AD, 0x5EF7BDEF,
0x6318C631, 0x6739CE73, 0x6B5AD6B5, 0x6F7BDEF7,
0x739CE739, 0x77BDEF7B, 0x7BDEF7BD, 0x7FFFFFFF
};
// 5-bit to 6-bit conversions use this formula -- dst = (src == 0) ? 0 : (2*src) + 1
// Reference GBATEK: http://problemkaputt.de/gbatek.htm#ds3dtextureblending
CACHE_ALIGN const u8 material_5bit_to_6bit[] = {
0x00, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F,
0x21, 0x23, 0x25, 0x27, 0x29, 0x2B, 0x2D, 0x2F,
0x31, 0x33, 0x35, 0x37, 0x39, 0x3B, 0x3D, 0x3F
};
CACHE_ALIGN const u8 material_5bit_to_8bit[] = {
0x00, 0x08, 0x10, 0x18, 0x21, 0x29, 0x31, 0x39,
0x42, 0x4A, 0x52, 0x5A, 0x63, 0x6B, 0x73, 0x7B,
0x84, 0x8C, 0x94, 0x9C, 0xA5, 0xAD, 0xB5, 0xBD,
0xC6, 0xCE, 0xD6, 0xDE, 0xE7, 0xEF, 0xF7, 0xFF
};
CACHE_ALIGN const u8 material_6bit_to_8bit[] = {
0x00, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C,
0x20, 0x24, 0x28, 0x2C, 0x30, 0x34, 0x38, 0x3C,
0x41, 0x45, 0x49, 0x4D, 0x51, 0x55, 0x59, 0x5D,
0x61, 0x65, 0x69, 0x6D, 0x71, 0x75, 0x79, 0x7D,
0x82, 0x86, 0x8A, 0x8E, 0x92, 0x96, 0x9A, 0x9E,
0xA2, 0xA6, 0xAA, 0xAE, 0xB2, 0xB6, 0xBA, 0xBE,
0xC3, 0xC7, 0xCB, 0xCF, 0xD3, 0xD7, 0xDB, 0xDF,
0xE3, 0xE7, 0xEB, 0xEF, 0xF3, 0xF7, 0xFB, 0xFF
};
CACHE_ALIGN const u8 material_3bit_to_8bit[] = {
0x00, 0x24, 0x49, 0x6D, 0x92, 0xB6, 0xDB, 0xFF
};
//maybe not very precise
CACHE_ALIGN const u8 material_3bit_to_5bit[] = {
0, 4, 8, 13, 17, 22, 26, 31
};
//TODO - generate this in the static init method more accurately
CACHE_ALIGN const u8 material_3bit_to_6bit[] = {
0, 8, 16, 26, 34, 44, 52, 63
};
void ColorspaceHandlerInit()
{
static bool needInitTables = true;
if (needInitTables)
{
#define RGB15TO18_BITLOGIC(col) ( (material_5bit_to_6bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | material_5bit_to_6bit[(col)&0x1F] )
#define RGB15TO18_SWAP_RB_BITLOGIC(col) ( material_5bit_to_6bit[((col)>>10)&0x1F] | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_6bit[(col)&0x1F]<<16) )
#define RGB15TO24_BITLOGIC(col) ( (material_5bit_to_8bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | material_5bit_to_8bit[(col)&0x1F] )
#define RGB15TO24_SWAP_RB_BITLOGIC(col) ( material_5bit_to_8bit[((col)>>10)&0x1F] | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_8bit[(col)&0x1F]<<16) )
for (size_t i = 0; i < 32768; i++)
{
color_555_to_666[i] = LE_TO_LOCAL_32( RGB15TO18_BITLOGIC(i) );
color_555_to_6665_opaque[i] = LE_TO_LOCAL_32( RGB15TO18_BITLOGIC(i) | 0x1F000000 );
color_555_to_6665_opaque_swap_rb[i] = LE_TO_LOCAL_32( RGB15TO18_SWAP_RB_BITLOGIC(i) | 0x1F000000 );
color_555_to_888[i] = LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) );
color_555_to_8888_opaque[i] = LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) | 0xFF000000 );
color_555_to_8888_opaque_swap_rb[i] = LE_TO_LOCAL_32( RGB15TO24_SWAP_RB_BITLOGIC(i) | 0xFF000000 );
}
}
}
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert555To8888Opaque(const u16 src)
{
return (SWAP_RB) ? COLOR555TO8888_OPAQUE_SWAP_RB(src & 0x7FFF) : COLOR555TO8888_OPAQUE(src & 0x7FFF);
}
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert555To6665Opaque(const u16 src)
{
return (SWAP_RB) ? COLOR555TO6665_OPAQUE_SWAP_RB(src & 0x7FFF) : COLOR555TO6665_OPAQUE(src & 0x7FFF);
}
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert8888To6665(FragmentColor srcColor)
{
FragmentColor outColor;
outColor.r = ((SWAP_RB) ? srcColor.b : srcColor.r) >> 2;
outColor.g = srcColor.g >> 2;
outColor.b = ((SWAP_RB) ? srcColor.r : srcColor.b) >> 2;
outColor.a = srcColor.a >> 3;
return outColor.color;
}
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert8888To6665(u32 srcColor)
{
FragmentColor srcColorComponent;
srcColorComponent.color = srcColor;
return ColorspaceConvert8888To6665<SWAP_RB>(srcColorComponent);
}
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert6665To8888(FragmentColor srcColor)
{
FragmentColor outColor;
outColor.r = material_6bit_to_8bit[((SWAP_RB) ? srcColor.b : srcColor.r)];
outColor.g = material_6bit_to_8bit[srcColor.g];
outColor.b = material_6bit_to_8bit[((SWAP_RB) ? srcColor.r : srcColor.b)];
outColor.a = material_5bit_to_8bit[srcColor.a];
return outColor.color;
}
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert6665To8888(u32 srcColor)
{
FragmentColor srcColorComponent;
srcColorComponent.color = srcColor;
return ColorspaceConvert6665To8888<SWAP_RB>(srcColorComponent);
}
template <bool SWAP_RB>
FORCEINLINE u16 ColorspaceConvert8888To5551(FragmentColor srcColor)
{
return R5G5B5TORGB15( ((SWAP_RB) ? srcColor.b : srcColor.r) >> 3, srcColor.g >> 3, ((SWAP_RB) ? srcColor.r : srcColor.b) >> 3) | ((srcColor.a == 0) ? 0x0000 : 0x8000 );
}
template <bool SWAP_RB>
FORCEINLINE u16 ColorspaceConvert8888To5551(u32 srcColor)
{
FragmentColor srcColorComponent;
srcColorComponent.color = srcColor;
return ColorspaceConvert8888To5551<SWAP_RB>(srcColorComponent);
}
template <bool SWAP_RB>
FORCEINLINE u16 ColorspaceConvert6665To5551(FragmentColor srcColor)
{
return R6G6B6TORGB15( ((SWAP_RB) ? srcColor.b : srcColor.r), srcColor.g, ((SWAP_RB) ? srcColor.r : srcColor.b)) | ((srcColor.a == 0) ? 0x0000 : 0x8000);
}
template <bool SWAP_RB>
FORCEINLINE u16 ColorspaceConvert6665To5551(u32 srcColor)
{
FragmentColor srcColorComponent;
srcColorComponent.color = srcColor;
return ColorspaceConvert6665To5551<SWAP_RB>(srcColorComponent);
}
template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
{
size_t i = 0;
#ifdef USEMANUALVECTORIZATION
#if defined(USEVECTORSIZE_128)
const size_t pixCountVector = pixCount - (pixCount % 8);
#elif defined(USEVECTORSIZE_256)
const size_t pixCountVector = pixCount - (pixCount % 16);
#elif defined(USEVECTORSIZE_512)
const size_t pixCountVector = pixCount - (pixCount % 32);
#endif
if (SWAP_RB)
{
if (IS_UNALIGNED)
{
i = csh.ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector);
}
else
{
i = csh.ConvertBuffer555To8888Opaque_SwapRB(src, dst, pixCountVector);
}
}
else
{
if (IS_UNALIGNED)
{
i = csh.ConvertBuffer555To8888Opaque_IsUnaligned(src, dst, pixCountVector);
}
else
{
i = csh.ConvertBuffer555To8888Opaque(src, dst, pixCountVector);
}
}
#pragma LOOPVECTORIZE_DISABLE
#endif // USEMANUALVECTORIZATION
for (; i < pixCount; i++)
{
dst[i] = ColorspaceConvert555To8888Opaque<SWAP_RB>(src[i]);
}
}
template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
{
size_t i = 0;
#ifdef USEMANUALVECTORIZATION
#if defined(USEVECTORSIZE_128)
const size_t pixCountVector = pixCount - (pixCount % 8);
#elif defined(USEVECTORSIZE_256)
const size_t pixCountVector = pixCount - (pixCount % 16);
#elif defined(USEVECTORSIZE_512)
const size_t pixCountVector = pixCount - (pixCount % 32);
#endif
if (SWAP_RB)
{
if (IS_UNALIGNED)
{
i = csh.ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector);
}
else
{
i = csh.ConvertBuffer555To6665Opaque_SwapRB(src, dst, pixCountVector);
}
}
else
{
if (IS_UNALIGNED)
{
i = csh.ConvertBuffer555To6665Opaque_IsUnaligned(src, dst, pixCountVector);
}
else
{
i = csh.ConvertBuffer555To6665Opaque(src, dst, pixCountVector);
}
}
#pragma LOOPVECTORIZE_DISABLE
#endif // USEMANUALVECTORIZATION
for (; i < pixCount; i++)
{
dst[i] = ColorspaceConvert555To6665Opaque<SWAP_RB>(src[i]);
}
}
template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount)
{
size_t i = 0;
#ifdef USEMANUALVECTORIZATION
#if defined(USEVECTORSIZE_128)
const size_t pixCountVector = pixCount - (pixCount % 4);
#elif defined(USEVECTORSIZE_256)
const size_t pixCountVector = pixCount - (pixCount % 8);
#elif defined(USEVECTORSIZE_512)
const size_t pixCountVector = pixCount - (pixCount % 16);
#endif
if (SWAP_RB)
{
if (IS_UNALIGNED)
{
i = csh.ConvertBuffer8888To6665_SwapRB_IsUnaligned(src, dst, pixCountVector);
}
else
{
i = csh.ConvertBuffer8888To6665_SwapRB(src, dst, pixCountVector);
}
}
else
{
if (IS_UNALIGNED)
{
i = csh.ConvertBuffer8888To6665_IsUnaligned(src, dst, pixCountVector);
}
else
{
i = csh.ConvertBuffer8888To6665(src, dst, pixCountVector);
}
}
#pragma LOOPVECTORIZE_DISABLE
#endif // USEMANUALVECTORIZATION
for (; i < pixCount; i++)
{
dst[i] = ColorspaceConvert8888To6665<SWAP_RB>(src[i]);
}
}
template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount)
{
size_t i = 0;
#ifdef USEMANUALVECTORIZATION
#if defined(USEVECTORSIZE_128)
const size_t pixCountVector = pixCount - (pixCount % 4);
#elif defined(USEVECTORSIZE_256)
const size_t pixCountVector = pixCount - (pixCount % 8);
#elif defined(USEVECTORSIZE_512)
const size_t pixCountVector = pixCount - (pixCount % 16);
#endif
if (SWAP_RB)
{
if (IS_UNALIGNED)
{
i = csh.ConvertBuffer6665To8888_SwapRB_IsUnaligned(src, dst, pixCountVector);
}
else
{
i = csh.ConvertBuffer6665To8888_SwapRB(src, dst, pixCountVector);
}
}
else
{
if (IS_UNALIGNED)
{
i = csh.ConvertBuffer6665To8888_IsUnaligned(src, dst, pixCountVector);
}
else
{
i = csh.ConvertBuffer6665To8888(src, dst, pixCountVector);
}
}
#pragma LOOPVECTORIZE_DISABLE
#endif // USEMANUALVECTORIZATION
for (; i < pixCount; i++)
{
dst[i] = ColorspaceConvert6665To8888<SWAP_RB>(src[i]);
}
}
template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount)
{
size_t i = 0;
#ifdef USEMANUALVECTORIZATION
#if defined(USEVECTORSIZE_128)
const size_t pixCountVector = pixCount - (pixCount % 8);
#elif defined(USEVECTORSIZE_256)
const size_t pixCountVector = pixCount - (pixCount % 16);
#elif defined(USEVECTORSIZE_512)
const size_t pixCountVector = pixCount - (pixCount % 32);
#endif
if (SWAP_RB)
{
if (IS_UNALIGNED)
{
i = csh.ConvertBuffer8888To5551_SwapRB_IsUnaligned(src, dst, pixCountVector);
}
else
{
i = csh.ConvertBuffer8888To5551_SwapRB(src, dst, pixCountVector);
}
}
else
{
if (IS_UNALIGNED)
{
i = csh.ConvertBuffer8888To5551_IsUnaligned(src, dst, pixCountVector);
}
else
{
i = csh.ConvertBuffer8888To5551(src, dst, pixCountVector);
}
}
#pragma LOOPVECTORIZE_DISABLE
#endif // USEMANUALVECTORIZATION
for (; i < pixCount; i++)
{
dst[i] = ColorspaceConvert8888To5551<SWAP_RB>(src[i]);
}
}
template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount)
{
size_t i = 0;
#ifdef USEMANUALVECTORIZATION
#if defined(USEVECTORSIZE_128)
const size_t pixCountVector = pixCount - (pixCount % 8);
#elif defined(USEVECTORSIZE_256)
const size_t pixCountVector = pixCount - (pixCount % 16);
#elif defined(USEVECTORSIZE_512)
const size_t pixCountVector = pixCount - (pixCount % 32);
#endif
if (SWAP_RB)
{
if (IS_UNALIGNED)
{
i = csh.ConvertBuffer6665To5551_SwapRB_IsUnaligned(src, dst, pixCountVector);
}
else
{
i = csh.ConvertBuffer6665To5551_SwapRB(src, dst, pixCountVector);
}
}
else
{
if (IS_UNALIGNED)
{
i = csh.ConvertBuffer6665To5551_IsUnaligned(src, dst, pixCountVector);
}
else
{
i = csh.ConvertBuffer6665To5551(src, dst, pixCountVector);
}
}
#pragma LOOPVECTORIZE_DISABLE
#endif // USEMANUALVECTORIZATION
for (; i < pixCount; i++)
{
dst[i] = ColorspaceConvert6665To5551<SWAP_RB>(src[i]);
}
}
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
size_t i = 0;
for (; i < pixCount; i++)
{
dst[i] = ColorspaceConvert555To8888Opaque<false>(src[i]);
}
return i;
}
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
size_t i = 0;
for (; i < pixCount; i++)
{
dst[i] = ColorspaceConvert555To8888Opaque<true>(src[i]);
}
return i;
}
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return this->ColorspaceHandler::ConvertBuffer555To8888Opaque(src, dst, pixCount);
}
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return this->ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB(src, dst, pixCount);
}
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
size_t i = 0;
for (; i < pixCount; i++)
{
dst[i] = ColorspaceConvert555To6665Opaque<false>(src[i]);
}
return i;
}
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
size_t i = 0;
for (; i < pixCount; i++)
{
dst[i] = ColorspaceConvert555To6665Opaque<true>(src[i]);
}
return i;
}
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return this->ColorspaceHandler::ConvertBuffer555To6665Opaque(src, dst, pixCount);
}
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return this->ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB(src, dst, pixCount);
}
size_t ColorspaceHandler::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
{
size_t i = 0;
for (; i < pixCount; i++)
{
dst[i] = ColorspaceConvert8888To6665<false>(src[i]);
}
return i;
}
size_t ColorspaceHandler::ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
size_t i = 0;
for (; i < pixCount; i++)
{
dst[i] = ColorspaceConvert8888To6665<true>(src[i]);
}
return i;
}
size_t ColorspaceHandler::ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return this->ColorspaceHandler::ConvertBuffer8888To6665(src, dst, pixCount);
}
size_t ColorspaceHandler::ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return this->ColorspaceHandler::ConvertBuffer8888To6665_SwapRB(src, dst, pixCount);
}
size_t ColorspaceHandler::ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const
{
size_t i = 0;
for (; i < pixCount; i++)
{
dst[i] = ColorspaceConvert6665To8888<false>(src[i]);
}
return i;
}
size_t ColorspaceHandler::ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
size_t i = 0;
for (; i < pixCount; i++)
{
dst[i] = ColorspaceConvert6665To8888<true>(src[i]);
}
return i;
}
size_t ColorspaceHandler::ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return this->ColorspaceHandler::ConvertBuffer6665To8888(src, dst, pixCount);
}
size_t ColorspaceHandler::ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return this->ColorspaceHandler::ConvertBuffer6665To8888_SwapRB(src, dst, pixCount);
}
size_t ColorspaceHandler::ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
size_t i = 0;
for (; i < pixCount; i++)
{
dst[i] = ColorspaceConvert8888To5551<false>(src[i]);
}
return i;
}
size_t ColorspaceHandler::ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
size_t i = 0;
for (; i < pixCount; i++)
{
dst[i] = ColorspaceConvert8888To5551<true>(src[i]);
}
return i;
}
size_t ColorspaceHandler::ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return this->ColorspaceHandler::ConvertBuffer8888To5551(src, dst, pixCount);
}
size_t ColorspaceHandler::ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return this->ColorspaceHandler::ConvertBuffer8888To5551_SwapRB(src, dst, pixCount);
}
size_t ColorspaceHandler::ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
size_t i = 0;
for (; i < pixCount; i++)
{
dst[i] = ColorspaceConvert6665To5551<false>(src[i]);
}
return i;
}
size_t ColorspaceHandler::ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
size_t i = 0;
for (;i < pixCount; i++)
{
dst[i] = ColorspaceConvert6665To5551<true>(src[i]);
}
return i;
}
size_t ColorspaceHandler::ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return this->ColorspaceHandler::ConvertBuffer6665To5551(src, dst, pixCount);
}
size_t ColorspaceHandler::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return this->ColorspaceHandler::ConvertBuffer6665To5551_SwapRB(src, dst, pixCount);
}
template u32 ColorspaceConvert555To8888Opaque<true>(const u16 src);
template u32 ColorspaceConvert555To8888Opaque<false>(const u16 src);
template u32 ColorspaceConvert555To6665Opaque<true>(const u16 src);
template u32 ColorspaceConvert555To6665Opaque<false>(const u16 src);
template u32 ColorspaceConvert8888To6665<true>(FragmentColor srcColor);
template u32 ColorspaceConvert8888To6665<false>(FragmentColor srcColor);
template u32 ColorspaceConvert8888To6665<true>(u32 srcColor);
template u32 ColorspaceConvert8888To6665<false>(u32 srcColor);
template u32 ColorspaceConvert6665To8888<true>(FragmentColor srcColor);
template u32 ColorspaceConvert6665To8888<false>(FragmentColor srcColor);
template u32 ColorspaceConvert6665To8888<true>(u32 srcColor);
template u32 ColorspaceConvert6665To8888<false>(u32 srcColor);
template u16 ColorspaceConvert8888To5551<true>(FragmentColor srcColor);
template u16 ColorspaceConvert8888To5551<false>(FragmentColor srcColor);
template u16 ColorspaceConvert8888To5551<true>(u32 srcColor);
template u16 ColorspaceConvert8888To5551<false>(u32 srcColor);
template u16 ColorspaceConvert6665To5551<true>(FragmentColor srcColor);
template u16 ColorspaceConvert6665To5551<false>(FragmentColor srcColor);
template u16 ColorspaceConvert6665To5551<true>(u32 srcColor);
template u16 ColorspaceConvert6665To5551<false>(u32 srcColor);
template void ColorspaceConvertBuffer555To8888Opaque<true, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<true, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<false, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<false, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<true, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<true, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<false, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<false, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To6665<true, true>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To6665<true, false>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To6665<false, true>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To6665<false, false>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To8888<true, true>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To8888<true, false>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To8888<false, true>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To8888<false, false>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To5551<true, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To5551<true, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To5551<false, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To5551<false, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To5551<true, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To5551<true, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To5551<false, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To5551<false, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);

View File

@ -0,0 +1,194 @@
/*
Copyright (C) 2016 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef COLORSPACEHANDLER_H
#define COLORSPACEHANDLER_H
#include "types.h"
#include <stdio.h>
#include <stdint.h>
enum NDSColorFormat
{
// The color format information is packed in a 32-bit value.
// The bits are as follows:
// FFFOOOOO AAAAAABB BBBBGGGG GGRRRRRR
//
// F = Flags (see below)
// O = Color order (see below)
// A = Bit count for alpha [0-63]
// B = Bit count for blue [0-63]
// G = Bit count for green [0-63]
// R = Bit count for red [0-63]
//
// Flags:
// Bit 29: Reverse order flag.
// Set = Bits are in reverse order, usually for little-endian usage.
// Cleared = Bits are in normal order, usually for big-endian usage.
//
// Color order bits, 24-28:
// 0x00 = RGBA, common format
// 0x01 = RGAB
// 0x02 = RBGA
// 0x03 = RBAG
// 0x04 = RAGB
// 0x05 = RABG
// 0x06 = GRBA
// 0x07 = GRAB
// 0x08 = GBRA
// 0x09 = GBAR
// 0x0A = GARB
// 0x0B = GABR
// 0x0C = BRGA
// 0x0D = BRAG
// 0x0E = BGRA, common format
// 0x0F = BGAR
// 0x10 = BARG
// 0x11 = BAGR
// 0x12 = ARGB
// 0x13 = ARBG
// 0x14 = AGRB
// 0x15 = AGBR
// 0x16 = ABRG
// 0x17 = ABGR
// Color formats used for internal processing.
//NDSColorFormat_ABGR1555_Rev = 0x20045145,
//NDSColorFormat_ABGR5666_Rev = 0x20186186,
//NDSColorFormat_ABGR8888_Rev = 0x20208208,
// Color formats used by the output framebuffers.
NDSColorFormat_BGR555_Rev = 0x20005145,
NDSColorFormat_BGR666_Rev = 0x20006186,
NDSColorFormat_BGR888_Rev = 0x20008208
};
union FragmentColor
{
u32 color;
struct
{
u8 r,g,b,a;
};
};
extern CACHE_ALIGN const u32 material_5bit_to_31bit[32];
extern CACHE_ALIGN const u8 material_5bit_to_6bit[32];
extern CACHE_ALIGN const u8 material_5bit_to_8bit[32];
extern CACHE_ALIGN const u8 material_6bit_to_8bit[64];
extern CACHE_ALIGN const u8 material_3bit_to_5bit[8];
extern CACHE_ALIGN const u8 material_3bit_to_6bit[8];
extern CACHE_ALIGN const u8 material_3bit_to_8bit[8];
extern CACHE_ALIGN u32 color_555_to_6665_opaque[32768];
extern CACHE_ALIGN u32 color_555_to_6665_opaque_swap_rb[32768];
extern CACHE_ALIGN u32 color_555_to_666[32768];
extern CACHE_ALIGN u32 color_555_to_8888_opaque[32768];
extern CACHE_ALIGN u32 color_555_to_8888_opaque_swap_rb[32768];
extern CACHE_ALIGN u32 color_555_to_888[32768];
#define COLOR555TO6665_OPAQUE(col) (color_555_to_6665_opaque[(col)]) // Convert a 15-bit color to an opaque sparsely packed 32-bit color containing an RGBA6665 color
#define COLOR555TO6665_OPAQUE_SWAP_RB(col) (color_555_to_6665_opaque_swap_rb[(col)]) // Convert a 15-bit color to an opaque sparsely packed 32-bit color containing an RGBA6665 color with R and B components swapped
#define COLOR555TO666(col) (color_555_to_666[(col)]) // Convert a 15-bit color to a fully transparent sparsely packed 32-bit color containing an RGBA6665 color
#ifdef LOCAL_LE
#define COLOR555TO6665(col,alpha5) (((alpha5)<<24) | color_555_to_666[(col)]) // Convert a 15-bit color to a sparsely packed 32-bit color containing an RGBA6665 color with user-defined alpha, little-endian
#else
#define COLOR555TO6665(col,alpha5) ((alpha5) | color_555_to_666[(col)]) // Convert a 15-bit color to a sparsely packed 32-bit color containing an RGBA6665 color with user-defined alpha, big-endian
#endif
#define COLOR555TO8888_OPAQUE(col) (color_555_to_8888_opaque[(col)]) // Convert a 15-bit color to an opaque 32-bit color
#define COLOR555TO8888_OPAQUE_SWAP_RB(col) (color_555_to_8888_opaque_swap_rb[(col)]) // Convert a 15-bit color to an opaque 32-bit color with R and B components swapped
#define COLOR555TO888(col) (color_555_to_888[(col)]) // Convert a 15-bit color to an opaque 24-bit color or a fully transparent 32-bit color
#ifdef LOCAL_LE
#define COLOR555TO8888(col,alpha8) (((alpha8)<<24) | color_555_to_888[(col)]) // Convert a 15-bit color to a 32-bit color with user-defined alpha, little-endian
#else
#define COLOR555TO8888(col,alpha8) ((alpha8) | color_555_to_888[(col)]) // Convert a 15-bit color to a 32-bit color with user-defined alpha, big-endian
#endif
//produce a 15bpp color from individual 5bit components
#define R5G5B5TORGB15(r,g,b) ( (r) | ((g)<<5) | ((b)<<10) )
//produce a 16bpp color from individual 5bit components
#define R6G6B6TORGB15(r,g,b) ( ((r)>>1) | (((g)&0x3E)<<4) | (((b)&0x3E)<<9) )
void ColorspaceHandlerInit();
template<bool SWAP_RB> u32 ColorspaceConvert555To8888Opaque(const u16 src);
template<bool SWAP_RB> u32 ColorspaceConvert555To6665Opaque(const u16 src);
template<bool SWAP_RB> u32 ColorspaceConvert8888To6665(FragmentColor srcColor);
template<bool SWAP_RB> u32 ColorspaceConvert8888To6665(u32 srcColor);
template<bool SWAP_RB> u32 ColorspaceConvert6665To8888(FragmentColor srcColor);
template<bool SWAP_RB> u32 ColorspaceConvert6665To8888(u32 srcColor);
template<bool SWAP_RB> u16 ColorspaceConvert8888To5551(FragmentColor srcColor);
template<bool SWAP_RB> u16 ColorspaceConvert8888To5551(u32 srcColor);
template<bool SWAP_RB> u16 ColorspaceConvert6665To5551(FragmentColor srcColor);
template<bool SWAP_RB> u16 ColorspaceConvert6665To5551(u32 srcColor);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
class ColorspaceHandler
{
public:
ColorspaceHandler() {};
size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
};
FORCEINLINE FragmentColor MakeFragmentColor(const u8 r, const u8 g, const u8 b, const u8 a)
{
FragmentColor ret;
ret.r = r; ret.g = g; ret.b = b; ret.a = a;
return ret;
}
#endif /* COLORSPACEHANDLER_H */

View File

@ -0,0 +1,491 @@
/*
Copyright (C) 2016 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/
#include "colorspacehandler_AVX2.h"
#ifndef ENABLE_AVX2
#error This code requires AVX2 support.
#else
#include <immintrin.h>
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888_AVX2(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi)
{
v256u32 src32;
// Conversion algorithm:
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
src32 = _mm256_unpacklo_epi16(srcColor, _mm256_setzero_si256());
dstLo = (SWAP_RB) ? _mm256_or_si256(_mm256_slli_epi32(src32, 19), _mm256_srli_epi32(src32, 7)) : _mm256_or_si256(_mm256_slli_epi32(src32, 3), _mm256_slli_epi32(src32, 9));
dstLo = _mm256_and_si256( dstLo, _mm256_set1_epi32(0x00F800F8) );
dstLo = _mm256_or_si256( dstLo, _mm256_and_si256(_mm256_slli_epi32(src32, 6), _mm256_set1_epi32(0x0000F800)) );
dstLo = _mm256_or_si256( dstLo, _mm256_and_si256(_mm256_srli_epi32(dstLo, 5), _mm256_set1_epi32(0x00070707)) );
dstLo = _mm256_or_si256( dstLo, srcAlphaBits32Lo );
src32 = _mm256_unpackhi_epi16(srcColor, _mm256_setzero_si256());
dstHi = (SWAP_RB) ? _mm256_or_si256(_mm256_slli_epi32(src32, 19), _mm256_srli_epi32(src32, 7)) : _mm256_or_si256(_mm256_slli_epi32(src32, 3), _mm256_slli_epi32(src32, 9));
dstHi = _mm256_and_si256( dstHi, _mm256_set1_epi32(0x00F800F8) );
dstHi = _mm256_or_si256( dstHi, _mm256_and_si256(_mm256_slli_epi32(src32, 6), _mm256_set1_epi32(0x0000F800)) );
dstHi = _mm256_or_si256( dstHi, _mm256_and_si256(_mm256_srli_epi32(dstHi, 5), _mm256_set1_epi32(0x00070707)) );
dstHi = _mm256_or_si256( dstHi, srcAlphaBits32Hi );
}
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665_AVX2(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi)
{
v256u32 src32;
// Conversion algorithm:
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
src32 = _mm256_unpacklo_epi16(srcColor, _mm256_setzero_si256());
dstLo = (SWAP_RB) ? _mm256_or_si256(_mm256_slli_epi32(src32, 17), _mm256_srli_epi32(src32, 9)) : _mm256_or_si256(_mm256_slli_epi32(src32, 1), _mm256_slli_epi32(src32, 7));
dstLo = _mm256_and_si256( dstLo, _mm256_set1_epi32(0x003E003E) );
dstLo = _mm256_or_si256( dstLo, _mm256_and_si256(_mm256_slli_epi32(src32, 4), _mm256_set1_epi32(0x00003E00)) );
dstLo = _mm256_or_si256( dstLo, _mm256_and_si256(_mm256_srli_epi32(dstLo, 5), _mm256_set1_epi32(0x00010101)) );
dstLo = _mm256_or_si256( dstLo, srcAlphaBits32Lo );
src32 = _mm256_unpackhi_epi16(srcColor, _mm256_setzero_si256());
dstHi = (SWAP_RB) ? _mm256_or_si256(_mm256_slli_epi32(src32, 17), _mm256_srli_epi32(src32, 9)) : _mm256_or_si256(_mm256_slli_epi32(src32, 1), _mm256_slli_epi32(src32, 7));
dstHi = _mm256_and_si256( dstHi, _mm256_set1_epi32(0x003E003E) );
dstHi = _mm256_or_si256( dstHi, _mm256_and_si256(_mm256_slli_epi32(src32, 4), _mm256_set1_epi32(0x00003E00)) );
dstHi = _mm256_or_si256( dstHi, _mm256_and_si256(_mm256_srli_epi32(dstHi, 5), _mm256_set1_epi32(0x00010101)) );
dstHi = _mm256_or_si256( dstHi, srcAlphaBits32Hi );
}
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi)
{
const v256u32 srcAlphaBits32 = _mm256_set1_epi32(0xFF000000);
ColorspaceConvert555To8888_AVX2<SWAP_RB>(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi);
}
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi)
{
const v256u32 srcAlphaBits32 = _mm256_set1_epi32(0x1F000000);
ColorspaceConvert555To6665_AVX2<SWAP_RB>(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi);
}
template <bool SWAP_RB>
FORCEINLINE v256u32 ColorspaceConvert8888To6665_AVX2(const v256u32 &src)
{
// Conversion algorithm:
// RGB 8-bit to 6-bit formula: dstRGB6 = (srcRGB8 >> 2)
// Alpha 8-bit to 6-bit formula: dstA5 = (srcA8 >> 3)
v256u32 rgb;
const v256u32 a = _mm256_and_si256( _mm256_srli_epi32(src, 3), _mm256_set1_epi32(0x1F000000) );
if (SWAP_RB)
{
rgb = _mm256_and_si256( _mm256_srli_epi32(src, 2), _mm256_set1_epi32(0x003F3F3F) );
rgb = _mm256_shuffle_epi8( rgb, _mm256_set_epi8(31,28,29,30, 27,24,25,26, 23,20,21,22, 19,16,17,18, 15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2) );
}
else
{
rgb = _mm256_and_si256( _mm256_srli_epi32(src, 2), _mm256_set1_epi32(0x003F3F3F) );
}
return _mm256_or_si256(rgb, a);
}
template <bool SWAP_RB>
FORCEINLINE v256u32 ColorspaceConvert6665To8888_AVX2(const v256u32 &src)
{
// Conversion algorithm:
// RGB 6-bit to 8-bit formula: dstRGB8 = (srcRGB6 << 2) | ((srcRGB6 >> 4) & 0x03)
// Alpha 5-bit to 8-bit formula: dstA8 = (srcA5 << 3) | ((srcA5 >> 2) & 0x07)
v256u32 rgb = _mm256_or_si256( _mm256_and_si256(_mm256_slli_epi32(src, 2), _mm256_set1_epi32(0x00FCFCFC)), _mm256_and_si256(_mm256_srli_epi32(src, 4), _mm256_set1_epi32(0x00030303)) );
const v256u32 a = _mm256_or_si256( _mm256_and_si256(_mm256_slli_epi32(src, 3), _mm256_set1_epi32(0xF8000000)), _mm256_and_si256(_mm256_srli_epi32(src, 2), _mm256_set1_epi32(0x07000000)) );
if (SWAP_RB)
{
rgb = _mm256_shuffle_epi8( rgb, _mm256_set_epi8(31,28,29,30, 27,24,25,26, 23,20,21,22, 19,16,17,18, 15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2) );
}
return _mm256_or_si256(rgb, a);
}
template <NDSColorFormat COLORFORMAT, bool SWAP_RB>
FORCEINLINE v256u16 _ConvertColorBaseTo5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi)
{
if (COLORFORMAT == NDSColorFormat_BGR555_Rev)
{
return srcLo;
}
v256u32 rgbLo;
v256u32 rgbHi;
v256u16 alpha;
if (COLORFORMAT == NDSColorFormat_BGR666_Rev)
{
if (SWAP_RB)
{
// Convert color from low bits
rgbLo = _mm256_and_si256(_mm256_srli_epi32(srcLo, 17), _mm256_set1_epi32(0x0000001F));
rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo, 4), _mm256_set1_epi32(0x000003E0)) );
rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_slli_epi32(srcLo, 9), _mm256_set1_epi32(0x00007C00)) );
// Convert color from high bits
rgbHi = _mm256_and_si256(_mm256_srli_epi32(srcHi, 17), _mm256_set1_epi32(0x0000001F));
rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi, 4), _mm256_set1_epi32(0x000003E0)) );
rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_slli_epi32(srcHi, 9), _mm256_set1_epi32(0x00007C00)) );
}
else
{
// Convert color from low bits
rgbLo = _mm256_and_si256(_mm256_srli_epi32(srcLo, 1), _mm256_set1_epi32(0x0000001F));
rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo, 4), _mm256_set1_epi32(0x000003E0)) );
rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo, 7), _mm256_set1_epi32(0x00007C00)) );
// Convert color from high bits
rgbHi = _mm256_and_si256(_mm256_srli_epi32(srcHi, 1), _mm256_set1_epi32(0x0000001F));
rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi, 4), _mm256_set1_epi32(0x000003E0)) );
rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi, 7), _mm256_set1_epi32(0x00007C00)) );
}
// Convert alpha
alpha = _mm256_packs_epi32( _mm256_and_si256(_mm256_srli_epi32(srcLo, 24), _mm256_set1_epi32(0x0000001F)), _mm256_and_si256(_mm256_srli_epi32(srcHi, 24), _mm256_set1_epi32(0x0000001F)) );
alpha = _mm256_cmpgt_epi16(alpha, _mm256_setzero_si256());
alpha = _mm256_and_si256(alpha, _mm256_set1_epi16(0x8000));
}
else if (COLORFORMAT == NDSColorFormat_BGR888_Rev)
{
if (SWAP_RB)
{
// Convert color from low bits
rgbLo = _mm256_and_si256(_mm256_srli_epi32(srcLo, 19), _mm256_set1_epi32(0x0000001F));
rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo, 6), _mm256_set1_epi32(0x000003E0)) );
rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_slli_epi32(srcLo, 7), _mm256_set1_epi32(0x00007C00)) );
// Convert color from high bits
rgbHi = _mm256_and_si256(_mm256_srli_epi32(srcHi, 19), _mm256_set1_epi32(0x0000001F));
rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi, 6), _mm256_set1_epi32(0x000003E0)) );
rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_slli_epi32(srcHi, 7), _mm256_set1_epi32(0x00007C00)) );
}
else
{
// Convert color from low bits
rgbLo = _mm256_and_si256(_mm256_srli_epi32(srcLo, 3), _mm256_set1_epi32(0x0000001F));
rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo, 6), _mm256_set1_epi32(0x000003E0)) );
rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo, 9), _mm256_set1_epi32(0x00007C00)) );
// Convert color from high bits
rgbHi = _mm256_and_si256(_mm256_srli_epi32(srcHi, 3), _mm256_set1_epi32(0x0000001F));
rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi, 6), _mm256_set1_epi32(0x000003E0)) );
rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi, 9), _mm256_set1_epi32(0x00007C00)) );
}
// Convert alpha
alpha = _mm256_packs_epi32( _mm256_srli_epi32(srcLo, 24), _mm256_srli_epi32(srcHi, 24) );
alpha = _mm256_cmpgt_epi16(alpha, _mm256_setzero_si256());
alpha = _mm256_and_si256(alpha, _mm256_set1_epi16(0x8000));
}
return _mm256_or_si256(_mm256_packs_epi32(rgbLo, rgbHi), alpha);
}
template <bool SWAP_RB>
FORCEINLINE v256u16 ColorspaceConvert8888To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi)
{
return _ConvertColorBaseTo5551_AVX2<NDSColorFormat_BGR888_Rev, SWAP_RB>(srcLo, srcHi);
}
template <bool SWAP_RB>
FORCEINLINE v256u16 ColorspaceConvert6665To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi)
{
return _ConvertColorBaseTo5551_AVX2<NDSColorFormat_BGR666_Rev, SWAP_RB>(srcLo, srcHi);
}
template <bool SWAP_RB, bool IS_UNALIGNED>
static size_t ColorspaceConvertBuffer555To8888Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec256)
{
size_t i = 0;
for (; i < pixCountVec256; i+=16)
{
v256u16 src_vec256 = (IS_UNALIGNED) ? _mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i));
v256u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To8888Opaque_AVX2<SWAP_RB>(src_vec256, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
_mm256_storeu_si256((v256u32 *)(dst+i+0), dstConvertedLo);
_mm256_storeu_si256((v256u32 *)(dst+i+8), dstConvertedHi);
}
else
{
_mm256_store_si256((v256u32 *)(dst+i+0), dstConvertedLo);
_mm256_store_si256((v256u32 *)(dst+i+8), dstConvertedHi);
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer555To6665Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec256)
{
size_t i = 0;
for (; i < pixCountVec256; i+=16)
{
v256u16 src_vec256 = (IS_UNALIGNED) ? _mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i));
v256u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To6665Opaque_AVX2<SWAP_RB>(src_vec256, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
_mm256_storeu_si256((v256u32 *)(dst+i+0), dstConvertedLo);
_mm256_storeu_si256((v256u32 *)(dst+i+8), dstConvertedHi);
}
else
{
_mm256_store_si256((v256u32 *)(dst+i+0), dstConvertedLo);
_mm256_store_si256((v256u32 *)(dst+i+8), dstConvertedHi);
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer8888To6665_AVX2(const u32 *src, u32 *dst, size_t pixCountVec256)
{
size_t i = 0;
for (; i < pixCountVec256; i+=8)
{
if (IS_UNALIGNED)
{
_mm256_storeu_si256( (v256u32 *)(dst+i), ColorspaceConvert8888To6665_AVX2<SWAP_RB>(_mm256_loadu_si256((v256u32 *)(src+i))) );
}
else
{
_mm256_store_si256( (v256u32 *)(dst+i), ColorspaceConvert8888To6665_AVX2<SWAP_RB>(_mm256_load_si256((v256u32 *)(src+i))) );
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer6665To8888_AVX2(const u32 *src, u32 *dst, size_t pixCountVec256)
{
size_t i = 0;
for (; i < pixCountVec256; i+=8)
{
if (IS_UNALIGNED)
{
_mm256_storeu_si256( (v256u32 *)(dst+i), ColorspaceConvert6665To8888_AVX2<SWAP_RB>(_mm256_loadu_si256((v256u32 *)(src+i))) );
}
else
{
_mm256_store_si256( (v256u32 *)(dst+i), ColorspaceConvert6665To8888_AVX2<SWAP_RB>(_mm256_load_si256((v256u32 *)(src+i))) );
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer8888To5551_AVX2(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec256)
{
size_t i = 0;
for (; i < pixCountVec256; i+=16)
{
if (IS_UNALIGNED)
{
_mm256_storeu_si256( (v256u16 *)(dst+i), ColorspaceConvert8888To5551_AVX2<SWAP_RB>(_mm256_loadu_si256((v256u32 *)(src+i)), _mm256_loadu_si256((v256u32 *)(src+i+8))) );
}
else
{
_mm256_store_si256( (v256u16 *)(dst+i), ColorspaceConvert8888To5551_AVX2<SWAP_RB>(_mm256_load_si256((v256u32 *)(src+i)), _mm256_load_si256((v256u32 *)(src+i+8))) );
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer6665To5551_AVX2(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec256)
{
size_t i = 0;
for (; i < pixCountVec256; i+=16)
{
if (IS_UNALIGNED)
{
_mm256_storeu_si256( (v256u16 *)(dst+i), ColorspaceConvert6665To5551_AVX2<SWAP_RB>(_mm256_loadu_si256((v256u32 *)(src+i)), _mm256_loadu_si256((v256u32 *)(src+i+8))) );
}
else
{
_mm256_store_si256( (v256u16 *)(dst+i), ColorspaceConvert6665To5551_AVX2<SWAP_RB>(_mm256_load_si256((v256u32 *)(src+i)), _mm256_load_si256((v256u32 *)(src+i+8))) );
}
}
return i;
}
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_AVX2<false, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_AVX2<true, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_AVX2<false, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_AVX2<true, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_AVX2<false, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_AVX2<true, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_AVX2<false, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_AVX2<true, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To6665_AVX2<false, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To6665_AVX2<true, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To6665_AVX2<false, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To6665_AVX2<true, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To8888_AVX2<false, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To8888_AVX2<true, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To8888_AVX2<false, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To8888_AVX2<true, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To5551_AVX2<false, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To5551_AVX2<true, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To5551_AVX2<false, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To5551_AVX2<true, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To5551_AVX2<false, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To5551_AVX2<true, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To5551_AVX2<false, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To5551_AVX2<true, true>(src, dst, pixCount);
}
template void ColorspaceConvert555To8888_AVX2<true>(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To8888_AVX2<false>(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To6665_AVX2<true>(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To6665_AVX2<false>(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AVX2<true>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AVX2<false>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AVX2<true>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AVX2<false>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template v256u32 ColorspaceConvert8888To6665_AVX2<true>(const v256u32 &src);
template v256u32 ColorspaceConvert8888To6665_AVX2<false>(const v256u32 &src);
template v256u32 ColorspaceConvert6665To8888_AVX2<true>(const v256u32 &src);
template v256u32 ColorspaceConvert6665To8888_AVX2<false>(const v256u32 &src);
template v256u16 ColorspaceConvert8888To5551_AVX2<true>(const v256u32 &srcLo, const v256u32 &srcHi);
template v256u16 ColorspaceConvert8888To5551_AVX2<false>(const v256u32 &srcLo, const v256u32 &srcHi);
template v256u16 ColorspaceConvert6665To5551_AVX2<true>(const v256u32 &srcLo, const v256u32 &srcHi);
template v256u16 ColorspaceConvert6665To5551_AVX2<false>(const v256u32 &srcLo, const v256u32 &srcHi);
#endif // ENABLE_AVX2

View File

@ -0,0 +1,74 @@
/*
Copyright (C) 2016 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef COLORSPACEHANDLER_AVX2_H
#define COLORSPACEHANDLER_AVX2_H
#include "colorspacehandler.h"
#ifndef ENABLE_AVX2
#warning This header requires AVX2 support.
#else
template<bool SWAP_RB> void ColorspaceConvert555To8888_AVX2(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665_AVX2(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> v256u32 ColorspaceConvert8888To6665_AVX2(const v256u32 &src);
template<bool SWAP_RB> v256u32 ColorspaceConvert6665To8888_AVX2(const v256u32 &src);
template<bool SWAP_RB> v256u16 ColorspaceConvert8888To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi);
template<bool SWAP_RB> v256u16 ColorspaceConvert6665To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi);
class ColorspaceHandler_AVX2 : public ColorspaceHandler
{
public:
ColorspaceHandler_AVX2() {};
size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
};
#endif // ENABLE_AVX2
#endif /* COLORSPACEHANDLER_AVX2_H */

View File

@ -0,0 +1,345 @@
/*
Copyright (C) 2016 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/
#include "colorspacehandler_Altivec.h"
#ifndef ENABLE_ALTIVEC
#error This code requires PowerPC AltiVec support.
#else
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi)
{
// Conversion algorithm:
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
dstLo = vec_unpackl((vector pixel)srcColor);
dstLo = vec_or( vec_sl((v128u8)dstLo, ((v128u8){3,3,3,0, 3,3,3,0, 3,3,3,0, 3,3,3,0})), vec_sr((v128u8)dstLo, ((v128u8){2,2,2,0, 2,2,2,0, 2,2,2,0, 2,2,2,0})) );
dstLo = vec_sel(dstLo, srcAlphaBits32Lo, vec_splat_u32(0xFF000000));
dstHi = vec_unpackh((vector pixel)srcColor);
dstHi = vec_or( vec_sl((v128u8)dstHi, ((v128u8){3,3,3,0, 3,3,3,0, 3,3,3,0, 3,3,3,0})), vec_sr((v128u8)dstHi, ((v128u8){2,2,2,0, 2,2,2,0, 2,2,2,0, 2,2,2,0})) );
dstHi = vec_sel(dstHi, srcAlphaBits32Hi, vec_splat_u32(0xFF000000));
}
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi)
{
// Conversion algorithm:
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
dstLo = vec_unpackl((vector pixel)srcColor);
dstLo = vec_or( vec_sl((v128u8)dstLo, ((v128u8){1,1,1,0, 1,1,1,0, 1,1,1,0, 1,1,1,0})), vec_sr((v128u8)dstLo, ((v128u8){4,4,4,0, 4,4,4,0, 4,4,4,0, 4,4,4,0})) );
dstLo = vec_sel(dstLo, srcAlphaBits32Lo, vec_splat_u32(0xFF000000));
dstHi = vec_unpackh((vector pixel)srcColor);
dstHi = vec_or( vec_sl((v128u8)dstHi, ((v128u8){1,1,1,0, 1,1,1,0, 1,1,1,0, 1,1,1,0})), vec_sr((v128u8)dstHi, ((v128u8){4,4,4,0, 4,4,4,0, 4,4,4,0, 4,4,4,0})) );
dstHi = vec_sel(dstHi, srcAlphaBits32Hi, vec_splat_u32(0xFF000000));
}
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
const v128u32 srcAlphaBits32 = {0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000};
ColorspaceConvert555To8888_AltiVec<SWAP_RB>(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi);
}
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
const v128u32 srcAlphaBits32 = {0x1F000000, 0x1F000000, 0x1F000000, 0x1F000000};
ColorspaceConvert555To6665_AltiVec<SWAP_RB>(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi);
}
template <bool SWAP_RB>
FORCEINLINE v128u32 ColorspaceConvert8888To6665_AltiVec(const v128u32 &src)
{
// Conversion algorithm:
// RGB 8-bit to 6-bit formula: dstRGB6 = (srcRGB8 >> 2)
// Alpha 8-bit to 6-bit formula: dstA5 = (srcA8 >> 3)
v128u8 rgba = vec_sr( (v128u8)src, ((v128u8){2,2,2,3, 2,2,2,3, 2,2,2,3, 2,2,2,3}) );
if (SWAP_RB)
{
rgba = vec_perm( rgba, rgba, ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15}) );
}
return (v128u32)rgba;
}
template <bool SWAP_RB>
FORCEINLINE v128u32 ColorspaceConvert6665To8888_AltiVec(const v128u32 &src)
{
// Conversion algorithm:
// RGB 6-bit to 8-bit formula: dstRGB8 = (srcRGB6 << 2) | ((srcRGB6 >> 4) & 0x03)
// Alpha 5-bit to 8-bit formula: dstA8 = (srcA5 << 3) | ((srcA5 >> 2) & 0x07)
v128u8 rgba = vec_or( vec_sl((v128u8)src, ((v128u8){2,2,2,3, 2,2,2,3, 2,2,2,3, 2,2,2,3})), vec_sr((v128u8)src, ((v128u8){4,4,4,2, 4,4,4,2, 4,4,4,2, 4,4,4,2})) );
if (SWAP_RB)
{
rgba = vec_perm( rgba, rgba, ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15}) );
}
return (v128u32)rgba;
}
template <NDSColorFormat COLORFORMAT, bool SWAP_RB>
FORCEINLINE v128u16 _ConvertColorBaseTo5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi)
{
if (COLORFORMAT == NDSColorFormat_BGR555_Rev)
{
return srcLo;
}
v128u32 rgbLo;
v128u32 rgbHi;
v128u16 dstColor;
v128u16 dstAlpha;
if (COLORFORMAT == NDSColorFormat_BGR666_Rev)
{
// Convert alpha
dstAlpha = vec_packsu( vec_and(vec_sr(srcLo, vec_splat_u32(24)), vec_splat_u32(0x0000001F)), vec_and(vec_sr(srcHi, vec_splat_u32(24)), vec_splat_u32(0x0000001F)) );
dstAlpha = vec_cmpgt(dstAlpha, vec_splat_u16(0));
dstAlpha = vec_and(dstAlpha, vec_splat_u16(0x8000));
// Convert RGB
if (SWAP_RB)
{
rgbLo = vec_perm( srcLo, srcLo, ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15}) );
rgbHi = vec_perm( srcHi, srcHi, ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15}) );
rgbLo = vec_sl( rgbLo, vec_splat_u32(2) );
rgbHi = vec_sl( rgbHi, vec_splat_u32(2) );
dstColor = (v128u16)vec_packpx(rgbLo, rgbHi);
}
else
{
rgbLo = vec_sl( srcLo, vec_splat_u32(2) );
rgbHi = vec_sl( srcHi, vec_splat_u32(2) );
dstColor = (v128u16)vec_packpx(rgbLo, rgbHi);
}
}
else if (COLORFORMAT == NDSColorFormat_BGR888_Rev)
{
// Convert alpha
dstAlpha = vec_packsu( vec_sr(srcLo, vec_splat_u32(24)), vec_sr(srcHi, vec_splat_u32(24)) );
dstAlpha = vec_cmpgt(dstAlpha, vec_splat_u16(0));
dstAlpha = vec_and(dstAlpha, vec_splat_u16(0x8000));
// Convert RGB
if (SWAP_RB)
{
rgbLo = vec_perm( srcLo, srcLo, ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15}) );
rgbHi = vec_perm( srcHi, srcHi, ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15}) );
dstColor = (v128u16)vec_packpx(rgbLo, rgbHi);
}
else
{
dstColor = (v128u16)vec_packpx(srcLo, srcHi);
}
}
dstColor = vec_and(dstColor, vec_splat_u16(0x7FFF));
return vec_or(dstColor, dstAlpha);
}
template <bool SWAP_RB>
FORCEINLINE v128u16 ColorspaceConvert8888To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi)
{
return _ConvertColorBaseTo5551_AltiVec<NDSColorFormat_BGR888_Rev, SWAP_RB>(srcLo, srcHi);
}
template <bool SWAP_RB>
FORCEINLINE v128u16 ColorspaceConvert6665To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi)
{
return _ConvertColorBaseTo5551_AltiVec<NDSColorFormat_BGR666_Rev, SWAP_RB>(srcLo, srcHi);
}
template <bool SWAP_RB>
static size_t ColorspaceConvertBuffer555To8888Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=8)
{
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To8888Opaque_AltiVec<SWAP_RB>( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi );
vec_st(dstConvertedHi, 0, dst+i);
vec_st(dstConvertedLo, 16, dst+i);
}
return i;
}
template <bool SWAP_RB>
size_t ColorspaceConvertBuffer555To6665Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=8)
{
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To6665Opaque_AltiVec<SWAP_RB>( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi );
vec_st(dstConvertedHi, 0, dst+i);
vec_st(dstConvertedLo, 16, dst+i);
}
return i;
}
template <bool SWAP_RB>
size_t ColorspaceConvertBuffer8888To6665_AltiVec(const u32 *src, u32 *dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=4)
{
vec_st( ColorspaceConvert8888To6665_AltiVec<SWAP_RB>(vec_ld(0, src+i)), 0, dst+i );
}
return i;
}
template <bool SWAP_RB>
size_t ColorspaceConvertBuffer6665To8888_AltiVec(const u32 *src, u32 *dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=4)
{
vec_st( ColorspaceConvert6665To8888_AltiVec<SWAP_RB>(vec_ld(0, src+i)), 0, dst+i );
}
return i;
}
template <bool SWAP_RB>
size_t ColorspaceConvertBuffer8888To5551_AltiVec(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=8)
{
vec_st( ColorspaceConvert8888To5551_AltiVec<SWAP_RB>(vec_ld(0, src+i), vec_ld(16, src+i)), 0, dst+i );
}
return i;
}
template <bool SWAP_RB>
size_t ColorspaceConvertBuffer6665To5551_AltiVec(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=8)
{
vec_st( ColorspaceConvert6665To5551_AltiVec<SWAP_RB>(vec_ld(0, src+i), vec_ld(16, src+i)), 0, dst+i );
}
return i;
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_AltiVec<false>(src, dst, pixCount);
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_AltiVec<true>(src, dst, pixCount);
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_AltiVec<false>(src, dst, pixCount);
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_AltiVec<true>(src, dst, pixCount);
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To6665_AltiVec<false>(src, dst, pixCount);
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To6665_AltiVec<true>(src, dst, pixCount);
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To8888_AltiVec<false>(src, dst, pixCount);
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To8888_AltiVec<true>(src, dst, pixCount);
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To5551_AltiVec<false>(src, dst, pixCount);
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To5551_AltiVec<true>(src, dst, pixCount);
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To5551_AltiVec<false>(src, dst, pixCount);
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To5551_AltiVec<true>(src, dst, pixCount);
}
template void ColorspaceConvert555To8888_AltiVec<true>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_AltiVec<false>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_AltiVec<true>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_AltiVec<false>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AltiVec<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AltiVec<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AltiVec<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AltiVec<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template v128u32 ColorspaceConvert8888To6665_AltiVec<true>(const v128u32 &src);
template v128u32 ColorspaceConvert8888To6665_AltiVec<false>(const v128u32 &src);
template v128u32 ColorspaceConvert6665To8888_AltiVec<true>(const v128u32 &src);
template v128u32 ColorspaceConvert6665To8888_AltiVec<false>(const v128u32 &src);
template v128u16 ColorspaceConvert8888To5551_AltiVec<true>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u16 ColorspaceConvert8888To5551_AltiVec<false>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u16 ColorspaceConvert6665To5551_AltiVec<true>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u16 ColorspaceConvert6665To5551_AltiVec<false>(const v128u32 &srcLo, const v128u32 &srcHi);
#endif // ENABLE_SSE2

View File

@ -0,0 +1,64 @@
/*
Copyright (C) 2016 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef COLORSPACEHANDLER_ALTIVEC_H
#define COLORSPACEHANDLER_ALTIVEC_H
#include "colorspacehandler.h"
#ifndef ENABLE_ALTIVEC
#warning This header requires PowerPC AltiVec support.
#else
template<bool SWAP_RB> void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> v128u32 ColorspaceConvert8888To6665_AltiVec(const v128u32 &src);
template<bool SWAP_RB> v128u32 ColorspaceConvert6665To8888_AltiVec(const v128u32 &src);
template<bool SWAP_RB> v128u16 ColorspaceConvert8888To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi);
template<bool SWAP_RB> v128u16 ColorspaceConvert6665To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi);
// AltiVec has very poor support for dealing with unaligned addresses (it's possible, just
// very obtuse), so we're not even going to bother dealing with any unaligned addresses.
class ColorspaceHandler_AltiVec : public ColorspaceHandler
{
public:
ColorspaceHandler_AltiVec() {};
size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
};
#endif // ENABLE_ALTIVEC
#endif /* COLORSPACEHANDLER_ALTIVEC_H */

View File

@ -0,0 +1,503 @@
/*
Copyright (C) 2016 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/
#include "colorspacehandler_SSE2.h"
#ifndef ENABLE_SSE2
#error This code requires SSE2 support.
#else
#include <emmintrin.h>
#ifdef ENABLE_SSSE3
#include <tmmintrin.h>
#endif
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888_SSE2(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi)
{
v128u32 src32;
// Conversion algorithm:
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
src32 = _mm_unpacklo_epi16(srcColor, _mm_setzero_si128());
dstLo = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 19), _mm_srli_epi32(src32, 7)) : _mm_or_si128(_mm_slli_epi32(src32, 3), _mm_slli_epi32(src32, 9));
dstLo = _mm_and_si128( dstLo, _mm_set1_epi32(0x00F800F8) );
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_slli_epi32(src32, 6), _mm_set1_epi32(0x0000F800)) );
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_srli_epi32(dstLo, 5), _mm_set1_epi32(0x00070707)) );
dstLo = _mm_or_si128( dstLo, srcAlphaBits32Lo );
src32 = _mm_unpackhi_epi16(srcColor, _mm_setzero_si128());
dstHi = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 19), _mm_srli_epi32(src32, 7)) : _mm_or_si128(_mm_slli_epi32(src32, 3), _mm_slli_epi32(src32, 9));
dstHi = _mm_and_si128( dstHi, _mm_set1_epi32(0x00F800F8) );
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_slli_epi32(src32, 6), _mm_set1_epi32(0x0000F800)) );
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_srli_epi32(dstHi, 5), _mm_set1_epi32(0x00070707)) );
dstHi = _mm_or_si128( dstHi, srcAlphaBits32Hi );
}
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665_SSE2(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi)
{
v128u32 src32;
// Conversion algorithm:
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
src32 = _mm_unpacklo_epi16(srcColor, _mm_setzero_si128());
dstLo = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 17), _mm_srli_epi32(src32, 9)) : _mm_or_si128(_mm_slli_epi32(src32, 1), _mm_slli_epi32(src32, 7));
dstLo = _mm_and_si128( dstLo, _mm_set1_epi32(0x003E003E) );
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00003E00)) );
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_srli_epi32(dstLo, 5), _mm_set1_epi32(0x00010101)) );
dstLo = _mm_or_si128( dstLo, srcAlphaBits32Lo );
src32 = _mm_unpackhi_epi16(srcColor, _mm_setzero_si128());
dstHi = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 17), _mm_srli_epi32(src32, 9)) : _mm_or_si128(_mm_slli_epi32(src32, 1), _mm_slli_epi32(src32, 7));
dstHi = _mm_and_si128( dstHi, _mm_set1_epi32(0x003E003E) );
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00003E00)) );
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_srli_epi32(dstHi, 5), _mm_set1_epi32(0x00010101)) );
dstHi = _mm_or_si128( dstHi, srcAlphaBits32Hi );
}
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
const v128u32 srcAlphaBits32 = _mm_set1_epi32(0xFF000000);
ColorspaceConvert555To8888_SSE2<SWAP_RB>(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi);
}
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
const v128u32 srcAlphaBits32 = _mm_set1_epi32(0x1F000000);
ColorspaceConvert555To6665_SSE2<SWAP_RB>(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi);
}
template <bool SWAP_RB>
FORCEINLINE v128u32 ColorspaceConvert8888To6665_SSE2(const v128u32 &src)
{
// Conversion algorithm:
// RGB 8-bit to 6-bit formula: dstRGB6 = (srcRGB8 >> 2)
// Alpha 8-bit to 6-bit formula: dstA5 = (srcA8 >> 3)
v128u32 rgb;
const v128u32 a = _mm_and_si128( _mm_srli_epi32(src, 3), _mm_set1_epi32(0x1F000000) );
if (SWAP_RB)
{
#ifdef ENABLE_SSSE3
rgb = _mm_and_si128( _mm_srli_epi32(src, 2), _mm_set1_epi32(0x003F3F3F) );
rgb = _mm_shuffle_epi8( rgb, _mm_set_epi8(15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2) );
#else
rgb = _mm_or_si128( _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x003F0000)), 18), _mm_or_si128(_mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x00003F00)), 2), _mm_slli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x0000003F)), 14)) );
#endif
}
else
{
rgb = _mm_and_si128( _mm_srli_epi32(src, 2), _mm_set1_epi32(0x003F3F3F) );
}
return _mm_or_si128(rgb, a);
}
template <bool SWAP_RB>
FORCEINLINE v128u32 ColorspaceConvert6665To8888_SSE2(const v128u32 &src)
{
// Conversion algorithm:
// RGB 6-bit to 8-bit formula: dstRGB8 = (srcRGB6 << 2) | ((srcRGB6 >> 4) & 0x03)
// Alpha 5-bit to 8-bit formula: dstA8 = (srcA5 << 3) | ((srcA5 >> 2) & 0x07)
v128u32 rgb = _mm_or_si128( _mm_and_si128(_mm_slli_epi32(src, 2), _mm_set1_epi32(0x00FCFCFC)), _mm_and_si128(_mm_srli_epi32(src, 4), _mm_set1_epi32(0x00030303)) );
const v128u32 a = _mm_or_si128( _mm_and_si128(_mm_slli_epi32(src, 3), _mm_set1_epi32(0xF8000000)), _mm_and_si128(_mm_srli_epi32(src, 2), _mm_set1_epi32(0x07000000)) );
if (SWAP_RB)
{
#ifdef ENABLE_SSSE3
rgb = _mm_shuffle_epi8( rgb, _mm_set_epi8(15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2) );
#else
rgb = _mm_or_si128( _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x00FF0000)), 16), _mm_or_si128(_mm_and_si128(src, _mm_set1_epi32(0x0000FF00)), _mm_slli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x000000FF)), 16)) );
#endif
}
return _mm_or_si128(rgb, a);
}
template <NDSColorFormat COLORFORMAT, bool SWAP_RB>
FORCEINLINE v128u16 _ConvertColorBaseTo5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi)
{
if (COLORFORMAT == NDSColorFormat_BGR555_Rev)
{
return srcLo;
}
v128u32 rgbLo;
v128u32 rgbHi;
v128u16 alpha;
if (COLORFORMAT == NDSColorFormat_BGR666_Rev)
{
if (SWAP_RB)
{
// Convert color from low bits
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 17), _mm_set1_epi32(0x0000001F));
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 4), _mm_set1_epi32(0x000003E0)) );
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_slli_epi32(srcLo, 9), _mm_set1_epi32(0x00007C00)) );
// Convert color from high bits
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 17), _mm_set1_epi32(0x0000001F));
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 4), _mm_set1_epi32(0x000003E0)) );
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_slli_epi32(srcHi, 9), _mm_set1_epi32(0x00007C00)) );
}
else
{
// Convert color from low bits
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 1), _mm_set1_epi32(0x0000001F));
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 4), _mm_set1_epi32(0x000003E0)) );
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 7), _mm_set1_epi32(0x00007C00)) );
// Convert color from high bits
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 1), _mm_set1_epi32(0x0000001F));
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 4), _mm_set1_epi32(0x000003E0)) );
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 7), _mm_set1_epi32(0x00007C00)) );
}
// Convert alpha
alpha = _mm_packs_epi32( _mm_and_si128(_mm_srli_epi32(srcLo, 24), _mm_set1_epi32(0x0000001F)), _mm_and_si128(_mm_srli_epi32(srcHi, 24), _mm_set1_epi32(0x0000001F)) );
alpha = _mm_cmpgt_epi16(alpha, _mm_setzero_si128());
alpha = _mm_and_si128(alpha, _mm_set1_epi16(0x8000));
}
else if (COLORFORMAT == NDSColorFormat_BGR888_Rev)
{
if (SWAP_RB)
{
// Convert color from low bits
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 19), _mm_set1_epi32(0x0000001F));
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 6), _mm_set1_epi32(0x000003E0)) );
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_slli_epi32(srcLo, 7), _mm_set1_epi32(0x00007C00)) );
// Convert color from high bits
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 19), _mm_set1_epi32(0x0000001F));
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 6), _mm_set1_epi32(0x000003E0)) );
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_slli_epi32(srcHi, 7), _mm_set1_epi32(0x00007C00)) );
}
else
{
// Convert color from low bits
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 3), _mm_set1_epi32(0x0000001F));
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 6), _mm_set1_epi32(0x000003E0)) );
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 9), _mm_set1_epi32(0x00007C00)) );
// Convert color from high bits
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 3), _mm_set1_epi32(0x0000001F));
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 6), _mm_set1_epi32(0x000003E0)) );
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 9), _mm_set1_epi32(0x00007C00)) );
}
// Convert alpha
alpha = _mm_packs_epi32( _mm_srli_epi32(srcLo, 24), _mm_srli_epi32(srcHi, 24) );
alpha = _mm_cmpgt_epi16(alpha, _mm_setzero_si128());
alpha = _mm_and_si128(alpha, _mm_set1_epi16(0x8000));
}
return _mm_or_si128(_mm_packs_epi32(rgbLo, rgbHi), alpha);
}
template <bool SWAP_RB>
FORCEINLINE v128u16 ColorspaceConvert8888To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi)
{
return _ConvertColorBaseTo5551_SSE2<NDSColorFormat_BGR888_Rev, SWAP_RB>(srcLo, srcHi);
}
template <bool SWAP_RB>
FORCEINLINE v128u16 ColorspaceConvert6665To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi)
{
return _ConvertColorBaseTo5551_SSE2<NDSColorFormat_BGR666_Rev, SWAP_RB>(srcLo, srcHi);
}
template <bool SWAP_RB, bool IS_UNALIGNED>
static size_t ColorspaceConvertBuffer555To8888Opaque_SSE2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=8)
{
v128u16 src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i));
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To8888Opaque_SSE2<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
_mm_storeu_si128((v128u32 *)(dst+i+0), dstConvertedLo);
_mm_storeu_si128((v128u32 *)(dst+i+4), dstConvertedHi);
}
else
{
_mm_store_si128((v128u32 *)(dst+i+0), dstConvertedLo);
_mm_store_si128((v128u32 *)(dst+i+4), dstConvertedHi);
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer555To6665Opaque_SSE2(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=8)
{
v128u16 src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i));
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To6665Opaque_SSE2<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
_mm_storeu_si128((v128u32 *)(dst+i+0), dstConvertedLo);
_mm_storeu_si128((v128u32 *)(dst+i+4), dstConvertedHi);
}
else
{
_mm_store_si128((v128u32 *)(dst+i+0), dstConvertedLo);
_mm_store_si128((v128u32 *)(dst+i+4), dstConvertedHi);
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer8888To6665_SSE2(const u32 *src, u32 *dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=4)
{
if (IS_UNALIGNED)
{
_mm_storeu_si128( (v128u32 *)(dst+i), ColorspaceConvert8888To6665_SSE2<SWAP_RB>(_mm_loadu_si128((v128u32 *)(src+i))) );
}
else
{
_mm_store_si128( (v128u32 *)(dst+i), ColorspaceConvert8888To6665_SSE2<SWAP_RB>(_mm_load_si128((v128u32 *)(src+i))) );
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer6665To8888_SSE2(const u32 *src, u32 *dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=4)
{
if (IS_UNALIGNED)
{
_mm_storeu_si128( (v128u32 *)(dst+i), ColorspaceConvert6665To8888_SSE2<SWAP_RB>(_mm_loadu_si128((v128u32 *)(src+i))) );
}
else
{
_mm_store_si128( (v128u32 *)(dst+i), ColorspaceConvert6665To8888_SSE2<SWAP_RB>(_mm_load_si128((v128u32 *)(src+i))) );
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer8888To5551_SSE2(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=8)
{
if (IS_UNALIGNED)
{
_mm_storeu_si128( (v128u16 *)(dst+i), ColorspaceConvert8888To5551_SSE2<SWAP_RB>(_mm_loadu_si128((v128u32 *)(src+i)), _mm_loadu_si128((v128u32 *)(src+i+4))) );
}
else
{
_mm_store_si128( (v128u16 *)(dst+i), ColorspaceConvert8888To5551_SSE2<SWAP_RB>(_mm_load_si128((v128u32 *)(src+i)), _mm_load_si128((v128u32 *)(src+i+4))) );
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer6665To5551_SSE2(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=8)
{
if (IS_UNALIGNED)
{
_mm_storeu_si128( (v128u16 *)(dst+i), ColorspaceConvert6665To5551_SSE2<SWAP_RB>(_mm_loadu_si128((v128u32 *)(src+i)), _mm_loadu_si128((v128u32 *)(src+i+4))) );
}
else
{
_mm_store_si128( (v128u16 *)(dst+i), ColorspaceConvert6665To5551_SSE2<SWAP_RB>(_mm_load_si128((v128u32 *)(src+i)), _mm_load_si128((v128u32 *)(src+i+4))) );
}
}
return i;
}
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_SSE2<false, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_SSE2<true, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_SSE2<false, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_SSE2<true, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_SSE2<false, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_SSE2<true, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_SSE2<false, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_SSE2<true, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To6665_SSE2<false, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To6665_SSE2<true, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To6665_SSE2<false, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To6665_SSE2<true, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To8888_SSE2<false, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To8888_SSE2<true, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To8888_SSE2<false, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To8888_SSE2<true, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To5551_SSE2<false, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To5551_SSE2<true, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To5551_SSE2<false, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To5551_SSE2<true, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To5551_SSE2<false, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To5551_SSE2<true, false>(src, dst, pixCount);
}
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To5551_SSE2<false, true>(src, dst, pixCount);
}
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To5551_SSE2<true, true>(src, dst, pixCount);
}
template void ColorspaceConvert555To8888_SSE2<true>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_SSE2<false>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_SSE2<true>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_SSE2<false>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_SSE2<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_SSE2<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_SSE2<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_SSE2<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template v128u32 ColorspaceConvert8888To6665_SSE2<true>(const v128u32 &src);
template v128u32 ColorspaceConvert8888To6665_SSE2<false>(const v128u32 &src);
template v128u32 ColorspaceConvert6665To8888_SSE2<true>(const v128u32 &src);
template v128u32 ColorspaceConvert6665To8888_SSE2<false>(const v128u32 &src);
template v128u16 ColorspaceConvert8888To5551_SSE2<true>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u16 ColorspaceConvert8888To5551_SSE2<false>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u16 ColorspaceConvert6665To5551_SSE2<true>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u16 ColorspaceConvert6665To5551_SSE2<false>(const v128u32 &srcLo, const v128u32 &srcHi);
#endif // ENABLE_SSE2

View File

@ -0,0 +1,74 @@
/*
Copyright (C) 2016 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef COLORSPACEHANDLER_SSE2_H
#define COLORSPACEHANDLER_SSE2_H
#include "colorspacehandler.h"
#ifndef ENABLE_SSE2
#warning This header requires SSE2 support.
#else
template<bool SWAP_RB> void ColorspaceConvert555To8888_SSE2(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665_SSE2(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> v128u32 ColorspaceConvert8888To6665_SSE2(const v128u32 &src);
template<bool SWAP_RB> v128u32 ColorspaceConvert6665To8888_SSE2(const v128u32 &src);
template<bool SWAP_RB> v128u16 ColorspaceConvert8888To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi);
template<bool SWAP_RB> v128u16 ColorspaceConvert6665To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi);
class ColorspaceHandler_SSE2 : public ColorspaceHandler
{
public:
ColorspaceHandler_SSE2() {};
size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
};
#endif // ENABLE_SSE2
#endif /* COLORSPACEHANDLER_SSE2_H */

View File

@ -59,44 +59,41 @@
#define DESMUME_PLATFORM_STRING "" #define DESMUME_PLATFORM_STRING ""
#endif #endif
#define DESMUME_SSE_STRING "" #define DESMUME_CPUEXT_PRIMARY_STRING ""
#define DESMUME_AVX_STRING "" #define DESMUME_CPUEXT_SECONDARY_STRING ""
#ifdef ENABLE_SSE #if defined(ENABLE_SSE4_2)
#undef DESMUME_SSE_STRING #undef DESMUME_CPUEXT_PRIMARY_STRING
#define DESMUME_SSE_STRING " SSE" #define DESMUME_CPUEXT_PRIMARY_STRING " SSE4.2"
#endif #elif defined(ENABLE_SSE4_1)
#ifdef ENABLE_SSE2 #undef DESMUME_CPUEXT_PRIMARY_STRING
#undef DESMUME_SSE_STRING #define DESMUME_CPUEXT_PRIMARY_STRING " SSE4.1"
#define DESMUME_SSE_STRING " SSE2" #elif defined(ENABLE_SSSE3)
#endif #undef DESMUME_CPUEXT_PRIMARY_STRING
#ifdef ENABLE_SSE3 #define DESMUME_CPUEXT_PRIMARY_STRING " SSSE3"
#undef DESMUME_SSE_STRING #elif defined(ENABLE_SSE3)
#define DESMUME_SSE_STRING " SSE3" #undef DESMUME_CPUEXT_PRIMARY_STRING
#endif #define DESMUME_CPUEXT_PRIMARY_STRING " SSE3"
#ifdef ENABLE_SSSE3 #elif defined(ENABLE_SSE2)
#undef DESMUME_SSE_STRING #undef DESMUME_CPUEXT_PRIMARY_STRING
#define DESMUME_SSE_STRING " SSSE3" #define DESMUME_CPUEXT_PRIMARY_STRING " SSE2"
#endif #elif defined(ENABLE_SSE)
#ifdef ENABLE_SSE4_1 #undef DESMUME_CPUEXT_PRIMARY_STRING
#undef DESMUME_SSE_STRING #define DESMUME_CPUEXT_PRIMARY_STRING " SSE"
#define DESMUME_SSE_STRING " SSE4.1" #elif defined(ENABLE_ALTIVEC)
#endif #undef DESMUME_CPUEXT_PRIMARY_STRING
#ifdef ENABLE_SSE4_2 #define DESMUME_CPUEXT_PRIMARY_STRING " AltiVec"
#undef DESMUME_SSE_STRING
#define DESMUME_SSE_STRING " SSE4.2"
#endif
#ifdef ENABLE_AVX
#undef DESMUME_AVX_STRING
#define DESMUME_AVX_STRING "+AVX"
#endif
#ifdef ENABLE_AVX2
#undef DESMUME_AVX_STRING
#define DESMUME_AVX_STRING "+AVX2"
#endif #endif
#define DESMUME_CPUEXT_STRING DESMUME_SSE_STRING DESMUME_AVX_STRING #if defined(ENABLE_AVX2)
#undef DESMUME_CPUEXT_SECONDARY_STRING
#define DESMUME_CPUEXT_SECONDARY_STRING "+AVX2"
#elif defined(ENABLE_AVX)
#undef DESMUME_CPUEXT_SECONDARY_STRING
#define DESMUME_CPUEXT_SECONDARY_STRING "+AVX"
#endif
#define DESMUME_CPUEXT_STRING DESMUME_CPUEXT_PRIMARY_STRING DESMUME_CPUEXT_SECONDARY_STRING
#ifdef DEVELOPER #ifdef DEVELOPER
#define DESMUME_FEATURE_STRING " dev+" #define DESMUME_FEATURE_STRING " dev+"

View File

@ -171,6 +171,8 @@
<ClCompile Include="..\utils\AsmJit\x86\x86func.cpp" /> <ClCompile Include="..\utils\AsmJit\x86\x86func.cpp" />
<ClCompile Include="..\utils\AsmJit\x86\x86operand.cpp" /> <ClCompile Include="..\utils\AsmJit\x86\x86operand.cpp" />
<ClCompile Include="..\utils\AsmJit\x86\x86util.cpp" /> <ClCompile Include="..\utils\AsmJit\x86\x86util.cpp" />
<ClCompile Include="..\utils\colorspacehandler\colorspacehandler.cpp" />
<ClCompile Include="..\utils\colorspacehandler\colorspacehandler_SSE2.cpp" />
<ClCompile Include="..\utils\datetime.cpp" /> <ClCompile Include="..\utils\datetime.cpp" />
<ClCompile Include="..\utils\dlditool.cpp" /> <ClCompile Include="..\utils\dlditool.cpp" />
<ClCompile Include="..\utils\emufat.cpp" /> <ClCompile Include="..\utils\emufat.cpp" />
@ -442,6 +444,8 @@
<ClInclude Include="..\utils\AsmJit\x86\x86func.h" /> <ClInclude Include="..\utils\AsmJit\x86\x86func.h" />
<ClInclude Include="..\utils\AsmJit\x86\x86operand.h" /> <ClInclude Include="..\utils\AsmJit\x86\x86operand.h" />
<ClInclude Include="..\utils\AsmJit\x86\x86util.h" /> <ClInclude Include="..\utils\AsmJit\x86\x86util.h" />
<ClInclude Include="..\utils\colorspacehandler\colorspacehandler.h" />
<ClInclude Include="..\utils\colorspacehandler\colorspacehandler_SSE2.h" />
<ClInclude Include="..\utils\datetime.h" /> <ClInclude Include="..\utils\datetime.h" />
<ClInclude Include="..\utils\emufat.h" /> <ClInclude Include="..\utils\emufat.h" />
<ClInclude Include="..\utils\emufat_types.h" /> <ClInclude Include="..\utils\emufat_types.h" />

View File

@ -121,6 +121,9 @@
<Filter Include="Core\libretro-common\lists"> <Filter Include="Core\libretro-common\lists">
<UniqueIdentifier>{18cba3ce-aaa6-441d-8111-408d0fcef7d2}</UniqueIdentifier> <UniqueIdentifier>{18cba3ce-aaa6-441d-8111-408d0fcef7d2}</UniqueIdentifier>
</Filter> </Filter>
<Filter Include="Core\utils\colorspacehandler">
<UniqueIdentifier>{db5dc512-2b75-4476-8cac-75fd4acfd85f}</UniqueIdentifier>
</Filter>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClCompile Include="..\aggdraw.cpp"> <ClCompile Include="..\aggdraw.cpp">
@ -966,6 +969,12 @@
<ClCompile Include="..\libretro-common\file\archive_file_zlib.c"> <ClCompile Include="..\libretro-common\file\archive_file_zlib.c">
<Filter>Core\libretro-common\file</Filter> <Filter>Core\libretro-common\file</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="..\utils\colorspacehandler\colorspacehandler.cpp">
<Filter>Core\utils\colorspacehandler</Filter>
</ClCompile>
<ClCompile Include="..\utils\colorspacehandler\colorspacehandler_SSE2.cpp">
<Filter>Core\utils\colorspacehandler</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="..\armcpu.h"> <ClInclude Include="..\armcpu.h">
@ -1739,6 +1748,12 @@
<ClInclude Include="..\libretro-common\include\compat\msvc.h"> <ClInclude Include="..\libretro-common\include\compat\msvc.h">
<Filter>Core\libretro-common\include\compat</Filter> <Filter>Core\libretro-common\include\compat</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="..\utils\colorspacehandler\colorspacehandler.h">
<Filter>Core\utils\colorspacehandler</Filter>
</ClInclude>
<ClInclude Include="..\utils\colorspacehandler\colorspacehandler_SSE2.h">
<Filter>Core\utils\colorspacehandler</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<None Include="..\instruction_tabdef.inc"> <None Include="..\instruction_tabdef.inc">

View File

@ -316,13 +316,14 @@ static void do_video_conversion(AVIFile* avi, const u16* buffer)
int height = avi->prescaleLevel*384; int height = avi->prescaleLevel*384;
u8* outbuf = avi_file->convert_buffer + width*(height-1)*3; u8* outbuf = avi_file->convert_buffer + width*(height-1)*3;
for(int y=0;y<height;y++) for (int y = 0; y < height; y++)
{ {
for(int x=0;x<width;x++) for (int x = 0; x < width; x++)
{ {
u32 dst = ConvertColor555To8888Opaque<true>(*buffer++); u32 dst = ColorspaceConvert555To8888Opaque<true>(*buffer++);
*(u32 *)outbuf = (dst & 0x00FFFFFF) | (*(u32 *)outbuf & 0xFF000000); *outbuf++ = dst & 0xFF;
outbuf += 3; *outbuf++ = (dst >> 8) & 0xFF;
*outbuf++ = (dst >> 16) & 0xFF;
} }
outbuf -= width*3*2; outbuf -= width*3*2;

View File

@ -1920,7 +1920,7 @@ static void DoDisplay(bool firstTime)
//convert pixel format to 32bpp for compositing //convert pixel format to 32bpp for compositing
//why do we do this over and over? well, we are compositing to //why do we do this over and over? well, we are compositing to
//filteredbuffer32bpp, and it needs to get refreshed each frame. //filteredbuffer32bpp, and it needs to get refreshed each frame.
ConvertColorBuffer555To8888Opaque<true, false>((u16 *)video.srcBuffer, video.buffer, video.srcBufferSize / sizeof(u16)); ColorspaceConvertBuffer555To8888Opaque<true, false>((u16 *)video.srcBuffer, video.buffer, video.srcBufferSize / sizeof(u16));
if(firstTime) if(firstTime)
{ {