Colorspace Handler:
- Factor the generic colorspace handling routines out of GPU.cpp/GPU.h and into their own separate files. - Add vectorized routines using AVX2 and AltiVec.
This commit is contained in:
parent
d837653b5f
commit
d8735a803b
|
@ -18,6 +18,14 @@
|
|||
along with this software. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifdef FASTBUILD
|
||||
#undef FORCEINLINE
|
||||
#define FORCEINLINE
|
||||
//compilation speed hack (cuts time exactly in half by cutting out permutations)
|
||||
#define DISABLE_MOSAIC
|
||||
#define DISABLE_COLOREFFECTDISABLEHINT
|
||||
#endif
|
||||
|
||||
#include "GPU.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
@ -40,75 +48,8 @@
|
|||
#include "matrix.h"
|
||||
#include "emufile.h"
|
||||
|
||||
#ifdef FASTBUILD
|
||||
#undef FORCEINLINE
|
||||
#define FORCEINLINE
|
||||
//compilation speed hack (cuts time exactly in half by cutting out permutations)
|
||||
#define DISABLE_MOSAIC
|
||||
#endif
|
||||
|
||||
u32 Render3DFramesPerSecond;
|
||||
|
||||
// Lookup tables indexed by a 15-bit RGB555 color (0x0000-0x7FFF), each yielding
// a 32-bit color word. No initializer is given here, so as globals they start
// out zeroed and have to be populated at runtime before they are read.
CACHE_ALIGN u32 color_555_to_6665_opaque[0x8000];
CACHE_ALIGN u32 color_555_to_6665_opaque_swap_rb[0x8000];
CACHE_ALIGN u32 color_555_to_666[0x8000];
CACHE_ALIGN u32 color_555_to_8888_opaque[0x8000];
CACHE_ALIGN u32 color_555_to_8888_opaque_swap_rb[0x8000];
CACHE_ALIGN u32 color_555_to_888[0x8000];

// 5-bit -> 31-bit expansion. Each entry is the 5-bit input pattern repeated down
// the word, i.e. (i<<26)|(i<<21)|(i<<16)|(i<<11)|(i<<6)|(i<<1)|(i>>4), so that
// 0 maps to 0 and 31 maps to exactly INT_MAX (0x7FFFFFFF).
// (Original author's remark: "is this a crazy idea?")
CACHE_ALIGN const u32 material_5bit_to_31bit[32] = {
	0x00000000, 0x04210842, 0x08421084, 0x0C6318C6, 0x10842108, 0x14A5294A, 0x18C6318C, 0x1CE739CE,
	0x21084210, 0x25294A52, 0x294A5294, 0x2D6B5AD6, 0x318C6318, 0x35AD6B5A, 0x39CE739C, 0x3DEF7BDE,
	0x42108421, 0x46318C63, 0x4A5294A5, 0x4E739CE7, 0x5294A529, 0x56B5AD6B, 0x5AD6B5AD, 0x5EF7BDEF,
	0x6318C631, 0x6739CE73, 0x6B5AD6B5, 0x6F7BDEF7, 0x739CE739, 0x77BDEF7B, 0x7BDEF7BD, 0x7FFFFFFF
};

// 5-bit -> 6-bit expansion using the formula: dst = (src == 0) ? 0 : (2*src) + 1
// Reference GBATEK: http://problemkaputt.de/gbatek.htm#ds3dtextureblending
CACHE_ALIGN const u8 material_5bit_to_6bit[32] = {
	0x00, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
	0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F,
	0x21, 0x23, 0x25, 0x27, 0x29, 0x2B, 0x2D, 0x2F,
	0x31, 0x33, 0x35, 0x37, 0x39, 0x3B, 0x3D, 0x3F
};

// 5-bit -> 8-bit expansion. Each entry equals (i << 3) | (i >> 2), which maps
// 0 to 0x00 and 31 to 0xFF.
CACHE_ALIGN const u8 material_5bit_to_8bit[32] = {
	0x00, 0x08, 0x10, 0x18, 0x21, 0x29, 0x31, 0x39,
	0x42, 0x4A, 0x52, 0x5A, 0x63, 0x6B, 0x73, 0x7B,
	0x84, 0x8C, 0x94, 0x9C, 0xA5, 0xAD, 0xB5, 0xBD,
	0xC6, 0xCE, 0xD6, 0xDE, 0xE7, 0xEF, 0xF7, 0xFF
};

// 6-bit -> 8-bit expansion. Each entry equals (i << 2) | (i >> 4), which maps
// 0 to 0x00 and 63 to 0xFF.
CACHE_ALIGN const u8 material_6bit_to_8bit[64] = {
	0x00, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C,
	0x20, 0x24, 0x28, 0x2C, 0x30, 0x34, 0x38, 0x3C,
	0x41, 0x45, 0x49, 0x4D, 0x51, 0x55, 0x59, 0x5D,
	0x61, 0x65, 0x69, 0x6D, 0x71, 0x75, 0x79, 0x7D,
	0x82, 0x86, 0x8A, 0x8E, 0x92, 0x96, 0x9A, 0x9E,
	0xA2, 0xA6, 0xAA, 0xAE, 0xB2, 0xB6, 0xBA, 0xBE,
	0xC3, 0xC7, 0xCB, 0xCF, 0xD3, 0xD7, 0xDB, 0xDF,
	0xE3, 0xE7, 0xEB, 0xEF, 0xF3, 0xF7, 0xFB, 0xFF
};

// 3-bit -> 8-bit expansion. Each entry equals (i << 5) | (i << 2) | (i >> 1),
// which maps 0 to 0x00 and 7 to 0xFF.
CACHE_ALIGN const u8 material_3bit_to_8bit[8] = {
	0x00, 0x24, 0x49, 0x6D, 0x92, 0xB6, 0xDB, 0xFF
};

// 3-bit -> 5-bit expansion. The original author flags this one as
// "maybe not very precise".
CACHE_ALIGN const u8 material_3bit_to_5bit[8] = {
	0, 4, 8, 13, 17, 22, 26, 31
};

// 3-bit -> 6-bit expansion.
//TODO - generate this in the static init method more accurately
CACHE_ALIGN const u8 material_3bit_to_6bit[8] = {
	0, 8, 16, 26, 34, 44, 52, 63
};
|
||||
|
||||
//instantiate static instance
|
||||
u16 GPUEngineBase::_brightnessUpTable555[17][0x8000];
|
||||
FragmentColor GPUEngineBase::_brightnessUpTable666[17][0x8000];
|
||||
|
@ -167,7 +108,7 @@ const CACHE_ALIGN BGLayerSize GPUEngineBase::_BGLayerSizeLUT[8][4] = {
|
|||
{{128,128}, {256,256}, {512,256}, {512,512}}, //affine ext direct
|
||||
};
|
||||
|
||||
static void ExpandLine8(u8 *__restrict dst, const u8 *__restrict src, size_t dstLength)
|
||||
static FORCEINLINE void ExpandLine8(u8 *__restrict dst, const u8 *__restrict src, size_t dstLength)
|
||||
{
|
||||
#ifdef ENABLE_SSSE3
|
||||
const bool isIntegerScale = ((dstLength % GPU_FRAMEBUFFER_NATIVE_WIDTH) == 0);
|
||||
|
@ -1655,11 +1596,11 @@ FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compInfo,
|
|||
break;
|
||||
|
||||
case NDSColorFormat_BGR666_Rev:
|
||||
dstColor32.color = ConvertColor555To6665Opaque<false>(srcColor16);
|
||||
dstColor32.color = ColorspaceConvert555To6665Opaque<false>(srcColor16);
|
||||
break;
|
||||
|
||||
case NDSColorFormat_BGR888_Rev:
|
||||
dstColor32.color = ConvertColor555To8888Opaque<false>(srcColor16);
|
||||
dstColor32.color = ColorspaceConvert555To8888Opaque<false>(srcColor16);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1682,11 +1623,11 @@ FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compInfo,
|
|||
break;
|
||||
|
||||
case NDSColorFormat_BGR666_Rev:
|
||||
dstColor32.color = ConvertColor555To6665Opaque<false>(srcColor16);
|
||||
dstColor32.color = ColorspaceConvert555To6665Opaque<false>(srcColor16);
|
||||
break;
|
||||
|
||||
case NDSColorFormat_BGR888_Rev:
|
||||
dstColor32.color = ConvertColor555To8888Opaque<false>(srcColor16);
|
||||
dstColor32.color = ColorspaceConvert555To8888Opaque<false>(srcColor16);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1767,11 +1708,11 @@ FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compInfo,
|
|||
break;
|
||||
|
||||
case NDSColorFormat_BGR666_Rev:
|
||||
dstColor32.color = ConvertColor555To6665Opaque<false>(srcColor16);
|
||||
dstColor32.color = ColorspaceConvert555To6665Opaque<false>(srcColor16);
|
||||
break;
|
||||
|
||||
case NDSColorFormat_BGR888_Rev:
|
||||
dstColor32.color = ConvertColor555To8888Opaque<false>(srcColor16);
|
||||
dstColor32.color = ColorspaceConvert555To8888Opaque<false>(srcColor16);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
@ -1833,13 +1774,13 @@ FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compInfo,
|
|||
break;
|
||||
|
||||
case NDSColorFormat_BGR666_Rev:
|
||||
srcColor32.color = ConvertColor555To6665Opaque<false>(srcColor16);
|
||||
srcColor32.color = ColorspaceConvert555To6665Opaque<false>(srcColor16);
|
||||
dstColor32 = this->_ColorEffectBlend<OUTPUTFORMAT>(srcColor32, dstColor32, blendEVA, blendEVB);
|
||||
dstColor32.a = 0x1F;
|
||||
break;
|
||||
|
||||
case NDSColorFormat_BGR888_Rev:
|
||||
srcColor32.color = ConvertColor555To8888Opaque<false>(srcColor16);
|
||||
srcColor32.color = ColorspaceConvert555To8888Opaque<false>(srcColor16);
|
||||
dstColor32 = this->_ColorEffectBlend<OUTPUTFORMAT>(srcColor32, dstColor32, blendEVA, blendEVB);
|
||||
dstColor32.a = 0xFF;
|
||||
break;
|
||||
|
@ -2132,7 +2073,7 @@ FORCEINLINE void GPUEngineBase::_RenderPixel3D(GPUEngineCompositorInfo &compInfo
|
|||
// Render the pixel using the selected color effect.
|
||||
if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev)
|
||||
{
|
||||
const u16 srcColor16 = ConvertColor6665To5551<false>(srcColor32);
|
||||
const u16 srcColor16 = ColorspaceConvert6665To5551<false>(srcColor32);
|
||||
|
||||
switch (selectedEffect)
|
||||
{
|
||||
|
@ -2695,13 +2636,13 @@ void GPUEngineBase::_RenderPixelsCustom(GPUEngineCompositorInfo &compInfo)
|
|||
|
||||
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
|
||||
{
|
||||
ConvertColor555To6665Opaque<false>(src16[0], src[0], src[1]);
|
||||
ConvertColor555To6665Opaque<false>(src16[1], src[2], src[3]);
|
||||
ColorspaceConvert555To6665Opaque_SSE2<false>(src16[0], src[0], src[1]);
|
||||
ColorspaceConvert555To6665Opaque_SSE2<false>(src16[1], src[2], src[3]);
|
||||
}
|
||||
else
|
||||
{
|
||||
ConvertColor555To8888Opaque<false>(src16[0], src[0], src[1]);
|
||||
ConvertColor555To8888Opaque<false>(src16[1], src[2], src[3]);
|
||||
ColorspaceConvert555To8888Opaque_SSE2<false>(src16[0], src[0], src[1]);
|
||||
ColorspaceConvert555To8888Opaque_SSE2<false>(src16[1], src[2], src[3]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2796,13 +2737,13 @@ void GPUEngineBase::_RenderPixelsCustomVRAM(GPUEngineCompositorInfo &compInfo)
|
|||
{
|
||||
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
|
||||
{
|
||||
ConvertColor555To6665Opaque<false>(src16[0], src[0], src[1]);
|
||||
ConvertColor555To6665Opaque<false>(src16[1], src[2], src[3]);
|
||||
ColorspaceConvert555To6665Opaque_SSE2<false>(src16[0], src[0], src[1]);
|
||||
ColorspaceConvert555To6665Opaque_SSE2<false>(src16[1], src[2], src[3]);
|
||||
}
|
||||
else
|
||||
{
|
||||
ConvertColor555To8888Opaque<false>(src16[0], src[0], src[1]);
|
||||
ConvertColor555To8888Opaque<false>(src16[1], src[2], src[3]);
|
||||
ColorspaceConvert555To8888Opaque_SSE2<false>(src16[0], src[0], src[1]);
|
||||
ColorspaceConvert555To8888Opaque_SSE2<false>(src16[1], src[2], src[3]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4502,7 +4443,7 @@ void GPUEngineBase::UpdateVRAM3DUsageProperties_OBJLayer(const size_t bankIndex)
|
|||
}
|
||||
|
||||
template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED>
|
||||
void GPUEngineBase::_RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compInfo)
|
||||
FORCEINLINE void GPUEngineBase::_RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compInfo)
|
||||
{
|
||||
bool useCustomVRAM = false;
|
||||
|
||||
|
@ -4538,26 +4479,28 @@ void GPUEngineBase::_RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compInfo)
|
|||
}
|
||||
|
||||
template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED>
|
||||
void GPUEngineBase::_RenderLine_LayerBG_ApplyColorEffectDisabledHint(GPUEngineCompositorInfo &compInfo)
|
||||
FORCEINLINE void GPUEngineBase::_RenderLine_LayerBG_ApplyColorEffectDisabledHint(GPUEngineCompositorInfo &compInfo)
|
||||
{
|
||||
this->_RenderLine_LayerBG_Final<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compInfo);
|
||||
}
|
||||
|
||||
template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool ISCUSTOMRENDERINGNEEDED>
|
||||
void GPUEngineBase::_RenderLine_LayerBG_ApplyMosaic(GPUEngineCompositorInfo &compInfo)
|
||||
FORCEINLINE void GPUEngineBase::_RenderLine_LayerBG_ApplyMosaic(GPUEngineCompositorInfo &compInfo)
|
||||
{
|
||||
#ifndef DISABLE_COLOREFFECTDISABLEHINT
|
||||
if (compInfo.renderState.colorEffect == ColorEffect_Disable)
|
||||
{
|
||||
this->_RenderLine_LayerBG_ApplyColorEffectDisabledHint<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, true, ISCUSTOMRENDERINGNEEDED>(compInfo);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
this->_RenderLine_LayerBG_ApplyColorEffectDisabledHint<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, false, ISCUSTOMRENDERINGNEEDED>(compInfo);
|
||||
}
|
||||
}
|
||||
|
||||
template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool WILLPERFORMWINDOWTEST, bool ISCUSTOMRENDERINGNEEDED>
|
||||
void GPUEngineBase::_RenderLine_LayerBG(GPUEngineCompositorInfo &compInfo)
|
||||
FORCEINLINE void GPUEngineBase::_RenderLine_LayerBG(GPUEngineCompositorInfo &compInfo)
|
||||
{
|
||||
if (ISDEBUGRENDER)
|
||||
{
|
||||
|
@ -4951,7 +4894,7 @@ void GPUEngineBase::ResolveCustomRendering()
|
|||
|
||||
void GPUEngineBase::ResolveRGB666ToRGB888()
|
||||
{
|
||||
ConvertColorBuffer6665To8888<false>((u32 *)this->renderedBuffer, (u32 *)this->renderedBuffer, this->renderedWidth * this->renderedHeight);
|
||||
ColorspaceConvertBuffer6665To8888<false, false>((u32 *)this->renderedBuffer, (u32 *)this->renderedBuffer, this->renderedWidth * this->renderedHeight);
|
||||
}
|
||||
|
||||
void GPUEngineBase::ResolveToCustomFramebuffer()
|
||||
|
@ -5575,12 +5518,12 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
|
|||
|
||||
case NDSColorFormat_BGR666_Rev:
|
||||
renderedLineSrcA16 = (u16 *)malloc_alignedCacheLine(compInfo.line.pixelCount * sizeof(u16));
|
||||
ConvertColorBuffer6665To5551<false, false>((u32 *)compInfo.target.lineColorHead, renderedLineSrcA16, compInfo.line.pixelCount);
|
||||
ColorspaceConvertBuffer6665To5551<false, false>((u32 *)compInfo.target.lineColorHead, renderedLineSrcA16, compInfo.line.pixelCount);
|
||||
break;
|
||||
|
||||
case NDSColorFormat_BGR888_Rev:
|
||||
renderedLineSrcA16 = (u16 *)malloc_alignedCacheLine(compInfo.line.pixelCount * sizeof(u16));
|
||||
ConvertColorBuffer8888To5551<false, false>((u32 *)compInfo.target.lineColorHead, renderedLineSrcA16, compInfo.line.pixelCount);
|
||||
ColorspaceConvertBuffer8888To5551<false, false>((u32 *)compInfo.target.lineColorHead, renderedLineSrcA16, compInfo.line.pixelCount);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -6570,7 +6513,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
|
|||
{
|
||||
const u16 *src = this->_VRAMNativeBlockPtr[DISPCNT.VRAM_Block] + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
FragmentColor *dst = (FragmentColor *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
ConvertColorBuffer555To6665Opaque<false, false>(src, (u32 *)dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
ColorspaceConvertBuffer555To6665Opaque<false, false>(src, (u32 *)dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -6578,7 +6521,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
|
|||
{
|
||||
const u16 *src = this->_VRAMNativeBlockPtr[DISPCNT.VRAM_Block] + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
FragmentColor *dst = (FragmentColor *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
ConvertColorBuffer555To8888Opaque<false, false>(src, (u32 *)dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
ColorspaceConvertBuffer555To8888Opaque<false, false>(src, (u32 *)dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -6598,7 +6541,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
|
|||
{
|
||||
const u16 *src = this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + (_gpuDstLineIndex[l] * customWidth);
|
||||
FragmentColor *dst = (FragmentColor *)this->customBuffer + (_gpuDstLineIndex[l] * customWidth);
|
||||
ConvertColorBuffer555To6665Opaque<false, false>(src, (u32 *)dst, customPixCount);
|
||||
ColorspaceConvertBuffer555To6665Opaque<false, false>(src, (u32 *)dst, customPixCount);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -6606,7 +6549,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
|
|||
{
|
||||
const u16 *src = this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + (_gpuDstLineIndex[l] * customWidth);
|
||||
FragmentColor *dst = (FragmentColor *)this->customBuffer + (_gpuDstLineIndex[l] * customWidth);
|
||||
ConvertColorBuffer555To8888Opaque<false, false>(src, (u32 *)dst, customPixCount);
|
||||
ColorspaceConvertBuffer555To8888Opaque<false, false>(src, (u32 *)dst, customPixCount);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -6802,28 +6745,7 @@ void GPUEngineB::RenderLine(const u16 l)
|
|||
|
||||
GPUSubsystem::GPUSubsystem()
|
||||
{
|
||||
static bool needInitTables = true;
|
||||
|
||||
if (needInitTables)
|
||||
{
|
||||
#define RGB15TO18_BITLOGIC(col) ( (material_5bit_to_6bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | material_5bit_to_6bit[(col)&0x1F] )
|
||||
#define RGB15TO18_SWAP_RB_BITLOGIC(col) ( material_5bit_to_6bit[((col)>>10)&0x1F] | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_6bit[(col)&0x1F]<<16) )
|
||||
#define RGB15TO24_BITLOGIC(col) ( (material_5bit_to_8bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | material_5bit_to_8bit[(col)&0x1F] )
|
||||
#define RGB15TO24_SWAP_RB_BITLOGIC(col) ( material_5bit_to_8bit[((col)>>10)&0x1F] | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_8bit[(col)&0x1F]<<16) )
|
||||
|
||||
for (size_t i = 0; i < 32768; i++)
|
||||
{
|
||||
color_555_to_666[i] = LE_TO_LOCAL_32( RGB15TO18_BITLOGIC(i) );
|
||||
color_555_to_6665_opaque[i] = LE_TO_LOCAL_32( RGB15TO18_BITLOGIC(i) | 0x1F000000 );
|
||||
color_555_to_6665_opaque_swap_rb[i] = LE_TO_LOCAL_32( RGB15TO18_SWAP_RB_BITLOGIC(i) | 0x1F000000 );
|
||||
|
||||
color_555_to_888[i] = LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) );
|
||||
color_555_to_8888_opaque[i] = LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) | 0xFF000000 );
|
||||
color_555_to_8888_opaque_swap_rb[i] = LE_TO_LOCAL_32( RGB15TO24_SWAP_RB_BITLOGIC(i) | 0xFF000000 );
|
||||
}
|
||||
|
||||
needInitTables = false;
|
||||
}
|
||||
ColorspaceHandlerInit();
|
||||
|
||||
_defaultEventHandler = new GPUEventHandlerDefault;
|
||||
_event = _defaultEventHandler;
|
||||
|
@ -7581,178 +7503,6 @@ void NDSDisplay::SetEngineByID(const GPUEngineID theID)
|
|||
this->_gpu->SetDisplayByID(this->_ID);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
void ConvertColorBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t ssePixCount = pixCount - (pixCount % 8);
|
||||
for (; i < ssePixCount; i += 8)
|
||||
{
|
||||
__m128i src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((__m128i *)(src + i)) : _mm_load_si128((__m128i *)(src + i));
|
||||
__m128i dstConvertedLo, dstConvertedHi;
|
||||
ConvertColor555To8888Opaque<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
|
||||
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm_storeu_si128((__m128i *)(dst + i + 0), dstConvertedLo);
|
||||
_mm_storeu_si128((__m128i *)(dst + i + 4), dstConvertedHi);
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_store_si128((__m128i *)(dst + i + 0), dstConvertedLo);
|
||||
_mm_store_si128((__m128i *)(dst + i + 4), dstConvertedHi);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
#endif
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ConvertColor555To8888Opaque<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
void ConvertColorBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t ssePixCount = pixCount - (pixCount % 8);
|
||||
for (; i < ssePixCount; i += 8)
|
||||
{
|
||||
__m128i src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((__m128i *)(src + i)) : _mm_load_si128((__m128i *)(src + i));
|
||||
__m128i dstConvertedLo, dstConvertedHi;
|
||||
ConvertColor555To6665Opaque<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
|
||||
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm_storeu_si128((__m128i *)(dst + i + 0), dstConvertedLo);
|
||||
_mm_storeu_si128((__m128i *)(dst + i + 4), dstConvertedHi);
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_store_si128((__m128i *)(dst + i + 0), dstConvertedLo);
|
||||
_mm_store_si128((__m128i *)(dst + i + 4), dstConvertedHi);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
#endif
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ConvertColor555To6665Opaque<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
void ConvertColorBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t ssePixCount = pixCount - (pixCount % 4);
|
||||
for (; i < ssePixCount; i += 4)
|
||||
{
|
||||
_mm_store_si128( (__m128i *)(dst + i), ConvertColor8888To6665<SWAP_RB>(_mm_load_si128((__m128i *)(src + i))) );
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
#endif
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ConvertColor8888To6665<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
void ConvertColorBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t ssePixCount = pixCount - (pixCount % 4);
|
||||
for (; i < ssePixCount; i += 4)
|
||||
{
|
||||
_mm_store_si128( (__m128i *)(dst + i), ConvertColor6665To8888<SWAP_RB>(_mm_load_si128((__m128i *)(src + i))) );
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
#endif
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ConvertColor6665To8888<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
void ConvertColorBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t ssePixCount = pixCount - (pixCount % 8);
|
||||
for (; i < ssePixCount; i += 8)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm_storeu_si128( (__m128i *)(dst + i), ConvertColor8888To5551<SWAP_RB>(_mm_loadu_si128((__m128i *)(src + i)), _mm_loadu_si128((__m128i *)(src + i + 4))) );
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_store_si128( (__m128i *)(dst + i), ConvertColor8888To5551<SWAP_RB>(_mm_load_si128((__m128i *)(src + i)), _mm_load_si128((__m128i *)(src + i + 4))) );
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
#endif
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ConvertColor8888To5551<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
void ConvertColorBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t ssePixCount = pixCount - (pixCount % 8);
|
||||
for (; i < ssePixCount; i += 8)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm_storeu_si128( (__m128i *)(dst + i), ConvertColor6665To5551<SWAP_RB>(_mm_loadu_si128((__m128i *)(src + i)), _mm_loadu_si128((__m128i *)(src + i + 4))) );
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_store_si128( (__m128i *)(dst + i), ConvertColor6665To5551<SWAP_RB>(_mm_load_si128((__m128i *)(src + i)), _mm_load_si128((__m128i *)(src + i + 4))) );
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
#endif
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ConvertColor6665To5551<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template void GPUEngineBase::ParseReg_BGnHOFS<GPULayerID_BG0>();
|
||||
template void GPUEngineBase::ParseReg_BGnHOFS<GPULayerID_BG1>();
|
||||
template void GPUEngineBase::ParseReg_BGnHOFS<GPULayerID_BG2>();
|
||||
|
@ -7774,29 +7524,3 @@ template void GPUEngineBase::ParseReg_BGnY<GPULayerID_BG3>();
|
|||
template void GPUSubsystem::RenderLine<NDSColorFormat_BGR555_Rev>(const u16 l, bool skip);
|
||||
template void GPUSubsystem::RenderLine<NDSColorFormat_BGR666_Rev>(const u16 l, bool skip);
|
||||
template void GPUSubsystem::RenderLine<NDSColorFormat_BGR888_Rev>(const u16 l, bool skip);
|
||||
|
||||
template void ConvertColorBuffer555To8888Opaque<true, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer555To8888Opaque<true, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer555To8888Opaque<false, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer555To8888Opaque<false, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
|
||||
template void ConvertColorBuffer555To6665Opaque<true, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer555To6665Opaque<true, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer555To6665Opaque<false, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer555To6665Opaque<false, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
|
||||
template void ConvertColorBuffer8888To6665<true>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
template void ConvertColorBuffer8888To6665<false>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
|
||||
template void ConvertColorBuffer6665To8888<true>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
template void ConvertColorBuffer6665To8888<false>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
|
||||
template void ConvertColorBuffer8888To5551<true, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer8888To5551<true, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer8888To5551<false, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer8888To5551<false, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
|
||||
template void ConvertColorBuffer6665To5551<true, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer6665To5551<true, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer6665To5551<false, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer6665To5551<false, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
|
|
|
@ -25,9 +25,11 @@
|
|||
#include <iosfwd>
|
||||
|
||||
#include "types.h"
|
||||
#include "./utils/colorspacehandler/colorspacehandler.h"
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#include <emmintrin.h>
|
||||
#include "./utils/colorspacehandler/colorspacehandler_SSE2.h"
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSSE3
|
||||
|
@ -101,15 +103,6 @@ enum DisplayCaptureSize
|
|||
DisplayCaptureSize_256x192 = 3,
|
||||
};
|
||||
|
||||
union FragmentColor
|
||||
{
|
||||
u32 color;
|
||||
struct
|
||||
{
|
||||
u8 r,g,b,a;
|
||||
};
|
||||
};
|
||||
|
||||
typedef union
|
||||
{
|
||||
u32 value;
|
||||
|
@ -1052,61 +1045,6 @@ enum NDSDisplayID
|
|||
NDSDisplayID_Touch = 1
|
||||
};
|
||||
|
||||
// Describes a color format as a packed 32-bit descriptor.
//
// Bit layout, most significant bit first:
//     FFFOOOOO AAAAAABB BBBBGGGG GGRRRRRR
//
//     F (bits 29-31) = Flags (see below)
//     O (bits 24-28) = Color order (see below)
//     A (bits 18-23) = Bit count for alpha [0-63]
//     B (bits 12-17) = Bit count for blue  [0-63]
//     G (bits  6-11) = Bit count for green [0-63]
//     R (bits  0- 5) = Bit count for red   [0-63]
//
// Flags:
//     Bit 29: Reverse order flag.
//         Set     = Bits are in reverse order, usually for little-endian usage.
//         Cleared = Bits are in normal order, usually for big-endian usage.
//
// Color order values (bits 24-28):
//     0x00 = RGBA, common format      0x0C = BRGA
//     0x01 = RGAB                     0x0D = BRAG
//     0x02 = RBGA                     0x0E = BGRA, common format
//     0x03 = RBAG                     0x0F = BGAR
//     0x04 = RAGB                     0x10 = BARG
//     0x05 = RABG                     0x11 = BAGR
//     0x06 = GRBA                     0x12 = ARGB
//     0x07 = GRAB                     0x13 = ARBG
//     0x08 = GBRA                     0x14 = AGRB
//     0x09 = GBAR                     0x15 = AGBR
//     0x0A = GARB                     0x16 = ABRG
//     0x0B = GABR                     0x17 = ABGR
enum NDSColorFormat
{
	// Color formats used for internal processing (currently disabled).
	// NOTE(review): 0x20186186 encodes a 6-bit alpha count in bits 18-23, which
	// does not match the "5666" in the name -- confirm before re-enabling.
	//NDSColorFormat_ABGR1555_Rev = 0x20045145,
	//NDSColorFormat_ABGR5666_Rev = 0x20186186,
	//NDSColorFormat_ABGR8888_Rev = 0x20208208,
	
	// Color formats used by the output framebuffers. Each value is written as
	// (reverse flag) | (blue bits << 12) | (green bits << 6) | (red bits), with
	// color order 0x00 and an alpha bit count of 0.
	NDSColorFormat_BGR555_Rev = (1 << 29) | (5 << 12) | (5 << 6) | 5,	// 0x20005145
	NDSColorFormat_BGR666_Rev = (1 << 29) | (6 << 12) | (6 << 6) | 6,	// 0x20006186
	NDSColorFormat_BGR888_Rev = (1 << 29) | (8 << 12) | (8 << 6) | 8	// 0x20008208
};
|
||||
|
||||
struct DISPCAPCNT_parsed
|
||||
{
|
||||
u8 EVA;
|
||||
|
@ -1410,9 +1348,9 @@ protected:
|
|||
template<size_t WIN_NUM> bool _IsWindowInsideVerticalRange(GPUEngineCompositorInfo &compInfo);
|
||||
void _PerformWindowTesting(GPUEngineCompositorInfo &compInfo);
|
||||
|
||||
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compInfo);
|
||||
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_LayerBG_ApplyColorEffectDisabledHint(GPUEngineCompositorInfo &compInfo);
|
||||
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_LayerBG_ApplyMosaic(GPUEngineCompositorInfo &compInfo);
|
||||
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> FORCEINLINE void _RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compInfo);
|
||||
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> FORCEINLINE void _RenderLine_LayerBG_ApplyColorEffectDisabledHint(GPUEngineCompositorInfo &compInfo);
|
||||
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool ISCUSTOMRENDERINGNEEDED> FORCEINLINE void _RenderLine_LayerBG_ApplyMosaic(GPUEngineCompositorInfo &compInfo);
|
||||
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool WILLPERFORMWINDOWTEST, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_LayerBG(GPUEngineCompositorInfo &compInfo);
|
||||
|
||||
template<NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> void _RenderLine_LayerOBJ(GPUEngineCompositorInfo &compInfo, itemsForPriority_t *__restrict item);
|
||||
|
@ -1733,346 +1671,4 @@ public:
|
|||
extern GPUSubsystem *GPU;
|
||||
extern MMU_struct MMU;
|
||||
|
||||
extern CACHE_ALIGN const u32 material_5bit_to_31bit[32];
|
||||
extern CACHE_ALIGN const u8 material_5bit_to_6bit[32];
|
||||
extern CACHE_ALIGN const u8 material_5bit_to_8bit[32];
|
||||
extern CACHE_ALIGN const u8 material_6bit_to_8bit[64];
|
||||
extern CACHE_ALIGN const u8 material_3bit_to_5bit[8];
|
||||
extern CACHE_ALIGN const u8 material_3bit_to_6bit[8];
|
||||
extern CACHE_ALIGN const u8 material_3bit_to_8bit[8];
|
||||
|
||||
extern CACHE_ALIGN u32 color_555_to_6665_opaque[32768];
|
||||
extern CACHE_ALIGN u32 color_555_to_6665_opaque_swap_rb[32768];
|
||||
extern CACHE_ALIGN u32 color_555_to_666[32768];
|
||||
extern CACHE_ALIGN u32 color_555_to_8888_opaque[32768];
|
||||
extern CACHE_ALIGN u32 color_555_to_8888_opaque_swap_rb[32768];
|
||||
extern CACHE_ALIGN u32 color_555_to_888[32768];
|
||||
|
||||
#define COLOR555TO6665_OPAQUE(col) (color_555_to_6665_opaque[(col)]) // Convert a 15-bit color to an opaque sparsely packed 32-bit color containing an RGBA6665 color
|
||||
#define COLOR555TO6665_OPAQUE_SWAP_RB(col) (color_555_to_6665_opaque_swap_rb[(col)]) // Convert a 15-bit color to an opaque sparsely packed 32-bit color containing an RGBA6665 color with R and B components swapped
|
||||
#define COLOR555TO666(col) (color_555_to_666[(col)]) // Convert a 15-bit color to a fully transparent sparsely packed 32-bit color containing an RGBA6665 color
|
||||
|
||||
#ifdef LOCAL_LE
|
||||
#define COLOR555TO6665(col,alpha5) (((alpha5)<<24) | color_555_to_666[(col)]) // Convert a 15-bit color to a sparsely packed 32-bit color containing an RGBA6665 color with user-defined alpha, little-endian
|
||||
#else
|
||||
#define COLOR555TO6665(col,alpha5) ((alpha5) | color_555_to_666[(col)]) // Convert a 15-bit color to a sparsely packed 32-bit color containing an RGBA6665 color with user-defined alpha, big-endian
|
||||
#endif
|
||||
|
||||
#define COLOR555TO8888_OPAQUE(col) (color_555_to_8888_opaque[(col)]) // Convert a 15-bit color to an opaque 32-bit color
|
||||
#define COLOR555TO8888_OPAQUE_SWAP_RB(col) (color_555_to_8888_opaque_swap_rb[(col)]) // Convert a 15-bit color to an opaque 32-bit color with R and B components swapped
|
||||
#define COLOR555TO888(col) (color_555_to_888[(col)]) // Convert a 15-bit color to an opaque 24-bit color or a fully transparent 32-bit color
|
||||
|
||||
#ifdef LOCAL_LE
|
||||
#define COLOR555TO8888(col,alpha8) (((alpha8)<<24) | color_555_to_888[(col)]) // Convert a 15-bit color to a 32-bit color with user-defined alpha, little-endian
|
||||
#else
|
||||
#define COLOR555TO8888(col,alpha8) ((alpha8) | color_555_to_888[(col)]) // Convert a 15-bit color to a 32-bit color with user-defined alpha, big-endian
|
||||
#endif
|
||||
|
||||
//produce a 15bpp color from individual 5bit components
|
||||
#define R5G5B5TORGB15(r,g,b) ( (r) | ((g)<<5) | ((b)<<10) )
|
||||
|
||||
//produce a 15bpp color from individual 6bit components (the lowest bit of each component is discarded)
|
||||
#define R6G6B6TORGB15(r,g,b) ( ((r)>>1) | (((g)&0x3E)<<4) | (((b)&0x3E)<<9) )
|
||||
|
||||
// Builds a FragmentColor from four individual channel values.
// Each channel is stored exactly as given; no range conversion is performed.
inline FragmentColor MakeFragmentColor(const u8 r, const u8 g, const u8 b, const u8 a)
{
	FragmentColor fragment;
	
	fragment.r = r;
	fragment.g = g;
	fragment.b = b;
	fragment.a = a;
	
	return fragment;
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u32 ConvertColor555To8888Opaque(const u16 src)
|
||||
{
|
||||
return (SWAP_RB) ? COLOR555TO8888_OPAQUE_SWAP_RB(src & 0x7FFF) : COLOR555TO8888_OPAQUE(src & 0x7FFF);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u32 ConvertColor555To6665Opaque(const u16 src)
|
||||
{
|
||||
return (SWAP_RB) ? COLOR555TO6665_OPAQUE_SWAP_RB(src & 0x7FFF) : COLOR555TO6665_OPAQUE(src & 0x7FFF);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u32 ConvertColor8888To6665(FragmentColor srcColor)
|
||||
{
|
||||
FragmentColor outColor;
|
||||
outColor.r = ((SWAP_RB) ? srcColor.b : srcColor.r) >> 2;
|
||||
outColor.g = srcColor.g >> 2;
|
||||
outColor.b = ((SWAP_RB) ? srcColor.r : srcColor.b) >> 2;
|
||||
outColor.a = srcColor.a >> 3;
|
||||
|
||||
return outColor.color;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u32 ConvertColor8888To6665(u32 srcColor)
|
||||
{
|
||||
FragmentColor srcColorComponent;
|
||||
srcColorComponent.color = srcColor;
|
||||
|
||||
return ConvertColor8888To6665<SWAP_RB>(srcColorComponent);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u32 ConvertColor6665To8888(FragmentColor srcColor)
|
||||
{
|
||||
FragmentColor outColor;
|
||||
outColor.r = material_6bit_to_8bit[((SWAP_RB) ? srcColor.b : srcColor.r)];
|
||||
outColor.g = material_6bit_to_8bit[srcColor.g];
|
||||
outColor.b = material_6bit_to_8bit[((SWAP_RB) ? srcColor.r : srcColor.b)];
|
||||
outColor.a = material_5bit_to_8bit[srcColor.a];
|
||||
|
||||
return outColor.color;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u32 ConvertColor6665To8888(u32 srcColor)
|
||||
{
|
||||
FragmentColor srcColorComponent;
|
||||
srcColorComponent.color = srcColor;
|
||||
|
||||
return ConvertColor6665To8888<SWAP_RB>(srcColorComponent);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u16 ConvertColor8888To5551(FragmentColor srcColor)
|
||||
{
|
||||
return R5G5B5TORGB15( ((SWAP_RB) ? srcColor.b : srcColor.r) >> 3, srcColor.g >> 3, ((SWAP_RB) ? srcColor.r : srcColor.b) >> 3) | ((srcColor.a == 0) ? 0x0000 : 0x8000 );
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u16 ConvertColor8888To5551(u32 srcColor)
|
||||
{
|
||||
FragmentColor srcColorComponent;
|
||||
srcColorComponent.color = srcColor;
|
||||
|
||||
return ConvertColor8888To5551<SWAP_RB>(srcColorComponent);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u16 ConvertColor6665To5551(FragmentColor srcColor)
|
||||
{
|
||||
return R6G6B6TORGB15( ((SWAP_RB) ? srcColor.b : srcColor.r), srcColor.g, ((SWAP_RB) ? srcColor.r : srcColor.b)) | ((srcColor.a == 0) ? 0x0000 : 0x8000);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u16 ConvertColor6665To5551(u32 srcColor)
|
||||
{
|
||||
FragmentColor srcColorComponent;
|
||||
srcColorComponent.color = srcColor;
|
||||
|
||||
return ConvertColor6665To5551<SWAP_RB>(srcColorComponent);
|
||||
}
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ConvertColor555To8888(const __m128i &srcColor, const __m128i &srcAlphaBits32Lo, const __m128i &srcAlphaBits32Hi, __m128i &dstLo, __m128i &dstHi)
|
||||
{
|
||||
__m128i src32;
|
||||
|
||||
// Conversion algorithm:
|
||||
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
|
||||
src32 = _mm_unpacklo_epi16(srcColor, _mm_setzero_si128());
|
||||
dstLo = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 19), _mm_srli_epi32(src32, 7)) : _mm_or_si128(_mm_slli_epi32(src32, 3), _mm_slli_epi32(src32, 9));
|
||||
dstLo = _mm_and_si128( dstLo, _mm_set1_epi32(0x00F800F8) );
|
||||
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_slli_epi32(src32, 6), _mm_set1_epi32(0x0000F800)) );
|
||||
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_srli_epi32(dstLo, 5), _mm_set1_epi32(0x00070707)) );
|
||||
dstLo = _mm_or_si128( dstLo, srcAlphaBits32Lo );
|
||||
|
||||
src32 = _mm_unpackhi_epi16(srcColor, _mm_setzero_si128());
|
||||
dstHi = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 19), _mm_srli_epi32(src32, 7)) : _mm_or_si128(_mm_slli_epi32(src32, 3), _mm_slli_epi32(src32, 9));
|
||||
dstHi = _mm_and_si128( dstHi, _mm_set1_epi32(0x00F800F8) );
|
||||
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_slli_epi32(src32, 6), _mm_set1_epi32(0x0000F800)) );
|
||||
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_srli_epi32(dstHi, 5), _mm_set1_epi32(0x00070707)) );
|
||||
dstHi = _mm_or_si128( dstHi, srcAlphaBits32Hi );
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ConvertColor555To6665(const __m128i &srcColor, const __m128i &srcAlphaBits32Lo, const __m128i &srcAlphaBits32Hi, __m128i &dstLo, __m128i &dstHi)
|
||||
{
|
||||
__m128i src32;
|
||||
|
||||
// Conversion algorithm:
|
||||
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
|
||||
src32 = _mm_unpacklo_epi16(srcColor, _mm_setzero_si128());
|
||||
dstLo = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 17), _mm_srli_epi32(src32, 9)) : _mm_or_si128(_mm_slli_epi32(src32, 1), _mm_slli_epi32(src32, 7));
|
||||
dstLo = _mm_and_si128( dstLo, _mm_set1_epi32(0x003E003E) );
|
||||
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00003E00)) );
|
||||
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_srli_epi32(dstLo, 5), _mm_set1_epi32(0x00010101)) );
|
||||
dstLo = _mm_or_si128( dstLo, srcAlphaBits32Lo );
|
||||
|
||||
src32 = _mm_unpackhi_epi16(srcColor, _mm_setzero_si128());
|
||||
dstHi = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 17), _mm_srli_epi32(src32, 9)) : _mm_or_si128(_mm_slli_epi32(src32, 1), _mm_slli_epi32(src32, 7));
|
||||
dstHi = _mm_and_si128( dstHi, _mm_set1_epi32(0x003E003E) );
|
||||
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00003E00)) );
|
||||
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_srli_epi32(dstHi, 5), _mm_set1_epi32(0x00010101)) );
|
||||
dstHi = _mm_or_si128( dstHi, srcAlphaBits32Hi );
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ConvertColor555To8888Opaque(const __m128i &srcColor, __m128i &dstLo, __m128i &dstHi)
|
||||
{
|
||||
const __m128i srcAlphaBits32 = _mm_set1_epi32(0xFF000000);
|
||||
ConvertColor555To8888<SWAP_RB>(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ConvertColor555To6665Opaque(const __m128i &srcColor, __m128i &dstLo, __m128i &dstHi)
|
||||
{
|
||||
const __m128i srcAlphaBits32 = _mm_set1_epi32(0x1F000000);
|
||||
ConvertColor555To6665<SWAP_RB>(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE __m128i ConvertColor8888To6665(const __m128i &src)
|
||||
{
|
||||
// Conversion algorithm:
|
||||
// RGB 8-bit to 6-bit formula: dstRGB6 = (srcRGB8 >> 2)
|
||||
// Alpha 8-bit to 6-bit formula: dstA5 = (srcA8 >> 3)
|
||||
__m128i rgb;
|
||||
const __m128i a = _mm_and_si128( _mm_srli_epi32(src, 3), _mm_set1_epi32(0x1F000000) );
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
#ifdef ENABLE_SSSE3
|
||||
rgb = _mm_and_si128( _mm_srli_epi32(src, 2), _mm_set1_epi32(0x003F3F3F) );
|
||||
rgb = _mm_shuffle_epi8( rgb, _mm_set_epi8(15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2) );
|
||||
#else
|
||||
rgb = _mm_or_si128( _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x003F0000)), 18), _mm_or_si128(_mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x00003F00)), 2), _mm_slli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x0000003F)), 14)) );
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
rgb = _mm_and_si128( _mm_srli_epi32(src, 2), _mm_set1_epi32(0x003F3F3F) );
|
||||
}
|
||||
|
||||
return _mm_or_si128(rgb, a);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE __m128i ConvertColor6665To8888(const __m128i &src)
|
||||
{
|
||||
// Conversion algorithm:
|
||||
// RGB 6-bit to 8-bit formula: dstRGB8 = (srcRGB6 << 2) | ((srcRGB6 >> 4) & 0x03)
|
||||
// Alpha 5-bit to 8-bit formula: dstA8 = (srcA5 << 3) | ((srcA5 >> 2) & 0x07)
|
||||
__m128i rgb = _mm_or_si128( _mm_and_si128(_mm_slli_epi32(src, 2), _mm_set1_epi32(0x00FCFCFC)), _mm_and_si128(_mm_srli_epi32(src, 4), _mm_set1_epi32(0x00030303)) );
|
||||
const __m128i a = _mm_or_si128( _mm_and_si128(_mm_slli_epi32(src, 3), _mm_set1_epi32(0xF8000000)), _mm_and_si128(_mm_srli_epi32(src, 2), _mm_set1_epi32(0x07000000)) );
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
#ifdef ENABLE_SSSE3
|
||||
rgb = _mm_shuffle_epi8( rgb, _mm_set_epi8(15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2) );
|
||||
#else
|
||||
rgb = _mm_or_si128( _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x00FF0000)), 16), _mm_or_si128(_mm_and_si128(src, _mm_set1_epi32(0x0000FF00)), _mm_slli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x000000FF)), 16)) );
|
||||
#endif
|
||||
}
|
||||
|
||||
return _mm_or_si128(rgb, a);
|
||||
}
|
||||
|
||||
template <NDSColorFormat COLORFORMAT, bool SWAP_RB>
|
||||
FORCEINLINE __m128i _ConvertColorBaseTo5551(const __m128i &srcLo, const __m128i &srcHi)
|
||||
{
|
||||
if (COLORFORMAT == NDSColorFormat_BGR555_Rev)
|
||||
{
|
||||
return srcLo;
|
||||
}
|
||||
|
||||
__m128i rgbLo;
|
||||
__m128i rgbHi;
|
||||
__m128i alpha;
|
||||
|
||||
if (COLORFORMAT == NDSColorFormat_BGR666_Rev)
|
||||
{
|
||||
if (SWAP_RB)
|
||||
{
|
||||
// Convert color from low bits
|
||||
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 17), _mm_set1_epi32(0x0000001F));
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 4), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_slli_epi32(srcLo, 9), _mm_set1_epi32(0x00007C00)) );
|
||||
|
||||
// Convert color from high bits
|
||||
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 17), _mm_set1_epi32(0x0000001F));
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 4), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_slli_epi32(srcHi, 9), _mm_set1_epi32(0x00007C00)) );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Convert color from low bits
|
||||
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 1), _mm_set1_epi32(0x0000001F));
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 4), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 7), _mm_set1_epi32(0x00007C00)) );
|
||||
|
||||
// Convert color from high bits
|
||||
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 1), _mm_set1_epi32(0x0000001F));
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 4), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 7), _mm_set1_epi32(0x00007C00)) );
|
||||
}
|
||||
|
||||
// Convert alpha
|
||||
alpha = _mm_packs_epi32( _mm_and_si128(_mm_srli_epi32(srcLo, 24), _mm_set1_epi32(0x0000001F)), _mm_and_si128(_mm_srli_epi32(srcHi, 24), _mm_set1_epi32(0x0000001F)) );
|
||||
alpha = _mm_cmpgt_epi16(alpha, _mm_setzero_si128());
|
||||
alpha = _mm_and_si128(alpha, _mm_set1_epi16(0x8000));
|
||||
}
|
||||
else if (COLORFORMAT == NDSColorFormat_BGR888_Rev)
|
||||
{
|
||||
if (SWAP_RB)
|
||||
{
|
||||
// Convert color from low bits
|
||||
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 19), _mm_set1_epi32(0x0000001F));
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 6), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_slli_epi32(srcLo, 7), _mm_set1_epi32(0x00007C00)) );
|
||||
|
||||
// Convert color from high bits
|
||||
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 19), _mm_set1_epi32(0x0000001F));
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 6), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_slli_epi32(srcHi, 7), _mm_set1_epi32(0x00007C00)) );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Convert color from low bits
|
||||
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 3), _mm_set1_epi32(0x0000001F));
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 6), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 9), _mm_set1_epi32(0x00007C00)) );
|
||||
|
||||
// Convert color from high bits
|
||||
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 3), _mm_set1_epi32(0x0000001F));
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 6), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 9), _mm_set1_epi32(0x00007C00)) );
|
||||
}
|
||||
|
||||
// Convert alpha
|
||||
alpha = _mm_packs_epi32( _mm_and_si128(_mm_srli_epi32(srcLo, 24), _mm_set1_epi32(0x000000FF)), _mm_and_si128(_mm_srli_epi32(srcHi, 24), _mm_set1_epi32(0x000000FF)) );
|
||||
alpha = _mm_cmpgt_epi16(alpha, _mm_setzero_si128());
|
||||
alpha = _mm_and_si128(alpha, _mm_set1_epi16(0x8000));
|
||||
}
|
||||
|
||||
return _mm_or_si128(_mm_packs_epi32(rgbLo, rgbHi), alpha);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE __m128i ConvertColor8888To5551(const __m128i &srcLo, const __m128i &srcHi)
|
||||
{
|
||||
return _ConvertColorBaseTo5551<NDSColorFormat_BGR888_Rev, SWAP_RB>(srcLo, srcHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE __m128i ConvertColor6665To5551(const __m128i &srcLo, const __m128i &srcHi)
|
||||
{
|
||||
return _ConvertColorBaseTo5551<NDSColorFormat_BGR666_Rev, SWAP_RB>(srcLo, srcHi);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
template<bool SWAP_RB, bool UNALIGNED> void ConvertColorBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template<bool SWAP_RB, bool UNALIGNED> void ConvertColorBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
|
||||
template<bool SWAP_RB> void ConvertColorBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount);
|
||||
template<bool SWAP_RB> void ConvertColorBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount);
|
||||
|
||||
template<bool SWAP_RB, bool UNALIGNED> void ConvertColorBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template<bool SWAP_RB, bool UNALIGNED> void ConvertColorBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -52,6 +52,7 @@ libdesmume_a_SOURCES = \
|
|||
utils/decrypt/decrypt.h utils/decrypt/header.cpp utils/decrypt/header.h \
|
||||
utils/task.cpp utils/task.h \
|
||||
utils/vfat.h utils/vfat.cpp \
|
||||
utils/colorspacehandler/colorspacehandler.cpp \
|
||||
utils/dlditool.cpp \
|
||||
utils/libfat/bit_ops.h \
|
||||
utils/libfat/cache.cpp \
|
||||
|
@ -107,6 +108,21 @@ libdesmume_a_SOURCES = \
|
|||
libretro-common/rthreads/async_job.c \
|
||||
libretro-common/rthreads/rsemaphore.c \
|
||||
libretro-common/rthreads/rthreads.c
|
||||
|
||||
# SSE2 colorspace conversion routines; compiled only when the SUPPORT_SSE2 conditional is true.
if SUPPORT_SSE2
libdesmume_a_SOURCES += \
	utils/colorspacehandler/colorspacehandler_SSE2.cpp
endif
|
||||
|
||||
# AVX2 colorspace conversion routines; compiled only when the SUPPORT_AVX2 conditional is true.
if SUPPORT_AVX2
libdesmume_a_SOURCES += \
	utils/colorspacehandler/colorspacehandler_AVX2.cpp
endif
|
||||
|
||||
# AltiVec colorspace conversion routines; compiled only when the SUPPORT_ALTIVEC conditional is true.
if SUPPORT_ALTIVEC
libdesmume_a_SOURCES += \
	utils/colorspacehandler/colorspacehandler_AltiVec.cpp
endif
|
||||
|
||||
if HAVE_JIT
|
||||
libdesmume_a_SOURCES += \
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
|
||||
#ifdef ENABLE_SSE2
|
||||
#include <emmintrin.h>
|
||||
#include "./utils/colorspacehandler/colorspacehandler_SSE2.h"
|
||||
#endif
|
||||
|
||||
typedef struct
|
||||
|
@ -990,9 +991,9 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0));
|
||||
const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + i + 4));
|
||||
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 0), ConvertColor8888To6665<true>(srcColorLo) );
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 4), ConvertColor8888To6665<true>(srcColorHi) );
|
||||
_mm_store_si128( (__m128i *)(dstRGBA5551 + i), ConvertColor8888To5551<true>(srcColorLo, srcColorHi) );
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 0), ColorspaceConvert8888To6665_SSE2<true>(srcColorLo) );
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 4), ColorspaceConvert8888To6665_SSE2<true>(srcColorHi) );
|
||||
_mm_store_si128( (__m128i *)(dstRGBA5551 + i), ColorspaceConvert8888To5551_SSE2<true>(srcColorLo, srcColorHi) );
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -1001,17 +1002,17 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
#endif
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dstFramebuffer[i].color = ConvertColor8888To6665<true>(srcFramebuffer[i]);
|
||||
dstRGBA5551[i] = ConvertColor8888To5551<true>(srcFramebuffer[i]);
|
||||
dstFramebuffer[i].color = ColorspaceConvert8888To6665<true>(srcFramebuffer[i]);
|
||||
dstRGBA5551[i] = ColorspaceConvert8888To5551<true>(srcFramebuffer[i]);
|
||||
}
|
||||
}
|
||||
else if (dstFramebuffer != NULL)
|
||||
{
|
||||
ConvertColorBuffer8888To6665<true>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
|
||||
ColorspaceConvertBuffer8888To6665<true, false>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
|
||||
}
|
||||
else
|
||||
{
|
||||
ConvertColorBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
|
||||
ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
|
||||
}
|
||||
}
|
||||
else if (this->_outputFormat == NDSColorFormat_BGR888_Rev)
|
||||
|
@ -1027,7 +1028,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 0), srcColorLo );
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 4), srcColorHi );
|
||||
_mm_store_si128( (__m128i *)(dstRGBA5551 + i), ConvertColor8888To5551<true>(srcColorLo, srcColorHi) );
|
||||
_mm_store_si128( (__m128i *)(dstRGBA5551 + i), ColorspaceConvert8888To5551_SSE2<true>(srcColorLo, srcColorHi) );
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -1036,8 +1037,8 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
#endif
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dstFramebuffer[i].color = ConvertColor8888To6665<true>(srcFramebuffer[i]);
|
||||
dstRGBA5551[i] = ConvertColor8888To5551<true>(srcFramebuffer[i]);
|
||||
dstFramebuffer[i].color = ColorspaceConvert8888To6665<true>(srcFramebuffer[i]);
|
||||
dstRGBA5551[i] = ColorspaceConvert8888To5551<true>(srcFramebuffer[i]);
|
||||
}
|
||||
}
|
||||
else if (dstFramebuffer != NULL)
|
||||
|
@ -1046,7 +1047,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
}
|
||||
else
|
||||
{
|
||||
ConvertColorBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
|
||||
ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1068,9 +1069,9 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 0));
|
||||
const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 4));
|
||||
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 0), ConvertColor8888To6665<true>(srcColorLo) );
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 4), ConvertColor8888To6665<true>(srcColorHi) );
|
||||
_mm_store_si128( (__m128i *)(dstRGBA5551 + iw), ConvertColor8888To5551<true>(srcColorLo, srcColorHi) );
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 0), ColorspaceConvert8888To6665_SSE2<true>(srcColorLo) );
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 4), ColorspaceConvert8888To6665_SSE2<true>(srcColorHi) );
|
||||
_mm_store_si128( (__m128i *)(dstRGBA5551 + iw), ColorspaceConvert8888To5551_SSE2<true>(srcColorLo, srcColorHi) );
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -1079,8 +1080,8 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
#endif
|
||||
for (; x < pixCount; x++, ir++, iw++)
|
||||
{
|
||||
dstFramebuffer[iw].color = ConvertColor8888To6665<true>(srcFramebuffer[ir]);
|
||||
dstRGBA5551[iw] = ConvertColor8888To5551<true>(srcFramebuffer[ir]);
|
||||
dstFramebuffer[iw].color = ColorspaceConvert8888To6665<true>(srcFramebuffer[ir]);
|
||||
dstRGBA5551[iw] = ColorspaceConvert8888To5551<true>(srcFramebuffer[ir]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1088,14 +1089,14 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
{
|
||||
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
|
||||
{
|
||||
ConvertColorBuffer8888To6665<true>((u32 *)srcFramebuffer + ir, (u32 *)dstFramebuffer + iw, pixCount);
|
||||
ColorspaceConvertBuffer8888To6665<true, false>((u32 *)srcFramebuffer + ir, (u32 *)dstFramebuffer + iw, pixCount);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
|
||||
{
|
||||
ConvertColorBuffer8888To5551<true, false>((u32 *)srcFramebuffer + ir, dstRGBA5551 + iw, pixCount);
|
||||
ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer + ir, dstRGBA5551 + iw, pixCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1115,7 +1116,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 0), srcColorLo );
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 4), srcColorHi );
|
||||
_mm_store_si128( (__m128i *)(dstRGBA5551 + iw), ConvertColor8888To5551<true>(srcColorLo, srcColorHi) );
|
||||
_mm_store_si128( (__m128i *)(dstRGBA5551 + iw), ColorspaceConvert8888To5551_SSE2<true>(srcColorLo, srcColorHi) );
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -1125,7 +1126,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
for (; x < pixCount; x++, ir++, iw++)
|
||||
{
|
||||
dstFramebuffer[iw] = srcFramebuffer[ir];
|
||||
dstRGBA5551[iw] = ConvertColor8888To5551<true>(srcFramebuffer[ir]);
|
||||
dstRGBA5551[iw] = ColorspaceConvert8888To5551<true>(srcFramebuffer[ir]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1146,7 +1147,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
{
|
||||
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
|
||||
{
|
||||
ConvertColorBuffer8888To5551<true, false>((u32 *)srcFramebuffer + ir, dstRGBA5551 + iw, pixCount);
|
||||
ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer + ir, dstRGBA5551 + iw, pixCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -243,6 +243,8 @@
|
|||
AB564915186E6F67002740F4 /* Image_Piano.png in Resources */ = {isa = PBXBuildFile; fileRef = AB56490B186E6F67002740F4 /* Image_Piano.png */; };
|
||||
AB5785FD17176AFC002C5FC7 /* OpenEmuBase.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = AB5785FC17176AFC002C5FC7 /* OpenEmuBase.framework */; };
|
||||
AB58F32D1364F44B0074C376 /* cocoa_file.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB58F32C1364F44B0074C376 /* cocoa_file.mm */; };
|
||||
AB5FDDAC1D62C89E0094617C /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */; };
|
||||
AB5FDDAD1D62C8A00094617C /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */; };
|
||||
AB64987C13ECC73800EE7DD2 /* FileTypeInfo.plist in Resources */ = {isa = PBXBuildFile; fileRef = AB64987B13ECC73800EE7DD2 /* FileTypeInfo.plist */; };
|
||||
AB68101B187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png in Resources */ = {isa = PBXBuildFile; fileRef = AB681013187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png */; };
|
||||
AB68101C187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png in Resources */ = {isa = PBXBuildFile; fileRef = AB681013187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png */; };
|
||||
|
@ -974,6 +976,12 @@
|
|||
ABB97878144E89CC00793FA3 /* Icon_DeSmuME_32x32.png in Resources */ = {isa = PBXBuildFile; fileRef = ABB97875144E89CC00793FA3 /* Icon_DeSmuME_32x32.png */; };
|
||||
ABBC0F8D1394B1AA0028B6BD /* DefaultUserPrefs.plist in Resources */ = {isa = PBXBuildFile; fileRef = ABBC0F8C1394B1AA0028B6BD /* DefaultUserPrefs.plist */; };
|
||||
ABBF04A514B515F300E505A0 /* AppIcon_ROMCheats.icns in Resources */ = {isa = PBXBuildFile; fileRef = ABBF04A414B515F300E505A0 /* AppIcon_ROMCheats.icns */; };
|
||||
ABBFFF851D6283C0003CD598 /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */; };
|
||||
ABBFFF861D6283C1003CD598 /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */; };
|
||||
ABBFFF871D6283C1003CD598 /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */; };
|
||||
ABBFFF891D6283D2003CD598 /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */; };
|
||||
ABBFFF8A1D6283D3003CD598 /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */; };
|
||||
ABBFFF8B1D6283D3003CD598 /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */; };
|
||||
ABC3AF2F14B7F06900D5B13D /* Icon_VolumeFull_16x16.png in Resources */ = {isa = PBXBuildFile; fileRef = ABC3AF2B14B7F06900D5B13D /* Icon_VolumeFull_16x16.png */; };
|
||||
ABC3AF3014B7F06900D5B13D /* Icon_VolumeMute_16x16.png in Resources */ = {isa = PBXBuildFile; fileRef = ABC3AF2C14B7F06900D5B13D /* Icon_VolumeMute_16x16.png */; };
|
||||
ABC3AF3114B7F06900D5B13D /* Icon_VolumeOneThird_16x16.png in Resources */ = {isa = PBXBuildFile; fileRef = ABC3AF2D14B7F06900D5B13D /* Icon_VolumeOneThird_16x16.png */; };
|
||||
|
@ -1534,6 +1542,14 @@
|
|||
ABBB421516B4A5F30012E5AB /* OGLRender_3_2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = OGLRender_3_2.h; path = ../OGLRender_3_2.h; sourceTree = "<group>"; };
|
||||
ABBC0F8C1394B1AA0028B6BD /* DefaultUserPrefs.plist */ = {isa = PBXFileReference; lastKnownFileType = file.bplist; path = DefaultUserPrefs.plist; sourceTree = "<group>"; };
|
||||
ABBF04A414B515F300E505A0 /* AppIcon_ROMCheats.icns */ = {isa = PBXFileReference; lastKnownFileType = image.icns; path = AppIcon_ROMCheats.icns; sourceTree = "<group>"; };
|
||||
ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler.cpp; sourceTree = "<group>"; };
|
||||
ABBFFF701D5F9C52003CD598 /* colorspacehandler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colorspacehandler.h; sourceTree = "<group>"; };
|
||||
ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler_SSE2.cpp; sourceTree = "<group>"; };
|
||||
ABBFFF761D5FD2ED003CD598 /* colorspacehandler_SSE2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colorspacehandler_SSE2.h; sourceTree = "<group>"; };
|
||||
ABBFFF7B1D610457003CD598 /* colorspacehandler_AVX2.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler_AVX2.cpp; sourceTree = "<group>"; };
|
||||
ABBFFF7C1D610457003CD598 /* colorspacehandler_AVX2.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = colorspacehandler_AVX2.h; sourceTree = "<group>"; };
|
||||
ABBFFF811D611A36003CD598 /* colorspacehandler_AltiVec.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler_AltiVec.cpp; sourceTree = "<group>"; };
|
||||
ABBFFF821D611A36003CD598 /* colorspacehandler_AltiVec.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = colorspacehandler_AltiVec.h; sourceTree = "<group>"; };
|
||||
ABC3AF2B14B7F06900D5B13D /* Icon_VolumeFull_16x16.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_VolumeFull_16x16.png; path = images/Icon_VolumeFull_16x16.png; sourceTree = "<group>"; };
|
||||
ABC3AF2C14B7F06900D5B13D /* Icon_VolumeMute_16x16.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_VolumeMute_16x16.png; path = images/Icon_VolumeMute_16x16.png; sourceTree = "<group>"; };
|
||||
ABC3AF2D14B7F06900D5B13D /* Icon_VolumeOneThird_16x16.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_VolumeOneThird_16x16.png; path = images/Icon_VolumeOneThird_16x16.png; sourceTree = "<group>"; };
|
||||
|
@ -2508,6 +2524,21 @@
|
|||
path = openemu;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
ABBFFF6E1D5F9C10003CD598 /* colorspacehandler */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
ABBFFF811D611A36003CD598 /* colorspacehandler_AltiVec.cpp */,
|
||||
ABBFFF7B1D610457003CD598 /* colorspacehandler_AVX2.cpp */,
|
||||
ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */,
|
||||
ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */,
|
||||
ABBFFF821D611A36003CD598 /* colorspacehandler_AltiVec.h */,
|
||||
ABBFFF7C1D610457003CD598 /* colorspacehandler_AVX2.h */,
|
||||
ABBFFF761D5FD2ED003CD598 /* colorspacehandler_SSE2.h */,
|
||||
ABBFFF701D5F9C52003CD598 /* colorspacehandler.h */,
|
||||
);
|
||||
path = colorspacehandler;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
ABC2ECD613B1C87000FAAA2A /* Images */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
|
@ -2759,6 +2790,7 @@
|
|||
ABD1FF211345ACBF00AF11D1 /* decrypt */,
|
||||
ABD1FF2E1345ACBF00AF11D1 /* libfat */,
|
||||
ABE670241415DE6C00E8E4C9 /* tinyxml */,
|
||||
ABBFFF6E1D5F9C10003CD598 /* colorspacehandler */,
|
||||
ABD1FF1D1345ACBF00AF11D1 /* ConvertUTF.c */,
|
||||
AB9038A517C5ECFD00F410BD /* advanscene.cpp */,
|
||||
ABD1FF1F1345ACBF00AF11D1 /* datetime.cpp */,
|
||||
|
@ -3770,6 +3802,7 @@
|
|||
ABE6840D189E33BC007FD69C /* OGLDisplayOutput.cpp in Sources */,
|
||||
ABD1FF121345AC9C00AF11D1 /* slot2_none.cpp in Sources */,
|
||||
ABD1FF131345AC9C00AF11D1 /* slot2_paddle.cpp in Sources */,
|
||||
ABBFFF8A1D6283D3003CD598 /* colorspacehandler_SSE2.cpp in Sources */,
|
||||
ABD1FF141345AC9C00AF11D1 /* slot2_piano.cpp in Sources */,
|
||||
ABD1FF151345AC9C00AF11D1 /* slot2_rumblepak.cpp in Sources */,
|
||||
ABD1041F1346652500AF11D1 /* sndOSX.cpp in Sources */,
|
||||
|
@ -3864,6 +3897,7 @@
|
|||
AB40565E169F5DBB0016AC3E /* virtualmemory.cpp in Sources */,
|
||||
AB405661169F5DBB0016AC3E /* zonememory.cpp in Sources */,
|
||||
AB405679169F5DCC0016AC3E /* x86assembler.cpp in Sources */,
|
||||
ABBFFF861D6283C1003CD598 /* colorspacehandler.cpp in Sources */,
|
||||
AB40567C169F5DCC0016AC3E /* x86compiler.cpp in Sources */,
|
||||
ABFEA8A41BB4EC1100B08C25 /* sfnt.c in Sources */,
|
||||
ABA731691BB51FDC00B26147 /* type1cid.c in Sources */,
|
||||
|
@ -4017,6 +4051,7 @@
|
|||
AB796D4315CDCBA200C59155 /* version.cpp in Sources */,
|
||||
ABFEA82B1BB4EC1100B08C25 /* ftinit.c in Sources */,
|
||||
AB796D4415CDCBA200C59155 /* vfat.cpp in Sources */,
|
||||
AB5FDDAC1D62C89E0094617C /* colorspacehandler.cpp in Sources */,
|
||||
AB796D4515CDCBA200C59155 /* videofilter.cpp in Sources */,
|
||||
AB796D4615CDCBA200C59155 /* WavFile.cpp in Sources */,
|
||||
AB796D4715CDCBA200C59155 /* wifi.cpp in Sources */,
|
||||
|
@ -4096,6 +4131,7 @@
|
|||
AB26D87C16B5253D00A2305C /* OGLRender_3_2.cpp in Sources */,
|
||||
AB3A655E16CC5421001F5D4A /* EmuControllerDelegate.mm in Sources */,
|
||||
AB3A656116CC5438001F5D4A /* cocoa_GPU.mm in Sources */,
|
||||
AB5FDDAD1D62C8A00094617C /* colorspacehandler_SSE2.cpp in Sources */,
|
||||
AB8967D916D2ED0700F826F1 /* DisplayWindowController.mm in Sources */,
|
||||
AB29B33116D4BEBF000EF671 /* InputManager.mm in Sources */,
|
||||
AB8B7AAC17CE8C440051CEBF /* slot1comp_protocol.cpp in Sources */,
|
||||
|
@ -4272,6 +4308,7 @@
|
|||
AB2ABA401C9F9CFA00173B15 /* rsemaphore.c in Sources */,
|
||||
AB8F3CF01A53AC2600A80BF6 /* ringbuffer.cpp in Sources */,
|
||||
AB8F3CF11A53AC2600A80BF6 /* arm_jit.cpp in Sources */,
|
||||
ABBFFF891D6283D2003CD598 /* colorspacehandler_SSE2.cpp in Sources */,
|
||||
AB8F3CF21A53AC2600A80BF6 /* troubleshootingWindowDelegate.mm in Sources */,
|
||||
AB8F3CF31A53AC2600A80BF6 /* assembler.cpp in Sources */,
|
||||
AB8F3CF41A53AC2600A80BF6 /* assert.cpp in Sources */,
|
||||
|
@ -4295,6 +4332,7 @@
|
|||
AB8F3D041A53AC2600A80BF6 /* virtualmemory.cpp in Sources */,
|
||||
AB8F3D051A53AC2600A80BF6 /* zonememory.cpp in Sources */,
|
||||
AB8F3D061A53AC2600A80BF6 /* x86assembler.cpp in Sources */,
|
||||
ABBFFF851D6283C0003CD598 /* colorspacehandler.cpp in Sources */,
|
||||
AB8F3D071A53AC2600A80BF6 /* x86compiler.cpp in Sources */,
|
||||
AB8F3D081A53AC2600A80BF6 /* x86compilercontext.cpp in Sources */,
|
||||
AB8F3D091A53AC2600A80BF6 /* x86compilerfunc.cpp in Sources */,
|
||||
|
@ -4367,6 +4405,7 @@
|
|||
ABB3C6911501C04F00E0C22E /* SoundTouch.cpp in Sources */,
|
||||
ABB3C6921501C04F00E0C22E /* sse_optimized.cpp in Sources */,
|
||||
ABB3C6931501C04F00E0C22E /* TDStretch.cpp in Sources */,
|
||||
ABBFFF871D6283C1003CD598 /* colorspacehandler.cpp in Sources */,
|
||||
ABB3C6941501C04F00E0C22E /* WavFile.cpp in Sources */,
|
||||
ABB3C6951501C04F00E0C22E /* metaspu.cpp in Sources */,
|
||||
ABB3C6961501C04F00E0C22E /* SndOut.cpp in Sources */,
|
||||
|
@ -4436,6 +4475,7 @@
|
|||
ABB3C6D11501C04F00E0C22E /* slot1.cpp in Sources */,
|
||||
ABB3C6D31501C04F00E0C22E /* SPU.cpp in Sources */,
|
||||
ABB3C6D41501C04F00E0C22E /* texcache.cpp in Sources */,
|
||||
ABBFFF8B1D6283D3003CD598 /* colorspacehandler_SSE2.cpp in Sources */,
|
||||
AB9038BA17C5ED2200F410BD /* slot1comp_rom.cpp in Sources */,
|
||||
ABB3C6D51501C04F00E0C22E /* thumb_instructions.cpp in Sources */,
|
||||
AB2EE13317D57F5000F68622 /* fsnitro.cpp in Sources */,
|
||||
|
|
|
@ -740,6 +740,14 @@
|
|||
AB2F56F11704C86900E28885 /* utilities.c in Sources */ = {isa = PBXBuildFile; fileRef = AB2F56EF1704C86900E28885 /* utilities.c */; };
|
||||
AB2F56F21704C86900E28885 /* utilities.c in Sources */ = {isa = PBXBuildFile; fileRef = AB2F56EF1704C86900E28885 /* utilities.c */; };
|
||||
AB2F56F31704C86900E28885 /* utilities.c in Sources */ = {isa = PBXBuildFile; fileRef = AB2F56EF1704C86900E28885 /* utilities.c */; };
|
||||
AB37E3741D6188BC004A2C0D /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */; };
|
||||
AB37E3771D6188BC004A2C0D /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E3721D6188BC004A2C0D /* colorspacehandler_SSE2.cpp */; };
|
||||
AB37E3781D6188BC004A2C0D /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */; };
|
||||
AB37E37B1D6188BC004A2C0D /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E3721D6188BC004A2C0D /* colorspacehandler_SSE2.cpp */; };
|
||||
AB37E37C1D6188BC004A2C0D /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */; };
|
||||
AB37E37D1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36E1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp */; };
|
||||
AB37E3801D6188BC004A2C0D /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */; };
|
||||
AB37E38A1D61895F004A2C0D /* colorspacehandler_AltiVec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36E1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp */; };
|
||||
AB3ACB7814C2361100D7D192 /* appDelegate.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB3ACB6714C2361100D7D192 /* appDelegate.mm */; };
|
||||
AB3ACB7914C2361100D7D192 /* cheatWindowDelegate.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB3ACB6914C2361100D7D192 /* cheatWindowDelegate.mm */; };
|
||||
AB3ACB7C14C2361100D7D192 /* inputPrefsView.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB3ACB6F14C2361100D7D192 /* inputPrefsView.mm */; };
|
||||
|
@ -1156,6 +1164,8 @@
|
|||
AB73AA2E1507C9F500A310C8 /* OpenGL.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = ABC570D4134431DA00E7B0B1 /* OpenGL.framework */; };
|
||||
AB73AA2F1507C9F500A310C8 /* libz.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = AB0A0D1914AACA9600E83E91 /* libz.dylib */; };
|
||||
AB75226F14C7BB51009B97B3 /* AppIcon_FirmwareConfig.icns in Resources */ = {isa = PBXBuildFile; fileRef = AB75226D14C7BB51009B97B3 /* AppIcon_FirmwareConfig.icns */; };
|
||||
AB7BB17F1D62C8CC00A7A6E2 /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */; };
|
||||
AB7BB1801D62C8CF00A7A6E2 /* colorspacehandler_AltiVec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36E1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp */; };
|
||||
AB7DDA6D173DC38F004F3D07 /* Carbon.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = ABB6AD5C173A3F2B00EC2E8D /* Carbon.framework */; };
|
||||
AB7DDA6E173DC399004F3D07 /* Carbon.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = ABB6AD5C173A3F2B00EC2E8D /* Carbon.framework */; };
|
||||
AB7DDA6F173DC39E004F3D07 /* Carbon.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = ABB6AD5C173A3F2B00EC2E8D /* Carbon.framework */; };
|
||||
|
@ -1835,6 +1845,12 @@
|
|||
AB2F56EF1704C86900E28885 /* utilities.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = utilities.c; sourceTree = "<group>"; };
|
||||
AB350BA41478AC96007165AC /* IOKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = IOKit.framework; path = System/Library/Frameworks/IOKit.framework; sourceTree = SDKROOT; };
|
||||
AB350D38147A1D8D007165AC /* English */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; name = English; path = translations/English.lproj/HID_usage_strings.plist; sourceTree = "<group>"; };
|
||||
AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler.cpp; sourceTree = "<group>"; };
|
||||
AB37E36D1D6188BC004A2C0D /* colorspacehandler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colorspacehandler.h; sourceTree = "<group>"; };
|
||||
AB37E36E1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler_AltiVec.cpp; sourceTree = "<group>"; };
|
||||
AB37E36F1D6188BC004A2C0D /* colorspacehandler_AltiVec.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colorspacehandler_AltiVec.h; sourceTree = "<group>"; };
|
||||
AB37E3721D6188BC004A2C0D /* colorspacehandler_SSE2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler_SSE2.cpp; sourceTree = "<group>"; };
|
||||
AB37E3731D6188BC004A2C0D /* colorspacehandler_SSE2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colorspacehandler_SSE2.h; sourceTree = "<group>"; };
|
||||
AB3ACB6614C2361100D7D192 /* appDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = appDelegate.h; sourceTree = "<group>"; };
|
||||
AB3ACB6714C2361100D7D192 /* appDelegate.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = appDelegate.mm; sourceTree = "<group>"; };
|
||||
AB3ACB6814C2361100D7D192 /* cheatWindowDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cheatWindowDelegate.h; sourceTree = "<group>"; };
|
||||
|
@ -2894,6 +2910,19 @@
|
|||
path = src;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
AB37E36B1D6188BC004A2C0D /* colorspacehandler */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */,
|
||||
AB37E36D1D6188BC004A2C0D /* colorspacehandler.h */,
|
||||
AB37E36E1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp */,
|
||||
AB37E36F1D6188BC004A2C0D /* colorspacehandler_AltiVec.h */,
|
||||
AB37E3721D6188BC004A2C0D /* colorspacehandler_SSE2.cpp */,
|
||||
AB37E3731D6188BC004A2C0D /* colorspacehandler_SSE2.h */,
|
||||
);
|
||||
path = colorspacehandler;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
AB3ACB6514C2361100D7D192 /* userinterface */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
|
@ -3207,6 +3236,7 @@
|
|||
isa = PBXGroup;
|
||||
children = (
|
||||
ABBCE2A115ACB29100A2C965 /* AsmJit */,
|
||||
AB37E36B1D6188BC004A2C0D /* colorspacehandler */,
|
||||
ABD1FF211345ACBF00AF11D1 /* decrypt */,
|
||||
ABD1FF2E1345ACBF00AF11D1 /* libfat */,
|
||||
ABE670241415DE6C00E8E4C9 /* tinyxml */,
|
||||
|
@ -4508,6 +4538,8 @@
|
|||
AB50200A1D09E712002FA150 /* file_path.c in Sources */,
|
||||
AB50200B1D09E712002FA150 /* retro_dirent.c in Sources */,
|
||||
AB50200C1D09E712002FA150 /* retro_stat.c in Sources */,
|
||||
AB7BB17F1D62C8CC00A7A6E2 /* colorspacehandler.cpp in Sources */,
|
||||
AB7BB1801D62C8CF00A7A6E2 /* colorspacehandler_AltiVec.cpp in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
|
@ -4687,6 +4719,8 @@
|
|||
AB5020161D09E712002FA150 /* file_path.c in Sources */,
|
||||
AB5020171D09E712002FA150 /* retro_dirent.c in Sources */,
|
||||
AB5020181D09E712002FA150 /* retro_stat.c in Sources */,
|
||||
AB37E3801D6188BC004A2C0D /* colorspacehandler.cpp in Sources */,
|
||||
AB37E38A1D61895F004A2C0D /* colorspacehandler_AltiVec.cpp in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
|
@ -4896,6 +4930,8 @@
|
|||
AB50200D1D09E712002FA150 /* file_path.c in Sources */,
|
||||
AB50200E1D09E712002FA150 /* retro_dirent.c in Sources */,
|
||||
AB50200F1D09E712002FA150 /* retro_stat.c in Sources */,
|
||||
AB37E3741D6188BC004A2C0D /* colorspacehandler.cpp in Sources */,
|
||||
AB37E3771D6188BC004A2C0D /* colorspacehandler_SSE2.cpp in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
|
@ -5105,6 +5141,8 @@
|
|||
AB5020101D09E712002FA150 /* file_path.c in Sources */,
|
||||
AB5020111D09E712002FA150 /* retro_dirent.c in Sources */,
|
||||
AB5020121D09E712002FA150 /* retro_stat.c in Sources */,
|
||||
AB37E3781D6188BC004A2C0D /* colorspacehandler.cpp in Sources */,
|
||||
AB37E37B1D6188BC004A2C0D /* colorspacehandler_SSE2.cpp in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
|
@ -5284,6 +5322,8 @@
|
|||
AB5020131D09E712002FA150 /* file_path.c in Sources */,
|
||||
AB5020141D09E712002FA150 /* retro_dirent.c in Sources */,
|
||||
AB5020151D09E712002FA150 /* retro_stat.c in Sources */,
|
||||
AB37E37C1D6188BC004A2C0D /* colorspacehandler.cpp in Sources */,
|
||||
AB37E37D1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
|
|
|
@ -754,7 +754,7 @@
|
|||
|
||||
if (dispInfo.pixelBytes == 2)
|
||||
{
|
||||
ConvertColorBuffer555To8888Opaque<false, false>((u16 *)displayBuffer, bitmapData, (w * h));
|
||||
ColorspaceConvertBuffer555To8888Opaque<false, false>((u16 *)displayBuffer, bitmapData, (w * h));
|
||||
}
|
||||
else if (dispInfo.pixelBytes == 4)
|
||||
{
|
||||
|
|
|
@ -692,7 +692,7 @@ void RomIconToRGBA8888(uint32_t *bitmapData)
|
|||
//
|
||||
// The first entry always represents the alpha, so we can just ignore it.
|
||||
clut[0] = 0x00000000;
|
||||
ConvertColorBuffer555To8888Opaque<false, true>((u16 *)iconClutPtr, &clut[1], 15);
|
||||
ColorspaceConvertBuffer555To8888Opaque<false, true>((u16 *)iconClutPtr, &clut[1], 15);
|
||||
|
||||
// Load the image from the icon pixel data.
|
||||
//
|
||||
|
|
|
@ -1,65 +1,63 @@
|
|||
/*
|
||||
Copyright (C) 2008-2015 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This file is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this software. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <zlib.h>
|
||||
#include "types.h"
|
||||
#include "ImageOut.h"
|
||||
#include "formats/rpng.h"
|
||||
#include "formats/rbmp.h"
|
||||
#include "GPU.h"
|
||||
|
||||
// Converts a buffer of 15-bit (555) colors into a newly allocated, tightly
// packed buffer holding 3 bytes per pixel.
//
// src    - source pixels; width*height 16-bit entries are read.
// width  - image width in pixels.
// height - image height in pixels.
//
// Returns a malloc()'d buffer of width*height*3 bytes that the caller must
// free(), or NULL if a dimension is negative, the byte count would overflow,
// or the allocation fails.
static u8* Convert15To24(const u16* src, int width, int height)
{
	if (width < 0 || height < 0)
		return NULL;

	// Do the size arithmetic in size_t and reject sizes that cannot be represented,
	// so that large dimensions cannot silently wrap around.
	const size_t w = (size_t)width;
	const size_t h = (size_t)height;
	if (h != 0 && w > ((size_t)-1) / 3 / h)
		return NULL;

	const size_t pixCount = w * h;
	u8 *tmp_buffer = (u8 *)malloc(pixCount * 3);
	if (tmp_buffer == NULL)
		return NULL;

	u8 *tmp_inc = tmp_buffer;
	for (size_t i = 0; i < pixCount; i++)
	{
		// Store the three low bytes of the converted 32-bit color, lowest byte first.
		const u32 dst = ConvertColor555To8888Opaque<true>(*src++);
		*tmp_inc++ = dst & 0xFF;
		*tmp_inc++ = (dst >> 8) & 0xFF;
		*tmp_inc++ = (dst >> 16) & 0xFF;
	}

	return tmp_buffer;
}
|
||||
|
||||
int NDS_WritePNG_15bpp(int width, int height, const u16 *data, const char *filename)
|
||||
{
|
||||
u8* tmp = Convert15To24(data,width,height);
|
||||
bool ok = rpng_save_image_bgr24(filename,tmp,width,height,width*3);
|
||||
free(tmp);
|
||||
return ok?1:0;
|
||||
}
|
||||
|
||||
int NDS_WriteBMP_15bpp(int width, int height, const u16 *data, const char *filename)
|
||||
{
|
||||
u8* tmp = Convert15To24(data,width,height);
|
||||
bool ok = rbmp_save_image(filename,tmp,width,height,width*3,RBMP_SOURCE_TYPE_BGR24);
|
||||
free(tmp);
|
||||
return ok?1:0;
|
||||
}
|
||||
|
||||
int NDS_WriteBMP_32bppBuffer(int width, int height, const void* buf, const char *filename)
|
||||
{
|
||||
bool ok = rbmp_save_image(filename,buf,width,height,width*4,RBMP_SOURCE_TYPE_ARGB8888);
|
||||
return ok?1:0;
|
||||
/*
|
||||
Copyright (C) 2008-2015 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This file is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this software. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <zlib.h>
|
||||
#include "types.h"
|
||||
#include "ImageOut.h"
|
||||
#include "formats/rpng.h"
|
||||
#include "formats/rbmp.h"
|
||||
#include "GPU.h"
|
||||
|
||||
// Converts a buffer of 15-bit (555) colors into a newly allocated, tightly
// packed buffer holding 3 bytes per pixel.
//
// src    - source pixels; width*height 16-bit entries are read.
// width  - image width in pixels.
// height - image height in pixels.
//
// Returns a malloc()'d buffer of width*height*3 bytes that the caller must
// free(), or NULL if a dimension is negative, the byte count would overflow,
// or the allocation fails.
static u8* Convert15To24(const u16* src, int width, int height)
{
	if (width < 0 || height < 0)
		return NULL;

	// Do the size arithmetic in size_t and reject sizes that cannot be represented,
	// so that large dimensions cannot silently wrap around.
	const size_t w = (size_t)width;
	const size_t h = (size_t)height;
	if (h != 0 && w > ((size_t)-1) / 3 / h)
		return NULL;

	const size_t pixCount = w * h;
	u8 *tmp_buffer = (u8 *)malloc(pixCount * 3);
	if (tmp_buffer == NULL)
		return NULL;

	u8 *tmp_inc = tmp_buffer;
	for (size_t i = 0; i < pixCount; i++)
	{
		// Store the three low bytes of the converted 32-bit color, lowest byte first.
		const u32 dst = ColorspaceConvert555To8888Opaque<true>(*src++);
		*tmp_inc++ = dst & 0xFF;
		*tmp_inc++ = (dst >> 8) & 0xFF;
		*tmp_inc++ = (dst >> 16) & 0xFF;
	}

	return tmp_buffer;
}
|
||||
|
||||
int NDS_WritePNG_15bpp(int width, int height, const u16 *data, const char *filename)
|
||||
{
|
||||
u8* tmp = Convert15To24(data,width,height);
|
||||
bool ok = rpng_save_image_bgr24(filename,tmp,width,height,width*3);
|
||||
free(tmp);
|
||||
return ok?1:0;
|
||||
}
|
||||
|
||||
int NDS_WriteBMP_15bpp(int width, int height, const u16 *data, const char *filename)
|
||||
{
|
||||
u8* tmp = Convert15To24(data,width,height);
|
||||
bool ok = rbmp_save_image(filename,tmp,width,height,width*3,RBMP_SOURCE_TYPE_BGR24);
|
||||
free(tmp);
|
||||
return ok?1:0;
|
||||
}
|
||||
|
||||
int NDS_WriteBMP_32bppBuffer(int width, int height, const void* buf, const char *filename)
|
||||
{
|
||||
bool ok = rbmp_save_image(filename,buf,width,height,width*4,RBMP_SOURCE_TYPE_ARGB8888);
|
||||
return ok?1:0;
|
||||
}
|
|
@ -605,11 +605,11 @@ Render3DError Render3D::FlushFramebuffer(const FragmentColor *__restrict srcFram
|
|||
{
|
||||
if ( (this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev) )
|
||||
{
|
||||
ConvertColorBuffer8888To6665<false>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
|
||||
ColorspaceConvertBuffer8888To6665<false, false>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
|
||||
}
|
||||
else if ( (this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev) )
|
||||
{
|
||||
ConvertColorBuffer6665To8888<false>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
|
||||
ColorspaceConvertBuffer6665To8888<false, false>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
|
||||
}
|
||||
else if ( ((this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev)) ||
|
||||
((this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev)) )
|
||||
|
@ -622,11 +622,11 @@ Render3DError Render3D::FlushFramebuffer(const FragmentColor *__restrict srcFram
|
|||
{
|
||||
if (this->_outputFormat == NDSColorFormat_BGR666_Rev)
|
||||
{
|
||||
ConvertColorBuffer6665To5551<false, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
|
||||
ColorspaceConvertBuffer6665To5551<false, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
|
||||
}
|
||||
else if (this ->_outputFormat == NDSColorFormat_BGR888_Rev)
|
||||
{
|
||||
ConvertColorBuffer8888To5551<false, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
|
||||
ColorspaceConvertBuffer8888To5551<false, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -31,6 +31,10 @@
|
|||
#include "MMU.h"
|
||||
#include "NDSSystem.h"
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#include "./utils/colorspacehandler/colorspacehandler_SSE2.h"
|
||||
#endif
|
||||
|
||||
using std::min;
|
||||
using std::max;
|
||||
|
||||
|
@ -452,13 +456,13 @@ public:
|
|||
|
||||
if (TEXFORMAT == TexFormat_15bpp)
|
||||
{
|
||||
ConvertColor555To6665Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ConvertColor555To6665Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
}
|
||||
else
|
||||
{
|
||||
ConvertColor555To8888Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ConvertColor555To8888Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
}
|
||||
|
||||
// Set converted colors to 0 if the palette index is 0.
|
||||
|
@ -518,13 +522,13 @@ public:
|
|||
|
||||
if (TEXFORMAT == TexFormat_15bpp)
|
||||
{
|
||||
ConvertColor555To6665Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ConvertColor555To6665Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
}
|
||||
else
|
||||
{
|
||||
ConvertColor555To8888Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ConvertColor555To8888Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
}
|
||||
|
||||
_mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]);
|
||||
|
@ -581,13 +585,13 @@ public:
|
|||
|
||||
if (TEXFORMAT == TexFormat_15bpp)
|
||||
{
|
||||
ConvertColor555To6665Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ConvertColor555To6665Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
}
|
||||
else
|
||||
{
|
||||
ConvertColor555To8888Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ConvertColor555To8888Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
}
|
||||
|
||||
// Set converted colors to 0 if the palette index is 0.
|
||||
|
@ -647,13 +651,13 @@ public:
|
|||
|
||||
if (TEXFORMAT == TexFormat_15bpp)
|
||||
{
|
||||
ConvertColor555To6665Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ConvertColor555To6665Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
}
|
||||
else
|
||||
{
|
||||
ConvertColor555To8888Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ConvertColor555To8888Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
}
|
||||
|
||||
_mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]);
|
||||
|
@ -882,11 +886,11 @@ public:
|
|||
|
||||
tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo);
|
||||
tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo);
|
||||
ConvertColor555To6665<false>(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]);
|
||||
ColorspaceConvert555To6665_SSE2<false>(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]);
|
||||
|
||||
tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi);
|
||||
tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi);
|
||||
ConvertColor555To6665<false>(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]);
|
||||
ColorspaceConvert555To6665_SSE2<false>(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -896,11 +900,11 @@ public:
|
|||
|
||||
tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo);
|
||||
tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo);
|
||||
ConvertColor555To8888<false>(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]);
|
||||
ColorspaceConvert555To8888_SSE2<false>(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]);
|
||||
|
||||
tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi);
|
||||
tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi);
|
||||
ConvertColor555To8888<false>(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]);
|
||||
ColorspaceConvert555To8888_SSE2<false>(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]);
|
||||
}
|
||||
|
||||
_mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]);
|
||||
|
|
|
@ -76,6 +76,18 @@
|
|||
#ifdef __SSE4_2__
|
||||
#define ENABLE_SSE4_2
|
||||
#endif
|
||||
|
||||
// Translate the compiler-provided instruction-set macros (__AVX__, __AVX2__,
// __ALTIVEC__) into the project's own ENABLE_* feature switches.
#ifdef __AVX__
|
||||
#define ENABLE_AVX
|
||||
#endif
|
||||
|
||||
#ifdef __AVX2__
|
||||
#define ENABLE_AVX2
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __ALTIVEC__
|
||||
#define ENABLE_ALTIVEC
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
@ -223,6 +235,38 @@ typedef u32 uint32;
|
|||
#define uint32 u32 //uint32 is defined in Leopard somewhere, avoid conflicts
|
||||
#endif
|
||||
|
||||
// 128-bit vector type aliases for AltiVec builds, named by element signedness
// and width (u8/s8/u16/s16/u32/s32). <altivec.h> is included only when the
// compiler does not define __APPLE_ALTIVEC__.
#ifdef ENABLE_ALTIVEC
|
||||
#ifndef __APPLE_ALTIVEC__
|
||||
#include <altivec.h>
|
||||
#endif
|
||||
typedef vector unsigned char v128u8;
|
||||
typedef vector signed char v128s8;
|
||||
typedef vector unsigned short v128u16;
|
||||
typedef vector signed short v128s16;
|
||||
typedef vector unsigned int v128u32;
|
||||
typedef vector signed int v128s32;
|
||||
#endif
|
||||
|
||||
// 128-bit vector type aliases for SSE2 builds. Every alias is the same
// underlying type (__m128i), so the element-type suffix only documents intent
// and is not enforced by the compiler.
#ifdef ENABLE_SSE2
|
||||
#include <emmintrin.h>
|
||||
typedef __m128i v128u8;
|
||||
typedef __m128i v128s8;
|
||||
typedef __m128i v128u16;
|
||||
typedef __m128i v128s16;
|
||||
typedef __m128i v128u32;
|
||||
typedef __m128i v128s32;
|
||||
#endif
|
||||
|
||||
// 256-bit vector type aliases for AVX2 builds. Every alias is the same
// underlying type (__m256i), so the element-type suffix only documents intent
// and is not enforced by the compiler.
#ifdef ENABLE_AVX2
|
||||
#include <immintrin.h>
|
||||
typedef __m256i v256u8;
|
||||
typedef __m256i v256s8;
|
||||
typedef __m256i v256u16;
|
||||
typedef __m256i v256s16;
|
||||
typedef __m256i v256u32;
|
||||
typedef __m256i v256s32;
|
||||
#endif
|
||||
|
||||
/*---------- GPU3D fixed-points types -----------*/
|
||||
|
||||
typedef s32 f32;
|
||||
|
|
|
@ -0,0 +1,776 @@
|
|||
/*
|
||||
Copyright (C) 2016 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This file is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this software. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "colorspacehandler.h"
|
||||
|
||||
#if defined(ENABLE_AVX2)
|
||||
#include "colorspacehandler_AVX2.h"
|
||||
#elif defined(ENABLE_SSE2)
|
||||
#include "colorspacehandler_SSE2.h"
|
||||
#elif defined(ENABLE_ALTIVEC)
|
||||
#include "colorspacehandler_AltiVec.h"
|
||||
#endif
|
||||
|
||||
#if defined(ENABLE_SSE2) || defined(ENABLE_ALTIVEC)
|
||||
#define USEVECTORSIZE_128
|
||||
#endif
|
||||
|
||||
#if defined(ENABLE_AVX2)
|
||||
#define USEVECTORSIZE_256
|
||||
#endif
|
||||
|
||||
// By default, the hand-coded vectorized code will be used instead of a compiler's built-in
|
||||
// autovectorization (if supported). However, if USEMANUALVECTORIZATION is not defined, then
|
||||
// the compiler will use autovectorization (if supported).
|
||||
#if defined(USEVECTORSIZE_128) || defined(USEVECTORSIZE_256) || defined(USEVECTORSIZE_512)
|
||||
// Comment out USEMANUALVECTORIZATION to disable the hand-coded vectorized code.
|
||||
#define USEMANUALVECTORIZATION
|
||||
#endif
|
||||
|
||||
// File-local colorspace handler instance. With USEMANUALVECTORIZATION defined,
// the most specific available implementation is chosen in the order
// AVX2, SSE2, AltiVec; otherwise the base ColorspaceHandler is used.
// NOTE(review): USEMANUALVECTORIZATION is only defined in this file when one of
// the USEVECTORSIZE_* macros is set, so the inner #else branch looks unreachable
// unless one of those macros is defined externally -- confirm.
#ifdef USEMANUALVECTORIZATION
|
||||
#if defined(ENABLE_AVX2)
|
||||
static const ColorspaceHandler_AVX2 csh;
|
||||
#elif defined(ENABLE_SSE2)
|
||||
static const ColorspaceHandler_SSE2 csh;
|
||||
#elif defined(ENABLE_ALTIVEC)
|
||||
static const ColorspaceHandler_AltiVec csh;
|
||||
#else
|
||||
static const ColorspaceHandler csh;
|
||||
#endif
|
||||
#else
|
||||
static const ColorspaceHandler csh;
|
||||
#endif
|
||||
|
||||
// Color conversion lookup tables. Each holds 32768 (2^15) 32-bit entries, one
// per possible 15-bit source value.
// NOTE(review): presumably these are filled in by ColorspaceHandlerInit() and
// indexed by the 555 color with the top bit masked off -- confirm.
CACHE_ALIGN u32 color_555_to_6665_opaque[32768];
|
||||
CACHE_ALIGN u32 color_555_to_6665_opaque_swap_rb[32768];
|
||||
CACHE_ALIGN u32 color_555_to_666[32768];
|
||||
CACHE_ALIGN u32 color_555_to_8888_opaque[32768];
|
||||
CACHE_ALIGN u32 color_555_to_8888_opaque_swap_rb[32768];
|
||||
CACHE_ALIGN u32 color_555_to_888[32768];
|
||||
|
||||
// Is this a crazy idea? This table spreads a 5-bit value evenly over 31 bits,
// mapping 0 to exactly 0 and 31 to exactly INT_MAX (0x7FFFFFFF).
// Each entry is the 5-bit input pattern repeated from the top of the 31-bit
// result downwards (six full copies plus the input's most significant bit).
CACHE_ALIGN const u32 material_5bit_to_31bit[] = {
|
||||
0x00000000, 0x04210842, 0x08421084, 0x0C6318C6,
|
||||
0x10842108, 0x14A5294A, 0x18C6318C, 0x1CE739CE,
|
||||
0x21084210, 0x25294A52, 0x294A5294, 0x2D6B5AD6,
|
||||
0x318C6318, 0x35AD6B5A, 0x39CE739C, 0x3DEF7BDE,
|
||||
0x42108421, 0x46318C63, 0x4A5294A5, 0x4E739CE7,
|
||||
0x5294A529, 0x56B5AD6B, 0x5AD6B5AD, 0x5EF7BDEF,
|
||||
0x6318C631, 0x6739CE73, 0x6B5AD6B5, 0x6F7BDEF7,
|
||||
0x739CE739, 0x77BDEF7B, 0x7BDEF7BD, 0x7FFFFFFF
|
||||
};
|
||||
|
||||
// 5-bit to 6-bit conversions use this formula -- dst = (src == 0) ? 0 : (2*src) + 1
|
||||
// Reference GBATEK: http://problemkaputt.de/gbatek.htm#ds3dtextureblending
// The table has 32 entries (index = 5-bit source value): 0 maps to 0x00 and
// 31 maps to 0x3F.
|
||||
CACHE_ALIGN const u8 material_5bit_to_6bit[] = {
|
||||
0x00, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
|
||||
0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F,
|
||||
0x21, 0x23, 0x25, 0x27, 0x29, 0x2B, 0x2D, 0x2F,
|
||||
0x31, 0x33, 0x35, 0x37, 0x39, 0x3B, 0x3D, 0x3F
|
||||
};
|
||||
|
||||
// Expands a 5-bit component (index 0-31) to 8 bits.
// Each entry equals (i << 3) | (i >> 2), so 0 maps to 0x00 and 31 maps to 0xFF.
CACHE_ALIGN const u8 material_5bit_to_8bit[] = {
|
||||
0x00, 0x08, 0x10, 0x18, 0x21, 0x29, 0x31, 0x39,
|
||||
0x42, 0x4A, 0x52, 0x5A, 0x63, 0x6B, 0x73, 0x7B,
|
||||
0x84, 0x8C, 0x94, 0x9C, 0xA5, 0xAD, 0xB5, 0xBD,
|
||||
0xC6, 0xCE, 0xD6, 0xDE, 0xE7, 0xEF, 0xF7, 0xFF
|
||||
};
|
||||
|
||||
// Expands a 6-bit component (index 0-63) to 8 bits.
// Each entry equals (i << 2) | (i >> 4), so 0 maps to 0x00 and 63 maps to 0xFF.
CACHE_ALIGN const u8 material_6bit_to_8bit[] = {
|
||||
0x00, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C,
|
||||
0x20, 0x24, 0x28, 0x2C, 0x30, 0x34, 0x38, 0x3C,
|
||||
0x41, 0x45, 0x49, 0x4D, 0x51, 0x55, 0x59, 0x5D,
|
||||
0x61, 0x65, 0x69, 0x6D, 0x71, 0x75, 0x79, 0x7D,
|
||||
0x82, 0x86, 0x8A, 0x8E, 0x92, 0x96, 0x9A, 0x9E,
|
||||
0xA2, 0xA6, 0xAA, 0xAE, 0xB2, 0xB6, 0xBA, 0xBE,
|
||||
0xC3, 0xC7, 0xCB, 0xCF, 0xD3, 0xD7, 0xDB, 0xDF,
|
||||
0xE3, 0xE7, 0xEB, 0xEF, 0xF3, 0xF7, 0xFB, 0xFF
|
||||
};
|
||||
|
||||
// Expands a 3-bit component (index 0-7) to 8 bits.
// Each entry equals (i << 5) | (i << 2) | (i >> 1), so 0 maps to 0x00 and 7 maps to 0xFF.
CACHE_ALIGN const u8 material_3bit_to_8bit[] = {
|
||||
0x00, 0x24, 0x49, 0x6D, 0x92, 0xB6, 0xDB, 0xFF
|
||||
};
|
||||
|
||||
//maybe not very precise
// Maps a 3-bit component (index 0-7) onto the 5-bit range 0-31.
|
||||
CACHE_ALIGN const u8 material_3bit_to_5bit[] = {
|
||||
0, 4, 8, 13, 17, 22, 26, 31
|
||||
};
|
||||
|
||||
//TODO - generate this in the static init method more accurately
// Maps a 3-bit component (index 0-7) onto the 6-bit range 0-63.
|
||||
CACHE_ALIGN const u8 material_3bit_to_6bit[] = {
|
||||
0, 8, 16, 26, 34, 44, 52, 63
|
||||
};
|
||||
|
||||
void ColorspaceHandlerInit()
|
||||
{
|
||||
static bool needInitTables = true;
|
||||
|
||||
if (needInitTables)
|
||||
{
|
||||
#define RGB15TO18_BITLOGIC(col) ( (material_5bit_to_6bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | material_5bit_to_6bit[(col)&0x1F] )
|
||||
#define RGB15TO18_SWAP_RB_BITLOGIC(col) ( material_5bit_to_6bit[((col)>>10)&0x1F] | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_6bit[(col)&0x1F]<<16) )
|
||||
#define RGB15TO24_BITLOGIC(col) ( (material_5bit_to_8bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | material_5bit_to_8bit[(col)&0x1F] )
|
||||
#define RGB15TO24_SWAP_RB_BITLOGIC(col) ( material_5bit_to_8bit[((col)>>10)&0x1F] | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_8bit[(col)&0x1F]<<16) )
|
||||
|
||||
for (size_t i = 0; i < 32768; i++)
|
||||
{
|
||||
color_555_to_666[i] = LE_TO_LOCAL_32( RGB15TO18_BITLOGIC(i) );
|
||||
color_555_to_6665_opaque[i] = LE_TO_LOCAL_32( RGB15TO18_BITLOGIC(i) | 0x1F000000 );
|
||||
color_555_to_6665_opaque_swap_rb[i] = LE_TO_LOCAL_32( RGB15TO18_SWAP_RB_BITLOGIC(i) | 0x1F000000 );
|
||||
|
||||
color_555_to_888[i] = LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) );
|
||||
color_555_to_8888_opaque[i] = LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) | 0xFF000000 );
|
||||
color_555_to_8888_opaque_swap_rb[i] = LE_TO_LOCAL_32( RGB15TO24_SWAP_RB_BITLOGIC(i) | 0xFF000000 );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u32 ColorspaceConvert555To8888Opaque(const u16 src)
|
||||
{
|
||||
return (SWAP_RB) ? COLOR555TO8888_OPAQUE_SWAP_RB(src & 0x7FFF) : COLOR555TO8888_OPAQUE(src & 0x7FFF);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u32 ColorspaceConvert555To6665Opaque(const u16 src)
|
||||
{
|
||||
return (SWAP_RB) ? COLOR555TO6665_OPAQUE_SWAP_RB(src & 0x7FFF) : COLOR555TO6665_OPAQUE(src & 0x7FFF);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u32 ColorspaceConvert8888To6665(FragmentColor srcColor)
|
||||
{
|
||||
FragmentColor outColor;
|
||||
outColor.r = ((SWAP_RB) ? srcColor.b : srcColor.r) >> 2;
|
||||
outColor.g = srcColor.g >> 2;
|
||||
outColor.b = ((SWAP_RB) ? srcColor.r : srcColor.b) >> 2;
|
||||
outColor.a = srcColor.a >> 3;
|
||||
|
||||
return outColor.color;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u32 ColorspaceConvert8888To6665(u32 srcColor)
|
||||
{
|
||||
FragmentColor srcColorComponent;
|
||||
srcColorComponent.color = srcColor;
|
||||
|
||||
return ColorspaceConvert8888To6665<SWAP_RB>(srcColorComponent);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u32 ColorspaceConvert6665To8888(FragmentColor srcColor)
|
||||
{
|
||||
FragmentColor outColor;
|
||||
outColor.r = material_6bit_to_8bit[((SWAP_RB) ? srcColor.b : srcColor.r)];
|
||||
outColor.g = material_6bit_to_8bit[srcColor.g];
|
||||
outColor.b = material_6bit_to_8bit[((SWAP_RB) ? srcColor.r : srcColor.b)];
|
||||
outColor.a = material_5bit_to_8bit[srcColor.a];
|
||||
|
||||
return outColor.color;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u32 ColorspaceConvert6665To8888(u32 srcColor)
|
||||
{
|
||||
FragmentColor srcColorComponent;
|
||||
srcColorComponent.color = srcColor;
|
||||
|
||||
return ColorspaceConvert6665To8888<SWAP_RB>(srcColorComponent);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u16 ColorspaceConvert8888To5551(FragmentColor srcColor)
|
||||
{
|
||||
return R5G5B5TORGB15( ((SWAP_RB) ? srcColor.b : srcColor.r) >> 3, srcColor.g >> 3, ((SWAP_RB) ? srcColor.r : srcColor.b) >> 3) | ((srcColor.a == 0) ? 0x0000 : 0x8000 );
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u16 ColorspaceConvert8888To5551(u32 srcColor)
|
||||
{
|
||||
FragmentColor srcColorComponent;
|
||||
srcColorComponent.color = srcColor;
|
||||
|
||||
return ColorspaceConvert8888To5551<SWAP_RB>(srcColorComponent);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u16 ColorspaceConvert6665To5551(FragmentColor srcColor)
|
||||
{
|
||||
return R6G6B6TORGB15( ((SWAP_RB) ? srcColor.b : srcColor.r), srcColor.g, ((SWAP_RB) ? srcColor.r : srcColor.b)) | ((srcColor.a == 0) ? 0x0000 : 0x8000);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u16 ColorspaceConvert6665To5551(u32 srcColor)
|
||||
{
|
||||
FragmentColor srcColorComponent;
|
||||
srcColorComponent.color = srcColor;
|
||||
|
||||
return ColorspaceConvert6665To5551<SWAP_RB>(srcColorComponent);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef USEMANUALVECTORIZATION
|
||||
|
||||
#if defined(USEVECTORSIZE_128)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 8);
|
||||
#elif defined(USEVECTORSIZE_256)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 16);
|
||||
#elif defined(USEVECTORSIZE_512)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 32);
|
||||
#endif
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
i = csh.ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = csh.ConvertBuffer555To8888Opaque_SwapRB(src, dst, pixCountVector);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
i = csh.ConvertBuffer555To8888Opaque_IsUnaligned(src, dst, pixCountVector);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = csh.ConvertBuffer555To8888Opaque(src, dst, pixCountVector);
|
||||
}
|
||||
}
|
||||
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
|
||||
#endif // USEMANUALVECTORIZATION
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert555To8888Opaque<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef USEMANUALVECTORIZATION
|
||||
|
||||
#if defined(USEVECTORSIZE_128)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 8);
|
||||
#elif defined(USEVECTORSIZE_256)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 16);
|
||||
#elif defined(USEVECTORSIZE_512)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 32);
|
||||
#endif
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
i = csh.ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = csh.ConvertBuffer555To6665Opaque_SwapRB(src, dst, pixCountVector);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
i = csh.ConvertBuffer555To6665Opaque_IsUnaligned(src, dst, pixCountVector);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = csh.ConvertBuffer555To6665Opaque(src, dst, pixCountVector);
|
||||
}
|
||||
}
|
||||
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
|
||||
#endif // USEMANUALVECTORIZATION
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert555To6665Opaque<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef USEMANUALVECTORIZATION
|
||||
|
||||
#if defined(USEVECTORSIZE_128)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 4);
|
||||
#elif defined(USEVECTORSIZE_256)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 8);
|
||||
#elif defined(USEVECTORSIZE_512)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 16);
|
||||
#endif
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
i = csh.ConvertBuffer8888To6665_SwapRB_IsUnaligned(src, dst, pixCountVector);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = csh.ConvertBuffer8888To6665_SwapRB(src, dst, pixCountVector);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
i = csh.ConvertBuffer8888To6665_IsUnaligned(src, dst, pixCountVector);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = csh.ConvertBuffer8888To6665(src, dst, pixCountVector);
|
||||
}
|
||||
}
|
||||
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
|
||||
#endif // USEMANUALVECTORIZATION
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert8888To6665<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
void ColorspaceConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef USEMANUALVECTORIZATION
|
||||
|
||||
#if defined(USEVECTORSIZE_128)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 4);
|
||||
#elif defined(USEVECTORSIZE_256)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 8);
|
||||
#elif defined(USEVECTORSIZE_512)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 16);
|
||||
#endif
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
i = csh.ConvertBuffer6665To8888_SwapRB_IsUnaligned(src, dst, pixCountVector);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = csh.ConvertBuffer6665To8888_SwapRB(src, dst, pixCountVector);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
i = csh.ConvertBuffer6665To8888_IsUnaligned(src, dst, pixCountVector);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = csh.ConvertBuffer6665To8888(src, dst, pixCountVector);
|
||||
}
|
||||
}
|
||||
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
|
||||
#endif // USEMANUALVECTORIZATION
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert6665To8888<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
void ColorspaceConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef USEMANUALVECTORIZATION
|
||||
|
||||
#if defined(USEVECTORSIZE_128)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 8);
|
||||
#elif defined(USEVECTORSIZE_256)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 16);
|
||||
#elif defined(USEVECTORSIZE_512)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 32);
|
||||
#endif
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
i = csh.ConvertBuffer8888To5551_SwapRB_IsUnaligned(src, dst, pixCountVector);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = csh.ConvertBuffer8888To5551_SwapRB(src, dst, pixCountVector);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
i = csh.ConvertBuffer8888To5551_IsUnaligned(src, dst, pixCountVector);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = csh.ConvertBuffer8888To5551(src, dst, pixCountVector);
|
||||
}
|
||||
}
|
||||
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
|
||||
#endif // USEMANUALVECTORIZATION
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert8888To5551<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
void ColorspaceConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef USEMANUALVECTORIZATION
|
||||
|
||||
#if defined(USEVECTORSIZE_128)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 8);
|
||||
#elif defined(USEVECTORSIZE_256)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 16);
|
||||
#elif defined(USEVECTORSIZE_512)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 32);
|
||||
#endif
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
i = csh.ConvertBuffer6665To5551_SwapRB_IsUnaligned(src, dst, pixCountVector);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = csh.ConvertBuffer6665To5551_SwapRB(src, dst, pixCountVector);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
i = csh.ConvertBuffer6665To5551_IsUnaligned(src, dst, pixCountVector);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = csh.ConvertBuffer6665To5551(src, dst, pixCountVector);
|
||||
}
|
||||
}
|
||||
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
|
||||
#endif // USEMANUALVECTORIZATION
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert6665To5551<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert555To8888Opaque<false>(src[i]);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert555To8888Opaque<true>(src[i]);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return this->ColorspaceHandler::ConvertBuffer555To8888Opaque(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return this->ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert555To6665Opaque<false>(src[i]);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert555To6665Opaque<true>(src[i]);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return this->ColorspaceHandler::ConvertBuffer555To6665Opaque(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return this->ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert8888To6665<false>(src[i]);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert8888To6665<true>(src[i]);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return this->ColorspaceHandler::ConvertBuffer8888To6665(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return this->ColorspaceHandler::ConvertBuffer8888To6665_SwapRB(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert6665To8888<false>(src[i]);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert6665To8888<true>(src[i]);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return this->ColorspaceHandler::ConvertBuffer6665To8888(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return this->ColorspaceHandler::ConvertBuffer6665To8888_SwapRB(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert8888To5551<false>(src[i]);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert8888To5551<true>(src[i]);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return this->ColorspaceHandler::ConvertBuffer8888To5551(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return this->ColorspaceHandler::ConvertBuffer8888To5551_SwapRB(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert6665To5551<false>(src[i]);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (;i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert6665To5551<true>(src[i]);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return this->ColorspaceHandler::ConvertBuffer6665To5551(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return this->ColorspaceHandler::ConvertBuffer6665To5551_SwapRB(src, dst, pixCount);
|
||||
}
|
||||
|
||||
template u32 ColorspaceConvert555To8888Opaque<true>(const u16 src);
|
||||
template u32 ColorspaceConvert555To8888Opaque<false>(const u16 src);
|
||||
|
||||
template u32 ColorspaceConvert555To6665Opaque<true>(const u16 src);
|
||||
template u32 ColorspaceConvert555To6665Opaque<false>(const u16 src);
|
||||
|
||||
template u32 ColorspaceConvert8888To6665<true>(FragmentColor srcColor);
|
||||
template u32 ColorspaceConvert8888To6665<false>(FragmentColor srcColor);
|
||||
|
||||
template u32 ColorspaceConvert8888To6665<true>(u32 srcColor);
|
||||
template u32 ColorspaceConvert8888To6665<false>(u32 srcColor);
|
||||
|
||||
template u32 ColorspaceConvert6665To8888<true>(FragmentColor srcColor);
|
||||
template u32 ColorspaceConvert6665To8888<false>(FragmentColor srcColor);
|
||||
|
||||
template u32 ColorspaceConvert6665To8888<true>(u32 srcColor);
|
||||
template u32 ColorspaceConvert6665To8888<false>(u32 srcColor);
|
||||
|
||||
template u16 ColorspaceConvert8888To5551<true>(FragmentColor srcColor);
|
||||
template u16 ColorspaceConvert8888To5551<false>(FragmentColor srcColor);
|
||||
|
||||
template u16 ColorspaceConvert8888To5551<true>(u32 srcColor);
|
||||
template u16 ColorspaceConvert8888To5551<false>(u32 srcColor);
|
||||
|
||||
template u16 ColorspaceConvert6665To5551<true>(FragmentColor srcColor);
|
||||
template u16 ColorspaceConvert6665To5551<false>(FragmentColor srcColor);
|
||||
|
||||
template u16 ColorspaceConvert6665To5551<true>(u32 srcColor);
|
||||
template u16 ColorspaceConvert6665To5551<false>(u32 srcColor);
|
||||
|
||||
template void ColorspaceConvertBuffer555To8888Opaque<true, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer555To8888Opaque<true, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer555To8888Opaque<false, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer555To8888Opaque<false, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
|
||||
template void ColorspaceConvertBuffer555To6665Opaque<true, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer555To6665Opaque<true, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer555To6665Opaque<false, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer555To6665Opaque<false, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
|
||||
template void ColorspaceConvertBuffer8888To6665<true, true>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer8888To6665<true, false>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer8888To6665<false, true>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer8888To6665<false, false>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
|
||||
template void ColorspaceConvertBuffer6665To8888<true, true>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer6665To8888<true, false>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer6665To8888<false, true>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer6665To8888<false, false>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
|
||||
template void ColorspaceConvertBuffer8888To5551<true, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer8888To5551<true, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer8888To5551<false, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer8888To5551<false, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
|
||||
template void ColorspaceConvertBuffer6665To5551<true, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer6665To5551<true, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer6665To5551<false, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer6665To5551<false, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
|
@ -0,0 +1,194 @@
|
|||
/*
|
||||
Copyright (C) 2016 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This file is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this software. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef COLORSPACEHANDLER_H
|
||||
#define COLORSPACEHANDLER_H
|
||||
|
||||
#include "types.h"
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
// Identifies a pixel color format. Each enumerator's numeric value encodes the
// per-component bit counts, the component order, and flags, as described below.
enum NDSColorFormat
|
||||
{
|
||||
// The color format information is packed in a 32-bit value.
|
||||
// The bits are as follows:
|
||||
// FFFOOOOO AAAAAABB BBBBGGGG GGRRRRRR
|
||||
//
|
||||
// F = Flags (see below)
|
||||
// O = Color order (see below)
|
||||
// A = Bit count for alpha [0-63]
|
||||
// B = Bit count for blue [0-63]
|
||||
// G = Bit count for green [0-63]
|
||||
// R = Bit count for red [0-63]
|
||||
//
|
||||
// Flags:
|
||||
// Bit 29: Reverse order flag.
|
||||
// Set = Bits are in reverse order, usually for little-endian usage.
|
||||
// Cleared = Bits are in normal order, usually for big-endian usage.
|
||||
//
|
||||
// Color order bits, 24-28:
|
||||
// 0x00 = RGBA, common format
|
||||
// 0x01 = RGAB
|
||||
// 0x02 = RBGA
|
||||
// 0x03 = RBAG
|
||||
// 0x04 = RAGB
|
||||
// 0x05 = RABG
|
||||
// 0x06 = GRBA
|
||||
// 0x07 = GRAB
|
||||
// 0x08 = GBRA
|
||||
// 0x09 = GBAR
|
||||
// 0x0A = GARB
|
||||
// 0x0B = GABR
|
||||
// 0x0C = BRGA
|
||||
// 0x0D = BRAG
|
||||
// 0x0E = BGRA, common format
|
||||
// 0x0F = BGAR
|
||||
// 0x10 = BARG
|
||||
// 0x11 = BAGR
|
||||
// 0x12 = ARGB
|
||||
// 0x13 = ARBG
|
||||
// 0x14 = AGRB
|
||||
// 0x15 = AGBR
|
||||
// 0x16 = ABRG
|
||||
// 0x17 = ABGR
|
||||
|
||||
|
||||
// Color formats used for internal processing.
// NOTE(review): under the layout above, 0x20186186 decodes to 6 alpha bits rather than 5;
// 0x20146186 would match the ABGR5666 name -- confirm before enabling these values.
|
||||
//NDSColorFormat_ABGR1555_Rev = 0x20045145,
|
||||
//NDSColorFormat_ABGR5666_Rev = 0x20186186,
|
||||
//NDSColorFormat_ABGR8888_Rev = 0x20208208,
|
||||
|
||||
// Color formats used by the output framebuffers.
// All three set the reverse order flag, use color order 0x00 and carry 0 alpha bits.
|
||||
NDSColorFormat_BGR555_Rev = 0x20005145,
|
||||
NDSColorFormat_BGR666_Rev = 0x20006186,
|
||||
NDSColorFormat_BGR888_Rev = 0x20008208
|
||||
};
|
||||
|
||||
// A single color that can be accessed either as one packed 32-bit value (color)
// or as four individual 8-bit components (r, g, b, a) sharing the same storage.
union FragmentColor
|
||||
{
|
||||
u32 color;
|
||||
struct
|
||||
{
|
||||
u8 r,g,b,a;
|
||||
};
|
||||
};
|
||||
|
||||
// Constant component-expansion tables, indexed by the source component value.
extern CACHE_ALIGN const u32 material_5bit_to_31bit[32];
|
||||
extern CACHE_ALIGN const u8 material_5bit_to_6bit[32];
|
||||
extern CACHE_ALIGN const u8 material_5bit_to_8bit[32];
|
||||
extern CACHE_ALIGN const u8 material_6bit_to_8bit[64];
|
||||
extern CACHE_ALIGN const u8 material_3bit_to_5bit[8];
|
||||
extern CACHE_ALIGN const u8 material_3bit_to_6bit[8];
|
||||
extern CACHE_ALIGN const u8 material_3bit_to_8bit[8];
|
||||
|
||||
// Non-const tables indexed by a 15-bit color value; accessed through the COLOR555TO* macros.
extern CACHE_ALIGN u32 color_555_to_6665_opaque[32768];
|
||||
extern CACHE_ALIGN u32 color_555_to_6665_opaque_swap_rb[32768];
|
||||
extern CACHE_ALIGN u32 color_555_to_666[32768];
|
||||
extern CACHE_ALIGN u32 color_555_to_8888_opaque[32768];
|
||||
extern CACHE_ALIGN u32 color_555_to_8888_opaque_swap_rb[32768];
|
||||
extern CACHE_ALIGN u32 color_555_to_888[32768];
|
||||
|
||||
// Table-lookup macros. col is used directly as an index into a 32768-entry table,
// so callers must pass a value in the range 0-32767.
#define COLOR555TO6665_OPAQUE(col) (color_555_to_6665_opaque[(col)]) // Convert a 15-bit color to an opaque sparsely packed 32-bit color containing an RGBA6665 color
|
||||
#define COLOR555TO6665_OPAQUE_SWAP_RB(col) (color_555_to_6665_opaque_swap_rb[(col)]) // Convert a 15-bit color to an opaque sparsely packed 32-bit color containing an RGBA6665 color with R and B components swapped
|
||||
#define COLOR555TO666(col) (color_555_to_666[(col)]) // Convert a 15-bit color to a fully transparent sparsely packed 32-bit color containing an RGBA6665 color
|
||||
|
||||
#ifdef LOCAL_LE
|
||||
#define COLOR555TO6665(col,alpha5) (((alpha5)<<24) | color_555_to_666[(col)]) // Convert a 15-bit color to a sparsely packed 32-bit color containing an RGBA6665 color with user-defined alpha, little-endian
|
||||
#else
|
||||
#define COLOR555TO6665(col,alpha5) ((alpha5) | color_555_to_666[(col)]) // Convert a 15-bit color to a sparsely packed 32-bit color containing an RGBA6665 color with user-defined alpha, big-endian
|
||||
#endif
|
||||
|
||||
#define COLOR555TO8888_OPAQUE(col) (color_555_to_8888_opaque[(col)]) // Convert a 15-bit color to an opaque 32-bit color
|
||||
#define COLOR555TO8888_OPAQUE_SWAP_RB(col) (color_555_to_8888_opaque_swap_rb[(col)]) // Convert a 15-bit color to an opaque 32-bit color with R and B components swapped
|
||||
#define COLOR555TO888(col) (color_555_to_888[(col)]) // Convert a 15-bit color to an opaque 24-bit color or a fully transparent 32-bit color
|
||||
|
||||
#ifdef LOCAL_LE
|
||||
#define COLOR555TO8888(col,alpha8) (((alpha8)<<24) | color_555_to_888[(col)]) // Convert a 15-bit color to a 32-bit color with user-defined alpha, little-endian
|
||||
#else
|
||||
#define COLOR555TO8888(col,alpha8) ((alpha8) | color_555_to_888[(col)]) // Convert a 15-bit color to a 32-bit color with user-defined alpha, big-endian
|
||||
#endif
|
||||
|
||||
//produce a 15bpp color from individual 5bit components
// r occupies bits 0-4, g bits 5-9 and b bits 10-14. The arguments are not masked.
|
||||
#define R5G5B5TORGB15(r,g,b) ( (r) | ((g)<<5) | ((b)<<10) )
|
||||
|
||||
//produce a 15bpp color from individual 6bit components
// The lowest bit of each 6-bit component is dropped; r is shifted without masking.
|
||||
#define R6G6B6TORGB15(r,g,b) ( ((r)>>1) | (((g)&0x3E)<<4) | (((b)&0x3E)<<9) )
|
||||
|
||||
// Fills the color_555_to_* lookup tables.
void ColorspaceHandlerInit();
|
||||
|
||||
// Single-color conversions. SWAP_RB selects the variant that exchanges the R and B components.
template<bool SWAP_RB> u32 ColorspaceConvert555To8888Opaque(const u16 src);
|
||||
template<bool SWAP_RB> u32 ColorspaceConvert555To6665Opaque(const u16 src);
|
||||
template<bool SWAP_RB> u32 ColorspaceConvert8888To6665(FragmentColor srcColor);
|
||||
template<bool SWAP_RB> u32 ColorspaceConvert8888To6665(u32 srcColor);
|
||||
template<bool SWAP_RB> u32 ColorspaceConvert6665To8888(FragmentColor srcColor);
|
||||
template<bool SWAP_RB> u32 ColorspaceConvert6665To8888(u32 srcColor);
|
||||
template<bool SWAP_RB> u16 ColorspaceConvert8888To5551(FragmentColor srcColor);
|
||||
template<bool SWAP_RB> u16 ColorspaceConvert8888To5551(u32 srcColor);
|
||||
template<bool SWAP_RB> u16 ColorspaceConvert6665To5551(FragmentColor srcColor);
|
||||
template<bool SWAP_RB> u16 ColorspaceConvert6665To5551(u32 srcColor);
|
||||
|
||||
// Buffer conversions: convert pixCount colors from src into dst.
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount);
|
||||
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount);
|
||||
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
|
||||
class ColorspaceHandler
|
||||
{
|
||||
public:
|
||||
ColorspaceHandler() {};
|
||||
|
||||
size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
};
|
||||
|
||||
// Builds a FragmentColor by assigning the given 8-bit values to its r, g, b and a members.
FORCEINLINE FragmentColor MakeFragmentColor(const u8 r, const u8 g, const u8 b, const u8 a)
{
	FragmentColor color;

	color.r = r;
	color.g = g;
	color.b = b;
	color.a = a;

	return color;
}
|
||||
|
||||
#endif /* COLORSPACEHANDLER_H */
|
|
@ -0,0 +1,491 @@
|
|||
/*
|
||||
Copyright (C) 2016 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This file is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this software. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "colorspacehandler_AVX2.h"
|
||||
|
||||
#ifndef ENABLE_AVX2
|
||||
#error This code requires AVX2 support.
|
||||
#else
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ColorspaceConvert555To8888_AVX2(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi)
|
||||
{
|
||||
v256u32 src32;
|
||||
|
||||
// Conversion algorithm:
|
||||
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
|
||||
src32 = _mm256_unpacklo_epi16(srcColor, _mm256_setzero_si256());
|
||||
dstLo = (SWAP_RB) ? _mm256_or_si256(_mm256_slli_epi32(src32, 19), _mm256_srli_epi32(src32, 7)) : _mm256_or_si256(_mm256_slli_epi32(src32, 3), _mm256_slli_epi32(src32, 9));
|
||||
dstLo = _mm256_and_si256( dstLo, _mm256_set1_epi32(0x00F800F8) );
|
||||
dstLo = _mm256_or_si256( dstLo, _mm256_and_si256(_mm256_slli_epi32(src32, 6), _mm256_set1_epi32(0x0000F800)) );
|
||||
dstLo = _mm256_or_si256( dstLo, _mm256_and_si256(_mm256_srli_epi32(dstLo, 5), _mm256_set1_epi32(0x00070707)) );
|
||||
dstLo = _mm256_or_si256( dstLo, srcAlphaBits32Lo );
|
||||
|
||||
src32 = _mm256_unpackhi_epi16(srcColor, _mm256_setzero_si256());
|
||||
dstHi = (SWAP_RB) ? _mm256_or_si256(_mm256_slli_epi32(src32, 19), _mm256_srli_epi32(src32, 7)) : _mm256_or_si256(_mm256_slli_epi32(src32, 3), _mm256_slli_epi32(src32, 9));
|
||||
dstHi = _mm256_and_si256( dstHi, _mm256_set1_epi32(0x00F800F8) );
|
||||
dstHi = _mm256_or_si256( dstHi, _mm256_and_si256(_mm256_slli_epi32(src32, 6), _mm256_set1_epi32(0x0000F800)) );
|
||||
dstHi = _mm256_or_si256( dstHi, _mm256_and_si256(_mm256_srli_epi32(dstHi, 5), _mm256_set1_epi32(0x00070707)) );
|
||||
dstHi = _mm256_or_si256( dstHi, srcAlphaBits32Hi );
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ColorspaceConvert555To6665_AVX2(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi)
|
||||
{
|
||||
v256u32 src32;
|
||||
|
||||
// Conversion algorithm:
|
||||
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
|
||||
src32 = _mm256_unpacklo_epi16(srcColor, _mm256_setzero_si256());
|
||||
dstLo = (SWAP_RB) ? _mm256_or_si256(_mm256_slli_epi32(src32, 17), _mm256_srli_epi32(src32, 9)) : _mm256_or_si256(_mm256_slli_epi32(src32, 1), _mm256_slli_epi32(src32, 7));
|
||||
dstLo = _mm256_and_si256( dstLo, _mm256_set1_epi32(0x003E003E) );
|
||||
dstLo = _mm256_or_si256( dstLo, _mm256_and_si256(_mm256_slli_epi32(src32, 4), _mm256_set1_epi32(0x00003E00)) );
|
||||
dstLo = _mm256_or_si256( dstLo, _mm256_and_si256(_mm256_srli_epi32(dstLo, 5), _mm256_set1_epi32(0x00010101)) );
|
||||
dstLo = _mm256_or_si256( dstLo, srcAlphaBits32Lo );
|
||||
|
||||
src32 = _mm256_unpackhi_epi16(srcColor, _mm256_setzero_si256());
|
||||
dstHi = (SWAP_RB) ? _mm256_or_si256(_mm256_slli_epi32(src32, 17), _mm256_srli_epi32(src32, 9)) : _mm256_or_si256(_mm256_slli_epi32(src32, 1), _mm256_slli_epi32(src32, 7));
|
||||
dstHi = _mm256_and_si256( dstHi, _mm256_set1_epi32(0x003E003E) );
|
||||
dstHi = _mm256_or_si256( dstHi, _mm256_and_si256(_mm256_slli_epi32(src32, 4), _mm256_set1_epi32(0x00003E00)) );
|
||||
dstHi = _mm256_or_si256( dstHi, _mm256_and_si256(_mm256_srli_epi32(dstHi, 5), _mm256_set1_epi32(0x00010101)) );
|
||||
dstHi = _mm256_or_si256( dstHi, srcAlphaBits32Hi );
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ColorspaceConvert555To8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi)
|
||||
{
|
||||
const v256u32 srcAlphaBits32 = _mm256_set1_epi32(0xFF000000);
|
||||
ColorspaceConvert555To8888_AVX2<SWAP_RB>(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ColorspaceConvert555To6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi)
|
||||
{
|
||||
const v256u32 srcAlphaBits32 = _mm256_set1_epi32(0x1F000000);
|
||||
ColorspaceConvert555To6665_AVX2<SWAP_RB>(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE v256u32 ColorspaceConvert8888To6665_AVX2(const v256u32 &src)
|
||||
{
|
||||
// Conversion algorithm:
|
||||
// RGB 8-bit to 6-bit formula: dstRGB6 = (srcRGB8 >> 2)
|
||||
// Alpha 8-bit to 6-bit formula: dstA5 = (srcA8 >> 3)
|
||||
v256u32 rgb;
|
||||
const v256u32 a = _mm256_and_si256( _mm256_srli_epi32(src, 3), _mm256_set1_epi32(0x1F000000) );
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
rgb = _mm256_and_si256( _mm256_srli_epi32(src, 2), _mm256_set1_epi32(0x003F3F3F) );
|
||||
rgb = _mm256_shuffle_epi8( rgb, _mm256_set_epi8(31,28,29,30, 27,24,25,26, 23,20,21,22, 19,16,17,18, 15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2) );
|
||||
}
|
||||
else
|
||||
{
|
||||
rgb = _mm256_and_si256( _mm256_srli_epi32(src, 2), _mm256_set1_epi32(0x003F3F3F) );
|
||||
}
|
||||
|
||||
return _mm256_or_si256(rgb, a);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE v256u32 ColorspaceConvert6665To8888_AVX2(const v256u32 &src)
|
||||
{
|
||||
// Conversion algorithm:
|
||||
// RGB 6-bit to 8-bit formula: dstRGB8 = (srcRGB6 << 2) | ((srcRGB6 >> 4) & 0x03)
|
||||
// Alpha 5-bit to 8-bit formula: dstA8 = (srcA5 << 3) | ((srcA5 >> 2) & 0x07)
|
||||
v256u32 rgb = _mm256_or_si256( _mm256_and_si256(_mm256_slli_epi32(src, 2), _mm256_set1_epi32(0x00FCFCFC)), _mm256_and_si256(_mm256_srli_epi32(src, 4), _mm256_set1_epi32(0x00030303)) );
|
||||
const v256u32 a = _mm256_or_si256( _mm256_and_si256(_mm256_slli_epi32(src, 3), _mm256_set1_epi32(0xF8000000)), _mm256_and_si256(_mm256_srli_epi32(src, 2), _mm256_set1_epi32(0x07000000)) );
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
rgb = _mm256_shuffle_epi8( rgb, _mm256_set_epi8(31,28,29,30, 27,24,25,26, 23,20,21,22, 19,16,17,18, 15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2) );
|
||||
}
|
||||
|
||||
return _mm256_or_si256(rgb, a);
|
||||
}
|
||||
|
||||
template <NDSColorFormat COLORFORMAT, bool SWAP_RB>
|
||||
FORCEINLINE v256u16 _ConvertColorBaseTo5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi)
|
||||
{
|
||||
if (COLORFORMAT == NDSColorFormat_BGR555_Rev)
|
||||
{
|
||||
return srcLo;
|
||||
}
|
||||
|
||||
v256u32 rgbLo;
|
||||
v256u32 rgbHi;
|
||||
v256u16 alpha;
|
||||
|
||||
if (COLORFORMAT == NDSColorFormat_BGR666_Rev)
|
||||
{
|
||||
if (SWAP_RB)
|
||||
{
|
||||
// Convert color from low bits
|
||||
rgbLo = _mm256_and_si256(_mm256_srli_epi32(srcLo, 17), _mm256_set1_epi32(0x0000001F));
|
||||
rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo, 4), _mm256_set1_epi32(0x000003E0)) );
|
||||
rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_slli_epi32(srcLo, 9), _mm256_set1_epi32(0x00007C00)) );
|
||||
|
||||
// Convert color from high bits
|
||||
rgbHi = _mm256_and_si256(_mm256_srli_epi32(srcHi, 17), _mm256_set1_epi32(0x0000001F));
|
||||
rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi, 4), _mm256_set1_epi32(0x000003E0)) );
|
||||
rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_slli_epi32(srcHi, 9), _mm256_set1_epi32(0x00007C00)) );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Convert color from low bits
|
||||
rgbLo = _mm256_and_si256(_mm256_srli_epi32(srcLo, 1), _mm256_set1_epi32(0x0000001F));
|
||||
rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo, 4), _mm256_set1_epi32(0x000003E0)) );
|
||||
rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo, 7), _mm256_set1_epi32(0x00007C00)) );
|
||||
|
||||
// Convert color from high bits
|
||||
rgbHi = _mm256_and_si256(_mm256_srli_epi32(srcHi, 1), _mm256_set1_epi32(0x0000001F));
|
||||
rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi, 4), _mm256_set1_epi32(0x000003E0)) );
|
||||
rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi, 7), _mm256_set1_epi32(0x00007C00)) );
|
||||
}
|
||||
|
||||
// Convert alpha
|
||||
alpha = _mm256_packs_epi32( _mm256_and_si256(_mm256_srli_epi32(srcLo, 24), _mm256_set1_epi32(0x0000001F)), _mm256_and_si256(_mm256_srli_epi32(srcHi, 24), _mm256_set1_epi32(0x0000001F)) );
|
||||
alpha = _mm256_cmpgt_epi16(alpha, _mm256_setzero_si256());
|
||||
alpha = _mm256_and_si256(alpha, _mm256_set1_epi16(0x8000));
|
||||
}
|
||||
else if (COLORFORMAT == NDSColorFormat_BGR888_Rev)
|
||||
{
|
||||
if (SWAP_RB)
|
||||
{
|
||||
// Convert color from low bits
|
||||
rgbLo = _mm256_and_si256(_mm256_srli_epi32(srcLo, 19), _mm256_set1_epi32(0x0000001F));
|
||||
rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo, 6), _mm256_set1_epi32(0x000003E0)) );
|
||||
rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_slli_epi32(srcLo, 7), _mm256_set1_epi32(0x00007C00)) );
|
||||
|
||||
// Convert color from high bits
|
||||
rgbHi = _mm256_and_si256(_mm256_srli_epi32(srcHi, 19), _mm256_set1_epi32(0x0000001F));
|
||||
rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi, 6), _mm256_set1_epi32(0x000003E0)) );
|
||||
rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_slli_epi32(srcHi, 7), _mm256_set1_epi32(0x00007C00)) );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Convert color from low bits
|
||||
rgbLo = _mm256_and_si256(_mm256_srli_epi32(srcLo, 3), _mm256_set1_epi32(0x0000001F));
|
||||
rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo, 6), _mm256_set1_epi32(0x000003E0)) );
|
||||
rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo, 9), _mm256_set1_epi32(0x00007C00)) );
|
||||
|
||||
// Convert color from high bits
|
||||
rgbHi = _mm256_and_si256(_mm256_srli_epi32(srcHi, 3), _mm256_set1_epi32(0x0000001F));
|
||||
rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi, 6), _mm256_set1_epi32(0x000003E0)) );
|
||||
rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi, 9), _mm256_set1_epi32(0x00007C00)) );
|
||||
}
|
||||
|
||||
// Convert alpha
|
||||
alpha = _mm256_packs_epi32( _mm256_srli_epi32(srcLo, 24), _mm256_srli_epi32(srcHi, 24) );
|
||||
alpha = _mm256_cmpgt_epi16(alpha, _mm256_setzero_si256());
|
||||
alpha = _mm256_and_si256(alpha, _mm256_set1_epi16(0x8000));
|
||||
}
|
||||
|
||||
return _mm256_or_si256(_mm256_packs_epi32(rgbLo, rgbHi), alpha);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE v256u16 ColorspaceConvert8888To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi)
|
||||
{
|
||||
return _ConvertColorBaseTo5551_AVX2<NDSColorFormat_BGR888_Rev, SWAP_RB>(srcLo, srcHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE v256u16 ColorspaceConvert6665To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi)
|
||||
{
|
||||
return _ConvertColorBaseTo5551_AVX2<NDSColorFormat_BGR666_Rev, SWAP_RB>(srcLo, srcHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
static size_t ColorspaceConvertBuffer555To8888Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec256)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec256; i+=16)
|
||||
{
|
||||
v256u16 src_vec256 = (IS_UNALIGNED) ? _mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i));
|
||||
v256u32 dstConvertedLo, dstConvertedHi;
|
||||
ColorspaceConvert555To8888Opaque_AVX2<SWAP_RB>(src_vec256, dstConvertedLo, dstConvertedHi);
|
||||
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm256_storeu_si256((v256u32 *)(dst+i+0), dstConvertedLo);
|
||||
_mm256_storeu_si256((v256u32 *)(dst+i+8), dstConvertedHi);
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm256_store_si256((v256u32 *)(dst+i+0), dstConvertedLo);
|
||||
_mm256_store_si256((v256u32 *)(dst+i+8), dstConvertedHi);
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
size_t ColorspaceConvertBuffer555To6665Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec256)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec256; i+=16)
|
||||
{
|
||||
v256u16 src_vec256 = (IS_UNALIGNED) ? _mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i));
|
||||
v256u32 dstConvertedLo, dstConvertedHi;
|
||||
ColorspaceConvert555To6665Opaque_AVX2<SWAP_RB>(src_vec256, dstConvertedLo, dstConvertedHi);
|
||||
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm256_storeu_si256((v256u32 *)(dst+i+0), dstConvertedLo);
|
||||
_mm256_storeu_si256((v256u32 *)(dst+i+8), dstConvertedHi);
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm256_store_si256((v256u32 *)(dst+i+0), dstConvertedLo);
|
||||
_mm256_store_si256((v256u32 *)(dst+i+8), dstConvertedHi);
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
size_t ColorspaceConvertBuffer8888To6665_AVX2(const u32 *src, u32 *dst, size_t pixCountVec256)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec256; i+=8)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm256_storeu_si256( (v256u32 *)(dst+i), ColorspaceConvert8888To6665_AVX2<SWAP_RB>(_mm256_loadu_si256((v256u32 *)(src+i))) );
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm256_store_si256( (v256u32 *)(dst+i), ColorspaceConvert8888To6665_AVX2<SWAP_RB>(_mm256_load_si256((v256u32 *)(src+i))) );
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
size_t ColorspaceConvertBuffer6665To8888_AVX2(const u32 *src, u32 *dst, size_t pixCountVec256)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec256; i+=8)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm256_storeu_si256( (v256u32 *)(dst+i), ColorspaceConvert6665To8888_AVX2<SWAP_RB>(_mm256_loadu_si256((v256u32 *)(src+i))) );
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm256_store_si256( (v256u32 *)(dst+i), ColorspaceConvert6665To8888_AVX2<SWAP_RB>(_mm256_load_si256((v256u32 *)(src+i))) );
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
size_t ColorspaceConvertBuffer8888To5551_AVX2(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec256)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec256; i+=16)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm256_storeu_si256( (v256u16 *)(dst+i), ColorspaceConvert8888To5551_AVX2<SWAP_RB>(_mm256_loadu_si256((v256u32 *)(src+i)), _mm256_loadu_si256((v256u32 *)(src+i+8))) );
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm256_store_si256( (v256u16 *)(dst+i), ColorspaceConvert8888To5551_AVX2<SWAP_RB>(_mm256_load_si256((v256u32 *)(src+i)), _mm256_load_si256((v256u32 *)(src+i+8))) );
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
size_t ColorspaceConvertBuffer6665To5551_AVX2(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec256)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec256; i+=16)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm256_storeu_si256( (v256u16 *)(dst+i), ColorspaceConvert6665To5551_AVX2<SWAP_RB>(_mm256_loadu_si256((v256u32 *)(src+i)), _mm256_loadu_si256((v256u32 *)(src+i+8))) );
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm256_store_si256( (v256u16 *)(dst+i), ColorspaceConvert6665To5551_AVX2<SWAP_RB>(_mm256_load_si256((v256u32 *)(src+i)), _mm256_load_si256((v256u32 *)(src+i+8))) );
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To8888Opaque_AVX2<false, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To8888Opaque_AVX2<true, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To8888Opaque_AVX2<false, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To8888Opaque_AVX2<true, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To6665Opaque_AVX2<false, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To6665Opaque_AVX2<true, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To6665Opaque_AVX2<false, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To6665Opaque_AVX2<true, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To6665_AVX2<false, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To6665_AVX2<true, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To6665_AVX2<false, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To6665_AVX2<true, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To8888_AVX2<false, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To8888_AVX2<true, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To8888_AVX2<false, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To8888_AVX2<true, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To5551_AVX2<false, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To5551_AVX2<true, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To5551_AVX2<false, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To5551_AVX2<true, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To5551_AVX2<false, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To5551_AVX2<true, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To5551_AVX2<false, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To5551_AVX2<true, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
template void ColorspaceConvert555To8888_AVX2<true>(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
|
||||
template void ColorspaceConvert555To8888_AVX2<false>(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
|
||||
|
||||
template void ColorspaceConvert555To6665_AVX2<true>(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
|
||||
template void ColorspaceConvert555To6665_AVX2<false>(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
|
||||
|
||||
template void ColorspaceConvert555To8888Opaque_AVX2<true>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
|
||||
template void ColorspaceConvert555To8888Opaque_AVX2<false>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
|
||||
|
||||
template void ColorspaceConvert555To6665Opaque_AVX2<true>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
|
||||
template void ColorspaceConvert555To6665Opaque_AVX2<false>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
|
||||
|
||||
template v256u32 ColorspaceConvert8888To6665_AVX2<true>(const v256u32 &src);
|
||||
template v256u32 ColorspaceConvert8888To6665_AVX2<false>(const v256u32 &src);
|
||||
|
||||
template v256u32 ColorspaceConvert6665To8888_AVX2<true>(const v256u32 &src);
|
||||
template v256u32 ColorspaceConvert6665To8888_AVX2<false>(const v256u32 &src);
|
||||
|
||||
template v256u16 ColorspaceConvert8888To5551_AVX2<true>(const v256u32 &srcLo, const v256u32 &srcHi);
|
||||
template v256u16 ColorspaceConvert8888To5551_AVX2<false>(const v256u32 &srcLo, const v256u32 &srcHi);
|
||||
|
||||
template v256u16 ColorspaceConvert6665To5551_AVX2<true>(const v256u32 &srcLo, const v256u32 &srcHi);
|
||||
template v256u16 ColorspaceConvert6665To5551_AVX2<false>(const v256u32 &srcLo, const v256u32 &srcHi);
|
||||
|
||||
#endif // ENABLE_AVX2
|
|
@ -0,0 +1,74 @@
|
|||
/*
|
||||
Copyright (C) 2016 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This file is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this software. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef COLORSPACEHANDLER_AVX2_H
|
||||
#define COLORSPACEHANDLER_AVX2_H
|
||||
|
||||
#include "colorspacehandler.h"
|
||||
|
||||
#ifndef ENABLE_AVX2
|
||||
#warning This header requires AVX2 support.
|
||||
#else
|
||||
|
||||
// Per-vector conversion templates, parameterized on the compile-time SWAP_RB switch.

// 16-bit source vector in, two 32-bit vectors out (dstLo, dstHi), with caller-supplied alpha bits.
template <bool SWAP_RB>
void ColorspaceConvert555To8888_AVX2(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);

template <bool SWAP_RB>
void ColorspaceConvert555To6665_AVX2(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);

// 16-bit source vector in, two 32-bit vectors out (dstLo, dstHi), no alpha argument.
template <bool SWAP_RB>
void ColorspaceConvert555To8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);

template <bool SWAP_RB>
void ColorspaceConvert555To6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);

// One 32-bit vector in, one 32-bit vector returned.
template <bool SWAP_RB>
v256u32 ColorspaceConvert8888To6665_AVX2(const v256u32 &src);

template <bool SWAP_RB>
v256u32 ColorspaceConvert6665To8888_AVX2(const v256u32 &src);

// Two 32-bit vectors in (srcLo, srcHi), one 16-bit vector returned.
template <bool SWAP_RB>
v256u16 ColorspaceConvert8888To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi);

template <bool SWAP_RB>
v256u16 ColorspaceConvert6665To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi);
|
||||
|
||||
class ColorspaceHandler_AVX2 : public ColorspaceHandler
|
||||
{
|
||||
public:
|
||||
ColorspaceHandler_AVX2() {};
|
||||
|
||||
size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
};
|
||||
|
||||
#endif // ENABLE_AVX2
|
||||
|
||||
#endif /* COLORSPACEHANDLER_AVX2_H */
|
|
@ -0,0 +1,345 @@
|
|||
/*
|
||||
Copyright (C) 2016 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This file is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this software. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "colorspacehandler_Altivec.h"
|
||||
|
||||
#ifndef ENABLE_ALTIVEC
|
||||
#error This code requires PowerPC AltiVec support.
|
||||
#else
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi)
|
||||
{
|
||||
// Conversion algorithm:
|
||||
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
|
||||
dstLo = vec_unpackl((vector pixel)srcColor);
|
||||
dstLo = vec_or( vec_sl((v128u8)dstLo, ((v128u8){3,3,3,0, 3,3,3,0, 3,3,3,0, 3,3,3,0})), vec_sr((v128u8)dstLo, ((v128u8){2,2,2,0, 2,2,2,0, 2,2,2,0, 2,2,2,0})) );
|
||||
dstLo = vec_sel(dstLo, srcAlphaBits32Lo, vec_splat_u32(0xFF000000));
|
||||
|
||||
dstHi = vec_unpackh((vector pixel)srcColor);
|
||||
dstHi = vec_or( vec_sl((v128u8)dstHi, ((v128u8){3,3,3,0, 3,3,3,0, 3,3,3,0, 3,3,3,0})), vec_sr((v128u8)dstHi, ((v128u8){2,2,2,0, 2,2,2,0, 2,2,2,0, 2,2,2,0})) );
|
||||
dstHi = vec_sel(dstHi, srcAlphaBits32Hi, vec_splat_u32(0xFF000000));
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi)
|
||||
{
|
||||
// Conversion algorithm:
|
||||
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
|
||||
dstLo = vec_unpackl((vector pixel)srcColor);
|
||||
dstLo = vec_or( vec_sl((v128u8)dstLo, ((v128u8){1,1,1,0, 1,1,1,0, 1,1,1,0, 1,1,1,0})), vec_sr((v128u8)dstLo, ((v128u8){4,4,4,0, 4,4,4,0, 4,4,4,0, 4,4,4,0})) );
|
||||
dstLo = vec_sel(dstLo, srcAlphaBits32Lo, vec_splat_u32(0xFF000000));
|
||||
|
||||
dstHi = vec_unpackh((vector pixel)srcColor);
|
||||
dstHi = vec_or( vec_sl((v128u8)dstHi, ((v128u8){1,1,1,0, 1,1,1,0, 1,1,1,0, 1,1,1,0})), vec_sr((v128u8)dstHi, ((v128u8){4,4,4,0, 4,4,4,0, 4,4,4,0, 4,4,4,0})) );
|
||||
dstHi = vec_sel(dstHi, srcAlphaBits32Hi, vec_splat_u32(0xFF000000));
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
|
||||
{
|
||||
const v128u32 srcAlphaBits32 = {0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000};
|
||||
ColorspaceConvert555To8888_AltiVec<SWAP_RB>(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
|
||||
{
|
||||
const v128u32 srcAlphaBits32 = {0x1F000000, 0x1F000000, 0x1F000000, 0x1F000000};
|
||||
ColorspaceConvert555To6665_AltiVec<SWAP_RB>(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE v128u32 ColorspaceConvert8888To6665_AltiVec(const v128u32 &src)
|
||||
{
|
||||
// Conversion algorithm:
|
||||
// RGB 8-bit to 6-bit formula: dstRGB6 = (srcRGB8 >> 2)
|
||||
// Alpha 8-bit to 6-bit formula: dstA5 = (srcA8 >> 3)
|
||||
v128u8 rgba = vec_sr( (v128u8)src, ((v128u8){2,2,2,3, 2,2,2,3, 2,2,2,3, 2,2,2,3}) );
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
rgba = vec_perm( rgba, rgba, ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15}) );
|
||||
}
|
||||
|
||||
return (v128u32)rgba;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE v128u32 ColorspaceConvert6665To8888_AltiVec(const v128u32 &src)
|
||||
{
|
||||
// Conversion algorithm:
|
||||
// RGB 6-bit to 8-bit formula: dstRGB8 = (srcRGB6 << 2) | ((srcRGB6 >> 4) & 0x03)
|
||||
// Alpha 5-bit to 8-bit formula: dstA8 = (srcA5 << 3) | ((srcA5 >> 2) & 0x07)
|
||||
v128u8 rgba = vec_or( vec_sl((v128u8)src, ((v128u8){2,2,2,3, 2,2,2,3, 2,2,2,3, 2,2,2,3})), vec_sr((v128u8)src, ((v128u8){4,4,4,2, 4,4,4,2, 4,4,4,2, 4,4,4,2})) );
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
rgba = vec_perm( rgba, rgba, ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15}) );
|
||||
}
|
||||
|
||||
return (v128u32)rgba;
|
||||
}
|
||||
|
||||
template <NDSColorFormat COLORFORMAT, bool SWAP_RB>
|
||||
FORCEINLINE v128u16 _ConvertColorBaseTo5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi)
|
||||
{
|
||||
if (COLORFORMAT == NDSColorFormat_BGR555_Rev)
|
||||
{
|
||||
return srcLo;
|
||||
}
|
||||
|
||||
v128u32 rgbLo;
|
||||
v128u32 rgbHi;
|
||||
|
||||
v128u16 dstColor;
|
||||
v128u16 dstAlpha;
|
||||
|
||||
if (COLORFORMAT == NDSColorFormat_BGR666_Rev)
|
||||
{
|
||||
// Convert alpha
|
||||
dstAlpha = vec_packsu( vec_and(vec_sr(srcLo, vec_splat_u32(24)), vec_splat_u32(0x0000001F)), vec_and(vec_sr(srcHi, vec_splat_u32(24)), vec_splat_u32(0x0000001F)) );
|
||||
dstAlpha = vec_cmpgt(dstAlpha, vec_splat_u16(0));
|
||||
dstAlpha = vec_and(dstAlpha, vec_splat_u16(0x8000));
|
||||
|
||||
// Convert RGB
|
||||
if (SWAP_RB)
|
||||
{
|
||||
rgbLo = vec_perm( srcLo, srcLo, ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15}) );
|
||||
rgbHi = vec_perm( srcHi, srcHi, ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15}) );
|
||||
|
||||
rgbLo = vec_sl( rgbLo, vec_splat_u32(2) );
|
||||
rgbHi = vec_sl( rgbHi, vec_splat_u32(2) );
|
||||
|
||||
dstColor = (v128u16)vec_packpx(rgbLo, rgbHi);
|
||||
}
|
||||
else
|
||||
{
|
||||
rgbLo = vec_sl( srcLo, vec_splat_u32(2) );
|
||||
rgbHi = vec_sl( srcHi, vec_splat_u32(2) );
|
||||
|
||||
dstColor = (v128u16)vec_packpx(rgbLo, rgbHi);
|
||||
}
|
||||
}
|
||||
else if (COLORFORMAT == NDSColorFormat_BGR888_Rev)
|
||||
{
|
||||
// Convert alpha
|
||||
dstAlpha = vec_packsu( vec_sr(srcLo, vec_splat_u32(24)), vec_sr(srcHi, vec_splat_u32(24)) );
|
||||
dstAlpha = vec_cmpgt(dstAlpha, vec_splat_u16(0));
|
||||
dstAlpha = vec_and(dstAlpha, vec_splat_u16(0x8000));
|
||||
|
||||
// Convert RGB
|
||||
if (SWAP_RB)
|
||||
{
|
||||
rgbLo = vec_perm( srcLo, srcLo, ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15}) );
|
||||
rgbHi = vec_perm( srcHi, srcHi, ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15}) );
|
||||
|
||||
dstColor = (v128u16)vec_packpx(rgbLo, rgbHi);
|
||||
}
|
||||
else
|
||||
{
|
||||
dstColor = (v128u16)vec_packpx(srcLo, srcHi);
|
||||
}
|
||||
}
|
||||
|
||||
dstColor = vec_and(dstColor, vec_splat_u16(0x7FFF));
|
||||
return vec_or(dstColor, dstAlpha);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE v128u16 ColorspaceConvert8888To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi)
|
||||
{
|
||||
return _ConvertColorBaseTo5551_AltiVec<NDSColorFormat_BGR888_Rev, SWAP_RB>(srcLo, srcHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE v128u16 ColorspaceConvert6665To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi)
|
||||
{
|
||||
return _ConvertColorBaseTo5551_AltiVec<NDSColorFormat_BGR666_Rev, SWAP_RB>(srcLo, srcHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
static size_t ColorspaceConvertBuffer555To8888Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec128; i+=8)
|
||||
{
|
||||
v128u32 dstConvertedLo, dstConvertedHi;
|
||||
|
||||
ColorspaceConvert555To8888Opaque_AltiVec<SWAP_RB>( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi );
|
||||
vec_st(dstConvertedHi, 0, dst+i);
|
||||
vec_st(dstConvertedLo, 16, dst+i);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
size_t ColorspaceConvertBuffer555To6665Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec128; i+=8)
|
||||
{
|
||||
v128u32 dstConvertedLo, dstConvertedHi;
|
||||
|
||||
ColorspaceConvert555To6665Opaque_AltiVec<SWAP_RB>( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi );
|
||||
vec_st(dstConvertedHi, 0, dst+i);
|
||||
vec_st(dstConvertedLo, 16, dst+i);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
size_t ColorspaceConvertBuffer8888To6665_AltiVec(const u32 *src, u32 *dst, size_t pixCountVec128)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec128; i+=4)
|
||||
{
|
||||
vec_st( ColorspaceConvert8888To6665_AltiVec<SWAP_RB>(vec_ld(0, src+i)), 0, dst+i );
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
size_t ColorspaceConvertBuffer6665To8888_AltiVec(const u32 *src, u32 *dst, size_t pixCountVec128)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec128; i+=4)
|
||||
{
|
||||
vec_st( ColorspaceConvert6665To8888_AltiVec<SWAP_RB>(vec_ld(0, src+i)), 0, dst+i );
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
size_t ColorspaceConvertBuffer8888To5551_AltiVec(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec128)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec128; i+=8)
|
||||
{
|
||||
vec_st( ColorspaceConvert8888To5551_AltiVec<SWAP_RB>(vec_ld(0, src+i), vec_ld(16, src+i)), 0, dst+i );
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
size_t ColorspaceConvertBuffer6665To5551_AltiVec(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec128)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec128; i+=8)
|
||||
{
|
||||
vec_st( ColorspaceConvert6665To5551_AltiVec<SWAP_RB>(vec_ld(0, src+i), vec_ld(16, src+i)), 0, dst+i );
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To8888Opaque_AltiVec<false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To8888Opaque_AltiVec<true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To6665Opaque_AltiVec<false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To6665Opaque_AltiVec<true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AltiVec::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To6665_AltiVec<false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AltiVec::ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To6665_AltiVec<true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AltiVec::ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To8888_AltiVec<false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AltiVec::ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To8888_AltiVec<true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AltiVec::ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To5551_AltiVec<false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AltiVec::ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To5551_AltiVec<true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AltiVec::ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To5551_AltiVec<false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AltiVec::ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To5551_AltiVec<true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
template void ColorspaceConvert555To8888_AltiVec<true>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
|
||||
template void ColorspaceConvert555To8888_AltiVec<false>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
|
||||
|
||||
template void ColorspaceConvert555To6665_AltiVec<true>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
|
||||
template void ColorspaceConvert555To6665_AltiVec<false>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
|
||||
|
||||
template void ColorspaceConvert555To8888Opaque_AltiVec<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
|
||||
template void ColorspaceConvert555To8888Opaque_AltiVec<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
|
||||
|
||||
template void ColorspaceConvert555To6665Opaque_AltiVec<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
|
||||
template void ColorspaceConvert555To6665Opaque_AltiVec<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
|
||||
|
||||
template v128u32 ColorspaceConvert8888To6665_AltiVec<true>(const v128u32 &src);
|
||||
template v128u32 ColorspaceConvert8888To6665_AltiVec<false>(const v128u32 &src);
|
||||
|
||||
template v128u32 ColorspaceConvert6665To8888_AltiVec<true>(const v128u32 &src);
|
||||
template v128u32 ColorspaceConvert6665To8888_AltiVec<false>(const v128u32 &src);
|
||||
|
||||
template v128u16 ColorspaceConvert8888To5551_AltiVec<true>(const v128u32 &srcLo, const v128u32 &srcHi);
|
||||
template v128u16 ColorspaceConvert8888To5551_AltiVec<false>(const v128u32 &srcLo, const v128u32 &srcHi);
|
||||
|
||||
template v128u16 ColorspaceConvert6665To5551_AltiVec<true>(const v128u32 &srcLo, const v128u32 &srcHi);
|
||||
template v128u16 ColorspaceConvert6665To5551_AltiVec<false>(const v128u32 &srcLo, const v128u32 &srcHi);
|
||||
|
||||
#endif // ENABLE_ALTIVEC
|
|
@ -0,0 +1,64 @@
|
|||
/*
|
||||
Copyright (C) 2016 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This file is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this software. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef COLORSPACEHANDLER_ALTIVEC_H
|
||||
#define COLORSPACEHANDLER_ALTIVEC_H
|
||||
|
||||
#include "colorspacehandler.h"
|
||||
|
||||
#ifndef ENABLE_ALTIVEC
|
||||
#warning This header requires PowerPC AltiVec support.
|
||||
#else
|
||||
|
||||
// Per-vector conversion kernels, each parameterized on SWAP_RB. Only the
// declarations live in this header.

// 555 -> 32-bit, with caller-supplied alpha bits for the low and high halves.
template<bool SWAP_RB>
void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor,
                                        const v128u32 &srcAlphaBits32Lo,
                                        const v128u32 &srcAlphaBits32Hi,
                                        v128u32 &dstLo,
                                        v128u32 &dstHi);

template<bool SWAP_RB>
void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor,
                                        const v128u32 &srcAlphaBits32Lo,
                                        const v128u32 &srcAlphaBits32Hi,
                                        v128u32 &dstLo,
                                        v128u32 &dstHi);

// 555 -> 32-bit, "Opaque" variants that take no alpha argument.
template<bool SWAP_RB>
void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);

template<bool SWAP_RB>
void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);

// 32-bit <-> 32-bit.
template<bool SWAP_RB>
v128u32 ColorspaceConvert8888To6665_AltiVec(const v128u32 &src);

template<bool SWAP_RB>
v128u32 ColorspaceConvert6665To8888_AltiVec(const v128u32 &src);

// Two 32-bit vectors -> one 16-bit vector.
template<bool SWAP_RB>
v128u16 ColorspaceConvert8888To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi);

template<bool SWAP_RB>
v128u16 ColorspaceConvert6665To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi);
|
||||
|
||||
// AltiVec has very poor support for dealing with unaligned addresses (it's possible, just
|
||||
// very obtuse), so we're not even going to bother dealing with any unaligned addresses.
|
||||
class ColorspaceHandler_AltiVec : public ColorspaceHandler
|
||||
{
|
||||
public:
|
||||
ColorspaceHandler_AltiVec() {};
|
||||
|
||||
size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
};
|
||||
|
||||
#endif // ENABLE_ALTIVEC
|
||||
|
||||
#endif /* COLORSPACEHANDLER_ALTIVEC_H */
|
|
@ -0,0 +1,503 @@
|
|||
/*
|
||||
Copyright (C) 2016 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This file is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this software. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "colorspacehandler_SSE2.h"
|
||||
|
||||
#ifndef ENABLE_SSE2
|
||||
#error This code requires SSE2 support.
|
||||
#else
|
||||
|
||||
#include <emmintrin.h>
|
||||
|
||||
#ifdef ENABLE_SSSE3
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ColorspaceConvert555To8888_SSE2(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi)
|
||||
{
|
||||
v128u32 src32;
|
||||
|
||||
// Conversion algorithm:
|
||||
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
|
||||
src32 = _mm_unpacklo_epi16(srcColor, _mm_setzero_si128());
|
||||
dstLo = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 19), _mm_srli_epi32(src32, 7)) : _mm_or_si128(_mm_slli_epi32(src32, 3), _mm_slli_epi32(src32, 9));
|
||||
dstLo = _mm_and_si128( dstLo, _mm_set1_epi32(0x00F800F8) );
|
||||
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_slli_epi32(src32, 6), _mm_set1_epi32(0x0000F800)) );
|
||||
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_srli_epi32(dstLo, 5), _mm_set1_epi32(0x00070707)) );
|
||||
dstLo = _mm_or_si128( dstLo, srcAlphaBits32Lo );
|
||||
|
||||
src32 = _mm_unpackhi_epi16(srcColor, _mm_setzero_si128());
|
||||
dstHi = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 19), _mm_srli_epi32(src32, 7)) : _mm_or_si128(_mm_slli_epi32(src32, 3), _mm_slli_epi32(src32, 9));
|
||||
dstHi = _mm_and_si128( dstHi, _mm_set1_epi32(0x00F800F8) );
|
||||
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_slli_epi32(src32, 6), _mm_set1_epi32(0x0000F800)) );
|
||||
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_srli_epi32(dstHi, 5), _mm_set1_epi32(0x00070707)) );
|
||||
dstHi = _mm_or_si128( dstHi, srcAlphaBits32Hi );
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ColorspaceConvert555To6665_SSE2(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi)
|
||||
{
|
||||
v128u32 src32;
|
||||
|
||||
// Conversion algorithm:
|
||||
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
|
||||
src32 = _mm_unpacklo_epi16(srcColor, _mm_setzero_si128());
|
||||
dstLo = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 17), _mm_srli_epi32(src32, 9)) : _mm_or_si128(_mm_slli_epi32(src32, 1), _mm_slli_epi32(src32, 7));
|
||||
dstLo = _mm_and_si128( dstLo, _mm_set1_epi32(0x003E003E) );
|
||||
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00003E00)) );
|
||||
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_srli_epi32(dstLo, 5), _mm_set1_epi32(0x00010101)) );
|
||||
dstLo = _mm_or_si128( dstLo, srcAlphaBits32Lo );
|
||||
|
||||
src32 = _mm_unpackhi_epi16(srcColor, _mm_setzero_si128());
|
||||
dstHi = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 17), _mm_srli_epi32(src32, 9)) : _mm_or_si128(_mm_slli_epi32(src32, 1), _mm_slli_epi32(src32, 7));
|
||||
dstHi = _mm_and_si128( dstHi, _mm_set1_epi32(0x003E003E) );
|
||||
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00003E00)) );
|
||||
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_srli_epi32(dstHi, 5), _mm_set1_epi32(0x00010101)) );
|
||||
dstHi = _mm_or_si128( dstHi, srcAlphaBits32Hi );
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ColorspaceConvert555To8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
|
||||
{
|
||||
const v128u32 srcAlphaBits32 = _mm_set1_epi32(0xFF000000);
|
||||
ColorspaceConvert555To8888_SSE2<SWAP_RB>(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ColorspaceConvert555To6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
|
||||
{
|
||||
const v128u32 srcAlphaBits32 = _mm_set1_epi32(0x1F000000);
|
||||
ColorspaceConvert555To6665_SSE2<SWAP_RB>(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE v128u32 ColorspaceConvert8888To6665_SSE2(const v128u32 &src)
|
||||
{
|
||||
// Conversion algorithm:
|
||||
// RGB 8-bit to 6-bit formula: dstRGB6 = (srcRGB8 >> 2)
|
||||
// Alpha 8-bit to 6-bit formula: dstA5 = (srcA8 >> 3)
|
||||
v128u32 rgb;
|
||||
const v128u32 a = _mm_and_si128( _mm_srli_epi32(src, 3), _mm_set1_epi32(0x1F000000) );
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
#ifdef ENABLE_SSSE3
|
||||
rgb = _mm_and_si128( _mm_srli_epi32(src, 2), _mm_set1_epi32(0x003F3F3F) );
|
||||
rgb = _mm_shuffle_epi8( rgb, _mm_set_epi8(15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2) );
|
||||
#else
|
||||
rgb = _mm_or_si128( _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x003F0000)), 18), _mm_or_si128(_mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x00003F00)), 2), _mm_slli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x0000003F)), 14)) );
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
rgb = _mm_and_si128( _mm_srli_epi32(src, 2), _mm_set1_epi32(0x003F3F3F) );
|
||||
}
|
||||
|
||||
return _mm_or_si128(rgb, a);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE v128u32 ColorspaceConvert6665To8888_SSE2(const v128u32 &src)
|
||||
{
|
||||
// Conversion algorithm:
|
||||
// RGB 6-bit to 8-bit formula: dstRGB8 = (srcRGB6 << 2) | ((srcRGB6 >> 4) & 0x03)
|
||||
// Alpha 5-bit to 8-bit formula: dstA8 = (srcA5 << 3) | ((srcA5 >> 2) & 0x07)
|
||||
v128u32 rgb = _mm_or_si128( _mm_and_si128(_mm_slli_epi32(src, 2), _mm_set1_epi32(0x00FCFCFC)), _mm_and_si128(_mm_srli_epi32(src, 4), _mm_set1_epi32(0x00030303)) );
|
||||
const v128u32 a = _mm_or_si128( _mm_and_si128(_mm_slli_epi32(src, 3), _mm_set1_epi32(0xF8000000)), _mm_and_si128(_mm_srli_epi32(src, 2), _mm_set1_epi32(0x07000000)) );
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
#ifdef ENABLE_SSSE3
|
||||
rgb = _mm_shuffle_epi8( rgb, _mm_set_epi8(15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2) );
|
||||
#else
|
||||
rgb = _mm_or_si128( _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x00FF0000)), 16), _mm_or_si128(_mm_and_si128(src, _mm_set1_epi32(0x0000FF00)), _mm_slli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x000000FF)), 16)) );
|
||||
#endif
|
||||
}
|
||||
|
||||
return _mm_or_si128(rgb, a);
|
||||
}
|
||||
|
||||
template <NDSColorFormat COLORFORMAT, bool SWAP_RB>
|
||||
FORCEINLINE v128u16 _ConvertColorBaseTo5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi)
|
||||
{
|
||||
if (COLORFORMAT == NDSColorFormat_BGR555_Rev)
|
||||
{
|
||||
return srcLo;
|
||||
}
|
||||
|
||||
v128u32 rgbLo;
|
||||
v128u32 rgbHi;
|
||||
v128u16 alpha;
|
||||
|
||||
if (COLORFORMAT == NDSColorFormat_BGR666_Rev)
|
||||
{
|
||||
if (SWAP_RB)
|
||||
{
|
||||
// Convert color from low bits
|
||||
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 17), _mm_set1_epi32(0x0000001F));
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 4), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_slli_epi32(srcLo, 9), _mm_set1_epi32(0x00007C00)) );
|
||||
|
||||
// Convert color from high bits
|
||||
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 17), _mm_set1_epi32(0x0000001F));
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 4), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_slli_epi32(srcHi, 9), _mm_set1_epi32(0x00007C00)) );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Convert color from low bits
|
||||
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 1), _mm_set1_epi32(0x0000001F));
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 4), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 7), _mm_set1_epi32(0x00007C00)) );
|
||||
|
||||
// Convert color from high bits
|
||||
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 1), _mm_set1_epi32(0x0000001F));
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 4), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 7), _mm_set1_epi32(0x00007C00)) );
|
||||
}
|
||||
|
||||
// Convert alpha
|
||||
alpha = _mm_packs_epi32( _mm_and_si128(_mm_srli_epi32(srcLo, 24), _mm_set1_epi32(0x0000001F)), _mm_and_si128(_mm_srli_epi32(srcHi, 24), _mm_set1_epi32(0x0000001F)) );
|
||||
alpha = _mm_cmpgt_epi16(alpha, _mm_setzero_si128());
|
||||
alpha = _mm_and_si128(alpha, _mm_set1_epi16(0x8000));
|
||||
}
|
||||
else if (COLORFORMAT == NDSColorFormat_BGR888_Rev)
|
||||
{
|
||||
if (SWAP_RB)
|
||||
{
|
||||
// Convert color from low bits
|
||||
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 19), _mm_set1_epi32(0x0000001F));
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 6), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_slli_epi32(srcLo, 7), _mm_set1_epi32(0x00007C00)) );
|
||||
|
||||
// Convert color from high bits
|
||||
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 19), _mm_set1_epi32(0x0000001F));
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 6), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_slli_epi32(srcHi, 7), _mm_set1_epi32(0x00007C00)) );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Convert color from low bits
|
||||
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 3), _mm_set1_epi32(0x0000001F));
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 6), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 9), _mm_set1_epi32(0x00007C00)) );
|
||||
|
||||
// Convert color from high bits
|
||||
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 3), _mm_set1_epi32(0x0000001F));
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 6), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 9), _mm_set1_epi32(0x00007C00)) );
|
||||
}
|
||||
|
||||
// Convert alpha
|
||||
alpha = _mm_packs_epi32( _mm_srli_epi32(srcLo, 24), _mm_srli_epi32(srcHi, 24) );
|
||||
alpha = _mm_cmpgt_epi16(alpha, _mm_setzero_si128());
|
||||
alpha = _mm_and_si128(alpha, _mm_set1_epi16(0x8000));
|
||||
}
|
||||
|
||||
return _mm_or_si128(_mm_packs_epi32(rgbLo, rgbHi), alpha);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE v128u16 ColorspaceConvert8888To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi)
|
||||
{
|
||||
return _ConvertColorBaseTo5551_SSE2<NDSColorFormat_BGR888_Rev, SWAP_RB>(srcLo, srcHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE v128u16 ColorspaceConvert6665To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi)
|
||||
{
|
||||
return _ConvertColorBaseTo5551_SSE2<NDSColorFormat_BGR666_Rev, SWAP_RB>(srcLo, srcHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
static size_t ColorspaceConvertBuffer555To8888Opaque_SSE2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec128; i+=8)
|
||||
{
|
||||
v128u16 src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i));
|
||||
v128u32 dstConvertedLo, dstConvertedHi;
|
||||
ColorspaceConvert555To8888Opaque_SSE2<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
|
||||
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm_storeu_si128((v128u32 *)(dst+i+0), dstConvertedLo);
|
||||
_mm_storeu_si128((v128u32 *)(dst+i+4), dstConvertedHi);
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_store_si128((v128u32 *)(dst+i+0), dstConvertedLo);
|
||||
_mm_store_si128((v128u32 *)(dst+i+4), dstConvertedHi);
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
size_t ColorspaceConvertBuffer555To6665Opaque_SSE2(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec128; i+=8)
|
||||
{
|
||||
v128u16 src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i));
|
||||
v128u32 dstConvertedLo, dstConvertedHi;
|
||||
ColorspaceConvert555To6665Opaque_SSE2<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
|
||||
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm_storeu_si128((v128u32 *)(dst+i+0), dstConvertedLo);
|
||||
_mm_storeu_si128((v128u32 *)(dst+i+4), dstConvertedHi);
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_store_si128((v128u32 *)(dst+i+0), dstConvertedLo);
|
||||
_mm_store_si128((v128u32 *)(dst+i+4), dstConvertedHi);
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
size_t ColorspaceConvertBuffer8888To6665_SSE2(const u32 *src, u32 *dst, size_t pixCountVec128)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec128; i+=4)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm_storeu_si128( (v128u32 *)(dst+i), ColorspaceConvert8888To6665_SSE2<SWAP_RB>(_mm_loadu_si128((v128u32 *)(src+i))) );
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_store_si128( (v128u32 *)(dst+i), ColorspaceConvert8888To6665_SSE2<SWAP_RB>(_mm_load_si128((v128u32 *)(src+i))) );
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
size_t ColorspaceConvertBuffer6665To8888_SSE2(const u32 *src, u32 *dst, size_t pixCountVec128)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec128; i+=4)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm_storeu_si128( (v128u32 *)(dst+i), ColorspaceConvert6665To8888_SSE2<SWAP_RB>(_mm_loadu_si128((v128u32 *)(src+i))) );
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_store_si128( (v128u32 *)(dst+i), ColorspaceConvert6665To8888_SSE2<SWAP_RB>(_mm_load_si128((v128u32 *)(src+i))) );
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
size_t ColorspaceConvertBuffer8888To5551_SSE2(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec128)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec128; i+=8)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm_storeu_si128( (v128u16 *)(dst+i), ColorspaceConvert8888To5551_SSE2<SWAP_RB>(_mm_loadu_si128((v128u32 *)(src+i)), _mm_loadu_si128((v128u32 *)(src+i+4))) );
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_store_si128( (v128u16 *)(dst+i), ColorspaceConvert8888To5551_SSE2<SWAP_RB>(_mm_load_si128((v128u32 *)(src+i)), _mm_load_si128((v128u32 *)(src+i+4))) );
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
size_t ColorspaceConvertBuffer6665To5551_SSE2(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec128)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec128; i+=8)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm_storeu_si128( (v128u16 *)(dst+i), ColorspaceConvert6665To5551_SSE2<SWAP_RB>(_mm_loadu_si128((v128u32 *)(src+i)), _mm_loadu_si128((v128u32 *)(src+i+4))) );
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_store_si128( (v128u16 *)(dst+i), ColorspaceConvert6665To5551_SSE2<SWAP_RB>(_mm_load_si128((v128u32 *)(src+i)), _mm_load_si128((v128u32 *)(src+i+4))) );
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To8888Opaque_SSE2<false, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To8888Opaque_SSE2<true, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To8888Opaque_SSE2<false, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To8888Opaque_SSE2<true, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To6665Opaque_SSE2<false, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To6665Opaque_SSE2<true, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To6665Opaque_SSE2<false, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To6665Opaque_SSE2<true, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To6665_SSE2<false, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To6665_SSE2<true, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To6665_SSE2<false, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To6665_SSE2<true, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To8888_SSE2<false, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To8888_SSE2<true, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To8888_SSE2<false, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To8888_SSE2<true, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To5551_SSE2<false, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To5551_SSE2<true, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To5551_SSE2<false, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To5551_SSE2<true, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To5551_SSE2<false, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To5551_SSE2<true, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To5551_SSE2<false, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To5551_SSE2<true, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
template void ColorspaceConvert555To8888_SSE2<true>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
|
||||
template void ColorspaceConvert555To8888_SSE2<false>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
|
||||
|
||||
template void ColorspaceConvert555To6665_SSE2<true>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
|
||||
template void ColorspaceConvert555To6665_SSE2<false>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
|
||||
|
||||
template void ColorspaceConvert555To8888Opaque_SSE2<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
|
||||
template void ColorspaceConvert555To8888Opaque_SSE2<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
|
||||
|
||||
template void ColorspaceConvert555To6665Opaque_SSE2<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
|
||||
template void ColorspaceConvert555To6665Opaque_SSE2<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
|
||||
|
||||
template v128u32 ColorspaceConvert8888To6665_SSE2<true>(const v128u32 &src);
|
||||
template v128u32 ColorspaceConvert8888To6665_SSE2<false>(const v128u32 &src);
|
||||
|
||||
template v128u32 ColorspaceConvert6665To8888_SSE2<true>(const v128u32 &src);
|
||||
template v128u32 ColorspaceConvert6665To8888_SSE2<false>(const v128u32 &src);
|
||||
|
||||
template v128u16 ColorspaceConvert8888To5551_SSE2<true>(const v128u32 &srcLo, const v128u32 &srcHi);
|
||||
template v128u16 ColorspaceConvert8888To5551_SSE2<false>(const v128u32 &srcLo, const v128u32 &srcHi);
|
||||
|
||||
template v128u16 ColorspaceConvert6665To5551_SSE2<true>(const v128u32 &srcLo, const v128u32 &srcHi);
|
||||
template v128u16 ColorspaceConvert6665To5551_SSE2<false>(const v128u32 &srcLo, const v128u32 &srcHi);
|
||||
|
||||
#endif // ENABLE_SSE2
|
|
@ -0,0 +1,74 @@
|
|||
/*
|
||||
Copyright (C) 2016 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This file is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this software. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef COLORSPACEHANDLER_SSE2_H
|
||||
#define COLORSPACEHANDLER_SSE2_H
|
||||
|
||||
#include "colorspacehandler.h"
|
||||
|
||||
#ifndef ENABLE_SSE2
|
||||
#warning This header requires SSE2 support.
|
||||
#else
|
||||
|
||||
// Declarations of the per-vector SSE2 colour conversion templates. Only the
// declarations live here; each template is parameterised on SWAP_RB.
// NOTE(review): the definitions are presumably provided, and explicitly instantiated
// for SWAP_RB = true/false, by the matching implementation file -- confirm.

// 16-bit 555 source -> two vectors of 32-bit pixels, with caller-supplied alpha bits.
template <bool SWAP_RB>
void ColorspaceConvert555To8888_SSE2(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);

template <bool SWAP_RB>
void ColorspaceConvert555To6665_SSE2(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);

// 16-bit 555 source -> two vectors of 32-bit pixels, opaque variants (no alpha input).
template <bool SWAP_RB>
void ColorspaceConvert555To8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);

template <bool SWAP_RB>
void ColorspaceConvert555To6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);

// 32-bit source -> 32-bit result, one vector in and one vector out.
template <bool SWAP_RB>
v128u32 ColorspaceConvert8888To6665_SSE2(const v128u32 &src);

template <bool SWAP_RB>
v128u32 ColorspaceConvert6665To8888_SSE2(const v128u32 &src);

// Two vectors of 32-bit pixels -> one vector of 16-bit pixels.
template <bool SWAP_RB>
v128u16 ColorspaceConvert8888To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi);

template <bool SWAP_RB>
v128u16 ColorspaceConvert6665To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi);
|
||||
|
||||
class ColorspaceHandler_SSE2 : public ColorspaceHandler
|
||||
{
|
||||
public:
|
||||
ColorspaceHandler_SSE2() {};
|
||||
|
||||
size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
};
|
||||
|
||||
#endif // ENABLE_SSE2
|
||||
|
||||
#endif /* COLORSPACEHANDLER_SSE2_H */
|
|
@ -59,44 +59,41 @@
|
|||
#define DESMUME_PLATFORM_STRING ""
|
||||
#endif
|
||||
|
||||
#define DESMUME_SSE_STRING ""
|
||||
#define DESMUME_AVX_STRING ""
|
||||
#define DESMUME_CPUEXT_PRIMARY_STRING ""
|
||||
#define DESMUME_CPUEXT_SECONDARY_STRING ""
|
||||
|
||||
#ifdef ENABLE_SSE
|
||||
#undef DESMUME_SSE_STRING
|
||||
#define DESMUME_SSE_STRING " SSE"
|
||||
#endif
|
||||
#ifdef ENABLE_SSE2
|
||||
#undef DESMUME_SSE_STRING
|
||||
#define DESMUME_SSE_STRING " SSE2"
|
||||
#endif
|
||||
#ifdef ENABLE_SSE3
|
||||
#undef DESMUME_SSE_STRING
|
||||
#define DESMUME_SSE_STRING " SSE3"
|
||||
#endif
|
||||
#ifdef ENABLE_SSSE3
|
||||
#undef DESMUME_SSE_STRING
|
||||
#define DESMUME_SSE_STRING " SSSE3"
|
||||
#endif
|
||||
#ifdef ENABLE_SSE4_1
|
||||
#undef DESMUME_SSE_STRING
|
||||
#define DESMUME_SSE_STRING " SSE4.1"
|
||||
#endif
|
||||
#ifdef ENABLE_SSE4_2
|
||||
#undef DESMUME_SSE_STRING
|
||||
#define DESMUME_SSE_STRING " SSE4.2"
|
||||
#endif
|
||||
#ifdef ENABLE_AVX
|
||||
#undef DESMUME_AVX_STRING
|
||||
#define DESMUME_AVX_STRING "+AVX"
|
||||
#endif
|
||||
#ifdef ENABLE_AVX2
|
||||
#undef DESMUME_AVX_STRING
|
||||
#define DESMUME_AVX_STRING "+AVX2"
|
||||
#if defined(ENABLE_SSE4_2)
|
||||
#undef DESMUME_CPUEXT_PRIMARY_STRING
|
||||
#define DESMUME_CPUEXT_PRIMARY_STRING " SSE4.2"
|
||||
#elif defined(ENABLE_SSE4_1)
|
||||
#undef DESMUME_CPUEXT_PRIMARY_STRING
|
||||
#define DESMUME_CPUEXT_PRIMARY_STRING " SSE4.1"
|
||||
#elif defined(ENABLE_SSSE3)
|
||||
#undef DESMUME_CPUEXT_PRIMARY_STRING
|
||||
#define DESMUME_CPUEXT_PRIMARY_STRING " SSSE3"
|
||||
#elif defined(ENABLE_SSE3)
|
||||
#undef DESMUME_CPUEXT_PRIMARY_STRING
|
||||
#define DESMUME_CPUEXT_PRIMARY_STRING " SSE3"
|
||||
#elif defined(ENABLE_SSE2)
|
||||
#undef DESMUME_CPUEXT_PRIMARY_STRING
|
||||
#define DESMUME_CPUEXT_PRIMARY_STRING " SSE2"
|
||||
#elif defined(ENABLE_SSE)
|
||||
#undef DESMUME_CPUEXT_PRIMARY_STRING
|
||||
#define DESMUME_CPUEXT_PRIMARY_STRING " SSE"
|
||||
#elif defined(ENABLE_ALTIVEC)
|
||||
#undef DESMUME_CPUEXT_PRIMARY_STRING
|
||||
#define DESMUME_CPUEXT_PRIMARY_STRING " AltiVec"
|
||||
#endif
|
||||
|
||||
#define DESMUME_CPUEXT_STRING DESMUME_SSE_STRING DESMUME_AVX_STRING
|
||||
#if defined(ENABLE_AVX2)
|
||||
#undef DESMUME_CPUEXT_SECONDARY_STRING
|
||||
#define DESMUME_CPUEXT_SECONDARY_STRING "+AVX2"
|
||||
#elif defined(ENABLE_AVX)
|
||||
#undef DESMUME_CPUEXT_SECONDARY_STRING
|
||||
#define DESMUME_CPUEXT_SECONDARY_STRING "+AVX"
|
||||
#endif
|
||||
|
||||
#define DESMUME_CPUEXT_STRING DESMUME_CPUEXT_PRIMARY_STRING DESMUME_CPUEXT_SECONDARY_STRING
|
||||
|
||||
#ifdef DEVELOPER
|
||||
#define DESMUME_FEATURE_STRING " dev+"
|
||||
|
|
|
@ -171,6 +171,8 @@
|
|||
<ClCompile Include="..\utils\AsmJit\x86\x86func.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\x86\x86operand.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\x86\x86util.cpp" />
|
||||
<ClCompile Include="..\utils\colorspacehandler\colorspacehandler.cpp" />
|
||||
<ClCompile Include="..\utils\colorspacehandler\colorspacehandler_SSE2.cpp" />
|
||||
<ClCompile Include="..\utils\datetime.cpp" />
|
||||
<ClCompile Include="..\utils\dlditool.cpp" />
|
||||
<ClCompile Include="..\utils\emufat.cpp" />
|
||||
|
@ -442,6 +444,8 @@
|
|||
<ClInclude Include="..\utils\AsmJit\x86\x86func.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\x86\x86operand.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\x86\x86util.h" />
|
||||
<ClInclude Include="..\utils\colorspacehandler\colorspacehandler.h" />
|
||||
<ClInclude Include="..\utils\colorspacehandler\colorspacehandler_SSE2.h" />
|
||||
<ClInclude Include="..\utils\datetime.h" />
|
||||
<ClInclude Include="..\utils\emufat.h" />
|
||||
<ClInclude Include="..\utils\emufat_types.h" />
|
||||
|
|
|
@ -121,6 +121,9 @@
|
|||
<Filter Include="Core\libretro-common\lists">
|
||||
<UniqueIdentifier>{18cba3ce-aaa6-441d-8111-408d0fcef7d2}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Core\utils\colorspacehandler">
|
||||
<UniqueIdentifier>{db5dc512-2b75-4476-8cac-75fd4acfd85f}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\aggdraw.cpp">
|
||||
|
@ -966,6 +969,12 @@
|
|||
<ClCompile Include="..\libretro-common\file\archive_file_zlib.c">
|
||||
<Filter>Core\libretro-common\file</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\utils\colorspacehandler\colorspacehandler.cpp">
|
||||
<Filter>Core\utils\colorspacehandler</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\utils\colorspacehandler\colorspacehandler_SSE2.cpp">
|
||||
<Filter>Core\utils\colorspacehandler</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\armcpu.h">
|
||||
|
@ -1739,6 +1748,12 @@
|
|||
<ClInclude Include="..\libretro-common\include\compat\msvc.h">
|
||||
<Filter>Core\libretro-common\include\compat</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\utils\colorspacehandler\colorspacehandler.h">
|
||||
<Filter>Core\utils\colorspacehandler</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\utils\colorspacehandler\colorspacehandler_SSE2.h">
|
||||
<Filter>Core\utils\colorspacehandler</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="..\instruction_tabdef.inc">
|
||||
|
|
|
@ -316,13 +316,14 @@ static void do_video_conversion(AVIFile* avi, const u16* buffer)
|
|||
int height = avi->prescaleLevel*384;
|
||||
u8* outbuf = avi_file->convert_buffer + width*(height-1)*3;
|
||||
|
||||
for(int y=0;y<height;y++)
|
||||
for (int y = 0; y < height; y++)
|
||||
{
|
||||
for(int x=0;x<width;x++)
|
||||
for (int x = 0; x < width; x++)
|
||||
{
|
||||
u32 dst = ConvertColor555To8888Opaque<true>(*buffer++);
|
||||
*(u32 *)outbuf = (dst & 0x00FFFFFF) | (*(u32 *)outbuf & 0xFF000000);
|
||||
outbuf += 3;
|
||||
u32 dst = ColorspaceConvert555To8888Opaque<true>(*buffer++);
|
||||
*outbuf++ = dst & 0xFF;
|
||||
*outbuf++ = (dst >> 8) & 0xFF;
|
||||
*outbuf++ = (dst >> 16) & 0xFF;
|
||||
}
|
||||
|
||||
outbuf -= width*3*2;
|
||||
|
|
|
@ -1920,7 +1920,7 @@ static void DoDisplay(bool firstTime)
|
|||
//convert pixel format to 32bpp for compositing
|
||||
//why do we do this over and over? well, we are compositing to
|
||||
//filteredbuffer32bpp, and it needs to get refreshed each frame.
|
||||
ConvertColorBuffer555To8888Opaque<true, false>((u16 *)video.srcBuffer, video.buffer, video.srcBufferSize / sizeof(u16));
|
||||
ColorspaceConvertBuffer555To8888Opaque<true, false>((u16 *)video.srcBuffer, video.buffer, video.srcBufferSize / sizeof(u16));
|
||||
|
||||
if(firstTime)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue