Colorspace Handler:

- Factor the generic colorspace handling routines out of GPU.cpp/GPU.h and into their own separate files.
- Add vectorized routines using AVX2 and AltiVec.
This commit is contained in:
rogerman 2016-08-16 06:47:22 +00:00
parent d837653b5f
commit d8735a803b
25 changed files with 2877 additions and 876 deletions

View File

@ -18,6 +18,14 @@
along with this software. If not, see <http://www.gnu.org/licenses/>.
*/
#ifdef FASTBUILD
#undef FORCEINLINE
#define FORCEINLINE
//compilation speed hack (cuts time exactly in half by cutting out permutations)
#define DISABLE_MOSAIC
#define DISABLE_COLOREFFECTDISABLEHINT
#endif
#include "GPU.h"
#include <assert.h>
@ -40,75 +48,8 @@
#include "matrix.h"
#include "emufile.h"
#ifdef FASTBUILD
#undef FORCEINLINE
#define FORCEINLINE
//compilation speed hack (cuts time exactly in half by cutting out permutations)
#define DISABLE_MOSAIC
#endif
u32 Render3DFramesPerSecond;
// Lookup tables indexed by a 15-bit (555) color value, one entry for each of the
// 32768 possible inputs. They are filled in by the table-initialization loop in the
// GPUSubsystem constructor, which stores every entry through LE_TO_LOCAL_32.
CACHE_ALIGN u32 color_555_to_6665_opaque[32768]; // 6-bit-per-channel color OR'd with 0x1F000000
CACHE_ALIGN u32 color_555_to_6665_opaque_swap_rb[32768]; // same as above, with the R and B channels exchanged
CACHE_ALIGN u32 color_555_to_666[32768]; // 6-bit-per-channel color with no alpha bits set
CACHE_ALIGN u32 color_555_to_8888_opaque[32768]; // 8-bit-per-channel color OR'd with 0xFF000000
CACHE_ALIGN u32 color_555_to_8888_opaque_swap_rb[32768]; // same as above, with the R and B channels exchanged
CACHE_ALIGN u32 color_555_to_888[32768]; // 8-bit-per-channel color with no alpha bits set
// Component bit-depth expansion tables. Each table is indexed by the narrow
// source value and yields the widened value.

//is this a crazy idea? this table spreads 5 bits evenly over 31 from exactly 0 to INT_MAX
// Every entry is the 5-bit input pattern repeated across the 31-bit result
// (e.g. 0x01 -> 0x04210842, 0x10 -> 0x42108421, 0x1F -> 0x7FFFFFFF).
CACHE_ALIGN const u32 material_5bit_to_31bit[] = {
0x00000000, 0x04210842, 0x08421084, 0x0C6318C6,
0x10842108, 0x14A5294A, 0x18C6318C, 0x1CE739CE,
0x21084210, 0x25294A52, 0x294A5294, 0x2D6B5AD6,
0x318C6318, 0x35AD6B5A, 0x39CE739C, 0x3DEF7BDE,
0x42108421, 0x46318C63, 0x4A5294A5, 0x4E739CE7,
0x5294A529, 0x56B5AD6B, 0x5AD6B5AD, 0x5EF7BDEF,
0x6318C631, 0x6739CE73, 0x6B5AD6B5, 0x6F7BDEF7,
0x739CE739, 0x77BDEF7B, 0x7BDEF7BD, 0x7FFFFFFF
};
// 5-bit to 6-bit conversions use this formula -- dst = (src == 0) ? 0 : (2*src) + 1
// Reference GBATEK: http://problemkaputt.de/gbatek.htm#ds3dtextureblending
CACHE_ALIGN const u8 material_5bit_to_6bit[] = {
0x00, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F,
0x21, 0x23, 0x25, 0x27, 0x29, 0x2B, 0x2D, 0x2F,
0x31, 0x33, 0x35, 0x37, 0x39, 0x3B, 0x3D, 0x3F
};
// 5-bit to 8-bit: the entries equal (src << 3) | (src >> 2), i.e. the top bits
// of the source are replicated into the low bits so 0x1F maps to 0xFF.
CACHE_ALIGN const u8 material_5bit_to_8bit[] = {
0x00, 0x08, 0x10, 0x18, 0x21, 0x29, 0x31, 0x39,
0x42, 0x4A, 0x52, 0x5A, 0x63, 0x6B, 0x73, 0x7B,
0x84, 0x8C, 0x94, 0x9C, 0xA5, 0xAD, 0xB5, 0xBD,
0xC6, 0xCE, 0xD6, 0xDE, 0xE7, 0xEF, 0xF7, 0xFF
};
// 6-bit to 8-bit: the entries equal (src << 2) | (src >> 4), so 0x3F maps to 0xFF.
CACHE_ALIGN const u8 material_6bit_to_8bit[] = {
0x00, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C,
0x20, 0x24, 0x28, 0x2C, 0x30, 0x34, 0x38, 0x3C,
0x41, 0x45, 0x49, 0x4D, 0x51, 0x55, 0x59, 0x5D,
0x61, 0x65, 0x69, 0x6D, 0x71, 0x75, 0x79, 0x7D,
0x82, 0x86, 0x8A, 0x8E, 0x92, 0x96, 0x9A, 0x9E,
0xA2, 0xA6, 0xAA, 0xAE, 0xB2, 0xB6, 0xBA, 0xBE,
0xC3, 0xC7, 0xCB, 0xCF, 0xD3, 0xD7, 0xDB, 0xDF,
0xE3, 0xE7, 0xEB, 0xEF, 0xF3, 0xF7, 0xFB, 0xFF
};
// 3-bit to 8-bit: the entries equal (src << 5) | (src << 2) | (src >> 1),
// i.e. the 3-bit pattern repeated to fill 8 bits.
CACHE_ALIGN const u8 material_3bit_to_8bit[] = {
0x00, 0x24, 0x49, 0x6D, 0x92, 0xB6, 0xDB, 0xFF
};
//maybe not very precise
// The entries match (src * 31) / 7 rounded down.
CACHE_ALIGN const u8 material_3bit_to_5bit[] = {
0, 4, 8, 13, 17, 22, 26, 31
};
//TODO - generate this in the static init method more accurately
// As written, each entry is twice the matching material_3bit_to_5bit entry,
// except the last one, which is 63 (the 6-bit maximum).
CACHE_ALIGN const u8 material_3bit_to_6bit[] = {
0, 8, 16, 26, 34, 44, 52, 63
};
//instantiate static instance
u16 GPUEngineBase::_brightnessUpTable555[17][0x8000];
FragmentColor GPUEngineBase::_brightnessUpTable666[17][0x8000];
@ -167,7 +108,7 @@ const CACHE_ALIGN BGLayerSize GPUEngineBase::_BGLayerSizeLUT[8][4] = {
{{128,128}, {256,256}, {512,256}, {512,512}}, //affine ext direct
};
static void ExpandLine8(u8 *__restrict dst, const u8 *__restrict src, size_t dstLength)
static FORCEINLINE void ExpandLine8(u8 *__restrict dst, const u8 *__restrict src, size_t dstLength)
{
#ifdef ENABLE_SSSE3
const bool isIntegerScale = ((dstLength % GPU_FRAMEBUFFER_NATIVE_WIDTH) == 0);
@ -1655,11 +1596,11 @@ FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compInfo,
break;
case NDSColorFormat_BGR666_Rev:
dstColor32.color = ConvertColor555To6665Opaque<false>(srcColor16);
dstColor32.color = ColorspaceConvert555To6665Opaque<false>(srcColor16);
break;
case NDSColorFormat_BGR888_Rev:
dstColor32.color = ConvertColor555To8888Opaque<false>(srcColor16);
dstColor32.color = ColorspaceConvert555To8888Opaque<false>(srcColor16);
break;
}
@ -1682,11 +1623,11 @@ FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compInfo,
break;
case NDSColorFormat_BGR666_Rev:
dstColor32.color = ConvertColor555To6665Opaque<false>(srcColor16);
dstColor32.color = ColorspaceConvert555To6665Opaque<false>(srcColor16);
break;
case NDSColorFormat_BGR888_Rev:
dstColor32.color = ConvertColor555To8888Opaque<false>(srcColor16);
dstColor32.color = ColorspaceConvert555To8888Opaque<false>(srcColor16);
break;
}
@ -1767,11 +1708,11 @@ FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compInfo,
break;
case NDSColorFormat_BGR666_Rev:
dstColor32.color = ConvertColor555To6665Opaque<false>(srcColor16);
dstColor32.color = ColorspaceConvert555To6665Opaque<false>(srcColor16);
break;
case NDSColorFormat_BGR888_Rev:
dstColor32.color = ConvertColor555To8888Opaque<false>(srcColor16);
dstColor32.color = ColorspaceConvert555To8888Opaque<false>(srcColor16);
break;
}
break;
@ -1833,13 +1774,13 @@ FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compInfo,
break;
case NDSColorFormat_BGR666_Rev:
srcColor32.color = ConvertColor555To6665Opaque<false>(srcColor16);
srcColor32.color = ColorspaceConvert555To6665Opaque<false>(srcColor16);
dstColor32 = this->_ColorEffectBlend<OUTPUTFORMAT>(srcColor32, dstColor32, blendEVA, blendEVB);
dstColor32.a = 0x1F;
break;
case NDSColorFormat_BGR888_Rev:
srcColor32.color = ConvertColor555To8888Opaque<false>(srcColor16);
srcColor32.color = ColorspaceConvert555To8888Opaque<false>(srcColor16);
dstColor32 = this->_ColorEffectBlend<OUTPUTFORMAT>(srcColor32, dstColor32, blendEVA, blendEVB);
dstColor32.a = 0xFF;
break;
@ -2132,7 +2073,7 @@ FORCEINLINE void GPUEngineBase::_RenderPixel3D(GPUEngineCompositorInfo &compInfo
// Render the pixel using the selected color effect.
if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev)
{
const u16 srcColor16 = ConvertColor6665To5551<false>(srcColor32);
const u16 srcColor16 = ColorspaceConvert6665To5551<false>(srcColor32);
switch (selectedEffect)
{
@ -2695,13 +2636,13 @@ void GPUEngineBase::_RenderPixelsCustom(GPUEngineCompositorInfo &compInfo)
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{
ConvertColor555To6665Opaque<false>(src16[0], src[0], src[1]);
ConvertColor555To6665Opaque<false>(src16[1], src[2], src[3]);
ColorspaceConvert555To6665Opaque_SSE2<false>(src16[0], src[0], src[1]);
ColorspaceConvert555To6665Opaque_SSE2<false>(src16[1], src[2], src[3]);
}
else
{
ConvertColor555To8888Opaque<false>(src16[0], src[0], src[1]);
ConvertColor555To8888Opaque<false>(src16[1], src[2], src[3]);
ColorspaceConvert555To8888Opaque_SSE2<false>(src16[0], src[0], src[1]);
ColorspaceConvert555To8888Opaque_SSE2<false>(src16[1], src[2], src[3]);
}
}
@ -2796,13 +2737,13 @@ void GPUEngineBase::_RenderPixelsCustomVRAM(GPUEngineCompositorInfo &compInfo)
{
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{
ConvertColor555To6665Opaque<false>(src16[0], src[0], src[1]);
ConvertColor555To6665Opaque<false>(src16[1], src[2], src[3]);
ColorspaceConvert555To6665Opaque_SSE2<false>(src16[0], src[0], src[1]);
ColorspaceConvert555To6665Opaque_SSE2<false>(src16[1], src[2], src[3]);
}
else
{
ConvertColor555To8888Opaque<false>(src16[0], src[0], src[1]);
ConvertColor555To8888Opaque<false>(src16[1], src[2], src[3]);
ColorspaceConvert555To8888Opaque_SSE2<false>(src16[0], src[0], src[1]);
ColorspaceConvert555To8888Opaque_SSE2<false>(src16[1], src[2], src[3]);
}
}
@ -4502,7 +4443,7 @@ void GPUEngineBase::UpdateVRAM3DUsageProperties_OBJLayer(const size_t bankIndex)
}
template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED>
void GPUEngineBase::_RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compInfo)
FORCEINLINE void GPUEngineBase::_RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compInfo)
{
bool useCustomVRAM = false;
@ -4538,26 +4479,28 @@ void GPUEngineBase::_RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compInfo)
}
// Pass-through stage of the BG layer render chain: it forwards compInfo to
// _RenderLine_LayerBG_Final using exactly the same template arguments it was
// instantiated with, and does no work of its own.
template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED>
void GPUEngineBase::_RenderLine_LayerBG_ApplyColorEffectDisabledHint(GPUEngineCompositorInfo &compInfo)
FORCEINLINE void GPUEngineBase::_RenderLine_LayerBG_ApplyColorEffectDisabledHint(GPUEngineCompositorInfo &compInfo)
{
this->_RenderLine_LayerBG_Final<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compInfo);
}
// Turns the runtime color-effect state into the compile-time COLOREFFECTDISABLEDHINT
// template argument of the next stage. MOSAIC is already fixed by the caller and is
// only passed along here.
//
// When DISABLE_COLOREFFECTDISABLEHINT is defined (the FASTBUILD configuration defines
// it), the runtime check is compiled out and only the "false" variant is instantiated,
// which reduces the number of template permutations.
template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool ISCUSTOMRENDERINGNEEDED>
void GPUEngineBase::_RenderLine_LayerBG_ApplyMosaic(GPUEngineCompositorInfo &compInfo)
FORCEINLINE void GPUEngineBase::_RenderLine_LayerBG_ApplyMosaic(GPUEngineCompositorInfo &compInfo)
{
#ifndef DISABLE_COLOREFFECTDISABLEHINT
// Color effects are off for this line: select the hinted variant.
if (compInfo.renderState.colorEffect == ColorEffect_Disable)
{
this->_RenderLine_LayerBG_ApplyColorEffectDisabledHint<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, true, ISCUSTOMRENDERINGNEEDED>(compInfo);
}
else
#endif
{
// General variant, used whenever the hint is unavailable or does not apply.
this->_RenderLine_LayerBG_ApplyColorEffectDisabledHint<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, false, ISCUSTOMRENDERINGNEEDED>(compInfo);
}
}
template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool WILLPERFORMWINDOWTEST, bool ISCUSTOMRENDERINGNEEDED>
void GPUEngineBase::_RenderLine_LayerBG(GPUEngineCompositorInfo &compInfo)
FORCEINLINE void GPUEngineBase::_RenderLine_LayerBG(GPUEngineCompositorInfo &compInfo)
{
if (ISDEBUGRENDER)
{
@ -4951,7 +4894,7 @@ void GPUEngineBase::ResolveCustomRendering()
// Converts the engine's rendered buffer from the 6665 color layout to 8888.
// The conversion runs in place: renderedBuffer is passed as both source and
// destination, covering renderedWidth * renderedHeight pixels.
void GPUEngineBase::ResolveRGB666ToRGB888()
{
ConvertColorBuffer6665To8888<false>((u32 *)this->renderedBuffer, (u32 *)this->renderedBuffer, this->renderedWidth * this->renderedHeight);
ColorspaceConvertBuffer6665To8888<false, false>((u32 *)this->renderedBuffer, (u32 *)this->renderedBuffer, this->renderedWidth * this->renderedHeight);
}
void GPUEngineBase::ResolveToCustomFramebuffer()
@ -5575,12 +5518,12 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
case NDSColorFormat_BGR666_Rev:
renderedLineSrcA16 = (u16 *)malloc_alignedCacheLine(compInfo.line.pixelCount * sizeof(u16));
ConvertColorBuffer6665To5551<false, false>((u32 *)compInfo.target.lineColorHead, renderedLineSrcA16, compInfo.line.pixelCount);
ColorspaceConvertBuffer6665To5551<false, false>((u32 *)compInfo.target.lineColorHead, renderedLineSrcA16, compInfo.line.pixelCount);
break;
case NDSColorFormat_BGR888_Rev:
renderedLineSrcA16 = (u16 *)malloc_alignedCacheLine(compInfo.line.pixelCount * sizeof(u16));
ConvertColorBuffer8888To5551<false, false>((u32 *)compInfo.target.lineColorHead, renderedLineSrcA16, compInfo.line.pixelCount);
ColorspaceConvertBuffer8888To5551<false, false>((u32 *)compInfo.target.lineColorHead, renderedLineSrcA16, compInfo.line.pixelCount);
break;
}
}
@ -6570,7 +6513,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
{
const u16 *src = this->_VRAMNativeBlockPtr[DISPCNT.VRAM_Block] + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH);
FragmentColor *dst = (FragmentColor *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH);
ConvertColorBuffer555To6665Opaque<false, false>(src, (u32 *)dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
ColorspaceConvertBuffer555To6665Opaque<false, false>(src, (u32 *)dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
break;
}
@ -6578,7 +6521,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
{
const u16 *src = this->_VRAMNativeBlockPtr[DISPCNT.VRAM_Block] + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH);
FragmentColor *dst = (FragmentColor *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH);
ConvertColorBuffer555To8888Opaque<false, false>(src, (u32 *)dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
ColorspaceConvertBuffer555To8888Opaque<false, false>(src, (u32 *)dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
break;
}
}
@ -6598,7 +6541,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
{
const u16 *src = this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + (_gpuDstLineIndex[l] * customWidth);
FragmentColor *dst = (FragmentColor *)this->customBuffer + (_gpuDstLineIndex[l] * customWidth);
ConvertColorBuffer555To6665Opaque<false, false>(src, (u32 *)dst, customPixCount);
ColorspaceConvertBuffer555To6665Opaque<false, false>(src, (u32 *)dst, customPixCount);
break;
}
@ -6606,7 +6549,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
{
const u16 *src = this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + (_gpuDstLineIndex[l] * customWidth);
FragmentColor *dst = (FragmentColor *)this->customBuffer + (_gpuDstLineIndex[l] * customWidth);
ConvertColorBuffer555To8888Opaque<false, false>(src, (u32 *)dst, customPixCount);
ColorspaceConvertBuffer555To8888Opaque<false, false>(src, (u32 *)dst, customPixCount);
break;
}
}
@ -6802,28 +6745,7 @@ void GPUEngineB::RenderLine(const u16 l)
GPUSubsystem::GPUSubsystem()
{
static bool needInitTables = true;
if (needInitTables)
{
#define RGB15TO18_BITLOGIC(col) ( (material_5bit_to_6bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | material_5bit_to_6bit[(col)&0x1F] )
#define RGB15TO18_SWAP_RB_BITLOGIC(col) ( material_5bit_to_6bit[((col)>>10)&0x1F] | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_6bit[(col)&0x1F]<<16) )
#define RGB15TO24_BITLOGIC(col) ( (material_5bit_to_8bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | material_5bit_to_8bit[(col)&0x1F] )
#define RGB15TO24_SWAP_RB_BITLOGIC(col) ( material_5bit_to_8bit[((col)>>10)&0x1F] | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_8bit[(col)&0x1F]<<16) )
for (size_t i = 0; i < 32768; i++)
{
color_555_to_666[i] = LE_TO_LOCAL_32( RGB15TO18_BITLOGIC(i) );
color_555_to_6665_opaque[i] = LE_TO_LOCAL_32( RGB15TO18_BITLOGIC(i) | 0x1F000000 );
color_555_to_6665_opaque_swap_rb[i] = LE_TO_LOCAL_32( RGB15TO18_SWAP_RB_BITLOGIC(i) | 0x1F000000 );
color_555_to_888[i] = LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) );
color_555_to_8888_opaque[i] = LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) | 0xFF000000 );
color_555_to_8888_opaque_swap_rb[i] = LE_TO_LOCAL_32( RGB15TO24_SWAP_RB_BITLOGIC(i) | 0xFF000000 );
}
needInitTables = false;
}
ColorspaceHandlerInit();
_defaultEventHandler = new GPUEventHandlerDefault;
_event = _defaultEventHandler;
@ -7581,178 +7503,6 @@ void NDSDisplay::SetEngineByID(const GPUEngineID theID)
this->_gpu->SetDisplayByID(this->_ID);
}
// Converts a run of 15-bit (555) colors into opaque 32-bit 8888 colors.
//
// SWAP_RB       - exchange the red and blue channels while converting.
// IS_UNALIGNED  - when true, the SSE2 path uses unaligned loads/stores; when false
//                 it uses the aligned forms of the load/store intrinsics.
// src / dst     - source and destination buffers (declared non-overlapping via __restrict).
// pixCount      - number of pixels to convert.
template <bool SWAP_RB, bool IS_UNALIGNED>
void ConvertColorBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
{
	size_t pix = 0;
	
#ifdef ENABLE_SSE2
	// Vector path: eight pixels per pass. Any remainder is left for the scalar loop below.
	const size_t vecPixCount = (pixCount / 8) * 8;
	for (; pix < vecPixCount; pix += 8)
	{
		__m128i packed555;
		if (IS_UNALIGNED)
		{
			packed555 = _mm_loadu_si128((__m128i *)(src + pix));
		}
		else
		{
			packed555 = _mm_load_si128((__m128i *)(src + pix));
		}
		
		__m128i outLo, outHi;
		ConvertColor555To8888Opaque<SWAP_RB>(packed555, outLo, outHi);
		
		if (IS_UNALIGNED)
		{
			_mm_storeu_si128((__m128i *)(dst + pix + 0), outLo);
			_mm_storeu_si128((__m128i *)(dst + pix + 4), outHi);
		}
		else
		{
			_mm_store_si128((__m128i *)(dst + pix + 0), outLo);
			_mm_store_si128((__m128i *)(dst + pix + 4), outHi);
		}
	}
	
	// The scalar loop only mops up the tail here, so auto-vectorizing it is not wanted.
#pragma LOOPVECTORIZE_DISABLE
#endif
	for (; pix < pixCount; pix++)
	{
		dst[pix] = ConvertColor555To8888Opaque<SWAP_RB>(src[pix]);
	}
}
// Converts a run of 15-bit (555) colors into opaque 32-bit 6665 colors.
//
// SWAP_RB       - exchange the red and blue channels while converting.
// IS_UNALIGNED  - when true, the SSE2 path uses unaligned loads/stores; when false
//                 it uses the aligned forms of the load/store intrinsics.
// src / dst     - source and destination buffers (declared non-overlapping via __restrict).
// pixCount      - number of pixels to convert.
template <bool SWAP_RB, bool IS_UNALIGNED>
void ConvertColorBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
{
	size_t pix = 0;
	
#ifdef ENABLE_SSE2
	// Vector path: eight pixels per pass. Any remainder is left for the scalar loop below.
	const size_t vecPixCount = (pixCount / 8) * 8;
	for (; pix < vecPixCount; pix += 8)
	{
		__m128i packed555;
		if (IS_UNALIGNED)
		{
			packed555 = _mm_loadu_si128((__m128i *)(src + pix));
		}
		else
		{
			packed555 = _mm_load_si128((__m128i *)(src + pix));
		}
		
		__m128i outLo, outHi;
		ConvertColor555To6665Opaque<SWAP_RB>(packed555, outLo, outHi);
		
		if (IS_UNALIGNED)
		{
			_mm_storeu_si128((__m128i *)(dst + pix + 0), outLo);
			_mm_storeu_si128((__m128i *)(dst + pix + 4), outHi);
		}
		else
		{
			_mm_store_si128((__m128i *)(dst + pix + 0), outLo);
			_mm_store_si128((__m128i *)(dst + pix + 4), outHi);
		}
	}
	
	// The scalar loop only mops up the tail here, so auto-vectorizing it is not wanted.
#pragma LOOPVECTORIZE_DISABLE
#endif
	for (; pix < pixCount; pix++)
	{
		dst[pix] = ConvertColor555To6665Opaque<SWAP_RB>(src[pix]);
	}
}
// Converts a run of 32-bit 8888 colors into 32-bit 6665 colors.
//
// SWAP_RB   - exchange the red and blue channels while converting.
// src / dst - source and destination buffers. The SSE2 path always uses the aligned
//             load/store intrinsics on them.
// pixCount  - number of pixels to convert.
template <bool SWAP_RB>
void ConvertColorBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount)
{
	size_t pix = 0;
	
#ifdef ENABLE_SSE2
	// Vector path: four pixels per pass. Each group is fully loaded before it is stored.
	const size_t vecPixCount = (pixCount / 4) * 4;
	for (; pix < vecPixCount; pix += 4)
	{
		const __m128i color8888 = _mm_load_si128((__m128i *)(src + pix));
		const __m128i color6665 = ConvertColor8888To6665<SWAP_RB>(color8888);
		_mm_store_si128((__m128i *)(dst + pix), color6665);
	}
	
	// The scalar loop only mops up the tail here, so auto-vectorizing it is not wanted.
#pragma LOOPVECTORIZE_DISABLE
#endif
	for (; pix < pixCount; pix++)
	{
		dst[pix] = ConvertColor8888To6665<SWAP_RB>(src[pix]);
	}
}
// Converts a run of 32-bit 6665 colors into 32-bit 8888 colors.
//
// SWAP_RB   - exchange the red and blue channels while converting.
// src / dst - source and destination buffers. The SSE2 path always uses the aligned
//             load/store intrinsics on them.
// pixCount  - number of pixels to convert.
template <bool SWAP_RB>
void ConvertColorBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount)
{
	size_t pix = 0;
	
#ifdef ENABLE_SSE2
	// Vector path: four pixels per pass. Each group is fully loaded before it is stored.
	const size_t vecPixCount = (pixCount / 4) * 4;
	for (; pix < vecPixCount; pix += 4)
	{
		const __m128i color6665 = _mm_load_si128((__m128i *)(src + pix));
		const __m128i color8888 = ConvertColor6665To8888<SWAP_RB>(color6665);
		_mm_store_si128((__m128i *)(dst + pix), color8888);
	}
	
	// The scalar loop only mops up the tail here, so auto-vectorizing it is not wanted.
#pragma LOOPVECTORIZE_DISABLE
#endif
	for (; pix < pixCount; pix++)
	{
		dst[pix] = ConvertColor6665To8888<SWAP_RB>(src[pix]);
	}
}
// Converts a run of 32-bit 8888 colors into 16-bit 5551 colors.
//
// SWAP_RB       - exchange the red and blue channels while converting.
// IS_UNALIGNED  - when true, the SSE2 path uses unaligned loads/stores; when false
//                 it uses the aligned forms of the load/store intrinsics.
// src / dst     - source and destination buffers (declared non-overlapping via __restrict).
// pixCount      - number of pixels to convert.
template <bool SWAP_RB, bool IS_UNALIGNED>
void ConvertColorBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount)
{
	size_t pix = 0;
	
#ifdef ENABLE_SSE2
	// Vector path: two 4-pixel source vectors are packed into one 8-pixel result per pass.
	const size_t vecPixCount = (pixCount / 8) * 8;
	for (; pix < vecPixCount; pix += 8)
	{
		__m128i srcLo, srcHi;
		if (IS_UNALIGNED)
		{
			srcLo = _mm_loadu_si128((__m128i *)(src + pix));
			srcHi = _mm_loadu_si128((__m128i *)(src + pix + 4));
		}
		else
		{
			srcLo = _mm_load_si128((__m128i *)(src + pix));
			srcHi = _mm_load_si128((__m128i *)(src + pix + 4));
		}
		
		const __m128i packed5551 = ConvertColor8888To5551<SWAP_RB>(srcLo, srcHi);
		
		if (IS_UNALIGNED)
		{
			_mm_storeu_si128((__m128i *)(dst + pix), packed5551);
		}
		else
		{
			_mm_store_si128((__m128i *)(dst + pix), packed5551);
		}
	}
	
	// The scalar loop only mops up the tail here, so auto-vectorizing it is not wanted.
#pragma LOOPVECTORIZE_DISABLE
#endif
	for (; pix < pixCount; pix++)
	{
		dst[pix] = ConvertColor8888To5551<SWAP_RB>(src[pix]);
	}
}
// Converts a run of 32-bit 6665 colors into 16-bit 5551 colors.
//
// SWAP_RB       - exchange the red and blue channels while converting.
// IS_UNALIGNED  - when true, the SSE2 path uses unaligned loads/stores; when false
//                 it uses the aligned forms of the load/store intrinsics.
// src / dst     - source and destination buffers (declared non-overlapping via __restrict).
// pixCount      - number of pixels to convert.
template <bool SWAP_RB, bool IS_UNALIGNED>
void ConvertColorBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount)
{
	size_t pix = 0;
	
#ifdef ENABLE_SSE2
	// Vector path: two 4-pixel source vectors are packed into one 8-pixel result per pass.
	const size_t vecPixCount = (pixCount / 8) * 8;
	for (; pix < vecPixCount; pix += 8)
	{
		__m128i srcLo, srcHi;
		if (IS_UNALIGNED)
		{
			srcLo = _mm_loadu_si128((__m128i *)(src + pix));
			srcHi = _mm_loadu_si128((__m128i *)(src + pix + 4));
		}
		else
		{
			srcLo = _mm_load_si128((__m128i *)(src + pix));
			srcHi = _mm_load_si128((__m128i *)(src + pix + 4));
		}
		
		const __m128i packed5551 = ConvertColor6665To5551<SWAP_RB>(srcLo, srcHi);
		
		if (IS_UNALIGNED)
		{
			_mm_storeu_si128((__m128i *)(dst + pix), packed5551);
		}
		else
		{
			_mm_store_si128((__m128i *)(dst + pix), packed5551);
		}
	}
	
	// The scalar loop only mops up the tail here, so auto-vectorizing it is not wanted.
#pragma LOOPVECTORIZE_DISABLE
#endif
	for (; pix < pixCount; pix++)
	{
		dst[pix] = ConvertColor6665To5551<SWAP_RB>(src[pix]);
	}
}
template void GPUEngineBase::ParseReg_BGnHOFS<GPULayerID_BG0>();
template void GPUEngineBase::ParseReg_BGnHOFS<GPULayerID_BG1>();
template void GPUEngineBase::ParseReg_BGnHOFS<GPULayerID_BG2>();
@ -7774,29 +7524,3 @@ template void GPUEngineBase::ParseReg_BGnY<GPULayerID_BG3>();
template void GPUSubsystem::RenderLine<NDSColorFormat_BGR555_Rev>(const u16 l, bool skip);
template void GPUSubsystem::RenderLine<NDSColorFormat_BGR666_Rev>(const u16 l, bool skip);
template void GPUSubsystem::RenderLine<NDSColorFormat_BGR888_Rev>(const u16 l, bool skip);
// Explicit instantiations of the buffer conversion templates, one for every
// combination of their bool template parameters (<SWAP_RB, IS_UNALIGNED>, or
// <SWAP_RB> alone for the 8888<->6665 converters).
template void ConvertColorBuffer555To8888Opaque<true, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer555To8888Opaque<true, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer555To8888Opaque<false, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer555To8888Opaque<false, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer555To6665Opaque<true, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer555To6665Opaque<true, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer555To6665Opaque<false, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer555To6665Opaque<false, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer8888To6665<true>(const u32 *src, u32 *dst, size_t pixCount);
template void ConvertColorBuffer8888To6665<false>(const u32 *src, u32 *dst, size_t pixCount);
template void ConvertColorBuffer6665To8888<true>(const u32 *src, u32 *dst, size_t pixCount);
template void ConvertColorBuffer6665To8888<false>(const u32 *src, u32 *dst, size_t pixCount);
template void ConvertColorBuffer8888To5551<true, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer8888To5551<true, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer8888To5551<false, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer8888To5551<false, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer6665To5551<true, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer6665To5551<true, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer6665To5551<false, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer6665To5551<false, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);

View File

@ -25,9 +25,11 @@
#include <iosfwd>
#include "types.h"
#include "./utils/colorspacehandler/colorspacehandler.h"
#ifdef ENABLE_SSE2
#include <emmintrin.h>
#include "./utils/colorspacehandler/colorspacehandler_SSE2.h"
#endif
#ifdef ENABLE_SSSE3
@ -101,15 +103,6 @@ enum DisplayCaptureSize
DisplayCaptureSize_256x192 = 3,
};
// A 32-bit color that can be read or written either as one packed word or as
// four separate 8-bit components sharing the same storage.
union FragmentColor
{
u32 color; // all four components viewed as a single 32-bit value
struct
{
u8 r,g,b,a; // individual components, laid out in this order over the bytes of 'color'
};
};
typedef union
{
u32 value;
@ -1052,61 +1045,6 @@ enum NDSDisplayID
NDSDisplayID_Touch = 1
};
// Identifies a pixel color format. Each enumerator value is itself a packed
// description of the format (flags, channel order and per-channel bit counts),
// as laid out below.
enum NDSColorFormat
{
// The color format information is packed in a 32-bit value.
// The bits are as follows:
// FFFOOOOO AAAAAABB BBBBGGGG GGRRRRRR
//
// F = Flags (see below)
// O = Color order (see below)
// A = Bit count for alpha [0-63]
// B = Bit count for blue [0-63]
// G = Bit count for green [0-63]
// R = Bit count for red [0-63]
//
// Flags:
// Bit 29: Reverse order flag.
// Set = Bits are in reverse order, usually for little-endian usage.
// Cleared = Bits are in normal order, usually for big-endian usage.
//
// Color order bits, 24-28:
// 0x00 = RGBA, common format
// 0x01 = RGAB
// 0x02 = RBGA
// 0x03 = RBAG
// 0x04 = RAGB
// 0x05 = RABG
// 0x06 = GRBA
// 0x07 = GRAB
// 0x08 = GBRA
// 0x09 = GBAR
// 0x0A = GARB
// 0x0B = GABR
// 0x0C = BRGA
// 0x0D = BRAG
// 0x0E = BGRA, common format
// 0x0F = BGAR
// 0x10 = BARG
// 0x11 = BAGR
// 0x12 = ARGB
// 0x13 = ARBG
// 0x14 = AGRB
// 0x15 = AGBR
// 0x16 = ABRG
// 0x17 = ABGR
// Color formats used for internal processing.
// (Currently disabled.)
// NOTE(review): the ABGR5666 value was previously listed as 0x20186186, which under
// the bit layout above encodes a 6-bit alpha count; 0x20146186 encodes A=5, B=G=R=6.
// Confirm before enabling.
//NDSColorFormat_ABGR1555_Rev = 0x20045145,
//NDSColorFormat_ABGR5666_Rev = 0x20146186,
//NDSColorFormat_ABGR8888_Rev = 0x20208208,
// Color formats used by the output framebuffers.
// Each has the reverse-order flag set, color order 0x00, no alpha bits, and
// 5, 6 or 8 bits per color channel respectively.
NDSColorFormat_BGR555_Rev = 0x20005145,
NDSColorFormat_BGR666_Rev = 0x20006186,
NDSColorFormat_BGR888_Rev = 0x20008208
};
struct DISPCAPCNT_parsed
{
u8 EVA;
@ -1410,9 +1348,9 @@ protected:
template<size_t WIN_NUM> bool _IsWindowInsideVerticalRange(GPUEngineCompositorInfo &compInfo);
void _PerformWindowTesting(GPUEngineCompositorInfo &compInfo);
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compInfo);
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_LayerBG_ApplyColorEffectDisabledHint(GPUEngineCompositorInfo &compInfo);
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_LayerBG_ApplyMosaic(GPUEngineCompositorInfo &compInfo);
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> FORCEINLINE void _RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compInfo);
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED> FORCEINLINE void _RenderLine_LayerBG_ApplyColorEffectDisabledHint(GPUEngineCompositorInfo &compInfo);
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool ISCUSTOMRENDERINGNEEDED> FORCEINLINE void _RenderLine_LayerBG_ApplyMosaic(GPUEngineCompositorInfo &compInfo);
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool WILLPERFORMWINDOWTEST, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_LayerBG(GPUEngineCompositorInfo &compInfo);
template<NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> void _RenderLine_LayerOBJ(GPUEngineCompositorInfo &compInfo, itemsForPriority_t *__restrict item);
@ -1733,346 +1671,4 @@ public:
extern GPUSubsystem *GPU;
extern MMU_struct MMU;
// Component bit-depth expansion tables, indexed by the narrow source value.
extern CACHE_ALIGN const u32 material_5bit_to_31bit[32];
extern CACHE_ALIGN const u8 material_5bit_to_6bit[32];
extern CACHE_ALIGN const u8 material_5bit_to_8bit[32];
extern CACHE_ALIGN const u8 material_6bit_to_8bit[64];
extern CACHE_ALIGN const u8 material_3bit_to_5bit[8];
extern CACHE_ALIGN const u8 material_3bit_to_6bit[8];
extern CACHE_ALIGN const u8 material_3bit_to_8bit[8];
// Whole-color lookup tables, indexed by a 15-bit color value (0..32767).
extern CACHE_ALIGN u32 color_555_to_6665_opaque[32768];
extern CACHE_ALIGN u32 color_555_to_6665_opaque_swap_rb[32768];
extern CACHE_ALIGN u32 color_555_to_666[32768];
extern CACHE_ALIGN u32 color_555_to_8888_opaque[32768];
extern CACHE_ALIGN u32 color_555_to_8888_opaque_swap_rb[32768];
extern CACHE_ALIGN u32 color_555_to_888[32768];
// The macros below index the tables directly and perform no masking or range
// checking: 'col' must already be limited to 15 bits by the caller.
#define COLOR555TO6665_OPAQUE(col) (color_555_to_6665_opaque[(col)]) // Convert a 15-bit color to an opaque sparsely packed 32-bit color containing an RGBA6665 color
#define COLOR555TO6665_OPAQUE_SWAP_RB(col) (color_555_to_6665_opaque_swap_rb[(col)]) // Convert a 15-bit color to an opaque sparsely packed 32-bit color containing an RGBA6665 color with R and B components swapped
#define COLOR555TO666(col) (color_555_to_666[(col)]) // Convert a 15-bit color to a fully transparent sparsely packed 32-bit color containing an RGBA6665 color
// The alpha value is placed in the top byte on little-endian builds and in the
// low byte on big-endian builds.
#ifdef LOCAL_LE
#define COLOR555TO6665(col,alpha5) (((alpha5)<<24) | color_555_to_666[(col)]) // Convert a 15-bit color to a sparsely packed 32-bit color containing an RGBA6665 color with user-defined alpha, little-endian
#else
#define COLOR555TO6665(col,alpha5) ((alpha5) | color_555_to_666[(col)]) // Convert a 15-bit color to a sparsely packed 32-bit color containing an RGBA6665 color with user-defined alpha, big-endian
#endif
#define COLOR555TO8888_OPAQUE(col) (color_555_to_8888_opaque[(col)]) // Convert a 15-bit color to an opaque 32-bit color
#define COLOR555TO8888_OPAQUE_SWAP_RB(col) (color_555_to_8888_opaque_swap_rb[(col)]) // Convert a 15-bit color to an opaque 32-bit color with R and B components swapped
#define COLOR555TO888(col) (color_555_to_888[(col)]) // Convert a 15-bit color to an opaque 24-bit color or a fully transparent 32-bit color
#ifdef LOCAL_LE
#define COLOR555TO8888(col,alpha8) (((alpha8)<<24) | color_555_to_888[(col)]) // Convert a 15-bit color to a 32-bit color with user-defined alpha, little-endian
#else
#define COLOR555TO8888(col,alpha8) ((alpha8) | color_555_to_888[(col)]) // Convert a 15-bit color to a 32-bit color with user-defined alpha, big-endian
#endif
//produce a 15bpp color from individual 5bit components
#define R5G5B5TORGB15(r,g,b) ( (r) | ((g)<<5) | ((b)<<10) )
//produce a 15bpp color from individual 6bit components (the lowest bit of each component is discarded)
#define R6G6B6TORGB15(r,g,b) ( ((r)>>1) | (((g)&0x3E)<<4) | (((b)&0x3E)<<9) )
// Assembles a FragmentColor from four separate 8-bit components.
inline FragmentColor MakeFragmentColor(const u8 r, const u8 g, const u8 b, const u8 a)
{
	FragmentColor fragment;
	
	fragment.r = r;
	fragment.g = g;
	fragment.b = b;
	fragment.a = a;
	
	return fragment;
}
// Looks up the opaque 8888 equivalent of a 15-bit color. Bit 15 of src is ignored.
// SWAP_RB selects the table variant with the R and B channels exchanged.
template <bool SWAP_RB>
FORCEINLINE u32 ConvertColor555To8888Opaque(const u16 src)
{
	const u16 colorIndex = src & 0x7FFF;
	
	if (SWAP_RB)
	{
		return COLOR555TO8888_OPAQUE_SWAP_RB(colorIndex);
	}
	
	return COLOR555TO8888_OPAQUE(colorIndex);
}
// Looks up the opaque 6665 equivalent of a 15-bit color. Bit 15 of src is ignored.
// SWAP_RB selects the table variant with the R and B channels exchanged.
template <bool SWAP_RB>
FORCEINLINE u32 ConvertColor555To6665Opaque(const u16 src)
{
	const u16 colorIndex = src & 0x7FFF;
	
	if (SWAP_RB)
	{
		return COLOR555TO6665_OPAQUE_SWAP_RB(colorIndex);
	}
	
	return COLOR555TO6665_OPAQUE(colorIndex);
}
// Reduces an 8888 color to 6665 by dropping the low bits of each component:
// two bits from R, G and B, and three bits from alpha.
// SWAP_RB exchanges the R and B channels in the result.
template <bool SWAP_RB>
FORCEINLINE u32 ConvertColor8888To6665(FragmentColor srcColor)
{
	const u8 red8  = (SWAP_RB) ? srcColor.b : srcColor.r;
	const u8 blue8 = (SWAP_RB) ? srcColor.r : srcColor.b;
	
	FragmentColor reduced;
	reduced.r = red8 >> 2;
	reduced.g = srcColor.g >> 2;
	reduced.b = blue8 >> 2;
	reduced.a = srcColor.a >> 3;
	
	return reduced.color;
}
// Packed-word convenience overload: wraps the 32-bit value in a FragmentColor
// and defers to the component-wise 8888 -> 6665 conversion.
template <bool SWAP_RB>
FORCEINLINE u32 ConvertColor8888To6665(u32 srcColor)
{
	FragmentColor unpacked;
	unpacked.color = srcColor;
	
	return ConvertColor8888To6665<SWAP_RB>(unpacked);
}
// Expands a 6665 color to 8888 through the component expansion tables:
// R, G and B go through the 6-bit table, alpha through the 5-bit table.
// SWAP_RB exchanges the R and B channels in the result.
template <bool SWAP_RB>
FORCEINLINE u32 ConvertColor6665To8888(FragmentColor srcColor)
{
	const u8 red6  = (SWAP_RB) ? srcColor.b : srcColor.r;
	const u8 blue6 = (SWAP_RB) ? srcColor.r : srcColor.b;
	
	FragmentColor expanded;
	expanded.r = material_6bit_to_8bit[red6];
	expanded.g = material_6bit_to_8bit[srcColor.g];
	expanded.b = material_6bit_to_8bit[blue6];
	expanded.a = material_5bit_to_8bit[srcColor.a];
	
	return expanded.color;
}
// Packed-word convenience overload: wraps the 32-bit value in a FragmentColor
// and defers to the component-wise 6665 -> 8888 conversion.
template <bool SWAP_RB>
FORCEINLINE u32 ConvertColor6665To8888(u32 srcColor)
{
	FragmentColor unpacked;
	unpacked.color = srcColor;
	
	return ConvertColor6665To8888<SWAP_RB>(unpacked);
}
// Reduces an 8888 color to 5551: each color channel keeps its top five bits and
// bit 15 is set whenever the source alpha is non-zero.
// SWAP_RB exchanges the R and B channels in the result.
template <bool SWAP_RB>
FORCEINLINE u16 ConvertColor8888To5551(FragmentColor srcColor)
{
	const u8 red5   = ((SWAP_RB) ? srcColor.b : srcColor.r) >> 3;
	const u8 green5 = srcColor.g >> 3;
	const u8 blue5  = ((SWAP_RB) ? srcColor.r : srcColor.b) >> 3;
	
	if (srcColor.a == 0)
	{
		return R5G5B5TORGB15(red5, green5, blue5) | 0x0000;
	}
	
	return R5G5B5TORGB15(red5, green5, blue5) | 0x8000;
}
// Packed-word convenience overload: wraps the 32-bit value in a FragmentColor
// and defers to the component-wise 8888 -> 5551 conversion.
template <bool SWAP_RB>
FORCEINLINE u16 ConvertColor8888To5551(u32 srcColor)
{
	FragmentColor unpacked;
	unpacked.color = srcColor;
	
	return ConvertColor8888To5551<SWAP_RB>(unpacked);
}
// Reduces a 6665 color to 5551: the lowest bit of each 6-bit channel is dropped
// and bit 15 is set whenever the source alpha is non-zero.
// SWAP_RB exchanges the R and B channels in the result.
template <bool SWAP_RB>
FORCEINLINE u16 ConvertColor6665To5551(FragmentColor srcColor)
{
	const u8 red6  = (SWAP_RB) ? srcColor.b : srcColor.r;
	const u8 blue6 = (SWAP_RB) ? srcColor.r : srcColor.b;
	
	if (srcColor.a == 0)
	{
		return R6G6B6TORGB15(red6, srcColor.g, blue6) | 0x0000;
	}
	
	return R6G6B6TORGB15(red6, srcColor.g, blue6) | 0x8000;
}
// Packed-word convenience overload: wraps the 32-bit value in a FragmentColor
// and defers to the component-wise 6665 -> 5551 conversion.
template <bool SWAP_RB>
FORCEINLINE u16 ConvertColor6665To5551(u32 srcColor)
{
	FragmentColor unpacked;
	unpacked.color = srcColor;
	
	return ConvertColor6665To5551<SWAP_RB>(unpacked);
}
#ifdef ENABLE_SSE2
// Converts eight 15-bit (555) colors held in one SSE2 register into eight 32-bit
// 8888 colors, written to two output registers.
//
// srcColor          - eight packed 16-bit source colors.
// srcAlphaBits32Lo  - bits OR'd into each of the four low output pixels (the caller
//                     supplies the alpha bits already positioned within each 32-bit lane).
// srcAlphaBits32Hi  - same, for the four high output pixels.
// dstLo / dstHi     - receive source pixels 0-3 and 4-7 respectively.
// SWAP_RB           - exchange the R and B channels in the output.
template <bool SWAP_RB>
FORCEINLINE void ConvertColor555To8888(const __m128i &srcColor, const __m128i &srcAlphaBits32Lo, const __m128i &srcAlphaBits32Hi, __m128i &dstLo, __m128i &dstHi)
{
__m128i src32;
// Conversion algorithm:
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
// Low four pixels: zero-extend each 16-bit color to a 32-bit lane.
src32 = _mm_unpacklo_epi16(srcColor, _mm_setzero_si128());
// Position the two outer channels in the top five bits of bytes 0 and 2 (which source
// channel lands in which byte depends on SWAP_RB), then mask away everything else.
dstLo = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 19), _mm_srli_epi32(src32, 7)) : _mm_or_si128(_mm_slli_epi32(src32, 3), _mm_slli_epi32(src32, 9));
dstLo = _mm_and_si128( dstLo, _mm_set1_epi32(0x00F800F8) );
// Position the middle (green) channel in the top five bits of byte 1.
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_slli_epi32(src32, 6), _mm_set1_epi32(0x0000F800)) );
// Copy the top three bits of each channel into its low three bits.
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_srli_epi32(dstLo, 5), _mm_set1_epi32(0x00070707)) );
dstLo = _mm_or_si128( dstLo, srcAlphaBits32Lo );
// High four pixels: the same sequence applied to the upper half of the source register.
src32 = _mm_unpackhi_epi16(srcColor, _mm_setzero_si128());
dstHi = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 19), _mm_srli_epi32(src32, 7)) : _mm_or_si128(_mm_slli_epi32(src32, 3), _mm_slli_epi32(src32, 9));
dstHi = _mm_and_si128( dstHi, _mm_set1_epi32(0x00F800F8) );
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_slli_epi32(src32, 6), _mm_set1_epi32(0x0000F800)) );
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_srli_epi32(dstHi, 5), _mm_set1_epi32(0x00070707)) );
dstHi = _mm_or_si128( dstHi, srcAlphaBits32Hi );
}
template <bool SWAP_RB>
FORCEINLINE void ConvertColor555To6665(const __m128i &srcColor, const __m128i &srcAlphaBits32Lo, const __m128i &srcAlphaBits32Hi, __m128i &dstLo, __m128i &dstHi)
{
	// Expands the eight 16-bit 5-5-5 pixels in srcColor into eight 32-bit
	// 6-6-6-5 pixels: the lower four go to dstLo, the upper four to dstHi.
	// The caller-supplied alpha bits are ORed into each result unchanged.
	//
	// Conversion algorithm:
	//    RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
	//
	// Without SWAP_RB, source bits 0-4 land in byte 0 and bits 10-14 in byte 2;
	// with SWAP_RB those two destinations are exchanged. Bits 5-9 always land in byte 1.
	const __m128i zero        = _mm_setzero_si128();
	const __m128i outerMask   = _mm_set1_epi32(0x003E003E); // bits 1-5 of bytes 0 and 2
	const __m128i middleMask  = _mm_set1_epi32(0x00003E00); // bits 1-5 of byte 1
	const __m128i lowFillMask = _mm_set1_epi32(0x00010101); // bit 0 of bytes 0-2
	__m128i widened;
	
	// Lower four pixels
	widened = _mm_unpacklo_epi16(srcColor, zero);
	if (SWAP_RB)
	{
		dstLo = _mm_or_si128(_mm_slli_epi32(widened, 17), _mm_srli_epi32(widened, 9));
	}
	else
	{
		dstLo = _mm_or_si128(_mm_slli_epi32(widened, 1), _mm_slli_epi32(widened, 7));
	}
	dstLo = _mm_and_si128(dstLo, outerMask);
	dstLo = _mm_or_si128(dstLo, _mm_and_si128(_mm_slli_epi32(widened, 4), middleMask));
	// Copy the top bit of each 6-bit channel into its lowest bit.
	dstLo = _mm_or_si128(dstLo, _mm_and_si128(_mm_srli_epi32(dstLo, 5), lowFillMask));
	dstLo = _mm_or_si128(dstLo, srcAlphaBits32Lo);
	
	// Upper four pixels
	widened = _mm_unpackhi_epi16(srcColor, zero);
	if (SWAP_RB)
	{
		dstHi = _mm_or_si128(_mm_slli_epi32(widened, 17), _mm_srli_epi32(widened, 9));
	}
	else
	{
		dstHi = _mm_or_si128(_mm_slli_epi32(widened, 1), _mm_slli_epi32(widened, 7));
	}
	dstHi = _mm_and_si128(dstHi, outerMask);
	dstHi = _mm_or_si128(dstHi, _mm_and_si128(_mm_slli_epi32(widened, 4), middleMask));
	dstHi = _mm_or_si128(dstHi, _mm_and_si128(_mm_srli_epi32(dstHi, 5), lowFillMask));
	dstHi = _mm_or_si128(dstHi, srcAlphaBits32Hi);
}
template <bool SWAP_RB>
FORCEINLINE void ConvertColor555To8888Opaque(const __m128i &srcColor, __m128i &dstLo, __m128i &dstHi)
{
	// Same as ConvertColor555To8888, with every output pixel given the
	// maximum 8-bit alpha (0xFF in the top byte).
	const __m128i fullAlpha8 = _mm_set1_epi32(0xFF000000);
	
	ConvertColor555To8888<SWAP_RB>(srcColor, fullAlpha8, fullAlpha8, dstLo, dstHi);
}
template <bool SWAP_RB>
FORCEINLINE void ConvertColor555To6665Opaque(const __m128i &srcColor, __m128i &dstLo, __m128i &dstHi)
{
	// Same as ConvertColor555To6665, with every output pixel given the
	// maximum 5-bit alpha (0x1F in the top byte).
	const __m128i fullAlpha5 = _mm_set1_epi32(0x1F000000);
	
	ConvertColor555To6665<SWAP_RB>(srcColor, fullAlpha5, fullAlpha5, dstLo, dstHi);
}
template <bool SWAP_RB>
FORCEINLINE __m128i ConvertColor8888To6665(const __m128i &src)
{
	// Converts four packed 32-bit 8-8-8-8 pixels into four 6-6-6-5 pixels.
	// When SWAP_RB is true, bytes 0 and 2 of every pixel are exchanged.
	//
	// Conversion algorithm:
	//    RGB   8-bit to 6-bit formula: dstRGB6 = (srcRGB8 >> 2)
	//    Alpha 8-bit to 5-bit formula: dstA5   = (srcA8   >> 3)
	__m128i rgb = _mm_and_si128( _mm_srli_epi32(src, 2), _mm_set1_epi32(0x003F3F3F) );
	const __m128i a = _mm_and_si128( _mm_srli_epi32(src, 3), _mm_set1_epi32(0x1F000000) );
	
	if (SWAP_RB)
	{
#ifdef ENABLE_SSSE3
		// Byte shuffle swaps bytes 0 and 2 of each 32-bit lane in one instruction.
		rgb = _mm_shuffle_epi8( rgb, _mm_set_epi8(15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2) );
#else
		// SSE2-only fallback: swap bytes 0 and 2 of the already-converted 6-bit
		// channels. Operating on the converted value (rather than masking the raw
		// 8-bit source with 6-bit masks) keeps the upper 6 bits of each channel
		// and leaves byte 1 in place, matching the SSSE3 path.
		rgb = _mm_or_si128( _mm_srli_epi32(_mm_and_si128(rgb, _mm_set1_epi32(0x003F0000)), 16), _mm_or_si128(_mm_and_si128(rgb, _mm_set1_epi32(0x00003F00)), _mm_slli_epi32(_mm_and_si128(rgb, _mm_set1_epi32(0x0000003F)), 16)) );
#endif
	}
	
	return _mm_or_si128(rgb, a);
}
template <bool SWAP_RB>
FORCEINLINE __m128i ConvertColor6665To8888(const __m128i &src)
{
	// Converts four packed 32-bit 6-6-6-5 pixels into four 8-8-8-8 pixels.
	// When SWAP_RB is true, bytes 0 and 2 of every pixel are exchanged.
	//
	// Conversion algorithm:
	//    RGB   6-bit to 8-bit formula: dstRGB8 = (srcRGB6 << 2) | ((srcRGB6 >> 4) & 0x03)
	//    Alpha 5-bit to 8-bit formula: dstA8   = (srcA5   << 3) | ((srcA5   >> 2) & 0x07)
	__m128i rgb = _mm_or_si128( _mm_and_si128(_mm_slli_epi32(src, 2), _mm_set1_epi32(0x00FCFCFC)), _mm_and_si128(_mm_srli_epi32(src, 4), _mm_set1_epi32(0x00030303)) );
	const __m128i a = _mm_or_si128( _mm_and_si128(_mm_slli_epi32(src, 3), _mm_set1_epi32(0xF8000000)), _mm_and_si128(_mm_srli_epi32(src, 2), _mm_set1_epi32(0x07000000)) );
	
	if (SWAP_RB)
	{
#ifdef ENABLE_SSSE3
		// Byte shuffle swaps bytes 0 and 2 of each 32-bit lane in one instruction.
		rgb = _mm_shuffle_epi8( rgb, _mm_set_epi8(15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2) );
#else
		// SSE2-only fallback: swap bytes 0 and 2 of the converted 8-bit channels.
		// The swap must read from rgb, not src; reading src would throw away the
		// 6-bit to 8-bit expansion computed above and emit unconverted values.
		rgb = _mm_or_si128( _mm_srli_epi32(_mm_and_si128(rgb, _mm_set1_epi32(0x00FF0000)), 16), _mm_or_si128(_mm_and_si128(rgb, _mm_set1_epi32(0x0000FF00)), _mm_slli_epi32(_mm_and_si128(rgb, _mm_set1_epi32(0x000000FF)), 16)) );
#endif
	}
	
	return _mm_or_si128(rgb, a);
}
template <NDSColorFormat COLORFORMAT, bool SWAP_RB>
FORCEINLINE __m128i _ConvertColorBaseTo5551(const __m128i &srcLo, const __m128i &srcHi)
{
	// Packs eight 32-bit pixels (four in srcLo, four in srcHi) into eight
	// 16-bit 5-5-5-1 pixels. The top 5 bits of each colour channel are kept,
	// and bit 15 of each result is set when the pixel's alpha is non-zero.
	// SWAP_RB exchanges which source byte (0 or 2) feeds output bits 0-4 and 10-14.
	//
	// For NDSColorFormat_BGR555_Rev, srcLo is returned untouched and srcHi is ignored.
	if (COLORFORMAT == NDSColorFormat_BGR555_Rev)
	{
		return srcLo;
	}
	
	const __m128i field0Mask = _mm_set1_epi32(0x0000001F); // output bits 0-4
	const __m128i field1Mask = _mm_set1_epi32(0x000003E0); // output bits 5-9
	const __m128i field2Mask = _mm_set1_epi32(0x00007C00); // output bits 10-14
	const __m128i alphaFlag  = _mm_set1_epi16(0x8000);     // output bit 15
	
	// NOTE(review): these stay uninitialized if COLORFORMAT is none of the
	// formats handled here -- confirm callers only instantiate the handled ones.
	__m128i packLo;
	__m128i packHi;
	__m128i alphaBits;
	
	if (COLORFORMAT == NDSColorFormat_BGR666_Rev)
	{
		if (SWAP_RB)
		{
			// Lower four pixels
			packLo = _mm_and_si128(_mm_srli_epi32(srcLo, 17), field0Mask);
			packLo = _mm_or_si128(packLo, _mm_and_si128(_mm_srli_epi32(srcLo, 4), field1Mask));
			packLo = _mm_or_si128(packLo, _mm_and_si128(_mm_slli_epi32(srcLo, 9), field2Mask));
			
			// Upper four pixels
			packHi = _mm_and_si128(_mm_srli_epi32(srcHi, 17), field0Mask);
			packHi = _mm_or_si128(packHi, _mm_and_si128(_mm_srli_epi32(srcHi, 4), field1Mask));
			packHi = _mm_or_si128(packHi, _mm_and_si128(_mm_slli_epi32(srcHi, 9), field2Mask));
		}
		else
		{
			// Lower four pixels
			packLo = _mm_and_si128(_mm_srli_epi32(srcLo, 1), field0Mask);
			packLo = _mm_or_si128(packLo, _mm_and_si128(_mm_srli_epi32(srcLo, 4), field1Mask));
			packLo = _mm_or_si128(packLo, _mm_and_si128(_mm_srli_epi32(srcLo, 7), field2Mask));
			
			// Upper four pixels
			packHi = _mm_and_si128(_mm_srli_epi32(srcHi, 1), field0Mask);
			packHi = _mm_or_si128(packHi, _mm_and_si128(_mm_srli_epi32(srcHi, 4), field1Mask));
			packHi = _mm_or_si128(packHi, _mm_and_si128(_mm_srli_epi32(srcHi, 7), field2Mask));
		}
		
		// 5-bit alpha lives in the low bits of the top byte; any non-zero value sets the flag.
		const __m128i alpha5Mask = _mm_set1_epi32(0x0000001F);
		alphaBits = _mm_packs_epi32( _mm_and_si128(_mm_srli_epi32(srcLo, 24), alpha5Mask), _mm_and_si128(_mm_srli_epi32(srcHi, 24), alpha5Mask) );
		alphaBits = _mm_cmpgt_epi16(alphaBits, _mm_setzero_si128());
		alphaBits = _mm_and_si128(alphaBits, alphaFlag);
	}
	else if (COLORFORMAT == NDSColorFormat_BGR888_Rev)
	{
		if (SWAP_RB)
		{
			// Lower four pixels
			packLo = _mm_and_si128(_mm_srli_epi32(srcLo, 19), field0Mask);
			packLo = _mm_or_si128(packLo, _mm_and_si128(_mm_srli_epi32(srcLo, 6), field1Mask));
			packLo = _mm_or_si128(packLo, _mm_and_si128(_mm_slli_epi32(srcLo, 7), field2Mask));
			
			// Upper four pixels
			packHi = _mm_and_si128(_mm_srli_epi32(srcHi, 19), field0Mask);
			packHi = _mm_or_si128(packHi, _mm_and_si128(_mm_srli_epi32(srcHi, 6), field1Mask));
			packHi = _mm_or_si128(packHi, _mm_and_si128(_mm_slli_epi32(srcHi, 7), field2Mask));
		}
		else
		{
			// Lower four pixels
			packLo = _mm_and_si128(_mm_srli_epi32(srcLo, 3), field0Mask);
			packLo = _mm_or_si128(packLo, _mm_and_si128(_mm_srli_epi32(srcLo, 6), field1Mask));
			packLo = _mm_or_si128(packLo, _mm_and_si128(_mm_srli_epi32(srcLo, 9), field2Mask));
			
			// Upper four pixels
			packHi = _mm_and_si128(_mm_srli_epi32(srcHi, 3), field0Mask);
			packHi = _mm_or_si128(packHi, _mm_and_si128(_mm_srli_epi32(srcHi, 6), field1Mask));
			packHi = _mm_or_si128(packHi, _mm_and_si128(_mm_srli_epi32(srcHi, 9), field2Mask));
		}
		
		// 8-bit alpha fills the whole top byte; any non-zero value sets the flag.
		const __m128i alpha8Mask = _mm_set1_epi32(0x000000FF);
		alphaBits = _mm_packs_epi32( _mm_and_si128(_mm_srli_epi32(srcLo, 24), alpha8Mask), _mm_and_si128(_mm_srli_epi32(srcHi, 24), alpha8Mask) );
		alphaBits = _mm_cmpgt_epi16(alphaBits, _mm_setzero_si128());
		alphaBits = _mm_and_si128(alphaBits, alphaFlag);
	}
	
	// Narrow the two 32-bit halves to 16 bits each and merge in the alpha flag.
	const __m128i packedColor = _mm_packs_epi32(packLo, packHi);
	return _mm_or_si128(packedColor, alphaBits);
}
template <bool SWAP_RB>
FORCEINLINE __m128i ConvertColor8888To5551(const __m128i &srcLo, const __m128i &srcHi)
{
	// Packs eight 8-8-8-8 pixels (srcLo + srcHi) into eight 5-5-5-1 pixels
	// by forwarding to the shared converter with the 888 source format.
	const __m128i packed5551 = _ConvertColorBaseTo5551<NDSColorFormat_BGR888_Rev, SWAP_RB>(srcLo, srcHi);
	return packed5551;
}
template <bool SWAP_RB>
FORCEINLINE __m128i ConvertColor6665To5551(const __m128i &srcLo, const __m128i &srcHi)
{
	// Packs eight 6-6-6-5 pixels (srcLo + srcHi) into eight 5-5-5-1 pixels
	// by forwarding to the shared converter with the 666 source format.
	const __m128i packed5551 = _ConvertColorBaseTo5551<NDSColorFormat_BGR666_Rev, SWAP_RB>(srcLo, srcHi);
	return packed5551;
}
#endif
// Whole-buffer colour conversion routines (declarations only; the definitions live elsewhere).
// Each takes a source buffer, a destination buffer and a pixel count (pixCount).
// SWAP_RB presumably requests the same channel exchange as in the per-pixel converters -- confirm against the definitions.
// NOTE(review): UNALIGNED presumably signals that src/dst may not meet SIMD alignment -- confirm against the definitions.

// 16-bit 555 source -> 32-bit destination (8888 or 6665), Opaque variants.
template<bool SWAP_RB, bool UNALIGNED> void ConvertColorBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool UNALIGNED> void ConvertColorBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
// 32-bit <-> 32-bit conversions. These pointers are not __restrict-qualified, unlike the others.
template<bool SWAP_RB> void ConvertColorBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount);
template<bool SWAP_RB> void ConvertColorBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount);
// 32-bit source (8888 or 6665) -> 16-bit 5551 destination.
template<bool SWAP_RB, bool UNALIGNED> void ConvertColorBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool UNALIGNED> void ConvertColorBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
#endif

View File

@ -52,6 +52,7 @@ libdesmume_a_SOURCES = \
utils/decrypt/decrypt.h utils/decrypt/header.cpp utils/decrypt/header.h \
utils/task.cpp utils/task.h \
utils/vfat.h utils/vfat.cpp \
utils/colorspacehandler/colorspacehandler.cpp \
utils/dlditool.cpp \
utils/libfat/bit_ops.h \
utils/libfat/cache.cpp \
@ -107,6 +108,21 @@ libdesmume_a_SOURCES = \
libretro-common/rthreads/async_job.c \
libretro-common/rthreads/rsemaphore.c \
libretro-common/rthreads/rthreads.c
# Build the SSE2 colorspace routines only when the SUPPORT_SSE2 conditional is true.
if SUPPORT_SSE2
libdesmume_a_SOURCES += \
	utils/colorspacehandler/colorspacehandler_SSE2.cpp
endif
# Build the AVX2 colorspace routines only when the SUPPORT_AVX2 conditional is true.
if SUPPORT_AVX2
libdesmume_a_SOURCES += \
	utils/colorspacehandler/colorspacehandler_AVX2.cpp
endif
# Build the AltiVec colorspace routines only when the SUPPORT_ALTIVEC conditional is true.
if SUPPORT_ALTIVEC
libdesmume_a_SOURCES += \
	utils/colorspacehandler/colorspacehandler_AltiVec.cpp
endif
if HAVE_JIT
libdesmume_a_SOURCES += \

View File

@ -32,6 +32,7 @@
#ifdef ENABLE_SSE2
#include <emmintrin.h>
#include "./utils/colorspacehandler/colorspacehandler_SSE2.h"
#endif
typedef struct
@ -990,9 +991,9 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0));
const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + i + 4));
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 0), ConvertColor8888To6665<true>(srcColorLo) );
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 4), ConvertColor8888To6665<true>(srcColorHi) );
_mm_store_si128( (__m128i *)(dstRGBA5551 + i), ConvertColor8888To5551<true>(srcColorLo, srcColorHi) );
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 0), ColorspaceConvert8888To6665_SSE2<true>(srcColorLo) );
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 4), ColorspaceConvert8888To6665_SSE2<true>(srcColorHi) );
_mm_store_si128( (__m128i *)(dstRGBA5551 + i), ColorspaceConvert8888To5551_SSE2<true>(srcColorLo, srcColorHi) );
}
#endif
@ -1001,17 +1002,17 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
#endif
for (; i < pixCount; i++)
{
dstFramebuffer[i].color = ConvertColor8888To6665<true>(srcFramebuffer[i]);
dstRGBA5551[i] = ConvertColor8888To5551<true>(srcFramebuffer[i]);
dstFramebuffer[i].color = ColorspaceConvert8888To6665<true>(srcFramebuffer[i]);
dstRGBA5551[i] = ColorspaceConvert8888To5551<true>(srcFramebuffer[i]);
}
}
else if (dstFramebuffer != NULL)
{
ConvertColorBuffer8888To6665<true>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
ColorspaceConvertBuffer8888To6665<true, false>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
}
else
{
ConvertColorBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
}
}
else if (this->_outputFormat == NDSColorFormat_BGR888_Rev)
@ -1027,7 +1028,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 0), srcColorLo );
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 4), srcColorHi );
_mm_store_si128( (__m128i *)(dstRGBA5551 + i), ConvertColor8888To5551<true>(srcColorLo, srcColorHi) );
_mm_store_si128( (__m128i *)(dstRGBA5551 + i), ColorspaceConvert8888To5551_SSE2<true>(srcColorLo, srcColorHi) );
}
#endif
@ -1036,8 +1037,8 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
#endif
for (; i < pixCount; i++)
{
dstFramebuffer[i].color = ConvertColor8888To6665<true>(srcFramebuffer[i]);
dstRGBA5551[i] = ConvertColor8888To5551<true>(srcFramebuffer[i]);
dstFramebuffer[i].color = ColorspaceConvert8888To6665<true>(srcFramebuffer[i]);
dstRGBA5551[i] = ColorspaceConvert8888To5551<true>(srcFramebuffer[i]);
}
}
else if (dstFramebuffer != NULL)
@ -1046,7 +1047,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
}
else
{
ConvertColorBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
}
}
}
@ -1068,9 +1069,9 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 0));
const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 4));
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 0), ConvertColor8888To6665<true>(srcColorLo) );
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 4), ConvertColor8888To6665<true>(srcColorHi) );
_mm_store_si128( (__m128i *)(dstRGBA5551 + iw), ConvertColor8888To5551<true>(srcColorLo, srcColorHi) );
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 0), ColorspaceConvert8888To6665_SSE2<true>(srcColorLo) );
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 4), ColorspaceConvert8888To6665_SSE2<true>(srcColorHi) );
_mm_store_si128( (__m128i *)(dstRGBA5551 + iw), ColorspaceConvert8888To5551_SSE2<true>(srcColorLo, srcColorHi) );
}
#endif
@ -1079,8 +1080,8 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
#endif
for (; x < pixCount; x++, ir++, iw++)
{
dstFramebuffer[iw].color = ConvertColor8888To6665<true>(srcFramebuffer[ir]);
dstRGBA5551[iw] = ConvertColor8888To5551<true>(srcFramebuffer[ir]);
dstFramebuffer[iw].color = ColorspaceConvert8888To6665<true>(srcFramebuffer[ir]);
dstRGBA5551[iw] = ColorspaceConvert8888To5551<true>(srcFramebuffer[ir]);
}
}
}
@ -1088,14 +1089,14 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
{
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
{
ConvertColorBuffer8888To6665<true>((u32 *)srcFramebuffer + ir, (u32 *)dstFramebuffer + iw, pixCount);
ColorspaceConvertBuffer8888To6665<true, false>((u32 *)srcFramebuffer + ir, (u32 *)dstFramebuffer + iw, pixCount);
}
}
else
{
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
{
ConvertColorBuffer8888To5551<true, false>((u32 *)srcFramebuffer + ir, dstRGBA5551 + iw, pixCount);
ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer + ir, dstRGBA5551 + iw, pixCount);
}
}
}
@ -1115,7 +1116,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 0), srcColorLo );
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 4), srcColorHi );
_mm_store_si128( (__m128i *)(dstRGBA5551 + iw), ConvertColor8888To5551<true>(srcColorLo, srcColorHi) );
_mm_store_si128( (__m128i *)(dstRGBA5551 + iw), ColorspaceConvert8888To5551_SSE2<true>(srcColorLo, srcColorHi) );
}
#endif
@ -1125,7 +1126,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
for (; x < pixCount; x++, ir++, iw++)
{
dstFramebuffer[iw] = srcFramebuffer[ir];
dstRGBA5551[iw] = ConvertColor8888To5551<true>(srcFramebuffer[ir]);
dstRGBA5551[iw] = ColorspaceConvert8888To5551<true>(srcFramebuffer[ir]);
}
}
}
@ -1146,7 +1147,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
{
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
{
ConvertColorBuffer8888To5551<true, false>((u32 *)srcFramebuffer + ir, dstRGBA5551 + iw, pixCount);
ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer + ir, dstRGBA5551 + iw, pixCount);
}
}
}

View File

@ -243,6 +243,8 @@
AB564915186E6F67002740F4 /* Image_Piano.png in Resources */ = {isa = PBXBuildFile; fileRef = AB56490B186E6F67002740F4 /* Image_Piano.png */; };
AB5785FD17176AFC002C5FC7 /* OpenEmuBase.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = AB5785FC17176AFC002C5FC7 /* OpenEmuBase.framework */; };
AB58F32D1364F44B0074C376 /* cocoa_file.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB58F32C1364F44B0074C376 /* cocoa_file.mm */; };
AB5FDDAC1D62C89E0094617C /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */; };
AB5FDDAD1D62C8A00094617C /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */; };
AB64987C13ECC73800EE7DD2 /* FileTypeInfo.plist in Resources */ = {isa = PBXBuildFile; fileRef = AB64987B13ECC73800EE7DD2 /* FileTypeInfo.plist */; };
AB68101B187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png in Resources */ = {isa = PBXBuildFile; fileRef = AB681013187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png */; };
AB68101C187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png in Resources */ = {isa = PBXBuildFile; fileRef = AB681013187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png */; };
@ -974,6 +976,12 @@
ABB97878144E89CC00793FA3 /* Icon_DeSmuME_32x32.png in Resources */ = {isa = PBXBuildFile; fileRef = ABB97875144E89CC00793FA3 /* Icon_DeSmuME_32x32.png */; };
ABBC0F8D1394B1AA0028B6BD /* DefaultUserPrefs.plist in Resources */ = {isa = PBXBuildFile; fileRef = ABBC0F8C1394B1AA0028B6BD /* DefaultUserPrefs.plist */; };
ABBF04A514B515F300E505A0 /* AppIcon_ROMCheats.icns in Resources */ = {isa = PBXBuildFile; fileRef = ABBF04A414B515F300E505A0 /* AppIcon_ROMCheats.icns */; };
ABBFFF851D6283C0003CD598 /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */; };
ABBFFF861D6283C1003CD598 /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */; };
ABBFFF871D6283C1003CD598 /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */; };
ABBFFF891D6283D2003CD598 /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */; };
ABBFFF8A1D6283D3003CD598 /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */; };
ABBFFF8B1D6283D3003CD598 /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */; };
ABC3AF2F14B7F06900D5B13D /* Icon_VolumeFull_16x16.png in Resources */ = {isa = PBXBuildFile; fileRef = ABC3AF2B14B7F06900D5B13D /* Icon_VolumeFull_16x16.png */; };
ABC3AF3014B7F06900D5B13D /* Icon_VolumeMute_16x16.png in Resources */ = {isa = PBXBuildFile; fileRef = ABC3AF2C14B7F06900D5B13D /* Icon_VolumeMute_16x16.png */; };
ABC3AF3114B7F06900D5B13D /* Icon_VolumeOneThird_16x16.png in Resources */ = {isa = PBXBuildFile; fileRef = ABC3AF2D14B7F06900D5B13D /* Icon_VolumeOneThird_16x16.png */; };
@ -1534,6 +1542,14 @@
ABBB421516B4A5F30012E5AB /* OGLRender_3_2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = OGLRender_3_2.h; path = ../OGLRender_3_2.h; sourceTree = "<group>"; };
ABBC0F8C1394B1AA0028B6BD /* DefaultUserPrefs.plist */ = {isa = PBXFileReference; lastKnownFileType = file.bplist; path = DefaultUserPrefs.plist; sourceTree = "<group>"; };
ABBF04A414B515F300E505A0 /* AppIcon_ROMCheats.icns */ = {isa = PBXFileReference; lastKnownFileType = image.icns; path = AppIcon_ROMCheats.icns; sourceTree = "<group>"; };
ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler.cpp; sourceTree = "<group>"; };
ABBFFF701D5F9C52003CD598 /* colorspacehandler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colorspacehandler.h; sourceTree = "<group>"; };
ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler_SSE2.cpp; sourceTree = "<group>"; };
ABBFFF761D5FD2ED003CD598 /* colorspacehandler_SSE2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colorspacehandler_SSE2.h; sourceTree = "<group>"; };
ABBFFF7B1D610457003CD598 /* colorspacehandler_AVX2.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler_AVX2.cpp; sourceTree = "<group>"; };
ABBFFF7C1D610457003CD598 /* colorspacehandler_AVX2.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = colorspacehandler_AVX2.h; sourceTree = "<group>"; };
ABBFFF811D611A36003CD598 /* colorspacehandler_AltiVec.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler_AltiVec.cpp; sourceTree = "<group>"; };
ABBFFF821D611A36003CD598 /* colorspacehandler_AltiVec.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = colorspacehandler_AltiVec.h; sourceTree = "<group>"; };
ABC3AF2B14B7F06900D5B13D /* Icon_VolumeFull_16x16.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_VolumeFull_16x16.png; path = images/Icon_VolumeFull_16x16.png; sourceTree = "<group>"; };
ABC3AF2C14B7F06900D5B13D /* Icon_VolumeMute_16x16.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_VolumeMute_16x16.png; path = images/Icon_VolumeMute_16x16.png; sourceTree = "<group>"; };
ABC3AF2D14B7F06900D5B13D /* Icon_VolumeOneThird_16x16.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_VolumeOneThird_16x16.png; path = images/Icon_VolumeOneThird_16x16.png; sourceTree = "<group>"; };
@ -2508,6 +2524,21 @@
path = openemu;
sourceTree = "<group>";
};
ABBFFF6E1D5F9C10003CD598 /* colorspacehandler */ = {
isa = PBXGroup;
children = (
ABBFFF811D611A36003CD598 /* colorspacehandler_AltiVec.cpp */,
ABBFFF7B1D610457003CD598 /* colorspacehandler_AVX2.cpp */,
ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */,
ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */,
ABBFFF821D611A36003CD598 /* colorspacehandler_AltiVec.h */,
ABBFFF7C1D610457003CD598 /* colorspacehandler_AVX2.h */,
ABBFFF761D5FD2ED003CD598 /* colorspacehandler_SSE2.h */,
ABBFFF701D5F9C52003CD598 /* colorspacehandler.h */,
);
path = colorspacehandler;
sourceTree = "<group>";
};
ABC2ECD613B1C87000FAAA2A /* Images */ = {
isa = PBXGroup;
children = (
@ -2759,6 +2790,7 @@
ABD1FF211345ACBF00AF11D1 /* decrypt */,
ABD1FF2E1345ACBF00AF11D1 /* libfat */,
ABE670241415DE6C00E8E4C9 /* tinyxml */,
ABBFFF6E1D5F9C10003CD598 /* colorspacehandler */,
ABD1FF1D1345ACBF00AF11D1 /* ConvertUTF.c */,
AB9038A517C5ECFD00F410BD /* advanscene.cpp */,
ABD1FF1F1345ACBF00AF11D1 /* datetime.cpp */,
@ -3770,6 +3802,7 @@
ABE6840D189E33BC007FD69C /* OGLDisplayOutput.cpp in Sources */,
ABD1FF121345AC9C00AF11D1 /* slot2_none.cpp in Sources */,
ABD1FF131345AC9C00AF11D1 /* slot2_paddle.cpp in Sources */,
ABBFFF8A1D6283D3003CD598 /* colorspacehandler_SSE2.cpp in Sources */,
ABD1FF141345AC9C00AF11D1 /* slot2_piano.cpp in Sources */,
ABD1FF151345AC9C00AF11D1 /* slot2_rumblepak.cpp in Sources */,
ABD1041F1346652500AF11D1 /* sndOSX.cpp in Sources */,
@ -3864,6 +3897,7 @@
AB40565E169F5DBB0016AC3E /* virtualmemory.cpp in Sources */,
AB405661169F5DBB0016AC3E /* zonememory.cpp in Sources */,
AB405679169F5DCC0016AC3E /* x86assembler.cpp in Sources */,
ABBFFF861D6283C1003CD598 /* colorspacehandler.cpp in Sources */,
AB40567C169F5DCC0016AC3E /* x86compiler.cpp in Sources */,
ABFEA8A41BB4EC1100B08C25 /* sfnt.c in Sources */,
ABA731691BB51FDC00B26147 /* type1cid.c in Sources */,
@ -4017,6 +4051,7 @@
AB796D4315CDCBA200C59155 /* version.cpp in Sources */,
ABFEA82B1BB4EC1100B08C25 /* ftinit.c in Sources */,
AB796D4415CDCBA200C59155 /* vfat.cpp in Sources */,
AB5FDDAC1D62C89E0094617C /* colorspacehandler.cpp in Sources */,
AB796D4515CDCBA200C59155 /* videofilter.cpp in Sources */,
AB796D4615CDCBA200C59155 /* WavFile.cpp in Sources */,
AB796D4715CDCBA200C59155 /* wifi.cpp in Sources */,
@ -4096,6 +4131,7 @@
AB26D87C16B5253D00A2305C /* OGLRender_3_2.cpp in Sources */,
AB3A655E16CC5421001F5D4A /* EmuControllerDelegate.mm in Sources */,
AB3A656116CC5438001F5D4A /* cocoa_GPU.mm in Sources */,
AB5FDDAD1D62C8A00094617C /* colorspacehandler_SSE2.cpp in Sources */,
AB8967D916D2ED0700F826F1 /* DisplayWindowController.mm in Sources */,
AB29B33116D4BEBF000EF671 /* InputManager.mm in Sources */,
AB8B7AAC17CE8C440051CEBF /* slot1comp_protocol.cpp in Sources */,
@ -4272,6 +4308,7 @@
AB2ABA401C9F9CFA00173B15 /* rsemaphore.c in Sources */,
AB8F3CF01A53AC2600A80BF6 /* ringbuffer.cpp in Sources */,
AB8F3CF11A53AC2600A80BF6 /* arm_jit.cpp in Sources */,
ABBFFF891D6283D2003CD598 /* colorspacehandler_SSE2.cpp in Sources */,
AB8F3CF21A53AC2600A80BF6 /* troubleshootingWindowDelegate.mm in Sources */,
AB8F3CF31A53AC2600A80BF6 /* assembler.cpp in Sources */,
AB8F3CF41A53AC2600A80BF6 /* assert.cpp in Sources */,
@ -4295,6 +4332,7 @@
AB8F3D041A53AC2600A80BF6 /* virtualmemory.cpp in Sources */,
AB8F3D051A53AC2600A80BF6 /* zonememory.cpp in Sources */,
AB8F3D061A53AC2600A80BF6 /* x86assembler.cpp in Sources */,
ABBFFF851D6283C0003CD598 /* colorspacehandler.cpp in Sources */,
AB8F3D071A53AC2600A80BF6 /* x86compiler.cpp in Sources */,
AB8F3D081A53AC2600A80BF6 /* x86compilercontext.cpp in Sources */,
AB8F3D091A53AC2600A80BF6 /* x86compilerfunc.cpp in Sources */,
@ -4367,6 +4405,7 @@
ABB3C6911501C04F00E0C22E /* SoundTouch.cpp in Sources */,
ABB3C6921501C04F00E0C22E /* sse_optimized.cpp in Sources */,
ABB3C6931501C04F00E0C22E /* TDStretch.cpp in Sources */,
ABBFFF871D6283C1003CD598 /* colorspacehandler.cpp in Sources */,
ABB3C6941501C04F00E0C22E /* WavFile.cpp in Sources */,
ABB3C6951501C04F00E0C22E /* metaspu.cpp in Sources */,
ABB3C6961501C04F00E0C22E /* SndOut.cpp in Sources */,
@ -4436,6 +4475,7 @@
ABB3C6D11501C04F00E0C22E /* slot1.cpp in Sources */,
ABB3C6D31501C04F00E0C22E /* SPU.cpp in Sources */,
ABB3C6D41501C04F00E0C22E /* texcache.cpp in Sources */,
ABBFFF8B1D6283D3003CD598 /* colorspacehandler_SSE2.cpp in Sources */,
AB9038BA17C5ED2200F410BD /* slot1comp_rom.cpp in Sources */,
ABB3C6D51501C04F00E0C22E /* thumb_instructions.cpp in Sources */,
AB2EE13317D57F5000F68622 /* fsnitro.cpp in Sources */,

View File

@ -740,6 +740,14 @@
AB2F56F11704C86900E28885 /* utilities.c in Sources */ = {isa = PBXBuildFile; fileRef = AB2F56EF1704C86900E28885 /* utilities.c */; };
AB2F56F21704C86900E28885 /* utilities.c in Sources */ = {isa = PBXBuildFile; fileRef = AB2F56EF1704C86900E28885 /* utilities.c */; };
AB2F56F31704C86900E28885 /* utilities.c in Sources */ = {isa = PBXBuildFile; fileRef = AB2F56EF1704C86900E28885 /* utilities.c */; };
AB37E3741D6188BC004A2C0D /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */; };
AB37E3771D6188BC004A2C0D /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E3721D6188BC004A2C0D /* colorspacehandler_SSE2.cpp */; };
AB37E3781D6188BC004A2C0D /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */; };
AB37E37B1D6188BC004A2C0D /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E3721D6188BC004A2C0D /* colorspacehandler_SSE2.cpp */; };
AB37E37C1D6188BC004A2C0D /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */; };
AB37E37D1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36E1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp */; };
AB37E3801D6188BC004A2C0D /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */; };
AB37E38A1D61895F004A2C0D /* colorspacehandler_AltiVec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36E1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp */; };
AB3ACB7814C2361100D7D192 /* appDelegate.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB3ACB6714C2361100D7D192 /* appDelegate.mm */; };
AB3ACB7914C2361100D7D192 /* cheatWindowDelegate.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB3ACB6914C2361100D7D192 /* cheatWindowDelegate.mm */; };
AB3ACB7C14C2361100D7D192 /* inputPrefsView.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB3ACB6F14C2361100D7D192 /* inputPrefsView.mm */; };
@ -1156,6 +1164,8 @@
AB73AA2E1507C9F500A310C8 /* OpenGL.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = ABC570D4134431DA00E7B0B1 /* OpenGL.framework */; };
AB73AA2F1507C9F500A310C8 /* libz.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = AB0A0D1914AACA9600E83E91 /* libz.dylib */; };
AB75226F14C7BB51009B97B3 /* AppIcon_FirmwareConfig.icns in Resources */ = {isa = PBXBuildFile; fileRef = AB75226D14C7BB51009B97B3 /* AppIcon_FirmwareConfig.icns */; };
AB7BB17F1D62C8CC00A7A6E2 /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */; };
AB7BB1801D62C8CF00A7A6E2 /* colorspacehandler_AltiVec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36E1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp */; };
AB7DDA6D173DC38F004F3D07 /* Carbon.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = ABB6AD5C173A3F2B00EC2E8D /* Carbon.framework */; };
AB7DDA6E173DC399004F3D07 /* Carbon.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = ABB6AD5C173A3F2B00EC2E8D /* Carbon.framework */; };
AB7DDA6F173DC39E004F3D07 /* Carbon.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = ABB6AD5C173A3F2B00EC2E8D /* Carbon.framework */; };
@ -1835,6 +1845,12 @@
AB2F56EF1704C86900E28885 /* utilities.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = utilities.c; sourceTree = "<group>"; };
AB350BA41478AC96007165AC /* IOKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = IOKit.framework; path = System/Library/Frameworks/IOKit.framework; sourceTree = SDKROOT; };
AB350D38147A1D8D007165AC /* English */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; name = English; path = translations/English.lproj/HID_usage_strings.plist; sourceTree = "<group>"; };
AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler.cpp; sourceTree = "<group>"; };
AB37E36D1D6188BC004A2C0D /* colorspacehandler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colorspacehandler.h; sourceTree = "<group>"; };
AB37E36E1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler_AltiVec.cpp; sourceTree = "<group>"; };
AB37E36F1D6188BC004A2C0D /* colorspacehandler_AltiVec.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colorspacehandler_AltiVec.h; sourceTree = "<group>"; };
AB37E3721D6188BC004A2C0D /* colorspacehandler_SSE2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler_SSE2.cpp; sourceTree = "<group>"; };
AB37E3731D6188BC004A2C0D /* colorspacehandler_SSE2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colorspacehandler_SSE2.h; sourceTree = "<group>"; };
AB3ACB6614C2361100D7D192 /* appDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = appDelegate.h; sourceTree = "<group>"; };
AB3ACB6714C2361100D7D192 /* appDelegate.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = appDelegate.mm; sourceTree = "<group>"; };
AB3ACB6814C2361100D7D192 /* cheatWindowDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cheatWindowDelegate.h; sourceTree = "<group>"; };
@ -2894,6 +2910,19 @@
path = src;
sourceTree = "<group>";
};
AB37E36B1D6188BC004A2C0D /* colorspacehandler */ = {
isa = PBXGroup;
children = (
AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */,
AB37E36D1D6188BC004A2C0D /* colorspacehandler.h */,
AB37E36E1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp */,
AB37E36F1D6188BC004A2C0D /* colorspacehandler_AltiVec.h */,
AB37E3721D6188BC004A2C0D /* colorspacehandler_SSE2.cpp */,
AB37E3731D6188BC004A2C0D /* colorspacehandler_SSE2.h */,
);
path = colorspacehandler;
sourceTree = "<group>";
};
AB3ACB6514C2361100D7D192 /* userinterface */ = {
isa = PBXGroup;
children = (
@ -3207,6 +3236,7 @@
isa = PBXGroup;
children = (
ABBCE2A115ACB29100A2C965 /* AsmJit */,
AB37E36B1D6188BC004A2C0D /* colorspacehandler */,
ABD1FF211345ACBF00AF11D1 /* decrypt */,
ABD1FF2E1345ACBF00AF11D1 /* libfat */,
ABE670241415DE6C00E8E4C9 /* tinyxml */,
@ -4508,6 +4538,8 @@
AB50200A1D09E712002FA150 /* file_path.c in Sources */,
AB50200B1D09E712002FA150 /* retro_dirent.c in Sources */,
AB50200C1D09E712002FA150 /* retro_stat.c in Sources */,
AB7BB17F1D62C8CC00A7A6E2 /* colorspacehandler.cpp in Sources */,
AB7BB1801D62C8CF00A7A6E2 /* colorspacehandler_AltiVec.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@ -4687,6 +4719,8 @@
AB5020161D09E712002FA150 /* file_path.c in Sources */,
AB5020171D09E712002FA150 /* retro_dirent.c in Sources */,
AB5020181D09E712002FA150 /* retro_stat.c in Sources */,
AB37E3801D6188BC004A2C0D /* colorspacehandler.cpp in Sources */,
AB37E38A1D61895F004A2C0D /* colorspacehandler_AltiVec.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@ -4896,6 +4930,8 @@
AB50200D1D09E712002FA150 /* file_path.c in Sources */,
AB50200E1D09E712002FA150 /* retro_dirent.c in Sources */,
AB50200F1D09E712002FA150 /* retro_stat.c in Sources */,
AB37E3741D6188BC004A2C0D /* colorspacehandler.cpp in Sources */,
AB37E3771D6188BC004A2C0D /* colorspacehandler_SSE2.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@ -5105,6 +5141,8 @@
AB5020101D09E712002FA150 /* file_path.c in Sources */,
AB5020111D09E712002FA150 /* retro_dirent.c in Sources */,
AB5020121D09E712002FA150 /* retro_stat.c in Sources */,
AB37E3781D6188BC004A2C0D /* colorspacehandler.cpp in Sources */,
AB37E37B1D6188BC004A2C0D /* colorspacehandler_SSE2.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@ -5284,6 +5322,8 @@
AB5020131D09E712002FA150 /* file_path.c in Sources */,
AB5020141D09E712002FA150 /* retro_dirent.c in Sources */,
AB5020151D09E712002FA150 /* retro_stat.c in Sources */,
AB37E37C1D6188BC004A2C0D /* colorspacehandler.cpp in Sources */,
AB37E37D1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};

View File

@ -754,7 +754,7 @@
if (dispInfo.pixelBytes == 2)
{
ConvertColorBuffer555To8888Opaque<false, false>((u16 *)displayBuffer, bitmapData, (w * h));
ColorspaceConvertBuffer555To8888Opaque<false, false>((u16 *)displayBuffer, bitmapData, (w * h));
}
else if (dispInfo.pixelBytes == 4)
{

View File

@ -692,7 +692,7 @@ void RomIconToRGBA8888(uint32_t *bitmapData)
//
// The first entry always represents the alpha, so we can just ignore it.
clut[0] = 0x00000000;
ConvertColorBuffer555To8888Opaque<false, true>((u16 *)iconClutPtr, &clut[1], 15);
ColorspaceConvertBuffer555To8888Opaque<false, true>((u16 *)iconClutPtr, &clut[1], 15);
// Load the image from the icon pixel data.
//

View File

@ -1,65 +1,63 @@
/*
Copyright (C) 2008-2015 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdio.h>
#include <zlib.h>
#include "types.h"
#include "ImageOut.h"
#include "formats/rpng.h"
#include "formats/rbmp.h"
#include "GPU.h"
// Expands a width x height buffer of 15-bit pixels into a newly malloc'd buffer
// holding 3 bytes per pixel. Each source pixel is passed through
// ConvertColor555To8888Opaque<true>() and the three low bytes of the result are
// stored lowest byte first.
//
// A non-positive width or height is treated as zero pixels.
// Returns NULL if the byte count does not fit in size_t or if the allocation
// fails; otherwise the caller owns the returned buffer and must free() it.
static u8* Convert15To24(const u16* src, int width, int height)
{
	// Do the size math in size_t so a large image cannot overflow int arithmetic.
	const size_t pixCount = (width > 0 && height > 0) ? ((size_t)width * (size_t)height) : 0;
	if (pixCount > ((size_t)-1) / 3)
	{
		return NULL;
	}
	
	u8 *tmp_buffer = (u8 *)malloc(pixCount * 3);
	if (tmp_buffer == NULL)
	{
		return NULL;
	}
	
	u8 *tmp_inc = tmp_buffer;
	for (size_t i = 0; i < pixCount; i++)
	{
		const u32 dst = ConvertColor555To8888Opaque<true>(*src++);
		*tmp_inc++ = dst&0xFF;
		*tmp_inc++ = (dst>>8)&0xFF;
		*tmp_inc++ = (dst>>16)&0xFF;
	}
	return tmp_buffer;
}

// Writes a 15-bit image to `filename` as a PNG by way of a temporary 24-bit copy.
// Returns 1 on success, 0 if the temporary buffer could not be created or the
// writer reported failure.
int NDS_WritePNG_15bpp(int width, int height, const u16 *data, const char *filename)
{
	u8* tmp = Convert15To24(data,width,height);
	if (tmp == NULL)
	{
		return 0;
	}
	bool ok = rpng_save_image_bgr24(filename,tmp,width,height,width*3);
	free(tmp);
	return ok?1:0;
}

// Writes a 15-bit image to `filename` as a BMP by way of a temporary 24-bit copy
// that is handed to the writer as RBMP_SOURCE_TYPE_BGR24.
// Returns 1 on success, 0 if the temporary buffer could not be created or the
// writer reported failure.
int NDS_WriteBMP_15bpp(int width, int height, const u16 *data, const char *filename)
{
	u8* tmp = Convert15To24(data,width,height);
	if (tmp == NULL)
	{
		return 0;
	}
	bool ok = rbmp_save_image(filename,tmp,width,height,width*3,RBMP_SOURCE_TYPE_BGR24);
	free(tmp);
	return ok?1:0;
}
int NDS_WriteBMP_32bppBuffer(int width, int height, const void* buf, const char *filename)
{
bool ok = rbmp_save_image(filename,buf,width,height,width*4,RBMP_SOURCE_TYPE_ARGB8888);
return ok?1:0;
/*
Copyright (C) 2008-2015 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdio.h>
#include <zlib.h>
#include "types.h"
#include "ImageOut.h"
#include "formats/rpng.h"
#include "formats/rbmp.h"
#include "GPU.h"
// Expands a width x height buffer of 15-bit pixels into a newly malloc'd buffer
// holding 3 bytes per pixel. Each source pixel is passed through
// ColorspaceConvert555To8888Opaque<true>() and the three low bytes of the result
// are stored lowest byte first.
//
// A non-positive width or height is treated as zero pixels.
// Returns NULL if the byte count does not fit in size_t or if the allocation
// fails; otherwise the caller owns the returned buffer and must free() it.
static u8* Convert15To24(const u16* src, int width, int height)
{
	// Do the size math in size_t so a large image cannot overflow int arithmetic.
	const size_t pixCount = (width > 0 && height > 0) ? ((size_t)width * (size_t)height) : 0;
	if (pixCount > ((size_t)-1) / 3)
	{
		return NULL;
	}
	
	u8 *tmp_buffer = (u8 *)malloc(pixCount * 3);
	if (tmp_buffer == NULL)
	{
		return NULL;
	}
	
	u8 *tmp_inc = tmp_buffer;
	for (size_t i = 0; i < pixCount; i++)
	{
		const u32 dst = ColorspaceConvert555To8888Opaque<true>(*src++);
		*tmp_inc++ = dst & 0xFF;
		*tmp_inc++ = (dst >> 8) & 0xFF;
		*tmp_inc++ = (dst >> 16) & 0xFF;
	}
	return tmp_buffer;
}

// Writes a 15-bit image to `filename` as a PNG by way of a temporary 24-bit copy.
// Returns 1 on success, 0 if the temporary buffer could not be created or the
// writer reported failure.
int NDS_WritePNG_15bpp(int width, int height, const u16 *data, const char *filename)
{
	u8* tmp = Convert15To24(data,width,height);
	if (tmp == NULL)
	{
		return 0;
	}
	bool ok = rpng_save_image_bgr24(filename,tmp,width,height,width*3);
	free(tmp);
	return ok?1:0;
}

// Writes a 15-bit image to `filename` as a BMP by way of a temporary 24-bit copy
// that is handed to the writer as RBMP_SOURCE_TYPE_BGR24.
// Returns 1 on success, 0 if the temporary buffer could not be created or the
// writer reported failure.
int NDS_WriteBMP_15bpp(int width, int height, const u16 *data, const char *filename)
{
	u8* tmp = Convert15To24(data,width,height);
	if (tmp == NULL)
	{
		return 0;
	}
	bool ok = rbmp_save_image(filename,tmp,width,height,width*3,RBMP_SOURCE_TYPE_BGR24);
	free(tmp);
	return ok?1:0;
}
// Saves an already-32-bit pixel buffer to `filename` as a BMP. The buffer is handed
// to the writer as RBMP_SOURCE_TYPE_ARGB8888 with a row pitch of width*4 bytes.
// Returns 1 if the writer reports success, 0 otherwise.
int NDS_WriteBMP_32bppBuffer(int width, int height, const void* buf, const char *filename)
{
	if (rbmp_save_image(filename, buf, width, height, width*4, RBMP_SOURCE_TYPE_ARGB8888))
	{
		return 1;
	}
	return 0;
}

View File

@ -605,11 +605,11 @@ Render3DError Render3D::FlushFramebuffer(const FragmentColor *__restrict srcFram
{
if ( (this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev) )
{
ConvertColorBuffer8888To6665<false>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
ColorspaceConvertBuffer8888To6665<false, false>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
}
else if ( (this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev) )
{
ConvertColorBuffer6665To8888<false>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
ColorspaceConvertBuffer6665To8888<false, false>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
}
else if ( ((this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev)) ||
((this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev)) )
@ -622,11 +622,11 @@ Render3DError Render3D::FlushFramebuffer(const FragmentColor *__restrict srcFram
{
if (this->_outputFormat == NDSColorFormat_BGR666_Rev)
{
ConvertColorBuffer6665To5551<false, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
ColorspaceConvertBuffer6665To5551<false, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
}
else if (this ->_outputFormat == NDSColorFormat_BGR888_Rev)
{
ConvertColorBuffer8888To5551<false, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
ColorspaceConvertBuffer8888To5551<false, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
}
}

View File

@ -31,6 +31,10 @@
#include "MMU.h"
#include "NDSSystem.h"
#ifdef ENABLE_SSE2
#include "./utils/colorspacehandler/colorspacehandler_SSE2.h"
#endif
using std::min;
using std::max;
@ -452,13 +456,13 @@ public:
if (TEXFORMAT == TexFormat_15bpp)
{
ConvertColor555To6665Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
ConvertColor555To6665Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
}
else
{
ConvertColor555To8888Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
ConvertColor555To8888Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
}
// Set converted colors to 0 if the palette index is 0.
@ -518,13 +522,13 @@ public:
if (TEXFORMAT == TexFormat_15bpp)
{
ConvertColor555To6665Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
ConvertColor555To6665Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
}
else
{
ConvertColor555To8888Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
ConvertColor555To8888Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
}
_mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]);
@ -581,13 +585,13 @@ public:
if (TEXFORMAT == TexFormat_15bpp)
{
ConvertColor555To6665Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
ConvertColor555To6665Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
}
else
{
ConvertColor555To8888Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
ConvertColor555To8888Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
}
// Set converted colors to 0 if the palette index is 0.
@ -647,13 +651,13 @@ public:
if (TEXFORMAT == TexFormat_15bpp)
{
ConvertColor555To6665Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
ConvertColor555To6665Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
}
else
{
ConvertColor555To8888Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
ConvertColor555To8888Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
}
_mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]);
@ -882,11 +886,11 @@ public:
tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo);
tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo);
ConvertColor555To6665<false>(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]);
ColorspaceConvert555To6665_SSE2<false>(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]);
tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi);
tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi);
ConvertColor555To6665<false>(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]);
ColorspaceConvert555To6665_SSE2<false>(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]);
}
else
{
@ -896,11 +900,11 @@ public:
tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo);
tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo);
ConvertColor555To8888<false>(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]);
ColorspaceConvert555To8888_SSE2<false>(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]);
tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi);
tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi);
ConvertColor555To8888<false>(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]);
ColorspaceConvert555To8888_SSE2<false>(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]);
}
_mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]);

View File

@ -76,6 +76,18 @@
#ifdef __SSE4_2__
#define ENABLE_SSE4_2
#endif
#ifdef __AVX__
#define ENABLE_AVX
#endif
#ifdef __AVX2__
#define ENABLE_AVX2
#endif
#ifdef __ALTIVEC__
#define ENABLE_ALTIVEC
#endif
#endif
#ifdef _MSC_VER
@ -223,6 +235,38 @@ typedef u32 uint32;
#define uint32 u32 //uint32 is defined in Leopard somewhere, avoid conflicts
#endif
// Vector type aliases. Each alias name encodes the total vector width in bits
// (v128 / v256), the lane signedness (u / s) and the lane width in bits (8 / 16 / 32).

// AltiVec: each alias maps to a distinct `vector` type with the matching lane type.
#ifdef ENABLE_ALTIVEC
// <altivec.h> is only included when __APPLE_ALTIVEC__ is not defined.
#ifndef __APPLE_ALTIVEC__
#include <altivec.h>
#endif
typedef vector unsigned char v128u8;
typedef vector signed char v128s8;
typedef vector unsigned short v128u16;
typedef vector signed short v128s16;
typedef vector unsigned int v128u32;
typedef vector signed int v128s32;
#endif
// SSE2: every v128* alias is the same underlying __m128i type, so here the alias
// names only document the intended lane layout; the compiler does not enforce it.
#ifdef ENABLE_SSE2
#include <emmintrin.h>
typedef __m128i v128u8;
typedef __m128i v128s8;
typedef __m128i v128u16;
typedef __m128i v128s16;
typedef __m128i v128u32;
typedef __m128i v128s32;
#endif
// AVX2: every v256* alias is the same underlying __m256i type (documentation only,
// as with the SSE2 aliases).
#ifdef ENABLE_AVX2
#include <immintrin.h>
typedef __m256i v256u8;
typedef __m256i v256s8;
typedef __m256i v256u16;
typedef __m256i v256s16;
typedef __m256i v256u32;
typedef __m256i v256s32;
#endif
/*---------- GPU3D fixed-points types -----------*/
typedef s32 f32;

View File

@ -0,0 +1,776 @@
/*
Copyright (C) 2016 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/
#include "colorspacehandler.h"
#if defined(ENABLE_AVX2)
#include "colorspacehandler_AVX2.h"
#elif defined(ENABLE_SSE2)
#include "colorspacehandler_SSE2.h"
#elif defined(ENABLE_ALTIVEC)
#include "colorspacehandler_AltiVec.h"
#endif
// Select the vector width(s) available to this build. The two blocks below are
// independent of each other, so USEVECTORSIZE_128 and USEVECTORSIZE_256 are BOTH
// defined when ENABLE_SSE2 and ENABLE_AVX2 are both set. Code that branches on
// these macros therefore needs to test the wider size first.
#if defined(ENABLE_SSE2) || defined(ENABLE_ALTIVEC)
#define USEVECTORSIZE_128
#endif
#if defined(ENABLE_AVX2)
#define USEVECTORSIZE_256
#endif
// By default, the hand-coded vectorized code will be used instead of a compiler's built-in
// autovectorization (if supported). However, if USEMANUALVECTORIZATION is not defined, then
// the compiler will use autovectorization (if supported).
#if defined(USEVECTORSIZE_128) || defined(USEVECTORSIZE_256) || defined(USEVECTORSIZE_512)
// Comment out USEMANUALVECTORIZATION to disable the hand-coded vectorized code.
#define USEMANUALVECTORIZATION
#endif
// File-local handler object used by the ColorspaceConvertBuffer*() functions in this file.
// With manual vectorization on, the first matching handler in the order
// AVX2 -> SSE2 -> AltiVec is chosen; in every other case the plain
// ColorspaceHandler is used.
#ifdef USEMANUALVECTORIZATION
#if defined(ENABLE_AVX2)
static const ColorspaceHandler_AVX2 csh;
#elif defined(ENABLE_SSE2)
static const ColorspaceHandler_SSE2 csh;
#elif defined(ENABLE_ALTIVEC)
static const ColorspaceHandler_AltiVec csh;
#else
static const ColorspaceHandler csh;
#endif
#else
static const ColorspaceHandler csh;
#endif
// Lookup tables indexed by a 15-bit color value (32768 entries each).
// They start out zero-initialized and are filled in by ColorspaceHandlerInit().
CACHE_ALIGN u32 color_555_to_6665_opaque[32768];
CACHE_ALIGN u32 color_555_to_6665_opaque_swap_rb[32768];
CACHE_ALIGN u32 color_555_to_666[32768];
CACHE_ALIGN u32 color_555_to_8888_opaque[32768];
CACHE_ALIGN u32 color_555_to_8888_opaque_swap_rb[32768];
CACHE_ALIGN u32 color_555_to_888[32768];
//is this a crazy idea? this table spreads 5 bits evenly over 31 from exactly 0 to INT_MAX
//(each entry equals (v * 0x04210842) | (v >> 4), i.e. the 5-bit pattern repeated across 31 bits)
CACHE_ALIGN const u32 material_5bit_to_31bit[] = {
0x00000000, 0x04210842, 0x08421084, 0x0C6318C6,
0x10842108, 0x14A5294A, 0x18C6318C, 0x1CE739CE,
0x21084210, 0x25294A52, 0x294A5294, 0x2D6B5AD6,
0x318C6318, 0x35AD6B5A, 0x39CE739C, 0x3DEF7BDE,
0x42108421, 0x46318C63, 0x4A5294A5, 0x4E739CE7,
0x5294A529, 0x56B5AD6B, 0x5AD6B5AD, 0x5EF7BDEF,
0x6318C631, 0x6739CE73, 0x6B5AD6B5, 0x6F7BDEF7,
0x739CE739, 0x77BDEF7B, 0x7BDEF7BD, 0x7FFFFFFF
};
// 5-bit to 6-bit conversions use this formula -- dst = (src == 0) ? 0 : (2*src) + 1
// Reference GBATEK: http://problemkaputt.de/gbatek.htm#ds3dtextureblending
CACHE_ALIGN const u8 material_5bit_to_6bit[] = {
0x00, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F,
0x21, 0x23, 0x25, 0x27, 0x29, 0x2B, 0x2D, 0x2F,
0x31, 0x33, 0x35, 0x37, 0x39, 0x3B, 0x3D, 0x3F
};
// 5-bit to 8-bit expansion: each entry equals (v << 3) | (v >> 2), so 0 maps to 0x00 and 31 to 0xFF.
CACHE_ALIGN const u8 material_5bit_to_8bit[] = {
0x00, 0x08, 0x10, 0x18, 0x21, 0x29, 0x31, 0x39,
0x42, 0x4A, 0x52, 0x5A, 0x63, 0x6B, 0x73, 0x7B,
0x84, 0x8C, 0x94, 0x9C, 0xA5, 0xAD, 0xB5, 0xBD,
0xC6, 0xCE, 0xD6, 0xDE, 0xE7, 0xEF, 0xF7, 0xFF
};
// 6-bit to 8-bit expansion: each entry equals (v << 2) | (v >> 4), so 0 maps to 0x00 and 63 to 0xFF.
CACHE_ALIGN const u8 material_6bit_to_8bit[] = {
0x00, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C,
0x20, 0x24, 0x28, 0x2C, 0x30, 0x34, 0x38, 0x3C,
0x41, 0x45, 0x49, 0x4D, 0x51, 0x55, 0x59, 0x5D,
0x61, 0x65, 0x69, 0x6D, 0x71, 0x75, 0x79, 0x7D,
0x82, 0x86, 0x8A, 0x8E, 0x92, 0x96, 0x9A, 0x9E,
0xA2, 0xA6, 0xAA, 0xAE, 0xB2, 0xB6, 0xBA, 0xBE,
0xC3, 0xC7, 0xCB, 0xCF, 0xD3, 0xD7, 0xDB, 0xDF,
0xE3, 0xE7, 0xEB, 0xEF, 0xF3, 0xF7, 0xFB, 0xFF
};
// 3-bit to 8-bit expansion: each entry equals (v << 5) | (v << 2) | (v >> 1).
CACHE_ALIGN const u8 material_3bit_to_8bit[] = {
0x00, 0x24, 0x49, 0x6D, 0x92, 0xB6, 0xDB, 0xFF
};
//3-bit to 5-bit expansion; maybe not very precise
CACHE_ALIGN const u8 material_3bit_to_5bit[] = {
0, 4, 8, 13, 17, 22, 26, 31
};
//3-bit to 6-bit expansion
//TODO - generate this in the static init method more accurately
CACHE_ALIGN const u8 material_3bit_to_6bit[] = {
0, 8, 16, 26, 34, 44, 52, 63
};
// Fills the six 32768-entry color_555_to_* lookup tables, one entry for every
// possible 15-bit color value. The 6-bit variants expand each 5-bit channel through
// material_5bit_to_6bit and the 8-bit variants through material_5bit_to_8bit; the
// "opaque" tables additionally OR in an alpha of 0x1F (6665) or 0xFF (8888), and the
// "swap_rb" tables place the two outer channels in the opposite byte positions.
//
// The tables are built on the first call only; later calls return immediately.
void ColorspaceHandlerInit()
{
	static bool needInitTables = true;
	
	if (!needInitTables)
	{
		return;
	}
	
#define RGB15TO18_BITLOGIC(col) ( (material_5bit_to_6bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | material_5bit_to_6bit[(col)&0x1F] )
#define RGB15TO18_SWAP_RB_BITLOGIC(col) ( material_5bit_to_6bit[((col)>>10)&0x1F] | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_6bit[(col)&0x1F]<<16) )
#define RGB15TO24_BITLOGIC(col) ( (material_5bit_to_8bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | material_5bit_to_8bit[(col)&0x1F] )
#define RGB15TO24_SWAP_RB_BITLOGIC(col) ( material_5bit_to_8bit[((col)>>10)&0x1F] | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_8bit[(col)&0x1F]<<16) )
	
	for (size_t i = 0; i < 32768; i++)
	{
		color_555_to_666[i]					= LE_TO_LOCAL_32( RGB15TO18_BITLOGIC(i) );
		color_555_to_6665_opaque[i]			= LE_TO_LOCAL_32( RGB15TO18_BITLOGIC(i) | 0x1F000000 );
		color_555_to_6665_opaque_swap_rb[i]	= LE_TO_LOCAL_32( RGB15TO18_SWAP_RB_BITLOGIC(i) | 0x1F000000 );
		
		color_555_to_888[i]					= LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) );
		color_555_to_8888_opaque[i]			= LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) | 0xFF000000 );
		color_555_to_8888_opaque_swap_rb[i]	= LE_TO_LOCAL_32( RGB15TO24_SWAP_RB_BITLOGIC(i) | 0xFF000000 );
	}
	
	// Clear the guard so the tables are not rebuilt on every subsequent call.
	needInitTables = false;
}
// Converts one 15-bit color to an opaque 8888 color. Bit 15 of src is masked off
// before the conversion macro is applied; SWAP_RB selects the swapped-channel macro.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert555To8888Opaque(const u16 src)
{
	if (SWAP_RB)
	{
		return COLOR555TO8888_OPAQUE_SWAP_RB(src & 0x7FFF);
	}
	
	return COLOR555TO8888_OPAQUE(src & 0x7FFF);
}
// Converts one 15-bit color to an opaque 6665 color. Bit 15 of src is masked off
// before the conversion macro is applied; SWAP_RB selects the swapped-channel macro.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert555To6665Opaque(const u16 src)
{
	if (SWAP_RB)
	{
		return COLOR555TO6665_OPAQUE_SWAP_RB(src & 0x7FFF);
	}
	
	return COLOR555TO6665_OPAQUE(src & 0x7FFF);
}
// Reduces an 8888 color to 6665 by dropping low bits: r, g and b are shifted right
// by 2 and a by 3. With SWAP_RB the source r and b channels trade places in the result.
// Returns the packed .color value of the result.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert8888To6665(FragmentColor srcColor)
{
	FragmentColor result;
	
	if (SWAP_RB)
	{
		result.r = srcColor.b >> 2;
		result.b = srcColor.r >> 2;
	}
	else
	{
		result.r = srcColor.r >> 2;
		result.b = srcColor.b >> 2;
	}
	
	result.g = srcColor.g >> 2;
	result.a = srcColor.a >> 3;
	
	return result.color;
}
// Packed-value overload: loads srcColor into a FragmentColor through its .color
// member and forwards to the FragmentColor overload.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert8888To6665(u32 srcColor)
{
	FragmentColor unpacked;
	unpacked.color = srcColor;
	
	return ColorspaceConvert8888To6665<SWAP_RB>(unpacked);
}
// Expands a 6665 color to 8888 by table lookup: r, g and b index
// material_6bit_to_8bit and a indexes material_5bit_to_8bit. The channel values are
// used directly as subscripts, with no range check. With SWAP_RB the source r and b
// channels trade places in the result. Returns the packed .color value of the result.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert6665To8888(FragmentColor srcColor)
{
	FragmentColor result;
	
	if (SWAP_RB)
	{
		result.r = material_6bit_to_8bit[srcColor.b];
		result.b = material_6bit_to_8bit[srcColor.r];
	}
	else
	{
		result.r = material_6bit_to_8bit[srcColor.r];
		result.b = material_6bit_to_8bit[srcColor.b];
	}
	
	result.g = material_6bit_to_8bit[srcColor.g];
	result.a = material_5bit_to_8bit[srcColor.a];
	
	return result.color;
}
// Packed-value overload: loads srcColor into a FragmentColor through its .color
// member and forwards to the FragmentColor overload.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert6665To8888(u32 srcColor)
{
	FragmentColor unpacked;
	unpacked.color = srcColor;
	
	return ColorspaceConvert6665To8888<SWAP_RB>(unpacked);
}
// Reduces an 8888 color to a 16-bit 5551 value: each of r, g and b is shifted right
// by 3 and combined with R5G5B5TORGB15, then bit 15 is set whenever the source alpha
// is non-zero. With SWAP_RB the r and b channels are passed in the opposite order.
template <bool SWAP_RB>
FORCEINLINE u16 ColorspaceConvert8888To5551(FragmentColor srcColor)
{
	const u16 alphaBit = (srcColor.a == 0) ? 0x0000 : 0x8000;
	
	if (SWAP_RB)
	{
		return R5G5B5TORGB15(srcColor.b >> 3, srcColor.g >> 3, srcColor.r >> 3) | alphaBit;
	}
	
	return R5G5B5TORGB15(srcColor.r >> 3, srcColor.g >> 3, srcColor.b >> 3) | alphaBit;
}
// Packed-value overload: loads srcColor into a FragmentColor through its .color
// member and forwards to the FragmentColor overload.
template <bool SWAP_RB>
FORCEINLINE u16 ColorspaceConvert8888To5551(u32 srcColor)
{
	FragmentColor unpacked;
	unpacked.color = srcColor;
	
	return ColorspaceConvert8888To5551<SWAP_RB>(unpacked);
}
// Reduces a 6665 color to a 16-bit 5551 value: r, g and b are combined with
// R6G6B6TORGB15, then bit 15 is set whenever the source alpha is non-zero.
// With SWAP_RB the r and b channels are passed in the opposite order.
template <bool SWAP_RB>
FORCEINLINE u16 ColorspaceConvert6665To5551(FragmentColor srcColor)
{
	const u16 alphaBit = (srcColor.a == 0) ? 0x0000 : 0x8000;
	
	if (SWAP_RB)
	{
		return R6G6B6TORGB15(srcColor.b, srcColor.g, srcColor.r) | alphaBit;
	}
	
	return R6G6B6TORGB15(srcColor.r, srcColor.g, srcColor.b) | alphaBit;
}
// Packed-value overload: loads srcColor into a FragmentColor through its .color
// member and forwards to the FragmentColor overload.
template <bool SWAP_RB>
FORCEINLINE u16 ColorspaceConvert6665To5551(u32 srcColor)
{
	FragmentColor unpacked;
	unpacked.color = srcColor;
	
	return ColorspaceConvert6665To5551<SWAP_RB>(unpacked);
}
// Converts pixCount 15-bit pixels from src into opaque 8888 pixels in dst.
//
// With USEMANUALVECTORIZATION, the bulk of the buffer is handed to the matching
// routine of the file-local `csh` handler (chosen by SWAP_RB / IS_UNALIGNED). That
// routine is given a pixel count rounded down to a whole number of vector passes and
// returns the index at which the scalar loop below resumes. Without manual
// vectorization, the scalar loop converts everything.
template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
{
	size_t i = 0;
	
#ifdef USEMANUALVECTORIZATION
	
	// Test the widest vector size first. USEVECTORSIZE_128 and USEVECTORSIZE_256 are
	// both defined when SSE2 and AVX2 are enabled together, and `csh` is then the AVX2
	// handler, so the count must be rounded for the 256-bit width. Rounding for the
	// narrower width could hand the wider handler a count it cannot finish in whole
	// passes (presumably letting it run past the end of the buffers).
#if defined(USEVECTORSIZE_512)
	const size_t pixCountVector = pixCount - (pixCount % 32);
#elif defined(USEVECTORSIZE_256)
	const size_t pixCountVector = pixCount - (pixCount % 16);
#elif defined(USEVECTORSIZE_128)
	const size_t pixCountVector = pixCount - (pixCount % 8);
#endif
	
	if (SWAP_RB)
	{
		if (IS_UNALIGNED)
		{
			i = csh.ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector);
		}
		else
		{
			i = csh.ConvertBuffer555To8888Opaque_SwapRB(src, dst, pixCountVector);
		}
	}
	else
	{
		if (IS_UNALIGNED)
		{
			i = csh.ConvertBuffer555To8888Opaque_IsUnaligned(src, dst, pixCountVector);
		}
		else
		{
			i = csh.ConvertBuffer555To8888Opaque(src, dst, pixCountVector);
		}
	}
	
#pragma LOOPVECTORIZE_DISABLE
	
#endif // USEMANUALVECTORIZATION
	
	// Scalar path: the leftover tail, or the whole buffer when nothing was vectorized.
	for (; i < pixCount; i++)
	{
		dst[i] = ColorspaceConvert555To8888Opaque<SWAP_RB>(src[i]);
	}
}
// Converts pixCount 15-bit pixels from src into opaque 6665 pixels in dst.
//
// With USEMANUALVECTORIZATION, the bulk of the buffer is handed to the matching
// routine of the file-local `csh` handler (chosen by SWAP_RB / IS_UNALIGNED). That
// routine is given a pixel count rounded down to a whole number of vector passes and
// returns the index at which the scalar loop below resumes. Without manual
// vectorization, the scalar loop converts everything.
template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
{
	size_t i = 0;
	
#ifdef USEMANUALVECTORIZATION
	
	// Test the widest vector size first. USEVECTORSIZE_128 and USEVECTORSIZE_256 are
	// both defined when SSE2 and AVX2 are enabled together, and `csh` is then the AVX2
	// handler, so the count must be rounded for the 256-bit width. Rounding for the
	// narrower width could hand the wider handler a count it cannot finish in whole
	// passes (presumably letting it run past the end of the buffers).
#if defined(USEVECTORSIZE_512)
	const size_t pixCountVector = pixCount - (pixCount % 32);
#elif defined(USEVECTORSIZE_256)
	const size_t pixCountVector = pixCount - (pixCount % 16);
#elif defined(USEVECTORSIZE_128)
	const size_t pixCountVector = pixCount - (pixCount % 8);
#endif
	
	if (SWAP_RB)
	{
		if (IS_UNALIGNED)
		{
			i = csh.ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector);
		}
		else
		{
			i = csh.ConvertBuffer555To6665Opaque_SwapRB(src, dst, pixCountVector);
		}
	}
	else
	{
		if (IS_UNALIGNED)
		{
			i = csh.ConvertBuffer555To6665Opaque_IsUnaligned(src, dst, pixCountVector);
		}
		else
		{
			i = csh.ConvertBuffer555To6665Opaque(src, dst, pixCountVector);
		}
	}
	
#pragma LOOPVECTORIZE_DISABLE
	
#endif // USEMANUALVECTORIZATION
	
	// Scalar path: the leftover tail, or the whole buffer when nothing was vectorized.
	for (; i < pixCount; i++)
	{
		dst[i] = ColorspaceConvert555To6665Opaque<SWAP_RB>(src[i]);
	}
}
// Converts pixCount 8888 pixels from src into 6665 pixels in dst. The pointers are
// not declared __restrict here, unlike the 555 and 5551 buffer converters.
//
// With USEMANUALVECTORIZATION, the bulk of the buffer is handed to the matching
// routine of the file-local `csh` handler (chosen by SWAP_RB / IS_UNALIGNED). That
// routine is given a pixel count rounded down to a whole number of vector passes and
// returns the index at which the scalar loop below resumes. Without manual
// vectorization, the scalar loop converts everything.
template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount)
{
	size_t i = 0;
	
#ifdef USEMANUALVECTORIZATION
	
	// Test the widest vector size first. USEVECTORSIZE_128 and USEVECTORSIZE_256 are
	// both defined when SSE2 and AVX2 are enabled together, and `csh` is then the AVX2
	// handler, so the count must be rounded for the 256-bit width. Rounding for the
	// narrower width could hand the wider handler a count it cannot finish in whole
	// passes (presumably letting it run past the end of the buffers).
#if defined(USEVECTORSIZE_512)
	const size_t pixCountVector = pixCount - (pixCount % 16);
#elif defined(USEVECTORSIZE_256)
	const size_t pixCountVector = pixCount - (pixCount % 8);
#elif defined(USEVECTORSIZE_128)
	const size_t pixCountVector = pixCount - (pixCount % 4);
#endif
	
	if (SWAP_RB)
	{
		if (IS_UNALIGNED)
		{
			i = csh.ConvertBuffer8888To6665_SwapRB_IsUnaligned(src, dst, pixCountVector);
		}
		else
		{
			i = csh.ConvertBuffer8888To6665_SwapRB(src, dst, pixCountVector);
		}
	}
	else
	{
		if (IS_UNALIGNED)
		{
			i = csh.ConvertBuffer8888To6665_IsUnaligned(src, dst, pixCountVector);
		}
		else
		{
			i = csh.ConvertBuffer8888To6665(src, dst, pixCountVector);
		}
	}
	
#pragma LOOPVECTORIZE_DISABLE
	
#endif // USEMANUALVECTORIZATION
	
	// Scalar path: the leftover tail, or the whole buffer when nothing was vectorized.
	for (; i < pixCount; i++)
	{
		dst[i] = ColorspaceConvert8888To6665<SWAP_RB>(src[i]);
	}
}
// Converts pixCount 6665 pixels from src into 8888 pixels in dst. The pointers are
// not declared __restrict here, unlike the 555 and 5551 buffer converters.
//
// With USEMANUALVECTORIZATION, the bulk of the buffer is handed to the matching
// routine of the file-local `csh` handler (chosen by SWAP_RB / IS_UNALIGNED). That
// routine is given a pixel count rounded down to a whole number of vector passes and
// returns the index at which the scalar loop below resumes. Without manual
// vectorization, the scalar loop converts everything.
template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount)
{
	size_t i = 0;
	
#ifdef USEMANUALVECTORIZATION
	
	// Test the widest vector size first. USEVECTORSIZE_128 and USEVECTORSIZE_256 are
	// both defined when SSE2 and AVX2 are enabled together, and `csh` is then the AVX2
	// handler, so the count must be rounded for the 256-bit width. Rounding for the
	// narrower width could hand the wider handler a count it cannot finish in whole
	// passes (presumably letting it run past the end of the buffers).
#if defined(USEVECTORSIZE_512)
	const size_t pixCountVector = pixCount - (pixCount % 16);
#elif defined(USEVECTORSIZE_256)
	const size_t pixCountVector = pixCount - (pixCount % 8);
#elif defined(USEVECTORSIZE_128)
	const size_t pixCountVector = pixCount - (pixCount % 4);
#endif
	
	if (SWAP_RB)
	{
		if (IS_UNALIGNED)
		{
			i = csh.ConvertBuffer6665To8888_SwapRB_IsUnaligned(src, dst, pixCountVector);
		}
		else
		{
			i = csh.ConvertBuffer6665To8888_SwapRB(src, dst, pixCountVector);
		}
	}
	else
	{
		if (IS_UNALIGNED)
		{
			i = csh.ConvertBuffer6665To8888_IsUnaligned(src, dst, pixCountVector);
		}
		else
		{
			i = csh.ConvertBuffer6665To8888(src, dst, pixCountVector);
		}
	}
	
#pragma LOOPVECTORIZE_DISABLE
	
#endif // USEMANUALVECTORIZATION
	
	// Scalar path: the leftover tail, or the whole buffer when nothing was vectorized.
	for (; i < pixCount; i++)
	{
		dst[i] = ColorspaceConvert6665To8888<SWAP_RB>(src[i]);
	}
}
// Converts pixCount 8888 pixels from src into 16-bit 5551 pixels in dst.
//
// With USEMANUALVECTORIZATION, the bulk of the buffer is handed to the matching
// routine of the file-local `csh` handler (chosen by SWAP_RB / IS_UNALIGNED). That
// routine is given a pixel count rounded down to a whole number of vector passes and
// returns the index at which the scalar loop below resumes. Without manual
// vectorization, the scalar loop converts everything.
template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount)
{
	size_t i = 0;
	
#ifdef USEMANUALVECTORIZATION
	
	// Test the widest vector size first. USEVECTORSIZE_128 and USEVECTORSIZE_256 are
	// both defined when SSE2 and AVX2 are enabled together, and `csh` is then the AVX2
	// handler, so the count must be rounded for the 256-bit width. Rounding for the
	// narrower width could hand the wider handler a count it cannot finish in whole
	// passes (presumably letting it run past the end of the buffers).
#if defined(USEVECTORSIZE_512)
	const size_t pixCountVector = pixCount - (pixCount % 32);
#elif defined(USEVECTORSIZE_256)
	const size_t pixCountVector = pixCount - (pixCount % 16);
#elif defined(USEVECTORSIZE_128)
	const size_t pixCountVector = pixCount - (pixCount % 8);
#endif
	
	if (SWAP_RB)
	{
		if (IS_UNALIGNED)
		{
			i = csh.ConvertBuffer8888To5551_SwapRB_IsUnaligned(src, dst, pixCountVector);
		}
		else
		{
			i = csh.ConvertBuffer8888To5551_SwapRB(src, dst, pixCountVector);
		}
	}
	else
	{
		if (IS_UNALIGNED)
		{
			i = csh.ConvertBuffer8888To5551_IsUnaligned(src, dst, pixCountVector);
		}
		else
		{
			i = csh.ConvertBuffer8888To5551(src, dst, pixCountVector);
		}
	}
	
#pragma LOOPVECTORIZE_DISABLE
	
#endif // USEMANUALVECTORIZATION
	
	// Scalar path: the leftover tail, or the whole buffer when nothing was vectorized.
	for (; i < pixCount; i++)
	{
		dst[i] = ColorspaceConvert8888To5551<SWAP_RB>(src[i]);
	}
}
// Converts pixCount 6665 pixels from src into 16-bit 5551 pixels in dst.
//
// With USEMANUALVECTORIZATION, the bulk of the buffer is handed to the matching
// routine of the file-local `csh` handler (chosen by SWAP_RB / IS_UNALIGNED). That
// routine is given a pixel count rounded down to a whole number of vector passes and
// returns the index at which the scalar loop below resumes. Without manual
// vectorization, the scalar loop converts everything.
template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount)
{
	size_t i = 0;
	
#ifdef USEMANUALVECTORIZATION
	
	// Test the widest vector size first. USEVECTORSIZE_128 and USEVECTORSIZE_256 are
	// both defined when SSE2 and AVX2 are enabled together, and `csh` is then the AVX2
	// handler, so the count must be rounded for the 256-bit width. Rounding for the
	// narrower width could hand the wider handler a count it cannot finish in whole
	// passes (presumably letting it run past the end of the buffers).
#if defined(USEVECTORSIZE_512)
	const size_t pixCountVector = pixCount - (pixCount % 32);
#elif defined(USEVECTORSIZE_256)
	const size_t pixCountVector = pixCount - (pixCount % 16);
#elif defined(USEVECTORSIZE_128)
	const size_t pixCountVector = pixCount - (pixCount % 8);
#endif
	
	if (SWAP_RB)
	{
		if (IS_UNALIGNED)
		{
			i = csh.ConvertBuffer6665To5551_SwapRB_IsUnaligned(src, dst, pixCountVector);
		}
		else
		{
			i = csh.ConvertBuffer6665To5551_SwapRB(src, dst, pixCountVector);
		}
	}
	else
	{
		if (IS_UNALIGNED)
		{
			i = csh.ConvertBuffer6665To5551_IsUnaligned(src, dst, pixCountVector);
		}
		else
		{
			i = csh.ConvertBuffer6665To5551(src, dst, pixCountVector);
		}
	}
	
#pragma LOOPVECTORIZE_DISABLE
	
#endif // USEMANUALVECTORIZATION
	
	// Scalar path: the leftover tail, or the whole buffer when nothing was vectorized.
	for (; i < pixCount; i++)
	{
		dst[i] = ColorspaceConvert6665To5551<SWAP_RB>(src[i]);
	}
}
// Scalar 555 -> opaque 8888 conversion (no channel swap), one pixel at a time.
// Returns the number of pixels converted, which is always pixCount.
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	size_t converted = 0;
	
	while (converted < pixCount)
	{
		dst[converted] = ColorspaceConvert555To8888Opaque<false>(src[converted]);
		converted++;
	}
	
	return converted;
}
// Scalar 555 -> opaque 8888 conversion with the SWAP_RB variant of the per-pixel
// converter. Returns the number of pixels converted, which is always pixCount.
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	size_t converted = 0;
	
	while (converted < pixCount)
	{
		dst[converted] = ColorspaceConvert555To8888Opaque<true>(src[converted]);
		converted++;
	}
	
	return converted;
}
// Unaligned-buffer entry point; forwards to ConvertBuffer555To8888Opaque().
// The call is qualified with the class name, so it always reaches this class's
// implementation.
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t converted = ColorspaceHandler::ConvertBuffer555To8888Opaque(src, dst, pixCount);
	return converted;
}
// Unaligned-buffer entry point; forwards to ConvertBuffer555To8888Opaque_SwapRB().
// The call is qualified with the class name, so it always reaches this class's
// implementation.
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t converted = ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB(src, dst, pixCount);
	return converted;
}
// Scalar 555 -> opaque 6665 conversion (no channel swap), one pixel at a time.
// Returns the number of pixels converted, which is always pixCount.
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	size_t converted = 0;
	
	while (converted < pixCount)
	{
		dst[converted] = ColorspaceConvert555To6665Opaque<false>(src[converted]);
		converted++;
	}
	
	return converted;
}
// Scalar 555 -> opaque 6665 conversion with the SWAP_RB variant of the per-pixel
// converter. Returns the number of pixels converted, which is always pixCount.
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	size_t converted = 0;
	
	while (converted < pixCount)
	{
		dst[converted] = ColorspaceConvert555To6665Opaque<true>(src[converted]);
		converted++;
	}
	
	return converted;
}
// Unaligned-buffer entry point; forwards to ConvertBuffer555To6665Opaque().
// The call is qualified with the class name, so it always reaches this class's
// implementation.
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t converted = ColorspaceHandler::ConvertBuffer555To6665Opaque(src, dst, pixCount);
	return converted;
}
// Unaligned-buffer entry point; forwards to ConvertBuffer555To6665Opaque_SwapRB().
// The call is qualified with the class name, so it always reaches this class's
// implementation.
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t converted = ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB(src, dst, pixCount);
	return converted;
}
// Scalar 8888 -> 6665 conversion (no channel swap), one pixel at a time.
// Returns the number of pixels converted, which is always pixCount.
size_t ColorspaceHandler::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
{
	size_t converted = 0;
	
	while (converted < pixCount)
	{
		dst[converted] = ColorspaceConvert8888To6665<false>(src[converted]);
		converted++;
	}
	
	return converted;
}
// Scalar 8888 -> 6665 conversion with the SWAP_RB variant of the per-pixel
// converter. Returns the number of pixels converted, which is always pixCount.
size_t ColorspaceHandler::ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
	size_t converted = 0;
	
	while (converted < pixCount)
	{
		dst[converted] = ColorspaceConvert8888To6665<true>(src[converted]);
		converted++;
	}
	
	return converted;
}
// Unaligned-buffer entry point; forwards to ConvertBuffer8888To6665().
// The call is qualified with the class name, so it always reaches this class's
// implementation.
size_t ColorspaceHandler::ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
	const size_t converted = ColorspaceHandler::ConvertBuffer8888To6665(src, dst, pixCount);
	return converted;
}
// Unaligned-buffer entry point; forwards to ConvertBuffer8888To6665_SwapRB().
// The call is qualified with the class name, so it always reaches this class's
// implementation.
size_t ColorspaceHandler::ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
	const size_t converted = ColorspaceHandler::ConvertBuffer8888To6665_SwapRB(src, dst, pixCount);
	return converted;
}
// Scalar 6665 -> 8888 conversion (no channel swap), one pixel at a time.
// Returns the number of pixels converted, which is always pixCount.
size_t ColorspaceHandler::ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const
{
	size_t converted = 0;
	
	while (converted < pixCount)
	{
		dst[converted] = ColorspaceConvert6665To8888<false>(src[converted]);
		converted++;
	}
	
	return converted;
}
// Scalar 6665 -> 8888 conversion with the SWAP_RB variant of the per-pixel
// converter. Returns the number of pixels converted, which is always pixCount.
size_t ColorspaceHandler::ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
	size_t converted = 0;
	
	while (converted < pixCount)
	{
		dst[converted] = ColorspaceConvert6665To8888<true>(src[converted]);
		converted++;
	}
	
	return converted;
}
// Unaligned-buffer entry point; forwards to ConvertBuffer6665To8888().
// The call is qualified with the class name, so it always reaches this class's
// implementation.
size_t ColorspaceHandler::ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
	const size_t converted = ColorspaceHandler::ConvertBuffer6665To8888(src, dst, pixCount);
	return converted;
}
// Unaligned-buffer entry point; forwards to ConvertBuffer6665To8888_SwapRB().
// The call is qualified with the class name, so it always reaches this class's
// implementation.
size_t ColorspaceHandler::ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
	const size_t converted = ColorspaceHandler::ConvertBuffer6665To8888_SwapRB(src, dst, pixCount);
	return converted;
}
// Scalar 8888 -> 5551 conversion (no channel swap), one pixel at a time.
// Returns the number of pixels converted, which is always pixCount.
size_t ColorspaceHandler::ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	size_t converted = 0;
	
	while (converted < pixCount)
	{
		dst[converted] = ColorspaceConvert8888To5551<false>(src[converted]);
		converted++;
	}
	
	return converted;
}
// Scalar 8888 -> 5551 conversion with the SWAP_RB variant of the per-pixel
// converter. Returns the number of pixels converted, which is always pixCount.
size_t ColorspaceHandler::ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	size_t converted = 0;
	
	while (converted < pixCount)
	{
		dst[converted] = ColorspaceConvert8888To5551<true>(src[converted]);
		converted++;
	}
	
	return converted;
}
// Unaligned-buffer entry point; forwards to ConvertBuffer8888To5551().
// The call is qualified with the class name, so it always reaches this class's
// implementation.
size_t ColorspaceHandler::ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	const size_t converted = ColorspaceHandler::ConvertBuffer8888To5551(src, dst, pixCount);
	return converted;
}
// Unaligned-buffer entry point; forwards to ConvertBuffer8888To5551_SwapRB().
// The call is qualified with the class name, so it always reaches this class's
// implementation.
size_t ColorspaceHandler::ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	const size_t converted = ColorspaceHandler::ConvertBuffer8888To5551_SwapRB(src, dst, pixCount);
	return converted;
}
// Scalar 6665 -> 5551 conversion (no channel swap), one pixel at a time.
// Returns the number of pixels converted, which is always pixCount.
size_t ColorspaceHandler::ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	size_t converted = 0;
	
	while (converted < pixCount)
	{
		dst[converted] = ColorspaceConvert6665To5551<false>(src[converted]);
		converted++;
	}
	
	return converted;
}
// Scalar 6665 -> 5551 conversion with the SWAP_RB variant of the per-pixel
// converter. Returns the number of pixels converted, which is always pixCount.
size_t ColorspaceHandler::ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	size_t converted = 0;
	
	while (converted < pixCount)
	{
		dst[converted] = ColorspaceConvert6665To5551<true>(src[converted]);
		converted++;
	}
	
	return converted;
}
// Unaligned-buffer entry point; forwards to ConvertBuffer6665To5551().
// The call is qualified with the class name, so it always reaches this class's
// implementation.
size_t ColorspaceHandler::ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	const size_t converted = ColorspaceHandler::ConvertBuffer6665To5551(src, dst, pixCount);
	return converted;
}
// Unaligned-buffer entry point; forwards to ConvertBuffer6665To5551_SwapRB().
// The call is qualified with the class name, so it always reaches this class's
// implementation.
size_t ColorspaceHandler::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	const size_t converted = ColorspaceHandler::ConvertBuffer6665To5551_SwapRB(src, dst, pixCount);
	return converted;
}
// Explicit template instantiations.
// Each template is instantiated here for every combination of its bool
// parameters, so the definitions in this file are emitted for all of them.

// Per-pixel converters, instantiated for SWAP_RB = true and false.
template u32 ColorspaceConvert555To8888Opaque<true>(const u16 src);
template u32 ColorspaceConvert555To8888Opaque<false>(const u16 src);
template u32 ColorspaceConvert555To6665Opaque<true>(const u16 src);
template u32 ColorspaceConvert555To6665Opaque<false>(const u16 src);
// The remaining per-pixel converters have two overloads each: one taking a
// FragmentColor and one taking a raw u32.
template u32 ColorspaceConvert8888To6665<true>(FragmentColor srcColor);
template u32 ColorspaceConvert8888To6665<false>(FragmentColor srcColor);
template u32 ColorspaceConvert8888To6665<true>(u32 srcColor);
template u32 ColorspaceConvert8888To6665<false>(u32 srcColor);
template u32 ColorspaceConvert6665To8888<true>(FragmentColor srcColor);
template u32 ColorspaceConvert6665To8888<false>(FragmentColor srcColor);
template u32 ColorspaceConvert6665To8888<true>(u32 srcColor);
template u32 ColorspaceConvert6665To8888<false>(u32 srcColor);
template u16 ColorspaceConvert8888To5551<true>(FragmentColor srcColor);
template u16 ColorspaceConvert8888To5551<false>(FragmentColor srcColor);
template u16 ColorspaceConvert8888To5551<true>(u32 srcColor);
template u16 ColorspaceConvert8888To5551<false>(u32 srcColor);
template u16 ColorspaceConvert6665To5551<true>(FragmentColor srcColor);
template u16 ColorspaceConvert6665To5551<false>(FragmentColor srcColor);
template u16 ColorspaceConvert6665To5551<true>(u32 srcColor);
template u16 ColorspaceConvert6665To5551<false>(u32 srcColor);
// Whole-buffer converters, instantiated for all four <SWAP_RB, IS_UNALIGNED> combinations.
template void ColorspaceConvertBuffer555To8888Opaque<true, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<true, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<false, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<false, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<true, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<true, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<false, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<false, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
// The 32-bit to 32-bit buffer converters do not declare src/dst as __restrict.
template void ColorspaceConvertBuffer8888To6665<true, true>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To6665<true, false>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To6665<false, true>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To6665<false, false>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To8888<true, true>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To8888<true, false>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To8888<false, true>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To8888<false, false>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To5551<true, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To5551<true, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To5551<false, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To5551<false, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To5551<true, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To5551<true, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To5551<false, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To5551<false, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);

View File

@ -0,0 +1,194 @@
/*
Copyright (C) 2016 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this software. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef COLORSPACEHANDLER_H
#define COLORSPACEHANDLER_H
#include "types.h"
#include <stdio.h>
#include <stdint.h>
// Identifies a pixel color format. Each enumerator's numeric value is itself a
// packed description of the format (per-component bit counts, component order
// and flags), laid out as documented below.
enum NDSColorFormat
{
// The color format information is packed in a 32-bit value.
// The bits are as follows:
// FFFOOOOO AAAAAABB BBBBGGGG GGRRRRRR
//
// F = Flags (see below)
// O = Color order (see below)
// A = Bit count for alpha [0-63]
// B = Bit count for blue [0-63]
// G = Bit count for green [0-63]
// R = Bit count for red [0-63]
//
// Flags:
// Bit 29: Reverse order flag.
// Set = Bits are in reverse order, usually for little-endian usage.
// Cleared = Bits are in normal order, usually for big-endian usage.
//
// Color order bits, 24-28:
// 0x00 = RGBA, common format
// 0x01 = RGAB
// 0x02 = RBGA
// 0x03 = RBAG
// 0x04 = RAGB
// 0x05 = RABG
// 0x06 = GRBA
// 0x07 = GRAB
// 0x08 = GBRA
// 0x09 = GBAR
// 0x0A = GARB
// 0x0B = GABR
// 0x0C = BRGA
// 0x0D = BRAG
// 0x0E = BGRA, common format
// 0x0F = BGAR
// 0x10 = BARG
// 0x11 = BAGR
// 0x12 = ARGB
// 0x13 = ARBG
// 0x14 = AGRB
// 0x15 = AGBR
// 0x16 = ABRG
// 0x17 = ABGR
// Color formats used for internal processing.
// (Currently disabled; kept for reference.)
// NOTE(review): 0x20186186 decodes to an alpha bit count of 6, although the
// name ABGR5666 suggests 5 -- confirm before re-enabling.
//NDSColorFormat_ABGR1555_Rev = 0x20045145,
//NDSColorFormat_ABGR5666_Rev = 0x20186186,
//NDSColorFormat_ABGR8888_Rev = 0x20208208,
// Color formats used by the output framebuffers.
// All three set the reverse order flag (bit 29), use color order 0x00 and
// declare an alpha bit count of 0; they differ only in the R/G/B bit counts.
// R, G and B are 5 bits each.
NDSColorFormat_BGR555_Rev = 0x20005145,
// R, G and B are 6 bits each.
NDSColorFormat_BGR666_Rev = 0x20006186,
// R, G and B are 8 bits each.
NDSColorFormat_BGR888_Rev = 0x20008208
};
// A single 32-bit color that can be accessed either as one packed word or as
// four individual 8-bit components.
union FragmentColor
{
	// All four components viewed as one 32-bit value.
	u32 color;
	
	// The same storage viewed per component, in memory order r, g, b, a.
	struct
	{
		u8 r;
		u8 g;
		u8 b;
		u8 a;
	};
};
// Read-only component expansion tables, defined in another translation unit.
// Each table has one entry per possible source value (2^N entries for an
// N-bit source component), as the names and array sizes indicate.
extern CACHE_ALIGN const u32 material_5bit_to_31bit[32];
extern CACHE_ALIGN const u8 material_5bit_to_6bit[32];
extern CACHE_ALIGN const u8 material_5bit_to_8bit[32];
extern CACHE_ALIGN const u8 material_6bit_to_8bit[64];
extern CACHE_ALIGN const u8 material_3bit_to_5bit[8];
extern CACHE_ALIGN const u8 material_3bit_to_6bit[8];
extern CACHE_ALIGN const u8 material_3bit_to_8bit[8];
// Whole-color conversion tables with 32768 (2^15) entries, indexed by a 15-bit
// color through the COLOR555TO* macros below. They are not const.
// NOTE(review): presumably filled in by ColorspaceHandlerInit() -- confirm.
extern CACHE_ALIGN u32 color_555_to_6665_opaque[32768];
extern CACHE_ALIGN u32 color_555_to_6665_opaque_swap_rb[32768];
extern CACHE_ALIGN u32 color_555_to_666[32768];
extern CACHE_ALIGN u32 color_555_to_8888_opaque[32768];
extern CACHE_ALIGN u32 color_555_to_8888_opaque_swap_rb[32768];
extern CACHE_ALIGN u32 color_555_to_888[32768];
// Table-lookup color conversions. 'col' is used directly as the table index
// and is not masked here, so it must already be within the table's 15-bit range.
#define COLOR555TO6665_OPAQUE(col) (color_555_to_6665_opaque[(col)]) // Convert a 15-bit color to an opaque sparsely packed 32-bit color containing an RGBA6665 color
#define COLOR555TO6665_OPAQUE_SWAP_RB(col) (color_555_to_6665_opaque_swap_rb[(col)]) // Convert a 15-bit color to an opaque sparsely packed 32-bit color containing an RGBA6665 color with R and B components swapped
#define COLOR555TO666(col) (color_555_to_666[(col)]) // Convert a 15-bit color to a fully transparent sparsely packed 32-bit color containing an RGBA6665 color
// The user-supplied alpha is ORed into the top byte of the word on
// little-endian builds and into the bottom byte on big-endian builds.
#ifdef LOCAL_LE
#define COLOR555TO6665(col,alpha5) (((alpha5)<<24) | color_555_to_666[(col)]) // Convert a 15-bit color to a sparsely packed 32-bit color containing an RGBA6665 color with user-defined alpha, little-endian
#else
#define COLOR555TO6665(col,alpha5) ((alpha5) | color_555_to_666[(col)]) // Convert a 15-bit color to a sparsely packed 32-bit color containing an RGBA6665 color with user-defined alpha, big-endian
#endif
#define COLOR555TO8888_OPAQUE(col) (color_555_to_8888_opaque[(col)]) // Convert a 15-bit color to an opaque 32-bit color
#define COLOR555TO8888_OPAQUE_SWAP_RB(col) (color_555_to_8888_opaque_swap_rb[(col)]) // Convert a 15-bit color to an opaque 32-bit color with R and B components swapped
#define COLOR555TO888(col) (color_555_to_888[(col)]) // Convert a 15-bit color to an opaque 24-bit color or a fully transparent 32-bit color
// Same alpha placement rule as COLOR555TO6665 above, with an 8-bit alpha.
#ifdef LOCAL_LE
#define COLOR555TO8888(col,alpha8) (((alpha8)<<24) | color_555_to_888[(col)]) // Convert a 15-bit color to a 32-bit color with user-defined alpha, little-endian
#else
#define COLOR555TO8888(col,alpha8) ((alpha8) | color_555_to_888[(col)]) // Convert a 15-bit color to a 32-bit color with user-defined alpha, big-endian
#endif
//produce a 15bpp color from individual 5bit components
//(r lands in bits 0-4, g in bits 5-9, b in bits 10-14; the inputs are not masked)
#define R5G5B5TORGB15(r,g,b) ( (r) | ((g)<<5) | ((b)<<10) )
//produce a 15bpp color from individual 6bit components
//(the lowest bit of each component is dropped; r is shifted without masking, so it must already fit in 6 bits)
#define R6G6B6TORGB15(r,g,b) ( ((r)>>1) | (((g)&0x3E)<<4) | (((b)&0x3E)<<9) )
// Initialization entry point for the colorspace handler.
// NOTE(review): presumably builds the color_555_to_* lookup tables -- confirm against the definition.
void ColorspaceHandlerInit();
// Per-pixel converters. SWAP_RB selects the variant that exchanges the R and B
// components in the result. The function name gives the source and destination
// formats (e.g. 8888To5551 takes a 32-bit 8888 color and returns a 16-bit 5551 color).
template<bool SWAP_RB> u32 ColorspaceConvert555To8888Opaque(const u16 src);
template<bool SWAP_RB> u32 ColorspaceConvert555To6665Opaque(const u16 src);
// The converters below accept the source color either as a FragmentColor or as a raw u32.
template<bool SWAP_RB> u32 ColorspaceConvert8888To6665(FragmentColor srcColor);
template<bool SWAP_RB> u32 ColorspaceConvert8888To6665(u32 srcColor);
template<bool SWAP_RB> u32 ColorspaceConvert6665To8888(FragmentColor srcColor);
template<bool SWAP_RB> u32 ColorspaceConvert6665To8888(u32 srcColor);
template<bool SWAP_RB> u16 ColorspaceConvert8888To5551(FragmentColor srcColor);
template<bool SWAP_RB> u16 ColorspaceConvert8888To5551(u32 srcColor);
template<bool SWAP_RB> u16 ColorspaceConvert6665To5551(FragmentColor srcColor);
template<bool SWAP_RB> u16 ColorspaceConvert6665To5551(u32 srcColor);
// Whole-buffer converters: convert pixCount pixels from src into dst.
// IS_UNALIGNED selects the variant intended for buffers that are not aligned
// for vector loads/stores. src and dst are declared __restrict (must not
// overlap) wherever the source and destination pixel sizes differ.
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
// Generic (non-vectorized) buffer conversion routines.
//
// Every conversion comes in four flavours:
//   <name>                      - base conversion
//   <name>_SwapRB               - same, with the R and B components exchanged
//   <name>_IsUnaligned          - entry point for buffers not aligned for vector access
//   <name>_SwapRB_IsUnaligned   - both of the above
//
// Each method converts pixels from src into dst and returns a size_t pixel
// count (the number of pixels it processed). None of the methods are virtual.
class ColorspaceHandler
{
public:
ColorspaceHandler() {};
// 15-bit 555 color -> opaque 32-bit 8888 color.
size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
// 15-bit 555 color -> opaque 32-bit 6665 color.
size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
// 32-bit 8888 color -> 32-bit 6665 color. src and dst are not __restrict here.
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
// 32-bit 6665 color -> 32-bit 8888 color. src and dst are not __restrict here.
size_t ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
// 32-bit 8888 color -> 16-bit 5551 color.
size_t ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
// 32-bit 6665 color -> 16-bit 5551 color.
size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
};
// Builds a FragmentColor from its four 8-bit components.
// Only the r/g/b/a members are written; the packed 'color' view reflects them
// because it shares the same storage.
FORCEINLINE FragmentColor MakeFragmentColor(const u8 r, const u8 g, const u8 b, const u8 a)
{
	FragmentColor fragColor;
	
	fragColor.r = r;
	fragColor.g = g;
	fragColor.b = b;
	fragColor.a = a;
	
	return fragColor;
}
#endif /* COLORSPACEHANDLER_H */

View File

@ -0,0 +1,491 @@
/*
Copyright (C) 2016 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this software. If not, see <http://www.gnu.org/licenses/>.
*/
#include "colorspacehandler_AVX2.h"
#ifndef ENABLE_AVX2
#error This code requires AVX2 support.
#else
#include <immintrin.h>
// Converts sixteen 15-bit 555 colors into sixteen 32-bit 8888 colors.
//
// SWAP_RB          - when true, the bits 0-4 field and the bits 10-14 field of
//                    the source trade places in the output (bytes 0 and 2).
// srcColor         - sixteen 16-bit source colors, pixel 0 in the lowest element.
// srcAlphaBits32Lo - bits ORed into each of the eight results written to dstLo.
// srcAlphaBits32Hi - bits ORed into each of the eight results written to dstHi.
// dstLo            - receives the converted pixels 0-7, in order.
// dstHi            - receives the converted pixels 8-15, in order.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888_AVX2(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi)
{
	// _mm256_unpacklo_epi16() / _mm256_unpackhi_epi16() operate on each 128-bit
	// lane independently. Unpacking srcColor directly would therefore put
	// pixels 0-3 and 8-11 into dstLo and pixels 4-7 and 12-15 into dstHi.
	// Reordering the 64-bit quarters from (0,1,2,3) to (0,2,1,3) first makes
	// the "lo" unpack yield pixels 0-7 and the "hi" unpack yield pixels 8-15.
	const v256u16 srcOrdered = _mm256_permute4x64_epi64(srcColor, 0xD8);
	v256u32 src32;
	
	// Conversion algorithm:
	//    RGB   5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
	
	// Pixels 0-7
	src32 = _mm256_unpacklo_epi16(srcOrdered, _mm256_setzero_si256());
	// Place the two outer 5-bit fields in the top five bits of bytes 0 and 2.
	dstLo = (SWAP_RB) ? _mm256_or_si256(_mm256_slli_epi32(src32, 19), _mm256_srli_epi32(src32, 7)) : _mm256_or_si256(_mm256_slli_epi32(src32, 3), _mm256_slli_epi32(src32, 9));
	dstLo = _mm256_and_si256( dstLo, _mm256_set1_epi32(0x00F800F8) );
	// Place the middle 5-bit field in the top five bits of byte 1.
	dstLo = _mm256_or_si256( dstLo, _mm256_and_si256(_mm256_slli_epi32(src32, 6), _mm256_set1_epi32(0x0000F800)) );
	// Replicate each component's top three bits into its low three bits.
	dstLo = _mm256_or_si256( dstLo, _mm256_and_si256(_mm256_srli_epi32(dstLo, 5), _mm256_set1_epi32(0x00070707)) );
	dstLo = _mm256_or_si256( dstLo, srcAlphaBits32Lo );
	
	// Pixels 8-15
	src32 = _mm256_unpackhi_epi16(srcOrdered, _mm256_setzero_si256());
	dstHi = (SWAP_RB) ? _mm256_or_si256(_mm256_slli_epi32(src32, 19), _mm256_srli_epi32(src32, 7)) : _mm256_or_si256(_mm256_slli_epi32(src32, 3), _mm256_slli_epi32(src32, 9));
	dstHi = _mm256_and_si256( dstHi, _mm256_set1_epi32(0x00F800F8) );
	dstHi = _mm256_or_si256( dstHi, _mm256_and_si256(_mm256_slli_epi32(src32, 6), _mm256_set1_epi32(0x0000F800)) );
	dstHi = _mm256_or_si256( dstHi, _mm256_and_si256(_mm256_srli_epi32(dstHi, 5), _mm256_set1_epi32(0x00070707)) );
	dstHi = _mm256_or_si256( dstHi, srcAlphaBits32Hi );
}
// Converts sixteen 15-bit 555 colors into sixteen sparsely packed 32-bit 6665 colors.
//
// SWAP_RB          - when true, the bits 0-4 field and the bits 10-14 field of
//                    the source trade places in the output (bytes 0 and 2).
// srcColor         - sixteen 16-bit source colors, pixel 0 in the lowest element.
// srcAlphaBits32Lo - bits ORed into each of the eight results written to dstLo.
// srcAlphaBits32Hi - bits ORed into each of the eight results written to dstHi.
// dstLo            - receives the converted pixels 0-7, in order.
// dstHi            - receives the converted pixels 8-15, in order.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665_AVX2(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi)
{
	// _mm256_unpacklo_epi16() / _mm256_unpackhi_epi16() operate on each 128-bit
	// lane independently. Unpacking srcColor directly would therefore put
	// pixels 0-3 and 8-11 into dstLo and pixels 4-7 and 12-15 into dstHi.
	// Reordering the 64-bit quarters from (0,1,2,3) to (0,2,1,3) first makes
	// the "lo" unpack yield pixels 0-7 and the "hi" unpack yield pixels 8-15.
	const v256u16 srcOrdered = _mm256_permute4x64_epi64(srcColor, 0xD8);
	v256u32 src32;
	
	// Conversion algorithm:
	//    RGB   5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
	
	// Pixels 0-7
	src32 = _mm256_unpacklo_epi16(srcOrdered, _mm256_setzero_si256());
	// Place the two outer 5-bit fields in bits 1-5 of bytes 0 and 2.
	dstLo = (SWAP_RB) ? _mm256_or_si256(_mm256_slli_epi32(src32, 17), _mm256_srli_epi32(src32, 9)) : _mm256_or_si256(_mm256_slli_epi32(src32, 1), _mm256_slli_epi32(src32, 7));
	dstLo = _mm256_and_si256( dstLo, _mm256_set1_epi32(0x003E003E) );
	// Place the middle 5-bit field in bits 1-5 of byte 1.
	dstLo = _mm256_or_si256( dstLo, _mm256_and_si256(_mm256_slli_epi32(src32, 4), _mm256_set1_epi32(0x00003E00)) );
	// Replicate each component's top bit into its lowest bit.
	dstLo = _mm256_or_si256( dstLo, _mm256_and_si256(_mm256_srli_epi32(dstLo, 5), _mm256_set1_epi32(0x00010101)) );
	dstLo = _mm256_or_si256( dstLo, srcAlphaBits32Lo );
	
	// Pixels 8-15
	src32 = _mm256_unpackhi_epi16(srcOrdered, _mm256_setzero_si256());
	dstHi = (SWAP_RB) ? _mm256_or_si256(_mm256_slli_epi32(src32, 17), _mm256_srli_epi32(src32, 9)) : _mm256_or_si256(_mm256_slli_epi32(src32, 1), _mm256_slli_epi32(src32, 7));
	dstHi = _mm256_and_si256( dstHi, _mm256_set1_epi32(0x003E003E) );
	dstHi = _mm256_or_si256( dstHi, _mm256_and_si256(_mm256_slli_epi32(src32, 4), _mm256_set1_epi32(0x00003E00)) );
	dstHi = _mm256_or_si256( dstHi, _mm256_and_si256(_mm256_srli_epi32(dstHi, 5), _mm256_set1_epi32(0x00010101)) );
	dstHi = _mm256_or_si256( dstHi, srcAlphaBits32Hi );
}
// Converts sixteen 15-bit 555 colors into sixteen 32-bit 8888 colors, giving
// every result an alpha byte of 0xFF. Results are returned through dstLo/dstHi
// exactly as ColorspaceConvert555To8888_AVX2() produces them.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi)
{
	// The same all-opaque alpha pattern is used for both halves of the output.
	const v256u32 opaqueAlpha8 = _mm256_set1_epi32(0xFF000000);
	
	ColorspaceConvert555To8888_AVX2<SWAP_RB>(srcColor, opaqueAlpha8, opaqueAlpha8, dstLo, dstHi);
}
// Converts sixteen 15-bit 555 colors into sixteen 32-bit 6665 colors, giving
// every result the maximum 5-bit alpha (0x1F in the top byte). Results are
// returned through dstLo/dstHi exactly as ColorspaceConvert555To6665_AVX2()
// produces them.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi)
{
	// The same all-opaque alpha pattern is used for both halves of the output.
	const v256u32 opaqueAlpha5 = _mm256_set1_epi32(0x1F000000);
	
	ColorspaceConvert555To6665_AVX2<SWAP_RB>(srcColor, opaqueAlpha5, opaqueAlpha5, dstLo, dstHi);
}
// Converts eight 32-bit 8888 colors into eight 32-bit 6665 colors.
//
// Conversion algorithm:
//    RGB   8-bit to 6-bit formula: dstRGB6 = (srcRGB8 >> 2)
//    Alpha 8-bit to 5-bit formula: dstA5   = (srcA8   >> 3)
//
// When SWAP_RB is true, bytes 0 and 2 of every pixel are exchanged in the result.
template <bool SWAP_RB>
FORCEINLINE v256u32 ColorspaceConvert8888To6665_AVX2(const v256u32 &src)
{
	// Alpha lives in the top byte; keep its five most significant bits.
	const v256u32 alpha5 = _mm256_and_si256( _mm256_srli_epi32(src, 3), _mm256_set1_epi32(0x1F000000) );
	
	// Shift all three color bytes at once, then mask away the bits that
	// crossed over from the neighbouring byte.
	v256u32 rgb6 = _mm256_and_si256( _mm256_srli_epi32(src, 2), _mm256_set1_epi32(0x003F3F3F) );
	
	if (SWAP_RB)
	{
		// Per-pixel byte shuffle: swap bytes 0 and 2, leave bytes 1 and 3 in place.
		const v256u32 swapRBMask = _mm256_set_epi8(31,28,29,30, 27,24,25,26, 23,20,21,22, 19,16,17,18, 15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2);
		rgb6 = _mm256_shuffle_epi8(rgb6, swapRBMask);
	}
	
	return _mm256_or_si256(rgb6, alpha5);
}
// Converts eight 32-bit 6665 colors into eight 32-bit 8888 colors.
//
// Conversion algorithm:
//    RGB   6-bit to 8-bit formula: dstRGB8 = (srcRGB6 << 2) | ((srcRGB6 >> 4) & 0x03)
//    Alpha 5-bit to 8-bit formula: dstA8   = (srcA5   << 3) | ((srcA5   >> 2) & 0x07)
//
// When SWAP_RB is true, bytes 0 and 2 of every pixel are exchanged in the result.
template <bool SWAP_RB>
FORCEINLINE v256u32 ColorspaceConvert6665To8888_AVX2(const v256u32 &src)
{
	// Color: the six source bits become the high bits, and the top two source
	// bits are repeated in the low two bits.
	const v256u32 rgbHighBits = _mm256_and_si256( _mm256_slli_epi32(src, 2), _mm256_set1_epi32(0x00FCFCFC) );
	const v256u32 rgbLowBits  = _mm256_and_si256( _mm256_srli_epi32(src, 4), _mm256_set1_epi32(0x00030303) );
	v256u32 rgb8 = _mm256_or_si256(rgbHighBits, rgbLowBits);
	
	// Alpha: the five source bits become the high bits, and the top three
	// source bits are repeated in the low three bits.
	const v256u32 alphaHighBits = _mm256_and_si256( _mm256_slli_epi32(src, 3), _mm256_set1_epi32(0xF8000000) );
	const v256u32 alphaLowBits  = _mm256_and_si256( _mm256_srli_epi32(src, 2), _mm256_set1_epi32(0x07000000) );
	const v256u32 alpha8 = _mm256_or_si256(alphaHighBits, alphaLowBits);
	
	if (SWAP_RB)
	{
		// Per-pixel byte shuffle: swap bytes 0 and 2, leave bytes 1 and 3 in place.
		const v256u32 swapRBMask = _mm256_set_epi8(31,28,29,30, 27,24,25,26, 23,20,21,22, 19,16,17,18, 15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2);
		rgb8 = _mm256_shuffle_epi8(rgb8, swapRBMask);
	}
	
	return _mm256_or_si256(rgb8, alpha8);
}
// Converts sixteen 32-bit colors (6665 or 8888, selected by COLORFORMAT) into
// sixteen 16-bit 5551 colors.
//
// COLORFORMAT - NDSColorFormat_BGR666_Rev treats the input as 6665,
//               NDSColorFormat_BGR888_Rev treats it as 8888, and
//               NDSColorFormat_BGR555_Rev returns srcLo unchanged.
// SWAP_RB     - when true, the byte 0 and byte 2 components of the source
//               trade places in the output.
// srcLo       - source pixels 0-7.
// srcHi       - source pixels 8-15.
//
// Returns the sixteen converted pixels in order (pixel 0 in the lowest
// element). Bit 15 of each result is set when the source alpha is non-zero.
template <NDSColorFormat COLORFORMAT, bool SWAP_RB>
FORCEINLINE v256u16 _ConvertColorBaseTo5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi)
{
	if (COLORFORMAT == NDSColorFormat_BGR555_Rev)
	{
		return srcLo;
	}
	
	v256u32 rgbLo;
	v256u32 rgbHi;
	v256u16 alpha;
	
	if (COLORFORMAT == NDSColorFormat_BGR666_Rev)
	{
		// Each 6-bit component keeps its five most significant bits.
		if (SWAP_RB)
		{
			// Convert color from low bits
			rgbLo =                        _mm256_and_si256(_mm256_srli_epi32(srcLo, 17), _mm256_set1_epi32(0x0000001F));
			rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo,  4), _mm256_set1_epi32(0x000003E0)) );
			rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_slli_epi32(srcLo,  9), _mm256_set1_epi32(0x00007C00)) );
			
			// Convert color from high bits
			rgbHi =                        _mm256_and_si256(_mm256_srli_epi32(srcHi, 17), _mm256_set1_epi32(0x0000001F));
			rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi,  4), _mm256_set1_epi32(0x000003E0)) );
			rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_slli_epi32(srcHi,  9), _mm256_set1_epi32(0x00007C00)) );
		}
		else
		{
			// Convert color from low bits
			rgbLo =                        _mm256_and_si256(_mm256_srli_epi32(srcLo,  1), _mm256_set1_epi32(0x0000001F));
			rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo,  4), _mm256_set1_epi32(0x000003E0)) );
			rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo,  7), _mm256_set1_epi32(0x00007C00)) );
			
			// Convert color from high bits
			rgbHi =                        _mm256_and_si256(_mm256_srli_epi32(srcHi,  1), _mm256_set1_epi32(0x0000001F));
			rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi,  4), _mm256_set1_epi32(0x000003E0)) );
			rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi,  7), _mm256_set1_epi32(0x00007C00)) );
		}
		
		// Convert alpha: any non-zero 5-bit alpha sets bit 15 of the result.
		alpha = _mm256_packs_epi32( _mm256_and_si256(_mm256_srli_epi32(srcLo, 24), _mm256_set1_epi32(0x0000001F)), _mm256_and_si256(_mm256_srli_epi32(srcHi, 24), _mm256_set1_epi32(0x0000001F)) );
		alpha = _mm256_cmpgt_epi16(alpha, _mm256_setzero_si256());
		alpha = _mm256_and_si256(alpha, _mm256_set1_epi16(0x8000));
	}
	else if (COLORFORMAT == NDSColorFormat_BGR888_Rev)
	{
		// Each 8-bit component keeps its five most significant bits.
		if (SWAP_RB)
		{
			// Convert color from low bits
			rgbLo =                        _mm256_and_si256(_mm256_srli_epi32(srcLo, 19), _mm256_set1_epi32(0x0000001F));
			rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo,  6), _mm256_set1_epi32(0x000003E0)) );
			rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_slli_epi32(srcLo,  7), _mm256_set1_epi32(0x00007C00)) );
			
			// Convert color from high bits
			rgbHi =                        _mm256_and_si256(_mm256_srli_epi32(srcHi, 19), _mm256_set1_epi32(0x0000001F));
			rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi,  6), _mm256_set1_epi32(0x000003E0)) );
			rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_slli_epi32(srcHi,  7), _mm256_set1_epi32(0x00007C00)) );
		}
		else
		{
			// Convert color from low bits
			rgbLo =                        _mm256_and_si256(_mm256_srli_epi32(srcLo,  3), _mm256_set1_epi32(0x0000001F));
			rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo,  6), _mm256_set1_epi32(0x000003E0)) );
			rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo,  9), _mm256_set1_epi32(0x00007C00)) );
			
			// Convert color from high bits
			rgbHi =                        _mm256_and_si256(_mm256_srli_epi32(srcHi,  3), _mm256_set1_epi32(0x0000001F));
			rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi,  6), _mm256_set1_epi32(0x000003E0)) );
			rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi,  9), _mm256_set1_epi32(0x00007C00)) );
		}
		
		// Convert alpha: any non-zero 8-bit alpha sets bit 15 of the result.
		alpha = _mm256_packs_epi32( _mm256_srli_epi32(srcLo, 24), _mm256_srli_epi32(srcHi, 24) );
		alpha = _mm256_cmpgt_epi16(alpha, _mm256_setzero_si256());
		alpha = _mm256_and_si256(alpha, _mm256_set1_epi16(0x8000));
	}
	
	// _mm256_packs_epi32() packs each 128-bit lane independently, so both the
	// packed color and the packed alpha hold pixels in the order
	// 0-3, 8-11, 4-7, 12-15. They share that order, so they can be combined
	// first and then put back into sequential order with one permute of the
	// 64-bit quarters from (0,1,2,3) to (0,2,1,3).
	const v256u16 packedColor = _mm256_or_si256(_mm256_packs_epi32(rgbLo, rgbHi), alpha);
	return _mm256_permute4x64_epi64(packedColor, 0xD8);
}
// Converts sixteen 32-bit 8888 colors (srcLo and srcHi, eight each) into
// sixteen 16-bit 5551 colors by delegating to the shared converter with the
// 888 source format selected.
template <bool SWAP_RB>
FORCEINLINE v256u16 ColorspaceConvert8888To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi)
{
	const v256u16 converted5551 = _ConvertColorBaseTo5551_AVX2<NDSColorFormat_BGR888_Rev, SWAP_RB>(srcLo, srcHi);
	return converted5551;
}
// Converts sixteen 32-bit 6665 colors (srcLo and srcHi, eight each) into
// sixteen 16-bit 5551 colors by delegating to the shared converter with the
// 666 source format selected.
template <bool SWAP_RB>
FORCEINLINE v256u16 ColorspaceConvert6665To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi)
{
	const v256u16 converted5551 = _ConvertColorBaseTo5551_AVX2<NDSColorFormat_BGR666_Rev, SWAP_RB>(srcLo, srcHi);
	return converted5551;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
static size_t ColorspaceConvertBuffer555To8888Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec256)
{
size_t i = 0;
for (; i < pixCountVec256; i+=16)
{
v256u16 src_vec256 = (IS_UNALIGNED) ? _mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i));
v256u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To8888Opaque_AVX2<SWAP_RB>(src_vec256, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
_mm256_storeu_si256((v256u32 *)(dst+i+0), dstConvertedLo);
_mm256_storeu_si256((v256u32 *)(dst+i+8), dstConvertedHi);
}
else
{
_mm256_store_si256((v256u32 *)(dst+i+0), dstConvertedLo);
_mm256_store_si256((v256u32 *)(dst+i+8), dstConvertedHi);
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer555To6665Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec256)
{
size_t i = 0;
for (; i < pixCountVec256; i+=16)
{
v256u16 src_vec256 = (IS_UNALIGNED) ? _mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i));
v256u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To6665Opaque_AVX2<SWAP_RB>(src_vec256, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
_mm256_storeu_si256((v256u32 *)(dst+i+0), dstConvertedLo);
_mm256_storeu_si256((v256u32 *)(dst+i+8), dstConvertedHi);
}
else
{
_mm256_store_si256((v256u32 *)(dst+i+0), dstConvertedLo);
_mm256_store_si256((v256u32 *)(dst+i+8), dstConvertedHi);
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer8888To6665_AVX2(const u32 *src, u32 *dst, size_t pixCountVec256)
{
size_t i = 0;
for (; i < pixCountVec256; i+=8)
{
if (IS_UNALIGNED)
{
_mm256_storeu_si256( (v256u32 *)(dst+i), ColorspaceConvert8888To6665_AVX2<SWAP_RB>(_mm256_loadu_si256((v256u32 *)(src+i))) );
}
else
{
_mm256_store_si256( (v256u32 *)(dst+i), ColorspaceConvert8888To6665_AVX2<SWAP_RB>(_mm256_load_si256((v256u32 *)(src+i))) );
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer6665To8888_AVX2(const u32 *src, u32 *dst, size_t pixCountVec256)
{
size_t i = 0;
for (; i < pixCountVec256; i+=8)
{
if (IS_UNALIGNED)
{
_mm256_storeu_si256( (v256u32 *)(dst+i), ColorspaceConvert6665To8888_AVX2<SWAP_RB>(_mm256_loadu_si256((v256u32 *)(src+i))) );
}
else
{
_mm256_store_si256( (v256u32 *)(dst+i), ColorspaceConvert6665To8888_AVX2<SWAP_RB>(_mm256_load_si256((v256u32 *)(src+i))) );
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer8888To5551_AVX2(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec256)
{
size_t i = 0;
for (; i < pixCountVec256; i+=16)
{
if (IS_UNALIGNED)
{
_mm256_storeu_si256( (v256u16 *)(dst+i), ColorspaceConvert8888To5551_AVX2<SWAP_RB>(_mm256_loadu_si256((v256u32 *)(src+i)), _mm256_loadu_si256((v256u32 *)(src+i+8))) );
}
else
{
_mm256_store_si256( (v256u16 *)(dst+i), ColorspaceConvert8888To5551_AVX2<SWAP_RB>(_mm256_load_si256((v256u32 *)(src+i)), _mm256_load_si256((v256u32 *)(src+i+8))) );
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer6665To5551_AVX2(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec256)
{
size_t i = 0;
for (; i < pixCountVec256; i+=16)
{
if (IS_UNALIGNED)
{
_mm256_storeu_si256( (v256u16 *)(dst+i), ColorspaceConvert6665To5551_AVX2<SWAP_RB>(_mm256_loadu_si256((v256u32 *)(src+i)), _mm256_loadu_si256((v256u32 *)(src+i+8))) );
}
else
{
_mm256_store_si256( (v256u16 *)(dst+i), ColorspaceConvert6665To5551_AVX2<SWAP_RB>(_mm256_load_si256((v256u32 *)(src+i)), _mm256_load_si256((v256u32 *)(src+i+8))) );
}
}
return i;
}
// AVX2 555 -> opaque 8888 buffer conversion: no R/B swap, aligned loads and stores.
// Returns the pixel count reported by the vector routine.
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer555To8888Opaque_AVX2<false, false>(src, dst, pixCount);
	return convertedCount;
}
// AVX2 555 -> opaque 8888 buffer conversion: R/B swapped, aligned loads and stores.
// Returns the pixel count reported by the vector routine.
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer555To8888Opaque_AVX2<true, false>(src, dst, pixCount);
	return convertedCount;
}
// AVX2 555 -> opaque 8888 buffer conversion: no R/B swap, unaligned loads and stores.
// Returns the pixel count reported by the vector routine.
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer555To8888Opaque_AVX2<false, true>(src, dst, pixCount);
	return convertedCount;
}
// AVX2 555 -> opaque 8888 buffer conversion: R/B swapped, unaligned loads and stores.
// Returns the pixel count reported by the vector routine.
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer555To8888Opaque_AVX2<true, true>(src, dst, pixCount);
	return convertedCount;
}
// AVX2 555 -> opaque 6665 buffer conversion: no R/B swap, aligned loads and stores.
// Returns the pixel count reported by the vector routine.
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer555To6665Opaque_AVX2<false, false>(src, dst, pixCount);
	return convertedCount;
}
// AVX2 555 -> opaque 6665 buffer conversion: R/B swapped, aligned loads and stores.
// Returns the pixel count reported by the vector routine.
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer555To6665Opaque_AVX2<true, false>(src, dst, pixCount);
	return convertedCount;
}
// AVX2 555 -> opaque 6665 buffer conversion: no R/B swap, unaligned loads and stores.
// Returns the pixel count reported by the vector routine.
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer555To6665Opaque_AVX2<false, true>(src, dst, pixCount);
	return convertedCount;
}
// AVX2 555 -> opaque 6665 buffer conversion: R/B swapped, unaligned loads and stores.
// Returns the pixel count reported by the vector routine.
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer555To6665Opaque_AVX2<true, true>(src, dst, pixCount);
	return convertedCount;
}
// AVX2 8888 -> 6665 buffer conversion: no R/B swap, aligned loads and stores.
// Returns the pixel count reported by the vector routine.
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer8888To6665_AVX2<false, false>(src, dst, pixCount);
	return convertedCount;
}
// AVX2 8888 -> 6665 buffer conversion: R/B swapped, aligned loads and stores.
// Returns the pixel count reported by the vector routine.
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer8888To6665_AVX2<true, false>(src, dst, pixCount);
	return convertedCount;
}
// AVX2 8888 -> 6665 buffer conversion: no R/B swap, unaligned loads and stores.
// Returns the pixel count reported by the vector routine.
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer8888To6665_AVX2<false, true>(src, dst, pixCount);
	return convertedCount;
}
// AVX2 8888 -> 6665 buffer conversion: R/B swapped, unaligned loads and stores.
// Returns the pixel count reported by the vector routine.
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer8888To6665_AVX2<true, true>(src, dst, pixCount);
	return convertedCount;
}
// AVX2 6665 -> 8888 buffer conversion: no R/B swap, aligned loads and stores.
// Returns the pixel count reported by the vector routine.
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer6665To8888_AVX2<false, false>(src, dst, pixCount);
	return convertedCount;
}
// AVX2 6665 -> 8888 buffer conversion: R/B swapped, aligned loads and stores.
// Returns the pixel count reported by the vector routine.
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer6665To8888_AVX2<true, false>(src, dst, pixCount);
	return convertedCount;
}
// AVX2 6665 -> 8888 buffer conversion: no R/B swap, unaligned loads and stores.
// Returns the pixel count reported by the vector routine.
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer6665To8888_AVX2<false, true>(src, dst, pixCount);
	return convertedCount;
}
// AVX2 6665 -> 8888 buffer conversion: R/B swapped, unaligned loads and stores.
// Returns the pixel count reported by the vector routine.
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer6665To8888_AVX2<true, true>(src, dst, pixCount);
	return convertedCount;
}
// AVX2 8888 -> 5551 buffer conversion: no R/B swap, aligned loads and stores.
// Returns the pixel count reported by the vector routine.
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer8888To5551_AVX2<false, false>(src, dst, pixCount);
	return convertedCount;
}
// AVX2 8888 -> 5551 buffer conversion: R/B swapped, aligned loads and stores.
// Returns the pixel count reported by the vector routine.
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer8888To5551_AVX2<true, false>(src, dst, pixCount);
	return convertedCount;
}
// AVX2 8888 -> 5551 buffer conversion: no R/B swap, unaligned loads and stores.
// Returns the pixel count reported by the vector routine.
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer8888To5551_AVX2<false, true>(src, dst, pixCount);
	return convertedCount;
}
// AVX2 8888 -> 5551 buffer conversion: R/B swapped, unaligned loads and stores.
// Returns the pixel count reported by the vector routine.
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer8888To5551_AVX2<true, true>(src, dst, pixCount);
	return convertedCount;
}
// AVX2 6665 -> 5551 buffer conversion: no R/B swap, aligned loads and stores.
// Returns the pixel count reported by the vector routine.
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer6665To5551_AVX2<false, false>(src, dst, pixCount);
	return convertedCount;
}
// AVX2 6665 -> 5551 buffer conversion: R/B swapped, aligned loads and stores.
// Returns the pixel count reported by the vector routine.
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer6665To5551_AVX2<true, false>(src, dst, pixCount);
	return convertedCount;
}
// AVX2 6665 -> 5551 buffer conversion: no R/B swap, unaligned loads and stores.
// Returns the pixel count reported by the vector routine.
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer6665To5551_AVX2<false, true>(src, dst, pixCount);
	return convertedCount;
}
// AVX2 6665 -> 5551 buffer conversion: R/B swapped, unaligned loads and stores.
// Returns the pixel count reported by the vector routine.
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = ColorspaceConvertBuffer6665To5551_AVX2<true, true>(src, dst, pixCount);
	return convertedCount;
}
template void ColorspaceConvert555To8888_AVX2<true>(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To8888_AVX2<false>(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To6665_AVX2<true>(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To6665_AVX2<false>(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AVX2<true>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AVX2<false>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AVX2<true>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AVX2<false>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template v256u32 ColorspaceConvert8888To6665_AVX2<true>(const v256u32 &src);
template v256u32 ColorspaceConvert8888To6665_AVX2<false>(const v256u32 &src);
template v256u32 ColorspaceConvert6665To8888_AVX2<true>(const v256u32 &src);
template v256u32 ColorspaceConvert6665To8888_AVX2<false>(const v256u32 &src);
template v256u16 ColorspaceConvert8888To5551_AVX2<true>(const v256u32 &srcLo, const v256u32 &srcHi);
template v256u16 ColorspaceConvert8888To5551_AVX2<false>(const v256u32 &srcLo, const v256u32 &srcHi);
template v256u16 ColorspaceConvert6665To5551_AVX2<true>(const v256u32 &srcLo, const v256u32 &srcHi);
template v256u16 ColorspaceConvert6665To5551_AVX2<false>(const v256u32 &srcLo, const v256u32 &srcHi);
#endif // ENABLE_AVX2

View File

@ -0,0 +1,74 @@
/*
Copyright (C) 2016 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef COLORSPACEHANDLER_AVX2_H
#define COLORSPACEHANDLER_AVX2_H
#include "colorspacehandler.h"
#ifndef ENABLE_AVX2
#warning This header requires AVX2 support.
#else
// Per-vector AVX2 colorspace conversion primitives. Only declarations live here; each
// one is templated on SWAP_RB.

// 555 -> 8888 / 6665 with caller-supplied alpha bits; results are written to dstLo/dstHi.
template <bool SWAP_RB>
void ColorspaceConvert555To8888_AVX2(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);

template <bool SWAP_RB>
void ColorspaceConvert555To6665_AVX2(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);

// 555 -> 8888 / 6665, opaque variants (no alpha input).
template <bool SWAP_RB>
void ColorspaceConvert555To8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);

template <bool SWAP_RB>
void ColorspaceConvert555To6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);

// 8888 <-> 6665, one 32-bit vector in, one out.
template <bool SWAP_RB>
v256u32 ColorspaceConvert8888To6665_AVX2(const v256u32 &src);

template <bool SWAP_RB>
v256u32 ColorspaceConvert6665To8888_AVX2(const v256u32 &src);

// 8888 / 6665 -> 5551, two 32-bit vectors in, one 16-bit vector out.
template <bool SWAP_RB>
v256u16 ColorspaceConvert8888To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi);

template <bool SWAP_RB>
v256u16 ColorspaceConvert6665To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi);
// AVX2 flavour of ColorspaceHandler. For every conversion there are four entry points:
// plain, _SwapRB, _IsUnaligned and _SwapRB_IsUnaligned.
class ColorspaceHandler_AVX2 : public ColorspaceHandler
{
public:
	ColorspaceHandler_AVX2() {}
	
	// 16-bit 555 -> 32-bit 8888, opaque
	size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
	size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
	size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
	size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
	
	// 16-bit 555 -> 32-bit 6665, opaque
	size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
	size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
	size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
	size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
	
	// 32-bit 8888 -> 32-bit 6665 (src/dst are not declared __restrict)
	size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
	size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
	size_t ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
	size_t ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
	
	// 32-bit 6665 -> 32-bit 8888 (src/dst are not declared __restrict)
	size_t ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const;
	size_t ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
	size_t ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
	size_t ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
	
	// 32-bit 8888 -> 16-bit 5551
	size_t ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
	size_t ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
	size_t ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
	size_t ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
	
	// 32-bit 6665 -> 16-bit 5551
	size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
	size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
	size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
	size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
};
#endif // ENABLE_AVX2
#endif /* COLORSPACEHANDLER_AVX2_H */

View File

@ -0,0 +1,345 @@
/*
Copyright (C) 2016 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/
#include "colorspacehandler_Altivec.h"
#ifndef ENABLE_ALTIVEC
#error This code requires PowerPC AltiVec support.
#else
// Expands eight 16-bit pixels (srcColor) into two vectors of four 32-bit pixels each.
// dstLo receives the result of vec_unpackl(), dstHi the result of vec_unpackh(); the
// alpha byte of each output pixel is taken from srcAlphaBits32Lo / srcAlphaBits32Hi.
//
// NOTE(review): SWAP_RB is not referenced in this body, so both instantiations produce
// the same output -- confirm whether a red/blue swap was intended here.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi)
{
	// Conversion algorithm:
	//    RGB   5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
	
	// vec_splat_u32() only accepts a 5-bit signed literal (-16..15), so the 0xFF000000
	// select mask has to be written as a vector literal instead.
	const v128u32 alphaSelectMask = {0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000};
	
	// Per-byte shift counts: the first three bytes of every pixel are expanded, the
	// fourth is left alone (it is replaced by vec_sel() below).
	const v128u8 shiftLeftCounts  = {3,3,3,0, 3,3,3,0, 3,3,3,0, 3,3,3,0};
	const v128u8 shiftRightCounts = {2,2,2,0, 2,2,2,0, 2,2,2,0, 2,2,2,0};
	
	dstLo = vec_unpackl((vector pixel)srcColor);
	dstLo = (v128u32)vec_or( vec_sl((v128u8)dstLo, shiftLeftCounts), vec_sr((v128u8)dstLo, shiftRightCounts) );
	dstLo = vec_sel(dstLo, srcAlphaBits32Lo, alphaSelectMask);
	
	dstHi = vec_unpackh((vector pixel)srcColor);
	dstHi = (v128u32)vec_or( vec_sl((v128u8)dstHi, shiftLeftCounts), vec_sr((v128u8)dstHi, shiftRightCounts) );
	dstHi = vec_sel(dstHi, srcAlphaBits32Hi, alphaSelectMask);
}
// Expands eight 16-bit pixels (srcColor) into two vectors of four 32-bit 6665 pixels.
// dstLo receives the result of vec_unpackl(), dstHi the result of vec_unpackh(); the
// alpha byte of each output pixel is taken from srcAlphaBits32Lo / srcAlphaBits32Hi.
//
// NOTE(review): SWAP_RB is not referenced in this body, so both instantiations produce
// the same output -- confirm whether a red/blue swap was intended here.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi)
{
	// Conversion algorithm:
	//    RGB   5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
	
	// vec_splat_u32() only accepts a 5-bit signed literal (-16..15), so the 0xFF000000
	// select mask has to be written as a vector literal instead.
	const v128u32 alphaSelectMask = {0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000};
	
	// Per-byte shift counts: the first three bytes of every pixel are expanded, the
	// fourth is left alone (it is replaced by vec_sel() below).
	const v128u8 shiftLeftCounts  = {1,1,1,0, 1,1,1,0, 1,1,1,0, 1,1,1,0};
	const v128u8 shiftRightCounts = {4,4,4,0, 4,4,4,0, 4,4,4,0, 4,4,4,0};
	
	dstLo = vec_unpackl((vector pixel)srcColor);
	dstLo = (v128u32)vec_or( vec_sl((v128u8)dstLo, shiftLeftCounts), vec_sr((v128u8)dstLo, shiftRightCounts) );
	dstLo = vec_sel(dstLo, srcAlphaBits32Lo, alphaSelectMask);
	
	dstHi = vec_unpackh((vector pixel)srcColor);
	dstHi = (v128u32)vec_or( vec_sl((v128u8)dstHi, shiftLeftCounts), vec_sr((v128u8)dstHi, shiftRightCounts) );
	dstHi = vec_sel(dstHi, srcAlphaBits32Hi, alphaSelectMask);
}
// Opaque 555 -> 8888: same as ColorspaceConvert555To8888_AltiVec() with the alpha bits
// of every pixel fixed at 0xFF000000 for both output halves.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
	const v128u32 opaqueAlphaBits = {0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000};
	
	ColorspaceConvert555To8888_AltiVec<SWAP_RB>(srcColor, opaqueAlphaBits, opaqueAlphaBits, dstLo, dstHi);
}
// Opaque 555 -> 6665: same as ColorspaceConvert555To6665_AltiVec() with the alpha bits
// of every pixel fixed at 0x1F000000 (the largest 5-bit alpha) for both output halves.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
	const v128u32 opaqueAlphaBits = {0x1F000000, 0x1F000000, 0x1F000000, 0x1F000000};
	
	ColorspaceConvert555To6665_AltiVec<SWAP_RB>(srcColor, opaqueAlphaBits, opaqueAlphaBits, dstLo, dstHi);
}
// Converts four 8888 pixels to 6665 with one per-byte shift. When SWAP_RB is set,
// bytes 0 and 2 of every pixel are exchanged afterwards.
template <bool SWAP_RB>
FORCEINLINE v128u32 ColorspaceConvert8888To6665_AltiVec(const v128u32 &src)
{
	// Conversion algorithm:
	//    RGB   8-bit to 6-bit formula: dstRGB6 = (srcRGB8 >> 2)
	//    Alpha 8-bit to 5-bit formula: dstA5   = (srcA8   >> 3)
	
	// Bytes 0-2 of each pixel lose two bits, byte 3 loses three.
	const v128u8 perByteShift = {2,2,2,3, 2,2,2,3, 2,2,2,3, 2,2,2,3};
	v128u8 converted = vec_sr((v128u8)src, perByteShift);
	
	if (!SWAP_RB)
	{
		return (v128u32)converted;
	}
	
	const v128u8 swapBytes0And2 = {2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15};
	converted = vec_perm(converted, converted, swapBytes0And2);
	
	return (v128u32)converted;
}
// Converts four 6665 pixels to 8888 using per-byte shifts. When SWAP_RB is set,
// bytes 0 and 2 of every pixel are exchanged afterwards.
template <bool SWAP_RB>
FORCEINLINE v128u32 ColorspaceConvert6665To8888_AltiVec(const v128u32 &src)
{
	// Conversion algorithm:
	//    RGB   6-bit to 8-bit formula: dstRGB8 = (srcRGB6 << 2) | ((srcRGB6 >> 4) & 0x03)
	//    Alpha 5-bit to 8-bit formula: dstA8   = (srcA5   << 3) | ((srcA5   >> 2) & 0x07)
	
	// Bytes 0-2 of each pixel use the 6-bit formula, byte 3 uses the 5-bit formula.
	const v128u8 shiftUp   = {2,2,2,3, 2,2,2,3, 2,2,2,3, 2,2,2,3};
	const v128u8 shiftDown = {4,4,4,2, 4,4,4,2, 4,4,4,2, 4,4,4,2};
	
	const v128u8 highBits = vec_sl((v128u8)src, shiftUp);
	const v128u8 lowBits  = vec_sr((v128u8)src, shiftDown);
	v128u8 converted = vec_or(highBits, lowBits);
	
	if (!SWAP_RB)
	{
		return (v128u32)converted;
	}
	
	const v128u8 swapBytes0And2 = {2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15};
	converted = vec_perm(converted, converted, swapBytes0And2);
	
	return (v128u32)converted;
}
// Packs eight 32-bit pixels (srcLo holds four, srcHi holds four) into eight 16-bit
// 5551 pixels. COLORFORMAT selects how the 32-bit input is interpreted:
//   - NDSColorFormat_BGR555_Rev: srcLo is returned as-is (srcHi is ignored).
//   - NDSColorFormat_BGR666_Rev: every byte is shifted left by 2 before packing, and
//     alpha is masked to 5 bits.
//   - NDSColorFormat_BGR888_Rev: the input is packed directly.
// In both 32-bit cases, bit 15 of each output pixel is set when the value in bits
// 24-31 of the source pixel is non-zero, and the low 15 bits come from vec_packpx().
// When SWAP_RB is set, bytes 0 and 2 of each source pixel are exchanged first.
//
// NOTE(review): alpha is read with a 32-bit shift right by 24, while the SWAP_RB permute
// and the other AltiVec converters in this file address channels by byte index. Confirm
// that both agree on which byte holds alpha for the target's byte order.
template <NDSColorFormat COLORFORMAT, bool SWAP_RB>
FORCEINLINE v128u16 _ConvertColorBaseTo5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi)
{
	if (COLORFORMAT == NDSColorFormat_BGR555_Rev)
	{
		return (v128u16)srcLo;
	}
	
	// vec_splat_u16()/vec_splat_u32() only accept a 5-bit signed literal (-16..15).
	// Every constant outside that range is therefore written as a vector literal.
	const v128u32 alphaShift  = {24, 24, 24, 24};
	const v128u32 alpha5Mask  = {0x0000001F, 0x0000001F, 0x0000001F, 0x0000001F};
	const v128u16 alphaBit    = {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000};
	const v128u16 colorMask   = {0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF};
	const v128u8  swapRBBytes = {2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15};
	
	v128u32 rgbLo;
	v128u32 rgbHi;
	v128u16 dstColor;
	v128u16 dstAlpha;
	
	if (COLORFORMAT == NDSColorFormat_BGR666_Rev)
	{
		// Convert alpha: any non-zero 5-bit alpha becomes bit 15.
		dstAlpha = vec_packsu( vec_and(vec_sr(srcLo, alphaShift), alpha5Mask), vec_and(vec_sr(srcHi, alphaShift), alpha5Mask) );
		dstAlpha = (v128u16)vec_cmpgt(dstAlpha, vec_splat_u16(0));
		dstAlpha = vec_and(dstAlpha, alphaBit);
		
		// Convert RGB: scale the 6-bit channels up to 8 bits so that vec_packpx()
		// can take the top 5 bits of each.
		if (SWAP_RB)
		{
			rgbLo = vec_perm(srcLo, srcLo, swapRBBytes);
			rgbHi = vec_perm(srcHi, srcHi, swapRBBytes);
			
			rgbLo = vec_sl( rgbLo, vec_splat_u32(2) );
			rgbHi = vec_sl( rgbHi, vec_splat_u32(2) );
			
			dstColor = (v128u16)vec_packpx(rgbLo, rgbHi);
		}
		else
		{
			rgbLo = vec_sl( srcLo, vec_splat_u32(2) );
			rgbHi = vec_sl( srcHi, vec_splat_u32(2) );
			
			dstColor = (v128u16)vec_packpx(rgbLo, rgbHi);
		}
	}
	else if (COLORFORMAT == NDSColorFormat_BGR888_Rev)
	{
		// Convert alpha: any non-zero 8-bit alpha becomes bit 15.
		dstAlpha = vec_packsu( vec_sr(srcLo, alphaShift), vec_sr(srcHi, alphaShift) );
		dstAlpha = (v128u16)vec_cmpgt(dstAlpha, vec_splat_u16(0));
		dstAlpha = vec_and(dstAlpha, alphaBit);
		
		// Convert RGB
		if (SWAP_RB)
		{
			rgbLo = vec_perm(srcLo, srcLo, swapRBBytes);
			rgbHi = vec_perm(srcHi, srcHi, swapRBBytes);
			
			dstColor = (v128u16)vec_packpx(rgbLo, rgbHi);
		}
		else
		{
			dstColor = (v128u16)vec_packpx(srcLo, srcHi);
		}
	}
	
	// Drop whatever vec_packpx() left in bit 15 and substitute the computed alpha bit.
	dstColor = vec_and(dstColor, colorMask);
	return vec_or(dstColor, dstAlpha);
}
// 8888 -> 5551: the shared packer with COLORFORMAT fixed to NDSColorFormat_BGR888_Rev.
template <bool SWAP_RB>
FORCEINLINE v128u16 ColorspaceConvert8888To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi)
{
	const v128u16 packed = _ConvertColorBaseTo5551_AltiVec<NDSColorFormat_BGR888_Rev, SWAP_RB>(srcLo, srcHi);
	return packed;
}
// 6665 -> 5551: the shared packer with COLORFORMAT fixed to NDSColorFormat_BGR666_Rev.
template <bool SWAP_RB>
FORCEINLINE v128u16 ColorspaceConvert6665To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi)
{
	const v128u16 packed = _ConvertColorBaseTo5551_AltiVec<NDSColorFormat_BGR666_Rev, SWAP_RB>(srcLo, srcHi);
	return packed;
}
template <bool SWAP_RB>
static size_t ColorspaceConvertBuffer555To8888Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=8)
{
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To8888Opaque_AltiVec<SWAP_RB>( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi );
vec_st(dstConvertedHi, 0, dst+i);
vec_st(dstConvertedLo, 16, dst+i);
}
return i;
}
template <bool SWAP_RB>
size_t ColorspaceConvertBuffer555To6665Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=8)
{
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To6665Opaque_AltiVec<SWAP_RB>( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi );
vec_st(dstConvertedHi, 0, dst+i);
vec_st(dstConvertedLo, 16, dst+i);
}
return i;
}
template <bool SWAP_RB>
size_t ColorspaceConvertBuffer8888To6665_AltiVec(const u32 *src, u32 *dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=4)
{
vec_st( ColorspaceConvert8888To6665_AltiVec<SWAP_RB>(vec_ld(0, src+i)), 0, dst+i );
}
return i;
}
template <bool SWAP_RB>
size_t ColorspaceConvertBuffer6665To8888_AltiVec(const u32 *src, u32 *dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=4)
{
vec_st( ColorspaceConvert6665To8888_AltiVec<SWAP_RB>(vec_ld(0, src+i)), 0, dst+i );
}
return i;
}
template <bool SWAP_RB>
size_t ColorspaceConvertBuffer8888To5551_AltiVec(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=8)
{
vec_st( ColorspaceConvert8888To5551_AltiVec<SWAP_RB>(vec_ld(0, src+i), vec_ld(16, src+i)), 0, dst+i );
}
return i;
}
template <bool SWAP_RB>
size_t ColorspaceConvertBuffer6665To5551_AltiVec(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=8)
{
vec_st( ColorspaceConvert6665To5551_AltiVec<SWAP_RB>(vec_ld(0, src+i), vec_ld(16, src+i)), 0, dst+i );
}
return i;
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	// Runs the AltiVec 555 -> 8888 opaque buffer loop with SWAP_RB = false and
	// returns the index at which that loop stopped.
	const size_t result = ColorspaceConvertBuffer555To8888Opaque_AltiVec<false>(src, dst, pixCount);
	return result;
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	// Runs the AltiVec 555 -> 8888 opaque buffer loop with SWAP_RB = true and
	// returns the index at which that loop stopped.
	const size_t result = ColorspaceConvertBuffer555To8888Opaque_AltiVec<true>(src, dst, pixCount);
	return result;
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	// Runs the AltiVec 555 -> 6665 opaque buffer loop with SWAP_RB = false and
	// returns the index at which that loop stopped.
	const size_t result = ColorspaceConvertBuffer555To6665Opaque_AltiVec<false>(src, dst, pixCount);
	return result;
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	// Runs the AltiVec 555 -> 6665 opaque buffer loop with SWAP_RB = true and
	// returns the index at which that loop stopped.
	const size_t result = ColorspaceConvertBuffer555To6665Opaque_AltiVec<true>(src, dst, pixCount);
	return result;
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
{
	// Runs the AltiVec 8888 -> 6665 buffer loop with SWAP_RB = false and
	// returns the index at which that loop stopped.
	const size_t result = ColorspaceConvertBuffer8888To6665_AltiVec<false>(src, dst, pixCount);
	return result;
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
	// Runs the AltiVec 8888 -> 6665 buffer loop with SWAP_RB = true and
	// returns the index at which that loop stopped.
	const size_t result = ColorspaceConvertBuffer8888To6665_AltiVec<true>(src, dst, pixCount);
	return result;
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const
{
	// Runs the AltiVec 6665 -> 8888 buffer loop with SWAP_RB = false and
	// returns the index at which that loop stopped.
	const size_t result = ColorspaceConvertBuffer6665To8888_AltiVec<false>(src, dst, pixCount);
	return result;
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
	// Runs the AltiVec 6665 -> 8888 buffer loop with SWAP_RB = true and
	// returns the index at which that loop stopped.
	const size_t result = ColorspaceConvertBuffer6665To8888_AltiVec<true>(src, dst, pixCount);
	return result;
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	// Runs the AltiVec 8888 -> 5551 buffer loop with SWAP_RB = false and
	// returns the index at which that loop stopped.
	const size_t result = ColorspaceConvertBuffer8888To5551_AltiVec<false>(src, dst, pixCount);
	return result;
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	// Runs the AltiVec 8888 -> 5551 buffer loop with SWAP_RB = true and
	// returns the index at which that loop stopped.
	const size_t result = ColorspaceConvertBuffer8888To5551_AltiVec<true>(src, dst, pixCount);
	return result;
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	// Runs the AltiVec 6665 -> 5551 buffer loop with SWAP_RB = false and
	// returns the index at which that loop stopped.
	const size_t result = ColorspaceConvertBuffer6665To5551_AltiVec<false>(src, dst, pixCount);
	return result;
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	// Runs the AltiVec 6665 -> 5551 buffer loop with SWAP_RB = true and
	// returns the index at which that loop stopped.
	const size_t result = ColorspaceConvertBuffer6665To5551_AltiVec<true>(src, dst, pixCount);
	return result;
}
// Explicit instantiations of the per-vector AltiVec conversion templates, one for each
// value of the SWAP_RB template argument.

// 555 -> 8888 / 6665, caller-supplied alpha bits
template void ColorspaceConvert555To8888_AltiVec<false>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_AltiVec<true>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);

template void ColorspaceConvert555To6665_AltiVec<false>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_AltiVec<true>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);

// 555 -> 8888 / 6665, opaque
template void ColorspaceConvert555To8888Opaque_AltiVec<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AltiVec<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);

template void ColorspaceConvert555To6665Opaque_AltiVec<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AltiVec<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);

// 8888 <-> 6665
template v128u32 ColorspaceConvert8888To6665_AltiVec<false>(const v128u32 &src);
template v128u32 ColorspaceConvert8888To6665_AltiVec<true>(const v128u32 &src);

template v128u32 ColorspaceConvert6665To8888_AltiVec<false>(const v128u32 &src);
template v128u32 ColorspaceConvert6665To8888_AltiVec<true>(const v128u32 &src);

// 8888 / 6665 -> 5551
template v128u16 ColorspaceConvert8888To5551_AltiVec<false>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u16 ColorspaceConvert8888To5551_AltiVec<true>(const v128u32 &srcLo, const v128u32 &srcHi);

template v128u16 ColorspaceConvert6665To5551_AltiVec<false>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u16 ColorspaceConvert6665To5551_AltiVec<true>(const v128u32 &srcLo, const v128u32 &srcHi);
#endif // ENABLE_ALTIVEC

View File

@ -0,0 +1,64 @@
/*
Copyright (C) 2016 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef COLORSPACEHANDLER_ALTIVEC_H
#define COLORSPACEHANDLER_ALTIVEC_H
#include "colorspacehandler.h"
#ifndef ENABLE_ALTIVEC
#warning This header requires PowerPC AltiVec support.
#else
// Per-vector AltiVec colorspace conversion primitives. Only declarations live here; each
// one is templated on SWAP_RB.

// 555 -> 8888 / 6665 with caller-supplied alpha bits; results are written to dstLo/dstHi.
template <bool SWAP_RB>
void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);

template <bool SWAP_RB>
void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);

// 555 -> 8888 / 6665, opaque variants (no alpha input).
template <bool SWAP_RB>
void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);

template <bool SWAP_RB>
void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);

// 8888 <-> 6665, one 32-bit vector in, one out.
template <bool SWAP_RB>
v128u32 ColorspaceConvert8888To6665_AltiVec(const v128u32 &src);

template <bool SWAP_RB>
v128u32 ColorspaceConvert6665To8888_AltiVec(const v128u32 &src);

// 8888 / 6665 -> 5551, two 32-bit vectors in, one 16-bit vector out.
template <bool SWAP_RB>
v128u16 ColorspaceConvert8888To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi);

template <bool SWAP_RB>
v128u16 ColorspaceConvert6665To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi);
// AltiVec has very poor support for dealing with unaligned addresses (it's possible, just
// very obtuse), so we're not even going to bother dealing with any unaligned addresses.
// AltiVec flavour of ColorspaceHandler. Each conversion has a plain and a _SwapRB entry
// point; no _IsUnaligned variants are declared here.
class ColorspaceHandler_AltiVec : public ColorspaceHandler
{
public:
	ColorspaceHandler_AltiVec() {}
	
	// 16-bit 555 -> 32-bit 8888, opaque
	size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
	size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
	
	// 16-bit 555 -> 32-bit 6665, opaque
	size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
	size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
	
	// 32-bit 8888 -> 32-bit 6665 (src/dst are not declared __restrict)
	size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
	size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
	
	// 32-bit 6665 -> 32-bit 8888 (src/dst are not declared __restrict)
	size_t ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const;
	size_t ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
	
	// 32-bit 8888 -> 16-bit 5551
	size_t ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
	size_t ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
	
	// 32-bit 6665 -> 16-bit 5551
	size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
	size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
};
#endif // ENABLE_ALTIVEC
#endif /* COLORSPACEHANDLER_ALTIVEC_H */

View File

@ -0,0 +1,503 @@
/*
Copyright (C) 2016 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/
#include "colorspacehandler_SSE2.h"
#ifndef ENABLE_SSE2
#error This code requires SSE2 support.
#else
#include <emmintrin.h>
#ifdef ENABLE_SSSE3
#include <tmmintrin.h>
#endif
// Expands eight 16-bit 555 pixels into two vectors of four 32-bit 8888 pixels.
// dstLo is built from the low four 16-bit lanes of srcColor, dstHi from the high four.
// Source bits 0-4 / 5-9 / 10-14 end up in destination bytes 0 / 1 / 2; SWAP_RB exchanges
// the destinations of the first and last field. The alpha bits are OR-ed in last.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888_SSE2(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi)
{
	// Conversion algorithm:
	//    RGB   5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
	
	const v128u32 zeroVec        = _mm_setzero_si128();
	const v128u32 outerFieldMask = _mm_set1_epi32(0x00F800F8);	// top 5 bits of bytes 0 and 2
	const v128u32 midFieldMask   = _mm_set1_epi32(0x0000F800);	// top 5 bits of byte 1
	const v128u32 lowFillMask    = _mm_set1_epi32(0x00070707);	// low 3 bits of bytes 0-2
	v128u32 pix32;
	
	// ---- Low four pixels ----
	pix32 = _mm_unpacklo_epi16(srcColor, zeroVec);
	
	if (SWAP_RB)
	{
		dstLo = _mm_or_si128( _mm_slli_epi32(pix32, 19), _mm_srli_epi32(pix32, 7) );
	}
	else
	{
		dstLo = _mm_or_si128( _mm_slli_epi32(pix32, 3), _mm_slli_epi32(pix32, 9) );
	}
	
	dstLo = _mm_and_si128(dstLo, outerFieldMask);
	dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_slli_epi32(pix32, 6), midFieldMask) );
	// Replicate the top 3 bits of each channel into its low 3 bits.
	dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_srli_epi32(dstLo, 5), lowFillMask) );
	dstLo = _mm_or_si128(dstLo, srcAlphaBits32Lo);
	
	// ---- High four pixels ----
	pix32 = _mm_unpackhi_epi16(srcColor, zeroVec);
	
	if (SWAP_RB)
	{
		dstHi = _mm_or_si128( _mm_slli_epi32(pix32, 19), _mm_srli_epi32(pix32, 7) );
	}
	else
	{
		dstHi = _mm_or_si128( _mm_slli_epi32(pix32, 3), _mm_slli_epi32(pix32, 9) );
	}
	
	dstHi = _mm_and_si128(dstHi, outerFieldMask);
	dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_slli_epi32(pix32, 6), midFieldMask) );
	dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_srli_epi32(dstHi, 5), lowFillMask) );
	dstHi = _mm_or_si128(dstHi, srcAlphaBits32Hi);
}
// Expands eight 16-bit 555 pixels into two vectors of four 32-bit 6665 pixels.
// dstLo is built from the low four 16-bit lanes of srcColor, dstHi from the high four.
// Source bits 0-4 / 5-9 / 10-14 end up in destination bytes 0 / 1 / 2; SWAP_RB exchanges
// the destinations of the first and last field. The alpha bits are OR-ed in last.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665_SSE2(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi)
{
	// Conversion algorithm:
	//    RGB   5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
	
	const v128u32 zeroVec        = _mm_setzero_si128();
	const v128u32 outerFieldMask = _mm_set1_epi32(0x003E003E);	// bits 1-5 of bytes 0 and 2
	const v128u32 midFieldMask   = _mm_set1_epi32(0x00003E00);	// bits 1-5 of byte 1
	const v128u32 lowFillMask    = _mm_set1_epi32(0x00010101);	// bit 0 of bytes 0-2
	v128u32 pix32;
	
	// ---- Low four pixels ----
	pix32 = _mm_unpacklo_epi16(srcColor, zeroVec);
	
	if (SWAP_RB)
	{
		dstLo = _mm_or_si128( _mm_slli_epi32(pix32, 17), _mm_srli_epi32(pix32, 9) );
	}
	else
	{
		dstLo = _mm_or_si128( _mm_slli_epi32(pix32, 1), _mm_slli_epi32(pix32, 7) );
	}
	
	dstLo = _mm_and_si128(dstLo, outerFieldMask);
	dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_slli_epi32(pix32, 4), midFieldMask) );
	// Copy the top bit of each 6-bit channel into its bit 0.
	dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_srli_epi32(dstLo, 5), lowFillMask) );
	dstLo = _mm_or_si128(dstLo, srcAlphaBits32Lo);
	
	// ---- High four pixels ----
	pix32 = _mm_unpackhi_epi16(srcColor, zeroVec);
	
	if (SWAP_RB)
	{
		dstHi = _mm_or_si128( _mm_slli_epi32(pix32, 17), _mm_srli_epi32(pix32, 9) );
	}
	else
	{
		dstHi = _mm_or_si128( _mm_slli_epi32(pix32, 1), _mm_slli_epi32(pix32, 7) );
	}
	
	dstHi = _mm_and_si128(dstHi, outerFieldMask);
	dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_slli_epi32(pix32, 4), midFieldMask) );
	dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_srli_epi32(dstHi, 5), lowFillMask) );
	dstHi = _mm_or_si128(dstHi, srcAlphaBits32Hi);
}
// Opaque 555 -> 8888: same as ColorspaceConvert555To8888_SSE2() with the alpha bits of
// every pixel fixed at 0xFF000000 for both output halves.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
	const v128u32 opaqueAlphaBits = _mm_set1_epi32(0xFF000000);
	
	ColorspaceConvert555To8888_SSE2<SWAP_RB>(srcColor, opaqueAlphaBits, opaqueAlphaBits, dstLo, dstHi);
}
// Opaque 555 -> 6665: same as ColorspaceConvert555To6665_SSE2() with the alpha bits of
// every pixel fixed at 0x1F000000 (the largest 5-bit alpha) for both output halves.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
	const v128u32 opaqueAlphaBits = _mm_set1_epi32(0x1F000000);
	
	ColorspaceConvert555To6665_SSE2<SWAP_RB>(srcColor, opaqueAlphaBits, opaqueAlphaBits, dstLo, dstHi);
}
// Converts four 32-bit 8888 pixels to 6665. Bytes 0-2 of each pixel are reduced to
// 6 bits, byte 3 (alpha) to 5 bits. When SWAP_RB is set, bytes 0 and 2 of each
// converted pixel are exchanged.
template <bool SWAP_RB>
FORCEINLINE v128u32 ColorspaceConvert8888To6665_SSE2(const v128u32 &src)
{
	// Conversion algorithm:
	//    RGB   8-bit to 6-bit formula: dstRGB6 = (srcRGB8 >> 2)
	//    Alpha 8-bit to 5-bit formula: dstA5   = (srcA8   >> 3)
	
	// The masks discard the bits that the 32-bit shifts drag in from the neighbouring byte.
	v128u32 rgb     = _mm_and_si128( _mm_srli_epi32(src, 2), _mm_set1_epi32(0x003F3F3F) );
	const v128u32 a = _mm_and_si128( _mm_srli_epi32(src, 3), _mm_set1_epi32(0x1F000000) );
	
	if (SWAP_RB)
	{
#ifdef ENABLE_SSSE3
		rgb = _mm_shuffle_epi8( rgb, _mm_set_epi8(15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2) );
#else
		// Exchange bytes 0 and 2 of the already-converted value so that this path
		// yields the same result as the SSSE3 shuffle. (The previous fallback masked
		// the low 6 bits of each source byte before shifting, which kept the wrong
		// bits and placed the middle channel at bits 6-11.)
		rgb = _mm_or_si128( _mm_srli_epi32(_mm_and_si128(rgb, _mm_set1_epi32(0x003F0000)), 16),
		                    _mm_or_si128( _mm_and_si128(rgb, _mm_set1_epi32(0x00003F00)),
		                                  _mm_slli_epi32(_mm_and_si128(rgb, _mm_set1_epi32(0x0000003F)), 16) ) );
#endif
	}
	
	return _mm_or_si128(rgb, a);
}
// Converts four 32-bit 6665 pixels to 8888. Bytes 0-2 of each pixel are expanded from
// 6 to 8 bits, byte 3 (alpha) from 5 to 8 bits. When SWAP_RB is set, bytes 0 and 2 of
// each converted pixel are exchanged.
template <bool SWAP_RB>
FORCEINLINE v128u32 ColorspaceConvert6665To8888_SSE2(const v128u32 &src)
{
	// Conversion algorithm:
	//    RGB   6-bit to 8-bit formula: dstRGB8 = (srcRGB6 << 2) | ((srcRGB6 >> 4) & 0x03)
	//    Alpha 5-bit to 8-bit formula: dstA8   = (srcA5   << 3) | ((srcA5   >> 2) & 0x07)
	v128u32 rgb     = _mm_or_si128( _mm_and_si128(_mm_slli_epi32(src, 2), _mm_set1_epi32(0x00FCFCFC)), _mm_and_si128(_mm_srli_epi32(src, 4), _mm_set1_epi32(0x00030303)) );
	const v128u32 a = _mm_or_si128( _mm_and_si128(_mm_slli_epi32(src, 3), _mm_set1_epi32(0xF8000000)), _mm_and_si128(_mm_srli_epi32(src, 2), _mm_set1_epi32(0x07000000)) );
	
	if (SWAP_RB)
	{
#ifdef ENABLE_SSSE3
		rgb = _mm_shuffle_epi8( rgb, _mm_set_epi8(15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2) );
#else
		// Swap bytes 0 and 2 of the *converted* value. Reading from src here (as the
		// previous fallback did) would throw away the 6-bit -> 8-bit expansion above
		// and disagree with the SSSE3 path.
		rgb = _mm_or_si128( _mm_srli_epi32(_mm_and_si128(rgb, _mm_set1_epi32(0x00FF0000)), 16),
		                    _mm_or_si128( _mm_and_si128(rgb, _mm_set1_epi32(0x0000FF00)),
		                                  _mm_slli_epi32(_mm_and_si128(rgb, _mm_set1_epi32(0x000000FF)), 16) ) );
#endif
	}
	
	return _mm_or_si128(rgb, a);
}
// Packs eight 32-bit pixels (four in srcLo, four in srcHi) into eight 16-bit 5551 pixels.
// COLORFORMAT selects how the input is interpreted:
//   - NDSColorFormat_BGR555_Rev: srcLo is returned unchanged (srcHi is ignored).
//   - NDSColorFormat_BGR666_Rev: the top 5 bits of each 6-bit channel are kept; alpha is
//     read from bits 24-28.
//   - NDSColorFormat_BGR888_Rev: the top 5 bits of each 8-bit channel are kept; alpha is
//     read from bits 24-31.
// Source bytes 0 / 1 / 2 land in output bits 0-4 / 5-9 / 10-14; SWAP_RB exchanges the
// destinations of bytes 0 and 2. Output bit 15 is set for every pixel whose alpha is
// non-zero.
template <NDSColorFormat COLORFORMAT, bool SWAP_RB>
FORCEINLINE v128u16 _ConvertColorBaseTo5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi)
{
	if (COLORFORMAT == NDSColorFormat_BGR555_Rev)
	{
		return srcLo;
	}
	
	const v128u32 maskBits0to4   = _mm_set1_epi32(0x0000001F);
	const v128u32 maskBits5to9   = _mm_set1_epi32(0x000003E0);
	const v128u32 maskBits10to14 = _mm_set1_epi32(0x00007C00);
	
	v128u32 packedLo;
	v128u32 packedHi;
	v128u16 alphaBit;
	
	if (COLORFORMAT == NDSColorFormat_BGR666_Rev)
	{
		// The middle channel lands in bits 5-9 regardless of SWAP_RB.
		packedLo = _mm_and_si128(_mm_srli_epi32(srcLo, 4), maskBits5to9);
		packedHi = _mm_and_si128(_mm_srli_epi32(srcHi, 4), maskBits5to9);
		
		if (SWAP_RB)
		{
			packedLo = _mm_or_si128( packedLo, _mm_and_si128(_mm_srli_epi32(srcLo, 17), maskBits0to4) );
			packedLo = _mm_or_si128( packedLo, _mm_and_si128(_mm_slli_epi32(srcLo,  9), maskBits10to14) );
			
			packedHi = _mm_or_si128( packedHi, _mm_and_si128(_mm_srli_epi32(srcHi, 17), maskBits0to4) );
			packedHi = _mm_or_si128( packedHi, _mm_and_si128(_mm_slli_epi32(srcHi,  9), maskBits10to14) );
		}
		else
		{
			packedLo = _mm_or_si128( packedLo, _mm_and_si128(_mm_srli_epi32(srcLo, 1), maskBits0to4) );
			packedLo = _mm_or_si128( packedLo, _mm_and_si128(_mm_srli_epi32(srcLo, 7), maskBits10to14) );
			
			packedHi = _mm_or_si128( packedHi, _mm_and_si128(_mm_srli_epi32(srcHi, 1), maskBits0to4) );
			packedHi = _mm_or_si128( packedHi, _mm_and_si128(_mm_srli_epi32(srcHi, 7), maskBits10to14) );
		}
		
		// Alpha: 5-bit value from bits 24-28; non-zero -> bit 15.
		alphaBit = _mm_packs_epi32( _mm_and_si128(_mm_srli_epi32(srcLo, 24), maskBits0to4), _mm_and_si128(_mm_srli_epi32(srcHi, 24), maskBits0to4) );
		alphaBit = _mm_cmpgt_epi16(alphaBit, _mm_setzero_si128());
		alphaBit = _mm_and_si128(alphaBit, _mm_set1_epi16(0x8000));
	}
	else if (COLORFORMAT == NDSColorFormat_BGR888_Rev)
	{
		// The middle channel lands in bits 5-9 regardless of SWAP_RB.
		packedLo = _mm_and_si128(_mm_srli_epi32(srcLo, 6), maskBits5to9);
		packedHi = _mm_and_si128(_mm_srli_epi32(srcHi, 6), maskBits5to9);
		
		if (SWAP_RB)
		{
			packedLo = _mm_or_si128( packedLo, _mm_and_si128(_mm_srli_epi32(srcLo, 19), maskBits0to4) );
			packedLo = _mm_or_si128( packedLo, _mm_and_si128(_mm_slli_epi32(srcLo,  7), maskBits10to14) );
			
			packedHi = _mm_or_si128( packedHi, _mm_and_si128(_mm_srli_epi32(srcHi, 19), maskBits0to4) );
			packedHi = _mm_or_si128( packedHi, _mm_and_si128(_mm_slli_epi32(srcHi,  7), maskBits10to14) );
		}
		else
		{
			packedLo = _mm_or_si128( packedLo, _mm_and_si128(_mm_srli_epi32(srcLo, 3), maskBits0to4) );
			packedLo = _mm_or_si128( packedLo, _mm_and_si128(_mm_srli_epi32(srcLo, 9), maskBits10to14) );
			
			packedHi = _mm_or_si128( packedHi, _mm_and_si128(_mm_srli_epi32(srcHi, 3), maskBits0to4) );
			packedHi = _mm_or_si128( packedHi, _mm_and_si128(_mm_srli_epi32(srcHi, 9), maskBits10to14) );
		}
		
		// Alpha: 8-bit value from bits 24-31; non-zero -> bit 15.
		alphaBit = _mm_packs_epi32( _mm_srli_epi32(srcLo, 24), _mm_srli_epi32(srcHi, 24) );
		alphaBit = _mm_cmpgt_epi16(alphaBit, _mm_setzero_si128());
		alphaBit = _mm_and_si128(alphaBit, _mm_set1_epi16(0x8000));
	}
	
	// Narrow the 15-bit colors to 16-bit lanes and merge in the alpha bit.
	return _mm_or_si128(_mm_packs_epi32(packedLo, packedHi), alphaBit);
}
// Packs eight 32-bit 8888 pixels, supplied as two 128-bit vectors (srcLo, srcHi),
// into a single vector of eight 16-bit 5551 pixels.
//
// This is a thin forwarder to _ConvertColorBaseTo5551_SSE2 specialized for
// NDSColorFormat_BGR888_Rev. In that path the result's 0x8000 bit is set for
// every pixel whose source alpha byte is greater than zero.
//
// SWAP_RB is forwarded unchanged and selects which source channel ends up in
// the low 5 bits versus bits 10-14 of each 16-bit result.
template <bool SWAP_RB>
FORCEINLINE v128u16 ColorspaceConvert8888To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi)
{
return _ConvertColorBaseTo5551_SSE2<NDSColorFormat_BGR888_Rev, SWAP_RB>(srcLo, srcHi);
}
// Packs eight 32-bit 6665 pixels, supplied as two 128-bit vectors (srcLo, srcHi),
// into a single vector of eight 16-bit 5551 pixels.
//
// This is a thin forwarder to _ConvertColorBaseTo5551_SSE2 specialized for
// NDSColorFormat_BGR666_Rev. In that path the result's 0x8000 bit is set for
// every pixel whose 5-bit source alpha (bits 24-28) is non-zero.
//
// SWAP_RB is forwarded unchanged to the base converter.
template <bool SWAP_RB>
FORCEINLINE v128u16 ColorspaceConvert6665To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi)
{
return _ConvertColorBaseTo5551_SSE2<NDSColorFormat_BGR666_Rev, SWAP_RB>(srcLo, srcHi);
}
template <bool SWAP_RB, bool IS_UNALIGNED>
static size_t ColorspaceConvertBuffer555To8888Opaque_SSE2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=8)
{
v128u16 src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i));
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To8888Opaque_SSE2<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
_mm_storeu_si128((v128u32 *)(dst+i+0), dstConvertedLo);
_mm_storeu_si128((v128u32 *)(dst+i+4), dstConvertedHi);
}
else
{
_mm_store_si128((v128u32 *)(dst+i+0), dstConvertedLo);
_mm_store_si128((v128u32 *)(dst+i+4), dstConvertedHi);
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer555To6665Opaque_SSE2(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=8)
{
v128u16 src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i));
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To6665Opaque_SSE2<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
_mm_storeu_si128((v128u32 *)(dst+i+0), dstConvertedLo);
_mm_storeu_si128((v128u32 *)(dst+i+4), dstConvertedHi);
}
else
{
_mm_store_si128((v128u32 *)(dst+i+0), dstConvertedLo);
_mm_store_si128((v128u32 *)(dst+i+4), dstConvertedHi);
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer8888To6665_SSE2(const u32 *src, u32 *dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=4)
{
if (IS_UNALIGNED)
{
_mm_storeu_si128( (v128u32 *)(dst+i), ColorspaceConvert8888To6665_SSE2<SWAP_RB>(_mm_loadu_si128((v128u32 *)(src+i))) );
}
else
{
_mm_store_si128( (v128u32 *)(dst+i), ColorspaceConvert8888To6665_SSE2<SWAP_RB>(_mm_load_si128((v128u32 *)(src+i))) );
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer6665To8888_SSE2(const u32 *src, u32 *dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=4)
{
if (IS_UNALIGNED)
{
_mm_storeu_si128( (v128u32 *)(dst+i), ColorspaceConvert6665To8888_SSE2<SWAP_RB>(_mm_loadu_si128((v128u32 *)(src+i))) );
}
else
{
_mm_store_si128( (v128u32 *)(dst+i), ColorspaceConvert6665To8888_SSE2<SWAP_RB>(_mm_load_si128((v128u32 *)(src+i))) );
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer8888To5551_SSE2(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=8)
{
if (IS_UNALIGNED)
{
_mm_storeu_si128( (v128u16 *)(dst+i), ColorspaceConvert8888To5551_SSE2<SWAP_RB>(_mm_loadu_si128((v128u32 *)(src+i)), _mm_loadu_si128((v128u32 *)(src+i+4))) );
}
else
{
_mm_store_si128( (v128u16 *)(dst+i), ColorspaceConvert8888To5551_SSE2<SWAP_RB>(_mm_load_si128((v128u32 *)(src+i)), _mm_load_si128((v128u32 *)(src+i+4))) );
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer6665To5551_SSE2(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=8)
{
if (IS_UNALIGNED)
{
_mm_storeu_si128( (v128u16 *)(dst+i), ColorspaceConvert6665To5551_SSE2<SWAP_RB>(_mm_loadu_si128((v128u32 *)(src+i)), _mm_loadu_si128((v128u32 *)(src+i+4))) );
}
else
{
_mm_store_si128( (v128u16 *)(dst+i), ColorspaceConvert6665To5551_SSE2<SWAP_RB>(_mm_load_si128((v128u32 *)(src+i)), _mm_load_si128((v128u32 *)(src+i+4))) );
}
}
return i;
}
// ColorspaceHandler_SSE2 entry points for the 555 -> 8888 (opaque) buffer conversion.
// Each method picks one <SWAP_RB, IS_UNALIGNED> instantiation of
// ColorspaceConvertBuffer555To8888Opaque_SSE2, forwards src/dst/pixCount to it
// unchanged, and returns the helper's result. The helper works in groups of 8 pixels.

// SWAP_RB = false, IS_UNALIGNED = false (aligned loads/stores).
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_SSE2<false, false>(src, dst, pixCount);
}
// SWAP_RB = true, IS_UNALIGNED = false (aligned loads/stores).
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_SSE2<true, false>(src, dst, pixCount);
}
// SWAP_RB = false, IS_UNALIGNED = true (unaligned loads/stores).
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_SSE2<false, true>(src, dst, pixCount);
}
// SWAP_RB = true, IS_UNALIGNED = true (unaligned loads/stores).
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_SSE2<true, true>(src, dst, pixCount);
}
// ColorspaceHandler_SSE2 entry points for the 555 -> 6665 (opaque) buffer conversion.
// Each method picks one <SWAP_RB, IS_UNALIGNED> instantiation of
// ColorspaceConvertBuffer555To6665Opaque_SSE2, forwards src/dst/pixCount to it
// unchanged, and returns the helper's result. The helper works in groups of 8 pixels.

// SWAP_RB = false, IS_UNALIGNED = false (aligned loads/stores).
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_SSE2<false, false>(src, dst, pixCount);
}
// SWAP_RB = true, IS_UNALIGNED = false (aligned loads/stores).
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_SSE2<true, false>(src, dst, pixCount);
}
// SWAP_RB = false, IS_UNALIGNED = true (unaligned loads/stores).
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_SSE2<false, true>(src, dst, pixCount);
}
// SWAP_RB = true, IS_UNALIGNED = true (unaligned loads/stores).
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_SSE2<true, true>(src, dst, pixCount);
}
// ColorspaceHandler_SSE2 entry points for the 8888 -> 6665 buffer conversion.
// Each method picks one <SWAP_RB, IS_UNALIGNED> instantiation of
// ColorspaceConvertBuffer8888To6665_SSE2, forwards src/dst/pixCount to it
// unchanged, and returns the helper's result. The helper works in groups of 4 pixels.

// SWAP_RB = false, IS_UNALIGNED = false (aligned loads/stores).
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To6665_SSE2<false, false>(src, dst, pixCount);
}
// SWAP_RB = true, IS_UNALIGNED = false (aligned loads/stores).
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To6665_SSE2<true, false>(src, dst, pixCount);
}
// SWAP_RB = false, IS_UNALIGNED = true (unaligned loads/stores).
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To6665_SSE2<false, true>(src, dst, pixCount);
}
// SWAP_RB = true, IS_UNALIGNED = true (unaligned loads/stores).
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To6665_SSE2<true, true>(src, dst, pixCount);
}
// ColorspaceHandler_SSE2 entry points for the 6665 -> 8888 buffer conversion.
// Each method picks one <SWAP_RB, IS_UNALIGNED> instantiation of
// ColorspaceConvertBuffer6665To8888_SSE2, forwards src/dst/pixCount to it
// unchanged, and returns the helper's result. The helper works in groups of 4 pixels.

// SWAP_RB = false, IS_UNALIGNED = false (aligned loads/stores).
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To8888_SSE2<false, false>(src, dst, pixCount);
}
// SWAP_RB = true, IS_UNALIGNED = false (aligned loads/stores).
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To8888_SSE2<true, false>(src, dst, pixCount);
}
// SWAP_RB = false, IS_UNALIGNED = true (unaligned loads/stores).
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To8888_SSE2<false, true>(src, dst, pixCount);
}
// SWAP_RB = true, IS_UNALIGNED = true (unaligned loads/stores).
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To8888_SSE2<true, true>(src, dst, pixCount);
}
// ColorspaceHandler_SSE2 entry points for the 8888 -> 5551 buffer conversion.
// Each method picks one <SWAP_RB, IS_UNALIGNED> instantiation of
// ColorspaceConvertBuffer8888To5551_SSE2, forwards src/dst/pixCount to it
// unchanged, and returns the helper's result. The helper works in groups of 8 pixels.

// SWAP_RB = false, IS_UNALIGNED = false (aligned loads/stores).
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To5551_SSE2<false, false>(src, dst, pixCount);
}
// SWAP_RB = true, IS_UNALIGNED = false (aligned loads/stores).
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To5551_SSE2<true, false>(src, dst, pixCount);
}
// SWAP_RB = false, IS_UNALIGNED = true (unaligned loads/stores).
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To5551_SSE2<false, true>(src, dst, pixCount);
}
// SWAP_RB = true, IS_UNALIGNED = true (unaligned loads/stores).
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To5551_SSE2<true, true>(src, dst, pixCount);
}
// ColorspaceHandler_SSE2 entry points for the 6665 -> 5551 buffer conversion.
// Each method picks one <SWAP_RB, IS_UNALIGNED> instantiation of
// ColorspaceConvertBuffer6665To5551_SSE2, forwards src/dst/pixCount to it
// unchanged, and returns the helper's result. The helper works in groups of 8 pixels.

// SWAP_RB = false, IS_UNALIGNED = false (aligned loads/stores).
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To5551_SSE2<false, false>(src, dst, pixCount);
}
// SWAP_RB = true, IS_UNALIGNED = false (aligned loads/stores).
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To5551_SSE2<true, false>(src, dst, pixCount);
}
// SWAP_RB = false, IS_UNALIGNED = true (unaligned loads/stores).
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To5551_SSE2<false, true>(src, dst, pixCount);
}
// SWAP_RB = true, IS_UNALIGNED = true (unaligned loads/stores).
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To5551_SSE2<true, true>(src, dst, pixCount);
}
// Explicit instantiations of the per-vector conversion templates for both
// values of SWAP_RB (true and false), so that a definition of each
// specialization is emitted from this translation unit.

// 555 -> 8888 / 6665 with caller-supplied alpha bits.
template void ColorspaceConvert555To8888_SSE2<true>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_SSE2<false>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_SSE2<true>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_SSE2<false>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
// 555 -> 8888 / 6665, opaque variants (no alpha input).
template void ColorspaceConvert555To8888Opaque_SSE2<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_SSE2<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_SSE2<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_SSE2<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
// 32-bit <-> 32-bit conversions (one vector in, one vector out).
template v128u32 ColorspaceConvert8888To6665_SSE2<true>(const v128u32 &src);
template v128u32 ColorspaceConvert8888To6665_SSE2<false>(const v128u32 &src);
template v128u32 ColorspaceConvert6665To8888_SSE2<true>(const v128u32 &src);
template v128u32 ColorspaceConvert6665To8888_SSE2<false>(const v128u32 &src);
// 32-bit -> 16-bit 5551 conversions (two vectors in, one vector out).
template v128u16 ColorspaceConvert8888To5551_SSE2<true>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u16 ColorspaceConvert8888To5551_SSE2<false>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u16 ColorspaceConvert6665To5551_SSE2<true>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u16 ColorspaceConvert6665To5551_SSE2<false>(const v128u32 &srcLo, const v128u32 &srcHi);
#endif // ENABLE_SSE2

View File

@ -0,0 +1,74 @@
/*
Copyright (C) 2016 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this software. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef COLORSPACEHANDLER_SSE2_H
#define COLORSPACEHANDLER_SSE2_H
#include "colorspacehandler.h"
#ifndef ENABLE_SSE2
#warning This header requires SSE2 support.
#else
// Per-vector SSE2 colorspace conversion routines. Only declarations appear
// here; no definitions are provided in this header. SWAP_RB is a compile-time
// flag passed through to each converter.

// 555 -> 32-bit: one vector of eight 16-bit pixels in, two vectors of four
// 32-bit pixels out (dstLo, dstHi). These variants take the alpha bits as input.
template<bool SWAP_RB> void ColorspaceConvert555To8888_SSE2(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665_SSE2(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
// 555 -> 32-bit, opaque variants: same shape as above but with no alpha input.
template<bool SWAP_RB> void ColorspaceConvert555To8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
// 32-bit <-> 32-bit: one vector in, one vector returned.
template<bool SWAP_RB> v128u32 ColorspaceConvert8888To6665_SSE2(const v128u32 &src);
template<bool SWAP_RB> v128u32 ColorspaceConvert6665To8888_SSE2(const v128u32 &src);
// 32-bit -> 16-bit 5551: two vectors in (srcLo, srcHi), one vector returned.
template<bool SWAP_RB> v128u16 ColorspaceConvert8888To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi);
template<bool SWAP_RB> v128u16 ColorspaceConvert6665To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi);
// SSE2 flavour of ColorspaceHandler.
//
// Every ConvertBuffer* method takes a source buffer, a destination buffer and a
// pixel count, and returns a size_t. The method-name suffixes select the variant:
//   _SwapRB      - the SWAP_RB form of the conversion.
//   _IsUnaligned - the form for buffers that are not guaranteed to be aligned.
// NOTE(review): these presumably override virtual methods of ColorspaceHandler,
// whose declaration is not visible here -- confirm.
class ColorspaceHandler_SSE2 : public ColorspaceHandler
{
public:
ColorspaceHandler_SSE2() {};
// 16-bit 555 -> 32-bit 8888, opaque.
size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
// 16-bit 555 -> 32-bit 6665, opaque.
size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
// 32-bit 8888 -> 32-bit 6665 (src and dst are not declared __restrict).
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
// 32-bit 6665 -> 32-bit 8888 (src and dst are not declared __restrict).
size_t ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
// 32-bit 8888 -> 16-bit 5551.
size_t ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
// 32-bit 6665 -> 16-bit 5551.
size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
};
#endif // ENABLE_SSE2
#endif /* COLORSPACEHANDLER_SSE2_H */

View File

@ -59,44 +59,41 @@
#define DESMUME_PLATFORM_STRING ""
#endif
#define DESMUME_SSE_STRING ""
#define DESMUME_AVX_STRING ""
#define DESMUME_CPUEXT_PRIMARY_STRING ""
#define DESMUME_CPUEXT_SECONDARY_STRING ""
#ifdef ENABLE_SSE
#undef DESMUME_SSE_STRING
#define DESMUME_SSE_STRING " SSE"
#endif
#ifdef ENABLE_SSE2
#undef DESMUME_SSE_STRING
#define DESMUME_SSE_STRING " SSE2"
#endif
#ifdef ENABLE_SSE3
#undef DESMUME_SSE_STRING
#define DESMUME_SSE_STRING " SSE3"
#endif
#ifdef ENABLE_SSSE3
#undef DESMUME_SSE_STRING
#define DESMUME_SSE_STRING " SSSE3"
#endif
#ifdef ENABLE_SSE4_1
#undef DESMUME_SSE_STRING
#define DESMUME_SSE_STRING " SSE4.1"
#endif
#ifdef ENABLE_SSE4_2
#undef DESMUME_SSE_STRING
#define DESMUME_SSE_STRING " SSE4.2"
#endif
#ifdef ENABLE_AVX
#undef DESMUME_AVX_STRING
#define DESMUME_AVX_STRING "+AVX"
#endif
#ifdef ENABLE_AVX2
#undef DESMUME_AVX_STRING
#define DESMUME_AVX_STRING "+AVX2"
#if defined(ENABLE_SSE4_2)
#undef DESMUME_CPUEXT_PRIMARY_STRING
#define DESMUME_CPUEXT_PRIMARY_STRING " SSE4.2"
#elif defined(ENABLE_SSE4_1)
#undef DESMUME_CPUEXT_PRIMARY_STRING
#define DESMUME_CPUEXT_PRIMARY_STRING " SSE4.1"
#elif defined(ENABLE_SSSE3)
#undef DESMUME_CPUEXT_PRIMARY_STRING
#define DESMUME_CPUEXT_PRIMARY_STRING " SSSE3"
#elif defined(ENABLE_SSE3)
#undef DESMUME_CPUEXT_PRIMARY_STRING
#define DESMUME_CPUEXT_PRIMARY_STRING " SSE3"
#elif defined(ENABLE_SSE2)
#undef DESMUME_CPUEXT_PRIMARY_STRING
#define DESMUME_CPUEXT_PRIMARY_STRING " SSE2"
#elif defined(ENABLE_SSE)
#undef DESMUME_CPUEXT_PRIMARY_STRING
#define DESMUME_CPUEXT_PRIMARY_STRING " SSE"
#elif defined(ENABLE_ALTIVEC)
#undef DESMUME_CPUEXT_PRIMARY_STRING
#define DESMUME_CPUEXT_PRIMARY_STRING " AltiVec"
#endif
#define DESMUME_CPUEXT_STRING DESMUME_SSE_STRING DESMUME_AVX_STRING
#if defined(ENABLE_AVX2)
#undef DESMUME_CPUEXT_SECONDARY_STRING
#define DESMUME_CPUEXT_SECONDARY_STRING "+AVX2"
#elif defined(ENABLE_AVX)
#undef DESMUME_CPUEXT_SECONDARY_STRING
#define DESMUME_CPUEXT_SECONDARY_STRING "+AVX"
#endif
#define DESMUME_CPUEXT_STRING DESMUME_CPUEXT_PRIMARY_STRING DESMUME_CPUEXT_SECONDARY_STRING
#ifdef DEVELOPER
#define DESMUME_FEATURE_STRING " dev+"

View File

@ -171,6 +171,8 @@
<ClCompile Include="..\utils\AsmJit\x86\x86func.cpp" />
<ClCompile Include="..\utils\AsmJit\x86\x86operand.cpp" />
<ClCompile Include="..\utils\AsmJit\x86\x86util.cpp" />
<ClCompile Include="..\utils\colorspacehandler\colorspacehandler.cpp" />
<ClCompile Include="..\utils\colorspacehandler\colorspacehandler_SSE2.cpp" />
<ClCompile Include="..\utils\datetime.cpp" />
<ClCompile Include="..\utils\dlditool.cpp" />
<ClCompile Include="..\utils\emufat.cpp" />
@ -442,6 +444,8 @@
<ClInclude Include="..\utils\AsmJit\x86\x86func.h" />
<ClInclude Include="..\utils\AsmJit\x86\x86operand.h" />
<ClInclude Include="..\utils\AsmJit\x86\x86util.h" />
<ClInclude Include="..\utils\colorspacehandler\colorspacehandler.h" />
<ClInclude Include="..\utils\colorspacehandler\colorspacehandler_SSE2.h" />
<ClInclude Include="..\utils\datetime.h" />
<ClInclude Include="..\utils\emufat.h" />
<ClInclude Include="..\utils\emufat_types.h" />

View File

@ -121,6 +121,9 @@
<Filter Include="Core\libretro-common\lists">
<UniqueIdentifier>{18cba3ce-aaa6-441d-8111-408d0fcef7d2}</UniqueIdentifier>
</Filter>
<Filter Include="Core\utils\colorspacehandler">
<UniqueIdentifier>{db5dc512-2b75-4476-8cac-75fd4acfd85f}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\aggdraw.cpp">
@ -966,6 +969,12 @@
<ClCompile Include="..\libretro-common\file\archive_file_zlib.c">
<Filter>Core\libretro-common\file</Filter>
</ClCompile>
<ClCompile Include="..\utils\colorspacehandler\colorspacehandler.cpp">
<Filter>Core\utils\colorspacehandler</Filter>
</ClCompile>
<ClCompile Include="..\utils\colorspacehandler\colorspacehandler_SSE2.cpp">
<Filter>Core\utils\colorspacehandler</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\armcpu.h">
@ -1739,6 +1748,12 @@
<ClInclude Include="..\libretro-common\include\compat\msvc.h">
<Filter>Core\libretro-common\include\compat</Filter>
</ClInclude>
<ClInclude Include="..\utils\colorspacehandler\colorspacehandler.h">
<Filter>Core\utils\colorspacehandler</Filter>
</ClInclude>
<ClInclude Include="..\utils\colorspacehandler\colorspacehandler_SSE2.h">
<Filter>Core\utils\colorspacehandler</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="..\instruction_tabdef.inc">

View File

@ -316,13 +316,14 @@ static void do_video_conversion(AVIFile* avi, const u16* buffer)
int height = avi->prescaleLevel*384;
u8* outbuf = avi_file->convert_buffer + width*(height-1)*3;
for(int y=0;y<height;y++)
for (int y = 0; y < height; y++)
{
for(int x=0;x<width;x++)
for (int x = 0; x < width; x++)
{
u32 dst = ConvertColor555To8888Opaque<true>(*buffer++);
*(u32 *)outbuf = (dst & 0x00FFFFFF) | (*(u32 *)outbuf & 0xFF000000);
outbuf += 3;
u32 dst = ColorspaceConvert555To8888Opaque<true>(*buffer++);
*outbuf++ = dst & 0xFF;
*outbuf++ = (dst >> 8) & 0xFF;
*outbuf++ = (dst >> 16) & 0xFF;
}
outbuf -= width*3*2;

View File

@ -1920,7 +1920,7 @@ static void DoDisplay(bool firstTime)
//convert pixel format to 32bpp for compositing
//why do we do this over and over? well, we are compositing to
//filteredbuffer32bpp, and it needs to get refreshed each frame.
ConvertColorBuffer555To8888Opaque<true, false>((u16 *)video.srcBuffer, video.buffer, video.srcBufferSize / sizeof(u16));
ColorspaceConvertBuffer555To8888Opaque<true, false>((u16 *)video.srcBuffer, video.buffer, video.srcBufferSize / sizeof(u16));
if(firstTime)
{