Merge commit '9c1f523a725abca2fcfbf07cd11d12077154a80c' into round-1-start
# Conflicts: # desmume/src/GPU.h # desmume/src/frontend/windows/DeSmuME.vcxproj # desmume/src/frontend/windows/DeSmuME.vcxproj.filters # desmume/src/types.h
This commit is contained in:
commit
ae64d4659b
|
@ -237,6 +237,15 @@ void GFX_FIFOsend(u8 cmd, u32 param)
|
|||
if(IsMatrixStackCommand(cmd))
|
||||
gxFIFO.matrix_stack_op_size++;
|
||||
|
||||
//along the same lines:
|
||||
//american girls julie finds a way will put a bunch of stuff and then a box test into the fifo and then immediately test the busy flag
|
||||
//so we need to set the busy flag here.
|
||||
//does it expect the fifo to be running then? well, it's definitely jammed -- making it unjammed at one point did fix this bug.
|
||||
//it's still not clear whether we're handling the immediate vs fifo commands properly at all :(
|
||||
//anyway, here we go, similar treatment. consider this a hack.
|
||||
if(cmd == 0x70) MMU_new.gxstat.tb = 1; //just set the flag--youre insane if you queue more than one of these anyway
|
||||
if(cmd == 0x71) MMU_new.gxstat.tb = 1;
|
||||
|
||||
if(gxFIFO.size>=HACK_GXIFO_SIZE) {
|
||||
printf("--FIFO FULL-- : %d\n",gxFIFO.size);
|
||||
}
|
||||
|
|
|
@ -18,6 +18,14 @@
|
|||
along with this software. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifdef FASTBUILD
|
||||
#undef FORCEINLINE
|
||||
#define FORCEINLINE
|
||||
//compilation speed hack (cuts time exactly in half by cutting out permutations)
|
||||
#define DISABLE_MOSAIC
|
||||
#define DISABLE_COLOREFFECTDISABLEHINT
|
||||
#endif
|
||||
|
||||
#include "GPU.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
@ -40,75 +48,8 @@
|
|||
#include "matrix.h"
|
||||
#include "emufile.h"
|
||||
|
||||
#ifdef FASTBUILD
|
||||
#undef FORCEINLINE
|
||||
#define FORCEINLINE
|
||||
//compilation speed hack (cuts time exactly in half by cutting out permutations)
|
||||
#define DISABLE_MOSAIC
|
||||
#endif
|
||||
|
||||
u32 Render3DFramesPerSecond;
|
||||
|
||||
CACHE_ALIGN u32 color_555_to_6665_opaque[32768];
|
||||
CACHE_ALIGN u32 color_555_to_6665_opaque_swap_rb[32768];
|
||||
CACHE_ALIGN u32 color_555_to_666[32768];
|
||||
CACHE_ALIGN u32 color_555_to_8888_opaque[32768];
|
||||
CACHE_ALIGN u32 color_555_to_8888_opaque_swap_rb[32768];
|
||||
CACHE_ALIGN u32 color_555_to_888[32768];
|
||||
|
||||
//is this a crazy idea? this table spreads 5 bits evenly over 31 from exactly 0 to INT_MAX
|
||||
CACHE_ALIGN const u32 material_5bit_to_31bit[] = {
|
||||
0x00000000, 0x04210842, 0x08421084, 0x0C6318C6,
|
||||
0x10842108, 0x14A5294A, 0x18C6318C, 0x1CE739CE,
|
||||
0x21084210, 0x25294A52, 0x294A5294, 0x2D6B5AD6,
|
||||
0x318C6318, 0x35AD6B5A, 0x39CE739C, 0x3DEF7BDE,
|
||||
0x42108421, 0x46318C63, 0x4A5294A5, 0x4E739CE7,
|
||||
0x5294A529, 0x56B5AD6B, 0x5AD6B5AD, 0x5EF7BDEF,
|
||||
0x6318C631, 0x6739CE73, 0x6B5AD6B5, 0x6F7BDEF7,
|
||||
0x739CE739, 0x77BDEF7B, 0x7BDEF7BD, 0x7FFFFFFF
|
||||
};
|
||||
|
||||
// 5-bit to 6-bit conversions use this formula -- dst = (src == 0) ? 0 : (2*src) + 1
|
||||
// Reference GBATEK: http://problemkaputt.de/gbatek.htm#ds3dtextureblending
|
||||
CACHE_ALIGN const u8 material_5bit_to_6bit[] = {
|
||||
0x00, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
|
||||
0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F,
|
||||
0x21, 0x23, 0x25, 0x27, 0x29, 0x2B, 0x2D, 0x2F,
|
||||
0x31, 0x33, 0x35, 0x37, 0x39, 0x3B, 0x3D, 0x3F
|
||||
};
|
||||
|
||||
CACHE_ALIGN const u8 material_5bit_to_8bit[] = {
|
||||
0x00, 0x08, 0x10, 0x18, 0x21, 0x29, 0x31, 0x39,
|
||||
0x42, 0x4A, 0x52, 0x5A, 0x63, 0x6B, 0x73, 0x7B,
|
||||
0x84, 0x8C, 0x94, 0x9C, 0xA5, 0xAD, 0xB5, 0xBD,
|
||||
0xC6, 0xCE, 0xD6, 0xDE, 0xE7, 0xEF, 0xF7, 0xFF
|
||||
};
|
||||
|
||||
CACHE_ALIGN const u8 material_6bit_to_8bit[] = {
|
||||
0x00, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C,
|
||||
0x20, 0x24, 0x28, 0x2C, 0x30, 0x34, 0x38, 0x3C,
|
||||
0x41, 0x45, 0x49, 0x4D, 0x51, 0x55, 0x59, 0x5D,
|
||||
0x61, 0x65, 0x69, 0x6D, 0x71, 0x75, 0x79, 0x7D,
|
||||
0x82, 0x86, 0x8A, 0x8E, 0x92, 0x96, 0x9A, 0x9E,
|
||||
0xA2, 0xA6, 0xAA, 0xAE, 0xB2, 0xB6, 0xBA, 0xBE,
|
||||
0xC3, 0xC7, 0xCB, 0xCF, 0xD3, 0xD7, 0xDB, 0xDF,
|
||||
0xE3, 0xE7, 0xEB, 0xEF, 0xF3, 0xF7, 0xFB, 0xFF
|
||||
};
|
||||
|
||||
CACHE_ALIGN const u8 material_3bit_to_8bit[] = {
|
||||
0x00, 0x24, 0x49, 0x6D, 0x92, 0xB6, 0xDB, 0xFF
|
||||
};
|
||||
|
||||
//maybe not very precise
|
||||
CACHE_ALIGN const u8 material_3bit_to_5bit[] = {
|
||||
0, 4, 8, 13, 17, 22, 26, 31
|
||||
};
|
||||
|
||||
//TODO - generate this in the static init method more accurately
|
||||
CACHE_ALIGN const u8 material_3bit_to_6bit[] = {
|
||||
0, 8, 16, 26, 34, 44, 52, 63
|
||||
};
|
||||
|
||||
//instantiate static instance
|
||||
u16 GPUEngineBase::_brightnessUpTable555[17][0x8000];
|
||||
FragmentColor GPUEngineBase::_brightnessUpTable666[17][0x8000];
|
||||
|
@ -167,7 +108,7 @@ const CACHE_ALIGN BGLayerSize GPUEngineBase::_BGLayerSizeLUT[8][4] = {
|
|||
{{128,128}, {256,256}, {512,256}, {512,512}}, //affine ext direct
|
||||
};
|
||||
|
||||
static void ExpandLine8(u8 *__restrict dst, const u8 *__restrict src, size_t dstLength)
|
||||
static FORCEINLINE void ExpandLine8(u8 *__restrict dst, const u8 *__restrict src, size_t dstLength)
|
||||
{
|
||||
#ifdef ENABLE_SSSE3
|
||||
const bool isIntegerScale = ((dstLength % GPU_FRAMEBUFFER_NATIVE_WIDTH) == 0);
|
||||
|
@ -1655,11 +1596,11 @@ FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compInfo,
|
|||
break;
|
||||
|
||||
case NDSColorFormat_BGR666_Rev:
|
||||
dstColor32.color = ConvertColor555To6665Opaque<false>(srcColor16);
|
||||
dstColor32.color = ColorspaceConvert555To6665Opaque<false>(srcColor16);
|
||||
break;
|
||||
|
||||
case NDSColorFormat_BGR888_Rev:
|
||||
dstColor32.color = ConvertColor555To8888Opaque<false>(srcColor16);
|
||||
dstColor32.color = ColorspaceConvert555To8888Opaque<false>(srcColor16);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1682,11 +1623,11 @@ FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compInfo,
|
|||
break;
|
||||
|
||||
case NDSColorFormat_BGR666_Rev:
|
||||
dstColor32.color = ConvertColor555To6665Opaque<false>(srcColor16);
|
||||
dstColor32.color = ColorspaceConvert555To6665Opaque<false>(srcColor16);
|
||||
break;
|
||||
|
||||
case NDSColorFormat_BGR888_Rev:
|
||||
dstColor32.color = ConvertColor555To8888Opaque<false>(srcColor16);
|
||||
dstColor32.color = ColorspaceConvert555To8888Opaque<false>(srcColor16);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1767,11 +1708,11 @@ FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compInfo,
|
|||
break;
|
||||
|
||||
case NDSColorFormat_BGR666_Rev:
|
||||
dstColor32.color = ConvertColor555To6665Opaque<false>(srcColor16);
|
||||
dstColor32.color = ColorspaceConvert555To6665Opaque<false>(srcColor16);
|
||||
break;
|
||||
|
||||
case NDSColorFormat_BGR888_Rev:
|
||||
dstColor32.color = ConvertColor555To8888Opaque<false>(srcColor16);
|
||||
dstColor32.color = ColorspaceConvert555To8888Opaque<false>(srcColor16);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
@ -1833,13 +1774,13 @@ FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compInfo,
|
|||
break;
|
||||
|
||||
case NDSColorFormat_BGR666_Rev:
|
||||
srcColor32.color = ConvertColor555To6665Opaque<false>(srcColor16);
|
||||
srcColor32.color = ColorspaceConvert555To6665Opaque<false>(srcColor16);
|
||||
dstColor32 = this->_ColorEffectBlend<OUTPUTFORMAT>(srcColor32, dstColor32, blendEVA, blendEVB);
|
||||
dstColor32.a = 0x1F;
|
||||
break;
|
||||
|
||||
case NDSColorFormat_BGR888_Rev:
|
||||
srcColor32.color = ConvertColor555To8888Opaque<false>(srcColor16);
|
||||
srcColor32.color = ColorspaceConvert555To8888Opaque<false>(srcColor16);
|
||||
dstColor32 = this->_ColorEffectBlend<OUTPUTFORMAT>(srcColor32, dstColor32, blendEVA, blendEVB);
|
||||
dstColor32.a = 0xFF;
|
||||
break;
|
||||
|
@ -2132,7 +2073,7 @@ FORCEINLINE void GPUEngineBase::_RenderPixel3D(GPUEngineCompositorInfo &compInfo
|
|||
// Render the pixel using the selected color effect.
|
||||
if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev)
|
||||
{
|
||||
const u16 srcColor16 = ConvertColor6665To5551<false>(srcColor32);
|
||||
const u16 srcColor16 = ColorspaceConvert6665To5551<false>(srcColor32);
|
||||
|
||||
switch (selectedEffect)
|
||||
{
|
||||
|
@ -2695,13 +2636,13 @@ void GPUEngineBase::_RenderPixelsCustom(GPUEngineCompositorInfo &compInfo)
|
|||
|
||||
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
|
||||
{
|
||||
ConvertColor555To6665Opaque<false>(src16[0], src[0], src[1]);
|
||||
ConvertColor555To6665Opaque<false>(src16[1], src[2], src[3]);
|
||||
ColorspaceConvert555To6665Opaque_SSE2<false>(src16[0], src[0], src[1]);
|
||||
ColorspaceConvert555To6665Opaque_SSE2<false>(src16[1], src[2], src[3]);
|
||||
}
|
||||
else
|
||||
{
|
||||
ConvertColor555To8888Opaque<false>(src16[0], src[0], src[1]);
|
||||
ConvertColor555To8888Opaque<false>(src16[1], src[2], src[3]);
|
||||
ColorspaceConvert555To8888Opaque_SSE2<false>(src16[0], src[0], src[1]);
|
||||
ColorspaceConvert555To8888Opaque_SSE2<false>(src16[1], src[2], src[3]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2796,13 +2737,13 @@ void GPUEngineBase::_RenderPixelsCustomVRAM(GPUEngineCompositorInfo &compInfo)
|
|||
{
|
||||
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
|
||||
{
|
||||
ConvertColor555To6665Opaque<false>(src16[0], src[0], src[1]);
|
||||
ConvertColor555To6665Opaque<false>(src16[1], src[2], src[3]);
|
||||
ColorspaceConvert555To6665Opaque_SSE2<false>(src16[0], src[0], src[1]);
|
||||
ColorspaceConvert555To6665Opaque_SSE2<false>(src16[1], src[2], src[3]);
|
||||
}
|
||||
else
|
||||
{
|
||||
ConvertColor555To8888Opaque<false>(src16[0], src[0], src[1]);
|
||||
ConvertColor555To8888Opaque<false>(src16[1], src[2], src[3]);
|
||||
ColorspaceConvert555To8888Opaque_SSE2<false>(src16[0], src[0], src[1]);
|
||||
ColorspaceConvert555To8888Opaque_SSE2<false>(src16[1], src[2], src[3]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4502,7 +4443,7 @@ void GPUEngineBase::UpdateVRAM3DUsageProperties_OBJLayer(const size_t bankIndex)
|
|||
}
|
||||
|
||||
template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED>
|
||||
void GPUEngineBase::_RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compInfo)
|
||||
FORCEINLINE void GPUEngineBase::_RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compInfo)
|
||||
{
|
||||
bool useCustomVRAM = false;
|
||||
|
||||
|
@ -4538,26 +4479,28 @@ void GPUEngineBase::_RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compInfo)
|
|||
}
|
||||
|
||||
template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED>
|
||||
void GPUEngineBase::_RenderLine_LayerBG_ApplyColorEffectDisabledHint(GPUEngineCompositorInfo &compInfo)
|
||||
FORCEINLINE void GPUEngineBase::_RenderLine_LayerBG_ApplyColorEffectDisabledHint(GPUEngineCompositorInfo &compInfo)
|
||||
{
|
||||
this->_RenderLine_LayerBG_Final<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compInfo);
|
||||
}
|
||||
|
||||
template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool ISCUSTOMRENDERINGNEEDED>
|
||||
void GPUEngineBase::_RenderLine_LayerBG_ApplyMosaic(GPUEngineCompositorInfo &compInfo)
|
||||
FORCEINLINE void GPUEngineBase::_RenderLine_LayerBG_ApplyMosaic(GPUEngineCompositorInfo &compInfo)
|
||||
{
|
||||
#ifndef DISABLE_COLOREFFECTDISABLEHINT
|
||||
if (compInfo.renderState.colorEffect == ColorEffect_Disable)
|
||||
{
|
||||
this->_RenderLine_LayerBG_ApplyColorEffectDisabledHint<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, true, ISCUSTOMRENDERINGNEEDED>(compInfo);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
this->_RenderLine_LayerBG_ApplyColorEffectDisabledHint<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, false, ISCUSTOMRENDERINGNEEDED>(compInfo);
|
||||
}
|
||||
}
|
||||
|
||||
template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool WILLPERFORMWINDOWTEST, bool ISCUSTOMRENDERINGNEEDED>
|
||||
void GPUEngineBase::_RenderLine_LayerBG(GPUEngineCompositorInfo &compInfo)
|
||||
FORCEINLINE void GPUEngineBase::_RenderLine_LayerBG(GPUEngineCompositorInfo &compInfo)
|
||||
{
|
||||
if (ISDEBUGRENDER)
|
||||
{
|
||||
|
@ -4951,7 +4894,7 @@ void GPUEngineBase::ResolveCustomRendering()
|
|||
|
||||
void GPUEngineBase::ResolveRGB666ToRGB888()
|
||||
{
|
||||
ConvertColorBuffer6665To8888<false>((u32 *)this->renderedBuffer, (u32 *)this->renderedBuffer, this->renderedWidth * this->renderedHeight);
|
||||
ColorspaceConvertBuffer6665To8888<false, false>((u32 *)this->renderedBuffer, (u32 *)this->renderedBuffer, this->renderedWidth * this->renderedHeight);
|
||||
}
|
||||
|
||||
void GPUEngineBase::ResolveToCustomFramebuffer()
|
||||
|
@ -5575,12 +5518,12 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
|
|||
|
||||
case NDSColorFormat_BGR666_Rev:
|
||||
renderedLineSrcA16 = (u16 *)malloc_alignedCacheLine(compInfo.line.pixelCount * sizeof(u16));
|
||||
ConvertColorBuffer6665To5551<false, false>((u32 *)compInfo.target.lineColorHead, renderedLineSrcA16, compInfo.line.pixelCount);
|
||||
ColorspaceConvertBuffer6665To5551<false, false>((u32 *)compInfo.target.lineColorHead, renderedLineSrcA16, compInfo.line.pixelCount);
|
||||
break;
|
||||
|
||||
case NDSColorFormat_BGR888_Rev:
|
||||
renderedLineSrcA16 = (u16 *)malloc_alignedCacheLine(compInfo.line.pixelCount * sizeof(u16));
|
||||
ConvertColorBuffer8888To5551<false, false>((u32 *)compInfo.target.lineColorHead, renderedLineSrcA16, compInfo.line.pixelCount);
|
||||
ColorspaceConvertBuffer8888To5551<false, false>((u32 *)compInfo.target.lineColorHead, renderedLineSrcA16, compInfo.line.pixelCount);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -6570,7 +6513,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
|
|||
{
|
||||
const u16 *src = this->_VRAMNativeBlockPtr[DISPCNT.VRAM_Block] + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
FragmentColor *dst = (FragmentColor *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
ConvertColorBuffer555To6665Opaque<false, false>(src, (u32 *)dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
ColorspaceConvertBuffer555To6665Opaque<false, false>(src, (u32 *)dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -6578,7 +6521,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
|
|||
{
|
||||
const u16 *src = this->_VRAMNativeBlockPtr[DISPCNT.VRAM_Block] + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
FragmentColor *dst = (FragmentColor *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
ConvertColorBuffer555To8888Opaque<false, false>(src, (u32 *)dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
ColorspaceConvertBuffer555To8888Opaque<false, false>(src, (u32 *)dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -6598,7 +6541,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
|
|||
{
|
||||
const u16 *src = this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + (_gpuDstLineIndex[l] * customWidth);
|
||||
FragmentColor *dst = (FragmentColor *)this->customBuffer + (_gpuDstLineIndex[l] * customWidth);
|
||||
ConvertColorBuffer555To6665Opaque<false, false>(src, (u32 *)dst, customPixCount);
|
||||
ColorspaceConvertBuffer555To6665Opaque<false, false>(src, (u32 *)dst, customPixCount);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -6606,7 +6549,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
|
|||
{
|
||||
const u16 *src = this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + (_gpuDstLineIndex[l] * customWidth);
|
||||
FragmentColor *dst = (FragmentColor *)this->customBuffer + (_gpuDstLineIndex[l] * customWidth);
|
||||
ConvertColorBuffer555To8888Opaque<false, false>(src, (u32 *)dst, customPixCount);
|
||||
ColorspaceConvertBuffer555To8888Opaque<false, false>(src, (u32 *)dst, customPixCount);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -6802,28 +6745,7 @@ void GPUEngineB::RenderLine(const u16 l)
|
|||
|
||||
GPUSubsystem::GPUSubsystem()
|
||||
{
|
||||
static bool needInitTables = true;
|
||||
|
||||
if (needInitTables)
|
||||
{
|
||||
#define RGB15TO18_BITLOGIC(col) ( (material_5bit_to_6bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | material_5bit_to_6bit[(col)&0x1F] )
|
||||
#define RGB15TO18_SWAP_RB_BITLOGIC(col) ( material_5bit_to_6bit[((col)>>10)&0x1F] | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_6bit[(col)&0x1F]<<16) )
|
||||
#define RGB15TO24_BITLOGIC(col) ( (material_5bit_to_8bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | material_5bit_to_8bit[(col)&0x1F] )
|
||||
#define RGB15TO24_SWAP_RB_BITLOGIC(col) ( material_5bit_to_8bit[((col)>>10)&0x1F] | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_8bit[(col)&0x1F]<<16) )
|
||||
|
||||
for (size_t i = 0; i < 32768; i++)
|
||||
{
|
||||
color_555_to_666[i] = LE_TO_LOCAL_32( RGB15TO18_BITLOGIC(i) );
|
||||
color_555_to_6665_opaque[i] = LE_TO_LOCAL_32( RGB15TO18_BITLOGIC(i) | 0x1F000000 );
|
||||
color_555_to_6665_opaque_swap_rb[i] = LE_TO_LOCAL_32( RGB15TO18_SWAP_RB_BITLOGIC(i) | 0x1F000000 );
|
||||
|
||||
color_555_to_888[i] = LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) );
|
||||
color_555_to_8888_opaque[i] = LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) | 0xFF000000 );
|
||||
color_555_to_8888_opaque_swap_rb[i] = LE_TO_LOCAL_32( RGB15TO24_SWAP_RB_BITLOGIC(i) | 0xFF000000 );
|
||||
}
|
||||
|
||||
needInitTables = false;
|
||||
}
|
||||
ColorspaceHandlerInit();
|
||||
|
||||
_defaultEventHandler = new GPUEventHandlerDefault;
|
||||
_event = _defaultEventHandler;
|
||||
|
@ -6957,6 +6879,22 @@ void GPUSubsystem::Reset()
|
|||
osd->clear();
|
||||
}
|
||||
|
||||
void GPUSubsystem::ForceRender3DFinishAndFlush(bool willFlush)
|
||||
{
|
||||
if (CurrentRenderer->GetRenderNeedsFinish())
|
||||
{
|
||||
bool need3DDisplayFramebuffer;
|
||||
bool need3DCaptureFramebuffer;
|
||||
CurrentRenderer->GetFramebufferFlushStates(need3DDisplayFramebuffer, need3DCaptureFramebuffer);
|
||||
|
||||
CurrentRenderer->SetFramebufferFlushStates(willFlush, willFlush);
|
||||
CurrentRenderer->RenderFinish();
|
||||
CurrentRenderer->SetFramebufferFlushStates(need3DDisplayFramebuffer, need3DCaptureFramebuffer);
|
||||
CurrentRenderer->SetRenderNeedsFinish(false);
|
||||
this->_event->DidRender3DEnd();
|
||||
}
|
||||
}
|
||||
|
||||
void GPUSubsystem::UpdateRenderProperties()
|
||||
{
|
||||
this->_engineMain->vramBlockOBJIndex = VRAM_NO_3D_USAGE;
|
||||
|
@ -7082,7 +7020,7 @@ void GPUSubsystem::SetCustomFramebufferSize(size_t w, size_t h, void *clientNati
|
|||
return;
|
||||
}
|
||||
|
||||
CurrentRenderer->RenderFinish();
|
||||
GPU->ForceRender3DFinishAndFlush(false);
|
||||
|
||||
const float customWidthScale = (float)w / (float)GPU_FRAMEBUFFER_NATIVE_WIDTH;
|
||||
const float customHeightScale = (float)h / (float)GPU_FRAMEBUFFER_NATIVE_HEIGHT;
|
||||
|
@ -7224,7 +7162,7 @@ void GPUSubsystem::SetCustomFramebufferSize(size_t w, size_t h)
|
|||
|
||||
void GPUSubsystem::SetColorFormat(const NDSColorFormat outputFormat, void *clientNativeBuffer, void *clientCustomBuffer)
|
||||
{
|
||||
CurrentRenderer->RenderFinish();
|
||||
GPU->ForceRender3DFinishAndFlush(false);
|
||||
|
||||
this->_displayInfo.colorFormat = outputFormat;
|
||||
this->_displayInfo.pixelBytes = (outputFormat == NDSColorFormat_BGR555_Rev) ? sizeof(u16) : sizeof(FragmentColor);
|
||||
|
@ -7581,178 +7519,6 @@ void NDSDisplay::SetEngineByID(const GPUEngineID theID)
|
|||
this->_gpu->SetDisplayByID(this->_ID);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
void ConvertColorBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t ssePixCount = pixCount - (pixCount % 8);
|
||||
for (; i < ssePixCount; i += 8)
|
||||
{
|
||||
__m128i src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((__m128i *)(src + i)) : _mm_load_si128((__m128i *)(src + i));
|
||||
__m128i dstConvertedLo, dstConvertedHi;
|
||||
ConvertColor555To8888Opaque<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
|
||||
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm_storeu_si128((__m128i *)(dst + i + 0), dstConvertedLo);
|
||||
_mm_storeu_si128((__m128i *)(dst + i + 4), dstConvertedHi);
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_store_si128((__m128i *)(dst + i + 0), dstConvertedLo);
|
||||
_mm_store_si128((__m128i *)(dst + i + 4), dstConvertedHi);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
#endif
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ConvertColor555To8888Opaque<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
void ConvertColorBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t ssePixCount = pixCount - (pixCount % 8);
|
||||
for (; i < ssePixCount; i += 8)
|
||||
{
|
||||
__m128i src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((__m128i *)(src + i)) : _mm_load_si128((__m128i *)(src + i));
|
||||
__m128i dstConvertedLo, dstConvertedHi;
|
||||
ConvertColor555To6665Opaque<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
|
||||
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm_storeu_si128((__m128i *)(dst + i + 0), dstConvertedLo);
|
||||
_mm_storeu_si128((__m128i *)(dst + i + 4), dstConvertedHi);
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_store_si128((__m128i *)(dst + i + 0), dstConvertedLo);
|
||||
_mm_store_si128((__m128i *)(dst + i + 4), dstConvertedHi);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
#endif
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ConvertColor555To6665Opaque<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
void ConvertColorBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t ssePixCount = pixCount - (pixCount % 4);
|
||||
for (; i < ssePixCount; i += 4)
|
||||
{
|
||||
_mm_store_si128( (__m128i *)(dst + i), ConvertColor8888To6665<SWAP_RB>(_mm_load_si128((__m128i *)(src + i))) );
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
#endif
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ConvertColor8888To6665<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
void ConvertColorBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t ssePixCount = pixCount - (pixCount % 4);
|
||||
for (; i < ssePixCount; i += 4)
|
||||
{
|
||||
_mm_store_si128( (__m128i *)(dst + i), ConvertColor6665To8888<SWAP_RB>(_mm_load_si128((__m128i *)(src + i))) );
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
#endif
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ConvertColor6665To8888<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
void ConvertColorBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t ssePixCount = pixCount - (pixCount % 8);
|
||||
for (; i < ssePixCount; i += 8)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm_storeu_si128( (__m128i *)(dst + i), ConvertColor8888To5551<SWAP_RB>(_mm_loadu_si128((__m128i *)(src + i)), _mm_loadu_si128((__m128i *)(src + i + 4))) );
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_store_si128( (__m128i *)(dst + i), ConvertColor8888To5551<SWAP_RB>(_mm_load_si128((__m128i *)(src + i)), _mm_load_si128((__m128i *)(src + i + 4))) );
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
#endif
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ConvertColor8888To5551<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
void ConvertColorBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t ssePixCount = pixCount - (pixCount % 8);
|
||||
for (; i < ssePixCount; i += 8)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm_storeu_si128( (__m128i *)(dst + i), ConvertColor6665To5551<SWAP_RB>(_mm_loadu_si128((__m128i *)(src + i)), _mm_loadu_si128((__m128i *)(src + i + 4))) );
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_store_si128( (__m128i *)(dst + i), ConvertColor6665To5551<SWAP_RB>(_mm_load_si128((__m128i *)(src + i)), _mm_load_si128((__m128i *)(src + i + 4))) );
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
#endif
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ConvertColor6665To5551<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template void GPUEngineBase::ParseReg_BGnHOFS<GPULayerID_BG0>();
|
||||
template void GPUEngineBase::ParseReg_BGnHOFS<GPULayerID_BG1>();
|
||||
template void GPUEngineBase::ParseReg_BGnHOFS<GPULayerID_BG2>();
|
||||
|
@ -7774,29 +7540,3 @@ template void GPUEngineBase::ParseReg_BGnY<GPULayerID_BG3>();
|
|||
template void GPUSubsystem::RenderLine<NDSColorFormat_BGR555_Rev>(const u16 l, bool skip);
|
||||
template void GPUSubsystem::RenderLine<NDSColorFormat_BGR666_Rev>(const u16 l, bool skip);
|
||||
template void GPUSubsystem::RenderLine<NDSColorFormat_BGR888_Rev>(const u16 l, bool skip);
|
||||
|
||||
template void ConvertColorBuffer555To8888Opaque<true, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer555To8888Opaque<true, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer555To8888Opaque<false, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer555To8888Opaque<false, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
|
||||
template void ConvertColorBuffer555To6665Opaque<true, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer555To6665Opaque<true, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer555To6665Opaque<false, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer555To6665Opaque<false, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
|
||||
template void ConvertColorBuffer8888To6665<true>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
template void ConvertColorBuffer8888To6665<false>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
|
||||
template void ConvertColorBuffer6665To8888<true>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
template void ConvertColorBuffer6665To8888<false>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
|
||||
template void ConvertColorBuffer8888To5551<true, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer8888To5551<true, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer8888To5551<false, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer8888To5551<false, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
|
||||
template void ConvertColorBuffer6665To5551<true, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer6665To5551<true, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer6665To5551<false, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ConvertColorBuffer6665To5551<false, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -52,6 +52,7 @@ libdesmume_a_SOURCES = \
|
|||
utils/decrypt/decrypt.h utils/decrypt/header.cpp utils/decrypt/header.h \
|
||||
utils/task.cpp utils/task.h \
|
||||
utils/vfat.h utils/vfat.cpp \
|
||||
utils/colorspacehandler/colorspacehandler.cpp \
|
||||
utils/dlditool.cpp \
|
||||
utils/libfat/bit_ops.h \
|
||||
utils/libfat/cache.cpp \
|
||||
|
@ -110,6 +111,21 @@ libdesmume_a_SOURCES = \
|
|||
libretro-common/rthreads/rsemaphore.c \
|
||||
libretro-common/rthreads/rthreads.c
|
||||
|
||||
# Optional SIMD back ends for the colorspace handler. Each source file is only
# added to the library when the matching Automake conditional is enabled.
# An Automake `if` line takes just the condition name, so no `+= \` may follow it.
if SUPPORT_SSE2
libdesmume_a_SOURCES += \
	utils/colorspacehandler/colorspacehandler_SSE2.cpp
endif

if SUPPORT_AVX2
libdesmume_a_SOURCES += \
	utils/colorspacehandler/colorspacehandler_AVX2.cpp
endif

if SUPPORT_ALTIVEC
libdesmume_a_SOURCES += \
	utils/colorspacehandler/colorspacehandler_AltiVec.cpp
endif
|
||||
|
||||
if HAVE_JIT
|
||||
libdesmume_a_SOURCES += \
|
||||
arm_jit.cpp arm_jit.h instruction_attributes.h \
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
|
||||
#ifdef ENABLE_SSE2
|
||||
#include <emmintrin.h>
|
||||
#include "./utils/colorspacehandler/colorspacehandler_SSE2.h"
|
||||
#endif
|
||||
|
||||
typedef struct
|
||||
|
@ -990,9 +991,9 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0));
|
||||
const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + i + 4));
|
||||
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 0), ConvertColor8888To6665<true>(srcColorLo) );
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 4), ConvertColor8888To6665<true>(srcColorHi) );
|
||||
_mm_store_si128( (__m128i *)(dstRGBA5551 + i), ConvertColor8888To5551<true>(srcColorLo, srcColorHi) );
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 0), ColorspaceConvert8888To6665_SSE2<true>(srcColorLo) );
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 4), ColorspaceConvert8888To6665_SSE2<true>(srcColorHi) );
|
||||
_mm_store_si128( (__m128i *)(dstRGBA5551 + i), ColorspaceConvert8888To5551_SSE2<true>(srcColorLo, srcColorHi) );
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -1001,17 +1002,17 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
#endif
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dstFramebuffer[i].color = ConvertColor8888To6665<true>(srcFramebuffer[i]);
|
||||
dstRGBA5551[i] = ConvertColor8888To5551<true>(srcFramebuffer[i]);
|
||||
dstFramebuffer[i].color = ColorspaceConvert8888To6665<true>(srcFramebuffer[i]);
|
||||
dstRGBA5551[i] = ColorspaceConvert8888To5551<true>(srcFramebuffer[i]);
|
||||
}
|
||||
}
|
||||
else if (dstFramebuffer != NULL)
|
||||
{
|
||||
ConvertColorBuffer8888To6665<true>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
|
||||
ColorspaceConvertBuffer8888To6665<true, false>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
|
||||
}
|
||||
else
|
||||
{
|
||||
ConvertColorBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
|
||||
ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
|
||||
}
|
||||
}
|
||||
else if (this->_outputFormat == NDSColorFormat_BGR888_Rev)
|
||||
|
@ -1027,7 +1028,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 0), srcColorLo );
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 4), srcColorHi );
|
||||
_mm_store_si128( (__m128i *)(dstRGBA5551 + i), ConvertColor8888To5551<true>(srcColorLo, srcColorHi) );
|
||||
_mm_store_si128( (__m128i *)(dstRGBA5551 + i), ColorspaceConvert8888To5551_SSE2<true>(srcColorLo, srcColorHi) );
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -1036,8 +1037,8 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
#endif
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dstFramebuffer[i].color = ConvertColor8888To6665<true>(srcFramebuffer[i]);
|
||||
dstRGBA5551[i] = ConvertColor8888To5551<true>(srcFramebuffer[i]);
|
||||
dstFramebuffer[i].color = ColorspaceConvert8888To6665<true>(srcFramebuffer[i]);
|
||||
dstRGBA5551[i] = ColorspaceConvert8888To5551<true>(srcFramebuffer[i]);
|
||||
}
|
||||
}
|
||||
else if (dstFramebuffer != NULL)
|
||||
|
@ -1046,7 +1047,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
}
|
||||
else
|
||||
{
|
||||
ConvertColorBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
|
||||
ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1068,9 +1069,9 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 0));
|
||||
const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 4));
|
||||
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 0), ConvertColor8888To6665<true>(srcColorLo) );
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 4), ConvertColor8888To6665<true>(srcColorHi) );
|
||||
_mm_store_si128( (__m128i *)(dstRGBA5551 + iw), ConvertColor8888To5551<true>(srcColorLo, srcColorHi) );
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 0), ColorspaceConvert8888To6665_SSE2<true>(srcColorLo) );
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 4), ColorspaceConvert8888To6665_SSE2<true>(srcColorHi) );
|
||||
_mm_store_si128( (__m128i *)(dstRGBA5551 + iw), ColorspaceConvert8888To5551_SSE2<true>(srcColorLo, srcColorHi) );
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -1079,8 +1080,8 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
#endif
|
||||
for (; x < pixCount; x++, ir++, iw++)
|
||||
{
|
||||
dstFramebuffer[iw].color = ConvertColor8888To6665<true>(srcFramebuffer[ir]);
|
||||
dstRGBA5551[iw] = ConvertColor8888To5551<true>(srcFramebuffer[ir]);
|
||||
dstFramebuffer[iw].color = ColorspaceConvert8888To6665<true>(srcFramebuffer[ir]);
|
||||
dstRGBA5551[iw] = ColorspaceConvert8888To5551<true>(srcFramebuffer[ir]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1088,14 +1089,14 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
{
|
||||
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
|
||||
{
|
||||
ConvertColorBuffer8888To6665<true>((u32 *)srcFramebuffer + ir, (u32 *)dstFramebuffer + iw, pixCount);
|
||||
ColorspaceConvertBuffer8888To6665<true, false>((u32 *)srcFramebuffer + ir, (u32 *)dstFramebuffer + iw, pixCount);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
|
||||
{
|
||||
ConvertColorBuffer8888To5551<true, false>((u32 *)srcFramebuffer + ir, dstRGBA5551 + iw, pixCount);
|
||||
ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer + ir, dstRGBA5551 + iw, pixCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1115,7 +1116,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 0), srcColorLo );
|
||||
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 4), srcColorHi );
|
||||
_mm_store_si128( (__m128i *)(dstRGBA5551 + iw), ConvertColor8888To5551<true>(srcColorLo, srcColorHi) );
|
||||
_mm_store_si128( (__m128i *)(dstRGBA5551 + iw), ColorspaceConvert8888To5551_SSE2<true>(srcColorLo, srcColorHi) );
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -1125,7 +1126,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
for (; x < pixCount; x++, ir++, iw++)
|
||||
{
|
||||
dstFramebuffer[iw] = srcFramebuffer[ir];
|
||||
dstRGBA5551[iw] = ConvertColor8888To5551<true>(srcFramebuffer[ir]);
|
||||
dstRGBA5551[iw] = ColorspaceConvert8888To5551<true>(srcFramebuffer[ir]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1146,7 +1147,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
|||
{
|
||||
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
|
||||
{
|
||||
ConvertColorBuffer8888To5551<true, false>((u32 *)srcFramebuffer + ir, dstRGBA5551 + iw, pixCount);
|
||||
ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer + ir, dstRGBA5551 + iw, pixCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -243,6 +243,8 @@
|
|||
AB564915186E6F67002740F4 /* Image_Piano.png in Resources */ = {isa = PBXBuildFile; fileRef = AB56490B186E6F67002740F4 /* Image_Piano.png */; };
|
||||
AB5785FD17176AFC002C5FC7 /* OpenEmuBase.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = AB5785FC17176AFC002C5FC7 /* OpenEmuBase.framework */; };
|
||||
AB58F32D1364F44B0074C376 /* cocoa_file.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB58F32C1364F44B0074C376 /* cocoa_file.mm */; };
|
||||
AB5FDDAC1D62C89E0094617C /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */; };
|
||||
AB5FDDAD1D62C8A00094617C /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */; };
|
||||
AB64987C13ECC73800EE7DD2 /* FileTypeInfo.plist in Resources */ = {isa = PBXBuildFile; fileRef = AB64987B13ECC73800EE7DD2 /* FileTypeInfo.plist */; };
|
||||
AB68101B187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png in Resources */ = {isa = PBXBuildFile; fileRef = AB681013187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png */; };
|
||||
AB68101C187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png in Resources */ = {isa = PBXBuildFile; fileRef = AB681013187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png */; };
|
||||
|
@ -974,6 +976,12 @@
|
|||
ABB97878144E89CC00793FA3 /* Icon_DeSmuME_32x32.png in Resources */ = {isa = PBXBuildFile; fileRef = ABB97875144E89CC00793FA3 /* Icon_DeSmuME_32x32.png */; };
|
||||
ABBC0F8D1394B1AA0028B6BD /* DefaultUserPrefs.plist in Resources */ = {isa = PBXBuildFile; fileRef = ABBC0F8C1394B1AA0028B6BD /* DefaultUserPrefs.plist */; };
|
||||
ABBF04A514B515F300E505A0 /* AppIcon_ROMCheats.icns in Resources */ = {isa = PBXBuildFile; fileRef = ABBF04A414B515F300E505A0 /* AppIcon_ROMCheats.icns */; };
|
||||
ABBFFF851D6283C0003CD598 /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */; };
|
||||
ABBFFF861D6283C1003CD598 /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */; };
|
||||
ABBFFF871D6283C1003CD598 /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */; };
|
||||
ABBFFF891D6283D2003CD598 /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */; };
|
||||
ABBFFF8A1D6283D3003CD598 /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */; };
|
||||
ABBFFF8B1D6283D3003CD598 /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */; };
|
||||
ABC3AF2F14B7F06900D5B13D /* Icon_VolumeFull_16x16.png in Resources */ = {isa = PBXBuildFile; fileRef = ABC3AF2B14B7F06900D5B13D /* Icon_VolumeFull_16x16.png */; };
|
||||
ABC3AF3014B7F06900D5B13D /* Icon_VolumeMute_16x16.png in Resources */ = {isa = PBXBuildFile; fileRef = ABC3AF2C14B7F06900D5B13D /* Icon_VolumeMute_16x16.png */; };
|
||||
ABC3AF3114B7F06900D5B13D /* Icon_VolumeOneThird_16x16.png in Resources */ = {isa = PBXBuildFile; fileRef = ABC3AF2D14B7F06900D5B13D /* Icon_VolumeOneThird_16x16.png */; };
|
||||
|
@ -1534,6 +1542,14 @@
|
|||
ABBB421516B4A5F30012E5AB /* OGLRender_3_2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = OGLRender_3_2.h; path = ../OGLRender_3_2.h; sourceTree = "<group>"; };
|
||||
ABBC0F8C1394B1AA0028B6BD /* DefaultUserPrefs.plist */ = {isa = PBXFileReference; lastKnownFileType = file.bplist; path = DefaultUserPrefs.plist; sourceTree = "<group>"; };
|
||||
ABBF04A414B515F300E505A0 /* AppIcon_ROMCheats.icns */ = {isa = PBXFileReference; lastKnownFileType = image.icns; path = AppIcon_ROMCheats.icns; sourceTree = "<group>"; };
|
||||
ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler.cpp; sourceTree = "<group>"; };
|
||||
ABBFFF701D5F9C52003CD598 /* colorspacehandler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colorspacehandler.h; sourceTree = "<group>"; };
|
||||
ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler_SSE2.cpp; sourceTree = "<group>"; };
|
||||
ABBFFF761D5FD2ED003CD598 /* colorspacehandler_SSE2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colorspacehandler_SSE2.h; sourceTree = "<group>"; };
|
||||
ABBFFF7B1D610457003CD598 /* colorspacehandler_AVX2.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler_AVX2.cpp; sourceTree = "<group>"; };
|
||||
ABBFFF7C1D610457003CD598 /* colorspacehandler_AVX2.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = colorspacehandler_AVX2.h; sourceTree = "<group>"; };
|
||||
ABBFFF811D611A36003CD598 /* colorspacehandler_AltiVec.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler_AltiVec.cpp; sourceTree = "<group>"; };
|
||||
ABBFFF821D611A36003CD598 /* colorspacehandler_AltiVec.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = colorspacehandler_AltiVec.h; sourceTree = "<group>"; };
|
||||
ABC3AF2B14B7F06900D5B13D /* Icon_VolumeFull_16x16.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_VolumeFull_16x16.png; path = images/Icon_VolumeFull_16x16.png; sourceTree = "<group>"; };
|
||||
ABC3AF2C14B7F06900D5B13D /* Icon_VolumeMute_16x16.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_VolumeMute_16x16.png; path = images/Icon_VolumeMute_16x16.png; sourceTree = "<group>"; };
|
||||
ABC3AF2D14B7F06900D5B13D /* Icon_VolumeOneThird_16x16.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_VolumeOneThird_16x16.png; path = images/Icon_VolumeOneThird_16x16.png; sourceTree = "<group>"; };
|
||||
|
@ -2507,6 +2523,21 @@
|
|||
path = openemu;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
ABBFFF6E1D5F9C10003CD598 /* colorspacehandler */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
ABBFFF811D611A36003CD598 /* colorspacehandler_AltiVec.cpp */,
|
||||
ABBFFF7B1D610457003CD598 /* colorspacehandler_AVX2.cpp */,
|
||||
ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */,
|
||||
ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */,
|
||||
ABBFFF821D611A36003CD598 /* colorspacehandler_AltiVec.h */,
|
||||
ABBFFF7C1D610457003CD598 /* colorspacehandler_AVX2.h */,
|
||||
ABBFFF761D5FD2ED003CD598 /* colorspacehandler_SSE2.h */,
|
||||
ABBFFF701D5F9C52003CD598 /* colorspacehandler.h */,
|
||||
);
|
||||
path = colorspacehandler;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
ABC2ECD613B1C87000FAAA2A /* Images */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
|
@ -2757,6 +2788,7 @@
|
|||
ABD1FF211345ACBF00AF11D1 /* decrypt */,
|
||||
ABD1FF2E1345ACBF00AF11D1 /* libfat */,
|
||||
ABE670241415DE6C00E8E4C9 /* tinyxml */,
|
||||
ABBFFF6E1D5F9C10003CD598 /* colorspacehandler */,
|
||||
ABD1FF1D1345ACBF00AF11D1 /* ConvertUTF.c */,
|
||||
AB9038A517C5ECFD00F410BD /* advanscene.cpp */,
|
||||
ABD1FF1F1345ACBF00AF11D1 /* datetime.cpp */,
|
||||
|
@ -3768,6 +3800,7 @@
|
|||
ABE6840D189E33BC007FD69C /* OGLDisplayOutput.cpp in Sources */,
|
||||
ABD1FF121345AC9C00AF11D1 /* slot2_none.cpp in Sources */,
|
||||
ABD1FF131345AC9C00AF11D1 /* slot2_paddle.cpp in Sources */,
|
||||
ABBFFF8A1D6283D3003CD598 /* colorspacehandler_SSE2.cpp in Sources */,
|
||||
ABD1FF141345AC9C00AF11D1 /* slot2_piano.cpp in Sources */,
|
||||
ABD1FF151345AC9C00AF11D1 /* slot2_rumblepak.cpp in Sources */,
|
||||
ABD1041F1346652500AF11D1 /* sndOSX.cpp in Sources */,
|
||||
|
@ -3862,6 +3895,7 @@
|
|||
AB40565E169F5DBB0016AC3E /* virtualmemory.cpp in Sources */,
|
||||
AB405661169F5DBB0016AC3E /* zonememory.cpp in Sources */,
|
||||
AB405679169F5DCC0016AC3E /* x86assembler.cpp in Sources */,
|
||||
ABBFFF861D6283C1003CD598 /* colorspacehandler.cpp in Sources */,
|
||||
AB40567C169F5DCC0016AC3E /* x86compiler.cpp in Sources */,
|
||||
ABFEA8A41BB4EC1100B08C25 /* sfnt.c in Sources */,
|
||||
ABA731691BB51FDC00B26147 /* type1cid.c in Sources */,
|
||||
|
@ -4015,6 +4049,7 @@
|
|||
AB796D4315CDCBA200C59155 /* version.cpp in Sources */,
|
||||
ABFEA82B1BB4EC1100B08C25 /* ftinit.c in Sources */,
|
||||
AB796D4415CDCBA200C59155 /* vfat.cpp in Sources */,
|
||||
AB5FDDAC1D62C89E0094617C /* colorspacehandler.cpp in Sources */,
|
||||
AB796D4515CDCBA200C59155 /* videofilter.cpp in Sources */,
|
||||
AB796D4615CDCBA200C59155 /* WavFile.cpp in Sources */,
|
||||
AB796D4715CDCBA200C59155 /* wifi.cpp in Sources */,
|
||||
|
@ -4094,6 +4129,7 @@
|
|||
AB26D87C16B5253D00A2305C /* OGLRender_3_2.cpp in Sources */,
|
||||
AB3A655E16CC5421001F5D4A /* EmuControllerDelegate.mm in Sources */,
|
||||
AB3A656116CC5438001F5D4A /* cocoa_GPU.mm in Sources */,
|
||||
AB5FDDAD1D62C8A00094617C /* colorspacehandler_SSE2.cpp in Sources */,
|
||||
AB8967D916D2ED0700F826F1 /* DisplayWindowController.mm in Sources */,
|
||||
AB29B33116D4BEBF000EF671 /* InputManager.mm in Sources */,
|
||||
AB8B7AAC17CE8C440051CEBF /* slot1comp_protocol.cpp in Sources */,
|
||||
|
@ -4270,6 +4306,7 @@
|
|||
AB2ABA401C9F9CFA00173B15 /* rsemaphore.c in Sources */,
|
||||
AB8F3CF01A53AC2600A80BF6 /* ringbuffer.cpp in Sources */,
|
||||
AB8F3CF11A53AC2600A80BF6 /* arm_jit.cpp in Sources */,
|
||||
ABBFFF891D6283D2003CD598 /* colorspacehandler_SSE2.cpp in Sources */,
|
||||
AB8F3CF21A53AC2600A80BF6 /* troubleshootingWindowDelegate.mm in Sources */,
|
||||
AB8F3CF31A53AC2600A80BF6 /* assembler.cpp in Sources */,
|
||||
AB8F3CF41A53AC2600A80BF6 /* assert.cpp in Sources */,
|
||||
|
@ -4293,6 +4330,7 @@
|
|||
AB8F3D041A53AC2600A80BF6 /* virtualmemory.cpp in Sources */,
|
||||
AB8F3D051A53AC2600A80BF6 /* zonememory.cpp in Sources */,
|
||||
AB8F3D061A53AC2600A80BF6 /* x86assembler.cpp in Sources */,
|
||||
ABBFFF851D6283C0003CD598 /* colorspacehandler.cpp in Sources */,
|
||||
AB8F3D071A53AC2600A80BF6 /* x86compiler.cpp in Sources */,
|
||||
AB8F3D081A53AC2600A80BF6 /* x86compilercontext.cpp in Sources */,
|
||||
AB8F3D091A53AC2600A80BF6 /* x86compilerfunc.cpp in Sources */,
|
||||
|
@ -4365,6 +4403,7 @@
|
|||
ABB3C6911501C04F00E0C22E /* SoundTouch.cpp in Sources */,
|
||||
ABB3C6921501C04F00E0C22E /* sse_optimized.cpp in Sources */,
|
||||
ABB3C6931501C04F00E0C22E /* TDStretch.cpp in Sources */,
|
||||
ABBFFF871D6283C1003CD598 /* colorspacehandler.cpp in Sources */,
|
||||
ABB3C6941501C04F00E0C22E /* WavFile.cpp in Sources */,
|
||||
ABB3C6951501C04F00E0C22E /* metaspu.cpp in Sources */,
|
||||
ABB3C6961501C04F00E0C22E /* SndOut.cpp in Sources */,
|
||||
|
@ -4434,6 +4473,7 @@
|
|||
ABB3C6D11501C04F00E0C22E /* slot1.cpp in Sources */,
|
||||
ABB3C6D31501C04F00E0C22E /* SPU.cpp in Sources */,
|
||||
ABB3C6D41501C04F00E0C22E /* texcache.cpp in Sources */,
|
||||
ABBFFF8B1D6283D3003CD598 /* colorspacehandler_SSE2.cpp in Sources */,
|
||||
AB9038BA17C5ED2200F410BD /* slot1comp_rom.cpp in Sources */,
|
||||
ABB3C6D51501C04F00E0C22E /* thumb_instructions.cpp in Sources */,
|
||||
AB2EE13317D57F5000F68622 /* fsnitro.cpp in Sources */,
|
||||
|
|
|
@ -740,6 +740,14 @@
|
|||
AB2F56F11704C86900E28885 /* utilities.c in Sources */ = {isa = PBXBuildFile; fileRef = AB2F56EF1704C86900E28885 /* utilities.c */; };
|
||||
AB2F56F21704C86900E28885 /* utilities.c in Sources */ = {isa = PBXBuildFile; fileRef = AB2F56EF1704C86900E28885 /* utilities.c */; };
|
||||
AB2F56F31704C86900E28885 /* utilities.c in Sources */ = {isa = PBXBuildFile; fileRef = AB2F56EF1704C86900E28885 /* utilities.c */; };
|
||||
AB37E3741D6188BC004A2C0D /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */; };
|
||||
AB37E3771D6188BC004A2C0D /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E3721D6188BC004A2C0D /* colorspacehandler_SSE2.cpp */; };
|
||||
AB37E3781D6188BC004A2C0D /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */; };
|
||||
AB37E37B1D6188BC004A2C0D /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E3721D6188BC004A2C0D /* colorspacehandler_SSE2.cpp */; };
|
||||
AB37E37C1D6188BC004A2C0D /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */; };
|
||||
AB37E37D1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36E1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp */; };
|
||||
AB37E3801D6188BC004A2C0D /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */; };
|
||||
AB37E38A1D61895F004A2C0D /* colorspacehandler_AltiVec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36E1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp */; };
|
||||
AB3ACB7814C2361100D7D192 /* appDelegate.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB3ACB6714C2361100D7D192 /* appDelegate.mm */; };
|
||||
AB3ACB7914C2361100D7D192 /* cheatWindowDelegate.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB3ACB6914C2361100D7D192 /* cheatWindowDelegate.mm */; };
|
||||
AB3ACB7C14C2361100D7D192 /* inputPrefsView.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB3ACB6F14C2361100D7D192 /* inputPrefsView.mm */; };
|
||||
|
@ -1156,6 +1164,8 @@
|
|||
AB73AA2E1507C9F500A310C8 /* OpenGL.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = ABC570D4134431DA00E7B0B1 /* OpenGL.framework */; };
|
||||
AB73AA2F1507C9F500A310C8 /* libz.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = AB0A0D1914AACA9600E83E91 /* libz.dylib */; };
|
||||
AB75226F14C7BB51009B97B3 /* AppIcon_FirmwareConfig.icns in Resources */ = {isa = PBXBuildFile; fileRef = AB75226D14C7BB51009B97B3 /* AppIcon_FirmwareConfig.icns */; };
|
||||
AB7BB17F1D62C8CC00A7A6E2 /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */; };
|
||||
AB7BB1801D62C8CF00A7A6E2 /* colorspacehandler_AltiVec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36E1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp */; };
|
||||
AB7DDA6D173DC38F004F3D07 /* Carbon.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = ABB6AD5C173A3F2B00EC2E8D /* Carbon.framework */; };
|
||||
AB7DDA6E173DC399004F3D07 /* Carbon.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = ABB6AD5C173A3F2B00EC2E8D /* Carbon.framework */; };
|
||||
AB7DDA6F173DC39E004F3D07 /* Carbon.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = ABB6AD5C173A3F2B00EC2E8D /* Carbon.framework */; };
|
||||
|
@ -1835,6 +1845,12 @@
|
|||
AB2F56EF1704C86900E28885 /* utilities.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = utilities.c; sourceTree = "<group>"; };
|
||||
AB350BA41478AC96007165AC /* IOKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = IOKit.framework; path = System/Library/Frameworks/IOKit.framework; sourceTree = SDKROOT; };
|
||||
AB350D38147A1D8D007165AC /* English */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; name = English; path = translations/English.lproj/HID_usage_strings.plist; sourceTree = "<group>"; };
|
||||
AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler.cpp; sourceTree = "<group>"; };
|
||||
AB37E36D1D6188BC004A2C0D /* colorspacehandler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colorspacehandler.h; sourceTree = "<group>"; };
|
||||
AB37E36E1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler_AltiVec.cpp; sourceTree = "<group>"; };
|
||||
AB37E36F1D6188BC004A2C0D /* colorspacehandler_AltiVec.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colorspacehandler_AltiVec.h; sourceTree = "<group>"; };
|
||||
AB37E3721D6188BC004A2C0D /* colorspacehandler_SSE2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler_SSE2.cpp; sourceTree = "<group>"; };
|
||||
AB37E3731D6188BC004A2C0D /* colorspacehandler_SSE2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colorspacehandler_SSE2.h; sourceTree = "<group>"; };
|
||||
AB3ACB6614C2361100D7D192 /* appDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = appDelegate.h; sourceTree = "<group>"; };
|
||||
AB3ACB6714C2361100D7D192 /* appDelegate.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = appDelegate.mm; sourceTree = "<group>"; };
|
||||
AB3ACB6814C2361100D7D192 /* cheatWindowDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cheatWindowDelegate.h; sourceTree = "<group>"; };
|
||||
|
@ -2893,6 +2909,19 @@
|
|||
path = src;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
AB37E36B1D6188BC004A2C0D /* colorspacehandler */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */,
|
||||
AB37E36D1D6188BC004A2C0D /* colorspacehandler.h */,
|
||||
AB37E36E1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp */,
|
||||
AB37E36F1D6188BC004A2C0D /* colorspacehandler_AltiVec.h */,
|
||||
AB37E3721D6188BC004A2C0D /* colorspacehandler_SSE2.cpp */,
|
||||
AB37E3731D6188BC004A2C0D /* colorspacehandler_SSE2.h */,
|
||||
);
|
||||
path = colorspacehandler;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
AB3ACB6514C2361100D7D192 /* userinterface */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
|
@ -3205,6 +3234,7 @@
|
|||
isa = PBXGroup;
|
||||
children = (
|
||||
ABBCE2A115ACB29100A2C965 /* AsmJit */,
|
||||
AB37E36B1D6188BC004A2C0D /* colorspacehandler */,
|
||||
ABD1FF211345ACBF00AF11D1 /* decrypt */,
|
||||
ABD1FF2E1345ACBF00AF11D1 /* libfat */,
|
||||
ABE670241415DE6C00E8E4C9 /* tinyxml */,
|
||||
|
@ -4506,6 +4536,8 @@
|
|||
AB50200A1D09E712002FA150 /* file_path.c in Sources */,
|
||||
AB50200B1D09E712002FA150 /* retro_dirent.c in Sources */,
|
||||
AB50200C1D09E712002FA150 /* retro_stat.c in Sources */,
|
||||
AB7BB17F1D62C8CC00A7A6E2 /* colorspacehandler.cpp in Sources */,
|
||||
AB7BB1801D62C8CF00A7A6E2 /* colorspacehandler_AltiVec.cpp in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
|
@ -4685,6 +4717,8 @@
|
|||
AB5020161D09E712002FA150 /* file_path.c in Sources */,
|
||||
AB5020171D09E712002FA150 /* retro_dirent.c in Sources */,
|
||||
AB5020181D09E712002FA150 /* retro_stat.c in Sources */,
|
||||
AB37E3801D6188BC004A2C0D /* colorspacehandler.cpp in Sources */,
|
||||
AB37E38A1D61895F004A2C0D /* colorspacehandler_AltiVec.cpp in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
|
@ -4894,6 +4928,8 @@
|
|||
AB50200D1D09E712002FA150 /* file_path.c in Sources */,
|
||||
AB50200E1D09E712002FA150 /* retro_dirent.c in Sources */,
|
||||
AB50200F1D09E712002FA150 /* retro_stat.c in Sources */,
|
||||
AB37E3741D6188BC004A2C0D /* colorspacehandler.cpp in Sources */,
|
||||
AB37E3771D6188BC004A2C0D /* colorspacehandler_SSE2.cpp in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
|
@ -5103,6 +5139,8 @@
|
|||
AB5020101D09E712002FA150 /* file_path.c in Sources */,
|
||||
AB5020111D09E712002FA150 /* retro_dirent.c in Sources */,
|
||||
AB5020121D09E712002FA150 /* retro_stat.c in Sources */,
|
||||
AB37E3781D6188BC004A2C0D /* colorspacehandler.cpp in Sources */,
|
||||
AB37E37B1D6188BC004A2C0D /* colorspacehandler_SSE2.cpp in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
|
@ -5282,6 +5320,8 @@
|
|||
AB5020131D09E712002FA150 /* file_path.c in Sources */,
|
||||
AB5020141D09E712002FA150 /* retro_dirent.c in Sources */,
|
||||
AB5020151D09E712002FA150 /* retro_stat.c in Sources */,
|
||||
AB37E37C1D6188BC004A2C0D /* colorspacehandler.cpp in Sources */,
|
||||
AB37E37D1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
|
|
|
@ -754,7 +754,7 @@
|
|||
|
||||
if (dispInfo.pixelBytes == 2)
|
||||
{
|
||||
ConvertColorBuffer555To8888Opaque<false, false>((u16 *)displayBuffer, bitmapData, (w * h));
|
||||
ColorspaceConvertBuffer555To8888Opaque<false, false>((u16 *)displayBuffer, bitmapData, (w * h));
|
||||
}
|
||||
else if (dispInfo.pixelBytes == 4)
|
||||
{
|
||||
|
|
|
@ -692,7 +692,7 @@ void RomIconToRGBA8888(uint32_t *bitmapData)
|
|||
//
|
||||
// The first entry always represents the alpha, so we can just ignore it.
|
||||
clut[0] = 0x00000000;
|
||||
ConvertColorBuffer555To8888Opaque<false, true>((u16 *)iconClutPtr, &clut[1], 15);
|
||||
ColorspaceConvertBuffer555To8888Opaque<false, true>((u16 *)iconClutPtr, &clut[1], 15);
|
||||
|
||||
// Load the image from the icon pixel data.
|
||||
//
|
||||
|
|
|
@ -66,6 +66,7 @@ CommandLine::CommandLine()
|
|||
, arm7_gdb_port(0)
|
||||
, start_paused(FALSE)
|
||||
, autodetect_method(-1)
|
||||
, render3d(COMMANDLINE_RENDER3D_DEFAULT)
|
||||
{
|
||||
#ifndef HOST_WINDOWS
|
||||
disable_sound = 0;
|
||||
|
@ -92,6 +93,8 @@ static const char* help_string = \
|
|||
" --num-cores N Override numcores detection and use this many" ENDL
|
||||
" --spu-synch Use SPU synch (crackles; helps streams; default ON)" ENDL
|
||||
" --spu-method N Select SPU synch method: 0:N, 1:Z, 2:P; default 0" ENDL
|
||||
" --3d-render [SW|AUTOGL|GL|OLDGL]" ENDL
|
||||
" Select 3d renderer; default SW" ENDL
|
||||
#ifndef HOST_WINDOWS
|
||||
" --disable-sound Disables the sound output" ENDL
|
||||
" --disable-limiter Disables the 60fps limiter" ENDL
|
||||
|
@ -154,6 +157,7 @@ ENDL
|
|||
|
||||
#define OPT_NUMCORES 1
|
||||
#define OPT_SPU_METHOD 2
|
||||
#define OPT_3D_RENDER 3
|
||||
#define OPT_JIT_SIZE 100
|
||||
|
||||
#define OPT_CONSOLE_TYPE 200
|
||||
|
@ -183,6 +187,8 @@ ENDL
|
|||
|
||||
bool CommandLine::parse(int argc,char **argv)
|
||||
{
|
||||
std::string _render3d;
|
||||
|
||||
int opt_help = 0;
|
||||
int option_index = 0;
|
||||
for(;;)
|
||||
|
@ -197,6 +203,7 @@ bool CommandLine::parse(int argc,char **argv)
|
|||
{ "num-cores", required_argument, NULL, OPT_NUMCORES },
|
||||
{ "spu-synch", no_argument, &_spu_sync_mode, 1 },
|
||||
{ "spu-method", required_argument, NULL, OPT_SPU_METHOD },
|
||||
{ "3d-render", required_argument, NULL, OPT_3D_RENDER },
|
||||
#ifndef HOST_WINDOWS
|
||||
{ "disable-sound", no_argument, &disable_sound, 1},
|
||||
{ "disable-limiter", no_argument, &disable_limiter, 1},
|
||||
|
@ -265,6 +272,7 @@ bool CommandLine::parse(int argc,char **argv)
|
|||
//user settings
|
||||
case OPT_NUMCORES: _num_cores = atoi(optarg); break;
|
||||
case OPT_SPU_METHOD: _spu_sync_method = atoi(optarg); break;
|
||||
case OPT_3D_RENDER: _render3d = optarg; break;
|
||||
|
||||
//sync settings
|
||||
case OPT_JIT_SIZE: _jit_size = atoi(optarg); break;
|
||||
|
@ -343,6 +351,14 @@ bool CommandLine::parse(int argc,char **argv)
|
|||
CommonSettings.DebugConsole = true;
|
||||
}
|
||||
|
||||
//process 3d renderer
|
||||
_render3d = strtoupper(_render3d);
|
||||
if(_render3d == "NONE") render3d = COMMANDLINE_RENDER3D_NONE;
|
||||
if(_render3d == "SW") render3d = COMMANDLINE_RENDER3D_SW;
|
||||
if(_render3d == "OLDGL") render3d = COMMANDLINE_RENDER3D_OLDGL;
|
||||
if(_render3d == "AUTOGL") render3d = COMMANDLINE_RENDER3D_AUTOGL;
|
||||
if(_render3d == "GL") render3d = COMMANDLINE_RENDER3D_GL;
|
||||
|
||||
if (autodetect_method != -1)
|
||||
CommonSettings.autodetectBackupMethod = autodetect_method;
|
||||
|
||||
|
|
|
@ -24,17 +24,29 @@
|
|||
//hacky commandline options that I didn't want to route through commonoptions
|
||||
extern int _commandline_linux_nojoy;
|
||||
|
||||
#define COMMANDLINE_RENDER3D_DEFAULT 0
|
||||
#define COMMANDLINE_RENDER3D_NONE 1
|
||||
#define COMMANDLINE_RENDER3D_SW 2
|
||||
#define COMMANDLINE_RENDER3D_OLDGL 3
|
||||
#define COMMANDLINE_RENDER3D_GL 4
|
||||
#define COMMANDLINE_RENDER3D_AUTOGL 5
|
||||
|
||||
//this class will also eventually try to take over the responsibility of using the args that it handles
|
||||
//for example: preparing the emulator run by loading the rom, savestate, and/or movie in the correct pattern.
|
||||
//it should also populate CommonSettings with its initial values
|
||||
//EDIT: not really. combining this with what a frontend wants to do is complicated.
|
||||
//you might design the API so that the frontend sets all those up, but I'm not sure I like that
|
||||
//Really, this should be a passive structure that just collects the results provided by the shared command line processing, to be used later as appropriate
|
||||
//(and the CommonSettings setup REMOVED or at least refactored into a separate method)
|
||||
|
||||
class CommandLine
|
||||
{
|
||||
public:
|
||||
//actual options: these may move to another sturct
|
||||
//actual options: these may move to another struct
|
||||
int load_slot;
|
||||
int depth_threshold;
|
||||
int autodetect_method;
|
||||
int render3d;
|
||||
std::string nds_file;
|
||||
std::string play_movie_file;
|
||||
std::string record_movie_file;
|
||||
|
|
|
@ -29,16 +29,14 @@ static u8* Convert15To24(const u16* src, int width, int height)
|
|||
u8 *tmp_inc;
|
||||
tmp_inc = tmp_buffer = (u8 *)malloc(width * height * 3);
|
||||
|
||||
for(int y=0;y<height;y++)
|
||||
for (int i = 0; i < width*height; i++)
|
||||
{
|
||||
for(int x=0;x<width;x++)
|
||||
{
|
||||
u32 dst = ConvertColor555To8888Opaque<true>(*src++);
|
||||
u32 dst = ColorspaceConvert555To8888Opaque<true>(*src++);
|
||||
*tmp_inc++ = dst & 0xFF;
|
||||
*tmp_inc++ = (dst >> 8) & 0xFF;
|
||||
*tmp_inc++ = (dst >> 16) & 0xFF;
|
||||
}
|
||||
}
|
||||
|
||||
return tmp_buffer;
|
||||
}
|
||||
|
||||
|
|
|
@ -56,174 +56,174 @@
|
|||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\addons\slot1comp_mc.cpp" />
|
||||
<ClCompile Include="..\..\addons\slot1comp_protocol.cpp" />
|
||||
<ClCompile Include="..\..\addons\slot1comp_rom.cpp" />
|
||||
<ClCompile Include="..\..\addons\slot1_retail_auto.cpp" />
|
||||
<ClCompile Include="..\..\addons\slot1_retail_mcrom.cpp" />
|
||||
<ClCompile Include="..\..\addons\slot1_retail_mcrom_debug.cpp" />
|
||||
<ClCompile Include="..\..\addons\slot2_auto.cpp" />
|
||||
<ClCompile Include="..\..\addons\slot2_passme.cpp" />
|
||||
<ClCompile Include="..\..\addons\slot2_piano.cpp" />
|
||||
<ClCompile Include="..\..\addons\slot1_none.cpp" />
|
||||
<ClCompile Include="..\..\addons\slot1_r4.cpp" />
|
||||
<ClCompile Include="..\..\addons\slot1_retail_nand.cpp" />
|
||||
<ClCompile Include="..\..\addons\slot2_mpcf.cpp" />
|
||||
<ClCompile Include="..\..\addons\slot2_paddle.cpp" />
|
||||
<ClCompile Include="..\..\aggdraw.cpp" />
|
||||
<ClCompile Include="..\..\arm_instructions.cpp" />
|
||||
<ClCompile Include="..\..\armcpu.cpp" />
|
||||
<ClCompile Include="..\..\arm_jit.cpp" />
|
||||
<ClCompile Include="..\..\bios.cpp" />
|
||||
<ClCompile Include="..\..\cheatSystem.cpp" />
|
||||
<ClCompile Include="..\..\commandline.cpp" />
|
||||
<ClCompile Include="..\..\common.cpp" />
|
||||
<ClCompile Include="..\..\cp15.cpp" />
|
||||
<ClCompile Include="..\..\Database.cpp" />
|
||||
<ClCompile Include="..\..\debug.cpp" />
|
||||
<ClCompile Include="..\..\Disassembler.cpp" />
|
||||
<ClCompile Include="..\..\driver.cpp" />
|
||||
<ClCompile Include="..\..\emufile.cpp" />
|
||||
<ClCompile Include="..\..\encrypt.cpp" />
|
||||
<ClCompile Include="..\..\FIFO.cpp" />
|
||||
<ClCompile Include="..\..\filter\2xsai.cpp" />
|
||||
<ClCompile Include="..\..\filter\bilinear.cpp" />
|
||||
<ClCompile Include="..\..\filter\epx.cpp" />
|
||||
<ClCompile Include="..\..\filter\hq2x.cpp" />
|
||||
<ClCompile Include="..\..\filter\hq4x.cpp" />
|
||||
<ClCompile Include="..\..\filter\lq2x.cpp" />
|
||||
<ClCompile Include="..\..\filter\scanline.cpp" />
|
||||
<ClCompile Include="..\..\filter\xbrz.cpp" />
|
||||
<ClCompile Include="..\..\firmware.cpp" />
|
||||
<ClCompile Include="..\..\frontend\modules\ImageOut.cpp" />
|
||||
<ClCompile Include="..\..\gfx3d.cpp" />
|
||||
<ClCompile Include="..\..\GPU.cpp" />
|
||||
<ClCompile Include="..\..\GPU_OSD.cpp" />
|
||||
<ClCompile Include="..\..\libretro-common\compat\compat_fnmatch.c" />
|
||||
<ClCompile Include="..\..\libretro-common\compat\compat_getopt.c" />
|
||||
<ClCompile Include="..\..\libretro-common\compat\compat_posix_string.c" />
|
||||
<ClCompile Include="..\..\libretro-common\compat\compat_snprintf.c" />
|
||||
<ClCompile Include="..\..\libretro-common\compat\compat_strcasestr.c" />
|
||||
<ClCompile Include="..\..\libretro-common\compat\compat_strl.c" />
|
||||
<ClCompile Include="..\..\libretro-common\file\archive_file.c" />
|
||||
<ClCompile Include="..\..\libretro-common\file\archive_file_zlib.c" />
|
||||
<ClCompile Include="..\..\libretro-common\file\file_path.c" />
|
||||
<ClCompile Include="..\..\libretro-common\file\nbio\nbio_stdio.c" />
|
||||
<ClCompile Include="..\..\libretro-common\file\retro_dirent.c" />
|
||||
<ClCompile Include="..\..\libretro-common\file\retro_stat.c" />
|
||||
<ClCompile Include="..\..\libretro-common\formats\bmp\rbmp_encode.c" />
|
||||
<ClCompile Include="..\..\libretro-common\formats\png\rpng.c" />
|
||||
<ClCompile Include="..\..\libretro-common\formats\png\rpng_encode.c" />
|
||||
<ClCompile Include="..\..\libretro-common\hash\rhash.c" />
|
||||
<ClCompile Include="..\..\libretro-common\lists\dir_list.c" />
|
||||
<ClCompile Include="..\..\libretro-common\lists\file_list.c" />
|
||||
<ClCompile Include="..\..\libretro-common\lists\string_list.c" />
|
||||
<ClCompile Include="..\..\libretro-common\rthreads\rsemaphore.c" />
|
||||
<ClCompile Include="..\..\libretro-common\rthreads\rthreads.c" />
|
||||
<ClCompile Include="..\..\libretro-common\features\features_cpu.c" />
|
||||
<ClCompile Include="..\..\libretro-common\streams\file_stream.c" />
|
||||
<ClCompile Include="..\..\libretro-common\streams\memory_stream.c" />
|
||||
<ClCompile Include="..\..\lua-engine.cpp" />
|
||||
<ClCompile Include="..\..\matrix.cpp" />
|
||||
<ClCompile Include="..\..\mc.cpp" />
|
||||
<ClCompile Include="..\..\MMU.cpp" />
|
||||
<ClCompile Include="..\..\movie.cpp" />
|
||||
<ClCompile Include="..\..\NDSSystem.cpp" />
|
||||
<ClCompile Include="..\..\OGLRender.cpp" />
|
||||
<ClCompile Include="..\..\OGLRender_3_2.cpp" />
|
||||
<ClCompile Include="..\..\path.cpp" />
|
||||
<ClCompile Include="..\..\rasterize.cpp" />
|
||||
<ClCompile Include="..\..\readwrite.cpp" />
|
||||
<ClCompile Include="..\..\render3D.cpp" />
|
||||
<ClCompile Include="..\..\ROMReader.cpp" />
|
||||
<ClCompile Include="..\..\rtc.cpp" />
|
||||
<ClCompile Include="..\..\saves.cpp" />
|
||||
<ClCompile Include="..\..\slot1.cpp" />
|
||||
<ClCompile Include="..\..\slot2.cpp" />
|
||||
<ClCompile Include="..\..\SPU.cpp" />
|
||||
<ClCompile Include="..\..\texcache.cpp" />
|
||||
<ClCompile Include="..\..\thumb_instructions.cpp" />
|
||||
<ClCompile Include="..\..\utils\advanscene.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\core\assembler.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\core\assert.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\core\buffer.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\core\compiler.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\core\compilercontext.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\core\compilerfunc.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\core\compileritem.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\core\context.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\core\cpuinfo.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\core\defs.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\core\func.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\core\logger.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\core\memorymanager.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\core\memorymarker.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\core\operand.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\core\stringbuilder.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\core\stringutil.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\core\virtualmemory.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\core\zonememory.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\x86\x86assembler.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\x86\x86compiler.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\x86\x86compilercontext.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\x86\x86compilerfunc.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\x86\x86compileritem.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\x86\x86cpuinfo.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\x86\x86defs.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\x86\x86func.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\x86\x86operand.cpp" />
|
||||
<ClCompile Include="..\..\utils\AsmJit\x86\x86util.cpp" />
|
||||
<ClCompile Include="..\..\utils\datetime.cpp" />
|
||||
<ClCompile Include="..\..\utils\dlditool.cpp" />
|
||||
<ClCompile Include="..\..\utils\emufat.cpp" />
|
||||
<ClCompile Include="..\..\utils\fsnitro.cpp" />
|
||||
<ClCompile Include="..\..\utils\libfat\cache.cpp" />
|
||||
<ClCompile Include="..\..\utils\libfat\directory.cpp" />
|
||||
<ClCompile Include="..\..\utils\libfat\disc.cpp" />
|
||||
<ClCompile Include="..\..\utils\libfat\fatdir.cpp" />
|
||||
<ClCompile Include="..\..\utils\libfat\fatfile.cpp" />
|
||||
<ClCompile Include="..\..\utils\libfat\filetime.cpp" />
|
||||
<ClCompile Include="..\..\utils\libfat\file_allocation_table.cpp" />
|
||||
<ClCompile Include="..\..\utils\libfat\libfat.cpp" />
|
||||
<ClCompile Include="..\..\utils\libfat\libfat_public_api.cpp" />
|
||||
<ClCompile Include="..\..\utils\libfat\lock.cpp" />
|
||||
<ClCompile Include="..\..\utils\libfat\partition.cpp" />
|
||||
<ClCompile Include="..\..\utils\tinyxml\tinystr.cpp" />
|
||||
<ClCompile Include="..\..\utils\tinyxml\tinyxml.cpp" />
|
||||
<ClCompile Include="..\..\utils\tinyxml\tinyxmlerror.cpp" />
|
||||
<ClCompile Include="..\..\utils\tinyxml\tinyxmlparser.cpp" />
|
||||
<ClCompile Include="..\..\utils\vfat.cpp" />
|
||||
<ClCompile Include="..\..\version.cpp" />
|
||||
<ClCompile Include="..\..\wifi.cpp" />
|
||||
<ClCompile Include="..\..\addons\slot2_expMemory.cpp" />
|
||||
<ClCompile Include="..\..\addons\slot2_gbagame.cpp" />
|
||||
<ClCompile Include="..\..\addons\slot2_guitarGrip.cpp" />
|
||||
<ClCompile Include="..\..\addons\slot2_none.cpp" />
|
||||
<ClCompile Include="..\..\addons\slot2_rumblepak.cpp" />
|
||||
<ClCompile Include="..\..\gdbstub\gdbstub.cpp" />
|
||||
<ClCompile Include="..\..\utils\ConvertUTF.c" />
|
||||
<ClCompile Include="..\..\utils\guid.cpp" />
|
||||
<ClCompile Include="..\..\utils\md5.cpp" />
|
||||
<ClCompile Include="..\..\utils\task.cpp" />
|
||||
<ClCompile Include="..\..\utils\xstring.cpp" />
|
||||
<ClCompile Include="..\..\utils\decrypt\crc.cpp" />
|
||||
<ClCompile Include="..\..\utils\decrypt\decrypt.cpp" />
|
||||
<ClCompile Include="..\..\utils\decrypt\header.cpp" />
|
||||
<ClCompile Include="..\..\metaspu\metaspu.cpp" />
|
||||
<ClCompile Include="..\..\metaspu\SndOut.cpp" />
|
||||
<ClCompile Include="..\..\metaspu\Timestretcher.cpp" />
|
||||
<ClCompile Include="..\..\metaspu\win32\ConfigSoundtouch.cpp" />
|
||||
<ClCompile Include="..\..\metaspu\SoundTouch\3dnow_win.cpp" />
|
||||
<ClCompile Include="..\..\metaspu\SoundTouch\AAFilter.cpp" />
|
||||
<ClCompile Include="..\..\metaspu\SoundTouch\cpu_detect_x86_win.cpp" />
|
||||
<ClCompile Include="..\..\metaspu\SoundTouch\FIFOSampleBuffer.cpp" />
|
||||
<ClCompile Include="..\..\metaspu\SoundTouch\FIRFilter.cpp" />
|
||||
<ClCompile Include="..\..\metaspu\SoundTouch\mmx_optimized.cpp" />
|
||||
<ClCompile Include="..\..\metaspu\SoundTouch\RateTransposer.cpp" />
|
||||
<ClCompile Include="..\..\metaspu\SoundTouch\SoundTouch.cpp" />
|
||||
<ClCompile Include="..\..\metaspu\SoundTouch\sse_optimized.cpp" />
|
||||
<ClCompile Include="..\..\metaspu\SoundTouch\TDStretch.cpp" />
|
||||
<ClCompile Include="..\..\metaspu\SoundTouch\WavFile.cpp" />
|
||||
<ClCompile Include="..\addons\slot1comp_mc.cpp" />
|
||||
<ClCompile Include="..\addons\slot1comp_protocol.cpp" />
|
||||
<ClCompile Include="..\addons\slot1comp_rom.cpp" />
|
||||
<ClCompile Include="..\addons\slot1_retail_auto.cpp" />
|
||||
<ClCompile Include="..\addons\slot1_retail_mcrom.cpp" />
|
||||
<ClCompile Include="..\addons\slot1_retail_mcrom_debug.cpp" />
|
||||
<ClCompile Include="..\addons\slot2_auto.cpp" />
|
||||
<ClCompile Include="..\addons\slot2_passme.cpp" />
|
||||
<ClCompile Include="..\addons\slot2_piano.cpp" />
|
||||
<ClCompile Include="..\addons\slot1_none.cpp" />
|
||||
<ClCompile Include="..\addons\slot1_r4.cpp" />
|
||||
<ClCompile Include="..\addons\slot1_retail_nand.cpp" />
|
||||
<ClCompile Include="..\addons\slot2_mpcf.cpp" />
|
||||
<ClCompile Include="..\addons\slot2_paddle.cpp" />
|
||||
<ClCompile Include="..\aggdraw.cpp" />
|
||||
<ClCompile Include="..\arm_instructions.cpp" />
|
||||
<ClCompile Include="..\armcpu.cpp" />
|
||||
<ClCompile Include="..\arm_jit.cpp" />
|
||||
<ClCompile Include="..\bios.cpp" />
|
||||
<ClCompile Include="..\cheatSystem.cpp" />
|
||||
<ClCompile Include="..\commandline.cpp" />
|
||||
<ClCompile Include="..\common.cpp" />
|
||||
<ClCompile Include="..\cp15.cpp" />
|
||||
<ClCompile Include="..\debug.cpp" />
|
||||
<ClCompile Include="..\Disassembler.cpp" />
|
||||
<ClCompile Include="..\driver.cpp" />
|
||||
<ClCompile Include="..\emufile.cpp" />
|
||||
<ClCompile Include="..\encrypt.cpp" />
|
||||
<ClCompile Include="..\FIFO.cpp" />
|
||||
<ClCompile Include="..\filter\2xsai.cpp" />
|
||||
<ClCompile Include="..\filter\bilinear.cpp" />
|
||||
<ClCompile Include="..\filter\epx.cpp" />
|
||||
<ClCompile Include="..\filter\hq2x.cpp" />
|
||||
<ClCompile Include="..\filter\hq4x.cpp" />
|
||||
<ClCompile Include="..\filter\lq2x.cpp" />
|
||||
<ClCompile Include="..\filter\scanline.cpp" />
|
||||
<ClCompile Include="..\filter\xbrz.cpp" />
|
||||
<ClCompile Include="..\firmware.cpp" />
|
||||
<ClCompile Include="..\frontend\modules\ImageOut.cpp" />
|
||||
<ClCompile Include="..\gfx3d.cpp" />
|
||||
<ClCompile Include="..\GPU.cpp" />
|
||||
<ClCompile Include="..\GPU_OSD.cpp" />
|
||||
<ClCompile Include="..\libretro-common\compat\compat_fnmatch.c" />
|
||||
<ClCompile Include="..\libretro-common\compat\compat_getopt.c" />
|
||||
<ClCompile Include="..\libretro-common\compat\compat_posix_string.c" />
|
||||
<ClCompile Include="..\libretro-common\compat\compat_snprintf.c" />
|
||||
<ClCompile Include="..\libretro-common\compat\compat_strcasestr.c" />
|
||||
<ClCompile Include="..\libretro-common\compat\compat_strl.c" />
|
||||
<ClCompile Include="..\libretro-common\file\archive_file.c" />
|
||||
<ClCompile Include="..\libretro-common\file\archive_file_zlib.c" />
|
||||
<ClCompile Include="..\libretro-common\file\file_path.c" />
|
||||
<ClCompile Include="..\libretro-common\file\nbio\nbio_stdio.c" />
|
||||
<ClCompile Include="..\libretro-common\file\retro_dirent.c" />
|
||||
<ClCompile Include="..\libretro-common\file\retro_stat.c" />
|
||||
<ClCompile Include="..\libretro-common\formats\bmp\rbmp_encode.c" />
|
||||
<ClCompile Include="..\libretro-common\formats\png\rpng.c" />
|
||||
<ClCompile Include="..\libretro-common\formats\png\rpng_encode.c" />
|
||||
<ClCompile Include="..\libretro-common\hash\rhash.c" />
|
||||
<ClCompile Include="..\libretro-common\lists\dir_list.c" />
|
||||
<ClCompile Include="..\libretro-common\lists\file_list.c" />
|
||||
<ClCompile Include="..\libretro-common\lists\string_list.c" />
|
||||
<ClCompile Include="..\libretro-common\rthreads\rsemaphore.c" />
|
||||
<ClCompile Include="..\libretro-common\rthreads\rthreads.c" />
|
||||
<ClCompile Include="..\libretro-common\streams\file_stream.c" />
|
||||
<ClCompile Include="..\libretro-common\streams\memory_stream.c" />
|
||||
<ClCompile Include="..\lua-engine.cpp" />
|
||||
<ClCompile Include="..\matrix.cpp" />
|
||||
<ClCompile Include="..\mc.cpp" />
|
||||
<ClCompile Include="..\MMU.cpp" />
|
||||
<ClCompile Include="..\movie.cpp" />
|
||||
<ClCompile Include="..\NDSSystem.cpp" />
|
||||
<ClCompile Include="..\OGLRender.cpp" />
|
||||
<ClCompile Include="..\OGLRender_3_2.cpp" />
|
||||
<ClCompile Include="..\path.cpp" />
|
||||
<ClCompile Include="..\rasterize.cpp" />
|
||||
<ClCompile Include="..\readwrite.cpp" />
|
||||
<ClCompile Include="..\render3D.cpp" />
|
||||
<ClCompile Include="..\ROMReader.cpp" />
|
||||
<ClCompile Include="..\rtc.cpp" />
|
||||
<ClCompile Include="..\saves.cpp" />
|
||||
<ClCompile Include="..\slot1.cpp" />
|
||||
<ClCompile Include="..\slot2.cpp" />
|
||||
<ClCompile Include="..\SPU.cpp" />
|
||||
<ClCompile Include="..\texcache.cpp" />
|
||||
<ClCompile Include="..\thumb_instructions.cpp" />
|
||||
<ClCompile Include="..\utils\advanscene.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\core\assembler.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\core\assert.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\core\buffer.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\core\compiler.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\core\compilercontext.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\core\compilerfunc.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\core\compileritem.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\core\context.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\core\cpuinfo.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\core\defs.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\core\func.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\core\logger.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\core\memorymanager.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\core\memorymarker.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\core\operand.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\core\stringbuilder.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\core\stringutil.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\core\virtualmemory.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\core\zonememory.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\x86\x86assembler.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\x86\x86compiler.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\x86\x86compilercontext.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\x86\x86compilerfunc.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\x86\x86compileritem.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\x86\x86cpuinfo.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\x86\x86defs.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\x86\x86func.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\x86\x86operand.cpp" />
|
||||
<ClCompile Include="..\utils\AsmJit\x86\x86util.cpp" />
|
||||
<ClCompile Include="..\utils\colorspacehandler\colorspacehandler.cpp" />
|
||||
<ClCompile Include="..\utils\colorspacehandler\colorspacehandler_SSE2.cpp" />
|
||||
<ClCompile Include="..\utils\datetime.cpp" />
|
||||
<ClCompile Include="..\utils\dlditool.cpp" />
|
||||
<ClCompile Include="..\utils\emufat.cpp" />
|
||||
<ClCompile Include="..\utils\fsnitro.cpp" />
|
||||
<ClCompile Include="..\utils\libfat\cache.cpp" />
|
||||
<ClCompile Include="..\utils\libfat\directory.cpp" />
|
||||
<ClCompile Include="..\utils\libfat\disc.cpp" />
|
||||
<ClCompile Include="..\utils\libfat\fatdir.cpp" />
|
||||
<ClCompile Include="..\utils\libfat\fatfile.cpp" />
|
||||
<ClCompile Include="..\utils\libfat\filetime.cpp" />
|
||||
<ClCompile Include="..\utils\libfat\file_allocation_table.cpp" />
|
||||
<ClCompile Include="..\utils\libfat\libfat.cpp" />
|
||||
<ClCompile Include="..\utils\libfat\libfat_public_api.cpp" />
|
||||
<ClCompile Include="..\utils\libfat\lock.cpp" />
|
||||
<ClCompile Include="..\utils\libfat\partition.cpp" />
|
||||
<ClCompile Include="..\utils\tinyxml\tinystr.cpp" />
|
||||
<ClCompile Include="..\utils\tinyxml\tinyxml.cpp" />
|
||||
<ClCompile Include="..\utils\tinyxml\tinyxmlerror.cpp" />
|
||||
<ClCompile Include="..\utils\tinyxml\tinyxmlparser.cpp" />
|
||||
<ClCompile Include="..\utils\vfat.cpp" />
|
||||
<ClCompile Include="..\version.cpp" />
|
||||
<ClCompile Include="..\wifi.cpp" />
|
||||
<ClCompile Include="..\addons\slot2_expMemory.cpp" />
|
||||
<ClCompile Include="..\addons\slot2_gbagame.cpp" />
|
||||
<ClCompile Include="..\addons\slot2_guitarGrip.cpp" />
|
||||
<ClCompile Include="..\addons\slot2_none.cpp" />
|
||||
<ClCompile Include="..\addons\slot2_rumblepak.cpp" />
|
||||
<ClCompile Include="..\gdbstub\gdbstub.cpp" />
|
||||
<ClCompile Include="..\utils\ConvertUTF.c" />
|
||||
<ClCompile Include="..\utils\guid.cpp" />
|
||||
<ClCompile Include="..\utils\md5.cpp" />
|
||||
<ClCompile Include="..\utils\task.cpp" />
|
||||
<ClCompile Include="..\utils\xstring.cpp" />
|
||||
<ClCompile Include="..\utils\decrypt\crc.cpp" />
|
||||
<ClCompile Include="..\utils\decrypt\decrypt.cpp" />
|
||||
<ClCompile Include="..\utils\decrypt\header.cpp" />
|
||||
<ClCompile Include="..\metaspu\metaspu.cpp" />
|
||||
<ClCompile Include="..\metaspu\SndOut.cpp" />
|
||||
<ClCompile Include="..\metaspu\Timestretcher.cpp" />
|
||||
<ClCompile Include="..\metaspu\win32\ConfigSoundtouch.cpp" />
|
||||
<ClCompile Include="..\metaspu\SoundTouch\3dnow_win.cpp" />
|
||||
<ClCompile Include="..\metaspu\SoundTouch\AAFilter.cpp" />
|
||||
<ClCompile Include="..\metaspu\SoundTouch\cpu_detect_x86_win.cpp" />
|
||||
<ClCompile Include="..\metaspu\SoundTouch\FIFOSampleBuffer.cpp" />
|
||||
<ClCompile Include="..\metaspu\SoundTouch\FIRFilter.cpp" />
|
||||
<ClCompile Include="..\metaspu\SoundTouch\mmx_optimized.cpp" />
|
||||
<ClCompile Include="..\metaspu\SoundTouch\RateTransposer.cpp" />
|
||||
<ClCompile Include="..\metaspu\SoundTouch\SoundTouch.cpp" />
|
||||
<ClCompile Include="..\metaspu\SoundTouch\sse_optimized.cpp" />
|
||||
<ClCompile Include="..\metaspu\SoundTouch\TDStretch.cpp" />
|
||||
<ClCompile Include="..\metaspu\SoundTouch\WavFile.cpp" />
|
||||
<ClCompile Include="AboutBox.cpp" />
|
||||
<ClCompile Include="aviout.cpp" />
|
||||
<ClCompile Include="cheatsWin.cpp" />
|
||||
|
@ -315,7 +315,7 @@
|
|||
<ClCompile Include="inputdx.cpp" />
|
||||
<ClCompile Include="luaconsole.cpp" />
|
||||
<ClCompile Include="main.cpp" />
|
||||
<ClCompile Include="mic-win.cpp" />
|
||||
<ClCompile Include="mic.cpp" />
|
||||
<ClCompile Include="ogl.cpp" />
|
||||
<ClCompile Include="OpenArchive.cpp" />
|
||||
<ClCompile Include="pathsettings.cpp" />
|
||||
|
@ -341,158 +341,160 @@
|
|||
<ClCompile Include="tileView.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\..\addons\slot1comp_mc.h" />
|
||||
<ClInclude Include="..\..\addons\slot1comp_protocol.h" />
|
||||
<ClInclude Include="..\..\addons\slot1comp_rom.h" />
|
||||
<ClInclude Include="..\..\armcpu.h" />
|
||||
<ClInclude Include="..\..\arm_jit.h" />
|
||||
<ClInclude Include="..\..\bios.h" />
|
||||
<ClInclude Include="..\..\cheatSystem.h" />
|
||||
<ClInclude Include="..\..\commandline.h" />
|
||||
<ClInclude Include="..\..\common.h" />
|
||||
<ClInclude Include="..\..\cp15.h" />
|
||||
<ClInclude Include="..\..\Database.h" />
|
||||
<ClInclude Include="..\..\debug.h" />
|
||||
<ClInclude Include="..\..\Disassembler.h" />
|
||||
<ClInclude Include="..\..\driver.h" />
|
||||
<ClInclude Include="..\..\emufile.h" />
|
||||
<ClInclude Include="..\..\encrypt.h" />
|
||||
<ClInclude Include="..\..\FIFO.h" />
|
||||
<ClInclude Include="..\..\filter\filter.h" />
|
||||
<ClInclude Include="..\..\filter\hq2x.h" />
|
||||
<ClInclude Include="..\..\filter\interp.h" />
|
||||
<ClInclude Include="..\..\filter\lq2x.h" />
|
||||
<ClInclude Include="..\..\filter\xbrz.h" />
|
||||
<ClInclude Include="..\..\firmware.h" />
|
||||
<ClInclude Include="..\..\frontend\modules\ImageOut.h" />
|
||||
<ClInclude Include="..\..\gfx3d.h" />
|
||||
<ClInclude Include="..\..\GPU.h" />
|
||||
<ClInclude Include="..\..\GPU_osd.h" />
|
||||
<ClInclude Include="..\..\instructions.h" />
|
||||
<ClInclude Include="..\..\instruction_attributes.h" />
|
||||
<ClInclude Include="..\..\libretro-common\include\boolean.h" />
|
||||
<ClInclude Include="..\..\libretro-common\include\compat\getopt.h" />
|
||||
<ClInclude Include="..\..\libretro-common\include\compat\msvc.h" />
|
||||
<ClInclude Include="..\..\libretro-common\include\formats\image.h" />
|
||||
<ClInclude Include="..\..\libretro-common\include\formats\rbmp.h" />
|
||||
<ClInclude Include="..\..\libretro-common\include\formats\rpng.h" />
|
||||
<ClInclude Include="..\..\libretro-common\include\retro_inline.h" />
|
||||
<ClInclude Include="..\..\libretro-common\include\retro_miscellaneous.h" />
|
||||
<ClInclude Include="..\..\libretro-common\include\rthreads\rthreads.h" />
|
||||
<ClInclude Include="..\..\lua-engine.h" />
|
||||
<ClInclude Include="..\..\matrix.h" />
|
||||
<ClInclude Include="..\..\mc.h" />
|
||||
<ClInclude Include="..\..\mem.h" />
|
||||
<ClInclude Include="..\..\mic.h" />
|
||||
<ClInclude Include="..\..\MMU.h" />
|
||||
<ClInclude Include="..\..\MMU_timing.h" />
|
||||
<ClInclude Include="..\..\movie.h" />
|
||||
<ClInclude Include="..\..\NDSSystem.h" />
|
||||
<ClInclude Include="..\..\OGLRender.h" />
|
||||
<ClInclude Include="..\..\OGLRender_3_2.h" />
|
||||
<ClInclude Include="..\..\path.h" />
|
||||
<ClInclude Include="..\..\rasterize.h" />
|
||||
<ClInclude Include="..\..\readwrite.h" />
|
||||
<ClInclude Include="..\..\registers.h" />
|
||||
<ClInclude Include="..\..\render3D.h" />
|
||||
<ClInclude Include="..\..\ROMReader.h" />
|
||||
<ClInclude Include="..\..\rtc.h" />
|
||||
<ClInclude Include="..\..\saves.h" />
|
||||
<ClInclude Include="..\..\slot1.h" />
|
||||
<ClInclude Include="..\..\slot2.h" />
|
||||
<ClInclude Include="..\..\SPU.h" />
|
||||
<ClInclude Include="..\..\texcache.h" />
|
||||
<ClInclude Include="..\..\types.h" />
|
||||
<ClInclude Include="..\..\utils\advanscene.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\asmjit.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\config.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\core.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\core\apibegin.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\core\apiend.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\core\assembler.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\core\assert.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\core\buffer.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\core\build.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\core\compiler.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\core\compilercontext.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\core\compilerfunc.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\core\compileritem.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\core\context.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\core\cpuinfo.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\core\defs.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\core\func.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\core\intutil.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\core\lock.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\core\logger.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\core\memorymanager.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\core\memorymarker.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\core\operand.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\core\podvector.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\core\stringbuilder.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\core\stringutil.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\core\virtualmemory.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\core\zonememory.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\x86.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\x86\x86assembler.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\x86\x86compiler.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\x86\x86compilercontext.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\x86\x86compilerfunc.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\x86\x86compileritem.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\x86\x86cpuinfo.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\x86\x86defs.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\x86\x86func.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\x86\x86operand.h" />
|
||||
<ClInclude Include="..\..\utils\AsmJit\x86\x86util.h" />
|
||||
<ClInclude Include="..\..\utils\bits.h" />
|
||||
<ClInclude Include="..\..\utils\datetime.h" />
|
||||
<ClInclude Include="..\..\utils\emufat.h" />
|
||||
<ClInclude Include="..\..\utils\emufat_types.h" />
|
||||
<ClInclude Include="..\..\utils\fsnitro.h" />
|
||||
<ClInclude Include="..\..\utils\libfat\bit_ops.h" />
|
||||
<ClInclude Include="..\..\utils\libfat\cache.h" />
|
||||
<ClInclude Include="..\..\utils\libfat\common.h" />
|
||||
<ClInclude Include="..\..\utils\libfat\directory.h" />
|
||||
<ClInclude Include="..\..\utils\libfat\disc.h" />
|
||||
<ClInclude Include="..\..\utils\libfat\disc_io.h" />
|
||||
<ClInclude Include="..\..\utils\libfat\fat.h" />
|
||||
<ClInclude Include="..\..\utils\libfat\fatdir.h" />
|
||||
<ClInclude Include="..\..\utils\libfat\fatfile.h" />
|
||||
<ClInclude Include="..\..\utils\libfat\filetime.h" />
|
||||
<ClInclude Include="..\..\utils\libfat\file_allocation_table.h" />
|
||||
<ClInclude Include="..\..\utils\libfat\libfat_pc.h" />
|
||||
<ClInclude Include="..\..\utils\libfat\libfat_public_api.h" />
|
||||
<ClInclude Include="..\..\utils\libfat\lock.h" />
|
||||
<ClInclude Include="..\..\utils\libfat\mem_allocate.h" />
|
||||
<ClInclude Include="..\..\utils\libfat\partition.h" />
|
||||
<ClInclude Include="..\..\utils\tinyxml\tinystr.h" />
|
||||
<ClInclude Include="..\..\utils\tinyxml\tinyxml.h" />
|
||||
<ClInclude Include="..\..\utils\vfat.h" />
|
||||
<ClInclude Include="..\..\utils\xstring.h" />
|
||||
<ClInclude Include="..\..\version.h" />
|
||||
<ClInclude Include="..\..\wifi.h" />
|
||||
<ClInclude Include="..\..\gdbstub.h" />
|
||||
<ClInclude Include="..\..\utils\ConvertUTF.h" />
|
||||
<ClInclude Include="..\..\utils\guid.h" />
|
||||
<ClInclude Include="..\..\utils\md5.h" />
|
||||
<ClInclude Include="..\..\utils\task.h" />
|
||||
<ClInclude Include="..\..\utils\valuearray.h" />
|
||||
<ClInclude Include="..\..\utils\decrypt\crc.h" />
|
||||
<ClInclude Include="..\..\utils\decrypt\decrypt.h" />
|
||||
<ClInclude Include="..\..\utils\decrypt\header.h" />
|
||||
<ClInclude Include="..\..\metaspu\metaspu.h" />
|
||||
<ClInclude Include="..\..\metaspu\SndOut.h" />
|
||||
<ClInclude Include="..\..\metaspu\win32\Dialogs.h" />
|
||||
<ClInclude Include="..\..\metaspu\SoundTouch\AAFilter.h" />
|
||||
<ClInclude Include="..\..\metaspu\SoundTouch\BPMDetect.h" />
|
||||
<ClInclude Include="..\..\metaspu\SoundTouch\cpu_detect.h" />
|
||||
<ClInclude Include="..\..\metaspu\SoundTouch\FIFOSampleBuffer.h" />
|
||||
<ClInclude Include="..\..\metaspu\SoundTouch\FIFOSamplePipe.h" />
|
||||
<ClInclude Include="..\..\metaspu\SoundTouch\FIRFilter.h" />
|
||||
<ClInclude Include="..\..\metaspu\SoundTouch\RateTransposer.h" />
|
||||
<ClInclude Include="..\..\metaspu\SoundTouch\SoundTouch.h" />
|
||||
<ClInclude Include="..\..\metaspu\SoundTouch\STTypes.h" />
|
||||
<ClInclude Include="..\..\metaspu\SoundTouch\TDStretch.h" />
|
||||
<ClInclude Include="..\..\metaspu\SoundTouch\WavFile.h" />
|
||||
<ClInclude Include="..\addons\slot1comp_mc.h" />
|
||||
<ClInclude Include="..\addons\slot1comp_protocol.h" />
|
||||
<ClInclude Include="..\addons\slot1comp_rom.h" />
|
||||
<ClInclude Include="..\armcpu.h" />
|
||||
<ClInclude Include="..\arm_jit.h" />
|
||||
<ClInclude Include="..\bios.h" />
|
||||
<ClInclude Include="..\bits.h" />
|
||||
<ClInclude Include="..\cheatSystem.h" />
|
||||
<ClInclude Include="..\commandline.h" />
|
||||
<ClInclude Include="..\common.h" />
|
||||
<ClInclude Include="..\cp15.h" />
|
||||
<ClInclude Include="..\debug.h" />
|
||||
<ClInclude Include="..\Disassembler.h" />
|
||||
<ClInclude Include="..\driver.h" />
|
||||
<ClInclude Include="..\emufile.h" />
|
||||
<ClInclude Include="..\encrypt.h" />
|
||||
<ClInclude Include="..\FIFO.h" />
|
||||
<ClInclude Include="..\filter\filter.h" />
|
||||
<ClInclude Include="..\filter\hq2x.h" />
|
||||
<ClInclude Include="..\filter\interp.h" />
|
||||
<ClInclude Include="..\filter\lq2x.h" />
|
||||
<ClInclude Include="..\filter\xbrz.h" />
|
||||
<ClInclude Include="..\firmware.h" />
|
||||
<ClInclude Include="..\frontend\modules\ImageOut.h" />
|
||||
<ClInclude Include="..\gfx3d.h" />
|
||||
<ClInclude Include="..\GPU.h" />
|
||||
<ClInclude Include="..\GPU_osd.h" />
|
||||
<ClInclude Include="..\instructions.h" />
|
||||
<ClInclude Include="..\instruction_attributes.h" />
|
||||
<ClInclude Include="..\libretro-common\formats\png\rpng_internal.h" />
|
||||
<ClInclude Include="..\libretro-common\include\boolean.h" />
|
||||
<ClInclude Include="..\libretro-common\include\compat\getopt.h" />
|
||||
<ClInclude Include="..\libretro-common\include\compat\msvc.h" />
|
||||
<ClInclude Include="..\libretro-common\include\formats\image.h" />
|
||||
<ClInclude Include="..\libretro-common\include\formats\rbmp.h" />
|
||||
<ClInclude Include="..\libretro-common\include\formats\rpng.h" />
|
||||
<ClInclude Include="..\libretro-common\include\retro_inline.h" />
|
||||
<ClInclude Include="..\libretro-common\include\retro_miscellaneous.h" />
|
||||
<ClInclude Include="..\libretro-common\include\rthreads\rthreads.h" />
|
||||
<ClInclude Include="..\lua-engine.h" />
|
||||
<ClInclude Include="..\matrix.h" />
|
||||
<ClInclude Include="..\mc.h" />
|
||||
<ClInclude Include="..\mem.h" />
|
||||
<ClInclude Include="..\mic.h" />
|
||||
<ClInclude Include="..\MMU.h" />
|
||||
<ClInclude Include="..\MMU_timing.h" />
|
||||
<ClInclude Include="..\movie.h" />
|
||||
<ClInclude Include="..\NDSSystem.h" />
|
||||
<ClInclude Include="..\OGLRender.h" />
|
||||
<ClInclude Include="..\OGLRender_3_2.h" />
|
||||
<ClInclude Include="..\path.h" />
|
||||
<ClInclude Include="..\rasterize.h" />
|
||||
<ClInclude Include="..\readwrite.h" />
|
||||
<ClInclude Include="..\registers.h" />
|
||||
<ClInclude Include="..\render3D.h" />
|
||||
<ClInclude Include="..\ROMReader.h" />
|
||||
<ClInclude Include="..\rtc.h" />
|
||||
<ClInclude Include="..\saves.h" />
|
||||
<ClInclude Include="..\slot1.h" />
|
||||
<ClInclude Include="..\slot2.h" />
|
||||
<ClInclude Include="..\SPU.h" />
|
||||
<ClInclude Include="..\texcache.h" />
|
||||
<ClInclude Include="..\types.h" />
|
||||
<ClInclude Include="..\utils\advanscene.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\asmjit.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\config.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\core.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\core\apibegin.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\core\apiend.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\core\assembler.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\core\assert.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\core\buffer.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\core\build.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\core\compiler.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\core\compilercontext.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\core\compilerfunc.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\core\compileritem.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\core\context.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\core\cpuinfo.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\core\defs.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\core\func.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\core\intutil.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\core\lock.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\core\logger.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\core\memorymanager.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\core\memorymarker.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\core\operand.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\core\podvector.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\core\stringbuilder.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\core\stringutil.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\core\virtualmemory.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\core\zonememory.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\x86.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\x86\x86assembler.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\x86\x86compiler.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\x86\x86compilercontext.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\x86\x86compilerfunc.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\x86\x86compileritem.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\x86\x86cpuinfo.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\x86\x86defs.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\x86\x86func.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\x86\x86operand.h" />
|
||||
<ClInclude Include="..\utils\AsmJit\x86\x86util.h" />
|
||||
<ClInclude Include="..\utils\colorspacehandler\colorspacehandler.h" />
|
||||
<ClInclude Include="..\utils\colorspacehandler\colorspacehandler_SSE2.h" />
|
||||
<ClInclude Include="..\utils\datetime.h" />
|
||||
<ClInclude Include="..\utils\emufat.h" />
|
||||
<ClInclude Include="..\utils\emufat_types.h" />
|
||||
<ClInclude Include="..\utils\fsnitro.h" />
|
||||
<ClInclude Include="..\utils\libfat\bit_ops.h" />
|
||||
<ClInclude Include="..\utils\libfat\cache.h" />
|
||||
<ClInclude Include="..\utils\libfat\common.h" />
|
||||
<ClInclude Include="..\utils\libfat\directory.h" />
|
||||
<ClInclude Include="..\utils\libfat\disc.h" />
|
||||
<ClInclude Include="..\utils\libfat\disc_io.h" />
|
||||
<ClInclude Include="..\utils\libfat\fat.h" />
|
||||
<ClInclude Include="..\utils\libfat\fatdir.h" />
|
||||
<ClInclude Include="..\utils\libfat\fatfile.h" />
|
||||
<ClInclude Include="..\utils\libfat\filetime.h" />
|
||||
<ClInclude Include="..\utils\libfat\file_allocation_table.h" />
|
||||
<ClInclude Include="..\utils\libfat\libfat_pc.h" />
|
||||
<ClInclude Include="..\utils\libfat\libfat_public_api.h" />
|
||||
<ClInclude Include="..\utils\libfat\lock.h" />
|
||||
<ClInclude Include="..\utils\libfat\mem_allocate.h" />
|
||||
<ClInclude Include="..\utils\libfat\partition.h" />
|
||||
<ClInclude Include="..\utils\tinyxml\tinystr.h" />
|
||||
<ClInclude Include="..\utils\tinyxml\tinyxml.h" />
|
||||
<ClInclude Include="..\utils\vfat.h" />
|
||||
<ClInclude Include="..\version.h" />
|
||||
<ClInclude Include="..\wifi.h" />
|
||||
<ClInclude Include="..\utils\xstring.h" />
|
||||
<ClInclude Include="..\gdbstub.h" />
|
||||
<ClInclude Include="..\utils\ConvertUTF.h" />
|
||||
<ClInclude Include="..\utils\guid.h" />
|
||||
<ClInclude Include="..\utils\md5.h" />
|
||||
<ClInclude Include="..\utils\task.h" />
|
||||
<ClInclude Include="..\utils\valuearray.h" />
|
||||
<ClInclude Include="..\utils\decrypt\crc.h" />
|
||||
<ClInclude Include="..\utils\decrypt\decrypt.h" />
|
||||
<ClInclude Include="..\utils\decrypt\header.h" />
|
||||
<ClInclude Include="..\metaspu\metaspu.h" />
|
||||
<ClInclude Include="..\metaspu\SndOut.h" />
|
||||
<ClInclude Include="..\metaspu\win32\Dialogs.h" />
|
||||
<ClInclude Include="..\metaspu\SoundTouch\AAFilter.h" />
|
||||
<ClInclude Include="..\metaspu\SoundTouch\BPMDetect.h" />
|
||||
<ClInclude Include="..\metaspu\SoundTouch\cpu_detect.h" />
|
||||
<ClInclude Include="..\metaspu\SoundTouch\FIFOSampleBuffer.h" />
|
||||
<ClInclude Include="..\metaspu\SoundTouch\FIFOSamplePipe.h" />
|
||||
<ClInclude Include="..\metaspu\SoundTouch\FIRFilter.h" />
|
||||
<ClInclude Include="..\metaspu\SoundTouch\RateTransposer.h" />
|
||||
<ClInclude Include="..\metaspu\SoundTouch\SoundTouch.h" />
|
||||
<ClInclude Include="..\metaspu\SoundTouch\STTypes.h" />
|
||||
<ClInclude Include="..\metaspu\SoundTouch\TDStretch.h" />
|
||||
<ClInclude Include="..\metaspu\SoundTouch\WavFile.h" />
|
||||
<ClInclude Include="AboutBox.h" />
|
||||
<ClInclude Include="aviout.h" />
|
||||
<ClInclude Include="cheatsWin.h" />
|
||||
|
@ -600,10 +602,10 @@
|
|||
<ClInclude Include="tileView.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="..\..\filter\hq4x.dat" />
|
||||
<None Include="..\..\instruction_tabdef.inc" />
|
||||
<None Include="..\..\thumb_tabdef.inc" />
|
||||
<None Include="..\..\utils\AsmJit\COPYING.txt" />
|
||||
<None Include="..\filter\hq4x.dat" />
|
||||
<None Include="..\instruction_tabdef.inc" />
|
||||
<None Include="..\thumb_tabdef.inc" />
|
||||
<None Include="..\utils\AsmJit\COPYING.txt" />
|
||||
<None Include="bitmap1.bmp" />
|
||||
<None Include="bitmaps\FileBinary.ico" />
|
||||
<None Include="bitmaps\FolderClosed.ico" />
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -320,9 +320,10 @@ static void do_video_conversion(AVIFile* avi, const u16* buffer)
|
|||
{
|
||||
for (int x = 0; x < width; x++)
|
||||
{
|
||||
u32 dst = ConvertColor555To8888Opaque<true>(*buffer++);
|
||||
*(u32 *)outbuf = (dst & 0x00FFFFFF) | (*(u32 *)outbuf & 0xFF000000);
|
||||
outbuf += 3;
|
||||
u32 dst = ColorspaceConvert555To8888Opaque<true>(*buffer++);
|
||||
*outbuf++ = dst & 0xFF;
|
||||
*outbuf++ = (dst >> 8) & 0xFF;
|
||||
*outbuf++ = (dst >> 16) & 0xFF;
|
||||
}
|
||||
|
||||
outbuf -= width*3*2;
|
||||
|
|
|
@ -94,7 +94,7 @@
|
|||
<!-- BETA_VERSION ? -->
|
||||
<PreprocessorDefinitions Condition="'$(NDS_OPT)' == 'Debug'">_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<PreprocessorDefinitions Condition="'$(NDS_OPT)' == 'Release'">RELEASE;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<PreprocessorDefinitions Condition="'$(NDS_OPT)' == 'FastBuild'">RELEASE;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<PreprocessorDefinitions Condition="'$(NDS_OPT)' == 'FastBuild'">FASTBUILD;RELEASE;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
|
||||
|
||||
<!-- These work together -->
|
||||
|
|
|
@ -1919,7 +1919,7 @@ static void DoDisplay(bool firstTime)
|
|||
//convert pixel format to 32bpp for compositing
|
||||
//why do we do this over and over? well, we are compositing to
|
||||
//filteredbuffer32bpp, and it needs to get refreshed each frame.
|
||||
ConvertColorBuffer555To8888Opaque<true, false>((u16 *)video.srcBuffer, video.buffer, video.srcBufferSize / sizeof(u16));
|
||||
ColorspaceConvertBuffer555To8888Opaque<true, false>((u16 *)video.srcBuffer, video.buffer, video.srcBufferSize / sizeof(u16));
|
||||
|
||||
if(firstTime)
|
||||
{
|
||||
|
@ -3282,6 +3282,13 @@ int _main()
|
|||
cur3DCore = GPU3D_NULL;
|
||||
else if(cur3DCore == GPU3D_NULL) // this value shouldn't be saved anymore
|
||||
cur3DCore = GPU3D_DEFAULT;
|
||||
|
||||
if(cmdline.render3d == COMMANDLINE_RENDER3D_NONE) cur3DCore = GPU3D_NULL;
|
||||
if(cmdline.render3d == COMMANDLINE_RENDER3D_SW) cur3DCore = GPU3D_SWRAST;
|
||||
if(cmdline.render3d == COMMANDLINE_RENDER3D_OLDGL) cur3DCore = GPU3D_OPENGL_OLD;
|
||||
if(cmdline.render3d == COMMANDLINE_RENDER3D_GL) cur3DCore = GPU3D_OPENGL_3_2; //no way of forcing it, at least not right now. I dont care.
|
||||
if(cmdline.render3d == COMMANDLINE_RENDER3D_AUTOGL) cur3DCore = GPU3D_OPENGL_3_2; //this will fallback i guess
|
||||
|
||||
CommonSettings.GFX3D_HighResolutionInterpolateColor = GetPrivateProfileBool("3D", "HighResolutionInterpolateColor", 1, IniName);
|
||||
CommonSettings.GFX3D_EdgeMark = GetPrivateProfileBool("3D", "EnableEdgeMark", 1, IniName);
|
||||
CommonSettings.GFX3D_Fog = GetPrivateProfileBool("3D", "EnableFog", 1, IniName);
|
||||
|
|
|
@ -526,7 +526,7 @@ void gfx3d_deinit()
|
|||
|
||||
void gfx3d_reset()
|
||||
{
|
||||
CurrentRenderer->RenderFinish();
|
||||
GPU->ForceRender3DFinishAndFlush(false);
|
||||
|
||||
#ifdef _SHOW_VTX_COUNTERS
|
||||
max_polys = max_verts = 0;
|
||||
|
@ -627,6 +627,53 @@ FORCEINLINE s32 vec3dot_fixed32(const s32* a, const s32* b) {
|
|||
return sfx32_shiftdown(fx32_mul(a[0],b[0]) + fx32_mul(a[1],b[1]) + fx32_mul(a[2],b[2]));
|
||||
}
|
||||
|
||||
//---------------
|
||||
//I'm going to start naming these functions GEM_, for GEOMETRY ENGINE MATH.
|
||||
//Pretty much any math function in this file should be explicit about how it's handling precision.
|
||||
//Handling that stuff generically globally is not a winning proposition.
|
||||
|
||||
//Geometry engine math: signed 32-bit x 32-bit multiply producing the full 64-bit product.
//Both operands are widened before multiplying, so the product itself cannot overflow.
//On MSVC the __emul intrinsic is used for the same widening multiply.
FORCEINLINE s64 GEM_Mul32x32To64(const s32 a, const s32 b)
{
#if defined(_MSC_VER)
	return __emul(a, b);
#else
	const s64 wideA = (s64)a;
	const s64 wideB = (s64)b;
	return wideA * wideB;
#endif
}
|
||||
|
||||
//Geometry engine math: saturate a 64-bit accumulator, then shift it down to a 32-bit result.
//Values above 0x000007FFFFFFFFFF clamp to 0x7FFFFFFF; values below 0xFFFFF80000000000
//(as a signed 64-bit number) clamp to 0x80000000. Everything in between goes through fx32_shiftdown().
//NOTE(review): the limits describe a signed 44-bit range even though the name says "36";
//presumably fx32_shiftdown() drops 12 fractional bits, which would make the limits line up
//with the s32 extremes -- confirm against its definition.
static s32 GEM_SaturateAndShiftdown36To32(const s64 val)
{
	const s64 upperLimit = (s64)0x000007FFFFFFFFFFULL;
	const s64 lowerLimit = (s64)0xFFFFF80000000000ULL;

	if (val > upperLimit)
	{
		return (s32)0x7FFFFFFFU;
	}

	if (val < lowerLimit)
	{
		return (s32)0x80000000U;
	}

	return fx32_shiftdown(val);
}
|
||||
|
||||
static void GEM_TransformVertex(const s32 *matrix, s32 *vecPtr)
|
||||
{
|
||||
const s32 x = vecPtr[0];
|
||||
const s32 y = vecPtr[1];
|
||||
const s32 z = vecPtr[2];
|
||||
const s32 w = vecPtr[3];
|
||||
|
||||
//saturation logic is most carefully tested by:
|
||||
//+ spectrobes beyond the portals excavation blower and drill tools: sets very large overflowing +x,+y in the modelview matrix to push things offscreen
|
||||
//You can see this happening quite clearly: vertices will get translated to extreme values and overflow from a 7FFF-like to an 8000-like
|
||||
//but if it's done wrongly, you can get bugs in:
|
||||
//+ kingdom hearts re-coded: first conversation with cast characters will place them oddly with something overflowing to about 0xA???????
|
||||
|
||||
//other test cases that cropped up during this development, but are probably not actually related to this after all
|
||||
//+ SM64: outside castle skybox
|
||||
//+ NSMB: mario head screen wipe
|
||||
|
||||
vecPtr[0] = GEM_SaturateAndShiftdown36To32(GEM_Mul32x32To64(x,matrix[0]) + GEM_Mul32x32To64(y,matrix[4]) + GEM_Mul32x32To64(z,matrix [8]) + GEM_Mul32x32To64(w,matrix[12]));
|
||||
vecPtr[1] = GEM_SaturateAndShiftdown36To32(GEM_Mul32x32To64(x,matrix[1]) + GEM_Mul32x32To64(y,matrix[5]) + GEM_Mul32x32To64(z,matrix[ 9]) + GEM_Mul32x32To64(w,matrix[13]));
|
||||
vecPtr[2] = GEM_SaturateAndShiftdown36To32(GEM_Mul32x32To64(x,matrix[2]) + GEM_Mul32x32To64(y,matrix[6]) + GEM_Mul32x32To64(z,matrix[10]) + GEM_Mul32x32To64(w,matrix[14]));
|
||||
vecPtr[3] = GEM_SaturateAndShiftdown36To32(GEM_Mul32x32To64(x,matrix[3]) + GEM_Mul32x32To64(y,matrix[7]) + GEM_Mul32x32To64(z,matrix[11]) + GEM_Mul32x32To64(w,matrix[15]));
|
||||
}
|
||||
//---------------
|
||||
|
||||
|
||||
#define SUBMITVERTEX(ii, nn) polylist->list[polylist->count].vertIndexes[ii] = tempVertInfo.map[nn];
|
||||
//Submit a vertex to the GE
|
||||
static void SetVertex()
|
||||
|
@ -659,15 +706,8 @@ static void SetVertex()
|
|||
if(polylist->count >= POLYLIST_SIZE)
|
||||
return;
|
||||
|
||||
//TODO - think about keeping the clip matrix concatenated,
|
||||
//so that we only have to multiply one matrix here
|
||||
//(we could lazy cache the concatenated clip matrix and only generate it
|
||||
//when we need to)
|
||||
MatrixMultVec4x4_M2(mtxCurrent[0], coordTransformed);
|
||||
|
||||
//printf("%f %f %f\n",s16coord[0]/4096.0f,s16coord[1]/4096.0f,s16coord[2]/4096.0f);
|
||||
//printf("x %f %f %f %f\n",mtxCurrent[0][0]/4096.0f,mtxCurrent[0][1]/4096.0f,mtxCurrent[0][2]/4096.0f,mtxCurrent[0][3]/4096.0f);
|
||||
//printf(" = %f %f %f %f\n",coordTransformed[0]/4096.0f,coordTransformed[1]/4096.0f,coordTransformed[2]/4096.0f,coordTransformed[3]/4096.0f);
|
||||
GEM_TransformVertex(mtxCurrent[1],coordTransformed); //modelview
|
||||
GEM_TransformVertex(mtxCurrent[0],coordTransformed); //projection
|
||||
|
||||
//TODO - culling should be done here.
|
||||
//TODO - viewport transform?
|
||||
|
@ -1484,8 +1524,9 @@ static void gfx3d_glViewPort(u32 v)
|
|||
static BOOL gfx3d_glBoxTest(u32 v)
|
||||
{
|
||||
//printf("boxtest\n");
|
||||
MMU_new.gxstat.tr = 0; // clear boxtest bit
|
||||
MMU_new.gxstat.tb = 1; // busy
|
||||
|
||||
//clear result flag. busy flag has been set by fifo component already
|
||||
MMU_new.gxstat.tr = 0;
|
||||
|
||||
BTcoords[BTind++] = v & 0xFFFF;
|
||||
BTcoords[BTind++] = v >> 16;
|
||||
|
@ -1493,9 +1534,11 @@ static BOOL gfx3d_glBoxTest(u32 v)
|
|||
if (BTind < 5) return FALSE;
|
||||
BTind = 0;
|
||||
|
||||
MMU_new.gxstat.tb = 0; // clear busy
|
||||
GFX_DELAY(103);
|
||||
|
||||
//now that we're executing this, we're not busy anymore
|
||||
MMU_new.gxstat.tb = 0;
|
||||
|
||||
#if 0
|
||||
INFO("BoxTEST: x %f y %f width %f height %f depth %f\n",
|
||||
BTcoords[0], BTcoords[1], BTcoords[2], BTcoords[3], BTcoords[4], BTcoords[5]);
|
||||
|
@ -1608,27 +1651,31 @@ static BOOL gfx3d_glBoxTest(u32 v)
|
|||
//if any portion of this poly was retained, then the test passes.
|
||||
if (boxtestClipper.clippedPolyCounter > 0)
|
||||
{
|
||||
//printf("%06d PASS %d\n",boxcounter,gxFIFO.size);
|
||||
//printf("%06d PASS %d\n",gxFIFO.size, i);
|
||||
MMU_new.gxstat.tr = 1;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
}
|
||||
|
||||
if (MMU_new.gxstat.tr == 0)
|
||||
{
|
||||
//printf("%06d FAIL %d\n",boxcounter,gxFIFO.size);
|
||||
//if(i==5) printf("%06d FAIL\n",gxFIFO.size);
|
||||
}
|
||||
|
||||
//printf("%06d RESULT %d\n",gxFIFO.size, MMU_new.gxstat.tr);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static BOOL gfx3d_glPosTest(u32 v)
|
||||
{
|
||||
//printf("postest\n");
|
||||
//this is apparently tested by transformers decepticons and ultimate spiderman
|
||||
|
||||
//printf("POSTEST\n");
|
||||
MMU_new.gxstat.tb = 1;
|
||||
//clear result flag. busy flag has been set by fifo component already
|
||||
MMU_new.gxstat.tr = 0;
|
||||
|
||||
//now that we're executing this, we're not busy anymore
|
||||
MMU_new.gxstat.tb = 0;
|
||||
|
||||
PTcoords[PTind++] = float16table[v & 0xFFFF];
|
||||
PTcoords[PTind++] = float16table[v >> 16];
|
||||
|
@ -2252,24 +2299,13 @@ void gfx3d_VBlankSignal()
|
|||
|
||||
void gfx3d_VBlankEndSignal(bool skipFrame)
|
||||
{
|
||||
GPU->ForceRender3DFinishAndFlush(false);
|
||||
|
||||
if (!drawPending) return;
|
||||
if (skipFrame) return;
|
||||
|
||||
drawPending = FALSE;
|
||||
|
||||
if (CurrentRenderer->GetRenderNeedsFinish())
|
||||
{
|
||||
bool need3DDisplayFramebuffer;
|
||||
bool need3DCaptureFramebuffer;
|
||||
CurrentRenderer->GetFramebufferFlushStates(need3DDisplayFramebuffer, need3DCaptureFramebuffer);
|
||||
|
||||
CurrentRenderer->SetFramebufferFlushStates(false, false);
|
||||
CurrentRenderer->RenderFinish();
|
||||
CurrentRenderer->SetFramebufferFlushStates(need3DDisplayFramebuffer, need3DCaptureFramebuffer);
|
||||
CurrentRenderer->SetRenderNeedsFinish(false);
|
||||
GPU->GetEventHandler()->DidRender3DEnd();
|
||||
}
|
||||
|
||||
GPU->GetEventHandler()->DidRender3DBegin();
|
||||
|
||||
if (CommonSettings.showGpu.main)
|
||||
|
@ -2486,7 +2522,7 @@ void gfx3d_Update3DFramebuffers(FragmentColor *framebufferRGBA6665, u16 *framebu
|
|||
//-------------savestate
|
||||
void gfx3d_savestate(EMUFILE* os)
|
||||
{
|
||||
CurrentRenderer->RenderFinish();
|
||||
GPU->ForceRender3DFinishAndFlush(true);
|
||||
|
||||
//version
|
||||
write32le(4,os);
|
||||
|
|
|
@ -427,8 +427,3 @@ void MatrixTranslate(s32 *matrix, const s32 *ptr)
|
|||
});
|
||||
}
|
||||
|
||||
void MatrixMultVec4x4_M2(const s32 *matrix, s32 *vecPtr)
|
||||
{
|
||||
MatrixMultVec4x4(matrix+16,vecPtr);
|
||||
MatrixMultVec4x4(matrix,vecPtr);
|
||||
}
|
||||
|
|
|
@ -276,13 +276,6 @@ FORCEINLINE void MatrixMultVec4x4(const float *matrix, float *vecPtr)
|
|||
_mm_store_ps(vecPtr,_util_MatrixMultVec4x4_((SSE_MATRIX)matrix,_mm_load_ps(vecPtr)));
|
||||
}
|
||||
|
||||
FORCEINLINE void MatrixMultVec4x4_M2(const float *matrix, float *vecPtr)
|
||||
{
|
||||
//there are hardly any gains from merging these manually
|
||||
MatrixMultVec4x4(matrix+16,vecPtr);
|
||||
MatrixMultVec4x4(matrix,vecPtr);
|
||||
}
|
||||
|
||||
FORCEINLINE void MatrixMultVec3x3(const float * matrix, float * vecPtr)
|
||||
{
|
||||
const __m128 vec = _mm_load_ps(vecPtr);
|
||||
|
@ -355,13 +348,6 @@ void MatrixMultiply(float * matrix, const float * rightMatrix);
|
|||
void MatrixTranslate(float *matrix, const float *ptr);
|
||||
void MatrixScale(float * matrix, const float * ptr);
|
||||
|
||||
FORCEINLINE void MatrixMultVec4x4_M2(const float *matrix, float *vecPtr)
|
||||
{
|
||||
//there are hardly any gains from merging these manually
|
||||
MatrixMultVec4x4(matrix+16,vecPtr);
|
||||
MatrixMultVec4x4(matrix,vecPtr);
|
||||
}
|
||||
|
||||
template<int NUM_ROWS>
|
||||
FORCEINLINE void vector_fix2float(float* matrix, const float divisor)
|
||||
{
|
||||
|
@ -373,8 +359,6 @@ FORCEINLINE void vector_fix2float(float* matrix, const float divisor)
|
|||
|
||||
void MatrixMultVec4x4 (const s32 *matrix, s32 *vecPtr);
|
||||
|
||||
void MatrixMultVec4x4_M2(const s32 *matrix, s32 *vecPtr);
|
||||
|
||||
void MatrixMultiply(s32* matrix, const s32* rightMatrix);
|
||||
void MatrixScale(s32 *matrix, const s32 *ptr);
|
||||
void MatrixTranslate(s32 *matrix, const s32 *ptr);
|
||||
|
|
|
@ -619,6 +619,21 @@ void BackupDevice::reset()
|
|||
ensure((u32)savesize); //expand properly if necessary
|
||||
addr_size = addr_size_for_old_save_type(savetype);
|
||||
}
|
||||
|
||||
//automatically detect these hardcodes
|
||||
if(state == DETECTING)
|
||||
{
|
||||
if(!memcmp(gameInfo.header.gameCode,"ASMK", 4)) addr_size = 1; //super mario 64 ds (KOR, which is different somehow)
|
||||
else if(!memcmp(gameInfo.header.gameCode,"ASM", 3)) addr_size = 2; //super mario 64 ds
|
||||
else if(!memcmp(gameInfo.header.gameCode,"BDE", 3)) addr_size = 2; // Dementium II
|
||||
else if(!memcmp(gameInfo.header.gameCode,"AL3", 3)) addr_size = 1; //spongebob atlantis squarepantis.
|
||||
else if(!memcmp(gameInfo.header.gameCode,"AH5", 3)) addr_size = 1; //over the hedge
|
||||
else if(!memcmp(gameInfo.header.gameCode,"AVH", 3)) addr_size = 1; //over the hedge - Hammy Goes Nuts!
|
||||
else if(!memcmp(gameInfo.header.gameCode,"AQ3", 3)) addr_size = 1; //spider-man 3
|
||||
|
||||
//if we found a whitelist match, we dont need to run detection
|
||||
if(addr_size) state = RUNNING;
|
||||
}
|
||||
}
|
||||
|
||||
void BackupDevice::close_rom()
|
||||
|
@ -662,36 +677,33 @@ void BackupDevice::detect()
|
|||
addr_size = 1; //choose 1 just to keep the busted savefile from growing too big
|
||||
msgbox->error("Catastrophic error while autodetecting save type.\nIt will need to be specified manually\n");
|
||||
break;
|
||||
|
||||
case 2:
|
||||
//the modern typical case for small eeproms
|
||||
addr_size = 1;
|
||||
break;
|
||||
|
||||
case 3:
|
||||
//another modern typical case..
|
||||
//but unfortunately we select this case on accident sometimes when what it meant to do was present the archaic 1+2 case
|
||||
//(the archaic 1+2 case is: specifying one address byte, and then reading the first two bytes, instead of the first one byte, as most other games would do.)
|
||||
//so, we're gonna hack in checks for the games that are doing this
|
||||
addr_size = 2;
|
||||
|
||||
// TODO: will study a deep, why this happens (wrong detect size)
|
||||
if(!memcmp(gameInfo.header.gameCode,"AL3", 3)) addr_size = 1; //spongebob atlantis squarepantis.
|
||||
if(!memcmp(gameInfo.header.gameCode,"AH5", 3)) addr_size = 1; //over the hedge
|
||||
if(!memcmp(gameInfo.header.gameCode,"AVH", 3)) addr_size = 1; //over the hedge - Hammy Goes Nuts!
|
||||
if(!memcmp(gameInfo.header.gameCode,"AQ3", 3)) addr_size = 1; //spider-man 3
|
||||
|
||||
break;
|
||||
|
||||
case 4:
|
||||
//a modern typical case
|
||||
addr_size = 3;
|
||||
if(!memcmp(gameInfo.header.gameCode,"ASM", 3)) addr_size = 2; //super mario 64 ds
|
||||
|
||||
break;
|
||||
default:
|
||||
//the archaic case: write the address and then some modulo-4 number of bytes
|
||||
//why modulo 4? who knows.
|
||||
//SM64 (KOR) makes it here with autodetect_size=11 and nothing interesting in the buffer
|
||||
addr_size = autodetect_size & 3;
|
||||
|
||||
if(!memcmp(gameInfo.header.gameCode,"BDE", 3)) addr_size = 2; // Dementium II
|
||||
//SM64 (KOR) makes it here with autodetect_size=11 and nothing interesting in the buffer
|
||||
//we whitelisted it earlier though
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -604,11 +604,11 @@ Render3DError Render3D::FlushFramebuffer(const FragmentColor *__restrict srcFram
|
|||
{
|
||||
if ( (this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev) )
|
||||
{
|
||||
ConvertColorBuffer8888To6665<false>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
|
||||
ColorspaceConvertBuffer8888To6665<false, false>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
|
||||
}
|
||||
else if ( (this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev) )
|
||||
{
|
||||
ConvertColorBuffer6665To8888<false>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
|
||||
ColorspaceConvertBuffer6665To8888<false, false>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
|
||||
}
|
||||
else if ( ((this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev)) ||
|
||||
((this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev)) )
|
||||
|
@ -621,11 +621,11 @@ Render3DError Render3D::FlushFramebuffer(const FragmentColor *__restrict srcFram
|
|||
{
|
||||
if (this->_outputFormat == NDSColorFormat_BGR666_Rev)
|
||||
{
|
||||
ConvertColorBuffer6665To5551<false, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
|
||||
ColorspaceConvertBuffer6665To5551<false, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
|
||||
}
|
||||
else if (this ->_outputFormat == NDSColorFormat_BGR888_Rev)
|
||||
{
|
||||
ConvertColorBuffer8888To5551<false, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
|
||||
ColorspaceConvertBuffer8888To5551<false, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -30,6 +30,10 @@
|
|||
#include "MMU.h"
|
||||
#include "NDSSystem.h"
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#include "./utils/colorspacehandler/colorspacehandler_SSE2.h"
|
||||
#endif
|
||||
|
||||
using std::min;
|
||||
using std::max;
|
||||
|
||||
|
@ -451,13 +455,13 @@ public:
|
|||
|
||||
if (TEXFORMAT == TexFormat_15bpp)
|
||||
{
|
||||
ConvertColor555To6665Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ConvertColor555To6665Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
}
|
||||
else
|
||||
{
|
||||
ConvertColor555To8888Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ConvertColor555To8888Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
}
|
||||
|
||||
// Set converted colors to 0 if the palette index is 0.
|
||||
|
@ -517,13 +521,13 @@ public:
|
|||
|
||||
if (TEXFORMAT == TexFormat_15bpp)
|
||||
{
|
||||
ConvertColor555To6665Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ConvertColor555To6665Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
}
|
||||
else
|
||||
{
|
||||
ConvertColor555To8888Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ConvertColor555To8888Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
}
|
||||
|
||||
_mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]);
|
||||
|
@ -580,13 +584,13 @@ public:
|
|||
|
||||
if (TEXFORMAT == TexFormat_15bpp)
|
||||
{
|
||||
ConvertColor555To6665Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ConvertColor555To6665Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
}
|
||||
else
|
||||
{
|
||||
ConvertColor555To8888Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ConvertColor555To8888Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
}
|
||||
|
||||
// Set converted colors to 0 if the palette index is 0.
|
||||
|
@ -646,13 +650,13 @@ public:
|
|||
|
||||
if (TEXFORMAT == TexFormat_15bpp)
|
||||
{
|
||||
ConvertColor555To6665Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ConvertColor555To6665Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
}
|
||||
else
|
||||
{
|
||||
ConvertColor555To8888Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ConvertColor555To8888Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
|
||||
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
|
||||
}
|
||||
|
||||
_mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]);
|
||||
|
@ -881,11 +885,11 @@ public:
|
|||
|
||||
tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo);
|
||||
tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo);
|
||||
ConvertColor555To6665<false>(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]);
|
||||
ColorspaceConvert555To6665_SSE2<false>(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]);
|
||||
|
||||
tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi);
|
||||
tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi);
|
||||
ConvertColor555To6665<false>(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]);
|
||||
ColorspaceConvert555To6665_SSE2<false>(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -895,11 +899,11 @@ public:
|
|||
|
||||
tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo);
|
||||
tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo);
|
||||
ConvertColor555To8888<false>(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]);
|
||||
ColorspaceConvert555To8888_SSE2<false>(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]);
|
||||
|
||||
tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi);
|
||||
tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi);
|
||||
ConvertColor555To8888<false>(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]);
|
||||
ColorspaceConvert555To8888_SSE2<false>(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]);
|
||||
}
|
||||
|
||||
_mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]);
|
||||
|
|
|
@ -19,10 +19,6 @@
|
|||
#ifndef TYPES_HPP
|
||||
#define TYPES_HPP
|
||||
|
||||
#include <retro_miscellaneous.h>
|
||||
#include <retro_inline.h>
|
||||
#include <math/fxp.h>
|
||||
|
||||
//analyze microsoft compilers
|
||||
#ifdef _MSC_VER
|
||||
#define HOST_WINDOWS
|
||||
|
@ -80,6 +76,18 @@
|
|||
#ifdef __SSE4_2__
|
||||
#define ENABLE_SSE4_2
|
||||
#endif
|
||||
|
||||
#ifdef __AVX__
|
||||
#define ENABLE_AVX
|
||||
#endif
|
||||
|
||||
#ifdef __AVX2__
|
||||
#define ENABLE_AVX2
|
||||
#endif
|
||||
|
||||
#ifdef __ALTIVEC__
|
||||
#define ENABLE_ALTIVEC
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
@ -148,6 +156,14 @@
|
|||
#define _CDECL_
|
||||
#endif
|
||||
|
||||
#ifndef INLINE
|
||||
#if defined(_MSC_VER) || defined(__INTEL_COMPILER)
|
||||
#define INLINE _inline
|
||||
#else
|
||||
#define INLINE inline
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef FORCEINLINE
|
||||
#if defined(_MSC_VER) || defined(__INTEL_COMPILER)
|
||||
#define FORCEINLINE __forceinline
|
||||
|
@ -219,6 +235,38 @@ typedef u32 uint32;
|
|||
#define uint32 u32 //uint32 is defined in Leopard somewhere, avoid conflicts
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_ALTIVEC
|
||||
#ifndef __APPLE_ALTIVEC__
|
||||
#include <altivec.h>
|
||||
#endif
|
||||
typedef vector unsigned char v128u8;
|
||||
typedef vector signed char v128s8;
|
||||
typedef vector unsigned short v128u16;
|
||||
typedef vector signed short v128s16;
|
||||
typedef vector unsigned int v128u32;
|
||||
typedef vector signed int v128s32;
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#include <emmintrin.h>
|
||||
typedef __m128i v128u8;
|
||||
typedef __m128i v128s8;
|
||||
typedef __m128i v128u16;
|
||||
typedef __m128i v128s16;
|
||||
typedef __m128i v128u32;
|
||||
typedef __m128i v128s32;
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_AVX2
|
||||
#include <immintrin.h>
|
||||
typedef __m256i v256u8;
|
||||
typedef __m256i v256s8;
|
||||
typedef __m256i v256u16;
|
||||
typedef __m256i v256s16;
|
||||
typedef __m256i v256u32;
|
||||
typedef __m256i v256s32;
|
||||
#endif
|
||||
|
||||
/*---------- GPU3D fixed-points types -----------*/
|
||||
|
||||
typedef s32 f32;
|
||||
|
@ -266,8 +314,20 @@ typedef int desmume_BOOL;
|
|||
#define FALSE 0
|
||||
#endif
|
||||
|
||||
#ifdef __BIG_ENDIAN__
|
||||
#ifndef WORDS_BIGENDIAN
|
||||
#define WORDS_BIGENDIAN
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
# define LOCAL_BE 1
|
||||
#else
|
||||
# define LOCAL_LE 1
|
||||
#endif
|
||||
|
||||
/* little endian (the DS's endianness) to local endianness conversion macros */
|
||||
#ifdef MSB_FIRST /* local arch is big endian */
|
||||
#ifdef LOCAL_BE /* local arch is big endian */
|
||||
# define LE_TO_LOCAL_16(x) ((((x)&0xff)<<8)|(((x)>>8)&0xff))
|
||||
# define LE_TO_LOCAL_32(x) ((((x)&0xff)<<24)|(((x)&0xff00)<<8)|(((x)>>8)&0xff00)|(((x)>>24)&0xff))
|
||||
# define LE_TO_LOCAL_64(x) ((((x)&0xff)<<56)|(((x)&0xff00)<<40)|(((x)&0xff0000)<<24)|(((x)&0xff000000)<<8)|(((x)>>8)&0xff000000)|(((x)>>24)&0xff0000)|(((x)>>40)&0xff00)|(((x)>>56)&0xff))
|
||||
|
@ -287,6 +347,8 @@ typedef int desmume_BOOL;
|
|||
#define MB(x) ((x)*1024*1024)
|
||||
#define KB(x) ((x)*1024)
|
||||
|
||||
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
|
||||
|
||||
#define CPU_STR(c) ((c==ARM9)?"ARM9":"ARM7")
|
||||
typedef enum
|
||||
{
|
||||
|
@ -294,6 +356,28 @@ typedef enum
|
|||
ARM7 = 1
|
||||
} cpu_id_t;
|
||||
|
||||
///endian-flips count bytes. count should be even and nonzero.
|
||||
inline void FlipByteOrder(u8 *src, u32 count)
|
||||
{
|
||||
u8 *start=src;
|
||||
u8 *end=src+count-1;
|
||||
|
||||
if((count&1) || !count) return; /* This shouldn't happen. */
|
||||
|
||||
while(count--)
|
||||
{
|
||||
u8 tmp;
|
||||
|
||||
tmp=*end;
|
||||
*end=*start;
|
||||
*start=tmp;
|
||||
end--;
|
||||
start++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
inline u64 double_to_u64(double d) {
|
||||
union {
|
||||
u64 a;
|
||||
|
@ -312,6 +396,68 @@ inline double u64_to_double(u64 u) {
|
|||
return fuxor.b;
|
||||
}
|
||||
|
||||
/// Returns the storage of f reinterpreted as a u32 (no numeric conversion is performed).
inline u32 float_to_u32(float f) {
	union {
		u32 bits;
		float value;
	} pun;

	pun.value = f;
	return pun.bits;
}
|
||||
|
||||
/// Returns the storage of u reinterpreted as a float (no numeric conversion is performed).
inline float u32_to_float(u32 u) {
	union {
		u32 bits;
		float value;
	} pun;

	pun.bits = u;
	return pun.value;
}
|
||||
|
||||
|
||||
///stores a 32bit value into the provided byte array in guaranteed little endian form
|
||||
inline void en32lsb(u8 *buf, u32 morp)
|
||||
{
|
||||
buf[0]=(u8)(morp);
|
||||
buf[1]=(u8)(morp>>8);
|
||||
buf[2]=(u8)(morp>>16);
|
||||
buf[3]=(u8)(morp>>24);
|
||||
}
|
||||
|
||||
///writes the two bytes of morp into buf[0..1], least significant byte first
inline void en16lsb(u8* buf, u16 morp)
{
	const u8 lowByte  = (u8)morp;
	const u8 highByte = (u8)(morp>>8);

	buf[0] = lowByte;
	buf[1] = highByte;
}
|
||||
|
||||
///unpacks a 64bit little endian value from the provided byte array into host byte order
|
||||
inline u64 de64lsb(u8 *morp)
|
||||
{
|
||||
return morp[0]|(morp[1]<<8)|(morp[2]<<16)|(morp[3]<<24)|((u64)morp[4]<<32)|((u64)morp[5]<<40)|((u64)morp[6]<<48)|((u64)morp[7]<<56);
|
||||
}
|
||||
|
||||
///unpacks a 32bit little endian value from the provided byte array into host byte order
|
||||
inline u32 de32lsb(u8 *morp)
|
||||
{
|
||||
return morp[0]|(morp[1]<<8)|(morp[2]<<16)|(morp[3]<<24);
|
||||
}
|
||||
|
||||
///unpacks a 16bit little endian value from the provided byte array into host byte order
|
||||
inline u16 de16lsb(u8 *morp)
|
||||
{
|
||||
return morp[0]|(morp[1]<<8);
|
||||
}
|
||||
|
||||
#ifndef ARRAY_SIZE
|
||||
//taken from winnt.h
|
||||
extern "C++" // templates cannot be declared to have 'C' linkage
|
||||
template <typename T, size_t N>
|
||||
char (*BLAHBLAHBLAH( UNALIGNED T (&)[N] ))[N];
|
||||
|
||||
#define ARRAY_SIZE(A) (sizeof(*BLAHBLAHBLAH(A)))
|
||||
#endif
|
||||
|
||||
|
||||
//fairly standard for loop macros
|
||||
#define MACRODO1(TRICK,TODO) { const size_t X = TRICK; TODO; }
|
||||
#define MACRODO2(X,TODO) { MACRODO1((X),TODO) MACRODO1(((X)+1),TODO) }
|
||||
|
@ -385,30 +531,37 @@ template<typename T> inline void reconstruct(T* t) {
|
|||
new(t) T();
|
||||
}
|
||||
|
||||
/* fixed point speedup macros */
|
||||
//-------------fixed point speedup macros
|
||||
|
||||
FORCEINLINE s32 sfx32_shiftdown(const s64 a)
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
FORCEINLINE s64 fx32_mul(const s32 a, const s32 b)
|
||||
{
|
||||
s64 shifted = fx32_shiftdown(a);
|
||||
#ifdef _MSC_VER
|
||||
return __emul(a,b);
|
||||
#else
|
||||
return ((s64)a)*((s64)b);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*either matrix math is happening at higher precision (an extra bit would suffice,
|
||||
* I think), or the sums sent to this are saturated.
|
||||
*
|
||||
*tested by: spectrobes beyond the portals excavation blower
|
||||
*(it sets very large +x,+y in the modelview matrix to push things offscreen,
|
||||
*but the +y will overflow and become negative if we're not careful)
|
||||
*
|
||||
*I didnt think very hard about what would be fastest here on 32bit systems
|
||||
*NOTE: this was intended for use in MatrixMultVec4x4_M2; it may not be appropriate for
|
||||
* other uses of fx32_shiftdown.
|
||||
*if this causes problems we should refactor the math routines a bit to take care of
|
||||
* saturating in another function
|
||||
*/
|
||||
if(shifted>(s32)0x7FFFFFFF)
|
||||
return 0x7FFFFFFF;
|
||||
if(shifted<=(s32)0x80000000)
|
||||
return 0x80000000;
|
||||
return shifted;
|
||||
/// Shifts a right by 12 bits and truncates the result to 32 bits.
/// MSVC builds go through the __ll_rshift intrinsic; everything else uses the plain operator.
FORCEINLINE s32 fx32_shiftdown(const s64 a)
{
#ifndef _MSC_VER
	return (s32)(a>>12);
#else
	return (s32)__ll_rshift(a,12);
#endif
}
|
||||
|
||||
/// Widens a to 64 bits and shifts it left by 12 bits.
/// MSVC builds go through the __ll_lshift intrinsic; everything else uses the plain operator.
FORCEINLINE s64 fx32_shiftup(const s32 a)
{
#ifndef _MSC_VER
	return ((s64)a)<<12;
#else
	return __ll_lshift(a,12);
#endif
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,776 @@
|
|||
/*
|
||||
Copyright (C) 2016 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This file is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with the this software. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "colorspacehandler.h"
|
||||
|
||||
#if defined(ENABLE_AVX2)
|
||||
#include "colorspacehandler_AVX2.h"
|
||||
#elif defined(ENABLE_SSE2)
|
||||
#include "colorspacehandler_SSE2.h"
|
||||
#elif defined(ENABLE_ALTIVEC)
|
||||
#include "colorspacehandler_AltiVec.h"
|
||||
#endif
|
||||
|
||||
#if defined(ENABLE_SSE2) || defined(ENABLE_ALTIVEC)
|
||||
#define USEVECTORSIZE_128
|
||||
#endif
|
||||
|
||||
#if defined(ENABLE_AVX2)
|
||||
#define USEVECTORSIZE_256
|
||||
#endif
|
||||
|
||||
// By default, the hand-coded vectorized code will be used instead of a compiler's built-in
|
||||
// autovectorization (if supported). However, if USEMANUALVECTORIZATION is not defined, then
|
||||
// the compiler will use autovectorization (if supported).
|
||||
#if defined(USEVECTORSIZE_128) || defined(USEVECTORSIZE_256) || defined(USEVECTORSIZE_512)
|
||||
// Comment out USEMANUALVECTORIZATION to disable the hand-coded vectorized code.
|
||||
#define USEMANUALVECTORIZATION
|
||||
#endif
|
||||
|
||||
#ifdef USEMANUALVECTORIZATION
|
||||
#if defined(ENABLE_AVX2)
|
||||
static const ColorspaceHandler_AVX2 csh;
|
||||
#elif defined(ENABLE_SSE2)
|
||||
static const ColorspaceHandler_SSE2 csh;
|
||||
#elif defined(ENABLE_ALTIVEC)
|
||||
static const ColorspaceHandler_AltiVec csh;
|
||||
#else
|
||||
static const ColorspaceHandler csh;
|
||||
#endif
|
||||
#else
|
||||
static const ColorspaceHandler csh;
|
||||
#endif
|
||||
|
||||
CACHE_ALIGN u32 color_555_to_6665_opaque[32768];
|
||||
CACHE_ALIGN u32 color_555_to_6665_opaque_swap_rb[32768];
|
||||
CACHE_ALIGN u32 color_555_to_666[32768];
|
||||
CACHE_ALIGN u32 color_555_to_8888_opaque[32768];
|
||||
CACHE_ALIGN u32 color_555_to_8888_opaque_swap_rb[32768];
|
||||
CACHE_ALIGN u32 color_555_to_888[32768];
|
||||
|
||||
//is this a crazy idea? this table spreads 5 bits evenly over 31 from exactly 0 to INT_MAX
|
||||
CACHE_ALIGN const u32 material_5bit_to_31bit[] = {
|
||||
0x00000000, 0x04210842, 0x08421084, 0x0C6318C6,
|
||||
0x10842108, 0x14A5294A, 0x18C6318C, 0x1CE739CE,
|
||||
0x21084210, 0x25294A52, 0x294A5294, 0x2D6B5AD6,
|
||||
0x318C6318, 0x35AD6B5A, 0x39CE739C, 0x3DEF7BDE,
|
||||
0x42108421, 0x46318C63, 0x4A5294A5, 0x4E739CE7,
|
||||
0x5294A529, 0x56B5AD6B, 0x5AD6B5AD, 0x5EF7BDEF,
|
||||
0x6318C631, 0x6739CE73, 0x6B5AD6B5, 0x6F7BDEF7,
|
||||
0x739CE739, 0x77BDEF7B, 0x7BDEF7BD, 0x7FFFFFFF
|
||||
};
|
||||
|
||||
// 5-bit to 6-bit conversions use this formula -- dst = (src == 0) ? 0 : (2*src) + 1
|
||||
// Reference GBATEK: http://problemkaputt.de/gbatek.htm#ds3dtextureblending
|
||||
CACHE_ALIGN const u8 material_5bit_to_6bit[] = {
|
||||
0x00, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
|
||||
0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F,
|
||||
0x21, 0x23, 0x25, 0x27, 0x29, 0x2B, 0x2D, 0x2F,
|
||||
0x31, 0x33, 0x35, 0x37, 0x39, 0x3B, 0x3D, 0x3F
|
||||
};
|
||||
|
||||
CACHE_ALIGN const u8 material_5bit_to_8bit[] = {
|
||||
0x00, 0x08, 0x10, 0x18, 0x21, 0x29, 0x31, 0x39,
|
||||
0x42, 0x4A, 0x52, 0x5A, 0x63, 0x6B, 0x73, 0x7B,
|
||||
0x84, 0x8C, 0x94, 0x9C, 0xA5, 0xAD, 0xB5, 0xBD,
|
||||
0xC6, 0xCE, 0xD6, 0xDE, 0xE7, 0xEF, 0xF7, 0xFF
|
||||
};
|
||||
|
||||
CACHE_ALIGN const u8 material_6bit_to_8bit[] = {
|
||||
0x00, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C,
|
||||
0x20, 0x24, 0x28, 0x2C, 0x30, 0x34, 0x38, 0x3C,
|
||||
0x41, 0x45, 0x49, 0x4D, 0x51, 0x55, 0x59, 0x5D,
|
||||
0x61, 0x65, 0x69, 0x6D, 0x71, 0x75, 0x79, 0x7D,
|
||||
0x82, 0x86, 0x8A, 0x8E, 0x92, 0x96, 0x9A, 0x9E,
|
||||
0xA2, 0xA6, 0xAA, 0xAE, 0xB2, 0xB6, 0xBA, 0xBE,
|
||||
0xC3, 0xC7, 0xCB, 0xCF, 0xD3, 0xD7, 0xDB, 0xDF,
|
||||
0xE3, 0xE7, 0xEB, 0xEF, 0xF3, 0xF7, 0xFB, 0xFF
|
||||
};
|
||||
|
||||
CACHE_ALIGN const u8 material_3bit_to_8bit[] = {
|
||||
0x00, 0x24, 0x49, 0x6D, 0x92, 0xB6, 0xDB, 0xFF
|
||||
};
|
||||
|
||||
//maybe not very precise
|
||||
CACHE_ALIGN const u8 material_3bit_to_5bit[] = {
|
||||
0, 4, 8, 13, 17, 22, 26, 31
|
||||
};
|
||||
|
||||
//TODO - generate this in the static init method more accurately
|
||||
CACHE_ALIGN const u8 material_3bit_to_6bit[] = {
|
||||
0, 8, 16, 26, 34, 44, 52, 63
|
||||
};
|
||||
|
||||
void ColorspaceHandlerInit()
|
||||
{
|
||||
static bool needInitTables = true;
|
||||
|
||||
if (needInitTables)
|
||||
{
|
||||
#define RGB15TO18_BITLOGIC(col) ( (material_5bit_to_6bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | material_5bit_to_6bit[(col)&0x1F] )
|
||||
#define RGB15TO18_SWAP_RB_BITLOGIC(col) ( material_5bit_to_6bit[((col)>>10)&0x1F] | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_6bit[(col)&0x1F]<<16) )
|
||||
#define RGB15TO24_BITLOGIC(col) ( (material_5bit_to_8bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | material_5bit_to_8bit[(col)&0x1F] )
|
||||
#define RGB15TO24_SWAP_RB_BITLOGIC(col) ( material_5bit_to_8bit[((col)>>10)&0x1F] | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_8bit[(col)&0x1F]<<16) )
|
||||
|
||||
for (size_t i = 0; i < 32768; i++)
|
||||
{
|
||||
color_555_to_666[i] = LE_TO_LOCAL_32( RGB15TO18_BITLOGIC(i) );
|
||||
color_555_to_6665_opaque[i] = LE_TO_LOCAL_32( RGB15TO18_BITLOGIC(i) | 0x1F000000 );
|
||||
color_555_to_6665_opaque_swap_rb[i] = LE_TO_LOCAL_32( RGB15TO18_SWAP_RB_BITLOGIC(i) | 0x1F000000 );
|
||||
|
||||
color_555_to_888[i] = LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) );
|
||||
color_555_to_8888_opaque[i] = LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) | 0xFF000000 );
|
||||
color_555_to_8888_opaque_swap_rb[i] = LE_TO_LOCAL_32( RGB15TO24_SWAP_RB_BITLOGIC(i) | 0xFF000000 );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u32 ColorspaceConvert555To8888Opaque(const u16 src)
|
||||
{
|
||||
return (SWAP_RB) ? COLOR555TO8888_OPAQUE_SWAP_RB(src & 0x7FFF) : COLOR555TO8888_OPAQUE(src & 0x7FFF);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u32 ColorspaceConvert555To6665Opaque(const u16 src)
|
||||
{
|
||||
return (SWAP_RB) ? COLOR555TO6665_OPAQUE_SWAP_RB(src & 0x7FFF) : COLOR555TO6665_OPAQUE(src & 0x7FFF);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u32 ColorspaceConvert8888To6665(FragmentColor srcColor)
|
||||
{
|
||||
FragmentColor outColor;
|
||||
outColor.r = ((SWAP_RB) ? srcColor.b : srcColor.r) >> 2;
|
||||
outColor.g = srcColor.g >> 2;
|
||||
outColor.b = ((SWAP_RB) ? srcColor.r : srcColor.b) >> 2;
|
||||
outColor.a = srcColor.a >> 3;
|
||||
|
||||
return outColor.color;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u32 ColorspaceConvert8888To6665(u32 srcColor)
|
||||
{
|
||||
FragmentColor srcColorComponent;
|
||||
srcColorComponent.color = srcColor;
|
||||
|
||||
return ColorspaceConvert8888To6665<SWAP_RB>(srcColorComponent);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u32 ColorspaceConvert6665To8888(FragmentColor srcColor)
|
||||
{
|
||||
FragmentColor outColor;
|
||||
outColor.r = material_6bit_to_8bit[((SWAP_RB) ? srcColor.b : srcColor.r)];
|
||||
outColor.g = material_6bit_to_8bit[srcColor.g];
|
||||
outColor.b = material_6bit_to_8bit[((SWAP_RB) ? srcColor.r : srcColor.b)];
|
||||
outColor.a = material_5bit_to_8bit[srcColor.a];
|
||||
|
||||
return outColor.color;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u32 ColorspaceConvert6665To8888(u32 srcColor)
|
||||
{
|
||||
FragmentColor srcColorComponent;
|
||||
srcColorComponent.color = srcColor;
|
||||
|
||||
return ColorspaceConvert6665To8888<SWAP_RB>(srcColorComponent);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u16 ColorspaceConvert8888To5551(FragmentColor srcColor)
|
||||
{
|
||||
return R5G5B5TORGB15( ((SWAP_RB) ? srcColor.b : srcColor.r) >> 3, srcColor.g >> 3, ((SWAP_RB) ? srcColor.r : srcColor.b) >> 3) | ((srcColor.a == 0) ? 0x0000 : 0x8000 );
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u16 ColorspaceConvert8888To5551(u32 srcColor)
|
||||
{
|
||||
FragmentColor srcColorComponent;
|
||||
srcColorComponent.color = srcColor;
|
||||
|
||||
return ColorspaceConvert8888To5551<SWAP_RB>(srcColorComponent);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u16 ColorspaceConvert6665To5551(FragmentColor srcColor)
|
||||
{
|
||||
return R6G6B6TORGB15( ((SWAP_RB) ? srcColor.b : srcColor.r), srcColor.g, ((SWAP_RB) ? srcColor.r : srcColor.b)) | ((srcColor.a == 0) ? 0x0000 : 0x8000);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE u16 ColorspaceConvert6665To5551(u32 srcColor)
|
||||
{
|
||||
FragmentColor srcColorComponent;
|
||||
srcColorComponent.color = srcColor;
|
||||
|
||||
return ColorspaceConvert6665To5551<SWAP_RB>(srcColorComponent);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef USEMANUALVECTORIZATION
|
||||
|
||||
#if defined(USEVECTORSIZE_128)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 8);
|
||||
#elif defined(USEVECTORSIZE_256)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 16);
|
||||
#elif defined(USEVECTORSIZE_512)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 32);
|
||||
#endif
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
i = csh.ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = csh.ConvertBuffer555To8888Opaque_SwapRB(src, dst, pixCountVector);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
i = csh.ConvertBuffer555To8888Opaque_IsUnaligned(src, dst, pixCountVector);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = csh.ConvertBuffer555To8888Opaque(src, dst, pixCountVector);
|
||||
}
|
||||
}
|
||||
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
|
||||
#endif // USEMANUALVECTORIZATION
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert555To8888Opaque<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef USEMANUALVECTORIZATION
|
||||
|
||||
#if defined(USEVECTORSIZE_128)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 8);
|
||||
#elif defined(USEVECTORSIZE_256)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 16);
|
||||
#elif defined(USEVECTORSIZE_512)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 32);
|
||||
#endif
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
i = csh.ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = csh.ConvertBuffer555To6665Opaque_SwapRB(src, dst, pixCountVector);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
i = csh.ConvertBuffer555To6665Opaque_IsUnaligned(src, dst, pixCountVector);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = csh.ConvertBuffer555To6665Opaque(src, dst, pixCountVector);
|
||||
}
|
||||
}
|
||||
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
|
||||
#endif // USEMANUALVECTORIZATION
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert555To6665Opaque<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef USEMANUALVECTORIZATION
|
||||
|
||||
#if defined(USEVECTORSIZE_128)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 4);
|
||||
#elif defined(USEVECTORSIZE_256)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 8);
|
||||
#elif defined(USEVECTORSIZE_512)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 16);
|
||||
#endif
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
i = csh.ConvertBuffer8888To6665_SwapRB_IsUnaligned(src, dst, pixCountVector);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = csh.ConvertBuffer8888To6665_SwapRB(src, dst, pixCountVector);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
i = csh.ConvertBuffer8888To6665_IsUnaligned(src, dst, pixCountVector);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = csh.ConvertBuffer8888To6665(src, dst, pixCountVector);
|
||||
}
|
||||
}
|
||||
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
|
||||
#endif // USEMANUALVECTORIZATION
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert8888To6665<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
void ColorspaceConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef USEMANUALVECTORIZATION
|
||||
|
||||
#if defined(USEVECTORSIZE_128)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 4);
|
||||
#elif defined(USEVECTORSIZE_256)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 8);
|
||||
#elif defined(USEVECTORSIZE_512)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 16);
|
||||
#endif
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
i = csh.ConvertBuffer6665To8888_SwapRB_IsUnaligned(src, dst, pixCountVector);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = csh.ConvertBuffer6665To8888_SwapRB(src, dst, pixCountVector);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
i = csh.ConvertBuffer6665To8888_IsUnaligned(src, dst, pixCountVector);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = csh.ConvertBuffer6665To8888(src, dst, pixCountVector);
|
||||
}
|
||||
}
|
||||
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
|
||||
#endif // USEMANUALVECTORIZATION
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert6665To8888<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
void ColorspaceConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef USEMANUALVECTORIZATION
|
||||
|
||||
#if defined(USEVECTORSIZE_128)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 8);
|
||||
#elif defined(USEVECTORSIZE_256)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 16);
|
||||
#elif defined(USEVECTORSIZE_512)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 32);
|
||||
#endif
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
i = csh.ConvertBuffer8888To5551_SwapRB_IsUnaligned(src, dst, pixCountVector);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = csh.ConvertBuffer8888To5551_SwapRB(src, dst, pixCountVector);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
i = csh.ConvertBuffer8888To5551_IsUnaligned(src, dst, pixCountVector);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = csh.ConvertBuffer8888To5551(src, dst, pixCountVector);
|
||||
}
|
||||
}
|
||||
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
|
||||
#endif // USEMANUALVECTORIZATION
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert8888To5551<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
void ColorspaceConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef USEMANUALVECTORIZATION
|
||||
|
||||
#if defined(USEVECTORSIZE_128)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 8);
|
||||
#elif defined(USEVECTORSIZE_256)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 16);
|
||||
#elif defined(USEVECTORSIZE_512)
|
||||
const size_t pixCountVector = pixCount - (pixCount % 32);
|
||||
#endif
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
i = csh.ConvertBuffer6665To5551_SwapRB_IsUnaligned(src, dst, pixCountVector);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = csh.ConvertBuffer6665To5551_SwapRB(src, dst, pixCountVector);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
i = csh.ConvertBuffer6665To5551_IsUnaligned(src, dst, pixCountVector);
|
||||
}
|
||||
else
|
||||
{
|
||||
i = csh.ConvertBuffer6665To5551(src, dst, pixCountVector);
|
||||
}
|
||||
}
|
||||
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
|
||||
#endif // USEMANUALVECTORIZATION
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert6665To5551<SWAP_RB>(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert555To8888Opaque<false>(src[i]);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert555To8888Opaque<true>(src[i]);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return this->ColorspaceHandler::ConvertBuffer555To8888Opaque(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return this->ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert555To6665Opaque<false>(src[i]);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert555To6665Opaque<true>(src[i]);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return this->ColorspaceHandler::ConvertBuffer555To6665Opaque(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return this->ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert8888To6665<false>(src[i]);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert8888To6665<true>(src[i]);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return this->ColorspaceHandler::ConvertBuffer8888To6665(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return this->ColorspaceHandler::ConvertBuffer8888To6665_SwapRB(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert6665To8888<false>(src[i]);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert6665To8888<true>(src[i]);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return this->ColorspaceHandler::ConvertBuffer6665To8888(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return this->ColorspaceHandler::ConvertBuffer6665To8888_SwapRB(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert8888To5551<false>(src[i]);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert8888To5551<true>(src[i]);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return this->ColorspaceHandler::ConvertBuffer8888To5551(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return this->ColorspaceHandler::ConvertBuffer8888To5551_SwapRB(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert6665To5551<false>(src[i]);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (;i < pixCount; i++)
|
||||
{
|
||||
dst[i] = ColorspaceConvert6665To5551<true>(src[i]);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return this->ColorspaceHandler::ConvertBuffer6665To5551(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return this->ColorspaceHandler::ConvertBuffer6665To5551_SwapRB(src, dst, pixCount);
|
||||
}
|
||||
|
||||
template u32 ColorspaceConvert555To8888Opaque<true>(const u16 src);
|
||||
template u32 ColorspaceConvert555To8888Opaque<false>(const u16 src);
|
||||
|
||||
template u32 ColorspaceConvert555To6665Opaque<true>(const u16 src);
|
||||
template u32 ColorspaceConvert555To6665Opaque<false>(const u16 src);
|
||||
|
||||
template u32 ColorspaceConvert8888To6665<true>(FragmentColor srcColor);
|
||||
template u32 ColorspaceConvert8888To6665<false>(FragmentColor srcColor);
|
||||
|
||||
template u32 ColorspaceConvert8888To6665<true>(u32 srcColor);
|
||||
template u32 ColorspaceConvert8888To6665<false>(u32 srcColor);
|
||||
|
||||
template u32 ColorspaceConvert6665To8888<true>(FragmentColor srcColor);
|
||||
template u32 ColorspaceConvert6665To8888<false>(FragmentColor srcColor);
|
||||
|
||||
template u32 ColorspaceConvert6665To8888<true>(u32 srcColor);
|
||||
template u32 ColorspaceConvert6665To8888<false>(u32 srcColor);
|
||||
|
||||
template u16 ColorspaceConvert8888To5551<true>(FragmentColor srcColor);
|
||||
template u16 ColorspaceConvert8888To5551<false>(FragmentColor srcColor);
|
||||
|
||||
template u16 ColorspaceConvert8888To5551<true>(u32 srcColor);
|
||||
template u16 ColorspaceConvert8888To5551<false>(u32 srcColor);
|
||||
|
||||
template u16 ColorspaceConvert6665To5551<true>(FragmentColor srcColor);
|
||||
template u16 ColorspaceConvert6665To5551<false>(FragmentColor srcColor);
|
||||
|
||||
template u16 ColorspaceConvert6665To5551<true>(u32 srcColor);
|
||||
template u16 ColorspaceConvert6665To5551<false>(u32 srcColor);
|
||||
|
||||
template void ColorspaceConvertBuffer555To8888Opaque<true, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer555To8888Opaque<true, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer555To8888Opaque<false, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer555To8888Opaque<false, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
|
||||
template void ColorspaceConvertBuffer555To6665Opaque<true, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer555To6665Opaque<true, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer555To6665Opaque<false, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer555To6665Opaque<false, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
|
||||
template void ColorspaceConvertBuffer8888To6665<true, true>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer8888To6665<true, false>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer8888To6665<false, true>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer8888To6665<false, false>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
|
||||
template void ColorspaceConvertBuffer6665To8888<true, true>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer6665To8888<true, false>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer6665To8888<false, true>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer6665To8888<false, false>(const u32 *src, u32 *dst, size_t pixCount);
|
||||
|
||||
template void ColorspaceConvertBuffer8888To5551<true, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer8888To5551<true, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer8888To5551<false, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer8888To5551<false, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
|
||||
template void ColorspaceConvertBuffer6665To5551<true, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer6665To5551<true, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer6665To5551<false, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template void ColorspaceConvertBuffer6665To5551<false, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
|
@ -0,0 +1,194 @@
|
|||
/*
|
||||
Copyright (C) 2016 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This file is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this software. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef COLORSPACEHANDLER_H
|
||||
#define COLORSPACEHANDLER_H
|
||||
|
||||
#include "types.h"
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
// Describes a pixel color format as a packed 32-bit descriptor.
//
// Bit layout (most significant bit first):
//    FFFOOOOO AAAAAABB BBBBGGGG GGRRRRRR
//
//    F (bits 29-31) = Flags
//    O (bits 24-28) = Color order
//    A (bits 18-23) = Bit count for alpha [0-63]
//    B (bits 12-17) = Bit count for blue  [0-63]
//    G (bits  6-11) = Bit count for green [0-63]
//    R (bits  0-5)  = Bit count for red   [0-63]
//
// Flags:
//    Bit 29: Reverse order flag.
//       Set     = Bits are in reverse order, usually for little-endian usage.
//       Cleared = Bits are in normal order, usually for big-endian usage.
//
// Color order values (bits 24-28):
//    0x00 = RGBA (common)   0x01 = RGAB   0x02 = RBGA   0x03 = RBAG
//    0x04 = RAGB            0x05 = RABG   0x06 = GRBA   0x07 = GRAB
//    0x08 = GBRA            0x09 = GBAR   0x0A = GARB   0x0B = GABR
//    0x0C = BRGA            0x0D = BRAG   0x0E = BGRA (common)
//    0x0F = BGAR            0x10 = BARG   0x11 = BAGR   0x12 = ARGB
//    0x13 = ARBG            0x14 = AGRB   0x15 = AGBR   0x16 = ABRG
//    0x17 = ABGR
enum NDSColorFormat
{
	// Color formats used for internal processing (currently disabled).
	//NDSColorFormat_ABGR1555_Rev	= 0x20045145,
	//NDSColorFormat_ABGR5666_Rev	= 0x20186186,
	//NDSColorFormat_ABGR8888_Rev	= 0x20208208,

	// Color formats used by the output framebuffers.
	NDSColorFormat_BGR555_Rev		= 0x20005145,	// 5 bits per channel, no alpha, reverse flag set
	NDSColorFormat_BGR666_Rev		= 0x20006186,	// 6 bits per channel, no alpha, reverse flag set
	NDSColorFormat_BGR888_Rev		= 0x20008208	// 8 bits per channel, no alpha, reverse flag set
};
|
||||
|
||||
union FragmentColor
|
||||
{
|
||||
u32 color;
|
||||
struct
|
||||
{
|
||||
u8 r,g,b,a;
|
||||
};
|
||||
};
|
||||
|
||||
// Constant lookup tables indexed by a narrow channel value; the table length
// matches the source bit width (8 entries for 3-bit, 32 for 5-bit, 64 for 6-bit).
// NOTE(review): presumably each maps a channel to the wider bit depth in its name -- confirm against the definitions.
extern CACHE_ALIGN const u32 material_5bit_to_31bit[32];
extern CACHE_ALIGN const u8  material_5bit_to_6bit[32];
extern CACHE_ALIGN const u8  material_5bit_to_8bit[32];
extern CACHE_ALIGN const u8  material_6bit_to_8bit[64];
extern CACHE_ALIGN const u8  material_3bit_to_5bit[8];
extern CACHE_ALIGN const u8  material_3bit_to_6bit[8];
extern CACHE_ALIGN const u8  material_3bit_to_8bit[8];

// Non-const tables with one entry per 15-bit color (32768 entries each).
// They are read through the COLOR555TO* macros in this header.
extern CACHE_ALIGN u32 color_555_to_6665_opaque[32768];
extern CACHE_ALIGN u32 color_555_to_6665_opaque_swap_rb[32768];
extern CACHE_ALIGN u32 color_555_to_666[32768];
extern CACHE_ALIGN u32 color_555_to_8888_opaque[32768];
extern CACHE_ALIGN u32 color_555_to_8888_opaque_swap_rb[32768];
extern CACHE_ALIGN u32 color_555_to_888[32768];
|
||||
|
||||
// Convert a 15-bit color to an opaque sparsely packed 32-bit color containing an RGBA6665 color
#define COLOR555TO6665_OPAQUE(col)			(color_555_to_6665_opaque[(col)])

// Convert a 15-bit color to an opaque sparsely packed 32-bit color containing an RGBA6665 color with R and B components swapped
#define COLOR555TO6665_OPAQUE_SWAP_RB(col)	(color_555_to_6665_opaque_swap_rb[(col)])

// Convert a 15-bit color to a fully transparent sparsely packed 32-bit color containing an RGBA6665 color
#define COLOR555TO666(col)					(color_555_to_666[(col)])

// Convert a 15-bit color to a sparsely packed 32-bit color containing an RGBA6665 color with user-defined alpha.
// The alpha value lands in the top byte on little-endian builds and in the bottom byte otherwise.
#ifdef LOCAL_LE
	#define COLOR555TO6665(col,alpha5)		(((alpha5)<<24) | color_555_to_666[(col)])
#else
	#define COLOR555TO6665(col,alpha5)		((alpha5) | color_555_to_666[(col)])
#endif

// Convert a 15-bit color to an opaque 32-bit color
#define COLOR555TO8888_OPAQUE(col)			(color_555_to_8888_opaque[(col)])

// Convert a 15-bit color to an opaque 32-bit color with R and B components swapped
#define COLOR555TO8888_OPAQUE_SWAP_RB(col)	(color_555_to_8888_opaque_swap_rb[(col)])

// Convert a 15-bit color to an opaque 24-bit color or a fully transparent 32-bit color
#define COLOR555TO888(col)					(color_555_to_888[(col)])

// Convert a 15-bit color to a 32-bit color with user-defined alpha.
// The alpha value lands in the top byte on little-endian builds and in the bottom byte otherwise.
#ifdef LOCAL_LE
	#define COLOR555TO8888(col,alpha8)		(((alpha8)<<24) | color_555_to_888[(col)])
#else
	#define COLOR555TO8888(col,alpha8)		((alpha8) | color_555_to_888[(col)])
#endif
|
||||
|
||||
// Produce a 15bpp color from individual 5-bit components.
// Layout of the result: red in bits 0-4, green in bits 5-9, blue in bits 10-14.
// The components are not masked, so each must already be in the range [0-31].
#define R5G5B5TORGB15(r,g,b) ( (r) | ((g)<<5) | ((b)<<10) )

// Produce a 15bpp color from individual 6-bit components.
// The least significant bit of each component is discarded, and the remaining
// 5 bits are placed in the same layout as R5G5B5TORGB15. Green and blue are
// masked with 0x3E; red is only shifted, so it must be in the range [0-63].
#define R6G6B6TORGB15(r,g,b) ( ((r)>>1) | (((g)&0x3E)<<4) | (((b)&0x3E)<<9) )
|
||||
|
||||
void ColorspaceHandlerInit();
|
||||
|
||||
template<bool SWAP_RB> u32 ColorspaceConvert555To8888Opaque(const u16 src);
|
||||
template<bool SWAP_RB> u32 ColorspaceConvert555To6665Opaque(const u16 src);
|
||||
template<bool SWAP_RB> u32 ColorspaceConvert8888To6665(FragmentColor srcColor);
|
||||
template<bool SWAP_RB> u32 ColorspaceConvert8888To6665(u32 srcColor);
|
||||
template<bool SWAP_RB> u32 ColorspaceConvert6665To8888(FragmentColor srcColor);
|
||||
template<bool SWAP_RB> u32 ColorspaceConvert6665To8888(u32 srcColor);
|
||||
template<bool SWAP_RB> u16 ColorspaceConvert8888To5551(FragmentColor srcColor);
|
||||
template<bool SWAP_RB> u16 ColorspaceConvert8888To5551(u32 srcColor);
|
||||
template<bool SWAP_RB> u16 ColorspaceConvert6665To5551(FragmentColor srcColor);
|
||||
template<bool SWAP_RB> u16 ColorspaceConvert6665To5551(u32 srcColor);
|
||||
|
||||
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
|
||||
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount);
|
||||
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount);
|
||||
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
|
||||
|
||||
class ColorspaceHandler
|
||||
{
|
||||
public:
|
||||
ColorspaceHandler() {};
|
||||
|
||||
size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
};
|
||||
|
||||
// Builds a FragmentColor from four individual 8-bit channel values.
// Each argument is stored unchanged in the channel of the same name.
FORCEINLINE FragmentColor MakeFragmentColor(const u8 r, const u8 g, const u8 b, const u8 a)
{
	FragmentColor fragColor;

	fragColor.r = r;
	fragColor.g = g;
	fragColor.b = b;
	fragColor.a = a;

	return fragColor;
}
|
||||
|
||||
#endif /* COLORSPACEHANDLER_H */
|
|
@ -0,0 +1,491 @@
|
|||
/*
|
||||
Copyright (C) 2016 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This file is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this software. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "colorspacehandler_AVX2.h"
|
||||
|
||||
#ifndef ENABLE_AVX2
|
||||
#error This code requires AVX2 support.
|
||||
#else
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ColorspaceConvert555To8888_AVX2(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi)
|
||||
{
|
||||
v256u32 src32;
|
||||
|
||||
// Conversion algorithm:
|
||||
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
|
||||
src32 = _mm256_unpacklo_epi16(srcColor, _mm256_setzero_si256());
|
||||
dstLo = (SWAP_RB) ? _mm256_or_si256(_mm256_slli_epi32(src32, 19), _mm256_srli_epi32(src32, 7)) : _mm256_or_si256(_mm256_slli_epi32(src32, 3), _mm256_slli_epi32(src32, 9));
|
||||
dstLo = _mm256_and_si256( dstLo, _mm256_set1_epi32(0x00F800F8) );
|
||||
dstLo = _mm256_or_si256( dstLo, _mm256_and_si256(_mm256_slli_epi32(src32, 6), _mm256_set1_epi32(0x0000F800)) );
|
||||
dstLo = _mm256_or_si256( dstLo, _mm256_and_si256(_mm256_srli_epi32(dstLo, 5), _mm256_set1_epi32(0x00070707)) );
|
||||
dstLo = _mm256_or_si256( dstLo, srcAlphaBits32Lo );
|
||||
|
||||
src32 = _mm256_unpackhi_epi16(srcColor, _mm256_setzero_si256());
|
||||
dstHi = (SWAP_RB) ? _mm256_or_si256(_mm256_slli_epi32(src32, 19), _mm256_srli_epi32(src32, 7)) : _mm256_or_si256(_mm256_slli_epi32(src32, 3), _mm256_slli_epi32(src32, 9));
|
||||
dstHi = _mm256_and_si256( dstHi, _mm256_set1_epi32(0x00F800F8) );
|
||||
dstHi = _mm256_or_si256( dstHi, _mm256_and_si256(_mm256_slli_epi32(src32, 6), _mm256_set1_epi32(0x0000F800)) );
|
||||
dstHi = _mm256_or_si256( dstHi, _mm256_and_si256(_mm256_srli_epi32(dstHi, 5), _mm256_set1_epi32(0x00070707)) );
|
||||
dstHi = _mm256_or_si256( dstHi, srcAlphaBits32Hi );
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ColorspaceConvert555To6665_AVX2(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi)
|
||||
{
|
||||
v256u32 src32;
|
||||
|
||||
// Conversion algorithm:
|
||||
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
|
||||
src32 = _mm256_unpacklo_epi16(srcColor, _mm256_setzero_si256());
|
||||
dstLo = (SWAP_RB) ? _mm256_or_si256(_mm256_slli_epi32(src32, 17), _mm256_srli_epi32(src32, 9)) : _mm256_or_si256(_mm256_slli_epi32(src32, 1), _mm256_slli_epi32(src32, 7));
|
||||
dstLo = _mm256_and_si256( dstLo, _mm256_set1_epi32(0x003E003E) );
|
||||
dstLo = _mm256_or_si256( dstLo, _mm256_and_si256(_mm256_slli_epi32(src32, 4), _mm256_set1_epi32(0x00003E00)) );
|
||||
dstLo = _mm256_or_si256( dstLo, _mm256_and_si256(_mm256_srli_epi32(dstLo, 5), _mm256_set1_epi32(0x00010101)) );
|
||||
dstLo = _mm256_or_si256( dstLo, srcAlphaBits32Lo );
|
||||
|
||||
src32 = _mm256_unpackhi_epi16(srcColor, _mm256_setzero_si256());
|
||||
dstHi = (SWAP_RB) ? _mm256_or_si256(_mm256_slli_epi32(src32, 17), _mm256_srli_epi32(src32, 9)) : _mm256_or_si256(_mm256_slli_epi32(src32, 1), _mm256_slli_epi32(src32, 7));
|
||||
dstHi = _mm256_and_si256( dstHi, _mm256_set1_epi32(0x003E003E) );
|
||||
dstHi = _mm256_or_si256( dstHi, _mm256_and_si256(_mm256_slli_epi32(src32, 4), _mm256_set1_epi32(0x00003E00)) );
|
||||
dstHi = _mm256_or_si256( dstHi, _mm256_and_si256(_mm256_srli_epi32(dstHi, 5), _mm256_set1_epi32(0x00010101)) );
|
||||
dstHi = _mm256_or_si256( dstHi, srcAlphaBits32Hi );
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ColorspaceConvert555To8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi)
|
||||
{
|
||||
const v256u32 srcAlphaBits32 = _mm256_set1_epi32(0xFF000000);
|
||||
ColorspaceConvert555To8888_AVX2<SWAP_RB>(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ColorspaceConvert555To6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi)
|
||||
{
|
||||
const v256u32 srcAlphaBits32 = _mm256_set1_epi32(0x1F000000);
|
||||
ColorspaceConvert555To6665_AVX2<SWAP_RB>(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE v256u32 ColorspaceConvert8888To6665_AVX2(const v256u32 &src)
|
||||
{
|
||||
// Conversion algorithm:
|
||||
// RGB 8-bit to 6-bit formula: dstRGB6 = (srcRGB8 >> 2)
|
||||
// Alpha 8-bit to 6-bit formula: dstA5 = (srcA8 >> 3)
|
||||
v256u32 rgb;
|
||||
const v256u32 a = _mm256_and_si256( _mm256_srli_epi32(src, 3), _mm256_set1_epi32(0x1F000000) );
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
rgb = _mm256_and_si256( _mm256_srli_epi32(src, 2), _mm256_set1_epi32(0x003F3F3F) );
|
||||
rgb = _mm256_shuffle_epi8( rgb, _mm256_set_epi8(31,28,29,30, 27,24,25,26, 23,20,21,22, 19,16,17,18, 15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2) );
|
||||
}
|
||||
else
|
||||
{
|
||||
rgb = _mm256_and_si256( _mm256_srli_epi32(src, 2), _mm256_set1_epi32(0x003F3F3F) );
|
||||
}
|
||||
|
||||
return _mm256_or_si256(rgb, a);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE v256u32 ColorspaceConvert6665To8888_AVX2(const v256u32 &src)
|
||||
{
|
||||
// Conversion algorithm:
|
||||
// RGB 6-bit to 8-bit formula: dstRGB8 = (srcRGB6 << 2) | ((srcRGB6 >> 4) & 0x03)
|
||||
// Alpha 5-bit to 8-bit formula: dstA8 = (srcA5 << 3) | ((srcA5 >> 2) & 0x07)
|
||||
v256u32 rgb = _mm256_or_si256( _mm256_and_si256(_mm256_slli_epi32(src, 2), _mm256_set1_epi32(0x00FCFCFC)), _mm256_and_si256(_mm256_srli_epi32(src, 4), _mm256_set1_epi32(0x00030303)) );
|
||||
const v256u32 a = _mm256_or_si256( _mm256_and_si256(_mm256_slli_epi32(src, 3), _mm256_set1_epi32(0xF8000000)), _mm256_and_si256(_mm256_srli_epi32(src, 2), _mm256_set1_epi32(0x07000000)) );
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
rgb = _mm256_shuffle_epi8( rgb, _mm256_set_epi8(31,28,29,30, 27,24,25,26, 23,20,21,22, 19,16,17,18, 15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2) );
|
||||
}
|
||||
|
||||
return _mm256_or_si256(rgb, a);
|
||||
}
|
||||
|
||||
template <NDSColorFormat COLORFORMAT, bool SWAP_RB>
|
||||
FORCEINLINE v256u16 _ConvertColorBaseTo5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi)
|
||||
{
|
||||
if (COLORFORMAT == NDSColorFormat_BGR555_Rev)
|
||||
{
|
||||
return srcLo;
|
||||
}
|
||||
|
||||
v256u32 rgbLo;
|
||||
v256u32 rgbHi;
|
||||
v256u16 alpha;
|
||||
|
||||
if (COLORFORMAT == NDSColorFormat_BGR666_Rev)
|
||||
{
|
||||
if (SWAP_RB)
|
||||
{
|
||||
// Convert color from low bits
|
||||
rgbLo = _mm256_and_si256(_mm256_srli_epi32(srcLo, 17), _mm256_set1_epi32(0x0000001F));
|
||||
rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo, 4), _mm256_set1_epi32(0x000003E0)) );
|
||||
rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_slli_epi32(srcLo, 9), _mm256_set1_epi32(0x00007C00)) );
|
||||
|
||||
// Convert color from high bits
|
||||
rgbHi = _mm256_and_si256(_mm256_srli_epi32(srcHi, 17), _mm256_set1_epi32(0x0000001F));
|
||||
rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi, 4), _mm256_set1_epi32(0x000003E0)) );
|
||||
rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_slli_epi32(srcHi, 9), _mm256_set1_epi32(0x00007C00)) );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Convert color from low bits
|
||||
rgbLo = _mm256_and_si256(_mm256_srli_epi32(srcLo, 1), _mm256_set1_epi32(0x0000001F));
|
||||
rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo, 4), _mm256_set1_epi32(0x000003E0)) );
|
||||
rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo, 7), _mm256_set1_epi32(0x00007C00)) );
|
||||
|
||||
// Convert color from high bits
|
||||
rgbHi = _mm256_and_si256(_mm256_srli_epi32(srcHi, 1), _mm256_set1_epi32(0x0000001F));
|
||||
rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi, 4), _mm256_set1_epi32(0x000003E0)) );
|
||||
rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi, 7), _mm256_set1_epi32(0x00007C00)) );
|
||||
}
|
||||
|
||||
// Convert alpha
|
||||
alpha = _mm256_packs_epi32( _mm256_and_si256(_mm256_srli_epi32(srcLo, 24), _mm256_set1_epi32(0x0000001F)), _mm256_and_si256(_mm256_srli_epi32(srcHi, 24), _mm256_set1_epi32(0x0000001F)) );
|
||||
alpha = _mm256_cmpgt_epi16(alpha, _mm256_setzero_si256());
|
||||
alpha = _mm256_and_si256(alpha, _mm256_set1_epi16(0x8000));
|
||||
}
|
||||
else if (COLORFORMAT == NDSColorFormat_BGR888_Rev)
|
||||
{
|
||||
if (SWAP_RB)
|
||||
{
|
||||
// Convert color from low bits
|
||||
rgbLo = _mm256_and_si256(_mm256_srli_epi32(srcLo, 19), _mm256_set1_epi32(0x0000001F));
|
||||
rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo, 6), _mm256_set1_epi32(0x000003E0)) );
|
||||
rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_slli_epi32(srcLo, 7), _mm256_set1_epi32(0x00007C00)) );
|
||||
|
||||
// Convert color from high bits
|
||||
rgbHi = _mm256_and_si256(_mm256_srli_epi32(srcHi, 19), _mm256_set1_epi32(0x0000001F));
|
||||
rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi, 6), _mm256_set1_epi32(0x000003E0)) );
|
||||
rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_slli_epi32(srcHi, 7), _mm256_set1_epi32(0x00007C00)) );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Convert color from low bits
|
||||
rgbLo = _mm256_and_si256(_mm256_srli_epi32(srcLo, 3), _mm256_set1_epi32(0x0000001F));
|
||||
rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo, 6), _mm256_set1_epi32(0x000003E0)) );
|
||||
rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo, 9), _mm256_set1_epi32(0x00007C00)) );
|
||||
|
||||
// Convert color from high bits
|
||||
rgbHi = _mm256_and_si256(_mm256_srli_epi32(srcHi, 3), _mm256_set1_epi32(0x0000001F));
|
||||
rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi, 6), _mm256_set1_epi32(0x000003E0)) );
|
||||
rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi, 9), _mm256_set1_epi32(0x00007C00)) );
|
||||
}
|
||||
|
||||
// Convert alpha
|
||||
alpha = _mm256_packs_epi32( _mm256_srli_epi32(srcLo, 24), _mm256_srli_epi32(srcHi, 24) );
|
||||
alpha = _mm256_cmpgt_epi16(alpha, _mm256_setzero_si256());
|
||||
alpha = _mm256_and_si256(alpha, _mm256_set1_epi16(0x8000));
|
||||
}
|
||||
|
||||
return _mm256_or_si256(_mm256_packs_epi32(rgbLo, rgbHi), alpha);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE v256u16 ColorspaceConvert8888To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi)
|
||||
{
|
||||
return _ConvertColorBaseTo5551_AVX2<NDSColorFormat_BGR888_Rev, SWAP_RB>(srcLo, srcHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE v256u16 ColorspaceConvert6665To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi)
|
||||
{
|
||||
return _ConvertColorBaseTo5551_AVX2<NDSColorFormat_BGR666_Rev, SWAP_RB>(srcLo, srcHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
static size_t ColorspaceConvertBuffer555To8888Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec256)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec256; i+=16)
|
||||
{
|
||||
v256u16 src_vec256 = (IS_UNALIGNED) ? _mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i));
|
||||
v256u32 dstConvertedLo, dstConvertedHi;
|
||||
ColorspaceConvert555To8888Opaque_AVX2<SWAP_RB>(src_vec256, dstConvertedLo, dstConvertedHi);
|
||||
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm256_storeu_si256((v256u32 *)(dst+i+0), dstConvertedLo);
|
||||
_mm256_storeu_si256((v256u32 *)(dst+i+8), dstConvertedHi);
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm256_store_si256((v256u32 *)(dst+i+0), dstConvertedLo);
|
||||
_mm256_store_si256((v256u32 *)(dst+i+8), dstConvertedHi);
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
size_t ColorspaceConvertBuffer555To6665Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec256)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec256; i+=16)
|
||||
{
|
||||
v256u16 src_vec256 = (IS_UNALIGNED) ? _mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i));
|
||||
v256u32 dstConvertedLo, dstConvertedHi;
|
||||
ColorspaceConvert555To6665Opaque_AVX2<SWAP_RB>(src_vec256, dstConvertedLo, dstConvertedHi);
|
||||
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm256_storeu_si256((v256u32 *)(dst+i+0), dstConvertedLo);
|
||||
_mm256_storeu_si256((v256u32 *)(dst+i+8), dstConvertedHi);
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm256_store_si256((v256u32 *)(dst+i+0), dstConvertedLo);
|
||||
_mm256_store_si256((v256u32 *)(dst+i+8), dstConvertedHi);
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
size_t ColorspaceConvertBuffer8888To6665_AVX2(const u32 *src, u32 *dst, size_t pixCountVec256)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec256; i+=8)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm256_storeu_si256( (v256u32 *)(dst+i), ColorspaceConvert8888To6665_AVX2<SWAP_RB>(_mm256_loadu_si256((v256u32 *)(src+i))) );
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm256_store_si256( (v256u32 *)(dst+i), ColorspaceConvert8888To6665_AVX2<SWAP_RB>(_mm256_load_si256((v256u32 *)(src+i))) );
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
size_t ColorspaceConvertBuffer6665To8888_AVX2(const u32 *src, u32 *dst, size_t pixCountVec256)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec256; i+=8)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm256_storeu_si256( (v256u32 *)(dst+i), ColorspaceConvert6665To8888_AVX2<SWAP_RB>(_mm256_loadu_si256((v256u32 *)(src+i))) );
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm256_store_si256( (v256u32 *)(dst+i), ColorspaceConvert6665To8888_AVX2<SWAP_RB>(_mm256_load_si256((v256u32 *)(src+i))) );
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
size_t ColorspaceConvertBuffer8888To5551_AVX2(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec256)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec256; i+=16)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm256_storeu_si256( (v256u16 *)(dst+i), ColorspaceConvert8888To5551_AVX2<SWAP_RB>(_mm256_loadu_si256((v256u32 *)(src+i)), _mm256_loadu_si256((v256u32 *)(src+i+8))) );
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm256_store_si256( (v256u16 *)(dst+i), ColorspaceConvert8888To5551_AVX2<SWAP_RB>(_mm256_load_si256((v256u32 *)(src+i)), _mm256_load_si256((v256u32 *)(src+i+8))) );
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
size_t ColorspaceConvertBuffer6665To5551_AVX2(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec256)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec256; i+=16)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm256_storeu_si256( (v256u16 *)(dst+i), ColorspaceConvert6665To5551_AVX2<SWAP_RB>(_mm256_loadu_si256((v256u32 *)(src+i)), _mm256_loadu_si256((v256u32 *)(src+i+8))) );
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm256_store_si256( (v256u16 *)(dst+i), ColorspaceConvert6665To5551_AVX2<SWAP_RB>(_mm256_load_si256((v256u32 *)(src+i)), _mm256_load_si256((v256u32 *)(src+i+8))) );
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To8888Opaque_AVX2<false, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To8888Opaque_AVX2<true, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To8888Opaque_AVX2<false, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To8888Opaque_AVX2<true, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To6665Opaque_AVX2<false, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To6665Opaque_AVX2<true, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To6665Opaque_AVX2<false, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To6665Opaque_AVX2<true, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To6665_AVX2<false, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To6665_AVX2<true, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To6665_AVX2<false, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To6665_AVX2<true, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To8888_AVX2<false, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To8888_AVX2<true, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To8888_AVX2<false, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To8888_AVX2<true, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To5551_AVX2<false, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To5551_AVX2<true, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To5551_AVX2<false, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To5551_AVX2<true, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To5551_AVX2<false, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To5551_AVX2<true, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To5551_AVX2<false, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To5551_AVX2<true, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
template void ColorspaceConvert555To8888_AVX2<true>(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
|
||||
template void ColorspaceConvert555To8888_AVX2<false>(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
|
||||
|
||||
template void ColorspaceConvert555To6665_AVX2<true>(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
|
||||
template void ColorspaceConvert555To6665_AVX2<false>(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
|
||||
|
||||
template void ColorspaceConvert555To8888Opaque_AVX2<true>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
|
||||
template void ColorspaceConvert555To8888Opaque_AVX2<false>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
|
||||
|
||||
template void ColorspaceConvert555To6665Opaque_AVX2<true>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
|
||||
template void ColorspaceConvert555To6665Opaque_AVX2<false>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
|
||||
|
||||
template v256u32 ColorspaceConvert8888To6665_AVX2<true>(const v256u32 &src);
|
||||
template v256u32 ColorspaceConvert8888To6665_AVX2<false>(const v256u32 &src);
|
||||
|
||||
template v256u32 ColorspaceConvert6665To8888_AVX2<true>(const v256u32 &src);
|
||||
template v256u32 ColorspaceConvert6665To8888_AVX2<false>(const v256u32 &src);
|
||||
|
||||
template v256u16 ColorspaceConvert8888To5551_AVX2<true>(const v256u32 &srcLo, const v256u32 &srcHi);
|
||||
template v256u16 ColorspaceConvert8888To5551_AVX2<false>(const v256u32 &srcLo, const v256u32 &srcHi);
|
||||
|
||||
template v256u16 ColorspaceConvert6665To5551_AVX2<true>(const v256u32 &srcLo, const v256u32 &srcHi);
|
||||
template v256u16 ColorspaceConvert6665To5551_AVX2<false>(const v256u32 &srcLo, const v256u32 &srcHi);
|
||||
|
||||
#endif // ENABLE_AVX2
|
|
@ -0,0 +1,74 @@
|
|||
/*
|
||||
Copyright (C) 2016 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This file is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with the this software. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef COLORSPACEHANDLER_AVX2_H
|
||||
#define COLORSPACEHANDLER_AVX2_H
|
||||
|
||||
#include "colorspacehandler.h"
|
||||
|
||||
#ifndef ENABLE_AVX2
|
||||
#warning This header requires AVX2 support.
|
||||
#else
|
||||
|
||||
// Per-vector colorspace conversion templates (AVX2).
// Only declarations live here; no definitions are provided in this header.
// SWAP_RB is a compile-time flag for exchanging the red and blue channels.

template <bool SWAP_RB>
void ColorspaceConvert555To8888_AVX2(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);

template <bool SWAP_RB>
void ColorspaceConvert555To6665_AVX2(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);

template <bool SWAP_RB>
void ColorspaceConvert555To8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);

template <bool SWAP_RB>
void ColorspaceConvert555To6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);

template <bool SWAP_RB>
v256u32 ColorspaceConvert8888To6665_AVX2(const v256u32 &src);

template <bool SWAP_RB>
v256u32 ColorspaceConvert6665To8888_AVX2(const v256u32 &src);

template <bool SWAP_RB>
v256u16 ColorspaceConvert8888To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi);

template <bool SWAP_RB>
v256u16 ColorspaceConvert6665To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi);
|
||||
|
||||
class ColorspaceHandler_AVX2 : public ColorspaceHandler
|
||||
{
|
||||
public:
|
||||
ColorspaceHandler_AVX2() {};
|
||||
|
||||
size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
};
|
||||
|
||||
#endif // ENABLE_AVX2
|
||||
|
||||
#endif /* COLORSPACEHANDLER_AVX2_H */
|
|
@ -0,0 +1,345 @@
|
|||
/*
|
||||
Copyright (C) 2016 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This file is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with the this software. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "colorspacehandler_Altivec.h"
|
||||
|
||||
#ifndef ENABLE_ALTIVEC
|
||||
#error This code requires PowerPC AltiVec support.
|
||||
#else
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi)
|
||||
{
|
||||
// Conversion algorithm:
|
||||
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
|
||||
dstLo = vec_unpackl((vector pixel)srcColor);
|
||||
dstLo = vec_or( vec_sl((v128u8)dstLo, ((v128u8){3,3,3,0, 3,3,3,0, 3,3,3,0, 3,3,3,0})), vec_sr((v128u8)dstLo, ((v128u8){2,2,2,0, 2,2,2,0, 2,2,2,0, 2,2,2,0})) );
|
||||
dstLo = vec_sel(dstLo, srcAlphaBits32Lo, vec_splat_u32(0xFF000000));
|
||||
|
||||
dstHi = vec_unpackh((vector pixel)srcColor);
|
||||
dstHi = vec_or( vec_sl((v128u8)dstHi, ((v128u8){3,3,3,0, 3,3,3,0, 3,3,3,0, 3,3,3,0})), vec_sr((v128u8)dstHi, ((v128u8){2,2,2,0, 2,2,2,0, 2,2,2,0, 2,2,2,0})) );
|
||||
dstHi = vec_sel(dstHi, srcAlphaBits32Hi, vec_splat_u32(0xFF000000));
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi)
|
||||
{
|
||||
// Conversion algorithm:
|
||||
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
|
||||
dstLo = vec_unpackl((vector pixel)srcColor);
|
||||
dstLo = vec_or( vec_sl((v128u8)dstLo, ((v128u8){1,1,1,0, 1,1,1,0, 1,1,1,0, 1,1,1,0})), vec_sr((v128u8)dstLo, ((v128u8){4,4,4,0, 4,4,4,0, 4,4,4,0, 4,4,4,0})) );
|
||||
dstLo = vec_sel(dstLo, srcAlphaBits32Lo, vec_splat_u32(0xFF000000));
|
||||
|
||||
dstHi = vec_unpackh((vector pixel)srcColor);
|
||||
dstHi = vec_or( vec_sl((v128u8)dstHi, ((v128u8){1,1,1,0, 1,1,1,0, 1,1,1,0, 1,1,1,0})), vec_sr((v128u8)dstHi, ((v128u8){4,4,4,0, 4,4,4,0, 4,4,4,0, 4,4,4,0})) );
|
||||
dstHi = vec_sel(dstHi, srcAlphaBits32Hi, vec_splat_u32(0xFF000000));
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
|
||||
{
|
||||
const v128u32 srcAlphaBits32 = {0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000};
|
||||
ColorspaceConvert555To8888_AltiVec<SWAP_RB>(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
|
||||
{
|
||||
const v128u32 srcAlphaBits32 = {0x1F000000, 0x1F000000, 0x1F000000, 0x1F000000};
|
||||
ColorspaceConvert555To6665_AltiVec<SWAP_RB>(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE v128u32 ColorspaceConvert8888To6665_AltiVec(const v128u32 &src)
|
||||
{
|
||||
// Conversion algorithm:
|
||||
// RGB 8-bit to 6-bit formula: dstRGB6 = (srcRGB8 >> 2)
|
||||
// Alpha 8-bit to 6-bit formula: dstA5 = (srcA8 >> 3)
|
||||
v128u8 rgba = vec_sr( (v128u8)src, ((v128u8){2,2,2,3, 2,2,2,3, 2,2,2,3, 2,2,2,3}) );
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
rgba = vec_perm( rgba, rgba, ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15}) );
|
||||
}
|
||||
|
||||
return (v128u32)rgba;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE v128u32 ColorspaceConvert6665To8888_AltiVec(const v128u32 &src)
|
||||
{
|
||||
// Conversion algorithm:
|
||||
// RGB 6-bit to 8-bit formula: dstRGB8 = (srcRGB6 << 2) | ((srcRGB6 >> 4) & 0x03)
|
||||
// Alpha 5-bit to 8-bit formula: dstA8 = (srcA5 << 3) | ((srcA5 >> 2) & 0x07)
|
||||
v128u8 rgba = vec_or( vec_sl((v128u8)src, ((v128u8){2,2,2,3, 2,2,2,3, 2,2,2,3, 2,2,2,3})), vec_sr((v128u8)src, ((v128u8){4,4,4,2, 4,4,4,2, 4,4,4,2, 4,4,4,2})) );
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
rgba = vec_perm( rgba, rgba, ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15}) );
|
||||
}
|
||||
|
||||
return (v128u32)rgba;
|
||||
}
|
||||
|
||||
template <NDSColorFormat COLORFORMAT, bool SWAP_RB>
|
||||
FORCEINLINE v128u16 _ConvertColorBaseTo5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi)
|
||||
{
|
||||
if (COLORFORMAT == NDSColorFormat_BGR555_Rev)
|
||||
{
|
||||
return srcLo;
|
||||
}
|
||||
|
||||
v128u32 rgbLo;
|
||||
v128u32 rgbHi;
|
||||
|
||||
v128u16 dstColor;
|
||||
v128u16 dstAlpha;
|
||||
|
||||
if (COLORFORMAT == NDSColorFormat_BGR666_Rev)
|
||||
{
|
||||
// Convert alpha
|
||||
dstAlpha = vec_packsu( vec_and(vec_sr(srcLo, vec_splat_u32(24)), vec_splat_u32(0x0000001F)), vec_and(vec_sr(srcHi, vec_splat_u32(24)), vec_splat_u32(0x0000001F)) );
|
||||
dstAlpha = vec_cmpgt(dstAlpha, vec_splat_u16(0));
|
||||
dstAlpha = vec_and(dstAlpha, vec_splat_u16(0x8000));
|
||||
|
||||
// Convert RGB
|
||||
if (SWAP_RB)
|
||||
{
|
||||
rgbLo = vec_perm( srcLo, srcLo, ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15}) );
|
||||
rgbHi = vec_perm( srcHi, srcHi, ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15}) );
|
||||
|
||||
rgbLo = vec_sl( rgbLo, vec_splat_u32(2) );
|
||||
rgbHi = vec_sl( rgbHi, vec_splat_u32(2) );
|
||||
|
||||
dstColor = (v128u16)vec_packpx(rgbLo, rgbHi);
|
||||
}
|
||||
else
|
||||
{
|
||||
rgbLo = vec_sl( srcLo, vec_splat_u32(2) );
|
||||
rgbHi = vec_sl( srcHi, vec_splat_u32(2) );
|
||||
|
||||
dstColor = (v128u16)vec_packpx(rgbLo, rgbHi);
|
||||
}
|
||||
}
|
||||
else if (COLORFORMAT == NDSColorFormat_BGR888_Rev)
|
||||
{
|
||||
// Convert alpha
|
||||
dstAlpha = vec_packsu( vec_sr(srcLo, vec_splat_u32(24)), vec_sr(srcHi, vec_splat_u32(24)) );
|
||||
dstAlpha = vec_cmpgt(dstAlpha, vec_splat_u16(0));
|
||||
dstAlpha = vec_and(dstAlpha, vec_splat_u16(0x8000));
|
||||
|
||||
// Convert RGB
|
||||
if (SWAP_RB)
|
||||
{
|
||||
rgbLo = vec_perm( srcLo, srcLo, ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15}) );
|
||||
rgbHi = vec_perm( srcHi, srcHi, ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15}) );
|
||||
|
||||
dstColor = (v128u16)vec_packpx(rgbLo, rgbHi);
|
||||
}
|
||||
else
|
||||
{
|
||||
dstColor = (v128u16)vec_packpx(srcLo, srcHi);
|
||||
}
|
||||
}
|
||||
|
||||
dstColor = vec_and(dstColor, vec_splat_u16(0x7FFF));
|
||||
return vec_or(dstColor, dstAlpha);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE v128u16 ColorspaceConvert8888To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi)
|
||||
{
|
||||
return _ConvertColorBaseTo5551_AltiVec<NDSColorFormat_BGR888_Rev, SWAP_RB>(srcLo, srcHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE v128u16 ColorspaceConvert6665To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi)
|
||||
{
|
||||
return _ConvertColorBaseTo5551_AltiVec<NDSColorFormat_BGR666_Rev, SWAP_RB>(srcLo, srcHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
static size_t ColorspaceConvertBuffer555To8888Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec128; i+=8)
|
||||
{
|
||||
v128u32 dstConvertedLo, dstConvertedHi;
|
||||
|
||||
ColorspaceConvert555To8888Opaque_AltiVec<SWAP_RB>( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi );
|
||||
vec_st(dstConvertedHi, 0, dst+i);
|
||||
vec_st(dstConvertedLo, 16, dst+i);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
size_t ColorspaceConvertBuffer555To6665Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec128; i+=8)
|
||||
{
|
||||
v128u32 dstConvertedLo, dstConvertedHi;
|
||||
|
||||
ColorspaceConvert555To6665Opaque_AltiVec<SWAP_RB>( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi );
|
||||
vec_st(dstConvertedHi, 0, dst+i);
|
||||
vec_st(dstConvertedLo, 16, dst+i);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
size_t ColorspaceConvertBuffer8888To6665_AltiVec(const u32 *src, u32 *dst, size_t pixCountVec128)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec128; i+=4)
|
||||
{
|
||||
vec_st( ColorspaceConvert8888To6665_AltiVec<SWAP_RB>(vec_ld(0, src+i)), 0, dst+i );
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
size_t ColorspaceConvertBuffer6665To8888_AltiVec(const u32 *src, u32 *dst, size_t pixCountVec128)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec128; i+=4)
|
||||
{
|
||||
vec_st( ColorspaceConvert6665To8888_AltiVec<SWAP_RB>(vec_ld(0, src+i)), 0, dst+i );
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
size_t ColorspaceConvertBuffer8888To5551_AltiVec(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec128)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec128; i+=8)
|
||||
{
|
||||
vec_st( ColorspaceConvert8888To5551_AltiVec<SWAP_RB>(vec_ld(0, src+i), vec_ld(16, src+i)), 0, dst+i );
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
size_t ColorspaceConvertBuffer6665To5551_AltiVec(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec128)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec128; i+=8)
|
||||
{
|
||||
vec_st( ColorspaceConvert6665To5551_AltiVec<SWAP_RB>(vec_ld(0, src+i), vec_ld(16, src+i)), 0, dst+i );
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To8888Opaque_AltiVec<false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To8888Opaque_AltiVec<true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To6665Opaque_AltiVec<false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To6665Opaque_AltiVec<true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AltiVec::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To6665_AltiVec<false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AltiVec::ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To6665_AltiVec<true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AltiVec::ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To8888_AltiVec<false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AltiVec::ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To8888_AltiVec<true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AltiVec::ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To5551_AltiVec<false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AltiVec::ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To5551_AltiVec<true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AltiVec::ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To5551_AltiVec<false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_AltiVec::ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To5551_AltiVec<true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
template void ColorspaceConvert555To8888_AltiVec<true>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
|
||||
template void ColorspaceConvert555To8888_AltiVec<false>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
|
||||
|
||||
template void ColorspaceConvert555To6665_AltiVec<true>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
|
||||
template void ColorspaceConvert555To6665_AltiVec<false>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
|
||||
|
||||
template void ColorspaceConvert555To8888Opaque_AltiVec<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
|
||||
template void ColorspaceConvert555To8888Opaque_AltiVec<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
|
||||
|
||||
template void ColorspaceConvert555To6665Opaque_AltiVec<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
|
||||
template void ColorspaceConvert555To6665Opaque_AltiVec<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
|
||||
|
||||
template v128u32 ColorspaceConvert8888To6665_AltiVec<true>(const v128u32 &src);
|
||||
template v128u32 ColorspaceConvert8888To6665_AltiVec<false>(const v128u32 &src);
|
||||
|
||||
template v128u32 ColorspaceConvert6665To8888_AltiVec<true>(const v128u32 &src);
|
||||
template v128u32 ColorspaceConvert6665To8888_AltiVec<false>(const v128u32 &src);
|
||||
|
||||
template v128u16 ColorspaceConvert8888To5551_AltiVec<true>(const v128u32 &srcLo, const v128u32 &srcHi);
|
||||
template v128u16 ColorspaceConvert8888To5551_AltiVec<false>(const v128u32 &srcLo, const v128u32 &srcHi);
|
||||
|
||||
template v128u16 ColorspaceConvert6665To5551_AltiVec<true>(const v128u32 &srcLo, const v128u32 &srcHi);
|
||||
template v128u16 ColorspaceConvert6665To5551_AltiVec<false>(const v128u32 &srcLo, const v128u32 &srcHi);
|
||||
|
||||
#endif // ENABLE_ALTIVEC
|
|
@ -0,0 +1,64 @@
|
|||
/*
|
||||
Copyright (C) 2016 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This file is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with the this software. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef COLORSPACEHANDLER_ALTIVEC_H
|
||||
#define COLORSPACEHANDLER_ALTIVEC_H
|
||||
|
||||
#include "colorspacehandler.h"
|
||||
|
||||
#ifndef ENABLE_ALTIVEC
|
||||
#warning This header requires PowerPC AltiVec support.
|
||||
#else
|
||||
|
||||
// Per-vector colorspace conversion templates (AltiVec).
// Only declarations live here; no definitions are provided in this header.
// SWAP_RB is a compile-time flag for exchanging the red and blue channels.

template <bool SWAP_RB>
void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);

template <bool SWAP_RB>
void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);

template <bool SWAP_RB>
void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);

template <bool SWAP_RB>
void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);

template <bool SWAP_RB>
v128u32 ColorspaceConvert8888To6665_AltiVec(const v128u32 &src);

template <bool SWAP_RB>
v128u32 ColorspaceConvert6665To8888_AltiVec(const v128u32 &src);

template <bool SWAP_RB>
v128u16 ColorspaceConvert8888To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi);

template <bool SWAP_RB>
v128u16 ColorspaceConvert6665To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi);
|
||||
|
||||
// AltiVec has very poor support for dealing with unaligned addresses (it's possible, just
|
||||
// very obtuse), so we're not even going to bother dealing with any unaligned addresses.
|
||||
class ColorspaceHandler_AltiVec : public ColorspaceHandler
|
||||
{
|
||||
public:
|
||||
ColorspaceHandler_AltiVec() {};
|
||||
|
||||
size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
};
|
||||
|
||||
#endif // ENABLE_ALTIVEC
|
||||
|
||||
#endif /* COLORSPACEHANDLER_ALTIVEC_H */
|
|
@ -0,0 +1,503 @@
|
|||
/*
|
||||
Copyright (C) 2016 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This file is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this software. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "colorspacehandler_SSE2.h"
|
||||
|
||||
#ifndef ENABLE_SSE2
|
||||
#error This code requires SSE2 support.
|
||||
#else
|
||||
|
||||
#include <emmintrin.h>
|
||||
|
||||
#ifdef ENABLE_SSSE3
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ColorspaceConvert555To8888_SSE2(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi)
|
||||
{
|
||||
v128u32 src32;
|
||||
|
||||
// Conversion algorithm:
|
||||
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
|
||||
src32 = _mm_unpacklo_epi16(srcColor, _mm_setzero_si128());
|
||||
dstLo = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 19), _mm_srli_epi32(src32, 7)) : _mm_or_si128(_mm_slli_epi32(src32, 3), _mm_slli_epi32(src32, 9));
|
||||
dstLo = _mm_and_si128( dstLo, _mm_set1_epi32(0x00F800F8) );
|
||||
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_slli_epi32(src32, 6), _mm_set1_epi32(0x0000F800)) );
|
||||
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_srli_epi32(dstLo, 5), _mm_set1_epi32(0x00070707)) );
|
||||
dstLo = _mm_or_si128( dstLo, srcAlphaBits32Lo );
|
||||
|
||||
src32 = _mm_unpackhi_epi16(srcColor, _mm_setzero_si128());
|
||||
dstHi = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 19), _mm_srli_epi32(src32, 7)) : _mm_or_si128(_mm_slli_epi32(src32, 3), _mm_slli_epi32(src32, 9));
|
||||
dstHi = _mm_and_si128( dstHi, _mm_set1_epi32(0x00F800F8) );
|
||||
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_slli_epi32(src32, 6), _mm_set1_epi32(0x0000F800)) );
|
||||
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_srli_epi32(dstHi, 5), _mm_set1_epi32(0x00070707)) );
|
||||
dstHi = _mm_or_si128( dstHi, srcAlphaBits32Hi );
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ColorspaceConvert555To6665_SSE2(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi)
|
||||
{
|
||||
v128u32 src32;
|
||||
|
||||
// Conversion algorithm:
|
||||
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
|
||||
src32 = _mm_unpacklo_epi16(srcColor, _mm_setzero_si128());
|
||||
dstLo = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 17), _mm_srli_epi32(src32, 9)) : _mm_or_si128(_mm_slli_epi32(src32, 1), _mm_slli_epi32(src32, 7));
|
||||
dstLo = _mm_and_si128( dstLo, _mm_set1_epi32(0x003E003E) );
|
||||
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00003E00)) );
|
||||
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_srli_epi32(dstLo, 5), _mm_set1_epi32(0x00010101)) );
|
||||
dstLo = _mm_or_si128( dstLo, srcAlphaBits32Lo );
|
||||
|
||||
src32 = _mm_unpackhi_epi16(srcColor, _mm_setzero_si128());
|
||||
dstHi = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 17), _mm_srli_epi32(src32, 9)) : _mm_or_si128(_mm_slli_epi32(src32, 1), _mm_slli_epi32(src32, 7));
|
||||
dstHi = _mm_and_si128( dstHi, _mm_set1_epi32(0x003E003E) );
|
||||
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00003E00)) );
|
||||
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_srli_epi32(dstHi, 5), _mm_set1_epi32(0x00010101)) );
|
||||
dstHi = _mm_or_si128( dstHi, srcAlphaBits32Hi );
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ColorspaceConvert555To8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
|
||||
{
|
||||
const v128u32 srcAlphaBits32 = _mm_set1_epi32(0xFF000000);
|
||||
ColorspaceConvert555To8888_SSE2<SWAP_RB>(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE void ColorspaceConvert555To6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
|
||||
{
|
||||
const v128u32 srcAlphaBits32 = _mm_set1_epi32(0x1F000000);
|
||||
ColorspaceConvert555To6665_SSE2<SWAP_RB>(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE v128u32 ColorspaceConvert8888To6665_SSE2(const v128u32 &src)
|
||||
{
|
||||
// Conversion algorithm:
|
||||
// RGB 8-bit to 6-bit formula: dstRGB6 = (srcRGB8 >> 2)
|
||||
// Alpha 8-bit to 6-bit formula: dstA5 = (srcA8 >> 3)
|
||||
v128u32 rgb;
|
||||
const v128u32 a = _mm_and_si128( _mm_srli_epi32(src, 3), _mm_set1_epi32(0x1F000000) );
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
#ifdef ENABLE_SSSE3
|
||||
rgb = _mm_and_si128( _mm_srli_epi32(src, 2), _mm_set1_epi32(0x003F3F3F) );
|
||||
rgb = _mm_shuffle_epi8( rgb, _mm_set_epi8(15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2) );
|
||||
#else
|
||||
rgb = _mm_or_si128( _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x003F0000)), 18), _mm_or_si128(_mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x00003F00)), 2), _mm_slli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x0000003F)), 14)) );
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
rgb = _mm_and_si128( _mm_srli_epi32(src, 2), _mm_set1_epi32(0x003F3F3F) );
|
||||
}
|
||||
|
||||
return _mm_or_si128(rgb, a);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE v128u32 ColorspaceConvert6665To8888_SSE2(const v128u32 &src)
|
||||
{
|
||||
// Conversion algorithm:
|
||||
// RGB 6-bit to 8-bit formula: dstRGB8 = (srcRGB6 << 2) | ((srcRGB6 >> 4) & 0x03)
|
||||
// Alpha 5-bit to 8-bit formula: dstA8 = (srcA5 << 3) | ((srcA5 >> 2) & 0x07)
|
||||
v128u32 rgb = _mm_or_si128( _mm_and_si128(_mm_slli_epi32(src, 2), _mm_set1_epi32(0x00FCFCFC)), _mm_and_si128(_mm_srli_epi32(src, 4), _mm_set1_epi32(0x00030303)) );
|
||||
const v128u32 a = _mm_or_si128( _mm_and_si128(_mm_slli_epi32(src, 3), _mm_set1_epi32(0xF8000000)), _mm_and_si128(_mm_srli_epi32(src, 2), _mm_set1_epi32(0x07000000)) );
|
||||
|
||||
if (SWAP_RB)
|
||||
{
|
||||
#ifdef ENABLE_SSSE3
|
||||
rgb = _mm_shuffle_epi8( rgb, _mm_set_epi8(15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2) );
|
||||
#else
|
||||
rgb = _mm_or_si128( _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x00FF0000)), 16), _mm_or_si128(_mm_and_si128(src, _mm_set1_epi32(0x0000FF00)), _mm_slli_epi32(_mm_and_si128(src, _mm_set1_epi32(0x000000FF)), 16)) );
|
||||
#endif
|
||||
}
|
||||
|
||||
return _mm_or_si128(rgb, a);
|
||||
}
|
||||
|
||||
template <NDSColorFormat COLORFORMAT, bool SWAP_RB>
|
||||
FORCEINLINE v128u16 _ConvertColorBaseTo5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi)
|
||||
{
|
||||
if (COLORFORMAT == NDSColorFormat_BGR555_Rev)
|
||||
{
|
||||
return srcLo;
|
||||
}
|
||||
|
||||
v128u32 rgbLo;
|
||||
v128u32 rgbHi;
|
||||
v128u16 alpha;
|
||||
|
||||
if (COLORFORMAT == NDSColorFormat_BGR666_Rev)
|
||||
{
|
||||
if (SWAP_RB)
|
||||
{
|
||||
// Convert color from low bits
|
||||
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 17), _mm_set1_epi32(0x0000001F));
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 4), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_slli_epi32(srcLo, 9), _mm_set1_epi32(0x00007C00)) );
|
||||
|
||||
// Convert color from high bits
|
||||
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 17), _mm_set1_epi32(0x0000001F));
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 4), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_slli_epi32(srcHi, 9), _mm_set1_epi32(0x00007C00)) );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Convert color from low bits
|
||||
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 1), _mm_set1_epi32(0x0000001F));
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 4), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 7), _mm_set1_epi32(0x00007C00)) );
|
||||
|
||||
// Convert color from high bits
|
||||
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 1), _mm_set1_epi32(0x0000001F));
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 4), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 7), _mm_set1_epi32(0x00007C00)) );
|
||||
}
|
||||
|
||||
// Convert alpha
|
||||
alpha = _mm_packs_epi32( _mm_and_si128(_mm_srli_epi32(srcLo, 24), _mm_set1_epi32(0x0000001F)), _mm_and_si128(_mm_srli_epi32(srcHi, 24), _mm_set1_epi32(0x0000001F)) );
|
||||
alpha = _mm_cmpgt_epi16(alpha, _mm_setzero_si128());
|
||||
alpha = _mm_and_si128(alpha, _mm_set1_epi16(0x8000));
|
||||
}
|
||||
else if (COLORFORMAT == NDSColorFormat_BGR888_Rev)
|
||||
{
|
||||
if (SWAP_RB)
|
||||
{
|
||||
// Convert color from low bits
|
||||
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 19), _mm_set1_epi32(0x0000001F));
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 6), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_slli_epi32(srcLo, 7), _mm_set1_epi32(0x00007C00)) );
|
||||
|
||||
// Convert color from high bits
|
||||
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 19), _mm_set1_epi32(0x0000001F));
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 6), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_slli_epi32(srcHi, 7), _mm_set1_epi32(0x00007C00)) );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Convert color from low bits
|
||||
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 3), _mm_set1_epi32(0x0000001F));
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 6), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 9), _mm_set1_epi32(0x00007C00)) );
|
||||
|
||||
// Convert color from high bits
|
||||
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 3), _mm_set1_epi32(0x0000001F));
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 6), _mm_set1_epi32(0x000003E0)) );
|
||||
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 9), _mm_set1_epi32(0x00007C00)) );
|
||||
}
|
||||
|
||||
// Convert alpha
|
||||
alpha = _mm_packs_epi32( _mm_srli_epi32(srcLo, 24), _mm_srli_epi32(srcHi, 24) );
|
||||
alpha = _mm_cmpgt_epi16(alpha, _mm_setzero_si128());
|
||||
alpha = _mm_and_si128(alpha, _mm_set1_epi16(0x8000));
|
||||
}
|
||||
|
||||
return _mm_or_si128(_mm_packs_epi32(rgbLo, rgbHi), alpha);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE v128u16 ColorspaceConvert8888To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi)
|
||||
{
|
||||
return _ConvertColorBaseTo5551_SSE2<NDSColorFormat_BGR888_Rev, SWAP_RB>(srcLo, srcHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB>
|
||||
FORCEINLINE v128u16 ColorspaceConvert6665To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi)
|
||||
{
|
||||
return _ConvertColorBaseTo5551_SSE2<NDSColorFormat_BGR666_Rev, SWAP_RB>(srcLo, srcHi);
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
static size_t ColorspaceConvertBuffer555To8888Opaque_SSE2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec128; i+=8)
|
||||
{
|
||||
v128u16 src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i));
|
||||
v128u32 dstConvertedLo, dstConvertedHi;
|
||||
ColorspaceConvert555To8888Opaque_SSE2<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
|
||||
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm_storeu_si128((v128u32 *)(dst+i+0), dstConvertedLo);
|
||||
_mm_storeu_si128((v128u32 *)(dst+i+4), dstConvertedHi);
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_store_si128((v128u32 *)(dst+i+0), dstConvertedLo);
|
||||
_mm_store_si128((v128u32 *)(dst+i+4), dstConvertedHi);
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
size_t ColorspaceConvertBuffer555To6665Opaque_SSE2(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec128; i+=8)
|
||||
{
|
||||
v128u16 src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i));
|
||||
v128u32 dstConvertedLo, dstConvertedHi;
|
||||
ColorspaceConvert555To6665Opaque_SSE2<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
|
||||
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm_storeu_si128((v128u32 *)(dst+i+0), dstConvertedLo);
|
||||
_mm_storeu_si128((v128u32 *)(dst+i+4), dstConvertedHi);
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_store_si128((v128u32 *)(dst+i+0), dstConvertedLo);
|
||||
_mm_store_si128((v128u32 *)(dst+i+4), dstConvertedHi);
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
size_t ColorspaceConvertBuffer8888To6665_SSE2(const u32 *src, u32 *dst, size_t pixCountVec128)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec128; i+=4)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm_storeu_si128( (v128u32 *)(dst+i), ColorspaceConvert8888To6665_SSE2<SWAP_RB>(_mm_loadu_si128((v128u32 *)(src+i))) );
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_store_si128( (v128u32 *)(dst+i), ColorspaceConvert8888To6665_SSE2<SWAP_RB>(_mm_load_si128((v128u32 *)(src+i))) );
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
size_t ColorspaceConvertBuffer6665To8888_SSE2(const u32 *src, u32 *dst, size_t pixCountVec128)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec128; i+=4)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm_storeu_si128( (v128u32 *)(dst+i), ColorspaceConvert6665To8888_SSE2<SWAP_RB>(_mm_loadu_si128((v128u32 *)(src+i))) );
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_store_si128( (v128u32 *)(dst+i), ColorspaceConvert6665To8888_SSE2<SWAP_RB>(_mm_load_si128((v128u32 *)(src+i))) );
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
size_t ColorspaceConvertBuffer8888To5551_SSE2(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec128)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec128; i+=8)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm_storeu_si128( (v128u16 *)(dst+i), ColorspaceConvert8888To5551_SSE2<SWAP_RB>(_mm_loadu_si128((v128u32 *)(src+i)), _mm_loadu_si128((v128u32 *)(src+i+4))) );
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_store_si128( (v128u16 *)(dst+i), ColorspaceConvert8888To5551_SSE2<SWAP_RB>(_mm_load_si128((v128u32 *)(src+i)), _mm_load_si128((v128u32 *)(src+i+4))) );
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
template <bool SWAP_RB, bool IS_UNALIGNED>
|
||||
size_t ColorspaceConvertBuffer6665To5551_SSE2(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec128)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < pixCountVec128; i+=8)
|
||||
{
|
||||
if (IS_UNALIGNED)
|
||||
{
|
||||
_mm_storeu_si128( (v128u16 *)(dst+i), ColorspaceConvert6665To5551_SSE2<SWAP_RB>(_mm_loadu_si128((v128u32 *)(src+i)), _mm_loadu_si128((v128u32 *)(src+i+4))) );
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_store_si128( (v128u16 *)(dst+i), ColorspaceConvert6665To5551_SSE2<SWAP_RB>(_mm_load_si128((v128u32 *)(src+i)), _mm_load_si128((v128u32 *)(src+i+4))) );
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To8888Opaque_SSE2<false, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To8888Opaque_SSE2<true, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To8888Opaque_SSE2<false, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To8888Opaque_SSE2<true, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To6665Opaque_SSE2<false, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To6665Opaque_SSE2<true, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To6665Opaque_SSE2<false, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer555To6665Opaque_SSE2<true, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To6665_SSE2<false, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To6665_SSE2<true, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To6665_SSE2<false, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To6665_SSE2<true, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To8888_SSE2<false, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To8888_SSE2<true, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To8888_SSE2<false, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To8888_SSE2<true, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To5551_SSE2<false, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To5551_SSE2<true, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To5551_SSE2<false, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer8888To5551_SSE2<true, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To5551_SSE2<false, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To5551_SSE2<true, false>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To5551_SSE2<false, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
|
||||
{
|
||||
return ColorspaceConvertBuffer6665To5551_SSE2<true, true>(src, dst, pixCount);
|
||||
}
|
||||
|
||||
template void ColorspaceConvert555To8888_SSE2<true>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
|
||||
template void ColorspaceConvert555To8888_SSE2<false>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
|
||||
|
||||
template void ColorspaceConvert555To6665_SSE2<true>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
|
||||
template void ColorspaceConvert555To6665_SSE2<false>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
|
||||
|
||||
template void ColorspaceConvert555To8888Opaque_SSE2<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
|
||||
template void ColorspaceConvert555To8888Opaque_SSE2<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
|
||||
|
||||
template void ColorspaceConvert555To6665Opaque_SSE2<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
|
||||
template void ColorspaceConvert555To6665Opaque_SSE2<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
|
||||
|
||||
template v128u32 ColorspaceConvert8888To6665_SSE2<true>(const v128u32 &src);
|
||||
template v128u32 ColorspaceConvert8888To6665_SSE2<false>(const v128u32 &src);
|
||||
|
||||
template v128u32 ColorspaceConvert6665To8888_SSE2<true>(const v128u32 &src);
|
||||
template v128u32 ColorspaceConvert6665To8888_SSE2<false>(const v128u32 &src);
|
||||
|
||||
template v128u16 ColorspaceConvert8888To5551_SSE2<true>(const v128u32 &srcLo, const v128u32 &srcHi);
|
||||
template v128u16 ColorspaceConvert8888To5551_SSE2<false>(const v128u32 &srcLo, const v128u32 &srcHi);
|
||||
|
||||
template v128u16 ColorspaceConvert6665To5551_SSE2<true>(const v128u32 &srcLo, const v128u32 &srcHi);
|
||||
template v128u16 ColorspaceConvert6665To5551_SSE2<false>(const v128u32 &srcLo, const v128u32 &srcHi);
|
||||
|
||||
#endif // ENABLE_SSE2
|
|
@ -0,0 +1,74 @@
|
|||
/*
|
||||
Copyright (C) 2016 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This file is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this software. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef COLORSPACEHANDLER_SSE2_H
|
||||
#define COLORSPACEHANDLER_SSE2_H
|
||||
|
||||
#include "colorspacehandler.h"
|
||||
|
||||
#ifndef ENABLE_SSE2
|
||||
#warning This header requires SSE2 support.
|
||||
#else
|
||||
|
||||
// Per-vector conversion primitives (SSE2 flavour). SWAP_RB selects the variant that
// exchanges the outer two color channels while converting.

// 15-bit color -> 32-bit color; the caller supplies the alpha bits for each output half.
template <bool SWAP_RB>
void ColorspaceConvert555To8888_SSE2(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);

template <bool SWAP_RB>
void ColorspaceConvert555To6665_SSE2(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);

// 15-bit color -> 32-bit color without caller-supplied alpha bits.
template <bool SWAP_RB>
void ColorspaceConvert555To8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);

template <bool SWAP_RB>
void ColorspaceConvert555To6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);

// 32-bit color <-> 32-bit color, one vector in and one vector out.
template <bool SWAP_RB>
v128u32 ColorspaceConvert8888To6665_SSE2(const v128u32 &src);

template <bool SWAP_RB>
v128u32 ColorspaceConvert6665To8888_SSE2(const v128u32 &src);

// 32-bit color -> 16-bit color; two input vectors are packed into one output vector.
template <bool SWAP_RB>
v128u16 ColorspaceConvert8888To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi);

template <bool SWAP_RB>
v128u16 ColorspaceConvert6665To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi);
|
||||
|
||||
class ColorspaceHandler_SSE2 : public ColorspaceHandler
|
||||
{
|
||||
public:
|
||||
ColorspaceHandler_SSE2() {};
|
||||
|
||||
size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
|
||||
size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
|
||||
};
|
||||
|
||||
#endif // ENABLE_SSE2
|
||||
|
||||
#endif /* COLORSPACEHANDLER_SSE2_H */
|
|
@ -59,44 +59,41 @@
|
|||
#define DESMUME_PLATFORM_STRING ""
|
||||
#endif
|
||||
|
||||
#define DESMUME_SSE_STRING ""
|
||||
#define DESMUME_AVX_STRING ""
|
||||
#define DESMUME_CPUEXT_PRIMARY_STRING ""
|
||||
#define DESMUME_CPUEXT_SECONDARY_STRING ""
|
||||
|
||||
#ifdef ENABLE_SSE
|
||||
#undef DESMUME_SSE_STRING
|
||||
#define DESMUME_SSE_STRING " SSE"
|
||||
#endif
|
||||
#ifdef ENABLE_SSE2
|
||||
#undef DESMUME_SSE_STRING
|
||||
#define DESMUME_SSE_STRING " SSE2"
|
||||
#endif
|
||||
#ifdef ENABLE_SSE3
|
||||
#undef DESMUME_SSE_STRING
|
||||
#define DESMUME_SSE_STRING " SSE3"
|
||||
#endif
|
||||
#ifdef ENABLE_SSSE3
|
||||
#undef DESMUME_SSE_STRING
|
||||
#define DESMUME_SSE_STRING " SSSE3"
|
||||
#endif
|
||||
#ifdef ENABLE_SSE4_1
|
||||
#undef DESMUME_SSE_STRING
|
||||
#define DESMUME_SSE_STRING " SSE4.1"
|
||||
#endif
|
||||
#ifdef ENABLE_SSE4_2
|
||||
#undef DESMUME_SSE_STRING
|
||||
#define DESMUME_SSE_STRING " SSE4.2"
|
||||
#endif
|
||||
#ifdef ENABLE_AVX
|
||||
#undef DESMUME_AVX_STRING
|
||||
#define DESMUME_AVX_STRING "+AVX"
|
||||
#endif
|
||||
#ifdef ENABLE_AVX2
|
||||
#undef DESMUME_AVX_STRING
|
||||
#define DESMUME_AVX_STRING "+AVX2"
|
||||
#if defined(ENABLE_SSE4_2)
|
||||
#undef DESMUME_CPUEXT_PRIMARY_STRING
|
||||
#define DESMUME_CPUEXT_PRIMARY_STRING " SSE4.2"
|
||||
#elif defined(ENABLE_SSE4_1)
|
||||
#undef DESMUME_CPUEXT_PRIMARY_STRING
|
||||
#define DESMUME_CPUEXT_PRIMARY_STRING " SSE4.1"
|
||||
#elif defined(ENABLE_SSSE3)
|
||||
#undef DESMUME_CPUEXT_PRIMARY_STRING
|
||||
#define DESMUME_CPUEXT_PRIMARY_STRING " SSSE3"
|
||||
#elif defined(ENABLE_SSE3)
|
||||
#undef DESMUME_CPUEXT_PRIMARY_STRING
|
||||
#define DESMUME_CPUEXT_PRIMARY_STRING " SSE3"
|
||||
#elif defined(ENABLE_SSE2)
|
||||
#undef DESMUME_CPUEXT_PRIMARY_STRING
|
||||
#define DESMUME_CPUEXT_PRIMARY_STRING " SSE2"
|
||||
#elif defined(ENABLE_SSE)
|
||||
#undef DESMUME_CPUEXT_PRIMARY_STRING
|
||||
#define DESMUME_CPUEXT_PRIMARY_STRING " SSE"
|
||||
#elif defined(ENABLE_ALTIVEC)
|
||||
#undef DESMUME_CPUEXT_PRIMARY_STRING
|
||||
#define DESMUME_CPUEXT_PRIMARY_STRING " AltiVec"
|
||||
#endif
|
||||
|
||||
#define DESMUME_CPUEXT_STRING DESMUME_SSE_STRING DESMUME_AVX_STRING
|
||||
#if defined(ENABLE_AVX2)
|
||||
#undef DESMUME_CPUEXT_SECONDARY_STRING
|
||||
#define DESMUME_CPUEXT_SECONDARY_STRING "+AVX2"
|
||||
#elif defined(ENABLE_AVX)
|
||||
#undef DESMUME_CPUEXT_SECONDARY_STRING
|
||||
#define DESMUME_CPUEXT_SECONDARY_STRING "+AVX"
|
||||
#endif
|
||||
|
||||
#define DESMUME_CPUEXT_STRING DESMUME_CPUEXT_PRIMARY_STRING DESMUME_CPUEXT_SECONDARY_STRING
|
||||
|
||||
#ifdef DEVELOPER
|
||||
#define DESMUME_FEATURE_STRING " dev+"
|
||||
|
|
Loading…
Reference in New Issue