Merge commit '9c1f523a725abca2fcfbf07cd11d12077154a80c' into round-1-start

# Conflicts:
#	desmume/src/GPU.h
#	desmume/src/frontend/windows/DeSmuME.vcxproj
#	desmume/src/frontend/windows/DeSmuME.vcxproj.filters
#	desmume/src/types.h
This commit is contained in:
zeromus 2016-11-23 21:09:50 -06:00
commit ae64d4659b
33 changed files with 4033 additions and 1847 deletions

View File

@ -237,6 +237,15 @@ void GFX_FIFOsend(u8 cmd, u32 param)
if(IsMatrixStackCommand(cmd))
gxFIFO.matrix_stack_op_size++;
//along the same lines:
//american girls julie finds a way will put a bunch of stuff and then a box test into the fifo and then immediately test the busy flag
//so we need to set the busy flag here.
//does it expect the fifo to be running then? well, it's definitely jammed -- making it unjammed at one point did fix this bug.
//it's still not clear whether we're handling the immediate vs fifo commands properly at all :(
//anyway, here we go, similar treatment. consider this a hack.
if(cmd == 0x70) MMU_new.gxstat.tb = 1; //just set the flag--youre insane if you queue more than one of these anyway
if(cmd == 0x71) MMU_new.gxstat.tb = 1;
if(gxFIFO.size>=HACK_GXIFO_SIZE) {
printf("--FIFO FULL-- : %d\n",gxFIFO.size);
}

View File

@ -18,6 +18,14 @@
along with this software. If not, see <http://www.gnu.org/licenses/>.
*/
// Opt-in compile-speed mode: everything below only takes effect when FASTBUILD is defined.
#ifdef FASTBUILD
// Redefine FORCEINLINE as an empty macro.
#undef FORCEINLINE
#define FORCEINLINE
//compilation speed hack (cuts time exactly in half by cutting out permutations)
#define DISABLE_MOSAIC
// Removes the ColorEffect_Disable branch that is guarded by #ifndef DISABLE_COLOREFFECTDISABLEHINT.
#define DISABLE_COLOREFFECTDISABLEHINT
#endif
#include "GPU.h"
#include <assert.h>
@ -40,75 +48,8 @@
#include "matrix.h"
#include "emufile.h"
#ifdef FASTBUILD
#undef FORCEINLINE
#define FORCEINLINE
//compilation speed hack (cuts time exactly in half by cutting out permutations)
#define DISABLE_MOSAIC
#endif
u32 Render3DFramesPerSecond;
CACHE_ALIGN u32 color_555_to_6665_opaque[32768];
CACHE_ALIGN u32 color_555_to_6665_opaque_swap_rb[32768];
CACHE_ALIGN u32 color_555_to_666[32768];
CACHE_ALIGN u32 color_555_to_8888_opaque[32768];
CACHE_ALIGN u32 color_555_to_8888_opaque_swap_rb[32768];
CACHE_ALIGN u32 color_555_to_888[32768];
//is this a crazy idea? this table spreads 5 bits evenly over 31 from exactly 0 to INT_MAX
// Indexed by a 5-bit value v (0..31). Each entry repeats v's bit pattern down the 31-bit word:
// (v<<26) | (v<<21) | (v<<16) | (v<<11) | (v<<6) | (v<<1) | (v>>4)
// so that 0 maps to 0x00000000 and 31 maps to 0x7FFFFFFF.
CACHE_ALIGN const u32 material_5bit_to_31bit[] = {
0x00000000, 0x04210842, 0x08421084, 0x0C6318C6,
0x10842108, 0x14A5294A, 0x18C6318C, 0x1CE739CE,
0x21084210, 0x25294A52, 0x294A5294, 0x2D6B5AD6,
0x318C6318, 0x35AD6B5A, 0x39CE739C, 0x3DEF7BDE,
0x42108421, 0x46318C63, 0x4A5294A5, 0x4E739CE7,
0x5294A529, 0x56B5AD6B, 0x5AD6B5AD, 0x5EF7BDEF,
0x6318C631, 0x6739CE73, 0x6B5AD6B5, 0x6F7BDEF7,
0x739CE739, 0x77BDEF7B, 0x7BDEF7BD, 0x7FFFFFFF
};
// 5-bit to 6-bit conversions use this formula -- dst = (src == 0) ? 0 : (2*src) + 1
// Reference GBATEK: http://problemkaputt.de/gbatek.htm#ds3dtextureblending
// Indexed by the 5-bit source value (32 entries); 0 maps to 0x00 and 31 maps to 0x3F.
CACHE_ALIGN const u8 material_5bit_to_6bit[] = {
0x00, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F,
0x21, 0x23, 0x25, 0x27, 0x29, 0x2B, 0x2D, 0x2F,
0x31, 0x33, 0x35, 0x37, 0x39, 0x3B, 0x3D, 0x3F
};
// 5-bit to 8-bit expansion, indexed by the 5-bit source value v (32 entries).
// Each entry equals (v << 3) | (v >> 2): the top three source bits are replicated
// into the low bits so that 0 maps to 0x00 and 31 maps to 0xFF.
CACHE_ALIGN const u8 material_5bit_to_8bit[] = {
0x00, 0x08, 0x10, 0x18, 0x21, 0x29, 0x31, 0x39,
0x42, 0x4A, 0x52, 0x5A, 0x63, 0x6B, 0x73, 0x7B,
0x84, 0x8C, 0x94, 0x9C, 0xA5, 0xAD, 0xB5, 0xBD,
0xC6, 0xCE, 0xD6, 0xDE, 0xE7, 0xEF, 0xF7, 0xFF
};
// 6-bit to 8-bit expansion, indexed by the 6-bit source value v (64 entries).
// Each entry equals (v << 2) | (v >> 4): the top two source bits are replicated
// into the low bits so that 0 maps to 0x00 and 63 maps to 0xFF.
CACHE_ALIGN const u8 material_6bit_to_8bit[] = {
0x00, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C,
0x20, 0x24, 0x28, 0x2C, 0x30, 0x34, 0x38, 0x3C,
0x41, 0x45, 0x49, 0x4D, 0x51, 0x55, 0x59, 0x5D,
0x61, 0x65, 0x69, 0x6D, 0x71, 0x75, 0x79, 0x7D,
0x82, 0x86, 0x8A, 0x8E, 0x92, 0x96, 0x9A, 0x9E,
0xA2, 0xA6, 0xAA, 0xAE, 0xB2, 0xB6, 0xBA, 0xBE,
0xC3, 0xC7, 0xCB, 0xCF, 0xD3, 0xD7, 0xDB, 0xDF,
0xE3, 0xE7, 0xEB, 0xEF, 0xF3, 0xF7, 0xFB, 0xFF
};
// 3-bit to 8-bit expansion, indexed by the 3-bit source value v (8 entries).
// Each entry equals (v << 5) | (v << 2) | (v >> 1), i.e. the 3-bit pattern repeated
// across the byte, so that 0 maps to 0x00 and 7 maps to 0xFF.
CACHE_ALIGN const u8 material_3bit_to_8bit[] = {
0x00, 0x24, 0x49, 0x6D, 0x92, 0xB6, 0xDB, 0xFF
};
//maybe not very precise
// 3-bit to 5-bit expansion, indexed by the 3-bit source value v (8 entries).
// The stored values match (v * 31) / 7 with integer (truncating) division.
CACHE_ALIGN const u8 material_3bit_to_5bit[] = {
0, 4, 8, 13, 17, 22, 26, 31
};
//TODO - generate this in the static init method more accurately
// 3-bit to 6-bit expansion, indexed by the 3-bit source value (8 entries).
// The first seven entries are exactly twice the matching material_3bit_to_5bit values;
// the last entry is the 6-bit maximum, 63.
CACHE_ALIGN const u8 material_3bit_to_6bit[] = {
0, 8, 16, 26, 34, 44, 52, 63
};
//instantiate static instance
u16 GPUEngineBase::_brightnessUpTable555[17][0x8000];
FragmentColor GPUEngineBase::_brightnessUpTable666[17][0x8000];
@ -167,7 +108,7 @@ const CACHE_ALIGN BGLayerSize GPUEngineBase::_BGLayerSizeLUT[8][4] = {
{{128,128}, {256,256}, {512,256}, {512,512}}, //affine ext direct
};
static void ExpandLine8(u8 *__restrict dst, const u8 *__restrict src, size_t dstLength)
static FORCEINLINE void ExpandLine8(u8 *__restrict dst, const u8 *__restrict src, size_t dstLength)
{
#ifdef ENABLE_SSSE3
const bool isIntegerScale = ((dstLength % GPU_FRAMEBUFFER_NATIVE_WIDTH) == 0);
@ -1655,11 +1596,11 @@ FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compInfo,
break;
case NDSColorFormat_BGR666_Rev:
dstColor32.color = ConvertColor555To6665Opaque<false>(srcColor16);
dstColor32.color = ColorspaceConvert555To6665Opaque<false>(srcColor16);
break;
case NDSColorFormat_BGR888_Rev:
dstColor32.color = ConvertColor555To8888Opaque<false>(srcColor16);
dstColor32.color = ColorspaceConvert555To8888Opaque<false>(srcColor16);
break;
}
@ -1682,11 +1623,11 @@ FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compInfo,
break;
case NDSColorFormat_BGR666_Rev:
dstColor32.color = ConvertColor555To6665Opaque<false>(srcColor16);
dstColor32.color = ColorspaceConvert555To6665Opaque<false>(srcColor16);
break;
case NDSColorFormat_BGR888_Rev:
dstColor32.color = ConvertColor555To8888Opaque<false>(srcColor16);
dstColor32.color = ColorspaceConvert555To8888Opaque<false>(srcColor16);
break;
}
@ -1767,11 +1708,11 @@ FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compInfo,
break;
case NDSColorFormat_BGR666_Rev:
dstColor32.color = ConvertColor555To6665Opaque<false>(srcColor16);
dstColor32.color = ColorspaceConvert555To6665Opaque<false>(srcColor16);
break;
case NDSColorFormat_BGR888_Rev:
dstColor32.color = ConvertColor555To8888Opaque<false>(srcColor16);
dstColor32.color = ColorspaceConvert555To8888Opaque<false>(srcColor16);
break;
}
break;
@ -1833,13 +1774,13 @@ FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compInfo,
break;
case NDSColorFormat_BGR666_Rev:
srcColor32.color = ConvertColor555To6665Opaque<false>(srcColor16);
srcColor32.color = ColorspaceConvert555To6665Opaque<false>(srcColor16);
dstColor32 = this->_ColorEffectBlend<OUTPUTFORMAT>(srcColor32, dstColor32, blendEVA, blendEVB);
dstColor32.a = 0x1F;
break;
case NDSColorFormat_BGR888_Rev:
srcColor32.color = ConvertColor555To8888Opaque<false>(srcColor16);
srcColor32.color = ColorspaceConvert555To8888Opaque<false>(srcColor16);
dstColor32 = this->_ColorEffectBlend<OUTPUTFORMAT>(srcColor32, dstColor32, blendEVA, blendEVB);
dstColor32.a = 0xFF;
break;
@ -2132,7 +2073,7 @@ FORCEINLINE void GPUEngineBase::_RenderPixel3D(GPUEngineCompositorInfo &compInfo
// Render the pixel using the selected color effect.
if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev)
{
const u16 srcColor16 = ConvertColor6665To5551<false>(srcColor32);
const u16 srcColor16 = ColorspaceConvert6665To5551<false>(srcColor32);
switch (selectedEffect)
{
@ -2695,13 +2636,13 @@ void GPUEngineBase::_RenderPixelsCustom(GPUEngineCompositorInfo &compInfo)
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{
ConvertColor555To6665Opaque<false>(src16[0], src[0], src[1]);
ConvertColor555To6665Opaque<false>(src16[1], src[2], src[3]);
ColorspaceConvert555To6665Opaque_SSE2<false>(src16[0], src[0], src[1]);
ColorspaceConvert555To6665Opaque_SSE2<false>(src16[1], src[2], src[3]);
}
else
{
ConvertColor555To8888Opaque<false>(src16[0], src[0], src[1]);
ConvertColor555To8888Opaque<false>(src16[1], src[2], src[3]);
ColorspaceConvert555To8888Opaque_SSE2<false>(src16[0], src[0], src[1]);
ColorspaceConvert555To8888Opaque_SSE2<false>(src16[1], src[2], src[3]);
}
}
@ -2796,13 +2737,13 @@ void GPUEngineBase::_RenderPixelsCustomVRAM(GPUEngineCompositorInfo &compInfo)
{
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{
ConvertColor555To6665Opaque<false>(src16[0], src[0], src[1]);
ConvertColor555To6665Opaque<false>(src16[1], src[2], src[3]);
ColorspaceConvert555To6665Opaque_SSE2<false>(src16[0], src[0], src[1]);
ColorspaceConvert555To6665Opaque_SSE2<false>(src16[1], src[2], src[3]);
}
else
{
ConvertColor555To8888Opaque<false>(src16[0], src[0], src[1]);
ConvertColor555To8888Opaque<false>(src16[1], src[2], src[3]);
ColorspaceConvert555To8888Opaque_SSE2<false>(src16[0], src[0], src[1]);
ColorspaceConvert555To8888Opaque_SSE2<false>(src16[1], src[2], src[3]);
}
}
@ -4502,7 +4443,7 @@ void GPUEngineBase::UpdateVRAM3DUsageProperties_OBJLayer(const size_t bankIndex)
}
template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED>
void GPUEngineBase::_RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compInfo)
FORCEINLINE void GPUEngineBase::_RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compInfo)
{
bool useCustomVRAM = false;
@ -4538,26 +4479,28 @@ void GPUEngineBase::_RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compInfo)
}
// Pass-through stage of the BG layer render dispatch: forwards compInfo to
// _RenderLine_LayerBG_Final with every template argument unchanged.
template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool COLOREFFECTDISABLEDHINT, bool ISCUSTOMRENDERINGNEEDED>
void GPUEngineBase::_RenderLine_LayerBG_ApplyColorEffectDisabledHint(GPUEngineCompositorInfo &compInfo)
FORCEINLINE void GPUEngineBase::_RenderLine_LayerBG_ApplyColorEffectDisabledHint(GPUEngineCompositorInfo &compInfo)
{
this->_RenderLine_LayerBG_Final<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED>(compInfo);
}
// Turns the run-time color effect state into the compile-time COLOREFFECTDISABLEDHINT
// template argument of _RenderLine_LayerBG_ApplyColorEffectDisabledHint:
//   - true  when compInfo.renderState.colorEffect == ColorEffect_Disable
//   - false otherwise
// When DISABLE_COLOREFFECTDISABLEHINT is defined, the check is compiled out and only the
// "false" instantiation is ever called.
template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool ISCUSTOMRENDERINGNEEDED>
void GPUEngineBase::_RenderLine_LayerBG_ApplyMosaic(GPUEngineCompositorInfo &compInfo)
FORCEINLINE void GPUEngineBase::_RenderLine_LayerBG_ApplyMosaic(GPUEngineCompositorInfo &compInfo)
{
#ifndef DISABLE_COLOREFFECTDISABLEHINT
if (compInfo.renderState.colorEffect == ColorEffect_Disable)
{
this->_RenderLine_LayerBG_ApplyColorEffectDisabledHint<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, true, ISCUSTOMRENDERINGNEEDED>(compInfo);
}
else
#endif
{
this->_RenderLine_LayerBG_ApplyColorEffectDisabledHint<OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, false, ISCUSTOMRENDERINGNEEDED>(compInfo);
}
}
template <NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER, bool WILLPERFORMWINDOWTEST, bool ISCUSTOMRENDERINGNEEDED>
void GPUEngineBase::_RenderLine_LayerBG(GPUEngineCompositorInfo &compInfo)
FORCEINLINE void GPUEngineBase::_RenderLine_LayerBG(GPUEngineCompositorInfo &compInfo)
{
if (ISDEBUGRENDER)
{
@ -4951,7 +4894,7 @@ void GPUEngineBase::ResolveCustomRendering()
// Converts this->renderedBuffer in place: the same pointer is passed as both source and
// destination, and the pixel count is renderedWidth * renderedHeight.
void GPUEngineBase::ResolveRGB666ToRGB888()
{
ConvertColorBuffer6665To8888<false>((u32 *)this->renderedBuffer, (u32 *)this->renderedBuffer, this->renderedWidth * this->renderedHeight);
ColorspaceConvertBuffer6665To8888<false, false>((u32 *)this->renderedBuffer, (u32 *)this->renderedBuffer, this->renderedWidth * this->renderedHeight);
}
void GPUEngineBase::ResolveToCustomFramebuffer()
@ -5575,12 +5518,12 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
case NDSColorFormat_BGR666_Rev:
renderedLineSrcA16 = (u16 *)malloc_alignedCacheLine(compInfo.line.pixelCount * sizeof(u16));
ConvertColorBuffer6665To5551<false, false>((u32 *)compInfo.target.lineColorHead, renderedLineSrcA16, compInfo.line.pixelCount);
ColorspaceConvertBuffer6665To5551<false, false>((u32 *)compInfo.target.lineColorHead, renderedLineSrcA16, compInfo.line.pixelCount);
break;
case NDSColorFormat_BGR888_Rev:
renderedLineSrcA16 = (u16 *)malloc_alignedCacheLine(compInfo.line.pixelCount * sizeof(u16));
ConvertColorBuffer8888To5551<false, false>((u32 *)compInfo.target.lineColorHead, renderedLineSrcA16, compInfo.line.pixelCount);
ColorspaceConvertBuffer8888To5551<false, false>((u32 *)compInfo.target.lineColorHead, renderedLineSrcA16, compInfo.line.pixelCount);
break;
}
}
@ -6570,7 +6513,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
{
const u16 *src = this->_VRAMNativeBlockPtr[DISPCNT.VRAM_Block] + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH);
FragmentColor *dst = (FragmentColor *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH);
ConvertColorBuffer555To6665Opaque<false, false>(src, (u32 *)dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
ColorspaceConvertBuffer555To6665Opaque<false, false>(src, (u32 *)dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
break;
}
@ -6578,7 +6521,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
{
const u16 *src = this->_VRAMNativeBlockPtr[DISPCNT.VRAM_Block] + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH);
FragmentColor *dst = (FragmentColor *)this->nativeBuffer + (l * GPU_FRAMEBUFFER_NATIVE_WIDTH);
ConvertColorBuffer555To8888Opaque<false, false>(src, (u32 *)dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
ColorspaceConvertBuffer555To8888Opaque<false, false>(src, (u32 *)dst, GPU_FRAMEBUFFER_NATIVE_WIDTH);
break;
}
}
@ -6598,7 +6541,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
{
const u16 *src = this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + (_gpuDstLineIndex[l] * customWidth);
FragmentColor *dst = (FragmentColor *)this->customBuffer + (_gpuDstLineIndex[l] * customWidth);
ConvertColorBuffer555To6665Opaque<false, false>(src, (u32 *)dst, customPixCount);
ColorspaceConvertBuffer555To6665Opaque<false, false>(src, (u32 *)dst, customPixCount);
break;
}
@ -6606,7 +6549,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const size_t l)
{
const u16 *src = this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + (_gpuDstLineIndex[l] * customWidth);
FragmentColor *dst = (FragmentColor *)this->customBuffer + (_gpuDstLineIndex[l] * customWidth);
ConvertColorBuffer555To8888Opaque<false, false>(src, (u32 *)dst, customPixCount);
ColorspaceConvertBuffer555To8888Opaque<false, false>(src, (u32 *)dst, customPixCount);
break;
}
}
@ -6802,28 +6745,7 @@ void GPUEngineB::RenderLine(const u16 l)
GPUSubsystem::GPUSubsystem()
{
static bool needInitTables = true;
if (needInitTables)
{
#define RGB15TO18_BITLOGIC(col) ( (material_5bit_to_6bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | material_5bit_to_6bit[(col)&0x1F] )
#define RGB15TO18_SWAP_RB_BITLOGIC(col) ( material_5bit_to_6bit[((col)>>10)&0x1F] | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_6bit[(col)&0x1F]<<16) )
#define RGB15TO24_BITLOGIC(col) ( (material_5bit_to_8bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | material_5bit_to_8bit[(col)&0x1F] )
#define RGB15TO24_SWAP_RB_BITLOGIC(col) ( material_5bit_to_8bit[((col)>>10)&0x1F] | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_8bit[(col)&0x1F]<<16) )
for (size_t i = 0; i < 32768; i++)
{
color_555_to_666[i] = LE_TO_LOCAL_32( RGB15TO18_BITLOGIC(i) );
color_555_to_6665_opaque[i] = LE_TO_LOCAL_32( RGB15TO18_BITLOGIC(i) | 0x1F000000 );
color_555_to_6665_opaque_swap_rb[i] = LE_TO_LOCAL_32( RGB15TO18_SWAP_RB_BITLOGIC(i) | 0x1F000000 );
color_555_to_888[i] = LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) );
color_555_to_8888_opaque[i] = LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) | 0xFF000000 );
color_555_to_8888_opaque_swap_rb[i] = LE_TO_LOCAL_32( RGB15TO24_SWAP_RB_BITLOGIC(i) | 0xFF000000 );
}
needInitTables = false;
}
ColorspaceHandlerInit();
_defaultEventHandler = new GPUEventHandlerDefault;
_event = _defaultEventHandler;
@ -6957,6 +6879,22 @@ void GPUSubsystem::Reset()
osd->clear();
}
// Forces the current 3D renderer to finish any pending render.
//
// willFlush - value temporarily applied to both framebuffer flush states (display and
//             capture) for the duration of the forced RenderFinish() call.
//
// Does nothing when the renderer reports that no finish is pending. Otherwise the
// renderer's previous flush states are restored afterwards, its pending-finish flag is
// cleared, and the event handler's DidRender3DEnd() is invoked.
void GPUSubsystem::ForceRender3DFinishAndFlush(bool willFlush)
{
	// No pending finish on the current renderer: nothing to force.
	if (!CurrentRenderer->GetRenderNeedsFinish())
	{
		return;
	}
	
	// Remember the renderer's flush states so they can be put back after the forced finish.
	bool savedDisplayFlush;
	bool savedCaptureFlush;
	CurrentRenderer->GetFramebufferFlushStates(savedDisplayFlush, savedCaptureFlush);
	
	// Finish with both flush states overridden by the caller's choice.
	CurrentRenderer->SetFramebufferFlushStates(willFlush, willFlush);
	CurrentRenderer->RenderFinish();
	
	// Restore the saved states and clear the pending-finish flag.
	CurrentRenderer->SetFramebufferFlushStates(savedDisplayFlush, savedCaptureFlush);
	CurrentRenderer->SetRenderNeedsFinish(false);
	
	this->_event->DidRender3DEnd();
}
void GPUSubsystem::UpdateRenderProperties()
{
this->_engineMain->vramBlockOBJIndex = VRAM_NO_3D_USAGE;
@ -7082,7 +7020,7 @@ void GPUSubsystem::SetCustomFramebufferSize(size_t w, size_t h, void *clientNati
return;
}
CurrentRenderer->RenderFinish();
GPU->ForceRender3DFinishAndFlush(false);
const float customWidthScale = (float)w / (float)GPU_FRAMEBUFFER_NATIVE_WIDTH;
const float customHeightScale = (float)h / (float)GPU_FRAMEBUFFER_NATIVE_HEIGHT;
@ -7224,7 +7162,7 @@ void GPUSubsystem::SetCustomFramebufferSize(size_t w, size_t h)
void GPUSubsystem::SetColorFormat(const NDSColorFormat outputFormat, void *clientNativeBuffer, void *clientCustomBuffer)
{
CurrentRenderer->RenderFinish();
GPU->ForceRender3DFinishAndFlush(false);
this->_displayInfo.colorFormat = outputFormat;
this->_displayInfo.pixelBytes = (outputFormat == NDSColorFormat_BGR555_Rev) ? sizeof(u16) : sizeof(FragmentColor);
@ -7581,178 +7519,6 @@ void NDSDisplay::SetEngineByID(const GPUEngineID theID)
this->_gpu->SetDisplayByID(this->_ID);
}
// Converts pixCount 16-bit pixels from src into 32-bit pixels in dst using
// ConvertColor555To8888Opaque<SWAP_RB>.
//
// SWAP_RB      - forwarded unchanged to the per-pixel / per-vector converter.
// IS_UNALIGNED - when true the SSE2 path uses unaligned loads and stores; when false it
//                uses the aligned variants.
//
// With ENABLE_SSE2, pixels are processed 8 at a time and any remainder is handled by the
// scalar loop. Without it, the scalar loop handles every pixel.
template <bool SWAP_RB, bool IS_UNALIGNED>
void ConvertColorBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
{
	// Running pixel index shared by the vector loop and the scalar tail loop.
	size_t pix = 0;
	
#ifdef ENABLE_SSE2
	// Largest multiple of 8 that does not exceed pixCount.
	const size_t vecPixCount = (pixCount / 8) * 8;
	while (pix < vecPixCount)
	{
		__m128i srcVec;
		if (IS_UNALIGNED)
		{
			srcVec = _mm_loadu_si128((__m128i *)(src + pix));
		}
		else
		{
			srcVec = _mm_load_si128((__m128i *)(src + pix));
		}
		
		// 8 source pixels expand into two vectors of 4 destination pixels each.
		__m128i convertedLo, convertedHi;
		ConvertColor555To8888Opaque<SWAP_RB>(srcVec, convertedLo, convertedHi);
		
		__m128i *dstLo = (__m128i *)(dst + pix + 0);
		__m128i *dstHi = (__m128i *)(dst + pix + 4);
		if (IS_UNALIGNED)
		{
			_mm_storeu_si128(dstLo, convertedLo);
			_mm_storeu_si128(dstHi, convertedHi);
		}
		else
		{
			_mm_store_si128(dstLo, convertedLo);
			_mm_store_si128(dstHi, convertedHi);
		}
		
		pix += 8;
	}
#endif
	
	// Scalar pass over whatever the vector loop did not cover.
#ifdef ENABLE_SSE2
#pragma LOOPVECTORIZE_DISABLE
#endif
	for (; pix < pixCount; pix++)
	{
		dst[pix] = ConvertColor555To8888Opaque<SWAP_RB>(src[pix]);
	}
}
// Converts pixCount 16-bit pixels from src into 32-bit pixels in dst using
// ConvertColor555To6665Opaque<SWAP_RB>.
//
// SWAP_RB      - forwarded unchanged to the per-pixel / per-vector converter.
// IS_UNALIGNED - when true the SSE2 path uses unaligned loads and stores; when false it
//                uses the aligned variants.
//
// With ENABLE_SSE2, pixels are processed 8 at a time and any remainder is handled by the
// scalar loop. Without it, the scalar loop handles every pixel.
template <bool SWAP_RB, bool IS_UNALIGNED>
void ConvertColorBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
{
	// Running pixel index shared by the vector loop and the scalar tail loop.
	size_t pix = 0;
	
#ifdef ENABLE_SSE2
	// Largest multiple of 8 that does not exceed pixCount.
	const size_t vecPixCount = (pixCount / 8) * 8;
	while (pix < vecPixCount)
	{
		__m128i srcVec;
		if (IS_UNALIGNED)
		{
			srcVec = _mm_loadu_si128((__m128i *)(src + pix));
		}
		else
		{
			srcVec = _mm_load_si128((__m128i *)(src + pix));
		}
		
		// 8 source pixels expand into two vectors of 4 destination pixels each.
		__m128i convertedLo, convertedHi;
		ConvertColor555To6665Opaque<SWAP_RB>(srcVec, convertedLo, convertedHi);
		
		__m128i *dstLo = (__m128i *)(dst + pix + 0);
		__m128i *dstHi = (__m128i *)(dst + pix + 4);
		if (IS_UNALIGNED)
		{
			_mm_storeu_si128(dstLo, convertedLo);
			_mm_storeu_si128(dstHi, convertedHi);
		}
		else
		{
			_mm_store_si128(dstLo, convertedLo);
			_mm_store_si128(dstHi, convertedHi);
		}
		
		pix += 8;
	}
#endif
	
	// Scalar pass over whatever the vector loop did not cover.
#ifdef ENABLE_SSE2
#pragma LOOPVECTORIZE_DISABLE
#endif
	for (; pix < pixCount; pix++)
	{
		dst[pix] = ConvertColor555To6665Opaque<SWAP_RB>(src[pix]);
	}
}
// Converts pixCount 32-bit pixels from src to dst using ConvertColor8888To6665<SWAP_RB>.
//
// SWAP_RB - forwarded unchanged to the per-pixel / per-vector converter.
//
// With ENABLE_SSE2, pixels are processed 4 at a time using aligned loads and stores, and
// any remainder is handled by the scalar loop. Without it, the scalar loop handles every
// pixel.
template <bool SWAP_RB>
void ConvertColorBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount)
{
	// Running pixel index shared by the vector loop and the scalar tail loop.
	size_t pix = 0;
	
#ifdef ENABLE_SSE2
	// Largest multiple of 4 that does not exceed pixCount.
	const size_t vecPixCount = (pixCount / 4) * 4;
	while (pix < vecPixCount)
	{
		const __m128i srcVec = _mm_load_si128((__m128i *)(src + pix));
		const __m128i dstVec = ConvertColor8888To6665<SWAP_RB>(srcVec);
		_mm_store_si128((__m128i *)(dst + pix), dstVec);
		
		pix += 4;
	}
#endif
	
	// Scalar pass over whatever the vector loop did not cover.
#ifdef ENABLE_SSE2
#pragma LOOPVECTORIZE_DISABLE
#endif
	for (; pix < pixCount; pix++)
	{
		dst[pix] = ConvertColor8888To6665<SWAP_RB>(src[pix]);
	}
}
// Converts pixCount 32-bit pixels from src to dst using ConvertColor6665To8888<SWAP_RB>.
//
// SWAP_RB - forwarded unchanged to the per-pixel / per-vector converter.
//
// With ENABLE_SSE2, pixels are processed 4 at a time using aligned loads and stores, and
// any remainder is handled by the scalar loop. Without it, the scalar loop handles every
// pixel.
template <bool SWAP_RB>
void ConvertColorBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount)
{
	// Running pixel index shared by the vector loop and the scalar tail loop.
	size_t pix = 0;
	
#ifdef ENABLE_SSE2
	// Largest multiple of 4 that does not exceed pixCount.
	const size_t vecPixCount = (pixCount / 4) * 4;
	while (pix < vecPixCount)
	{
		const __m128i srcVec = _mm_load_si128((__m128i *)(src + pix));
		const __m128i dstVec = ConvertColor6665To8888<SWAP_RB>(srcVec);
		_mm_store_si128((__m128i *)(dst + pix), dstVec);
		
		pix += 4;
	}
#endif
	
	// Scalar pass over whatever the vector loop did not cover.
#ifdef ENABLE_SSE2
#pragma LOOPVECTORIZE_DISABLE
#endif
	for (; pix < pixCount; pix++)
	{
		dst[pix] = ConvertColor6665To8888<SWAP_RB>(src[pix]);
	}
}
// Converts pixCount 32-bit pixels from src into 16-bit pixels in dst using
// ConvertColor8888To5551<SWAP_RB>.
//
// SWAP_RB      - forwarded unchanged to the per-pixel / per-vector converter.
// IS_UNALIGNED - when true the SSE2 path uses unaligned loads and stores; when false it
//                uses the aligned variants.
//
// With ENABLE_SSE2, pixels are processed 8 at a time (two 4-pixel source vectors packed
// into one destination vector) and any remainder is handled by the scalar loop. Without
// it, the scalar loop handles every pixel.
template <bool SWAP_RB, bool IS_UNALIGNED>
void ConvertColorBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount)
{
	// Running pixel index shared by the vector loop and the scalar tail loop.
	size_t pix = 0;
	
#ifdef ENABLE_SSE2
	// Largest multiple of 8 that does not exceed pixCount.
	const size_t vecPixCount = (pixCount / 8) * 8;
	while (pix < vecPixCount)
	{
		if (IS_UNALIGNED)
		{
			const __m128i srcLo = _mm_loadu_si128((__m128i *)(src + pix));
			const __m128i srcHi = _mm_loadu_si128((__m128i *)(src + pix + 4));
			_mm_storeu_si128( (__m128i *)(dst + pix), ConvertColor8888To5551<SWAP_RB>(srcLo, srcHi) );
		}
		else
		{
			const __m128i srcLo = _mm_load_si128((__m128i *)(src + pix));
			const __m128i srcHi = _mm_load_si128((__m128i *)(src + pix + 4));
			_mm_store_si128( (__m128i *)(dst + pix), ConvertColor8888To5551<SWAP_RB>(srcLo, srcHi) );
		}
		
		pix += 8;
	}
#endif
	
	// Scalar pass over whatever the vector loop did not cover.
#ifdef ENABLE_SSE2
#pragma LOOPVECTORIZE_DISABLE
#endif
	for (; pix < pixCount; pix++)
	{
		dst[pix] = ConvertColor8888To5551<SWAP_RB>(src[pix]);
	}
}
// Converts pixCount 32-bit pixels from src into 16-bit pixels in dst using
// ConvertColor6665To5551<SWAP_RB>.
//
// SWAP_RB      - forwarded unchanged to the per-pixel / per-vector converter.
// IS_UNALIGNED - when true the SSE2 path uses unaligned loads and stores; when false it
//                uses the aligned variants.
//
// With ENABLE_SSE2, pixels are processed 8 at a time (two 4-pixel source vectors packed
// into one destination vector) and any remainder is handled by the scalar loop. Without
// it, the scalar loop handles every pixel.
template <bool SWAP_RB, bool IS_UNALIGNED>
void ConvertColorBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount)
{
	// Running pixel index shared by the vector loop and the scalar tail loop.
	size_t pix = 0;
	
#ifdef ENABLE_SSE2
	// Largest multiple of 8 that does not exceed pixCount.
	const size_t vecPixCount = (pixCount / 8) * 8;
	while (pix < vecPixCount)
	{
		if (IS_UNALIGNED)
		{
			const __m128i srcLo = _mm_loadu_si128((__m128i *)(src + pix));
			const __m128i srcHi = _mm_loadu_si128((__m128i *)(src + pix + 4));
			_mm_storeu_si128( (__m128i *)(dst + pix), ConvertColor6665To5551<SWAP_RB>(srcLo, srcHi) );
		}
		else
		{
			const __m128i srcLo = _mm_load_si128((__m128i *)(src + pix));
			const __m128i srcHi = _mm_load_si128((__m128i *)(src + pix + 4));
			_mm_store_si128( (__m128i *)(dst + pix), ConvertColor6665To5551<SWAP_RB>(srcLo, srcHi) );
		}
		
		pix += 8;
	}
#endif
	
	// Scalar pass over whatever the vector loop did not cover.
#ifdef ENABLE_SSE2
#pragma LOOPVECTORIZE_DISABLE
#endif
	for (; pix < pixCount; pix++)
	{
		dst[pix] = ConvertColor6665To5551<SWAP_RB>(src[pix]);
	}
}
template void GPUEngineBase::ParseReg_BGnHOFS<GPULayerID_BG0>();
template void GPUEngineBase::ParseReg_BGnHOFS<GPULayerID_BG1>();
template void GPUEngineBase::ParseReg_BGnHOFS<GPULayerID_BG2>();
@ -7774,29 +7540,3 @@ template void GPUEngineBase::ParseReg_BGnY<GPULayerID_BG3>();
template void GPUSubsystem::RenderLine<NDSColorFormat_BGR555_Rev>(const u16 l, bool skip);
template void GPUSubsystem::RenderLine<NDSColorFormat_BGR666_Rev>(const u16 l, bool skip);
template void GPUSubsystem::RenderLine<NDSColorFormat_BGR888_Rev>(const u16 l, bool skip);
template void ConvertColorBuffer555To8888Opaque<true, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer555To8888Opaque<true, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer555To8888Opaque<false, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer555To8888Opaque<false, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer555To6665Opaque<true, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer555To6665Opaque<true, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer555To6665Opaque<false, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer555To6665Opaque<false, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer8888To6665<true>(const u32 *src, u32 *dst, size_t pixCount);
template void ConvertColorBuffer8888To6665<false>(const u32 *src, u32 *dst, size_t pixCount);
template void ConvertColorBuffer6665To8888<true>(const u32 *src, u32 *dst, size_t pixCount);
template void ConvertColorBuffer6665To8888<false>(const u32 *src, u32 *dst, size_t pixCount);
template void ConvertColorBuffer8888To5551<true, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer8888To5551<true, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer8888To5551<false, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer8888To5551<false, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer6665To5551<true, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer6665To5551<true, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer6665To5551<false, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ConvertColorBuffer6665To5551<false, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);

File diff suppressed because it is too large Load Diff

View File

@ -52,6 +52,7 @@ libdesmume_a_SOURCES = \
utils/decrypt/decrypt.h utils/decrypt/header.cpp utils/decrypt/header.h \
utils/task.cpp utils/task.h \
utils/vfat.h utils/vfat.cpp \
utils/colorspacehandler/colorspacehandler.cpp \
utils/dlditool.cpp \
utils/libfat/bit_ops.h \
utils/libfat/cache.cpp \
@ -109,6 +110,21 @@ libdesmume_a_SOURCES = \
libretro-common/rthreads/async_job.c \
libretro-common/rthreads/rsemaphore.c \
libretro-common/rthreads/rthreads.c
# SIMD-specific colorspace handler sources. Each file is appended to
# libdesmume_a_SOURCES only when its conditional is true. The conditional name
# must stand alone on the `if` line (as with `if HAVE_JIT` below); a trailing
# `+= \` would splice the following assignment into the `if` line.
if SUPPORT_SSE2
libdesmume_a_SOURCES += \
	utils/colorspacehandler/colorspacehandler_SSE2.cpp
endif
if SUPPORT_AVX2
libdesmume_a_SOURCES += \
	utils/colorspacehandler/colorspacehandler_AVX2.cpp
endif
if SUPPORT_ALTIVEC
libdesmume_a_SOURCES += \
	utils/colorspacehandler/colorspacehandler_AltiVec.cpp
endif
if HAVE_JIT
libdesmume_a_SOURCES += \

View File

@ -32,6 +32,7 @@
#ifdef ENABLE_SSE2
#include <emmintrin.h>
#include "./utils/colorspacehandler/colorspacehandler_SSE2.h"
#endif
typedef struct
@ -990,9 +991,9 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0));
const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + i + 4));
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 0), ConvertColor8888To6665<true>(srcColorLo) );
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 4), ConvertColor8888To6665<true>(srcColorHi) );
_mm_store_si128( (__m128i *)(dstRGBA5551 + i), ConvertColor8888To5551<true>(srcColorLo, srcColorHi) );
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 0), ColorspaceConvert8888To6665_SSE2<true>(srcColorLo) );
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 4), ColorspaceConvert8888To6665_SSE2<true>(srcColorHi) );
_mm_store_si128( (__m128i *)(dstRGBA5551 + i), ColorspaceConvert8888To5551_SSE2<true>(srcColorLo, srcColorHi) );
}
#endif
@ -1001,17 +1002,17 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
#endif
for (; i < pixCount; i++)
{
dstFramebuffer[i].color = ConvertColor8888To6665<true>(srcFramebuffer[i]);
dstRGBA5551[i] = ConvertColor8888To5551<true>(srcFramebuffer[i]);
dstFramebuffer[i].color = ColorspaceConvert8888To6665<true>(srcFramebuffer[i]);
dstRGBA5551[i] = ColorspaceConvert8888To5551<true>(srcFramebuffer[i]);
}
}
else if (dstFramebuffer != NULL)
{
ConvertColorBuffer8888To6665<true>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
ColorspaceConvertBuffer8888To6665<true, false>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
}
else
{
ConvertColorBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
}
}
else if (this->_outputFormat == NDSColorFormat_BGR888_Rev)
@ -1027,7 +1028,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 0), srcColorLo );
_mm_store_si128( (__m128i *)(dstFramebuffer + i + 4), srcColorHi );
_mm_store_si128( (__m128i *)(dstRGBA5551 + i), ConvertColor8888To5551<true>(srcColorLo, srcColorHi) );
_mm_store_si128( (__m128i *)(dstRGBA5551 + i), ColorspaceConvert8888To5551_SSE2<true>(srcColorLo, srcColorHi) );
}
#endif
@ -1036,8 +1037,8 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
#endif
for (; i < pixCount; i++)
{
dstFramebuffer[i].color = ConvertColor8888To6665<true>(srcFramebuffer[i]);
dstRGBA5551[i] = ConvertColor8888To5551<true>(srcFramebuffer[i]);
dstFramebuffer[i].color = ColorspaceConvert8888To6665<true>(srcFramebuffer[i]);
dstRGBA5551[i] = ColorspaceConvert8888To5551<true>(srcFramebuffer[i]);
}
}
else if (dstFramebuffer != NULL)
@ -1046,7 +1047,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
}
else
{
ConvertColorBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
}
}
}
@ -1068,9 +1069,9 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 0));
const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 4));
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 0), ConvertColor8888To6665<true>(srcColorLo) );
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 4), ConvertColor8888To6665<true>(srcColorHi) );
_mm_store_si128( (__m128i *)(dstRGBA5551 + iw), ConvertColor8888To5551<true>(srcColorLo, srcColorHi) );
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 0), ColorspaceConvert8888To6665_SSE2<true>(srcColorLo) );
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 4), ColorspaceConvert8888To6665_SSE2<true>(srcColorHi) );
_mm_store_si128( (__m128i *)(dstRGBA5551 + iw), ColorspaceConvert8888To5551_SSE2<true>(srcColorLo, srcColorHi) );
}
#endif
@ -1079,8 +1080,8 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
#endif
for (; x < pixCount; x++, ir++, iw++)
{
dstFramebuffer[iw].color = ConvertColor8888To6665<true>(srcFramebuffer[ir]);
dstRGBA5551[iw] = ConvertColor8888To5551<true>(srcFramebuffer[ir]);
dstFramebuffer[iw].color = ColorspaceConvert8888To6665<true>(srcFramebuffer[ir]);
dstRGBA5551[iw] = ColorspaceConvert8888To5551<true>(srcFramebuffer[ir]);
}
}
}
@ -1088,14 +1089,14 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
{
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
{
ConvertColorBuffer8888To6665<true>((u32 *)srcFramebuffer + ir, (u32 *)dstFramebuffer + iw, pixCount);
ColorspaceConvertBuffer8888To6665<true, false>((u32 *)srcFramebuffer + ir, (u32 *)dstFramebuffer + iw, pixCount);
}
}
else
{
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
{
ConvertColorBuffer8888To5551<true, false>((u32 *)srcFramebuffer + ir, dstRGBA5551 + iw, pixCount);
ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer + ir, dstRGBA5551 + iw, pixCount);
}
}
}
@ -1115,7 +1116,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 0), srcColorLo );
_mm_store_si128( (__m128i *)(dstFramebuffer + iw + 4), srcColorHi );
_mm_store_si128( (__m128i *)(dstRGBA5551 + iw), ConvertColor8888To5551<true>(srcColorLo, srcColorHi) );
_mm_store_si128( (__m128i *)(dstRGBA5551 + iw), ColorspaceConvert8888To5551_SSE2<true>(srcColorLo, srcColorHi) );
}
#endif
@ -1125,7 +1126,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
for (; x < pixCount; x++, ir++, iw++)
{
dstFramebuffer[iw] = srcFramebuffer[ir];
dstRGBA5551[iw] = ConvertColor8888To5551<true>(srcFramebuffer[ir]);
dstRGBA5551[iw] = ColorspaceConvert8888To5551<true>(srcFramebuffer[ir]);
}
}
}
@ -1146,7 +1147,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
{
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
{
ConvertColorBuffer8888To5551<true, false>((u32 *)srcFramebuffer + ir, dstRGBA5551 + iw, pixCount);
ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer + ir, dstRGBA5551 + iw, pixCount);
}
}
}

View File

@ -243,6 +243,8 @@
AB564915186E6F67002740F4 /* Image_Piano.png in Resources */ = {isa = PBXBuildFile; fileRef = AB56490B186E6F67002740F4 /* Image_Piano.png */; };
AB5785FD17176AFC002C5FC7 /* OpenEmuBase.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = AB5785FC17176AFC002C5FC7 /* OpenEmuBase.framework */; };
AB58F32D1364F44B0074C376 /* cocoa_file.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB58F32C1364F44B0074C376 /* cocoa_file.mm */; };
AB5FDDAC1D62C89E0094617C /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */; };
AB5FDDAD1D62C8A00094617C /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */; };
AB64987C13ECC73800EE7DD2 /* FileTypeInfo.plist in Resources */ = {isa = PBXBuildFile; fileRef = AB64987B13ECC73800EE7DD2 /* FileTypeInfo.plist */; };
AB68101B187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png in Resources */ = {isa = PBXBuildFile; fileRef = AB681013187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png */; };
AB68101C187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png in Resources */ = {isa = PBXBuildFile; fileRef = AB681013187D4AEF0049F2C2 /* Icon_GuitarGrip_Button_Blue_512x512.png */; };
@ -974,6 +976,12 @@
ABB97878144E89CC00793FA3 /* Icon_DeSmuME_32x32.png in Resources */ = {isa = PBXBuildFile; fileRef = ABB97875144E89CC00793FA3 /* Icon_DeSmuME_32x32.png */; };
ABBC0F8D1394B1AA0028B6BD /* DefaultUserPrefs.plist in Resources */ = {isa = PBXBuildFile; fileRef = ABBC0F8C1394B1AA0028B6BD /* DefaultUserPrefs.plist */; };
ABBF04A514B515F300E505A0 /* AppIcon_ROMCheats.icns in Resources */ = {isa = PBXBuildFile; fileRef = ABBF04A414B515F300E505A0 /* AppIcon_ROMCheats.icns */; };
ABBFFF851D6283C0003CD598 /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */; };
ABBFFF861D6283C1003CD598 /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */; };
ABBFFF871D6283C1003CD598 /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */; };
ABBFFF891D6283D2003CD598 /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */; };
ABBFFF8A1D6283D3003CD598 /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */; };
ABBFFF8B1D6283D3003CD598 /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */; };
ABC3AF2F14B7F06900D5B13D /* Icon_VolumeFull_16x16.png in Resources */ = {isa = PBXBuildFile; fileRef = ABC3AF2B14B7F06900D5B13D /* Icon_VolumeFull_16x16.png */; };
ABC3AF3014B7F06900D5B13D /* Icon_VolumeMute_16x16.png in Resources */ = {isa = PBXBuildFile; fileRef = ABC3AF2C14B7F06900D5B13D /* Icon_VolumeMute_16x16.png */; };
ABC3AF3114B7F06900D5B13D /* Icon_VolumeOneThird_16x16.png in Resources */ = {isa = PBXBuildFile; fileRef = ABC3AF2D14B7F06900D5B13D /* Icon_VolumeOneThird_16x16.png */; };
@ -1534,6 +1542,14 @@
ABBB421516B4A5F30012E5AB /* OGLRender_3_2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = OGLRender_3_2.h; path = ../OGLRender_3_2.h; sourceTree = "<group>"; };
ABBC0F8C1394B1AA0028B6BD /* DefaultUserPrefs.plist */ = {isa = PBXFileReference; lastKnownFileType = file.bplist; path = DefaultUserPrefs.plist; sourceTree = "<group>"; };
ABBF04A414B515F300E505A0 /* AppIcon_ROMCheats.icns */ = {isa = PBXFileReference; lastKnownFileType = image.icns; path = AppIcon_ROMCheats.icns; sourceTree = "<group>"; };
ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler.cpp; sourceTree = "<group>"; };
ABBFFF701D5F9C52003CD598 /* colorspacehandler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colorspacehandler.h; sourceTree = "<group>"; };
ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler_SSE2.cpp; sourceTree = "<group>"; };
ABBFFF761D5FD2ED003CD598 /* colorspacehandler_SSE2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colorspacehandler_SSE2.h; sourceTree = "<group>"; };
ABBFFF7B1D610457003CD598 /* colorspacehandler_AVX2.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler_AVX2.cpp; sourceTree = "<group>"; };
ABBFFF7C1D610457003CD598 /* colorspacehandler_AVX2.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = colorspacehandler_AVX2.h; sourceTree = "<group>"; };
ABBFFF811D611A36003CD598 /* colorspacehandler_AltiVec.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler_AltiVec.cpp; sourceTree = "<group>"; };
ABBFFF821D611A36003CD598 /* colorspacehandler_AltiVec.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = colorspacehandler_AltiVec.h; sourceTree = "<group>"; };
ABC3AF2B14B7F06900D5B13D /* Icon_VolumeFull_16x16.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_VolumeFull_16x16.png; path = images/Icon_VolumeFull_16x16.png; sourceTree = "<group>"; };
ABC3AF2C14B7F06900D5B13D /* Icon_VolumeMute_16x16.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_VolumeMute_16x16.png; path = images/Icon_VolumeMute_16x16.png; sourceTree = "<group>"; };
ABC3AF2D14B7F06900D5B13D /* Icon_VolumeOneThird_16x16.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_VolumeOneThird_16x16.png; path = images/Icon_VolumeOneThird_16x16.png; sourceTree = "<group>"; };
@ -2507,6 +2523,21 @@
path = openemu;
sourceTree = "<group>";
};
ABBFFF6E1D5F9C10003CD598 /* colorspacehandler */ = {
isa = PBXGroup;
children = (
ABBFFF811D611A36003CD598 /* colorspacehandler_AltiVec.cpp */,
ABBFFF7B1D610457003CD598 /* colorspacehandler_AVX2.cpp */,
ABBFFF751D5FD2ED003CD598 /* colorspacehandler_SSE2.cpp */,
ABBFFF6F1D5F9C52003CD598 /* colorspacehandler.cpp */,
ABBFFF821D611A36003CD598 /* colorspacehandler_AltiVec.h */,
ABBFFF7C1D610457003CD598 /* colorspacehandler_AVX2.h */,
ABBFFF761D5FD2ED003CD598 /* colorspacehandler_SSE2.h */,
ABBFFF701D5F9C52003CD598 /* colorspacehandler.h */,
);
path = colorspacehandler;
sourceTree = "<group>";
};
ABC2ECD613B1C87000FAAA2A /* Images */ = {
isa = PBXGroup;
children = (
@ -2757,6 +2788,7 @@
ABD1FF211345ACBF00AF11D1 /* decrypt */,
ABD1FF2E1345ACBF00AF11D1 /* libfat */,
ABE670241415DE6C00E8E4C9 /* tinyxml */,
ABBFFF6E1D5F9C10003CD598 /* colorspacehandler */,
ABD1FF1D1345ACBF00AF11D1 /* ConvertUTF.c */,
AB9038A517C5ECFD00F410BD /* advanscene.cpp */,
ABD1FF1F1345ACBF00AF11D1 /* datetime.cpp */,
@ -3768,6 +3800,7 @@
ABE6840D189E33BC007FD69C /* OGLDisplayOutput.cpp in Sources */,
ABD1FF121345AC9C00AF11D1 /* slot2_none.cpp in Sources */,
ABD1FF131345AC9C00AF11D1 /* slot2_paddle.cpp in Sources */,
ABBFFF8A1D6283D3003CD598 /* colorspacehandler_SSE2.cpp in Sources */,
ABD1FF141345AC9C00AF11D1 /* slot2_piano.cpp in Sources */,
ABD1FF151345AC9C00AF11D1 /* slot2_rumblepak.cpp in Sources */,
ABD1041F1346652500AF11D1 /* sndOSX.cpp in Sources */,
@ -3862,6 +3895,7 @@
AB40565E169F5DBB0016AC3E /* virtualmemory.cpp in Sources */,
AB405661169F5DBB0016AC3E /* zonememory.cpp in Sources */,
AB405679169F5DCC0016AC3E /* x86assembler.cpp in Sources */,
ABBFFF861D6283C1003CD598 /* colorspacehandler.cpp in Sources */,
AB40567C169F5DCC0016AC3E /* x86compiler.cpp in Sources */,
ABFEA8A41BB4EC1100B08C25 /* sfnt.c in Sources */,
ABA731691BB51FDC00B26147 /* type1cid.c in Sources */,
@ -4015,6 +4049,7 @@
AB796D4315CDCBA200C59155 /* version.cpp in Sources */,
ABFEA82B1BB4EC1100B08C25 /* ftinit.c in Sources */,
AB796D4415CDCBA200C59155 /* vfat.cpp in Sources */,
AB5FDDAC1D62C89E0094617C /* colorspacehandler.cpp in Sources */,
AB796D4515CDCBA200C59155 /* videofilter.cpp in Sources */,
AB796D4615CDCBA200C59155 /* WavFile.cpp in Sources */,
AB796D4715CDCBA200C59155 /* wifi.cpp in Sources */,
@ -4094,6 +4129,7 @@
AB26D87C16B5253D00A2305C /* OGLRender_3_2.cpp in Sources */,
AB3A655E16CC5421001F5D4A /* EmuControllerDelegate.mm in Sources */,
AB3A656116CC5438001F5D4A /* cocoa_GPU.mm in Sources */,
AB5FDDAD1D62C8A00094617C /* colorspacehandler_SSE2.cpp in Sources */,
AB8967D916D2ED0700F826F1 /* DisplayWindowController.mm in Sources */,
AB29B33116D4BEBF000EF671 /* InputManager.mm in Sources */,
AB8B7AAC17CE8C440051CEBF /* slot1comp_protocol.cpp in Sources */,
@ -4270,6 +4306,7 @@
AB2ABA401C9F9CFA00173B15 /* rsemaphore.c in Sources */,
AB8F3CF01A53AC2600A80BF6 /* ringbuffer.cpp in Sources */,
AB8F3CF11A53AC2600A80BF6 /* arm_jit.cpp in Sources */,
ABBFFF891D6283D2003CD598 /* colorspacehandler_SSE2.cpp in Sources */,
AB8F3CF21A53AC2600A80BF6 /* troubleshootingWindowDelegate.mm in Sources */,
AB8F3CF31A53AC2600A80BF6 /* assembler.cpp in Sources */,
AB8F3CF41A53AC2600A80BF6 /* assert.cpp in Sources */,
@ -4293,6 +4330,7 @@
AB8F3D041A53AC2600A80BF6 /* virtualmemory.cpp in Sources */,
AB8F3D051A53AC2600A80BF6 /* zonememory.cpp in Sources */,
AB8F3D061A53AC2600A80BF6 /* x86assembler.cpp in Sources */,
ABBFFF851D6283C0003CD598 /* colorspacehandler.cpp in Sources */,
AB8F3D071A53AC2600A80BF6 /* x86compiler.cpp in Sources */,
AB8F3D081A53AC2600A80BF6 /* x86compilercontext.cpp in Sources */,
AB8F3D091A53AC2600A80BF6 /* x86compilerfunc.cpp in Sources */,
@ -4365,6 +4403,7 @@
ABB3C6911501C04F00E0C22E /* SoundTouch.cpp in Sources */,
ABB3C6921501C04F00E0C22E /* sse_optimized.cpp in Sources */,
ABB3C6931501C04F00E0C22E /* TDStretch.cpp in Sources */,
ABBFFF871D6283C1003CD598 /* colorspacehandler.cpp in Sources */,
ABB3C6941501C04F00E0C22E /* WavFile.cpp in Sources */,
ABB3C6951501C04F00E0C22E /* metaspu.cpp in Sources */,
ABB3C6961501C04F00E0C22E /* SndOut.cpp in Sources */,
@ -4434,6 +4473,7 @@
ABB3C6D11501C04F00E0C22E /* slot1.cpp in Sources */,
ABB3C6D31501C04F00E0C22E /* SPU.cpp in Sources */,
ABB3C6D41501C04F00E0C22E /* texcache.cpp in Sources */,
ABBFFF8B1D6283D3003CD598 /* colorspacehandler_SSE2.cpp in Sources */,
AB9038BA17C5ED2200F410BD /* slot1comp_rom.cpp in Sources */,
ABB3C6D51501C04F00E0C22E /* thumb_instructions.cpp in Sources */,
AB2EE13317D57F5000F68622 /* fsnitro.cpp in Sources */,

View File

@ -740,6 +740,14 @@
AB2F56F11704C86900E28885 /* utilities.c in Sources */ = {isa = PBXBuildFile; fileRef = AB2F56EF1704C86900E28885 /* utilities.c */; };
AB2F56F21704C86900E28885 /* utilities.c in Sources */ = {isa = PBXBuildFile; fileRef = AB2F56EF1704C86900E28885 /* utilities.c */; };
AB2F56F31704C86900E28885 /* utilities.c in Sources */ = {isa = PBXBuildFile; fileRef = AB2F56EF1704C86900E28885 /* utilities.c */; };
AB37E3741D6188BC004A2C0D /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */; };
AB37E3771D6188BC004A2C0D /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E3721D6188BC004A2C0D /* colorspacehandler_SSE2.cpp */; };
AB37E3781D6188BC004A2C0D /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */; };
AB37E37B1D6188BC004A2C0D /* colorspacehandler_SSE2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E3721D6188BC004A2C0D /* colorspacehandler_SSE2.cpp */; };
AB37E37C1D6188BC004A2C0D /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */; };
AB37E37D1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36E1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp */; };
AB37E3801D6188BC004A2C0D /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */; };
AB37E38A1D61895F004A2C0D /* colorspacehandler_AltiVec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36E1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp */; };
AB3ACB7814C2361100D7D192 /* appDelegate.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB3ACB6714C2361100D7D192 /* appDelegate.mm */; };
AB3ACB7914C2361100D7D192 /* cheatWindowDelegate.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB3ACB6914C2361100D7D192 /* cheatWindowDelegate.mm */; };
AB3ACB7C14C2361100D7D192 /* inputPrefsView.mm in Sources */ = {isa = PBXBuildFile; fileRef = AB3ACB6F14C2361100D7D192 /* inputPrefsView.mm */; };
@ -1156,6 +1164,8 @@
AB73AA2E1507C9F500A310C8 /* OpenGL.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = ABC570D4134431DA00E7B0B1 /* OpenGL.framework */; };
AB73AA2F1507C9F500A310C8 /* libz.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = AB0A0D1914AACA9600E83E91 /* libz.dylib */; };
AB75226F14C7BB51009B97B3 /* AppIcon_FirmwareConfig.icns in Resources */ = {isa = PBXBuildFile; fileRef = AB75226D14C7BB51009B97B3 /* AppIcon_FirmwareConfig.icns */; };
AB7BB17F1D62C8CC00A7A6E2 /* colorspacehandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */; };
AB7BB1801D62C8CF00A7A6E2 /* colorspacehandler_AltiVec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB37E36E1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp */; };
AB7DDA6D173DC38F004F3D07 /* Carbon.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = ABB6AD5C173A3F2B00EC2E8D /* Carbon.framework */; };
AB7DDA6E173DC399004F3D07 /* Carbon.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = ABB6AD5C173A3F2B00EC2E8D /* Carbon.framework */; };
AB7DDA6F173DC39E004F3D07 /* Carbon.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = ABB6AD5C173A3F2B00EC2E8D /* Carbon.framework */; };
@ -1835,6 +1845,12 @@
AB2F56EF1704C86900E28885 /* utilities.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = utilities.c; sourceTree = "<group>"; };
AB350BA41478AC96007165AC /* IOKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = IOKit.framework; path = System/Library/Frameworks/IOKit.framework; sourceTree = SDKROOT; };
AB350D38147A1D8D007165AC /* English */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; name = English; path = translations/English.lproj/HID_usage_strings.plist; sourceTree = "<group>"; };
AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler.cpp; sourceTree = "<group>"; };
AB37E36D1D6188BC004A2C0D /* colorspacehandler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colorspacehandler.h; sourceTree = "<group>"; };
AB37E36E1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler_AltiVec.cpp; sourceTree = "<group>"; };
AB37E36F1D6188BC004A2C0D /* colorspacehandler_AltiVec.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colorspacehandler_AltiVec.h; sourceTree = "<group>"; };
AB37E3721D6188BC004A2C0D /* colorspacehandler_SSE2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colorspacehandler_SSE2.cpp; sourceTree = "<group>"; };
AB37E3731D6188BC004A2C0D /* colorspacehandler_SSE2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colorspacehandler_SSE2.h; sourceTree = "<group>"; };
AB3ACB6614C2361100D7D192 /* appDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = appDelegate.h; sourceTree = "<group>"; };
AB3ACB6714C2361100D7D192 /* appDelegate.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = appDelegate.mm; sourceTree = "<group>"; };
AB3ACB6814C2361100D7D192 /* cheatWindowDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cheatWindowDelegate.h; sourceTree = "<group>"; };
@ -2893,6 +2909,19 @@
path = src;
sourceTree = "<group>";
};
AB37E36B1D6188BC004A2C0D /* colorspacehandler */ = {
isa = PBXGroup;
children = (
AB37E36C1D6188BC004A2C0D /* colorspacehandler.cpp */,
AB37E36D1D6188BC004A2C0D /* colorspacehandler.h */,
AB37E36E1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp */,
AB37E36F1D6188BC004A2C0D /* colorspacehandler_AltiVec.h */,
AB37E3721D6188BC004A2C0D /* colorspacehandler_SSE2.cpp */,
AB37E3731D6188BC004A2C0D /* colorspacehandler_SSE2.h */,
);
path = colorspacehandler;
sourceTree = "<group>";
};
AB3ACB6514C2361100D7D192 /* userinterface */ = {
isa = PBXGroup;
children = (
@ -3205,6 +3234,7 @@
isa = PBXGroup;
children = (
ABBCE2A115ACB29100A2C965 /* AsmJit */,
AB37E36B1D6188BC004A2C0D /* colorspacehandler */,
ABD1FF211345ACBF00AF11D1 /* decrypt */,
ABD1FF2E1345ACBF00AF11D1 /* libfat */,
ABE670241415DE6C00E8E4C9 /* tinyxml */,
@ -4506,6 +4536,8 @@
AB50200A1D09E712002FA150 /* file_path.c in Sources */,
AB50200B1D09E712002FA150 /* retro_dirent.c in Sources */,
AB50200C1D09E712002FA150 /* retro_stat.c in Sources */,
AB7BB17F1D62C8CC00A7A6E2 /* colorspacehandler.cpp in Sources */,
AB7BB1801D62C8CF00A7A6E2 /* colorspacehandler_AltiVec.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@ -4685,6 +4717,8 @@
AB5020161D09E712002FA150 /* file_path.c in Sources */,
AB5020171D09E712002FA150 /* retro_dirent.c in Sources */,
AB5020181D09E712002FA150 /* retro_stat.c in Sources */,
AB37E3801D6188BC004A2C0D /* colorspacehandler.cpp in Sources */,
AB37E38A1D61895F004A2C0D /* colorspacehandler_AltiVec.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@ -4894,6 +4928,8 @@
AB50200D1D09E712002FA150 /* file_path.c in Sources */,
AB50200E1D09E712002FA150 /* retro_dirent.c in Sources */,
AB50200F1D09E712002FA150 /* retro_stat.c in Sources */,
AB37E3741D6188BC004A2C0D /* colorspacehandler.cpp in Sources */,
AB37E3771D6188BC004A2C0D /* colorspacehandler_SSE2.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@ -5103,6 +5139,8 @@
AB5020101D09E712002FA150 /* file_path.c in Sources */,
AB5020111D09E712002FA150 /* retro_dirent.c in Sources */,
AB5020121D09E712002FA150 /* retro_stat.c in Sources */,
AB37E3781D6188BC004A2C0D /* colorspacehandler.cpp in Sources */,
AB37E37B1D6188BC004A2C0D /* colorspacehandler_SSE2.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@ -5282,6 +5320,8 @@
AB5020131D09E712002FA150 /* file_path.c in Sources */,
AB5020141D09E712002FA150 /* retro_dirent.c in Sources */,
AB5020151D09E712002FA150 /* retro_stat.c in Sources */,
AB37E37C1D6188BC004A2C0D /* colorspacehandler.cpp in Sources */,
AB37E37D1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};

View File

@ -754,7 +754,7 @@
if (dispInfo.pixelBytes == 2)
{
ConvertColorBuffer555To8888Opaque<false, false>((u16 *)displayBuffer, bitmapData, (w * h));
ColorspaceConvertBuffer555To8888Opaque<false, false>((u16 *)displayBuffer, bitmapData, (w * h));
}
else if (dispInfo.pixelBytes == 4)
{

View File

@ -692,7 +692,7 @@ void RomIconToRGBA8888(uint32_t *bitmapData)
//
// The first entry always represents the alpha, so we can just ignore it.
clut[0] = 0x00000000;
ConvertColorBuffer555To8888Opaque<false, true>((u16 *)iconClutPtr, &clut[1], 15);
ColorspaceConvertBuffer555To8888Opaque<false, true>((u16 *)iconClutPtr, &clut[1], 15);
// Load the image from the icon pixel data.
//

View File

@ -66,6 +66,7 @@ CommandLine::CommandLine()
, arm7_gdb_port(0)
, start_paused(FALSE)
, autodetect_method(-1)
, render3d(COMMANDLINE_RENDER3D_DEFAULT)
{
#ifndef HOST_WINDOWS
disable_sound = 0;
@ -92,6 +93,8 @@ static const char* help_string = \
" --num-cores N Override numcores detection and use this many" ENDL
" --spu-synch Use SPU synch (crackles; helps streams; default ON)" ENDL
" --spu-method N Select SPU synch method: 0:N, 1:Z, 2:P; default 0" ENDL
" --3d-render [SW|AUTOGL|GL|OLDGL]" ENDL
" Select 3d renderer; default SW" ENDL
#ifndef HOST_WINDOWS
" --disable-sound Disables the sound output" ENDL
" --disable-limiter Disables the 60fps limiter" ENDL
@ -154,6 +157,7 @@ ENDL
//identifiers for long options that take an argument and have no flag variable.
//each one appears as the last field of an entry in the long-option table in CommandLine::parse,
//and is matched by a "case OPT_..." label in that function's switch, where optarg is read.
//NOTE(review): the table and optarg look like getopt_long usage -- confirm against the full parse loop.
#define OPT_NUMCORES 1
#define OPT_SPU_METHOD 2
#define OPT_3D_RENDER 3
#define OPT_JIT_SIZE 100
#define OPT_CONSOLE_TYPE 200
@ -183,6 +187,8 @@ ENDL
bool CommandLine::parse(int argc,char **argv)
{
std::string _render3d;
int opt_help = 0;
int option_index = 0;
for(;;)
@ -197,6 +203,7 @@ bool CommandLine::parse(int argc,char **argv)
{ "num-cores", required_argument, NULL, OPT_NUMCORES },
{ "spu-synch", no_argument, &_spu_sync_mode, 1 },
{ "spu-method", required_argument, NULL, OPT_SPU_METHOD },
{ "3d-render", required_argument, NULL, OPT_3D_RENDER },
#ifndef HOST_WINDOWS
{ "disable-sound", no_argument, &disable_sound, 1},
{ "disable-limiter", no_argument, &disable_limiter, 1},
@ -265,6 +272,7 @@ bool CommandLine::parse(int argc,char **argv)
//user settings
case OPT_NUMCORES: _num_cores = atoi(optarg); break;
case OPT_SPU_METHOD: _spu_sync_method = atoi(optarg); break;
case OPT_3D_RENDER: _render3d = optarg; break;
//sync settings
case OPT_JIT_SIZE: _jit_size = atoi(optarg); break;
@ -343,6 +351,14 @@ bool CommandLine::parse(int argc,char **argv)
CommonSettings.DebugConsole = true;
}
//process 3d renderer
_render3d = strtoupper(_render3d);
if(_render3d == "NONE") render3d = COMMANDLINE_RENDER3D_NONE;
if(_render3d == "SW") render3d = COMMANDLINE_RENDER3D_SW;
if(_render3d == "OLDGL") render3d = COMMANDLINE_RENDER3D_OLDGL;
if(_render3d == "AUTOGL") render3d = COMMANDLINE_RENDER3D_AUTOGL;
if(_render3d == "GL") render3d = COMMANDLINE_RENDER3D_GL;
if (autodetect_method != -1)
CommonSettings.autodetectBackupMethod = autodetect_method;

View File

@ -24,17 +24,29 @@
//hacky commandline options that i didnt want to route through commonoptions
extern int _commandline_linux_nojoy;
//values stored in CommandLine::render3d to record which 3d renderer was requested with --3d-render.
//CommandLine::parse uppercases the argument with strtoupper() and compares it against the names noted below.
//the constructor initializes render3d to COMMANDLINE_RENDER3D_DEFAULT, and parse leaves it untouched
//when the option is absent or its argument matches none of the names.
#define COMMANDLINE_RENDER3D_DEFAULT 0 //nothing (recognized) was requested
#define COMMANDLINE_RENDER3D_NONE 1 //"NONE"
#define COMMANDLINE_RENDER3D_SW 2 //"SW"
#define COMMANDLINE_RENDER3D_OLDGL 3 //"OLDGL"
#define COMMANDLINE_RENDER3D_GL 4 //"GL"
#define COMMANDLINE_RENDER3D_AUTOGL 5 //"AUTOGL"
//this class will also eventually try to take over the responsibility of using the args that it handles
//for example: preparing the emulator run by loading the rom, savestate, and/or movie in the correct pattern.
//it should also populate CommonSettings with its initial values
//EDIT: not really. combining this with what a frontend wants to do is complicated.
//you might design the API so that the frontend sets all those up, but I'm not sure I like that
//Really, this should be a passive structure that just collects the results provided by the shared command line processing, to be used later as appropriate
//(and the CommonSettings setup REMOVED or at least refactored into a separate method)
class CommandLine
{
public:
//actual options: these may move to another sturct
//actual options: these may move to another struct
int load_slot;
int depth_threshold;
int autodetect_method;
int render3d;
std::string nds_file;
std::string play_movie_file;
std::string record_movie_file;

View File

@ -1,65 +1,63 @@
/*
Copyright (C) 2008-2015 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdio.h>
#include <zlib.h>
#include "types.h"
#include "ImageOut.h"
#include "formats/rpng.h"
#include "formats/rbmp.h"
#include "GPU.h"
// Expands a width x height image of 16-bit source pixels into a newly
// malloc'd buffer holding 3 bytes per pixel. Each source pixel is run through
// ConvertColor555To8888Opaque<true>() and the three low bytes of the 32-bit
// result are stored, least significant byte first.
//
// Returns NULL when src is NULL, when either dimension is not positive, when
// the byte count would not fit in size_t, or when the allocation fails.
// On success the caller owns the buffer and must release it with free().
static u8* Convert15To24(const u16* src, int width, int height)
{
	if (src == NULL || width <= 0 || height <= 0)
		return NULL;

	// Do the size arithmetic in size_t: width * height * 3 can overflow int.
	const size_t w = (size_t)width;
	const size_t h = (size_t)height;
	if (h > ((size_t)-1) / 3 / w)
		return NULL;
	const size_t pixCount = w * h;

	u8 *const tmp_buffer = (u8 *)malloc(pixCount * 3);
	if (tmp_buffer == NULL)
		return NULL;

	u8 *tmp_inc = tmp_buffer;
	for (size_t i = 0; i < pixCount; i++)
	{
		const u32 dst = ConvertColor555To8888Opaque<true>(*src++);
		*tmp_inc++ = dst&0xFF;
		*tmp_inc++ = (dst>>8)&0xFF;
		*tmp_inc++ = (dst>>16)&0xFF;
	}
	return tmp_buffer;
}

// Writes a width x height image of 16-bit pixels to filename through
// rpng_save_image_bgr24(), after expanding it to 3 bytes per pixel.
// Returns 1 on success, 0 if the conversion buffer could not be produced or
// the save call reports failure.
int NDS_WritePNG_15bpp(int width, int height, const u16 *data, const char *filename)
{
	u8* tmp = Convert15To24(data,width,height);
	if (tmp == NULL)
		return 0;
	bool ok = rpng_save_image_bgr24(filename,tmp,width,height,width*3);
	free(tmp);
	return ok?1:0;
}

// Writes a width x height image of 16-bit pixels to filename through
// rbmp_save_image() with source type RBMP_SOURCE_TYPE_BGR24, after expanding
// it to 3 bytes per pixel.
// Returns 1 on success, 0 if the conversion buffer could not be produced or
// the save call reports failure.
int NDS_WriteBMP_15bpp(int width, int height, const u16 *data, const char *filename)
{
	u8* tmp = Convert15To24(data,width,height);
	if (tmp == NULL)
		return 0;
	bool ok = rbmp_save_image(filename,tmp,width,height,width*3,RBMP_SOURCE_TYPE_BGR24);
	free(tmp);
	return ok?1:0;
}
int NDS_WriteBMP_32bppBuffer(int width, int height, const void* buf, const char *filename)
{
bool ok = rbmp_save_image(filename,buf,width,height,width*4,RBMP_SOURCE_TYPE_ARGB8888);
return ok?1:0;
/*
Copyright (C) 2008-2015 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdio.h>
#include <zlib.h>
#include "types.h"
#include "ImageOut.h"
#include "formats/rpng.h"
#include "formats/rbmp.h"
#include "GPU.h"
// Expands a width x height image of 16-bit source pixels into a newly
// malloc'd buffer holding 3 bytes per pixel. Each source pixel is run through
// ColorspaceConvert555To8888Opaque<true>() and the three low bytes of the
// 32-bit result are stored, least significant byte first.
//
// Returns NULL when src is NULL, when either dimension is not positive, when
// the byte count would not fit in size_t, or when the allocation fails.
// On success the caller owns the buffer and must release it with free().
static u8* Convert15To24(const u16* src, int width, int height)
{
	if (src == NULL || width <= 0 || height <= 0)
		return NULL;

	// Do the size arithmetic in size_t: width * height * 3 can overflow int.
	const size_t w = (size_t)width;
	const size_t h = (size_t)height;
	if (h > ((size_t)-1) / 3 / w)
		return NULL;
	// Computed once instead of re-evaluating width*height on every iteration.
	const size_t pixCount = w * h;

	u8 *const tmp_buffer = (u8 *)malloc(pixCount * 3);
	if (tmp_buffer == NULL)
		return NULL;

	u8 *tmp_inc = tmp_buffer;
	for (size_t i = 0; i < pixCount; i++)
	{
		const u32 dst = ColorspaceConvert555To8888Opaque<true>(*src++);
		*tmp_inc++ = dst & 0xFF;
		*tmp_inc++ = (dst >> 8) & 0xFF;
		*tmp_inc++ = (dst >> 16) & 0xFF;
	}
	return tmp_buffer;
}

// Writes a width x height image of 16-bit pixels to filename through
// rpng_save_image_bgr24(), after expanding it to 3 bytes per pixel.
// Returns 1 on success, 0 if the conversion buffer could not be produced or
// the save call reports failure.
int NDS_WritePNG_15bpp(int width, int height, const u16 *data, const char *filename)
{
	u8* tmp = Convert15To24(data,width,height);
	if (tmp == NULL)
		return 0;
	bool ok = rpng_save_image_bgr24(filename,tmp,width,height,width*3);
	free(tmp);
	return ok?1:0;
}

// Writes a width x height image of 16-bit pixels to filename through
// rbmp_save_image() with source type RBMP_SOURCE_TYPE_BGR24, after expanding
// it to 3 bytes per pixel.
// Returns 1 on success, 0 if the conversion buffer could not be produced or
// the save call reports failure.
int NDS_WriteBMP_15bpp(int width, int height, const u16 *data, const char *filename)
{
	u8* tmp = Convert15To24(data,width,height);
	if (tmp == NULL)
		return 0;
	bool ok = rbmp_save_image(filename,tmp,width,height,width*3,RBMP_SOURCE_TYPE_BGR24);
	free(tmp);
	return ok?1:0;
}
// Hands a caller-owned buffer of 4-byte pixels straight to rbmp_save_image()
// with source type RBMP_SOURCE_TYPE_ARGB8888 and a row pitch of width*4 bytes.
// Returns 1 when the save call reports success, 0 otherwise.
int NDS_WriteBMP_32bppBuffer(int width, int height, const void* buf, const char *filename)
{
	const int pitch = width*4;
	if (rbmp_save_image(filename, buf, width, height, pitch, RBMP_SOURCE_TYPE_ARGB8888))
		return 1;
	return 0;
}

View File

@ -56,174 +56,174 @@
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<ItemGroup>
<ClCompile Include="..\..\addons\slot1comp_mc.cpp" />
<ClCompile Include="..\..\addons\slot1comp_protocol.cpp" />
<ClCompile Include="..\..\addons\slot1comp_rom.cpp" />
<ClCompile Include="..\..\addons\slot1_retail_auto.cpp" />
<ClCompile Include="..\..\addons\slot1_retail_mcrom.cpp" />
<ClCompile Include="..\..\addons\slot1_retail_mcrom_debug.cpp" />
<ClCompile Include="..\..\addons\slot2_auto.cpp" />
<ClCompile Include="..\..\addons\slot2_passme.cpp" />
<ClCompile Include="..\..\addons\slot2_piano.cpp" />
<ClCompile Include="..\..\addons\slot1_none.cpp" />
<ClCompile Include="..\..\addons\slot1_r4.cpp" />
<ClCompile Include="..\..\addons\slot1_retail_nand.cpp" />
<ClCompile Include="..\..\addons\slot2_mpcf.cpp" />
<ClCompile Include="..\..\addons\slot2_paddle.cpp" />
<ClCompile Include="..\..\aggdraw.cpp" />
<ClCompile Include="..\..\arm_instructions.cpp" />
<ClCompile Include="..\..\armcpu.cpp" />
<ClCompile Include="..\..\arm_jit.cpp" />
<ClCompile Include="..\..\bios.cpp" />
<ClCompile Include="..\..\cheatSystem.cpp" />
<ClCompile Include="..\..\commandline.cpp" />
<ClCompile Include="..\..\common.cpp" />
<ClCompile Include="..\..\cp15.cpp" />
<ClCompile Include="..\..\Database.cpp" />
<ClCompile Include="..\..\debug.cpp" />
<ClCompile Include="..\..\Disassembler.cpp" />
<ClCompile Include="..\..\driver.cpp" />
<ClCompile Include="..\..\emufile.cpp" />
<ClCompile Include="..\..\encrypt.cpp" />
<ClCompile Include="..\..\FIFO.cpp" />
<ClCompile Include="..\..\filter\2xsai.cpp" />
<ClCompile Include="..\..\filter\bilinear.cpp" />
<ClCompile Include="..\..\filter\epx.cpp" />
<ClCompile Include="..\..\filter\hq2x.cpp" />
<ClCompile Include="..\..\filter\hq4x.cpp" />
<ClCompile Include="..\..\filter\lq2x.cpp" />
<ClCompile Include="..\..\filter\scanline.cpp" />
<ClCompile Include="..\..\filter\xbrz.cpp" />
<ClCompile Include="..\..\firmware.cpp" />
<ClCompile Include="..\..\frontend\modules\ImageOut.cpp" />
<ClCompile Include="..\..\gfx3d.cpp" />
<ClCompile Include="..\..\GPU.cpp" />
<ClCompile Include="..\..\GPU_OSD.cpp" />
<ClCompile Include="..\..\libretro-common\compat\compat_fnmatch.c" />
<ClCompile Include="..\..\libretro-common\compat\compat_getopt.c" />
<ClCompile Include="..\..\libretro-common\compat\compat_posix_string.c" />
<ClCompile Include="..\..\libretro-common\compat\compat_snprintf.c" />
<ClCompile Include="..\..\libretro-common\compat\compat_strcasestr.c" />
<ClCompile Include="..\..\libretro-common\compat\compat_strl.c" />
<ClCompile Include="..\..\libretro-common\file\archive_file.c" />
<ClCompile Include="..\..\libretro-common\file\archive_file_zlib.c" />
<ClCompile Include="..\..\libretro-common\file\file_path.c" />
<ClCompile Include="..\..\libretro-common\file\nbio\nbio_stdio.c" />
<ClCompile Include="..\..\libretro-common\file\retro_dirent.c" />
<ClCompile Include="..\..\libretro-common\file\retro_stat.c" />
<ClCompile Include="..\..\libretro-common\formats\bmp\rbmp_encode.c" />
<ClCompile Include="..\..\libretro-common\formats\png\rpng.c" />
<ClCompile Include="..\..\libretro-common\formats\png\rpng_encode.c" />
<ClCompile Include="..\..\libretro-common\hash\rhash.c" />
<ClCompile Include="..\..\libretro-common\lists\dir_list.c" />
<ClCompile Include="..\..\libretro-common\lists\file_list.c" />
<ClCompile Include="..\..\libretro-common\lists\string_list.c" />
<ClCompile Include="..\..\libretro-common\rthreads\rsemaphore.c" />
<ClCompile Include="..\..\libretro-common\rthreads\rthreads.c" />
<ClCompile Include="..\..\libretro-common\features\features_cpu.c" />
<ClCompile Include="..\..\libretro-common\streams\file_stream.c" />
<ClCompile Include="..\..\libretro-common\streams\memory_stream.c" />
<ClCompile Include="..\..\lua-engine.cpp" />
<ClCompile Include="..\..\matrix.cpp" />
<ClCompile Include="..\..\mc.cpp" />
<ClCompile Include="..\..\MMU.cpp" />
<ClCompile Include="..\..\movie.cpp" />
<ClCompile Include="..\..\NDSSystem.cpp" />
<ClCompile Include="..\..\OGLRender.cpp" />
<ClCompile Include="..\..\OGLRender_3_2.cpp" />
<ClCompile Include="..\..\path.cpp" />
<ClCompile Include="..\..\rasterize.cpp" />
<ClCompile Include="..\..\readwrite.cpp" />
<ClCompile Include="..\..\render3D.cpp" />
<ClCompile Include="..\..\ROMReader.cpp" />
<ClCompile Include="..\..\rtc.cpp" />
<ClCompile Include="..\..\saves.cpp" />
<ClCompile Include="..\..\slot1.cpp" />
<ClCompile Include="..\..\slot2.cpp" />
<ClCompile Include="..\..\SPU.cpp" />
<ClCompile Include="..\..\texcache.cpp" />
<ClCompile Include="..\..\thumb_instructions.cpp" />
<ClCompile Include="..\..\utils\advanscene.cpp" />
<ClCompile Include="..\..\utils\AsmJit\core\assembler.cpp" />
<ClCompile Include="..\..\utils\AsmJit\core\assert.cpp" />
<ClCompile Include="..\..\utils\AsmJit\core\buffer.cpp" />
<ClCompile Include="..\..\utils\AsmJit\core\compiler.cpp" />
<ClCompile Include="..\..\utils\AsmJit\core\compilercontext.cpp" />
<ClCompile Include="..\..\utils\AsmJit\core\compilerfunc.cpp" />
<ClCompile Include="..\..\utils\AsmJit\core\compileritem.cpp" />
<ClCompile Include="..\..\utils\AsmJit\core\context.cpp" />
<ClCompile Include="..\..\utils\AsmJit\core\cpuinfo.cpp" />
<ClCompile Include="..\..\utils\AsmJit\core\defs.cpp" />
<ClCompile Include="..\..\utils\AsmJit\core\func.cpp" />
<ClCompile Include="..\..\utils\AsmJit\core\logger.cpp" />
<ClCompile Include="..\..\utils\AsmJit\core\memorymanager.cpp" />
<ClCompile Include="..\..\utils\AsmJit\core\memorymarker.cpp" />
<ClCompile Include="..\..\utils\AsmJit\core\operand.cpp" />
<ClCompile Include="..\..\utils\AsmJit\core\stringbuilder.cpp" />
<ClCompile Include="..\..\utils\AsmJit\core\stringutil.cpp" />
<ClCompile Include="..\..\utils\AsmJit\core\virtualmemory.cpp" />
<ClCompile Include="..\..\utils\AsmJit\core\zonememory.cpp" />
<ClCompile Include="..\..\utils\AsmJit\x86\x86assembler.cpp" />
<ClCompile Include="..\..\utils\AsmJit\x86\x86compiler.cpp" />
<ClCompile Include="..\..\utils\AsmJit\x86\x86compilercontext.cpp" />
<ClCompile Include="..\..\utils\AsmJit\x86\x86compilerfunc.cpp" />
<ClCompile Include="..\..\utils\AsmJit\x86\x86compileritem.cpp" />
<ClCompile Include="..\..\utils\AsmJit\x86\x86cpuinfo.cpp" />
<ClCompile Include="..\..\utils\AsmJit\x86\x86defs.cpp" />
<ClCompile Include="..\..\utils\AsmJit\x86\x86func.cpp" />
<ClCompile Include="..\..\utils\AsmJit\x86\x86operand.cpp" />
<ClCompile Include="..\..\utils\AsmJit\x86\x86util.cpp" />
<ClCompile Include="..\..\utils\datetime.cpp" />
<ClCompile Include="..\..\utils\dlditool.cpp" />
<ClCompile Include="..\..\utils\emufat.cpp" />
<ClCompile Include="..\..\utils\fsnitro.cpp" />
<ClCompile Include="..\..\utils\libfat\cache.cpp" />
<ClCompile Include="..\..\utils\libfat\directory.cpp" />
<ClCompile Include="..\..\utils\libfat\disc.cpp" />
<ClCompile Include="..\..\utils\libfat\fatdir.cpp" />
<ClCompile Include="..\..\utils\libfat\fatfile.cpp" />
<ClCompile Include="..\..\utils\libfat\filetime.cpp" />
<ClCompile Include="..\..\utils\libfat\file_allocation_table.cpp" />
<ClCompile Include="..\..\utils\libfat\libfat.cpp" />
<ClCompile Include="..\..\utils\libfat\libfat_public_api.cpp" />
<ClCompile Include="..\..\utils\libfat\lock.cpp" />
<ClCompile Include="..\..\utils\libfat\partition.cpp" />
<ClCompile Include="..\..\utils\tinyxml\tinystr.cpp" />
<ClCompile Include="..\..\utils\tinyxml\tinyxml.cpp" />
<ClCompile Include="..\..\utils\tinyxml\tinyxmlerror.cpp" />
<ClCompile Include="..\..\utils\tinyxml\tinyxmlparser.cpp" />
<ClCompile Include="..\..\utils\vfat.cpp" />
<ClCompile Include="..\..\version.cpp" />
<ClCompile Include="..\..\wifi.cpp" />
<ClCompile Include="..\..\addons\slot2_expMemory.cpp" />
<ClCompile Include="..\..\addons\slot2_gbagame.cpp" />
<ClCompile Include="..\..\addons\slot2_guitarGrip.cpp" />
<ClCompile Include="..\..\addons\slot2_none.cpp" />
<ClCompile Include="..\..\addons\slot2_rumblepak.cpp" />
<ClCompile Include="..\..\gdbstub\gdbstub.cpp" />
<ClCompile Include="..\..\utils\ConvertUTF.c" />
<ClCompile Include="..\..\utils\guid.cpp" />
<ClCompile Include="..\..\utils\md5.cpp" />
<ClCompile Include="..\..\utils\task.cpp" />
<ClCompile Include="..\..\utils\xstring.cpp" />
<ClCompile Include="..\..\utils\decrypt\crc.cpp" />
<ClCompile Include="..\..\utils\decrypt\decrypt.cpp" />
<ClCompile Include="..\..\utils\decrypt\header.cpp" />
<ClCompile Include="..\..\metaspu\metaspu.cpp" />
<ClCompile Include="..\..\metaspu\SndOut.cpp" />
<ClCompile Include="..\..\metaspu\Timestretcher.cpp" />
<ClCompile Include="..\..\metaspu\win32\ConfigSoundtouch.cpp" />
<ClCompile Include="..\..\metaspu\SoundTouch\3dnow_win.cpp" />
<ClCompile Include="..\..\metaspu\SoundTouch\AAFilter.cpp" />
<ClCompile Include="..\..\metaspu\SoundTouch\cpu_detect_x86_win.cpp" />
<ClCompile Include="..\..\metaspu\SoundTouch\FIFOSampleBuffer.cpp" />
<ClCompile Include="..\..\metaspu\SoundTouch\FIRFilter.cpp" />
<ClCompile Include="..\..\metaspu\SoundTouch\mmx_optimized.cpp" />
<ClCompile Include="..\..\metaspu\SoundTouch\RateTransposer.cpp" />
<ClCompile Include="..\..\metaspu\SoundTouch\SoundTouch.cpp" />
<ClCompile Include="..\..\metaspu\SoundTouch\sse_optimized.cpp" />
<ClCompile Include="..\..\metaspu\SoundTouch\TDStretch.cpp" />
<ClCompile Include="..\..\metaspu\SoundTouch\WavFile.cpp" />
<ClCompile Include="..\addons\slot1comp_mc.cpp" />
<ClCompile Include="..\addons\slot1comp_protocol.cpp" />
<ClCompile Include="..\addons\slot1comp_rom.cpp" />
<ClCompile Include="..\addons\slot1_retail_auto.cpp" />
<ClCompile Include="..\addons\slot1_retail_mcrom.cpp" />
<ClCompile Include="..\addons\slot1_retail_mcrom_debug.cpp" />
<ClCompile Include="..\addons\slot2_auto.cpp" />
<ClCompile Include="..\addons\slot2_passme.cpp" />
<ClCompile Include="..\addons\slot2_piano.cpp" />
<ClCompile Include="..\addons\slot1_none.cpp" />
<ClCompile Include="..\addons\slot1_r4.cpp" />
<ClCompile Include="..\addons\slot1_retail_nand.cpp" />
<ClCompile Include="..\addons\slot2_mpcf.cpp" />
<ClCompile Include="..\addons\slot2_paddle.cpp" />
<ClCompile Include="..\aggdraw.cpp" />
<ClCompile Include="..\arm_instructions.cpp" />
<ClCompile Include="..\armcpu.cpp" />
<ClCompile Include="..\arm_jit.cpp" />
<ClCompile Include="..\bios.cpp" />
<ClCompile Include="..\cheatSystem.cpp" />
<ClCompile Include="..\commandline.cpp" />
<ClCompile Include="..\common.cpp" />
<ClCompile Include="..\cp15.cpp" />
<ClCompile Include="..\debug.cpp" />
<ClCompile Include="..\Disassembler.cpp" />
<ClCompile Include="..\driver.cpp" />
<ClCompile Include="..\emufile.cpp" />
<ClCompile Include="..\encrypt.cpp" />
<ClCompile Include="..\FIFO.cpp" />
<ClCompile Include="..\filter\2xsai.cpp" />
<ClCompile Include="..\filter\bilinear.cpp" />
<ClCompile Include="..\filter\epx.cpp" />
<ClCompile Include="..\filter\hq2x.cpp" />
<ClCompile Include="..\filter\hq4x.cpp" />
<ClCompile Include="..\filter\lq2x.cpp" />
<ClCompile Include="..\filter\scanline.cpp" />
<ClCompile Include="..\filter\xbrz.cpp" />
<ClCompile Include="..\firmware.cpp" />
<ClCompile Include="..\frontend\modules\ImageOut.cpp" />
<ClCompile Include="..\gfx3d.cpp" />
<ClCompile Include="..\GPU.cpp" />
<ClCompile Include="..\GPU_OSD.cpp" />
<ClCompile Include="..\libretro-common\compat\compat_fnmatch.c" />
<ClCompile Include="..\libretro-common\compat\compat_getopt.c" />
<ClCompile Include="..\libretro-common\compat\compat_posix_string.c" />
<ClCompile Include="..\libretro-common\compat\compat_snprintf.c" />
<ClCompile Include="..\libretro-common\compat\compat_strcasestr.c" />
<ClCompile Include="..\libretro-common\compat\compat_strl.c" />
<ClCompile Include="..\libretro-common\file\archive_file.c" />
<ClCompile Include="..\libretro-common\file\archive_file_zlib.c" />
<ClCompile Include="..\libretro-common\file\file_path.c" />
<ClCompile Include="..\libretro-common\file\nbio\nbio_stdio.c" />
<ClCompile Include="..\libretro-common\file\retro_dirent.c" />
<ClCompile Include="..\libretro-common\file\retro_stat.c" />
<ClCompile Include="..\libretro-common\formats\bmp\rbmp_encode.c" />
<ClCompile Include="..\libretro-common\formats\png\rpng.c" />
<ClCompile Include="..\libretro-common\formats\png\rpng_encode.c" />
<ClCompile Include="..\libretro-common\hash\rhash.c" />
<ClCompile Include="..\libretro-common\lists\dir_list.c" />
<ClCompile Include="..\libretro-common\lists\file_list.c" />
<ClCompile Include="..\libretro-common\lists\string_list.c" />
<ClCompile Include="..\libretro-common\rthreads\rsemaphore.c" />
<ClCompile Include="..\libretro-common\rthreads\rthreads.c" />
<ClCompile Include="..\libretro-common\streams\file_stream.c" />
<ClCompile Include="..\libretro-common\streams\memory_stream.c" />
<ClCompile Include="..\lua-engine.cpp" />
<ClCompile Include="..\matrix.cpp" />
<ClCompile Include="..\mc.cpp" />
<ClCompile Include="..\MMU.cpp" />
<ClCompile Include="..\movie.cpp" />
<ClCompile Include="..\NDSSystem.cpp" />
<ClCompile Include="..\OGLRender.cpp" />
<ClCompile Include="..\OGLRender_3_2.cpp" />
<ClCompile Include="..\path.cpp" />
<ClCompile Include="..\rasterize.cpp" />
<ClCompile Include="..\readwrite.cpp" />
<ClCompile Include="..\render3D.cpp" />
<ClCompile Include="..\ROMReader.cpp" />
<ClCompile Include="..\rtc.cpp" />
<ClCompile Include="..\saves.cpp" />
<ClCompile Include="..\slot1.cpp" />
<ClCompile Include="..\slot2.cpp" />
<ClCompile Include="..\SPU.cpp" />
<ClCompile Include="..\texcache.cpp" />
<ClCompile Include="..\thumb_instructions.cpp" />
<ClCompile Include="..\utils\advanscene.cpp" />
<ClCompile Include="..\utils\AsmJit\core\assembler.cpp" />
<ClCompile Include="..\utils\AsmJit\core\assert.cpp" />
<ClCompile Include="..\utils\AsmJit\core\buffer.cpp" />
<ClCompile Include="..\utils\AsmJit\core\compiler.cpp" />
<ClCompile Include="..\utils\AsmJit\core\compilercontext.cpp" />
<ClCompile Include="..\utils\AsmJit\core\compilerfunc.cpp" />
<ClCompile Include="..\utils\AsmJit\core\compileritem.cpp" />
<ClCompile Include="..\utils\AsmJit\core\context.cpp" />
<ClCompile Include="..\utils\AsmJit\core\cpuinfo.cpp" />
<ClCompile Include="..\utils\AsmJit\core\defs.cpp" />
<ClCompile Include="..\utils\AsmJit\core\func.cpp" />
<ClCompile Include="..\utils\AsmJit\core\logger.cpp" />
<ClCompile Include="..\utils\AsmJit\core\memorymanager.cpp" />
<ClCompile Include="..\utils\AsmJit\core\memorymarker.cpp" />
<ClCompile Include="..\utils\AsmJit\core\operand.cpp" />
<ClCompile Include="..\utils\AsmJit\core\stringbuilder.cpp" />
<ClCompile Include="..\utils\AsmJit\core\stringutil.cpp" />
<ClCompile Include="..\utils\AsmJit\core\virtualmemory.cpp" />
<ClCompile Include="..\utils\AsmJit\core\zonememory.cpp" />
<ClCompile Include="..\utils\AsmJit\x86\x86assembler.cpp" />
<ClCompile Include="..\utils\AsmJit\x86\x86compiler.cpp" />
<ClCompile Include="..\utils\AsmJit\x86\x86compilercontext.cpp" />
<ClCompile Include="..\utils\AsmJit\x86\x86compilerfunc.cpp" />
<ClCompile Include="..\utils\AsmJit\x86\x86compileritem.cpp" />
<ClCompile Include="..\utils\AsmJit\x86\x86cpuinfo.cpp" />
<ClCompile Include="..\utils\AsmJit\x86\x86defs.cpp" />
<ClCompile Include="..\utils\AsmJit\x86\x86func.cpp" />
<ClCompile Include="..\utils\AsmJit\x86\x86operand.cpp" />
<ClCompile Include="..\utils\AsmJit\x86\x86util.cpp" />
<ClCompile Include="..\utils\colorspacehandler\colorspacehandler.cpp" />
<ClCompile Include="..\utils\colorspacehandler\colorspacehandler_SSE2.cpp" />
<ClCompile Include="..\utils\datetime.cpp" />
<ClCompile Include="..\utils\dlditool.cpp" />
<ClCompile Include="..\utils\emufat.cpp" />
<ClCompile Include="..\utils\fsnitro.cpp" />
<ClCompile Include="..\utils\libfat\cache.cpp" />
<ClCompile Include="..\utils\libfat\directory.cpp" />
<ClCompile Include="..\utils\libfat\disc.cpp" />
<ClCompile Include="..\utils\libfat\fatdir.cpp" />
<ClCompile Include="..\utils\libfat\fatfile.cpp" />
<ClCompile Include="..\utils\libfat\filetime.cpp" />
<ClCompile Include="..\utils\libfat\file_allocation_table.cpp" />
<ClCompile Include="..\utils\libfat\libfat.cpp" />
<ClCompile Include="..\utils\libfat\libfat_public_api.cpp" />
<ClCompile Include="..\utils\libfat\lock.cpp" />
<ClCompile Include="..\utils\libfat\partition.cpp" />
<ClCompile Include="..\utils\tinyxml\tinystr.cpp" />
<ClCompile Include="..\utils\tinyxml\tinyxml.cpp" />
<ClCompile Include="..\utils\tinyxml\tinyxmlerror.cpp" />
<ClCompile Include="..\utils\tinyxml\tinyxmlparser.cpp" />
<ClCompile Include="..\utils\vfat.cpp" />
<ClCompile Include="..\version.cpp" />
<ClCompile Include="..\wifi.cpp" />
<ClCompile Include="..\addons\slot2_expMemory.cpp" />
<ClCompile Include="..\addons\slot2_gbagame.cpp" />
<ClCompile Include="..\addons\slot2_guitarGrip.cpp" />
<ClCompile Include="..\addons\slot2_none.cpp" />
<ClCompile Include="..\addons\slot2_rumblepak.cpp" />
<ClCompile Include="..\gdbstub\gdbstub.cpp" />
<ClCompile Include="..\utils\ConvertUTF.c" />
<ClCompile Include="..\utils\guid.cpp" />
<ClCompile Include="..\utils\md5.cpp" />
<ClCompile Include="..\utils\task.cpp" />
<ClCompile Include="..\utils\xstring.cpp" />
<ClCompile Include="..\utils\decrypt\crc.cpp" />
<ClCompile Include="..\utils\decrypt\decrypt.cpp" />
<ClCompile Include="..\utils\decrypt\header.cpp" />
<ClCompile Include="..\metaspu\metaspu.cpp" />
<ClCompile Include="..\metaspu\SndOut.cpp" />
<ClCompile Include="..\metaspu\Timestretcher.cpp" />
<ClCompile Include="..\metaspu\win32\ConfigSoundtouch.cpp" />
<ClCompile Include="..\metaspu\SoundTouch\3dnow_win.cpp" />
<ClCompile Include="..\metaspu\SoundTouch\AAFilter.cpp" />
<ClCompile Include="..\metaspu\SoundTouch\cpu_detect_x86_win.cpp" />
<ClCompile Include="..\metaspu\SoundTouch\FIFOSampleBuffer.cpp" />
<ClCompile Include="..\metaspu\SoundTouch\FIRFilter.cpp" />
<ClCompile Include="..\metaspu\SoundTouch\mmx_optimized.cpp" />
<ClCompile Include="..\metaspu\SoundTouch\RateTransposer.cpp" />
<ClCompile Include="..\metaspu\SoundTouch\SoundTouch.cpp" />
<ClCompile Include="..\metaspu\SoundTouch\sse_optimized.cpp" />
<ClCompile Include="..\metaspu\SoundTouch\TDStretch.cpp" />
<ClCompile Include="..\metaspu\SoundTouch\WavFile.cpp" />
<ClCompile Include="AboutBox.cpp" />
<ClCompile Include="aviout.cpp" />
<ClCompile Include="cheatsWin.cpp" />
@ -315,7 +315,7 @@
<ClCompile Include="inputdx.cpp" />
<ClCompile Include="luaconsole.cpp" />
<ClCompile Include="main.cpp" />
<ClCompile Include="mic-win.cpp" />
<ClCompile Include="mic.cpp" />
<ClCompile Include="ogl.cpp" />
<ClCompile Include="OpenArchive.cpp" />
<ClCompile Include="pathsettings.cpp" />
@ -341,158 +341,160 @@
<ClCompile Include="tileView.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\addons\slot1comp_mc.h" />
<ClInclude Include="..\..\addons\slot1comp_protocol.h" />
<ClInclude Include="..\..\addons\slot1comp_rom.h" />
<ClInclude Include="..\..\armcpu.h" />
<ClInclude Include="..\..\arm_jit.h" />
<ClInclude Include="..\..\bios.h" />
<ClInclude Include="..\..\cheatSystem.h" />
<ClInclude Include="..\..\commandline.h" />
<ClInclude Include="..\..\common.h" />
<ClInclude Include="..\..\cp15.h" />
<ClInclude Include="..\..\Database.h" />
<ClInclude Include="..\..\debug.h" />
<ClInclude Include="..\..\Disassembler.h" />
<ClInclude Include="..\..\driver.h" />
<ClInclude Include="..\..\emufile.h" />
<ClInclude Include="..\..\encrypt.h" />
<ClInclude Include="..\..\FIFO.h" />
<ClInclude Include="..\..\filter\filter.h" />
<ClInclude Include="..\..\filter\hq2x.h" />
<ClInclude Include="..\..\filter\interp.h" />
<ClInclude Include="..\..\filter\lq2x.h" />
<ClInclude Include="..\..\filter\xbrz.h" />
<ClInclude Include="..\..\firmware.h" />
<ClInclude Include="..\..\frontend\modules\ImageOut.h" />
<ClInclude Include="..\..\gfx3d.h" />
<ClInclude Include="..\..\GPU.h" />
<ClInclude Include="..\..\GPU_osd.h" />
<ClInclude Include="..\..\instructions.h" />
<ClInclude Include="..\..\instruction_attributes.h" />
<ClInclude Include="..\..\libretro-common\include\boolean.h" />
<ClInclude Include="..\..\libretro-common\include\compat\getopt.h" />
<ClInclude Include="..\..\libretro-common\include\compat\msvc.h" />
<ClInclude Include="..\..\libretro-common\include\formats\image.h" />
<ClInclude Include="..\..\libretro-common\include\formats\rbmp.h" />
<ClInclude Include="..\..\libretro-common\include\formats\rpng.h" />
<ClInclude Include="..\..\libretro-common\include\retro_inline.h" />
<ClInclude Include="..\..\libretro-common\include\retro_miscellaneous.h" />
<ClInclude Include="..\..\libretro-common\include\rthreads\rthreads.h" />
<ClInclude Include="..\..\lua-engine.h" />
<ClInclude Include="..\..\matrix.h" />
<ClInclude Include="..\..\mc.h" />
<ClInclude Include="..\..\mem.h" />
<ClInclude Include="..\..\mic.h" />
<ClInclude Include="..\..\MMU.h" />
<ClInclude Include="..\..\MMU_timing.h" />
<ClInclude Include="..\..\movie.h" />
<ClInclude Include="..\..\NDSSystem.h" />
<ClInclude Include="..\..\OGLRender.h" />
<ClInclude Include="..\..\OGLRender_3_2.h" />
<ClInclude Include="..\..\path.h" />
<ClInclude Include="..\..\rasterize.h" />
<ClInclude Include="..\..\readwrite.h" />
<ClInclude Include="..\..\registers.h" />
<ClInclude Include="..\..\render3D.h" />
<ClInclude Include="..\..\ROMReader.h" />
<ClInclude Include="..\..\rtc.h" />
<ClInclude Include="..\..\saves.h" />
<ClInclude Include="..\..\slot1.h" />
<ClInclude Include="..\..\slot2.h" />
<ClInclude Include="..\..\SPU.h" />
<ClInclude Include="..\..\texcache.h" />
<ClInclude Include="..\..\types.h" />
<ClInclude Include="..\..\utils\advanscene.h" />
<ClInclude Include="..\..\utils\AsmJit\asmjit.h" />
<ClInclude Include="..\..\utils\AsmJit\config.h" />
<ClInclude Include="..\..\utils\AsmJit\core.h" />
<ClInclude Include="..\..\utils\AsmJit\core\apibegin.h" />
<ClInclude Include="..\..\utils\AsmJit\core\apiend.h" />
<ClInclude Include="..\..\utils\AsmJit\core\assembler.h" />
<ClInclude Include="..\..\utils\AsmJit\core\assert.h" />
<ClInclude Include="..\..\utils\AsmJit\core\buffer.h" />
<ClInclude Include="..\..\utils\AsmJit\core\build.h" />
<ClInclude Include="..\..\utils\AsmJit\core\compiler.h" />
<ClInclude Include="..\..\utils\AsmJit\core\compilercontext.h" />
<ClInclude Include="..\..\utils\AsmJit\core\compilerfunc.h" />
<ClInclude Include="..\..\utils\AsmJit\core\compileritem.h" />
<ClInclude Include="..\..\utils\AsmJit\core\context.h" />
<ClInclude Include="..\..\utils\AsmJit\core\cpuinfo.h" />
<ClInclude Include="..\..\utils\AsmJit\core\defs.h" />
<ClInclude Include="..\..\utils\AsmJit\core\func.h" />
<ClInclude Include="..\..\utils\AsmJit\core\intutil.h" />
<ClInclude Include="..\..\utils\AsmJit\core\lock.h" />
<ClInclude Include="..\..\utils\AsmJit\core\logger.h" />
<ClInclude Include="..\..\utils\AsmJit\core\memorymanager.h" />
<ClInclude Include="..\..\utils\AsmJit\core\memorymarker.h" />
<ClInclude Include="..\..\utils\AsmJit\core\operand.h" />
<ClInclude Include="..\..\utils\AsmJit\core\podvector.h" />
<ClInclude Include="..\..\utils\AsmJit\core\stringbuilder.h" />
<ClInclude Include="..\..\utils\AsmJit\core\stringutil.h" />
<ClInclude Include="..\..\utils\AsmJit\core\virtualmemory.h" />
<ClInclude Include="..\..\utils\AsmJit\core\zonememory.h" />
<ClInclude Include="..\..\utils\AsmJit\x86.h" />
<ClInclude Include="..\..\utils\AsmJit\x86\x86assembler.h" />
<ClInclude Include="..\..\utils\AsmJit\x86\x86compiler.h" />
<ClInclude Include="..\..\utils\AsmJit\x86\x86compilercontext.h" />
<ClInclude Include="..\..\utils\AsmJit\x86\x86compilerfunc.h" />
<ClInclude Include="..\..\utils\AsmJit\x86\x86compileritem.h" />
<ClInclude Include="..\..\utils\AsmJit\x86\x86cpuinfo.h" />
<ClInclude Include="..\..\utils\AsmJit\x86\x86defs.h" />
<ClInclude Include="..\..\utils\AsmJit\x86\x86func.h" />
<ClInclude Include="..\..\utils\AsmJit\x86\x86operand.h" />
<ClInclude Include="..\..\utils\AsmJit\x86\x86util.h" />
<ClInclude Include="..\..\utils\bits.h" />
<ClInclude Include="..\..\utils\datetime.h" />
<ClInclude Include="..\..\utils\emufat.h" />
<ClInclude Include="..\..\utils\emufat_types.h" />
<ClInclude Include="..\..\utils\fsnitro.h" />
<ClInclude Include="..\..\utils\libfat\bit_ops.h" />
<ClInclude Include="..\..\utils\libfat\cache.h" />
<ClInclude Include="..\..\utils\libfat\common.h" />
<ClInclude Include="..\..\utils\libfat\directory.h" />
<ClInclude Include="..\..\utils\libfat\disc.h" />
<ClInclude Include="..\..\utils\libfat\disc_io.h" />
<ClInclude Include="..\..\utils\libfat\fat.h" />
<ClInclude Include="..\..\utils\libfat\fatdir.h" />
<ClInclude Include="..\..\utils\libfat\fatfile.h" />
<ClInclude Include="..\..\utils\libfat\filetime.h" />
<ClInclude Include="..\..\utils\libfat\file_allocation_table.h" />
<ClInclude Include="..\..\utils\libfat\libfat_pc.h" />
<ClInclude Include="..\..\utils\libfat\libfat_public_api.h" />
<ClInclude Include="..\..\utils\libfat\lock.h" />
<ClInclude Include="..\..\utils\libfat\mem_allocate.h" />
<ClInclude Include="..\..\utils\libfat\partition.h" />
<ClInclude Include="..\..\utils\tinyxml\tinystr.h" />
<ClInclude Include="..\..\utils\tinyxml\tinyxml.h" />
<ClInclude Include="..\..\utils\vfat.h" />
<ClInclude Include="..\..\utils\xstring.h" />
<ClInclude Include="..\..\version.h" />
<ClInclude Include="..\..\wifi.h" />
<ClInclude Include="..\..\gdbstub.h" />
<ClInclude Include="..\..\utils\ConvertUTF.h" />
<ClInclude Include="..\..\utils\guid.h" />
<ClInclude Include="..\..\utils\md5.h" />
<ClInclude Include="..\..\utils\task.h" />
<ClInclude Include="..\..\utils\valuearray.h" />
<ClInclude Include="..\..\utils\decrypt\crc.h" />
<ClInclude Include="..\..\utils\decrypt\decrypt.h" />
<ClInclude Include="..\..\utils\decrypt\header.h" />
<ClInclude Include="..\..\metaspu\metaspu.h" />
<ClInclude Include="..\..\metaspu\SndOut.h" />
<ClInclude Include="..\..\metaspu\win32\Dialogs.h" />
<ClInclude Include="..\..\metaspu\SoundTouch\AAFilter.h" />
<ClInclude Include="..\..\metaspu\SoundTouch\BPMDetect.h" />
<ClInclude Include="..\..\metaspu\SoundTouch\cpu_detect.h" />
<ClInclude Include="..\..\metaspu\SoundTouch\FIFOSampleBuffer.h" />
<ClInclude Include="..\..\metaspu\SoundTouch\FIFOSamplePipe.h" />
<ClInclude Include="..\..\metaspu\SoundTouch\FIRFilter.h" />
<ClInclude Include="..\..\metaspu\SoundTouch\RateTransposer.h" />
<ClInclude Include="..\..\metaspu\SoundTouch\SoundTouch.h" />
<ClInclude Include="..\..\metaspu\SoundTouch\STTypes.h" />
<ClInclude Include="..\..\metaspu\SoundTouch\TDStretch.h" />
<ClInclude Include="..\..\metaspu\SoundTouch\WavFile.h" />
<ClInclude Include="..\addons\slot1comp_mc.h" />
<ClInclude Include="..\addons\slot1comp_protocol.h" />
<ClInclude Include="..\addons\slot1comp_rom.h" />
<ClInclude Include="..\armcpu.h" />
<ClInclude Include="..\arm_jit.h" />
<ClInclude Include="..\bios.h" />
<ClInclude Include="..\bits.h" />
<ClInclude Include="..\cheatSystem.h" />
<ClInclude Include="..\commandline.h" />
<ClInclude Include="..\common.h" />
<ClInclude Include="..\cp15.h" />
<ClInclude Include="..\debug.h" />
<ClInclude Include="..\Disassembler.h" />
<ClInclude Include="..\driver.h" />
<ClInclude Include="..\emufile.h" />
<ClInclude Include="..\encrypt.h" />
<ClInclude Include="..\FIFO.h" />
<ClInclude Include="..\filter\filter.h" />
<ClInclude Include="..\filter\hq2x.h" />
<ClInclude Include="..\filter\interp.h" />
<ClInclude Include="..\filter\lq2x.h" />
<ClInclude Include="..\filter\xbrz.h" />
<ClInclude Include="..\firmware.h" />
<ClInclude Include="..\frontend\modules\ImageOut.h" />
<ClInclude Include="..\gfx3d.h" />
<ClInclude Include="..\GPU.h" />
<ClInclude Include="..\GPU_osd.h" />
<ClInclude Include="..\instructions.h" />
<ClInclude Include="..\instruction_attributes.h" />
<ClInclude Include="..\libretro-common\formats\png\rpng_internal.h" />
<ClInclude Include="..\libretro-common\include\boolean.h" />
<ClInclude Include="..\libretro-common\include\compat\getopt.h" />
<ClInclude Include="..\libretro-common\include\compat\msvc.h" />
<ClInclude Include="..\libretro-common\include\formats\image.h" />
<ClInclude Include="..\libretro-common\include\formats\rbmp.h" />
<ClInclude Include="..\libretro-common\include\formats\rpng.h" />
<ClInclude Include="..\libretro-common\include\retro_inline.h" />
<ClInclude Include="..\libretro-common\include\retro_miscellaneous.h" />
<ClInclude Include="..\libretro-common\include\rthreads\rthreads.h" />
<ClInclude Include="..\lua-engine.h" />
<ClInclude Include="..\matrix.h" />
<ClInclude Include="..\mc.h" />
<ClInclude Include="..\mem.h" />
<ClInclude Include="..\mic.h" />
<ClInclude Include="..\MMU.h" />
<ClInclude Include="..\MMU_timing.h" />
<ClInclude Include="..\movie.h" />
<ClInclude Include="..\NDSSystem.h" />
<ClInclude Include="..\OGLRender.h" />
<ClInclude Include="..\OGLRender_3_2.h" />
<ClInclude Include="..\path.h" />
<ClInclude Include="..\rasterize.h" />
<ClInclude Include="..\readwrite.h" />
<ClInclude Include="..\registers.h" />
<ClInclude Include="..\render3D.h" />
<ClInclude Include="..\ROMReader.h" />
<ClInclude Include="..\rtc.h" />
<ClInclude Include="..\saves.h" />
<ClInclude Include="..\slot1.h" />
<ClInclude Include="..\slot2.h" />
<ClInclude Include="..\SPU.h" />
<ClInclude Include="..\texcache.h" />
<ClInclude Include="..\types.h" />
<ClInclude Include="..\utils\advanscene.h" />
<ClInclude Include="..\utils\AsmJit\asmjit.h" />
<ClInclude Include="..\utils\AsmJit\config.h" />
<ClInclude Include="..\utils\AsmJit\core.h" />
<ClInclude Include="..\utils\AsmJit\core\apibegin.h" />
<ClInclude Include="..\utils\AsmJit\core\apiend.h" />
<ClInclude Include="..\utils\AsmJit\core\assembler.h" />
<ClInclude Include="..\utils\AsmJit\core\assert.h" />
<ClInclude Include="..\utils\AsmJit\core\buffer.h" />
<ClInclude Include="..\utils\AsmJit\core\build.h" />
<ClInclude Include="..\utils\AsmJit\core\compiler.h" />
<ClInclude Include="..\utils\AsmJit\core\compilercontext.h" />
<ClInclude Include="..\utils\AsmJit\core\compilerfunc.h" />
<ClInclude Include="..\utils\AsmJit\core\compileritem.h" />
<ClInclude Include="..\utils\AsmJit\core\context.h" />
<ClInclude Include="..\utils\AsmJit\core\cpuinfo.h" />
<ClInclude Include="..\utils\AsmJit\core\defs.h" />
<ClInclude Include="..\utils\AsmJit\core\func.h" />
<ClInclude Include="..\utils\AsmJit\core\intutil.h" />
<ClInclude Include="..\utils\AsmJit\core\lock.h" />
<ClInclude Include="..\utils\AsmJit\core\logger.h" />
<ClInclude Include="..\utils\AsmJit\core\memorymanager.h" />
<ClInclude Include="..\utils\AsmJit\core\memorymarker.h" />
<ClInclude Include="..\utils\AsmJit\core\operand.h" />
<ClInclude Include="..\utils\AsmJit\core\podvector.h" />
<ClInclude Include="..\utils\AsmJit\core\stringbuilder.h" />
<ClInclude Include="..\utils\AsmJit\core\stringutil.h" />
<ClInclude Include="..\utils\AsmJit\core\virtualmemory.h" />
<ClInclude Include="..\utils\AsmJit\core\zonememory.h" />
<ClInclude Include="..\utils\AsmJit\x86.h" />
<ClInclude Include="..\utils\AsmJit\x86\x86assembler.h" />
<ClInclude Include="..\utils\AsmJit\x86\x86compiler.h" />
<ClInclude Include="..\utils\AsmJit\x86\x86compilercontext.h" />
<ClInclude Include="..\utils\AsmJit\x86\x86compilerfunc.h" />
<ClInclude Include="..\utils\AsmJit\x86\x86compileritem.h" />
<ClInclude Include="..\utils\AsmJit\x86\x86cpuinfo.h" />
<ClInclude Include="..\utils\AsmJit\x86\x86defs.h" />
<ClInclude Include="..\utils\AsmJit\x86\x86func.h" />
<ClInclude Include="..\utils\AsmJit\x86\x86operand.h" />
<ClInclude Include="..\utils\AsmJit\x86\x86util.h" />
<ClInclude Include="..\utils\colorspacehandler\colorspacehandler.h" />
<ClInclude Include="..\utils\colorspacehandler\colorspacehandler_SSE2.h" />
<ClInclude Include="..\utils\datetime.h" />
<ClInclude Include="..\utils\emufat.h" />
<ClInclude Include="..\utils\emufat_types.h" />
<ClInclude Include="..\utils\fsnitro.h" />
<ClInclude Include="..\utils\libfat\bit_ops.h" />
<ClInclude Include="..\utils\libfat\cache.h" />
<ClInclude Include="..\utils\libfat\common.h" />
<ClInclude Include="..\utils\libfat\directory.h" />
<ClInclude Include="..\utils\libfat\disc.h" />
<ClInclude Include="..\utils\libfat\disc_io.h" />
<ClInclude Include="..\utils\libfat\fat.h" />
<ClInclude Include="..\utils\libfat\fatdir.h" />
<ClInclude Include="..\utils\libfat\fatfile.h" />
<ClInclude Include="..\utils\libfat\filetime.h" />
<ClInclude Include="..\utils\libfat\file_allocation_table.h" />
<ClInclude Include="..\utils\libfat\libfat_pc.h" />
<ClInclude Include="..\utils\libfat\libfat_public_api.h" />
<ClInclude Include="..\utils\libfat\lock.h" />
<ClInclude Include="..\utils\libfat\mem_allocate.h" />
<ClInclude Include="..\utils\libfat\partition.h" />
<ClInclude Include="..\utils\tinyxml\tinystr.h" />
<ClInclude Include="..\utils\tinyxml\tinyxml.h" />
<ClInclude Include="..\utils\vfat.h" />
<ClInclude Include="..\version.h" />
<ClInclude Include="..\wifi.h" />
<ClInclude Include="..\utils\xstring.h" />
<ClInclude Include="..\gdbstub.h" />
<ClInclude Include="..\utils\ConvertUTF.h" />
<ClInclude Include="..\utils\guid.h" />
<ClInclude Include="..\utils\md5.h" />
<ClInclude Include="..\utils\task.h" />
<ClInclude Include="..\utils\valuearray.h" />
<ClInclude Include="..\utils\decrypt\crc.h" />
<ClInclude Include="..\utils\decrypt\decrypt.h" />
<ClInclude Include="..\utils\decrypt\header.h" />
<ClInclude Include="..\metaspu\metaspu.h" />
<ClInclude Include="..\metaspu\SndOut.h" />
<ClInclude Include="..\metaspu\win32\Dialogs.h" />
<ClInclude Include="..\metaspu\SoundTouch\AAFilter.h" />
<ClInclude Include="..\metaspu\SoundTouch\BPMDetect.h" />
<ClInclude Include="..\metaspu\SoundTouch\cpu_detect.h" />
<ClInclude Include="..\metaspu\SoundTouch\FIFOSampleBuffer.h" />
<ClInclude Include="..\metaspu\SoundTouch\FIFOSamplePipe.h" />
<ClInclude Include="..\metaspu\SoundTouch\FIRFilter.h" />
<ClInclude Include="..\metaspu\SoundTouch\RateTransposer.h" />
<ClInclude Include="..\metaspu\SoundTouch\SoundTouch.h" />
<ClInclude Include="..\metaspu\SoundTouch\STTypes.h" />
<ClInclude Include="..\metaspu\SoundTouch\TDStretch.h" />
<ClInclude Include="..\metaspu\SoundTouch\WavFile.h" />
<ClInclude Include="AboutBox.h" />
<ClInclude Include="aviout.h" />
<ClInclude Include="cheatsWin.h" />
@ -600,10 +602,10 @@
<ClInclude Include="tileView.h" />
</ItemGroup>
<ItemGroup>
<None Include="..\..\filter\hq4x.dat" />
<None Include="..\..\instruction_tabdef.inc" />
<None Include="..\..\thumb_tabdef.inc" />
<None Include="..\..\utils\AsmJit\COPYING.txt" />
<None Include="..\filter\hq4x.dat" />
<None Include="..\instruction_tabdef.inc" />
<None Include="..\thumb_tabdef.inc" />
<None Include="..\utils\AsmJit\COPYING.txt" />
<None Include="bitmap1.bmp" />
<None Include="bitmaps\FileBinary.ico" />
<None Include="bitmaps\FolderClosed.ico" />

File diff suppressed because it is too large Load Diff

View File

@ -316,13 +316,14 @@ static void do_video_conversion(AVIFile* avi, const u16* buffer)
int height = avi->prescaleLevel*384;
u8* outbuf = avi_file->convert_buffer + width*(height-1)*3;
for(int y=0;y<height;y++)
for (int y = 0; y < height; y++)
{
for(int x=0;x<width;x++)
for (int x = 0; x < width; x++)
{
u32 dst = ConvertColor555To8888Opaque<true>(*buffer++);
*(u32 *)outbuf = (dst & 0x00FFFFFF) | (*(u32 *)outbuf & 0xFF000000);
outbuf += 3;
u32 dst = ColorspaceConvert555To8888Opaque<true>(*buffer++);
*outbuf++ = dst & 0xFF;
*outbuf++ = (dst >> 8) & 0xFF;
*outbuf++ = (dst >> 16) & 0xFF;
}
outbuf -= width*3*2;

View File

@ -94,7 +94,7 @@
<!-- BETA_VERSION ? -->
<PreprocessorDefinitions Condition="'$(NDS_OPT)' == 'Debug'">_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="'$(NDS_OPT)' == 'Release'">RELEASE;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="'$(NDS_OPT)' == 'FastBuild'">RELEASE;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="'$(NDS_OPT)' == 'FastBuild'">FASTBUILD;RELEASE;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<!-- These work together -->

View File

@ -1919,7 +1919,7 @@ static void DoDisplay(bool firstTime)
//convert pixel format to 32bpp for compositing
//why do we do this over and over? well, we are compositing to
//filteredbuffer32bpp, and it needs to get refreshed each frame.
ConvertColorBuffer555To8888Opaque<true, false>((u16 *)video.srcBuffer, video.buffer, video.srcBufferSize / sizeof(u16));
ColorspaceConvertBuffer555To8888Opaque<true, false>((u16 *)video.srcBuffer, video.buffer, video.srcBufferSize / sizeof(u16));
if(firstTime)
{
@ -3282,6 +3282,13 @@ int _main()
cur3DCore = GPU3D_NULL;
else if(cur3DCore == GPU3D_NULL) // this value shouldn't be saved anymore
cur3DCore = GPU3D_DEFAULT;
if(cmdline.render3d == COMMANDLINE_RENDER3D_NONE) cur3DCore = GPU3D_NULL;
if(cmdline.render3d == COMMANDLINE_RENDER3D_SW) cur3DCore = GPU3D_SWRAST;
if(cmdline.render3d == COMMANDLINE_RENDER3D_OLDGL) cur3DCore = GPU3D_OPENGL_OLD;
if(cmdline.render3d == COMMANDLINE_RENDER3D_GL) cur3DCore = GPU3D_OPENGL_3_2; //no way of forcing it, at least not right now. I dont care.
if(cmdline.render3d == COMMANDLINE_RENDER3D_AUTOGL) cur3DCore = GPU3D_OPENGL_3_2; //this will fallback i guess
CommonSettings.GFX3D_HighResolutionInterpolateColor = GetPrivateProfileBool("3D", "HighResolutionInterpolateColor", 1, IniName);
CommonSettings.GFX3D_EdgeMark = GetPrivateProfileBool("3D", "EnableEdgeMark", 1, IniName);
CommonSettings.GFX3D_Fog = GetPrivateProfileBool("3D", "EnableFog", 1, IniName);

View File

@ -526,7 +526,7 @@ void gfx3d_deinit()
void gfx3d_reset()
{
CurrentRenderer->RenderFinish();
GPU->ForceRender3DFinishAndFlush(false);
#ifdef _SHOW_VTX_COUNTERS
max_polys = max_verts = 0;
@ -627,6 +627,53 @@ FORCEINLINE s32 vec3dot_fixed32(const s32* a, const s32* b) {
return sfx32_shiftdown(fx32_mul(a[0],b[0]) + fx32_mul(a[1],b[1]) + fx32_mul(a[2],b[2]));
}
//---------------
//I'm going to start naming these functions GEM, for GEOMETRY ENGINE MATH.
//Pretty much any math function in this file should be explicit about how it's handling precision.
//Handling that stuff generically globally is not a winning proposition.
//Multiplies two signed 32-bit values and returns the full signed 64-bit product,
//so nothing is lost to 32-bit overflow.
FORCEINLINE s64 GEM_Mul32x32To64(const s32 a, const s32 b)
{
#ifdef _MSC_VER
	//MSVC intrinsic: 32x32 -> 64 bit signed multiply
	const s64 product = __emul(a,b);
#else
	//widen both operands first so the multiply itself is carried out in 64 bits
	const s64 wideA = (s64)a;
	const s64 wideB = (s64)b;
	const s64 product = wideA*wideB;
#endif
	return product;
}
//Reduces a 64-bit intermediate to 32 bits via fx32_shiftdown, clamping instead of wrapping:
//values above 0x000007FFFFFFFFFF yield 0x7FFFFFFF and values below 0xFFFFF80000000000
//(read as a signed 64-bit number) yield 0x80000000.
static s32 GEM_SaturateAndShiftdown36To32(const s64 val)
{
	const s64 upperLimit = (s64)0x000007FFFFFFFFFFULL;
	const s64 lowerLimit = (s64)0xFFFFF80000000000ULL;

	if(val > upperLimit)
	{
		//too large: saturate to the biggest positive result
		return (s32)0x7FFFFFFFU;
	}
	else if(val < lowerLimit)
	{
		//too small: saturate to the most negative result
		return (s32)0x80000000U;
	}

	//in range: plain shiftdown
	return fx32_shiftdown(val);
}
static void GEM_TransformVertex(const s32 *matrix, s32 *vecPtr)
{
const s32 x = vecPtr[0];
const s32 y = vecPtr[1];
const s32 z = vecPtr[2];
const s32 w = vecPtr[3];
//saturation logic is most carefully tested by:
//+ spectrobes beyond the portals excavation blower and drill tools: sets very large overflowing +x,+y in the modelview matrix to push things offscreen
//You can see this happening quite clearly: vertices will get translated to extreme values and overflow from a 7FFF-like to an 8000-like
//but if it's done wrongly, you can get bugs in:
//+ kingdom hearts re-coded: first conversation with cast characters will place them oddly with something overflowing to about 0xA???????
//other test cases that cropped up during this development, but are probably not actually related to this after all
//+ SM64: outside castle skybox
//+ NSMB: mario head screen wipe
vecPtr[0] = GEM_SaturateAndShiftdown36To32(GEM_Mul32x32To64(x,matrix[0]) + GEM_Mul32x32To64(y,matrix[4]) + GEM_Mul32x32To64(z,matrix [8]) + GEM_Mul32x32To64(w,matrix[12]));
vecPtr[1] = GEM_SaturateAndShiftdown36To32(GEM_Mul32x32To64(x,matrix[1]) + GEM_Mul32x32To64(y,matrix[5]) + GEM_Mul32x32To64(z,matrix[ 9]) + GEM_Mul32x32To64(w,matrix[13]));
vecPtr[2] = GEM_SaturateAndShiftdown36To32(GEM_Mul32x32To64(x,matrix[2]) + GEM_Mul32x32To64(y,matrix[6]) + GEM_Mul32x32To64(z,matrix[10]) + GEM_Mul32x32To64(w,matrix[14]));
vecPtr[3] = GEM_SaturateAndShiftdown36To32(GEM_Mul32x32To64(x,matrix[3]) + GEM_Mul32x32To64(y,matrix[7]) + GEM_Mul32x32To64(z,matrix[11]) + GEM_Mul32x32To64(w,matrix[15]));
}
//---------------
#define SUBMITVERTEX(ii, nn) polylist->list[polylist->count].vertIndexes[ii] = tempVertInfo.map[nn];
//Submit a vertex to the GE
static void SetVertex()
@ -658,16 +705,9 @@ static void SetVertex()
return;
if(polylist->count >= POLYLIST_SIZE)
return;
//TODO - think about keeping the clip matrix concatenated,
//so that we only have to multiply one matrix here
//(we could lazy cache the concatenated clip matrix and only generate it
//when we need to)
MatrixMultVec4x4_M2(mtxCurrent[0], coordTransformed);
//printf("%f %f %f\n",s16coord[0]/4096.0f,s16coord[1]/4096.0f,s16coord[2]/4096.0f);
//printf("x %f %f %f %f\n",mtxCurrent[0][0]/4096.0f,mtxCurrent[0][1]/4096.0f,mtxCurrent[0][2]/4096.0f,mtxCurrent[0][3]/4096.0f);
//printf(" = %f %f %f %f\n",coordTransformed[0]/4096.0f,coordTransformed[1]/4096.0f,coordTransformed[2]/4096.0f,coordTransformed[3]/4096.0f);
GEM_TransformVertex(mtxCurrent[1],coordTransformed); //modelview
GEM_TransformVertex(mtxCurrent[0],coordTransformed); //projection
//TODO - culling should be done here.
//TODO - viewport transform?
@ -1484,8 +1524,9 @@ static void gfx3d_glViewPort(u32 v)
static BOOL gfx3d_glBoxTest(u32 v)
{
//printf("boxtest\n");
MMU_new.gxstat.tr = 0; // clear boxtest bit
MMU_new.gxstat.tb = 1; // busy
//clear result flag. busy flag has been set by fifo component already
MMU_new.gxstat.tr = 0;
BTcoords[BTind++] = v & 0xFFFF;
BTcoords[BTind++] = v >> 16;
@ -1493,9 +1534,11 @@ static BOOL gfx3d_glBoxTest(u32 v)
if (BTind < 5) return FALSE;
BTind = 0;
MMU_new.gxstat.tb = 0; // clear busy
GFX_DELAY(103);
//now that we're executing this, we're not busy anymore
MMU_new.gxstat.tb = 0;
#if 0
INFO("BoxTEST: x %f y %f width %f height %f depth %f\n",
BTcoords[0], BTcoords[1], BTcoords[2], BTcoords[3], BTcoords[4], BTcoords[5]);
@ -1608,27 +1651,31 @@ static BOOL gfx3d_glBoxTest(u32 v)
//if any portion of this poly was retained, then the test passes.
if (boxtestClipper.clippedPolyCounter > 0)
{
//printf("%06d PASS %d\n",boxcounter,gxFIFO.size);
//printf("%06d PASS %d\n",gxFIFO.size, i);
MMU_new.gxstat.tr = 1;
break;
}
else
{
}
//if(i==5) printf("%06d FAIL\n",gxFIFO.size);
}
if (MMU_new.gxstat.tr == 0)
{
//printf("%06d FAIL %d\n",boxcounter,gxFIFO.size);
}
//printf("%06d RESULT %d\n",gxFIFO.size, MMU_new.gxstat.tr);
return TRUE;
}
static BOOL gfx3d_glPosTest(u32 v)
{
//printf("postest\n");
//this is apparently tested by transformers decepticons and ultimate spiderman
//printf("POSTEST\n");
MMU_new.gxstat.tb = 1;
//clear result flag. busy flag has been set by fifo component already
MMU_new.gxstat.tr = 0;
//now that we're executing this, we're not busy anymore
MMU_new.gxstat.tb = 0;
PTcoords[PTind++] = float16table[v & 0xFFFF];
PTcoords[PTind++] = float16table[v >> 16];
@ -2252,23 +2299,12 @@ void gfx3d_VBlankSignal()
void gfx3d_VBlankEndSignal(bool skipFrame)
{
GPU->ForceRender3DFinishAndFlush(false);
if (!drawPending) return;
if (skipFrame) return;
drawPending = FALSE;
if (CurrentRenderer->GetRenderNeedsFinish())
{
bool need3DDisplayFramebuffer;
bool need3DCaptureFramebuffer;
CurrentRenderer->GetFramebufferFlushStates(need3DDisplayFramebuffer, need3DCaptureFramebuffer);
CurrentRenderer->SetFramebufferFlushStates(false, false);
CurrentRenderer->RenderFinish();
CurrentRenderer->SetFramebufferFlushStates(need3DDisplayFramebuffer, need3DCaptureFramebuffer);
CurrentRenderer->SetRenderNeedsFinish(false);
GPU->GetEventHandler()->DidRender3DEnd();
}
drawPending = FALSE;
GPU->GetEventHandler()->DidRender3DBegin();
@ -2486,7 +2522,7 @@ void gfx3d_Update3DFramebuffers(FragmentColor *framebufferRGBA6665, u16 *framebu
//-------------savestate
void gfx3d_savestate(EMUFILE* os)
{
CurrentRenderer->RenderFinish();
GPU->ForceRender3DFinishAndFlush(true);
//version
write32le(4,os);

View File

@ -427,8 +427,3 @@ void MatrixTranslate(s32 *matrix, const s32 *ptr)
});
}
void MatrixMultVec4x4_M2(const s32 *matrix, s32 *vecPtr)
{
MatrixMultVec4x4(matrix+16,vecPtr);
MatrixMultVec4x4(matrix,vecPtr);
}

View File

@ -276,13 +276,6 @@ FORCEINLINE void MatrixMultVec4x4(const float *matrix, float *vecPtr)
_mm_store_ps(vecPtr,_util_MatrixMultVec4x4_((SSE_MATRIX)matrix,_mm_load_ps(vecPtr)));
}
FORCEINLINE void MatrixMultVec4x4_M2(const float *matrix, float *vecPtr)
{
//there are hardly any gains from merging these manually
MatrixMultVec4x4(matrix+16,vecPtr);
MatrixMultVec4x4(matrix,vecPtr);
}
FORCEINLINE void MatrixMultVec3x3(const float * matrix, float * vecPtr)
{
const __m128 vec = _mm_load_ps(vecPtr);
@ -355,13 +348,6 @@ void MatrixMultiply(float * matrix, const float * rightMatrix);
void MatrixTranslate(float *matrix, const float *ptr);
void MatrixScale(float * matrix, const float * ptr);
FORCEINLINE void MatrixMultVec4x4_M2(const float *matrix, float *vecPtr)
{
//there are hardly any gains from merging these manually
MatrixMultVec4x4(matrix+16,vecPtr);
MatrixMultVec4x4(matrix,vecPtr);
}
template<int NUM_ROWS>
FORCEINLINE void vector_fix2float(float* matrix, const float divisor)
{
@ -373,8 +359,6 @@ FORCEINLINE void vector_fix2float(float* matrix, const float divisor)
void MatrixMultVec4x4 (const s32 *matrix, s32 *vecPtr);
void MatrixMultVec4x4_M2(const s32 *matrix, s32 *vecPtr);
void MatrixMultiply(s32* matrix, const s32* rightMatrix);
void MatrixScale(s32 *matrix, const s32 *ptr);
void MatrixTranslate(s32 *matrix, const s32 *ptr);

View File

@ -619,6 +619,21 @@ void BackupDevice::reset()
ensure((u32)savesize); //expand properly if necessary
addr_size = addr_size_for_old_save_type(savetype);
}
//automatically detect these hardcodes
if(state == DETECTING)
{
if(!memcmp(gameInfo.header.gameCode,"ASMK", 4)) addr_size = 1; //super mario 64 ds (KOR, which is different somehow)
else if(!memcmp(gameInfo.header.gameCode,"ASM", 3)) addr_size = 2; //super mario 64 ds
else if(!memcmp(gameInfo.header.gameCode,"BDE", 3)) addr_size = 2; // Dementium II
else if(!memcmp(gameInfo.header.gameCode,"AL3", 3)) addr_size = 1; //spongebob atlantis squarepantis.
else if(!memcmp(gameInfo.header.gameCode,"AH5", 3)) addr_size = 1; //over the hedge
else if(!memcmp(gameInfo.header.gameCode,"AVH", 3)) addr_size = 1; //over the hedge - Hammy Goes Nuts!
else if(!memcmp(gameInfo.header.gameCode,"AQ3", 3)) addr_size = 1; //spider-man 3
//if we found a whitelist match, we dont need to run detection
if(addr_size) state = RUNNING;
}
}
void BackupDevice::close_rom()
@ -662,36 +677,33 @@ void BackupDevice::detect()
addr_size = 1; //choose 1 just to keep the busted savefile from growing too big
msgbox->error("Catastrophic error while autodetecting save type.\nIt will need to be specified manually\n");
break;
case 2:
//the modern typical case for small eeproms
addr_size = 1;
break;
case 3:
//another modern typical case..
//but unfortunately we select this case on accident sometimes when what it meant to do was present the archaic 1+2 case
//(the archaic 1+2 case is: specifying one address byte, and then reading the first two bytes, instead of the first one byte, as most other games would do.)
//so, we're gonna hack in checks for the games that are doing this
addr_size = 2;
// TODO: will study a deep, why this happens (wrong detect size)
if(!memcmp(gameInfo.header.gameCode,"AL3", 3)) addr_size = 1; //spongebob atlantis squarepantis.
if(!memcmp(gameInfo.header.gameCode,"AH5", 3)) addr_size = 1; //over the hedge
if(!memcmp(gameInfo.header.gameCode,"AVH", 3)) addr_size = 1; //over the hedge - Hammy Goes Nuts!
if(!memcmp(gameInfo.header.gameCode,"AQ3", 3)) addr_size = 1; //spider-man 3
break;
case 4:
//a modern typical case
addr_size = 3;
if(!memcmp(gameInfo.header.gameCode,"ASM", 3)) addr_size = 2; //super mario 64 ds
break;
default:
//the archaic case: write the address and then some modulo-4 number of bytes
//why modulo 4? who knows.
//SM64 (KOR) makes it here with autodetect_size=11 and nothing interesting in the buffer
addr_size = autodetect_size & 3;
if(!memcmp(gameInfo.header.gameCode,"BDE", 3)) addr_size = 2; // Dementium II
//SM64 (KOR) makes it here with autodetect_size=11 and nothing interesting in the buffer
//we whitelisted it earlier though
break;
}

View File

@ -604,11 +604,11 @@ Render3DError Render3D::FlushFramebuffer(const FragmentColor *__restrict srcFram
{
if ( (this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev) )
{
ConvertColorBuffer8888To6665<false>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
ColorspaceConvertBuffer8888To6665<false, false>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
}
else if ( (this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev) )
{
ConvertColorBuffer6665To8888<false>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
ColorspaceConvertBuffer6665To8888<false, false>((u32 *)srcFramebuffer, (u32 *)dstFramebuffer, pixCount);
}
else if ( ((this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev)) ||
((this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev)) )
@ -621,11 +621,11 @@ Render3DError Render3D::FlushFramebuffer(const FragmentColor *__restrict srcFram
{
if (this->_outputFormat == NDSColorFormat_BGR666_Rev)
{
ConvertColorBuffer6665To5551<false, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
ColorspaceConvertBuffer6665To5551<false, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
}
else if (this ->_outputFormat == NDSColorFormat_BGR888_Rev)
{
ConvertColorBuffer8888To5551<false, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
ColorspaceConvertBuffer8888To5551<false, false>((u32 *)srcFramebuffer, dstRGBA5551, pixCount);
}
}

View File

@ -30,6 +30,10 @@
#include "MMU.h"
#include "NDSSystem.h"
#ifdef ENABLE_SSE2
#include "./utils/colorspacehandler/colorspacehandler_SSE2.h"
#endif
using std::min;
using std::max;
@ -451,13 +455,13 @@ public:
if (TEXFORMAT == TexFormat_15bpp)
{
ConvertColor555To6665Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
ConvertColor555To6665Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
}
else
{
ConvertColor555To8888Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
ConvertColor555To8888Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
}
// Set converted colors to 0 if the palette index is 0.
@ -517,13 +521,13 @@ public:
if (TEXFORMAT == TexFormat_15bpp)
{
ConvertColor555To6665Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
ConvertColor555To6665Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
}
else
{
ConvertColor555To8888Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
ConvertColor555To8888Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
}
_mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]);
@ -580,13 +584,13 @@ public:
if (TEXFORMAT == TexFormat_15bpp)
{
ConvertColor555To6665Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
ConvertColor555To6665Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
}
else
{
ConvertColor555To8888Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
ConvertColor555To8888Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
}
// Set converted colors to 0 if the palette index is 0.
@ -646,13 +650,13 @@ public:
if (TEXFORMAT == TexFormat_15bpp)
{
ConvertColor555To6665Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
ConvertColor555To6665Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
}
else
{
ConvertColor555To8888Opaque<false>(palColor0, convertedColor[0], convertedColor[1]);
ConvertColor555To8888Opaque<false>(palColor1, convertedColor[2], convertedColor[3]);
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]);
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]);
}
_mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]);
@ -881,11 +885,11 @@ public:
tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo);
tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo);
ConvertColor555To6665<false>(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]);
ColorspaceConvert555To6665_SSE2<false>(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]);
tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi);
tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi);
ConvertColor555To6665<false>(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]);
ColorspaceConvert555To6665_SSE2<false>(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]);
}
else
{
@ -895,11 +899,11 @@ public:
tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo);
tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo);
ConvertColor555To8888<false>(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]);
ColorspaceConvert555To8888_SSE2<false>(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]);
tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi);
tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi);
ConvertColor555To8888<false>(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]);
ColorspaceConvert555To8888_SSE2<false>(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]);
}
_mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]);

View File

@ -19,10 +19,6 @@
#ifndef TYPES_HPP
#define TYPES_HPP
#include <retro_miscellaneous.h>
#include <retro_inline.h>
#include <math/fxp.h>
//analyze microsoft compilers
#ifdef _MSC_VER
#define HOST_WINDOWS
@ -80,6 +76,18 @@
#ifdef __SSE4_2__
#define ENABLE_SSE4_2
#endif
#ifdef __AVX__
#define ENABLE_AVX
#endif
#ifdef __AVX2__
#define ENABLE_AVX2
#endif
#ifdef __ALTIVEC__
#define ENABLE_ALTIVEC
#endif
#endif
#ifdef _MSC_VER
@ -148,6 +156,14 @@
#define _CDECL_
#endif
#ifndef INLINE
#if defined(_MSC_VER) || defined(__INTEL_COMPILER)
#define INLINE _inline
#else
#define INLINE inline
#endif
#endif
#ifndef FORCEINLINE
#if defined(_MSC_VER) || defined(__INTEL_COMPILER)
#define FORCEINLINE __forceinline
@ -219,6 +235,38 @@ typedef u32 uint32;
#define uint32 u32 //uint32 is defined in Leopard somewhere, avoid conflicts
#endif
#ifdef ENABLE_ALTIVEC
#ifndef __APPLE_ALTIVEC__
#include <altivec.h>
#endif
typedef vector unsigned char v128u8;
typedef vector signed char v128s8;
typedef vector unsigned short v128u16;
typedef vector signed short v128s16;
typedef vector unsigned int v128u32;
typedef vector signed int v128s32;
#endif
#ifdef ENABLE_SSE2
#include <emmintrin.h>
typedef __m128i v128u8;
typedef __m128i v128s8;
typedef __m128i v128u16;
typedef __m128i v128s16;
typedef __m128i v128u32;
typedef __m128i v128s32;
#endif
#ifdef ENABLE_AVX2
#include <immintrin.h>
typedef __m256i v256u8;
typedef __m256i v256s8;
typedef __m256i v256u16;
typedef __m256i v256s16;
typedef __m256i v256u32;
typedef __m256i v256s32;
#endif
/*---------- GPU3D fixed-points types -----------*/
typedef s32 f32;
@ -266,8 +314,20 @@ typedef int desmume_BOOL;
#define FALSE 0
#endif
#ifdef __BIG_ENDIAN__
#ifndef WORDS_BIGENDIAN
#define WORDS_BIGENDIAN
#endif
#endif
#ifdef WORDS_BIGENDIAN
# define LOCAL_BE 1
#else
# define LOCAL_LE 1
#endif
/* little endian (ds' endianess) to local endianess convert macros */
#ifdef MSB_FIRST /* local arch is big endian */
#ifdef LOCAL_BE /* local arch is big endian */
# define LE_TO_LOCAL_16(x) ((((x)&0xff)<<8)|(((x)>>8)&0xff))
# define LE_TO_LOCAL_32(x) ((((x)&0xff)<<24)|(((x)&0xff00)<<8)|(((x)>>8)&0xff00)|(((x)>>24)&0xff))
# define LE_TO_LOCAL_64(x) ((((x)&0xff)<<56)|(((x)&0xff00)<<40)|(((x)&0xff0000)<<24)|(((x)&0xff000000)<<8)|(((x)>>8)&0xff000000)|(((x)>>24)&0xff0000)|(((x)>>40)&0xff00)|(((x)>>56)&0xff))
@ -287,6 +347,8 @@ typedef int desmume_BOOL;
#define MB(x) ((x)*1024*1024)
#define KB(x) ((x)*1024)
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#define CPU_STR(c) ((c==ARM9)?"ARM9":"ARM7")
typedef enum
{
@ -294,6 +356,28 @@ typedef enum
ARM7 = 1
} cpu_id_t;
///endian-flips count bytes: reverses, in place, the order of the count bytes starting at src.
///count should be even and nonzero; for any other count the buffer is left untouched.
inline void FlipByteOrder(u8 *src, u32 count)
{
	if((count&1) || !count) return; /* This shouldn't happen. */

	u8 *start=src;
	u8 *end=src+count-1;

	//Each iteration swaps one pair of bytes, so only count/2 iterations are needed.
	//Running the loop count times would swap every pair twice and hand back the
	//original byte order.
	for(u32 swapsLeft=count>>1; swapsLeft!=0; swapsLeft--)
	{
		const u8 tmp=*end;
		*end=*start;
		*start=tmp;

		end--;
		start++;
	}
}
inline u64 double_to_u64(double d) {
union {
u64 a;
@ -312,6 +396,68 @@ inline double u64_to_double(u64 u) {
return fuxor.b;
}
//Returns the raw 32-bit pattern stored in a float (no numeric conversion takes place).
inline u32 float_to_u32(float f) {
	//write through the float member, read back through the integer member
	union {
		u32 bits;
		float value;
	} pun;

	pun.value = f;
	return pun.bits;
}
//Returns the float whose storage holds the given raw 32-bit pattern (no numeric conversion takes place).
inline float u32_to_float(u32 u) {
	//write through the integer member, read back through the float member
	union {
		u32 bits;
		float value;
	} pun;

	pun.bits = u;
	return pun.value;
}
///stores a 32bit value into the provided byte array in guaranteed little endian form
///(buf[0] receives the least significant byte, buf[3] the most significant)
inline void en32lsb(u8 *buf, u32 morp)
{
	for(int byteIndex=0; byteIndex<4; byteIndex++)
	{
		//peel off one byte per pass, lowest byte first
		buf[byteIndex]=(u8)(morp>>(8*byteIndex));
	}
}
///stores a 16bit value into the provided byte array in little endian form
///(buf[0] receives the low byte, buf[1] the high byte)
inline void en16lsb(u8* buf, u16 morp)
{
	const u8 lowByte=(u8)(morp&0xFF);
	const u8 highByte=(u8)((morp>>8)&0xFF);

	buf[0]=lowByte;
	buf[1]=highByte;
}
///unpacks a 64bit little endian value from the provided byte array into host byte order
///(morp[0] is the least significant byte, morp[7] the most significant; 8 bytes are read)
inline u64 de64lsb(u8 *morp)
{
	//Every byte is widened to u64 before it is shifted. Shifting morp[3] as a plain
	//(promoted) int would produce a negative int whenever its top bit is set, and that
	//negative value would then be sign-extended into the upper 32 bits of the result.
	return (u64)morp[0]|((u64)morp[1]<<8)|((u64)morp[2]<<16)|((u64)morp[3]<<24)|((u64)morp[4]<<32)|((u64)morp[5]<<40)|((u64)morp[6]<<48)|((u64)morp[7]<<56);
}
///unpacks a 32bit little endian value from the provided byte array into host byte order
///(morp[0] is the least significant byte, morp[3] the most significant; 4 bytes are read)
inline u32 de32lsb(u8 *morp)
{
	//Shift as u32 rather than as a promoted int, so that a top byte of 0x80 or above
	//is never shifted into the sign bit of a signed type.
	return (u32)morp[0]|((u32)morp[1]<<8)|((u32)morp[2]<<16)|((u32)morp[3]<<24);
}
///unpacks a 16bit little endian value from the provided byte array into host byte order
///(morp[0] is the low byte, morp[1] the high byte)
inline u16 de16lsb(u8 *morp)
{
	const int lowByte=morp[0];
	const int highByte=morp[1];

	return (u16)(lowByte|(highByte<<8));
}
#ifndef ARRAY_SIZE
//taken from winnt.h
extern "C++" // templates cannot be declared to have 'C' linkage
template <typename T, size_t N>
char (*BLAHBLAHBLAH( UNALIGNED T (&)[N] ))[N];
#define ARRAY_SIZE(A) (sizeof(*BLAHBLAHBLAH(A)))
#endif
//fairly standard for loop macros
#define MACRODO1(TRICK,TODO) { const size_t X = TRICK; TODO; }
#define MACRODO2(X,TODO) { MACRODO1((X),TODO) MACRODO1(((X)+1),TODO) }
@ -385,30 +531,37 @@ template<typename T> inline void reconstruct(T* t) {
new(t) T();
}
/* fixed point speedup macros */
//-------------fixed point speedup macros
FORCEINLINE s32 sfx32_shiftdown(const s64 a)
#ifdef _MSC_VER
#include <intrin.h>
#endif
FORCEINLINE s64 fx32_mul(const s32 a, const s32 b)
{
s64 shifted = fx32_shiftdown(a);
#ifdef _MSC_VER
return __emul(a,b);
#else
return ((s64)a)*((s64)b);
#endif
}
/*either matrix math is happening at higher precision (an extra bit would suffice,
* I think), or the sums sent to this are saturated.
*
*tested by: spectrobes beyond the portals excavation blower
*(it sets very large +x,+y in the modelview matrix to push things offscreen,
*but the +y will overflow and become negative if we're not careful)
*
*I didnt think very hard about what would be fastest here on 32bit systems
*NOTE: this was intended for use in MatrixMultVec4x4_M2; it may not be appropriate for
* other uses of fx32_shiftdown.
*if this causes problems we should refactor the math routines a bit to take care of
* saturating in another function
*/
if(shifted>(s32)0x7FFFFFFF)
return 0x7FFFFFFF;
if(shifted<=(s32)0x80000000)
return 0x80000000;
return shifted;
//Shifts a 64-bit value right by 12 bits and truncates the result to 32 bits.
//No saturation is performed here.
FORCEINLINE s32 fx32_shiftdown(const s64 a)
{
#ifdef _MSC_VER
	//MSVC intrinsic for a 64-bit right shift
	const s64 shifted = __ll_rshift(a,12);
#else
	const s64 shifted = a>>12;
#endif
	return (s32)shifted;
}
//Widens a 32-bit value to 64 bits and shifts it left by 12 bits.
FORCEINLINE s64 fx32_shiftup(const s32 a)
{
#ifdef _MSC_VER
	//MSVC intrinsic for a 64-bit left shift
	const s64 shifted = __ll_lshift(a,12);
#else
	const s64 widened = (s64)a;
	const s64 shifted = widened<<12;
#endif
	return shifted;
}
#endif

View File

@ -0,0 +1,776 @@
/*
Copyright (C) 2016 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/
#include "colorspacehandler.h"
#if defined(ENABLE_AVX2)
#include "colorspacehandler_AVX2.h"
#elif defined(ENABLE_SSE2)
#include "colorspacehandler_SSE2.h"
#elif defined(ENABLE_ALTIVEC)
#include "colorspacehandler_AltiVec.h"
#endif
#if defined(ENABLE_SSE2) || defined(ENABLE_ALTIVEC)
#define USEVECTORSIZE_128
#endif
#if defined(ENABLE_AVX2)
#define USEVECTORSIZE_256
#endif
// By default, the hand-coded vectorized code will be used instead of a compiler's built-in
// autovectorization (if supported). However, if USEMANUALVECTORIZATION is not defined, then
// the compiler will use autovectorization (if supported).
#if defined(USEVECTORSIZE_128) || defined(USEVECTORSIZE_256) || defined(USEVECTORSIZE_512)
// Comment out USEMANUALVECTORIZATION to disable the hand-coded vectorized code.
#define USEMANUALVECTORIZATION
#endif
#ifdef USEMANUALVECTORIZATION
#if defined(ENABLE_AVX2)
static const ColorspaceHandler_AVX2 csh;
#elif defined(ENABLE_SSE2)
static const ColorspaceHandler_SSE2 csh;
#elif defined(ENABLE_ALTIVEC)
static const ColorspaceHandler_AltiVec csh;
#else
static const ColorspaceHandler csh;
#endif
#else
static const ColorspaceHandler csh;
#endif
CACHE_ALIGN u32 color_555_to_6665_opaque[32768];
CACHE_ALIGN u32 color_555_to_6665_opaque_swap_rb[32768];
CACHE_ALIGN u32 color_555_to_666[32768];
CACHE_ALIGN u32 color_555_to_8888_opaque[32768];
CACHE_ALIGN u32 color_555_to_8888_opaque_swap_rb[32768];
CACHE_ALIGN u32 color_555_to_888[32768];
//is this a crazy idea? this table spreads 5 bits evenly over 31 from exactly 0 to INT_MAX
CACHE_ALIGN const u32 material_5bit_to_31bit[] = {
0x00000000, 0x04210842, 0x08421084, 0x0C6318C6,
0x10842108, 0x14A5294A, 0x18C6318C, 0x1CE739CE,
0x21084210, 0x25294A52, 0x294A5294, 0x2D6B5AD6,
0x318C6318, 0x35AD6B5A, 0x39CE739C, 0x3DEF7BDE,
0x42108421, 0x46318C63, 0x4A5294A5, 0x4E739CE7,
0x5294A529, 0x56B5AD6B, 0x5AD6B5AD, 0x5EF7BDEF,
0x6318C631, 0x6739CE73, 0x6B5AD6B5, 0x6F7BDEF7,
0x739CE739, 0x77BDEF7B, 0x7BDEF7BD, 0x7FFFFFFF
};
// 5-bit to 6-bit conversions use this formula -- dst = (src == 0) ? 0 : (2*src) + 1
// Reference GBATEK: http://problemkaputt.de/gbatek.htm#ds3dtextureblending
CACHE_ALIGN const u8 material_5bit_to_6bit[] = {
0x00, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F,
0x21, 0x23, 0x25, 0x27, 0x29, 0x2B, 0x2D, 0x2F,
0x31, 0x33, 0x35, 0x37, 0x39, 0x3B, 0x3D, 0x3F
};
CACHE_ALIGN const u8 material_5bit_to_8bit[] = {
0x00, 0x08, 0x10, 0x18, 0x21, 0x29, 0x31, 0x39,
0x42, 0x4A, 0x52, 0x5A, 0x63, 0x6B, 0x73, 0x7B,
0x84, 0x8C, 0x94, 0x9C, 0xA5, 0xAD, 0xB5, 0xBD,
0xC6, 0xCE, 0xD6, 0xDE, 0xE7, 0xEF, 0xF7, 0xFF
};
CACHE_ALIGN const u8 material_6bit_to_8bit[] = {
0x00, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C,
0x20, 0x24, 0x28, 0x2C, 0x30, 0x34, 0x38, 0x3C,
0x41, 0x45, 0x49, 0x4D, 0x51, 0x55, 0x59, 0x5D,
0x61, 0x65, 0x69, 0x6D, 0x71, 0x75, 0x79, 0x7D,
0x82, 0x86, 0x8A, 0x8E, 0x92, 0x96, 0x9A, 0x9E,
0xA2, 0xA6, 0xAA, 0xAE, 0xB2, 0xB6, 0xBA, 0xBE,
0xC3, 0xC7, 0xCB, 0xCF, 0xD3, 0xD7, 0xDB, 0xDF,
0xE3, 0xE7, 0xEB, 0xEF, 0xF3, 0xF7, 0xFB, 0xFF
};
CACHE_ALIGN const u8 material_3bit_to_8bit[] = {
0x00, 0x24, 0x49, 0x6D, 0x92, 0xB6, 0xDB, 0xFF
};
//maybe not very precise
CACHE_ALIGN const u8 material_3bit_to_5bit[] = {
0, 4, 8, 13, 17, 22, 26, 31
};
//TODO - generate this in the static init method more accurately
CACHE_ALIGN const u8 material_3bit_to_6bit[] = {
0, 8, 16, 26, 34, 44, 52, 63
};
// Fills the six 32768-entry lookup tables that expand a 15-bit color (three 5-bit
// components) into 6-bit or 8-bit components, one entry per possible 15-bit value:
//   - color_555_to_666 / color_555_to_888 hold just the three expanded components.
//   - the *_opaque tables additionally set the top byte to 0x1F (6665) or 0xFF (8888).
//   - the *_swap_rb tables exchange which of the lowest and highest 5-bit components
//     ends up in the low byte of the entry.
// Every entry is passed through LE_TO_LOCAL_32 before it is stored.
//
// The tables are built on the first call only; later calls return immediately.
void ColorspaceHandlerInit()
{
	static bool needInitTables = true;
	
	if (!needInitTables)
	{
		return;
	}
	
#define RGB15TO18_BITLOGIC(col)         ( (material_5bit_to_6bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | material_5bit_to_6bit[(col)&0x1F] )
#define RGB15TO18_SWAP_RB_BITLOGIC(col) ( material_5bit_to_6bit[((col)>>10)&0x1F] | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_6bit[(col)&0x1F]<<16) )
#define RGB15TO24_BITLOGIC(col)         ( (material_5bit_to_8bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | material_5bit_to_8bit[(col)&0x1F] )
#define RGB15TO24_SWAP_RB_BITLOGIC(col) ( material_5bit_to_8bit[((col)>>10)&0x1F] | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_8bit[(col)&0x1F]<<16) )
	
	for (size_t i = 0; i < 32768; i++)
	{
		color_555_to_666[i]                 = LE_TO_LOCAL_32( RGB15TO18_BITLOGIC(i) );
		color_555_to_6665_opaque[i]         = LE_TO_LOCAL_32( RGB15TO18_BITLOGIC(i) | 0x1F000000 );
		color_555_to_6665_opaque_swap_rb[i] = LE_TO_LOCAL_32( RGB15TO18_SWAP_RB_BITLOGIC(i) | 0x1F000000 );
		
		color_555_to_888[i]                 = LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) );
		color_555_to_8888_opaque[i]         = LE_TO_LOCAL_32( RGB15TO24_BITLOGIC(i) | 0xFF000000 );
		color_555_to_8888_opaque_swap_rb[i] = LE_TO_LOCAL_32( RGB15TO24_SWAP_RB_BITLOGIC(i) | 0xFF000000 );
	}
	
	// Clear the flag so the guard above actually skips the rebuild next time. Without
	// this, every call would regenerate all six tables.
	needInitTables = false;
}
// Converts one 15-bit color to an opaque 32-bit 8888 color via the precomputed
// lookup tables. Bit 15 of src is ignored. SWAP_RB selects the table with the
// R and B components exchanged.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert555To8888Opaque(const u16 src)
{
	const u16 color15 = src & 0x7FFF;
	
	if (SWAP_RB)
	{
		return COLOR555TO8888_OPAQUE_SWAP_RB(color15);
	}
	
	return COLOR555TO8888_OPAQUE(color15);
}
// Converts one 15-bit color to an opaque 6665 color (stored in 32 bits) via the
// precomputed lookup tables. Bit 15 of src is ignored. SWAP_RB selects the table
// with the R and B components exchanged.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert555To6665Opaque(const u16 src)
{
	const u16 color15 = src & 0x7FFF;
	
	if (SWAP_RB)
	{
		return COLOR555TO6665_OPAQUE_SWAP_RB(color15);
	}
	
	return COLOR555TO6665_OPAQUE(color15);
}
// Reduces an 8888 color to 6665 by truncation: R, G and B drop their two low
// bits, alpha drops its three low bits. SWAP_RB exchanges R and B on the way.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert8888To6665(FragmentColor srcColor)
{
	const u8 red8  = (SWAP_RB) ? srcColor.b : srcColor.r;
	const u8 blue8 = (SWAP_RB) ? srcColor.r : srcColor.b;
	
	FragmentColor reduced;
	reduced.r = red8 >> 2;
	reduced.g = srcColor.g >> 2;
	reduced.b = blue8 >> 2;
	reduced.a = srcColor.a >> 3;
	
	return reduced.color;
}
// Packed-value overload: views the 32-bit value as a FragmentColor and forwards
// to the FragmentColor overload of ColorspaceConvert8888To6665.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert8888To6665(u32 srcColor)
{
	FragmentColor unpacked;
	unpacked.color = srcColor;
	
	return ColorspaceConvert8888To6665<SWAP_RB>(unpacked);
}
// Expands a 6665 color to 8888 using the material_6bit_to_8bit table for R, G
// and B and the material_5bit_to_8bit table for alpha. SWAP_RB exchanges R and B.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert6665To8888(FragmentColor srcColor)
{
	const u8 red6  = (SWAP_RB) ? srcColor.b : srcColor.r;
	const u8 blue6 = (SWAP_RB) ? srcColor.r : srcColor.b;
	
	FragmentColor expanded;
	expanded.r = material_6bit_to_8bit[red6];
	expanded.g = material_6bit_to_8bit[srcColor.g];
	expanded.b = material_6bit_to_8bit[blue6];
	expanded.a = material_5bit_to_8bit[srcColor.a];
	
	return expanded.color;
}
// Packed-value overload: views the 32-bit value as a FragmentColor and forwards
// to the FragmentColor overload of ColorspaceConvert6665To8888.
template <bool SWAP_RB>
FORCEINLINE u32 ColorspaceConvert6665To8888(u32 srcColor)
{
	FragmentColor unpacked;
	unpacked.color = srcColor;
	
	return ColorspaceConvert6665To8888<SWAP_RB>(unpacked);
}
// Reduces an 8888 color to 5551: each of R, G and B keeps its top five bits,
// and bit 15 is set whenever the source alpha is non-zero. SWAP_RB exchanges
// R and B.
template <bool SWAP_RB>
FORCEINLINE u16 ColorspaceConvert8888To5551(FragmentColor srcColor)
{
	const u8 red5   = ((SWAP_RB) ? srcColor.b : srcColor.r) >> 3;
	const u8 green5 = srcColor.g >> 3;
	const u8 blue5  = ((SWAP_RB) ? srcColor.r : srcColor.b) >> 3;
	const u16 alphaBit = (srcColor.a == 0) ? 0x0000 : 0x8000;
	
	return R5G5B5TORGB15(red5, green5, blue5) | alphaBit;
}
// Packed-value overload: views the 32-bit value as a FragmentColor and forwards
// to the FragmentColor overload of ColorspaceConvert8888To5551.
template <bool SWAP_RB>
FORCEINLINE u16 ColorspaceConvert8888To5551(u32 srcColor)
{
	FragmentColor unpacked;
	unpacked.color = srcColor;
	
	return ColorspaceConvert8888To5551<SWAP_RB>(unpacked);
}
// Reduces a 6665 color to 5551: R6G6B6TORGB15 drops the low bit of each 6-bit
// component, and bit 15 is set whenever the source alpha is non-zero. SWAP_RB
// exchanges R and B.
template <bool SWAP_RB>
FORCEINLINE u16 ColorspaceConvert6665To5551(FragmentColor srcColor)
{
	const u8 red6  = (SWAP_RB) ? srcColor.b : srcColor.r;
	const u8 blue6 = (SWAP_RB) ? srcColor.r : srcColor.b;
	const u16 alphaBit = (srcColor.a == 0) ? 0x0000 : 0x8000;
	
	return R6G6B6TORGB15(red6, srcColor.g, blue6) | alphaBit;
}
// Packed-value overload: views the 32-bit value as a FragmentColor and forwards
// to the FragmentColor overload of ColorspaceConvert6665To5551.
template <bool SWAP_RB>
FORCEINLINE u16 ColorspaceConvert6665To5551(u32 srcColor)
{
	FragmentColor unpacked;
	unpacked.color = srcColor;
	
	return ColorspaceConvert6665To5551<SWAP_RB>(unpacked);
}
// Converts pixCount 15-bit pixels from src into opaque 32-bit 8888 pixels in dst.
//
// When USEMANUALVECTORIZATION is defined, the leading whole vector batches
// (8, 16 or 32 pixels depending on the USEVECTORSIZE_* macro) are handed to the
// matching csh handler method, which returns how many pixels it converted.
// Whatever is left over is converted one pixel at a time by the scalar loop.
template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
{
	size_t pixIdx = 0;
	
#ifdef USEMANUALVECTORIZATION
	
#if defined(USEVECTORSIZE_128)
	const size_t pixCountVector = pixCount - (pixCount % 8);
#elif defined(USEVECTORSIZE_256)
	const size_t pixCountVector = pixCount - (pixCount % 16);
#elif defined(USEVECTORSIZE_512)
	const size_t pixCountVector = pixCount - (pixCount % 32);
#endif
	
	// Both conditions are compile-time template constants.
	if (SWAP_RB && IS_UNALIGNED)
	{
		pixIdx = csh.ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector);
	}
	else if (SWAP_RB)
	{
		pixIdx = csh.ConvertBuffer555To8888Opaque_SwapRB(src, dst, pixCountVector);
	}
	else if (IS_UNALIGNED)
	{
		pixIdx = csh.ConvertBuffer555To8888Opaque_IsUnaligned(src, dst, pixCountVector);
	}
	else
	{
		pixIdx = csh.ConvertBuffer555To8888Opaque(src, dst, pixCountVector);
	}
	
#pragma LOOPVECTORIZE_DISABLE
	
#endif // USEMANUALVECTORIZATION
	
	// Scalar tail (or the whole buffer when manual vectorization is off).
	for (; pixIdx < pixCount; pixIdx++)
	{
		dst[pixIdx] = ColorspaceConvert555To8888Opaque<SWAP_RB>(src[pixIdx]);
	}
}
// Converts pixCount 15-bit pixels from src into opaque 6665 pixels (32 bits
// each) in dst.
//
// When USEMANUALVECTORIZATION is defined, the leading whole vector batches
// (8, 16 or 32 pixels depending on the USEVECTORSIZE_* macro) are handed to the
// matching csh handler method, which returns how many pixels it converted.
// Whatever is left over is converted one pixel at a time by the scalar loop.
template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount)
{
	size_t pixIdx = 0;
	
#ifdef USEMANUALVECTORIZATION
	
#if defined(USEVECTORSIZE_128)
	const size_t pixCountVector = pixCount - (pixCount % 8);
#elif defined(USEVECTORSIZE_256)
	const size_t pixCountVector = pixCount - (pixCount % 16);
#elif defined(USEVECTORSIZE_512)
	const size_t pixCountVector = pixCount - (pixCount % 32);
#endif
	
	// Both conditions are compile-time template constants.
	if (SWAP_RB && IS_UNALIGNED)
	{
		pixIdx = csh.ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector);
	}
	else if (SWAP_RB)
	{
		pixIdx = csh.ConvertBuffer555To6665Opaque_SwapRB(src, dst, pixCountVector);
	}
	else if (IS_UNALIGNED)
	{
		pixIdx = csh.ConvertBuffer555To6665Opaque_IsUnaligned(src, dst, pixCountVector);
	}
	else
	{
		pixIdx = csh.ConvertBuffer555To6665Opaque(src, dst, pixCountVector);
	}
	
#pragma LOOPVECTORIZE_DISABLE
	
#endif // USEMANUALVECTORIZATION
	
	// Scalar tail (or the whole buffer when manual vectorization is off).
	for (; pixIdx < pixCount; pixIdx++)
	{
		dst[pixIdx] = ColorspaceConvert555To6665Opaque<SWAP_RB>(src[pixIdx]);
	}
}
// Converts pixCount 8888 pixels from src into 6665 pixels in dst. Unlike the
// 555 converters, src and dst are not declared __restrict here.
//
// When USEMANUALVECTORIZATION is defined, the leading whole vector batches
// (4, 8 or 16 pixels depending on the USEVECTORSIZE_* macro) are handed to the
// matching csh handler method, which returns how many pixels it converted.
// Whatever is left over is converted one pixel at a time by the scalar loop.
template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount)
{
	size_t pixIdx = 0;
	
#ifdef USEMANUALVECTORIZATION
	
#if defined(USEVECTORSIZE_128)
	const size_t pixCountVector = pixCount - (pixCount % 4);
#elif defined(USEVECTORSIZE_256)
	const size_t pixCountVector = pixCount - (pixCount % 8);
#elif defined(USEVECTORSIZE_512)
	const size_t pixCountVector = pixCount - (pixCount % 16);
#endif
	
	// Both conditions are compile-time template constants.
	if (SWAP_RB && IS_UNALIGNED)
	{
		pixIdx = csh.ConvertBuffer8888To6665_SwapRB_IsUnaligned(src, dst, pixCountVector);
	}
	else if (SWAP_RB)
	{
		pixIdx = csh.ConvertBuffer8888To6665_SwapRB(src, dst, pixCountVector);
	}
	else if (IS_UNALIGNED)
	{
		pixIdx = csh.ConvertBuffer8888To6665_IsUnaligned(src, dst, pixCountVector);
	}
	else
	{
		pixIdx = csh.ConvertBuffer8888To6665(src, dst, pixCountVector);
	}
	
#pragma LOOPVECTORIZE_DISABLE
	
#endif // USEMANUALVECTORIZATION
	
	// Scalar tail (or the whole buffer when manual vectorization is off).
	for (; pixIdx < pixCount; pixIdx++)
	{
		dst[pixIdx] = ColorspaceConvert8888To6665<SWAP_RB>(src[pixIdx]);
	}
}
// Converts pixCount 6665 pixels from src into 8888 pixels in dst. Unlike the
// 555 converters, src and dst are not declared __restrict here.
//
// When USEMANUALVECTORIZATION is defined, the leading whole vector batches
// (4, 8 or 16 pixels depending on the USEVECTORSIZE_* macro) are handed to the
// matching csh handler method, which returns how many pixels it converted.
// Whatever is left over is converted one pixel at a time by the scalar loop.
template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount)
{
	size_t pixIdx = 0;
	
#ifdef USEMANUALVECTORIZATION
	
#if defined(USEVECTORSIZE_128)
	const size_t pixCountVector = pixCount - (pixCount % 4);
#elif defined(USEVECTORSIZE_256)
	const size_t pixCountVector = pixCount - (pixCount % 8);
#elif defined(USEVECTORSIZE_512)
	const size_t pixCountVector = pixCount - (pixCount % 16);
#endif
	
	// Both conditions are compile-time template constants.
	if (SWAP_RB && IS_UNALIGNED)
	{
		pixIdx = csh.ConvertBuffer6665To8888_SwapRB_IsUnaligned(src, dst, pixCountVector);
	}
	else if (SWAP_RB)
	{
		pixIdx = csh.ConvertBuffer6665To8888_SwapRB(src, dst, pixCountVector);
	}
	else if (IS_UNALIGNED)
	{
		pixIdx = csh.ConvertBuffer6665To8888_IsUnaligned(src, dst, pixCountVector);
	}
	else
	{
		pixIdx = csh.ConvertBuffer6665To8888(src, dst, pixCountVector);
	}
	
#pragma LOOPVECTORIZE_DISABLE
	
#endif // USEMANUALVECTORIZATION
	
	// Scalar tail (or the whole buffer when manual vectorization is off).
	for (; pixIdx < pixCount; pixIdx++)
	{
		dst[pixIdx] = ColorspaceConvert6665To8888<SWAP_RB>(src[pixIdx]);
	}
}
// Converts pixCount 8888 pixels from src into 16-bit 5551 pixels in dst.
//
// When USEMANUALVECTORIZATION is defined, the leading whole vector batches
// (8, 16 or 32 pixels depending on the USEVECTORSIZE_* macro) are handed to the
// matching csh handler method, which returns how many pixels it converted.
// Whatever is left over is converted one pixel at a time by the scalar loop.
template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount)
{
	size_t pixIdx = 0;
	
#ifdef USEMANUALVECTORIZATION
	
#if defined(USEVECTORSIZE_128)
	const size_t pixCountVector = pixCount - (pixCount % 8);
#elif defined(USEVECTORSIZE_256)
	const size_t pixCountVector = pixCount - (pixCount % 16);
#elif defined(USEVECTORSIZE_512)
	const size_t pixCountVector = pixCount - (pixCount % 32);
#endif
	
	// Both conditions are compile-time template constants.
	if (SWAP_RB && IS_UNALIGNED)
	{
		pixIdx = csh.ConvertBuffer8888To5551_SwapRB_IsUnaligned(src, dst, pixCountVector);
	}
	else if (SWAP_RB)
	{
		pixIdx = csh.ConvertBuffer8888To5551_SwapRB(src, dst, pixCountVector);
	}
	else if (IS_UNALIGNED)
	{
		pixIdx = csh.ConvertBuffer8888To5551_IsUnaligned(src, dst, pixCountVector);
	}
	else
	{
		pixIdx = csh.ConvertBuffer8888To5551(src, dst, pixCountVector);
	}
	
#pragma LOOPVECTORIZE_DISABLE
	
#endif // USEMANUALVECTORIZATION
	
	// Scalar tail (or the whole buffer when manual vectorization is off).
	for (; pixIdx < pixCount; pixIdx++)
	{
		dst[pixIdx] = ColorspaceConvert8888To5551<SWAP_RB>(src[pixIdx]);
	}
}
// Converts pixCount 6665 pixels from src into 16-bit 5551 pixels in dst.
//
// When USEMANUALVECTORIZATION is defined, the leading whole vector batches
// (8, 16 or 32 pixels depending on the USEVECTORSIZE_* macro) are handed to the
// matching csh handler method, which returns how many pixels it converted.
// Whatever is left over is converted one pixel at a time by the scalar loop.
template <bool SWAP_RB, bool IS_UNALIGNED>
void ColorspaceConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount)
{
	size_t pixIdx = 0;
	
#ifdef USEMANUALVECTORIZATION
	
#if defined(USEVECTORSIZE_128)
	const size_t pixCountVector = pixCount - (pixCount % 8);
#elif defined(USEVECTORSIZE_256)
	const size_t pixCountVector = pixCount - (pixCount % 16);
#elif defined(USEVECTORSIZE_512)
	const size_t pixCountVector = pixCount - (pixCount % 32);
#endif
	
	// Both conditions are compile-time template constants.
	if (SWAP_RB && IS_UNALIGNED)
	{
		pixIdx = csh.ConvertBuffer6665To5551_SwapRB_IsUnaligned(src, dst, pixCountVector);
	}
	else if (SWAP_RB)
	{
		pixIdx = csh.ConvertBuffer6665To5551_SwapRB(src, dst, pixCountVector);
	}
	else if (IS_UNALIGNED)
	{
		pixIdx = csh.ConvertBuffer6665To5551_IsUnaligned(src, dst, pixCountVector);
	}
	else
	{
		pixIdx = csh.ConvertBuffer6665To5551(src, dst, pixCountVector);
	}
	
#pragma LOOPVECTORIZE_DISABLE
	
#endif // USEMANUALVECTORIZATION
	
	// Scalar tail (or the whole buffer when manual vectorization is off).
	for (; pixIdx < pixCount; pixIdx++)
	{
		dst[pixIdx] = ColorspaceConvert6665To5551<SWAP_RB>(src[pixIdx]);
	}
}
// Scalar conversion of pixCount 15-bit pixels to opaque 8888, R/B order kept.
// Returns the number of pixels converted, which is always pixCount.
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	for (size_t pix = 0; pix < pixCount; pix++)
	{
		dst[pix] = ColorspaceConvert555To8888Opaque<false>(src[pix]);
	}
	
	return pixCount;
}
// Scalar conversion of pixCount 15-bit pixels to opaque 8888 with R and B swapped.
// Returns the number of pixels converted, which is always pixCount.
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	for (size_t pix = 0; pix < pixCount; pix++)
	{
		dst[pix] = ColorspaceConvert555To8888Opaque<true>(src[pix]);
	}
	
	return pixCount;
}
// Unaligned-buffer entry point. The base implementation simply reuses the
// base-class ConvertBuffer555To8888Opaque (explicitly qualified call).
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = this->ColorspaceHandler::ConvertBuffer555To8888Opaque(src, dst, pixCount);
	return convertedCount;
}
// Unaligned-buffer entry point with R/B swap. The base implementation simply
// reuses the base-class ConvertBuffer555To8888Opaque_SwapRB (explicitly qualified call).
size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = this->ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB(src, dst, pixCount);
	return convertedCount;
}
// Scalar conversion of pixCount 15-bit pixels to opaque 6665, R/B order kept.
// Returns the number of pixels converted, which is always pixCount.
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	for (size_t pix = 0; pix < pixCount; pix++)
	{
		dst[pix] = ColorspaceConvert555To6665Opaque<false>(src[pix]);
	}
	
	return pixCount;
}
// Scalar conversion of pixCount 15-bit pixels to opaque 6665 with R and B swapped.
// Returns the number of pixels converted, which is always pixCount.
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	for (size_t pix = 0; pix < pixCount; pix++)
	{
		dst[pix] = ColorspaceConvert555To6665Opaque<true>(src[pix]);
	}
	
	return pixCount;
}
// Unaligned-buffer entry point. The base implementation simply reuses the
// base-class ConvertBuffer555To6665Opaque (explicitly qualified call).
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = this->ColorspaceHandler::ConvertBuffer555To6665Opaque(src, dst, pixCount);
	return convertedCount;
}
// Unaligned-buffer entry point with R/B swap. The base implementation simply
// reuses the base-class ConvertBuffer555To6665Opaque_SwapRB (explicitly qualified call).
size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = this->ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB(src, dst, pixCount);
	return convertedCount;
}
// Scalar conversion of pixCount 8888 pixels to 6665, R/B order kept.
// Pixels are processed in ascending index order.
// Returns the number of pixels converted, which is always pixCount.
size_t ColorspaceHandler::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
{
	for (size_t pix = 0; pix < pixCount; pix++)
	{
		dst[pix] = ColorspaceConvert8888To6665<false>(src[pix]);
	}
	
	return pixCount;
}
// Scalar conversion of pixCount 8888 pixels to 6665 with R and B swapped.
// Pixels are processed in ascending index order.
// Returns the number of pixels converted, which is always pixCount.
size_t ColorspaceHandler::ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
	for (size_t pix = 0; pix < pixCount; pix++)
	{
		dst[pix] = ColorspaceConvert8888To6665<true>(src[pix]);
	}
	
	return pixCount;
}
// Unaligned-buffer entry point. The base implementation simply reuses the
// base-class ConvertBuffer8888To6665 (explicitly qualified call).
size_t ColorspaceHandler::ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
	const size_t convertedCount = this->ColorspaceHandler::ConvertBuffer8888To6665(src, dst, pixCount);
	return convertedCount;
}
// Unaligned-buffer entry point with R/B swap. The base implementation simply
// reuses the base-class ConvertBuffer8888To6665_SwapRB (explicitly qualified call).
size_t ColorspaceHandler::ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
	const size_t convertedCount = this->ColorspaceHandler::ConvertBuffer8888To6665_SwapRB(src, dst, pixCount);
	return convertedCount;
}
// Scalar conversion of pixCount 6665 pixels to 8888, R/B order kept.
// Pixels are processed in ascending index order.
// Returns the number of pixels converted, which is always pixCount.
size_t ColorspaceHandler::ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const
{
	for (size_t pix = 0; pix < pixCount; pix++)
	{
		dst[pix] = ColorspaceConvert6665To8888<false>(src[pix]);
	}
	
	return pixCount;
}
// Scalar conversion of pixCount 6665 pixels to 8888 with R and B swapped.
// Pixels are processed in ascending index order.
// Returns the number of pixels converted, which is always pixCount.
size_t ColorspaceHandler::ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
	for (size_t pix = 0; pix < pixCount; pix++)
	{
		dst[pix] = ColorspaceConvert6665To8888<true>(src[pix]);
	}
	
	return pixCount;
}
// Unaligned-buffer entry point. The base implementation simply reuses the
// base-class ConvertBuffer6665To8888 (explicitly qualified call).
size_t ColorspaceHandler::ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
	const size_t convertedCount = this->ColorspaceHandler::ConvertBuffer6665To8888(src, dst, pixCount);
	return convertedCount;
}
// Unaligned-buffer entry point with R/B swap. The base implementation simply
// reuses the base-class ConvertBuffer6665To8888_SwapRB (explicitly qualified call).
size_t ColorspaceHandler::ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
	const size_t convertedCount = this->ColorspaceHandler::ConvertBuffer6665To8888_SwapRB(src, dst, pixCount);
	return convertedCount;
}
// Scalar conversion of pixCount 8888 pixels to 16-bit 5551, R/B order kept.
// Returns the number of pixels converted, which is always pixCount.
size_t ColorspaceHandler::ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	for (size_t pix = 0; pix < pixCount; pix++)
	{
		dst[pix] = ColorspaceConvert8888To5551<false>(src[pix]);
	}
	
	return pixCount;
}
// Scalar conversion of pixCount 8888 pixels to 16-bit 5551 with R and B swapped.
// Returns the number of pixels converted, which is always pixCount.
size_t ColorspaceHandler::ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	for (size_t pix = 0; pix < pixCount; pix++)
	{
		dst[pix] = ColorspaceConvert8888To5551<true>(src[pix]);
	}
	
	return pixCount;
}
// Unaligned-buffer entry point. The base implementation simply reuses the
// base-class ConvertBuffer8888To5551 (explicitly qualified call).
size_t ColorspaceHandler::ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = this->ColorspaceHandler::ConvertBuffer8888To5551(src, dst, pixCount);
	return convertedCount;
}
// Unaligned-buffer entry point with R/B swap. The base implementation simply
// reuses the base-class ConvertBuffer8888To5551_SwapRB (explicitly qualified call).
size_t ColorspaceHandler::ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = this->ColorspaceHandler::ConvertBuffer8888To5551_SwapRB(src, dst, pixCount);
	return convertedCount;
}
// Scalar conversion of pixCount 6665 pixels to 16-bit 5551, R/B order kept.
// Returns the number of pixels converted, which is always pixCount.
size_t ColorspaceHandler::ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	for (size_t pix = 0; pix < pixCount; pix++)
	{
		dst[pix] = ColorspaceConvert6665To5551<false>(src[pix]);
	}
	
	return pixCount;
}
// Scalar conversion of pixCount 6665 pixels to 16-bit 5551 with R and B swapped.
// Returns the number of pixels converted, which is always pixCount.
size_t ColorspaceHandler::ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	for (size_t pix = 0; pix < pixCount; pix++)
	{
		dst[pix] = ColorspaceConvert6665To5551<true>(src[pix]);
	}
	
	return pixCount;
}
// Unaligned-buffer entry point. The base implementation simply reuses the
// base-class ConvertBuffer6665To5551 (explicitly qualified call).
size_t ColorspaceHandler::ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = this->ColorspaceHandler::ConvertBuffer6665To5551(src, dst, pixCount);
	return convertedCount;
}
// Unaligned-buffer entry point with R/B swap. The base implementation simply
// reuses the base-class ConvertBuffer6665To5551_SwapRB (explicitly qualified call).
size_t ColorspaceHandler::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
	const size_t convertedCount = this->ColorspaceHandler::ConvertBuffer6665To5551_SwapRB(src, dst, pixCount);
	return convertedCount;
}
// Explicit instantiations. The templates above are defined in this source file,
// so every template-argument combination that other translation units may use
// is instantiated here.

// Single-pixel converters: one instantiation per SWAP_RB value and overload.
template u32 ColorspaceConvert555To8888Opaque<true>(const u16 src);
template u32 ColorspaceConvert555To8888Opaque<false>(const u16 src);
template u32 ColorspaceConvert555To6665Opaque<true>(const u16 src);
template u32 ColorspaceConvert555To6665Opaque<false>(const u16 src);
template u32 ColorspaceConvert8888To6665<true>(FragmentColor srcColor);
template u32 ColorspaceConvert8888To6665<false>(FragmentColor srcColor);
template u32 ColorspaceConvert8888To6665<true>(u32 srcColor);
template u32 ColorspaceConvert8888To6665<false>(u32 srcColor);
template u32 ColorspaceConvert6665To8888<true>(FragmentColor srcColor);
template u32 ColorspaceConvert6665To8888<false>(FragmentColor srcColor);
template u32 ColorspaceConvert6665To8888<true>(u32 srcColor);
template u32 ColorspaceConvert6665To8888<false>(u32 srcColor);
template u16 ColorspaceConvert8888To5551<true>(FragmentColor srcColor);
template u16 ColorspaceConvert8888To5551<false>(FragmentColor srcColor);
template u16 ColorspaceConvert8888To5551<true>(u32 srcColor);
template u16 ColorspaceConvert8888To5551<false>(u32 srcColor);
template u16 ColorspaceConvert6665To5551<true>(FragmentColor srcColor);
template u16 ColorspaceConvert6665To5551<false>(FragmentColor srcColor);
template u16 ColorspaceConvert6665To5551<true>(u32 srcColor);
template u16 ColorspaceConvert6665To5551<false>(u32 srcColor);
// Buffer converters: all four <SWAP_RB, IS_UNALIGNED> combinations of each.
template void ColorspaceConvertBuffer555To8888Opaque<true, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<true, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<false, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To8888Opaque<false, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<true, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<true, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<false, true>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer555To6665Opaque<false, false>(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To6665<true, true>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To6665<true, false>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To6665<false, true>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To6665<false, false>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To8888<true, true>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To8888<true, false>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To8888<false, true>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To8888<false, false>(const u32 *src, u32 *dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To5551<true, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To5551<true, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To5551<false, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer8888To5551<false, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To5551<true, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To5551<true, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To5551<false, true>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template void ColorspaceConvertBuffer6665To5551<false, false>(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);

View File

@ -0,0 +1,194 @@
/*
Copyright (C) 2016 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef COLORSPACEHANDLER_H
#define COLORSPACEHANDLER_H
#include "types.h"
#include <stdio.h>
#include <stdint.h>
// Identifies a pixel color format. Each enumerator is a self-describing 32-bit
// code that packs per-channel bit counts, a channel order and flags, laid out
// as described below.
enum NDSColorFormat
{
// The color format information is packed in a 32-bit value.
// The bits are as follows:
// FFFOOOOO AAAAAABB BBBBGGGG GGRRRRRR
//
// F = Flags (see below)
// O = Color order (see below)
// A = Bit count for alpha [0-63]
// B = Bit count for blue [0-63]
// G = Bit count for green [0-63]
// R = Bit count for red [0-63]
//
// Flags:
// Bit 29: Reverse order flag.
// Set = Bits are in reverse order, usually for little-endian usage.
// Cleared = Bits are in normal order, usually for big-endian usage.
//
// Color order bits, 24-28:
// 0x00 = RGBA, common format
// 0x01 = RGAB
// 0x02 = RBGA
// 0x03 = RBAG
// 0x04 = RAGB
// 0x05 = RABG
// 0x06 = GRBA
// 0x07 = GRAB
// 0x08 = GBRA
// 0x09 = GBAR
// 0x0A = GARB
// 0x0B = GABR
// 0x0C = BRGA
// 0x0D = BRAG
// 0x0E = BGRA, common format
// 0x0F = BGAR
// 0x10 = BARG
// 0x11 = BAGR
// 0x12 = ARGB
// 0x13 = ARBG
// 0x14 = AGRB
// 0x15 = AGBR
// 0x16 = ABRG
// 0x17 = ABGR
// Color formats used for internal processing.
// (Currently commented out. They differ from the output formats below only by
// a non-zero alpha bit count: 1, 5 and 8 respectively.)
//NDSColorFormat_ABGR1555_Rev = 0x20045145,
//NDSColorFormat_ABGR5666_Rev = 0x20186186,
//NDSColorFormat_ABGR8888_Rev = 0x20208208,
// Color formats used by the output framebuffers.
// All three set the reverse order flag, use color order 0x00 (RGBA) and have an
// alpha bit count of 0. R, G and B each use 5, 6 or 8 bits respectively.
NDSColorFormat_BGR555_Rev = 0x20005145,
NDSColorFormat_BGR666_Rev = 0x20006186,
NDSColorFormat_BGR888_Rev = 0x20008208
};
// A 32-bit color that can be accessed either as one packed value (color) or as
// four separate 8-bit components (r, g, b, a). The components are declared in
// the order r, g, b, a, so r shares storage with the first byte of color.
union FragmentColor
{
u32 color;
// Anonymous struct: the components are reachable directly as .r/.g/.b/.a.
struct
{
u8 r,g,b,a;
};
};
// Constant per-component lookup tables. The array size is the number of
// possible source values (8 for 3-bit, 32 for 5-bit, 64 for 6-bit sources).
extern CACHE_ALIGN const u32 material_5bit_to_31bit[32];
extern CACHE_ALIGN const u8 material_5bit_to_6bit[32];
extern CACHE_ALIGN const u8 material_5bit_to_8bit[32];
extern CACHE_ALIGN const u8 material_6bit_to_8bit[64];
extern CACHE_ALIGN const u8 material_3bit_to_5bit[8];
extern CACHE_ALIGN const u8 material_3bit_to_6bit[8];
extern CACHE_ALIGN const u8 material_3bit_to_8bit[8];
// Whole-color lookup tables indexed by a 15-bit color (32768 entries each).
// They are not const; ColorspaceHandlerInit() fills them at run time, so it
// must be called before any of them is read.
extern CACHE_ALIGN u32 color_555_to_6665_opaque[32768];
extern CACHE_ALIGN u32 color_555_to_6665_opaque_swap_rb[32768];
extern CACHE_ALIGN u32 color_555_to_666[32768];
extern CACHE_ALIGN u32 color_555_to_8888_opaque[32768];
extern CACHE_ALIGN u32 color_555_to_8888_opaque_swap_rb[32768];
extern CACHE_ALIGN u32 color_555_to_888[32768];
// Table-lookup color conversion macros. col is used directly as an index into a
// 32768-entry table, so it must already be limited to 15 bits (0-0x7FFF).
// The user-defined-alpha variants OR the alpha into the top byte on
// little-endian builds (LOCAL_LE) and into the low byte otherwise.
#define COLOR555TO6665_OPAQUE(col) (color_555_to_6665_opaque[(col)]) // Convert a 15-bit color to an opaque sparsely packed 32-bit color containing an RGBA6665 color
#define COLOR555TO6665_OPAQUE_SWAP_RB(col) (color_555_to_6665_opaque_swap_rb[(col)]) // Convert a 15-bit color to an opaque sparsely packed 32-bit color containing an RGBA6665 color with R and B components swapped
#define COLOR555TO666(col) (color_555_to_666[(col)]) // Convert a 15-bit color to a fully transparent sparsely packed 32-bit color containing an RGBA6665 color
#ifdef LOCAL_LE
#define COLOR555TO6665(col,alpha5) (((alpha5)<<24) | color_555_to_666[(col)]) // Convert a 15-bit color to a sparsely packed 32-bit color containing an RGBA6665 color with user-defined alpha, little-endian
#else
#define COLOR555TO6665(col,alpha5) ((alpha5) | color_555_to_666[(col)]) // Convert a 15-bit color to a sparsely packed 32-bit color containing an RGBA6665 color with user-defined alpha, big-endian
#endif
#define COLOR555TO8888_OPAQUE(col) (color_555_to_8888_opaque[(col)]) // Convert a 15-bit color to an opaque 32-bit color
#define COLOR555TO8888_OPAQUE_SWAP_RB(col) (color_555_to_8888_opaque_swap_rb[(col)]) // Convert a 15-bit color to an opaque 32-bit color with R and B components swapped
#define COLOR555TO888(col) (color_555_to_888[(col)]) // Convert a 15-bit color to an opaque 24-bit color or a fully transparent 32-bit color
#ifdef LOCAL_LE
#define COLOR555TO8888(col,alpha8) (((alpha8)<<24) | color_555_to_888[(col)]) // Convert a 15-bit color to a 32-bit color with user-defined alpha, little-endian
#else
#define COLOR555TO8888(col,alpha8) ((alpha8) | color_555_to_888[(col)]) // Convert a 15-bit color to a 32-bit color with user-defined alpha, big-endian
#endif
//produce a 15bpp color from individual 5bit components
//(r lands in bits 0-4, g in bits 5-9, b in bits 10-14; the arguments are not masked, so each must already fit in 5 bits)
#define R5G5B5TORGB15(r,g,b) ( (r) | ((g)<<5) | ((b)<<10) )
//produce a 15bpp color from individual 6bit components by dropping the lowest bit of each component
//(r lands in bits 0-4, g in bits 5-9, b in bits 10-14; g and b are masked with 0x3E but r is only shifted, so r must already fit in 6 bits)
#define R6G6B6TORGB15(r,g,b) ( ((r)>>1) | (((g)&0x3E)<<4) | (((b)&0x3E)<<9) )
// Fills the color_555_to_* lookup tables declared above.
void ColorspaceHandlerInit();
// Single-pixel converters. SWAP_RB = true exchanges the R and B components
// during the conversion. Where both exist, the u32 overload takes the packed
// color value and the FragmentColor overload takes the component view.
template<bool SWAP_RB> u32 ColorspaceConvert555To8888Opaque(const u16 src);
template<bool SWAP_RB> u32 ColorspaceConvert555To6665Opaque(const u16 src);
template<bool SWAP_RB> u32 ColorspaceConvert8888To6665(FragmentColor srcColor);
template<bool SWAP_RB> u32 ColorspaceConvert8888To6665(u32 srcColor);
template<bool SWAP_RB> u32 ColorspaceConvert6665To8888(FragmentColor srcColor);
template<bool SWAP_RB> u32 ColorspaceConvert6665To8888(u32 srcColor);
template<bool SWAP_RB> u16 ColorspaceConvert8888To5551(FragmentColor srcColor);
template<bool SWAP_RB> u16 ColorspaceConvert8888To5551(u32 srcColor);
template<bool SWAP_RB> u16 ColorspaceConvert6665To5551(FragmentColor srcColor);
template<bool SWAP_RB> u16 ColorspaceConvert6665To5551(u32 srcColor);
// Whole-buffer converters: convert pixCount pixels from src into dst.
// IS_UNALIGNED selects the handler variant meant for buffers that are not
// vector-aligned. Converters whose source and destination pixel sizes differ
// take __restrict pointers; the two 32-bit-to-32-bit converters do not.
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
template<bool SWAP_RB, bool IS_UNALIGNED> void ColorspaceConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount);
// Base buffer-conversion handler. For every conversion there are four methods:
// plain, _SwapRB (R and B exchanged), _IsUnaligned and _SwapRB_IsUnaligned.
// Each method returns a pixel count as size_t.
// NOTE(review): the methods are non-virtual, so SIMD-specific handlers
// (e.g. the AVX2 one) presumably hide them by name rather than override them
// -- confirm against the derived classes.
class ColorspaceHandler
{
public:
ColorspaceHandler() {};
// 15-bit color -> opaque 8888
size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
// 15-bit color -> opaque 6665
size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
// 8888 -> 6665 (pointers are not __restrict)
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
// 6665 -> 8888 (pointers are not __restrict)
size_t ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
// 8888 -> 5551
size_t ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
// 6665 -> 5551
size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
};
// Builds a FragmentColor from four separate 8-bit components.
FORCEINLINE FragmentColor MakeFragmentColor(const u8 r, const u8 g, const u8 b, const u8 a)
{
	FragmentColor fragment;
	
	fragment.r = r;
	fragment.g = g;
	fragment.b = b;
	fragment.a = a;
	
	return fragment;
}
#endif /* COLORSPACEHANDLER_H */

View File

@ -0,0 +1,491 @@
/*
Copyright (C) 2016 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/
#include "colorspacehandler_AVX2.h"
#ifndef ENABLE_AVX2
#error This code requires AVX2 support.
#else
#include <immintrin.h>
// Converts sixteen 15-bit colors (srcColor) into sixteen 32-bit 8888 colors,
// written to dstLo and dstHi (eight colors each). The caller supplies the
// alpha bits to OR into each half through srcAlphaBits32Lo / srcAlphaBits32Hi.
// SWAP_RB = true exchanges the R and B components.
//
// NOTE(review): _mm256_unpacklo_epi16 / _mm256_unpackhi_epi16 interleave within
// each 128-bit lane. dstLo therefore holds source pixels 0-3 and 8-11, and
// dstHi holds pixels 4-7 and 12-15 -- not 0-7 and 8-15. Confirm that callers
// (and the alpha vectors they pass in) account for this ordering.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888_AVX2(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi)
{
v256u32 src32;
// Conversion algorithm:
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
// Zero-extend each 16-bit color of the low lane halves to 32 bits.
src32 = _mm256_unpacklo_epi16(srcColor, _mm256_setzero_si256());
// Move the source bits 0-4 and 10-14 fields into the top five bits of bytes 0 and 2 (exchanged when SWAP_RB).
dstLo = (SWAP_RB) ? _mm256_or_si256(_mm256_slli_epi32(src32, 19), _mm256_srli_epi32(src32, 7)) : _mm256_or_si256(_mm256_slli_epi32(src32, 3), _mm256_slli_epi32(src32, 9));
dstLo = _mm256_and_si256( dstLo, _mm256_set1_epi32(0x00F800F8) );
// Move the source bits 5-9 field into the top five bits of byte 1.
dstLo = _mm256_or_si256( dstLo, _mm256_and_si256(_mm256_slli_epi32(src32, 6), _mm256_set1_epi32(0x0000F800)) );
// Replicate the top three bits of each component into its low three bits.
dstLo = _mm256_or_si256( dstLo, _mm256_and_si256(_mm256_srli_epi32(dstLo, 5), _mm256_set1_epi32(0x00070707)) );
dstLo = _mm256_or_si256( dstLo, srcAlphaBits32Lo );
// Same steps for the high lane halves.
src32 = _mm256_unpackhi_epi16(srcColor, _mm256_setzero_si256());
dstHi = (SWAP_RB) ? _mm256_or_si256(_mm256_slli_epi32(src32, 19), _mm256_srli_epi32(src32, 7)) : _mm256_or_si256(_mm256_slli_epi32(src32, 3), _mm256_slli_epi32(src32, 9));
dstHi = _mm256_and_si256( dstHi, _mm256_set1_epi32(0x00F800F8) );
dstHi = _mm256_or_si256( dstHi, _mm256_and_si256(_mm256_slli_epi32(src32, 6), _mm256_set1_epi32(0x0000F800)) );
dstHi = _mm256_or_si256( dstHi, _mm256_and_si256(_mm256_srli_epi32(dstHi, 5), _mm256_set1_epi32(0x00070707)) );
dstHi = _mm256_or_si256( dstHi, srcAlphaBits32Hi );
}
// Converts sixteen 15-bit colors (srcColor) into sixteen 6665 colors (32 bits
// each), written to dstLo and dstHi (eight colors each). The caller supplies
// the alpha bits to OR into each half through srcAlphaBits32Lo /
// srcAlphaBits32Hi. SWAP_RB = true exchanges the R and B components.
//
// NOTE(review): _mm256_unpacklo_epi16 / _mm256_unpackhi_epi16 interleave within
// each 128-bit lane. dstLo therefore holds source pixels 0-3 and 8-11, and
// dstHi holds pixels 4-7 and 12-15 -- not 0-7 and 8-15. Confirm that callers
// (and the alpha vectors they pass in) account for this ordering.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665_AVX2(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi)
{
v256u32 src32;
// Conversion algorithm:
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
// Zero-extend each 16-bit color of the low lane halves to 32 bits.
src32 = _mm256_unpacklo_epi16(srcColor, _mm256_setzero_si256());
// Move the source bits 0-4 and 10-14 fields into bits 1-5 of bytes 0 and 2 (exchanged when SWAP_RB).
dstLo = (SWAP_RB) ? _mm256_or_si256(_mm256_slli_epi32(src32, 17), _mm256_srli_epi32(src32, 9)) : _mm256_or_si256(_mm256_slli_epi32(src32, 1), _mm256_slli_epi32(src32, 7));
dstLo = _mm256_and_si256( dstLo, _mm256_set1_epi32(0x003E003E) );
// Move the source bits 5-9 field into bits 1-5 of byte 1.
dstLo = _mm256_or_si256( dstLo, _mm256_and_si256(_mm256_slli_epi32(src32, 4), _mm256_set1_epi32(0x00003E00)) );
// Copy the top bit (bit 5) of each 6-bit component into its bit 0.
dstLo = _mm256_or_si256( dstLo, _mm256_and_si256(_mm256_srli_epi32(dstLo, 5), _mm256_set1_epi32(0x00010101)) );
dstLo = _mm256_or_si256( dstLo, srcAlphaBits32Lo );
// Same steps for the high lane halves.
src32 = _mm256_unpackhi_epi16(srcColor, _mm256_setzero_si256());
dstHi = (SWAP_RB) ? _mm256_or_si256(_mm256_slli_epi32(src32, 17), _mm256_srli_epi32(src32, 9)) : _mm256_or_si256(_mm256_slli_epi32(src32, 1), _mm256_slli_epi32(src32, 7));
dstHi = _mm256_and_si256( dstHi, _mm256_set1_epi32(0x003E003E) );
dstHi = _mm256_or_si256( dstHi, _mm256_and_si256(_mm256_slli_epi32(src32, 4), _mm256_set1_epi32(0x00003E00)) );
dstHi = _mm256_or_si256( dstHi, _mm256_and_si256(_mm256_srli_epi32(dstHi, 5), _mm256_set1_epi32(0x00010101)) );
dstHi = _mm256_or_si256( dstHi, srcAlphaBits32Hi );
}
// Opaque variant of ColorspaceConvert555To8888_AVX2: every output pixel gets
// the maximum 8-bit alpha (0xFF in the top byte).
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi)
{
	// The same constant alpha vector is used for both output halves.
	const v256u32 opaqueAlpha = _mm256_set1_epi32(0xFF000000);
	
	ColorspaceConvert555To8888_AVX2<SWAP_RB>(srcColor, opaqueAlpha, opaqueAlpha, dstLo, dstHi);
}
// Opaque variant of ColorspaceConvert555To6665_AVX2: every output pixel gets
// the maximum 5-bit alpha (0x1F in the top byte).
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi)
{
	// The same constant alpha vector is used for both output halves.
	const v256u32 opaqueAlpha = _mm256_set1_epi32(0x1F000000);
	
	ColorspaceConvert555To6665_AVX2<SWAP_RB>(srcColor, opaqueAlpha, opaqueAlpha, dstLo, dstHi);
}
// Converts eight 8888 colors to eight 6665 colors by truncation.
// SWAP_RB = true exchanges the R and B bytes of every pixel.
template <bool SWAP_RB>
FORCEINLINE v256u32 ColorspaceConvert8888To6665_AVX2(const v256u32 &src)
{
	// Conversion algorithm:
	//    RGB   8-bit to 6-bit formula: dstRGB6 = (srcRGB8 >> 2)
	//    Alpha 8-bit to 5-bit formula: dstA5   = (srcA8   >> 3)
	const v256u32 alpha5 = _mm256_and_si256( _mm256_srli_epi32(src, 3), _mm256_set1_epi32(0x1F000000) );
	v256u32 color6 = _mm256_and_si256( _mm256_srli_epi32(src, 2), _mm256_set1_epi32(0x003F3F3F) );
	
	if (SWAP_RB)
	{
		// Byte shuffle that exchanges bytes 0 and 2 of every 32-bit pixel.
		const v256u32 swapRBMask = _mm256_set_epi8(31,28,29,30, 27,24,25,26, 23,20,21,22, 19,16,17,18, 15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2);
		color6 = _mm256_shuffle_epi8(color6, swapRBMask);
	}
	
	return _mm256_or_si256(color6, alpha5);
}
// Converts eight 6665 pixels to 8888 in place within their 32-bit slots, replicating the
// top source bits into the newly created low bits of every channel.
// If SWAP_RB is set, bytes 0 and 2 of every pixel are exchanged.
template <bool SWAP_RB>
FORCEINLINE v256u32 ColorspaceConvert6665To8888_AVX2(const v256u32 &src)
{
	// Conversion algorithm:
	//    RGB   6-bit to 8-bit formula: dstRGB8 = (srcRGB6 << 2) | ((srcRGB6 >> 4) & 0x03)
	//    Alpha 5-bit to 8-bit formula: dstA8   = (srcA5   << 3) | ((srcA5   >> 2) & 0x07)
	const v256u32 colorUpperBits = _mm256_and_si256( _mm256_slli_epi32(src, 2), _mm256_set1_epi32(0x00FCFCFC) );
	const v256u32 colorLowerBits = _mm256_and_si256( _mm256_srli_epi32(src, 4), _mm256_set1_epi32(0x00030303) );
	const v256u32 alphaUpperBits = _mm256_and_si256( _mm256_slli_epi32(src, 3), _mm256_set1_epi32(0xF8000000) );
	const v256u32 alphaLowerBits = _mm256_and_si256( _mm256_srli_epi32(src, 2), _mm256_set1_epi32(0x07000000) );
	
	v256u32 color8 = _mm256_or_si256(colorUpperBits, colorLowerBits);
	const v256u32 alpha8 = _mm256_or_si256(alphaUpperBits, alphaLowerBits);
	
	if (SWAP_RB)
	{
		// The alpha byte of color8 is zero here, so only bytes 0 and 2 effectively move.
		color8 = _mm256_shuffle_epi8( color8, _mm256_set_epi8(31,28,29,30, 27,24,25,26, 23,20,21,22, 19,16,17,18, 15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2) );
	}
	
	return _mm256_or_si256(color8, alpha8);
}
// Packs sixteen 32-bit pixels into sixteen 16-bit 5551 pixels.
//
//   COLORFORMAT  Source layout: NDSColorFormat_BGR666_Rev (6665) or NDSColorFormat_BGR888_Rev (8888).
//                For NDSColorFormat_BGR555_Rev, srcLo is returned untouched.
//   SWAP_RB      When true, the source's byte-0 and byte-2 channels trade places in the output.
//   srcLo/srcHi  Pixels 0-7 and pixels 8-15 respectively (this is how the buffer
//                converters in this file load them).
//
// The output alpha bit (0x8000) is set for every pixel whose source alpha is non-zero.
// Pixels are returned in linear order: element k of the result corresponds to pixel k.
//
// NOTE(review): the 555 -> 32-bit helpers in this file split their input with the
// lane-local _mm256_unpacklo/unpackhi_epi16, so their dstLo/dstHi are not linear halves.
// Confirm that no caller feeds those outputs straight back into this function expecting
// the two orderings to cancel out.
template <NDSColorFormat COLORFORMAT, bool SWAP_RB>
FORCEINLINE v256u16 _ConvertColorBaseTo5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi)
{
	if (COLORFORMAT == NDSColorFormat_BGR555_Rev)
	{
		return srcLo;
	}
	
	v256u32 rgbLo;
	v256u32 rgbHi;
	v256u16 alpha;
	
	if (COLORFORMAT == NDSColorFormat_BGR666_Rev)
	{
		if (SWAP_RB)
		{
			// Convert color from low bits
			rgbLo =                        _mm256_and_si256(_mm256_srli_epi32(srcLo, 17), _mm256_set1_epi32(0x0000001F));
			rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo,  4), _mm256_set1_epi32(0x000003E0)) );
			rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_slli_epi32(srcLo,  9), _mm256_set1_epi32(0x00007C00)) );
			
			// Convert color from high bits
			rgbHi =                        _mm256_and_si256(_mm256_srli_epi32(srcHi, 17), _mm256_set1_epi32(0x0000001F));
			rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi,  4), _mm256_set1_epi32(0x000003E0)) );
			rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_slli_epi32(srcHi,  9), _mm256_set1_epi32(0x00007C00)) );
		}
		else
		{
			// Convert color from low bits
			rgbLo =                        _mm256_and_si256(_mm256_srli_epi32(srcLo,  1), _mm256_set1_epi32(0x0000001F));
			rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo,  4), _mm256_set1_epi32(0x000003E0)) );
			rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo,  7), _mm256_set1_epi32(0x00007C00)) );
			
			// Convert color from high bits
			rgbHi =                        _mm256_and_si256(_mm256_srli_epi32(srcHi,  1), _mm256_set1_epi32(0x0000001F));
			rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi,  4), _mm256_set1_epi32(0x000003E0)) );
			rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi,  7), _mm256_set1_epi32(0x00007C00)) );
		}
		
		// Convert alpha: isolate the 5-bit alpha, then turn "non-zero" into the 0x8000 bit.
		alpha = _mm256_packs_epi32( _mm256_and_si256(_mm256_srli_epi32(srcLo, 24), _mm256_set1_epi32(0x0000001F)), _mm256_and_si256(_mm256_srli_epi32(srcHi, 24), _mm256_set1_epi32(0x0000001F)) );
		alpha = _mm256_cmpgt_epi16(alpha, _mm256_setzero_si256());
		alpha = _mm256_and_si256(alpha, _mm256_set1_epi16(0x8000));
	}
	else if (COLORFORMAT == NDSColorFormat_BGR888_Rev)
	{
		if (SWAP_RB)
		{
			// Convert color from low bits
			rgbLo =                        _mm256_and_si256(_mm256_srli_epi32(srcLo, 19), _mm256_set1_epi32(0x0000001F));
			rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo,  6), _mm256_set1_epi32(0x000003E0)) );
			rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_slli_epi32(srcLo,  7), _mm256_set1_epi32(0x00007C00)) );
			
			// Convert color from high bits
			rgbHi =                        _mm256_and_si256(_mm256_srli_epi32(srcHi, 19), _mm256_set1_epi32(0x0000001F));
			rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi,  6), _mm256_set1_epi32(0x000003E0)) );
			rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_slli_epi32(srcHi,  7), _mm256_set1_epi32(0x00007C00)) );
		}
		else
		{
			// Convert color from low bits
			rgbLo =                        _mm256_and_si256(_mm256_srli_epi32(srcLo,  3), _mm256_set1_epi32(0x0000001F));
			rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo,  6), _mm256_set1_epi32(0x000003E0)) );
			rgbLo = _mm256_or_si256(rgbLo, _mm256_and_si256(_mm256_srli_epi32(srcLo,  9), _mm256_set1_epi32(0x00007C00)) );
			
			// Convert color from high bits
			rgbHi =                        _mm256_and_si256(_mm256_srli_epi32(srcHi,  3), _mm256_set1_epi32(0x0000001F));
			rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi,  6), _mm256_set1_epi32(0x000003E0)) );
			rgbHi = _mm256_or_si256(rgbHi, _mm256_and_si256(_mm256_srli_epi32(srcHi,  9), _mm256_set1_epi32(0x00007C00)) );
		}
		
		// Convert alpha: any non-zero 8-bit alpha becomes the 0x8000 bit.
		alpha = _mm256_packs_epi32( _mm256_srli_epi32(srcLo, 24), _mm256_srli_epi32(srcHi, 24) );
		alpha = _mm256_cmpgt_epi16(alpha, _mm256_setzero_si256());
		alpha = _mm256_and_si256(alpha, _mm256_set1_epi16(0x8000));
	}
	
	// _mm256_packs_epi32() packs each 128-bit lane independently, so both the color and the
	// alpha vectors hold their 64-bit groups in the order [Lo 0-3][Hi 0-3][Lo 4-7][Hi 4-7].
	// Color and alpha agree with each other, so they can be merged first and then a single
	// cross-lane permute (0xD8 selects quadwords 0,2,1,3) restores linear pixel order.
	const v256u16 lanePacked = _mm256_or_si256(_mm256_packs_epi32(rgbLo, rgbHi), alpha);
	return _mm256_permute4x64_epi64(lanePacked, 0xD8);
}
// Packs two vectors of 8888 pixels into one vector of 5551 pixels by forwarding to
// _ConvertColorBaseTo5551_AVX2 with the source format fixed to NDSColorFormat_BGR888_Rev.
template <bool SWAP_RB>
FORCEINLINE v256u16 ColorspaceConvert8888To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi)
{
return _ConvertColorBaseTo5551_AVX2<NDSColorFormat_BGR888_Rev, SWAP_RB>(srcLo, srcHi);
}
// Packs two vectors of 6665 pixels into one vector of 5551 pixels by forwarding to
// _ConvertColorBaseTo5551_AVX2 with the source format fixed to NDSColorFormat_BGR666_Rev.
template <bool SWAP_RB>
FORCEINLINE v256u16 ColorspaceConvert6665To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi)
{
return _ConvertColorBaseTo5551_AVX2<NDSColorFormat_BGR666_Rev, SWAP_RB>(srcLo, srcHi);
}
template <bool SWAP_RB, bool IS_UNALIGNED>
static size_t ColorspaceConvertBuffer555To8888Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec256)
{
size_t i = 0;
for (; i < pixCountVec256; i+=16)
{
v256u16 src_vec256 = (IS_UNALIGNED) ? _mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i));
v256u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To8888Opaque_AVX2<SWAP_RB>(src_vec256, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
_mm256_storeu_si256((v256u32 *)(dst+i+0), dstConvertedLo);
_mm256_storeu_si256((v256u32 *)(dst+i+8), dstConvertedHi);
}
else
{
_mm256_store_si256((v256u32 *)(dst+i+0), dstConvertedLo);
_mm256_store_si256((v256u32 *)(dst+i+8), dstConvertedHi);
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer555To6665Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec256)
{
size_t i = 0;
for (; i < pixCountVec256; i+=16)
{
v256u16 src_vec256 = (IS_UNALIGNED) ? _mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i));
v256u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To6665Opaque_AVX2<SWAP_RB>(src_vec256, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
_mm256_storeu_si256((v256u32 *)(dst+i+0), dstConvertedLo);
_mm256_storeu_si256((v256u32 *)(dst+i+8), dstConvertedHi);
}
else
{
_mm256_store_si256((v256u32 *)(dst+i+0), dstConvertedLo);
_mm256_store_si256((v256u32 *)(dst+i+8), dstConvertedHi);
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer8888To6665_AVX2(const u32 *src, u32 *dst, size_t pixCountVec256)
{
size_t i = 0;
for (; i < pixCountVec256; i+=8)
{
if (IS_UNALIGNED)
{
_mm256_storeu_si256( (v256u32 *)(dst+i), ColorspaceConvert8888To6665_AVX2<SWAP_RB>(_mm256_loadu_si256((v256u32 *)(src+i))) );
}
else
{
_mm256_store_si256( (v256u32 *)(dst+i), ColorspaceConvert8888To6665_AVX2<SWAP_RB>(_mm256_load_si256((v256u32 *)(src+i))) );
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer6665To8888_AVX2(const u32 *src, u32 *dst, size_t pixCountVec256)
{
size_t i = 0;
for (; i < pixCountVec256; i+=8)
{
if (IS_UNALIGNED)
{
_mm256_storeu_si256( (v256u32 *)(dst+i), ColorspaceConvert6665To8888_AVX2<SWAP_RB>(_mm256_loadu_si256((v256u32 *)(src+i))) );
}
else
{
_mm256_store_si256( (v256u32 *)(dst+i), ColorspaceConvert6665To8888_AVX2<SWAP_RB>(_mm256_load_si256((v256u32 *)(src+i))) );
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer8888To5551_AVX2(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec256)
{
size_t i = 0;
for (; i < pixCountVec256; i+=16)
{
if (IS_UNALIGNED)
{
_mm256_storeu_si256( (v256u16 *)(dst+i), ColorspaceConvert8888To5551_AVX2<SWAP_RB>(_mm256_loadu_si256((v256u32 *)(src+i)), _mm256_loadu_si256((v256u32 *)(src+i+8))) );
}
else
{
_mm256_store_si256( (v256u16 *)(dst+i), ColorspaceConvert8888To5551_AVX2<SWAP_RB>(_mm256_load_si256((v256u32 *)(src+i)), _mm256_load_si256((v256u32 *)(src+i+8))) );
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer6665To5551_AVX2(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec256)
{
size_t i = 0;
for (; i < pixCountVec256; i+=16)
{
if (IS_UNALIGNED)
{
_mm256_storeu_si256( (v256u16 *)(dst+i), ColorspaceConvert6665To5551_AVX2<SWAP_RB>(_mm256_loadu_si256((v256u32 *)(src+i)), _mm256_loadu_si256((v256u32 *)(src+i+8))) );
}
else
{
_mm256_store_si256( (v256u16 *)(dst+i), ColorspaceConvert6665To5551_AVX2<SWAP_RB>(_mm256_load_si256((v256u32 *)(src+i)), _mm256_load_si256((v256u32 *)(src+i+8))) );
}
}
return i;
}
// 555 -> opaque 8888 buffer conversion, one member function per <SWAP_RB, IS_UNALIGNED>
// combination. Each forwards src, dst and pixCount unchanged to
// ColorspaceConvertBuffer555To8888Opaque_AVX2 and returns its result.

// <SWAP_RB=false, IS_UNALIGNED=false>
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_AVX2<false, false>(src, dst, pixCount);
}
// <SWAP_RB=true, IS_UNALIGNED=false>
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_AVX2<true, false>(src, dst, pixCount);
}
// <SWAP_RB=false, IS_UNALIGNED=true>
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_AVX2<false, true>(src, dst, pixCount);
}
// <SWAP_RB=true, IS_UNALIGNED=true>
size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_AVX2<true, true>(src, dst, pixCount);
}
// 555 -> opaque 6665 buffer conversion, one member function per <SWAP_RB, IS_UNALIGNED>
// combination. Each forwards src, dst and pixCount unchanged to
// ColorspaceConvertBuffer555To6665Opaque_AVX2 and returns its result.

// <SWAP_RB=false, IS_UNALIGNED=false>
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_AVX2<false, false>(src, dst, pixCount);
}
// <SWAP_RB=true, IS_UNALIGNED=false>
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_AVX2<true, false>(src, dst, pixCount);
}
// <SWAP_RB=false, IS_UNALIGNED=true>
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_AVX2<false, true>(src, dst, pixCount);
}
// <SWAP_RB=true, IS_UNALIGNED=true>
size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_AVX2<true, true>(src, dst, pixCount);
}
// 8888 -> 6665 buffer conversion, one member function per <SWAP_RB, IS_UNALIGNED>
// combination. Each forwards src, dst and pixCount unchanged to
// ColorspaceConvertBuffer8888To6665_AVX2 and returns its result.

// <SWAP_RB=false, IS_UNALIGNED=false>
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To6665_AVX2<false, false>(src, dst, pixCount);
}
// <SWAP_RB=true, IS_UNALIGNED=false>
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To6665_AVX2<true, false>(src, dst, pixCount);
}
// <SWAP_RB=false, IS_UNALIGNED=true>
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To6665_AVX2<false, true>(src, dst, pixCount);
}
// <SWAP_RB=true, IS_UNALIGNED=true>
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To6665_AVX2<true, true>(src, dst, pixCount);
}
// 6665 -> 8888 buffer conversion, one member function per <SWAP_RB, IS_UNALIGNED>
// combination. Each forwards src, dst and pixCount unchanged to
// ColorspaceConvertBuffer6665To8888_AVX2 and returns its result.

// <SWAP_RB=false, IS_UNALIGNED=false>
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To8888_AVX2<false, false>(src, dst, pixCount);
}
// <SWAP_RB=true, IS_UNALIGNED=false>
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To8888_AVX2<true, false>(src, dst, pixCount);
}
// <SWAP_RB=false, IS_UNALIGNED=true>
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To8888_AVX2<false, true>(src, dst, pixCount);
}
// <SWAP_RB=true, IS_UNALIGNED=true>
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To8888_AVX2<true, true>(src, dst, pixCount);
}
// 8888 -> 5551 buffer conversion, one member function per <SWAP_RB, IS_UNALIGNED>
// combination. Each forwards src, dst and pixCount unchanged to
// ColorspaceConvertBuffer8888To5551_AVX2 and returns its result.

// <SWAP_RB=false, IS_UNALIGNED=false>
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To5551_AVX2<false, false>(src, dst, pixCount);
}
// <SWAP_RB=true, IS_UNALIGNED=false>
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To5551_AVX2<true, false>(src, dst, pixCount);
}
// <SWAP_RB=false, IS_UNALIGNED=true>
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To5551_AVX2<false, true>(src, dst, pixCount);
}
// <SWAP_RB=true, IS_UNALIGNED=true>
size_t ColorspaceHandler_AVX2::ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To5551_AVX2<true, true>(src, dst, pixCount);
}
// 6665 -> 5551 buffer conversion, one member function per <SWAP_RB, IS_UNALIGNED>
// combination. Each forwards src, dst and pixCount unchanged to
// ColorspaceConvertBuffer6665To5551_AVX2 and returns its result.

// <SWAP_RB=false, IS_UNALIGNED=false>
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To5551_AVX2<false, false>(src, dst, pixCount);
}
// <SWAP_RB=true, IS_UNALIGNED=false>
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To5551_AVX2<true, false>(src, dst, pixCount);
}
// <SWAP_RB=false, IS_UNALIGNED=true>
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To5551_AVX2<false, true>(src, dst, pixCount);
}
// <SWAP_RB=true, IS_UNALIGNED=true>
size_t ColorspaceHandler_AVX2::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To5551_AVX2<true, true>(src, dst, pixCount);
}
// Explicit instantiations of the per-vector converters for both SWAP_RB values.
// The matching header only declares these templates, so the definitions in this
// translation unit are instantiated here for use by code that includes the header.
template void ColorspaceConvert555To8888_AVX2<true>(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To8888_AVX2<false>(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To6665_AVX2<true>(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To6665_AVX2<false>(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AVX2<true>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AVX2<false>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AVX2<true>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AVX2<false>(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template v256u32 ColorspaceConvert8888To6665_AVX2<true>(const v256u32 &src);
template v256u32 ColorspaceConvert8888To6665_AVX2<false>(const v256u32 &src);
template v256u32 ColorspaceConvert6665To8888_AVX2<true>(const v256u32 &src);
template v256u32 ColorspaceConvert6665To8888_AVX2<false>(const v256u32 &src);
template v256u16 ColorspaceConvert8888To5551_AVX2<true>(const v256u32 &srcLo, const v256u32 &srcHi);
template v256u16 ColorspaceConvert8888To5551_AVX2<false>(const v256u32 &srcLo, const v256u32 &srcHi);
template v256u16 ColorspaceConvert6665To5551_AVX2<true>(const v256u32 &srcLo, const v256u32 &srcHi);
template v256u16 ColorspaceConvert6665To5551_AVX2<false>(const v256u32 &srcLo, const v256u32 &srcHi);
#endif // ENABLE_AVX2

View File

@ -0,0 +1,74 @@
/*
Copyright (C) 2016 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef COLORSPACEHANDLER_AVX2_H
#define COLORSPACEHANDLER_AVX2_H
#include "colorspacehandler.h"
#ifndef ENABLE_AVX2
#warning This header requires AVX2 support.
#else
// Per-vector color converters operating on 256-bit vectors. Only declarations appear in
// this header; SWAP_RB selects the red/blue-swapping variant of each converter.

// 555 -> 8888 / 6665 with caller-supplied alpha bits for the two output vectors.
template<bool SWAP_RB> void ColorspaceConvert555To8888_AVX2(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665_AVX2(const v256u16 &srcColor, const v256u32 &srcAlphaBits32Lo, const v256u32 &srcAlphaBits32Hi, v256u32 &dstLo, v256u32 &dstHi);
// 555 -> 8888 / 6665 with no alpha argument (the "Opaque" variants).
template<bool SWAP_RB> void ColorspaceConvert555To8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi);
// 32-bit <-> 32-bit conversions, one vector in and one vector out.
template<bool SWAP_RB> v256u32 ColorspaceConvert8888To6665_AVX2(const v256u32 &src);
template<bool SWAP_RB> v256u32 ColorspaceConvert6665To8888_AVX2(const v256u32 &src);
// 32-bit -> 5551 conversions, two vectors in and one vector out.
template<bool SWAP_RB> v256u16 ColorspaceConvert8888To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi);
template<bool SWAP_RB> v256u16 ColorspaceConvert6665To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi);
// AVX2-based buffer converter derived from ColorspaceHandler.
// Every conversion comes in four flavours: plain, _SwapRB, _IsUnaligned and
// _SwapRB_IsUnaligned. All take a source buffer, a destination buffer and a pixel
// count, and return a size_t.
// NOTE(review): presumably these override virtual functions of ColorspaceHandler;
// the base class is not visible here -- confirm.
class ColorspaceHandler_AVX2 : public ColorspaceHandler
{
public:
ColorspaceHandler_AVX2() {};
// 555 -> opaque 8888
size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
// 555 -> opaque 6665
size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
// 8888 -> 6665 (src/dst are not __restrict)
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
// 6665 -> 8888 (src/dst are not __restrict)
size_t ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
// 8888 -> 5551
size_t ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
// 6665 -> 5551
size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
};
#endif // ENABLE_AVX2
#endif /* COLORSPACEHANDLER_AVX2_H */

View File

@ -0,0 +1,345 @@
/*
Copyright (C) 2016 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/
#include "colorspacehandler_Altivec.h"
#ifndef ENABLE_ALTIVEC
#error This code requires PowerPC AltiVec support.
#else
// Expands one vector of 16-bit pixels into two vectors of 32-bit 8888 pixels.
//
//   srcColor           Eight 16-bit source pixels (reinterpreted as "vector pixel").
//   srcAlphaBits32Lo   Supplies the bits selected by the 0xFF000000 mask for dstLo.
//   srcAlphaBits32Hi   Supplies the bits selected by the 0xFF000000 mask for dstHi.
//   dstLo / dstHi      Receive the vec_unpackl() / vec_unpackh() halves respectively.
//
// NOTE(review): SWAP_RB is not referenced anywhere in this body, so both instantiations
// produce the same channel order -- confirm whether a red/blue permute is missing.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi)
{
	// Conversion algorithm:
	//    RGB   5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
	
	// vec_splat_u32() only accepts a 5-bit signed literal (-16..15), so the select mask
	// has to be spelled out as a vector literal.
	const v128u32 alphaSelectMask = {0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000};
	
	dstLo = vec_unpackl((vector pixel)srcColor);
	dstLo = vec_or( vec_sl((v128u8)dstLo, ((v128u8){3,3,3,0, 3,3,3,0, 3,3,3,0, 3,3,3,0})), vec_sr((v128u8)dstLo, ((v128u8){2,2,2,0, 2,2,2,0, 2,2,2,0, 2,2,2,0})) );
	dstLo = vec_sel(dstLo, srcAlphaBits32Lo, alphaSelectMask);
	
	dstHi = vec_unpackh((vector pixel)srcColor);
	dstHi = vec_or( vec_sl((v128u8)dstHi, ((v128u8){3,3,3,0, 3,3,3,0, 3,3,3,0, 3,3,3,0})), vec_sr((v128u8)dstHi, ((v128u8){2,2,2,0, 2,2,2,0, 2,2,2,0, 2,2,2,0})) );
	dstHi = vec_sel(dstHi, srcAlphaBits32Hi, alphaSelectMask);
}
// Expands one vector of 16-bit pixels into two vectors of 32-bit 6665 pixels.
//
//   srcColor           Eight 16-bit source pixels (reinterpreted as "vector pixel").
//   srcAlphaBits32Lo   Supplies the bits selected by the 0xFF000000 mask for dstLo.
//   srcAlphaBits32Hi   Supplies the bits selected by the 0xFF000000 mask for dstHi.
//   dstLo / dstHi      Receive the vec_unpackl() / vec_unpackh() halves respectively.
//
// NOTE(review): SWAP_RB is not referenced anywhere in this body, so both instantiations
// produce the same channel order -- confirm whether a red/blue permute is missing.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi)
{
	// Conversion algorithm:
	//    RGB   5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
	
	// vec_splat_u32() only accepts a 5-bit signed literal (-16..15), so the select mask
	// has to be spelled out as a vector literal.
	const v128u32 alphaSelectMask = {0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000};
	
	dstLo = vec_unpackl((vector pixel)srcColor);
	dstLo = vec_or( vec_sl((v128u8)dstLo, ((v128u8){1,1,1,0, 1,1,1,0, 1,1,1,0, 1,1,1,0})), vec_sr((v128u8)dstLo, ((v128u8){4,4,4,0, 4,4,4,0, 4,4,4,0, 4,4,4,0})) );
	dstLo = vec_sel(dstLo, srcAlphaBits32Lo, alphaSelectMask);
	
	dstHi = vec_unpackh((vector pixel)srcColor);
	dstHi = vec_or( vec_sl((v128u8)dstHi, ((v128u8){1,1,1,0, 1,1,1,0, 1,1,1,0, 1,1,1,0})), vec_sr((v128u8)dstHi, ((v128u8){4,4,4,0, 4,4,4,0, 4,4,4,0, 4,4,4,0})) );
	dstHi = vec_sel(dstHi, srcAlphaBits32Hi, alphaSelectMask);
}
// Expands one vector of 555 colors into two vectors of 8888 colors, supplying
// 0xFF000000 as the alpha bits for every pixel. The channel work is done by
// ColorspaceConvert555To8888_AltiVec.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
	const v128u32 opaqueAlpha8 = {0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000};
	
	// The same alpha vector serves both output halves.
	ColorspaceConvert555To8888_AltiVec<SWAP_RB>(srcColor, opaqueAlpha8, opaqueAlpha8, dstLo, dstHi);
}
// Expands one vector of 555 colors into two vectors of 6665 colors, supplying
// 0x1F000000 (the maximum 5-bit alpha) as the alpha bits for every pixel. The channel
// work is done by ColorspaceConvert555To6665_AltiVec.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
	const v128u32 opaqueAlpha5 = {0x1F000000, 0x1F000000, 0x1F000000, 0x1F000000};
	
	// The same alpha vector serves both output halves.
	ColorspaceConvert555To6665_AltiVec<SWAP_RB>(srcColor, opaqueAlpha5, opaqueAlpha5, dstLo, dstHi);
}
// Converts four 8888 pixels to 6665 with a single per-byte right shift: bytes 0-2 of
// each pixel shift by 2 and byte 3 shifts by 3.
// If SWAP_RB is set, bytes 0 and 2 of every pixel are exchanged afterwards.
template <bool SWAP_RB>
FORCEINLINE v128u32 ColorspaceConvert8888To6665_AltiVec(const v128u32 &src)
{
	// Conversion algorithm:
	//    RGB   8-bit to 6-bit formula: dstRGB6 = (srcRGB8 >> 2)
	//    Alpha 8-bit to 5-bit formula: dstA5   = (srcA8   >> 3)
	const v128u8 perByteShift = {2,2,2,3, 2,2,2,3, 2,2,2,3, 2,2,2,3};
	v128u8 converted = vec_sr( (v128u8)src, perByteShift );
	
	if (SWAP_RB)
	{
		const v128u8 swapBytes0And2 = {2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15};
		converted = vec_perm( converted, converted, swapBytes0And2 );
	}
	
	return (v128u32)converted;
}
// Converts four 6665 pixels to 8888 using per-byte shifts: each byte is shifted left to
// fill the 8-bit range and OR-ed with its own right-shifted copy to fill the low bits.
// If SWAP_RB is set, bytes 0 and 2 of every pixel are exchanged afterwards.
template <bool SWAP_RB>
FORCEINLINE v128u32 ColorspaceConvert6665To8888_AltiVec(const v128u32 &src)
{
	// Conversion algorithm:
	//    RGB   6-bit to 8-bit formula: dstRGB8 = (srcRGB6 << 2) | ((srcRGB6 >> 4) & 0x03)
	//    Alpha 5-bit to 8-bit formula: dstA8   = (srcA5   << 3) | ((srcA5   >> 2) & 0x07)
	const v128u8 perByteShiftLeft = {2,2,2,3, 2,2,2,3, 2,2,2,3, 2,2,2,3};
	const v128u8 perByteShiftRight = {4,4,4,2, 4,4,4,2, 4,4,4,2, 4,4,4,2};
	
	const v128u8 upperBits = vec_sl((v128u8)src, perByteShiftLeft);
	const v128u8 lowerBits = vec_sr((v128u8)src, perByteShiftRight);
	v128u8 converted = vec_or(upperBits, lowerBits);
	
	if (SWAP_RB)
	{
		const v128u8 swapBytes0And2 = {2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15};
		converted = vec_perm( converted, converted, swapBytes0And2 );
	}
	
	return (v128u32)converted;
}
// Packs eight 32-bit pixels (two vectors of four) into eight 16-bit 5551 pixels.
//
//   COLORFORMAT  Source layout: NDSColorFormat_BGR666_Rev (6665) or NDSColorFormat_BGR888_Rev (8888).
//                For NDSColorFormat_BGR555_Rev, srcLo is returned untouched.
//   SWAP_RB      When true, bytes 0 and 2 of every source pixel are exchanged before packing.
//   srcLo/srcHi  The two source vectors; vec_packpx() places srcLo's pixels first.
//
// The color bits come from vec_packpx(); the output alpha bit (0x8000) is then replaced by
// "source alpha is non-zero".
//
// Only literals in the range -16..15 may be passed to vec_splat_u16()/vec_splat_u32(), so
// every constant outside that range is written as a vector literal.
template <NDSColorFormat COLORFORMAT, bool SWAP_RB>
FORCEINLINE v128u16 _ConvertColorBaseTo5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi)
{
	if (COLORFORMAT == NDSColorFormat_BGR555_Rev)
	{
		return srcLo;
	}
	
	const v128u32 alphaShift = {24, 24, 24, 24};
	const v128u32 alphaMask5 = {0x0000001F, 0x0000001F, 0x0000001F, 0x0000001F};
	const v128u16 alphaBit   = {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000};
	const v128u16 colorMask  = {0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF};
	
	v128u32 rgbLo;
	v128u32 rgbHi;
	v128u16 dstColor;
	v128u16 dstAlpha;
	
	if (COLORFORMAT == NDSColorFormat_BGR666_Rev)
	{
		// Convert alpha
		dstAlpha = vec_packsu( vec_and(vec_sr(srcLo, alphaShift), alphaMask5), vec_and(vec_sr(srcHi, alphaShift), alphaMask5) );
		dstAlpha = vec_cmpgt(dstAlpha, vec_splat_u16(0));
		dstAlpha = vec_and(dstAlpha, alphaBit);
		
		// Convert RGB: shifting left by 2 top-aligns each 6-bit channel within its byte,
		// which is where vec_packpx() takes its 5 bits from.
		if (SWAP_RB)
		{
			rgbLo = vec_perm( srcLo, srcLo, ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15}) );
			rgbHi = vec_perm( srcHi, srcHi, ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15}) );
			
			rgbLo = vec_sl( rgbLo, vec_splat_u32(2) );
			rgbHi = vec_sl( rgbHi, vec_splat_u32(2) );
			
			dstColor = (v128u16)vec_packpx(rgbLo, rgbHi);
		}
		else
		{
			rgbLo = vec_sl( srcLo, vec_splat_u32(2) );
			rgbHi = vec_sl( srcHi, vec_splat_u32(2) );
			
			dstColor = (v128u16)vec_packpx(rgbLo, rgbHi);
		}
	}
	else if (COLORFORMAT == NDSColorFormat_BGR888_Rev)
	{
		// Convert alpha
		dstAlpha = vec_packsu( vec_sr(srcLo, alphaShift), vec_sr(srcHi, alphaShift) );
		dstAlpha = vec_cmpgt(dstAlpha, vec_splat_u16(0));
		dstAlpha = vec_and(dstAlpha, alphaBit);
		
		// Convert RGB
		if (SWAP_RB)
		{
			rgbLo = vec_perm( srcLo, srcLo, ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15}) );
			rgbHi = vec_perm( srcHi, srcHi, ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15}) );
			
			dstColor = (v128u16)vec_packpx(rgbLo, rgbHi);
		}
		else
		{
			dstColor = (v128u16)vec_packpx(srcLo, srcHi);
		}
	}
	
	// Drop whatever vec_packpx() put in the top bit, then substitute the computed alpha bit.
	dstColor = vec_and(dstColor, colorMask);
	return vec_or(dstColor, dstAlpha);
}
// Packs two vectors of 8888 pixels into one vector of 5551 pixels by forwarding to
// _ConvertColorBaseTo5551_AltiVec with the source format fixed to NDSColorFormat_BGR888_Rev.
template <bool SWAP_RB>
FORCEINLINE v128u16 ColorspaceConvert8888To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi)
{
return _ConvertColorBaseTo5551_AltiVec<NDSColorFormat_BGR888_Rev, SWAP_RB>(srcLo, srcHi);
}
// Packs two vectors of 6665 pixels into one vector of 5551 pixels by forwarding to
// _ConvertColorBaseTo5551_AltiVec with the source format fixed to NDSColorFormat_BGR666_Rev.
template <bool SWAP_RB>
FORCEINLINE v128u16 ColorspaceConvert6665To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi)
{
return _ConvertColorBaseTo5551_AltiVec<NDSColorFormat_BGR666_Rev, SWAP_RB>(srcLo, srcHi);
}
template <bool SWAP_RB>
static size_t ColorspaceConvertBuffer555To8888Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=8)
{
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To8888Opaque_AltiVec<SWAP_RB>( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi );
vec_st(dstConvertedHi, 0, dst+i);
vec_st(dstConvertedLo, 16, dst+i);
}
return i;
}
template <bool SWAP_RB>
size_t ColorspaceConvertBuffer555To6665Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=8)
{
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To6665Opaque_AltiVec<SWAP_RB>( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi );
vec_st(dstConvertedHi, 0, dst+i);
vec_st(dstConvertedLo, 16, dst+i);
}
return i;
}
// Converts a buffer of 8888 pixels to 6665 pixels, four pixels per iteration,
// using vec_ld()/vec_st() for all memory access.
//
//   pixCountVec128  Pixel count to process. The loop steps by 4 and has no tail handling,
//                   so this is presumably expected to be a multiple of 4 -- TODO confirm.
//
// Returns the loop index on exit, i.e. the number of pixels written.
template <bool SWAP_RB>
size_t ColorspaceConvertBuffer8888To6665_AltiVec(const u32 *src, u32 *dst, size_t pixCountVec128)
{
	size_t pix = 0;
	
	while (pix < pixCountVec128)
	{
		vec_st( ColorspaceConvert8888To6665_AltiVec<SWAP_RB>(vec_ld(0, src+pix)), 0, dst+pix );
		pix += 4;
	}
	
	return pix;
}
// Converts a buffer of 6665 pixels to 8888 pixels, four pixels per iteration,
// using vec_ld()/vec_st() for all memory access.
//
//   pixCountVec128  Pixel count to process. The loop steps by 4 and has no tail handling,
//                   so this is presumably expected to be a multiple of 4 -- TODO confirm.
//
// Returns the loop index on exit, i.e. the number of pixels written.
template <bool SWAP_RB>
size_t ColorspaceConvertBuffer6665To8888_AltiVec(const u32 *src, u32 *dst, size_t pixCountVec128)
{
	size_t pix = 0;
	
	while (pix < pixCountVec128)
	{
		vec_st( ColorspaceConvert6665To8888_AltiVec<SWAP_RB>(vec_ld(0, src+pix)), 0, dst+pix );
		pix += 4;
	}
	
	return pix;
}
// Converts a buffer of 8888 pixels to 5551 pixels. Each iteration loads two source
// vectors (byte offsets 0 and 16 from src+pix) and stores one destination vector.
//
//   pixCountVec128  Pixel count to process. The loop steps by 8 and has no tail handling,
//                   so this is presumably expected to be a multiple of 8 -- TODO confirm.
//
// Returns the loop index on exit, i.e. the number of pixels written.
template <bool SWAP_RB>
size_t ColorspaceConvertBuffer8888To5551_AltiVec(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec128)
{
	size_t pix = 0;
	
	while (pix < pixCountVec128)
	{
		vec_st( ColorspaceConvert8888To5551_AltiVec<SWAP_RB>(vec_ld(0, src+pix), vec_ld(16, src+pix)), 0, dst+pix );
		pix += 8;
	}
	
	return pix;
}
// Converts a buffer of 6665 pixels to 5551 pixels. Each iteration loads two source
// vectors (byte offsets 0 and 16 from src+pix) and stores one destination vector.
//
//   pixCountVec128  Pixel count to process. The loop steps by 8 and has no tail handling,
//                   so this is presumably expected to be a multiple of 8 -- TODO confirm.
//
// Returns the loop index on exit, i.e. the number of pixels written.
template <bool SWAP_RB>
size_t ColorspaceConvertBuffer6665To5551_AltiVec(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec128)
{
	size_t pix = 0;
	
	while (pix < pixCountVec128)
	{
		vec_st( ColorspaceConvert6665To5551_AltiVec<SWAP_RB>(vec_ld(0, src+pix), vec_ld(16, src+pix)), 0, dst+pix );
		pix += 8;
	}
	
	return pix;
}
// 555 -> opaque 8888 / 6665 buffer conversions. Each member forwards src, dst and pixCount
// unchanged to the matching ColorspaceConvertBuffer*_AltiVec template and returns its
// result; the _SwapRB members pass SWAP_RB=true, the others SWAP_RB=false.
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_AltiVec<false>(src, dst, pixCount);
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_AltiVec<true>(src, dst, pixCount);
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_AltiVec<false>(src, dst, pixCount);
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_AltiVec<true>(src, dst, pixCount);
}
// 8888 <-> 6665 buffer conversions. Each member forwards src, dst and pixCount unchanged
// to the matching ColorspaceConvertBuffer*_AltiVec template and returns its result; the
// _SwapRB members pass SWAP_RB=true, the others SWAP_RB=false.
size_t ColorspaceHandler_AltiVec::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To6665_AltiVec<false>(src, dst, pixCount);
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To6665_AltiVec<true>(src, dst, pixCount);
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To8888_AltiVec<false>(src, dst, pixCount);
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To8888_AltiVec<true>(src, dst, pixCount);
}
// 8888 / 6665 -> 5551 buffer conversions. Each member forwards src, dst and pixCount
// unchanged to the matching ColorspaceConvertBuffer*_AltiVec template and returns its
// result; the _SwapRB members pass SWAP_RB=true, the others SWAP_RB=false.
size_t ColorspaceHandler_AltiVec::ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To5551_AltiVec<false>(src, dst, pixCount);
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To5551_AltiVec<true>(src, dst, pixCount);
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To5551_AltiVec<false>(src, dst, pixCount);
}
size_t ColorspaceHandler_AltiVec::ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To5551_AltiVec<true>(src, dst, pixCount);
}
// Explicit instantiations of the per-vector AltiVec converters for both SWAP_RB values.
// NOTE(review): presumably these exist so that code including colorspacehandler_Altivec.h
// can link against the definitions in this translation unit -- the header is not visible
// here; confirm.
template void ColorspaceConvert555To8888_AltiVec<true>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_AltiVec<false>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_AltiVec<true>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_AltiVec<false>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AltiVec<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_AltiVec<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AltiVec<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_AltiVec<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template v128u32 ColorspaceConvert8888To6665_AltiVec<true>(const v128u32 &src);
template v128u32 ColorspaceConvert8888To6665_AltiVec<false>(const v128u32 &src);
template v128u32 ColorspaceConvert6665To8888_AltiVec<true>(const v128u32 &src);
template v128u32 ColorspaceConvert6665To8888_AltiVec<false>(const v128u32 &src);
template v128u16 ColorspaceConvert8888To5551_AltiVec<true>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u16 ColorspaceConvert8888To5551_AltiVec<false>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u16 ColorspaceConvert6665To5551_AltiVec<true>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u16 ColorspaceConvert6665To5551_AltiVec<false>(const v128u32 &srcLo, const v128u32 &srcHi);
#endif // ENABLE_ALTIVEC

View File

@ -0,0 +1,64 @@
/*
Copyright (C) 2016 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef COLORSPACEHANDLER_ALTIVEC_H
#define COLORSPACEHANDLER_ALTIVEC_H
#include "colorspacehandler.h"
#ifndef ENABLE_ALTIVEC
#warning This header requires PowerPC AltiVec support.
#else
// Per-vector conversion primitives. Only declarations are given here; each is templated
// on SWAP_RB (a compile-time flag which, by its name, selects red/blue channel swapping).

// 555 -> 8888 / 6665 with caller-supplied alpha bits; one 16-bit vector in, two 32-bit vectors out.
template<bool SWAP_RB> void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
// 555 -> 8888 / 6665 variants that take no alpha argument.
template<bool SWAP_RB> void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
// 32-bit <-> 32-bit conversions; one vector in, one vector out.
template<bool SWAP_RB> v128u32 ColorspaceConvert8888To6665_AltiVec(const v128u32 &src);
template<bool SWAP_RB> v128u32 ColorspaceConvert6665To8888_AltiVec(const v128u32 &src);
// 32-bit -> 16-bit conversions; two 32-bit vectors in, one 16-bit vector out.
template<bool SWAP_RB> v128u16 ColorspaceConvert8888To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi);
template<bool SWAP_RB> v128u16 ColorspaceConvert6665To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi);
// AltiVec has very poor support for dealing with unaligned addresses (it's possible, just
// very obtuse), so we're not even going to bother dealing with any unaligned addresses.
//
// AltiVec flavor of ColorspaceHandler. It declares whole-buffer conversions between the
// 555, 5551, 6665 and 8888 pixel layouts, each in a plain and a _SwapRB form. Unlike some
// other handlers, no *_IsUnaligned methods are declared here, per the note above.
// Every method takes a source buffer, a destination buffer and a pixel count, and
// returns a size_t.
class ColorspaceHandler_AltiVec : public ColorspaceHandler
{
public:
ColorspaceHandler_AltiVec() {};
// 16-bit 555 source to 32-bit destination.
size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
// 32-bit to 32-bit. These pointers are not __restrict-qualified.
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
// 32-bit source to 16-bit 5551 destination.
size_t ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
};
#endif // ENABLE_ALTIVEC
#endif /* COLORSPACEHANDLER_ALTIVEC_H */

View File

@ -0,0 +1,503 @@
/*
Copyright (C) 2016 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/
#include "colorspacehandler_SSE2.h"
#ifndef ENABLE_SSE2
#error This code requires SSE2 support.
#else
#include <emmintrin.h>
#ifdef ENABLE_SSSE3
#include <tmmintrin.h>
#endif
// Expands eight 16-bit 555 colors into eight 32-bit 8888 colors.
//
// srcColor holds the eight source pixels. The four low pixels are written to dstLo and the
// four high pixels to dstHi. srcAlphaBits32Lo/Hi are OR'ed into the results unmodified, so
// the caller must supply alpha bits already positioned inside each 32-bit lane.
// With SWAP_RB = false the lowest 5-bit field of the source ends up in byte 0 and the
// highest in byte 2; with SWAP_RB = true those two are exchanged. The middle field always
// goes to byte 1.
// Note that dstLo/dstHi are written progressively while the inputs are still being read.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888_SSE2(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi)
{
v128u32 src32;
// Conversion algorithm:
// RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07)
// Low four pixels: zero-extend each 16-bit color to a 32-bit lane.
src32 = _mm_unpacklo_epi16(srcColor, _mm_setzero_si128());
// Move the two outer 5-bit fields into the top five bits of bytes 0 and 2...
dstLo = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 19), _mm_srli_epi32(src32, 7)) : _mm_or_si128(_mm_slli_epi32(src32, 3), _mm_slli_epi32(src32, 9));
// ...and discard the stray bits the whole-lane shifts dragged along.
dstLo = _mm_and_si128( dstLo, _mm_set1_epi32(0x00F800F8) );
// Middle 5-bit field goes to the top five bits of byte 1.
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_slli_epi32(src32, 6), _mm_set1_epi32(0x0000F800)) );
// Copy each channel's top three bits into its low three bits (the ">> 2" term of the formula).
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_srli_epi32(dstLo, 5), _mm_set1_epi32(0x00070707)) );
dstLo = _mm_or_si128( dstLo, srcAlphaBits32Lo );
// High four pixels: same sequence.
src32 = _mm_unpackhi_epi16(srcColor, _mm_setzero_si128());
dstHi = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 19), _mm_srli_epi32(src32, 7)) : _mm_or_si128(_mm_slli_epi32(src32, 3), _mm_slli_epi32(src32, 9));
dstHi = _mm_and_si128( dstHi, _mm_set1_epi32(0x00F800F8) );
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_slli_epi32(src32, 6), _mm_set1_epi32(0x0000F800)) );
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_srli_epi32(dstHi, 5), _mm_set1_epi32(0x00070707)) );
dstHi = _mm_or_si128( dstHi, srcAlphaBits32Hi );
}
// Expands eight 16-bit 555 colors into eight 32-bit 6665 colors.
//
// srcColor holds the eight source pixels. The four low pixels are written to dstLo and the
// four high pixels to dstHi. srcAlphaBits32Lo/Hi are OR'ed into the results unmodified, so
// the caller must supply alpha bits already positioned inside each 32-bit lane.
// With SWAP_RB = false the lowest 5-bit field of the source ends up in byte 0 and the
// highest in byte 2; with SWAP_RB = true those two are exchanged. The middle field always
// goes to byte 1. Each output channel occupies the low six bits of its byte.
// Note that dstLo/dstHi are written progressively while the inputs are still being read.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665_SSE2(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi)
{
v128u32 src32;
// Conversion algorithm:
// RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01)
// Low four pixels: zero-extend each 16-bit color to a 32-bit lane.
src32 = _mm_unpacklo_epi16(srcColor, _mm_setzero_si128());
// Move the two outer 5-bit fields to bits 1-5 of bytes 0 and 2...
dstLo = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 17), _mm_srli_epi32(src32, 9)) : _mm_or_si128(_mm_slli_epi32(src32, 1), _mm_slli_epi32(src32, 7));
// ...and discard the stray bits the whole-lane shifts dragged along.
dstLo = _mm_and_si128( dstLo, _mm_set1_epi32(0x003E003E) );
// Middle 5-bit field goes to bits 1-5 of byte 1.
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00003E00)) );
// Copy each channel's top bit (bit 5) into bit 0 (the ">> 4" term of the formula).
dstLo = _mm_or_si128( dstLo, _mm_and_si128(_mm_srli_epi32(dstLo, 5), _mm_set1_epi32(0x00010101)) );
dstLo = _mm_or_si128( dstLo, srcAlphaBits32Lo );
// High four pixels: same sequence.
src32 = _mm_unpackhi_epi16(srcColor, _mm_setzero_si128());
dstHi = (SWAP_RB) ? _mm_or_si128(_mm_slli_epi32(src32, 17), _mm_srli_epi32(src32, 9)) : _mm_or_si128(_mm_slli_epi32(src32, 1), _mm_slli_epi32(src32, 7));
dstHi = _mm_and_si128( dstHi, _mm_set1_epi32(0x003E003E) );
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_slli_epi32(src32, 4), _mm_set1_epi32(0x00003E00)) );
dstHi = _mm_or_si128( dstHi, _mm_and_si128(_mm_srli_epi32(dstHi, 5), _mm_set1_epi32(0x00010101)) );
dstHi = _mm_or_si128( dstHi, srcAlphaBits32Hi );
}
// Converts eight 555 colors to 8888 with the alpha byte of every output pixel set to 0xFF.
// Thin wrapper over ColorspaceConvert555To8888_SSE2 that passes the same constant alpha
// vector for both the low and the high half.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
const v128u32 srcAlphaBits32 = _mm_set1_epi32(0xFF000000);
ColorspaceConvert555To8888_SSE2<SWAP_RB>(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi);
}
// Converts eight 555 colors to 6665 with the alpha byte of every output pixel set to 0x1F
// (the maximum 5-bit value). Thin wrapper over ColorspaceConvert555To6665_SSE2 that passes
// the same constant alpha vector for both the low and the high half.
template <bool SWAP_RB>
FORCEINLINE void ColorspaceConvert555To6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi)
{
const v128u32 srcAlphaBits32 = _mm_set1_epi32(0x1F000000);
ColorspaceConvert555To6665_SSE2<SWAP_RB>(srcColor, srcAlphaBits32, srcAlphaBits32, dstLo, dstHi);
}
// Converts four packed 32-bit 8888 pixels into 6665 format.
//
// Conversion algorithm:
//    RGB   8-bit to 6-bit formula: dstRGB6 = (srcRGB8 >> 2)
//    Alpha 8-bit to 5-bit formula: dstA5   = (srcA8   >> 3)
//
// Each output channel occupies the low bits of its own byte. When SWAP_RB is true,
// bytes 0 and 2 of every pixel are exchanged in the result. The SSSE3 and plain SSE2
// builds produce identical output.
template <bool SWAP_RB>
FORCEINLINE v128u32 ColorspaceConvert8888To6665_SSE2(const v128u32 &src)
{
	// Reduce all three color channels in one go. The shift operates on whole 32-bit
	// lanes, so the mask removes the two bits that leak in from the neighboring byte.
	v128u32 rgb = _mm_and_si128( _mm_srli_epi32(src, 2), _mm_set1_epi32(0x003F3F3F) );
	const v128u32 a = _mm_and_si128( _mm_srli_epi32(src, 3), _mm_set1_epi32(0x1F000000) );
	
	if (SWAP_RB)
	{
#ifdef ENABLE_SSSE3
		rgb = _mm_shuffle_epi8( rgb, _mm_set_epi8(15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2) );
#else
		// SSE2 fallback: exchange bytes 0 and 2 of the already-reduced channels.
		// (Masking the unreduced source with 6-bit masks and shifting afterwards would
		// drop the top two bits of every channel and push the middle channel across
		// the byte 0 / byte 1 boundary.)
		rgb = _mm_or_si128( _mm_srli_epi32(_mm_and_si128(rgb, _mm_set1_epi32(0x003F0000)), 16),
		      _mm_or_si128( _mm_and_si128(rgb, _mm_set1_epi32(0x00003F00)),
		                    _mm_slli_epi32(_mm_and_si128(rgb, _mm_set1_epi32(0x0000003F)), 16) ) );
#endif
	}
	
	return _mm_or_si128(rgb, a);
}
// Converts four packed 32-bit 6665 pixels into 8888 format.
//
// Conversion algorithm:
//    RGB   6-bit to 8-bit formula: dstRGB8 = (srcRGB6 << 2) | ((srcRGB6 >> 4) & 0x03)
//    Alpha 5-bit to 8-bit formula: dstA8   = (srcA5   << 3) | ((srcA5   >> 2) & 0x07)
//
// When SWAP_RB is true, bytes 0 and 2 of every pixel are exchanged in the result.
// The SSSE3 and plain SSE2 builds produce identical output.
template <bool SWAP_RB>
FORCEINLINE v128u32 ColorspaceConvert6665To8888_SSE2(const v128u32 &src)
{
	// Expand the three color channels and the alpha channel separately; the masks strip
	// the bits that whole-lane shifts carry across byte boundaries.
	v128u32 rgb = _mm_or_si128( _mm_and_si128(_mm_slli_epi32(src, 2), _mm_set1_epi32(0x00FCFCFC)), _mm_and_si128(_mm_srli_epi32(src, 4), _mm_set1_epi32(0x00030303)) );
	const v128u32 a = _mm_or_si128( _mm_and_si128(_mm_slli_epi32(src, 3), _mm_set1_epi32(0xF8000000)), _mm_and_si128(_mm_srli_epi32(src, 2), _mm_set1_epi32(0x07000000)) );
	
	if (SWAP_RB)
	{
#ifdef ENABLE_SSSE3
		rgb = _mm_shuffle_epi8( rgb, _mm_set_epi8(15,12,13,14, 11,8,9,10, 7,4,5,6, 3,0,1,2) );
#else
		// SSE2 fallback: exchange bytes 0 and 2 of the *expanded* channels. The swap must
		// read rgb, not src, otherwise the 6-bit source values are returned unexpanded.
		rgb = _mm_or_si128( _mm_srli_epi32(_mm_and_si128(rgb, _mm_set1_epi32(0x00FF0000)), 16),
		      _mm_or_si128( _mm_and_si128(rgb, _mm_set1_epi32(0x0000FF00)),
		                    _mm_slli_epi32(_mm_and_si128(rgb, _mm_set1_epi32(0x000000FF)), 16) ) );
#endif
	}
	
	return _mm_or_si128(rgb, a);
}
// Packs eight 32-bit pixels (four in srcLo, four in srcHi) into eight 16-bit 5551 pixels.
//
// COLORFORMAT selects the source layout:
//   - NDSColorFormat_BGR555_Rev: srcLo is returned unchanged and srcHi is ignored.
//   - NDSColorFormat_BGR666_Rev: 6-bit channels in bytes 0-2, 5-bit alpha in byte 3.
//   - NDSColorFormat_BGR888_Rev: 8-bit channels in bytes 0-2, 8-bit alpha in byte 3.
// Each channel keeps its top five bits. With SWAP_RB = false, byte 0 maps to bits 0-4 and
// byte 2 to bits 10-14; with SWAP_RB = true those two are exchanged. Byte 1 always maps to
// bits 5-9. Bit 15 of an output pixel is set when the source alpha field is non-zero.
// The result holds the srcLo pixels in its low four 16-bit lanes and the srcHi pixels in
// its high four.
// NOTE(review): for any COLORFORMAT other than the three above, rgbLo, rgbHi and alpha are
// read uninitialized at the final return.
template <NDSColorFormat COLORFORMAT, bool SWAP_RB>
FORCEINLINE v128u16 _ConvertColorBaseTo5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi)
{
if (COLORFORMAT == NDSColorFormat_BGR555_Rev)
{
return srcLo;
}
v128u32 rgbLo;
v128u32 rgbHi;
v128u16 alpha;
if (COLORFORMAT == NDSColorFormat_BGR666_Rev)
{
if (SWAP_RB)
{
// Convert color from low bits
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 17), _mm_set1_epi32(0x0000001F));
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 4), _mm_set1_epi32(0x000003E0)) );
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_slli_epi32(srcLo, 9), _mm_set1_epi32(0x00007C00)) );
// Convert color from high bits
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 17), _mm_set1_epi32(0x0000001F));
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 4), _mm_set1_epi32(0x000003E0)) );
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_slli_epi32(srcHi, 9), _mm_set1_epi32(0x00007C00)) );
}
else
{
// Convert color from low bits
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 1), _mm_set1_epi32(0x0000001F));
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 4), _mm_set1_epi32(0x000003E0)) );
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 7), _mm_set1_epi32(0x00007C00)) );
// Convert color from high bits
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 1), _mm_set1_epi32(0x0000001F));
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 4), _mm_set1_epi32(0x000003E0)) );
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 7), _mm_set1_epi32(0x00007C00)) );
}
// Convert alpha
// Isolate the 5-bit alpha of each pixel, narrow to 16-bit lanes, then turn "non-zero" into bit 15.
alpha = _mm_packs_epi32( _mm_and_si128(_mm_srli_epi32(srcLo, 24), _mm_set1_epi32(0x0000001F)), _mm_and_si128(_mm_srli_epi32(srcHi, 24), _mm_set1_epi32(0x0000001F)) );
alpha = _mm_cmpgt_epi16(alpha, _mm_setzero_si128());
alpha = _mm_and_si128(alpha, _mm_set1_epi16(0x8000));
}
else if (COLORFORMAT == NDSColorFormat_BGR888_Rev)
{
if (SWAP_RB)
{
// Convert color from low bits
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 19), _mm_set1_epi32(0x0000001F));
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 6), _mm_set1_epi32(0x000003E0)) );
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_slli_epi32(srcLo, 7), _mm_set1_epi32(0x00007C00)) );
// Convert color from high bits
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 19), _mm_set1_epi32(0x0000001F));
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 6), _mm_set1_epi32(0x000003E0)) );
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_slli_epi32(srcHi, 7), _mm_set1_epi32(0x00007C00)) );
}
else
{
// Convert color from low bits
rgbLo = _mm_and_si128(_mm_srli_epi32(srcLo, 3), _mm_set1_epi32(0x0000001F));
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 6), _mm_set1_epi32(0x000003E0)) );
rgbLo = _mm_or_si128(rgbLo, _mm_and_si128(_mm_srli_epi32(srcLo, 9), _mm_set1_epi32(0x00007C00)) );
// Convert color from high bits
rgbHi = _mm_and_si128(_mm_srli_epi32(srcHi, 3), _mm_set1_epi32(0x0000001F));
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 6), _mm_set1_epi32(0x000003E0)) );
rgbHi = _mm_or_si128(rgbHi, _mm_and_si128(_mm_srli_epi32(srcHi, 9), _mm_set1_epi32(0x00007C00)) );
}
// Convert alpha
// The logical shift leaves only the 8-bit alpha in each lane, so no extra mask is needed here.
alpha = _mm_packs_epi32( _mm_srli_epi32(srcLo, 24), _mm_srli_epi32(srcHi, 24) );
alpha = _mm_cmpgt_epi16(alpha, _mm_setzero_si128());
alpha = _mm_and_si128(alpha, _mm_set1_epi16(0x8000));
}
// Color values never exceed 0x7FFF, so the signed-saturating pack narrows them losslessly.
return _mm_or_si128(_mm_packs_epi32(rgbLo, rgbHi), alpha);
}
// Packs eight 8888 pixels (srcLo = first four, srcHi = last four) into eight 5551 pixels.
// Thin wrapper that selects the NDSColorFormat_BGR888_Rev path of _ConvertColorBaseTo5551_SSE2.
template <bool SWAP_RB>
FORCEINLINE v128u16 ColorspaceConvert8888To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi)
{
return _ConvertColorBaseTo5551_SSE2<NDSColorFormat_BGR888_Rev, SWAP_RB>(srcLo, srcHi);
}
// Packs eight 6665 pixels (srcLo = first four, srcHi = last four) into eight 5551 pixels.
// Thin wrapper that selects the NDSColorFormat_BGR666_Rev path of _ConvertColorBaseTo5551_SSE2.
template <bool SWAP_RB>
FORCEINLINE v128u16 ColorspaceConvert6665To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi)
{
return _ConvertColorBaseTo5551_SSE2<NDSColorFormat_BGR666_Rev, SWAP_RB>(srcLo, srcHi);
}
template <bool SWAP_RB, bool IS_UNALIGNED>
static size_t ColorspaceConvertBuffer555To8888Opaque_SSE2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=8)
{
v128u16 src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i));
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To8888Opaque_SSE2<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
_mm_storeu_si128((v128u32 *)(dst+i+0), dstConvertedLo);
_mm_storeu_si128((v128u32 *)(dst+i+4), dstConvertedHi);
}
else
{
_mm_store_si128((v128u32 *)(dst+i+0), dstConvertedLo);
_mm_store_si128((v128u32 *)(dst+i+4), dstConvertedHi);
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer555To6665Opaque_SSE2(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=8)
{
v128u16 src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i));
v128u32 dstConvertedLo, dstConvertedHi;
ColorspaceConvert555To6665Opaque_SSE2<SWAP_RB>(src_vec128, dstConvertedLo, dstConvertedHi);
if (IS_UNALIGNED)
{
_mm_storeu_si128((v128u32 *)(dst+i+0), dstConvertedLo);
_mm_storeu_si128((v128u32 *)(dst+i+4), dstConvertedHi);
}
else
{
_mm_store_si128((v128u32 *)(dst+i+0), dstConvertedLo);
_mm_store_si128((v128u32 *)(dst+i+4), dstConvertedHi);
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer8888To6665_SSE2(const u32 *src, u32 *dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=4)
{
if (IS_UNALIGNED)
{
_mm_storeu_si128( (v128u32 *)(dst+i), ColorspaceConvert8888To6665_SSE2<SWAP_RB>(_mm_loadu_si128((v128u32 *)(src+i))) );
}
else
{
_mm_store_si128( (v128u32 *)(dst+i), ColorspaceConvert8888To6665_SSE2<SWAP_RB>(_mm_load_si128((v128u32 *)(src+i))) );
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer6665To8888_SSE2(const u32 *src, u32 *dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=4)
{
if (IS_UNALIGNED)
{
_mm_storeu_si128( (v128u32 *)(dst+i), ColorspaceConvert6665To8888_SSE2<SWAP_RB>(_mm_loadu_si128((v128u32 *)(src+i))) );
}
else
{
_mm_store_si128( (v128u32 *)(dst+i), ColorspaceConvert6665To8888_SSE2<SWAP_RB>(_mm_load_si128((v128u32 *)(src+i))) );
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer8888To5551_SSE2(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=8)
{
if (IS_UNALIGNED)
{
_mm_storeu_si128( (v128u16 *)(dst+i), ColorspaceConvert8888To5551_SSE2<SWAP_RB>(_mm_loadu_si128((v128u32 *)(src+i)), _mm_loadu_si128((v128u32 *)(src+i+4))) );
}
else
{
_mm_store_si128( (v128u16 *)(dst+i), ColorspaceConvert8888To5551_SSE2<SWAP_RB>(_mm_load_si128((v128u32 *)(src+i)), _mm_load_si128((v128u32 *)(src+i+4))) );
}
}
return i;
}
template <bool SWAP_RB, bool IS_UNALIGNED>
size_t ColorspaceConvertBuffer6665To5551_SSE2(const u32 *__restrict src, u16 *__restrict dst, size_t pixCountVec128)
{
size_t i = 0;
for (; i < pixCountVec128; i+=8)
{
if (IS_UNALIGNED)
{
_mm_storeu_si128( (v128u16 *)(dst+i), ColorspaceConvert6665To5551_SSE2<SWAP_RB>(_mm_loadu_si128((v128u32 *)(src+i)), _mm_loadu_si128((v128u32 *)(src+i+4))) );
}
else
{
_mm_store_si128( (v128u16 *)(dst+i), ColorspaceConvert6665To5551_SSE2<SWAP_RB>(_mm_load_si128((v128u32 *)(src+i)), _mm_load_si128((v128u32 *)(src+i+4))) );
}
}
return i;
}
// 555 -> opaque 8888 buffer conversion: no red/blue swap, aligned loads and stores
// (src and dst must be 16-byte aligned). Returns the count reported by the SSE2 loop.
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_SSE2<false, false>(src, dst, pixCount);
}
// 555 -> opaque 8888 buffer conversion: red/blue swapped, aligned loads and stores
// (src and dst must be 16-byte aligned). Returns the count reported by the SSE2 loop.
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_SSE2<true, false>(src, dst, pixCount);
}
// 555 -> opaque 8888 buffer conversion: no red/blue swap, unaligned loads and stores.
// Returns the count reported by the SSE2 loop.
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_SSE2<false, true>(src, dst, pixCount);
}
// 555 -> opaque 8888 buffer conversion: red/blue swapped, unaligned loads and stores.
// Returns the count reported by the SSE2 loop.
size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To8888Opaque_SSE2<true, true>(src, dst, pixCount);
}
// 555 -> opaque 6665 buffer conversion: no red/blue swap, aligned loads and stores
// (src and dst must be 16-byte aligned). Returns the count reported by the SSE2 loop.
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_SSE2<false, false>(src, dst, pixCount);
}
// 555 -> opaque 6665 buffer conversion: red/blue swapped, aligned loads and stores
// (src and dst must be 16-byte aligned). Returns the count reported by the SSE2 loop.
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_SSE2<true, false>(src, dst, pixCount);
}
// 555 -> opaque 6665 buffer conversion: no red/blue swap, unaligned loads and stores.
// Returns the count reported by the SSE2 loop.
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_SSE2<false, true>(src, dst, pixCount);
}
// 555 -> opaque 6665 buffer conversion: red/blue swapped, unaligned loads and stores.
// Returns the count reported by the SSE2 loop.
size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer555To6665Opaque_SSE2<true, true>(src, dst, pixCount);
}
// 8888 -> 6665 buffer conversion: no red/blue swap, aligned loads and stores
// (src and dst must be 16-byte aligned). Returns the count reported by the SSE2 loop.
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To6665_SSE2<false, false>(src, dst, pixCount);
}
// 8888 -> 6665 buffer conversion: red/blue swapped, aligned loads and stores
// (src and dst must be 16-byte aligned). Returns the count reported by the SSE2 loop.
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To6665_SSE2<true, false>(src, dst, pixCount);
}
// 8888 -> 6665 buffer conversion: no red/blue swap, unaligned loads and stores.
// Returns the count reported by the SSE2 loop.
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To6665_SSE2<false, true>(src, dst, pixCount);
}
// 8888 -> 6665 buffer conversion: red/blue swapped, unaligned loads and stores.
// Returns the count reported by the SSE2 loop.
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To6665_SSE2<true, true>(src, dst, pixCount);
}
// 6665 -> 8888 buffer conversion: no red/blue swap, aligned loads and stores
// (src and dst must be 16-byte aligned). Returns the count reported by the SSE2 loop.
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To8888_SSE2<false, false>(src, dst, pixCount);
}
// 6665 -> 8888 buffer conversion: red/blue swapped, aligned loads and stores
// (src and dst must be 16-byte aligned). Returns the count reported by the SSE2 loop.
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To8888_SSE2<true, false>(src, dst, pixCount);
}
// 6665 -> 8888 buffer conversion: no red/blue swap, unaligned loads and stores.
// Returns the count reported by the SSE2 loop.
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To8888_SSE2<false, true>(src, dst, pixCount);
}
// 6665 -> 8888 buffer conversion: red/blue swapped, unaligned loads and stores.
// Returns the count reported by the SSE2 loop.
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To8888_SSE2<true, true>(src, dst, pixCount);
}
// 8888 -> 5551 buffer conversion: no red/blue swap, aligned loads and stores
// (src and dst must be 16-byte aligned). Returns the count reported by the SSE2 loop.
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To5551_SSE2<false, false>(src, dst, pixCount);
}
// 8888 -> 5551 buffer conversion: red/blue swapped, aligned loads and stores
// (src and dst must be 16-byte aligned). Returns the count reported by the SSE2 loop.
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To5551_SSE2<true, false>(src, dst, pixCount);
}
// 8888 -> 5551 buffer conversion: no red/blue swap, unaligned loads and stores.
// Returns the count reported by the SSE2 loop.
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To5551_SSE2<false, true>(src, dst, pixCount);
}
// 8888 -> 5551 buffer conversion: red/blue swapped, unaligned loads and stores.
// Returns the count reported by the SSE2 loop.
size_t ColorspaceHandler_SSE2::ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer8888To5551_SSE2<true, true>(src, dst, pixCount);
}
// 6665 -> 5551 buffer conversion: no red/blue swap, aligned loads and stores
// (src and dst must be 16-byte aligned). Returns the count reported by the SSE2 loop.
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To5551_SSE2<false, false>(src, dst, pixCount);
}
// 6665 -> 5551 buffer conversion: red/blue swapped, aligned loads and stores
// (src and dst must be 16-byte aligned). Returns the count reported by the SSE2 loop.
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To5551_SSE2<true, false>(src, dst, pixCount);
}
// 6665 -> 5551 buffer conversion: no red/blue swap, unaligned loads and stores.
// Returns the count reported by the SSE2 loop.
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To5551_SSE2<false, true>(src, dst, pixCount);
}
// 6665 -> 5551 buffer conversion: red/blue swapped, unaligned loads and stores.
// Returns the count reported by the SSE2 loop.
size_t ColorspaceHandler_SSE2::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const
{
return ColorspaceConvertBuffer6665To5551_SSE2<true, true>(src, dst, pixCount);
}
// Explicit instantiations of every per-vector SSE2 conversion template, once for each
// value of SWAP_RB. Presumably these exist so that code which only sees the template
// declarations can still link against the definitions -- confirm with the build.
template void ColorspaceConvert555To8888_SSE2<true>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888_SSE2<false>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_SSE2<true>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665_SSE2<false>(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_SSE2<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To8888Opaque_SSE2<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_SSE2<true>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template void ColorspaceConvert555To6665Opaque_SSE2<false>(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template v128u32 ColorspaceConvert8888To6665_SSE2<true>(const v128u32 &src);
template v128u32 ColorspaceConvert8888To6665_SSE2<false>(const v128u32 &src);
template v128u32 ColorspaceConvert6665To8888_SSE2<true>(const v128u32 &src);
template v128u32 ColorspaceConvert6665To8888_SSE2<false>(const v128u32 &src);
template v128u16 ColorspaceConvert8888To5551_SSE2<true>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u16 ColorspaceConvert8888To5551_SSE2<false>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u16 ColorspaceConvert6665To5551_SSE2<true>(const v128u32 &srcLo, const v128u32 &srcHi);
template v128u16 ColorspaceConvert6665To5551_SSE2<false>(const v128u32 &srcLo, const v128u32 &srcHi);
#endif // ENABLE_SSE2

View File

@ -0,0 +1,74 @@
/*
Copyright (C) 2016 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the this software. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef COLORSPACEHANDLER_SSE2_H
#define COLORSPACEHANDLER_SSE2_H
#include "colorspacehandler.h"
#ifndef ENABLE_SSE2
#warning This header requires SSE2 support.
#else
// Per-vector conversion primitives. Only declarations are given here; each is templated
// on SWAP_RB (a compile-time flag which, by its name, selects red/blue channel swapping).

// 555 -> 8888 / 6665 with caller-supplied alpha bits; one 16-bit vector in, two 32-bit vectors out.
template<bool SWAP_RB> void ColorspaceConvert555To8888_SSE2(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665_SSE2(const v128u16 &srcColor, const v128u32 &srcAlphaBits32Lo, const v128u32 &srcAlphaBits32Hi, v128u32 &dstLo, v128u32 &dstHi);
// 555 -> 8888 / 6665 variants that take no alpha argument.
template<bool SWAP_RB> void ColorspaceConvert555To8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
template<bool SWAP_RB> void ColorspaceConvert555To6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi);
// 32-bit <-> 32-bit conversions; one vector in, one vector out.
template<bool SWAP_RB> v128u32 ColorspaceConvert8888To6665_SSE2(const v128u32 &src);
template<bool SWAP_RB> v128u32 ColorspaceConvert6665To8888_SSE2(const v128u32 &src);
// 32-bit -> 16-bit conversions; two 32-bit vectors in, one 16-bit vector out.
template<bool SWAP_RB> v128u16 ColorspaceConvert8888To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi);
template<bool SWAP_RB> v128u16 ColorspaceConvert6665To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi);
// SSE2 flavor of ColorspaceHandler. It declares whole-buffer conversions between the
// 555, 5551, 6665 and 8888 pixel layouts. Each conversion comes in four variants:
// plain, _SwapRB, _IsUnaligned and _SwapRB_IsUnaligned.
// Every method takes a source buffer, a destination buffer and a pixel count, and
// returns a size_t.
class ColorspaceHandler_SSE2 : public ColorspaceHandler
{
public:
ColorspaceHandler_SSE2() {};
// 16-bit 555 source to 32-bit 8888 destination.
size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
// 16-bit 555 source to 32-bit 6665 destination.
size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const;
// 32-bit 8888 to 32-bit 6665. These pointers are not __restrict-qualified.
size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer8888To6665_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
// 32-bit 6665 to 32-bit 8888. These pointers are not __restrict-qualified.
size_t ConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
size_t ConvertBuffer6665To8888_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const;
// 32-bit 8888 source to 16-bit 5551 destination.
size_t ConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer8888To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
// 32-bit 6665 source to 16-bit 5551 destination.
size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const;
};
#endif // ENABLE_SSE2
#endif /* COLORSPACEHANDLER_SSE2_H */

View File

@ -59,44 +59,41 @@
#define DESMUME_PLATFORM_STRING ""
#endif
#define DESMUME_SSE_STRING ""
#define DESMUME_AVX_STRING ""
#define DESMUME_CPUEXT_PRIMARY_STRING ""
#define DESMUME_CPUEXT_SECONDARY_STRING ""
#ifdef ENABLE_SSE
#undef DESMUME_SSE_STRING
#define DESMUME_SSE_STRING " SSE"
#endif
#ifdef ENABLE_SSE2
#undef DESMUME_SSE_STRING
#define DESMUME_SSE_STRING " SSE2"
#endif
#ifdef ENABLE_SSE3
#undef DESMUME_SSE_STRING
#define DESMUME_SSE_STRING " SSE3"
#endif
#ifdef ENABLE_SSSE3
#undef DESMUME_SSE_STRING
#define DESMUME_SSE_STRING " SSSE3"
#endif
#ifdef ENABLE_SSE4_1
#undef DESMUME_SSE_STRING
#define DESMUME_SSE_STRING " SSE4.1"
#endif
#ifdef ENABLE_SSE4_2
#undef DESMUME_SSE_STRING
#define DESMUME_SSE_STRING " SSE4.2"
#endif
#ifdef ENABLE_AVX
#undef DESMUME_AVX_STRING
#define DESMUME_AVX_STRING "+AVX"
#endif
#ifdef ENABLE_AVX2
#undef DESMUME_AVX_STRING
#define DESMUME_AVX_STRING "+AVX2"
#if defined(ENABLE_SSE4_2)
#undef DESMUME_CPUEXT_PRIMARY_STRING
#define DESMUME_CPUEXT_PRIMARY_STRING " SSE4.2"
#elif defined(ENABLE_SSE4_1)
#undef DESMUME_CPUEXT_PRIMARY_STRING
#define DESMUME_CPUEXT_PRIMARY_STRING " SSE4.1"
#elif defined(ENABLE_SSSE3)
#undef DESMUME_CPUEXT_PRIMARY_STRING
#define DESMUME_CPUEXT_PRIMARY_STRING " SSSE3"
#elif defined(ENABLE_SSE3)
#undef DESMUME_CPUEXT_PRIMARY_STRING
#define DESMUME_CPUEXT_PRIMARY_STRING " SSE3"
#elif defined(ENABLE_SSE2)
#undef DESMUME_CPUEXT_PRIMARY_STRING
#define DESMUME_CPUEXT_PRIMARY_STRING " SSE2"
#elif defined(ENABLE_SSE)
#undef DESMUME_CPUEXT_PRIMARY_STRING
#define DESMUME_CPUEXT_PRIMARY_STRING " SSE"
#elif defined(ENABLE_ALTIVEC)
#undef DESMUME_CPUEXT_PRIMARY_STRING
#define DESMUME_CPUEXT_PRIMARY_STRING " AltiVec"
#endif
#define DESMUME_CPUEXT_STRING DESMUME_SSE_STRING DESMUME_AVX_STRING
#if defined(ENABLE_AVX2)
#undef DESMUME_CPUEXT_SECONDARY_STRING
#define DESMUME_CPUEXT_SECONDARY_STRING "+AVX2"
#elif defined(ENABLE_AVX)
#undef DESMUME_CPUEXT_SECONDARY_STRING
#define DESMUME_CPUEXT_SECONDARY_STRING "+AVX"
#endif
#define DESMUME_CPUEXT_STRING DESMUME_CPUEXT_PRIMARY_STRING DESMUME_CPUEXT_SECONDARY_STRING
#ifdef DEVELOPER
#define DESMUME_FEATURE_STRING " dev+"