GPU: Fix various graphical glitches on big-endian systems.

- Fix a bug where applying the maximum master brightness decrease ("max bright down") would display a red screen instead of a black screen when running in 18-bit or 24-bit color mode.
- Fix incorrect colors for various display capture scenarios.
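- 15-bit to 18-bit and 15-bit to 24-bit color conversions now assume byte swapping, so call sites in this commit drop their explicit LE_TO_LOCAL_32() / LOCAL_TO_LE_32() wrappers around the converted values. This improves 2D graphics performance by up to 5%.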
This commit is contained in:
rogerman 2025-07-19 22:29:29 -07:00
parent 6bcf70dc6d
commit 08394d33f9
9 changed files with 139 additions and 102 deletions

View File

@ -517,13 +517,13 @@ void DISP_FIFOrecv_LineOpaque(u32 *__restrict dst)
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{
dst[i+0] = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque<false>((src >> 0) & 0x7FFF) );
dst[i+1] = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque<false>((src >> 16) & 0x7FFF) );
dst[i+0] = ColorspaceConvert555To6665Opaque<false>(src >> 0);
dst[i+1] = ColorspaceConvert555To6665Opaque<false>(src >> 16);
}
else if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
{
dst[i+0] = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque<false>((src >> 0) & 0x7FFF) );
dst[i+1] = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque<false>((src >> 16) & 0x7FFF) );
dst[i+0] = ColorspaceConvert555To8888Opaque<false>(src >> 0);
dst[i+1] = ColorspaceConvert555To8888Opaque<false>(src >> 16);
}
}
}

View File

@ -378,7 +378,7 @@ void GPUEngineBase::Reset()
renderState.selectedBGLayer = &this->_BGLayer[GPULayerID_BG0];
renderState.backdropColor16 = LE_TO_LOCAL_16(this->_paletteBG[0]) & 0x7FFF;
renderState.workingBackdropColor16 = renderState.backdropColor16;
renderState.workingBackdropColor32.value = LOCAL_TO_LE_32( (this->_targetDisplay->GetColorFormat() == NDSColorFormat_BGR666_Rev) ? COLOR555TO666(LOCAL_TO_LE_16(renderState.workingBackdropColor16)) : COLOR555TO888(LOCAL_TO_LE_16(renderState.workingBackdropColor16)) );
renderState.workingBackdropColor32.value = (this->_targetDisplay->GetColorFormat() == NDSColorFormat_BGR666_Rev) ? COLOR555TO666(LOCAL_TO_LE_16(renderState.workingBackdropColor16)) : COLOR555TO888(LOCAL_TO_LE_16(renderState.workingBackdropColor16));
renderState.colorEffect = (ColorEffect)this->_IORegisterMap->BLDCNT.ColorEffect;
renderState.blendEVA = 0;
renderState.blendEVB = 0;
@ -851,7 +851,7 @@ void GPUEngineBase::UpdateRenderStates(const size_t l)
{
currRenderState.workingBackdropColor16 = currRenderState.backdropColor16;
}
currRenderState.workingBackdropColor32.value = LOCAL_TO_LE_32( (this->_targetDisplay->GetColorFormat() == NDSColorFormat_BGR666_Rev) ? COLOR555TO666(currRenderState.workingBackdropColor16) : COLOR555TO888(currRenderState.workingBackdropColor16) );
currRenderState.workingBackdropColor32.value = (this->_targetDisplay->GetColorFormat() == NDSColorFormat_BGR666_Rev) ? COLOR555TO666(currRenderState.workingBackdropColor16) : COLOR555TO888(currRenderState.workingBackdropColor16);
// Save the current render states to this line's compositor info.
compInfo.renderState = currRenderState;
@ -1398,23 +1398,25 @@ void GPUEngineBase::_CompositeVRAMLineDeferred(GPUEngineCompositorInfo &compInfo
if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
{
if ( (LAYERTYPE != GPULayerType_OBJ) && ((((u32 *)vramColorPtr)[i] & 0xFF000000) == 0) )
Color4u8 srcColor32 = ((Color4u8 *)vramColorPtr)[i];
if ( (LAYERTYPE != GPULayerType_OBJ) && (srcColor32.a == 0) )
{
continue;
}
const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (colorEffectEnable[compInfo.target.xCustom] != 0) : true;
pixelop.Composite32<COMPOSITORMODE, OUTPUTFORMAT, LAYERTYPE>(compInfo, ((Color4u8 *)vramColorPtr)[i], enableColorEffect, this->_sprAlphaCustom[compInfo.target.xCustom], this->_sprTypeCustom[compInfo.target.xCustom]);
pixelop.Composite32<COMPOSITORMODE, OUTPUTFORMAT, LAYERTYPE>(compInfo, srcColor32, enableColorEffect, this->_sprAlphaCustom[compInfo.target.xCustom], this->_sprTypeCustom[compInfo.target.xCustom]);
}
else
{
if ( (LAYERTYPE != GPULayerType_OBJ) && ((((u16 *)vramColorPtr)[i] & 0x8000) == 0) )
const u16 srcColor16 = LE_TO_LOCAL_16(((u16 *)vramColorPtr)[i]);
if ( (LAYERTYPE != GPULayerType_OBJ) && ((srcColor16 & 0x8000) == 0) )
{
continue;
}
const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (colorEffectEnable[compInfo.target.xCustom] != 0) : true;
pixelop.Composite16<COMPOSITORMODE, OUTPUTFORMAT, LAYERTYPE>(compInfo, ((u16 *)vramColorPtr)[i], enableColorEffect, this->_sprAlphaCustom[compInfo.target.xCustom], this->_sprTypeCustom[compInfo.target.xCustom]);
pixelop.Composite16<COMPOSITORMODE, OUTPUTFORMAT, LAYERTYPE>(compInfo, srcColor16, enableColorEffect, this->_sprAlphaCustom[compInfo.target.xCustom], this->_sprTypeCustom[compInfo.target.xCustom]);
}
}
}
@ -3758,7 +3760,7 @@ void GPUEngineA::_RenderLine_DisplayCaptureCustom(const IOREG_DISPCAPCNT &DISPCA
else
{
u32 *workingNativeBuffer32 = this->_targetDisplay->GetWorkingNativeBuffer32();
ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapNone>((u16 *)srcAPtr, workingNativeBuffer32 + lineInfo.blockOffsetNative, GPU_FRAMEBUFFER_NATIVE_WIDTH);
ColorspaceConvertBuffer555xTo8888Opaque<false, false, BESwapDst>((u16 *)srcAPtr, workingNativeBuffer32 + lineInfo.blockOffsetNative, GPU_FRAMEBUFFER_NATIVE_WIDTH);
CopyLineExpandHinted<0x3FFF, true, false, false, 4>(lineInfo, workingNativeBuffer32 + lineInfo.blockOffsetNative, this->_captureWorkingA32);
srcAPtr = this->_captureWorkingA32;
}
@ -4092,7 +4094,11 @@ template<NDSColorFormat COLORFORMAT, int SOURCESWITCH, size_t CAPTURELENGTH, boo
void GPUEngineA::_RenderLine_DispCapture_Copy(const GPUEngineLineInfo &lineInfo, const void *src, void *dst, const size_t captureLengthExt)
{
const u16 alphaBit16 = (SOURCESWITCH == 0) ? 0x8000 : 0x0000;
#if defined(MSB_FIRST)
const u32 alphaBit32 = (SOURCESWITCH == 0) ? ((COLORFORMAT == NDSColorFormat_BGR888_Rev) ? 0x000000FF : 0x0000001F) : 0x00000000;
#else
const u32 alphaBit32 = (SOURCESWITCH == 0) ? ((COLORFORMAT == NDSColorFormat_BGR888_Rev) ? 0xFF000000 : 0x1F000000) : 0x00000000;
#endif
if (CAPTURETONATIVEDST)
{
@ -4119,7 +4125,7 @@ void GPUEngineA::_RenderLine_DispCapture_Copy(const GPUEngineLineInfo &lineInfo,
case NDSColorFormat_BGR666_Rev:
case NDSColorFormat_BGR888_Rev:
((u32 *)dst)[i] = LE_TO_LOCAL_32(((u32 *)src)[_gpuDstPitchIndex[i]] | alphaBit32);
((u32 *)dst)[i] = ((u32 *)src)[_gpuDstPitchIndex[i]] | alphaBit32;
break;
}
}
@ -4141,7 +4147,7 @@ void GPUEngineA::_RenderLine_DispCapture_Copy(const GPUEngineLineInfo &lineInfo,
case NDSColorFormat_BGR666_Rev:
case NDSColorFormat_BGR888_Rev:
((u32 *)dst)[_gpuDstPitchIndex[i] + p] = LE_TO_LOCAL_32(((u32 *)src)[i] | alphaBit32);
((u32 *)dst)[_gpuDstPitchIndex[i] + p] = ((u32 *)src)[i] | alphaBit32;
break;
}
}
@ -4184,7 +4190,7 @@ void GPUEngineA::_RenderLine_DispCapture_Copy(const GPUEngineLineInfo &lineInfo,
case NDSColorFormat_BGR888_Rev:
{
const size_t vecLength = (pixCountExt * sizeof(u32)) - ((pixCountExt * sizeof(u32)) % VECTORSIZE);
buffer_copy_or_constant_s32<true>(dst, src, vecLength, alphaBit32);
buffer_copy_or_constant_s32<false>(dst, src, vecLength, alphaBit32);
i += vecLength / sizeof(u32);
break;
}
@ -4201,7 +4207,7 @@ void GPUEngineA::_RenderLine_DispCapture_Copy(const GPUEngineLineInfo &lineInfo,
case NDSColorFormat_BGR666_Rev:
case NDSColorFormat_BGR888_Rev:
((u32 *)dst)[i] = LE_TO_LOCAL_32(((u32 *)src)[i] | alphaBit32);
((u32 *)dst)[i] = ((u32 *)src)[i] | alphaBit32;
break;
}
}
@ -4237,13 +4243,13 @@ void GPUEngineA::_RenderLine_DispCapture_Copy(const GPUEngineLineInfo &lineInfo,
{
#ifdef USEMANUALVECTORIZATION
const size_t vecLength = (captureLengthExt * sizeof(u32)) - ((captureLengthExt * sizeof(u32)) % VECTORSIZE);
buffer_copy_or_constant_s32<true>(dst, src, vecLength, alphaBit32);
buffer_copy_or_constant_s32<false>(dst, src, vecLength, alphaBit32);
i += vecLength / sizeof(u32);
#pragma LOOPVECTORIZE_DISABLE
#endif
for (; i < captureLengthExt; i++)
{
((u32 *)dst)[i] = LE_TO_LOCAL_32(((u32 *)src)[i] | alphaBit32);
((u32 *)dst)[i] = ((u32 *)src)[i] | alphaBit32;
}
src = (u32 *)src + lineInfo.widthCustom;
@ -5680,11 +5686,11 @@ void GPUSubsystem::ClearWithColor(const u16 colorBGRA5551)
switch (this->_displayInfo.colorFormat)
{
case NDSColorFormat_BGR666_Rev:
color32.value = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque<false>(colorBGRA5551 & 0x7FFF) );
color32.value = ColorspaceConvert555To6665Opaque<false>(colorBGRA5551);
break;
case NDSColorFormat_BGR888_Rev:
color32.value = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque<false>(colorBGRA5551 & 0x7FFF) );
color32.value = ColorspaceConvert555To8888Opaque<false>(colorBGRA5551);
break;
default:
@ -6493,7 +6499,11 @@ void NDSDisplay::ApplyMasterBrightness(void *dst, const size_t pixCount, const G
break;
case NDSColorFormat_BGR666_Rev:
#if defined(MSB_FIRST)
memset_u32(dst, 0x3F3F3F1F, pixCount);
#else
memset_u32(dst, 0x1F3F3F3F, pixCount);
#endif
break;
case NDSColorFormat_BGR888_Rev:
@ -6540,11 +6550,19 @@ void NDSDisplay::ApplyMasterBrightness(void *dst, const size_t pixCount, const G
break;
case NDSColorFormat_BGR666_Rev:
#if defined(MSB_FIRST)
memset_u32(dst, 0x0000001F, pixCount);
#else
memset_u32(dst, 0x1F000000, pixCount);
#endif
break;
case NDSColorFormat_BGR888_Rev:
#if defined(MSB_FIRST)
memset_u32(dst, 0x000000FF, pixCount);
#else
memset_u32(dst, 0xFF000000, pixCount);
#endif
break;
default:

View File

@ -242,8 +242,8 @@ void PixelOperation::InitLUTs()
cur.bits.blue = (cur.bits.blue + ((31 - cur.bits.blue) * i / 16));
cur.bits.alpha = 0;
PixelOperation::BrightnessUpTable555[i][j] = cur.val;
PixelOperation::BrightnessUpTable666[i][j].value = LOCAL_TO_LE_32( COLOR555TO666(cur.val) );
PixelOperation::BrightnessUpTable888[i][j].value = LOCAL_TO_LE_32( COLOR555TO888(cur.val) );
PixelOperation::BrightnessUpTable666[i][j].value = COLOR555TO666(cur.val);
PixelOperation::BrightnessUpTable888[i][j].value = COLOR555TO888(cur.val);
cur.val = j;
cur.bits.red = (cur.bits.red - (cur.bits.red * i / 16));
@ -251,8 +251,8 @@ void PixelOperation::InitLUTs()
cur.bits.blue = (cur.bits.blue - (cur.bits.blue * i / 16));
cur.bits.alpha = 0;
PixelOperation::BrightnessDownTable555[i][j] = cur.val;
PixelOperation::BrightnessDownTable666[i][j].value = LOCAL_TO_LE_32( COLOR555TO666(cur.val) );
PixelOperation::BrightnessDownTable888[i][j].value = LOCAL_TO_LE_32( COLOR555TO888(cur.val) );
PixelOperation::BrightnessDownTable666[i][j].value = COLOR555TO666(cur.val);
PixelOperation::BrightnessDownTable888[i][j].value = COLOR555TO888(cur.val);
}
}
@ -289,11 +289,11 @@ FORCEINLINE void PixelOperation::_copy16(GPUEngineCompositorInfo &compInfo, cons
break;
case NDSColorFormat_BGR666_Rev:
dstColor32.value = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque<false>(srcColor16) );
dstColor32.value = ColorspaceConvert555To6665Opaque<false>(srcColor16);
break;
case NDSColorFormat_BGR888_Rev:
dstColor32.value = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque<false>(srcColor16) );
dstColor32.value = ColorspaceConvert555To8888Opaque<false>(srcColor16);
break;
}
@ -562,7 +562,7 @@ FORCEINLINE void PixelOperation::_unknownEffect16(GPUEngineCompositorInfo &compI
switch (selectedEffect)
{
case ColorEffect_Disable:
dstColor32.value = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque<false>(srcColor16) );
dstColor32.value = ColorspaceConvert555To6665Opaque<false>(srcColor16);
break;
case ColorEffect_IncreaseBrightness:
@ -576,7 +576,7 @@ FORCEINLINE void PixelOperation::_unknownEffect16(GPUEngineCompositorInfo &compI
case ColorEffect_Blend:
{
Color4u8 srcColor32;
srcColor32.value = LE_TO_LOCAL_32( ColorspaceConvert555To6665Opaque<false>(srcColor16) );
srcColor32.value = ColorspaceConvert555To6665Opaque<false>(srcColor16);
dstColor32 = (LAYERTYPE == GPULayerType_3D) ? colorop.blend3D<OUTPUTFORMAT>(srcColor32, dstColor32) : colorop.blend<OUTPUTFORMAT>(srcColor32, dstColor32, blendEVA, blendEVB);
break;
}
@ -587,7 +587,7 @@ FORCEINLINE void PixelOperation::_unknownEffect16(GPUEngineCompositorInfo &compI
switch (selectedEffect)
{
case ColorEffect_Disable:
dstColor32.value = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque<false>(srcColor16) );
dstColor32.value = ColorspaceConvert555To8888Opaque<false>(srcColor16);
break;
case ColorEffect_IncreaseBrightness:
@ -601,7 +601,7 @@ FORCEINLINE void PixelOperation::_unknownEffect16(GPUEngineCompositorInfo &compI
case ColorEffect_Blend:
{
Color4u8 srcColor32;
srcColor32.value = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque<false>(srcColor16) );
srcColor32.value = ColorspaceConvert555To8888Opaque<false>(srcColor16);
dstColor32 = (LAYERTYPE == GPULayerType_3D) ? colorop.blend3D<OUTPUTFORMAT>(srcColor32, dstColor32) : colorop.blend<OUTPUTFORMAT>(srcColor32, dstColor32, blendEVA, blendEVB);
break;
}
@ -793,7 +793,7 @@ static FORCEINLINE void CopyLineExpand(void *__restrict dst, const void *__restr
}
else if (ELEMENTSIZE == 4)
{
((u32 *)dst)[i] = LE_TO_LOCAL_32( ((u32 *)src)[i] );
((u32 *)dst)[i] = ((u32 *)src)[i];
}
}
}
@ -816,7 +816,7 @@ static FORCEINLINE void CopyLineExpand(void *__restrict dst, const void *__restr
}
else if (ELEMENTSIZE == 4)
{
((u32 *)dst)[i] = LE_TO_LOCAL_32( ((u32 *)src)[i] );
((u32 *)dst)[i] = ((u32 *)src)[i];
}
}
}
@ -844,7 +844,7 @@ static FORCEINLINE void CopyLineExpand(void *__restrict dst, const void *__restr
}
else if (ELEMENTSIZE == 4)
{
((u32 *)dst)[(srcX * scale) + lx] = (NEEDENDIANSWAP) ? LE_TO_LOCAL_32( ((u32 *)src)[srcX] ) : ((u32 *)src)[srcX];
((u32 *)dst)[(srcX * scale) + lx] = (NEEDENDIANSWAP) ? ((u32 *)src)[srcX] : ((u32 *)src)[srcX];
}
}
}
@ -870,7 +870,7 @@ static FORCEINLINE void CopyLineExpand(void *__restrict dst, const void *__restr
}
else if (ELEMENTSIZE == 4)
{
((u32 *)dst)[_gpuDstPitchIndex[x] + p] = (NEEDENDIANSWAP) ? LE_TO_LOCAL_32( ((u32 *)src)[x] ) : ((u32 *)src)[x];
((u32 *)dst)[_gpuDstPitchIndex[x] + p] = (NEEDENDIANSWAP) ? ((u32 *)src)[x] : ((u32 *)src)[x];
}
}
}
@ -913,7 +913,7 @@ static FORCEINLINE void CopyLineReduce(void *__restrict dst, const void *__restr
}
else if (ELEMENTSIZE == 4)
{
((u32 *)dst)[i] = LE_TO_LOCAL_32( ((u32 *)src)[i] );
((u32 *)dst)[i] = ((u32 *)src)[i];
}
}
}
@ -936,7 +936,7 @@ static FORCEINLINE void CopyLineReduce(void *__restrict dst, const void *__restr
}
else if (ELEMENTSIZE == 4)
{
((u32 *)dst)[i] = LE_TO_LOCAL_32( ((u32 *)src)[i] );
((u32 *)dst)[i] = ((u32 *)src)[i];
}
}
}
@ -980,7 +980,7 @@ static FORCEINLINE void CopyLineReduce(void *__restrict dst, const void *__restr
}
else if (ELEMENTSIZE == 4)
{
((u32 *)dst)[i] = (NEEDENDIANSWAP) ? LE_TO_LOCAL_32( ((u32 *)src)[_gpuDstPitchIndex[i]] ) : ((u32 *)src)[_gpuDstPitchIndex[i]];
((u32 *)dst)[i] = (NEEDENDIANSWAP) ? ((u32 *)src)[_gpuDstPitchIndex[i]] : ((u32 *)src)[_gpuDstPitchIndex[i]];
}
}
}
@ -1004,9 +1004,9 @@ void GPUEngineBase::_MosaicLine(GPUEngineCompositorInfo &compInfo)
else
{
outColor16 = mosaicColorBG[compInfo.renderState.mosaicWidthBG->trunc[x]];
isOpaque = (outColor16 != 0xFFFF);
}
isOpaque = (outColor16 != 0xFFFF);
if (isOpaque)
{
this->_deferredColorNative[x] = outColor16;

View File

@ -433,13 +433,6 @@ static NSMutableDictionary *saveTypeValues = nil;
uint32_t *bitmapData = (uint32_t *)[imageRep bitmapData];
RomIconToRGBA8888(bitmapData);
#ifdef MSB_FIRST
for (size_t i = 0; i < ROM_ICON_WIDTH * ROM_ICON_HEIGHT; i++)
{
bitmapData[i] = LE_TO_LOCAL_32(bitmapData[i]);
}
#endif
[imageRep autorelease];
[newImage addRepresentation:imageRep];

View File

@ -2091,7 +2091,7 @@ void SoftRasterizerRenderer::_UpdateEdgeMarkColorTable(const u16 *edgeMarkColorT
//we can do this by rendering a 3d frame and then freezing the system, but only changing the edge mark colors
for (size_t i = 0; i < 8; i++)
{
this->_edgeMarkTable[i].value = LE_TO_LOCAL_32( COLOR555TO6665(edgeMarkColorTable[i] & 0x7FFF, (this->currentRenderState->DISP3DCNT.EnableAntialiasing) ? 0x10 : 0x1F) );
this->_edgeMarkTable[i].value = COLOR555TO6665(edgeMarkColorTable[i] & 0x7FFF, (this->currentRenderState->DISP3DCNT.EnableAntialiasing) ? 0x10 : 0x1F);
//zero 20-jun-2013 - this doesnt make any sense. at least, it should be related to the 0x8000 bit. if this is undocumented behaviour, lets write about which scenario proves it here, or which scenario is requiring this code.
//// this seems to be the only thing that selectively disables edge marking
@ -2239,7 +2239,7 @@ Render3DError SoftRasterizerRenderer::RenderEdgeMarkingAndFog(const SoftRasteriz
if (param.enableFog)
{
Color4u8 fogColor;
fogColor.value = LE_TO_LOCAL_32( COLOR555TO6665(param.fogColor & 0x7FFF, (param.fogColor>>16) & 0x1F) );
fogColor.value = COLOR555TO6665(param.fogColor & 0x7FFF, (param.fogColor>>16) & 0x1F);
const size_t fogIndex = depth >> 9;
assert(fogIndex < 32768);
@ -2302,7 +2302,7 @@ Render3DError SoftRasterizerRenderer::ClearUsingImage(const u16 *__restrict colo
{
const size_t ir = readLine + ((x * xRatio) >> 16);
this->_framebufferColor[iw].value = LE_TO_LOCAL_32( COLOR555TO6665(colorBuffer[ir] & 0x7FFF, (colorBuffer[ir] >> 15) * 0x1F) );
this->_framebufferColor[iw].value = COLOR555TO6665(colorBuffer[ir] & 0x7FFF, (colorBuffer[ir] >> 15) * 0x1F);
this->_framebufferAttributes->depth[iw] = depthBuffer[ir];
this->_framebufferAttributes->isFogged[iw] = fogBuffer[ir];
this->_framebufferAttributes->opaquePolyID[iw] = opaquePolyID;

View File

@ -811,7 +811,7 @@ Render3DError Render3D::Render(const GFX3D_State &renderState, const GFX3D_Geome
Render3DError error = RENDER3DERROR_NOERR;
this->_isPoweredOn = true;
this->_clearColor6665.value = LE_TO_LOCAL_32( COLOR555TO6665(renderState.clearColor & 0x7FFF, (renderState.clearColor >> 16) & 0x1F) );
this->_clearColor6665.value = COLOR555TO6665(renderState.clearColor & 0x7FFF, (renderState.clearColor >> 16) & 0x1F);
this->_clearAttributes.opaquePolyID = (renderState.clearColor >> 24) & 0x3F;
//special value for uninitialized translucent polyid. without this, fires in spiderman2 dont display
@ -1054,10 +1054,10 @@ void Render3D_AltiVec::_ClearImageBaseLoop(const u16 *__restrict inColor16, cons
const v128u16 clearDepthValueLo = vec_and(clearDepthLo, ((v128u16){0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF}));
const v128u16 clearDepthValueHi = vec_and(clearDepthHi, ((v128u16){0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF}));
const v128u16 calcDepth0 = vec_perm(((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), (v128u8)clearDepthValueLo, ((v128u8){0x10,0x11,0,0, 0x12,0x13,0,0, 0x14,0x15,0,0, 0x16,0x17,0,0}));
const v128u16 calcDepth1 = vec_perm(((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), (v128u8)clearDepthValueLo, ((v128u8){0x18,0x19,0,0, 0x1A,0x1B,0,0, 0x1C,0x1D,0,0, 0x1E,0x1F,0,0}));
const v128u16 calcDepth2 = vec_perm(((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), (v128u8)clearDepthValueHi, ((v128u8){0x10,0x11,0,0, 0x12,0x13,0,0, 0x14,0x15,0,0, 0x16,0x17,0,0}));
const v128u16 calcDepth3 = vec_perm(((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), (v128u8)clearDepthValueHi, ((v128u8){0x18,0x19,0,0, 0x1A,0x1B,0,0, 0x1C,0x1D,0,0, 0x1E,0x1F,0,0}));
const v128u16 calcDepth0 = vec_perm( vec_splat_u8(0), (v128u8)clearDepthValueLo, ((v128u8){0x10,0x11,0,0, 0x12,0x13,0,0, 0x14,0x15,0,0, 0x16,0x17,0,0}) );
const v128u16 calcDepth1 = vec_perm( vec_splat_u8(0), (v128u8)clearDepthValueLo, ((v128u8){0x18,0x19,0,0, 0x1A,0x1B,0,0, 0x1C,0x1D,0,0, 0x1E,0x1F,0,0}) );
const v128u16 calcDepth2 = vec_perm( vec_splat_u8(0), (v128u8)clearDepthValueHi, ((v128u8){0x10,0x11,0,0, 0x12,0x13,0,0, 0x14,0x15,0,0, 0x16,0x17,0,0}) );
const v128u16 calcDepth3 = vec_perm( vec_splat_u8(0), (v128u8)clearDepthValueHi, ((v128u8){0x18,0x19,0,0, 0x1A,0x1B,0,0, 0x1C,0x1D,0,0, 0x1E,0x1F,0,0}) );
vec_st( vec_msum(calcDepth0, calcDepthMul, calcDepthAdd), 0, outDepth24 + i);
vec_st( vec_msum(calcDepth1, calcDepthMul, calcDepthAdd), 16, outDepth24 + i);

View File

@ -1,7 +1,7 @@
/*
Copyright (C) 2006 yopyop
Copyright (C) 2006-2007 shash
Copyright (C) 2008-2024 DeSmuME team
Copyright (C) 2008-2025 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -1014,8 +1014,8 @@ void __NDSTextureUnpackI2_AltiVec(const size_t texelCount, const u8 *__restrict
idx = vec_perm(idx, idx, ((v128u8){0,0,0,0, 1,1,1,1, 2,2,2,2, 3,3,3,3}));
idx = vec_sr(idx, ((v128u8){0,2,4,6, 0,2,4,6, 0,2,4,6, 0,2,4,6}));
idx = vec_and(idx, ((v128u8){0x03,0x03,0x03,0x03, 0x03,0x03,0x03,0x03, 0x03,0x03,0x03,0x03, 0x03,0x03,0x03,0x03}));
idx = vec_sl(idx, ((v128u8){1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1}));
idx = vec_and(idx, vec_splat_u8(0x03));
idx = vec_sl(idx, vec_splat_u8(1));
v128u8 idx0 = vec_add( vec_perm(idx,idx,((v128u8){ 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7})), ((v128u8){0,1,0,1, 0,1,0,1, 0,1,0,1, 0,1,0,1}) );
v128u8 idx1 = vec_add( vec_perm(idx,idx,((v128u8){ 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15})), ((v128u8){0,1,0,1, 0,1,0,1, 0,1,0,1, 0,1,0,1}) );
@ -1037,7 +1037,7 @@ void __NDSTextureUnpackI2_AltiVec(const size_t texelCount, const u8 *__restrict
// Set converted colors to 0 if the palette index is 0.
if (ISPALZEROTRANSPARENT)
{
const v128u8 idxMask = vec_cmpgt(idx, ((v128u8){0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0}));
const v128u8 idxMask = vec_cmpgt(idx, vec_splat_u8(0));
convertedColor[0] = vec_and( convertedColor[0], vec_perm(idxMask, idxMask, ((v128u8){ 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3})) );
convertedColor[1] = vec_and( convertedColor[1], vec_perm(idxMask, idxMask, ((v128u8){ 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7})) );
convertedColor[2] = vec_and( convertedColor[2], vec_perm(idxMask, idxMask, ((v128u8){ 8, 8, 8, 8, 9, 9, 9, 9,10,10,10,10,11,11,11,11})) );
@ -1074,16 +1074,16 @@ void NDSTextureUnpackI2(const size_t srcSize, const u8 *__restrict srcData, cons
u8 idx;
idx = *srcData & 0x03;
*dstBuffer++ = (idx == 0) ? 0 : LE_TO_LOCAL_32( CONVERT(srcPal[idx] & 0x7FFF) );
*dstBuffer++ = (idx == 0) ? 0 : CONVERT(srcPal[idx]);
idx = (*srcData >> 2) & 0x03;
*dstBuffer++ = (idx == 0) ? 0 : LE_TO_LOCAL_32( CONVERT(srcPal[idx] & 0x7FFF) );
*dstBuffer++ = (idx == 0) ? 0 : CONVERT(srcPal[idx]);
idx = (*srcData >> 4) & 0x03;
*dstBuffer++ = (idx == 0) ? 0 : LE_TO_LOCAL_32( CONVERT(srcPal[idx] & 0x7FFF) );
*dstBuffer++ = (idx == 0) ? 0 : CONVERT(srcPal[idx]);
idx = (*srcData >> 6) & 0x03;
*dstBuffer++ = (idx == 0) ? 0 : LE_TO_LOCAL_32( CONVERT(srcPal[idx] & 0x7FFF) );
*dstBuffer++ = (idx == 0) ? 0 : CONVERT(srcPal[idx]);
}
#endif
}
@ -1100,10 +1100,10 @@ void NDSTextureUnpackI2(const size_t srcSize, const u8 *__restrict srcData, cons
#else
for (size_t i = 0; i < srcSize; i++, srcData++)
{
*dstBuffer++ = LE_TO_LOCAL_32( CONVERT(srcPal[ *srcData & 0x03] & 0x7FFF) );
*dstBuffer++ = LE_TO_LOCAL_32( CONVERT(srcPal[(*srcData >> 2) & 0x03] & 0x7FFF) );
*dstBuffer++ = LE_TO_LOCAL_32( CONVERT(srcPal[(*srcData >> 4) & 0x03] & 0x7FFF) );
*dstBuffer++ = LE_TO_LOCAL_32( CONVERT(srcPal[(*srcData >> 6) & 0x03] & 0x7FFF) );
*dstBuffer++ = CONVERT(srcPal[ *srcData & 0x03]);
*dstBuffer++ = CONVERT(srcPal[(*srcData >> 2) & 0x03]);
*dstBuffer++ = CONVERT(srcPal[(*srcData >> 4) & 0x03]);
*dstBuffer++ = CONVERT(srcPal[(*srcData >> 6) & 0x03]);
}
#endif
}
@ -1298,8 +1298,8 @@ void __NDSTextureUnpackI4_AltiVec(const size_t texelCount, const u8 *__restrict
idx = vec_perm(idx, idx, ((v128u8){0,0,1,1, 2,2,3,3, 4,4,5,5, 6,6,7,7}));
idx = vec_sr(idx, ((v128u8){0,4,0,4, 0,4,0,4, 0,4,0,4, 0,4,0,4}));
idx = vec_and(idx, ((v128u8){0x0F,0x0F,0x0F,0x0F, 0x0F,0x0F,0x0F,0x0F, 0x0F,0x0F,0x0F,0x0F, 0x0F,0x0F,0x0F,0x0F}));
idx = vec_sl(idx, ((v128u8){1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1}));
idx = vec_and(idx, vec_splat_u8(0x0F));
idx = vec_sl(idx, vec_splat_u8(1));
v128u8 idx0 = vec_add( vec_perm(idx,idx,((v128u8){ 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7})), ((v128u8){0,1,0,1, 0,1,0,1, 0,1,0,1, 0,1,0,1}) );
v128u8 idx1 = vec_add( vec_perm(idx,idx,((v128u8){ 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15})), ((v128u8){0,1,0,1, 0,1,0,1, 0,1,0,1, 0,1,0,1}) );
@ -1321,7 +1321,7 @@ void __NDSTextureUnpackI4_AltiVec(const size_t texelCount, const u8 *__restrict
// Set converted colors to 0 if the palette index is 0.
if (ISPALZEROTRANSPARENT)
{
const v128u8 idxMask = vec_cmpgt(idx, ((v128u8){0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0}));
const v128u8 idxMask = vec_cmpgt(idx, vec_splat_u8(0));
convertedColor[0] = vec_and( convertedColor[0], vec_perm(idxMask, idxMask, ((v128u8){ 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3})) );
convertedColor[1] = vec_and( convertedColor[1], vec_perm(idxMask, idxMask, ((v128u8){ 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7})) );
convertedColor[2] = vec_and( convertedColor[2], vec_perm(idxMask, idxMask, ((v128u8){ 8, 8, 8, 8, 9, 9, 9, 9,10,10,10,10,11,11,11,11})) );
@ -1358,10 +1358,10 @@ void NDSTextureUnpackI4(const size_t srcSize, const u8 *__restrict srcData, cons
u8 idx;
idx = *srcData & 0x0F;
*dstBuffer++ = (idx == 0) ? 0 : LE_TO_LOCAL_32( CONVERT(srcPal[idx] & 0x7FFF) );
*dstBuffer++ = (idx == 0) ? 0 : CONVERT(srcPal[idx]);
idx = *srcData >> 4;
*dstBuffer++ = (idx == 0) ? 0 : LE_TO_LOCAL_32( CONVERT(srcPal[idx] & 0x7FFF) );
*dstBuffer++ = (idx == 0) ? 0 : CONVERT(srcPal[idx]);
}
#endif
}
@ -1378,8 +1378,8 @@ void NDSTextureUnpackI4(const size_t srcSize, const u8 *__restrict srcData, cons
#else
for (size_t i = 0; i < srcSize; i++, srcData++)
{
*dstBuffer++ = LE_TO_LOCAL_32( CONVERT(srcPal[*srcData & 0x0F] & 0x7FFF) );
*dstBuffer++ = LE_TO_LOCAL_32( CONVERT(srcPal[*srcData >> 4] & 0x7FFF) );
*dstBuffer++ = CONVERT(srcPal[*srcData & 0x0F]);
*dstBuffer++ = CONVERT(srcPal[*srcData >> 4]);
}
#endif
}
@ -1393,14 +1393,14 @@ void NDSTextureUnpackI8(const size_t srcSize, const u8 *__restrict srcData, cons
for (size_t i = 0; i < srcSize; i++, srcData++)
{
const u8 idx = *srcData;
*dstBuffer++ = (idx == 0) ? 0 : LE_TO_LOCAL_32( CONVERT(srcPal[idx] & 0x7FFF) );
*dstBuffer++ = (idx == 0) ? 0 : CONVERT(srcPal[idx]);
}
}
else
{
for (size_t i = 0; i < srcSize; i++, srcData++)
{
*dstBuffer++ = LE_TO_LOCAL_32( CONVERT(srcPal[*srcData] & 0x7FFF) );
*dstBuffer++ = CONVERT(srcPal[*srcData]);
}
}
}
@ -1460,7 +1460,7 @@ void __NDSTextureUnpackA3I5_AltiVec(const size_t texelCount, const u8 *__restric
const v128u8 bits = vec_perm( vec_ld(0, srcData), vec_ld(16, srcData), unalignedShift );
v128u8 idx = vec_and(bits, ((v128u8){0x1F,0x1F,0x1F,0x1F,0x1F,0x1F,0x1F,0x1F,0x1F,0x1F,0x1F,0x1F,0x1F,0x1F,0x1F,0x1F}));
idx = vec_sl(idx, ((v128u8){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}));
idx = vec_sl(idx, vec_splat_u8(1));
v128u8 idx0 = vec_add( vec_perm(idx, idx, ((v128u8){ 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7})), ((v128u8){0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1}) );
idx0 = vec_and(idx0, ((v128u8){0x1F,0x1F,0x1F,0x1F,0x1F,0x1F,0x1F,0x1F,0x1F,0x1F,0x1F,0x1F,0x1F,0x1F,0x1F,0x1F}));
@ -1477,9 +1477,9 @@ void __NDSTextureUnpackA3I5_AltiVec(const size_t texelCount, const u8 *__restric
const v128u16 palColor0 = vec_sel( palColor0A, palColor0B, vec_perm(palMask, palMask, ((v128u8){ 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7})) );
const v128u16 palColor1 = vec_sel( palColor1A, palColor1B, vec_perm(palMask, palMask, ((v128u8){ 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15})) );
const v128u8 alpha = vec_perm( alpha_LUT, alpha_LUT, vec_sr(bits, ((v128u8){5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5})) );
const v128u16 alphaLo = vec_perm( (v128u8)alpha, ((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((v128u8){0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07, 0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03}) );
const v128u16 alphaHi = vec_perm( (v128u8)alpha, ((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((v128u8){0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F, 0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B}) );
const v128u8 alpha = vec_perm( alpha_LUT, alpha_LUT, vec_sr(bits, vec_splat_u8(5)) );
const v128u16 alphaLo = vec_perm( (v128u8)alpha, vec_splat_u8(0), ((v128u8){0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07, 0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03}) );
const v128u16 alphaHi = vec_perm( (v128u8)alpha, vec_splat_u8(0), ((v128u8){0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F, 0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B}) );
if (TEXCACHEFORMAT == TexFormat_15bpp)
{
@ -1523,7 +1523,7 @@ void NDSTextureUnpackA3I5(const size_t srcSize, const u8 *__restrict srcData, co
{
const u16 c = srcPal[*srcData & 0x1F] & 0x7FFF;
const u8 alpha = *srcData >> 5;
*dstBuffer++ = LE_TO_LOCAL_32( (TEXCACHEFORMAT == TexFormat_15bpp) ? COLOR555TO6665(c, material_3bit_to_5bit[alpha]) : COLOR555TO8888(c, material_3bit_to_8bit[alpha]) );
*dstBuffer++ = (TEXCACHEFORMAT == TexFormat_15bpp) ? COLOR555TO6665(c, material_3bit_to_5bit[alpha]) : COLOR555TO8888(c, material_3bit_to_8bit[alpha]);
}
#endif
}
@ -1689,8 +1689,8 @@ void __NDSTextureUnpackA5I3_AltiVec(const size_t texelCount, const u8 *__restric
// Must be unaligned since srcData could sit outside of a 16-byte boundary.
const v128u8 bits = vec_perm( vec_ld(0, srcData), vec_ld(16, srcData), unalignedShift );
v128u8 idx = vec_and(bits, ((v128u8){0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07}));
idx = vec_sl(idx, ((v128u8){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}));
v128u8 idx = vec_and(bits, vec_splat_u8(0x07));
idx = vec_sl(idx, vec_splat_u8(1));
const v128u8 idx0 = vec_add( vec_perm(idx, idx, ((v128u8){ 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7})), ((v128u8){0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1}) );
const v128u8 idx1 = vec_add( vec_perm(idx, idx, ((v128u8){ 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15})), ((v128u8){0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1}) );
@ -1700,18 +1700,18 @@ void __NDSTextureUnpackA5I3_AltiVec(const size_t texelCount, const u8 *__restric
if (TEXCACHEFORMAT == TexFormat_15bpp)
{
const v128u8 alpha = vec_sr(bits, ((v128u8){3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3}));
const v128u16 alphaLo = vec_perm( (v128u8)alpha, ((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((v128u8){0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07, 0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03}) );
const v128u16 alphaHi = vec_perm( (v128u8)alpha, ((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((v128u8){0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F, 0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B}) );
const v128u8 alpha = vec_sr(bits, vec_splat_u8(3));
const v128u16 alphaLo = vec_perm( (v128u8)alpha, vec_splat_u8(0), ((v128u8){0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07, 0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03}) );
const v128u16 alphaHi = vec_perm( (v128u8)alpha, vec_splat_u8(0), ((v128u8){0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F, 0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B}) );
ColorspaceConvert555aTo6665_AltiVec<false, BESwapDst>(palColor0, alphaLo, convertedColor[1], convertedColor[0]);
ColorspaceConvert555aTo6665_AltiVec<false, BESwapDst>(palColor1, alphaHi, convertedColor[3], convertedColor[2]);
}
else
{
const v128u8 alpha = vec_or( vec_and(bits, ((v128u8){0xF8,0xF8,0xF8,0xF8,0xF8,0xF8,0xF8,0xF8,0xF8,0xF8,0xF8,0xF8,0xF8,0xF8,0xF8,0xF8})), vec_sr(bits, ((v128u8){5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5})) );
const v128u16 alphaLo = vec_perm( (v128u8)alpha, ((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((v128u8){0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07, 0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03}) );
const v128u16 alphaHi = vec_perm( (v128u8)alpha, ((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((v128u8){0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F, 0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B}) );
const v128u8 alpha = vec_or( vec_and(bits, vec_splat_u8((s8)0xF8)), vec_sr(bits, vec_splat_u8(5)) );
const v128u16 alphaLo = vec_perm( (v128u8)alpha, vec_splat_u8(0), ((v128u8){0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07, 0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03}) );
const v128u16 alphaHi = vec_perm( (v128u8)alpha, vec_splat_u8(0), ((v128u8){0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F, 0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B}) );
ColorspaceConvert555aTo8888_AltiVec<false, BESwapDst>(palColor0, alphaLo, convertedColor[1], convertedColor[0]);
ColorspaceConvert555aTo8888_AltiVec<false, BESwapDst>(palColor1, alphaHi, convertedColor[3], convertedColor[2]);
@ -1744,7 +1744,7 @@ void NDSTextureUnpackA5I3(const size_t srcSize, const u8 *__restrict srcData, co
{
const u16 c = srcPal[*srcData & 0x07] & 0x7FFF;
const u8 alpha = (*srcData >> 3);
*dstBuffer++ = LE_TO_LOCAL_32( (TEXCACHEFORMAT == TexFormat_15bpp) ? COLOR555TO6665(c, alpha) : COLOR555TO8888(c, material_5bit_to_8bit[alpha]) );
*dstBuffer++ = (TEXCACHEFORMAT == TexFormat_15bpp) ? COLOR555TO6665(c, alpha) : COLOR555TO8888(c, material_5bit_to_8bit[alpha]);
}
#endif
}
@ -1792,13 +1792,13 @@ void NDSTextureUnpack4x4(const size_t srcSize, const u32 *__restrict srcData, co
const u8 mode = pal1>>14;
CACHE_ALIGN u32 tmp_col[4];
tmp_col[0] = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque<false>(PAL4X4(pal1offset+0)) );
tmp_col[1] = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque<false>(PAL4X4(pal1offset+1)) );
tmp_col[0] = ColorspaceConvert555To8888Opaque<false>( PAL4X4(pal1offset+0) );
tmp_col[1] = ColorspaceConvert555To8888Opaque<false>( PAL4X4(pal1offset+1) );
switch (mode)
{
case 0:
tmp_col[2] = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque<false>(PAL4X4(pal1offset+2)) );
tmp_col[2] = ColorspaceConvert555To8888Opaque<false>( PAL4X4(pal1offset+2) );
tmp_col[3] = 0x00000000;
break;
@ -1818,8 +1818,8 @@ void NDSTextureUnpack4x4(const size_t srcSize, const u32 *__restrict srcData, co
break;
case 2:
tmp_col[2] = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque<false>(PAL4X4(pal1offset+2)) );
tmp_col[3] = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque<false>(PAL4X4(pal1offset+3)) );
tmp_col[2] = ColorspaceConvert555To8888Opaque<false>( PAL4X4(pal1offset+2) );
tmp_col[3] = ColorspaceConvert555To8888Opaque<false>( PAL4X4(pal1offset+3) );
break;
case 3:
@ -1847,8 +1847,8 @@ void NDSTextureUnpack4x4(const size_t srcSize, const u32 *__restrict srcData, co
( ((g0*3 + g1*5)>>6) << 5 ) |
( ((b0*3 + b1*5)>>6) << 10 );
tmp_col[2] = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque<false>(tmp1) );
tmp_col[3] = LE_TO_LOCAL_32( ColorspaceConvert555To8888Opaque<false>(tmp2) );
tmp_col[2] = ColorspaceConvert555To8888Opaque<false>(tmp1);
tmp_col[3] = ColorspaceConvert555To8888Opaque<false>(tmp2);
break;
}
@ -2020,7 +2020,7 @@ void NDSTextureUnpackDirect16Bit(const size_t srcSize, const u16 *__restrict src
for (size_t i = 0; i < texelCount; i++, srcData++)
{
const u16 c = LOCAL_TO_LE_16(*srcData);
*dstBuffer++ = (c & 0x8000) ? LE_TO_LOCAL_32( CONVERT(c & 0x7FFF) ) : 0;
*dstBuffer++ = (c & 0x8000) ? CONVERT(c) : 0;
}
#endif
}

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2016-2024 DeSmuME team
Copyright (C) 2016-2025 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -161,13 +161,28 @@ void ColorspaceHandlerInit()
if (needInitTables)
{
#define RGB15TO18_BITLOGIC(col) ( (material_5bit_to_6bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | material_5bit_to_6bit[(col)&0x1F] )
#define RGB15TO18_SWAP_RB_BITLOGIC(col) ( material_5bit_to_6bit[((col)>>10)&0x1F] | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_6bit[(col)&0x1F]<<16) )
#define RGB15TO24_BITLOGIC(col) ( (material_5bit_to_8bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | material_5bit_to_8bit[(col)&0x1F] )
#define RGB15TO24_SWAP_RB_BITLOGIC(col) ( material_5bit_to_8bit[((col)>>10)&0x1F] | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_8bit[(col)&0x1F]<<16) )
#if defined(MSB_FIRST)
#define RGB15TO18_BITLOGIC(col) ( (material_5bit_to_6bit[((col)>>10)&0x1F]<<8) | (material_5bit_to_6bit[((col)>>5)&0x1F]<<16) | (material_5bit_to_6bit[(col)&0x1F]<<24) )
#define RGB15TO18_SWAP_RB_BITLOGIC(col) ( (material_5bit_to_6bit[((col)>>10)&0x1F]<<24) | (material_5bit_to_6bit[((col)>>5)&0x1F]<<16) | (material_5bit_to_6bit[(col)&0x1F]<<8) )
#define RGB15TO24_BITLOGIC(col) ( (material_5bit_to_8bit[((col)>>10)&0x1F]<<8) | (material_5bit_to_8bit[((col)>>5)&0x1F]<<16) | (material_5bit_to_8bit[(col)&0x1F]<<24) )
#define RGB15TO24_SWAP_RB_BITLOGIC(col) ( (material_5bit_to_8bit[((col)>>10)&0x1F]<<24) | (material_5bit_to_8bit[((col)>>5)&0x1F]<<16) | (material_5bit_to_8bit[(col)&0x1F]<<8) )
#else
#define RGB15TO18_BITLOGIC(col) ( (material_5bit_to_6bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | material_5bit_to_6bit[(col)&0x1F] )
#define RGB15TO18_SWAP_RB_BITLOGIC(col) ( material_5bit_to_6bit[((col)>>10)&0x1F] | (material_5bit_to_6bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_6bit[(col)&0x1F]<<16) )
#define RGB15TO24_BITLOGIC(col) ( (material_5bit_to_8bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | material_5bit_to_8bit[(col)&0x1F] )
#define RGB15TO24_SWAP_RB_BITLOGIC(col) ( material_5bit_to_8bit[((col)>>10)&0x1F] | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | (material_5bit_to_8bit[(col)&0x1F]<<16) )
#endif
for (size_t i = 0; i < 32768; i++)
{
#if defined(MSB_FIRST)
color_555_to_666[i] = RGB15TO18_BITLOGIC(i);
color_555_to_6665_opaque[i] = RGB15TO18_BITLOGIC(i) | 0x0000001F;
color_555_to_6665_opaque_swap_rb[i] = RGB15TO18_SWAP_RB_BITLOGIC(i) | 0x0000001F;
color_555_to_888[i] = RGB15TO24_BITLOGIC(i);
color_555_to_8888_opaque[i] = RGB15TO24_BITLOGIC(i) | 0x000000FF;
color_555_to_8888_opaque_swap_rb[i] = RGB15TO24_SWAP_RB_BITLOGIC(i) | 0x000000FF;
#else
color_555_to_666[i] = RGB15TO18_BITLOGIC(i);
color_555_to_6665_opaque[i] = RGB15TO18_BITLOGIC(i) | 0x1F000000;
color_555_to_6665_opaque_swap_rb[i] = RGB15TO18_SWAP_RB_BITLOGIC(i) | 0x1F000000;
@ -175,6 +190,7 @@ void ColorspaceHandlerInit()
color_555_to_888[i] = RGB15TO24_BITLOGIC(i);
color_555_to_8888_opaque[i] = RGB15TO24_BITLOGIC(i) | 0xFF000000;
color_555_to_8888_opaque_swap_rb[i] = RGB15TO24_SWAP_RB_BITLOGIC(i) | 0xFF000000;
#endif
}
#define RGB16_SWAP_RB_BITLOGIC(col) ( (((col)&0x001F)<<10) | ((col)&0x03E0) | (((col)&0x7C00)>>10) | ((col)&0x8000) )

View File

@ -1,5 +1,5 @@
/*
Copyright (C) 2016-2024 DeSmuME team
Copyright (C) 2016-2025 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -99,12 +99,22 @@ extern CACHE_ALIGN u32 color_555_to_888[32768];
// 15-bit -> RGBA6665 conversion macros. Each one is a plain lookup into a precomputed u32 table,
// using (col) directly as the array index; no masking of (col) is performed here.
// NOTE(review): ColorspaceHandlerInit() fills indices 0..32767, so callers presumably must pass
// a color with bit 15 already cleared -- confirm at the call sites.
#define COLOR555TO6665_OPAQUE(col) (color_555_to_6665_opaque[(col)]) // Convert a 15-bit color to an opaque sparsely packed 32-bit color containing an RGBA6665 color
#define COLOR555TO6665_OPAQUE_SWAP_RB(col) (color_555_to_6665_opaque_swap_rb[(col)]) // Convert a 15-bit color to an opaque sparsely packed 32-bit color containing an RGBA6665 color with R and B components swapped
#define COLOR555TO666(col) (color_555_to_666[(col)]) // Convert a 15-bit color to a fully transparent sparsely packed 32-bit color containing an RGBA6665 color
// The position of the user-supplied alpha depends on MSB_FIRST: with MSB_FIRST defined, alpha5 is
// OR'ed into the low byte unshifted; otherwise it is shifted up into bits 24-31. This matches the
// opaque tables built by ColorspaceHandlerInit(), which OR in 0x0000001F under MSB_FIRST and
// 0x1F000000 otherwise. alpha5 is not masked, so it is expected to already fit the alpha field.
#if defined(MSB_FIRST)
#define COLOR555TO6665(col,alpha5) (((alpha5) ) | color_555_to_666[(col)]) // Convert a 15-bit color to a sparsely packed 32-bit color containing an RGBA6665 color with user-defined alpha
#else
#define COLOR555TO6665(col,alpha5) (((alpha5)<<24) | color_555_to_666[(col)]) // Convert a 15-bit color to a sparsely packed 32-bit color containing an RGBA6665 color with user-defined alpha
#endif
// 15-bit -> 24/32-bit conversion macros. Each one is a plain lookup into a precomputed u32 table,
// using (col) directly as the array index; no masking of (col) is performed here.
// NOTE(review): color_555_to_888 is declared with 32768 entries, so callers presumably must pass
// a color with bit 15 already cleared -- confirm at the call sites.
#define COLOR555TO8888_OPAQUE(col) (color_555_to_8888_opaque[(col)]) // Convert a 15-bit color to an opaque 32-bit color
#define COLOR555TO8888_OPAQUE_SWAP_RB(col) (color_555_to_8888_opaque_swap_rb[(col)]) // Convert a 15-bit color to an opaque 32-bit color with R and B components swapped
#define COLOR555TO888(col) (color_555_to_888[(col)]) // Convert a 15-bit color to an opaque 24-bit color or a fully transparent 32-bit color
// The position of the user-supplied alpha depends on MSB_FIRST: with MSB_FIRST defined, alpha8 is
// OR'ed into the low byte unshifted; otherwise it is shifted up into bits 24-31. This matches the
// opaque tables built by ColorspaceHandlerInit(), which OR in 0x000000FF under MSB_FIRST and
// 0xFF000000 otherwise. alpha8 is not masked, so it is expected to already fit in 8 bits.
#if defined(MSB_FIRST)
#define COLOR555TO8888(col,alpha8) (((alpha8) ) | color_555_to_888[(col)]) // Convert a 15-bit color to a 32-bit color with user-defined alpha
#else
#define COLOR555TO8888(col,alpha8) (((alpha8)<<24) | color_555_to_888[(col)]) // Convert a 15-bit color to a 32-bit color with user-defined alpha
#endif
// Produce a 15bpp color from individual 5-bit components: r occupies bits 0-4, g bits 5-9 and
// b bits 10-14. The arguments are not masked, so each is expected to already fit in 5 bits.
#define R5G5B5TORGB15(r,g,b) ( (r) | ((g)<<5) | ((b)<<10) )