GPU Operations: Fix bug where generic calls to CopyLine*<>() caused certain compilers to interpret 0xFFFF as -1 in all cases.

- This fix has the side effect of greatly increasing the code size.
- Quick testing shows that this fix increases overall graphics performance by 2% - 3%. Whether this small performance gain is worth the massive increase in code size remains an open question.
This commit is contained in:
rogerman 2021-09-14 14:49:31 -07:00
parent a948838c16
commit d60684ce4d
3 changed files with 33 additions and 33 deletions

View File

@ -1052,16 +1052,16 @@ void GPUEngineBase::_TransitionLineNativeToCustom(GPUEngineCompositorInfo &compI
switch (OUTPUTFORMAT) switch (OUTPUTFORMAT)
{ {
case NDSColorFormat_BGR555_Rev: case NDSColorFormat_BGR555_Rev:
CopyLineExpandHinted<0xFFFF, true, false, false, 2>(compInfo.line, compInfo.target.lineColorHeadNative, compInfo.target.lineColorHeadCustom); CopyLineExpandHinted<0x3FFF, true, false, false, 2>(compInfo.line, compInfo.target.lineColorHeadNative, compInfo.target.lineColorHeadCustom);
break; break;
case NDSColorFormat_BGR666_Rev: case NDSColorFormat_BGR666_Rev:
case NDSColorFormat_BGR888_Rev: case NDSColorFormat_BGR888_Rev:
CopyLineExpandHinted<0xFFFF, true, false, false, 4>(compInfo.line, compInfo.target.lineColorHeadNative, compInfo.target.lineColorHeadCustom); CopyLineExpandHinted<0x3FFF, true, false, false, 4>(compInfo.line, compInfo.target.lineColorHeadNative, compInfo.target.lineColorHeadCustom);
break; break;
} }
CopyLineExpandHinted<0xFFFF, true, false, false, 1>(compInfo.line, compInfo.target.lineLayerIDHeadNative, compInfo.target.lineLayerIDHeadCustom); CopyLineExpandHinted<0x3FFF, true, false, false, 1>(compInfo.line, compInfo.target.lineLayerIDHeadNative, compInfo.target.lineLayerIDHeadCustom);
} }
compInfo.target.lineColorHead = compInfo.target.lineColorHeadCustom; compInfo.target.lineColorHead = compInfo.target.lineColorHeadCustom;
@ -1291,8 +1291,8 @@ void GPUEngineBase::_PrecompositeNativeToCustomLineBG(GPUEngineCompositorInfo &c
} }
} }
CopyLineExpand<0xFFFF, false, false, 2>(this->_deferredColorCustom, this->_deferredColorNative, compInfo.line.widthCustom, 1); CopyLineExpand<0x3FFF, false, false, 2>(this->_deferredColorCustom, this->_deferredColorNative, compInfo.line.widthCustom, 1);
CopyLineExpand<0xFFFF, false, false, 1>(this->_deferredIndexCustom, this->_deferredIndexNative, compInfo.line.widthCustom, 1); CopyLineExpand<0x3FFF, false, false, 1>(this->_deferredIndexCustom, this->_deferredIndexNative, compInfo.line.widthCustom, 1);
} }
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST>
@ -2445,8 +2445,8 @@ void GPUEngineBase::_RenderLine_SetupSprites(GPUEngineCompositorInfo &compInfo)
if (isLineComplete) if (isLineComplete)
{ {
this->_needExpandSprColorCustom = true; this->_needExpandSprColorCustom = true;
CopyLineExpandHinted<0xFFFF, false, false, false, 1>(compInfo.line, this->_sprAlpha[compInfo.line.indexNative], this->_sprAlphaCustom); CopyLineExpandHinted<0x3FFF, false, false, false, 1>(compInfo.line, this->_sprAlpha[compInfo.line.indexNative], this->_sprAlphaCustom);
CopyLineExpandHinted<0xFFFF, false, false, false, 1>(compInfo.line, this->_sprType[compInfo.line.indexNative], this->_sprTypeCustom); CopyLineExpandHinted<0x3FFF, false, false, false, 1>(compInfo.line, this->_sprType[compInfo.line.indexNative], this->_sprTypeCustom);
} }
} }
} }
@ -2503,7 +2503,7 @@ void GPUEngineBase::_RenderLine_LayerOBJ(GPUEngineCompositorInfo &compInfo, item
if (this->_needExpandSprColorCustom) if (this->_needExpandSprColorCustom)
{ {
this->_needExpandSprColorCustom = false; this->_needExpandSprColorCustom = false;
CopyLineExpandHinted<0xFFFF, false, false, false, 2>(compInfo.line, this->_sprColor, this->_sprColorCustom); CopyLineExpandHinted<0x3FFF, false, false, false, 2>(compInfo.line, this->_sprColor, this->_sprColorCustom);
} }
this->_CompositeLineDeferred<COMPOSITORMODE, OUTPUTFORMAT, GPULayerType_OBJ, WILLPERFORMWINDOWTEST>(compInfo, this->_sprColorCustom, NULL); this->_CompositeLineDeferred<COMPOSITORMODE, OUTPUTFORMAT, GPULayerType_OBJ, WILLPERFORMWINDOWTEST>(compInfo, this->_sprColorCustom, NULL);
@ -2961,8 +2961,8 @@ void GPUEngineBase::_PerformWindowTesting(GPUEngineCompositorInfo &compInfo)
} }
else if ((compInfo.line.widthCustom % GPU_FRAMEBUFFER_NATIVE_WIDTH) == 0) else if ((compInfo.line.widthCustom % GPU_FRAMEBUFFER_NATIVE_WIDTH) == 0)
{ {
CopyLineExpand<0xFFFF, false, false, 1>(this->_didPassWindowTestCustom[layerID], this->_didPassWindowTestNative[layerID], compInfo.line.widthCustom, 1); CopyLineExpand<0x3FFF, false, false, 1>(this->_didPassWindowTestCustom[layerID], this->_didPassWindowTestNative[layerID], compInfo.line.widthCustom, 1);
CopyLineExpand<0xFFFF, false, false, 1>(this->_enableColorEffectCustom[layerID], this->_enableColorEffectNative[layerID], compInfo.line.widthCustom, 1); CopyLineExpand<0x3FFF, false, false, 1>(this->_enableColorEffectCustom[layerID], this->_enableColorEffectNative[layerID], compInfo.line.widthCustom, 1);
} }
else else
{ {
@ -3363,7 +3363,7 @@ void GPUEngineBase::ResolveToCustomFramebuffer(NDSDisplayInfo &mutableInfo)
for (size_t y = 0; y < GPU_FRAMEBUFFER_NATIVE_HEIGHT; y++) for (size_t y = 0; y < GPU_FRAMEBUFFER_NATIVE_HEIGHT; y++)
{ {
const GPUEngineLineInfo &lineInfo = this->_currentCompositorInfo[y].line; const GPUEngineLineInfo &lineInfo = this->_currentCompositorInfo[y].line;
CopyLineExpandHinted<0xFFFF, true, false, false, 2>(lineInfo, src, dst); CopyLineExpandHinted<0x3FFF, true, false, false, 2>(lineInfo, src, dst);
src += GPU_FRAMEBUFFER_NATIVE_WIDTH; src += GPU_FRAMEBUFFER_NATIVE_WIDTH;
dst += lineInfo.pixelCount; dst += lineInfo.pixelCount;
} }
@ -3376,7 +3376,7 @@ void GPUEngineBase::ResolveToCustomFramebuffer(NDSDisplayInfo &mutableInfo)
for (size_t y = 0; y < GPU_FRAMEBUFFER_NATIVE_HEIGHT; y++) for (size_t y = 0; y < GPU_FRAMEBUFFER_NATIVE_HEIGHT; y++)
{ {
const GPUEngineLineInfo &lineInfo = this->_currentCompositorInfo[y].line; const GPUEngineLineInfo &lineInfo = this->_currentCompositorInfo[y].line;
CopyLineExpandHinted<0xFFFF, true, false, false, 4>(lineInfo, src, dst); CopyLineExpandHinted<0x3FFF, true, false, false, 4>(lineInfo, src, dst);
src += GPU_FRAMEBUFFER_NATIVE_WIDTH; src += GPU_FRAMEBUFFER_NATIVE_WIDTH;
dst += lineInfo.pixelCount; dst += lineInfo.pixelCount;
} }
@ -3412,7 +3412,7 @@ void GPUEngineBase::ResolveNativeLines()
if (this->_isLineRenderNative[y]) if (this->_isLineRenderNative[y])
{ {
CopyLineExpandHinted<0xFFFF, true, false, false, 2>(lineInfo, src, dst); CopyLineExpandHinted<0x3FFF, true, false, false, 2>(lineInfo, src, dst);
this->_isLineRenderNative[y] = false; this->_isLineRenderNative[y] = false;
} }
@ -3431,7 +3431,7 @@ void GPUEngineBase::ResolveNativeLines()
if (this->_isLineRenderNative[y]) if (this->_isLineRenderNative[y])
{ {
CopyLineExpandHinted<0xFFFF, true, false, false, 4>(lineInfo, src, dst); CopyLineExpandHinted<0x3FFF, true, false, false, 4>(lineInfo, src, dst);
this->_isLineRenderNative[y] = false; this->_isLineRenderNative[y] = false;
} }
@ -4028,12 +4028,12 @@ void GPUEngineA::_RenderLine_DisplayCaptureCustom(const IOREG_DISPCAPCNT &DISPCA
{ {
if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev)
{ {
CopyLineExpandHinted<0xFFFF, true, false, false, 2>(lineInfo, srcAPtr, this->_captureWorkingA16); CopyLineExpandHinted<0x3FFF, true, false, false, 2>(lineInfo, srcAPtr, this->_captureWorkingA16);
srcAPtr = this->_captureWorkingA16; srcAPtr = this->_captureWorkingA16;
} }
else else
{ {
CopyLineExpandHinted<0xFFFF, true, false, false, 4>(lineInfo, srcAPtr, this->_captureWorkingA32); CopyLineExpandHinted<0x3FFF, true, false, false, 4>(lineInfo, srcAPtr, this->_captureWorkingA32);
srcAPtr = this->_captureWorkingA32; srcAPtr = this->_captureWorkingA32;
} }
} }
@ -4042,7 +4042,7 @@ void GPUEngineA::_RenderLine_DisplayCaptureCustom(const IOREG_DISPCAPCNT &DISPCA
{ {
if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev)
{ {
CopyLineExpandHinted<0xFFFF, true, false, false, 2>(lineInfo, srcBPtr, this->_captureWorkingB16); CopyLineExpandHinted<0x3FFF, true, false, false, 2>(lineInfo, srcBPtr, this->_captureWorkingB16);
srcBPtr = this->_captureWorkingB16; srcBPtr = this->_captureWorkingB16;
} }
else else
@ -4052,7 +4052,7 @@ void GPUEngineA::_RenderLine_DisplayCaptureCustom(const IOREG_DISPCAPCNT &DISPCA
ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>(this->_fifoLine16, (u32 *)srcBPtr, GPU_FRAMEBUFFER_NATIVE_WIDTH); ColorspaceConvertBuffer555To8888Opaque<false, false, BESwapDst>(this->_fifoLine16, (u32 *)srcBPtr, GPU_FRAMEBUFFER_NATIVE_WIDTH);
} }
CopyLineExpandHinted<0xFFFF, true, false, false, 4>(lineInfo, srcBPtr, this->_captureWorkingB32); CopyLineExpandHinted<0x3FFF, true, false, false, 4>(lineInfo, srcBPtr, this->_captureWorkingB32);
srcBPtr = this->_captureWorkingB32; srcBPtr = this->_captureWorkingB32;
} }
} }
@ -4235,7 +4235,7 @@ void GPUEngineA::_RenderLine_DisplayCapture(const GPUEngineCompositorInfo &compI
if (CAPTURELENGTH == GPU_FRAMEBUFFER_NATIVE_WIDTH) if (CAPTURELENGTH == GPU_FRAMEBUFFER_NATIVE_WIDTH)
{ {
CopyLineReduceHinted<0xFFFF, false, false, 2>(compInfo.line, dstCustomPtr, dstNative16); CopyLineReduceHinted<0x3FFF, false, false, 2>(compInfo.line, dstCustomPtr, dstNative16);
needCaptureNative = false; needCaptureNative = false;
} }
} }
@ -4256,7 +4256,7 @@ void GPUEngineA::_RenderLine_DisplayCapture(const GPUEngineCompositorInfo &compI
if (compInfo.line.widthCustom > GPU_FRAMEBUFFER_NATIVE_WIDTH) if (compInfo.line.widthCustom > GPU_FRAMEBUFFER_NATIVE_WIDTH)
{ {
dstNative32 = (u32 *)this->_captureWorkingA32; // We're going to reuse _captureWorkingA32, since we should already be done with it by now. dstNative32 = (u32 *)this->_captureWorkingA32; // We're going to reuse _captureWorkingA32, since we should already be done with it by now.
CopyLineReduceHinted<0xFFFF, false, false, 4>(compInfo.line, dstCustomPtr, dstNative32); CopyLineReduceHinted<0x3FFF, false, false, 4>(compInfo.line, dstCustomPtr, dstNative32);
} }
ColorspaceConvertBuffer8888To5551<false, false>(dstNative32, dstNative16, GPU_FRAMEBUFFER_NATIVE_WIDTH); ColorspaceConvertBuffer8888To5551<false, false>(dstNative32, dstNative16, GPU_FRAMEBUFFER_NATIVE_WIDTH);
@ -4333,7 +4333,7 @@ void GPUEngineA::_RenderLine_DisplayCapture(const GPUEngineCompositorInfo &compI
{ {
if ( ((DISPCAPCNT.SrcA == 0) && !isReadDisplayLineNative) || ((DISPCAPCNT.SrcA != 0) && !isRead3DLineNative) ) if ( ((DISPCAPCNT.SrcA == 0) && !isReadDisplayLineNative) || ((DISPCAPCNT.SrcA != 0) && !isRead3DLineNative) )
{ {
CopyLineReduceHinted<0xFFFF, false, false, 2>(srcAPtr, 0, CAPTURELENGTH, this->_captureWorkingA16, 0); CopyLineReduceHinted<0x3FFF, false, false, 2>(srcAPtr, 0, CAPTURELENGTH, this->_captureWorkingA16, 0);
srcAPtr = this->_captureWorkingA16; srcAPtr = this->_captureWorkingA16;
} }
@ -6025,7 +6025,7 @@ u8* GPUSubsystem::_DownscaleAndConvertForSavestate(const NDSDisplayID displayID,
for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++) for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++)
{ {
CopyLineReduceHinted<0xFFFF, false, true, 2>(this->_lineInfo[l], src, dst); CopyLineReduceHinted<0x3FFF, false, true, 2>(this->_lineInfo[l], src, dst);
src += this->_lineInfo[l].pixelCount; src += this->_lineInfo[l].pixelCount;
dst += GPU_FRAMEBUFFER_NATIVE_WIDTH; dst += GPU_FRAMEBUFFER_NATIVE_WIDTH;
} }
@ -6040,7 +6040,7 @@ u8* GPUSubsystem::_DownscaleAndConvertForSavestate(const NDSDisplayID displayID,
for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++) for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++)
{ {
CopyLineReduceHinted<0xFFFF, false, true, 4>(this->_lineInfo[l], src, dst); CopyLineReduceHinted<0x3FFF, false, true, 4>(this->_lineInfo[l], src, dst);
src += this->_lineInfo[l].pixelCount; src += this->_lineInfo[l].pixelCount;
dst += GPU_FRAMEBUFFER_NATIVE_WIDTH; dst += GPU_FRAMEBUFFER_NATIVE_WIDTH;
} }
@ -6237,7 +6237,7 @@ bool GPUSubsystem::LoadState(EMUFILE &is, int size)
for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++) for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++)
{ {
CopyLineExpandHinted<0xFFFF, true, false, true, 2>(this->_lineInfo[l], src, dst); CopyLineExpandHinted<0x3FFF, true, false, true, 2>(this->_lineInfo[l], src, dst);
src += GPU_FRAMEBUFFER_NATIVE_WIDTH; src += GPU_FRAMEBUFFER_NATIVE_WIDTH;
dst += this->_lineInfo[l].pixelCount; dst += this->_lineInfo[l].pixelCount;
} }
@ -6252,7 +6252,7 @@ bool GPUSubsystem::LoadState(EMUFILE &is, int size)
for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++) for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++)
{ {
CopyLineExpandHinted<0xFFFF, true, false, true, 4>(this->_lineInfo[l], src, dst); CopyLineExpandHinted<0x3FFF, true, false, true, 4>(this->_lineInfo[l], src, dst);
src += GPU_FRAMEBUFFER_NATIVE_WIDTH; src += GPU_FRAMEBUFFER_NATIVE_WIDTH;
dst += this->_lineInfo[l].pixelCount; dst += this->_lineInfo[l].pixelCount;
} }
@ -6279,7 +6279,7 @@ bool GPUSubsystem::LoadState(EMUFILE &is, int size)
for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++) for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++)
{ {
CopyLineExpandHinted<0xFFFF, true, false, true, 2>(this->_lineInfo[l], src, dst); CopyLineExpandHinted<0x3FFF, true, false, true, 2>(this->_lineInfo[l], src, dst);
src += GPU_FRAMEBUFFER_NATIVE_WIDTH; src += GPU_FRAMEBUFFER_NATIVE_WIDTH;
dst += this->_lineInfo[l].pixelCount; dst += this->_lineInfo[l].pixelCount;
} }
@ -6294,7 +6294,7 @@ bool GPUSubsystem::LoadState(EMUFILE &is, int size)
for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++) for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++)
{ {
CopyLineExpandHinted<0xFFFF, true, false, true, 4>(this->_lineInfo[l], src, dst); CopyLineExpandHinted<0x3FFF, true, false, true, 4>(this->_lineInfo[l], src, dst);
src += GPU_FRAMEBUFFER_NATIVE_WIDTH; src += GPU_FRAMEBUFFER_NATIVE_WIDTH;
dst += this->_lineInfo[l].pixelCount; dst += this->_lineInfo[l].pixelCount;
} }

View File

@ -1242,7 +1242,7 @@ void CopyLineExpandHinted(const void *__restrict srcBuffer, const size_t srcLine
{ {
if ((dstLineWidth % GPU_FRAMEBUFFER_NATIVE_WIDTH) == 0) if ((dstLineWidth % GPU_FRAMEBUFFER_NATIVE_WIDTH) == 0)
{ {
CopyLineExpand<0xFFFF, SCALEVERTICAL, NEEDENDIANSWAP, ELEMENTSIZE>(dst, src, dstLineWidth, dstLineCount); CopyLineExpand<0x3FFF, SCALEVERTICAL, NEEDENDIANSWAP, ELEMENTSIZE>(dst, src, dstLineWidth, dstLineCount);
} }
else else
{ {
@ -1315,7 +1315,7 @@ void CopyLineReduceHinted(const void *__restrict srcBuffer, const size_t srcLine
{ {
if ((srcLineWidth % GPU_FRAMEBUFFER_NATIVE_WIDTH) == 0) if ((srcLineWidth % GPU_FRAMEBUFFER_NATIVE_WIDTH) == 0)
{ {
CopyLineReduce<0xFFFF, NEEDENDIANSWAP, ELEMENTSIZE>(dst, src, srcLineWidth); CopyLineReduce<0x3FFF, NEEDENDIANSWAP, ELEMENTSIZE>(dst, src, srcLineWidth);
} }
else else
{ {
@ -1337,5 +1337,5 @@ void CopyLineReduceHinted(const GPUEngineLineInfo &lineInfo, const void *__restr
} }
// These functions are used in gfx3d.cpp // These functions are used in gfx3d.cpp
template void CopyLineExpandHinted<0xFFFF, true, false, true, 4>(const GPUEngineLineInfo &lineInfo, const void *__restrict srcBuffer, void *__restrict dstBuffer); template void CopyLineExpandHinted<0x3FFF, true, false, true, 4>(const GPUEngineLineInfo &lineInfo, const void *__restrict srcBuffer, void *__restrict dstBuffer);
template void CopyLineReduceHinted<0xFFFF, false, true, 4>(const GPUEngineLineInfo &lineInfo, const void *__restrict srcBuffer, void *__restrict dstBuffer); template void CopyLineReduceHinted<0x3FFF, false, true, 4>(const GPUEngineLineInfo &lineInfo, const void *__restrict srcBuffer, void *__restrict dstBuffer);

View File

@ -2844,7 +2844,7 @@ void gfx3d_PrepareSaveStateBufferWrite()
for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++) for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++)
{ {
const GPUEngineLineInfo &lineInfo = GPU->GetLineInfoAtIndex(l); const GPUEngineLineInfo &lineInfo = GPU->GetLineInfoAtIndex(l);
CopyLineReduceHinted<0xFFFF, false, true, 4>(lineInfo, src, dst); CopyLineReduceHinted<0x3FFF, false, true, 4>(lineInfo, src, dst);
src += lineInfo.pixelCount; src += lineInfo.pixelCount;
dst += GPU_FRAMEBUFFER_NATIVE_WIDTH; dst += GPU_FRAMEBUFFER_NATIVE_WIDTH;
} }
@ -3067,7 +3067,7 @@ void gfx3d_FinishLoadStateBufferRead()
for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++) for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++)
{ {
const GPUEngineLineInfo &lineInfo = GPU->GetLineInfoAtIndex(l); const GPUEngineLineInfo &lineInfo = GPU->GetLineInfoAtIndex(l);
CopyLineExpandHinted<0xFFFF, true, false, true, 4>(lineInfo, src, dst); CopyLineExpandHinted<0x3FFF, true, false, true, 4>(lineInfo, src, dst);
src += GPU_FRAMEBUFFER_NATIVE_WIDTH; src += GPU_FRAMEBUFFER_NATIVE_WIDTH;
dst += lineInfo.pixelCount; dst += lineInfo.pixelCount;
} }