GPU: Only perform window testing when the relevant window states actually change.

This commit is contained in:
rogerman 2021-09-22 13:24:05 -07:00
parent 2fb06cf4cc
commit 7fc2e4b6b6
6 changed files with 178 additions and 99 deletions

View File

@ -345,8 +345,9 @@ void GPUEngineBase::Reset()
this->_BGLayer[GPULayerID_BG2].extPalette = (u16 **)&MMU.ExtPal[this->_engineID][GPULayerID_BG2];
this->_BGLayer[GPULayerID_BG3].extPalette = (u16 **)&MMU.ExtPal[this->_engineID][GPULayerID_BG3];
this->_needUpdateWINH[0] = true;
this->_needUpdateWINH[1] = true;
this->_prevWINState.value = 0xFFFF;
this->_prevWINCoord.value = 0xFFFFFFFFFFFFFFFFULL;
this->_prevWINCtrl.value = 0xFFFFFFFF;
this->_vramBlockOBJAddress = 0;
@ -396,9 +397,7 @@ void GPUEngineBase::Reset()
memset(&renderState.WINOBJ_enable[0], 0, sizeof(renderState.WINOBJ_enable[0]) * 6);
memset(&renderState.dstBlendEnableVecLookup, 0, sizeof(renderState.dstBlendEnableVecLookup));
renderState.WIN0_ENABLED = false;
renderState.WIN1_ENABLED = false;
renderState.WINOBJ_ENABLED = false;
renderState.windowState.value = 0;
renderState.isAnyWindowEnabled = false;
memset(&renderState.srcEffectEnable[0], 0, sizeof(renderState.srcEffectEnable[0]) * 6);
@ -425,6 +424,7 @@ void GPUEngineBase::Reset()
void GPUEngineBase::_ResortBGLayers()
{
const IOREG_DISPCNT &DISPCNT = this->_IORegisterMap->DISPCNT;
GPUEngineRenderState &renderState = this->_currentRenderState;
int i, prio;
itemsForPriority_t *item;
@ -441,6 +441,12 @@ void GPUEngineBase::_ResortBGLayers()
this->_isAnyBGLayerShown = this->_isBGLayerShown[GPULayerID_BG0] || this->_isBGLayerShown[GPULayerID_BG1] || this->_isBGLayerShown[GPULayerID_BG2] || this->_isBGLayerShown[GPULayerID_BG3];
renderState.windowState.BG0_Shown = (this->_isBGLayerShown[GPULayerID_BG0]) ? 1 : 0;
renderState.windowState.BG1_Shown = (this->_isBGLayerShown[GPULayerID_BG1]) ? 1 : 0;
renderState.windowState.BG2_Shown = (this->_isBGLayerShown[GPULayerID_BG2]) ? 1 : 0;
renderState.windowState.BG3_Shown = (this->_isBGLayerShown[GPULayerID_BG3]) ? 1 : 0;
renderState.windowState.OBJ_Shown = (this->_isBGLayerShown[GPULayerID_OBJ]) ? 1 : 0;
// KISS ! lower priority first, if same then lower num
for (i = 0; i < NB_PRIORITIES; i++)
{
@ -495,10 +501,10 @@ void GPUEngineBase::ParseReg_DISPCNT()
renderState.displayOutputMode = (this->_engineID == GPUEngineID_Main) ? (GPUDisplayMode)DISPCNT.DisplayMode : (GPUDisplayMode)(DISPCNT.DisplayMode & 0x01);
renderState.WIN0_ENABLED = (DISPCNT.Win0_Enable != 0);
renderState.WIN1_ENABLED = (DISPCNT.Win1_Enable != 0);
renderState.WINOBJ_ENABLED = (DISPCNT.WinOBJ_Enable != 0);
renderState.isAnyWindowEnabled = (renderState.WIN0_ENABLED || renderState.WIN1_ENABLED || renderState.WINOBJ_ENABLED);
renderState.windowState.WIN0_ENABLED = DISPCNT.Win0_Enable;
renderState.windowState.WIN1_ENABLED = DISPCNT.Win1_Enable;
renderState.windowState.WINOBJ_ENABLED = DISPCNT.WinOBJ_Enable;
renderState.isAnyWindowEnabled = ( (DISPCNT.Win0_Enable != 0) || (DISPCNT.Win1_Enable != 0) || (DISPCNT.WinOBJ_Enable != 0) );
if (DISPCNT.OBJ_Tile_mapping)
{
@ -1338,6 +1344,9 @@ void GPUEngineBase::_CompositeNativeLineOBJ(GPUEngineCompositorInfo &compInfo, c
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST>
void GPUEngineBase::_CompositeLineDeferred(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorCustom16, const u8 *__restrict srcIndexCustom)
{
const u8 *windowTest = (compInfo.line.widthCustom == GPU_FRAMEBUFFER_NATIVE_WIDTH) ? this->_didPassWindowTestNative[compInfo.renderState.selectedLayerID] : this->_didPassWindowTestCustom[compInfo.renderState.selectedLayerID];
const u8 *colorEffectEnable = (compInfo.line.widthCustom == GPU_FRAMEBUFFER_NATIVE_WIDTH) ? this->_enableColorEffectNative[compInfo.renderState.selectedLayerID] : this->_enableColorEffectCustom[compInfo.renderState.selectedLayerID];
compInfo.target.xNative = 0;
compInfo.target.xCustom = 0;
compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHead;
@ -1347,7 +1356,7 @@ void GPUEngineBase::_CompositeLineDeferred(GPUEngineCompositorInfo &compInfo, co
size_t i = 0;
#ifdef USEMANUALVECTORIZATION
i = this->_CompositeLineDeferred_LoopOp<COMPOSITORMODE, OUTPUTFORMAT, LAYERTYPE, WILLPERFORMWINDOWTEST>(compInfo, srcColorCustom16, srcIndexCustom);
i = this->_CompositeLineDeferred_LoopOp<COMPOSITORMODE, OUTPUTFORMAT, LAYERTYPE, WILLPERFORMWINDOWTEST>(compInfo, windowTest, colorEffectEnable, srcColorCustom16, srcIndexCustom);
#pragma LOOPVECTORIZE_DISABLE
#endif
for (; i < compInfo.line.pixelCount; i++, compInfo.target.xCustom++, compInfo.target.lineColor16++, compInfo.target.lineColor32++, compInfo.target.lineLayerID++)
@ -1375,6 +1384,9 @@ void GPUEngineBase::_CompositeLineDeferred(GPUEngineCompositorInfo &compInfo, co
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST>
void GPUEngineBase::_CompositeVRAMLineDeferred(GPUEngineCompositorInfo &compInfo, const void *__restrict vramColorPtr)
{
const u8 *windowTest = (compInfo.line.widthCustom == GPU_FRAMEBUFFER_NATIVE_WIDTH) ? this->_didPassWindowTestNative[compInfo.renderState.selectedLayerID] : this->_didPassWindowTestCustom[compInfo.renderState.selectedLayerID];
const u8 *colorEffectEnable = (compInfo.line.widthCustom == GPU_FRAMEBUFFER_NATIVE_WIDTH) ? this->_enableColorEffectNative[compInfo.renderState.selectedLayerID] : this->_enableColorEffectCustom[compInfo.renderState.selectedLayerID];
compInfo.target.xNative = 0;
compInfo.target.xCustom = 0;
compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHead;
@ -1384,7 +1396,7 @@ void GPUEngineBase::_CompositeVRAMLineDeferred(GPUEngineCompositorInfo &compInfo
size_t i = 0;
#ifdef USEMANUALVECTORIZATION
i = this->_CompositeVRAMLineDeferred_LoopOp<COMPOSITORMODE, OUTPUTFORMAT, LAYERTYPE, WILLPERFORMWINDOWTEST>(compInfo, vramColorPtr);
i = this->_CompositeVRAMLineDeferred_LoopOp<COMPOSITORMODE, OUTPUTFORMAT, LAYERTYPE, WILLPERFORMWINDOWTEST>(compInfo, windowTest, colorEffectEnable, vramColorPtr);
#pragma LOOPVECTORIZE_DISABLE
#endif
for (; i < compInfo.line.pixelCount; i++, compInfo.target.xCustom++, compInfo.target.lineColor16++, compInfo.target.lineColor32++, compInfo.target.lineLayerID++)
@ -1394,7 +1406,7 @@ void GPUEngineBase::_CompositeVRAMLineDeferred(GPUEngineCompositorInfo &compInfo
compInfo.target.xCustom -= compInfo.line.widthCustom;
}
if ( WILLPERFORMWINDOWTEST && (this->_didPassWindowTestCustom[compInfo.renderState.selectedLayerID][compInfo.target.xCustom] == 0) )
if ( WILLPERFORMWINDOWTEST && (windowTest[compInfo.target.xCustom] == 0) )
{
continue;
}
@ -1406,7 +1418,7 @@ void GPUEngineBase::_CompositeVRAMLineDeferred(GPUEngineCompositorInfo &compInfo
continue;
}
const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (this->_enableColorEffectCustom[compInfo.renderState.selectedLayerID][compInfo.target.xCustom] != 0) : true;
const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (colorEffectEnable[compInfo.target.xCustom] != 0) : true;
pixelop.Composite32<COMPOSITORMODE, OUTPUTFORMAT, LAYERTYPE>(compInfo, ((FragmentColor *)vramColorPtr)[i], enableColorEffect, this->_sprAlphaCustom[compInfo.target.xCustom], this->_sprTypeCustom[compInfo.target.xCustom]);
}
else
@ -1416,7 +1428,7 @@ void GPUEngineBase::_CompositeVRAMLineDeferred(GPUEngineCompositorInfo &compInfo
continue;
}
const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (this->_enableColorEffectCustom[compInfo.renderState.selectedLayerID][compInfo.target.xCustom] != 0) : true;
const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (colorEffectEnable[compInfo.target.xCustom] != 0) : true;
pixelop.Composite16<COMPOSITORMODE, OUTPUTFORMAT, LAYERTYPE>(compInfo, ((u16 *)vramColorPtr)[i], enableColorEffect, this->_sprAlphaCustom[compInfo.target.xCustom], this->_sprTypeCustom[compInfo.target.xCustom]);
}
}
@ -2663,12 +2675,12 @@ void GPUEngineBase::TransitionRenderStatesToDisplayInfo(NDSDisplayInfo &mutableI
template <size_t WIN_NUM>
bool GPUEngineBase::_IsWindowInsideVerticalRange(GPUEngineCompositorInfo &compInfo)
{
if (WIN_NUM == 0 && !compInfo.renderState.WIN0_ENABLED)
if (WIN_NUM == 0 && !compInfo.renderState.windowState.WIN0_ENABLED)
{
return false;
}
if (WIN_NUM == 1 && !compInfo.renderState.WIN1_ENABLED)
if (WIN_NUM == 1 && !compInfo.renderState.windowState.WIN1_ENABLED)
{
return false;
}
@ -2699,10 +2711,9 @@ template <size_t WIN_NUM>
void GPUEngineBase::_UpdateWINH(GPUEngineCompositorInfo &compInfo)
{
//dont even waste any time in here if the window isnt enabled
if (WIN_NUM == 0 && !compInfo.renderState.WIN0_ENABLED) return;
if (WIN_NUM == 1 && !compInfo.renderState.WIN1_ENABLED) return;
if (WIN_NUM == 0 && !compInfo.renderState.windowState.WIN0_ENABLED) return;
if (WIN_NUM == 1 && !compInfo.renderState.windowState.WIN1_ENABLED) return;
this->_needUpdateWINH[WIN_NUM] = false;
const size_t windowLeft = (WIN_NUM == 0) ? this->_IORegisterMap->WIN0H.Left : this->_IORegisterMap->WIN1H.Left;
const size_t windowRight = (WIN_NUM == 0) ? this->_IORegisterMap->WIN0H.Right : this->_IORegisterMap->WIN1H.Right;
@ -2730,12 +2741,42 @@ void GPUEngineBase::_UpdateWINH(GPUEngineCompositorInfo &compInfo)
void GPUEngineBase::_PerformWindowTesting(GPUEngineCompositorInfo &compInfo)
{
if (this->_needUpdateWINH[0]) this->_UpdateWINH<0>(compInfo);
if (this->_needUpdateWINH[1]) this->_UpdateWINH<1>(compInfo);
compInfo.renderState.windowState.IsWithinVerticalRange_WIN0 = (this->_IsWindowInsideVerticalRange<0>(compInfo)) ? 1 : 0;
compInfo.renderState.windowState.IsWithinVerticalRange_WIN1 = (this->_IsWindowInsideVerticalRange<1>(compInfo)) ? 1 : 0;
const u8 *__restrict win0Ptr = (this->_IsWindowInsideVerticalRange<0>(compInfo)) ? this->_h_win[0] : NULL;
const u8 *__restrict win1Ptr = (this->_IsWindowInsideVerticalRange<1>(compInfo)) ? this->_h_win[1] : NULL;
const u8 *__restrict winObjPtr = (compInfo.renderState.WINOBJ_ENABLED) ? this->_sprWin[compInfo.line.indexNative] : NULL;
// While window testing isn't too expensive to do, it might become a performance issue when it's
// performed on every line when running large framebuffer widths on a system without SIMD
// acceleration. Therefore, we're going to check all the relevant window states and only perform
// window testing when one of those states actually changes.
if ( (this->_prevWINState.value == compInfo.renderState.windowState.value) &&
(this->_prevWINCoord.value == this->_IORegisterMap->WIN_COORD.value) &&
(this->_prevWINCtrl.value == this->_IORegisterMap->WIN_CTRL.value) &&
(compInfo.renderState.windowState.WINOBJ_ENABLED == 0) ) // Sprite window states continually update. If WINOBJ is enabled, that means we need to continually perform window testing as well.
{
return;
}
const bool needUpdateWIN0H = (this->_prevWINCoord.WIN0H.value != this->_IORegisterMap->WIN0H.value);
const bool needUpdateWIN1H = (this->_prevWINCoord.WIN1H.value != this->_IORegisterMap->WIN1H.value);
this->_prevWINState.value = compInfo.renderState.windowState.value;
this->_prevWINCoord.value = this->_IORegisterMap->WIN_COORD.value;
this->_prevWINCtrl.value = this->_IORegisterMap->WIN_CTRL.value;
if (needUpdateWIN0H)
{
this->_UpdateWINH<0>(compInfo);
}
if (needUpdateWIN1H)
{
this->_UpdateWINH<1>(compInfo);
}
const u8 *__restrict win0Ptr = (compInfo.renderState.windowState.IsWithinVerticalRange_WIN0) ? this->_h_win[0] : NULL;
const u8 *__restrict win1Ptr = (compInfo.renderState.windowState.IsWithinVerticalRange_WIN1) ? this->_h_win[1] : NULL;
const u8 *__restrict winObjPtr = (compInfo.renderState.windowState.WINOBJ_ENABLED) ? this->_sprWin[compInfo.line.indexNative] : NULL;
for (size_t layerID = GPULayerID_BG0; layerID <= GPULayerID_OBJ; layerID++)
{
@ -2939,12 +2980,6 @@ void GPUEngineBase::_HandleDisplayModeNormal(const size_t l)
}
}
template <size_t WINNUM>
void GPUEngineBase::ParseReg_WINnH()
{
this->_needUpdateWINH[WINNUM] = true;
}
void GPUEngineBase::ParseReg_WININ()
{
GPUEngineRenderState &renderState = this->_currentRenderState;
@ -3127,9 +3162,6 @@ void GPUEngineBase::AllocateWorkingBuffers(NDSColorFormat requestedColorFormat,
this->_enableColorEffectCustom[GPULayerID_BG3] = this->_enableColorEffectCustomMasterPtr + (3 * w * sizeof(u8));
this->_enableColorEffectCustom[GPULayerID_OBJ] = this->_enableColorEffectCustomMasterPtr + (4 * w * sizeof(u8));
this->_needUpdateWINH[0] = true;
this->_needUpdateWINH[1] = true;
for (size_t line = 0; line < GPU_VRAM_BLOCK_LINES + 1; line++)
{
this->_currentCompositorInfo[line].line = GPU->GetLineInfoAtIndex(line);
@ -3202,8 +3234,6 @@ void GPUEngineBase::ParseAllRegisters()
this->ParseReg_BGnX<GPULayerID_BG3>();
this->ParseReg_BGnY<GPULayerID_BG3>();
this->ParseReg_WINnH<0>();
this->ParseReg_WINnH<1>();
this->ParseReg_WININ();
this->ParseReg_WINOUT();
@ -3582,6 +3612,9 @@ void GPUEngineA::RenderLine_Layer3D(GPUEngineCompositorInfo &compInfo)
this->_TransitionLineNativeToCustom<OUTPUTFORMAT>(compInfo);
}
const u8 *windowTest = (CurrentRenderer->GetFramebufferWidth() == GPU_FRAMEBUFFER_NATIVE_WIDTH) ? this->_didPassWindowTestNative[GPULayerID_BG0] : this->_didPassWindowTestCustom[GPULayerID_BG0];
const u8 *colorEffectEnable = (CurrentRenderer->GetFramebufferWidth() == GPU_FRAMEBUFFER_NATIVE_WIDTH) ? this->_enableColorEffectNative[GPULayerID_BG0] : this->_enableColorEffectCustom[GPULayerID_BG0];
const float customWidthScale = (float)compInfo.line.widthCustom / (float)GPU_FRAMEBUFFER_NATIVE_WIDTH;
const FragmentColor *__restrict srcLinePtr = framebuffer3D + compInfo.line.blockOffsetCustom;
@ -3600,7 +3633,7 @@ void GPUEngineA::RenderLine_Layer3D(GPUEngineCompositorInfo &compInfo)
size_t i = 0;
#ifdef USEMANUALVECTORIZATION
i = this->_RenderLine_Layer3D_LoopOp<COMPOSITORMODE, OUTPUTFORMAT, WILLPERFORMWINDOWTEST>(compInfo, srcLinePtr);
i = this->_RenderLine_Layer3D_LoopOp<COMPOSITORMODE, OUTPUTFORMAT, WILLPERFORMWINDOWTEST>(compInfo, windowTest, colorEffectEnable, srcLinePtr);
#pragma LOOPVECTORIZE_DISABLE
#endif
for (; i < compInfo.line.pixelCount; i++, srcLinePtr++, compInfo.target.xCustom++, compInfo.target.lineColor16++, compInfo.target.lineColor32++, compInfo.target.lineLayerID++)
@ -3610,12 +3643,12 @@ void GPUEngineA::RenderLine_Layer3D(GPUEngineCompositorInfo &compInfo)
compInfo.target.xCustom -= compInfo.line.widthCustom;
}
if ( (srcLinePtr->a == 0) || (WILLPERFORMWINDOWTEST && (this->_didPassWindowTestCustom[GPULayerID_BG0][compInfo.target.xCustom] == 0)) )
if ( (srcLinePtr->a == 0) || (WILLPERFORMWINDOWTEST && (windowTest[compInfo.target.xCustom] == 0)) )
{
continue;
}
const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (this->_enableColorEffectCustom[GPULayerID_BG0][compInfo.target.xCustom] != 0) : true;
const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (colorEffectEnable[compInfo.target.xCustom] != 0) : true;
pixelop.Composite32<COMPOSITORMODE, OUTPUTFORMAT, GPULayerType_3D>(compInfo, *srcLinePtr, enableColorEffect, 0, 0);
}
}
@ -3625,7 +3658,7 @@ void GPUEngineA::RenderLine_Layer3D(GPUEngineCompositorInfo &compInfo)
{
for (compInfo.target.xCustom = 0; compInfo.target.xCustom < compInfo.line.widthCustom; compInfo.target.xCustom++, compInfo.target.lineColor16++, compInfo.target.lineColor32++, compInfo.target.lineLayerID++)
{
if ( WILLPERFORMWINDOWTEST && (this->_didPassWindowTestCustom[GPULayerID_BG0][compInfo.target.xCustom] == 0) )
if ( WILLPERFORMWINDOWTEST && (windowTest[compInfo.target.xCustom] == 0) )
{
continue;
}
@ -3641,7 +3674,7 @@ void GPUEngineA::RenderLine_Layer3D(GPUEngineCompositorInfo &compInfo)
continue;
}
const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (this->_enableColorEffectCustom[GPULayerID_BG0][compInfo.target.xCustom] != 0) : true;
const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (colorEffectEnable[compInfo.target.xCustom] != 0) : true;
pixelop.Composite32<COMPOSITORMODE, OUTPUTFORMAT, GPULayerType_3D>(compInfo, srcLinePtr[srcX], enableColorEffect, 0, 0);
}
@ -6533,9 +6566,6 @@ template void GPUEngineBase::ParseReg_BGnVOFS<GPULayerID_BG1>();
template void GPUEngineBase::ParseReg_BGnVOFS<GPULayerID_BG2>();
template void GPUEngineBase::ParseReg_BGnVOFS<GPULayerID_BG3>();
template void GPUEngineBase::ParseReg_WINnH<0>();
template void GPUEngineBase::ParseReg_WINnH<1>();
template void GPUEngineBase::ParseReg_BGnX<GPULayerID_BG2>();
template void GPUEngineBase::ParseReg_BGnY<GPULayerID_BG2>();
template void GPUEngineBase::ParseReg_BGnX<GPULayerID_BG3>();

View File

@ -407,6 +407,19 @@ typedef union
} IOREG_WIN0V; // 0x400x044: Vertical coordinates of Window 0 (Engine A+B)
typedef IOREG_WIN0V IOREG_WIN1V; // 0x400x046: Vertical coordinates of Window 1 (Engine A+B)
typedef union
{
u64 value;
struct
{
IOREG_WIN0H WIN0H; // 0x0400x040
IOREG_WIN1H WIN1H; // 0x0400x042
IOREG_WIN0V WIN0V; // 0x0400x044
IOREG_WIN1V WIN1V; // 0x0400x046
};
} IOREG_WIN_COORD;
typedef union
{
u8 value;
@ -436,6 +449,19 @@ typedef IOREG_WIN0IN IOREG_WIN1IN; // 0x400x049: Control of inside of Window 1
typedef IOREG_WIN0IN IOREG_WINOUT; // 0x400x04A: Control of outside of all windows
typedef IOREG_WIN0IN IOREG_WINOBJ; // 0x400x04B: Control of inside of Window OBJ (lowest priority)
typedef union
{
u32 value;
struct
{
IOREG_WIN0IN WIN0IN; // 0x0400x048
IOREG_WIN1IN WIN1IN; // 0x0400x049
IOREG_WINOUT WINOUT; // 0x0400x04A
IOREG_WINOBJ WINOBJ; // 0x0400x04B
};
} IOREG_WIN_CTRL;
typedef union
{
u32 value;
@ -751,14 +777,31 @@ typedef struct
};
};
union
{
IOREG_WIN_COORD WIN_COORD; // 0x0400x040
struct
{
IOREG_WIN0H WIN0H; // 0x0400x040
IOREG_WIN1H WIN1H; // 0x0400x042
IOREG_WIN0V WIN0V; // 0x0400x044
IOREG_WIN1V WIN1V; // 0x0400x046
};
};
union
{
IOREG_WIN_CTRL WIN_CTRL; // 0x0400x048
struct
{
IOREG_WIN0IN WIN0IN; // 0x0400x048
IOREG_WIN1IN WIN1IN; // 0x0400x049
IOREG_WINOUT WINOUT; // 0x0400x04A
IOREG_WINOBJ WINOBJ; // 0x0400x04B
};
};
IOREG_MOSAIC MOSAIC; // 0x0400x04C
@ -1222,6 +1265,28 @@ typedef struct
u32 trunc32[GPU_FRAMEBUFFER_NATIVE_WIDTH];
} MosaicTableEntry;
typedef union
{
u16 value;
struct
{
u8 WIN0_ENABLED:1;
u8 WIN1_ENABLED:1;
u8 WINOBJ_ENABLED:1;
u8 IsWithinVerticalRange_WIN0:1;
u8 IsWithinVerticalRange_WIN1:1;
u8 unused1:3;
u8 BG0_Shown:1;
u8 BG1_Shown:1;
u8 BG2_Shown:1;
u8 BG3_Shown:1;
u8 OBJ_Shown:1;
u8 unused2:3;
};
} WINState;
typedef struct
{
GPULayerID layerID;
@ -1293,9 +1358,7 @@ typedef struct
u8 WINOUT_enable[6];
u8 WINOBJ_enable[6];
bool WIN0_ENABLED;
bool WIN1_ENABLED;
bool WINOBJ_ENABLED;
WINState windowState;
bool isAnyWindowEnabled;
u8 srcEffectEnable[6];
@ -1434,7 +1497,10 @@ protected:
void *_internalRenderLineTargetCustom;
u8 *_renderLineLayerIDCustom;
bool _needUpdateWINH[2];
WINState _prevWINState;
IOREG_WIN_COORD _prevWINCoord;
IOREG_WIN_CTRL _prevWINCtrl;
Task *_asyncClearTask;
bool _asyncClearIsRunning;
@ -1467,8 +1533,8 @@ protected:
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST> void _CompositeVRAMLineDeferred(GPUEngineCompositorInfo &compInfo, const void *__restrict vramColorPtr);
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST> void _CompositeNativeLineOBJ_LoopOp(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorNative16, const FragmentColor *__restrict srcColorNative32);
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST> size_t _CompositeLineDeferred_LoopOp(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorCustom16, const u8 *__restrict srcIndexCustom);
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST> size_t _CompositeVRAMLineDeferred_LoopOp(GPUEngineCompositorInfo &compInfo, const void *__restrict vramColorPtr);
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST> size_t _CompositeLineDeferred_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const u16 *__restrict srcColorCustom16, const u8 *__restrict srcIndexCustom);
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST> size_t _CompositeVRAMLineDeferred_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const void *__restrict vramColorPtr);
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool WILLDEFERCOMPOSITING> void _RenderLine_BGText(GPUEngineCompositorInfo &compInfo, const u16 XBG, const u16 YBG);
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool WILLDEFERCOMPOSITING> void _RenderLine_BGAffine(GPUEngineCompositorInfo &compInfo, const IOREG_BGnParameter &param);
@ -1528,7 +1594,6 @@ public:
template<GPULayerID LAYERID> void ParseReg_BGnVOFS();
template<GPULayerID LAYERID> void ParseReg_BGnX();
template<GPULayerID LAYERID> void ParseReg_BGnY();
template<size_t WINNUM> void ParseReg_WINnH();
void ParseReg_WININ();
void ParseReg_WINOUT();
void ParseReg_MOSAIC();
@ -1635,7 +1700,7 @@ protected:
template<NDSColorFormat COLORFORMAT> FragmentColor _RenderLine_DispCapture_BlendFunc(const FragmentColor srcA, const FragmentColor srcB, const u8 blendEVA, const u8 blendEVB);
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST>
size_t _RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo, const FragmentColor *__restrict srcLinePtr);
size_t _RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const FragmentColor *__restrict srcLinePtr);
template<NDSColorFormat OUTPUTFORMAT>
void _RenderLine_DispCapture_Blend_Buffer(const void *srcA, const void *srcB, void *dst, const u8 blendEVA, const u8 blendEVB, const size_t pixCount); // Do not use restrict pointers, since srcB and dst can be the same

View File

@ -1019,14 +1019,14 @@ void GPUEngineBase::_CompositeNativeLineOBJ_LoopOp(GPUEngineCompositorInfo &comp
}
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST>
size_t GPUEngineBase::_CompositeLineDeferred_LoopOp(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorCustom16, const u8 *__restrict srcIndexCustom)
size_t GPUEngineBase::_CompositeLineDeferred_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const u16 *__restrict srcColorCustom16, const u8 *__restrict srcIndexCustom)
{
// Do nothing. This is a placeholder for a manually vectorized version of this method.
return 0;
}
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST>
size_t GPUEngineBase::_CompositeVRAMLineDeferred_LoopOp(GPUEngineCompositorInfo &compInfo, const void *__restrict vramColorPtr)
size_t GPUEngineBase::_CompositeVRAMLineDeferred_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const void *__restrict vramColorPtr)
{
// Do nothing. This is a placeholder for a manually vectorized version of this method.
return 0;
@ -1086,7 +1086,7 @@ void GPUEngineBase::_PerformWindowTestingNative(GPUEngineCompositorInfo &compInf
}
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST>
size_t GPUEngineA::_RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo, const FragmentColor *__restrict srcLinePtr)
size_t GPUEngineA::_RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const FragmentColor *__restrict srcLinePtr)
{
// Do nothing. This is a placeholder for a manually vectorized version of this method.
return 0;

View File

@ -2583,7 +2583,7 @@ void GPUEngineBase::_CompositeNativeLineOBJ_LoopOp(GPUEngineCompositorInfo &comp
}
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST>
size_t GPUEngineBase::_CompositeLineDeferred_LoopOp(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorCustom16, const u8 *__restrict srcIndexCustom)
size_t GPUEngineBase::_CompositeLineDeferred_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const u16 *__restrict srcColorCustom16, const u8 *__restrict srcIndexCustom)
{
static const size_t step = sizeof(v256u8);
@ -2610,7 +2610,7 @@ size_t GPUEngineBase::_CompositeLineDeferred_LoopOp(GPUEngineCompositorInfo &com
if (WILLPERFORMWINDOWTEST)
{
// Do the window test.
passMask8 = _mm256_load_si256((v256u8 *)(this->_didPassWindowTestCustom[compInfo.renderState.selectedLayerID] + compInfo.target.xCustom));
passMask8 = _mm256_load_si256((v256u8 *)(windowTestPtr + compInfo.target.xCustom));
}
if (LAYERTYPE == GPULayerType_BG)
@ -2656,7 +2656,7 @@ size_t GPUEngineBase::_CompositeLineDeferred_LoopOp(GPUEngineCompositorInfo &com
src[1], src[0],
srcEffectEnableMask,
dstBlendEnableMaskLUT,
this->_enableColorEffectCustom[compInfo.renderState.selectedLayerID] + compInfo.target.xCustom,
colorEffectEnablePtr + compInfo.target.xCustom,
this->_sprAlphaCustom + compInfo.target.xCustom,
this->_sprTypeCustom + compInfo.target.xCustom);
}
@ -2665,7 +2665,7 @@ size_t GPUEngineBase::_CompositeLineDeferred_LoopOp(GPUEngineCompositorInfo &com
}
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST>
size_t GPUEngineBase::_CompositeVRAMLineDeferred_LoopOp(GPUEngineCompositorInfo &compInfo, const void *__restrict vramColorPtr)
size_t GPUEngineBase::_CompositeVRAMLineDeferred_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const void *__restrict vramColorPtr)
{
static const size_t step = sizeof(v256u8);
@ -2689,7 +2689,7 @@ size_t GPUEngineBase::_CompositeVRAMLineDeferred_LoopOp(GPUEngineCompositorInfo
if (WILLPERFORMWINDOWTEST)
{
// Do the window test.
passMask8 = _mm256_load_si256((v256u8 *)(this->_didPassWindowTestCustom[compInfo.renderState.selectedLayerID] + compInfo.target.xCustom));
passMask8 = _mm256_load_si256((v256u8 *)(windowTestPtr + compInfo.target.xCustom));
// If none of the pixels within the vector pass, then reject them all at once.
passMaskValue = _mm256_movemask_epi8(passMask8);
@ -2739,7 +2739,7 @@ size_t GPUEngineBase::_CompositeVRAMLineDeferred_LoopOp(GPUEngineCompositorInfo
src16[1], src16[0],
srcEffectEnableMask,
dstBlendEnableMaskLUT,
this->_enableColorEffectCustom[compInfo.renderState.selectedLayerID] + compInfo.target.xCustom,
colorEffectEnablePtr + compInfo.target.xCustom,
this->_sprAlphaCustom + compInfo.target.xCustom,
this->_sprTypeCustom + compInfo.target.xCustom);
break;
@ -2780,7 +2780,7 @@ size_t GPUEngineBase::_CompositeVRAMLineDeferred_LoopOp(GPUEngineCompositorInfo
src32[3], src32[2], src32[1], src32[0],
srcEffectEnableMask,
dstBlendEnableMaskLUT,
this->_enableColorEffectCustom[compInfo.renderState.selectedLayerID] + compInfo.target.xCustom,
colorEffectEnablePtr + compInfo.target.xCustom,
this->_sprAlphaCustom + compInfo.target.xCustom,
this->_sprTypeCustom + compInfo.target.xCustom);
break;
@ -2905,7 +2905,7 @@ void GPUEngineBase::_PerformWindowTestingNative(GPUEngineCompositorInfo &compInf
}
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST>
size_t GPUEngineA::_RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo, const FragmentColor *__restrict srcLinePtr)
size_t GPUEngineA::_RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const FragmentColor *__restrict srcLinePtr)
{
static const size_t step = sizeof(v256u32);
@ -2930,7 +2930,7 @@ size_t GPUEngineA::_RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo,
if (WILLPERFORMWINDOWTEST)
{
// Do the window test.
passMask8 = _mm256_load_si256((v256u8 *)(this->_didPassWindowTestCustom[GPULayerID_BG0] + compInfo.target.xCustom));
passMask8 = _mm256_load_si256((v256u8 *)(windowTestPtr + compInfo.target.xCustom));
// If none of the pixels within the vector pass, then reject them all at once.
passMaskValue = _mm256_movemask_epi8(passMask8);
@ -2974,7 +2974,7 @@ size_t GPUEngineA::_RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo,
src[3], src[2], src[1], src[0],
srcEffectEnableMask,
dstBlendEnableMaskLUT,
this->_enableColorEffectCustom[GPULayerID_BG0] + compInfo.target.xCustom,
colorEffectEnablePtr + compInfo.target.xCustom,
NULL,
NULL);
}

View File

@ -2366,7 +2366,7 @@ void GPUEngineBase::_CompositeNativeLineOBJ_LoopOp(GPUEngineCompositorInfo &comp
}
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST>
size_t GPUEngineBase::_CompositeLineDeferred_LoopOp(GPUEngineCompositorInfo &compInfo, const u16 *__restrict srcColorCustom16, const u8 *__restrict srcIndexCustom)
size_t GPUEngineBase::_CompositeLineDeferred_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const u16 *__restrict srcColorCustom16, const u8 *__restrict srcIndexCustom)
{
static const size_t step = sizeof(v128u8);
@ -2393,7 +2393,7 @@ size_t GPUEngineBase::_CompositeLineDeferred_LoopOp(GPUEngineCompositorInfo &com
if (WILLPERFORMWINDOWTEST)
{
// Do the window test.
passMask8 = _mm_load_si128((v128u8 *)(this->_didPassWindowTestCustom[compInfo.renderState.selectedLayerID] + compInfo.target.xCustom));
passMask8 = _mm_load_si128((v128u8 *)(windowTestPtr + compInfo.target.xCustom));
}
if (LAYERTYPE == GPULayerType_BG)
@ -2439,7 +2439,7 @@ size_t GPUEngineBase::_CompositeLineDeferred_LoopOp(GPUEngineCompositorInfo &com
src[1], src[0],
srcEffectEnableMask,
dstBlendEnableMaskLUT,
this->_enableColorEffectCustom[compInfo.renderState.selectedLayerID] + compInfo.target.xCustom,
colorEffectEnablePtr + compInfo.target.xCustom,
this->_sprAlphaCustom + compInfo.target.xCustom,
this->_sprTypeCustom + compInfo.target.xCustom);
}
@ -2448,7 +2448,7 @@ size_t GPUEngineBase::_CompositeLineDeferred_LoopOp(GPUEngineCompositorInfo &com
}
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST>
size_t GPUEngineBase::_CompositeVRAMLineDeferred_LoopOp(GPUEngineCompositorInfo &compInfo, const void *__restrict vramColorPtr)
size_t GPUEngineBase::_CompositeVRAMLineDeferred_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const void *__restrict vramColorPtr)
{
static const size_t step = sizeof(v128u8);
@ -2472,7 +2472,7 @@ size_t GPUEngineBase::_CompositeVRAMLineDeferred_LoopOp(GPUEngineCompositorInfo
if (WILLPERFORMWINDOWTEST)
{
// Do the window test.
passMask8 = _mm_load_si128((v128u8 *)(this->_didPassWindowTestCustom[compInfo.renderState.selectedLayerID] + compInfo.target.xCustom));
passMask8 = _mm_load_si128((v128u8 *)(windowTestPtr + compInfo.target.xCustom));
// If none of the pixels within the vector pass, then reject them all at once.
passMaskValue = _mm_movemask_epi8(passMask8);
@ -2521,7 +2521,7 @@ size_t GPUEngineBase::_CompositeVRAMLineDeferred_LoopOp(GPUEngineCompositorInfo
src16[1], src16[0],
srcEffectEnableMask,
dstBlendEnableMaskLUT,
this->_enableColorEffectCustom[compInfo.renderState.selectedLayerID] + compInfo.target.xCustom,
colorEffectEnablePtr + compInfo.target.xCustom,
this->_sprAlphaCustom + compInfo.target.xCustom,
this->_sprTypeCustom + compInfo.target.xCustom);
break;
@ -2560,7 +2560,7 @@ size_t GPUEngineBase::_CompositeVRAMLineDeferred_LoopOp(GPUEngineCompositorInfo
src32[3], src32[2], src32[1], src32[0],
srcEffectEnableMask,
dstBlendEnableMaskLUT,
this->_enableColorEffectCustom[compInfo.renderState.selectedLayerID] + compInfo.target.xCustom,
colorEffectEnablePtr + compInfo.target.xCustom,
this->_sprAlphaCustom + compInfo.target.xCustom,
this->_sprTypeCustom + compInfo.target.xCustom);
break;
@ -2688,7 +2688,7 @@ void GPUEngineBase::_PerformWindowTestingNative(GPUEngineCompositorInfo &compInf
}
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST>
size_t GPUEngineA::_RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo, const FragmentColor *__restrict srcLinePtr)
size_t GPUEngineA::_RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo, const u8 *__restrict windowTestPtr, const u8 *__restrict colorEffectEnablePtr, const FragmentColor *__restrict srcLinePtr)
{
static const size_t step = sizeof(v128u8);
@ -2713,7 +2713,7 @@ size_t GPUEngineA::_RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo,
if (WILLPERFORMWINDOWTEST)
{
// Do the window test.
passMask8 = _mm_load_si128((v128u8 *)(this->_didPassWindowTestCustom[GPULayerID_BG0] + compInfo.target.xCustom));
passMask8 = _mm_load_si128((v128u8 *)(windowTestPtr + compInfo.target.xCustom));
// If none of the pixels within the vector pass, then reject them all at once.
passMaskValue = _mm_movemask_epi8(passMask8);
@ -2757,7 +2757,7 @@ size_t GPUEngineA::_RenderLine_Layer3D_LoopOp(GPUEngineCompositorInfo &compInfo,
src[3], src[2], src[1], src[0],
srcEffectEnableMask,
dstBlendEnableMaskLUT,
this->_enableColorEffectCustom[GPULayerID_BG0] + compInfo.target.xCustom,
colorEffectEnablePtr + compInfo.target.xCustom,
NULL,
NULL);
}

View File

@ -3337,20 +3337,16 @@ void FASTCALL _MMU_ARM9_write08(u32 adr, u8 val)
case REG_DISPA_WIN0H:
T1WriteByte(MMU.ARM9_REG, 0x0040, val);
mainEngine->ParseReg_WINnH<0>();
return;
case REG_DISPA_WIN0H+1:
T1WriteByte(MMU.ARM9_REG, 0x0041, val);
mainEngine->ParseReg_WINnH<0>();
return;
case REG_DISPA_WIN1H:
T1WriteByte(MMU.ARM9_REG, 0x0042, val);
mainEngine->ParseReg_WINnH<1>();
return;
case REG_DISPA_WIN1H+1:
T1WriteByte(MMU.ARM9_REG, 0x0043, val);
mainEngine->ParseReg_WINnH<1>();
return;
case REG_DISPA_WIN0V:
@ -3506,20 +3502,16 @@ void FASTCALL _MMU_ARM9_write08(u32 adr, u8 val)
case REG_DISPB_WIN0H:
T1WriteByte(MMU.ARM9_REG, 0x1040, val);
subEngine->ParseReg_WINnH<0>();
return;
case REG_DISPB_WIN0H+1:
T1WriteByte(MMU.ARM9_REG, 0x1041, val);
subEngine->ParseReg_WINnH<0>();
return;
case REG_DISPB_WIN1H:
T1WriteByte(MMU.ARM9_REG, 0x1042, val);
subEngine->ParseReg_WINnH<1>();
return;
case REG_DISPB_WIN1H+1:
T1WriteByte(MMU.ARM9_REG, 0x1043, val);
subEngine->ParseReg_WINnH<1>();
return;
case REG_DISPB_WIN0V:
@ -3875,12 +3867,10 @@ void FASTCALL _MMU_ARM9_write16(u32 adr, u16 val)
case REG_DISPA_WIN0H:
T1WriteWord(MMU.ARM9_REG, 0x0040, val);
mainEngine->ParseReg_WINnH<0>();
return;
case REG_DISPA_WIN1H:
T1WriteWord(MMU.ARM9_REG, 0x0042, val);
mainEngine->ParseReg_WINnH<1>();
return;
case REG_DISPA_WIN0V:
@ -4062,12 +4052,10 @@ void FASTCALL _MMU_ARM9_write16(u32 adr, u16 val)
case REG_DISPB_WIN0H:
T1WriteWord(MMU.ARM9_REG, 0x1040, val);
subEngine->ParseReg_WINnH<0>();
return;
case REG_DISPB_WIN1H:
T1WriteWord(MMU.ARM9_REG, 0x1042, val);
subEngine->ParseReg_WINnH<1>();
return;
case REG_DISPB_WIN0V:
@ -4462,8 +4450,6 @@ void FASTCALL _MMU_ARM9_write32(u32 adr, u32 val)
case REG_DISPA_WIN0H:
T1WriteLong(MMU.ARM9_REG, 0x0040, val);
mainEngine->ParseReg_WINnH<0>();
mainEngine->ParseReg_WINnH<1>();
return;
case REG_DISPA_WIN0V:
@ -4577,8 +4563,6 @@ void FASTCALL _MMU_ARM9_write32(u32 adr, u32 val)
case REG_DISPB_WIN0H:
T1WriteLong(MMU.ARM9_REG, 0x1040, val);
subEngine->ParseReg_WINnH<0>();
subEngine->ParseReg_WINnH<1>();
return;
case REG_DISPB_WIN0V: