diff --git a/desmume/src/GPU.cpp b/desmume/src/GPU.cpp index 15c26bce0..bf6fee99d 100644 --- a/desmume/src/GPU.cpp +++ b/desmume/src/GPU.cpp @@ -1667,7 +1667,7 @@ void GPUEngineBase::_LineLayerIDCopy(u8 *__restrict dstBuffer, const u8 *__restr /*****************************************************************************/ // PIXEL RENDERING /*****************************************************************************/ -template +template FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compState, const u16 srcColor16, const u8 srcAlpha) { u16 &dstColor16 = *compState.lineColorTarget16; @@ -1696,16 +1696,12 @@ FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compState, return; } - if (!NOWINDOWSENABLEDHINT) + if ( WILLPERFORMWINDOWTEST && (this->_didPassWindowTestNative[compState.selectedLayerID][compState.xNative] == 0) ) { - const bool didPassWindowTest = (this->_didPassWindowTestNative[compState.selectedLayerID][compState.xNative] != 0); - if (!didPassWindowTest) - { - return; - } + return; } - const bool enableColorEffect = (this->_enableColorEffectNative[compState.selectedLayerID][compState.xNative] != 0); + const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (this->_enableColorEffectNative[compState.selectedLayerID][compState.xNative] != 0) : true; if (!ISSRCLAYEROBJ && COLOREFFECTDISABLEDHINT) { @@ -1887,7 +1883,7 @@ FORCEINLINE void GPUEngineBase::_RenderPixel(GPUEngineCompositorInfo &compState, #ifdef ENABLE_SSE2 -template +template FORCEINLINE void GPUEngineBase::_RenderPixel16_SSE2(GPUEngineCompositorInfo &compState, const __m128i &src3, const __m128i &src2, const __m128i &src1, const __m128i &src0, const __m128i &srcAlpha, @@ -1929,11 +1925,7 @@ FORCEINLINE void GPUEngineBase::_RenderPixel16_SSE2(GPUEngineCompositorInfo &com __m128i enableColorEffectMask; - if (NOWINDOWSENABLEDHINT) - { - enableColorEffectMask = _mm_set1_epi8(0xFF); - } - else + if (WILLPERFORMWINDOWTEST) { // Do the window test. __m128i didPassWindowTest = _mm_cmpeq_epi8( _mm_load_si128((__m128i *)(this->_didPassWindowTestCustom[compState.selectedLayerID] + compState.xCustom)), _mm_set1_epi8(1) ); @@ -1943,6 +1935,10 @@ FORCEINLINE void GPUEngineBase::_RenderPixel16_SSE2(GPUEngineCompositorInfo &com enableColorEffectMask = _mm_cmpeq_epi8( _mm_load_si128((__m128i *)(this->_enableColorEffectCustom[compState.selectedLayerID] + compState.xCustom)), _mm_set1_epi8(1) ); } + else + { + enableColorEffectMask = _mm_set1_epi8(0xFF); + } if ( (!ISSRCLAYEROBJ && COLOREFFECTDISABLEDHINT) || (_mm_movemask_epi8(srcEffectEnableMask) == 0) ) { @@ -1983,7 +1979,7 @@ FORCEINLINE void GPUEngineBase::_RenderPixel16_SSE2(GPUEngineCompositorInfo &com // Select the color effect based on the BLDCNT target flags. __m128i forceBlendEffectMask = _mm_setzero_si128(); - const __m128i colorEffect_vec128 = (NOWINDOWSENABLEDHINT) ? _mm_set1_epi8(compState.colorEffect) : _mm_blendv_epi8(_mm_set1_epi8(ColorEffect_Disable), _mm_set1_epi8(compState.colorEffect), enableColorEffectMask); + const __m128i colorEffect_vec128 = (WILLPERFORMWINDOWTEST) ? _mm_blendv_epi8(_mm_set1_epi8(ColorEffect_Disable), _mm_set1_epi8(compState.colorEffect), enableColorEffectMask) : _mm_set1_epi8(compState.colorEffect); __m128i eva_vec128 = _mm_set1_epi16(compState.blendEVA); __m128i evb_vec128 = _mm_set1_epi16(compState.blendEVB); @@ -2442,7 +2438,7 @@ void GPUEngineBase::_MosaicSpriteLine(GPUEngineCompositorInfo &compState, u16 *_ } } -template +template void GPUEngineBase::_RenderPixelIterate_Final(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter ¶m, const u32 map, const u32 tile, const u16 *__restrict pal) { const u16 lineWidth = (ISDEBUGRENDER) ? compState.selectedBGLayer->size.width : GPU_FRAMEBUFFER_NATIVE_WIDTH; @@ -2489,7 +2485,7 @@ void GPUEngineBase::_RenderPixelIterate_Final(GPUEngineCompositorInfo &compState } else { - this->_RenderPixelSingle(compState, i, srcColor, (index != 0)); + this->_RenderPixelSingle(compState, i, srcColor, (index != 0)); } auxX++; @@ -2520,28 +2516,28 @@ void GPUEngineBase::_RenderPixelIterate_Final(GPUEngineCompositorInfo &compState } else { - this->_RenderPixelSingle(compState, i, srcColor, (index != 0)); + this->_RenderPixelSingle(compState, i, srcColor, (index != 0)); } } } } -template +template void GPUEngineBase::_RenderPixelIterate_ApplyWrap(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter ¶m, const u32 map, const u32 tile, const u16 *__restrict pal) { - this->_RenderPixelIterate_Final(compState, param, map, tile, pal); + this->_RenderPixelIterate_Final(compState, param, map, tile, pal); } -template +template void GPUEngineBase::_RenderPixelIterate(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter ¶m, const u32 map, const u32 tile, const u16 *__restrict pal) { if (compState.selectedBGLayer->isDisplayWrapped) { - this->_RenderPixelIterate_ApplyWrap(compState, param, map, tile, pal); + this->_RenderPixelIterate_ApplyWrap(compState, param, map, tile, pal); } else { - this->_RenderPixelIterate_ApplyWrap(compState, param, map, tile, pal); + this->_RenderPixelIterate_ApplyWrap(compState, param, map, tile, pal); } } @@ -2557,7 +2553,7 @@ TILEENTRY GPUEngineBase::_GetTileEntry(const u32 tileMapAddress, const u16 xOffs return theTileEntry; } -template +template FORCEINLINE void GPUEngineBase::_RenderPixelSingle(GPUEngineCompositorInfo &compState, const size_t srcX, u16 srcColor16, const bool opaque) { bool willRenderColor = opaque; @@ -2588,13 +2584,13 @@ FORCEINLINE void GPUEngineBase::_RenderPixelSingle(GPUEngineCompositorInfo &comp if (willRenderColor) { - this->_RenderPixel(compState, + this->_RenderPixel(compState, srcColor16, 0); } } -template +template void GPUEngineBase::_RenderPixelsCustom(GPUEngineCompositorInfo &compState) { #ifdef ENABLE_SSE2 @@ -2758,7 +2754,7 @@ void GPUEngineBase::_RenderPixelsCustom(GPUEngineCompositorInfo &compState) dst[3] = _mm_load_si128((__m128i *)*compState.lineColorTarget + 3); } - this->_RenderPixel16_SSE2(compState, + this->_RenderPixel16_SSE2(compState, src[3], src[2], src[1], src[0], srcAlpha, srcEffectEnableMask, @@ -2788,14 +2784,14 @@ void GPUEngineBase::_RenderPixelsCustom(GPUEngineCompositorInfo &compState) continue; } - this->_RenderPixel(compState, + this->_RenderPixel(compState, this->_bgLayerColorCustom[compState.xCustom], 0); } } } -template +template void GPUEngineBase::_RenderPixelsCustomVRAM(GPUEngineCompositorInfo &compState) { const u16 *__restrict srcLine = GPU->GetCustomVRAMAddressUsingMappedAddress(compState.selectedBGLayer->BMPAddress) + compState.blockOffsetCustom; @@ -2860,7 +2856,7 @@ void GPUEngineBase::_RenderPixelsCustomVRAM(GPUEngineCompositorInfo &compState) dst[3] = _mm_load_si128((__m128i *)*compState.lineColorTarget + 3); } - this->_RenderPixel16_SSE2(compState, + this->_RenderPixel16_SSE2(compState, src[3], src[2], src[1], src[0], srcAlpha, srcEffectEnableMask, @@ -2890,7 +2886,7 @@ void GPUEngineBase::_RenderPixelsCustomVRAM(GPUEngineCompositorInfo &compState) continue; } - this->_RenderPixel(compState, + this->_RenderPixel(compState, srcLine[i], 0); } @@ -2900,7 +2896,7 @@ void GPUEngineBase::_RenderPixelsCustomVRAM(GPUEngineCompositorInfo &compState) // BACKGROUND RENDERING -TEXT- /*****************************************************************************/ // render a text background to the combined pixelbuffer -template +template void GPUEngineBase::_RenderLine_BGText(GPUEngineCompositorInfo &compState, const u16 XBG, const u16 YBG) { const IOREG_DISPCNT &DISPCNT = this->_IORegisterMap->DISPCNT; @@ -2948,7 +2944,7 @@ void GPUEngineBase::_RenderLine_BGText(GPUEngineCompositorInfo &compState, const { index = *tileColorIdx & 0x0F; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle(compState, x, color, (index != 0)); + this->_RenderPixelSingle(compState, x, color, (index != 0)); } x++; @@ -2967,7 +2963,7 @@ void GPUEngineBase::_RenderLine_BGText(GPUEngineCompositorInfo &compState, const { index = *tileColorIdx >> 4; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle(compState, x, color, (index != 0)); + this->_RenderPixelSingle(compState, x, color, (index != 0)); } x++; @@ -2984,7 +2980,7 @@ void GPUEngineBase::_RenderLine_BGText(GPUEngineCompositorInfo &compState, const { index = *tileColorIdx & 0x0F; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle(compState, x, color, (index != 0)); + this->_RenderPixelSingle(compState, x, color, (index != 0)); } x++; @@ -3007,7 +3003,7 @@ void GPUEngineBase::_RenderLine_BGText(GPUEngineCompositorInfo &compState, const { index = *tileColorIdx >> 4; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle(compState, x, color, (index != 0)); + this->_RenderPixelSingle(compState, x, color, (index != 0)); } x++; @@ -3026,7 +3022,7 @@ void GPUEngineBase::_RenderLine_BGText(GPUEngineCompositorInfo &compState, const { index = *tileColorIdx & 0x0F; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle(compState, x, color, (index != 0)); + this->_RenderPixelSingle(compState, x, color, (index != 0)); } x++; @@ -3043,7 +3039,7 @@ void GPUEngineBase::_RenderLine_BGText(GPUEngineCompositorInfo &compState, const { index = *tileColorIdx >> 4; color = LE_TO_LOCAL_16(pal[index + tilePalette]); - this->_RenderPixelSingle(compState, x, color, (index != 0)); + this->_RenderPixelSingle(compState, x, color, (index != 0)); } x++; @@ -3088,20 +3084,20 @@ void GPUEngineBase::_RenderLine_BGText(GPUEngineCompositorInfo &compState, const { const u8 index = *tileColorIdx; const u16 color = LE_TO_LOCAL_16(tilePal[index]); - this->_RenderPixelSingle(compState, x, color, (index != 0)); + this->_RenderPixelSingle(compState, x, color, (index != 0)); } } } } } -template +template void GPUEngineBase::_RenderLine_BGAffine(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter ¶m) { - this->_RenderPixelIterate(compState, param, compState.selectedBGLayer->tileMapAddress, compState.selectedBGLayer->tileEntryAddress, this->_paletteBG); + this->_RenderPixelIterate(compState, param, compState.selectedBGLayer->tileMapAddress, compState.selectedBGLayer->tileEntryAddress, this->_paletteBG); } -template +template void GPUEngineBase::_RenderLine_BGExtended(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter ¶m, bool &outUseCustomVRAM) { const IOREG_DISPCNT &DISPCNT = this->_IORegisterMap->DISPCNT; @@ -3112,17 +3108,17 @@ void GPUEngineBase::_RenderLine_BGExtended(GPUEngineCompositorInfo &compState, c { if (DISPCNT.ExBGxPalette_Enable) { - this->_RenderPixelIterate< OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_tiled_16bit_entry >(compState, param, compState.selectedBGLayer->tileMapAddress, compState.selectedBGLayer->tileEntryAddress, *(compState.selectedBGLayer->extPalette)); + this->_RenderPixelIterate< OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_tiled_16bit_entry >(compState, param, compState.selectedBGLayer->tileMapAddress, compState.selectedBGLayer->tileEntryAddress, *(compState.selectedBGLayer->extPalette)); } else { - this->_RenderPixelIterate< OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, NOWINDOWSENABLEDHINT, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_tiled_16bit_entry >(compState, param, compState.selectedBGLayer->tileMapAddress, compState.selectedBGLayer->tileEntryAddress, this->_paletteBG); + this->_RenderPixelIterate< OUTPUTFORMAT, ISDEBUGRENDER, MOSAIC, WILLPERFORMWINDOWTEST, COLOREFFECTDISABLEDHINT, ISCUSTOMRENDERINGNEEDED, rot_tiled_16bit_entry >(compState, param, compState.selectedBGLayer->tileMapAddress, compState.selectedBGLayer->tileEntryAddress, this->_paletteBG); } break; } case BGType_AffineExt_256x1: // 256 colors - this->_RenderPixelIterate(compState, param, compState.selectedBGLayer->BMPAddress, 0, this->_paletteBG); + this->_RenderPixelIterate(compState, param, compState.selectedBGLayer->BMPAddress, 0, this->_paletteBG); break; case BGType_AffineExt_Direct: // direct colors / BMP @@ -3171,7 +3167,7 @@ void GPUEngineBase::_RenderLine_BGExtended(GPUEngineCompositorInfo &compState, c if (!outUseCustomVRAM) { - this->_RenderPixelIterate(compState, param, compState.selectedBGLayer->BMPAddress, 0, this->_paletteBG); + this->_RenderPixelIterate(compState, param, compState.selectedBGLayer->BMPAddress, 0, this->_paletteBG); } else { @@ -3202,7 +3198,7 @@ void GPUEngineBase::_RenderLine_BGExtended(GPUEngineCompositorInfo &compState, c } case BGType_Large8bpp: // large screen 256 colors - this->_RenderPixelIterate(compState, param, compState.selectedBGLayer->largeBMPAddress, 0, this->_paletteBG); + this->_RenderPixelIterate(compState, param, compState.selectedBGLayer->largeBMPAddress, 0, this->_paletteBG); break; default: @@ -3214,49 +3210,49 @@ void GPUEngineBase::_RenderLine_BGExtended(GPUEngineCompositorInfo &compState, c // BACKGROUND RENDERING -HELPER FUNCTIONS- /*****************************************************************************/ -template +template void GPUEngineBase::_LineText(GPUEngineCompositorInfo &compState) { if (ISDEBUGRENDER) { - this->_RenderLine_BGText(compState, 0, compState.lineIndexNative); + this->_RenderLine_BGText(compState, 0, compState.lineIndexNative); } else { - this->_RenderLine_BGText(compState, compState.selectedBGLayer->xOffset, compState.lineIndexNative + compState.selectedBGLayer->yOffset); + this->_RenderLine_BGText(compState, compState.selectedBGLayer->xOffset, compState.lineIndexNative + compState.selectedBGLayer->yOffset); } } -template +template void GPUEngineBase::_LineRot(GPUEngineCompositorInfo &compState) { if (ISDEBUGRENDER) { static const IOREG_BGnParameter debugParams = {256, 0, 0, -77, 0, compState.blockOffsetNative}; - this->_RenderLine_BGAffine(compState, debugParams); + this->_RenderLine_BGAffine(compState, debugParams); } else { IOREG_BGnParameter *__restrict bgParams = (compState.selectedLayerID == GPULayerID_BG2) ? (IOREG_BGnParameter *)&this->_IORegisterMap->BG2Param : (IOREG_BGnParameter *)&this->_IORegisterMap->BG3Param; - this->_RenderLine_BGAffine(compState, *bgParams); + this->_RenderLine_BGAffine(compState, *bgParams); bgParams->BGnX.value += bgParams->BGnPB.value; bgParams->BGnY.value += bgParams->BGnPD.value; } } -template +template void GPUEngineBase::_LineExtRot(GPUEngineCompositorInfo &compState, bool &outUseCustomVRAM) { if (ISDEBUGRENDER) { static const IOREG_BGnParameter debugParams = {256, 0, 0, -77, 0, compState.blockOffsetNative}; - this->_RenderLine_BGExtended(compState, debugParams, outUseCustomVRAM); + this->_RenderLine_BGExtended(compState, debugParams, outUseCustomVRAM); } else { IOREG_BGnParameter *__restrict bgParams = (compState.selectedLayerID == GPULayerID_BG2) ? (IOREG_BGnParameter *)&this->_IORegisterMap->BG2Param : (IOREG_BGnParameter *)&this->_IORegisterMap->BG3Param; - this->_RenderLine_BGExtended(compState, *bgParams, outUseCustomVRAM); + this->_RenderLine_BGExtended(compState, *bgParams, outUseCustomVRAM); bgParams->BGnX.value += bgParams->BGnPB.value; bgParams->BGnY.value += bgParams->BGnPD.value; @@ -3914,7 +3910,7 @@ void GPUEngineBase::_SpriteRenderPerform(GPUEngineCompositorInfo &compState, u16 } } -template +template void GPUEngineBase::_RenderLine_Layers(const size_t l) { const NDSDisplayInfo &dispInfo = GPU->GetDisplayInfo(); @@ -3969,7 +3965,7 @@ void GPUEngineBase::_RenderLine_Layers(const size_t l) this->_RenderLine_SetupSprites(compState); } - if (!NOWINDOWSENABLEDHINT) + if (WILLPERFORMWINDOWTEST) { this->_PerformWindowTesting(compState); } @@ -3996,18 +3992,18 @@ void GPUEngineBase::_RenderLine_Layers(const size_t l) { if ( (layerID == GPULayerID_BG0) && GPU->GetEngineMain()->WillRender3DLayer() ) { - GPU->GetEngineMain()->RenderLine_Layer3D(compState); + GPU->GetEngineMain()->RenderLine_Layer3D(compState); continue; } } if (this->isLineRenderNative[compState.lineIndexNative]) { - this->_RenderLine_LayerBG(compState); + this->_RenderLine_LayerBG(compState); } else { - this->_RenderLine_LayerBG(compState); + this->_RenderLine_LayerBG(compState); } } //layer enabled } @@ -4018,7 +4014,7 @@ void GPUEngineBase::_RenderLine_Layers(const size_t l) { compState.selectedLayerID = GPULayerID_OBJ; compState.selectedBGLayer = NULL; - this->_RenderLine_LayerOBJ(compState, item); + this->_RenderLine_LayerOBJ(compState, item); } } } @@ -4051,7 +4047,7 @@ void GPUEngineBase::_RenderLine_SetupSprites(GPUEngineCompositorInfo &compState) } } -template +template void GPUEngineBase::_RenderLine_LayerOBJ(GPUEngineCompositorInfo &compState, itemsForPriority_t *__restrict item) { if (this->vramBlockOBJIndex != VRAM_NO_3D_USAGE) @@ -4114,9 +4110,9 @@ void GPUEngineBase::_RenderLine_LayerOBJ(GPUEngineCompositorInfo &compState, ite compState.lineColorTarget32 = (FragmentColor *)compState.lineColorHead + srcX; compState.lineLayerIDTarget = compState.lineLayerIDHead + srcX; - this->_RenderPixel(compState, - this->_sprColor[srcX], - this->_sprAlpha[srcX]); + this->_RenderPixel(compState, + this->_sprColor[srcX], + this->_sprAlpha[srcX]); } } else @@ -4144,9 +4140,9 @@ void GPUEngineBase::_RenderLine_LayerOBJ(GPUEngineCompositorInfo &compState, ite compState.lineColorTarget32 = (FragmentColor *)dstColorPtr + dstX; compState.lineLayerIDTarget = dstLayerIDPtr + dstX; - this->_RenderPixel(compState, - (useCustomVRAM) ? srcLine[dstX] : this->_sprColor[srcX], - this->_sprAlpha[srcX]); + this->_RenderPixel(compState, + (useCustomVRAM) ? srcLine[dstX] : this->_sprColor[srcX], + this->_sprAlpha[srcX]); } } @@ -4482,7 +4478,7 @@ void GPUEngineBase::_PerformWindowTesting(GPUEngineCompositorInfo &compState) // Window 0 has the highest priority, so always check this first. if (this->_WIN0_ENABLED && this->_IsWindowInsideVerticalRange<0>(compState)) { - if (this->_h_win[0][i] == 1) + if (this->_h_win[0][i] != 0) { this->_didPassWindowTestNative[layerID][i] = this->_WIN0_enable[layerID]; this->_enableColorEffectNative[layerID][i] = this->_WIN0_enable[WINDOWCONTROL_EFFECTFLAG]; @@ -4493,7 +4489,7 @@ void GPUEngineBase::_PerformWindowTesting(GPUEngineCompositorInfo &compState) // Window 1 has medium priority, and is checked after Window 0. if (this->_WIN1_ENABLED && this->_IsWindowInsideVerticalRange<1>(compState)) { - if (this->_h_win[1][i] == 1) + if (this->_h_win[1][i] != 0) { this->_didPassWindowTestNative[layerID][i] = this->_WIN1_enable[layerID]; this->_enableColorEffectNative[layerID][i] = this->_WIN1_enable[WINDOWCONTROL_EFFECTFLAG]; @@ -4504,7 +4500,7 @@ void GPUEngineBase::_PerformWindowTesting(GPUEngineCompositorInfo &compState) // Window OBJ has low priority, and is checked after both Window 0 and Window 1. if (this->_WINOBJ_ENABLED) { - if (this->_sprWin[i] == 1) + if (this->_sprWin[i] != 0) { this->_didPassWindowTestNative[layerID][i] = this->_WINOBJ_enable[layerID]; this->_enableColorEffectNative[layerID][i] = this->_WINOBJ_enable[WINDOWCONTROL_EFFECTFLAG]; @@ -4561,17 +4557,17 @@ void GPUEngineBase::UpdateVRAM3DUsageProperties_OBJLayer(const size_t bankIndex) } } -template +template void GPUEngineBase::_RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compState) { bool useCustomVRAM = false; switch (compState.selectedBGLayer->baseType) { - case BGType_Text: this->_LineText(compState); break; - case BGType_Affine: this->_LineRot(compState); break; - case BGType_AffineExt: this->_LineExtRot(compState, useCustomVRAM); break; - case BGType_Large8bpp: this->_LineExtRot(compState, useCustomVRAM); break; + case BGType_Text: this->_LineText(compState); break; + case BGType_Affine: this->_LineRot(compState); break; + case BGType_AffineExt: this->_LineExtRot(compState, useCustomVRAM); break; + case BGType_Large8bpp: this->_LineExtRot(compState, useCustomVRAM); break; case BGType_Invalid: PROGINFO("Attempting to render an invalid BG type\n"); break; @@ -4588,35 +4584,35 @@ void GPUEngineBase::_RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compState if (useCustomVRAM) { - this->_RenderPixelsCustomVRAM(compState); + this->_RenderPixelsCustomVRAM(compState); } else { - this->_RenderPixelsCustom(compState); + this->_RenderPixelsCustom(compState); } } } -template +template void GPUEngineBase::_RenderLine_LayerBG_ApplyColorEffectDisabledHint(GPUEngineCompositorInfo &compState) { - this->_RenderLine_LayerBG_Final(compState); + this->_RenderLine_LayerBG_Final(compState); } -template +template void GPUEngineBase::_RenderLine_LayerBG_ApplyMosaic(GPUEngineCompositorInfo &compState) { if (compState.colorEffect == ColorEffect_Disable) { - this->_RenderLine_LayerBG_ApplyColorEffectDisabledHint(compState); + this->_RenderLine_LayerBG_ApplyColorEffectDisabledHint(compState); } else { - this->_RenderLine_LayerBG_ApplyColorEffectDisabledHint(compState); + this->_RenderLine_LayerBG_ApplyColorEffectDisabledHint(compState); } } -template +template void GPUEngineBase::_RenderLine_LayerBG(GPUEngineCompositorInfo &compState) { if (ISDEBUGRENDER) @@ -4628,12 +4624,12 @@ void GPUEngineBase::_RenderLine_LayerBG(GPUEngineCompositorInfo &compState) #ifndef DISABLE_MOSAIC if (compState.selectedBGLayer->isMosaic && this->_isBGMosaicSet) { - this->_RenderLine_LayerBG_ApplyMosaic(compState); + this->_RenderLine_LayerBG_ApplyMosaic(compState); } else #endif { - this->_RenderLine_LayerBG_ApplyMosaic(compState); + this->_RenderLine_LayerBG_ApplyMosaic(compState); } } } @@ -5331,11 +5327,11 @@ void GPUEngineA::RenderLine(const u16 l) { if (this->_isAnyWindowEnabled) { - this->_RenderLine_Layers(l); + this->_RenderLine_Layers(l); } else { - this->_RenderLine_Layers(l); + this->_RenderLine_Layers(l); } } @@ -5377,7 +5373,7 @@ void GPUEngineA::RenderLine(const u16 l) } } -template +template void GPUEngineA::RenderLine_Layer3D(GPUEngineCompositorInfo &compState) { const FragmentColor *__restrict framebuffer3D = CurrentRenderer->GetFramebuffer(); @@ -5441,17 +5437,17 @@ void GPUEngineA::RenderLine_Layer3D(GPUEngineCompositorInfo &compState) __m128i passMask8; __m128i enableColorEffectMask; - if (NOWINDOWSENABLEDHINT) - { - passMask8 = _mm_set1_epi8(0xFF); - enableColorEffectMask = _mm_set1_epi8(0xFF); - } - else + if (WILLPERFORMWINDOWTEST) { // Do the window test. passMask8 = _mm_cmpeq_epi8( _mm_load_si128((__m128i *)(this->_didPassWindowTestCustom[compState.selectedLayerID] + compState.xCustom)), _mm_set1_epi8(1) ); enableColorEffectMask = _mm_cmpeq_epi8( _mm_load_si128((__m128i *)(this->_enableColorEffectCustom[compState.selectedLayerID] + compState.xCustom)), _mm_set1_epi8(1) ); } + else + { + passMask8 = _mm_set1_epi8(0xFF); + enableColorEffectMask = _mm_set1_epi8(0xFF); + } // Do the alpha test. Pixels with an alpha value of 0 are rejected. passMask8 = _mm_andnot_si128(_mm_cmpeq_epi8(srcAlpha, _mm_setzero_si128()), passMask8); @@ -5509,12 +5505,12 @@ void GPUEngineA::RenderLine_Layer3D(GPUEngineCompositorInfo &compState) #endif for (; compState.xCustom < compState.lineWidthCustom; srcLinePtr++, compState.xCustom++, compState.xNative = _gpuDstToSrcIndex[compState.xCustom], compState.lineColorTarget16++, compState.lineColorTarget32++, compState.lineLayerIDTarget++) { - if ( (srcLinePtr->a == 0) || (!NOWINDOWSENABLEDHINT && (this->_didPassWindowTestCustom[compState.selectedLayerID][compState.xCustom] != 0)) ) + if ( (srcLinePtr->a == 0) || (WILLPERFORMWINDOWTEST && (this->_didPassWindowTestCustom[compState.selectedLayerID][compState.xCustom] == 0)) ) { continue; } - const bool enableColorEffect = (NOWINDOWSENABLEDHINT) ? true : (this->_enableColorEffectCustom[compState.selectedLayerID][compState.xCustom] != 0); + const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (this->_enableColorEffectCustom[compState.selectedLayerID][compState.xCustom] != 0) : true; this->_RenderPixel3D(compState, enableColorEffect, @@ -5528,7 +5524,7 @@ void GPUEngineA::RenderLine_Layer3D(GPUEngineCompositorInfo &compState) { for (compState.xNative = 0, compState.xCustom = 0; compState.xCustom < compState.lineWidthCustom; compState.xCustom++, compState.xNative = _gpuDstToSrcIndex[compState.xCustom], compState.lineColorTarget16++, compState.lineColorTarget32++, compState.lineLayerIDTarget++) { - if ( !NOWINDOWSENABLEDHINT && (this->_didPassWindowTestCustom[compState.selectedLayerID][compState.xCustom] == 0) ) + if ( WILLPERFORMWINDOWTEST && (this->_didPassWindowTestCustom[compState.selectedLayerID][compState.xCustom] == 0) ) { continue; } @@ -5545,7 +5541,7 @@ void GPUEngineA::RenderLine_Layer3D(GPUEngineCompositorInfo &compState) } compState.xNative = _gpuDstToSrcIndex[compState.xCustom]; - const bool enableColorEffect = (NOWINDOWSENABLEDHINT) ? true : (this->_enableColorEffectCustom[compState.selectedLayerID][compState.xCustom] != 0); + const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (this->_enableColorEffectCustom[compState.selectedLayerID][compState.xCustom] != 0) : true; this->_RenderPixel3D(compState, enableColorEffect, @@ -6637,7 +6633,7 @@ void GPUEngineA::_HandleDisplayModeMainMemory(const size_t l) } } -template +template void GPUEngineA::_LineLarge8bpp(GPUEngineCompositorInfo &compState) { u16 XBG = this->_IORegisterMap->BGnOFS[compState.selectedLayerID].BGnHOFS.Offset; @@ -6666,7 +6662,7 @@ void GPUEngineA::_LineLarge8bpp(GPUEngineCompositorInfo &compState) { const u8 index = map[XBG]; const u16 color = LE_TO_LOCAL_16(this->_paletteBG[index]); - this->_RenderPixelSingle(compState, x, color, (color != 0)); + this->_RenderPixelSingle(compState, x, color, (color != 0)); } } } @@ -6745,11 +6741,11 @@ void GPUEngineB::RenderLine(const u16 l) { if (this->_isAnyWindowEnabled) { - this->_RenderLine_Layers(l); + this->_RenderLine_Layers(l); } else { - this->_RenderLine_Layers(l); + this->_RenderLine_Layers(l); } this->_HandleDisplayModeNormal(l); diff --git a/desmume/src/GPU.h b/desmume/src/GPU.h index 26b57eda3..6471b8e42 100644 --- a/desmume/src/GPU.h +++ b/desmume/src/GPU.h @@ -1373,26 +1373,26 @@ protected: void _MosaicSpriteLinePixel(GPUEngineCompositorInfo &compState, const size_t x, u16 *__restrict dst, u8 *__restrict dst_alpha, u8 *__restrict typeTab, u8 *__restrict prioTab); void _MosaicSpriteLine(GPUEngineCompositorInfo &compState, u16 *__restrict dst, u8 *__restrict dst_alpha, u8 *__restrict typeTab, u8 *__restrict prioTab); - template void _RenderPixelIterate_Final(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter ¶m, const u32 map, const u32 tile, const u16 *__restrict pal); - template void _RenderPixelIterate_ApplyWrap(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter ¶m, const u32 map, const u32 tile, const u16 *__restrict pal); - template void _RenderPixelIterate(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter ¶m, const u32 map, const u32 tile, const u16 *__restrict pal); + template void _RenderPixelIterate_Final(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter ¶m, const u32 map, const u32 tile, const u16 *__restrict pal); + template void _RenderPixelIterate_ApplyWrap(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter ¶m, const u32 map, const u32 tile, const u16 *__restrict pal); + template void _RenderPixelIterate(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter ¶m, const u32 map, const u32 tile, const u16 *__restrict pal); TILEENTRY _GetTileEntry(const u32 tileMapAddress, const u16 xOffset, const u16 layerWidthMask); - template FORCEINLINE void _RenderPixelSingle(GPUEngineCompositorInfo &compState, const size_t srcX, u16 srcColor16, const bool opaque); - template void _RenderPixelsCustom(GPUEngineCompositorInfo &compState); - template void _RenderPixelsCustomVRAM(GPUEngineCompositorInfo &compState); + template FORCEINLINE void _RenderPixelSingle(GPUEngineCompositorInfo &compState, const size_t srcX, u16 srcColor16, const bool opaque); + template void _RenderPixelsCustom(GPUEngineCompositorInfo &compState); + template void _RenderPixelsCustomVRAM(GPUEngineCompositorInfo &compState); - template void _RenderLine_BGText(GPUEngineCompositorInfo &compState, const u16 XBG, const u16 YBG); - template void _RenderLine_BGAffine(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter ¶m); - template void _RenderLine_BGExtended(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter ¶m, bool &outUseCustomVRAM); + template void _RenderLine_BGText(GPUEngineCompositorInfo &compState, const u16 XBG, const u16 YBG); + template void _RenderLine_BGAffine(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter ¶m); + template void _RenderLine_BGExtended(GPUEngineCompositorInfo &compState, const IOREG_BGnParameter ¶m, bool &outUseCustomVRAM); - template void _LineText(GPUEngineCompositorInfo &compState); - template void _LineRot(GPUEngineCompositorInfo &compState); - template void _LineExtRot(GPUEngineCompositorInfo &compState, bool &outUseCustomVRAM); + template void _LineText(GPUEngineCompositorInfo &compState); + template void _LineRot(GPUEngineCompositorInfo &compState); + template void _LineExtRot(GPUEngineCompositorInfo &compState, bool &outUseCustomVRAM); template void _RenderLine_Clear(GPUEngineCompositorInfo &compState); void _RenderLine_SetupSprites(GPUEngineCompositorInfo &compState); - template void _RenderLine_Layers(const size_t l); + template void _RenderLine_Layers(const size_t l); template void _HandleDisplayModeOff(const size_t l); template void _HandleDisplayModeNormal(const size_t l); @@ -1401,14 +1401,14 @@ protected: template bool _IsWindowInsideVerticalRange(GPUEngineCompositorInfo &compState); void _PerformWindowTesting(GPUEngineCompositorInfo &compState); - template void _RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compState); - template void _RenderLine_LayerBG_ApplyColorEffectDisabledHint(GPUEngineCompositorInfo &compState); - template void _RenderLine_LayerBG_ApplyMosaic(GPUEngineCompositorInfo &compState); - template void _RenderLine_LayerBG(GPUEngineCompositorInfo &compState); + template void _RenderLine_LayerBG_Final(GPUEngineCompositorInfo &compState); + template void _RenderLine_LayerBG_ApplyColorEffectDisabledHint(GPUEngineCompositorInfo &compState); + template void _RenderLine_LayerBG_ApplyMosaic(GPUEngineCompositorInfo &compState); + template void _RenderLine_LayerBG(GPUEngineCompositorInfo &compState); - template void _RenderLine_LayerOBJ(GPUEngineCompositorInfo &compState, itemsForPriority_t *__restrict item); + template void _RenderLine_LayerOBJ(GPUEngineCompositorInfo &compState, itemsForPriority_t *__restrict item); - template FORCEINLINE void _RenderPixel(GPUEngineCompositorInfo &compState, const u16 srcColor16, const u8 srcAlpha); + template FORCEINLINE void _RenderPixel(GPUEngineCompositorInfo &compState, const u16 srcColor16, const u8 srcAlpha); template FORCEINLINE void _RenderPixel3D(GPUEngineCompositorInfo &compState, const bool enableColorEffect, const FragmentColor srcColor32); FORCEINLINE u16 _ColorEffectBlend(const u16 colA, const u16 colB, const u16 blendEVA, const u16 blendEVB); @@ -1430,7 +1430,7 @@ protected: template FORCEINLINE __m128i _ColorEffectIncreaseBrightness(const __m128i &col, const __m128i &blendEVY); template FORCEINLINE __m128i _ColorEffectDecreaseBrightness(const __m128i &col, const __m128i &blendEVY); template FORCEINLINE void _RenderPixel_CheckWindows16_SSE2(GPUEngineCompositorInfo &compState, const size_t dstX, __m128i &didPassWindowTest, __m128i &enableColorEffect) const; - template FORCEINLINE void _RenderPixel16_SSE2(GPUEngineCompositorInfo &compState, const __m128i &src3, const __m128i &src2, const __m128i &src1, const __m128i &src0, const __m128i &srcAlpha, const __m128i &srcEffectEnableMask, __m128i &dst3, __m128i &dst2, __m128i &dst1, __m128i &dst0, __m128i &dstLayerID, __m128i &passMask8); + template FORCEINLINE void _RenderPixel16_SSE2(GPUEngineCompositorInfo &compState, const __m128i &src3, const __m128i &src2, const __m128i &src1, const __m128i &src0, const __m128i &srcAlpha, const __m128i &srcEffectEnableMask, __m128i &dst3, __m128i &dst2, __m128i &dst1, __m128i &dst0, __m128i &dstLayerID, __m128i &passMask8); template FORCEINLINE void _RenderPixel3D_SSE2(GPUEngineCompositorInfo &compState, const __m128i &passMask8, const __m128i &enableColorEffectMask, const __m128i &src3, const __m128i &src2, const __m128i &src1, const __m128i &src0, __m128i &dst3, __m128i &dst2, __m128i &dst1, __m128i &dst0, __m128i &dstLayerID); #endif @@ -1543,7 +1543,7 @@ protected: DISPCAPCNT_parsed _dispCapCnt; - template void _LineLarge8bpp(GPUEngineCompositorInfo &compState); + template void _LineLarge8bpp(GPUEngineCompositorInfo &compState); template void _RenderLine_DisplayCapture(const u16 l); void _RenderLine_DispCapture_FIFOToBuffer(u16 *fifoLineBuffer); @@ -1590,7 +1590,7 @@ public: virtual void Reset(); template void RenderLine(const u16 l); - template void RenderLine_Layer3D(GPUEngineCompositorInfo &compState); + template void RenderLine_Layer3D(GPUEngineCompositorInfo &compState); }; class GPUEngineB : public GPUEngineBase