From ee5fcf6bd21452da06c82eb20d670440150d7353 Mon Sep 17 00:00:00 2001 From: rogerman Date: Wed, 24 Feb 2016 07:33:42 +0000 Subject: [PATCH] =?UTF-8?q?GPU:=20-=20Fix=20possible=20memory=20corruption?= =?UTF-8?q?=20with=20display=20capture,=20at=20the=20cost=20of=20some=20pe?= =?UTF-8?q?rformance.=20(Regression=20from=20r5243.)=20-=20Add=20a=20coupl?= =?UTF-8?q?e=20more=20rules=20for=20determining=20if=20the=203D=20framebuf?= =?UTF-8?q?fer=20will=20be=20read=20directly=20for=20display=20capture.=20?= =?UTF-8?q?-=20Keep=20track=20of=20render=20states=20that=20are=20updated?= =?UTF-8?q?=20while=20rendering,=20even=20when=20the=20frame=20isn?= =?UTF-8?q?=E2=80=99t=20rendered.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- desmume/src/GPU.cpp | 136 +++++++++++++++++++++++++++++++++++--------- desmume/src/GPU.h | 8 ++- 2 files changed, 114 insertions(+), 30 deletions(-) diff --git a/desmume/src/GPU.cpp b/desmume/src/GPU.cpp index 7633b40b8..b0d2cf0e6 100644 --- a/desmume/src/GPU.cpp +++ b/desmume/src/GPU.cpp @@ -927,7 +927,30 @@ void GPUEngineBase::_RenderLine_Clear(const u16 clearColor, const u16 l, u16 *ds template void GPUEngineBase::RenderLine(const u16 l) { + // By default, do nothing. + this->UpdatePropertiesWithoutRender(l); +} + +void GPUEngineBase::UpdatePropertiesWithoutRender(const u16 l) +{ + // Update BG2/BG3 parameters for Affine and AffineExt modes + if ( this->_enableLayer[GPULayerID_BG2] && + ((this->_BGLayer[GPULayerID_BG2].baseType == BGType_Affine) || (this->_BGLayer[GPULayerID_BG2].baseType == BGType_AffineExt)) ) + { + IOREG_BG2Parameter &BG2Param = this->_IORegisterMap->BG2Param; + + BG2Param.BG2X.value += BG2Param.BG2PB.value; + BG2Param.BG2Y.value += BG2Param.BG2PD.value; + } + if ( this->_enableLayer[GPULayerID_BG3] && + ((this->_BGLayer[GPULayerID_BG3].baseType == BGType_Affine) || (this->_BGLayer[GPULayerID_BG3].baseType == BGType_AffineExt)) ) + { + IOREG_BG3Parameter &BG3Param = this->_IORegisterMap->BG3Param; + + BG3Param.BG3X.value += BG3Param.BG3PB.value; + BG3Param.BG3Y.value += BG3Param.BG3PD.value; + } } void GPUEngineBase::FramebufferPostprocess() @@ -4093,13 +4116,13 @@ void GPUEngineA::SetCustomFramebufferSize(size_t w, size_t h) bool GPUEngineA::WillRender3DLayer() { - return ( this->_enableLayer[GPULayerID_BG0] && (this->_IORegisterMap->DISPCNT.BG0_Enable != 0) && (this->_IORegisterMap->DISPCNT.BG0_3D != 0) ); + return ( this->_enableLayer[GPULayerID_BG0] && (this->_IORegisterMap->DISPCNT.BG0_3D != 0) ); } bool GPUEngineA::WillCapture3DLayerDirect() { const IOREG_DISPCAPCNT &DISPCAPCNT = this->_IORegisterMap->DISPCAPCNT; - return ( (DISPCAPCNT.CaptureEnable != 0) && (vramConfiguration.banks[DISPCAPCNT.VRAMWriteBlock].purpose == VramConfiguration::LCDC) && (DISPCAPCNT.SrcA != 0) ); + return ( (DISPCAPCNT.CaptureEnable != 0) && (DISPCAPCNT.SrcA != 0) && (DISPCAPCNT.CaptureSrc != 1) && (vramConfiguration.banks[DISPCAPCNT.VRAMWriteBlock].purpose == VramConfiguration::LCDC) ); } template @@ -4166,6 +4189,57 @@ void GPUEngineA::RenderLine(const u16 l) } } +void GPUEngineA::UpdatePropertiesWithoutRender(const u16 l) +{ + GPUEngineBase::UpdatePropertiesWithoutRender(l); + + // Update display capture properties + const IOREG_DISPCAPCNT &DISPCAPCNT = this->_IORegisterMap->DISPCAPCNT; + const u8 vramWriteBlock = DISPCAPCNT.VRAMWriteBlock; + + if ((DISPCAPCNT.CaptureEnable != 0) && (vramConfiguration.banks[vramWriteBlock].purpose == VramConfiguration::LCDC) && (l < this->_dispCapCnt.capy)) + { + const IOREG_DISPCNT &DISPCNT = this->_IORegisterMap->DISPCNT; + const u8 vramReadBlock = DISPCNT.VRAM_Block; + VRAM3DUsageProperties &vramUsageProperty = GPU->GetRenderProperties(); + + switch (DISPCAPCNT.CaptureSrc) + { + case 0: // Capture source is SourceA + vramUsageProperty.isCustomBlockUsed[vramWriteBlock] = this->isCustomRenderingNeeded; + break; + + case 1: // Capture source is SourceB + { + switch (DISPCAPCNT.SrcB) + { + case 0: // Capture VRAM + vramUsageProperty.isCustomBlockUsed[vramWriteBlock] = vramUsageProperty.isCustomBlockUsed[vramReadBlock]; + break; + + case 1: // Capture FIFO + vramUsageProperty.isCustomBlockUsed[vramWriteBlock] = false; + break; + } + break; + } + + default: // Capture source is SourceA+B blended + { + if ( (DISPCAPCNT.SrcB == 0) && vramUsageProperty.isCustomBlockUsed[vramReadBlock] ) + { + vramUsageProperty.isCustomBlockUsed[vramWriteBlock] = true; + } + else + { + vramUsageProperty.isCustomBlockUsed[vramWriteBlock] = this->isCustomRenderingNeeded; + } + break; + } + } + } +} + template void GPUEngineA::_RenderLine_Layer(const u16 l, u16 *dstColorLine, const size_t dstLineWidth, const size_t dstLineCount) { @@ -4573,7 +4647,7 @@ void GPUEngineA::_RenderLine_DispCapture_FIFOToBuffer(u16 *fifoLineBuffer) } template -void GPUEngineA::_RenderLine_DispCapture_Copy(const u16 *__restrict src, u16 *__restrict dst, const size_t captureLengthExt, const size_t captureLineCount) +void GPUEngineA::_RenderLine_DispCapture_Copy(const u16 *src, u16 *dst, const size_t captureLengthExt, const size_t captureLineCount) { const u16 alphaBit = (SOURCESWITCH == 0) ? 0x8000 : 0x0000; @@ -4752,7 +4826,7 @@ __m128i GPUEngineA::_RenderLine_DispCapture_BlendFunc_SSE2(__m128i &srcA, __m128 #endif template -void GPUEngineA::_RenderLine_DispCapture_BlendToCustomDstBuffer(const u16 *__restrict srcA, const u16 *__restrict srcB, u16 *__restrict dst, const u8 blendEVA, const u8 blendEVB, const size_t length, size_t l) +void GPUEngineA::_RenderLine_DispCapture_BlendToCustomDstBuffer(const u16 *srcA, const u16 *srcB, u16 *dst, const u8 blendEVA, const u8 blendEVB, const size_t length, size_t l) { #ifdef ENABLE_SSE2 const __m128i blendEVA_vec128 = _mm_set1_epi16(blendEVA); @@ -4799,7 +4873,7 @@ void GPUEngineA::_RenderLine_DispCapture_BlendToCustomDstBuffer(const u16 *__res } template -void GPUEngineA::_RenderLine_DispCapture_Blend(const u16 *__restrict srcA, const u16 *__restrict srcB, u16 *__restrict dst, const size_t captureLengthExt, const size_t l) +void GPUEngineA::_RenderLine_DispCapture_Blend(const u16 *srcA, const u16 *srcB, u16 *dst, const size_t captureLengthExt, const size_t l) { const u8 blendEVA = GPU->GetEngineMain()->_dispCapCnt.EVA; const u8 blendEVB = GPU->GetEngineMain()->_dispCapCnt.EVB; @@ -5728,6 +5802,9 @@ void GPUSubsystem::SetWillAutoResolveToCustomBuffer(const bool willAutoResolve) void GPUSubsystem::RenderLine(const u16 l, bool isFrameSkipRequested) { + const bool isFramebuffeRenderNeeded[2] = { CommonSettings.showGpu.main && !(this->_engineMain->GetIsMasterBrightFullIntensity() && (this->_engineMain->GetIORegisterMap().DISPCAPCNT.CaptureEnable == 0)), + CommonSettings.showGpu.sub && !this->_engineSub->GetIsMasterBrightFullIntensity() }; + if (l == 0) { CurrentRenderer->SetFramebufferFlushStates(this->_engineMain->WillRender3DLayer(), this->_engineMain->WillCapture3DLayerDirect()); @@ -5760,43 +5837,48 @@ void GPUSubsystem::RenderLine(const u16 l, bool isFrameSkipRequested) } } - if (!isFrameSkipRequested) + if (isFramebuffeRenderNeeded[GPUEngineID_Main] && !isFrameSkipRequested) { - if (CommonSettings.showGpu.main && !(this->_engineMain->GetIsMasterBrightFullIntensity() && (this->_engineMain->GetIORegisterMap().DISPCAPCNT.CaptureEnable == 0))) + if (this->_engineMain->isCustomRenderingNeeded) { - if (this->_engineMain->isCustomRenderingNeeded) - { - this->_engineMain->RenderLine(l); - } - else - { - this->_engineMain->RenderLine(l); - } + this->_engineMain->RenderLine(l); } - - if (CommonSettings.showGpu.sub && !this->_engineSub->GetIsMasterBrightFullIntensity()) + else { - if (this->_engineSub->isCustomRenderingNeeded) - { - this->_engineSub->RenderLine(l); - } - else - { - this->_engineSub->RenderLine(l); - } + this->_engineMain->RenderLine(l); } } + else + { + this->_engineMain->UpdatePropertiesWithoutRender(l); + } + + if (isFramebuffeRenderNeeded[GPUEngineID_Sub] && !isFrameSkipRequested) + { + if (this->_engineSub->isCustomRenderingNeeded) + { + this->_engineSub->RenderLine(l); + } + else + { + this->_engineSub->RenderLine(l); + } + } + else + { + this->_engineSub->UpdatePropertiesWithoutRender(l); + } if (l == 191) { if (!isFrameSkipRequested) { - if (CommonSettings.showGpu.main && !(this->_engineMain->GetIsMasterBrightFullIntensity() && (this->_engineMain->GetIORegisterMap().DISPCAPCNT.CaptureEnable == 0))) + if (isFramebuffeRenderNeeded[GPUEngineID_Main]) { this->_engineMain->ApplyMasterBrightness(); } - if (CommonSettings.showGpu.sub && !this->_engineSub->GetIsMasterBrightFullIntensity()) + if (isFramebuffeRenderNeeded[GPUEngineID_Sub]) { this->_engineSub->ApplyMasterBrightness(); } diff --git a/desmume/src/GPU.h b/desmume/src/GPU.h index 69113b439..f4f760006 100644 --- a/desmume/src/GPU.h +++ b/desmume/src/GPU.h @@ -1304,6 +1304,7 @@ public: template void ParseAllRegisters(); template void RenderLine(const u16 l); + void UpdatePropertiesWithoutRender(const u16 l); void FramebufferPostprocess(); bool isCustomRenderingNeeded; @@ -1377,7 +1378,7 @@ protected: void _RenderLine_DispCapture_FIFOToBuffer(u16 *fifoLineBuffer); template - void _RenderLine_DispCapture_Copy(const u16 *__restrict src, u16 *__restrict dst, const size_t captureLengthExt, const size_t captureLineCount); + void _RenderLine_DispCapture_Copy(const u16 *src, u16 *dst, const size_t captureLengthExt, const size_t captureLineCount); // Do not use restrict pointers, since src and dst can be the same u16 _RenderLine_DispCapture_BlendFunc(const u16 srcA, const u16 srcB, const u8 blendEVA, const u8 blendEVB); @@ -1386,10 +1387,10 @@ protected: #endif template - void _RenderLine_DispCapture_BlendToCustomDstBuffer(const u16 *__restrict srcA, const u16 *__restrict srcB, u16 *__restrict dst, const u8 blendEVA, const u8 blendEVB, const size_t length, size_t l); + void _RenderLine_DispCapture_BlendToCustomDstBuffer(const u16 *srcA, const u16 *srcB, u16 *dst, const u8 blendEVA, const u8 blendEVB, const size_t length, size_t l); // Do not use restrict pointers, since srcB and dst can be the same template - void _RenderLine_DispCapture_Blend(const u16 *__restrict srcA, const u16 *__restrict srcB, u16 *__restrict dst, const size_t captureLengthExt, const size_t l); + void _RenderLine_DispCapture_Blend(const u16 *srcA, const u16 *srcB, u16 *dst, const size_t captureLengthExt, const size_t l); // Do not use restrict pointers, since srcB and dst can be the same template void _HandleDisplayModeVRAM(u16 *__restrict dstColorLine, const size_t l, const size_t dstLineWidth, const size_t dstLineCount); void _HandleDisplayModeMainMemory(u16 *dstColorLine); @@ -1410,6 +1411,7 @@ public: bool WillCapture3DLayerDirect(); template void RenderLine(const u16 l); + void UpdatePropertiesWithoutRender(const u16 l); void FramebufferPostprocess(); };