diff --git a/desmume/src/GPU.cpp b/desmume/src/GPU.cpp index ca3f3eff4..59893d5ea 100644 --- a/desmume/src/GPU.cpp +++ b/desmume/src/GPU.cpp @@ -718,8 +718,8 @@ static FORCEINLINE void CopyLineExpand(void *__restrict dst, const void *__restr } template -static void CopyLineExpandHinted(const void *__restrict srcBuffer, const size_t srcLineIndex, - void *__restrict dstBuffer, const size_t dstLineIndex, const size_t dstLineWidth, const size_t dstLineCount) +void CopyLineExpandHinted(const void *__restrict srcBuffer, const size_t srcLineIndex, + void *__restrict dstBuffer, const size_t dstLineIndex, const size_t dstLineWidth, const size_t dstLineCount) { switch (INTEGERSCALEHINT) { @@ -784,7 +784,7 @@ static void CopyLineExpandHinted(const void *__restrict srcBuffer, const size_t } template -static void CopyLineExpandHinted(const GPUEngineLineInfo &lineInfo, const void *__restrict srcBuffer, void *__restrict dstBuffer) +void CopyLineExpandHinted(const GPUEngineLineInfo &lineInfo, const void *__restrict srcBuffer, void *__restrict dstBuffer) { CopyLineExpandHinted(srcBuffer, lineInfo.indexNative, dstBuffer, lineInfo.indexCustom, lineInfo.widthCustom, lineInfo.renderCount); @@ -1184,8 +1184,8 @@ static FORCEINLINE void CopyLineReduce(void *__restrict dst, const void *__restr } template -static void CopyLineReduceHinted(const void *__restrict srcBuffer, const size_t srcLineIndex, const size_t srcLineWidth, - void *__restrict dstBuffer, const size_t dstLineIndex) +void CopyLineReduceHinted(const void *__restrict srcBuffer, const size_t srcLineIndex, const size_t srcLineWidth, + void *__restrict dstBuffer, const size_t dstLineIndex) { switch (INTEGERSCALEHINT) { @@ -1250,7 +1250,7 @@ static void CopyLineReduceHinted(const void *__restrict srcBuffer, const size_t } template -static void CopyLineReduceHinted(const GPUEngineLineInfo &lineInfo, const void *__restrict srcBuffer, void *__restrict dstBuffer) +void CopyLineReduceHinted(const GPUEngineLineInfo &lineInfo, const void *__restrict srcBuffer, void *__restrict dstBuffer) { CopyLineReduceHinted(srcBuffer, lineInfo.indexCustom, lineInfo.widthCustom, dstBuffer, lineInfo.indexNative); @@ -6393,7 +6393,6 @@ GPUEngineA::GPUEngineA() _captureWorkingB16 = (u16 *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16)); _captureWorkingA32 = (FragmentColor *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(FragmentColor)); _captureWorkingB32 = (FragmentColor *)malloc_alignedPage(GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(FragmentColor)); - gfx3d_Update3DFramebuffers(_3DFramebufferMain, _3DFramebuffer16); } GPUEngineA::~GPUEngineA() @@ -6404,7 +6403,6 @@ GPUEngineA::~GPUEngineA() free_aligned(this->_captureWorkingB16); free_aligned(this->_captureWorkingA32); free_aligned(this->_captureWorkingB32); - gfx3d_Update3DFramebuffers(NULL, NULL); } GPUEngineA* GPUEngineA::Allocate() @@ -6552,7 +6550,6 @@ void GPUEngineA::SetCustomFramebufferSize(size_t w, size_t h) this->_captureWorkingB16 = newCaptureWorkingB16; this->_captureWorkingA32 = newCaptureWorkingA32; this->_captureWorkingB32 = newCaptureWorkingB32; - gfx3d_Update3DFramebuffers(this->_3DFramebufferMain, this->_3DFramebuffer16); const NDSDisplayInfo &dispInfo = GPU->GetDisplayInfo(); @@ -8133,6 +8130,19 @@ GPUSubsystem::GPUSubsystem() gfx3d_init(); + for (size_t line = 0; line < GPU_VRAM_BLOCK_LINES + 1; line++) + { + GPUEngineLineInfo &lineInfo = this->_lineInfo[line]; + + lineInfo.indexNative = line; + lineInfo.indexCustom = lineInfo.indexNative; + lineInfo.widthCustom = GPU_FRAMEBUFFER_NATIVE_WIDTH; + lineInfo.renderCount = 1; + lineInfo.pixelCount = GPU_FRAMEBUFFER_NATIVE_WIDTH; + lineInfo.blockOffsetNative = lineInfo.indexNative * GPU_FRAMEBUFFER_NATIVE_WIDTH; + lineInfo.blockOffsetCustom = lineInfo.indexCustom * GPU_FRAMEBUFFER_NATIVE_WIDTH; + } + _engineMain = GPUEngineA::Allocate(); _engineSub = GPUEngineB::Allocate(); @@ -8443,6 +8453,11 @@ const NDSDisplayInfo& GPUSubsystem::GetDisplayInfo() return this->_displayInfo; } +const GPUEngineLineInfo& GPUSubsystem::GetLineInfoAtIndex(size_t l) +{ + return this->_lineInfo[l]; +} + u32 GPUSubsystem::GetFPSRender3D() const { return this->_render3DFrameCount; @@ -8593,6 +8608,19 @@ void GPUSubsystem::SetCustomFramebufferSize(size_t w, size_t h) _gpuDstToSrcSSSE3_u16_8e = newGpuDstToSrcSSSE3_u16_8e; _gpuDstToSrcSSSE3_u32_4e = newGpuDstToSrcSSSE3_u32_4e; + for (size_t line = 0; line < GPU_VRAM_BLOCK_LINES + 1; line++) + { + GPUEngineLineInfo &lineInfo = this->_lineInfo[line]; + + lineInfo.indexNative = line; + lineInfo.indexCustom = _gpuCaptureLineIndex[lineInfo.indexNative]; + lineInfo.widthCustom = w; + lineInfo.renderCount = _gpuCaptureLineCount[lineInfo.indexNative]; + lineInfo.pixelCount = lineInfo.widthCustom * lineInfo.renderCount; + lineInfo.blockOffsetNative = lineInfo.indexNative * GPU_FRAMEBUFFER_NATIVE_WIDTH; + lineInfo.blockOffsetCustom = lineInfo.indexCustom * lineInfo.widthCustom; + } + CurrentRenderer->RenderFinish(); CurrentRenderer->SetRenderNeedsFinish(false); @@ -9153,8 +9181,8 @@ u8* GPUSubsystem::_DownscaleAndConvertForSavestate(const NDSDisplayID displayID, isIntermediateBufferMissing = (intermediateBuffer == NULL); if (!isIntermediateBufferMissing) { - const u32 *src = (u32 *)this->_displayInfo.customBuffer[displayID]; - u32 *dst = (u32 *)intermediateBuffer; + const u32 *__restrict src = (u32 *__restrict)this->_displayInfo.customBuffer[displayID]; + u32 *__restrict dst = (u32 *__restrict)intermediateBuffer; for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++) { @@ -9650,3 +9678,7 @@ template void GPUEngineBase::ParseReg_BGnY(); template void GPUSubsystem::RenderLine(const size_t l); template void GPUSubsystem::RenderLine(const size_t l); template void GPUSubsystem::RenderLine(const size_t l); + +// These functions are used in gfx3d.cpp +template void CopyLineExpandHinted<0xFFFF, false, true, 4>(const GPUEngineLineInfo &lineInfo, const void *__restrict srcBuffer, void *__restrict dstBuffer); +template void CopyLineReduceHinted<0xFFFF, false, true, 4>(const GPUEngineLineInfo &lineInfo, const void *__restrict srcBuffer, void *__restrict dstBuffer); diff --git a/desmume/src/GPU.h b/desmume/src/GPU.h index 8d7aaf003..0e7ee30b7 100644 --- a/desmume/src/GPU.h +++ b/desmume/src/GPU.h @@ -1763,6 +1763,7 @@ private: GPUEngineB *_engineSub; NDSDisplay *_display[2]; float _backlightIntensityTotal[2]; + GPUEngineLineInfo _lineInfo[GPU_VRAM_BLOCK_LINES + 1]; int _pending3DRendererID; bool _needChange3DRenderer; @@ -1797,6 +1798,7 @@ public: void ForceFrameStop(); const NDSDisplayInfo& GetDisplayInfo(); // Frontends need to call this whenever they need to read the video buffers from the emulator core + const GPUEngineLineInfo& GetLineInfoAtIndex(size_t l); u32 GetFPSRender3D() const; GPUEngineA* GetEngineMain(); @@ -1898,6 +1900,20 @@ public: void SetClientData(void *clientData); }; +template +void CopyLineExpandHinted(const void *__restrict srcBuffer, const size_t srcLineIndex, + void *__restrict dstBuffer, const size_t dstLineIndex, const size_t dstLineWidth, const size_t dstLineCount); + +template +void CopyLineExpandHinted(const GPUEngineLineInfo &lineInfo, const void *__restrict srcBuffer, void *__restrict dstBuffer); + +template +void CopyLineReduceHinted(const void *__restrict srcBuffer, const size_t srcLineIndex, const size_t srcLineWidth, + void *__restrict dstBuffer, const size_t dstLineIndex); + +template +void CopyLineReduceHinted(const GPUEngineLineInfo &lineInfo, const void *__restrict srcBuffer, void *__restrict dstBuffer); + extern GPUSubsystem *GPU; extern MMU_struct MMU; diff --git a/desmume/src/gfx3d.cpp b/desmume/src/gfx3d.cpp index ea670f9dc..ac348fc3e 100755 --- a/desmume/src/gfx3d.cpp +++ b/desmume/src/gfx3d.cpp @@ -283,8 +283,7 @@ static float normalTable[1024]; #define fix10_2float(v) (((float)((s32)(v))) / (float)(1<<9)) // Color buffer that is filled by the 3D renderer and is read by the GPU engine. -static FragmentColor *_gfx3d_colorMain = NULL; -static u16 *_gfx3d_color16 = NULL; +static CACHE_ALIGN FragmentColor _gfx3d_savestateBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT]; // Matrix stack handling //TODO: decouple stack pointers from matrix stack type @@ -609,6 +608,7 @@ void gfx3d_reset() memset(gxPIPE.param, 0, sizeof(gxPIPE.param)); memset(colorRGB, 0, sizeof(colorRGB)); memset(&tempVertInfo, 0, sizeof(tempVertInfo)); + memset(_gfx3d_savestateBuffer, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(u32)); MatrixInit(mtxCurrent[MATRIXMODE_PROJECTION]); MatrixInit(mtxCurrent[MATRIXMODE_POSITION]); @@ -2717,24 +2717,54 @@ SFORMAT SF_GFX3D[]={ { "GTVC", 4, 1, &tempVertInfo.count}, { "GTVM", 4, 4, tempVertInfo.map}, { "GTVF", 4, 1, &tempVertInfo.first}, - { "G3CX", 1, 4*GPU_FRAMEBUFFER_NATIVE_WIDTH*GPU_FRAMEBUFFER_NATIVE_HEIGHT, _gfx3d_colorMain}, + { "G3CX", 1, 4*GPU_FRAMEBUFFER_NATIVE_WIDTH*GPU_FRAMEBUFFER_NATIVE_HEIGHT, _gfx3d_savestateBuffer}, { 0 } }; -void gfx3d_Update3DFramebuffers(FragmentColor *framebufferMain, u16 *framebuffer16) -{ - _gfx3d_colorMain = framebufferMain; - _gfx3d_color16 = framebuffer16; -} - //-------------savestate -void gfx3d_savestate(EMUFILE &os) +void gfx3d_PrepareSaveStateBufferWrite() { if (CurrentRenderer->GetRenderNeedsFinish()) { GPU->ForceRender3DFinishAndFlush(true); } + const size_t w = CurrentRenderer->GetFramebufferWidth(); + const size_t h = CurrentRenderer->GetFramebufferHeight(); + + if ( (w == GPU_FRAMEBUFFER_NATIVE_WIDTH) && (h == GPU_FRAMEBUFFER_NATIVE_HEIGHT) ) // Framebuffer is at the native size + { + if (CurrentRenderer->GetColorFormat() == NDSColorFormat_BGR666_Rev) + { + ColorspaceConvertBuffer6665To8888((u32 *)CurrentRenderer->GetFramebuffer(), (u32 *)_gfx3d_savestateBuffer, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + } + else + { + ColorspaceCopyBuffer32((u32 *)CurrentRenderer->GetFramebuffer(), (u32 *)_gfx3d_savestateBuffer, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + } + } + else // Framebuffer is at a custom size + { + const FragmentColor *__restrict src = CurrentRenderer->GetFramebuffer(); + FragmentColor *__restrict dst = _gfx3d_savestateBuffer; + + for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++) + { + const GPUEngineLineInfo &lineInfo = GPU->GetLineInfoAtIndex(l); + CopyLineReduceHinted<0xFFFF, false, true, 4>(lineInfo, src, dst); + src += lineInfo.pixelCount; + dst += GPU_FRAMEBUFFER_NATIVE_WIDTH; + } + + if (CurrentRenderer->GetColorFormat() == NDSColorFormat_BGR666_Rev) + { + ColorspaceConvertBuffer6665To8888((u32 *)_gfx3d_savestateBuffer, (u32 *)_gfx3d_savestateBuffer, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + } + } +} + +void gfx3d_savestate(EMUFILE &os) +{ //version os.write_32LE(4); @@ -2905,6 +2935,59 @@ bool gfx3d_loadstate(EMUFILE &is, int size) return true; } +void gfx3d_FinishLoadStateBufferRead() +{ + const Render3DDeviceInfo &deviceInfo = CurrentRenderer->GetDeviceInfo(); + + switch (deviceInfo.renderID) + { + case RENDERID_NULL: + memset(CurrentRenderer->GetFramebuffer(), 0, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(FragmentColor)); + break; + + case RENDERID_SOFTRASTERIZER: + { + const size_t w = CurrentRenderer->GetFramebufferWidth(); + const size_t h = CurrentRenderer->GetFramebufferHeight(); + + if ( (w == GPU_FRAMEBUFFER_NATIVE_WIDTH) && (h == GPU_FRAMEBUFFER_NATIVE_HEIGHT) ) // Framebuffer is at the native size + { + if (CurrentRenderer->GetColorFormat() == NDSColorFormat_BGR666_Rev) + { + ColorspaceConvertBuffer8888To6665((u32 *)_gfx3d_savestateBuffer, (u32 *)CurrentRenderer->GetFramebuffer(), GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + } + else + { + ColorspaceCopyBuffer32((u32 *)_gfx3d_savestateBuffer, (u32 *)CurrentRenderer->GetFramebuffer(), GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + } + } + else // Framebuffer is at a custom size + { + if (CurrentRenderer->GetColorFormat() == NDSColorFormat_BGR666_Rev) + { + ColorspaceConvertBuffer8888To6665((u32 *)_gfx3d_savestateBuffer, (u32 *)_gfx3d_savestateBuffer, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + } + + const FragmentColor *__restrict src = _gfx3d_savestateBuffer; + FragmentColor *__restrict dst = CurrentRenderer->GetFramebuffer(); + + for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++) + { + const GPUEngineLineInfo &lineInfo = GPU->GetLineInfoAtIndex(l); + CopyLineExpandHinted<0xFFFF, false, true, 4>(lineInfo, src, dst); + src += GPU_FRAMEBUFFER_NATIVE_WIDTH; + dst += lineInfo.pixelCount; + } + } + break; + } + + default: + // Do nothing. Loading the 3D framebuffer is unsupported on this 3D renderer. + break; + } +} + void gfx3d_parseCurrentDISP3DCNT() { const IOREG_DISP3DCNT &DISP3DCNT = gfx3d.state.savedDISP3DCNT; diff --git a/desmume/src/gfx3d.h b/desmume/src/gfx3d.h index ab6cdd577..330f5b5a8 100644 --- a/desmume/src/gfx3d.h +++ b/desmume/src/gfx3d.h @@ -23,7 +23,7 @@ #include #include -#include "types.h" +#include "types.h" #include "matrix.h" #include "GPU.h" @@ -75,11 +75,11 @@ class EMUFILE; // 15-bit to 24-bit depth formula from http://nocash.emubase.de/gbatek.htm#ds3drearplane extern CACHE_ALIGN u32 dsDepthExtend_15bit_to_24bit[32768]; -#define DS_DEPTH15TO24(depth) ( dsDepthExtend_15bit_to_24bit[(depth) & 0x7FFF] ) - -extern CACHE_ALIGN MatrixStack mtxStackProjection; -extern CACHE_ALIGN MatrixStack mtxStackPosition; -extern CACHE_ALIGN MatrixStack mtxStackPositionVector; +#define DS_DEPTH15TO24(depth) ( dsDepthExtend_15bit_to_24bit[(depth) & 0x7FFF] ) + +extern CACHE_ALIGN MatrixStack mtxStackProjection; +extern CACHE_ALIGN MatrixStack mtxStackPosition; +extern CACHE_ALIGN MatrixStack mtxStackPositionVector; extern CACHE_ALIGN MatrixStack mtxStackTexture; // POLYGON PRIMITIVE TYPES @@ -636,9 +636,10 @@ void gfx3d_glGetLightColor(const size_t index, u32 &dst); struct SFORMAT; extern SFORMAT SF_GFX3D[]; -void gfx3d_Update3DFramebuffers(FragmentColor *framebufferMain, u16 *framebuffer16); +void gfx3d_PrepareSaveStateBufferWrite(); void gfx3d_savestate(EMUFILE &os); bool gfx3d_loadstate(EMUFILE &is, int size); +void gfx3d_FinishLoadStateBufferRead(); void gfx3d_ClearStack(); diff --git a/desmume/src/saves.cpp b/desmume/src/saves.cpp index 7e57b4963..0584822d4 100644 --- a/desmume/src/saves.cpp +++ b/desmume/src/saves.cpp @@ -1061,6 +1061,7 @@ static void writechunks(EMUFILE &os) save_time = tm.get_Ticks(); + gfx3d_PrepareSaveStateBufferWrite(); wifiHandler->PrepareSaveStateWrite(); savestate_WriteChunk(os,1,SF_ARM9); @@ -1184,8 +1185,14 @@ static bool ReadStateChunks(EMUFILE &is, s32 totalsize) } if (chunkError) + { msgbox->warn("There was an error loading the savestate. Your game session is probably corrupt now."); - + } + else + { + gfx3d_FinishLoadStateBufferRead(); + } + if (haveInfo) {