diff --git a/src/Args.h b/src/Args.h index c6d131c8..d836b643 100644 --- a/src/Args.h +++ b/src/Args.h @@ -30,6 +30,7 @@ #include "DSi_NAND.h" #include "FATStorage.h" #include "FreeBIOS.h" +#include "GPU3D_Soft.h" #include "SPI_Firmware.h" #include "SPU.h" @@ -118,6 +119,11 @@ struct NDSArgs /// Defaults to disabled. /// Ignored in builds that don't have the GDB stub included. std::optional GDB = std::nullopt; + + /// The 3D renderer to initialize the DS with. + /// Defaults to the software renderer. + /// Can be changed later at any time. + std::unique_ptr Renderer3D = std::make_unique(); }; /// Arguments to pass into the DSi constructor. diff --git a/src/GPU.cpp b/src/GPU.cpp index 2c140a86..c226ccbe 100644 --- a/src/GPU.cpp +++ b/src/GPU.cpp @@ -67,7 +67,7 @@ GPU::GPU(melonDS::NDS& nds, std::unique_ptr&& renderer3d, std::uniqu NDS(nds), GPU2D_A(0, *this), GPU2D_B(1, *this), - GPU3D(nds, renderer3d ? std::move(renderer3d) : std::make_unique(*this)), + GPU3D(nds, renderer3d ? std::move(renderer3d) : std::make_unique()), GPU2D_Renderer(renderer2d ? std::move(renderer2d) : std::make_unique(*this)) { NDS.RegisterEventFunc(Event_LCD, LCD_StartHBlank, MemberEventFunc(GPU, StartHBlank)); @@ -209,7 +209,7 @@ void GPU::Stop() noexcept memset(Framebuffer[1][0].get(), 0, fbsize*4); memset(Framebuffer[1][1].get(), 0, fbsize*4); - GPU3D.Stop(); + GPU3D.Stop(*this); } void GPU::DoSavestate(Savestate* file) noexcept @@ -294,7 +294,7 @@ void GPU::AssignFramebuffers() noexcept void GPU::SetRenderer3D(std::unique_ptr&& renderer) noexcept { if (renderer == nullptr) - GPU3D.SetCurrentRenderer(std::make_unique(*this)); + GPU3D.SetCurrentRenderer(std::make_unique()); else GPU3D.SetCurrentRenderer(std::move(renderer)); @@ -899,7 +899,7 @@ void GPU::StartHBlank(u32 line) noexcept } else if (VCount == 215) { - GPU3D.VCount215(); + GPU3D.VCount215(*this); } else if (VCount == 262) { @@ -925,7 +925,7 @@ void GPU::FinishFrame(u32 lines) noexcept if (GPU3D.AbortFrame) { - GPU3D.RestartFrame(); + GPU3D.RestartFrame(*this); GPU3D.AbortFrame = false; } } @@ -1018,7 +1018,7 @@ void GPU::StartScanline(u32 line) noexcept // texture memory anyway and only update it before the start //of the next frame. // So we can give the rasteriser a bit more headroom - GPU3D.VCount144(); + GPU3D.VCount144(*this); // VBlank DispStat[0] |= (1<<0); @@ -1038,7 +1038,7 @@ void GPU::StartScanline(u32 line) noexcept // Need a better way to identify the openGL renderer in particular if (GPU3D.IsRendererAccelerated()) - GPU3D.Blit(); + GPU3D.Blit(*this); } } @@ -1068,7 +1068,7 @@ void GPU::SetVCount(u16 val) noexcept } template -NonStupidBitField VRAMTrackingSet::DeriveState(u32* currentMappings, GPU& gpu) +NonStupidBitField VRAMTrackingSet::DeriveState(const u32* currentMappings, GPU& gpu) { NonStupidBitField result; u16 banksToBeZeroed = 0; @@ -1131,12 +1131,12 @@ NonStupidBitField VRAMTrackingSet VRAMTrackingSet<32*1024, 8*1024>::DeriveState(u32*, GPU& gpu); -template NonStupidBitField<8*1024/VRAMDirtyGranularity> VRAMTrackingSet<8*1024, 8*1024>::DeriveState(u32*, GPU& gpu); -template NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMTrackingSet<512*1024, 128*1024>::DeriveState(u32*, GPU& gpu); -template NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMTrackingSet<128*1024, 16*1024>::DeriveState(u32*, GPU& gpu); -template NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMTrackingSet<256*1024, 16*1024>::DeriveState(u32*, GPU& gpu); -template NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMTrackingSet<512*1024, 16*1024>::DeriveState(u32*, GPU& gpu); +template NonStupidBitField<32*1024/VRAMDirtyGranularity> VRAMTrackingSet<32*1024, 8*1024>::DeriveState(const u32*, GPU& gpu); +template NonStupidBitField<8*1024/VRAMDirtyGranularity> VRAMTrackingSet<8*1024, 8*1024>::DeriveState(const u32*, GPU& gpu); +template NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMTrackingSet<512*1024, 128*1024>::DeriveState(const u32*, GPU& gpu); +template NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMTrackingSet<128*1024, 16*1024>::DeriveState(const u32*, GPU& gpu); +template NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMTrackingSet<256*1024, 16*1024>::DeriveState(const u32*, GPU& gpu); +template NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMTrackingSet<512*1024, 16*1024>::DeriveState(const u32*, GPU& gpu); diff --git a/src/GPU.h b/src/GPU.h index ee7311a6..e070db78 100644 --- a/src/GPU.h +++ b/src/GPU.h @@ -49,7 +49,7 @@ struct VRAMTrackingSet Mapping[i] = 0x8000; } } - NonStupidBitField DeriveState(u32* currentMappings, GPU& gpu); + NonStupidBitField DeriveState(const u32* currentMappings, GPU& gpu); }; class GPU diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp index 7e7df244..ac29a1e4 100644 --- a/src/GPU3D.cpp +++ b/src/GPU3D.cpp @@ -142,7 +142,7 @@ void MatrixLoadIdentity(s32* m); GPU3D::GPU3D(melonDS::NDS& nds, std::unique_ptr&& renderer) noexcept : NDS(nds), - CurrentRenderer(renderer ? std::move(renderer) : std::make_unique(nds.GPU)) + CurrentRenderer(renderer ? std::move(renderer) : std::make_unique()) { } @@ -2367,20 +2367,20 @@ void GPU3D::CheckFIFODMA() noexcept NDS.CheckDMAs(0, 0x07); } -void GPU3D::VCount144() noexcept +void GPU3D::VCount144(GPU& gpu) noexcept { - CurrentRenderer->VCount144(); + CurrentRenderer->VCount144(gpu); } -void GPU3D::RestartFrame() noexcept +void GPU3D::RestartFrame(GPU& gpu) noexcept { - CurrentRenderer->RestartFrame(); + CurrentRenderer->RestartFrame(gpu); } -void GPU3D::Stop() noexcept +void GPU3D::Stop(const GPU& gpu) noexcept { if (CurrentRenderer) - CurrentRenderer->Stop(); + CurrentRenderer->Stop(gpu); } @@ -2473,9 +2473,9 @@ void GPU3D::VBlank() noexcept } } -void GPU3D::VCount215() noexcept +void GPU3D::VCount215(GPU& gpu) noexcept { - CurrentRenderer->RenderFrame(); + CurrentRenderer->RenderFrame(gpu); } void GPU3D::SetRenderXPos(u16 xpos) noexcept @@ -2935,10 +2935,10 @@ void GPU3D::Write32(u32 addr, u32 val) noexcept Log(LogLevel::Debug, "unknown GPU3D write32 %08X %08X\n", addr, val); } -void GPU3D::Blit() noexcept +void GPU3D::Blit(const GPU& gpu) noexcept { if (CurrentRenderer) - CurrentRenderer->Blit(); + CurrentRenderer->Blit(gpu); } Renderer3D::Renderer3D(bool Accelerated) diff --git a/src/GPU3D.h b/src/GPU3D.h index eb975c68..0c900c6c 100644 --- a/src/GPU3D.h +++ b/src/GPU3D.h @@ -101,12 +101,12 @@ public: void CheckFIFOIRQ() noexcept; void CheckFIFODMA() noexcept; - void VCount144() noexcept; + void VCount144(GPU& gpu) noexcept; void VBlank() noexcept; - void VCount215() noexcept; + void VCount215(GPU& gpu) noexcept; - void RestartFrame() noexcept; - void Stop() noexcept; + void RestartFrame(GPU& gpu) noexcept; + void Stop(const GPU& gpu) noexcept; void SetRenderXPos(u16 xpos) noexcept; [[nodiscard]] u16 GetRenderXPos() const noexcept { return RenderXPos; } @@ -125,7 +125,7 @@ public: void Write8(u32 addr, u8 val) noexcept; void Write16(u32 addr, u16 val) noexcept; void Write32(u32 addr, u32 val) noexcept; - void Blit() noexcept; + void Blit(const GPU& gpu) noexcept; private: melonDS::NDS& NDS; typedef union @@ -334,19 +334,19 @@ public: Renderer3D(const Renderer3D&) = delete; Renderer3D& operator=(const Renderer3D&) = delete; - virtual void Reset() = 0; + virtual void Reset(GPU& gpu) = 0; // This "Accelerated" flag currently communicates if the framebuffer should // be allocated differently and other little misc handlers. Ideally there // are more detailed "traits" that we can ask of the Renderer3D type const bool Accelerated; - virtual void VCount144() {}; - virtual void Stop() {} - virtual void RenderFrame() = 0; - virtual void RestartFrame() {}; + virtual void VCount144(GPU& gpu) {}; + virtual void Stop(const GPU& gpu) {} + virtual void RenderFrame(GPU& gpu) = 0; + virtual void RestartFrame(GPU& gpu) {}; virtual u32* GetLine(int line) = 0; - virtual void Blit() {}; + virtual void Blit(const GPU& gpu) {}; virtual void PrepareCaptureFrame() {} protected: Renderer3D(bool Accelerated); diff --git a/src/GPU3D_OpenGL.cpp b/src/GPU3D_OpenGL.cpp index 55a034cd..27711a89 100644 --- a/src/GPU3D_OpenGL.cpp +++ b/src/GPU3D_OpenGL.cpp @@ -97,9 +97,8 @@ void SetupDefaultTexParams(GLuint tex) glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); } -GLRenderer::GLRenderer(GLCompositor&& compositor, melonDS::GPU& gpu) noexcept : +GLRenderer::GLRenderer(GLCompositor&& compositor) noexcept : Renderer3D(true), - GPU(gpu), CurGLCompositor(std::move(compositor)) { // GLRenderer::New() will be used to actually initialize the renderer; @@ -107,7 +106,7 @@ GLRenderer::GLRenderer(GLCompositor&& compositor, melonDS::GPU& gpu) noexcept : // so we can just let the destructor clean up a half-initialized renderer. } -std::unique_ptr GLRenderer::New(melonDS::GPU& gpu) noexcept +std::unique_ptr GLRenderer::New() noexcept { assert(glEnable != nullptr); @@ -117,7 +116,7 @@ std::unique_ptr GLRenderer::New(melonDS::GPU& gpu) noexcept // Will be returned if the initialization succeeds, // or cleaned up via RAII if it fails. - std::unique_ptr result = std::unique_ptr(new GLRenderer(std::move(*compositor), gpu)); + std::unique_ptr result = std::unique_ptr(new GLRenderer(std::move(*compositor))); compositor = std::nullopt; glEnable(GL_DEPTH_TEST); @@ -333,7 +332,7 @@ GLRenderer::~GLRenderer() } } -void GLRenderer::Reset() +void GLRenderer::Reset(GPU& gpu) { // This is where the compositor's Reset() method would be called, // except there's no such method right now. @@ -786,14 +785,14 @@ int GLRenderer::RenderPolygonEdgeBatch(int i) const return numpolys; } -void GLRenderer::RenderSceneChunk(int y, int h) +void GLRenderer::RenderSceneChunk(const GPU3D& gpu3d, int y, int h) { u32 flags = 0; - if (GPU.GPU3D.RenderPolygonRAM[0]->WBuffer) flags |= RenderFlag_WBuffer; + if (gpu3d.RenderPolygonRAM[0]->WBuffer) flags |= RenderFlag_WBuffer; if (h != 192) glScissor(0, y<PolyData->IsShadow) { // shadow against clear-plane will only pass if its polyID matches that of the clear plane - u32 clrpolyid = (GPU.GPU3D.RenderClearAttr1 >> 24) & 0x3F; + u32 clrpolyid = (gpu3d.RenderClearAttr1 >> 24) & 0x3F; if (polyid != clrpolyid) { i++; continue; } glEnable(GL_BLEND); @@ -1089,7 +1088,7 @@ void GLRenderer::RenderSceneChunk(int y, int h) } } - if (GPU.GPU3D.RenderDispCnt & 0x00A0) // fog/edge enabled + if (gpu3d.RenderDispCnt & 0x00A0) // fog/edge enabled { glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glColorMaski(1, GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); @@ -1111,7 +1110,7 @@ void GLRenderer::RenderSceneChunk(int y, int h) glBindBuffer(GL_ARRAY_BUFFER, ClearVertexBufferID); glBindVertexArray(ClearVertexArrayID); - if (GPU.GPU3D.RenderDispCnt & (1<<5)) + if (gpu3d.RenderDispCnt & (1<<5)) { // edge marking // TODO: depth/polyid values at screen edges @@ -1123,19 +1122,19 @@ void GLRenderer::RenderSceneChunk(int y, int h) glDrawArrays(GL_TRIANGLES, 0, 2*3); } - if (GPU.GPU3D.RenderDispCnt & (1<<7)) + if (gpu3d.RenderDispCnt & (1<<7)) { // fog glUseProgram(FinalPassFogShader[2]); - if (GPU.GPU3D.RenderDispCnt & (1<<6)) + if (gpu3d.RenderDispCnt & (1<<6)) glBlendFuncSeparate(GL_ZERO, GL_ONE, GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_ALPHA); else glBlendFuncSeparate(GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_ALPHA, GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_ALPHA); { - u32 c = GPU.GPU3D.RenderFogColor; + u32 c = gpu3d.RenderFogColor; u32 r = c & 0x1F; u32 g = (c >> 5) & 0x1F; u32 b = (c >> 10) & 0x1F; @@ -1150,7 +1149,7 @@ void GLRenderer::RenderSceneChunk(int y, int h) } -void GLRenderer::RenderFrame() +void GLRenderer::RenderFrame(GPU& gpu) { CurShaderID = -1; @@ -1159,11 +1158,11 @@ void GLRenderer::RenderFrame() ShaderConfig.uScreenSize[0] = ScreenW; ShaderConfig.uScreenSize[1] = ScreenH; - ShaderConfig.uDispCnt = GPU.GPU3D.RenderDispCnt; + ShaderConfig.uDispCnt = gpu.GPU3D.RenderDispCnt; for (int i = 0; i < 32; i++) { - u16 c = GPU.GPU3D.RenderToonTable[i]; + u16 c = gpu.GPU3D.RenderToonTable[i]; u32 r = c & 0x1F; u32 g = (c >> 5) & 0x1F; u32 b = (c >> 10) & 0x1F; @@ -1175,7 +1174,7 @@ void GLRenderer::RenderFrame() for (int i = 0; i < 8; i++) { - u16 c = GPU.GPU3D.RenderEdgeTable[i]; + u16 c = gpu.GPU3D.RenderEdgeTable[i]; u32 r = c & 0x1F; u32 g = (c >> 5) & 0x1F; u32 b = (c >> 10) & 0x1F; @@ -1186,7 +1185,7 @@ void GLRenderer::RenderFrame() } { - u32 c = GPU.GPU3D.RenderFogColor; + u32 c = gpu.GPU3D.RenderFogColor; u32 r = c & 0x1F; u32 g = (c >> 5) & 0x1F; u32 b = (c >> 10) & 0x1F; @@ -1200,12 +1199,12 @@ void GLRenderer::RenderFrame() for (int i = 0; i < 34; i++) { - u8 d = GPU.GPU3D.RenderFogDensityTable[i]; + u8 d = gpu.GPU3D.RenderFogDensityTable[i]; ShaderConfig.uFogDensity[i][0] = (float)d / 127.0; } - ShaderConfig.uFogOffset = GPU.GPU3D.RenderFogOffset; - ShaderConfig.uFogShift = GPU.GPU3D.RenderFogShift; + ShaderConfig.uFogOffset = gpu.GPU3D.RenderFogOffset; + ShaderConfig.uFogShift = gpu.GPU3D.RenderFogShift; glBindBuffer(GL_UNIFORM_BUFFER, ShaderConfigUBO); void* unibuf = glMapBuffer(GL_UNIFORM_BUFFER, GL_WRITE_ONLY); @@ -1218,13 +1217,13 @@ void GLRenderer::RenderFrame() glBindTexture(GL_TEXTURE_2D, TexMemID); for (int i = 0; i < 4; i++) { - u32 mask = GPU.VRAMMap_Texture[i]; + u32 mask = gpu.VRAMMap_Texture[i]; u8* vram; if (!mask) continue; - else if (mask & (1<<0)) vram = GPU.VRAM_A; - else if (mask & (1<<1)) vram = GPU.VRAM_B; - else if (mask & (1<<2)) vram = GPU.VRAM_C; - else if (mask & (1<<3)) vram = GPU.VRAM_D; + else if (mask & (1<<0)) vram = gpu.VRAM_A; + else if (mask & (1<<1)) vram = gpu.VRAM_B; + else if (mask & (1<<2)) vram = gpu.VRAM_C; + else if (mask & (1<<3)) vram = gpu.VRAM_D; glTexSubImage2D(GL_TEXTURE_2D, 0, 0, i*128, 1024, 128, GL_RED_INTEGER, GL_UNSIGNED_BYTE, vram); } @@ -1234,12 +1233,12 @@ void GLRenderer::RenderFrame() for (int i = 0; i < 6; i++) { // 6 x 16K chunks - u32 mask = GPU.VRAMMap_TexPal[i]; + u32 mask = gpu.VRAMMap_TexPal[i]; u8* vram; if (!mask) continue; - else if (mask & (1<<4)) vram = &GPU.VRAM_E[(i&3)*0x4000]; - else if (mask & (1<<5)) vram = GPU.VRAM_F; - else if (mask & (1<<6)) vram = GPU.VRAM_G; + else if (mask & (1<<4)) vram = &gpu.VRAM_E[(i&3)*0x4000]; + else if (mask & (1<<5)) vram = gpu.VRAM_F; + else if (mask & (1<<6)) vram = gpu.VRAM_G; glTexSubImage2D(GL_TEXTURE_2D, 0, 0, i*8, 1024, 8, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, vram); } @@ -1264,13 +1263,13 @@ void GLRenderer::RenderFrame() glUseProgram(ClearShaderPlain[2]); glDepthFunc(GL_ALWAYS); - u32 r = GPU.GPU3D.RenderClearAttr1 & 0x1F; - u32 g = (GPU.GPU3D.RenderClearAttr1 >> 5) & 0x1F; - u32 b = (GPU.GPU3D.RenderClearAttr1 >> 10) & 0x1F; - u32 fog = (GPU.GPU3D.RenderClearAttr1 >> 15) & 0x1; - u32 a = (GPU.GPU3D.RenderClearAttr1 >> 16) & 0x1F; - u32 polyid = (GPU.GPU3D.RenderClearAttr1 >> 24) & 0x3F; - u32 z = ((GPU.GPU3D.RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF; + u32 r = gpu.GPU3D.RenderClearAttr1 & 0x1F; + u32 g = (gpu.GPU3D.RenderClearAttr1 >> 5) & 0x1F; + u32 b = (gpu.GPU3D.RenderClearAttr1 >> 10) & 0x1F; + u32 fog = (gpu.GPU3D.RenderClearAttr1 >> 15) & 0x1; + u32 a = (gpu.GPU3D.RenderClearAttr1 >> 16) & 0x1F; + u32 polyid = (gpu.GPU3D.RenderClearAttr1 >> 24) & 0x3F; + u32 z = ((gpu.GPU3D.RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF; glStencilFunc(GL_ALWAYS, 0xFF, 0xFF); glStencilOp(GL_REPLACE, GL_REPLACE, GL_REPLACE); @@ -1289,20 +1288,20 @@ void GLRenderer::RenderFrame() glDrawArrays(GL_TRIANGLES, 0, 2*3); } - if (GPU.GPU3D.RenderNumPolygons) + if (gpu.GPU3D.RenderNumPolygons) { // render shit here u32 flags = 0; - if (GPU.GPU3D.RenderPolygonRAM[0]->WBuffer) flags |= RenderFlag_WBuffer; + if (gpu.GPU3D.RenderPolygonRAM[0]->WBuffer) flags |= RenderFlag_WBuffer; int npolys = 0; int firsttrans = -1; - for (u32 i = 0; i < GPU.GPU3D.RenderNumPolygons; i++) + for (u32 i = 0; i < gpu.GPU3D.RenderNumPolygons; i++) { - if (GPU.GPU3D.RenderPolygonRAM[i]->Degenerate) continue; + if (gpu.GPU3D.RenderPolygonRAM[i]->Degenerate) continue; - SetupPolygon(&PolygonList[npolys], GPU.GPU3D.RenderPolygonRAM[i]); - if (firsttrans < 0 && GPU.GPU3D.RenderPolygonRAM[i]->Translucent) + SetupPolygon(&PolygonList[npolys], gpu.GPU3D.RenderPolygonRAM[i]); + if (firsttrans < 0 && gpu.GPU3D.RenderPolygonRAM[i]->Translucent) firsttrans = npolys; npolys++; @@ -1319,15 +1318,15 @@ void GLRenderer::RenderFrame() glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, NumIndices * 2, IndexBuffer); glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, EdgeIndicesOffset * 2, NumEdgeIndices * 2, IndexBuffer + EdgeIndicesOffset); - RenderSceneChunk(0, 192); + RenderSceneChunk(gpu.GPU3D, 0, 192); } FrontBuffer = FrontBuffer ? 0 : 1; } -void GLRenderer::Stop() +void GLRenderer::Stop(const GPU& gpu) { - CurGLCompositor.Stop(GPU); + CurGLCompositor.Stop(gpu); } void GLRenderer::PrepareCaptureFrame() @@ -1345,9 +1344,9 @@ void GLRenderer::PrepareCaptureFrame() glReadPixels(0, 0, 256, 192, GL_BGRA, GL_UNSIGNED_BYTE, NULL); } -void GLRenderer::Blit() +void GLRenderer::Blit(const GPU& gpu) { - CurGLCompositor.RenderFrame(GPU, *this); + CurGLCompositor.RenderFrame(gpu, *this); } u32* GLRenderer::GetLine(int line) diff --git a/src/GPU3D_OpenGL.h b/src/GPU3D_OpenGL.h index 286d9f58..c30232ca 100644 --- a/src/GPU3D_OpenGL.h +++ b/src/GPU3D_OpenGL.h @@ -31,7 +31,7 @@ class GLRenderer : public Renderer3D { public: ~GLRenderer() override; - void Reset() override; + void Reset(GPU& gpu) override; void SetRenderSettings(bool betterpolygons, int scale) noexcept; void SetBetterPolygons(bool betterpolygons) noexcept; @@ -39,22 +39,22 @@ public: [[nodiscard]] bool GetBetterPolygons() const noexcept { return BetterPolygons; } [[nodiscard]] int GetScaleFactor() const noexcept { return ScaleFactor; } - void VCount144() override {}; - void RenderFrame() override; - void Stop() override; + void VCount144(GPU& gpu) override {}; + void RenderFrame(GPU& gpu) override; + void Stop(const GPU& gpu) override; u32* GetLine(int line) override; void SetupAccelFrame(); void PrepareCaptureFrame() override; - void Blit() override; + void Blit(const GPU& gpu) override; [[nodiscard]] const GLCompositor& GetCompositor() const noexcept { return CurGLCompositor; } GLCompositor& GetCompositor() noexcept { return CurGLCompositor; } - static std::unique_ptr New(melonDS::GPU& gpu) noexcept; + static std::unique_ptr New() noexcept; private: // Used by New() - GLRenderer(GLCompositor&& compositor, GPU& gpu) noexcept; + GLRenderer(GLCompositor&& compositor) noexcept; // GL version requirements // * texelFetch: 3.0 (GLSL 1.30) (3.2/1.50 for MS) @@ -74,7 +74,6 @@ private: u32 RenderKey; }; - melonDS::GPU& GPU; GLCompositor CurGLCompositor; RendererPolygon PolygonList[2048] {}; @@ -86,7 +85,7 @@ private: int RenderSinglePolygon(int i) const; int RenderPolygonBatch(int i) const; int RenderPolygonEdgeBatch(int i) const; - void RenderSceneChunk(int y, int h); + void RenderSceneChunk(const GPU3D& gpu3d, int y, int h); enum { diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index 894ac94a..c96464a6 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -42,14 +42,16 @@ void SoftRenderer::StopRenderThread() } } -void SoftRenderer::SetupRenderThread() +void SoftRenderer::SetupRenderThread(GPU& gpu) { if (Threaded) { if (!RenderThreadRunning.load(std::memory_order_relaxed)) { RenderThreadRunning = true; - RenderThread = Platform::Thread_Create(std::bind(&SoftRenderer::RenderThreadFunc, this)); + RenderThread = Platform::Thread_Create([this, &gpu]() { + RenderThreadFunc(gpu); + }); } // otherwise more than one frame can be queued up at once @@ -71,8 +73,8 @@ void SoftRenderer::SetupRenderThread() } -SoftRenderer::SoftRenderer(melonDS::GPU& gpu, bool threaded) noexcept - : Renderer3D(false), GPU(gpu), Threaded(threaded) +SoftRenderer::SoftRenderer(bool threaded) noexcept + : Renderer3D(false), Threaded(threaded) { Sema_RenderStart = Platform::Semaphore_Create(); Sema_RenderDone = Platform::Semaphore_Create(); @@ -92,7 +94,7 @@ SoftRenderer::~SoftRenderer() Platform::Semaphore_Free(Sema_ScanlineCount); } -void SoftRenderer::Reset() +void SoftRenderer::Reset(GPU& gpu) { memset(ColorBuffer, 0, BufferSize * 2 * 4); memset(DepthBuffer, 0, BufferSize * 2 * 4); @@ -100,19 +102,19 @@ void SoftRenderer::Reset() PrevIsShadowMask = false; - SetupRenderThread(); + SetupRenderThread(gpu); } -void SoftRenderer::SetThreaded(bool threaded) noexcept +void SoftRenderer::SetThreaded(bool threaded, GPU& gpu) noexcept { if (Threaded != threaded) { Threaded = threaded; - SetupRenderThread(); + SetupRenderThread(gpu); } } -void SoftRenderer::TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) const +void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) const { u32 vramaddr = (texparam & 0xFFFF) << 3; @@ -167,10 +169,10 @@ void SoftRenderer::TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* co case 1: // A3I5 { vramaddr += ((t * width) + s); - u8 pixel = ReadVRAM_Texture(vramaddr); + u8 pixel = ReadVRAM_Texture(vramaddr, gpu); texpal <<= 4; - *color = ReadVRAM_TexPal(texpal + ((pixel&0x1F)<<1)); + *color = ReadVRAM_TexPal(texpal + ((pixel&0x1F)<<1), gpu); *alpha = ((pixel >> 3) & 0x1C) + (pixel >> 6); } break; @@ -178,12 +180,12 @@ void SoftRenderer::TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* co case 2: // 4-color { vramaddr += (((t * width) + s) >> 2); - u8 pixel = ReadVRAM_Texture(vramaddr); + u8 pixel = ReadVRAM_Texture(vramaddr, gpu); pixel >>= ((s & 0x3) << 1); pixel &= 0x3; texpal <<= 3; - *color = ReadVRAM_TexPal(texpal + (pixel<<1)); + *color = ReadVRAM_TexPal(texpal + (pixel<<1), gpu); *alpha = (pixel==0) ? alpha0 : 31; } break; @@ -191,12 +193,12 @@ void SoftRenderer::TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* co case 3: // 16-color { vramaddr += (((t * width) + s) >> 1); - u8 pixel = ReadVRAM_Texture(vramaddr); + u8 pixel = ReadVRAM_Texture(vramaddr, gpu); if (s & 0x1) pixel >>= 4; else pixel &= 0xF; texpal <<= 4; - *color = ReadVRAM_TexPal(texpal + (pixel<<1)); + *color = ReadVRAM_TexPal(texpal + (pixel<<1), gpu); *alpha = (pixel==0) ? alpha0 : 31; } break; @@ -204,10 +206,10 @@ void SoftRenderer::TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* co case 4: // 256-color { vramaddr += ((t * width) + s); - u8 pixel = ReadVRAM_Texture(vramaddr); + u8 pixel = ReadVRAM_Texture(vramaddr, gpu); texpal <<= 4; - *color = ReadVRAM_TexPal(texpal + (pixel<<1)); + *color = ReadVRAM_TexPal(texpal + (pixel<<1), gpu); *alpha = (pixel==0) ? alpha0 : 31; } break; @@ -221,30 +223,30 @@ void SoftRenderer::TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* co if (vramaddr >= 0x40000) slot1addr += 0x10000; - u8 val = ReadVRAM_Texture(vramaddr); + u8 val = ReadVRAM_Texture(vramaddr, gpu); val >>= (2 * (s & 0x3)); - u16 palinfo = ReadVRAM_Texture(slot1addr); + u16 palinfo = ReadVRAM_Texture(slot1addr, gpu); u32 paloffset = (palinfo & 0x3FFF) << 2; texpal <<= 4; switch (val & 0x3) { case 0: - *color = ReadVRAM_TexPal(texpal + paloffset); + *color = ReadVRAM_TexPal(texpal + paloffset, gpu); *alpha = 31; break; case 1: - *color = ReadVRAM_TexPal(texpal + paloffset + 2); + *color = ReadVRAM_TexPal(texpal + paloffset + 2, gpu); *alpha = 31; break; case 2: if ((palinfo >> 14) == 1) { - u16 color0 = ReadVRAM_TexPal(texpal + paloffset); - u16 color1 = ReadVRAM_TexPal(texpal + paloffset + 2); + u16 color0 = ReadVRAM_TexPal(texpal + paloffset, gpu); + u16 color1 = ReadVRAM_TexPal(texpal + paloffset + 2, gpu); u32 r0 = color0 & 0x001F; u32 g0 = color0 & 0x03E0; @@ -261,8 +263,8 @@ void SoftRenderer::TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* co } else if ((palinfo >> 14) == 3) { - u16 color0 = ReadVRAM_TexPal(texpal + paloffset); - u16 color1 = ReadVRAM_TexPal(texpal + paloffset + 2); + u16 color0 = ReadVRAM_TexPal(texpal + paloffset, gpu); + u16 color1 = ReadVRAM_TexPal(texpal + paloffset + 2, gpu); u32 r0 = color0 & 0x001F; u32 g0 = color0 & 0x03E0; @@ -278,20 +280,20 @@ void SoftRenderer::TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* co *color = r | g | b; } else - *color = ReadVRAM_TexPal(texpal + paloffset + 4); + *color = ReadVRAM_TexPal(texpal + paloffset + 4, gpu); *alpha = 31; break; case 3: if ((palinfo >> 14) == 2) { - *color = ReadVRAM_TexPal(texpal + paloffset + 6); + *color = ReadVRAM_TexPal(texpal + paloffset + 6, gpu); *alpha = 31; } else if ((palinfo >> 14) == 3) { - u16 color0 = ReadVRAM_TexPal(texpal + paloffset); - u16 color1 = ReadVRAM_TexPal(texpal + paloffset + 2); + u16 color0 = ReadVRAM_TexPal(texpal + paloffset, gpu); + u16 color1 = ReadVRAM_TexPal(texpal + paloffset + 2, gpu); u32 r0 = color0 & 0x001F; u32 g0 = color0 & 0x03E0; @@ -320,10 +322,10 @@ void SoftRenderer::TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* co case 6: // A5I3 { vramaddr += ((t * width) + s); - u8 pixel = ReadVRAM_Texture(vramaddr); + u8 pixel = ReadVRAM_Texture(vramaddr, gpu); texpal <<= 4; - *color = ReadVRAM_TexPal(texpal + ((pixel&0x7)<<1)); + *color = ReadVRAM_TexPal(texpal + ((pixel&0x7)<<1), gpu); *alpha = (pixel >> 3); } break; @@ -331,7 +333,7 @@ void SoftRenderer::TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* co case 7: // direct color { vramaddr += (((t * width) + s) << 1); - *color = ReadVRAM_Texture(vramaddr); + *color = ReadVRAM_Texture(vramaddr, gpu); *alpha = (*color & 0x8000) ? 31 : 0; } break; @@ -388,7 +390,7 @@ bool DepthTest_LessThan_FrontFacing(s32 dstz, s32 z, u32 dstattr) return false; } -u32 SoftRenderer::AlphaBlend(u32 srccolor, u32 dstcolor, u32 alpha) const noexcept +u32 SoftRenderer::AlphaBlend(const GPU3D& gpu3d, u32 srccolor, u32 dstcolor, u32 alpha) const noexcept { u32 dstalpha = dstcolor >> 24; @@ -399,7 +401,7 @@ u32 SoftRenderer::AlphaBlend(u32 srccolor, u32 dstcolor, u32 alpha) const noexce u32 srcG = (srccolor >> 8) & 0x3F; u32 srcB = (srccolor >> 16) & 0x3F; - if (GPU.GPU3D.RenderDispCnt & (1<<3)) + if (gpu3d.RenderDispCnt & (1<<3)) { u32 dstR = dstcolor & 0x3F; u32 dstG = (dstcolor >> 8) & 0x3F; @@ -418,7 +420,7 @@ u32 SoftRenderer::AlphaBlend(u32 srccolor, u32 dstcolor, u32 alpha) const noexce return srcR | (srcG << 8) | (srcB << 16) | (dstalpha << 24); } -u32 SoftRenderer::RenderPixel(const Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) const +u32 SoftRenderer::RenderPixel(const GPU& gpu, const Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) const { u8 r, g, b, a; @@ -428,7 +430,7 @@ u32 SoftRenderer::RenderPixel(const Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s if (blendmode == 2) { - if (GPU.GPU3D.RenderDispCnt & (1<<1)) + if (gpu.GPU3D.RenderDispCnt & (1<<1)) { // highlight mode: color is calculated normally // except all vertex color components are set @@ -442,7 +444,7 @@ u32 SoftRenderer::RenderPixel(const Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s { // toon mode: vertex color is replaced by toon color - u16 tooncolor = GPU.GPU3D.RenderToonTable[vr >> 1]; + u16 tooncolor = gpu.GPU3D.RenderToonTable[vr >> 1]; vr = (tooncolor << 1) & 0x3E; if (vr) vr++; vg = (tooncolor >> 4) & 0x3E; if (vg) vg++; @@ -450,12 +452,12 @@ u32 SoftRenderer::RenderPixel(const Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s } } - if ((GPU.GPU3D.RenderDispCnt & (1<<0)) && (((polygon->TexParam >> 26) & 0x7) != 0)) + if ((gpu.GPU3D.RenderDispCnt & (1<<0)) && (((polygon->TexParam >> 26) & 0x7) != 0)) { u8 tr, tg, tb; u16 tcolor; u8 talpha; - TextureLookup(polygon->TexParam, polygon->TexPalette, s, t, &tcolor, &talpha); + TextureLookup(gpu, polygon->TexParam, polygon->TexPalette, s, t, &tcolor, &talpha); tr = (tcolor << 1) & 0x3E; if (tr) tr++; tg = (tcolor >> 4) & 0x3E; if (tg) tg++; @@ -503,9 +505,9 @@ u32 SoftRenderer::RenderPixel(const Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s a = polyalpha; } - if ((blendmode == 2) && (GPU.GPU3D.RenderDispCnt & (1<<1))) + if ((blendmode == 2) && (gpu.GPU3D.RenderDispCnt & (1<<1))) { - u16 tooncolor = GPU.GPU3D.RenderToonTable[vr >> 1]; + u16 tooncolor = gpu.GPU3D.RenderToonTable[vr >> 1]; vr = (tooncolor << 1) & 0x3E; if (vr) vr++; vg = (tooncolor >> 4) & 0x3E; if (vg) vg++; @@ -526,7 +528,7 @@ u32 SoftRenderer::RenderPixel(const Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s return r | (g << 8) | (b << 16) | (a << 24); } -void SoftRenderer::PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow) +void SoftRenderer::PlotTranslucentPixel(const GPU3D& gpu3d, u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow) { u32 dstattr = AttrBuffer[pixeladdr]; u32 attr = (polyattr & 0xE0F0) | ((polyattr >> 8) & 0xFF0000) | (1<<22) | (dstattr & 0xFF001F0F); @@ -556,7 +558,7 @@ void SoftRenderer::PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 pol if (!(dstattr & (1<<15))) attr &= ~(1<<15); - color = AlphaBlend(color, ColorBuffer[pixeladdr], color>>24); + color = AlphaBlend(gpu3d, color, ColorBuffer[pixeladdr], color>>24); if (z != -1) DepthBuffer[pixeladdr] = z; @@ -672,7 +674,7 @@ void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* poly } } -void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y) +void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y) { Polygon* polygon = rp->PolyData; @@ -749,7 +751,7 @@ void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y) std::swap(zl, zr); // CHECKME: edge fill rules for swapped opaque shadow mask polygons - if ((GPU.GPU3D.RenderDispCnt & ((1<<4)|(1<<5))) || ((polyalpha < 31) && (GPU.GPU3D.RenderDispCnt & (1<<3))) || wireframe) + if ((gpu3d.RenderDispCnt & ((1<<4)|(1<<5))) || ((polyalpha < 31) && (gpu3d.RenderDispCnt & (1<<3))) || wireframe) { l_filledge = true; r_filledge = true; @@ -777,7 +779,7 @@ void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y) rp->SlopeR.EdgeParams(&r_edgelen, &r_edgecov); // CHECKME: edge fill rules for unswapped opaque shadow mask polygons - if ((GPU.GPU3D.RenderDispCnt & ((1<<4)|(1<<5))) || ((polyalpha < 31) && (GPU.GPU3D.RenderDispCnt & (1<<3))) || wireframe) + if ((gpu3d.RenderDispCnt & ((1<<4)|(1<<5))) || ((polyalpha < 31) && (gpu3d.RenderDispCnt & (1<<3))) || wireframe) { l_filledge = true; r_filledge = true; @@ -798,7 +800,7 @@ void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y) // similarly, we can perform alpha test early (checkme) if (wireframe) polyalpha = 31; - if (polyalpha <= GPU.GPU3D.RenderAlphaRef) return; + if (polyalpha <= gpu3d.RenderAlphaRef) return; // in wireframe mode, there are special rules for equal Z (TODO) @@ -900,7 +902,7 @@ void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y) rp->XR = rp->SlopeR.Step(); } -void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y) +void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y) { Polygon* polygon = rp->PolyData; @@ -984,7 +986,7 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y) // edges are always filled if antialiasing/edgemarking are enabled, // if the pixels are translucent and alpha blending is enabled, or if the polygon is wireframe // checkme: do swapped line polygons exist? - if ((GPU.GPU3D.RenderDispCnt & ((1<<4)|(1<<5))) || ((polyalpha < 31) && (GPU.GPU3D.RenderDispCnt & (1<<3))) || wireframe) + if ((gpu.GPU3D.RenderDispCnt & ((1<<4)|(1<<5))) || ((polyalpha < 31) && (gpu.GPU3D.RenderDispCnt & (1<<3))) || wireframe) { l_filledge = true; r_filledge = true; @@ -1019,7 +1021,7 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y) // * edges are filled if both sides are identical and fully overlapping // edges are always filled if antialiasing/edgemarking are enabled, // if the pixels are translucent and alpha blending is enabled, or if the polygon is wireframe - if ((GPU.GPU3D.RenderDispCnt & ((1<<4)|(1<<5))) || ((polyalpha < 31) && (GPU.GPU3D.RenderDispCnt & (1<<3))) || wireframe) + if ((gpu.GPU3D.RenderDispCnt & ((1<<4)|(1<<5))) || ((polyalpha < 31) && (gpu.GPU3D.RenderDispCnt & (1<<3))) || wireframe) { l_filledge = true; r_filledge = true; @@ -1118,17 +1120,17 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y) s16 s = interpX.Interpolate(sl, sr); s16 t = interpX.Interpolate(tl, tr); - u32 color = RenderPixel(polygon, vr>>3, vg>>3, vb>>3, s, t); + u32 color = RenderPixel(gpu, polygon, vr>>3, vg>>3, vb>>3, s, t); u8 alpha = color >> 24; // alpha test - if (alpha <= GPU.GPU3D.RenderAlphaRef) continue; + if (alpha <= gpu.GPU3D.RenderAlphaRef) continue; if (alpha == 31) { u32 attr = polyattr | edge; - if (GPU.GPU3D.RenderDispCnt & (1<<4)) + if (gpu.GPU3D.RenderDispCnt & (1<<4)) { // anti-aliasing: all edges are rendered @@ -1158,11 +1160,11 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y) else { if (!(polygon->Attr & (1<<11))) z = -1; - PlotTranslucentPixel(pixeladdr, color, z, polyattr, polygon->IsShadow); + PlotTranslucentPixel(gpu.GPU3D, pixeladdr, color, z, polyattr, polygon->IsShadow); // blend with bottom pixel too, if needed if ((dstattr & 0xF) && (pixeladdr < BufferSize)) - PlotTranslucentPixel(pixeladdr+BufferSize, color, z, polyattr, polygon->IsShadow); + PlotTranslucentPixel(gpu.GPU3D, pixeladdr+BufferSize, color, z, polyattr, polygon->IsShadow); } } @@ -1214,17 +1216,17 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y) s16 s = interpX.Interpolate(sl, sr); s16 t = interpX.Interpolate(tl, tr); - u32 color = RenderPixel(polygon, vr>>3, vg>>3, vb>>3, s, t); + u32 color = RenderPixel(gpu, polygon, vr>>3, vg>>3, vb>>3, s, t); u8 alpha = color >> 24; // alpha test - if (alpha <= GPU.GPU3D.RenderAlphaRef) continue; + if (alpha <= gpu.GPU3D.RenderAlphaRef) continue; if (alpha == 31) { u32 attr = polyattr | edge; - if ((GPU.GPU3D.RenderDispCnt & (1<<4)) && (attr & 0xF)) + if ((gpu.GPU3D.RenderDispCnt & (1<<4)) && (attr & 0xF)) { // anti-aliasing: all edges are rendered @@ -1247,11 +1249,11 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y) else { if (!(polygon->Attr & (1<<11))) z = -1; - PlotTranslucentPixel(pixeladdr, color, z, polyattr, polygon->IsShadow); + PlotTranslucentPixel(gpu.GPU3D, pixeladdr, color, z, polyattr, polygon->IsShadow); // blend with bottom pixel too, if needed if ((dstattr & 0xF) && (pixeladdr < BufferSize)) - PlotTranslucentPixel(pixeladdr+BufferSize, color, z, polyattr, polygon->IsShadow); + PlotTranslucentPixel(gpu.GPU3D, pixeladdr+BufferSize, color, z, polyattr, polygon->IsShadow); } } @@ -1306,17 +1308,17 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y) s16 s = interpX.Interpolate(sl, sr); s16 t = interpX.Interpolate(tl, tr); - u32 color = RenderPixel(polygon, vr>>3, vg>>3, vb>>3, s, t); + u32 color = RenderPixel(gpu, polygon, vr>>3, vg>>3, vb>>3, s, t); u8 alpha = color >> 24; // alpha test - if (alpha <= GPU.GPU3D.RenderAlphaRef) continue; + if (alpha <= gpu.GPU3D.RenderAlphaRef) continue; if (alpha == 31) { u32 attr = polyattr | edge; - if (GPU.GPU3D.RenderDispCnt & (1<<4)) + if (gpu.GPU3D.RenderDispCnt & (1<<4)) { // anti-aliasing: all edges are rendered @@ -1346,11 +1348,11 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y) else { if (!(polygon->Attr & (1<<11))) z = -1; - PlotTranslucentPixel(pixeladdr, color, z, polyattr, polygon->IsShadow); + PlotTranslucentPixel(gpu.GPU3D, pixeladdr, color, z, polyattr, polygon->IsShadow); // blend with bottom pixel too, if needed if ((dstattr & 0xF) && (pixeladdr < BufferSize)) - PlotTranslucentPixel(pixeladdr+BufferSize, color, z, polyattr, polygon->IsShadow); + PlotTranslucentPixel(gpu.GPU3D, pixeladdr+BufferSize, color, z, polyattr, polygon->IsShadow); } } @@ -1358,7 +1360,7 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y) rp->XR = rp->SlopeR.Step(); } -void SoftRenderer::RenderScanline(s32 y, int npolys) +void SoftRenderer::RenderScanline(const GPU& gpu, s32 y, int npolys) { for (int i = 0; i < npolys; i++) { @@ -1368,19 +1370,19 @@ void SoftRenderer::RenderScanline(s32 y, int npolys) if (y >= polygon->YTop && (y < polygon->YBottom || (y == polygon->YTop && polygon->YBottom == polygon->YTop))) { if (polygon->IsShadowMask) - RenderShadowMaskScanline(rp, y); + RenderShadowMaskScanline(gpu.GPU3D, rp, y); else - RenderPolygonScanline(rp, y); + RenderPolygonScanline(gpu, rp, y); } } } -u32 SoftRenderer::CalculateFogDensity(u32 pixeladdr) const +u32 SoftRenderer::CalculateFogDensity(const GPU3D& gpu3d, u32 pixeladdr) const { u32 z = DepthBuffer[pixeladdr]; u32 densityid, densityfrac; - if (z < GPU.GPU3D.RenderFogOffset) + if (z < gpu3d.RenderFogOffset) { densityid = 0; densityfrac = 0; @@ -1392,8 +1394,8 @@ u32 SoftRenderer::CalculateFogDensity(u32 pixeladdr) const // on hardware, the final value can overflow the 32-bit range with a shift big enough, // causing fog to 'wrap around' and accidentally apply to larger Z ranges - z -= GPU.GPU3D.RenderFogOffset; - z = (z >> 2) << GPU.GPU3D.RenderFogShift; + z -= gpu3d.RenderFogOffset; + z = (z >> 2) << gpu3d.RenderFogShift; densityid = z >> 17; if (densityid >= 32) @@ -1407,20 +1409,20 @@ u32 SoftRenderer::CalculateFogDensity(u32 pixeladdr) const // checkme (may be too precise?) u32 density = - ((GPU.GPU3D.RenderFogDensityTable[densityid] * (0x20000-densityfrac)) + - (GPU.GPU3D.RenderFogDensityTable[densityid+1] * densityfrac)) >> 17; + ((gpu3d.RenderFogDensityTable[densityid] * (0x20000-densityfrac)) + + (gpu3d.RenderFogDensityTable[densityid+1] * densityfrac)) >> 17; if (density >= 127) density = 128; return density; } -void SoftRenderer::ScanlineFinalPass(s32 y) +void SoftRenderer::ScanlineFinalPass(const GPU3D& gpu3d, s32 y) { // to consider: // clearing all polygon fog flags if the master flag isn't set? // merging all final pass loops into one? - if (GPU.GPU3D.RenderDispCnt & (1<<5)) + if (gpu3d.RenderDispCnt & (1<<5)) { // edge marking // only applied to topmost pixels @@ -1440,7 +1442,7 @@ void SoftRenderer::ScanlineFinalPass(s32 y) ((polyid != (AttrBuffer[pixeladdr-ScanlineWidth] >> 24)) && (z < DepthBuffer[pixeladdr-ScanlineWidth])) || ((polyid != (AttrBuffer[pixeladdr+ScanlineWidth] >> 24)) && (z < DepthBuffer[pixeladdr+ScanlineWidth]))) { - u16 edgecolor = GPU.GPU3D.RenderEdgeTable[polyid >> 3]; + u16 edgecolor = gpu3d.RenderEdgeTable[polyid >> 3]; u32 edgeR = (edgecolor << 1) & 0x3E; if (edgeR) edgeR++; u32 edgeG = (edgecolor >> 4) & 0x3E; if (edgeG) edgeG++; u32 edgeB = (edgecolor >> 9) & 0x3E; if (edgeB) edgeB++; @@ -1453,7 +1455,7 @@ void SoftRenderer::ScanlineFinalPass(s32 y) } } - if (GPU.GPU3D.RenderDispCnt & (1<<7)) + if (gpu3d.RenderDispCnt & (1<<7)) { // fog @@ -1466,12 +1468,12 @@ void SoftRenderer::ScanlineFinalPass(s32 y) // TODO: check the 'fog alpha glitch with small Z' GBAtek talks about - bool fogcolor = !(GPU.GPU3D.RenderDispCnt & (1<<6)); + bool fogcolor = !(gpu3d.RenderDispCnt & (1<<6)); - u32 fogR = (GPU.GPU3D.RenderFogColor << 1) & 0x3E; if (fogR) fogR++; - u32 fogG = (GPU.GPU3D.RenderFogColor >> 4) & 0x3E; if (fogG) fogG++; - u32 fogB = (GPU.GPU3D.RenderFogColor >> 9) & 0x3E; if (fogB) fogB++; - u32 fogA = (GPU.GPU3D.RenderFogColor >> 16) & 0x1F; + u32 fogR = (gpu3d.RenderFogColor << 1) & 0x3E; if (fogR) fogR++; + u32 fogG = (gpu3d.RenderFogColor >> 4) & 0x3E; if (fogG) fogG++; + u32 fogB = (gpu3d.RenderFogColor >> 9) & 0x3E; if (fogB) fogB++; + u32 fogA = (gpu3d.RenderFogColor >> 16) & 0x1F; for (int x = 0; x < 256; x++) { @@ -1481,7 +1483,7 @@ void SoftRenderer::ScanlineFinalPass(s32 y) u32 attr = AttrBuffer[pixeladdr]; if (attr & (1<<15)) { - density = CalculateFogDensity(pixeladdr); + density = CalculateFogDensity(gpu3d, pixeladdr); srccolor = ColorBuffer[pixeladdr]; srcR = srccolor & 0x3F; @@ -1510,7 +1512,7 @@ void SoftRenderer::ScanlineFinalPass(s32 y) attr = AttrBuffer[pixeladdr]; if (!(attr & (1<<15))) continue; - density = CalculateFogDensity(pixeladdr); + density = CalculateFogDensity(gpu3d, pixeladdr); srccolor = ColorBuffer[pixeladdr]; srcR = srccolor & 0x3F; @@ -1531,7 +1533,7 @@ void SoftRenderer::ScanlineFinalPass(s32 y) } } - if (GPU.GPU3D.RenderDispCnt & (1<<4)) + if (gpu3d.RenderDispCnt & (1<<4)) { // anti-aliasing @@ -1584,10 +1586,10 @@ void SoftRenderer::ScanlineFinalPass(s32 y) } } -void SoftRenderer::ClearBuffers() +void SoftRenderer::ClearBuffers(const GPU& gpu) { - u32 clearz = ((GPU.GPU3D.RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF; - u32 polyid = GPU.GPU3D.RenderClearAttr1 & 0x3F000000; // this sets the opaque polygonID + u32 clearz = ((gpu.GPU3D.RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF; + u32 polyid = gpu.GPU3D.RenderClearAttr1 & 0x3F000000; // this sets the opaque polygonID // fill screen borders for edge marking @@ -1617,17 +1619,17 @@ void SoftRenderer::ClearBuffers() // clear the screen - if (GPU.GPU3D.RenderDispCnt & (1<<14)) + if (gpu.GPU3D.RenderDispCnt & (1<<14)) { - u8 xoff = (GPU.GPU3D.RenderClearAttr2 >> 16) & 0xFF; - u8 yoff = (GPU.GPU3D.RenderClearAttr2 >> 24) & 0xFF; + u8 xoff = (gpu.GPU3D.RenderClearAttr2 >> 16) & 0xFF; + u8 yoff = (gpu.GPU3D.RenderClearAttr2 >> 24) & 0xFF; for (int y = 0; y < ScanlineWidth*192; y+=ScanlineWidth) { for (int x = 0; x < 256; x++) { - u16 val2 = ReadVRAM_Texture(0x40000 + (yoff << 9) + (xoff << 1)); - u16 val3 = ReadVRAM_Texture(0x60000 + (yoff << 9) + (xoff << 1)); + u16 val2 = ReadVRAM_Texture(0x40000 + (yoff << 9) + (xoff << 1), gpu); + u16 val3 = ReadVRAM_Texture(0x60000 + (yoff << 9) + (xoff << 1), gpu); // TODO: confirm color conversion u32 r = (val2 << 1) & 0x3E; if (r) r++; @@ -1652,13 +1654,13 @@ void SoftRenderer::ClearBuffers() else { // TODO: confirm color conversion - u32 r = (GPU.GPU3D.RenderClearAttr1 << 1) & 0x3E; if (r) r++; - u32 g = (GPU.GPU3D.RenderClearAttr1 >> 4) & 0x3E; if (g) g++; - u32 b = (GPU.GPU3D.RenderClearAttr1 >> 9) & 0x3E; if (b) b++; - u32 a = (GPU.GPU3D.RenderClearAttr1 >> 16) & 0x1F; + u32 r = (gpu.GPU3D.RenderClearAttr1 << 1) & 0x3E; if (r) r++; + u32 g = (gpu.GPU3D.RenderClearAttr1 >> 4) & 0x3E; if (g) g++; + u32 b = (gpu.GPU3D.RenderClearAttr1 >> 9) & 0x3E; if (b) b++; + u32 a = (gpu.GPU3D.RenderClearAttr1 >> 16) & 0x1F; u32 color = r | (g << 8) | (b << 16) | (a << 24); - polyid |= (GPU.GPU3D.RenderClearAttr1 & 0x8000); + polyid |= (gpu.GPU3D.RenderClearAttr1 & 0x8000); for (int y = 0; y < ScanlineWidth*192; y+=ScanlineWidth) { @@ -1673,7 +1675,7 @@ void SoftRenderer::ClearBuffers() } } -void SoftRenderer::RenderPolygons(bool threaded, Polygon** polygons, int npolys) +void SoftRenderer::RenderPolygons(const GPU& gpu, bool threaded, Polygon** polygons, int npolys) { int j = 0; for (int i = 0; i < npolys; i++) @@ -1682,38 +1684,38 @@ void SoftRenderer::RenderPolygons(bool threaded, Polygon** polygons, int npolys) SetupPolygon(&PolygonList[j++], polygons[i]); } - RenderScanline(0, j); + RenderScanline(gpu, 0, j); for (s32 y = 1; y < 192; y++) { - RenderScanline(y, j); - ScanlineFinalPass(y-1); + RenderScanline(gpu, y, j); + ScanlineFinalPass(gpu.GPU3D, y-1); if (threaded) Platform::Semaphore_Post(Sema_ScanlineCount); } - ScanlineFinalPass(191); + ScanlineFinalPass(gpu.GPU3D, 191); if (threaded) Platform::Semaphore_Post(Sema_ScanlineCount); } -void SoftRenderer::VCount144() +void SoftRenderer::VCount144(GPU& gpu) { - if (RenderThreadRunning.load(std::memory_order_relaxed) && !GPU.GPU3D.AbortFrame) + if (RenderThreadRunning.load(std::memory_order_relaxed) && !gpu.GPU3D.AbortFrame) Platform::Semaphore_Wait(Sema_RenderDone); } -void SoftRenderer::RenderFrame() +void SoftRenderer::RenderFrame(GPU& gpu) { - auto textureDirty = GPU.VRAMDirty_Texture.DeriveState(GPU.VRAMMap_Texture, GPU); - auto texPalDirty = GPU.VRAMDirty_TexPal.DeriveState(GPU.VRAMMap_TexPal, GPU); + auto textureDirty = gpu.VRAMDirty_Texture.DeriveState(gpu.VRAMMap_Texture, gpu); + auto texPalDirty = gpu.VRAMDirty_TexPal.DeriveState(gpu.VRAMMap_TexPal, gpu); - bool textureChanged = GPU.MakeVRAMFlat_TextureCoherent(textureDirty); - bool texPalChanged = GPU.MakeVRAMFlat_TexPalCoherent(texPalDirty); + bool textureChanged = gpu.MakeVRAMFlat_TextureCoherent(textureDirty); + bool texPalChanged = gpu.MakeVRAMFlat_TexPalCoherent(texPalDirty); - FrameIdentical = !(textureChanged || texPalChanged) && GPU.GPU3D.RenderFrameIdentical; + FrameIdentical = !(textureChanged || texPalChanged) && gpu.GPU3D.RenderFrameIdentical; if (RenderThreadRunning.load(std::memory_order_relaxed)) { @@ -1721,17 +1723,17 @@ void SoftRenderer::RenderFrame() } else if (!FrameIdentical) { - ClearBuffers(); - RenderPolygons(false, &GPU.GPU3D.RenderPolygonRAM[0], GPU.GPU3D.RenderNumPolygons); + ClearBuffers(gpu); + RenderPolygons(gpu, false, &gpu.GPU3D.RenderPolygonRAM[0], gpu.GPU3D.RenderNumPolygons); } } -void SoftRenderer::RestartFrame() +void SoftRenderer::RestartFrame(GPU& gpu) { - SetupRenderThread(); + SetupRenderThread(gpu); } -void SoftRenderer::RenderThreadFunc() +void SoftRenderer::RenderThreadFunc(GPU& gpu) { for (;;) { @@ -1745,8 +1747,8 @@ void SoftRenderer::RenderThreadFunc() } else { - ClearBuffers(); - RenderPolygons(true, &GPU.GPU3D.RenderPolygonRAM[0], GPU.GPU3D.RenderNumPolygons); + ClearBuffers(gpu); + RenderPolygons(gpu, true, &gpu.GPU3D.RenderPolygonRAM[0], gpu.GPU3D.RenderNumPolygons); } Platform::Semaphore_Post(Sema_RenderDone); diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index f405b2d8..8fb42013 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -29,19 +29,19 @@ namespace melonDS class SoftRenderer : public Renderer3D { public: - SoftRenderer(melonDS::GPU& gpu, bool threaded = false) noexcept; + SoftRenderer(bool threaded = false) noexcept; ~SoftRenderer() override; - void Reset() override; + void Reset(GPU& gpu) override; - void SetThreaded(bool threaded) noexcept; + void SetThreaded(bool threaded, GPU& gpu) noexcept; [[nodiscard]] bool IsThreaded() const noexcept { return Threaded; } - void VCount144() override; - void RenderFrame() override; - void RestartFrame() override; + void VCount144(GPU& gpu) override; + void RenderFrame(GPU& gpu) override; + void RestartFrame(GPU& gpu) override; u32* GetLine(int line) override; - void SetupRenderThread(); + void SetupRenderThread(GPU& gpu); void StopRenderThread(); private: // Notes on the interpolator: @@ -429,16 +429,16 @@ private: }; template - inline T ReadVRAM_Texture(u32 addr) const + inline T ReadVRAM_Texture(u32 addr, const GPU& gpu) const { - return *(T*)&GPU.VRAMFlat_Texture[addr & 0x7FFFF]; + return *(T*)&gpu.VRAMFlat_Texture[addr & 0x7FFFF]; } template - inline T ReadVRAM_TexPal(u32 addr) const + inline T ReadVRAM_TexPal(u32 addr, const GPU& gpu) const { - return *(T*)&GPU.VRAMFlat_TexPal[addr & 0x1FFFF]; + return *(T*)&gpu.VRAMFlat_TexPal[addr & 0x1FFFF]; } - u32 AlphaBlend(u32 srccolor, u32 dstcolor, u32 alpha) const noexcept; + u32 AlphaBlend(const GPU3D& gpu3d, u32 srccolor, u32 dstcolor, u32 alpha) const noexcept; struct RendererPolygon { @@ -452,23 +452,22 @@ private: }; - melonDS::GPU& GPU; RendererPolygon PolygonList[2048]; - void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) const; - u32 RenderPixel(const Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) const; - void PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow); + void TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) const; + u32 RenderPixel(const GPU& gpu, const Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) const; + void PlotTranslucentPixel(const GPU3D& gpu3d, u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow); void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y) const; void SetupPolygonRightEdge(RendererPolygon* rp, s32 y) const; void SetupPolygon(RendererPolygon* rp, Polygon* polygon) const; - void RenderShadowMaskScanline(RendererPolygon* rp, s32 y); - void RenderPolygonScanline(RendererPolygon* rp, s32 y); - void RenderScanline(s32 y, int npolys); - u32 CalculateFogDensity(u32 pixeladdr) const; - void ScanlineFinalPass(s32 y); - void ClearBuffers(); - void RenderPolygons(bool threaded, Polygon** polygons, int npolys); + void RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y); + void RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y); + void RenderScanline(const GPU& gpu, s32 y, int npolys); + u32 CalculateFogDensity(const GPU3D& gpu3d, u32 pixeladdr) const; + void ScanlineFinalPass(const GPU3D& gpu3d, s32 y); + void ClearBuffers(const GPU& gpu); + void RenderPolygons(const GPU& gpu, bool threaded, Polygon** polygons, int npolys); - void RenderThreadFunc(); + void RenderThreadFunc(GPU& gpu); // buffer dimensions are 258x194 to add a offscreen 1px border // which simplifies edge marking tests diff --git a/src/NDS.cpp b/src/NDS.cpp index 5d2a1ce2..4fa5eefd 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -94,7 +94,7 @@ NDS::NDS(NDSArgs&& args, int type) noexcept : ARM9BIOS(args.ARM9BIOS), JIT(*this, args.JIT), SPU(*this, args.BitDepth, args.Interpolation), - GPU(*this), + GPU(*this, std::move(args.Renderer3D)), SPI(*this, std::move(args.Firmware)), RTC(*this), Wifi(*this), @@ -1322,7 +1322,7 @@ void NDS::SetIRQ(u32 cpu, u32 irq) { CPUStop &= ~CPUStop_Sleep; CPUStop |= CPUStop_Wakeup; - GPU.GPU3D.RestartFrame(); + GPU.GPU3D.RestartFrame(GPU); } } } diff --git a/src/NDS.h b/src/NDS.h index e178c4a2..d485bd3c 100644 --- a/src/NDS.h +++ b/src/NDS.h @@ -328,6 +328,14 @@ public: Firmware& GetFirmware() { return SPI.GetFirmwareMem()->GetFirmware(); } void SetFirmware(Firmware&& firmware) { SPI.GetFirmwareMem()->SetFirmware(std::move(firmware)); } + const Renderer3D& GetRenderer3D() const noexcept { return GPU.GetRenderer3D(); } + Renderer3D& GetRenderer3D() noexcept { return GPU.GetRenderer3D(); } + void SetRenderer3D(std::unique_ptr&& renderer) noexcept + { + if (renderer != nullptr) + GPU.SetRenderer3D(std::move(renderer)); + } + virtual bool NeedsDirectBoot() const; void SetupDirectBoot(const std::string& romname); virtual void SetupDirectBoot(); diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp index 45dc4e06..3eb5db09 100644 --- a/src/frontend/qt_sdl/main.cpp +++ b/src/frontend/qt_sdl/main.cpp @@ -544,11 +544,11 @@ void EmuThread::run() if (videoRenderer == 0) { // If we're using the software renderer... - NDS->GPU.SetRenderer3D(std::make_unique(NDS->GPU, Config::Threaded3D != 0)); + NDS->GPU.SetRenderer3D(std::make_unique(Config::Threaded3D != 0)); } else { - auto glrenderer = melonDS::GLRenderer::New(NDS->GPU); + auto glrenderer = melonDS::GLRenderer::New(); glrenderer->SetRenderSettings(Config::GL_BetterPolygons, Config::GL_ScaleFactor); NDS->GPU.SetRenderer3D(std::move(glrenderer)); } @@ -677,11 +677,11 @@ void EmuThread::run() if (videoRenderer == 0) { // If we're using the software renderer... - NDS->GPU.SetRenderer3D(std::make_unique(NDS->GPU, Config::Threaded3D != 0)); + NDS->GPU.SetRenderer3D(std::make_unique(Config::Threaded3D != 0)); } else { - auto glrenderer = melonDS::GLRenderer::New(NDS->GPU); + auto glrenderer = melonDS::GLRenderer::New(); glrenderer->SetRenderSettings(Config::GL_BetterPolygons, Config::GL_ScaleFactor); NDS->GPU.SetRenderer3D(std::move(glrenderer)); }