From 66cec85a9a1e95c8fa76ac6ebf9f718cfafdf5bf Mon Sep 17 00:00:00 2001 From: Arisotura Date: Thu, 10 Dec 2020 19:12:08 +0100 Subject: [PATCH] GPU: forward BG0HOFS to internal rendering engine register for 3D layer scroll (only when the rendering engine is enabled). fixes #840 thank you RSDuck and Hydr8gon for your insight into this. --- src/GPU2D.cpp | 17 +++++++++++---- src/GPU2D_Soft.cpp | 37 ++++++++++---------------------- src/GPU3D.cpp | 46 ++++++++++++++++++++++++++++++++++++---- src/GPU3D.h | 4 ++++ src/GPU_OpenGL.cpp | 5 +++++ src/GPU_OpenGL_shaders.h | 9 +++++--- 6 files changed, 81 insertions(+), 37 deletions(-) diff --git a/src/GPU2D.cpp b/src/GPU2D.cpp index d2a8b34c..eb160d8a 100644 --- a/src/GPU2D.cpp +++ b/src/GPU2D.cpp @@ -303,8 +303,14 @@ void GPU2D::Write8(u32 addr, u8 val) case 0x00E: BGCnt[3] = (BGCnt[3] & 0xFF00) | val; return; case 0x00F: BGCnt[3] = (BGCnt[3] & 0x00FF) | (val << 8); return; - case 0x010: BGXPos[0] = (BGXPos[0] & 0xFF00) | val; return; - case 0x011: BGXPos[0] = (BGXPos[0] & 0x00FF) | (val << 8); return; + case 0x010: + BGXPos[0] = (BGXPos[0] & 0xFF00) | val; + if (Num == 0) GPU3D::SetRenderXPos(BGXPos[0]); + return; + case 0x011: + BGXPos[0] = (BGXPos[0] & 0x00FF) | (val << 8); + if (Num == 0) GPU3D::SetRenderXPos(BGXPos[0]); + return; case 0x012: BGYPos[0] = (BGYPos[0] & 0xFF00) | val; return; case 0x013: BGYPos[0] = (BGYPos[0] & 0x00FF) | (val << 8); return; case 0x014: BGXPos[1] = (BGXPos[1] & 0xFF00) | val; return; @@ -401,7 +407,10 @@ void GPU2D::Write16(u32 addr, u16 val) case 0x00C: BGCnt[2] = val; return; case 0x00E: BGCnt[3] = val; return; - case 0x010: BGXPos[0] = val; return; + case 0x010: + BGXPos[0] = val; + if (Num == 0) GPU3D::SetRenderXPos(BGXPos[0]); + return; case 0x012: BGYPos[0] = val; return; case 0x014: BGXPos[1] = val; return; case 0x016: BGYPos[1] = val; return; @@ -716,4 +725,4 @@ void GPU2D::GetOBJVRAM(u8*& data, u32& mask) data = GPU::VRAMFlat_BOBJ; mask = 0x1FFFF; } -} \ No newline at end of file +} diff --git a/src/GPU2D_Soft.cpp b/src/GPU2D_Soft.cpp index 7345af9c..c686bad7 100644 --- a/src/GPU2D_Soft.cpp +++ b/src/GPU2D_Soft.cpp @@ -403,7 +403,7 @@ void GPU2D_Soft::DoCapture(u32 line, u32 width) { // 3D on top, blending - u32 _3dval = _3DLine[val3 & 0xFF]; + u32 _3dval = _3DLine[i]; if ((_3dval >> 24) > 0) val1 = ColorBlend5(_3dval, val1); else @@ -413,7 +413,7 @@ void GPU2D_Soft::DoCapture(u32 line, u32 width) { // 3D on bottom, blending - u32 _3dval = _3DLine[val3 & 0xFF]; + u32 _3dval = _3DLine[i]; if ((_3dval >> 24) > 0) { u32 eva = (val3 >> 8) & 0x1F; @@ -428,7 +428,7 @@ void GPU2D_Soft::DoCapture(u32 line, u32 width) { // 3D on top, normal/fade - u32 _3dval = _3DLine[val3 & 0xFF]; + u32 _3dval = _3DLine[i]; if ((_3dval >> 24) > 0) { u32 evy = (val3 >> 8) & 0x1F; @@ -807,7 +807,7 @@ void GPU2D_Soft::DrawScanline_BGOBJ(u32 line) BGOBJLine[i] = val2; BGOBJLine[256+i] = ColorComposite(i, val2, val3); - BGOBJLine[512+i] = 0x04000000 | (val1 & 0xFF); + BGOBJLine[512+i] = 0x04000000; } else if ((flag1 & 0xC0) == 0x40) { @@ -819,7 +819,7 @@ void GPU2D_Soft::DrawScanline_BGOBJ(u32 line) BGOBJLine[i] = val2; BGOBJLine[256+i] = ColorComposite(i, val2, val3); - BGOBJLine[512+i] = (bldcnteffect << 24) | (EVY << 8) | (val1 & 0xFF); + BGOBJLine[512+i] = (bldcnteffect << 24) | (EVY << 8); } else if (((flag2 & 0xC0) == 0x40) && ((BlendCnt & 0x01C0) == 0x0140)) { @@ -842,7 +842,7 @@ void GPU2D_Soft::DrawScanline_BGOBJ(u32 line) BGOBJLine[i] = val1; BGOBJLine[256+i] = ColorComposite(i, val1, val3); - BGOBJLine[512+i] = (bldcnteffect << 24) | (EVB << 16) | (EVA << 8) | (val2 & 0xFF); + BGOBJLine[512+i] = (bldcnteffect << 24) | (EVB << 16) | (EVA << 8); } else { @@ -910,39 +910,24 @@ void GPU2D_Soft::DrawPixel_Accel(u32* dst, u16 color, u32 flag) void GPU2D_Soft::DrawBG_3D() { - u16 xoff = BGXPos[0]; int i = 0; - int iend = 256; - - if (xoff & 0x100) - { - i = (0x100 - (xoff & 0xFF)); - xoff += i; - } - if ((xoff - i + iend - 1) & 0x100) - { - iend -= (xoff & 0xFF); - } if (Accelerated) { - for (; i < iend; i++) + for (i = 0; i < 256; i++) { - int pos = xoff++; - if (!(WindowMask[i] & 0x01)) continue; BGOBJLine[i+512] = BGOBJLine[i+256]; BGOBJLine[i+256] = BGOBJLine[i]; - BGOBJLine[i] = 0x40000000 | pos; // 3D-layer placeholder + BGOBJLine[i] = 0x40000000; // 3D-layer placeholder } } else { - for (; i < iend; i++) + for (i = 0; i < 256; i++) { - u32 c = _3DLine[xoff]; - xoff++; + u32 c = _3DLine[i]; if ((c >> 24) == 0) continue; if (!(WindowMask[i] & 0x01)) continue; @@ -2227,4 +2212,4 @@ void GPU2D_Soft::MosaicXSizeChanged() { CurBGXMosaicTable = MosaicTable[BGMosaicSize[0]]; CurOBJXMosaicTable = MosaicTable[OBJMosaicSize[1]]; -} \ No newline at end of file +} diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp index fd8d3200..a02e2866 100644 --- a/src/GPU3D.cpp +++ b/src/GPU3D.cpp @@ -181,6 +181,8 @@ u32 RenderClearAttr1, RenderClearAttr2; bool RenderFrameIdentical; +u16 RenderXPos; + u32 ZeroDotWLimit; u32 GXStat; @@ -385,6 +387,8 @@ void Reset() FlushAttributes = 0; ResetRenderingState(); + + RenderXPos = 0; } void DoSavestate(Savestate* file) @@ -430,6 +434,8 @@ void DoSavestate(Savestate* file) file->Var32(&RenderClearAttr1); file->Var32(&RenderClearAttr2); + file->Var16(&RenderXPos); + file->Var32(&ZeroDotWLimit); file->Var32(&GXStat); @@ -587,8 +593,6 @@ void DoSavestate(Savestate* file) } } - // probably not worth storing the vblank-latched Renderxxxxxx variables - CmdStallQueue->DoSavestate(file); file->Var32((u32*)&VertexPipeline); file->Var32((u32*)&NormalPipeline); @@ -2564,14 +2568,48 @@ void VCount215() #endif } +void SetRenderXPos(u16 xpos) +{ + if (!RenderingEnabled) return; + + RenderXPos = xpos & 0x01FF; +} + +u32 ScrolledLine[256]; + u32* GetLine(int line) { - if (GPU::Renderer == 0) return SoftRenderer::GetLine(line); + u32* rawline; + + if (GPU::Renderer == 0) rawline = SoftRenderer::GetLine(line); #ifdef OGLRENDERER_ENABLED - else return GLRenderer::GetLine(line); + else rawline = GLRenderer::GetLine(line); #else return NULL; #endif + + if (RenderXPos == 0) return rawline; + + // apply X scroll + + if (RenderXPos & 0x100) + { + int i = 0, j = RenderXPos; + for (; j < 512; i++, j++) + ScrolledLine[i] = 0; + for (j = 0; i < 256; i++, j++) + ScrolledLine[i] = rawline[j]; + } + else + { + int i = 0, j = RenderXPos; + for (; j < 256; i++, j++) + ScrolledLine[i] = rawline[j]; + for (; i < 256; i++) + ScrolledLine[i] = 0; + } + + return ScrolledLine; } diff --git a/src/GPU3D.h b/src/GPU3D.h index 0477c4f1..69b67fa7 100644 --- a/src/GPU3D.h +++ b/src/GPU3D.h @@ -89,6 +89,8 @@ extern u32 RenderClearAttr1, RenderClearAttr2; extern bool RenderFrameIdentical; +extern u16 RenderXPos; + extern std::array RenderPolygonRAM; extern u32 RenderNumPolygons; @@ -114,6 +116,8 @@ void CheckFIFODMA(); void VCount144(); void VBlank(); void VCount215(); + +void SetRenderXPos(u16 xpos); u32* GetLine(int line); void WriteToGXFIFO(u32 val); diff --git a/src/GPU_OpenGL.cpp b/src/GPU_OpenGL.cpp index 359e9cd4..0c6cf004 100644 --- a/src/GPU_OpenGL.cpp +++ b/src/GPU_OpenGL.cpp @@ -36,6 +36,7 @@ int ScreenH, ScreenW; GLuint CompShader[1][3]; GLuint CompScaleLoc[1]; +GLuint Comp3DXPosLoc[1]; GLuint CompVertexBufferID; GLuint CompVertexArrayID; @@ -64,6 +65,7 @@ bool Init() return false; CompScaleLoc[i] = glGetUniformLocation(CompShader[i][2], "u3DScale"); + Comp3DXPosLoc[i] = glGetUniformLocation(CompShader[i][2], "u3DXPos"); glUseProgram(CompShader[i][2]); uni_id = glGetUniformLocation(CompShader[i][2], "ScreenTex"); @@ -180,6 +182,9 @@ void RenderFrame() OpenGL::UseShaderProgram(CompShader[0]); glUniform1ui(CompScaleLoc[0], Scale); + // TODO: support setting this midframe, if ever needed + glUniform1i(Comp3DXPosLoc[0], ((int)GPU3D::RenderXPos << 23) >> 23); + int frontbuf = GPU::FrontBuffer; glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, CompScreenInputTex); diff --git a/src/GPU_OpenGL_shaders.h b/src/GPU_OpenGL_shaders.h index 20ac7673..03ddb7af 100644 --- a/src/GPU_OpenGL_shaders.h +++ b/src/GPU_OpenGL_shaders.h @@ -40,6 +40,7 @@ void main() const char* kCompositorFS_Nearest = R"(#version 140 uniform uint u3DScale; +uniform int u3DXPos; uniform usampler2D ScreenTex; uniform sampler2D _3DTex; @@ -52,6 +53,8 @@ void main() { ivec4 pixel = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord), 0)); + float _3dxpos = float(u3DXPos); + ivec4 mbright = ivec4(texelFetch(ScreenTex, ivec2(256*3, int(fTexcoord.y)), 0)); int dispmode = mbright.b & 0x3; @@ -68,7 +71,7 @@ void main() { // 3D on top, blending - float xpos = val3.r + fract(fTexcoord.x); + float xpos = fTexcoord.x + _3dxpos; float ypos = mod(fTexcoord.y, 192); ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra * vec4(63,63,63,31)); @@ -89,7 +92,7 @@ void main() { // 3D on bottom, blending - float xpos = val3.r + fract(fTexcoord.x); + float xpos = fTexcoord.x + _3dxpos; float ypos = mod(fTexcoord.y, 192); ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra * vec4(63,63,63,31)); @@ -109,7 +112,7 @@ void main() { // 3D on top, normal/fade - float xpos = val3.r + fract(fTexcoord.x); + float xpos = fTexcoord.x + _3dxpos; float ypos = mod(fTexcoord.y, 192); ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra * vec4(63,63,63,31));