From 97ea3a603ef4df32945dfd33300cedcb04ffa67a Mon Sep 17 00:00:00 2001 From: ezio1900 Date: Mon, 19 Apr 2021 20:20:37 +0800 Subject: [PATCH] VideoCommon: Fix scissorOffset, handle negative value correctly VideoCommon: Change the type of BPMemory.scissorOffset to 10bit signed: S32X10Y10 VideoBackends: Fix Software Clipper.PerspectiveDivide function, use BPMemory.scissorOffset instead of hard code 342 --- .../Core/VideoBackends/Software/Clipper.cpp | 6 ++-- .../VideoBackends/Software/Rasterizer.cpp | 12 +++---- Source/Core/VideoCommon/BPFunctions.cpp | 34 ++++++++++++------- Source/Core/VideoCommon/BPMemory.h | 8 ++++- Source/Core/VideoCommon/BPStructs.cpp | 24 ++++++------- 5 files changed, 51 insertions(+), 33 deletions(-) diff --git a/Source/Core/VideoBackends/Software/Clipper.cpp b/Source/Core/VideoBackends/Software/Clipper.cpp index 417bc63fa3..15cd650b79 100644 --- a/Source/Core/VideoBackends/Software/Clipper.cpp +++ b/Source/Core/VideoBackends/Software/Clipper.cpp @@ -513,8 +513,10 @@ void PerspectiveDivide(OutputVertexData* vertex) Vec3& screen = vertex->screenPosition; float wInverse = 1.0f / projected.w; - screen.x = projected.x * wInverse * xfmem.viewport.wd + xfmem.viewport.xOrig - 342; - screen.y = projected.y * wInverse * xfmem.viewport.ht + xfmem.viewport.yOrig - 342; + screen.x = + projected.x * wInverse * xfmem.viewport.wd + xfmem.viewport.xOrig - bpmem.scissorOffset.x * 2; + screen.y = + projected.y * wInverse * xfmem.viewport.ht + xfmem.viewport.yOrig - bpmem.scissorOffset.y * 2; screen.z = projected.z * wInverse * xfmem.viewport.zRange + xfmem.viewport.farZ; } } // namespace Clipper diff --git a/Source/Core/VideoBackends/Software/Rasterizer.cpp b/Source/Core/VideoBackends/Software/Rasterizer.cpp index e987817c8c..cf5374342b 100644 --- a/Source/Core/VideoBackends/Software/Rasterizer.cpp +++ b/Source/Core/VideoBackends/Software/Rasterizer.cpp @@ -306,22 +306,22 @@ void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v s32 maxy = (std::max(std::max(Y1, Y2), Y3) + 0xF) >> 4; // scissor - int xoff = bpmem.scissorOffset.x * 2 - 342; - int yoff = bpmem.scissorOffset.y * 2 - 342; + s32 xoff = bpmem.scissorOffset.x * 2; + s32 yoff = bpmem.scissorOffset.y * 2; - s32 scissorLeft = bpmem.scissorTL.x - xoff - 342; + s32 scissorLeft = bpmem.scissorTL.x - xoff; if (scissorLeft < 0) scissorLeft = 0; - s32 scissorTop = bpmem.scissorTL.y - yoff - 342; + s32 scissorTop = bpmem.scissorTL.y - yoff; if (scissorTop < 0) scissorTop = 0; - s32 scissorRight = bpmem.scissorBR.x - xoff - 341; + s32 scissorRight = bpmem.scissorBR.x - xoff + 1; if (scissorRight > s32(EFB_WIDTH)) scissorRight = EFB_WIDTH; - s32 scissorBottom = bpmem.scissorBR.y - yoff - 341; + s32 scissorBottom = bpmem.scissorBR.y - yoff + 1; if (scissorBottom > s32(EFB_HEIGHT)) scissorBottom = EFB_HEIGHT; diff --git a/Source/Core/VideoCommon/BPFunctions.cpp b/Source/Core/VideoCommon/BPFunctions.cpp index 1242c8e853..f7ab305303 100644 --- a/Source/Core/VideoCommon/BPFunctions.cpp +++ b/Source/Core/VideoCommon/BPFunctions.cpp @@ -39,19 +39,29 @@ void SetGenerationMode() void SetScissor() { - /* NOTE: the minimum value here for the scissor rect and offset is -342. - * GX internally adds on an offset of 342 to both the offset and scissor - * coords to ensure that the register was always unsigned. + /* NOTE: the minimum value here for the scissor rect is -342. + * GX SDK functions internally add an offset of 342 to scissor coords to + * ensure that the register was always unsigned. * * The code that was here before tried to "undo" this offset, but * since we always take the difference, the +342 added to both * sides cancels out. */ - /* The scissor offset is always even, so to save space, the scissor offset - * register is scaled down by 2. So, if somebody calls - * GX_SetScissorBoxOffset(20, 20); the registers will be set to 10, 10. */ - const int xoff = bpmem.scissorOffset.x * 2; - const int yoff = bpmem.scissorOffset.y * 2; + /* NOTE: With a positive scissor offset, the scissor rect is shifted left and/or up; + * With a negative scissor offset, the scissor rect is shifted right and/or down. + * + * GX SDK functions internally add an offset of 342 to scissor offset. + * The scissor offset is always even, so to save space, the scissor offset register + * is scaled down by 2. So, if somebody calls GX_SetScissorBoxOffset(20, 20); + * the registers will be set to ((20 + 342) / 2 = 181, 181). + * + * The scissor offset register is 10bit signed [-512, 511]. + * e.g. In Super Mario Galaxy 1 and 2, during the "Boss roar effect", + * for a scissor offset of (0, -464), the scissor offset register will be set to + * (171, (-464 + 342) / 2 = -61). + */ + s32 xoff = bpmem.scissorOffset.x * 2; + s32 yoff = bpmem.scissorOffset.y * 2; MathUtil::Rectangle native_rc(bpmem.scissorTL.x - xoff, bpmem.scissorTL.y - yoff, bpmem.scissorBR.x - xoff + 1, bpmem.scissorBR.y - yoff + 1); @@ -65,10 +75,10 @@ void SetScissor() void SetViewport() { - int scissor_x_off = bpmem.scissorOffset.x * 2; - int scissor_y_off = bpmem.scissorOffset.y * 2; - float x = g_renderer->EFBToScaledXf(xfmem.viewport.xOrig - xfmem.viewport.wd - scissor_x_off); - float y = g_renderer->EFBToScaledYf(xfmem.viewport.yOrig + xfmem.viewport.ht - scissor_y_off); + s32 xoff = bpmem.scissorOffset.x * 2; + s32 yoff = bpmem.scissorOffset.y * 2; + float x = g_renderer->EFBToScaledXf(xfmem.viewport.xOrig - xfmem.viewport.wd - xoff); + float y = g_renderer->EFBToScaledYf(xfmem.viewport.yOrig + xfmem.viewport.ht - yoff); float width = g_renderer->EFBToScaledXf(2.0f * xfmem.viewport.wd); float height = g_renderer->EFBToScaledYf(-2.0f * xfmem.viewport.ht); diff --git a/Source/Core/VideoCommon/BPMemory.h b/Source/Core/VideoCommon/BPMemory.h index 035fa5531e..f22ceda2cd 100644 --- a/Source/Core/VideoCommon/BPMemory.h +++ b/Source/Core/VideoCommon/BPMemory.h @@ -993,6 +993,12 @@ union X10Y10 BitField<10, 10, u32> y; u32 hex; }; +union S32X10Y10 +{ + BitField<0, 10, s32> x; + BitField<10, 10, s32> y; + u32 hex; +}; // Framebuffer/pixel stuff (incl fog) enum class SrcBlendFactor : u32 @@ -1949,7 +1955,7 @@ struct BPMemory u32 boundbox0; // 55 u32 boundbox1; // 56 u32 unknown7[2]; // 57,58 - X10Y10 scissorOffset; // 59 + S32X10Y10 scissorOffset; // 59 u32 unknown8[6]; // 5a,5b,5c,5d, 5e,5f BPS_TmemConfig tmem_config; // 60-66 u32 metric; // 67 diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp index 699b92ce17..8754152fe9 100644 --- a/Source/Core/VideoCommon/BPStructs.cpp +++ b/Source/Core/VideoCommon/BPStructs.cpp @@ -217,14 +217,14 @@ static void BPWritten(const BPCmd& bp) u32 destAddr = bpmem.copyTexDest << 5; u32 destStride = bpmem.copyMipMapStrideChannels << 5; - MathUtil::Rectangle srcRect; - srcRect.left = static_cast(bpmem.copyTexSrcXY.x); - srcRect.top = static_cast(bpmem.copyTexSrcXY.y); + MathUtil::Rectangle srcRect; + srcRect.left = bpmem.copyTexSrcXY.x; + srcRect.top = bpmem.copyTexSrcXY.y; // Here Width+1 like Height, otherwise some textures are corrupted already since the native // resolution. - srcRect.right = static_cast(bpmem.copyTexSrcXY.x + bpmem.copyTexSrcWH.x + 1); - srcRect.bottom = static_cast(bpmem.copyTexSrcXY.y + bpmem.copyTexSrcWH.y + 1); + srcRect.right = bpmem.copyTexSrcXY.x + bpmem.copyTexSrcWH.x + 1; + srcRect.bottom = bpmem.copyTexSrcXY.y + bpmem.copyTexSrcWH.y + 1; // Since the copy X and Y coordinates/sizes are 10-bit, the game can configure a copy region up // to 1024x1024. Hardware tests have found that the number of bytes written does not depend on @@ -238,21 +238,21 @@ static void BPWritten(const BPCmd& bp) // writing the junk data, we don't write anything to RAM at all for over-sized copies, and clamp // to the EFB borders for over-offset copies. The arcade virtual console games (e.g. 1942) are // known for configuring these out-of-range copies. - int copy_width = srcRect.GetWidth(); - int copy_height = srcRect.GetHeight(); - if (srcRect.right > s32(EFB_WIDTH) || srcRect.bottom > s32(EFB_HEIGHT)) + u32 copy_width = srcRect.GetWidth(); + u32 copy_height = srcRect.GetHeight(); + if (srcRect.right > EFB_WIDTH || srcRect.bottom > EFB_HEIGHT) { WARN_LOG_FMT(VIDEO, "Oversized EFB copy: {}x{} (offset {},{} stride {})", copy_width, copy_height, srcRect.left, srcRect.top, destStride); // Adjust the copy size to fit within the EFB. So that we don't end up with a stretched image, // instead of clamping the source rectangle, we reduce it by the over-sized amount. - if (copy_width > s32(EFB_WIDTH)) + if (copy_width > EFB_WIDTH) { srcRect.right -= copy_width - EFB_WIDTH; copy_width = EFB_WIDTH; } - if (copy_height > s32(EFB_HEIGHT)) + if (copy_height > EFB_HEIGHT) { srcRect.bottom -= copy_height - EFB_HEIGHT; copy_height = EFB_HEIGHT; @@ -1010,8 +1010,8 @@ std::pair GetBPRegInfo(u8 cmd, u32 cmddata) case BPMEM_SCISSOROFFSET: // 0x59 { - const X10Y10 xy{.hex = cmddata}; - return std::make_pair(RegName(BPMEM_EFB_TL), + const S32X10Y10 xy{.hex = cmddata}; + return std::make_pair(RegName(BPMEM_SCISSOROFFSET), fmt::format("Scissor X offset: {}\nScissor Y offset: {}", xy.x, xy.y)); }