From 208ec0d50f9bc8fed055da4d7a964068f687d09f Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 29 Jun 2024 19:17:48 -0400 Subject: [PATCH] small optimizations --- src/GPU3D.cpp | 1 + src/GPU3D.h | 1 + src/GPU3D_Soft.cpp | 157 +++++++++++++++++++++++++-------------------- src/GPU3D_Soft.h | 47 ++++++++------ 4 files changed, 115 insertions(+), 91 deletions(-) diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp index f6eb0de6..f0b8865a 100644 --- a/src/GPU3D.cpp +++ b/src/GPU3D.cpp @@ -1216,6 +1216,7 @@ void GPU3D::SubmitPolygon() noexcept poly->Degenerate = false; poly->Type = 0; + poly->OOBRendering = UpdateLastPoly; poly->FacingView = facingview; diff --git a/src/GPU3D.h b/src/GPU3D.h index 50725628..a670a079 100644 --- a/src/GPU3D.h +++ b/src/GPU3D.h @@ -84,6 +84,7 @@ struct Polygon bool IsShadow; int Type; // 0=regular 1=line + bool OOBRendering; u32 VTop, VBottom; // vertex indices s32 YTop, YBottom; // Y coords diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index f1ea1509..ba70b3a8 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -623,9 +623,9 @@ void SoftRenderer::SetupPolygonLeftEdge(SoftRenderer::RendererPolygon* rp, s32 y if (y1 < polygon->Vertices[rp->CurVL]->FinalPosition[1]) y1 += 256; - rp->XL = rp->SlopeL.Setup(polygon->Vertices[rp->CurVL]->FinalPosition[0], polygon->Vertices[rp->NextVL]->FinalPosition[0], - polygon->Vertices[rp->CurVL]->FinalPosition[1], y1, - polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y); + rp->XL = rp->SlopeL.Setup(polygon->Vertices[rp->CurVL]->FinalPosition[0], polygon->Vertices[rp->NextVL]->FinalPosition[0], + polygon->Vertices[rp->CurVL]->FinalPosition[1], y1, + polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y); } void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 y) const @@ -660,9 +660,9 @@ void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 if (y1 < polygon->Vertices[rp->CurVR]->FinalPosition[1]) y1 += 256; - rp->XR = rp->SlopeR.Setup(polygon->Vertices[rp->CurVR]->FinalPosition[0], polygon->Vertices[rp->NextVR]->FinalPosition[0], - polygon->Vertices[rp->CurVR]->FinalPosition[1], y1, - polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y); + rp->XR = rp->SlopeR.Setup(polygon->Vertices[rp->CurVR]->FinalPosition[0], polygon->Vertices[rp->NextVR]->FinalPosition[0], + polygon->Vertices[rp->CurVR]->FinalPosition[1], y1, + polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y); } void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* polygon) const @@ -723,9 +723,9 @@ void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* poly if (y1 < polygon->Vertices[rp->CurVL]->FinalPosition[1]) y1 += 256; - rp->XL = rp->SlopeL.Setup(polygon->Vertices[rp->CurVL]->FinalPosition[0], polygon->Vertices[rp->NextVL]->FinalPosition[0], - polygon->Vertices[rp->CurVL]->FinalPosition[1], y1, - polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y); + rp->XL = rp->SlopeL.Setup(polygon->Vertices[rp->CurVL]->FinalPosition[0], polygon->Vertices[rp->NextVL]->FinalPosition[0], + polygon->Vertices[rp->CurVL]->FinalPosition[1], y1, + polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y); y = ytop; if (y < polygon->Vertices[rp->CurVR]->FinalPosition[1]) @@ -735,12 +735,13 @@ void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* poly if (y1 < polygon->Vertices[rp->CurVR]->FinalPosition[1]) y1 += 256; - rp->XR = rp->SlopeR.Setup(polygon->Vertices[rp->CurVR]->FinalPosition[0], polygon->Vertices[rp->NextVR]->FinalPosition[0], - polygon->Vertices[rp->CurVR]->FinalPosition[1], y1, - polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y); + rp->XR = rp->SlopeR.Setup(polygon->Vertices[rp->CurVR]->FinalPosition[0], polygon->Vertices[rp->NextVR]->FinalPosition[0], + polygon->Vertices[rp->CurVR]->FinalPosition[1], y1, + polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y); } } +template void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y) { Polygon* polygon = rp->PolyData; @@ -788,11 +789,11 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* xstart = rp->XL; xend = rp->XR; - s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]); - s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]); + s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]); + s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]); - s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL], polygon->WBuffer); - s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR], polygon->WBuffer); + s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL], polygon->WBuffer); + s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR], polygon->WBuffer); // right vertical edges are pushed 1px to the left as long as either: // the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen @@ -875,8 +876,13 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* if (y == polygon->YTop) yedge = 0x4; else if (y == polygon->YBottom-1) yedge = 0x8; int edge; + + xend += 1; + Interpolator<0> interpX; + interpX.Setup(xstart, xend, wl, wr); + // CHECKME: should the unclamped values be used for timings? - // negative values are clamped to 0 before interpolation is done + // negative values are clamped to 0 if (xstart < 0) { l_edgelen += xstart; @@ -884,14 +890,12 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* xstart = 0; } s32 x = xstart; - //xend += 1; dont forget to fix that later- - // too big values are clamped to 511 before interpolation is done + // too big values are clamped to 511 if (xend > 511) { r_edgelen += 256 - xend; xend = 511; } - Interpolator<0> interpX(xstart, xend+1, wl, wr); s32 xlimit; @@ -901,7 +905,6 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* // part 1: left edge edge = yedge | 0x1; xlimit = xstart+l_edgelen; - if (xlimit > xend+1) xlimit = xend+1; if (xlimit > 256) xlimit = 256; if (!l_filledge) x = xlimit; @@ -912,7 +915,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* interpX.SetX(x); - s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); + s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); u32 dstattr = AttrBuffer[pixeladdr]; if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr)) @@ -928,8 +931,8 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* // part 2: polygon inside edge = yedge; - xlimit = xend-r_edgelen+1; - if (xlimit > xend+1) xlimit = xend+1; + xlimit = xend-r_edgelen; + if (xlimit > xend) xlimit = xend; if (xlimit > 256) xlimit = 256; if (wireframe && !edge) x = std::max(x, xlimit); else for (; x < xlimit; x++) @@ -938,7 +941,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* interpX.SetX(x); - s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); + s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); u32 dstattr = AttrBuffer[pixeladdr]; if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr)) @@ -954,7 +957,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* // part 3: right edge edge = yedge | 0x2; - xlimit = xend+1; + xlimit = xend; if (xlimit > 256) xlimit = 256; if (r_filledge) @@ -964,7 +967,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* interpX.SetX(x); - s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); + s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); u32 dstattr = AttrBuffer[pixeladdr]; if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr)) @@ -978,10 +981,11 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* } } - rp->XL = rp->SlopeL.Step(); - rp->XR = rp->SlopeR.Step(); + rp->XL = rp->SlopeL.Step(); + rp->XR = rp->SlopeR.Step(); } +template void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y) { Polygon* polygon = rp->PolyData; @@ -1026,11 +1030,11 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 xstart = rp->XL; xend = rp->XR; - s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]); - s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]); + s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]); + s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]); - s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL], polygon->WBuffer); - s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR], polygon->WBuffer); + s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL], polygon->WBuffer); + s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR], polygon->WBuffer); // right vertical edges are pushed 1px to the left as long as either: // the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen @@ -1118,19 +1122,19 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 // interpolate attributes along Y - s32 rl = interp_start->Interpolate(vlcur->FinalColor[0], vlnext->FinalColor[0]); - s32 gl = interp_start->Interpolate(vlcur->FinalColor[1], vlnext->FinalColor[1]); - s32 bl = interp_start->Interpolate(vlcur->FinalColor[2], vlnext->FinalColor[2]); + s32 rl = interp_start->Interpolate(vlcur->FinalColor[0], vlnext->FinalColor[0]); + s32 gl = interp_start->Interpolate(vlcur->FinalColor[1], vlnext->FinalColor[1]); + s32 bl = interp_start->Interpolate(vlcur->FinalColor[2], vlnext->FinalColor[2]); - s32 sl = interp_start->Interpolate(vlcur->TexCoords[0], vlnext->TexCoords[0]); - s32 tl = interp_start->Interpolate(vlcur->TexCoords[1], vlnext->TexCoords[1]); + s32 sl = interp_start->Interpolate(vlcur->TexCoords[0], vlnext->TexCoords[0]); + s32 tl = interp_start->Interpolate(vlcur->TexCoords[1], vlnext->TexCoords[1]); - s32 rr = interp_end->Interpolate(vrcur->FinalColor[0], vrnext->FinalColor[0]); - s32 gr = interp_end->Interpolate(vrcur->FinalColor[1], vrnext->FinalColor[1]); - s32 br = interp_end->Interpolate(vrcur->FinalColor[2], vrnext->FinalColor[2]); + s32 rr = interp_end->Interpolate(vrcur->FinalColor[0], vrnext->FinalColor[0]); + s32 gr = interp_end->Interpolate(vrcur->FinalColor[1], vrnext->FinalColor[1]); + s32 br = interp_end->Interpolate(vrcur->FinalColor[2], vrnext->FinalColor[2]); - s32 sr = interp_end->Interpolate(vrcur->TexCoords[0], vrnext->TexCoords[0]); - s32 tr = interp_end->Interpolate(vrcur->TexCoords[1], vrnext->TexCoords[1]); + s32 sr = interp_end->Interpolate(vrcur->TexCoords[0], vrnext->TexCoords[0]); + s32 tr = interp_end->Interpolate(vrcur->TexCoords[1], vrnext->TexCoords[1]); // in wireframe mode, there are special rules for equal Z (TODO) @@ -1139,7 +1143,9 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 else if (y == polygon->YBottom-1) yedge = 0x8; int edge; - Interpolator<0> interpX(xstart, xend+1, wl, wr); + xend+=1; + Interpolator<0> interpX; + interpX.Setup(xstart, xend, wl, wr); // CHECKME: should the unclamped values be used for timings? // negative values are clamped to 0 @@ -1163,7 +1169,6 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 // part 1: left edge edge = yedge | 0x1; xlimit = xstart+l_edgelen; - //if (xlimit > xend+1) xlimit = xend+1; if (xlimit > 256) xlimit = 256; if (l_edgecov & (1<<31)) { @@ -1192,7 +1197,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 interpX.SetX(x); - s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); + s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); // if depth test against the topmost pixel fails, test // against the pixel underneath @@ -1206,12 +1211,12 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 continue; } - u32 vr = interpX.Interpolate(rl, rr); - u32 vg = interpX.Interpolate(gl, gr); - u32 vb = interpX.Interpolate(bl, br); + u32 vr = interpX.Interpolate(rl, rr); + u32 vg = interpX.Interpolate(gl, gr); + u32 vb = interpX.Interpolate(bl, br); - s16 s = interpX.Interpolate(sl, sr); - s16 t = interpX.Interpolate(tl, tr); + s16 s = interpX.Interpolate(sl, sr); + s16 t = interpX.Interpolate(tl, tr); u32 color = RenderPixel(gpu, polygon, vr>>3, vg>>3, vb>>3, s, t); u8 alpha = color >> 24; @@ -1263,8 +1268,8 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 // part 2: polygon inside edge = yedge; - xlimit = xend-r_edgelen+1; - if (xlimit > xend+1) xlimit = xend+1; + xlimit = xend-r_edgelen; + if (xlimit > xend+1) xlimit = xend; if (xlimit > 256) xlimit = 256; if (wireframe && !edge) x = std::max(x, xlimit); @@ -1288,7 +1293,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 interpX.SetX(x); - s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); + s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); // if depth test against the topmost pixel fails, test // against the pixel underneath @@ -1302,12 +1307,12 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 continue; } - u32 vr = interpX.Interpolate(rl, rr); - u32 vg = interpX.Interpolate(gl, gr); - u32 vb = interpX.Interpolate(bl, br); + u32 vr = interpX.Interpolate(rl, rr); + u32 vg = interpX.Interpolate(gl, gr); + u32 vb = interpX.Interpolate(bl, br); - s16 s = interpX.Interpolate(sl, sr); - s16 t = interpX.Interpolate(tl, tr); + s16 s = interpX.Interpolate(sl, sr); + s16 t = interpX.Interpolate(tl, tr); u32 color = RenderPixel(gpu, polygon, vr>>3, vg>>3, vb>>3, s, t); u8 alpha = color >> 24; @@ -1352,7 +1357,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 // part 3: right edge edge = yedge | 0x2; - xlimit = xend+1; + xlimit = xend; if (xlimit > 256) xlimit = 256; if (r_edgecov & (1<<31)) { @@ -1380,7 +1385,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 interpX.SetX(x); - s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); + s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); // if depth test against the topmost pixel fails, test // against the pixel underneath @@ -1394,12 +1399,12 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 continue; } - u32 vr = interpX.Interpolate(rl, rr); - u32 vg = interpX.Interpolate(gl, gr); - u32 vb = interpX.Interpolate(bl, br); + u32 vr = interpX.Interpolate(rl, rr); + u32 vg = interpX.Interpolate(gl, gr); + u32 vb = interpX.Interpolate(bl, br); - s16 s = interpX.Interpolate(sl, sr); - s16 t = interpX.Interpolate(tl, tr); + s16 s = interpX.Interpolate(sl, sr); + s16 t = interpX.Interpolate(tl, tr); u32 color = RenderPixel(gpu, polygon, vr>>3, vg>>3, vb>>3, s, t); u8 alpha = color >> 24; @@ -1449,8 +1454,8 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 } } - rp->XL = rp->SlopeL.Step(); - rp->XR = rp->SlopeR.Step(); + rp->XL = rp->SlopeL.Step(); + rp->XR = rp->SlopeR.Step(); } void SoftRenderer::RenderScanline(const GPU& gpu, s32 y, int npolys) @@ -1462,10 +1467,20 @@ void SoftRenderer::RenderScanline(const GPU& gpu, s32 y, int npolys) if (y >= polygon->YTop && (y < polygon->YBottom || (y == polygon->YTop && polygon->YBottom == polygon->YTop))) { - if (polygon->IsShadowMask) - RenderShadowMaskScanline(gpu.GPU3D, rp, y); + if (polygon->OOBRendering) + { + if (polygon->IsShadowMask) + RenderShadowMaskScanline(gpu.GPU3D, rp, y); + else + RenderPolygonScanline(gpu, rp, y); + } else - RenderPolygonScanline(gpu, rp, y); + { + if (polygon->IsShadowMask) + RenderShadowMaskScanline(gpu.GPU3D, rp, y); + else + RenderPolygonScanline(gpu, rp, y); + } } } } diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index 2d925c64..c3d68e4b 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -69,16 +69,17 @@ private: { public: constexpr Interpolator() {} - constexpr Interpolator(s32 x0, s32 x1, s32 w0, s32 w1) - { - Setup(x0, x1, w0, w1); - } - + + template constexpr void Setup(s32 x0, s32 x1, s32 w0, s32 w1) { this->x0 = x0; this->x1 = x1; - this->xdiff = std::min(x1, 511) - std::max(x0, 0); + + if (oob) + this->xdiff = std::min(x1, 511) - std::max(x0, 0); + else + this->xdiff = x1 - x0; // calculate reciprocal for Z interpolation // TODO eventually: use a faster reciprocal function? @@ -129,7 +130,7 @@ private: constexpr void SetX(s32 x) { x -= x0; - if (x > xdiff) x = xdiff; // may or may not be correct + //if (x > xdiff) x = xdiff; // may or may not be correct this->x = x; if (xdiff != 0 && !linear) { @@ -142,12 +143,13 @@ private: else yfactor = (s32)(num / den); } } - + + template constexpr s32 Interpolate(s32 y0, s32 y1) const { if (xdiff == 0 || y0 == y1 || x == 0) return y0; - if (x0 <= 0 && x1 > 511) return y1; + if (oob && (x0 <= 0 && x1 > 511)) return y1; if (!linear) { @@ -166,12 +168,13 @@ private: return y1 + (s64)(y0-y1) * (xdiff - x) / xdiff; } } - + + template constexpr s32 InterpolateZ(s32 z0, s32 z1, bool wbuffer) const { if (xdiff == 0 || z0 == z1 || x == 0) return z0; - if (x0 <= 0 && x1 > 511) return z1; + if (oob && (x0 <= 0 && x1 > 511)) return z1; if (wbuffer) { @@ -249,7 +252,7 @@ private: Increment = 0; XMajor = false; - Interp.Setup(0, 0, 0, 0); + Interp.Setup(0, 0, 0, 0); Interp.SetX(0); xcov_incr = 0; @@ -257,6 +260,7 @@ private: return x0; } + template constexpr s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y) { this->x0 = x0; @@ -288,8 +292,8 @@ private: // note: for some reason, x/y isn't calculated directly, // instead, 1/y is calculated and then multiplied by x // TODO: this is still not perfect (see for example x=169 y=33) - if (ylen == 0) - Increment = xlen << 18; // this case should only be triggered by glitched polygons + if (oob && ylen == 0) // this case *should* only be triggered by glitched polygons that try to render oob + Increment = xlen << 18; else if (ylen == xlen && xlen != 1) Increment = 0x40000; else @@ -318,10 +322,11 @@ private: else dx = 0; } - dx += (y - y0) * Increment & 0xFFFFFFF; + dx += (y - y0) * Increment; + if (oob) dx &= 0xFFFFFFF; int interpoffset = (Increment >= 0x40000) && (side ^ Negative); - Interp.Setup(y0-interpoffset, y1-interpoffset, w0, w1); + Interp.Setup(y0-interpoffset, y1-interpoffset, w0, w1); Interp.SetX(y); // used for calculating AA coverage @@ -330,9 +335,11 @@ private: return XVal(); } + template constexpr s32 Step() { - dx = dx + Increment & 0xFFFFFFF; // seems to be a 28 bit integer + dx += Increment; // seems to be a 28 bit integer + if (oob) dx &= 0xFFFFFFF; y++; Interp.SetX(y); @@ -341,7 +348,7 @@ private: constexpr s32 XVal() const { - s32 ret = 0; + s32 ret; if (Negative) ret = x0 - (dx >> 18); else ret = x0 + (dx >> 18); @@ -461,8 +468,8 @@ private: void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y) const; void SetupPolygonRightEdge(RendererPolygon* rp, s32 y) const; void SetupPolygon(RendererPolygon* rp, Polygon* polygon) const; - void RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y); - void RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y); + template void RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y); + template void RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y); void RenderScanline(const GPU& gpu, s32 y, int npolys); u32 CalculateFogDensity(const GPU3D& gpu3d, u32 pixeladdr) const; void ScanlineFinalPass(const GPU3D& gpu3d, s32 y);