Neater code; fix a bug I introduced while optimizing

This commit is contained in:
Jaklyy 2024-07-07 19:48:11 -04:00
parent ce9eddda11
commit c7b92df113
2 changed files with 59 additions and 56 deletions

View File

@ -785,17 +785,17 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
bool l_filledge, r_filledge; bool l_filledge, r_filledge;
s32 l_edgelen, r_edgelen; s32 l_edgelen, r_edgelen;
s32 l_edgecov, r_edgecov; s32 l_edgecov, r_edgecov;
Interpolator<1>* interp_start; Interpolator<1, false>* interp_start;
Interpolator<1>* interp_end; Interpolator<1, false>* interp_end;
xstart = rp->XL; xstart = rp->XL;
xend = rp->XR; xend = rp->XR;
s32 wl = rp->SlopeL.Interp.Interpolate<false>(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]); s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]);
s32 wr = rp->SlopeR.Interp.Interpolate<false>(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]); s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]);
s32 zl = rp->SlopeL.Interp.InterpolateZ<false>(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL], polygon->WBuffer); s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL], polygon->WBuffer);
s32 zr = rp->SlopeR.Interp.InterpolateZ<false>(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR], polygon->WBuffer); s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR], polygon->WBuffer);
// right vertical edges are pushed 1px to the left as long as either: // right vertical edges are pushed 1px to the left as long as either:
// the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen // the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen
@ -880,8 +880,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
int edge; int edge;
xend += 1; xend += 1;
Interpolator<0> interpX; Interpolator<0, oob> interpX(xstart, xend, wl, wr);
interpX.Setup<oob>(xstart, xend, wl, wr);
// CHECKME: should the unclamped values be used for timings? // CHECKME: should the unclamped values be used for timings?
// negative values are clamped to 0 // negative values are clamped to 0
@ -890,6 +889,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
l_edgelen += xstart; l_edgelen += xstart;
if (l_edgelen < 1) l_edgelen = 1; if (l_edgelen < 1) l_edgelen = 1;
xstart = 0; xstart = 0;
if (xend < 1) xend = 1;
} }
s32 x = xstart; s32 x = xstart;
// too big values are clamped to 511 // too big values are clamped to 511
@ -907,6 +907,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
// part 1: left edge // part 1: left edge
edge = yedge | 0x1; edge = yedge | 0x1;
xlimit = xstart+l_edgelen; xlimit = xstart+l_edgelen;
if (xlimit > xend) xlimit = xend;
if (xlimit > 256) xlimit = 256; if (xlimit > 256) xlimit = 256;
if (!l_filledge) x = xlimit; if (!l_filledge) x = xlimit;
@ -917,7 +918,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
interpX.SetX(x); interpX.SetX(x);
s32 z = interpX.InterpolateZ<oob>(zl, zr, polygon->WBuffer); s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer);
u32 dstattr = AttrBuffer[pixeladdr]; u32 dstattr = AttrBuffer[pixeladdr];
if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr)) if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
@ -943,7 +944,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
interpX.SetX(x); interpX.SetX(x);
s32 z = interpX.InterpolateZ<oob>(zl, zr, polygon->WBuffer); s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer);
u32 dstattr = AttrBuffer[pixeladdr]; u32 dstattr = AttrBuffer[pixeladdr];
if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr)) if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
@ -969,7 +970,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
interpX.SetX(x); interpX.SetX(x);
s32 z = interpX.InterpolateZ<oob>(zl, zr, polygon->WBuffer); s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer);
u32 dstattr = AttrBuffer[pixeladdr]; u32 dstattr = AttrBuffer[pixeladdr];
if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr)) if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
@ -1026,17 +1027,17 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
bool l_filledge, r_filledge; bool l_filledge, r_filledge;
s32 l_edgelen, r_edgelen; s32 l_edgelen, r_edgelen;
s32 l_edgecov, r_edgecov; s32 l_edgecov, r_edgecov;
Interpolator<1>* interp_start; Interpolator<1, false>* interp_start;
Interpolator<1>* interp_end; Interpolator<1, false>* interp_end;
xstart = rp->XL; xstart = rp->XL;
xend = rp->XR; xend = rp->XR;
s32 wl = rp->SlopeL.Interp.Interpolate<false>(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]); s32 wl = rp->SlopeL.Interp.Interpolate(polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL]);
s32 wr = rp->SlopeR.Interp.Interpolate<false>(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]); s32 wr = rp->SlopeR.Interp.Interpolate(polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR]);
s32 zl = rp->SlopeL.Interp.InterpolateZ<false>(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL], polygon->WBuffer); s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL], polygon->WBuffer);
s32 zr = rp->SlopeR.Interp.InterpolateZ<false>(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR], polygon->WBuffer); s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR], polygon->WBuffer);
// right vertical edges are pushed 1px to the left as long as either: // right vertical edges are pushed 1px to the left as long as either:
// the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen // the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen
@ -1124,19 +1125,19 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
// interpolate attributes along Y // interpolate attributes along Y
s32 rl = interp_start->Interpolate<false>(vlcur->FinalColor[0], vlnext->FinalColor[0]); s32 rl = interp_start->Interpolate(vlcur->FinalColor[0], vlnext->FinalColor[0]);
s32 gl = interp_start->Interpolate<false>(vlcur->FinalColor[1], vlnext->FinalColor[1]); s32 gl = interp_start->Interpolate(vlcur->FinalColor[1], vlnext->FinalColor[1]);
s32 bl = interp_start->Interpolate<false>(vlcur->FinalColor[2], vlnext->FinalColor[2]); s32 bl = interp_start->Interpolate(vlcur->FinalColor[2], vlnext->FinalColor[2]);
s32 sl = interp_start->Interpolate<false>(vlcur->TexCoords[0], vlnext->TexCoords[0]); s32 sl = interp_start->Interpolate(vlcur->TexCoords[0], vlnext->TexCoords[0]);
s32 tl = interp_start->Interpolate<false>(vlcur->TexCoords[1], vlnext->TexCoords[1]); s32 tl = interp_start->Interpolate(vlcur->TexCoords[1], vlnext->TexCoords[1]);
s32 rr = interp_end->Interpolate<false>(vrcur->FinalColor[0], vrnext->FinalColor[0]); s32 rr = interp_end->Interpolate(vrcur->FinalColor[0], vrnext->FinalColor[0]);
s32 gr = interp_end->Interpolate<false>(vrcur->FinalColor[1], vrnext->FinalColor[1]); s32 gr = interp_end->Interpolate(vrcur->FinalColor[1], vrnext->FinalColor[1]);
s32 br = interp_end->Interpolate<false>(vrcur->FinalColor[2], vrnext->FinalColor[2]); s32 br = interp_end->Interpolate(vrcur->FinalColor[2], vrnext->FinalColor[2]);
s32 sr = interp_end->Interpolate<false>(vrcur->TexCoords[0], vrnext->TexCoords[0]); s32 sr = interp_end->Interpolate(vrcur->TexCoords[0], vrnext->TexCoords[0]);
s32 tr = interp_end->Interpolate<false>(vrcur->TexCoords[1], vrnext->TexCoords[1]); s32 tr = interp_end->Interpolate(vrcur->TexCoords[1], vrnext->TexCoords[1]);
// in wireframe mode, there are special rules for equal Z (TODO) // in wireframe mode, there are special rules for equal Z (TODO)
@ -1146,8 +1147,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
int edge; int edge;
xend+=1; xend+=1;
Interpolator<0> interpX; Interpolator<0, oob> interpX(xstart, xend, wl, wr);
interpX.Setup<oob>(xstart, xend, wl, wr);
// CHECKME: should the unclamped values be used for timings? // CHECKME: should the unclamped values be used for timings?
// negative values are clamped to 0 // negative values are clamped to 0
@ -1156,6 +1156,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
l_edgelen += xstart; l_edgelen += xstart;
if (l_edgelen < 1) l_edgelen = 1; if (l_edgelen < 1) l_edgelen = 1;
xstart = 0; xstart = 0;
if (xend < 1) xend = 1;
} }
s32 x = xstart; s32 x = xstart;
// too big values are clamped to 511 // too big values are clamped to 511
@ -1171,6 +1172,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
// part 1: left edge // part 1: left edge
edge = yedge | 0x1; edge = yedge | 0x1;
xlimit = xstart+l_edgelen; xlimit = xstart+l_edgelen;
if (xlimit > xend) xlimit = xend;
if (xlimit > 256) xlimit = 256; if (xlimit > 256) xlimit = 256;
if (l_edgecov & (1<<31)) if (l_edgecov & (1<<31))
{ {
@ -1199,7 +1201,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
interpX.SetX(x); interpX.SetX(x);
s32 z = interpX.InterpolateZ<oob>(zl, zr, polygon->WBuffer); s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer);
// if depth test against the topmost pixel fails, test // if depth test against the topmost pixel fails, test
// against the pixel underneath // against the pixel underneath
@ -1213,12 +1215,12 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
continue; continue;
} }
u32 vr = interpX.Interpolate<oob>(rl, rr); u32 vr = interpX.Interpolate(rl, rr);
u32 vg = interpX.Interpolate<oob>(gl, gr); u32 vg = interpX.Interpolate(gl, gr);
u32 vb = interpX.Interpolate<oob>(bl, br); u32 vb = interpX.Interpolate(bl, br);
s16 s = interpX.Interpolate<oob>(sl, sr); s16 s = interpX.Interpolate(sl, sr);
s16 t = interpX.Interpolate<oob>(tl, tr); s16 t = interpX.Interpolate(tl, tr);
u32 color = RenderPixel(gpu, polygon, vr>>3, vg>>3, vb>>3, s, t); u32 color = RenderPixel(gpu, polygon, vr>>3, vg>>3, vb>>3, s, t);
u8 alpha = color >> 24; u8 alpha = color >> 24;
@ -1271,7 +1273,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
// part 2: polygon inside // part 2: polygon inside
edge = yedge; edge = yedge;
xlimit = xend-r_edgelen; xlimit = xend-r_edgelen;
if (xlimit > xend+1) xlimit = xend; if (xlimit > xend) xlimit = xend;
if (xlimit > 256) xlimit = 256; if (xlimit > 256) xlimit = 256;
if (wireframe && !edge) x = std::max(x, xlimit); if (wireframe && !edge) x = std::max(x, xlimit);
@ -1295,7 +1297,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
interpX.SetX(x); interpX.SetX(x);
s32 z = interpX.InterpolateZ<oob>(zl, zr, polygon->WBuffer); s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer);
// if depth test against the topmost pixel fails, test // if depth test against the topmost pixel fails, test
// against the pixel underneath // against the pixel underneath
@ -1309,12 +1311,12 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
continue; continue;
} }
u32 vr = interpX.Interpolate<oob>(rl, rr); u32 vr = interpX.Interpolate(rl, rr);
u32 vg = interpX.Interpolate<oob>(gl, gr); u32 vg = interpX.Interpolate(gl, gr);
u32 vb = interpX.Interpolate<oob>(bl, br); u32 vb = interpX.Interpolate(bl, br);
s16 s = interpX.Interpolate<oob>(sl, sr); s16 s = interpX.Interpolate(sl, sr);
s16 t = interpX.Interpolate<oob>(tl, tr); s16 t = interpX.Interpolate(tl, tr);
u32 color = RenderPixel(gpu, polygon, vr>>3, vg>>3, vb>>3, s, t); u32 color = RenderPixel(gpu, polygon, vr>>3, vg>>3, vb>>3, s, t);
u8 alpha = color >> 24; u8 alpha = color >> 24;
@ -1387,7 +1389,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
interpX.SetX(x); interpX.SetX(x);
s32 z = interpX.InterpolateZ<oob>(zl, zr, polygon->WBuffer); s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer);
// if depth test against the topmost pixel fails, test // if depth test against the topmost pixel fails, test
// against the pixel underneath // against the pixel underneath
@ -1401,12 +1403,12 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
continue; continue;
} }
u32 vr = interpX.Interpolate<oob>(rl, rr); u32 vr = interpX.Interpolate(rl, rr);
u32 vg = interpX.Interpolate<oob>(gl, gr); u32 vg = interpX.Interpolate(gl, gr);
u32 vb = interpX.Interpolate<oob>(bl, br); u32 vb = interpX.Interpolate(bl, br);
s16 s = interpX.Interpolate<oob>(sl, sr); s16 s = interpX.Interpolate(sl, sr);
s16 t = interpX.Interpolate<oob>(tl, tr); s16 t = interpX.Interpolate(tl, tr);
u32 color = RenderPixel(gpu, polygon, vr>>3, vg>>3, vb>>3, s, t); u32 color = RenderPixel(gpu, polygon, vr>>3, vg>>3, vb>>3, s, t);
u8 alpha = color >> 24; u8 alpha = color >> 24;

View File

@ -64,13 +64,16 @@ private:
// interpolation, avoiding precision loss from the aforementioned approximation. // interpolation, avoiding precision loss from the aforementioned approximation.
// Which is desirable when using the GPU to draw 2D graphics. // Which is desirable when using the GPU to draw 2D graphics.
template<int dir> template<int dir, bool oob>
class Interpolator class Interpolator
{ {
public: public:
constexpr Interpolator() {} constexpr Interpolator() {}
// Convenience constructor: creates the interpolator and configures it in a
// single step by passing all four arguments, unchanged and in order, to Setup().
// The call sites visible in this change pass the span's start/end positions
// as x0/x1 and the W values at those two ends as w0/w1.
constexpr Interpolator(s32 x0, s32 x1, s32 w0, s32 w1)
{
this->Setup(x0, x1, w0, w1);
}
template <bool oob>
constexpr void Setup(s32 x0, s32 x1, s32 w0, s32 w1) constexpr void Setup(s32 x0, s32 x1, s32 w0, s32 w1)
{ {
this->x0 = x0; this->x0 = x0;
@ -144,7 +147,6 @@ private:
} }
} }
template <bool oob>
constexpr s32 Interpolate(s32 y0, s32 y1) const constexpr s32 Interpolate(s32 y0, s32 y1) const
{ {
if (xdiff == 0 || y0 == y1 || x == 0) return y0; if (xdiff == 0 || y0 == y1 || x == 0) return y0;
@ -169,7 +171,6 @@ private:
} }
} }
template <bool oob>
constexpr s32 InterpolateZ(s32 z0, s32 z1, bool wbuffer) const constexpr s32 InterpolateZ(s32 z0, s32 z1, bool wbuffer) const
{ {
if (xdiff == 0 || z0 == z1 || x == 0) return z0; if (xdiff == 0 || z0 == z1 || x == 0) return z0;
@ -252,7 +253,7 @@ private:
Increment = 0; Increment = 0;
XMajor = false; XMajor = false;
Interp.Setup<false>(0, 0, 0, 0); Interp.Setup(0, 0, 0, 0);
Interp.SetX(0); Interp.SetX(0);
xcov_incr = 0; xcov_incr = 0;
@ -326,7 +327,7 @@ private:
if (oob) dx &= 0xFFFFFFF; if (oob) dx &= 0xFFFFFFF;
int interpoffset = (Increment >= 0x40000) && (side ^ Negative); int interpoffset = (Increment >= 0x40000) && (side ^ Negative);
Interp.Setup<false>(y0-interpoffset, y1-interpoffset, w0, w1); Interp.Setup(y0-interpoffset, y1-interpoffset, w0, w1);
Interp.SetX(y); Interp.SetX(y);
// used for calculating AA coverage // used for calculating AA coverage
@ -425,7 +426,7 @@ private:
s32 Increment; s32 Increment;
bool Negative; bool Negative;
bool XMajor; bool XMajor;
Interpolator<1> Interp; Interpolator<1, false> Interp;
private: private:
s32 x0, xmin, xmax; s32 x0, xmin, xmax;