From 27c023a0cc0e175893aa7163bcb7c3413106e58d Mon Sep 17 00:00:00 2001 From: StapleButter Date: Thu, 1 Jun 2017 01:02:39 +0200 Subject: [PATCH] separate rasterizer inner loop into three loops for leftedge/middle/rightedge --- src/GPU3D_Soft.cpp | 252 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 233 insertions(+), 19 deletions(-) diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index 3771088a..23441b12 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -981,26 +981,18 @@ void RenderPolygonScanline(RendererPolygon* rp, s32 y) int yedge = 0; if (y == polygon->YTop) yedge = 0x4; else if (y == polygon->YBottom-1) yedge = 0x8; + int edge; + s32 x = xstart; Interpolator interpX(xstart, xend+1, wl, wr, 8); -//printf("%d: edge %d %d, %d %d\n", y, l_edgelen, r_edgelen, l_edgecov, r_edgecov); - // TODO: split this based on edge/middle - for (s32 x = xstart; x <= xend; x++) + + // part 1: left edge + edge = yedge | 0x1; + for (; x < xstart+l_edgelen; x++) { if (x < 0) continue; if (x > 255) break; - int edge = yedge; - if (x < xstart+l_edgelen) edge |= 0x1; - else if (x > xend-r_edgelen) edge |= 0x2; - - // wireframe polygons. really ugly, but works - if (wireframe && edge==0) - { - x = xend-r_edgelen; - continue; - } - u32 pixeladdr = 258*3 + 1 + (y*258*3) + x; u32 attr = (polygon->Attr & 0x3F008000) | edge; @@ -1025,9 +1017,7 @@ void RenderPolygonScanline(RendererPolygon* rp, s32 y) { if (!wireframe && !(RenderDispCnt & (1<<4))) { - if ((edge & 0x1) && !l_filledge) - continue; - if ((edge & 0x2) && !r_filledge) + if (!l_filledge) continue; } } @@ -1098,9 +1088,233 @@ void RenderPolygonScanline(RendererPolygon* rp, s32 y) // edge fill rules for opaque pixels if (!wireframe) { - if ((edge & 0x1) && !l_filledge) + if (!l_filledge) continue; - if ((edge & 0x2) && !r_filledge) + } + } + + DepthBuffer[pixeladdr] = z; + } + else + { + u32 dstattr = AttrBuffer[pixeladdr]; + attr |= (1<<30); + if (polygon->IsShadow) dstattr |= (1<<30); + + // skip if polygon IDs are equal + // note: this only happens if the destination pixel was translucent + // or always when drawing a shadow + // (the GPU keeps track of which pixels are translucent, regardless of + // the destination alpha) + // TODO: they say that there are two separate polygon ID buffers. verify that. + if ((dstattr & 0x7F000000) == (attr & 0x7F000000)) + continue; + + // fog flag + if (!(dstattr & (1<<15))) + attr &= ~(1<<15); + + color = AlphaBlend(color, ColorBuffer[pixeladdr], alpha); + + if (polygon->Attr & (1<<11)) + DepthBuffer[pixeladdr] = z; + } + + ColorBuffer[pixeladdr] = color; + AttrBuffer[pixeladdr] = attr; + } + + // part 2: polygon inside + edge = yedge; + if (wireframe && !edge) x = xend-r_edgelen+1; + else for (; x <= xend-r_edgelen; x++) + { + if (x < 0) continue; + if (x > 255) break; + + u32 pixeladdr = 258*3 + 1 + (y*258*3) + x; + u32 attr = (polygon->Attr & 0x3F008000) | edge; + + // check stencil buffer for shadows + if (polygon->IsShadow) + { + if (StencilBuffer[256*(y&0x1) + x] == 0) + continue; + } + + interpX.SetX(x); + + s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); + + if (polygon->IsShadowMask) + { + // for shadow masks: set stencil bits where the depth test fails. + // draw nothing. + + if (!fnDepthTest(DepthBuffer[pixeladdr], z)) + StencilBuffer[256*(y&0x1) + x] = 1; + + continue; + } + + // if depth test against the topmost pixel fails, test + // against the pixel underneath + if (!fnDepthTest(DepthBuffer[pixeladdr], z)) + { + pixeladdr += 258; + if (!fnDepthTest(DepthBuffer[pixeladdr], z)) + continue; + } + + u32 vr = interpX.Interpolate(rl, rr); + u32 vg = interpX.Interpolate(gl, gr); + u32 vb = interpX.Interpolate(bl, br); + + s16 s = interpX.Interpolate(sl, sr); + s16 t = interpX.Interpolate(tl, tr); + + u32 color = RenderPixel(polygon, vr>>3, vg>>3, vb>>3, s, t); + u8 alpha = color >> 24; + + // alpha test + if (alpha <= RenderAlphaRef) continue; + + if (alpha == 31) + { + DepthBuffer[pixeladdr] = z; + } + else + { + u32 dstattr = AttrBuffer[pixeladdr]; + attr |= (1<<30); + if (polygon->IsShadow) dstattr |= (1<<30); + + // skip if polygon IDs are equal + // note: this only happens if the destination pixel was translucent + // or always when drawing a shadow + // (the GPU keeps track of which pixels are translucent, regardless of + // the destination alpha) + // TODO: they say that there are two separate polygon ID buffers. verify that. + if ((dstattr & 0x7F000000) == (attr & 0x7F000000)) + continue; + + // fog flag + if (!(dstattr & (1<<15))) + attr &= ~(1<<15); + + color = AlphaBlend(color, ColorBuffer[pixeladdr], alpha); + + if (polygon->Attr & (1<<11)) + DepthBuffer[pixeladdr] = z; + } + + ColorBuffer[pixeladdr] = color; + AttrBuffer[pixeladdr] = attr; + } + + // part 3: right edge + edge = yedge | 0x2; + for (; x <= xend; x++) + { + if (x < 0) continue; + if (x > 255) break; + + u32 pixeladdr = 258*3 + 1 + (y*258*3) + x; + u32 attr = (polygon->Attr & 0x3F008000) | edge; + + // check stencil buffer for shadows + if (polygon->IsShadow) + { + if (StencilBuffer[256*(y&0x1) + x] == 0) + continue; + } + + interpX.SetX(x); + + s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); + + if (polygon->IsShadowMask) + { + // for shadow masks: set stencil bits where the depth test fails. + // draw nothing. + + // checkme + if (polyalpha == 31) + { + if (!wireframe && !(RenderDispCnt & (1<<4))) + { + if (!r_filledge) + continue; + } + } + + if (!fnDepthTest(DepthBuffer[pixeladdr], z)) + StencilBuffer[256*(y&0x1) + x] = 1; + + continue; + } + + // if depth test against the topmost pixel fails, test + // against the pixel underneath + if (!fnDepthTest(DepthBuffer[pixeladdr], z)) + { + pixeladdr += 258; + if (!fnDepthTest(DepthBuffer[pixeladdr], z)) + continue; + } + + u32 vr = interpX.Interpolate(rl, rr); + u32 vg = interpX.Interpolate(gl, gr); + u32 vb = interpX.Interpolate(bl, br); + + s16 s = interpX.Interpolate(sl, sr); + s16 t = interpX.Interpolate(tl, tr); + + u32 color = RenderPixel(polygon, vr>>3, vg>>3, vb>>3, s, t); + u8 alpha = color >> 24; + + // alpha test + if (alpha <= RenderAlphaRef) continue; + + if (alpha == 31) + { + if (RenderDispCnt & (1<<4)) + { + // anti-aliasing: all edges are rendered + + if (edge) + { + // calculate coverage + // TODO: optimize + s32 cov = 31; + /*if (edge & 0x1) + {if(y==48||true)printf("[y%d] coverage for %d: %d / %d = %d %d %08X %d %08X\n", y, x, x-xstart, l_edgelen, + ((x - xstart) << 5) / (l_edgelen), ((x - xstart) *31) / (l_edgelen), rp->SlopeL.Increment, l_edgecov, + rp->SlopeL.DX()); + cov = l_edgecov; + if (cov == -1) cov = ((x - xstart) << 5) / l_edgelen; + } + else if (edge & 0x2) + { + cov = r_edgecov; + if (cov == -1) cov = ((xend - x) << 5) / r_edgelen; + }cov=31;*/ + attr |= (cov << 8); + + // push old pixel down if needed + // we only need to do it for opaque edge pixels, since + // this only serves for antialiasing + ColorBuffer[pixeladdr+258] = ColorBuffer[pixeladdr]; + DepthBuffer[pixeladdr+258] = DepthBuffer[pixeladdr]; + AttrBuffer[pixeladdr+258] = AttrBuffer[pixeladdr]; + } + } + else + { + // edge fill rules for opaque pixels + if (!wireframe) + { + if (!r_filledge) continue; } }