diff --git a/Source/Core/VideoBackends/Software/Rasterizer.cpp b/Source/Core/VideoBackends/Software/Rasterizer.cpp index d29e4954f2..b3d0dd3fdb 100644 --- a/Source/Core/VideoBackends/Software/Rasterizer.cpp +++ b/Source/Core/VideoBackends/Software/Rasterizer.cpp @@ -20,16 +20,82 @@ namespace Rasterizer { static constexpr int BLOCK_SIZE = 2; +struct SlopeContext +{ + SlopeContext(const OutputVertexData* v0, const OutputVertexData* v1, const OutputVertexData* v2, + s32 x0, s32 y0) + : x0(x0), y0(y0) + { + // adjust a little less than 0.5 + const float adjust = 0.495f; + + xOff = ((float)x0 - v0->screenPosition.x) + adjust; + yOff = ((float)y0 - v0->screenPosition.y) + adjust; + + dx10 = v1->screenPosition.x - v0->screenPosition.x; + dx20 = v2->screenPosition.x - v0->screenPosition.x; + dy10 = v1->screenPosition.y - v0->screenPosition.y; + dy20 = v2->screenPosition.y - v0->screenPosition.y; + } + s32 x0; + s32 y0; + float xOff; + float yOff; + float dx10; + float dx20; + float dy10; + float dy20; +}; + +struct Slope +{ + Slope() = default; + Slope(float f0, float f1, float f2, const SlopeContext& ctx) : f0(f0) + { + float delta_20 = f2 - f0; + float delta_10 = f1 - f0; + + // x2 - x0 y1 - y0 x1 - x0 y2 - y0 + float a = delta_20 * ctx.dy10 - delta_10 * ctx.dy20; + float b = ctx.dx20 * delta_10 - ctx.dx10 * delta_20; + float c = ctx.dx20 * ctx.dy10 - ctx.dx10 * ctx.dy20; + + dfdx = a / c; + dfdy = b / c; + + x0 = ctx.x0; + y0 = ctx.y0; + xOff = ctx.xOff; + yOff = ctx.yOff; + } + + // These default values are used in the unlikely case that zfreeze is enabled when drawing the + // first primitive. + // TODO: This is just a guess! + float dfdx = 0.0f; + float dfdy = 0.0f; + float f0 = 1.0f; + + // Both an s32 value and a float value are used to minimize rounding error + // TODO: is this really needed? 
+ s32 x0 = 0; + s32 y0 = 0; + float xOff = 0.0f; + float yOff = 0.0f; + + float GetValue(s32 x, s32 y) const + { + float dx = xOff + (float)(x - x0); + float dy = yOff + (float)(y - y0); + return f0 + (dfdx * dx) + (dfdy * dy); + } +}; + static Slope ZSlope; static Slope WSlope; static Slope ColorSlopes[2][4]; static Slope TexSlopes[8][3]; -static s32 vertex0X; -static s32 vertex0Y; -static float vertexOffsetX; -static float vertexOffsetY; - static Tev tev; static RasterBlock rasterBlock; @@ -37,11 +103,9 @@ void Init() { tev.Init(); - // Set initial z reference plane in the unlikely case that zfreeze is enabled when drawing the - // first primitive. - // TODO: This is just a guess! - ZSlope.dfdx = ZSlope.dfdy = 0.f; - ZSlope.f0 = 1.f; + // The other slopes are set for each primitive drawn, but zfreeze means that the z slope + // needs to be set to an (untested) default value. + ZSlope = Slope(); } // Returns approximation of log2(f) in s28.4 @@ -75,10 +139,7 @@ static void Draw(s32 x, s32 y, s32 xi, s32 yi) { INCSTAT(g_stats.this_frame.rasterized_pixels); - float dx = vertexOffsetX + (float)(x - vertex0X); - float dy = vertexOffsetY + (float)(y - vertex0Y); - - s32 z = (s32)std::clamp(ZSlope.GetValue(dx, dy), 0.0f, 16777215.0f); + s32 z = (s32)std::clamp(ZSlope.GetValue(x, y), 0.0f, 16777215.0f); if (bpmem.UseEarlyDepthTest()) { @@ -104,7 +165,7 @@ static void Draw(s32 x, s32 y, s32 xi, s32 yi) { for (int comp = 0; comp < 4; comp++) { - u16 color = (u16)ColorSlopes[i][comp].GetValue(dx, dy); + u16 color = (u16)ColorSlopes[i][comp].GetValue(x, y); // clamp color value to 0 u16 mask = ~(color >> 8); @@ -136,31 +197,6 @@ static void Draw(s32 x, s32 y, s32 xi, s32 yi) tev.Draw(); } -static void InitTriangle(float X1, float Y1, s32 xi, s32 yi) -{ - vertex0X = xi; - vertex0Y = yi; - - // adjust a little less than 0.5 - const float adjust = 0.495f; - - vertexOffsetX = ((float)xi - X1) + adjust; - vertexOffsetY = ((float)yi - Y1) + adjust; -} - -static void 
InitSlope(Slope* slope, float f1, float f2, float f3, float DX31, float DX12, - float DY12, float DY31) -{ - float DF31 = f3 - f1; - float DF21 = f2 - f1; - float a = DF31 * -DY12 - DF21 * DY31; - float b = DX31 * DF21 + DX12 * DF31; - float c = -DX12 * DY31 - DX31 * -DY12; - slope->dfdx = -a / c; - slope->dfdy = -b / c; - slope->f0 = f1; -} - static inline void CalculateLOD(s32* lodp, bool* linear, u32 texmap, u32 texcoord) { auto texUnit = bpmem.tex.GetUnit(texmap); @@ -220,22 +256,22 @@ static void BuildBlock(s32 blockX, s32 blockY) { RasterBlockPixel& pixel = rasterBlock.Pixel[xi][yi]; - float dx = vertexOffsetX + (float)(xi + blockX - vertex0X); - float dy = vertexOffsetY + (float)(yi + blockY - vertex0Y); + s32 x = xi + blockX; + s32 y = yi + blockY; - float invW = 1.0f / WSlope.GetValue(dx, dy); + float invW = 1.0f / WSlope.GetValue(x, y); pixel.InvW = invW; // tex coords for (unsigned int i = 0; i < bpmem.genMode.numtexgens; i++) { float projection = invW; - float q = TexSlopes[i][2].GetValue(dx, dy) * invW; + float q = TexSlopes[i][2].GetValue(x, y) * invW; if (q != 0.0f) projection = invW / q; - pixel.Uv[i][0] = TexSlopes[i][0].GetValue(dx, dy) * projection; - pixel.Uv[i][1] = TexSlopes[i][1].GetValue(dx, dy) * projection; + pixel.Uv[i][0] = TexSlopes[i][0].GetValue(x, y) * projection; + pixel.Uv[i][1] = TexSlopes[i][1].GetValue(x, y) * projection; } } } @@ -334,19 +370,12 @@ void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v if (minx >= maxx || miny >= maxy) return; - // Setup slopes - float fltx1 = v0->screenPosition.x; - float flty1 = v0->screenPosition.y; - float fltdx31 = v2->screenPosition.x - fltx1; - float fltdx12 = fltx1 - v1->screenPosition.x; - float fltdy12 = flty1 - v1->screenPosition.y; - float fltdy31 = v2->screenPosition.y - flty1; - - InitTriangle(fltx1, flty1, (X1 + 0xF) >> 4, (Y1 + 0xF) >> 4); + // Set up slopes + const SlopeContext ctx(v0, v1, v2, (X1 + 0xF) >> 4, (Y1 + 0xF) >> 4); float w[3] = {1.0f / 
v0->projectedPosition.w, 1.0f / v1->projectedPosition.w, 1.0f / v2->projectedPosition.w}; - InitSlope(&WSlope, w[0], w[1], w[2], fltdx31, fltdx12, fltdy12, fltdy31); + WSlope = Slope(w[0], w[1], w[2], ctx); // TODO: The zfreeze emulation is not quite correct, yet! // Many things might prevent us from reaching this line (culling, clipping, scissoring). @@ -355,21 +384,21 @@ void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v // We're currently sloppy at this since we abort early if any of the culling/clipping/scissoring // tests fail. if (!bpmem.genMode.zfreeze) - InitSlope(&ZSlope, v0->screenPosition[2], v1->screenPosition[2], v2->screenPosition[2], fltdx31, - fltdx12, fltdy12, fltdy31); + ZSlope = Slope(v0->screenPosition.z, v1->screenPosition.z, v2->screenPosition.z, ctx); for (unsigned int i = 0; i < bpmem.genMode.numcolchans; i++) { for (int comp = 0; comp < 4; comp++) - InitSlope(&ColorSlopes[i][comp], v0->color[i][comp], v1->color[i][comp], v2->color[i][comp], - fltdx31, fltdx12, fltdy12, fltdy31); + ColorSlopes[i][comp] = Slope(v0->color[i][comp], v1->color[i][comp], v2->color[i][comp], ctx); } for (unsigned int i = 0; i < bpmem.genMode.numtexgens; i++) { for (int comp = 0; comp < 3; comp++) - InitSlope(&TexSlopes[i][comp], v0->texCoords[i][comp] * w[0], v1->texCoords[i][comp] * w[1], - v2->texCoords[i][comp] * w[2], fltdx31, fltdx12, fltdy12, fltdy31); + { + TexSlopes[i][comp] = Slope(v0->texCoords[i][comp] * w[0], v1->texCoords[i][comp] * w[1], + v2->texCoords[i][comp] * w[2], ctx); + } } // Half-edge constants diff --git a/Source/Core/VideoBackends/Software/Rasterizer.h b/Source/Core/VideoBackends/Software/Rasterizer.h index bae35f7cd8..97cbce4d63 100644 --- a/Source/Core/VideoBackends/Software/Rasterizer.h +++ b/Source/Core/VideoBackends/Software/Rasterizer.h @@ -16,15 +16,6 @@ void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v void SetTevReg(int reg, int comp, s16 color); -struct Slope -{ - 
float dfdx; - float dfdy; - float f0; - - float GetValue(float dx, float dy) const { return f0 + (dfdx * dx) + (dfdy * dy); } -}; - struct RasterBlockPixel { float InvW;