From 3b5b80bf3c209b01a4835a2aa0729fc6b3f38d40 Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 17 Sep 2015 18:19:47 +0200 Subject: [PATCH] VideoSW: Drop SW bbox code The hardware backends don't use this shared code any more, and it's not needed for video sw either. So this was just dead code. --- .../VideoBackends/Software/Rasterizer.cpp | 331 ++++-------------- Source/Core/VideoBackends/Software/Tev.cpp | 215 ++++++------ 2 files changed, 171 insertions(+), 375 deletions(-) diff --git a/Source/Core/VideoBackends/Software/Rasterizer.cpp b/Source/Core/VideoBackends/Software/Rasterizer.cpp index ccfc8fc009..0db2765944 100644 --- a/Source/Core/VideoBackends/Software/Rasterizer.cpp +++ b/Source/Core/VideoBackends/Software/Rasterizer.cpp @@ -128,7 +128,7 @@ inline void Draw(s32 x, s32 y, s32 xi, s32 yi) s32 z = (s32)MathUtil::Clamp(ZSlope.GetValue(dx, dy), 0.0f, 16777215.0f); - if (!BoundingBox::active && bpmem.UseEarlyDepthTest() && g_SWVideoConfig.bZComploc) + if (bpmem.UseEarlyDepthTest() && g_SWVideoConfig.bZComploc) { // TODO: Test if perf regs are incremented even if test is disabled EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT_ZCOMPLOC); @@ -418,281 +418,88 @@ void DrawTriangleFrontFace(OutputVertexData *v0, OutputVertexData *v1, OutputVer if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; - // If drawing, rasterize every block - if (!BoundingBox::active) + // Start in corner of 8x8 block + minx &= ~(BLOCK_SIZE - 1); + miny &= ~(BLOCK_SIZE - 1); + + // Loop through blocks + for (s32 y = miny; y < maxy; y += BLOCK_SIZE) { - // Start in corner of 8x8 block - minx &= ~(BLOCK_SIZE - 1); - miny &= ~(BLOCK_SIZE - 1); - - // Loop through blocks - for (s32 y = miny; y < maxy; y += BLOCK_SIZE) + for (s32 x = minx; x < maxx; x += BLOCK_SIZE) { - for (s32 x = minx; x < maxx; x += BLOCK_SIZE) + // Corners of block + s32 x0 = x << 4; + s32 x1 = (x + BLOCK_SIZE - 1) << 4; + s32 y0 = y << 4; + s32 y1 = (y + BLOCK_SIZE - 1) << 4; + + // Evaluate half-space functions + bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; + bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; + bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; + bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; + int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); + + bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; + bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; + bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; + bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; + int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); + + bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; + bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; + bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; + bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; + int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); + + // Skip block when outside an edge + if (a == 0x0 || b == 0x0 || c == 0x0) + continue; + + BuildBlock(x, y); + + // Accept whole block when totally covered + if (a == 0xF && b == 0xF && c == 0xF) { - // Corners of block - s32 x0 = x << 4; - s32 x1 = (x + BLOCK_SIZE - 1) << 4; - s32 y0 = y << 4; - s32 y1 = (y + BLOCK_SIZE - 1) << 4; - - // Evaluate half-space functions - bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; - bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; - bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; - bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; - int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); - - bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; - bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; - bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; - bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; - int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); - - bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; - bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; - bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; - bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; - int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); - - // Skip block when outside an edge - if (a == 0x0 || b == 0x0 || c == 0x0) - continue; - - BuildBlock(x, y); - - // Accept whole block when totally covered - if (a == 0xF && b == 0xF && c == 0xF) + for (s32 iy = 0; iy < BLOCK_SIZE; iy++) { - for (s32 iy = 0; iy < BLOCK_SIZE; iy++) + for (s32 ix = 0; ix < BLOCK_SIZE; ix++) { - for (s32 ix = 0; ix < BLOCK_SIZE; ix++) + Draw(x + ix, y + iy, ix, iy); + } + } + } + else // Partially covered block + { + s32 CY1 = C1 + DX12 * y0 - DY12 * x0; + s32 CY2 = C2 + DX23 * y0 - DY23 * x0; + s32 CY3 = C3 + DX31 * y0 - DY31 * x0; + + for (s32 iy = 0; iy < BLOCK_SIZE; iy++) + { + s32 CX1 = CY1; + s32 CX2 = CY2; + s32 CX3 = CY3; + + for (s32 ix = 0; ix < BLOCK_SIZE; ix++) + { + if (CX1 > 0 && CX2 > 0 && CX3 > 0) { Draw(x + ix, y + iy, ix, iy); } + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; } - } - else // Partially covered block - { - s32 CY1 = C1 + DX12 * y0 - DY12 * x0; - s32 CY2 = C2 + DX23 * y0 - DY23 * x0; - s32 CY3 = C3 + DX31 * y0 - DY31 * x0; - for (s32 iy = 0; iy < BLOCK_SIZE; iy++) - { - s32 CX1 = CY1; - s32 CX2 = CY2; - s32 CX3 = CY3; - - for (s32 ix = 0; ix < BLOCK_SIZE; ix++) - { - if (CX1 > 0 && CX2 > 0 && CX3 > 0) - { - Draw(x + ix, y + iy, ix, iy); - } - - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; - } - - CY1 += FDX12; - CY2 += FDX23; - CY3 += FDX31; - } + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; } } } } - else - { - // Calculating bbox - // First check for alpha channel - don't do anything it if always fails, - // Change bbox to primitive size if it always passes - AlphaTest::TEST_RESULT alphaRes = bpmem.alpha_test.TestResult(); - - if (alphaRes != AlphaTest::UNDETERMINED) - { - if (alphaRes == AlphaTest::PASS) - { - BoundingBox::coords[BoundingBox::TOP] = std::min(BoundingBox::coords[BoundingBox::TOP], (u16) miny); - BoundingBox::coords[BoundingBox::LEFT] = std::min(BoundingBox::coords[BoundingBox::LEFT], (u16) minx); - BoundingBox::coords[BoundingBox::BOTTOM] = std::max(BoundingBox::coords[BoundingBox::BOTTOM], (u16) maxy); - BoundingBox::coords[BoundingBox::RIGHT] = std::max(BoundingBox::coords[BoundingBox::RIGHT], (u16) maxx); - } - return; - } - - // If we are calculating bbox with alpha, we only need to find the - // topmost, leftmost, bottom most and rightmost pixels to be drawn. - // So instead of drawing every single one of the triangle's pixels, - // four loops are run: one for the top pixel, one for the left, one for - // the bottom and one for the right. As soon as a pixel that is to be - // drawn is found, the loop breaks. This enables a ~150% speedbost in - // bbox calculation, albeit at the cost of some ugly repetitive code. - const s32 FLEFT = minx << 4; - const s32 FRIGHT = maxx << 4; - s32 FTOP = miny << 4; - s32 FBOTTOM = maxy << 4; - - // Start checking for bbox top - s32 CY1 = C1 + DX12 * FTOP - DY12 * FLEFT; - s32 CY2 = C2 + DX23 * FTOP - DY23 * FLEFT; - s32 CY3 = C3 + DX31 * FTOP - DY31 * FLEFT; - - // Loop - for (s32 y = miny; y <= maxy; ++y) - { - if (y >= BoundingBox::coords[BoundingBox::TOP]) - break; - - s32 CX1 = CY1; - s32 CX2 = CY2; - s32 CX3 = CY3; - - for (s32 x = minx; x <= maxx; ++x) - { - if (CX1 > 0 && CX2 > 0 && CX3 > 0) - { - // Build the new raster block every other pixel - PrepareBlock(x, y); - Draw(x, y, x & (BLOCK_SIZE - 1), y & (BLOCK_SIZE - 1)); - - if (y >= BoundingBox::coords[BoundingBox::TOP]) - break; - } - - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; - } - - CY1 += FDX12; - CY2 += FDX23; - CY3 += FDX31; - } - - // Update top limit - miny = std::max((s32) BoundingBox::coords[BoundingBox::TOP], miny); - FTOP = miny << 4; - - // Checking for bbox left - s32 CX1 = C1 + DX12 * FTOP - DY12 * FLEFT; - s32 CX2 = C2 + DX23 * FTOP - DY23 * FLEFT; - s32 CX3 = C3 + DX31 * FTOP - DY31 * FLEFT; - - // Loop - for (s32 x = minx; x <= maxx; ++x) - { - if (x >= BoundingBox::coords[BoundingBox::LEFT]) - break; - - CY1 = CX1; - CY2 = CX2; - CY3 = CX3; - - for (s32 y = miny; y <= maxy; ++y) - { - if (CY1 > 0 && CY2 > 0 && CY3 > 0) - { - PrepareBlock(x, y); - Draw(x, y, x & (BLOCK_SIZE - 1), y & (BLOCK_SIZE - 1)); - - if (x >= BoundingBox::coords[BoundingBox::LEFT]) - break; - } - - CY1 += FDX12; - CY2 += FDX23; - CY3 += FDX31; - } - - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; - } - - // Update left limit - minx = std::max((s32) BoundingBox::coords[BoundingBox::LEFT], minx); - - // Checking for bbox bottom - CY1 = C1 + DX12 * FBOTTOM - DY12 * FRIGHT; - CY2 = C2 + DX23 * FBOTTOM - DY23 * FRIGHT; - CY3 = C3 + DX31 * FBOTTOM - DY31 * FRIGHT; - - // Loop - for (s32 y = maxy; y >= miny; --y) - { - CX1 = CY1; - CX2 = CY2; - CX3 = CY3; - - if (y <= BoundingBox::coords[BoundingBox::BOTTOM]) - break; - - for (s32 x = maxx; x >= minx; --x) - { - if (CX1 > 0 && CX2 > 0 && CX3 > 0) - { - // Build the new raster block every other pixel - PrepareBlock(x, y); - Draw(x, y, x & (BLOCK_SIZE - 1), y & (BLOCK_SIZE - 1)); - - if (y <= BoundingBox::coords[BoundingBox::BOTTOM]) - break; - } - - CX1 += FDY12; - CX2 += FDY23; - CX3 += FDY31; - } - - CY1 -= FDX12; - CY2 -= FDX23; - CY3 -= FDX31; - } - - // Update bottom limit - maxy = std::min((s32) BoundingBox::coords[BoundingBox::BOTTOM], maxy); - FBOTTOM = maxy << 4; - - // Checking for bbox right - CX1 = C1 + DX12 * FBOTTOM - DY12 * FRIGHT; - CX2 = C2 + DX23 * FBOTTOM - DY23 * FRIGHT; - CX3 = C3 + DX31 * FBOTTOM - DY31 * FRIGHT; - - // Loop - for (s32 x = maxx; x >= minx; --x) - { - if (x <= BoundingBox::coords[BoundingBox::RIGHT]) - break; - - CY1 = CX1; - CY2 = CX2; - CY3 = CX3; - - for (s32 y = maxy; y >= miny; --y) - { - if (CY1 > 0 && CY2 > 0 && CY3 > 0) - { - // Build the new raster block every other pixel - PrepareBlock(x, y); - Draw(x, y, x & (BLOCK_SIZE - 1), y & (BLOCK_SIZE - 1)); - - if (x <= BoundingBox::coords[BoundingBox::RIGHT]) - break; - } - - CY1 -= FDX12; - CY2 -= FDX23; - CY3 -= FDX31; - } - - CX1 += FDY12; - CX2 += FDY23; - CX3 += FDY31; - } - } } diff --git a/Source/Core/VideoBackends/Software/Tev.cpp b/Source/Core/VideoBackends/Software/Tev.cpp index 6ea6eab051..552b60a99a 100644 --- a/Source/Core/VideoBackends/Software/Tev.cpp +++ b/Source/Core/VideoBackends/Software/Tev.cpp @@ -650,125 +650,120 @@ void Tev::Draw() if (!TevAlphaTest(output[ALP_C])) return; - // This part is only needed if we are not simply computing bbox - // (i. e., only needed when using the SW renderer) - if (!BoundingBox::active) + // z texture + if (bpmem.ztex2.op) { - // z texture - if (bpmem.ztex2.op) + u32 ztex = bpmem.ztex1.bias; + switch (bpmem.ztex2.type) { - u32 ztex = bpmem.ztex1.bias; - switch (bpmem.ztex2.type) - { - case 0: // 8 bit - ztex += TexColor[ALP_C]; - break; - case 1: // 16 bit - ztex += TexColor[ALP_C] << 8 | TexColor[RED_C]; - break; - case 2: // 24 bit - ztex += TexColor[RED_C] << 16 | TexColor[GRN_C] << 8 | TexColor[BLU_C]; - break; - } - - if (bpmem.ztex2.op == ZTEXTURE_ADD) - ztex += Position[2]; - - Position[2] = ztex & 0x00ffffff; + case 0: // 8 bit + ztex += TexColor[ALP_C]; + break; + case 1: // 16 bit + ztex += TexColor[ALP_C] << 8 | TexColor[RED_C]; + break; + case 2: // 24 bit + ztex += TexColor[RED_C] << 16 | TexColor[GRN_C] << 8 | TexColor[BLU_C]; + break; } - // fog - if (bpmem.fog.c_proj_fsel.fsel) + if (bpmem.ztex2.op == ZTEXTURE_ADD) + ztex += Position[2]; + + Position[2] = ztex & 0x00ffffff; + } + + // fog + if (bpmem.fog.c_proj_fsel.fsel) + { + float ze; + + if (bpmem.fog.c_proj_fsel.proj == 0) { - float ze; + // perspective + // ze = A/(B - (Zs >> B_SHF)) + s32 denom = bpmem.fog.b_magnitude - (Position[2] >> bpmem.fog.b_shift); + //in addition downscale magnitude and zs to 0.24 bits + ze = (bpmem.fog.a.GetA() * 16777215.0f) / (float)denom; + } + else + { + // orthographic + // ze = a*Zs + //in addition downscale zs to 0.24 bits + ze = bpmem.fog.a.GetA() * ((float)Position[2] / 16777215.0f); - if (bpmem.fog.c_proj_fsel.proj == 0) - { - // perspective - // ze = A/(B - (Zs >> B_SHF)) - s32 denom = bpmem.fog.b_magnitude - (Position[2] >> bpmem.fog.b_shift); - //in addition downscale magnitude and zs to 0.24 bits - ze = (bpmem.fog.a.GetA() * 16777215.0f) / (float)denom; - } - else - { - // orthographic - // ze = a*Zs - //in addition downscale zs to 0.24 bits - ze = bpmem.fog.a.GetA() * ((float)Position[2] / 16777215.0f); - - } - - if (bpmem.fogRange.Base.Enabled) - { - // TODO: This is untested and should definitely be checked against real hw. - // - No idea if offset is really normalized against the viewport width or against the projection matrix or yet something else - // - scaling of the "k" coefficient isn't clear either. - - // First, calculate the offset from the viewport center (normalized to 0..1) - float offset = (Position[0] - (bpmem.fogRange.Base.Center - 342)) / (float)xfmem.viewport.wd; - - // Based on that, choose the index such that points which are far away from the z-axis use the 10th "k" value and such that central points use the first value. - float floatindex = 9.f - std::abs(offset) * 9.f; - floatindex = (floatindex < 0.f) ? 0.f : (floatindex > 9.f) ? 9.f : floatindex; // TODO: This shouldn't be necessary! - - // Get the two closest integer indices, look up the corresponding samples - int indexlower = (int)floor(floatindex); - int indexupper = indexlower + 1; - // Look up coefficient... Seems like multiplying by 4 makes Fortune Street work properly (fog is too strong without the factor) - float klower = bpmem.fogRange.K[indexlower/2].GetValue(indexlower%2) * 4.f; - float kupper = bpmem.fogRange.K[indexupper/2].GetValue(indexupper%2) * 4.f; - - // linearly interpolate the samples and multiple ze by the resulting adjustment factor - float factor = indexupper - floatindex; - float k = klower * factor + kupper * (1.f - factor); - float x_adjust = sqrt(offset*offset + k*k)/k; - ze *= x_adjust; // NOTE: This is basically dividing by a cosine (hidden behind GXInitFogAdjTable): 1/cos = c/b = sqrt(a^2+b^2)/b - } - - ze -= bpmem.fog.c_proj_fsel.GetC(); - - // clamp 0 to 1 - float fog = (ze<0.0f) ? 0.0f : ((ze>1.0f) ? 1.0f : ze); - - switch (bpmem.fog.c_proj_fsel.fsel) - { - case 4: // exp - fog = 1.0f - pow(2.0f, -8.0f * fog); - break; - case 5: // exp2 - fog = 1.0f - pow(2.0f, -8.0f * fog * fog); - break; - case 6: // backward exp - fog = 1.0f - fog; - fog = pow(2.0f, -8.0f * fog); - break; - case 7: // backward exp2 - fog = 1.0f - fog; - fog = pow(2.0f, -8.0f * fog * fog); - break; - } - - // lerp from output to fog color - u32 fogInt = (u32)(fog * 256); - u32 invFog = 256 - fogInt; - - output[RED_C] = (output[RED_C] * invFog + fogInt * bpmem.fog.color.r) >> 8; - output[GRN_C] = (output[GRN_C] * invFog + fogInt * bpmem.fog.color.g) >> 8; - output[BLU_C] = (output[BLU_C] * invFog + fogInt * bpmem.fog.color.b) >> 8; } - bool late_ztest = !bpmem.zcontrol.early_ztest || !g_SWVideoConfig.bZComploc; - if (late_ztest && bpmem.zmode.testenable) + if (bpmem.fogRange.Base.Enabled) { - // TODO: Check against hw if these values get incremented even if depth testing is disabled - EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT); + // TODO: This is untested and should definitely be checked against real hw. + // - No idea if offset is really normalized against the viewport width or against the projection matrix or yet something else + // - scaling of the "k" coefficient isn't clear either. - if (!EfbInterface::ZCompare(Position[0], Position[1], Position[2])) - return; + // First, calculate the offset from the viewport center (normalized to 0..1) + float offset = (Position[0] - (bpmem.fogRange.Base.Center - 342)) / (float)xfmem.viewport.wd; - EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_OUTPUT); + // Based on that, choose the index such that points which are far away from the z-axis use the 10th "k" value and such that central points use the first value. + float floatindex = 9.f - std::abs(offset) * 9.f; + floatindex = (floatindex < 0.f) ? 0.f : (floatindex > 9.f) ? 9.f : floatindex; // TODO: This shouldn't be necessary! + + // Get the two closest integer indices, look up the corresponding samples + int indexlower = (int)floor(floatindex); + int indexupper = indexlower + 1; + // Look up coefficient... Seems like multiplying by 4 makes Fortune Street work properly (fog is too strong without the factor) + float klower = bpmem.fogRange.K[indexlower/2].GetValue(indexlower%2) * 4.f; + float kupper = bpmem.fogRange.K[indexupper/2].GetValue(indexupper%2) * 4.f; + + // linearly interpolate the samples and multiple ze by the resulting adjustment factor + float factor = indexupper - floatindex; + float k = klower * factor + kupper * (1.f - factor); + float x_adjust = sqrt(offset*offset + k*k)/k; + ze *= x_adjust; // NOTE: This is basically dividing by a cosine (hidden behind GXInitFogAdjTable): 1/cos = c/b = sqrt(a^2+b^2)/b } + + ze -= bpmem.fog.c_proj_fsel.GetC(); + + // clamp 0 to 1 + float fog = (ze<0.0f) ? 0.0f : ((ze>1.0f) ? 1.0f : ze); + + switch (bpmem.fog.c_proj_fsel.fsel) + { + case 4: // exp + fog = 1.0f - pow(2.0f, -8.0f * fog); + break; + case 5: // exp2 + fog = 1.0f - pow(2.0f, -8.0f * fog * fog); + break; + case 6: // backward exp + fog = 1.0f - fog; + fog = pow(2.0f, -8.0f * fog); + break; + case 7: // backward exp2 + fog = 1.0f - fog; + fog = pow(2.0f, -8.0f * fog * fog); + break; + } + + // lerp from output to fog color + u32 fogInt = (u32)(fog * 256); + u32 invFog = 256 - fogInt; + + output[RED_C] = (output[RED_C] * invFog + fogInt * bpmem.fog.color.r) >> 8; + output[GRN_C] = (output[GRN_C] * invFog + fogInt * bpmem.fog.color.g) >> 8; + output[BLU_C] = (output[BLU_C] * invFog + fogInt * bpmem.fog.color.b) >> 8; + } + + bool late_ztest = !bpmem.zcontrol.early_ztest || !g_SWVideoConfig.bZComploc; + if (late_ztest && bpmem.zmode.testenable) + { + // TODO: Check against hw if these values get incremented even if depth testing is disabled + EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT); + + if (!EfbInterface::ZCompare(Position[0], Position[1], Position[2])) + return; + + EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_OUTPUT); } // branchless bounding box update @@ -777,12 +772,6 @@ void Tev::Draw() BoundingBox::coords[BoundingBox::TOP] = std::min((u16)Position[1], BoundingBox::coords[BoundingBox::TOP]); BoundingBox::coords[BoundingBox::BOTTOM] = std::max((u16)Position[1], BoundingBox::coords[BoundingBox::BOTTOM]); - // if we are only calculating the bounding box, - // there's no need to actually draw anything - if (BoundingBox::active) - return; - - #if ALLOW_TEV_DUMPS if (g_SWVideoConfig.bDumpTevStages) {