VideoSW: Drop SW bbox code

The hardware backends no longer use this shared code, and VideoSW doesn't need it either, so it was just dead code.
This commit is contained in:
degasus 2015-09-17 18:19:47 +02:00
parent e3e0399af2
commit 3b5b80bf3c
2 changed files with 171 additions and 375 deletions

View File

@ -128,7 +128,7 @@ inline void Draw(s32 x, s32 y, s32 xi, s32 yi)
s32 z = (s32)MathUtil::Clamp<float>(ZSlope.GetValue(dx, dy), 0.0f, 16777215.0f);
if (!BoundingBox::active && bpmem.UseEarlyDepthTest() && g_SWVideoConfig.bZComploc)
if (bpmem.UseEarlyDepthTest() && g_SWVideoConfig.bZComploc)
{
// TODO: Test if perf regs are incremented even if test is disabled
EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT_ZCOMPLOC);
@ -418,281 +418,88 @@ void DrawTriangleFrontFace(OutputVertexData *v0, OutputVertexData *v1, OutputVer
if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++;
if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++;
// If drawing, rasterize every block
if (!BoundingBox::active)
// Start in corner of 8x8 block
minx &= ~(BLOCK_SIZE - 1);
miny &= ~(BLOCK_SIZE - 1);
// Loop through blocks
for (s32 y = miny; y < maxy; y += BLOCK_SIZE)
{
// Start in corner of 8x8 block
minx &= ~(BLOCK_SIZE - 1);
miny &= ~(BLOCK_SIZE - 1);
// Loop through blocks
for (s32 y = miny; y < maxy; y += BLOCK_SIZE)
for (s32 x = minx; x < maxx; x += BLOCK_SIZE)
{
for (s32 x = minx; x < maxx; x += BLOCK_SIZE)
// Corners of block
s32 x0 = x << 4;
s32 x1 = (x + BLOCK_SIZE - 1) << 4;
s32 y0 = y << 4;
s32 y1 = (y + BLOCK_SIZE - 1) << 4;
// Evaluate half-space functions
bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0;
bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0;
bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0;
bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0;
int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3);
bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0;
bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0;
bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0;
bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0;
int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3);
bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0;
bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0;
bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0;
bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0;
int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3);
// Skip block when outside an edge
if (a == 0x0 || b == 0x0 || c == 0x0)
continue;
BuildBlock(x, y);
// Accept whole block when totally covered
if (a == 0xF && b == 0xF && c == 0xF)
{
// Corners of block
s32 x0 = x << 4;
s32 x1 = (x + BLOCK_SIZE - 1) << 4;
s32 y0 = y << 4;
s32 y1 = (y + BLOCK_SIZE - 1) << 4;
// Evaluate half-space functions
bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0;
bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0;
bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0;
bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0;
int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3);
bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0;
bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0;
bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0;
bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0;
int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3);
bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0;
bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0;
bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0;
bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0;
int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3);
// Skip block when outside an edge
if (a == 0x0 || b == 0x0 || c == 0x0)
continue;
BuildBlock(x, y);
// Accept whole block when totally covered
if (a == 0xF && b == 0xF && c == 0xF)
for (s32 iy = 0; iy < BLOCK_SIZE; iy++)
{
for (s32 iy = 0; iy < BLOCK_SIZE; iy++)
for (s32 ix = 0; ix < BLOCK_SIZE; ix++)
{
for (s32 ix = 0; ix < BLOCK_SIZE; ix++)
Draw(x + ix, y + iy, ix, iy);
}
}
}
else // Partially covered block
{
s32 CY1 = C1 + DX12 * y0 - DY12 * x0;
s32 CY2 = C2 + DX23 * y0 - DY23 * x0;
s32 CY3 = C3 + DX31 * y0 - DY31 * x0;
for (s32 iy = 0; iy < BLOCK_SIZE; iy++)
{
s32 CX1 = CY1;
s32 CX2 = CY2;
s32 CX3 = CY3;
for (s32 ix = 0; ix < BLOCK_SIZE; ix++)
{
if (CX1 > 0 && CX2 > 0 && CX3 > 0)
{
Draw(x + ix, y + iy, ix, iy);
}
CX1 -= FDY12;
CX2 -= FDY23;
CX3 -= FDY31;
}
}
else // Partially covered block
{
s32 CY1 = C1 + DX12 * y0 - DY12 * x0;
s32 CY2 = C2 + DX23 * y0 - DY23 * x0;
s32 CY3 = C3 + DX31 * y0 - DY31 * x0;
for (s32 iy = 0; iy < BLOCK_SIZE; iy++)
{
s32 CX1 = CY1;
s32 CX2 = CY2;
s32 CX3 = CY3;
for (s32 ix = 0; ix < BLOCK_SIZE; ix++)
{
if (CX1 > 0 && CX2 > 0 && CX3 > 0)
{
Draw(x + ix, y + iy, ix, iy);
}
CX1 -= FDY12;
CX2 -= FDY23;
CX3 -= FDY31;
}
CY1 += FDX12;
CY2 += FDX23;
CY3 += FDX31;
}
CY1 += FDX12;
CY2 += FDX23;
CY3 += FDX31;
}
}
}
}
else
{
// Calculating bbox
// First check for alpha channel - don't do anything if it always fails,
// Change bbox to primitive size if it always passes
AlphaTest::TEST_RESULT alphaRes = bpmem.alpha_test.TestResult();
if (alphaRes != AlphaTest::UNDETERMINED)
{
if (alphaRes == AlphaTest::PASS)
{
BoundingBox::coords[BoundingBox::TOP] = std::min(BoundingBox::coords[BoundingBox::TOP], (u16) miny);
BoundingBox::coords[BoundingBox::LEFT] = std::min(BoundingBox::coords[BoundingBox::LEFT], (u16) minx);
BoundingBox::coords[BoundingBox::BOTTOM] = std::max(BoundingBox::coords[BoundingBox::BOTTOM], (u16) maxy);
BoundingBox::coords[BoundingBox::RIGHT] = std::max(BoundingBox::coords[BoundingBox::RIGHT], (u16) maxx);
}
return;
}
// If we are calculating bbox with alpha, we only need to find the
// topmost, leftmost, bottom most and rightmost pixels to be drawn.
// So instead of drawing every single one of the triangle's pixels,
// four loops are run: one for the top pixel, one for the left, one for
// the bottom and one for the right. As soon as a pixel that is to be
// drawn is found, the loop breaks. This enables a ~150% speed boost in
// bbox calculation, albeit at the cost of some ugly repetitive code.
const s32 FLEFT = minx << 4;
const s32 FRIGHT = maxx << 4;
s32 FTOP = miny << 4;
s32 FBOTTOM = maxy << 4;
// Start checking for bbox top
s32 CY1 = C1 + DX12 * FTOP - DY12 * FLEFT;
s32 CY2 = C2 + DX23 * FTOP - DY23 * FLEFT;
s32 CY3 = C3 + DX31 * FTOP - DY31 * FLEFT;
// Loop
for (s32 y = miny; y <= maxy; ++y)
{
if (y >= BoundingBox::coords[BoundingBox::TOP])
break;
s32 CX1 = CY1;
s32 CX2 = CY2;
s32 CX3 = CY3;
for (s32 x = minx; x <= maxx; ++x)
{
if (CX1 > 0 && CX2 > 0 && CX3 > 0)
{
// Build the new raster block every other pixel
PrepareBlock(x, y);
Draw(x, y, x & (BLOCK_SIZE - 1), y & (BLOCK_SIZE - 1));
if (y >= BoundingBox::coords[BoundingBox::TOP])
break;
}
CX1 -= FDY12;
CX2 -= FDY23;
CX3 -= FDY31;
}
CY1 += FDX12;
CY2 += FDX23;
CY3 += FDX31;
}
// Update top limit
miny = std::max((s32) BoundingBox::coords[BoundingBox::TOP], miny);
FTOP = miny << 4;
// Checking for bbox left
s32 CX1 = C1 + DX12 * FTOP - DY12 * FLEFT;
s32 CX2 = C2 + DX23 * FTOP - DY23 * FLEFT;
s32 CX3 = C3 + DX31 * FTOP - DY31 * FLEFT;
// Loop
for (s32 x = minx; x <= maxx; ++x)
{
if (x >= BoundingBox::coords[BoundingBox::LEFT])
break;
CY1 = CX1;
CY2 = CX2;
CY3 = CX3;
for (s32 y = miny; y <= maxy; ++y)
{
if (CY1 > 0 && CY2 > 0 && CY3 > 0)
{
PrepareBlock(x, y);
Draw(x, y, x & (BLOCK_SIZE - 1), y & (BLOCK_SIZE - 1));
if (x >= BoundingBox::coords[BoundingBox::LEFT])
break;
}
CY1 += FDX12;
CY2 += FDX23;
CY3 += FDX31;
}
CX1 -= FDY12;
CX2 -= FDY23;
CX3 -= FDY31;
}
// Update left limit
minx = std::max((s32) BoundingBox::coords[BoundingBox::LEFT], minx);
// Checking for bbox bottom
CY1 = C1 + DX12 * FBOTTOM - DY12 * FRIGHT;
CY2 = C2 + DX23 * FBOTTOM - DY23 * FRIGHT;
CY3 = C3 + DX31 * FBOTTOM - DY31 * FRIGHT;
// Loop
for (s32 y = maxy; y >= miny; --y)
{
CX1 = CY1;
CX2 = CY2;
CX3 = CY3;
if (y <= BoundingBox::coords[BoundingBox::BOTTOM])
break;
for (s32 x = maxx; x >= minx; --x)
{
if (CX1 > 0 && CX2 > 0 && CX3 > 0)
{
// Build the new raster block every other pixel
PrepareBlock(x, y);
Draw(x, y, x & (BLOCK_SIZE - 1), y & (BLOCK_SIZE - 1));
if (y <= BoundingBox::coords[BoundingBox::BOTTOM])
break;
}
CX1 += FDY12;
CX2 += FDY23;
CX3 += FDY31;
}
CY1 -= FDX12;
CY2 -= FDX23;
CY3 -= FDX31;
}
// Update bottom limit
maxy = std::min((s32) BoundingBox::coords[BoundingBox::BOTTOM], maxy);
FBOTTOM = maxy << 4;
// Checking for bbox right
CX1 = C1 + DX12 * FBOTTOM - DY12 * FRIGHT;
CX2 = C2 + DX23 * FBOTTOM - DY23 * FRIGHT;
CX3 = C3 + DX31 * FBOTTOM - DY31 * FRIGHT;
// Loop
for (s32 x = maxx; x >= minx; --x)
{
if (x <= BoundingBox::coords[BoundingBox::RIGHT])
break;
CY1 = CX1;
CY2 = CX2;
CY3 = CX3;
for (s32 y = maxy; y >= miny; --y)
{
if (CY1 > 0 && CY2 > 0 && CY3 > 0)
{
// Build the new raster block every other pixel
PrepareBlock(x, y);
Draw(x, y, x & (BLOCK_SIZE - 1), y & (BLOCK_SIZE - 1));
if (x <= BoundingBox::coords[BoundingBox::RIGHT])
break;
}
CY1 -= FDX12;
CY2 -= FDX23;
CY3 -= FDX31;
}
CX1 += FDY12;
CX2 += FDY23;
CX3 += FDY31;
}
}
}

View File

@ -650,125 +650,120 @@ void Tev::Draw()
if (!TevAlphaTest(output[ALP_C]))
return;
// This part is only needed if we are not simply computing bbox
// (i. e., only needed when using the SW renderer)
if (!BoundingBox::active)
// z texture
if (bpmem.ztex2.op)
{
// z texture
if (bpmem.ztex2.op)
u32 ztex = bpmem.ztex1.bias;
switch (bpmem.ztex2.type)
{
u32 ztex = bpmem.ztex1.bias;
switch (bpmem.ztex2.type)
{
case 0: // 8 bit
ztex += TexColor[ALP_C];
break;
case 1: // 16 bit
ztex += TexColor[ALP_C] << 8 | TexColor[RED_C];
break;
case 2: // 24 bit
ztex += TexColor[RED_C] << 16 | TexColor[GRN_C] << 8 | TexColor[BLU_C];
break;
}
if (bpmem.ztex2.op == ZTEXTURE_ADD)
ztex += Position[2];
Position[2] = ztex & 0x00ffffff;
case 0: // 8 bit
ztex += TexColor[ALP_C];
break;
case 1: // 16 bit
ztex += TexColor[ALP_C] << 8 | TexColor[RED_C];
break;
case 2: // 24 bit
ztex += TexColor[RED_C] << 16 | TexColor[GRN_C] << 8 | TexColor[BLU_C];
break;
}
// fog
if (bpmem.fog.c_proj_fsel.fsel)
if (bpmem.ztex2.op == ZTEXTURE_ADD)
ztex += Position[2];
Position[2] = ztex & 0x00ffffff;
}
// fog
if (bpmem.fog.c_proj_fsel.fsel)
{
float ze;
if (bpmem.fog.c_proj_fsel.proj == 0)
{
float ze;
// perspective
// ze = A/(B - (Zs >> B_SHF))
s32 denom = bpmem.fog.b_magnitude - (Position[2] >> bpmem.fog.b_shift);
//in addition downscale magnitude and zs to 0.24 bits
ze = (bpmem.fog.a.GetA() * 16777215.0f) / (float)denom;
}
else
{
// orthographic
// ze = a*Zs
//in addition downscale zs to 0.24 bits
ze = bpmem.fog.a.GetA() * ((float)Position[2] / 16777215.0f);
if (bpmem.fog.c_proj_fsel.proj == 0)
{
// perspective
// ze = A/(B - (Zs >> B_SHF))
s32 denom = bpmem.fog.b_magnitude - (Position[2] >> bpmem.fog.b_shift);
//in addition downscale magnitude and zs to 0.24 bits
ze = (bpmem.fog.a.GetA() * 16777215.0f) / (float)denom;
}
else
{
// orthographic
// ze = a*Zs
//in addition downscale zs to 0.24 bits
ze = bpmem.fog.a.GetA() * ((float)Position[2] / 16777215.0f);
}
if (bpmem.fogRange.Base.Enabled)
{
// TODO: This is untested and should definitely be checked against real hw.
// - No idea if offset is really normalized against the viewport width or against the projection matrix or yet something else
// - scaling of the "k" coefficient isn't clear either.
// First, calculate the offset from the viewport center (normalized to 0..1)
float offset = (Position[0] - (bpmem.fogRange.Base.Center - 342)) / (float)xfmem.viewport.wd;
// Based on that, choose the index such that points which are far away from the z-axis use the 10th "k" value and such that central points use the first value.
float floatindex = 9.f - std::abs(offset) * 9.f;
floatindex = (floatindex < 0.f) ? 0.f : (floatindex > 9.f) ? 9.f : floatindex; // TODO: This shouldn't be necessary!
// Get the two closest integer indices, look up the corresponding samples
int indexlower = (int)floor(floatindex);
int indexupper = indexlower + 1;
// Look up coefficient... Seems like multiplying by 4 makes Fortune Street work properly (fog is too strong without the factor)
float klower = bpmem.fogRange.K[indexlower/2].GetValue(indexlower%2) * 4.f;
float kupper = bpmem.fogRange.K[indexupper/2].GetValue(indexupper%2) * 4.f;
// linearly interpolate the samples and multiply ze by the resulting adjustment factor
float factor = indexupper - floatindex;
float k = klower * factor + kupper * (1.f - factor);
float x_adjust = sqrt(offset*offset + k*k)/k;
ze *= x_adjust; // NOTE: This is basically dividing by a cosine (hidden behind GXInitFogAdjTable): 1/cos = c/b = sqrt(a^2+b^2)/b
}
ze -= bpmem.fog.c_proj_fsel.GetC();
// clamp 0 to 1
float fog = (ze<0.0f) ? 0.0f : ((ze>1.0f) ? 1.0f : ze);
switch (bpmem.fog.c_proj_fsel.fsel)
{
case 4: // exp
fog = 1.0f - pow(2.0f, -8.0f * fog);
break;
case 5: // exp2
fog = 1.0f - pow(2.0f, -8.0f * fog * fog);
break;
case 6: // backward exp
fog = 1.0f - fog;
fog = pow(2.0f, -8.0f * fog);
break;
case 7: // backward exp2
fog = 1.0f - fog;
fog = pow(2.0f, -8.0f * fog * fog);
break;
}
// lerp from output to fog color
u32 fogInt = (u32)(fog * 256);
u32 invFog = 256 - fogInt;
output[RED_C] = (output[RED_C] * invFog + fogInt * bpmem.fog.color.r) >> 8;
output[GRN_C] = (output[GRN_C] * invFog + fogInt * bpmem.fog.color.g) >> 8;
output[BLU_C] = (output[BLU_C] * invFog + fogInt * bpmem.fog.color.b) >> 8;
}
bool late_ztest = !bpmem.zcontrol.early_ztest || !g_SWVideoConfig.bZComploc;
if (late_ztest && bpmem.zmode.testenable)
if (bpmem.fogRange.Base.Enabled)
{
// TODO: Check against hw if these values get incremented even if depth testing is disabled
EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT);
// TODO: This is untested and should definitely be checked against real hw.
// - No idea if offset is really normalized against the viewport width or against the projection matrix or yet something else
// - scaling of the "k" coefficient isn't clear either.
if (!EfbInterface::ZCompare(Position[0], Position[1], Position[2]))
return;
// First, calculate the offset from the viewport center (normalized to 0..1)
float offset = (Position[0] - (bpmem.fogRange.Base.Center - 342)) / (float)xfmem.viewport.wd;
EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_OUTPUT);
// Based on that, choose the index such that points which are far away from the z-axis use the 10th "k" value and such that central points use the first value.
float floatindex = 9.f - std::abs(offset) * 9.f;
floatindex = (floatindex < 0.f) ? 0.f : (floatindex > 9.f) ? 9.f : floatindex; // TODO: This shouldn't be necessary!
// Get the two closest integer indices, look up the corresponding samples
int indexlower = (int)floor(floatindex);
int indexupper = indexlower + 1;
// Look up coefficient... Seems like multiplying by 4 makes Fortune Street work properly (fog is too strong without the factor)
float klower = bpmem.fogRange.K[indexlower/2].GetValue(indexlower%2) * 4.f;
float kupper = bpmem.fogRange.K[indexupper/2].GetValue(indexupper%2) * 4.f;
// linearly interpolate the samples and multiply ze by the resulting adjustment factor
float factor = indexupper - floatindex;
float k = klower * factor + kupper * (1.f - factor);
float x_adjust = sqrt(offset*offset + k*k)/k;
ze *= x_adjust; // NOTE: This is basically dividing by a cosine (hidden behind GXInitFogAdjTable): 1/cos = c/b = sqrt(a^2+b^2)/b
}
ze -= bpmem.fog.c_proj_fsel.GetC();
// clamp 0 to 1
float fog = (ze<0.0f) ? 0.0f : ((ze>1.0f) ? 1.0f : ze);
switch (bpmem.fog.c_proj_fsel.fsel)
{
case 4: // exp
fog = 1.0f - pow(2.0f, -8.0f * fog);
break;
case 5: // exp2
fog = 1.0f - pow(2.0f, -8.0f * fog * fog);
break;
case 6: // backward exp
fog = 1.0f - fog;
fog = pow(2.0f, -8.0f * fog);
break;
case 7: // backward exp2
fog = 1.0f - fog;
fog = pow(2.0f, -8.0f * fog * fog);
break;
}
// lerp from output to fog color
u32 fogInt = (u32)(fog * 256);
u32 invFog = 256 - fogInt;
output[RED_C] = (output[RED_C] * invFog + fogInt * bpmem.fog.color.r) >> 8;
output[GRN_C] = (output[GRN_C] * invFog + fogInt * bpmem.fog.color.g) >> 8;
output[BLU_C] = (output[BLU_C] * invFog + fogInt * bpmem.fog.color.b) >> 8;
}
bool late_ztest = !bpmem.zcontrol.early_ztest || !g_SWVideoConfig.bZComploc;
if (late_ztest && bpmem.zmode.testenable)
{
// TODO: Check against hw if these values get incremented even if depth testing is disabled
EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT);
if (!EfbInterface::ZCompare(Position[0], Position[1], Position[2]))
return;
EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_OUTPUT);
}
// branchless bounding box update
@ -777,12 +772,6 @@ void Tev::Draw()
BoundingBox::coords[BoundingBox::TOP] = std::min((u16)Position[1], BoundingBox::coords[BoundingBox::TOP]);
BoundingBox::coords[BoundingBox::BOTTOM] = std::max((u16)Position[1], BoundingBox::coords[BoundingBox::BOTTOM]);
// if we are only calculating the bounding box,
// there's no need to actually draw anything
if (BoundingBox::active)
return;
#if ALLOW_TEV_DUMPS
if (g_SWVideoConfig.bDumpTevStages)
{