Software: Store offset in Slope
This is needed because zfreeze requires a separate offset to work correctly. It also makes the code a bit less janky.
This commit is contained in:
parent
3a742e99bb
commit
164e0f742d
|
@ -20,16 +20,82 @@ namespace Rasterizer
|
|||
{
|
||||
static constexpr int BLOCK_SIZE = 2;
|
||||
|
||||
struct SlopeContext
|
||||
{
|
||||
SlopeContext(const OutputVertexData* v0, const OutputVertexData* v1, const OutputVertexData* v2,
|
||||
s32 x0, s32 y0)
|
||||
: x0(x0), y0(y0)
|
||||
{
|
||||
// adjust a little less than 0.5
|
||||
const float adjust = 0.495f;
|
||||
|
||||
xOff = ((float)x0 - v0->screenPosition.x) + adjust;
|
||||
yOff = ((float)y0 - v0->screenPosition.y) + adjust;
|
||||
|
||||
dx10 = v1->screenPosition.x - v0->screenPosition.x;
|
||||
dx20 = v2->screenPosition.x - v0->screenPosition.x;
|
||||
dy10 = v1->screenPosition.y - v0->screenPosition.y;
|
||||
dy20 = v2->screenPosition.y - v0->screenPosition.y;
|
||||
}
|
||||
s32 x0;
|
||||
s32 y0;
|
||||
float xOff;
|
||||
float yOff;
|
||||
float dx10;
|
||||
float dx20;
|
||||
float dy10;
|
||||
float dy20;
|
||||
};
|
||||
|
||||
struct Slope
|
||||
{
|
||||
Slope() = default;
|
||||
Slope(float f0, float f1, float f2, const SlopeContext& ctx) : f0(f0)
|
||||
{
|
||||
float delta_20 = f2 - f0;
|
||||
float delta_10 = f1 - f0;
|
||||
|
||||
// x2 - x0 y1 - y0 x1 - x0 y2 - y0
|
||||
float a = delta_20 * ctx.dy10 - delta_10 * ctx.dy20;
|
||||
float b = ctx.dx20 * delta_10 - ctx.dx10 * delta_20;
|
||||
float c = ctx.dx20 * ctx.dy10 - ctx.dx10 * ctx.dy20;
|
||||
|
||||
dfdx = a / c;
|
||||
dfdy = b / c;
|
||||
|
||||
x0 = ctx.x0;
|
||||
y0 = ctx.y0;
|
||||
xOff = ctx.xOff;
|
||||
yOff = ctx.yOff;
|
||||
}
|
||||
|
||||
// These default values are used in the unlikely case that zfreeze is enabled when drawing the
|
||||
// first primitive.
|
||||
// TODO: This is just a guess!
|
||||
float dfdx = 0.0f;
|
||||
float dfdy = 0.0f;
|
||||
float f0 = 1.0f;
|
||||
|
||||
// Both an s32 value and a float value are used to minimize rounding error
|
||||
// TODO: is this really needed?
|
||||
s32 x0 = 0;
|
||||
s32 y0 = 0;
|
||||
float xOff = 0.0f;
|
||||
float yOff = 0.0f;
|
||||
|
||||
float GetValue(s32 x, s32 y) const
|
||||
{
|
||||
float dx = xOff + (float)(x - x0);
|
||||
float dy = yOff + (float)(y - y0);
|
||||
return f0 + (dfdx * dx) + (dfdy * dy);
|
||||
}
|
||||
};
|
||||
|
||||
static Slope ZSlope;
|
||||
static Slope WSlope;
|
||||
static Slope ColorSlopes[2][4];
|
||||
static Slope TexSlopes[8][3];
|
||||
|
||||
static s32 vertex0X;
|
||||
static s32 vertex0Y;
|
||||
static float vertexOffsetX;
|
||||
static float vertexOffsetY;
|
||||
|
||||
static Tev tev;
|
||||
static RasterBlock rasterBlock;
|
||||
|
||||
|
@ -37,11 +103,9 @@ void Init()
|
|||
{
|
||||
tev.Init();
|
||||
|
||||
// Set initial z reference plane in the unlikely case that zfreeze is enabled when drawing the
|
||||
// first primitive.
|
||||
// TODO: This is just a guess!
|
||||
ZSlope.dfdx = ZSlope.dfdy = 0.f;
|
||||
ZSlope.f0 = 1.f;
|
||||
// The other slopes are set for each primitive drawn, but zfreeze means that the z slope
|
||||
// needs to be set to an (untested) default value.
|
||||
ZSlope = Slope();
|
||||
}
|
||||
|
||||
// Returns approximation of log2(f) in s28.4
|
||||
|
@ -75,10 +139,7 @@ static void Draw(s32 x, s32 y, s32 xi, s32 yi)
|
|||
{
|
||||
INCSTAT(g_stats.this_frame.rasterized_pixels);
|
||||
|
||||
float dx = vertexOffsetX + (float)(x - vertex0X);
|
||||
float dy = vertexOffsetY + (float)(y - vertex0Y);
|
||||
|
||||
s32 z = (s32)std::clamp<float>(ZSlope.GetValue(dx, dy), 0.0f, 16777215.0f);
|
||||
s32 z = (s32)std::clamp<float>(ZSlope.GetValue(x, y), 0.0f, 16777215.0f);
|
||||
|
||||
if (bpmem.UseEarlyDepthTest())
|
||||
{
|
||||
|
@ -104,7 +165,7 @@ static void Draw(s32 x, s32 y, s32 xi, s32 yi)
|
|||
{
|
||||
for (int comp = 0; comp < 4; comp++)
|
||||
{
|
||||
u16 color = (u16)ColorSlopes[i][comp].GetValue(dx, dy);
|
||||
u16 color = (u16)ColorSlopes[i][comp].GetValue(x, y);
|
||||
|
||||
// clamp color value to 0
|
||||
u16 mask = ~(color >> 8);
|
||||
|
@ -136,31 +197,6 @@ static void Draw(s32 x, s32 y, s32 xi, s32 yi)
|
|||
tev.Draw();
|
||||
}
|
||||
|
||||
static void InitTriangle(float X1, float Y1, s32 xi, s32 yi)
|
||||
{
|
||||
vertex0X = xi;
|
||||
vertex0Y = yi;
|
||||
|
||||
// adjust a little less than 0.5
|
||||
const float adjust = 0.495f;
|
||||
|
||||
vertexOffsetX = ((float)xi - X1) + adjust;
|
||||
vertexOffsetY = ((float)yi - Y1) + adjust;
|
||||
}
|
||||
|
||||
static void InitSlope(Slope* slope, float f1, float f2, float f3, float DX31, float DX12,
|
||||
float DY12, float DY31)
|
||||
{
|
||||
float DF31 = f3 - f1;
|
||||
float DF21 = f2 - f1;
|
||||
float a = DF31 * -DY12 - DF21 * DY31;
|
||||
float b = DX31 * DF21 + DX12 * DF31;
|
||||
float c = -DX12 * DY31 - DX31 * -DY12;
|
||||
slope->dfdx = -a / c;
|
||||
slope->dfdy = -b / c;
|
||||
slope->f0 = f1;
|
||||
}
|
||||
|
||||
static inline void CalculateLOD(s32* lodp, bool* linear, u32 texmap, u32 texcoord)
|
||||
{
|
||||
auto texUnit = bpmem.tex.GetUnit(texmap);
|
||||
|
@ -220,22 +256,22 @@ static void BuildBlock(s32 blockX, s32 blockY)
|
|||
{
|
||||
RasterBlockPixel& pixel = rasterBlock.Pixel[xi][yi];
|
||||
|
||||
float dx = vertexOffsetX + (float)(xi + blockX - vertex0X);
|
||||
float dy = vertexOffsetY + (float)(yi + blockY - vertex0Y);
|
||||
s32 x = xi + blockX;
|
||||
s32 y = yi + blockY;
|
||||
|
||||
float invW = 1.0f / WSlope.GetValue(dx, dy);
|
||||
float invW = 1.0f / WSlope.GetValue(x, y);
|
||||
pixel.InvW = invW;
|
||||
|
||||
// tex coords
|
||||
for (unsigned int i = 0; i < bpmem.genMode.numtexgens; i++)
|
||||
{
|
||||
float projection = invW;
|
||||
float q = TexSlopes[i][2].GetValue(dx, dy) * invW;
|
||||
float q = TexSlopes[i][2].GetValue(x, y) * invW;
|
||||
if (q != 0.0f)
|
||||
projection = invW / q;
|
||||
|
||||
pixel.Uv[i][0] = TexSlopes[i][0].GetValue(dx, dy) * projection;
|
||||
pixel.Uv[i][1] = TexSlopes[i][1].GetValue(dx, dy) * projection;
|
||||
pixel.Uv[i][0] = TexSlopes[i][0].GetValue(x, y) * projection;
|
||||
pixel.Uv[i][1] = TexSlopes[i][1].GetValue(x, y) * projection;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -334,19 +370,12 @@ void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v
|
|||
if (minx >= maxx || miny >= maxy)
|
||||
return;
|
||||
|
||||
// Setup slopes
|
||||
float fltx1 = v0->screenPosition.x;
|
||||
float flty1 = v0->screenPosition.y;
|
||||
float fltdx31 = v2->screenPosition.x - fltx1;
|
||||
float fltdx12 = fltx1 - v1->screenPosition.x;
|
||||
float fltdy12 = flty1 - v1->screenPosition.y;
|
||||
float fltdy31 = v2->screenPosition.y - flty1;
|
||||
|
||||
InitTriangle(fltx1, flty1, (X1 + 0xF) >> 4, (Y1 + 0xF) >> 4);
|
||||
// Set up slopes
|
||||
const SlopeContext ctx(v0, v1, v2, (X1 + 0xF) >> 4, (Y1 + 0xF) >> 4);
|
||||
|
||||
float w[3] = {1.0f / v0->projectedPosition.w, 1.0f / v1->projectedPosition.w,
|
||||
1.0f / v2->projectedPosition.w};
|
||||
InitSlope(&WSlope, w[0], w[1], w[2], fltdx31, fltdx12, fltdy12, fltdy31);
|
||||
WSlope = Slope(w[0], w[1], w[2], ctx);
|
||||
|
||||
// TODO: The zfreeze emulation is not quite correct, yet!
|
||||
// Many things might prevent us from reaching this line (culling, clipping, scissoring).
|
||||
|
@ -355,21 +384,21 @@ void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v
|
|||
// We're currently sloppy at this since we abort early if any of the culling/clipping/scissoring
|
||||
// tests fail.
|
||||
if (!bpmem.genMode.zfreeze)
|
||||
InitSlope(&ZSlope, v0->screenPosition[2], v1->screenPosition[2], v2->screenPosition[2], fltdx31,
|
||||
fltdx12, fltdy12, fltdy31);
|
||||
ZSlope = Slope(v0->screenPosition.z, v1->screenPosition.z, v2->screenPosition.z, ctx);
|
||||
|
||||
for (unsigned int i = 0; i < bpmem.genMode.numcolchans; i++)
|
||||
{
|
||||
for (int comp = 0; comp < 4; comp++)
|
||||
InitSlope(&ColorSlopes[i][comp], v0->color[i][comp], v1->color[i][comp], v2->color[i][comp],
|
||||
fltdx31, fltdx12, fltdy12, fltdy31);
|
||||
ColorSlopes[i][comp] = Slope(v0->color[i][comp], v1->color[i][comp], v2->color[i][comp], ctx);
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < bpmem.genMode.numtexgens; i++)
|
||||
{
|
||||
for (int comp = 0; comp < 3; comp++)
|
||||
InitSlope(&TexSlopes[i][comp], v0->texCoords[i][comp] * w[0], v1->texCoords[i][comp] * w[1],
|
||||
v2->texCoords[i][comp] * w[2], fltdx31, fltdx12, fltdy12, fltdy31);
|
||||
{
|
||||
TexSlopes[i][comp] = Slope(v0->texCoords[i][comp] * w[0], v1->texCoords[i][comp] * w[1],
|
||||
v2->texCoords[i][comp] * w[2], ctx);
|
||||
}
|
||||
}
|
||||
|
||||
// Half-edge constants
|
||||
|
|
|
@ -16,15 +16,6 @@ void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v
|
|||
|
||||
void SetTevReg(int reg, int comp, s16 color);
|
||||
|
||||
struct Slope
|
||||
{
|
||||
float dfdx;
|
||||
float dfdy;
|
||||
float f0;
|
||||
|
||||
float GetValue(float dx, float dy) const { return f0 + (dfdx * dx) + (dfdy * dy); }
|
||||
};
|
||||
|
||||
struct RasterBlockPixel
|
||||
{
|
||||
float InvW;
|
||||
|
|
Loading…
Reference in New Issue