3D: faster and more accurate interpolation
This commit is contained in:
parent
bc1385e905
commit
d656e6e7ff
|
@ -891,6 +891,7 @@ void SubmitPolygon()
|
|||
else
|
||||
z = 0x3FFF;
|
||||
|
||||
// checkme (Z<0 shouldn't be possible, but Z>0xFFFFFF is possible)
|
||||
if (z < 0) z = 0;
|
||||
else if (z > 0xFFFFFF) z = 0xFFFFFF;
|
||||
|
||||
|
|
|
@ -155,49 +155,82 @@ void Reset()
|
|||
// interpolation, avoiding precision loss from the aforementioned approximation.
|
||||
// Which is desirable when using the GPU to draw 2D graphics.
|
||||
|
||||
template<int dir>
|
||||
class Interpolator
|
||||
{
|
||||
public:
|
||||
Interpolator() {}
|
||||
Interpolator(s32 x0, s32 x1, s32 w0, s32 w1, int shift)
|
||||
Interpolator(s32 x0, s32 x1, s32 w0, s32 w1)
|
||||
{
|
||||
Setup(x0, x1, w0, w1, shift);
|
||||
Setup(x0, x1, w0, w1);
|
||||
}
|
||||
|
||||
void Setup(s32 x0, s32 x1, s32 w0, s32 w1, int shift)
|
||||
void Setup(s32 x0, s32 x1, s32 w0, s32 w1)
|
||||
{
|
||||
this->x0 = x0;
|
||||
this->x1 = x1;
|
||||
this->xdiff = x1 - x0;
|
||||
this->shift = shift;
|
||||
|
||||
this->w0factor = (s64)w0 * xdiff;
|
||||
this->w1factor = (s64)w1 * xdiff;
|
||||
this->wdiff = w1 - w0;
|
||||
// calculate reciprocals for linear mode and Z interpolation
|
||||
// TODO eventually: use a faster reciprocal function?
|
||||
if (this->xdiff != 0)
|
||||
this->xrecip = (1<<30) / this->xdiff;
|
||||
else
|
||||
this->xrecip = 0;
|
||||
this->xrecip_z = this->xrecip >> 8;
|
||||
|
||||
// linear mode is used if both W values are equal and have
|
||||
// low-order bits cleared (0-6 along X, 1-6 along Y)
|
||||
u32 mask = dir ? 0x7E : 0x7F;
|
||||
if ((w0 == w1) && !(w0 & mask) && !(w1 & mask))
|
||||
this->linear = true;
|
||||
else
|
||||
this->linear = false;
|
||||
|
||||
if (dir)
|
||||
{
|
||||
// along Y
|
||||
|
||||
if ((w0 & 0x1) && !(w1 & 0x1))
|
||||
{
|
||||
this->w0n = w0 - 1;
|
||||
this->w0d = w0 + 1;
|
||||
this->w1d = w1;
|
||||
}
|
||||
else
|
||||
{
|
||||
this->w0n = w0 & 0xFFFE;
|
||||
this->w0d = w0 & 0xFFFE;
|
||||
this->w1d = w1 & 0xFFFE;
|
||||
}
|
||||
|
||||
this->shift = 9;
|
||||
}
|
||||
else
|
||||
{
|
||||
// along X
|
||||
|
||||
this->w0n = w0;
|
||||
this->w0d = w0;
|
||||
this->w1d = w1;
|
||||
|
||||
this->shift = 8;
|
||||
}
|
||||
}
|
||||
|
||||
void SetX(s32 x)
|
||||
{
|
||||
x -= x0;
|
||||
this->x = x;
|
||||
if (xdiff != 0 && wdiff != 0)
|
||||
if (xdiff != 0 && !linear)
|
||||
{
|
||||
// TODO: hardware tests show that this method is too precise
|
||||
// I haven't yet figured out what the hardware does, though
|
||||
s64 num = ((s64)x * w0n) << shift;
|
||||
s32 den = (x * w0d) + ((xdiff-x) * w1d);
|
||||
|
||||
if (w1factor==0 || w0factor==0) { yfactor = 0; return; }
|
||||
|
||||
s64 num = ((s64)x << (shift + 40)) / w1factor;
|
||||
s64 denw0 = ((s64)(xdiff-x) << 40) / w0factor;
|
||||
s64 denw1 = num >> shift;
|
||||
|
||||
s64 denom = denw0 + denw1;
|
||||
if (denom == 0)
|
||||
yfactor = 0;
|
||||
else
|
||||
{
|
||||
yfactor = (s32)(num / denom);
|
||||
}
|
||||
// this seems to be a proper division on hardware :/
|
||||
// I haven't been able to find cases that produce imperfect output
|
||||
if (den == 0) yfactor = 0;
|
||||
else yfactor = (s32)(num / den);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -205,7 +238,7 @@ public:
|
|||
{
|
||||
if (xdiff == 0 || y0 == y1) return y0;
|
||||
|
||||
if (wdiff != 0)
|
||||
if (!linear)
|
||||
{
|
||||
// perspective-correct approx. interpolation
|
||||
if (y0 < y1)
|
||||
|
@ -216,10 +249,11 @@ public:
|
|||
else
|
||||
{
|
||||
// linear interpolation
|
||||
// checkme: the rounding bias there (3<<24) is a guess
|
||||
if (y0 < y1)
|
||||
return y0 + (((y1-y0) * x) / xdiff);
|
||||
return y0 + ((((s64)(y1-y0) * x * xrecip) + (3<<24)) >> 30);
|
||||
else
|
||||
return y1 + (((y0-y1) * (xdiff-x)) / xdiff);
|
||||
return y1 + ((((s64)(y0-y1) * (xdiff-x) * xrecip) + (3<<24)) >> 30);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -227,9 +261,9 @@ public:
|
|||
{
|
||||
if (xdiff == 0 || z0 == z1) return z0;
|
||||
|
||||
if ((wdiff != 0) && wbuffer)
|
||||
if (wbuffer)
|
||||
{
|
||||
// perspective-correct approx. interpolation
|
||||
// W-buffering: perspective-correct approx. interpolation
|
||||
if (z0 < z1)
|
||||
return z0 + (((s64)(z1-z0) * yfactor) >> shift);
|
||||
else
|
||||
|
@ -237,21 +271,52 @@ public:
|
|||
}
|
||||
else
|
||||
{
|
||||
// linear interpolation
|
||||
// Z-buffering: linear interpolation
|
||||
// still doesn't quite match hardware...
|
||||
s32 base, disp, factor;
|
||||
|
||||
if (z0 < z1)
|
||||
return z0 + (((s64)(z1-z0) * x) / xdiff);
|
||||
{
|
||||
base = z0;
|
||||
disp = z1 - z0;
|
||||
factor = x;
|
||||
}
|
||||
else
|
||||
return z1 + (((s64)(z0-z1) * (xdiff-x)) / xdiff);
|
||||
{
|
||||
base = z1;
|
||||
disp = z0 - z1,
|
||||
factor = xdiff - x;
|
||||
}
|
||||
|
||||
if (dir)
|
||||
{
|
||||
int shift = 0;
|
||||
while (disp > 0x3FF)
|
||||
{
|
||||
disp >>= 1;
|
||||
shift++;
|
||||
}
|
||||
|
||||
return base + ((((s64)disp * factor * xrecip_z) >> 22) << shift);
|
||||
}
|
||||
else
|
||||
{
|
||||
disp >>= 9;
|
||||
return base + (((s64)disp * factor * xrecip_z) >> 13);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
s32 x0, x1, xdiff, x;
|
||||
s64 w0factor, w1factor;
|
||||
s32 wdiff;
|
||||
int shift;
|
||||
|
||||
s32 yfactor;
|
||||
int shift;
|
||||
bool linear;
|
||||
|
||||
s32 xrecip, xrecip_z;
|
||||
s32 w0n, w0d, w1d;
|
||||
|
||||
u32 yfactor;
|
||||
};
|
||||
|
||||
|
||||
|
@ -280,7 +345,7 @@ public:
|
|||
Increment = 0;
|
||||
XMajor = false;
|
||||
|
||||
Interp.Setup(0, 0, 0, 0, 9);
|
||||
Interp.Setup(0, 0, 0, 0);
|
||||
Interp.SetX(0);
|
||||
|
||||
return x0;
|
||||
|
@ -347,8 +412,8 @@ public:
|
|||
|
||||
if (XMajor)
|
||||
{
|
||||
if (side) Interp.Setup(x0-1, x1-1, w0, w1, 9); // checkme
|
||||
else Interp.Setup(x0, x1, w0, w1, 9);
|
||||
if (side) Interp.Setup(x0-1, x1-1, w0, w1); // checkme
|
||||
else Interp.Setup(x0, x1, w0, w1);
|
||||
Interp.SetX(x);
|
||||
|
||||
// used for calculating AA coverage
|
||||
|
@ -356,7 +421,7 @@ public:
|
|||
}
|
||||
else
|
||||
{
|
||||
Interp.Setup(y0, y1, w0, w1, 9);
|
||||
Interp.Setup(y0, y1, w0, w1);
|
||||
Interp.SetX(y);
|
||||
|
||||
//ycov_incr = Increment >> 2;
|
||||
|
@ -434,7 +499,7 @@ public:
|
|||
s32 Increment;
|
||||
bool Negative;
|
||||
bool XMajor;
|
||||
Interpolator Interp;
|
||||
Interpolator<1> Interp;
|
||||
|
||||
private:
|
||||
s32 x0, xmin, xmax;
|
||||
|
@ -1009,8 +1074,8 @@ void RenderPolygonScanline(RendererPolygon* rp, s32 y)
|
|||
bool l_filledge, r_filledge;
|
||||
s32 l_edgelen, r_edgelen;
|
||||
s32 l_edgecov, r_edgecov;
|
||||
Interpolator* interp_start;
|
||||
Interpolator* interp_end;
|
||||
Interpolator<1>* interp_start;
|
||||
Interpolator<1>* interp_end;
|
||||
|
||||
xstart = rp->XL;
|
||||
xend = rp->XR;
|
||||
|
@ -1103,7 +1168,7 @@ void RenderPolygonScanline(RendererPolygon* rp, s32 y)
|
|||
int edge;
|
||||
|
||||
s32 x = xstart;
|
||||
Interpolator interpX(xstart, xend+1, wl, wr, 8);
|
||||
Interpolator<0> interpX(xstart, xend+1, wl, wr);
|
||||
|
||||
if (x < 0) x = 0;
|
||||
s32 xlimit;
|
||||
|
|
Loading…
Reference in New Issue