GFX3D: Polygon clipping, viewport transformation, perspective correction, and face calculations are now done in fixed-point instead of floating point.

This commit is contained in:
rogerman 2023-02-22 22:07:37 -08:00
parent e1969c470b
commit 7751b59882
2 changed files with 177 additions and 63 deletions

View File

@ -1632,8 +1632,12 @@ void NDSGeometryEngine::AddCurrentVertexToList(GFX3D_GeometryList &targetGList)
printf("wtf\n");
}
VERT &vert = targetGList.rawVertList[vertIndex];
NDSVertex &vtx = targetGList.rawVtxList[vertIndex];
vtx.position = vtxCoordTransformed;
vtx.texCoord = this->_texCoordTransformed;
vtx.color = this->_vtxColor666X;
VERT &vert = targetGList.rawVertList[vertIndex];
vert.coord[0] = (float)vtxCoordTransformed.x / 4096.0f;
vert.coord[1] = (float)vtxCoordTransformed.y / 4096.0f;
vert.coord[2] = (float)vtxCoordTransformed.z / 4096.0f;
@ -1650,6 +1654,7 @@ void NDSGeometryEngine::AddCurrentVertexToList(GFX3D_GeometryList &targetGList)
vert.color[1] = this->_vtxColor666X.g;
vert.color[2] = this->_vtxColor666X.b;
vert.color[3] = this->_vtxColor666X.a;
this->_vtxIndex[this->_vtxCount] = (u16)(targetGList.rawVertCount + this->_vtxCount - continuation);
this->_vtxCount++;
@ -1784,16 +1789,16 @@ void NDSGeometryEngine::GeneratePolygon(POLY &targetPoly, GFX3D_GeometryList &ta
// Tested" Castlevania POR - warp stone, trajectory of ricochet, "Eye of Decay"
if (this->_texParam.PackedFormat == TEXMODE_NONE)
{
const VERT &vert0 = targetGList.rawVertList[targetPoly.vertIndexes[0]];
const VERT &vert1 = targetGList.rawVertList[targetPoly.vertIndexes[1]];
const VERT &vert2 = targetGList.rawVertList[targetPoly.vertIndexes[2]];
const NDSVertex &vtx0 = targetGList.rawVtxList[targetPoly.vertIndexes[0]];
const NDSVertex &vtx1 = targetGList.rawVtxList[targetPoly.vertIndexes[1]];
const NDSVertex &vtx2 = targetGList.rawVtxList[targetPoly.vertIndexes[2]];
if ( ((vert0.x == vert1.x) && (vert0.y == vert1.y)) ||
((vert1.x == vert2.x) && (vert1.y == vert2.y)) ||
((vert0.y == vert1.y) && (vert1.y == vert2.y)) ||
((vert0.x == vert1.x) && (vert1.x == vert2.x)) )
if ( ((vtx0.position.x == vtx1.position.x) && (vtx0.position.y == vtx1.position.y)) ||
((vtx1.position.x == vtx2.position.x) && (vtx1.position.y == vtx2.position.y)) ||
((vtx0.position.y == vtx1.position.y) && (vtx1.position.y == vtx2.position.y)) ||
((vtx0.position.x == vtx1.position.x) && (vtx1.position.x == vtx2.position.x)) )
{
//printf("Line Segmet detected (poly type %i, mode %i, texparam %08X)\n", poly.type, poly.vtxFormat, textureFormat);
//printf("Line Segment detected (poly type %i, mode %i, texparam %08X)\n", poly.type, poly.vtxFormat, textureFormat);
targetPoly.vtxFormat = (PolygonPrimitiveType)(this->_vtxFormat + 4);
}
}
@ -3259,7 +3264,7 @@ static bool gfx3d_ysort_compare(const u16 idx1, const u16 idx2)
static FORCEINLINE s32 iround(const float f)
{
return (s32)f; //lol
return (s32)( (f < 0.0f) ? f - 0.5f : f + 0.5f ); //lol
}
template <ClipperMode CLIPPERMODE>
@ -3268,23 +3273,24 @@ size_t gfx3d_PerformClipping(const GFX3D_GeometryList &gList, CPoly *outCPolyUns
size_t clipCount = 0;
PolygonType cpType = POLYGON_TYPE_UNDEFINED;
const float wScalar = (float)CurrentRenderer->GetFramebufferWidth() / (float)GPU_FRAMEBUFFER_NATIVE_WIDTH;
const float hScalar = (float)CurrentRenderer->GetFramebufferHeight() / (float)GPU_FRAMEBUFFER_NATIVE_HEIGHT;
const s64 wScalar = (s64)CurrentRenderer->GetFramebufferWidth() / (s64)GPU_FRAMEBUFFER_NATIVE_WIDTH;
const s64 hScalar = (s64)CurrentRenderer->GetFramebufferHeight() / (s64)GPU_FRAMEBUFFER_NATIVE_HEIGHT;
for (size_t polyIndex = 0; polyIndex < gList.rawPolyCount; polyIndex++)
{
const POLY &rawPoly = gList.rawPolyList[polyIndex];
const GFX3D_Viewport theViewport = rawPoly.viewport;
const VERT *rawVerts[4] = {
&gList.rawVertList[rawPoly.vertIndexes[0]],
&gList.rawVertList[rawPoly.vertIndexes[1]],
&gList.rawVertList[rawPoly.vertIndexes[2]],
(rawPoly.type == POLYGON_TYPE_QUAD) ? &gList.rawVertList[rawPoly.vertIndexes[3]] : NULL
const NDSVertex *rawPolyVtx[4] = {
&gList.rawVtxList[rawPoly.vertIndexes[0]],
&gList.rawVtxList[rawPoly.vertIndexes[1]],
&gList.rawVtxList[rawPoly.vertIndexes[2]],
(rawPoly.type == POLYGON_TYPE_QUAD) ? &gList.rawVtxList[rawPoly.vertIndexes[3]] : NULL
};
CPoly &cPoly = outCPolyUnsortedList[clipCount];
cpType = GFX3D_GenerateClippedPoly<CLIPPERMODE>(polyIndex, rawPoly.type, rawVerts, cPoly);
cpType = GFX3D_GenerateClippedPoly<CLIPPERMODE>(polyIndex, rawPoly.type, rawPolyVtx, cPoly);
if (cpType == POLYGON_TYPE_UNDEFINED)
{
continue;
@ -3292,53 +3298,95 @@ size_t gfx3d_PerformClipping(const GFX3D_GeometryList &gList, CPoly *outCPolyUns
for (size_t j = 0; j < (size_t)cPoly.type; j++)
{
VERT &vtx = cPoly.clipVerts[j];
NDSVertex &vtx = cPoly.clipVtxFixed[j];
VERT &vert = cPoly.clipVerts[j];
VertexCoord64x4 vtx64 = {
(s64)vtx.position.x,
(s64)vtx.position.y,
(s64)vtx.position.z,
(s64)vtx.position.w,
};
// TODO: Possible divide by zero with the w-coordinate.
// Is the vertex being read correctly? Is 0 a valid value for w?
// If both of these questions answer to yes, then how does the NDS handle a NaN?
// For now, simply prevent w from being zero.
//
// Test case: Dance scenes in Princess Debut can generate undefined vertices
// when the -ffast-math option (relaxed IEEE754 compliance) is used.
const float vtxW = (vtx.coord[3] != 0.0f) ? vtx.coord[3] : 0.00000001f;
if (vtx64.w != 0)
{
//homogeneous divide
vtx64.x = ((vtx64.x + vtx64.w) * (theViewport.width << 16)) / (2 * vtx64.w);
vtx64.y = ((vtx64.y + vtx64.w) * (theViewport.height << 16)) / (2 * vtx64.w);
vtx64.z = ((vtx64.z + vtx64.w) * ( 1LL << 12)) / (2 * vtx64.w);
}
else
{
// TODO: Possible divide by zero with the w-coordinate.
// Is the vertex being read correctly? Is 0 a valid value for w?
// If both of these questions answer to yes, then how does the NDS handle this?
// For now, simply ignore w if it is zero.
//
// Test case: Dance scenes in Princess Debut can generate undefined vertices
// when the -ffast-math option (relaxed IEEE754 compliance) is used.
//homogeneous divide
vtx.coord[0] = (vtx.coord[0]+vtxW) / (2*vtxW);
vtx.coord[1] = (vtx.coord[1]+vtxW) / (2*vtxW);
vtx.coord[2] = (vtx.coord[2]+vtxW) / (2*vtxW);
//homogeneous divide
vtx64.x = (vtx64.x * (theViewport.width << 16));
vtx64.y = (vtx64.y * (theViewport.height << 16));
vtx64.z = (vtx64.z * ( 1LL << 12));
}
//CONSIDER: do we need to guarantee that these are in bounds? perhaps not.
//vtx.coord[0] = max( 0.0f, min(1.0f, vtx.coord[0]) );
//vtx.coord[1] = max( 0.0f, min(1.0f, vtx.coord[1]) );
//vtx.coord[2] = max( 0.0f, min(1.0f, vtx.coord[2]) );
// Finish viewport transformation.
vtx64.x += ((s64)theViewport.x << 16);
vtx64.x *= wScalar;
//viewport transformation
const GFX3D_Viewport theViewport = rawPoly.viewport;
vtx.coord[0] *= (float)theViewport.width;
vtx.coord[0] += (float)theViewport.x;
vtx.coord[0] *= wScalar;
vtx64.y += ((s64)theViewport.y << 16);
vtx64.y = (192LL << 16) - vtx64.y;
vtx64.y *= hScalar;
vtx.coord[1] *= (float)theViewport.height;
vtx.coord[1] += (float)theViewport.y;
vtx.coord[1] = 192 - vtx.coord[1];
vtx.coord[1] *= hScalar;
vtx.position.x = (s32)vtx64.x;
vtx.position.y = (s32)vtx64.y;
vtx.position.z = (s32)vtx64.z;
vtx.position.w = (s32)vtx64.w;
// TODO: Remove these floating-point conversions.
//here is a hack which needs to be removed.
//at some point our shape engine needs these to be converted to "fixed point"
//which is currently just a float
vtx.coord[0] = (float)iround(16.0f * vtx.coord[0]);
vtx.coord[1] = (float)iround(16.0f * vtx.coord[1]);
vert.x = (float)((double)vtx64.x / 4096.0);
vert.y = (float)((double)vtx64.y / 4096.0);
vert.z = (float)((double)vtx64.z / 4096.0);
vert.w = (float)((double)vtx64.w / 4096.0);
vert.x = (float)iround(vert.x);
vert.y = (float)iround(vert.y);
if (CLIPPERMODE != ClipperMode_DetermineClipOnly)
{
vtx.texcoord[0] /= vtxW;
vtx.texcoord[1] /= vtxW;
if (vtx.position.w != 0)
{
// Texture coordinates
vert.u = (float)( (double)(((s64)vtx.texCoord.u << 48) / vtx64.w) / (double)(1LL << 40) );
vert.v = (float)( (double)(((s64)vtx.texCoord.v << 48) / vtx64.w) / (double)(1LL << 40) );
vtx.texCoord.u = (vtx.texCoord.u << 12) / vtx.position.w;
vtx.texCoord.v = (vtx.texCoord.v << 12) / vtx.position.w;
//perspective-correct the colors
vtx.fcolor[0] /= vtxW;
vtx.fcolor[1] /= vtxW;
vtx.fcolor[2] /= vtxW;
// Vertex color
s32 r_32 = ((s32)vtx.color.r << 24) / vtx.position.w;
s32 g_32 = ((s32)vtx.color.g << 24) / vtx.position.w;
s32 b_32 = ((s32)vtx.color.b << 24) / vtx.position.w;
r_32 = min<s32>(max<s32>(r_32, 0x00), 0x0003FFFF);
g_32 = min<s32>(max<s32>(g_32, 0x00), 0x0003FFFF);
b_32 = min<s32>(max<s32>(b_32, 0x00), 0x0003FFFF);
vert.rf = (float)r_32 / 4096.0f;
vert.gf = (float)g_32 / 4096.0f;
vert.bf = (float)b_32 / 4096.0f;
vtx.color.r = (u8)vert.rf;
vtx.color.g = (u8)vert.gf;
vtx.color.b = (u8)vert.bf;
}
else
{
vert.u = (float)(vtx.texCoord.u / 16);
vert.v = (float)(vtx.texCoord.v / 16);
}
vert.color32 = vtx.color.color;
}
}
@ -3353,15 +3401,15 @@ size_t gfx3d_PerformClipping(const GFX3D_GeometryList &gList, CPoly *outCPolyUns
// we have to support somewhat non-convex polygons (see NSMB world map 1st screen).
// this version should handle those cases better.
const VERT *vtx = cPoly.clipVerts;
const NDSVertex *vtx = cPoly.clipVtxFixed;
const size_t n = cPoly.type - 1;
float facing = (vtx[0].y + vtx[n].y) * (vtx[0].x - vtx[n].x) +
(vtx[1].y + vtx[0].y) * (vtx[1].x - vtx[0].x) +
(vtx[2].y + vtx[1].y) * (vtx[2].x - vtx[1].x);
s64 facing = ((s64)vtx[0].position.y + (s64)vtx[n].position.y) * ((s64)vtx[0].position.x - (s64)vtx[n].position.x) +
((s64)vtx[1].position.y + (s64)vtx[0].position.y) * ((s64)vtx[1].position.x - (s64)vtx[0].position.x) +
((s64)vtx[2].position.y + (s64)vtx[1].position.y) * ((s64)vtx[2].position.x - (s64)vtx[1].position.x);
for (size_t j = 2; j < n; j++)
{
facing += (vtx[j+1].y + vtx[j].y) * (vtx[j+1].x - vtx[j].x);
facing += ((s64)vtx[j+1].position.y + (s64)vtx[j].position.y) * ((s64)vtx[j+1].position.x - (s64)vtx[j].position.x);
}
cPoly.isPolyBackFacing = (facing < 0);
@ -4335,16 +4383,16 @@ static FORCEINLINE void GFX3D_ClipPoint(const NDSVertex &insideVtx, const NDSVer
switch (CLIPPERMODE)
{
case ClipperMode_Full:
outClippedVtx.texCoord.u = GFX3D_LerpSigned<s16>(t_s64, insideVtx.texCoord.u, outsideVtx.texCoord.u);
outClippedVtx.texCoord.v = GFX3D_LerpSigned<s16>(t_s64, insideVtx.texCoord.v, outsideVtx.texCoord.v);
outClippedVtx.texCoord.u = GFX3D_LerpSigned<s32>(t_s64, insideVtx.texCoord.s, outsideVtx.texCoord.s);
outClippedVtx.texCoord.v = GFX3D_LerpSigned<s32>(t_s64, insideVtx.texCoord.t, outsideVtx.texCoord.t);
outClippedVtx.color.r = GFX3D_LerpUnsigned<u8>(t_u64, insideVtx.color.r, outsideVtx.color.r);
outClippedVtx.color.g = GFX3D_LerpUnsigned<u8>(t_u64, insideVtx.color.g, outsideVtx.color.g);
outClippedVtx.color.b = GFX3D_LerpUnsigned<u8>(t_u64, insideVtx.color.b, outsideVtx.color.b);
break;
case ClipperMode_FullColorInterpolate:
outClippedVtx.texCoord.u = GFX3D_LerpSigned<s16>(t_s64, insideVtx.texCoord.u, outsideVtx.texCoord.u);
outClippedVtx.texCoord.v = GFX3D_LerpSigned<s16>(t_s64, insideVtx.texCoord.v, outsideVtx.texCoord.v);
outClippedVtx.texCoord.u = GFX3D_LerpSigned<s32>(t_s64, insideVtx.texCoord.s, outsideVtx.texCoord.s);
outClippedVtx.texCoord.v = GFX3D_LerpSigned<s32>(t_s64, insideVtx.texCoord.t, outsideVtx.texCoord.t);
outClippedVtx.color.r = GFX3D_LerpUnsigned<u8>(t_u64, insideVtx.color.r, outsideVtx.color.r);
outClippedVtx.color.g = GFX3D_LerpUnsigned<u8>(t_u64, insideVtx.color.g, outsideVtx.color.g);
outClippedVtx.color.b = GFX3D_LerpUnsigned<u8>(t_u64, insideVtx.color.b, outsideVtx.color.b);

View File

@ -724,10 +724,75 @@ union Vector32x4
typedef union Vector32x4 Vector32x4;
typedef Vector32x4 VertexCoord32x4;
union Vector64x2
{
s64 vec[2];
s64 coord[2];
struct
{
s64 s, t;
};
struct
{
s64 u, v;
};
struct
{
s64 x, y;
} XY;
struct
{
s64 y, z;
} YZ;
struct
{
s64 x, z;
} XZ;
};
typedef union Vector64x2 Vector64x2;
typedef Vector64x2 VertexCoord64x2;
union Vector64x3
{
s64 vec[3];
s64 coord[3];
struct
{
s64 x, y, z;
};
};
typedef union Vector64x3 Vector64x3;
typedef Vector64x3 VertexCoord64x3;
union Vector64x4
{
s64 vec[4];
s64 coord[4];
struct
{
s64 x, y, z, w;
};
struct
{
Vector64x3 vec3;
s64 :64;
};
};
typedef union Vector64x4 Vector64x4;
typedef Vector64x4 VertexCoord64x4;
struct NDSVertex
{
VertexCoord32x4 position;
VertexCoord16x2 texCoord;
VertexCoord32x2 texCoord;
FragmentColor color;
};
typedef struct NDSVertex NDSVertex;
@ -778,6 +843,7 @@ typedef struct GFX3D_State GFX3D_State;
struct GFX3D_GeometryList
{
PAGE_ALIGN VERT rawVertList[VERTLIST_SIZE];
PAGE_ALIGN NDSVertex rawVtxList[VERTLIST_SIZE];
PAGE_ALIGN POLY rawPolyList[POLYLIST_SIZE];
PAGE_ALIGN CPoly clippedPolyList[CLIPPED_POLYLIST_SIZE];