SoftRasterizer: Perform all calculations using fixed-point math instead of float-based math.

- CommonSettings.GFX3D_TXTHack has been repurposed to switch between fixed-point math and float-based math.
- Fix various rendering bugs that were caused by a loss of Z precision introduced in commit 7751b59.
- In Pokemon Diamond/Pearl, the bug that caused random black dots to appear on the ground has been fixed.
This commit is contained in:
rogerman 2023-05-04 12:36:49 -07:00
parent 1dbbeba3a7
commit bf344e9a3e
4 changed files with 521 additions and 518 deletions

View File

@ -3283,27 +3283,23 @@ size_t gfx3d_PerformClipping(const GFX3D_GeometryList &gList, CPoly *outCPolyUns
(s64)vtx.position.w,
};
//homogeneous divide
if (vtx64.w != 0)
{
//homogeneous divide
vtx64.x = ((vtx64.x + vtx64.w) * (theViewport.width << 16)) / (2 * vtx64.w);
vtx64.y = ((vtx64.y + vtx64.w) * (theViewport.height << 16)) / (2 * vtx64.w);
vtx64.z = ((vtx64.z + vtx64.w) * ( 1LL << 12)) / (2 * vtx64.w);
vtx64.x = ((vtx64.x + vtx64.w) * ((s64)theViewport.width << 16)) / (2 * vtx64.w);
vtx64.y = ((vtx64.y + vtx64.w) * ((s64)theViewport.height << 16)) / (2 * vtx64.w);
vtx64.z = ((vtx64.z + vtx64.w) * ( 1LL << 31)) / (2 * vtx64.w);
// Convert Z from 20.12 to 20.43 since we need to preserve as much precision
// as possible for Z-depth calculations. Several games need the precision in
// order to prevent missing polygons, maintain correct coloring, draw 2D-on-3D
// animations, and other types of 3D scenarios.
}
else
{
// TODO: Possible divide by zero with the w-coordinate.
// Is the vertex being read correctly? Is 0 a valid value for w?
// If both of these questions answer to yes, then how does the NDS handle this?
// For now, simply ignore w if it is zero.
//
// Test case: Dance scenes in Princess Debut can generate undefined vertices
// when the -ffast-math option (relaxed IEEE754 compliance) is used.
//homogeneous divide
vtx64.x = (vtx64.x * (theViewport.width << 16));
vtx64.y = (vtx64.y * (theViewport.height << 16));
vtx64.z = (vtx64.z * ( 1LL << 12));
vtx64.x = (vtx64.x * ((s64)theViewport.width << 16));
vtx64.y = (vtx64.y * ((s64)theViewport.height << 16));
vtx64.z = (vtx64.z * ( 1LL << 31)); // See comments above for why we need to convert Z to 20.43.
}
// Finish viewport transformation.
@ -3314,12 +3310,18 @@ size_t gfx3d_PerformClipping(const GFX3D_GeometryList &gList, CPoly *outCPolyUns
vtx64.y = (192LL << 16) - vtx64.y;
vtx64.y *= hScalar;
// We need to fit the 64-bit Z into a 32-bit integer, so we will need to drop some bits.
// - Divide by w = 20.43 --> 20.31
// - Divide by 2 = 20.31 --> 20.30
// - Keep the sign bit = 20.30 --> 20.31
vtx64.z = max<s64>(0x0000000000000000LL, min<s64>(0x000000007FFFFFFFLL, vtx64.z));
// At the very least, we need to save the transformed position so that
// we can use it to calculate the polygon facing later.
vtx.position.x = (s32)vtx64.x;
vtx.position.y = (s32)vtx64.y;
vtx.position.z = (s32)vtx64.z;
vtx.position.w = (s32)vtx64.w;
vtx.position.x = (s32)vtx64.x; // 16.16
vtx.position.y = (s32)vtx64.y; // 16.16
vtx.position.z = (s32)vtx64.z; // 0.31
vtx.position.w = (s32)vtx64.w; // 20.12
}
// Determine the polygon facing.

File diff suppressed because it is too large Load Diff

View File

@ -34,12 +34,17 @@ struct SoftRasterizerPrecalculation
{
Vector2s64 positionCeil;
s64 zPosition;
s64 invWPosition;
Vector2s64 texCoord;
Color3s64 color;
s64 yPrestep;
float zPositionNormalized;
float invWPositionNormalized;
Vector2f32 texCoordNormalized;
Color3f32 colorNormalized;
float yPrestep;
float yPrestepNormalized;
};
typedef struct SoftRasterizerPrecalculation SoftRasterizerPrecalculation;
@ -121,22 +126,17 @@ protected:
u8 _textureWrapMode;
Render3DError _SetupTexture(const POLY &thePoly, size_t polyRenderIndex);
FORCEINLINE Color4u8 _sample(const Vector2f32 &texCoord);
FORCEINLINE float _round_s(double val);
FORCEINLINE Color4u8 _sample(const Vector2s32 &texCoord);
FORCEINLINE float _round_s(const float val);
template<bool ISSHADOWPOLYGON> FORCEINLINE void _shade(const PolygonMode polygonMode, const Color4u8 vtxColor, const Vector2f32 &texCoord, Color4u8 &outColor);
template<bool ISFRONTFACING, bool ISSHADOWPOLYGON> FORCEINLINE void _pixel(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, Color4u8 &dstColor, const Color4f32 &vtxColorFloat, float invu, float invv, float z, float w);
template<bool ISFRONTFACING, bool ISSHADOWPOLYGON, bool USELINEHACK> FORCEINLINE void _drawscanline(const POLYGON_ATTR polyAttr, const bool isTranslucent, Color4u8 *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, const edge_fx_fl *pLeft, const edge_fx_fl *pRight);
template<bool SLI, bool ISFRONTFACING, bool ISSHADOWPOLYGON, bool USELINEHACK> void _runscanlines(const POLYGON_ATTR polyAttr, const bool isTranslucent, Color4u8 *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, const bool isHorizontal, edge_fx_fl *left, edge_fx_fl *right);
#ifdef ENABLE_SSE2
template<bool ISFRONTFACING, bool ISSHADOWPOLYGON> FORCEINLINE void _pixel_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, Color4u8 &dstColor, const __m128 &vtxColorFloat, float invu, float invv, float z, float w);
template<bool ISFRONTFACING, bool ISSHADOWPOLYGON, bool USELINEHACK> FORCEINLINE void _drawscanline_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, Color4u8 *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, const edge_fx_fl *pLeft, const edge_fx_fl *pRight);
#endif
template<bool ISSHADOWPOLYGON> FORCEINLINE void _shade(const PolygonMode polygonMode, const Color4u8 vtxColor, const Vector2s32 &texCoord, Color4u8 &outColor);
template<bool ISFRONTFACING, bool ISSHADOWPOLYGON> FORCEINLINE void _pixel(const POLYGON_ATTR polyAttr, const bool isPolyTranslucent, const u32 depth, const Color4u8 &vtxColor, const Vector2s32 texCoord, const size_t fragmentIndex, Color4u8 &dstColor);
template<bool ISFRONTFACING, bool ISSHADOWPOLYGON, bool USELINEHACK> FORCEINLINE void _drawscanline(const POLYGON_ATTR polyAttr, const bool isPolyTranslucent, Color4u8 *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, const edge_fx_fl &pLeft, const edge_fx_fl &pRight);
template<bool SLI, bool ISFRONTFACING, bool ISSHADOWPOLYGON, bool USELINEHACK> void _runscanlines(const POLYGON_ATTR polyAttr, const bool isPolyTranslucent, Color4u8 *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, const bool isHorizontal, edge_fx_fl &left, edge_fx_fl &right);
template<int TYPE> FORCEINLINE void _rot_verts();
template<bool ISFRONTFACING, int TYPE> void _sort_verts();
template<bool SLI, bool ISFRONTFACING, bool ISSHADOWPOLYGON, bool USELINEHACK> void _shape_engine(const POLYGON_ATTR polyAttr, const bool isTranslucent, Color4u8 *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, int type);
template<bool SLI, bool ISFRONTFACING, bool ISSHADOWPOLYGON, bool USELINEHACK> void _shape_engine(const POLYGON_ATTR polyAttr, const bool isPolyTranslucent, Color4u8 *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, int type);
public:
void SetSLI(u32 startLine, u32 endLine, bool debug);

View File

@ -566,6 +566,26 @@ union Color4s32
};
typedef union Color4s32 Color4s32;
// Three-channel color in which every channel is stored as an s64.
// The union overlays two views of the same storage: an indexable array
// (component[0..2]) and the named fields r, g, b.
// NOTE(review): the named view relies on an anonymous struct, which is a
// compiler extension rather than standard C++ -- confirm that every supported
// toolchain accepts it.
union Color3s64
{
s64 component[3]; // Indexed access to the three channels.
struct { s64 r, g, b; }; // Named access; presumably r/g/b line up with component[0]/[1]/[2] -- TODO confirm.
};
typedef union Color3s64 Color3s64;
// Four-channel color in which every channel is stored as an s64.
// The union overlays three views of the same storage:
//   - component[0..3] for indexed access,
//   - r, g, b, a for named access,
//   - color3 + alpha, which exposes the leading three channels as a Color3s64
//     followed by a separate alpha value.
// NOTE(review): the named views rely on anonymous structs, which are a
// compiler extension rather than standard C++ -- confirm that every supported
// toolchain accepts them.
union Color4s64
{
s64 component[4]; // Indexed access to the four channels.
struct { s64 r, g, b, a; }; // Named access; presumably r/g/b/a line up with component[0..3] -- TODO confirm.
struct
{
Color3s64 color3; // The first three s64 values viewed as a Color3s64.
s64 alpha; // The s64 that follows color3; presumably aliases 'a' -- TODO confirm.
};
};
typedef union Color4s64 Color4s64;
union Color3f32
{
float component[3];