nv2a: implement screen coordinate rounding to 4 bit fractional precision

Xbox triangle rasterization appears to follow the usual top-left rule. However, since Xemu renders to an OpenGL framebuffer object (FBO) instead of directly to the default framebuffer, Xemu effectively has what could be called a bottom-left triangle rasterization rule. I'll address that in another commit. Also, note that the ProjAdjacentGeometry_0.5625 test in nxdk_pgraph_tests is very sensitive to floating-point rounding errors. For example, the nxdk_pgraph_tests commit 66b32a0b1feba32a0db7a95d6358e84f7a6246ad changed the math library, which caused the test result to change even on real Xbox hardware due to floating-point rounding differences in the matrix inverse computation. Apart from the bottom-left rasterization issue, the remaining difference between the Xbox result and the rounding I am proposing here for Xemu seems to stem from floating-point rounding that happens in the screen coordinate calculations before the rounding to 4-bit fractional precision takes place. Fixing such rounding issues would require carrying out all preceding floating-point computations in exactly the same order and with the same precision as on the Xbox. Note that the Xbox Direct3D library seems to add 0.03125 (1/32) to screen coordinates by default. Since 1/32 is half of the 1/16 fixed-point step, the idea there was likely to make flooring behave like rounding, so that floating-point screen coordinates end up at the nearest coordinate representable with 4-bit fixed-point precision. So the Xbox Direct3D library (and therefore games using it) already mitigates precarious rounding when games use exactly half-integer coordinates. Strictly speaking, games would use integer coordinates because this is Direct3D 8, but since the nv2a appears to rasterize at half-integer coordinates like OpenGL, Xbox Direct3D also adds 0.5 to screen coordinates in addition to the 1/32.
2025-01-13 18:52:25 +02:00 · 2025-01-13 18:52:25 +02:00 · 987c778981
parent c720af00bb
commit 987c778981
6 changed files with 20 additions and 69 deletions
--- a/hw/xbox/nv2a/pgraph/gl/renderer.h
+++ b/hw/xbox/nv2a/pgraph/gl/renderer.h
@ -111,7 +111,6 @@ typedef struct ShaderBinding {
    GLint vsh_constant_loc[NV2A_VERTEXSHADER_CONSTANTS];
    uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4];

-    GLint inv_viewport_loc;
    GLint ltctxa_loc[NV2A_LTCTXA_COUNT];
    GLint ltctxb_loc[NV2A_LTCTXB_COUNT];
    GLint ltc1_loc[NV2A_LTC1_COUNT];
--- a/hw/xbox/nv2a/pgraph/gl/shaders.c
+++ b/hw/xbox/nv2a/pgraph/gl/shaders.c
@ -158,7 +158,6 @@ static void update_shader_constant_locations(ShaderBinding *binding)
    binding->fog_color_loc = glGetUniformLocation(binding->gl_program, "fogColor");
    binding->fog_param_loc = glGetUniformLocation(binding->gl_program, "fogParam");

-    binding->inv_viewport_loc = glGetUniformLocation(binding->gl_program, "invViewport");
    for (int i = 0; i < NV2A_LTCTXA_COUNT; i++) {
        snprintf(tmp, sizeof(tmp), "ltctxa[%d]", i);
        binding->ltctxa_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
@ -847,28 +846,6 @@ static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding,
        if (binding->specular_power_loc != -1) {
    	    glUniform1f(binding->specular_power_loc, pg->specular_power);
 	    }
-
-        /* estimate the viewport by assuming it matches the surface ... */
-        unsigned int aa_width = 1, aa_height = 1;
-        pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height);
-
-        float m11 = 0.5 * (pg->surface_binding_dim.width/aa_width);
-        float m22 = -0.5 * (pg->surface_binding_dim.height/aa_height);
-        float m33 = zmax;
-        float m41 = *(float*)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][0];
-        float m42 = *(float*)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][1];
-
-        float invViewport[16] = {
-            1.0/m11, 0, 0, 0,
-            0, 1.0/m22, 0, 0,
-            0, 0, 1.0/m33, 0,
-            -1.0+m41/m11, 1.0+m42/m22, 0, 1.0
-        };
-
-        if (binding->inv_viewport_loc != -1) {
-            glUniformMatrix4fv(binding->inv_viewport_loc,
-                               1, GL_FALSE, &invViewport[0]);
-        }
    }

    /* update vertex program constants */
--- a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c
+++ b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c
@ -115,8 +115,6 @@ GLSL_DEFINE(sceneAmbientColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_FR_AMB) ".xyz")
 GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz")
 "\n"
 );
-    mstring_append_fmt(uniforms,
-"%smat4 invViewport;\n", u);

    /* Skinning */
    unsigned int count;
@ -471,13 +469,18 @@ GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz
    }

    mstring_append(body,
-    "   oPos = tPosition * compositeMat;\n"
-    "   oPos.w = clampAwayZeroInf(oPos.w);\n"
-    "   oPos = invViewport * oPos;\n"
+    "  oPos = tPosition * compositeMat;\n"
+    "  oPos.z = oPos.z / clipRange.y;\n"
+    "  oPos.w = clampAwayZeroInf(oPos.w);\n"
+    "  oPos.xy /= oPos.w;\n"
+    "  oPos.xy += c[" stringify(NV_IGRAPH_XF_XFCTX_VPOFF) "].xy;\n"
+    "  oPos.xy = floor(oPos.xy * 16.0f) / 16.0f;\n"
+    "  oPos.xy = (2.0f * oPos.xy - surfaceSize) / surfaceSize;\n"
+    "  oPos.xy *= oPos.w;\n"
    );

-    if (state->vulkan) {
-        mstring_append(body, "   oPos.y *= -1;\n");
+    if (!state->vulkan) {
+        mstring_append(body, "  oPos.y = -oPos.y;\n");
    }

    /* FIXME: Testing */
--- a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c
+++ b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c
@ -821,22 +821,14 @@ void pgraph_gen_vsh_prog_glsl(uint16_t version,
    assert(has_final);

    mstring_append(body,
-        /* the shaders leave the result in screen space, while
-         * opengl expects it in clip space.
-         * TODO: the pixel-center co-ordinate differences should handled
+        /* The shaders leave the result in screen space, while OpenGL expects it
+         * in clip space. Xbox NV2A rasterizer appears to have 4 bit precision
+         * fixed point fractional part and to convert floating point coordinates
+         * by flooring.
         */
-        "  oPos.x = 2.0 * (oPos.x - surfaceSize.x * 0.5) / surfaceSize.x;\n"
-        );
+        "  oPos.xy = floor(oPos.xy * 16.0f) / 16.0f;\n"
+        "  oPos.xy = (2.0f * oPos.xy - surfaceSize) / surfaceSize;\n"

-    if (vulkan) {
-        mstring_append(body,
-                       "  oPos.y = 2.0 * oPos.y / surfaceSize.y - 1.0;\n");
-    } else {
-        mstring_append(body, "  oPos.y = -2.0 * (oPos.y - surfaceSize.y * 0.5) "
-                             "/ surfaceSize.y;\n");
-    }
-
-    mstring_append(body,
        "  oPos.z = oPos.z / clipRange.y;\n"
        "  oPos.w = clampAwayZeroInf(oPos.w);\n"

@ -849,4 +841,8 @@ void pgraph_gen_vsh_prog_glsl(uint16_t version,
         */
        "  oPos.xyz *= oPos.w;\n"
    );
+
+    if (!vulkan) {
+        mstring_append(body, "  oPos.y = -oPos.y;\n");
+    }
 }
--- a/hw/xbox/nv2a/pgraph/vk/renderer.h
+++ b/hw/xbox/nv2a/pgraph/vk/renderer.h
@ -179,7 +179,6 @@ typedef struct ShaderBinding {
    int vsh_constant_loc;
    uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4];

-    int inv_viewport_loc;
    int ltctxa_loc;
    int ltctxb_loc;
    int ltc1_loc;
--- a/hw/xbox/nv2a/pgraph/vk/shaders.c
+++ b/hw/xbox/nv2a/pgraph/vk/shaders.c
@ -283,8 +283,6 @@ static void update_shader_constant_locations(ShaderBinding *binding)
    binding->fog_param_loc =
        uniform_index(&binding->vertex->uniforms, "fogParam");

-    binding->inv_viewport_loc =
-        uniform_index(&binding->vertex->uniforms, "invViewport");
    binding->ltctxa_loc = uniform_index(&binding->vertex->uniforms, "ltctxa");
    binding->ltctxb_loc = uniform_index(&binding->vertex->uniforms, "ltctxb");
    binding->ltc1_loc = uniform_index(&binding->vertex->uniforms, "ltc1");
@ -617,27 +615,6 @@ static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding,
            uniform1f(&binding->vertex->uniforms, binding->specular_power_loc,
                      pg->specular_power);
        }
-
-        /* estimate the viewport by assuming it matches the surface ... */
-        unsigned int aa_width = 1, aa_height = 1;
-        pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height);
-
-        float m11 = 0.5 * (pg->surface_binding_dim.width / aa_width);
-        float m22 = -0.5 * (pg->surface_binding_dim.height / aa_height);
-        float m33 = zmax;
-        float m41 = *(float *)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][0];
-        float m42 = *(float *)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][1];
-
-        float invViewport[16] = {
-            1.0 / m11, 0,  0, 0,         0, 1.0 / m22,        0,
-            0,         0,  0, 1.0 / m33, 0, -1.0 + m41 / m11, 1.0 + m42 / m22,
-            0,         1.0
-        };
-
-        if (binding->inv_viewport_loc != -1) {
-            uniformMatrix4fv(&binding->vertex->uniforms,
-                                    binding->inv_viewport_loc, &invViewport[0]);
-        }
    }

    /* update vertex program constants */