From 987c778981605ddc728bd0600db80cc0499eaf6f Mon Sep 17 00:00:00 2001 From: coldhex Date: Mon, 13 Jan 2025 18:52:25 +0200 Subject: [PATCH] nv2a: implement screen coordinate rounding to 4 bit fractional precision Xbox triangle rasterization appears to follow the usual top-left rule. However, since Xemu renders to an OpenGL framebuffer object (FBO) instead of directly to the default framebuffer, Xemu actually has what could be called the bottom-left triangle rasterization rule. I'll address that in another commit. Also, note that the ProjAdjacentGeometry_0.5625 test in nxdk_pgraph_tests is very sensitive to floating point rounding errors. For example, the nxdk_pgraph_tests commit 66b32a0b1feba32a0db7a95d6358e84f7a6246ad changed the math library, which caused the test result to change on real Xbox hardware as well, due to floating point rounding error differences in the matrix inverse computation. Apart from the bottom-left rasterization issue, the differing result between Xbox and the rounding I am proposing here for Xemu seems to stem from floating point rounding that happens in screen coordinate calculations before the rounding to 4 bit precision takes place. Fixing such rounding issues would require carrying out all preceding floating point computations in exactly the same order and with the same precision as the Xbox. Note that the Xbox Direct3D library seems to add 0.03125 (1/32) to screen coordinates by default. Likely the idea there was to make floating point screen coordinates round to the nearest screen coordinates in 4 bit fixed point precision: 1/32 is half of the 1/16 step, so adding it before the hardware floors to a multiple of 1/16 yields round-to-nearest. So the Xbox Direct3D library (and therefore games) already mitigates precarious rounding when exactly half-integer coordinates are used by games. Actually they would use integer coordinates because it is Direct3D 8, but since nv2a appears to rasterize at half-integer coordinates like OpenGL, the Xbox Direct3D library also adds 0.5 to screen coordinates in addition to 1/32. 
--- hw/xbox/nv2a/pgraph/gl/renderer.h | 1 - hw/xbox/nv2a/pgraph/gl/shaders.c | 23 ----------------------- hw/xbox/nv2a/pgraph/glsl/vsh-ff.c | 17 ++++++++++------- hw/xbox/nv2a/pgraph/glsl/vsh-prog.c | 24 ++++++++++-------------- hw/xbox/nv2a/pgraph/vk/renderer.h | 1 - hw/xbox/nv2a/pgraph/vk/shaders.c | 23 ----------------------- 6 files changed, 20 insertions(+), 69 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.h b/hw/xbox/nv2a/pgraph/gl/renderer.h index d1a64bb024..b9074d9644 100644 --- a/hw/xbox/nv2a/pgraph/gl/renderer.h +++ b/hw/xbox/nv2a/pgraph/gl/renderer.h @@ -111,7 +111,6 @@ typedef struct ShaderBinding { GLint vsh_constant_loc[NV2A_VERTEXSHADER_CONSTANTS]; uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4]; - GLint inv_viewport_loc; GLint ltctxa_loc[NV2A_LTCTXA_COUNT]; GLint ltctxb_loc[NV2A_LTCTXB_COUNT]; GLint ltc1_loc[NV2A_LTC1_COUNT]; diff --git a/hw/xbox/nv2a/pgraph/gl/shaders.c b/hw/xbox/nv2a/pgraph/gl/shaders.c index af3cfd2f40..dbf555a621 100644 --- a/hw/xbox/nv2a/pgraph/gl/shaders.c +++ b/hw/xbox/nv2a/pgraph/gl/shaders.c @@ -158,7 +158,6 @@ static void update_shader_constant_locations(ShaderBinding *binding) binding->fog_color_loc = glGetUniformLocation(binding->gl_program, "fogColor"); binding->fog_param_loc = glGetUniformLocation(binding->gl_program, "fogParam"); - binding->inv_viewport_loc = glGetUniformLocation(binding->gl_program, "invViewport"); for (int i = 0; i < NV2A_LTCTXA_COUNT; i++) { snprintf(tmp, sizeof(tmp), "ltctxa[%d]", i); binding->ltctxa_loc[i] = glGetUniformLocation(binding->gl_program, tmp); @@ -847,28 +846,6 @@ static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, if (binding->specular_power_loc != -1) { glUniform1f(binding->specular_power_loc, pg->specular_power); } - - /* estimate the viewport by assuming it matches the surface ... 
*/ - unsigned int aa_width = 1, aa_height = 1; - pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height); - - float m11 = 0.5 * (pg->surface_binding_dim.width/aa_width); - float m22 = -0.5 * (pg->surface_binding_dim.height/aa_height); - float m33 = zmax; - float m41 = *(float*)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][0]; - float m42 = *(float*)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][1]; - - float invViewport[16] = { - 1.0/m11, 0, 0, 0, - 0, 1.0/m22, 0, 0, - 0, 0, 1.0/m33, 0, - -1.0+m41/m11, 1.0+m42/m22, 0, 1.0 - }; - - if (binding->inv_viewport_loc != -1) { - glUniformMatrix4fv(binding->inv_viewport_loc, - 1, GL_FALSE, &invViewport[0]); - } } /* update vertex program constants */ diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c index 6bc637d582..02b1a8fda8 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c @@ -115,8 +115,6 @@ GLSL_DEFINE(sceneAmbientColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_FR_AMB) ".xyz") GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz") "\n" ); - mstring_append_fmt(uniforms, -"%smat4 invViewport;\n", u); /* Skinning */ unsigned int count; @@ -471,13 +469,18 @@ GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz } mstring_append(body, - " oPos = tPosition * compositeMat;\n" - " oPos.w = clampAwayZeroInf(oPos.w);\n" - " oPos = invViewport * oPos;\n" + " oPos = tPosition * compositeMat;\n" + " oPos.z = oPos.z / clipRange.y;\n" + " oPos.w = clampAwayZeroInf(oPos.w);\n" + " oPos.xy /= oPos.w;\n" + " oPos.xy += c[" stringify(NV_IGRAPH_XF_XFCTX_VPOFF) "].xy;\n" + " oPos.xy = floor(oPos.xy * 16.0f) / 16.0f;\n" + " oPos.xy = (2.0f * oPos.xy - surfaceSize) / surfaceSize;\n" + " oPos.xy *= oPos.w;\n" ); - if (state->vulkan) { - mstring_append(body, " oPos.y *= -1;\n"); + if (!state->vulkan) { + mstring_append(body, " oPos.y = -oPos.y;\n"); } /* FIXME: Testing */ diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c 
b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c index 3e0ab5fbba..e26d0c0304 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c @@ -821,22 +821,14 @@ void pgraph_gen_vsh_prog_glsl(uint16_t version, assert(has_final); mstring_append(body, - /* the shaders leave the result in screen space, while - * opengl expects it in clip space. - * TODO: the pixel-center co-ordinate differences should handled + /* The shaders leave the result in screen space, while OpenGL expects it + * in clip space. Xbox NV2A rasterizer appears to have 4 bit precision + * fixed point fractional part and to convert floating point coordinates + * by flooring. */ - " oPos.x = 2.0 * (oPos.x - surfaceSize.x * 0.5) / surfaceSize.x;\n" - ); + " oPos.xy = floor(oPos.xy * 16.0f) / 16.0f;\n" + " oPos.xy = (2.0f * oPos.xy - surfaceSize) / surfaceSize;\n" - if (vulkan) { - mstring_append(body, - " oPos.y = 2.0 * oPos.y / surfaceSize.y - 1.0;\n"); - } else { - mstring_append(body, " oPos.y = -2.0 * (oPos.y - surfaceSize.y * 0.5) " - "/ surfaceSize.y;\n"); - } - - mstring_append(body, " oPos.z = oPos.z / clipRange.y;\n" " oPos.w = clampAwayZeroInf(oPos.w);\n" @@ -849,4 +841,8 @@ void pgraph_gen_vsh_prog_glsl(uint16_t version, */ " oPos.xyz *= oPos.w;\n" ); + + if (!vulkan) { + mstring_append(body, " oPos.y = -oPos.y;\n"); + } } diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h index 4112517e58..91bbf0f31d 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -179,7 +179,6 @@ typedef struct ShaderBinding { int vsh_constant_loc; uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4]; - int inv_viewport_loc; int ltctxa_loc; int ltctxb_loc; int ltc1_loc; diff --git a/hw/xbox/nv2a/pgraph/vk/shaders.c b/hw/xbox/nv2a/pgraph/vk/shaders.c index 78122c701d..0a6e8a2b5c 100644 --- a/hw/xbox/nv2a/pgraph/vk/shaders.c +++ b/hw/xbox/nv2a/pgraph/vk/shaders.c @@ -283,8 +283,6 @@ static void 
update_shader_constant_locations(ShaderBinding *binding) binding->fog_param_loc = uniform_index(&binding->vertex->uniforms, "fogParam"); - binding->inv_viewport_loc = - uniform_index(&binding->vertex->uniforms, "invViewport"); binding->ltctxa_loc = uniform_index(&binding->vertex->uniforms, "ltctxa"); binding->ltctxb_loc = uniform_index(&binding->vertex->uniforms, "ltctxb"); binding->ltc1_loc = uniform_index(&binding->vertex->uniforms, "ltc1"); @@ -617,27 +615,6 @@ static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, uniform1f(&binding->vertex->uniforms, binding->specular_power_loc, pg->specular_power); } - - /* estimate the viewport by assuming it matches the surface ... */ - unsigned int aa_width = 1, aa_height = 1; - pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height); - - float m11 = 0.5 * (pg->surface_binding_dim.width / aa_width); - float m22 = -0.5 * (pg->surface_binding_dim.height / aa_height); - float m33 = zmax; - float m41 = *(float *)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][0]; - float m42 = *(float *)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][1]; - - float invViewport[16] = { - 1.0 / m11, 0, 0, 0, 0, 1.0 / m22, 0, - 0, 0, 0, 1.0 / m33, 0, -1.0 + m41 / m11, 1.0 + m42 / m22, - 0, 1.0 - }; - - if (binding->inv_viewport_loc != -1) { - uniformMatrix4fv(&binding->vertex->uniforms, - binding->inv_viewport_loc, &invViewport[0]); - } } /* update vertex program constants */