nv2a: implement screen coordinate rounding to 4 bit fractional precision

Xbox triangle rasterization appears to follow the usual top-left rule.
However, since Xemu renders to an OpenGL framebuffer object (FBO) instead
of directly to the default framebuffer, Xemu actually has what could be
called the bottom-left triangle rasterization rule. I'll address that in
another commit.

Also, note that the ProjAdjacentGeometry_0.5625 test in nxdk_pgraph_tests
is very sensitive to floating point rounding errors. For example, the
nxdk_pgraph_tests commit 66b32a0b1feba32a0db7a95d6358e84f7a6246ad changed
the math library, which caused the test result to change on real Xbox
hardware as well, due to floating point rounding error differences in matrix
inverse computation. Apart from the bottom-left rasterization issue, the
differing result between Xbox and the rounding I am proposing here for
Xemu seems to stem from floating point rounding that happens in screen
coordinate calculations before the rounding to 4 bit precision takes place.
Fixing such rounding issues would require carrying out all preceding floating
point computations in exactly the same order and with the same precision as
the Xbox. Note that the Xbox Direct3D library seems to add 0.03125 (1/32) to
screen coordinates by default. Likely the idea there was to make floating
point screen coordinates round to the nearest screen coordinates in
4 bit fixed point precision. So the Xbox Direct3D library (and therefore
games) already mitigates precarious rounding when games use exactly
half-integer coordinates. Strictly speaking, games would use integer
coordinates because this is Direct3D 8, but since nv2a appears to
rasterize at half-integer coordinates like OpenGL, the Xbox Direct3D library
also adds 0.5 to screen coordinates in addition to 1/32.
This commit is contained in:
coldhex 2025-01-13 18:52:25 +02:00
parent c720af00bb
commit 987c778981
6 changed files with 20 additions and 69 deletions

View File

@ -111,7 +111,6 @@ typedef struct ShaderBinding {
GLint vsh_constant_loc[NV2A_VERTEXSHADER_CONSTANTS];
uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4];
GLint inv_viewport_loc;
GLint ltctxa_loc[NV2A_LTCTXA_COUNT];
GLint ltctxb_loc[NV2A_LTCTXB_COUNT];
GLint ltc1_loc[NV2A_LTC1_COUNT];

View File

@ -158,7 +158,6 @@ static void update_shader_constant_locations(ShaderBinding *binding)
binding->fog_color_loc = glGetUniformLocation(binding->gl_program, "fogColor");
binding->fog_param_loc = glGetUniformLocation(binding->gl_program, "fogParam");
binding->inv_viewport_loc = glGetUniformLocation(binding->gl_program, "invViewport");
for (int i = 0; i < NV2A_LTCTXA_COUNT; i++) {
snprintf(tmp, sizeof(tmp), "ltctxa[%d]", i);
binding->ltctxa_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
@ -847,28 +846,6 @@ static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding,
if (binding->specular_power_loc != -1) {
glUniform1f(binding->specular_power_loc, pg->specular_power);
}
/* estimate the viewport by assuming it matches the surface ... */
unsigned int aa_width = 1, aa_height = 1;
pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height);
float m11 = 0.5 * (pg->surface_binding_dim.width/aa_width);
float m22 = -0.5 * (pg->surface_binding_dim.height/aa_height);
float m33 = zmax;
float m41 = *(float*)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][0];
float m42 = *(float*)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][1];
float invViewport[16] = {
1.0/m11, 0, 0, 0,
0, 1.0/m22, 0, 0,
0, 0, 1.0/m33, 0,
-1.0+m41/m11, 1.0+m42/m22, 0, 1.0
};
if (binding->inv_viewport_loc != -1) {
glUniformMatrix4fv(binding->inv_viewport_loc,
1, GL_FALSE, &invViewport[0]);
}
}
/* update vertex program constants */

View File

@ -115,8 +115,6 @@ GLSL_DEFINE(sceneAmbientColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_FR_AMB) ".xyz")
GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz")
"\n"
);
mstring_append_fmt(uniforms,
"%smat4 invViewport;\n", u);
/* Skinning */
unsigned int count;
@ -471,13 +469,18 @@ GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz
}
mstring_append(body,
" oPos = tPosition * compositeMat;\n"
" oPos.w = clampAwayZeroInf(oPos.w);\n"
" oPos = invViewport * oPos;\n"
" oPos = tPosition * compositeMat;\n"
" oPos.z = oPos.z / clipRange.y;\n"
" oPos.w = clampAwayZeroInf(oPos.w);\n"
" oPos.xy /= oPos.w;\n"
" oPos.xy += c[" stringify(NV_IGRAPH_XF_XFCTX_VPOFF) "].xy;\n"
" oPos.xy = floor(oPos.xy * 16.0f) / 16.0f;\n"
" oPos.xy = (2.0f * oPos.xy - surfaceSize) / surfaceSize;\n"
" oPos.xy *= oPos.w;\n"
);
if (state->vulkan) {
mstring_append(body, " oPos.y *= -1;\n");
if (!state->vulkan) {
mstring_append(body, " oPos.y = -oPos.y;\n");
}
/* FIXME: Testing */

View File

@ -821,22 +821,14 @@ void pgraph_gen_vsh_prog_glsl(uint16_t version,
assert(has_final);
mstring_append(body,
/* the shaders leave the result in screen space, while
* opengl expects it in clip space.
* TODO: the pixel-center co-ordinate differences should handled
/* The shaders leave the result in screen space, while OpenGL expects it
* in clip space. Xbox NV2A rasterizer appears to have 4 bit precision
* fixed point fractional part and to convert floating point coordinates
* by flooring.
*/
" oPos.x = 2.0 * (oPos.x - surfaceSize.x * 0.5) / surfaceSize.x;\n"
);
" oPos.xy = floor(oPos.xy * 16.0f) / 16.0f;\n"
" oPos.xy = (2.0f * oPos.xy - surfaceSize) / surfaceSize;\n"
if (vulkan) {
mstring_append(body,
" oPos.y = 2.0 * oPos.y / surfaceSize.y - 1.0;\n");
} else {
mstring_append(body, " oPos.y = -2.0 * (oPos.y - surfaceSize.y * 0.5) "
"/ surfaceSize.y;\n");
}
mstring_append(body,
" oPos.z = oPos.z / clipRange.y;\n"
" oPos.w = clampAwayZeroInf(oPos.w);\n"
@ -849,4 +841,8 @@ void pgraph_gen_vsh_prog_glsl(uint16_t version,
*/
" oPos.xyz *= oPos.w;\n"
);
if (!vulkan) {
mstring_append(body, " oPos.y = -oPos.y;\n");
}
}

View File

@ -179,7 +179,6 @@ typedef struct ShaderBinding {
int vsh_constant_loc;
uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4];
int inv_viewport_loc;
int ltctxa_loc;
int ltctxb_loc;
int ltc1_loc;

View File

@ -283,8 +283,6 @@ static void update_shader_constant_locations(ShaderBinding *binding)
binding->fog_param_loc =
uniform_index(&binding->vertex->uniforms, "fogParam");
binding->inv_viewport_loc =
uniform_index(&binding->vertex->uniforms, "invViewport");
binding->ltctxa_loc = uniform_index(&binding->vertex->uniforms, "ltctxa");
binding->ltctxb_loc = uniform_index(&binding->vertex->uniforms, "ltctxb");
binding->ltc1_loc = uniform_index(&binding->vertex->uniforms, "ltc1");
@ -617,27 +615,6 @@ static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding,
uniform1f(&binding->vertex->uniforms, binding->specular_power_loc,
pg->specular_power);
}
/* estimate the viewport by assuming it matches the surface ... */
unsigned int aa_width = 1, aa_height = 1;
pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height);
float m11 = 0.5 * (pg->surface_binding_dim.width / aa_width);
float m22 = -0.5 * (pg->surface_binding_dim.height / aa_height);
float m33 = zmax;
float m41 = *(float *)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][0];
float m42 = *(float *)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][1];
float invViewport[16] = {
1.0 / m11, 0, 0, 0, 0, 1.0 / m22, 0,
0, 0, 0, 1.0 / m33, 0, -1.0 + m41 / m11, 1.0 + m42 / m22,
0, 1.0
};
if (binding->inv_viewport_loc != -1) {
uniformMatrix4fv(&binding->vertex->uniforms,
binding->inv_viewport_loc, &invViewport[0]);
}
}
/* update vertex program constants */