From 798ad308198e85e9506af96ff641cd2bd43aaa66 Mon Sep 17 00:00:00 2001 From: coldhex Date: Fri, 28 Feb 2025 20:28:19 +0200 Subject: [PATCH] nv2a: Perspective-correct interpolation for w-buffering z_perspective being true implies w-buffering, in which case the w-coordinate stored in the depth buffer should also be interpolated in a perspective-correct way. We do this by calculating w and setting gl_FragDepth in the fragment shader. Since enabling polygon offset and setting values using glPolygonOffset won't have any effect when manually setting gl_FragDepth for w-buffering, we introduce the depthOffset variable to obtain similar behaviour (but the glPolygonOffset factor-argument is currently not emulated.) (Note that glPolygonOffset is OpenGL implementation-dependent and it might be good to use depthOffset for z-buffering as well, but this is not done here and we still use OpenGL/Vulkan zbias functionality.) This also implements depth clipping and clamping in the fragment shader. If triangles are clipped, the shadows of the small rocks in the Halo 2 Beaver Creek map can have flickering horizontal lines. The shadows are drawn on the ground in another pass with the same models as for the ground, but for some reason with depth clamping enabled. The flickering happens when Xemu clips the ground triangles while the exact same shadow triangles are depth clamped, which produces small differences in the coordinates. The shadows are drawn with depth function GL_EQUAL so there is no tolerance for any differences. Clipping in the fragment shader solves the problem because the ground and shadow triangles remain exactly the same regardless of depth clipping/clamping. For some performance gain, it might be a good idea to cull triangles by depth in the geometry shader, but this is not implemented here. In the programmable vertex shader we always multiply position output by w because this improves numerical stability in subsequent floating point computations by modern GPUs. 
This usually means that the perspective divide done by the vertex program gets undone. The magic bounding constants 5.42101e-020 and 1.884467e+019 are replaced by 5.421011e-20 and 1.8446744e19, i.e. more decimals added. This makes the 32-bit floating point numbers represent exactly 2^(-64) and 2^64 (raw bits 0x1f800000 and 0x5f800000) which seem more likely the correct values although testing with hardware was not done to this precision. Testing indicates that the same RCC instruction magic constants are also applied to both fixed function and programmable vertex shader w-coordinate output. This bounding replaces the special test for w==0.0 and abs(w)==inf which used to set vtx_inv_w=1.0 (which did not match Xbox hardware behaviour.) --- hw/xbox/nv2a/pgraph/gl/draw.c | 14 +---- hw/xbox/nv2a/pgraph/gl/renderer.h | 1 + hw/xbox/nv2a/pgraph/gl/shaders.c | 31 +++++++++- hw/xbox/nv2a/pgraph/glsl/common.c | 28 +++++---- hw/xbox/nv2a/pgraph/glsl/geom.c | 8 --- hw/xbox/nv2a/pgraph/glsl/psh.c | 91 +++++++++++++++++++++++------ hw/xbox/nv2a/pgraph/glsl/vsh-ff.c | 11 +--- hw/xbox/nv2a/pgraph/glsl/vsh-prog.c | 57 ++++-------------- hw/xbox/nv2a/pgraph/glsl/vsh-prog.h | 2 +- hw/xbox/nv2a/pgraph/glsl/vsh.c | 49 ++++++++-------- hw/xbox/nv2a/pgraph/psh.h | 2 + hw/xbox/nv2a/pgraph/shaders.c | 5 ++ hw/xbox/nv2a/pgraph/vk/draw.c | 12 +--- hw/xbox/nv2a/pgraph/vk/renderer.h | 2 + hw/xbox/nv2a/pgraph/vk/shaders.c | 48 +++++++++++++-- 15 files changed, 210 insertions(+), 151 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/gl/draw.c b/hw/xbox/nv2a/pgraph/gl/draw.c index a9032562fa..af87c41e28 100644 --- a/hw/xbox/nv2a/pgraph/gl/draw.c +++ b/hw/xbox/nv2a/pgraph/gl/draw.c @@ -203,10 +203,6 @@ void pgraph_gl_draw_begin(NV2AState *d) glDisable(GL_CULL_FACE); } - /* Clipping */ - glEnable(GL_CLIP_DISTANCE0); - glEnable(GL_CLIP_DISTANCE1); - /* Front-face select */ glFrontFace(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & NV_PGRAPH_SETUPRASTER_FRONTFACE @@ -240,6 +236,8 @@ void 
pgraph_gl_draw_begin(NV2AState *d) GLfloat zfactor = *(float*)&zfactor_u32; uint32_t zbias_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS); GLfloat zbias = *(float*)&zbias_u32; + // FIXME: with Linux and Mesa, zbias must be multiplied by 0.5 in + // order to have the same depth value offset as Xbox. glPolygonOffset(zfactor, zbias); } @@ -255,13 +253,7 @@ void pgraph_gl_draw_begin(NV2AState *d) glDisable(GL_DEPTH_TEST); } - if (GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ZCOMPRESSOCCLUDE), - NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN) == - NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP) { - glEnable(GL_DEPTH_CLAMP); - } else { - glDisable(GL_DEPTH_CLAMP); - } + glEnable(GL_DEPTH_CLAMP); if (GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3), NV_PGRAPH_CONTROL_3_SHADEMODE) == diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.h b/hw/xbox/nv2a/pgraph/gl/renderer.h index 5c765361d6..3529006898 100644 --- a/hw/xbox/nv2a/pgraph/gl/renderer.h +++ b/hw/xbox/nv2a/pgraph/gl/renderer.h @@ -106,6 +106,7 @@ typedef struct ShaderBinding { GLint surface_size_loc; GLint clip_range_loc; + GLint depth_offset_loc; GLint vsh_constant_loc[NV2A_VERTEXSHADER_CONSTANTS]; uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4]; diff --git a/hw/xbox/nv2a/pgraph/gl/shaders.c b/hw/xbox/nv2a/pgraph/gl/shaders.c index 3095ca3c3a..ad1c21f4a2 100644 --- a/hw/xbox/nv2a/pgraph/gl/shaders.c +++ b/hw/xbox/nv2a/pgraph/gl/shaders.c @@ -154,6 +154,7 @@ static void update_shader_constant_locations(ShaderBinding *binding) } binding->surface_size_loc = glGetUniformLocation(binding->gl_program, "surfaceSize"); binding->clip_range_loc = glGetUniformLocation(binding->gl_program, "clipRange"); + binding->depth_offset_loc = glGetUniformLocation(binding->gl_program, "depthOffset"); binding->fog_color_loc = glGetUniformLocation(binding->gl_program, "fogColor"); binding->fog_param_loc = glGetUniformLocation(binding->gl_program, "fogParam"); @@ -886,11 +887,36 @@ static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, 
uint32_t v[2]; v[0] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMIN); v[1] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMAX); - float zclip_min = *(float*)&v[0] / zmax * 2.0 - 1.0; - float zclip_max = *(float*)&v[1] / zmax * 2.0 - 1.0; + float zclip_min = *(float *)&v[0]; + float zclip_max = *(float *)&v[1]; glUniform4f(binding->clip_range_loc, 0, zmax, zclip_min, zclip_max); } + if (binding->depth_offset_loc != -1) { + float zbias = 0.0f; + + if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + (NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE | + NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE | + NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) { + uint32_t zbias_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS); + zbias = *(float *)&zbias_u32; + + if (pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR) != 0 && + (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & + NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE)) { + /* TODO: emulate zfactor when z_perspective true, i.e. + * w-buffering. Perhaps calculate an additional offset based on + * triangle orientation in geometry shader and pass the result + * to fragment shader and add it to gl_FragDepth as well. 
+ */ + NV2A_UNIMPLEMENTED("NV_PGRAPH_ZOFFSETFACTOR for w-buffering"); + } + } + + glUniform1f(binding->depth_offset_loc, zbias); + } + /* Clipping regions */ unsigned int max_gl_width = pg->surface_binding_dim.width; unsigned int max_gl_height = pg->surface_binding_dim.height; @@ -956,6 +982,7 @@ static bool test_shaders_dirty(PGRAPHState *pg) CR_1(NV_PGRAPH_CSV1_B) \ CR_1(NV_PGRAPH_SETUPRASTER) \ CR_1(NV_PGRAPH_SHADERPROG) \ + CR_1(NV_PGRAPH_ZCOMPRESSOCCLUDE) \ CR_8(NV_PGRAPH_COMBINECOLORI0) \ CR_8(NV_PGRAPH_COMBINECOLORO0) \ CR_8(NV_PGRAPH_COMBINEALPHAI0) \ diff --git a/hw/xbox/nv2a/pgraph/glsl/common.c b/hw/xbox/nv2a/pgraph/glsl/common.c index 7059880373..0bcfe7ce76 100644 --- a/hw/xbox/nv2a/pgraph/glsl/common.c +++ b/hw/xbox/nv2a/pgraph/glsl/common.c @@ -23,34 +23,32 @@ MString *pgraph_get_glsl_vtx_header(MString *out, bool location, bool smooth, bool in, bool prefix, bool array) { - const char *flat_s = "flat"; - const char *noperspective_s = "noperspective"; - const char *qualifier_s = smooth ? noperspective_s : flat_s; - const char *qualifiers[11] = { - noperspective_s, flat_s, qualifier_s, qualifier_s, - qualifier_s, qualifier_s, noperspective_s, noperspective_s, - noperspective_s, noperspective_s, noperspective_s - }; + const char *flat_s = "flat "; + const char *smooth_s = ""; + const char *qualifier_s = smooth ? smooth_s : flat_s; + const char *qualifiers[9] = { qualifier_s, qualifier_s, qualifier_s, + qualifier_s, smooth_s, smooth_s, + smooth_s, smooth_s, smooth_s }; const char *in_out_s = in ? "in" : "out"; const char *float_s = "float"; const char *vec4_s = "vec4"; - const char *types[11] = { float_s, float_s, vec4_s, vec4_s, vec4_s, vec4_s, - float_s, vec4_s, vec4_s, vec4_s, vec4_s }; + const char *types[9] = { vec4_s, vec4_s, vec4_s, vec4_s, float_s, + vec4_s, vec4_s, vec4_s, vec4_s }; const char *prefix_s = prefix ? 
"v_" : ""; - const char *names[11] = { - "vtx_inv_w", "vtx_inv_w_flat", "vtxD0", "vtxD1", "vtxB0", "vtxB1", - "vtxFog", "vtxT0", "vtxT1", "vtxT2", "vtxT3", + const char *names[9] = { + "vtxD0", "vtxD1", "vtxB0", "vtxB1", "vtxFog", + "vtxT0", "vtxT1", "vtxT2", "vtxT3", }; const char *suffix_s = array ? "[]" : ""; - for (int i = 0; i < 11; i++) { + for (int i = 0; i < 9; i++) { if (location) { mstring_append_fmt(out, "layout(location = %d) ", i); } - mstring_append_fmt(out, "%s %s %s %s%s%s;\n", + mstring_append_fmt(out, "%s%s %s %s%s%s;\n", qualifiers[i], in_out_s, types[i], prefix_s, names[i], suffix_s); } diff --git a/hw/xbox/nv2a/pgraph/glsl/geom.c b/hw/xbox/nv2a/pgraph/glsl/geom.c index 0e738f0280..df265b96d3 100644 --- a/hw/xbox/nv2a/pgraph/glsl/geom.c +++ b/hw/xbox/nv2a/pgraph/glsl/geom.c @@ -182,10 +182,6 @@ MString *pgraph_gen_geom_glsl(enum ShaderPolygonMode polygon_front_mode, "void emit_vertex(int index, int _unused) {\n" " gl_Position = gl_in[index].gl_Position;\n" " gl_PointSize = gl_in[index].gl_PointSize;\n" - // " gl_ClipDistance[0] = gl_in[index].gl_ClipDistance[0];\n" - // " gl_ClipDistance[1] = gl_in[index].gl_ClipDistance[1];\n" - " vtx_inv_w = v_vtx_inv_w[index];\n" - " vtx_inv_w_flat = v_vtx_inv_w[index];\n" " vtxD0 = v_vtxD0[index];\n" " vtxD1 = v_vtxD1[index];\n" " vtxB0 = v_vtxB0[index];\n" @@ -202,10 +198,6 @@ MString *pgraph_gen_geom_glsl(enum ShaderPolygonMode polygon_front_mode, "void emit_vertex(int index, int provoking_index) {\n" " gl_Position = gl_in[index].gl_Position;\n" " gl_PointSize = gl_in[index].gl_PointSize;\n" - // " gl_ClipDistance[0] = gl_in[index].gl_ClipDistance[0];\n" - // " gl_ClipDistance[1] = gl_in[index].gl_ClipDistance[1];\n" - " vtx_inv_w = v_vtx_inv_w[index];\n" - " vtx_inv_w_flat = v_vtx_inv_w[provoking_index];\n" " vtxD0 = v_vtxD0[provoking_index];\n" " vtxD1 = v_vtxD1[provoking_index];\n" " vtxB0 = v_vtxB0[provoking_index];\n" diff --git a/hw/xbox/nv2a/pgraph/glsl/psh.c b/hw/xbox/nv2a/pgraph/glsl/psh.c index 
44c56a9c9b..08fec32ec7 100644 --- a/hw/xbox/nv2a/pgraph/glsl/psh.c +++ b/hw/xbox/nv2a/pgraph/glsl/psh.c @@ -745,8 +745,10 @@ static MString* psh_convert(struct PixelShader *ps) mstring_append_fmt(preflight, "%sint alphaRef;\n" "%svec4 fogColor;\n" - "%sivec4 clipRegion[8];\n", - u, u, u); + "%sivec4 clipRegion[8];\n" + "%svec4 clipRange;\n" + "%sfloat depthOffset;\n", + u, u, u, u, u); for (int i = 0; i < 4; i++) { mstring_append_fmt(preflight, "%smat2 bumpMat%d;\n" "%sfloat bumpScale%d;\n" @@ -861,28 +863,62 @@ static MString* psh_convert(struct PixelShader *ps) "}\n"); } - /* calculate perspective-correct inputs */ - MString *vars = mstring_new(); - if (ps->state.smooth_shading) { - mstring_append(vars, "vec4 pD0 = vtxD0 / vtx_inv_w;\n"); - mstring_append(vars, "vec4 pD1 = vtxD1 / vtx_inv_w;\n"); - mstring_append(vars, "vec4 pB0 = vtxB0 / vtx_inv_w;\n"); - mstring_append(vars, "vec4 pB1 = vtxB1 / vtx_inv_w;\n"); - } else { - mstring_append(vars, "vec4 pD0 = vtxD0 / vtx_inv_w_flat;\n"); - mstring_append(vars, "vec4 pD1 = vtxD1 / vtx_inv_w_flat;\n"); - mstring_append(vars, "vec4 pB0 = vtxB0 / vtx_inv_w_flat;\n"); - mstring_append(vars, "vec4 pB1 = vtxB1 / vtx_inv_w_flat;\n"); + /* Depth clipping */ + if (ps->state.depth_clipping) { + if (ps->state.z_perspective) { + mstring_append( + clip, "float zvalue = 1.0/gl_FragCoord.w + depthOffset;\n" + "if (zvalue < clipRange.z || clipRange.w < zvalue) {\n" + " discard;\n" + "}\n"); + } else { + /* Take care of floating point precision problems. MS dashboard + * outputs exactly 0.0 z-coordinates and then our fixed function + * vertex shader outputs -w as the z-coordinate when OpenGL is + * used. Since -w/w = -1, this should give us exactly 0.0 as + * gl_FragCoord.z here. Unfortunately, with AMD Radeon RX 6600 the + * result is slightly greater than 0. MS dashboard sets the clip + * range to [0.0, 0.0] and so the imprecision causes unwanted + * clipping. 
Note that since Vulkan uses NDC range [0,1] it + * doesn't suffer from this problem with Radeon. Also, despite the + * imprecision OpenGL Radeon writes the correct value 0 to the depth + * buffer (if writing is enabled.) Radeon appears to write floored + * values. To compare, Intel integrated UHD 770 has gl_FragCoord.z + * exactly 0 (and writes rounded to closest integer values to the + * depth buffer.) Radeon OpenGL problem could also be fixed by using + * glClipControl(), but it requires OpenGL 4.5. + * Above is based on experiments with Linux and Mesa. + */ + if (ps->state.vulkan) { + mstring_append( + clip, "if (gl_FragCoord.z*clipRange.y < clipRange.z ||\n" + " gl_FragCoord.z*clipRange.y > clipRange.w) {\n" + " discard;\n" + "}\n"); + } else { + mstring_append( + clip, "if ((gl_FragCoord.z + 1.0f/16777216.0f)*clipRange.y < clipRange.z ||\n" + " (gl_FragCoord.z - 1.0f/16777216.0f)*clipRange.y > clipRange.w) {\n" + " discard;\n" + "}\n"); + } + } } - mstring_append(vars, "vec4 pFog = vec4(fogColor.rgb, clamp(vtxFog / vtx_inv_w, 0.0, 1.0));\n"); - mstring_append(vars, "vec4 pT0 = vtxT0 / vtx_inv_w;\n"); - mstring_append(vars, "vec4 pT1 = vtxT1 / vtx_inv_w;\n"); - mstring_append(vars, "vec4 pT2 = vtxT2 / vtx_inv_w;\n"); + + MString *vars = mstring_new(); + mstring_append(vars, "vec4 pD0 = vtxD0;\n"); + mstring_append(vars, "vec4 pD1 = vtxD1;\n"); + mstring_append(vars, "vec4 pB0 = vtxB0;\n"); + mstring_append(vars, "vec4 pB1 = vtxB1;\n"); + mstring_append(vars, "vec4 pFog = vec4(fogColor.rgb, clamp(vtxFog, 0.0, 1.0));\n"); + mstring_append(vars, "vec4 pT0 = vtxT0;\n"); + mstring_append(vars, "vec4 pT1 = vtxT1;\n"); + mstring_append(vars, "vec4 pT2 = vtxT2;\n"); if (ps->state.point_sprite) { assert(!ps->state.rect_tex[3]); mstring_append(vars, "vec4 pT3 = vec4(gl_PointCoord, 1.0, 1.0);\n"); } else { - mstring_append(vars, "vec4 pT3 = vtxT3 / vtx_inv_w;\n"); + mstring_append(vars, "vec4 pT3 = vtxT3;\n"); } mstring_append(vars, "\n"); mstring_append(vars, "vec4 v0 = 
pD0;\n"); @@ -1208,6 +1244,23 @@ static MString* psh_convert(struct PixelShader *ps) } } + if (ps->state.z_perspective) { + if (!ps->state.depth_clipping) { + mstring_append(ps->code, + "float zvalue = 1.0/gl_FragCoord.w + depthOffset;\n"); + } + /* TODO: With integer depth buffers Xbox hardware floors values and so + * does Radeon, but Intel UHD 770 rounds to nearest. Should probably + * floor here explicitly (in some way that doesn't also cause + * imprecision issues due to division by clipRange.y) + */ + mstring_append(ps->code, + "gl_FragDepth = clamp(zvalue, clipRange.z, clipRange.w)/clipRange.y;\n"); + } else if (!ps->state.depth_clipping) { + mstring_append(ps->code, + "gl_FragDepth = clamp(gl_FragCoord.z, clipRange.z/clipRange.y, clipRange.w/clipRange.y);\n"); + } + MString *final = mstring_new(); mstring_append_fmt(final, "#version %d\n\n", ps->state.vulkan ? 450 : 400); mstring_append(final, mstring_get_str(preflight)); diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c index 59749003cd..cccb49a33c 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c @@ -422,12 +422,11 @@ GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz mstring_append(body, " oPos = invViewport * (tPosition * compositeMat);\n" + " oPos.w = (2.0f * step(0.0f, oPos.w) - 1.0f) * clamp(abs(oPos.w), 5.421011e-20, 1.8446744e19);\n" ); if (state->vulkan) { mstring_append(body, " oPos.y *= -1;\n"); - } else { - mstring_append(body, " oPos.z = oPos.z * 2.0 - oPos.w;\n"); } /* FIXME: Testing */ @@ -445,14 +444,6 @@ GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz mstring_append_fmt(body, " oPts.x = %f * %d;\n", state->point_size, state->surface_scale_factor); } - - mstring_append(body, - " if (oPos.w == 0.0 || isinf(oPos.w)) {\n" - " vtx_inv_w = 1.0;\n" - " } else {\n" - " vtx_inv_w = 1.0 / oPos.w;\n" - " }\n" - " vtx_inv_w_flat = vtx_inv_w;\n"); } static void 
append_skinning_code(MString* str, bool mix, diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c index 650d95854c..66fd4df9d0 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c @@ -735,13 +735,8 @@ static const char* vsh_header = "#define RCC(dest, mask, src) dest.mask = _RCC(_in(src).x).mask\n" "vec4 _RCC(float src)\n" "{\n" - " float t = 1.0 / src;\n" - " if (t > 0.0) {\n" - " t = clamp(t, 5.42101e-020, 1.884467e+019);\n" - " } else {\n" - " t = clamp(t, -1.884467e+019, -5.42101e-020);\n" - " }\n" - " return vec4(t);\n" + " src = (2.0f * step(0.0f, src) - 1.0f) * clamp(abs(src), 5.421011e-20, 1.8446744e19);\n" + " return vec4(1.0 / src);\n" "}\n" "\n" "#define RSQ(dest, mask, src) dest.mask = _RSQ(_in(src).x).mask\n" @@ -797,7 +792,6 @@ static const char* vsh_header = void pgraph_gen_vsh_prog_glsl(uint16_t version, const uint32_t *tokens, unsigned int length, - bool z_perspective, bool vulkan, MString *header, MString *body) { @@ -826,18 +820,6 @@ void pgraph_gen_vsh_prog_glsl(uint16_t version, } assert(has_final); - /* pre-divide and output the generated W so we can do persepctive correct - * interpolation manually. OpenGL can't, since we give it a W of 1 to work - * around the perspective divide */ - mstring_append(body, - " if (oPos.w == 0.0 || isinf(oPos.w)) {\n" - " vtx_inv_w = 1.0;\n" - " } else {\n" - " vtx_inv_w = 1.0 / oPos.w;\n" - " }\n" - " vtx_inv_w_flat = vtx_inv_w;\n" - ); - mstring_append(body, /* the shaders leave the result in screen space, while * opengl expects it in clip space. 
@@ -854,32 +836,17 @@ void pgraph_gen_vsh_prog_glsl(uint16_t version, "/ surfaceSize.y;\n"); } - if (z_perspective) { - mstring_append(body, " oPos.z = oPos.w;\n"); - } - mstring_append(body, - " if (clipRange.y != clipRange.x) {\n"); - if (vulkan) { - mstring_append(body, " oPos.z /= clipRange.y;\n"); - } else { - mstring_append(body, - " oPos.z = (oPos.z - clipRange.x)/(0.5*(clipRange.y " - "- clipRange.x)) - 1;\n"); - } - mstring_append(body, - " }\n" + " oPos.z = oPos.z / clipRange.y;\n" + " oPos.w = (2.0f * step(0.0f, oPos.w) - 1.0f) * clamp(abs(oPos.w), 5.421011e-20, 1.8446744e19);\n" - /* Correct for the perspective divide */ - " if (oPos.w < 0.0) {\n" - /* undo the perspective divide in the case where the point would be - * clipped so opengl can clip it correctly */ - " oPos.xyz *= oPos.w;\n" - " } else {\n" - /* we don't want the OpenGL perspective divide to happen, but we - * can't multiply by W because it could be meaningless here */ - " oPos.w = 1.0;\n" - " }\n" + /* Undo perspective divide by w. + * Note that games may also have vertex shaders that do + * not divide by w (such as 2D-graphics menus or overlays), but since + * OpenGL will later on divide by the same w, we get back the same + * screen space coordinates (perhaps with some loss of floating point + * precision, though.) 
+ */ + " oPos.xyz *= oPos.w;\n" ); - } diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.h b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.h index 84d8141c5e..cffb6be3b3 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.h +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.h @@ -29,7 +29,7 @@ #define HW_XBOX_NV2A_PGRAPH_GLSL_VSH_PROG_H void pgraph_gen_vsh_prog_glsl(uint16_t version, const uint32_t *tokens, - unsigned int length, bool z_perspective, + unsigned int length, bool vulkan, MString *header, MString *body); #endif diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh.c b/hw/xbox/nv2a/pgraph/glsl/vsh.c index a60fbe265d..2a49c1f11a 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh.c @@ -79,8 +79,6 @@ MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs) if (prefix_outputs) { mstring_append(header, - "#define vtx_inv_w v_vtx_inv_w\n" - "#define vtx_inv_w_flat v_vtx_inv_w_flat\n" "#define vtxD0 v_vtxD0\n" "#define vtxD1 v_vtxD1\n" "#define vtxB0 v_vtxB0\n" @@ -142,7 +140,7 @@ MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs) } else if (state->vertex_program) { pgraph_gen_vsh_prog_glsl(VSH_VERSION_XVS, (uint32_t *)state->program_data, - state->program_length, state->z_perspective, + state->program_length, state->vulkan, header, body); } else { assert(false); @@ -233,27 +231,30 @@ MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs) } /* Set outputs */ - const char *shade_model_mult = state->smooth_shading ? 
"vtx_inv_w" : "vtx_inv_w_flat"; - mstring_append_fmt(body, "\n" - " vtxD0 = clamp(oD0, 0.0, 1.0) * %s;\n" - " vtxD1 = clamp(oD1, 0.0, 1.0) * %s;\n" - " vtxB0 = clamp(oB0, 0.0, 1.0) * %s;\n" - " vtxB1 = clamp(oB1, 0.0, 1.0) * %s;\n" - " vtxFog = oFog.x * vtx_inv_w;\n" - " vtxT0 = oT0 * vtx_inv_w;\n" - " vtxT1 = oT1 * vtx_inv_w;\n" - " vtxT2 = oT2 * vtx_inv_w;\n" - " vtxT3 = oT3 * vtx_inv_w;\n" - " gl_Position = oPos;\n" - " gl_PointSize = oPts.x;\n" - // " gl_ClipDistance[0] = oPos.z - oPos.w*clipRange.z;\n" // Near - // " gl_ClipDistance[1] = oPos.w*clipRange.w - oPos.z;\n" // Far - "\n" - "}\n", - shade_model_mult, - shade_model_mult, - shade_model_mult, - shade_model_mult); + mstring_append(body, "\n" + " vtxD0 = clamp(oD0, 0.0, 1.0);\n" + " vtxD1 = clamp(oD1, 0.0, 1.0);\n" + " vtxB0 = clamp(oB0, 0.0, 1.0);\n" + " vtxB1 = clamp(oB1, 0.0, 1.0);\n" + " vtxFog = oFog.x;\n" + " vtxT0 = oT0;\n" + " vtxT1 = oT1;\n" + " vtxT2 = oT2;\n" + " vtxT3 = oT3;\n" + " gl_PointSize = oPts.x;\n" + ); + + if (state->vulkan) { + mstring_append(body, + " gl_Position = oPos;\n" + ); + } else { + mstring_append(body, + " gl_Position = vec4(oPos.x, oPos.y, 2.0*oPos.z - oPos.w, oPos.w);\n" + ); + } + + mstring_append(body, "}\n"); /* Return combined header + source */ if (state->vulkan) { diff --git a/hw/xbox/nv2a/pgraph/psh.h b/hw/xbox/nv2a/pgraph/psh.h index 1366045707..c54e650e99 100644 --- a/hw/xbox/nv2a/pgraph/psh.h +++ b/hw/xbox/nv2a/pgraph/psh.h @@ -85,6 +85,8 @@ typedef struct PshState { bool window_clip_exclusive; bool smooth_shading; + bool depth_clipping; + bool z_perspective; } PshState; #endif diff --git a/hw/xbox/nv2a/pgraph/shaders.c b/hw/xbox/nv2a/pgraph/shaders.c index 8d2c77a535..6e13f2084c 100644 --- a/hw/xbox/nv2a/pgraph/shaders.c +++ b/hw/xbox/nv2a/pgraph/shaders.c @@ -94,6 +94,7 @@ ShaderState pgraph_get_shader_state(PGRAPHState *pg) state.vertex_program = vertex_program, state.z_perspective = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & 
NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE; + state.psh.z_perspective = state.z_perspective; state.point_params_enable = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D), NV_PGRAPH_CSV0_D_POINTPARAMSENABLE); @@ -117,6 +118,10 @@ ShaderState pgraph_get_shader_state(PGRAPHState *pg) NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH; state.psh.smooth_shading = state.smooth_shading; + state.psh.depth_clipping = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ZCOMPRESSOCCLUDE), + NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN) == + NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CULL; + state.program_length = 0; if (vertex_program) { diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 4cde028b1f..b3c41bc96b 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -816,7 +816,7 @@ static void create_pipeline(PGRAPHState *pg) VkPipelineRasterizationStateCreateInfo rasterizer = { .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .depthClampEnable = VK_FALSE, + .depthClampEnable = VK_TRUE, .rasterizerDiscardEnable = VK_FALSE, .polygonMode = pgraph_polygon_mode_vk_map[r->shader_binding->state .polygon_front_mode], @@ -958,10 +958,6 @@ static void create_pipeline(PGRAPHState *pg) .pDynamicStates = dynamic_states, }; - // /* Clipping */ - // glEnable(GL_CLIP_DISTANCE0); - // glEnable(GL_CLIP_DISTANCE1); - // /* Polygon offset */ // /* FIXME: GL implementation-specific, maybe do this in VS? 
*/ // if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & @@ -983,12 +979,6 @@ static void create_pipeline(PGRAPHState *pg) rasterizer.depthBiasConstantFactor = zbias; } - if (GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ZCOMPRESSOCCLUDE), - NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN) == - NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP) { - rasterizer.depthClampEnable = VK_TRUE; - } - // FIXME: Dither // if (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & // NV_PGRAPH_CONTROL_0_DITHERENABLE)) diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h index 781cc8dc49..1357d707b0 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -173,6 +173,8 @@ typedef struct ShaderBinding { int surface_size_loc; int clip_range_loc; + int clip_range_floc; + int depth_offset_loc; int vsh_constant_loc; uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4]; diff --git a/hw/xbox/nv2a/pgraph/vk/shaders.c b/hw/xbox/nv2a/pgraph/vk/shaders.c index 5fce943d49..421a81ba60 100644 --- a/hw/xbox/nv2a/pgraph/vk/shaders.c +++ b/hw/xbox/nv2a/pgraph/vk/shaders.c @@ -276,6 +276,10 @@ static void update_shader_constant_locations(ShaderBinding *binding) uniform_index(&binding->vertex->uniforms, "surfaceSize"); binding->clip_range_loc = uniform_index(&binding->vertex->uniforms, "clipRange"); + binding->clip_range_floc = + uniform_index(&binding->fragment->uniforms, "clipRange"); + binding->depth_offset_loc = + uniform_index(&binding->fragment->uniforms, "depthOffset"); binding->fog_param_loc = uniform_index(&binding->vertex->uniforms, "fogParam"); @@ -637,14 +641,47 @@ static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, pg->surface_binding_dim.height / aa_height); } - if (binding->clip_range_loc != -1) { + if (binding->clip_range_loc != -1 || binding->clip_range_floc != -1) { uint32_t v[2]; v[0] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMIN); v[1] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMAX); - float zclip_min = *(float *)&v[0] / zmax * 2.0 - 1.0; - float 
zclip_max = *(float *)&v[1] / zmax * 2.0 - 1.0; - uniform4f(&binding->vertex->uniforms, binding->clip_range_loc, 0, - zmax, zclip_min, zclip_max); + float zclip_min = *(float *)&v[0]; + float zclip_max = *(float *)&v[1]; + + if (binding->clip_range_loc != -1) { + uniform4f(&binding->vertex->uniforms, binding->clip_range_loc, 0, + zmax, zclip_min, zclip_max); + } + if (binding->clip_range_floc != -1) { + uniform4f(&binding->fragment->uniforms, binding->clip_range_floc, 0, + zmax, zclip_min, zclip_max); + } + } + + if (binding->depth_offset_loc != -1) { + float zbias = 0.0f; + + if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + (NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE | + NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE | + NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) { + uint32_t zbias_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS); + zbias = *(float *)&zbias_u32; + + if (pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR) != 0 && + (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & + NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE)) { + /* TODO: emulate zfactor when z_perspective true, i.e. + * w-buffering. Perhaps calculate an additional offset based on + * triangle orientation in geometry shader and pass the result + * to fragment shader and add it to gl_FragDepth as well. + */ + NV2A_UNIMPLEMENTED("NV_PGRAPH_ZOFFSETFACTOR for w-buffering"); + } + } + + uniform1f(&binding->fragment->uniforms, binding->depth_offset_loc, + zbias); } /* Clipping regions */ @@ -724,6 +761,7 @@ static bool check_shaders_dirty(PGRAPHState *pg) NV_PGRAPH_SHADERCTL, NV_PGRAPH_SHADERPROG, NV_PGRAPH_SHADOWCTL, + NV_PGRAPH_ZCOMPRESSOCCLUDE, }; for (int i = 0; i < ARRAY_SIZE(regs); i++) { if (pgraph_is_reg_dirty(pg, regs[i])) {