diff --git a/hw/xbox/nv2a/pgraph/gl/draw.c b/hw/xbox/nv2a/pgraph/gl/draw.c index a9032562fa..af87c41e28 100644 --- a/hw/xbox/nv2a/pgraph/gl/draw.c +++ b/hw/xbox/nv2a/pgraph/gl/draw.c @@ -203,10 +203,6 @@ void pgraph_gl_draw_begin(NV2AState *d) glDisable(GL_CULL_FACE); } - /* Clipping */ - glEnable(GL_CLIP_DISTANCE0); - glEnable(GL_CLIP_DISTANCE1); - /* Front-face select */ glFrontFace(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & NV_PGRAPH_SETUPRASTER_FRONTFACE @@ -240,6 +236,8 @@ void pgraph_gl_draw_begin(NV2AState *d) GLfloat zfactor = *(float*)&zfactor_u32; uint32_t zbias_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS); GLfloat zbias = *(float*)&zbias_u32; + // FIXME: with Linux and Mesa, zbias must be multiplied by 0.5 in + // order to have the same depth value offset as Xbox. glPolygonOffset(zfactor, zbias); } @@ -255,13 +253,7 @@ void pgraph_gl_draw_begin(NV2AState *d) glDisable(GL_DEPTH_TEST); } - if (GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ZCOMPRESSOCCLUDE), - NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN) == - NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP) { - glEnable(GL_DEPTH_CLAMP); - } else { - glDisable(GL_DEPTH_CLAMP); - } + glEnable(GL_DEPTH_CLAMP); if (GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3), NV_PGRAPH_CONTROL_3_SHADEMODE) == diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.h b/hw/xbox/nv2a/pgraph/gl/renderer.h index 5c765361d6..3529006898 100644 --- a/hw/xbox/nv2a/pgraph/gl/renderer.h +++ b/hw/xbox/nv2a/pgraph/gl/renderer.h @@ -106,6 +106,7 @@ typedef struct ShaderBinding { GLint surface_size_loc; GLint clip_range_loc; + GLint depth_offset_loc; GLint vsh_constant_loc[NV2A_VERTEXSHADER_CONSTANTS]; uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4]; diff --git a/hw/xbox/nv2a/pgraph/gl/shaders.c b/hw/xbox/nv2a/pgraph/gl/shaders.c index 3095ca3c3a..ad1c21f4a2 100644 --- a/hw/xbox/nv2a/pgraph/gl/shaders.c +++ b/hw/xbox/nv2a/pgraph/gl/shaders.c @@ -154,6 +154,7 @@ static void update_shader_constant_locations(ShaderBinding *binding) } binding->surface_size_loc = glGetUniformLocation(binding->gl_program, "surfaceSize"); binding->clip_range_loc = glGetUniformLocation(binding->gl_program, "clipRange"); + binding->depth_offset_loc = glGetUniformLocation(binding->gl_program, "depthOffset"); binding->fog_color_loc = glGetUniformLocation(binding->gl_program, "fogColor"); binding->fog_param_loc = glGetUniformLocation(binding->gl_program, "fogParam"); @@ -886,11 +887,36 @@ static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, uint32_t v[2]; v[0] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMIN); v[1] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMAX); - float zclip_min = *(float*)&v[0] / zmax * 2.0 - 1.0; - float zclip_max = *(float*)&v[1] / zmax * 2.0 - 1.0; + float zclip_min = *(float *)&v[0]; + float zclip_max = *(float *)&v[1]; glUniform4f(binding->clip_range_loc, 0, zmax, zclip_min, zclip_max); } + if (binding->depth_offset_loc != -1) { + float zbias = 0.0f; + + if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + (NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE | + NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE | + NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) { + uint32_t zbias_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS); + zbias = *(float *)&zbias_u32; + + if (pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR) != 0 && + (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & + NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE)) { + /* TODO: emulate zfactor when z_perspective true, i.e. + * w-buffering. Perhaps calculate an additional offset based on + * triangle orientation in geometry shader and pass the result + * to fragment shader and add it to gl_FragDepth as well. + */ + NV2A_UNIMPLEMENTED("NV_PGRAPH_ZOFFSETFACTOR for w-buffering"); + } + } + + glUniform1f(binding->depth_offset_loc, zbias); + } + /* Clipping regions */ unsigned int max_gl_width = pg->surface_binding_dim.width; unsigned int max_gl_height = pg->surface_binding_dim.height; @@ -956,6 +982,7 @@ static bool test_shaders_dirty(PGRAPHState *pg) CR_1(NV_PGRAPH_CSV1_B) \ CR_1(NV_PGRAPH_SETUPRASTER) \ CR_1(NV_PGRAPH_SHADERPROG) \ + CR_1(NV_PGRAPH_ZCOMPRESSOCCLUDE) \ CR_8(NV_PGRAPH_COMBINECOLORI0) \ CR_8(NV_PGRAPH_COMBINECOLORO0) \ CR_8(NV_PGRAPH_COMBINEALPHAI0) \ diff --git a/hw/xbox/nv2a/pgraph/glsl/common.c b/hw/xbox/nv2a/pgraph/glsl/common.c index 7059880373..0bcfe7ce76 100644 --- a/hw/xbox/nv2a/pgraph/glsl/common.c +++ b/hw/xbox/nv2a/pgraph/glsl/common.c @@ -23,34 +23,32 @@ MString *pgraph_get_glsl_vtx_header(MString *out, bool location, bool smooth, bool in, bool prefix, bool array) { - const char *flat_s = "flat"; - const char *noperspective_s = "noperspective"; - const char *qualifier_s = smooth ? noperspective_s : flat_s; - const char *qualifiers[11] = { - noperspective_s, flat_s, qualifier_s, qualifier_s, - qualifier_s, qualifier_s, noperspective_s, noperspective_s, - noperspective_s, noperspective_s, noperspective_s - }; + const char *flat_s = "flat "; + const char *smooth_s = ""; + const char *qualifier_s = smooth ? smooth_s : flat_s; + const char *qualifiers[9] = { qualifier_s, qualifier_s, qualifier_s, + qualifier_s, smooth_s, smooth_s, + smooth_s, smooth_s, smooth_s }; const char *in_out_s = in ? "in" : "out"; const char *float_s = "float"; const char *vec4_s = "vec4"; - const char *types[11] = { float_s, float_s, vec4_s, vec4_s, vec4_s, vec4_s, - float_s, vec4_s, vec4_s, vec4_s, vec4_s }; + const char *types[9] = { vec4_s, vec4_s, vec4_s, vec4_s, float_s, + vec4_s, vec4_s, vec4_s, vec4_s }; const char *prefix_s = prefix ? "v_" : ""; - const char *names[11] = { - "vtx_inv_w", "vtx_inv_w_flat", "vtxD0", "vtxD1", "vtxB0", "vtxB1", - "vtxFog", "vtxT0", "vtxT1", "vtxT2", "vtxT3", + const char *names[9] = { + "vtxD0", "vtxD1", "vtxB0", "vtxB1", "vtxFog", + "vtxT0", "vtxT1", "vtxT2", "vtxT3", }; const char *suffix_s = array ? "[]" : ""; - for (int i = 0; i < 11; i++) { + for (int i = 0; i < 9; i++) { if (location) { mstring_append_fmt(out, "layout(location = %d) ", i); } - mstring_append_fmt(out, "%s %s %s %s%s%s;\n", + mstring_append_fmt(out, "%s%s %s %s%s%s;\n", qualifiers[i], in_out_s, types[i], prefix_s, names[i], suffix_s); } diff --git a/hw/xbox/nv2a/pgraph/glsl/geom.c b/hw/xbox/nv2a/pgraph/glsl/geom.c index 0e738f0280..df265b96d3 100644 --- a/hw/xbox/nv2a/pgraph/glsl/geom.c +++ b/hw/xbox/nv2a/pgraph/glsl/geom.c @@ -182,10 +182,6 @@ MString *pgraph_gen_geom_glsl(enum ShaderPolygonMode polygon_front_mode, "void emit_vertex(int index, int _unused) {\n" " gl_Position = gl_in[index].gl_Position;\n" " gl_PointSize = gl_in[index].gl_PointSize;\n" - // " gl_ClipDistance[0] = gl_in[index].gl_ClipDistance[0];\n" - // " gl_ClipDistance[1] = gl_in[index].gl_ClipDistance[1];\n" - " vtx_inv_w = v_vtx_inv_w[index];\n" - " vtx_inv_w_flat = v_vtx_inv_w[index];\n" " vtxD0 = v_vtxD0[index];\n" " vtxD1 = v_vtxD1[index];\n" " vtxB0 = v_vtxB0[index];\n" @@ -202,10 +198,6 @@ MString *pgraph_gen_geom_glsl(enum ShaderPolygonMode polygon_front_mode, "void emit_vertex(int index, int provoking_index) {\n" " gl_Position = gl_in[index].gl_Position;\n" " gl_PointSize = gl_in[index].gl_PointSize;\n" - // " gl_ClipDistance[0] = gl_in[index].gl_ClipDistance[0];\n" - // " gl_ClipDistance[1] = gl_in[index].gl_ClipDistance[1];\n" - " vtx_inv_w = v_vtx_inv_w[index];\n" - " vtx_inv_w_flat = v_vtx_inv_w[provoking_index];\n" " vtxD0 = v_vtxD0[provoking_index];\n" " vtxD1 = v_vtxD1[provoking_index];\n" " vtxB0 = v_vtxB0[provoking_index];\n" diff --git a/hw/xbox/nv2a/pgraph/glsl/psh.c b/hw/xbox/nv2a/pgraph/glsl/psh.c index 44c56a9c9b..08fec32ec7 100644 --- a/hw/xbox/nv2a/pgraph/glsl/psh.c +++ b/hw/xbox/nv2a/pgraph/glsl/psh.c @@ -745,8 +745,10 @@ static MString* psh_convert(struct PixelShader *ps) mstring_append_fmt(preflight, "%sint alphaRef;\n" "%svec4 fogColor;\n" - "%sivec4 clipRegion[8];\n", - u, u, u); + "%sivec4 clipRegion[8];\n" + "%svec4 clipRange;\n" + "%sfloat depthOffset;\n", + u, u, u, u, u); for (int i = 0; i < 4; i++) { mstring_append_fmt(preflight, "%smat2 bumpMat%d;\n" "%sfloat bumpScale%d;\n" @@ -861,28 +863,62 @@ static MString* psh_convert(struct PixelShader *ps) "}\n"); } - /* calculate perspective-correct inputs */ - MString *vars = mstring_new(); - if (ps->state.smooth_shading) { - mstring_append(vars, "vec4 pD0 = vtxD0 / vtx_inv_w;\n"); - mstring_append(vars, "vec4 pD1 = vtxD1 / vtx_inv_w;\n"); - mstring_append(vars, "vec4 pB0 = vtxB0 / vtx_inv_w;\n"); - mstring_append(vars, "vec4 pB1 = vtxB1 / vtx_inv_w;\n"); - } else { - mstring_append(vars, "vec4 pD0 = vtxD0 / vtx_inv_w_flat;\n"); - mstring_append(vars, "vec4 pD1 = vtxD1 / vtx_inv_w_flat;\n"); - mstring_append(vars, "vec4 pB0 = vtxB0 / vtx_inv_w_flat;\n"); - mstring_append(vars, "vec4 pB1 = vtxB1 / vtx_inv_w_flat;\n"); + /* Depth clipping */ + if (ps->state.depth_clipping) { + if (ps->state.z_perspective) { + mstring_append( + clip, "float zvalue = 1.0/gl_FragCoord.w + depthOffset;\n" + "if (zvalue < clipRange.z || clipRange.w < zvalue) {\n" + " discard;\n" + "}\n"); + } else { + /* Take care of floating point precision problems. MS dashboard + * outputs exactly 0.0 z-coordinates and then our fixed function + * vertex shader outputs -w as the z-coordinate when OpenGL is + * used. Since -w/w = -1, this should give us exactly 0.0 as + * gl_FragCoord.z here. Unfortunately, with AMD Radeon RX 6600 the + * result is slightly greater than 0. MS dashboard sets the clip + * range to [0.0, 0.0] and so the imprecision causes unwanted + * clipping. Note that since Vulkan uses NDC range [0,1] it + * doesn't suffer from this problem with Radeon. Also, despite the + * imprecision OpenGL Radeon writes the correct value 0 to the depth + * buffer (if writing is enabled.) Radeon appears to write floored + * values. To compare, Intel integrated UHD 770 has gl_FragCoord.z + * exactly 0 (and writes rounded to closest integer values to the + * depth buffer.) Radeon OpenGL problem could also be fixed by using + * glClipControl(), but it requires OpenGL 4.5. + * Above is based on experiments with Linux and Mesa. + */ + if (ps->state.vulkan) { + mstring_append( + clip, "if (gl_FragCoord.z*clipRange.y < clipRange.z ||\n" + " gl_FragCoord.z*clipRange.y > clipRange.w) {\n" + " discard;\n" + "}\n"); + } else { + mstring_append( + clip, "if ((gl_FragCoord.z + 1.0f/16777216.0f)*clipRange.y < clipRange.z ||\n" + " (gl_FragCoord.z - 1.0f/16777216.0f)*clipRange.y > clipRange.w) {\n" + " discard;\n" + "}\n"); + } + } } - mstring_append(vars, "vec4 pFog = vec4(fogColor.rgb, clamp(vtxFog / vtx_inv_w, 0.0, 1.0));\n"); - mstring_append(vars, "vec4 pT0 = vtxT0 / vtx_inv_w;\n"); - mstring_append(vars, "vec4 pT1 = vtxT1 / vtx_inv_w;\n"); - mstring_append(vars, "vec4 pT2 = vtxT2 / vtx_inv_w;\n"); + + MString *vars = mstring_new(); + mstring_append(vars, "vec4 pD0 = vtxD0;\n"); + mstring_append(vars, "vec4 pD1 = vtxD1;\n"); + mstring_append(vars, "vec4 pB0 = vtxB0;\n"); + mstring_append(vars, "vec4 pB1 = vtxB1;\n"); + mstring_append(vars, "vec4 pFog = vec4(fogColor.rgb, clamp(vtxFog, 0.0, 1.0));\n"); + mstring_append(vars, "vec4 pT0 = vtxT0;\n"); + mstring_append(vars, "vec4 pT1 = vtxT1;\n"); + mstring_append(vars, "vec4 pT2 = vtxT2;\n"); if (ps->state.point_sprite) { assert(!ps->state.rect_tex[3]); mstring_append(vars, "vec4 pT3 = vec4(gl_PointCoord, 1.0, 1.0);\n"); } else { - mstring_append(vars, "vec4 pT3 = vtxT3 / vtx_inv_w;\n"); + mstring_append(vars, "vec4 pT3 = vtxT3;\n"); } mstring_append(vars, "\n"); mstring_append(vars, "vec4 v0 = pD0;\n"); @@ -1208,6 +1244,23 @@ static MString* psh_convert(struct PixelShader *ps) } } + if (ps->state.z_perspective) { + if (!ps->state.depth_clipping) { + mstring_append(ps->code, + "float zvalue = 1.0/gl_FragCoord.w + depthOffset;\n"); + } + /* TODO: With integer depth buffers Xbox hardware floors values and so + * does Radeon, but Intel UHD 770 rounds to nearest. Should probably + * floor here explicitly (in some way that doesn't also cause + * imprecision issues due to division by clipRange.y) + */ + mstring_append(ps->code, + "gl_FragDepth = clamp(zvalue, clipRange.z, clipRange.w)/clipRange.y;\n"); + } else if (!ps->state.depth_clipping) { + mstring_append(ps->code, + "gl_FragDepth = clamp(gl_FragCoord.z, clipRange.z/clipRange.y, clipRange.w/clipRange.y);\n"); + } + MString *final = mstring_new(); mstring_append_fmt(final, "#version %d\n\n", ps->state.vulkan ? 450 : 400); mstring_append(final, mstring_get_str(preflight)); diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c index 59749003cd..cccb49a33c 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c @@ -422,12 +422,11 @@ GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz mstring_append(body, " oPos = invViewport * (tPosition * compositeMat);\n" + " oPos.w = (2.0f * step(0.0f, oPos.w) - 1.0f) * clamp(abs(oPos.w), 5.421011e-20, 1.8446744e19);\n" ); if (state->vulkan) { mstring_append(body, " oPos.y *= -1;\n"); - } else { - mstring_append(body, " oPos.z = oPos.z * 2.0 - oPos.w;\n"); } /* FIXME: Testing */ @@ -445,14 +444,6 @@ GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz mstring_append_fmt(body, " oPts.x = %f * %d;\n", state->point_size, state->surface_scale_factor); } - - mstring_append(body, - " if (oPos.w == 0.0 || isinf(oPos.w)) {\n" - " vtx_inv_w = 1.0;\n" - " } else {\n" - " vtx_inv_w = 1.0 / oPos.w;\n" - " }\n" - " vtx_inv_w_flat = vtx_inv_w;\n"); } static void append_skinning_code(MString* str, bool mix, diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c index 650d95854c..66fd4df9d0 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c @@ -735,13 +735,8 @@ static const char* vsh_header = "#define RCC(dest, mask, src) dest.mask = _RCC(_in(src).x).mask\n" "vec4 _RCC(float src)\n" "{\n" - " float t = 1.0 / src;\n" - " if (t > 0.0) {\n" - " t = clamp(t, 5.42101e-020, 1.884467e+019);\n" - " } else {\n" - " t = clamp(t, -1.884467e+019, -5.42101e-020);\n" - " }\n" - " return vec4(t);\n" + " src = (2.0f * step(0.0f, src) - 1.0f) * clamp(abs(src), 5.421011e-20, 1.8446744e19);\n" + " return vec4(1.0 / src);\n" "}\n" "\n" "#define RSQ(dest, mask, src) dest.mask = _RSQ(_in(src).x).mask\n" @@ -797,7 +792,6 @@ static const char* vsh_header = void pgraph_gen_vsh_prog_glsl(uint16_t version, const uint32_t *tokens, unsigned int length, - bool z_perspective, bool vulkan, MString *header, MString *body) { @@ -826,18 +820,6 @@ void pgraph_gen_vsh_prog_glsl(uint16_t version, } assert(has_final); - /* pre-divide and output the generated W so we can do persepctive correct - * interpolation manually. OpenGL can't, since we give it a W of 1 to work - * around the perspective divide */ - mstring_append(body, - " if (oPos.w == 0.0 || isinf(oPos.w)) {\n" - " vtx_inv_w = 1.0;\n" - " } else {\n" - " vtx_inv_w = 1.0 / oPos.w;\n" - " }\n" - " vtx_inv_w_flat = vtx_inv_w;\n" - ); - mstring_append(body, /* the shaders leave the result in screen space, while * opengl expects it in clip space. @@ -854,32 +836,17 @@ void pgraph_gen_vsh_prog_glsl(uint16_t version, "/ surfaceSize.y;\n"); } - if (z_perspective) { - mstring_append(body, " oPos.z = oPos.w;\n"); - } - mstring_append(body, - " if (clipRange.y != clipRange.x) {\n"); - if (vulkan) { - mstring_append(body, " oPos.z /= clipRange.y;\n"); - } else { - mstring_append(body, - " oPos.z = (oPos.z - clipRange.x)/(0.5*(clipRange.y " - "- clipRange.x)) - 1;\n"); - } - mstring_append(body, - " }\n" + " oPos.z = oPos.z / clipRange.y;\n" + " oPos.w = (2.0f * step(0.0f, oPos.w) - 1.0f) * clamp(abs(oPos.w), 5.421011e-20, 1.8446744e19);\n" - /* Correct for the perspective divide */ - " if (oPos.w < 0.0) {\n" - /* undo the perspective divide in the case where the point would be - * clipped so opengl can clip it correctly */ - " oPos.xyz *= oPos.w;\n" - " } else {\n" - /* we don't want the OpenGL perspective divide to happen, but we - * can't multiply by W because it could be meaningless here */ - " oPos.w = 1.0;\n" - " }\n" + /* Undo perspective divide by w. + * Note that games may also have vertex shaders that do + * not divide by w (such as 2D-graphics menus or overlays), but since + * OpenGL will later on divide by the same w, we get back the same + * screen space coordinates (perhaps with some loss of floating point + * precision, though.) + */ + " oPos.xyz *= oPos.w;\n" ); - } diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.h b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.h index 84d8141c5e..cffb6be3b3 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.h +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.h @@ -29,7 +29,7 @@ #define HW_XBOX_NV2A_PGRAPH_GLSL_VSH_PROG_H void pgraph_gen_vsh_prog_glsl(uint16_t version, const uint32_t *tokens, - unsigned int length, bool z_perspective, + unsigned int length, bool vulkan, MString *header, MString *body); #endif diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh.c b/hw/xbox/nv2a/pgraph/glsl/vsh.c index a60fbe265d..2a49c1f11a 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh.c @@ -79,8 +79,6 @@ MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs) if (prefix_outputs) { mstring_append(header, - "#define vtx_inv_w v_vtx_inv_w\n" - "#define vtx_inv_w_flat v_vtx_inv_w_flat\n" "#define vtxD0 v_vtxD0\n" "#define vtxD1 v_vtxD1\n" "#define vtxB0 v_vtxB0\n" @@ -142,7 +140,7 @@ MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs) } else if (state->vertex_program) { pgraph_gen_vsh_prog_glsl(VSH_VERSION_XVS, (uint32_t *)state->program_data, - state->program_length, state->z_perspective, + state->program_length, state->vulkan, header, body); } else { assert(false); @@ -233,27 +231,30 @@ MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs) } /* Set outputs */ - const char *shade_model_mult = state->smooth_shading ? "vtx_inv_w" : "vtx_inv_w_flat"; - mstring_append_fmt(body, "\n" - " vtxD0 = clamp(oD0, 0.0, 1.0) * %s;\n" - " vtxD1 = clamp(oD1, 0.0, 1.0) * %s;\n" - " vtxB0 = clamp(oB0, 0.0, 1.0) * %s;\n" - " vtxB1 = clamp(oB1, 0.0, 1.0) * %s;\n" - " vtxFog = oFog.x * vtx_inv_w;\n" - " vtxT0 = oT0 * vtx_inv_w;\n" - " vtxT1 = oT1 * vtx_inv_w;\n" - " vtxT2 = oT2 * vtx_inv_w;\n" - " vtxT3 = oT3 * vtx_inv_w;\n" - " gl_Position = oPos;\n" - " gl_PointSize = oPts.x;\n" - // " gl_ClipDistance[0] = oPos.z - oPos.w*clipRange.z;\n" // Near - // " gl_ClipDistance[1] = oPos.w*clipRange.w - oPos.z;\n" // Far - "\n" - "}\n", - shade_model_mult, - shade_model_mult, - shade_model_mult, - shade_model_mult); + mstring_append(body, "\n" + " vtxD0 = clamp(oD0, 0.0, 1.0);\n" + " vtxD1 = clamp(oD1, 0.0, 1.0);\n" + " vtxB0 = clamp(oB0, 0.0, 1.0);\n" + " vtxB1 = clamp(oB1, 0.0, 1.0);\n" + " vtxFog = oFog.x;\n" + " vtxT0 = oT0;\n" + " vtxT1 = oT1;\n" + " vtxT2 = oT2;\n" + " vtxT3 = oT3;\n" + " gl_PointSize = oPts.x;\n" + ); + + if (state->vulkan) { + mstring_append(body, + " gl_Position = oPos;\n" + ); + } else { + mstring_append(body, + " gl_Position = vec4(oPos.x, oPos.y, 2.0*oPos.z - oPos.w, oPos.w);\n" + ); + } + + mstring_append(body, "}\n"); /* Return combined header + source */ if (state->vulkan) { diff --git a/hw/xbox/nv2a/pgraph/psh.h b/hw/xbox/nv2a/pgraph/psh.h index 1366045707..c54e650e99 100644 --- a/hw/xbox/nv2a/pgraph/psh.h +++ b/hw/xbox/nv2a/pgraph/psh.h @@ -85,6 +85,8 @@ typedef struct PshState { bool window_clip_exclusive; bool smooth_shading; + bool depth_clipping; + bool z_perspective; } PshState; #endif diff --git a/hw/xbox/nv2a/pgraph/shaders.c b/hw/xbox/nv2a/pgraph/shaders.c index 8d2c77a535..6e13f2084c 100644 --- a/hw/xbox/nv2a/pgraph/shaders.c +++ b/hw/xbox/nv2a/pgraph/shaders.c @@ -94,6 +94,7 @@ ShaderState pgraph_get_shader_state(PGRAPHState *pg) state.vertex_program = vertex_program, state.z_perspective = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE; + state.psh.z_perspective = state.z_perspective; state.point_params_enable = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D), NV_PGRAPH_CSV0_D_POINTPARAMSENABLE); @@ -117,6 +118,10 @@ ShaderState pgraph_get_shader_state(PGRAPHState *pg) NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH; state.psh.smooth_shading = state.smooth_shading; + state.psh.depth_clipping = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ZCOMPRESSOCCLUDE), + NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN) == + NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CULL; + state.program_length = 0; if (vertex_program) { diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 4cde028b1f..b3c41bc96b 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -816,7 +816,7 @@ static void create_pipeline(PGRAPHState *pg) VkPipelineRasterizationStateCreateInfo rasterizer = { .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .depthClampEnable = VK_FALSE, + .depthClampEnable = VK_TRUE, .rasterizerDiscardEnable = VK_FALSE, .polygonMode = pgraph_polygon_mode_vk_map[r->shader_binding->state .polygon_front_mode], @@ -958,10 +958,6 @@ static void create_pipeline(PGRAPHState *pg) .pDynamicStates = dynamic_states, }; - // /* Clipping */ - // glEnable(GL_CLIP_DISTANCE0); - // glEnable(GL_CLIP_DISTANCE1); - // /* Polygon offset */ // /* FIXME: GL implementation-specific, maybe do this in VS? */ // if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & @@ -983,12 +979,6 @@ static void create_pipeline(PGRAPHState *pg) rasterizer.depthBiasConstantFactor = zbias; } - if (GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ZCOMPRESSOCCLUDE), - NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN) == - NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP) { - rasterizer.depthClampEnable = VK_TRUE; - } - // FIXME: Dither // if (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & // NV_PGRAPH_CONTROL_0_DITHERENABLE)) diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h index 781cc8dc49..1357d707b0 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -173,6 +173,8 @@ typedef struct ShaderBinding { int surface_size_loc; int clip_range_loc; + int clip_range_floc; + int depth_offset_loc; int vsh_constant_loc; uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4]; diff --git a/hw/xbox/nv2a/pgraph/vk/shaders.c b/hw/xbox/nv2a/pgraph/vk/shaders.c index 5fce943d49..421a81ba60 100644 --- a/hw/xbox/nv2a/pgraph/vk/shaders.c +++ b/hw/xbox/nv2a/pgraph/vk/shaders.c @@ -276,6 +276,10 @@ static void update_shader_constant_locations(ShaderBinding *binding) uniform_index(&binding->vertex->uniforms, "surfaceSize"); binding->clip_range_loc = uniform_index(&binding->vertex->uniforms, "clipRange"); + binding->clip_range_floc = + uniform_index(&binding->fragment->uniforms, "clipRange"); + binding->depth_offset_loc = + uniform_index(&binding->fragment->uniforms, "depthOffset"); binding->fog_param_loc = uniform_index(&binding->vertex->uniforms, "fogParam"); @@ -637,14 +641,47 @@ static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, pg->surface_binding_dim.height / aa_height); } - if (binding->clip_range_loc != -1) { + if (binding->clip_range_loc != -1 || binding->clip_range_floc != -1) { uint32_t v[2]; v[0] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMIN); v[1] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMAX); - float zclip_min = *(float *)&v[0] / zmax * 2.0 - 1.0; - float zclip_max = *(float *)&v[1] / zmax * 2.0 - 1.0; - uniform4f(&binding->vertex->uniforms, binding->clip_range_loc, 0, - zmax, zclip_min, zclip_max); + float zclip_min = *(float *)&v[0]; + float zclip_max = *(float *)&v[1]; + + if (binding->clip_range_loc != -1) { + uniform4f(&binding->vertex->uniforms, binding->clip_range_loc, 0, + zmax, zclip_min, zclip_max); + } + if (binding->clip_range_floc != -1) { + uniform4f(&binding->fragment->uniforms, binding->clip_range_floc, 0, + zmax, zclip_min, zclip_max); + } + } + + if (binding->depth_offset_loc != -1) { + float zbias = 0.0f; + + if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + (NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE | + NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE | + NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) { + uint32_t zbias_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS); + zbias = *(float *)&zbias_u32; + + if (pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR) != 0 && + (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & + NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE)) { + /* TODO: emulate zfactor when z_perspective true, i.e. + * w-buffering. Perhaps calculate an additional offset based on + * triangle orientation in geometry shader and pass the result + * to fragment shader and add it to gl_FragDepth as well. + */ + NV2A_UNIMPLEMENTED("NV_PGRAPH_ZOFFSETFACTOR for w-buffering"); + } + } + + uniform1f(&binding->fragment->uniforms, binding->depth_offset_loc, + zbias); } /* Clipping regions */ @@ -724,6 +761,7 @@ static bool check_shaders_dirty(PGRAPHState *pg) NV_PGRAPH_SHADERCTL, NV_PGRAPH_SHADERPROG, NV_PGRAPH_SHADOWCTL, + NV_PGRAPH_ZCOMPRESSOCCLUDE, }; for (int i = 0; i < ARRAY_SIZE(regs); i++) { if (pgraph_is_reg_dirty(pg, regs[i])) {