From c7705578f5c22f67bc2e6617cb306c179422222a Mon Sep 17 00:00:00 2001 From: coldhex Date: Mon, 6 Jan 2025 12:13:11 +0200 Subject: [PATCH 1/8] nv2a: Fix vertex ordering of triangle strips in polygon line mode This fixes nxdk_pgraph_tests W_param tests a bit more. Note that the geometry shader approach in Xemu for polygon line mode doesn't currently implement face culling. It could be improved by using the built-in gl_FrontFacing variable in the geometry shader. --- hw/xbox/nv2a/pgraph/glsl/geom.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/glsl/geom.c b/hw/xbox/nv2a/pgraph/glsl/geom.c index ca46a5f3af..893f7126f3 100644 --- a/hw/xbox/nv2a/pgraph/glsl/geom.c +++ b/hw/xbox/nv2a/pgraph/glsl/geom.c @@ -139,26 +139,6 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) " EndPrimitive();\n"; break; case PRIM_TYPE_TRIANGLE_STRIP: - if (polygon_mode == POLY_MODE_FILL) { return NULL; } - assert(polygon_mode == POLY_MODE_LINE); - layout_in = "layout(triangles) in;\n"; - layout_out = "layout(line_strip, max_vertices = 4) out;\n"; - /* Imagine a quad made of a tristrip, the comments tell you which - * vertex we are using */ - body = " if ((gl_PrimitiveIDIn & 1) == 0) {\n" - " if (gl_PrimitiveIDIn == 0) {\n" - " emit_vertex(0, 0);\n" /* bottom right */ - " }\n" - " emit_vertex(1, 0);\n" /* top right */ - " emit_vertex(2, 0);\n" /* bottom left */ - " emit_vertex(0, 0);\n" /* bottom right */ - " } else {\n" - " emit_vertex(2, 0);\n" /* bottom left */ - " emit_vertex(1, 0);\n" /* top left */ - " emit_vertex(0, 0);\n" /* top right */ - " }\n" - " EndPrimitive();\n"; - break; case PRIM_TYPE_TRIANGLE_FAN: if (polygon_mode == POLY_MODE_FILL) { return NULL; } assert(polygon_mode == POLY_MODE_LINE); From e21af7a8f2e5925c2c30797b3b347546c1d0ca7c Mon Sep 17 00:00:00 2001 From: coldhex Date: Mon, 5 May 2025 22:08:30 +0300 Subject: [PATCH 2/8] nv2a: Depth buffer precision improvements and polygon offset slope factor 
1. Use barycentric coordinates to interpolate depth values. The combination of Linux, Mesa and an AMD Radeon RX 6600 with the Vulkan driver currently has quite poor interpolation precision, which results in artifacts in at least Chronicles of Riddick. The Intel integrated UHD 770, for example, has much better precision. This commit handles depth interpolation manually. Also note that the previous w-buffer interpolation used gl_FragCoord.w, which can't produce all w-values, e.g. 1.0f/16777046.0f equals 1.0f/16777047.0f with 32-bit floats. This commit also uses depth value differences in the interpolation, which has the desired property that a triangle with the same z-value on all vertices will result in exactly that same z-value when interpolated. At least the sky rendering in the game Shenmue II relies on this. 2. Compute the polygon depth bias slope for both z-buffering and w-buffering. The slope is computed by taking the maximum of the absolute values of the partial derivatives of either of the functions z=z(x,y) or w=w(x,y), where x,y,z,w are screen-space coordinates. This matches Xbox hardware for z-buffering, where the partial derivatives are constant over any fixed triangle. However, for w-buffering the partial derivatives vary over any fixed triangle, but Xbox appears to compute just a single depth slope at the first visible pixel (where "first" means something like first in top-left order) and uses that over the whole triangle. This commit computes the slope per-pixel. The partial derivatives are computed using the chain rule, e.g. dw/dx = -w^2 * d(1/w)/dx. This is useful since 1/w is linear in screen-space and therefore d(1/w)/dx is constant over any fixed triangle. But, as mentioned, finding out the w-value for the first visible pixel of a triangle is difficult in OpenGL/Vulkan and is not done here. Instead, we calculate the depth slope per-pixel. 
--- hw/xbox/nv2a/pgraph/gl/draw.c | 42 +-- hw/xbox/nv2a/pgraph/gl/shaders.c | 7 +- hw/xbox/nv2a/pgraph/glsl/common.c | 4 + hw/xbox/nv2a/pgraph/glsl/common.h | 2 + hw/xbox/nv2a/pgraph/glsl/geom.c | 444 ++++++++++++++++------- hw/xbox/nv2a/pgraph/glsl/geom.h | 1 + hw/xbox/nv2a/pgraph/glsl/psh.c | 240 ++++++++---- hw/xbox/nv2a/pgraph/glsl/psh.h | 12 + hw/xbox/nv2a/pgraph/glsl/shaders.c | 3 +- hw/xbox/nv2a/pgraph/glsl/vsh-ff.c | 3 +- hw/xbox/nv2a/pgraph/glsl/vsh-prog.c | 6 +- hw/xbox/nv2a/pgraph/glsl/vsh.c | 8 + hw/xbox/nv2a/pgraph/vk/draw.c | 35 +- hw/xbox/nv2a/pgraph/vk/surface-compute.c | 3 +- 14 files changed, 526 insertions(+), 284 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/gl/draw.c b/hw/xbox/nv2a/pgraph/gl/draw.c index 79c18040f9..7af27d845c 100644 --- a/hw/xbox/nv2a/pgraph/gl/draw.c +++ b/hw/xbox/nv2a/pgraph/gl/draw.c @@ -208,38 +208,10 @@ void pgraph_gl_draw_begin(NV2AState *d) & NV_PGRAPH_SETUPRASTER_FRONTFACE ? GL_CW : GL_CCW); - /* Polygon offset */ - /* FIXME: GL implementation-specific, maybe do this in VS? 
*/ - if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & - NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE) { - glEnable(GL_POLYGON_OFFSET_FILL); - } else { - glDisable(GL_POLYGON_OFFSET_FILL); - } - if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & - NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE) { - glEnable(GL_POLYGON_OFFSET_LINE); - } else { - glDisable(GL_POLYGON_OFFSET_LINE); - } - if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & - NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE) { - glEnable(GL_POLYGON_OFFSET_POINT); - } else { - glDisable(GL_POLYGON_OFFSET_POINT); - } - if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & - (NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE | - NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE | - NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) { - uint32_t zfactor_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR); - GLfloat zfactor = *(float*)&zfactor_u32; - uint32_t zbias_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS); - GLfloat zbias = *(float*)&zbias_u32; - // FIXME: with Linux and Mesa, zbias must be multiplied by 0.5 in - // order to have the same depth value offset as Xbox. 
- glPolygonOffset(zfactor, zbias); - } + /* Polygon offset is handled in geometry and fragment shaders explicitly */ + glDisable(GL_POLYGON_OFFSET_FILL); + glDisable(GL_POLYGON_OFFSET_LINE); + glDisable(GL_POLYGON_OFFSET_POINT); /* Depth testing */ if (depth_test) { @@ -255,12 +227,6 @@ void pgraph_gl_draw_begin(NV2AState *d) glEnable(GL_DEPTH_CLAMP); - if (GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3), - NV_PGRAPH_CONTROL_3_SHADEMODE) == - NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT) { - glProvokingVertex(GL_FIRST_VERTEX_CONVENTION); - } - if (stencil_test) { glEnable(GL_STENCIL_TEST); diff --git a/hw/xbox/nv2a/pgraph/gl/shaders.c b/hw/xbox/nv2a/pgraph/gl/shaders.c index 30b4c5cbba..4400133434 100644 --- a/hw/xbox/nv2a/pgraph/gl/shaders.c +++ b/hw/xbox/nv2a/pgraph/gl/shaders.c @@ -31,10 +31,6 @@ static GLenum get_gl_primitive_mode(enum ShaderPolygonMode polygon_mode, enum ShaderPrimitiveMode primitive_mode) { - if (polygon_mode == POLY_MODE_POINT) { - return GL_POINTS; - } - switch (primitive_mode) { case PRIM_TYPE_POINTS: return GL_POINTS; case PRIM_TYPE_LINES: return GL_LINES; @@ -705,6 +701,9 @@ static void apply_uniform_updates(const UniformInfo *info, int *locs, case UniformElementType_int: glUniform1iv(locs[i], info[i].count, value); break; + case UniformElementType_ivec2: + glUniform2iv(locs[i], info[i].count, value); + break; case UniformElementType_ivec4: glUniform4iv(locs[i], info[i].count, value); break; diff --git a/hw/xbox/nv2a/pgraph/glsl/common.c b/hw/xbox/nv2a/pgraph/glsl/common.c index 338f58ab9a..887d2a2e23 100644 --- a/hw/xbox/nv2a/pgraph/glsl/common.c +++ b/hw/xbox/nv2a/pgraph/glsl/common.c @@ -48,6 +48,10 @@ MString *pgraph_glsl_get_vtx_header(MString *out, bool location, bool smooth, { smooth_s, vec4_s, "vtxT1" }, { smooth_s, vec4_s, "vtxT2" }, { smooth_s, vec4_s, "vtxT3" }, + { flat_s, vec4_s, "vtxPos0" }, + { flat_s, vec4_s, "vtxPos1" }, + { flat_s, vec4_s, "vtxPos2" }, + { flat_s, float_s, "triMZ" }, }; for (int i = 0; i < ARRAY_SIZE(attr); i++) { 
diff --git a/hw/xbox/nv2a/pgraph/glsl/common.h b/hw/xbox/nv2a/pgraph/glsl/common.h index 9dc1fa0347..4b327421be 100644 --- a/hw/xbox/nv2a/pgraph/glsl/common.h +++ b/hw/xbox/nv2a/pgraph/glsl/common.h @@ -25,6 +25,7 @@ #include "qemu/osdep.h" #include "qemu/mstring.h" +typedef int ivec2[2]; typedef int ivec4[4]; typedef float mat2[2 * 2]; typedef unsigned int uint; @@ -35,6 +36,7 @@ typedef float vec4[4]; #define UNIFORM_ELEMENT_TYPE_X(DECL) \ DECL(float) \ DECL(int) \ + DECL(ivec2) \ DECL(ivec4) \ DECL(mat2) \ DECL(uint) \ diff --git a/hw/xbox/nv2a/pgraph/glsl/geom.c b/hw/xbox/nv2a/pgraph/glsl/geom.c index 893f7126f3..2170696a08 100644 --- a/hw/xbox/nv2a/pgraph/glsl/geom.c +++ b/hw/xbox/nv2a/pgraph/glsl/geom.c @@ -37,6 +37,9 @@ void pgraph_glsl_set_geom_state(PGRAPHState *pg, GeomState *state) state->smooth_shading = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3), NV_PGRAPH_CONTROL_3_SHADEMODE) == NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH; + + state->z_perspective = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & + NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE; } bool pgraph_glsl_need_geom(const GeomState *state) @@ -45,63 +48,24 @@ bool pgraph_glsl_need_geom(const GeomState *state) assert(state->polygon_front_mode == state->polygon_back_mode); enum ShaderPolygonMode polygon_mode = state->polygon_front_mode; - /* POINT mode shouldn't require any special work */ - if (polygon_mode == POLY_MODE_POINT) { - return false; - } - switch (state->primitive_mode) { + case PRIM_TYPE_POINTS: + return false; + case PRIM_TYPE_LINES: + case PRIM_TYPE_LINE_LOOP: + case PRIM_TYPE_LINE_STRIP: case PRIM_TYPE_TRIANGLES: - if (polygon_mode == POLY_MODE_FILL) { - return false; - } - return true; case PRIM_TYPE_TRIANGLE_STRIP: - if (polygon_mode == POLY_MODE_FILL) { - return false; - } - assert(polygon_mode == POLY_MODE_LINE); - return true; case PRIM_TYPE_TRIANGLE_FAN: - if (polygon_mode == POLY_MODE_FILL) { - return false; - } - assert(polygon_mode == POLY_MODE_LINE); - return true; case PRIM_TYPE_QUADS: 
- if (polygon_mode == POLY_MODE_LINE) { - return true; - } else if (polygon_mode == POLY_MODE_FILL) { - return true; - } else { - assert(false); - return false; - } - break; case PRIM_TYPE_QUAD_STRIP: - if (polygon_mode == POLY_MODE_LINE) { - return true; - } else if (polygon_mode == POLY_MODE_FILL) { - return true; - } else { - assert(false); - return false; - } - break; + return true; case PRIM_TYPE_POLYGON: - if (polygon_mode == POLY_MODE_LINE) { - return false; - } - if (polygon_mode == POLY_MODE_FILL) { - if (state->smooth_shading) { - return false; - } - return true; - } else { + if (polygon_mode == POLY_MODE_POINT) { assert(false); return false; } - break; + return true; default: return false; } @@ -113,106 +77,225 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) assert(state->polygon_front_mode == state->polygon_back_mode); enum ShaderPolygonMode polygon_mode = state->polygon_front_mode; - /* POINT mode shouldn't require any special work */ - if (polygon_mode == POLY_MODE_POINT) { - return NULL; - } - - /* Handle LINE and FILL mode */ + bool need_triz = false; + bool need_quadz = false; + bool need_linez = false; const char *layout_in = NULL; const char *layout_out = NULL; const char *body = NULL; + switch (state->primitive_mode) { case PRIM_TYPE_POINTS: return NULL; - case PRIM_TYPE_LINES: return NULL; - case PRIM_TYPE_LINE_LOOP: return NULL; - case PRIM_TYPE_LINE_STRIP: return NULL; - case PRIM_TYPE_TRIANGLES: - if (polygon_mode == POLY_MODE_FILL) { return NULL; } - assert(polygon_mode == POLY_MODE_LINE); - layout_in = "layout(triangles) in;\n"; - layout_out = "layout(line_strip, max_vertices = 4) out;\n"; - body = " emit_vertex(0, 0);\n" - " emit_vertex(1, 0);\n" - " emit_vertex(2, 0);\n" - " emit_vertex(0, 0);\n" + case PRIM_TYPE_LINES: + case PRIM_TYPE_LINE_LOOP: + case PRIM_TYPE_LINE_STRIP: + need_linez = true; + layout_in = "layout(lines) in;\n"; + layout_out = "layout(line_strip, max_vertices = 2) out;\n"; + body = " 
mat4 pz = calc_linez(0, 1);\n" + " emit_vertex(0, 0, pz);\n" + " emit_vertex(1, 1, pz);\n" " EndPrimitive();\n"; break; + case PRIM_TYPE_TRIANGLES: + need_triz = true; + layout_in = "layout(triangles) in;\n"; + if (polygon_mode == POLY_MODE_FILL) { + layout_out = "layout(triangle_strip, max_vertices = 3) out;\n"; + body = " mat4 pz = calc_triz(0, 1, 2);\n" + " emit_vertex(0, 0, pz);\n" + " emit_vertex(1, 1, pz);\n" + " emit_vertex(2, 2, pz);\n" + " EndPrimitive();\n"; + } else if (polygon_mode == POLY_MODE_LINE) { + need_linez = true; + layout_out = "layout(line_strip, max_vertices = 4) out;\n"; + body = " float triMZ = calc_triz(0, 1, 2)[3].x;\n" + " mat4 pz1 = calc_linez(0, 1);\n" + " pz1[3].x = triMZ;\n" + " mat4 pz2 = calc_linez(1, 2);\n" + " pz2[3].x = triMZ;\n" + " mat4 pz3 = calc_linez(2, 0);\n" + " pz3[3].x = triMZ;\n" + " emit_vertex(0, 0, pz1);\n" + " emit_vertex(1, 0, pz1);\n" + " emit_vertex(2, 0, pz2);\n" + " emit_vertex(0, 0, pz3);\n" + " EndPrimitive();\n"; + } else { + assert(polygon_mode == POLY_MODE_POINT); + layout_out = "layout(points, max_vertices = 3) out;\n"; + body = " mat4 pz = calc_triz(0, 1, 2);\n" + " emit_vertex(0, 0, mat4(pz[0], pz[0], pz[0], pz[3]));\n" + " EndPrimitive();\n" + " emit_vertex(1, 0, mat4(pz[1], pz[1], pz[1], pz[3]));\n" + " EndPrimitive();\n" + " emit_vertex(2, 0, mat4(pz[2], pz[2], pz[2], pz[3]));\n" + " EndPrimitive();\n"; + } + break; case PRIM_TYPE_TRIANGLE_STRIP: case PRIM_TYPE_TRIANGLE_FAN: - if (polygon_mode == POLY_MODE_FILL) { return NULL; } - assert(polygon_mode == POLY_MODE_LINE); + need_triz = true; layout_in = "layout(triangles) in;\n"; - layout_out = "layout(line_strip, max_vertices = 4) out;\n"; - body = " if (gl_PrimitiveIDIn == 0) {\n" - " emit_vertex(0, 0);\n" - " }\n" - " emit_vertex(1, 0);\n" - " emit_vertex(2, 0);\n" - " emit_vertex(0, 0);\n" - " EndPrimitive();\n"; + if (polygon_mode == POLY_MODE_FILL) { + layout_out = "layout(triangle_strip, max_vertices = 3) out;\n"; + body = " mat4 pz = 
calc_triz(0, 1, 2);\n" + " emit_vertex(0, 0, pz);\n" + " emit_vertex(1, 1, pz);\n" + " emit_vertex(2, 2, pz);\n" + " EndPrimitive();\n"; + } else if (polygon_mode == POLY_MODE_LINE) { + need_linez = true; + layout_out = "layout(line_strip, max_vertices = 4) out;\n"; + body = " float triMZ = calc_triz(0, 1, 2)[3].x;\n" + " mat4 pz1 = calc_linez(0, 1);\n" + " pz1[3].x = triMZ;\n" + " mat4 pz2 = calc_linez(1, 2);\n" + " pz2[3].x = triMZ;\n" + " mat4 pz3 = calc_linez(2, 0);\n" + " pz3[3].x = triMZ;\n" + " if (gl_PrimitiveIDIn == 0) {\n" + " emit_vertex(0, 0, pz1);\n" + " }\n" + " emit_vertex(1, 0, pz1);\n" + " emit_vertex(2, 0, pz2);\n" + " emit_vertex(0, 0, pz3);\n" + " EndPrimitive();\n"; + } else { + assert(polygon_mode == POLY_MODE_POINT); + layout_out = "layout(points, max_vertices = 3) out;\n"; + body = " mat4 pz = calc_triz(0, 1, 2);\n" + " if (gl_PrimitiveIDIn == 0) {\n" + " emit_vertex(0, 0, mat4(pz[0], pz[0], pz[0], pz[3]));\n" + " EndPrimitive();\n" + " emit_vertex(1, 0, mat4(pz[1], pz[1], pz[1], pz[3]));\n" + " EndPrimitive();\n" + " }\n" + " emit_vertex(2, 0, mat4(pz[2], pz[2], pz[2], pz[3]));\n" + " EndPrimitive();\n"; + } break; case PRIM_TYPE_QUADS: + need_quadz = true; layout_in = "layout(lines_adjacency) in;\n"; if (polygon_mode == POLY_MODE_LINE) { + need_linez = true; layout_out = "layout(line_strip, max_vertices = 5) out;\n"; - body = " emit_vertex(0, 3);\n" - " emit_vertex(1, 3);\n" - " emit_vertex(2, 3);\n" - " emit_vertex(3, 3);\n" - " emit_vertex(0, 3);\n" + body = " mat4 pz, pzs;\n" + " calc_quadz(0, 1, 2, 3, pz, pzs);\n" + " mat4 pz1 = calc_linez(0, 1);\n" + " pz1[3].x = pz[3].x;\n" + " mat4 pz2 = calc_linez(1, 2);\n" + " pz2[3].x = pz[3].x;\n" + " mat4 pz3 = calc_linez(2, 3);\n" + " pz3[3].x = pzs[3].x;\n" + " mat4 pz4 = calc_linez(3, 0);\n" + " pz4[3].x = pzs[3].x;\n" + " emit_vertex(0, 3, pz1);\n" + " emit_vertex(1, 3, pz1);\n" + " emit_vertex(2, 3, pz2);\n" + " emit_vertex(3, 3, pz3);\n" + " emit_vertex(0, 3, pz4);\n" " 
EndPrimitive();\n"; } else if (polygon_mode == POLY_MODE_FILL) { layout_out = "layout(triangle_strip, max_vertices = 4) out;\n"; - body = " emit_vertex(3, 3);\n" - " emit_vertex(0, 3);\n" - " emit_vertex(2, 3);\n" - " emit_vertex(1, 3);\n" + body = " mat4 pz, pz2;\n" + " calc_quadz(0, 1, 2, 3, pz, pz2);\n" + " emit_vertex(1, 3, pz);\n" + " emit_vertex(2, 3, pz2);\n" + " emit_vertex(0, 3, pz);\n" + " emit_vertex(3, 3, pz2);\n" " EndPrimitive();\n"; } else { - assert(false); - return NULL; + assert(polygon_mode == POLY_MODE_POINT); + layout_out = "layout(points, max_vertices = 4) out;\n"; + body = " mat4 pz, pz2;\n" + " calc_quadz(0, 1, 2, 3, pz, pz2);\n" + " emit_vertex(0, 3, mat4(pz[0], pz[0], pz[0], pz[3]));\n" + " EndPrimitive();\n" + " emit_vertex(1, 3, mat4(pz[1], pz[1], pz[1], pz[3]));\n" + " EndPrimitive();\n" + " emit_vertex(2, 3, mat4(pz[2], pz[2], pz[2], pz[3]));\n" + " EndPrimitive();\n" + " emit_vertex(3, 3, mat4(pz2[2], pz2[2], pz2[2], pz2[3]));\n" + " EndPrimitive();\n"; } break; case PRIM_TYPE_QUAD_STRIP: + need_quadz = true; layout_in = "layout(lines_adjacency) in;\n"; if (polygon_mode == POLY_MODE_LINE) { + need_linez = true; layout_out = "layout(line_strip, max_vertices = 5) out;\n"; body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n" + " mat4 pz, pzs;\n" + " calc_quadz(2, 0, 1, 3, pz, pzs);\n" + " mat4 pz1 = calc_linez(0, 1);\n" + " pz1[3].x = pz[3].x;\n" + " mat4 pz2 = calc_linez(1, 3);\n" + " pz2[3].x = pzs[3].x;\n" + " mat4 pz3 = calc_linez(3, 2);\n" + " pz3[3].x = pzs[3].x;\n" + " mat4 pz4 = calc_linez(2, 0);\n" + " pz4[3].x = pz[3].x;\n" " if (gl_PrimitiveIDIn == 0) {\n" - " emit_vertex(0, 3);\n" + " emit_vertex(0, 3, pz1);\n" " }\n" - " emit_vertex(1, 3);\n" - " emit_vertex(3, 3);\n" - " emit_vertex(2, 3);\n" - " emit_vertex(0, 3);\n" + " emit_vertex(1, 3, pz1);\n" + " emit_vertex(3, 3, pz2);\n" + " emit_vertex(2, 3, pz3);\n" + " emit_vertex(0, 3, pz4);\n" " EndPrimitive();\n"; } else if (polygon_mode == POLY_MODE_FILL) { layout_out = 
"layout(triangle_strip, max_vertices = 4) out;\n"; body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n" - " emit_vertex(0, 3);\n" - " emit_vertex(1, 3);\n" - " emit_vertex(2, 3);\n" - " emit_vertex(3, 3);\n" + " mat4 pz, pz2;\n" + " calc_quadz(2, 0, 1, 3, pz, pz2);\n" + " emit_vertex(0, 3, pz);\n" + " emit_vertex(1, 3, pz2);\n" + " emit_vertex(2, 3, pz);\n" + " emit_vertex(3, 3, pz2);\n" " EndPrimitive();\n"; } else { - assert(false); - return NULL; + assert(polygon_mode == POLY_MODE_POINT); + layout_out = "layout(points, max_vertices = 4) out;\n"; + body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n" + " mat4 pz, pz2;\n" + " calc_quadz(2, 0, 1, 3, pz, pz2);\n" + " if (gl_PrimitiveIDIn == 0) {\n" + " emit_vertex(0, 3, mat4(pz[1], pz[1], pz[1], pz[3]));\n" + " EndPrimitive();\n" + " emit_vertex(1, 3, mat4(pz[2], pz[2], pz[2], pz[3]));\n" + " EndPrimitive();\n" + " }\n" + " emit_vertex(2, 3, mat4(pz[0], pz[0], pz[0], pz[3]));\n" + " EndPrimitive();\n" + " emit_vertex(3, 3, mat4(pz2[2], pz2[2], pz2[2], pz2[3]));\n" + " EndPrimitive();\n"; } break; case PRIM_TYPE_POLYGON: - if (polygon_mode == POLY_MODE_LINE) { - return NULL; - } if (polygon_mode == POLY_MODE_FILL) { - if (state->smooth_shading) { - return NULL; - } + need_triz = true; layout_in = "layout(triangles) in;\n"; layout_out = "layout(triangle_strip, max_vertices = 3) out;\n"; - body = " emit_vertex(0, 2);\n" - " emit_vertex(1, 2);\n" - " emit_vertex(2, 2);\n" + body = " mat4 pz = calc_triz(0, 1, 2);\n" + " emit_vertex(0, 0, pz);\n" + " emit_vertex(1, 0, pz);\n" + " emit_vertex(2, 0, pz);\n" + " EndPrimitive();\n"; + } else if (polygon_mode == POLY_MODE_LINE) { + need_linez = true; + // FIXME: input here is lines and not triangles so we cannot + // calculate triangle plane slope. Also, the first vertex of the + // polygon is unavailable so flat shading provoking vertex is + // wrong. 
+ layout_in = "layout(lines) in;\n"; + layout_out = "layout(line_strip, max_vertices = 2) out;\n"; + body = " mat4 pz = calc_linez(0, 1);\n" + " emit_vertex(0, 0, pz);\n" + " emit_vertex(1, 1, pz);\n" " EndPrimitive();\n"; } else { assert(false); @@ -233,6 +316,8 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) mstring_from_fmt("#version %d\n\n" "%s" "%s" + "\n" + "#define v_vtxPos v_vtxPos0\n" "\n", opts.vulkan ? 450 : 400, layout_in, layout_out); pgraph_glsl_get_vtx_header(output, opts.vulkan, state->smooth_shading, true, @@ -241,37 +326,124 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) false, false, false); if (state->smooth_shading) { - mstring_append(output, - "void emit_vertex(int index, int _unused) {\n" - " gl_Position = gl_in[index].gl_Position;\n" - " gl_PointSize = gl_in[index].gl_PointSize;\n" - " vtxD0 = v_vtxD0[index];\n" - " vtxD1 = v_vtxD1[index];\n" - " vtxB0 = v_vtxB0[index];\n" - " vtxB1 = v_vtxB1[index];\n" - " vtxFog = v_vtxFog[index];\n" - " vtxT0 = v_vtxT0[index];\n" - " vtxT1 = v_vtxT1[index];\n" - " vtxT2 = v_vtxT2[index];\n" - " vtxT3 = v_vtxT3[index];\n" - " EmitVertex();\n" - "}\n"); + mstring_append( + output, + "void emit_vertex(int index, int _unused, mat4 pz) {\n" + " gl_Position = gl_in[index].gl_Position;\n" + " gl_PointSize = gl_in[index].gl_PointSize;\n" + " vtxD0 = v_vtxD0[index];\n" + " vtxD1 = v_vtxD1[index];\n" + " vtxB0 = v_vtxB0[index];\n" + " vtxB1 = v_vtxB1[index];\n" + " vtxFog = v_vtxFog[index];\n" + " vtxT0 = v_vtxT0[index];\n" + " vtxT1 = v_vtxT1[index];\n" + " vtxT2 = v_vtxT2[index];\n" + " vtxT3 = v_vtxT3[index];\n" + " vtxPos0 = pz[0];\n" + " vtxPos1 = pz[1];\n" + " vtxPos2 = pz[2];\n" + " triMZ = (isnan(pz[3].x) || isinf(pz[3].x)) ? 
0.0 : pz[3].x;\n" + " EmitVertex();\n" + "}\n"); } else { - mstring_append(output, - "void emit_vertex(int index, int provoking_index) {\n" - " gl_Position = gl_in[index].gl_Position;\n" - " gl_PointSize = gl_in[index].gl_PointSize;\n" - " vtxD0 = v_vtxD0[provoking_index];\n" - " vtxD1 = v_vtxD1[provoking_index];\n" - " vtxB0 = v_vtxB0[provoking_index];\n" - " vtxB1 = v_vtxB1[provoking_index];\n" - " vtxFog = v_vtxFog[index];\n" - " vtxT0 = v_vtxT0[index];\n" - " vtxT1 = v_vtxT1[index];\n" - " vtxT2 = v_vtxT2[index];\n" - " vtxT3 = v_vtxT3[index];\n" - " EmitVertex();\n" - "}\n"); + mstring_append( + output, + "void emit_vertex(int index, int provoking_index, mat4 pz) {\n" + " gl_Position = gl_in[index].gl_Position;\n" + " gl_PointSize = gl_in[index].gl_PointSize;\n" + " vtxD0 = v_vtxD0[provoking_index];\n" + " vtxD1 = v_vtxD1[provoking_index];\n" + " vtxB0 = v_vtxB0[provoking_index];\n" + " vtxB1 = v_vtxB1[provoking_index];\n" + " vtxFog = v_vtxFog[index];\n" + " vtxT0 = v_vtxT0[index];\n" + " vtxT1 = v_vtxT1[index];\n" + " vtxT2 = v_vtxT2[index];\n" + " vtxT3 = v_vtxT3[index];\n" + " vtxPos0 = pz[0];\n" + " vtxPos1 = pz[1];\n" + " vtxPos2 = pz[2];\n" + " triMZ = (isnan(pz[3].x) || isinf(pz[3].x)) ? 0.0 : pz[3].x;\n" + " EmitVertex();\n" + "}\n"); + } + + if (need_triz || need_quadz) { + mstring_append( + output, + // Kahan's algorithm for computing a*b - c*d using FMA for higher + // precision. See e.g.: + // Muller et al, "Handbook of Floating-Point Arithmetic", 2nd ed. + // or + // Claude-Pierre Jeannerod, Nicolas Louvet, and Jean-Michel Muller, + // Further analysis of Kahan's algorithm for the accurate + // computation of 2x2 determinants, + // Mathematics of Computation 82(284), October 2013. 
+ "float kahan_det(float a, float b, float c, float d) {\n" + " precise float cd = c*d;\n" + " precise float err = fma(-c, d, cd);\n" + " precise float res = fma(a, b, -cd) + err;\n" + " return res;\n" + "}\n"); + } + + if (state->z_perspective) { + if (need_triz || need_quadz) { + mstring_append( + output, + "mat4 calc_triz(int i0, int i1, int i2) {\n" + " mat2 m = mat2(v_vtxPos[i1].xy - v_vtxPos[i0].xy,\n" + " v_vtxPos[i2].xy - v_vtxPos[i0].xy);\n" + " precise vec2 b = vec2(v_vtxPos[i0].w - v_vtxPos[i1].w,\n" + " v_vtxPos[i0].w - v_vtxPos[i2].w);\n" + " b /= vec2(v_vtxPos[i1].w, v_vtxPos[i2].w) * v_vtxPos[i0].w;\n" + // The following computes dzx and dzy same as + // vec2 dz = b * inverse(m); + " float det = kahan_det(m[0].x, m[1].y, m[1].x, m[0].y);\n" + " float dzx = kahan_det(b.x, m[1].y, b.y, m[0].y) / det;\n" + " float dzy = kahan_det(b.y, m[0].x, b.x, m[1].x) / det;\n" + " float triMZ = max(abs(dzx), abs(dzy));\n" + " return mat4(v_vtxPos[i0], v_vtxPos[i1], v_vtxPos[i2], triMZ, vec3(0.0));\n" + "}\n"); + } + } else { + if (need_triz || need_quadz) { + mstring_append( + output, + "mat4 calc_triz(int i0, int i1, int i2) {\n" + " mat2 m = mat2(v_vtxPos[i1].xy - v_vtxPos[i0].xy,\n" + " v_vtxPos[i2].xy - v_vtxPos[i0].xy);\n" + " precise vec2 b = vec2(v_vtxPos[i1].z - v_vtxPos[i0].z,\n" + " v_vtxPos[i2].z - v_vtxPos[i0].z);\n" + // The following computes dzx and dzy same as + // vec2 dz = b * inverse(m); + " float det = kahan_det(m[0].x, m[1].y, m[1].x, m[0].y);\n" + " float dzx = kahan_det(b.x, m[1].y, b.y, m[0].y) / det;\n" + " float dzy = kahan_det(b.y, m[0].x, b.x, m[1].x) / det;\n" + " float triMZ = max(abs(dzx), abs(dzy));\n" + " return mat4(v_vtxPos[i0], v_vtxPos[i1], v_vtxPos[i2], triMZ, vec3(0.0));\n" + "}\n"); + } + } + + if (need_linez) { + mstring_append( + output, + "mat4 calc_linez(int i0, int i1) {\n" + " vec2 delta = v_vtxPos[i1].xy - v_vtxPos[i0].xy;\n" + " vec2 v2 = vec2(-delta.y, delta.x) + v_vtxPos[i0].xy;\n" + " return mat4(v_vtxPos[i0], 
v_vtxPos[i1], vec4(v2, v_vtxPos[i0].zw), vec4(0.0));\n" + "}\n"); + } + + if (need_quadz) { + mstring_append( + output, + "void calc_quadz(int i0, int i1, int i2, int i3, out mat4 triz1, out mat4 triz2) {\n" + " triz1 = calc_triz(i0, i1, i2);\n" + " triz2 = calc_triz(i0, i2, i3);\n" + "}\n"); } mstring_append_fmt(output, diff --git a/hw/xbox/nv2a/pgraph/glsl/geom.h b/hw/xbox/nv2a/pgraph/glsl/geom.h index 41ff255161..7dbd807345 100644 --- a/hw/xbox/nv2a/pgraph/glsl/geom.h +++ b/hw/xbox/nv2a/pgraph/glsl/geom.h @@ -30,6 +30,7 @@ typedef struct { enum ShaderPolygonMode polygon_front_mode; enum ShaderPolygonMode polygon_back_mode; bool smooth_shading; + bool z_perspective; } GeomState; typedef struct GenGeomGlslOptions { diff --git a/hw/xbox/nv2a/pgraph/glsl/psh.c b/hw/xbox/nv2a/pgraph/glsl/psh.c index 7484b2aa10..ddf04be25c 100644 --- a/hw/xbox/nv2a/pgraph/glsl/psh.c +++ b/hw/xbox/nv2a/pgraph/glsl/psh.c @@ -209,6 +209,26 @@ void pgraph_glsl_set_psh_state(PGRAPHState *pg, PshState *state) state->conv_tex[i] = kernel; } + + state->surface_zeta_format = pg->surface_shape.zeta_format; + unsigned int z_format = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER), + NV_PGRAPH_SETUPRASTER_Z_FORMAT); + + switch (pg->surface_shape.zeta_format) { + case NV097_SET_SURFACE_FORMAT_ZETA_Z16: + state->depth_format = + z_format ? DEPTH_FORMAT_F16 : DEPTH_FORMAT_D16; + break; + case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8: + state->depth_format = + z_format ? DEPTH_FORMAT_F24 : DEPTH_FORMAT_D24; + break; + default: + fprintf(stderr, "Unknown zeta surface format: 0x%x\n", + pg->surface_shape.zeta_format); + assert(false); + break; + } } struct InputInfo { @@ -875,6 +895,23 @@ static MString* psh_convert(struct PixelShader *ps) "vec3 dotmap_hilo_hemisphere(vec4 col) {\n" " return col.rgb;\n" // FIXME "}\n" + // Kahan's algorithm for computing determinant using FMA for higher + // precision. See e.g.: + // Muller et al, "Handbook of Floating-Point Arithmetic", 2nd ed. 
+ // or + // Claude-Pierre Jeannerod, Nicolas Louvet, and Jean-Michel Muller, + // Further analysis of Kahan's algorithm for the accurate + // computation of 2x2 determinants, + // Mathematics of Computation 82(284), October 2013. + "float kahan_det(vec2 a, vec2 b) {\n" + " precise float cd = a.y*b.x;\n" + " precise float err = fma(-a.y, b.x, cd);\n" + " precise float res = fma(a.x, b.y, -cd) + err;\n" + " return res;\n" + "}\n" + "float area(vec2 a, vec2 b, vec2 c) {\n" + " return kahan_det(b - a, c - a);\n" + "}\n" "const float[9] gaussian3x3 = float[9](\n" " 1.0/16.0, 2.0/16.0, 1.0/16.0,\n" " 2.0/16.0, 4.0/16.0, 2.0/16.0,\n" @@ -911,45 +948,69 @@ static MString* psh_convert(struct PixelShader *ps) "}\n"); } + if (ps->state->z_perspective) { + mstring_append( + clip, + "vec2 unscaled_xy = gl_FragCoord.xy / surfaceScale;\n" + "precise float bc0 = area(unscaled_xy, vtxPos1.xy, vtxPos2.xy);\n" + "precise float bc1 = area(unscaled_xy, vtxPos2.xy, vtxPos0.xy);\n" + "precise float bc2 = area(unscaled_xy, vtxPos0.xy, vtxPos1.xy);\n" + "bc0 /= vtxPos0.w;\n" + "bc1 /= vtxPos1.w;\n" + "bc2 /= vtxPos2.w;\n" + "float inv_bcsum = 1.0 / (bc0 + bc1 + bc2);\n" + // Denominator can be zero in case the rasterized primitive is a + // point or a degenerate line or triangle. + "if (isinf(inv_bcsum)) {\n" + " inv_bcsum = 0.0;\n" + "}\n" + "bc1 *= inv_bcsum;\n" + "bc2 *= inv_bcsum;\n" + "precise float zvalue = vtxPos0.w + (bc1*(vtxPos1.w - vtxPos0.w) + bc2*(vtxPos2.w - vtxPos0.w));\n" + // If GPU clipping is inaccurate, the point gl_FragCoord.xy might + // be above the horizon of the plane of a rasterized triangle + // making the interpolated w-coordinate above zero or negative. We + // should prevent such wrapping through infinity by clamping to + // infinity. 
+ "if (zvalue > 0.0) {\n" + " float zslopeofs = depthFactor*triMZ*zvalue*zvalue;\n" + " zvalue += depthOffset;\n" + " zvalue += zslopeofs;\n" + "} else {\n" + " zvalue = uintBitsToFloat(0x7F7FFFFFu);\n" + "}\n" + "if (isnan(zvalue)) {\n" + " zvalue = uintBitsToFloat(0x7F7FFFFFu);\n" + "}\n"); + } else { + mstring_append( + clip, + "vec2 unscaled_xy = gl_FragCoord.xy / surfaceScale;\n" + "precise float bc0 = area(unscaled_xy, vtxPos1.xy, vtxPos2.xy);\n" + "precise float bc1 = area(unscaled_xy, vtxPos2.xy, vtxPos0.xy);\n" + "precise float bc2 = area(unscaled_xy, vtxPos0.xy, vtxPos1.xy);\n" + "float inv_bcsum = 1.0 / (bc0 + bc1 + bc2);\n" + // Denominator can be zero in case the rasterized primitive is a + // point or a degenerate line or triangle. + "if (isinf(inv_bcsum)) {\n" + " inv_bcsum = 0.0;\n" + "}\n" + "bc1 *= inv_bcsum;\n" + "bc2 *= inv_bcsum;\n" + "precise float zvalue = vtxPos0.z + (bc1*(vtxPos1.z - vtxPos0.z) + bc2*(vtxPos2.z - vtxPos0.z));\n" + "zvalue += depthOffset;\n" + "zvalue += depthFactor*triMZ;\n"); + } + + /* Depth clipping */ if (ps->state->depth_clipping) { - if (ps->state->z_perspective) { - mstring_append( - clip, "float zvalue = 1.0/gl_FragCoord.w + depthOffset;\n" - "if (zvalue < clipRange.z || clipRange.w < zvalue) {\n" - " discard;\n" - "}\n"); - } else { - /* Take care of floating point precision problems. MS dashboard - * outputs exactly 0.0 z-coordinates and then our fixed function - * vertex shader outputs -w as the z-coordinate when OpenGL is - * used. Since -w/w = -1, this should give us exactly 0.0 as - * gl_FragCoord.z here. Unfortunately, with AMD Radeon RX 6600 the - * result is slightly greater than 0. MS dashboard sets the clip - * range to [0.0, 0.0] and so the imprecision causes unwanted - * clipping. Note that since Vulkan uses NDC range [0,1] it - * doesn't suffer from this problem with Radeon. Also, despite the - * imprecision OpenGL Radeon writes the correct value 0 to the depth - * buffer (if writing is enabled.) 
Radeon appears to write floored - * values. To compare, Intel integrated UHD 770 has gl_FragCoord.z - * exactly 0 (and writes rounded to closest integer values to the - * depth buffer.) Radeon OpenGL problem could also be fixed by using - * glClipControl(), but it requires OpenGL 4.5. - * Above is based on experiments with Linux and Mesa. - */ - if (ps->opts.vulkan) { - mstring_append( - clip, "if (gl_FragCoord.z*clipRange.y < clipRange.z ||\n" - " gl_FragCoord.z*clipRange.y > clipRange.w) {\n" - " discard;\n" - "}\n"); - } else { - mstring_append( - clip, "if ((gl_FragCoord.z + 1.0f/16777216.0f)*clipRange.y < clipRange.z ||\n" - " (gl_FragCoord.z - 1.0f/16777216.0f)*clipRange.y > clipRange.w) {\n" - " discard;\n" - "}\n"); - } - } + mstring_append( + clip, "if (zvalue < clipRange.z || clipRange.w < zvalue) {\n" + " discard;\n" + "}\n"); + } else { + mstring_append( + clip, "zvalue = clamp(zvalue, clipRange.z, clipRange.w);\n"); } MString *vars = mstring_new(); @@ -1334,21 +1395,33 @@ static MString* psh_convert(struct PixelShader *ps) } } - if (ps->state->z_perspective) { - if (!ps->state->depth_clipping) { - mstring_append(ps->code, - "float zvalue = 1.0/gl_FragCoord.w + depthOffset;\n"); - } - /* TODO: With integer depth buffers Xbox hardware floors values and so - * does Radeon, but Intel UHD 770 rounds to nearest. Should probably - * floor here explicitly (in some way that doesn't also cause - * imprecision issues due to division by clipRange.y) - */ - mstring_append(ps->code, - "gl_FragDepth = clamp(zvalue, clipRange.z, clipRange.w)/clipRange.y;\n"); - } else if (!ps->state->depth_clipping) { - mstring_append(ps->code, - "gl_FragDepth = clamp(gl_FragCoord.z, clipRange.z/clipRange.y, clipRange.w/clipRange.y);\n"); + /* With integer depth buffers Xbox hardware floors values. For gl_FragDepth + * range [0,1] Radeon floors values to integer depth buffer, but Intel UHD + * 770 rounds to nearest. 
For 24-bit OpenGL/Vulkan integer depth buffer, + * we divide the desired depth integer value by 16777216.0, then add 1 in + * integer bit representation to get the same result as dividing the + * desired depth integer by 16777215.0 would give. (GPUs can't divide by + * 16777215.0, only multiply by 1.0/16777215.0 which gives different results + * due to rounding.) + */ + + switch (ps->state->depth_format) { + case DEPTH_FORMAT_D16: + // 16-bit unsigned int + mstring_append( + ps->code, + "gl_FragDepth = floor(zvalue) / 65535.0;\n"); + break; + case DEPTH_FORMAT_D24: + // 24-bit unsigned int + mstring_append( + ps->code, + "gl_FragDepth = uintBitsToFloat(floatBitsToUint(floor(zvalue) / 16777216.0) + 1u);\n"); + break; + default: + // TODO: handle floating-point depth buffers properly + mstring_append(ps->code, "gl_FragDepth = zvalue / clipRange.y;\n"); + break; } MString *final = mstring_new(); @@ -1542,31 +1615,62 @@ void pgraph_glsl_set_psh_uniform_values(PGRAPHState *pg, pgraph_glsl_set_clip_range_uniform_value(pg, values->clipRange[0]); } + bool polygon_offset_enabled = false; + if (pg->primitive_mode >= PRIM_TYPE_TRIANGLES) { + uint32_t raster = pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER); + uint32_t polygon_mode = + GET_MASK(raster, NV_PGRAPH_SETUPRASTER_FRONTFACEMODE); + + if ((polygon_mode == NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_FILL && + (raster & NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE)) || + (polygon_mode == NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_LINE && + (raster & NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE)) || + (polygon_mode == NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_POINT && + (raster & NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE))) { + polygon_offset_enabled = true; + } + } + if (locs[PshUniform_depthOffset] != -1) { float zbias = 0.0f; - if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & - (NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE | - NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE | - NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) { + if (polygon_offset_enabled) { uint32_t zbias_u32 
= pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS); zbias = *(float *)&zbias_u32; - - if (pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR) != 0 && - (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & - NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE)) { - /* TODO: emulate zfactor when z_perspective true, i.e. - * w-buffering. Perhaps calculate an additional offset based on - * triangle orientation in geometry shader and pass the result - * to fragment shader and add it to gl_FragDepth as well. - */ - NV2A_UNIMPLEMENTED("NV_PGRAPH_ZOFFSETFACTOR for w-buffering"); - } } values->depthOffset[0] = zbias; } + if (locs[PshUniform_depthFactor] != -1) { + float zfactor = 0.0f; + + if (polygon_offset_enabled) { + uint32_t zfactor_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR); + zfactor = *(float *)&zfactor_u32; + if (zfactor != 0.0f && + (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & + NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE)) { + /* FIXME: for w-buffering, polygon slope in screen-space is + * computed per-pixel, but Xbox appears to use constant that + * is the polygon slope at the first visible pixel in top-left + * order. 
+ */ + NV2A_UNIMPLEMENTED("NV_PGRAPH_ZOFFSETFACTOR only partially implemented for w-buffering"); + } + } + + values->depthFactor[0] = zfactor; + } + + if (locs[PshUniform_surfaceScale] != -1) { + unsigned int wscale = 1, hscale = 1; + pgraph_apply_anti_aliasing_factor(pg, &wscale, &hscale); + pgraph_apply_scaling_factor(pg, &wscale, &hscale); + values->surfaceScale[0][0] = wscale; + values->surfaceScale[0][1] = hscale; + } + unsigned int max_gl_width = pg->surface_binding_dim.width; unsigned int max_gl_height = pg->surface_binding_dim.height; pgraph_apply_scaling_factor(pg, &max_gl_width, &max_gl_height); diff --git a/hw/xbox/nv2a/pgraph/glsl/psh.h b/hw/xbox/nv2a/pgraph/glsl/psh.h index 1a04c53dff..84d3137a0a 100644 --- a/hw/xbox/nv2a/pgraph/glsl/psh.h +++ b/hw/xbox/nv2a/pgraph/glsl/psh.h @@ -27,6 +27,13 @@ typedef struct PGRAPHState PGRAPHState; +enum PshDepthFormat { + DEPTH_FORMAT_D24, + DEPTH_FORMAT_D16, + DEPTH_FORMAT_F24, + DEPTH_FORMAT_F16, +}; + typedef struct PshState { uint32_t combiner_control; uint32_t shader_stage_program; @@ -61,6 +68,9 @@ typedef struct PshState { bool smooth_shading; bool depth_clipping; bool z_perspective; + + unsigned int surface_zeta_format; + enum PshDepthFormat depth_format; } PshState; void pgraph_glsl_set_psh_state(PGRAPHState *pg, PshState *state); @@ -75,8 +85,10 @@ void pgraph_glsl_set_psh_state(PGRAPHState *pg, PshState *state); DECL(S, colorKey, uint, 4) \ DECL(S, colorKeyMask, uint, 4) \ DECL(S, consts, vec4, 18) \ + DECL(S, depthFactor, float, 1) \ DECL(S, depthOffset, float, 1) \ DECL(S, fogColor, vec4, 1) \ + DECL(S, surfaceScale, ivec2, 1) \ DECL(S, texScale, float, 4) DECL_UNIFORM_TYPES(PshUniform, PSH_UNIFORM_DECL_X) diff --git a/hw/xbox/nv2a/pgraph/glsl/shaders.c b/hw/xbox/nv2a/pgraph/glsl/shaders.c index 44ed5437fc..2d6cfaf7d2 100644 --- a/hw/xbox/nv2a/pgraph/glsl/shaders.c +++ b/hw/xbox/nv2a/pgraph/glsl/shaders.c @@ -73,7 +73,8 @@ bool pgraph_glsl_check_shader_state_dirty(PGRAPHState *pg, pg->swizzle_attrs != 
state->vsh.swizzle_attrs || pg->compressed_attrs != state->vsh.compressed_attrs || pg->primitive_mode != state->geom.primitive_mode || - pg->surface_scale_factor != state->vsh.surface_scale_factor) { + pg->surface_scale_factor != state->vsh.surface_scale_factor || + pg->surface_shape.zeta_format != state->psh.surface_zeta_format) { return true; } diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c index a892c2001b..703c1595c5 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c @@ -479,11 +479,12 @@ GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz mstring_append(body, " oPos = tPosition * compositeMat;\n" - " oPos.z = oPos.z / clipRange.y;\n" " oPos.w = clampAwayZeroInf(oPos.w);\n" " oPos.xy /= oPos.w;\n" " oPos.xy += c[" stringify(NV_IGRAPH_XF_XFCTX_VPOFF) "].xy;\n" " oPos.xy = roundScreenCoords(oPos.xy);\n" + " vec4 vtxPos = vec4(oPos.xy, oPos.z / oPos.w, oPos.w);\n" + " oPos.z = oPos.z / clipRange.y;\n" " oPos.xy = (2.0f * oPos.xy - surfaceSize) / surfaceSize;\n" " oPos.xy *= oPos.w;\n" ); diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c index 582194af89..4a1d57b1f1 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c @@ -755,10 +755,10 @@ void pgraph_glsl_gen_vsh_prog(uint16_t version, const uint32_t *tokens, * in clip space. */ " oPos.xy = roundScreenCoords(oPos.xy);\n" - " oPos.xy = (2.0f * oPos.xy - surfaceSize) / surfaceSize;\n" - - " oPos.z = oPos.z / clipRange.y;\n" " oPos.w = clampAwayZeroInf(oPos.w);\n" + " vec4 vtxPos = oPos;\n" + " oPos.xy = (2.0f * oPos.xy - surfaceSize) / surfaceSize;\n" + " oPos.z = oPos.z / clipRange.y;\n" /* Undo perspective divide by w. 
* Note that games may also have vertex shaders that do diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh.c b/hw/xbox/nv2a/pgraph/glsl/vsh.c index 9bce9f30bf..5b0857dc75 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh.c @@ -245,6 +245,10 @@ MString *pgraph_glsl_gen_vsh(const VshState *state, GenVshGlslOptions opts) "#define vtxT1 v_vtxT1\n" "#define vtxT2 v_vtxT2\n" "#define vtxT3 v_vtxT3\n" + "#define vtxPos0 v_vtxPos0\n" + "#define vtxPos1 v_vtxPos1\n" + "#define vtxPos2 v_vtxPos2\n" + "#define triMZ v_triMZ\n" ); } mstring_append(header, "\n"); @@ -393,6 +397,10 @@ MString *pgraph_glsl_gen_vsh(const VshState *state, GenVshGlslOptions opts) " vtxT1 = oT1;\n" " vtxT2 = oT2;\n" " vtxT3 = oT3;\n" + " vtxPos0 = vtxPos;\n" + " vtxPos1 = vtxPos;\n" + " vtxPos2 = vtxPos;\n" + " triMZ = 0.0;\n" " gl_PointSize = oPts.x;\n" ); diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 28b8194468..c982d19bc4 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -54,10 +54,6 @@ static VkPrimitiveTopology get_primitive_topology(PGRAPHState *pg) int polygon_mode = r->shader_binding->state.geom.polygon_front_mode; int primitive_mode = r->shader_binding->state.geom.primitive_mode; - if (polygon_mode == POLY_MODE_POINT) { - return VK_PRIMITIVE_TOPOLOGY_POINT_LIST; - } - // FIXME: Replace with LUT switch (primitive_mode) { case PRIM_TYPE_POINTS: @@ -795,12 +791,10 @@ static void create_pipeline(PGRAPHState *pg) VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_state; if (r->provoking_vertex_extension_enabled) { + // TODO: remove use of provoking vertex extension since we just want + // the default last vertex convention always. VkProvokingVertexModeEXT provoking_mode = - GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3), - NV_PGRAPH_CONTROL_3_SHADEMODE) == - NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT ? 
- VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT : - VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT; + VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT; provoking_state = (VkPipelineRasterizationProvokingVertexStateCreateInfoEXT){ @@ -809,8 +803,6 @@ static void create_pipeline(PGRAPHState *pg) .provokingVertexMode = provoking_mode, }; rasterizer_next_struct = &provoking_state; - } else { - // FIXME: Handle in shader? } VkPipelineRasterizationStateCreateInfo rasterizer = { @@ -968,27 +960,6 @@ static void create_pipeline(PGRAPHState *pg) .pDynamicStates = dynamic_states, }; - // /* Polygon offset */ - // /* FIXME: GL implementation-specific, maybe do this in VS? */ - // if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & - // NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE) - // if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & - // NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE) - // if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & - // NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE) - if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & - (NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE | - NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE | - NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) { - uint32_t zfactor_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR); - float zfactor = *(float *)&zfactor_u32; - uint32_t zbias_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS); - float zbias = *(float *)&zbias_u32; - rasterizer.depthBiasEnable = VK_TRUE; - rasterizer.depthBiasSlopeFactor = zfactor; - rasterizer.depthBiasConstantFactor = zbias; - } - // FIXME: Dither // if (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & // NV_PGRAPH_CONTROL_0_DITHERENABLE)) diff --git a/hw/xbox/nv2a/pgraph/vk/surface-compute.c b/hw/xbox/nv2a/pgraph/vk/surface-compute.c index 54cf610402..50180eb6b9 100644 --- a/hw/xbox/nv2a/pgraph/vk/surface-compute.c +++ b/hw/xbox/nv2a/pgraph/vk/surface-compute.c @@ -106,7 +106,8 @@ const char *unpack_z24s8_to_d32_sfloat_s8_uint_glsl = "void main() {\n" " uint idx_out = gl_GlobalInvocationID.x;\n" " uint idx_in = get_input_idx(idx_out);\n" - " 
depth_out[idx_out] = float(depth_stencil_in[idx_in] >> 8) / float(0xffffff);\n" + // Conversion to float depth must be the same as in fragment shader + " depth_out[idx_out] = uintBitsToFloat(floatBitsToUint(float(depth_stencil_in[idx_in] >> 8) / 16777216.0) + 1u);\n" " if (idx_out % 4 == 0) {\n" " uint stencil_value = 0;\n" " for (int i = 0; i < 4; i++) {\n" // Include next 3 pixels From e96663669e84af1a6fd6c2fc8dae7222dd03fe23 Mon Sep 17 00:00:00 2001 From: coldhex Date: Sat, 17 May 2025 12:34:59 +0300 Subject: [PATCH 3/8] nv2a: Fix polygon line mode and implement flat shading provoking vertex Xbox draws lines in polygon mode without trying to avoid overlaps, e.g. internal edge lines are drawn twice for triangle strips and fans. This is evidenced by using additive blending and also stencil adds. This commit removes the gl_PrimitiveIDIn==0 checks which were there to avoid drawing lines twice. This commit also implements flat shading first/last provoking vertex handling. This fixes triangle strip and fan flat shading in nxdk_pgraph_tests shade model tests. 
--- hw/xbox/nv2a/nv2a_regs.h | 6 + hw/xbox/nv2a/pgraph/glsl/geom.c | 272 ++++++++++++++---------------- hw/xbox/nv2a/pgraph/glsl/geom.h | 1 + hw/xbox/nv2a/pgraph/methods.h.inc | 1 + hw/xbox/nv2a/pgraph/pgraph.c | 7 + hw/xbox/nv2a/pgraph/vk/draw.c | 4 +- 6 files changed, 139 insertions(+), 152 deletions(-) diff --git a/hw/xbox/nv2a/nv2a_regs.h b/hw/xbox/nv2a/nv2a_regs.h index 206e858b23..837f422409 100644 --- a/hw/xbox/nv2a/nv2a_regs.h +++ b/hw/xbox/nv2a/nv2a_regs.h @@ -471,6 +471,9 @@ # define NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INCR 7 # define NV_PGRAPH_CONTROL_2_STENCIL_OP_V_DECR 8 #define NV_PGRAPH_CONTROL_3 0x00001958 +# define NV_PGRAPH_CONTROL_3_PROVOKING_VERTEX (1 << 0) +# define NV_PGRAPH_CONTROL_3_PROVOKING_VERTEX_LAST 0 +# define NV_PGRAPH_CONTROL_3_PROVOKING_VERTEX_FIRST 1 # define NV_PGRAPH_CONTROL_3_SHADEMODE (1 << 7) # define NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT 0 # define NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH 1 @@ -1062,6 +1065,9 @@ # define NV097_SET_TEXGEN_VIEW_MODEL_INFINITE_VIEWER 1 # define NV097_SET_FOG_PLANE 0x000009D0 # define NV097_SET_SPECULAR_PARAMS 0x000009E0 +# define NV097_SET_PROVOKING_VERTEX 0x000009FC +# define NV097_SET_PROVOKING_VERTEX_LAST 0 +# define NV097_SET_PROVOKING_VERTEX_FIRST 1 # define NV097_SET_SCENE_AMBIENT_COLOR 0x00000A10 # define NV097_SET_VIEWPORT_OFFSET 0x00000A20 # define NV097_SET_POINT_PARAMS 0x00000A30 diff --git a/hw/xbox/nv2a/pgraph/glsl/geom.c b/hw/xbox/nv2a/pgraph/glsl/geom.c index 2170696a08..dd5dc363f5 100644 --- a/hw/xbox/nv2a/pgraph/glsl/geom.c +++ b/hw/xbox/nv2a/pgraph/glsl/geom.c @@ -38,6 +38,11 @@ void pgraph_glsl_set_geom_state(PGRAPHState *pg, GeomState *state) NV_PGRAPH_CONTROL_3_SHADEMODE) == NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH; + state->first_vertex_is_provoking = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3), + NV_PGRAPH_CONTROL_3_PROVOKING_VERTEX) == + NV_PGRAPH_CONTROL_3_PROVOKING_VERTEX_FIRST; + state->z_perspective = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & 
NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE; } @@ -83,67 +88,51 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) const char *layout_in = NULL; const char *layout_out = NULL; const char *body = NULL; + const char *provoking_index = "0"; + /* TODO: frontface/backface culling for polygon modes POLY_MODE_LINE and + * POLY_MODE_POINT. + * FIXME: OpenGL/Vulkan does not specify absolute vertex order when input + * is a triangle list, triangle strip or fan. Only vertex winding order + * is specified. Currently we assume input triangle vertex order follows + * the last provoking vertex convention. + */ switch (state->primitive_mode) { case PRIM_TYPE_POINTS: return NULL; case PRIM_TYPE_LINES: case PRIM_TYPE_LINE_LOOP: case PRIM_TYPE_LINE_STRIP: + provoking_index = state->first_vertex_is_provoking ? "0" : "1"; need_linez = true; layout_in = "layout(lines) in;\n"; layout_out = "layout(line_strip, max_vertices = 2) out;\n"; body = " mat4 pz = calc_linez(0, 1);\n" - " emit_vertex(0, 0, pz);\n" - " emit_vertex(1, 1, pz);\n" + " emit_vertex(0, pz);\n" + " emit_vertex(1, pz);\n" " EndPrimitive();\n"; break; case PRIM_TYPE_TRIANGLES: - need_triz = true; - layout_in = "layout(triangles) in;\n"; - if (polygon_mode == POLY_MODE_FILL) { - layout_out = "layout(triangle_strip, max_vertices = 3) out;\n"; - body = " mat4 pz = calc_triz(0, 1, 2);\n" - " emit_vertex(0, 0, pz);\n" - " emit_vertex(1, 1, pz);\n" - " emit_vertex(2, 2, pz);\n" - " EndPrimitive();\n"; - } else if (polygon_mode == POLY_MODE_LINE) { - need_linez = true; - layout_out = "layout(line_strip, max_vertices = 4) out;\n"; - body = " float triMZ = calc_triz(0, 1, 2)[3].x;\n" - " mat4 pz1 = calc_linez(0, 1);\n" - " pz1[3].x = triMZ;\n" - " mat4 pz2 = calc_linez(1, 2);\n" - " pz2[3].x = triMZ;\n" - " mat4 pz3 = calc_linez(2, 0);\n" - " pz3[3].x = triMZ;\n" - " emit_vertex(0, 0, pz1);\n" - " emit_vertex(1, 0, pz1);\n" - " emit_vertex(2, 0, pz2);\n" - " emit_vertex(0, 0, pz3);\n" - " 
EndPrimitive();\n"; - } else { - assert(polygon_mode == POLY_MODE_POINT); - layout_out = "layout(points, max_vertices = 3) out;\n"; - body = " mat4 pz = calc_triz(0, 1, 2);\n" - " emit_vertex(0, 0, mat4(pz[0], pz[0], pz[0], pz[3]));\n" - " EndPrimitive();\n" - " emit_vertex(1, 0, mat4(pz[1], pz[1], pz[1], pz[3]));\n" - " EndPrimitive();\n" - " emit_vertex(2, 0, mat4(pz[2], pz[2], pz[2], pz[3]));\n" - " EndPrimitive();\n"; - } - break; case PRIM_TYPE_TRIANGLE_STRIP: case PRIM_TYPE_TRIANGLE_FAN: + if (state->first_vertex_is_provoking) { + if (state->primitive_mode == PRIM_TYPE_TRIANGLE_STRIP) { + provoking_index = "gl_PrimitiveIDIn & 1"; + } else if (state->primitive_mode == PRIM_TYPE_TRIANGLE_FAN) { + provoking_index = "1"; + } else { + provoking_index = "0"; + } + } else { + provoking_index = "2"; + } need_triz = true; layout_in = "layout(triangles) in;\n"; if (polygon_mode == POLY_MODE_FILL) { layout_out = "layout(triangle_strip, max_vertices = 3) out;\n"; body = " mat4 pz = calc_triz(0, 1, 2);\n" - " emit_vertex(0, 0, pz);\n" - " emit_vertex(1, 1, pz);\n" - " emit_vertex(2, 2, pz);\n" + " emit_vertex(0, pz);\n" + " emit_vertex(1, pz);\n" + " emit_vertex(2, pz);\n" " EndPrimitive();\n"; } else if (polygon_mode == POLY_MODE_LINE) { need_linez = true; @@ -155,31 +144,37 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) " pz2[3].x = triMZ;\n" " mat4 pz3 = calc_linez(2, 0);\n" " pz3[3].x = triMZ;\n" - " if (gl_PrimitiveIDIn == 0) {\n" - " emit_vertex(0, 0, pz1);\n" - " }\n" - " emit_vertex(1, 0, pz1);\n" - " emit_vertex(2, 0, pz2);\n" - " emit_vertex(0, 0, pz3);\n" + " emit_vertex(0, pz1);\n" + " emit_vertex(1, pz1);\n" + " emit_vertex(2, pz2);\n" + " emit_vertex(0, pz3);\n" " EndPrimitive();\n"; } else { assert(polygon_mode == POLY_MODE_POINT); layout_out = "layout(points, max_vertices = 3) out;\n"; body = " mat4 pz = calc_triz(0, 1, 2);\n" - " if (gl_PrimitiveIDIn == 0) {\n" - " emit_vertex(0, 0, mat4(pz[0], pz[0], pz[0], pz[3]));\n" 
- " EndPrimitive();\n" - " emit_vertex(1, 0, mat4(pz[1], pz[1], pz[1], pz[3]));\n" - " EndPrimitive();\n" - " }\n" - " emit_vertex(2, 0, mat4(pz[2], pz[2], pz[2], pz[3]));\n" + " emit_vertex(0, mat4(pz[0], pz[0], pz[0], pz[3]));\n" + " EndPrimitive();\n" + " emit_vertex(1, mat4(pz[1], pz[1], pz[1], pz[3]));\n" + " EndPrimitive();\n" + " emit_vertex(2, mat4(pz[2], pz[2], pz[2], pz[3]));\n" " EndPrimitive();\n"; } break; case PRIM_TYPE_QUADS: + provoking_index = "3"; need_quadz = true; layout_in = "layout(lines_adjacency) in;\n"; - if (polygon_mode == POLY_MODE_LINE) { + if (polygon_mode == POLY_MODE_FILL) { + layout_out = "layout(triangle_strip, max_vertices = 4) out;\n"; + body = " mat4 pz, pz2;\n" + " calc_quadz(0, 1, 2, 3, pz, pz2);\n" + " emit_vertex(1, pz);\n" + " emit_vertex(2, pz2);\n" + " emit_vertex(0, pz);\n" + " emit_vertex(3, pz2);\n" + " EndPrimitive();\n"; + } else if (polygon_mode == POLY_MODE_LINE) { need_linez = true; layout_out = "layout(line_strip, max_vertices = 5) out;\n"; body = " mat4 pz, pzs;\n" @@ -192,40 +187,42 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) " pz3[3].x = pzs[3].x;\n" " mat4 pz4 = calc_linez(3, 0);\n" " pz4[3].x = pzs[3].x;\n" - " emit_vertex(0, 3, pz1);\n" - " emit_vertex(1, 3, pz1);\n" - " emit_vertex(2, 3, pz2);\n" - " emit_vertex(3, 3, pz3);\n" - " emit_vertex(0, 3, pz4);\n" - " EndPrimitive();\n"; - } else if (polygon_mode == POLY_MODE_FILL) { - layout_out = "layout(triangle_strip, max_vertices = 4) out;\n"; - body = " mat4 pz, pz2;\n" - " calc_quadz(0, 1, 2, 3, pz, pz2);\n" - " emit_vertex(1, 3, pz);\n" - " emit_vertex(2, 3, pz2);\n" - " emit_vertex(0, 3, pz);\n" - " emit_vertex(3, 3, pz2);\n" + " emit_vertex(0, pz1);\n" + " emit_vertex(1, pz1);\n" + " emit_vertex(2, pz2);\n" + " emit_vertex(3, pz3);\n" + " emit_vertex(0, pz4);\n" " EndPrimitive();\n"; } else { assert(polygon_mode == POLY_MODE_POINT); layout_out = "layout(points, max_vertices = 4) out;\n"; body = " mat4 pz, pz2;\n" " 
calc_quadz(0, 1, 2, 3, pz, pz2);\n" - " emit_vertex(0, 3, mat4(pz[0], pz[0], pz[0], pz[3]));\n" + " emit_vertex(0, mat4(pz[0], pz[0], pz[0], pz[3]));\n" " EndPrimitive();\n" - " emit_vertex(1, 3, mat4(pz[1], pz[1], pz[1], pz[3]));\n" + " emit_vertex(1, mat4(pz[1], pz[1], pz[1], pz[3]));\n" " EndPrimitive();\n" - " emit_vertex(2, 3, mat4(pz[2], pz[2], pz[2], pz[3]));\n" + " emit_vertex(2, mat4(pz[2], pz[2], pz[2], pz[3]));\n" " EndPrimitive();\n" - " emit_vertex(3, 3, mat4(pz2[2], pz2[2], pz2[2], pz2[3]));\n" + " emit_vertex(3, mat4(pz2[2], pz2[2], pz2[2], pz2[3]));\n" " EndPrimitive();\n"; } break; case PRIM_TYPE_QUAD_STRIP: + provoking_index = "3"; need_quadz = true; layout_in = "layout(lines_adjacency) in;\n"; - if (polygon_mode == POLY_MODE_LINE) { + if (polygon_mode == POLY_MODE_FILL) { + layout_out = "layout(triangle_strip, max_vertices = 4) out;\n"; + body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n" + " mat4 pz, pz2;\n" + " calc_quadz(2, 0, 1, 3, pz, pz2);\n" + " emit_vertex(0, pz);\n" + " emit_vertex(1, pz2);\n" + " emit_vertex(2, pz);\n" + " emit_vertex(3, pz2);\n" + " EndPrimitive();\n"; + } else if (polygon_mode == POLY_MODE_LINE) { need_linez = true; layout_out = "layout(line_strip, max_vertices = 5) out;\n"; body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n" @@ -239,23 +236,11 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) " pz3[3].x = pzs[3].x;\n" " mat4 pz4 = calc_linez(2, 0);\n" " pz4[3].x = pz[3].x;\n" - " if (gl_PrimitiveIDIn == 0) {\n" - " emit_vertex(0, 3, pz1);\n" - " }\n" - " emit_vertex(1, 3, pz1);\n" - " emit_vertex(3, 3, pz2);\n" - " emit_vertex(2, 3, pz3);\n" - " emit_vertex(0, 3, pz4);\n" - " EndPrimitive();\n"; - } else if (polygon_mode == POLY_MODE_FILL) { - layout_out = "layout(triangle_strip, max_vertices = 4) out;\n"; - body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n" - " mat4 pz, pz2;\n" - " calc_quadz(2, 0, 1, 3, pz, pz2);\n" - " emit_vertex(0, 3, pz);\n" - " emit_vertex(1, 3, 
pz2);\n" - " emit_vertex(2, 3, pz);\n" - " emit_vertex(3, 3, pz2);\n" + " emit_vertex(0, pz1);\n" + " emit_vertex(1, pz1);\n" + " emit_vertex(3, pz2);\n" + " emit_vertex(2, pz3);\n" + " emit_vertex(0, pz4);\n" " EndPrimitive();\n"; } else { assert(polygon_mode == POLY_MODE_POINT); @@ -263,39 +248,39 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n" " mat4 pz, pz2;\n" " calc_quadz(2, 0, 1, 3, pz, pz2);\n" - " if (gl_PrimitiveIDIn == 0) {\n" - " emit_vertex(0, 3, mat4(pz[1], pz[1], pz[1], pz[3]));\n" - " EndPrimitive();\n" - " emit_vertex(1, 3, mat4(pz[2], pz[2], pz[2], pz[3]));\n" - " EndPrimitive();\n" - " }\n" - " emit_vertex(2, 3, mat4(pz[0], pz[0], pz[0], pz[3]));\n" + " emit_vertex(0, mat4(pz[1], pz[1], pz[1], pz[3]));\n" " EndPrimitive();\n" - " emit_vertex(3, 3, mat4(pz2[2], pz2[2], pz2[2], pz2[3]));\n" + " emit_vertex(1, mat4(pz[2], pz[2], pz[2], pz[3]));\n" + " EndPrimitive();\n" + " emit_vertex(2, mat4(pz[0], pz[0], pz[0], pz[3]));\n" + " EndPrimitive();\n" + " emit_vertex(3, mat4(pz2[2], pz2[2], pz2[2], pz2[3]));\n" " EndPrimitive();\n"; } break; case PRIM_TYPE_POLYGON: + provoking_index = "0"; if (polygon_mode == POLY_MODE_FILL) { need_triz = true; layout_in = "layout(triangles) in;\n"; layout_out = "layout(triangle_strip, max_vertices = 3) out;\n"; body = " mat4 pz = calc_triz(0, 1, 2);\n" - " emit_vertex(0, 0, pz);\n" - " emit_vertex(1, 0, pz);\n" - " emit_vertex(2, 0, pz);\n" + " emit_vertex(0, pz);\n" + " emit_vertex(1, pz);\n" + " emit_vertex(2, pz);\n" " EndPrimitive();\n"; } else if (polygon_mode == POLY_MODE_LINE) { need_linez = true; - // FIXME: input here is lines and not triangles so we cannot - // calculate triangle plane slope. Also, the first vertex of the - // polygon is unavailable so flat shading provoking vertex is - // wrong. + /* FIXME: input here is lines and not triangles so we cannot + * calculate triangle plane slope. 
Also, the first vertex of the + * polygon is unavailable so flat shading provoking vertex is + * wrong. + */ layout_in = "layout(lines) in;\n"; layout_out = "layout(line_strip, max_vertices = 2) out;\n"; body = " mat4 pz = calc_linez(0, 1);\n" - " emit_vertex(0, 0, pz);\n" - " emit_vertex(1, 1, pz);\n" + " emit_vertex(0, pz);\n" + " emit_vertex(1, pz);\n" " EndPrimitive();\n"; } else { assert(false); @@ -326,49 +311,34 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) false, false, false); if (state->smooth_shading) { - mstring_append( - output, - "void emit_vertex(int index, int _unused, mat4 pz) {\n" - " gl_Position = gl_in[index].gl_Position;\n" - " gl_PointSize = gl_in[index].gl_PointSize;\n" - " vtxD0 = v_vtxD0[index];\n" - " vtxD1 = v_vtxD1[index];\n" - " vtxB0 = v_vtxB0[index];\n" - " vtxB1 = v_vtxB1[index];\n" - " vtxFog = v_vtxFog[index];\n" - " vtxT0 = v_vtxT0[index];\n" - " vtxT1 = v_vtxT1[index];\n" - " vtxT2 = v_vtxT2[index];\n" - " vtxT3 = v_vtxT3[index];\n" - " vtxPos0 = pz[0];\n" - " vtxPos1 = pz[1];\n" - " vtxPos2 = pz[2];\n" - " triMZ = (isnan(pz[3].x) || isinf(pz[3].x)) ? 0.0 : pz[3].x;\n" - " EmitVertex();\n" - "}\n"); - } else { - mstring_append( - output, - "void emit_vertex(int index, int provoking_index, mat4 pz) {\n" - " gl_Position = gl_in[index].gl_Position;\n" - " gl_PointSize = gl_in[index].gl_PointSize;\n" - " vtxD0 = v_vtxD0[provoking_index];\n" - " vtxD1 = v_vtxD1[provoking_index];\n" - " vtxB0 = v_vtxB0[provoking_index];\n" - " vtxB1 = v_vtxB1[provoking_index];\n" - " vtxFog = v_vtxFog[index];\n" - " vtxT0 = v_vtxT0[index];\n" - " vtxT1 = v_vtxT1[index];\n" - " vtxT2 = v_vtxT2[index];\n" - " vtxT3 = v_vtxT3[index];\n" - " vtxPos0 = pz[0];\n" - " vtxPos1 = pz[1];\n" - " vtxPos2 = pz[2];\n" - " triMZ = (isnan(pz[3].x) || isinf(pz[3].x)) ? 
0.0 : pz[3].x;\n" - " EmitVertex();\n" - "}\n"); + provoking_index = "index"; } + mstring_append_fmt( + output, + "void emit_vertex(int index, mat4 pz) {\n" + " gl_Position = gl_in[index].gl_Position;\n" + " gl_PointSize = gl_in[index].gl_PointSize;\n" + " vtxD0 = v_vtxD0[%s];\n" + " vtxD1 = v_vtxD1[%s];\n" + " vtxB0 = v_vtxB0[%s];\n" + " vtxB1 = v_vtxB1[%s];\n" + " vtxFog = v_vtxFog[index];\n" + " vtxT0 = v_vtxT0[index];\n" + " vtxT1 = v_vtxT1[index];\n" + " vtxT2 = v_vtxT2[index];\n" + " vtxT3 = v_vtxT3[index];\n" + " vtxPos0 = pz[0];\n" + " vtxPos1 = pz[1];\n" + " vtxPos2 = pz[2];\n" + " triMZ = (isnan(pz[3].x) || isinf(pz[3].x)) ? 0.0 : pz[3].x;\n" + " EmitVertex();\n" + "}\n", + provoking_index, + provoking_index, + provoking_index, + provoking_index); + if (need_triz || need_quadz) { mstring_append( output, @@ -430,10 +400,12 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) if (need_linez) { mstring_append( output, + // Calculate a third vertex by rotating 90 degrees so that triangle + // interpolation in fragment shader can be used as is for lines. 
"mat4 calc_linez(int i0, int i1) {\n" " vec2 delta = v_vtxPos[i1].xy - v_vtxPos[i0].xy;\n" " vec2 v2 = vec2(-delta.y, delta.x) + v_vtxPos[i0].xy;\n" - " return mat4(v_vtxPos[i0], v_vtxPos[i1], vec4(v2, v_vtxPos[i0].zw), vec4(0.0));\n" + " return mat4(v_vtxPos[i0], v_vtxPos[i1], v2, v_vtxPos[i0].zw, vec4(0.0));\n" "}\n"); } diff --git a/hw/xbox/nv2a/pgraph/glsl/geom.h b/hw/xbox/nv2a/pgraph/glsl/geom.h index 7dbd807345..55229d2e46 100644 --- a/hw/xbox/nv2a/pgraph/glsl/geom.h +++ b/hw/xbox/nv2a/pgraph/glsl/geom.h @@ -30,6 +30,7 @@ typedef struct { enum ShaderPolygonMode polygon_front_mode; enum ShaderPolygonMode polygon_back_mode; bool smooth_shading; + bool first_vertex_is_provoking; bool z_perspective; } GeomState; diff --git a/hw/xbox/nv2a/pgraph/methods.h.inc b/hw/xbox/nv2a/pgraph/methods.h.inc index d475f9b4b1..3f9e026103 100644 --- a/hw/xbox/nv2a/pgraph/methods.h.inc +++ b/hw/xbox/nv2a/pgraph/methods.h.inc @@ -68,6 +68,7 @@ DEF_METHOD(NV097, SET_STENCIL_OP_FAIL) DEF_METHOD(NV097, SET_STENCIL_OP_ZFAIL) DEF_METHOD(NV097, SET_STENCIL_OP_ZPASS) DEF_METHOD(NV097, SET_SHADE_MODE) +DEF_METHOD(NV097, SET_PROVOKING_VERTEX) DEF_METHOD(NV097, SET_POLYGON_OFFSET_SCALE_FACTOR) DEF_METHOD(NV097, SET_POLYGON_OFFSET_BIAS) DEF_METHOD(NV097, SET_FRONT_POLYGON_MODE) diff --git a/hw/xbox/nv2a/pgraph/pgraph.c b/hw/xbox/nv2a/pgraph/pgraph.c index e3ecb09d12..2e93d77d9c 100644 --- a/hw/xbox/nv2a/pgraph/pgraph.c +++ b/hw/xbox/nv2a/pgraph/pgraph.c @@ -1534,6 +1534,13 @@ DEF_METHOD(NV097, SET_SHADE_MODE) } } +DEF_METHOD(NV097, SET_PROVOKING_VERTEX) +{ + assert((parameter & ~1) == 0); + PG_SET_MASK(NV_PGRAPH_CONTROL_3, NV_PGRAPH_CONTROL_3_PROVOKING_VERTEX, + parameter); +} + DEF_METHOD(NV097, SET_POLYGON_OFFSET_SCALE_FACTOR) { pgraph_reg_w(pg, NV_PGRAPH_ZOFFSETFACTOR, parameter); diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index c982d19bc4..e4bd562149 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -791,8 +791,8 @@ static 
void create_pipeline(PGRAPHState *pg) VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_state; if (r->provoking_vertex_extension_enabled) { - // TODO: remove use of provoking vertex extension since we just want - // the default last vertex convention always. + // Use last provoking vertex convention to match geometry shader + // assumption, because Vulkan default is first vertex convention. VkProvokingVertexModeEXT provoking_mode = VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT; From 0008aaa63981df7a39981a46721093b61524a970 Mon Sep 17 00:00:00 2001 From: coldhex Date: Sun, 29 Jun 2025 15:32:50 +0300 Subject: [PATCH 4/8] nv2a: Vertex reordering for flat shading in geometry shader Test OpenGL/Vulkan geometry shader triangle, strip and fan vertex ordering during backend initialization. OpenGL/Vulkan does not guarantee absolute vertex order for geometry shader input triangles. The test results are used to reorder input triangle vertices into the first vertex convention order so that the correct provoking vertex can be chosen for flat shading. Also, this removes use of the Vulkan provoking vertex extension. The default first vertex convention is now used when emitting line strips in the geometry shader. (It would of course be possible to always emit only separate line segments and then the convention wouldn't matter at all.)
--- hw/xbox/nv2a/pgraph/gl/draw.c | 6 + hw/xbox/nv2a/pgraph/gl/gpuprops.c | 357 +++++++++++++++++ hw/xbox/nv2a/pgraph/gl/meson.build | 1 + hw/xbox/nv2a/pgraph/gl/renderer.c | 3 + hw/xbox/nv2a/pgraph/gl/renderer.h | 2 + hw/xbox/nv2a/pgraph/glsl/geom.c | 135 +++++-- hw/xbox/nv2a/pgraph/glsl/geom.h | 2 + hw/xbox/nv2a/pgraph/pgraph.h | 10 + hw/xbox/nv2a/pgraph/vk/draw.c | 17 - hw/xbox/nv2a/pgraph/vk/gpuprops.c | 605 +++++++++++++++++++++++++++++ hw/xbox/nv2a/pgraph/vk/instance.c | 15 - hw/xbox/nv2a/pgraph/vk/meson.build | 1 + hw/xbox/nv2a/pgraph/vk/renderer.c | 3 + hw/xbox/nv2a/pgraph/vk/renderer.h | 5 +- 14 files changed, 1090 insertions(+), 72 deletions(-) create mode 100644 hw/xbox/nv2a/pgraph/gl/gpuprops.c create mode 100644 hw/xbox/nv2a/pgraph/vk/gpuprops.c diff --git a/hw/xbox/nv2a/pgraph/gl/draw.c b/hw/xbox/nv2a/pgraph/gl/draw.c index 7af27d845c..9562ea30d8 100644 --- a/hw/xbox/nv2a/pgraph/gl/draw.c +++ b/hw/xbox/nv2a/pgraph/gl/draw.c @@ -227,6 +227,12 @@ void pgraph_gl_draw_begin(NV2AState *d) glEnable(GL_DEPTH_CLAMP); + /* Set first vertex convention to match Vulkan default. This is needed + * because geometry shader outputs line strips with data for fragment + * shader. + */ + glProvokingVertex(GL_FIRST_VERTEX_CONVENTION); + if (stencil_test) { glEnable(GL_STENCIL_TEST); diff --git a/hw/xbox/nv2a/pgraph/gl/gpuprops.c b/hw/xbox/nv2a/pgraph/gl/gpuprops.c new file mode 100644 index 0000000000..29cb1e7b0e --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/gpuprops.c @@ -0,0 +1,357 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2025 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "debug.h" +#include "renderer.h" + +static GPUProperties pgraph_gl_gpu_properties; + +static const char *vertex_shader_source = + "#version 400\n" + "out vec3 v_fragColor;\n" + "\n" + "vec2 positions[11] = vec2[](\n" + " vec2(-0.5, -0.75),\n" + " vec2(-0.25, -0.25),\n" + " vec2(-0.75, -0.25),\n" + " vec2(0.25, -0.25),\n" + " vec2(0.25, -0.75),\n" + " vec2(0.75, -0.25),\n" + " vec2(0.75, -0.75),\n" + " vec2(-0.75, 0.75),\n" + " vec2(-0.75, 0.25),\n" + " vec2(-0.25, 0.25),\n" + " vec2(-0.25, 0.75)\n" + ");\n" + "\n" + "vec3 colors[11] = vec3[](\n" + " vec3(0.0, 0.0, 1.0),\n" + " vec3(0.0, 1.0, 0.0),\n" + " vec3(0.0, 1.0, 1.0),\n" + " vec3(0.0, 0.0, 1.0),\n" + " vec3(0.0, 1.0, 0.0),\n" + " vec3(0.0, 1.0, 1.0),\n" + " vec3(1.0, 0.0, 0.0),\n" + " vec3(0.0, 0.0, 1.0),\n" + " vec3(0.0, 1.0, 0.0),\n" + " vec3(0.0, 1.0, 1.0),\n" + " vec3(1.0, 0.0, 0.0)\n" + ");\n" + "\n" + "void main() {\n" + " gl_Position = vec4(positions[gl_VertexID], 0.0, 1.0);\n" + " v_fragColor = colors[gl_VertexID];\n" + "}\n"; + +static const char *geometry_shader_source = + "#version 400\n" + "layout(triangles) in;\n" + "layout(triangle_strip, max_vertices = 3) out;\n" + "out vec3 fragColor;\n" + "in vec3 v_fragColor[];\n" + "\n" + "void emit_vertex(int index) {\n" + " gl_Position = gl_in[index].gl_Position;\n" + " fragColor = v_fragColor[0];\n" + " EmitVertex();\n" + "}\n" + "\n" + "void main() {\n" + " emit_vertex(0);\n" + " emit_vertex(1);\n" + " emit_vertex(2);\n" + " EndPrimitive();\n" + "}\n"; + +static const char *fragment_shader_source = + "#version 400\n" + "out vec4 outColor;\n" + "in vec3 
fragColor;\n" + "\n" + "void main() {\n" + " outColor = vec4(fragColor, 1.0);\n" + "}\n"; + +static GLuint compile_shader(GLenum type, const char *source) +{ + GLuint shader = glCreateShader(type); + glShaderSource(shader, 1, &source, NULL); + glCompileShader(shader); + + GLint success; + glGetShaderiv(shader, GL_COMPILE_STATUS, &success); + if (!success) { + char log[512]; + glGetShaderInfoLog(shader, sizeof(log), NULL, log); + log[sizeof(log) - 1] = '\0'; + fprintf(stderr, "GL shader type %d compilation failed: %s\n", type, + log); + assert(false); + } + + return shader; +} + +static GLuint create_program(const char *vert_source, const char *geom_source, + const char *frag_source) +{ + GLuint vert_shader = compile_shader(GL_VERTEX_SHADER, vert_source); + GLuint geom_shader = compile_shader(GL_GEOMETRY_SHADER, geom_source); + GLuint frag_shader = compile_shader(GL_FRAGMENT_SHADER, frag_source); + + GLuint shader_prog = glCreateProgram(); + glAttachShader(shader_prog, vert_shader); + glAttachShader(shader_prog, geom_shader); + glAttachShader(shader_prog, frag_shader); + glLinkProgram(shader_prog); + + GLint success; + glGetProgramiv(shader_prog, GL_LINK_STATUS, &success); + if (!success) { + char log[512]; + glGetProgramInfoLog(shader_prog, sizeof(log), NULL, log); + log[sizeof(log) - 1] = '\0'; + fprintf(stderr, "GL shader linking failed: %s\n", log); + assert(false); + } + + glDeleteShader(vert_shader); + glDeleteShader(geom_shader); + glDeleteShader(frag_shader); + + return shader_prog; +} + +static void check_gl_error(const char *context) +{ + GLenum err; + int limit = 10; + + while ((err = glGetError()) != GL_NO_ERROR) { + fprintf(stderr, "GPU properties OpenGL error 0x%X in %s\n", err, + context); + if (--limit <= 0) { + fprintf( + stderr, + "Too many OpenGL errors in %s — possible infinite error loop\n", + context); + break; + } + } +} + +static uint8_t *render_geom_shader_triangles(int width, int height) +{ + // Create the framebuffer and renderbuffer for 
it + GLuint fbo, rbo; + glGenFramebuffers(1, &fbo); + glBindFramebuffer(GL_FRAMEBUFFER, fbo); + glGenRenderbuffers(1, &rbo); + glBindRenderbuffer(GL_RENDERBUFFER, rbo); + glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, width, height); + check_gl_error("glRenderbufferStorage"); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_RENDERBUFFER, rbo); + check_gl_error("glFramebufferRenderbuffer"); + + assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); + + GLuint shader_prog = create_program( + vertex_shader_source, geometry_shader_source, fragment_shader_source); + assert(shader_prog != 0); + + glUseProgram(shader_prog); + check_gl_error("glUseProgram"); + glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glClearColor(0.0f, 0.0f, 0.0f, 1.0f); + glClear(GL_COLOR_BUFFER_BIT); + check_gl_error("glClear"); + + glDisable(GL_CULL_FACE); + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); + glProvokingVertex(GL_FIRST_VERTEX_CONVENTION); + glViewport(0, 0, width, height); + check_gl_error("state setup"); + + GLuint vao; + glGenVertexArrays(1, &vao); + glBindVertexArray(vao); + check_gl_error("glBindVertexArray"); + glDrawArrays(GL_TRIANGLES, 0, 3); + glDrawArrays(GL_TRIANGLE_STRIP, 3, 4); + glDrawArrays(GL_TRIANGLE_FAN, 7, 4); + check_gl_error("glDrawArrays"); + glFinish(); // glFinish should be unnecessary + + void *pixels = g_malloc(width * height * 4); + assert(pixels != NULL); + glReadBuffer(GL_COLOR_ATTACHMENT0); + glReadPixels(0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, pixels); + check_gl_error("glReadPixels"); + + glBindVertexArray(0); + glDeleteVertexArrays(1, &vao); + glUseProgram(0); + glDeleteProgram(shader_prog); + glBindFramebuffer(GL_FRAMEBUFFER, 0); + glDeleteFramebuffers(1, &fbo); + glBindRenderbuffer(GL_RENDERBUFFER, 0); + glDeleteRenderbuffers(1, &rbo); + + return (uint8_t *)pixels; +} + +static bool colors_match(int r1, int g1, int b1, int r2, int g2, int b2) +{ + int dr = r1 - r2; + int dg = g1 - g2; 
+ int db = b1 - b2; + + return (dr * dr + dg * dg + db * db) <= 16; +} + +static int get_color_index(uint8_t *pixel) +{ + int r = pixel[0]; + int g = pixel[1]; + int b = pixel[2]; + + if (colors_match(r, g, b, 0, 0, 255)) { + return 0; + } else if (colors_match(r, g, b, 0, 255, 0)) { + return 1; + } else if (colors_match(r, g, b, 0, 255, 255)) { + return 2; + } else if (colors_match(r, g, b, 255, 0, 0)) { + return 3; + } else { + return -1; + } +} + +static int calc_offset_from_ndc(float x, float y, int width, int height) +{ + int x0 = (int)((x + 1.0f) * width * 0.5f); + int y0 = (int)((y + 1.0f) * height * 0.5f); + + x0 = MAX(x0, 0); + y0 = MAX(y0, 0); + x0 = MIN(x0, width - 1); + y0 = MIN(y0, height - 1); + + return y0 * width + x0; +} + +static void determine_triangle_winding_order(uint8_t *pixels, int width, + int height, GPUProperties *props) +{ + uint8_t *tri_pix = + pixels + calc_offset_from_ndc(-0.5f, -0.5f, width, height) * 4; + uint8_t *strip0_pix = + pixels + calc_offset_from_ndc(0.417f, -0.417f, width, height) * 4; + uint8_t *strip1_pix = + pixels + calc_offset_from_ndc(0.583f, -0.583f, width, height) * 4; + uint8_t *fan_pix = + pixels + calc_offset_from_ndc(-0.583f, 0.417f, width, height) * 4; + uint8_t *fan2_pix = + pixels + calc_offset_from_ndc(-0.417f, 0.583f, width, height) * 4; + + int tri_rot = get_color_index(tri_pix); + if (tri_rot < 0 || tri_rot > 2) { + fprintf(stderr, + "Could not determine triangle rotation, got color: R=%d, G=%d, " + "B=%d\n", + tri_pix[0], tri_pix[1], tri_pix[2]); + tri_rot = 0; + } + props->geom_shader_winding.tri = tri_rot; + + int strip0_rot = get_color_index(strip0_pix); + if (strip0_rot < 0 || strip0_rot > 2) { + fprintf(stderr, + "Could not determine triangle strip0 rotation, got color: " + "R=%d, G=%d, B=%d\n", + strip0_pix[0], strip0_pix[1], strip0_pix[2]); + strip0_rot = 0; + } + int strip1_rot = get_color_index(strip1_pix) - 1; + if (strip1_rot < 0 || strip1_rot > 2) { + fprintf(stderr, + "Could not determine 
triangle strip1 rotation, got color: " + "R=%d, G=%d, B=%d\n", + strip1_pix[0], strip1_pix[1], strip1_pix[2]); + strip1_rot = 0; + } + props->geom_shader_winding.tri_strip0 = strip0_rot; + props->geom_shader_winding.tri_strip1 = (3 - strip1_rot) % 3; + + int fan_rot = get_color_index(fan_pix); + int fan2_rot = get_color_index(fan2_pix); + if (fan2_rot == 0) { + fan2_rot = 1; + } + fan2_rot--; + if (fan_rot != fan2_rot) { + fprintf(stderr, + "Unexpected inconsistency in triangle fan winding, got colors: " + "R=%d, G=%d, B=%d and R=%d, G=%d, B=%d\n", + fan_pix[0], fan_pix[1], fan_pix[2], fan2_pix[0], fan2_pix[1], + fan2_pix[2]); + fan_rot = 1; + } + if (fan_rot < 0 || fan_rot > 2) { + fprintf(stderr, + "Could not determine triangle fan rotation, got color: R=%d, " + "G=%d, B=%d\n", + fan_pix[0], fan_pix[1], fan_pix[2]); + fan_rot = 1; + } + props->geom_shader_winding.tri_fan = (fan_rot + 2) % 3; +} + +void pgraph_gl_determine_gpu_properties(NV2AState *d) +{ + const int width = 640; + const int height = 480; + + GloContext *g_context = glo_context_create(); + glo_set_current(g_context); + + uint8_t *pixels = render_geom_shader_triangles(width, height); + determine_triangle_winding_order(pixels, width, height, + &pgraph_gl_gpu_properties); + g_free(pixels); + + fprintf(stderr, "GL geometry shader winding: %d, %d, %d, %d\n", + pgraph_gl_gpu_properties.geom_shader_winding.tri, + pgraph_gl_gpu_properties.geom_shader_winding.tri_strip0, + pgraph_gl_gpu_properties.geom_shader_winding.tri_strip1, + pgraph_gl_gpu_properties.geom_shader_winding.tri_fan); + + glo_context_destroy(g_context); + glo_set_current(g_nv2a_context_render); +} + +GPUProperties *pgraph_gl_get_gpu_properties(void) +{ + return &pgraph_gl_gpu_properties; +} diff --git a/hw/xbox/nv2a/pgraph/gl/meson.build b/hw/xbox/nv2a/pgraph/gl/meson.build index ab25eacb7d..c19a542adb 100644 --- a/hw/xbox/nv2a/pgraph/gl/meson.build +++ b/hw/xbox/nv2a/pgraph/gl/meson.build @@ -3,6 +3,7 @@ specific_ss.add([sdl, gloffscreen, 
files( 'debug.c', 'display.c', 'draw.c', + 'gpuprops.c', 'renderer.c', 'reports.c', 'shaders.c', diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.c b/hw/xbox/nv2a/pgraph/gl/renderer.c index 36b8029439..74ece1c5f9 100644 --- a/hw/xbox/nv2a/pgraph/gl/renderer.c +++ b/hw/xbox/nv2a/pgraph/gl/renderer.c @@ -66,6 +66,8 @@ static void pgraph_gl_init(NV2AState *d, Error **errp) pg->uniform_attrs = 0; pg->swizzle_attrs = 0; + + pgraph_gl_determine_gpu_properties(d); } static void pgraph_gl_finalize(NV2AState *d) @@ -195,6 +197,7 @@ static PGRAPHRenderer pgraph_gl_renderer = { .set_surface_scale_factor = pgraph_gl_set_surface_scale_factor, .get_surface_scale_factor = pgraph_gl_get_surface_scale_factor, .get_framebuffer_surface = pgraph_gl_get_framebuffer_surface, + .get_gpu_properties = pgraph_gl_get_gpu_properties, } }; diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.h b/hw/xbox/nv2a/pgraph/gl/renderer.h index 28277fcdf5..5a2524bfbb 100644 --- a/hw/xbox/nv2a/pgraph/gl/renderer.h +++ b/hw/xbox/nv2a/pgraph/gl/renderer.h @@ -286,5 +286,7 @@ void pgraph_gl_shader_write_cache_reload_list(PGRAPHState *pg); void pgraph_gl_set_surface_scale_factor(NV2AState *d, unsigned int scale); unsigned int pgraph_gl_get_surface_scale_factor(NV2AState *d); int pgraph_gl_get_framebuffer_surface(NV2AState *d); +void pgraph_gl_determine_gpu_properties(NV2AState *d); +GPUProperties *pgraph_gl_get_gpu_properties(void); #endif diff --git a/hw/xbox/nv2a/pgraph/glsl/geom.c b/hw/xbox/nv2a/pgraph/glsl/geom.c index dd5dc363f5..ec4d19fe5f 100644 --- a/hw/xbox/nv2a/pgraph/glsl/geom.c +++ b/hw/xbox/nv2a/pgraph/glsl/geom.c @@ -45,6 +45,39 @@ void pgraph_glsl_set_geom_state(PGRAPHState *pg, GeomState *state) state->z_perspective = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE; + + if (pg->renderer->ops.get_gpu_properties) { + GPUProperties *gpu_props = pg->renderer->ops.get_gpu_properties(); + + switch (state->primitive_mode) { + case PRIM_TYPE_TRIANGLES: + state->tri_rot0 = 
gpu_props->geom_shader_winding.tri; + state->tri_rot1 = state->tri_rot0; + break; + case PRIM_TYPE_TRIANGLE_STRIP: + state->tri_rot0 = gpu_props->geom_shader_winding.tri_strip0; + state->tri_rot1 = gpu_props->geom_shader_winding.tri_strip1; + break; + case PRIM_TYPE_TRIANGLE_FAN: + case PRIM_TYPE_POLYGON: + state->tri_rot0 = gpu_props->geom_shader_winding.tri_fan; + state->tri_rot1 = state->tri_rot0; + break; + default: + break; + } + } +} + +static const char *get_vertex_order(int rot) +{ + if (rot == 0) { + return "ivec3(0, 1, 2)"; + } else if (rot == 1) { + return "ivec3(2, 0, 1)"; + } else { + return "ivec3(1, 2, 0)"; + } } bool pgraph_glsl_need_geom(const GeomState *state) @@ -115,49 +148,47 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) case PRIM_TYPE_TRIANGLE_STRIP: case PRIM_TYPE_TRIANGLE_FAN: if (state->first_vertex_is_provoking) { - if (state->primitive_mode == PRIM_TYPE_TRIANGLE_STRIP) { - provoking_index = "gl_PrimitiveIDIn & 1"; - } else if (state->primitive_mode == PRIM_TYPE_TRIANGLE_FAN) { - provoking_index = "1"; - } else { - provoking_index = "0"; - } + provoking_index = "v[0]"; + } else if (state->primitive_mode == PRIM_TYPE_TRIANGLE_STRIP) { + provoking_index = "v[2 - (gl_PrimitiveIDIn & 1)]"; + } else if (state->primitive_mode == PRIM_TYPE_TRIANGLE_FAN) { + provoking_index = "v[1]"; } else { - provoking_index = "2"; + provoking_index = "v[2]"; } need_triz = true; layout_in = "layout(triangles) in;\n"; if (polygon_mode == POLY_MODE_FILL) { layout_out = "layout(triangle_strip, max_vertices = 3) out;\n"; - body = " mat4 pz = calc_triz(0, 1, 2);\n" - " emit_vertex(0, pz);\n" - " emit_vertex(1, pz);\n" - " emit_vertex(2, pz);\n" + body = " mat4 pz = calc_triz(v[0], v[1], v[2]);\n" + " emit_vertex(v[0], pz);\n" + " emit_vertex(v[1], pz);\n" + " emit_vertex(v[2], pz);\n" " EndPrimitive();\n"; } else if (polygon_mode == POLY_MODE_LINE) { need_linez = true; layout_out = "layout(line_strip, max_vertices = 4) out;\n"; - 
body = " float triMZ = calc_triz(0, 1, 2)[3].x;\n" - " mat4 pz1 = calc_linez(0, 1);\n" - " pz1[3].x = triMZ;\n" - " mat4 pz2 = calc_linez(1, 2);\n" - " pz2[3].x = triMZ;\n" - " mat4 pz3 = calc_linez(2, 0);\n" - " pz3[3].x = triMZ;\n" - " emit_vertex(0, pz1);\n" - " emit_vertex(1, pz1);\n" - " emit_vertex(2, pz2);\n" - " emit_vertex(0, pz3);\n" + body = " float dz = calc_triz(v[0], v[1], v[2])[3].x;\n" + " mat4 pz1 = calc_linez(v[0], v[1]);\n" + " pz1[3].x = dz;\n" + " mat4 pz2 = calc_linez(v[1], v[2]);\n" + " pz2[3].x = dz;\n" + " mat4 pz3 = calc_linez(v[2], v[0]);\n" + " pz3[3].x = dz;\n" + " emit_vertex(v[0], pz1);\n" + " emit_vertex(v[1], pz2);\n" + " emit_vertex(v[2], pz3);\n" + " emit_vertex(v[0], pz3);\n" " EndPrimitive();\n"; } else { assert(polygon_mode == POLY_MODE_POINT); layout_out = "layout(points, max_vertices = 3) out;\n"; - body = " mat4 pz = calc_triz(0, 1, 2);\n" - " emit_vertex(0, mat4(pz[0], pz[0], pz[0], pz[3]));\n" + body = " mat4 pz = calc_triz(v[0], v[1], v[2]);\n" + " emit_vertex(v[0], mat4(pz[0], pz[0], pz[0], pz[3]));\n" " EndPrimitive();\n" - " emit_vertex(1, mat4(pz[1], pz[1], pz[1], pz[3]));\n" + " emit_vertex(v[1], mat4(pz[1], pz[1], pz[1], pz[3]));\n" " EndPrimitive();\n" - " emit_vertex(2, mat4(pz[2], pz[2], pz[2], pz[3]));\n" + " emit_vertex(v[2], mat4(pz[2], pz[2], pz[2], pz[3]));\n" " EndPrimitive();\n"; } break; @@ -188,9 +219,9 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) " mat4 pz4 = calc_linez(3, 0);\n" " pz4[3].x = pzs[3].x;\n" " emit_vertex(0, pz1);\n" - " emit_vertex(1, pz1);\n" - " emit_vertex(2, pz2);\n" - " emit_vertex(3, pz3);\n" + " emit_vertex(1, pz2);\n" + " emit_vertex(2, pz3);\n" + " emit_vertex(3, pz4);\n" " emit_vertex(0, pz4);\n" " EndPrimitive();\n"; } else { @@ -237,9 +268,9 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) " mat4 pz4 = calc_linez(2, 0);\n" " pz4[3].x = pz[3].x;\n" " emit_vertex(0, pz1);\n" - " emit_vertex(1, pz1);\n" - " 
emit_vertex(3, pz2);\n" - " emit_vertex(2, pz3);\n" + " emit_vertex(1, pz2);\n" + " emit_vertex(3, pz3);\n" + " emit_vertex(2, pz4);\n" " emit_vertex(0, pz4);\n" " EndPrimitive();\n"; } else { @@ -259,17 +290,18 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) } break; case PRIM_TYPE_POLYGON: - provoking_index = "0"; if (polygon_mode == POLY_MODE_FILL) { + provoking_index = "v[2]"; need_triz = true; layout_in = "layout(triangles) in;\n"; layout_out = "layout(triangle_strip, max_vertices = 3) out;\n"; - body = " mat4 pz = calc_triz(0, 1, 2);\n" - " emit_vertex(0, pz);\n" - " emit_vertex(1, pz);\n" - " emit_vertex(2, pz);\n" + body = " mat4 pz = calc_triz(v[0], v[1], v[2]);\n" + " emit_vertex(v[0], pz);\n" + " emit_vertex(v[1], pz);\n" + " emit_vertex(v[2], pz);\n" " EndPrimitive();\n"; } else if (polygon_mode == POLY_MODE_LINE) { + provoking_index = "0"; need_linez = true; /* FIXME: input here is lines and not triangles so we cannot * calculate triangle plane slope. Also, the first vertex of the @@ -310,6 +342,30 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) pgraph_glsl_get_vtx_header(output, opts.vulkan, state->smooth_shading, false, false, false); + char vertex_order_buf[80]; + const char *vertex_order_body = ""; + + if (need_triz) { + /* Input triangle absolute vertex order is not guaranteed by OpenGL + * or Vulkan, only winding order is. Reorder vertices here to first + * vertex convention which we assumed above when setting + * provoking_index. This mostly only matters with flat shading, but + * we reorder always to get consistent results across GPU vendors + * regarding floating-point rounding when calculating with vtxPos0/1/2. 
+ */ + mstring_append(output, "ivec3 v;\n"); + if (state->tri_rot0 == state->tri_rot1) { + snprintf(vertex_order_buf, sizeof(vertex_order_buf), " v = %s;\n", + get_vertex_order(state->tri_rot0)); + } else { + snprintf(vertex_order_buf, sizeof(vertex_order_buf), + " v = (gl_PrimitiveIDIn & 1) == 0 ? %s : %s;\n", + get_vertex_order(state->tri_rot0), + get_vertex_order(state->tri_rot1)); + } + vertex_order_body = vertex_order_buf; + } + if (state->smooth_shading) { provoking_index = "index"; } @@ -422,8 +478,9 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) "\n" "void main() {\n" "%s" + "%s" "}\n", - body); + vertex_order_body, body); return output; } diff --git a/hw/xbox/nv2a/pgraph/glsl/geom.h b/hw/xbox/nv2a/pgraph/glsl/geom.h index 55229d2e46..a00302f39b 100644 --- a/hw/xbox/nv2a/pgraph/glsl/geom.h +++ b/hw/xbox/nv2a/pgraph/glsl/geom.h @@ -32,6 +32,8 @@ typedef struct { bool smooth_shading; bool first_vertex_is_provoking; bool z_perspective; + short tri_rot0; + short tri_rot1; } GeomState; typedef struct GenGeomGlslOptions { diff --git a/hw/xbox/nv2a/pgraph/pgraph.h b/hw/xbox/nv2a/pgraph/pgraph.h index 49f941d450..0449270b55 100644 --- a/hw/xbox/nv2a/pgraph/pgraph.h +++ b/hw/xbox/nv2a/pgraph/pgraph.h @@ -96,6 +96,15 @@ typedef struct BetaState { uint32_t beta; } BetaState; +typedef struct GPUProperties { + struct { + short tri; + short tri_strip0; + short tri_strip1; + short tri_fan; + } geom_shader_winding; +} GPUProperties; + typedef struct PGRAPHRenderer { CONFIG_DISPLAY_RENDERER type; const char *name; @@ -122,6 +131,7 @@ typedef struct PGRAPHRenderer { void (*set_surface_scale_factor)(NV2AState *d, unsigned int scale); unsigned int (*get_surface_scale_factor)(NV2AState *d); int (*get_framebuffer_surface)(NV2AState *d); + GPUProperties *(*get_gpu_properties)(void); } ops; } PGRAPHRenderer; diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index e4bd562149..d78087e56b 100644 --- 
a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -788,23 +788,6 @@ static void create_pipeline(PGRAPHState *pg) void *rasterizer_next_struct = NULL; - VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_state; - - if (r->provoking_vertex_extension_enabled) { - // Use last provoking vertex convention to match geometry shader - // assumption, because Vulkan default is first vertex convention. - VkProvokingVertexModeEXT provoking_mode = - VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT; - - provoking_state = - (VkPipelineRasterizationProvokingVertexStateCreateInfoEXT){ - .sType = - VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, - .provokingVertexMode = provoking_mode, - }; - rasterizer_next_struct = &provoking_state; - } - VkPipelineRasterizationStateCreateInfo rasterizer = { .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, .depthClampEnable = VK_TRUE, diff --git a/hw/xbox/nv2a/pgraph/vk/gpuprops.c b/hw/xbox/nv2a/pgraph/vk/gpuprops.c new file mode 100644 index 0000000000..1562be0993 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/gpuprops.c @@ -0,0 +1,605 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024-2025 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "debug.h" +#include "renderer.h" + +static GPUProperties pgraph_vk_gpu_properties; + +static const char *vertex_shader_source = + "#version 450\n" + "layout(location = 0) out vec3 v_fragColor;\n" + "\n" + "vec2 positions[11] = vec2[](\n" + " vec2(-0.5, -0.75),\n" + " vec2(-0.25, -0.25),\n" + " vec2(-0.75, -0.25),\n" + " vec2(0.25, -0.25),\n" + " vec2(0.25, -0.75),\n" + " vec2(0.75, -0.25),\n" + " vec2(0.75, -0.75),\n" + " vec2(-0.75, 0.75),\n" + " vec2(-0.75, 0.25),\n" + " vec2(-0.25, 0.25),\n" + " vec2(-0.25, 0.75)\n" + ");\n" + "\n" + "vec3 colors[11] = vec3[](\n" + " vec3(0.0, 0.0, 1.0),\n" + " vec3(0.0, 1.0, 0.0),\n" + " vec3(0.0, 1.0, 1.0),\n" + " vec3(0.0, 0.0, 1.0),\n" + " vec3(0.0, 1.0, 0.0),\n" + " vec3(0.0, 1.0, 1.0),\n" + " vec3(1.0, 0.0, 0.0),\n" + " vec3(0.0, 0.0, 1.0),\n" + " vec3(0.0, 1.0, 0.0),\n" + " vec3(0.0, 1.0, 1.0),\n" + " vec3(1.0, 0.0, 0.0)\n" + ");\n" + "\n" + "void main() {\n" + " gl_Position = vec4(positions[gl_VertexIndex], 0.0, 1.0);\n" + " v_fragColor = colors[gl_VertexIndex];\n" + "}\n"; + +static const char *geometry_shader_source = + "#version 450\n" + "layout(triangles) in;\n" + "layout(triangle_strip, max_vertices = 3) out;\n" + "layout(location = 0) out vec3 fragColor;\n" + "layout(location = 0) in vec3 v_fragColor[];\n" + "\n" + "void emit_vertex(int index) {\n" + " gl_Position = gl_in[index].gl_Position;\n" + " fragColor = v_fragColor[0];\n" + " EmitVertex();\n" + "}\n" + "\n" + "void main() {\n" + " emit_vertex(0);\n" + " emit_vertex(1);\n" + " emit_vertex(2);\n" + " EndPrimitive();\n" + "}\n"; + +static const char *fragment_shader_source = + "#version 450\n" + "layout(location = 0) out vec4 outColor;\n" + "layout(location = 0) in vec3 fragColor;\n" + "\n" + "void main() {\n" + " outColor = vec4(fragColor, 1.0);\n" + "}\n"; + +static VkPipeline create_test_pipeline( + NV2AState *d, VkPrimitiveTopology primitive_topology, + VkShaderModule vert_shader_module, VkShaderModule geom_shader_module, + VkShaderModule 
frag_shader_module, VkPipelineLayout pipeline_layout, + VkRenderPass render_pass, int width, int height) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + VkPipelineShaderStageCreateInfo shader_stages[] = { + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = vert_shader_module, + .pName = "main", + }, + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_GEOMETRY_BIT, + .module = geom_shader_module, + .pName = "main", + }, + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = frag_shader_module, + .pName = "main", + }, + }; + + VkPipelineVertexInputStateCreateInfo vertex_input_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 0, + .vertexAttributeDescriptionCount = 0, + }; + + VkPipelineInputAssemblyStateCreateInfo input_assembly = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = primitive_topology, + .primitiveRestartEnable = VK_FALSE, + }; + + VkViewport viewport = { + .x = 0.0f, + .y = 0.0f, + .width = (float)width, + .height = (float)height, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }; + VkRect2D scissor = { + .offset = { 0, 0 }, + .extent.width = width, + .extent.height = height, + }; + VkPipelineViewportStateCreateInfo viewport_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .pViewports = &viewport, + .scissorCount = 1, + .pScissors = &scissor, + }; + + VkPipelineRasterizationStateCreateInfo rasterizer = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .depthClampEnable = VK_FALSE, + .rasterizerDiscardEnable = VK_FALSE, + .polygonMode = VK_POLYGON_MODE_FILL, + .lineWidth = 1.0f, + 
.cullMode = VK_CULL_MODE_BACK_BIT, + .frontFace = VK_FRONT_FACE_CLOCKWISE, + .depthBiasEnable = VK_FALSE, + }; + + VkPipelineMultisampleStateCreateInfo multisampling = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .sampleShadingEnable = VK_FALSE, + .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, + }; + + VkPipelineColorBlendAttachmentState color_blend_attachment = { + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, + .blendEnable = VK_FALSE, + }; + + VkPipelineColorBlendStateCreateInfo color_blending = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_COPY, + .attachmentCount = 1, + .pAttachments = &color_blend_attachment, + .blendConstants = { 0.0f, 0.0f, 0.0f, 0.0f }, + }; + + VkGraphicsPipelineCreateInfo pipeline_info = { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = ARRAY_SIZE(shader_stages), + .pStages = shader_stages, + .pVertexInputState = &vertex_input_info, + .pInputAssemblyState = &input_assembly, + .pViewportState = &viewport_state, + .pRasterizationState = &rasterizer, + .pMultisampleState = &multisampling, + .pColorBlendState = &color_blending, + .layout = pipeline_layout, + .renderPass = render_pass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + }; + + VkPipeline pipeline; + VK_CHECK(vkCreateGraphicsPipelines(r->device, VK_NULL_HANDLE, 1, + &pipeline_info, NULL, &pipeline)); + + return pipeline; +} + +static uint8_t *render_geom_shader_triangles(NV2AState *d, int width, + int height) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + // Create image + VkImageCreateInfo image_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .extent.width = width, + .extent.height = height, + .extent.depth = 1, + .mipLevels = 1, + .arrayLayers = 1, + .format = 
VK_FORMAT_R8G8B8A8_UNORM, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT, + .samples = VK_SAMPLE_COUNT_1_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + }; + + VkImage offscreen_image; + VK_CHECK( + vkCreateImage(r->device, &image_create_info, NULL, &offscreen_image)); + + // Allocate and bind image memory + VkMemoryRequirements memory_requirements; + vkGetImageMemoryRequirements(r->device, offscreen_image, + &memory_requirements); + + VkMemoryAllocateInfo alloc_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = memory_requirements.size, + .memoryTypeIndex = + pgraph_vk_get_memory_type(pg, memory_requirements.memoryTypeBits, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT), + }; + + VkDeviceMemory image_memory; + VK_CHECK(vkAllocateMemory(r->device, &alloc_info, NULL, &image_memory)); + VK_CHECK(vkBindImageMemory(r->device, offscreen_image, image_memory, 0)); + + // Create Image View + VkImageViewCreateInfo image_view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = offscreen_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = image_create_info.format, + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }; + + VkImageView offscreen_image_view; + VK_CHECK(vkCreateImageView(r->device, &image_view_create_info, NULL, + &offscreen_image_view)); + + // Buffer for image CPU access + VkBufferCreateInfo buffer_info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .size = width * height * 4, // RGBA8 = 4 bytes per pixel + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + }; + + VkBuffer cpu_buffer; + VK_CHECK(vkCreateBuffer(r->device, &buffer_info, NULL, &cpu_buffer)); + + // Allocate and bind memory for image CPU access + VkMemoryRequirements host_mem_requirements; + 
vkGetBufferMemoryRequirements(r->device, cpu_buffer, + &host_mem_requirements); + + VkMemoryAllocateInfo host_alloc_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = host_mem_requirements.size, + .memoryTypeIndex = + pgraph_vk_get_memory_type(pg, host_mem_requirements.memoryTypeBits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT), + }; + + VkDeviceMemory cpu_buffer_memory; + VK_CHECK(vkAllocateMemory(r->device, &host_alloc_info, NULL, + &cpu_buffer_memory)); + VK_CHECK(vkBindBufferMemory(r->device, cpu_buffer, cpu_buffer_memory, 0)); + + + VkAttachmentDescription color_attachment = { + .format = VK_FORMAT_R8G8B8A8_UNORM, + .samples = VK_SAMPLE_COUNT_1_BIT, + .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + }; + + VkAttachmentReference color_ref = { + 0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL + }; + + VkSubpassDescription subpass = { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .colorAttachmentCount = 1, + .pColorAttachments = &color_ref, + }; + + VkRenderPassCreateInfo render_pass_info = { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &color_attachment, + .subpassCount = 1, + .pSubpasses = &subpass, + }; + + VkRenderPass render_pass; + VK_CHECK( + vkCreateRenderPass(r->device, &render_pass_info, NULL, &render_pass)); + + VkFramebufferCreateInfo fb_info = { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .renderPass = render_pass, + .attachmentCount = 1, + .pAttachments = &offscreen_image_view, + .width = width, + .height = height, + .layers = 1, + }; + + VkFramebuffer framebuffer; + VK_CHECK(vkCreateFramebuffer(r->device, &fb_info, NULL, &framebuffer)); + + ShaderModuleInfo *vsh_info = pgraph_vk_create_shader_module_from_glsl( + r, VK_SHADER_STAGE_VERTEX_BIT, vertex_shader_source); + 
ShaderModuleInfo *geom_info = pgraph_vk_create_shader_module_from_glsl( + r, VK_SHADER_STAGE_GEOMETRY_BIT, geometry_shader_source); + ShaderModuleInfo *psh_info = pgraph_vk_create_shader_module_from_glsl( + r, VK_SHADER_STAGE_FRAGMENT_BIT, fragment_shader_source); + + VkShaderModule vert_shader_module = vsh_info->module; + VkShaderModule geom_shader_module = geom_info->module; + VkShaderModule frag_shader_module = psh_info->module; + + VkPipelineLayoutCreateInfo pipeline_layout_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 0, + .pushConstantRangeCount = 0, + }; + + VkPipelineLayout pipeline_layout; + VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL, + &pipeline_layout)); + + VkPipeline tri_pipeline = create_test_pipeline( + d, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, vert_shader_module, + geom_shader_module, frag_shader_module, pipeline_layout, render_pass, + width, height); + + VkPipeline strip_pipeline = create_test_pipeline( + d, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, vert_shader_module, + geom_shader_module, frag_shader_module, pipeline_layout, render_pass, + width, height); + + VkPipeline fan_pipeline = create_test_pipeline( + d, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN, vert_shader_module, + geom_shader_module, frag_shader_module, pipeline_layout, render_pass, + width, height); + + pgraph_vk_destroy_shader_module(r, psh_info); + pgraph_vk_destroy_shader_module(r, geom_info); + pgraph_vk_destroy_shader_module(r, vsh_info); + + VkCommandBufferBeginInfo begin_info = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + }; + VK_CHECK(vkBeginCommandBuffer(r->command_buffer, &begin_info)); + + // Begin render pass + VkClearValue clear_color = { + .color.float32 = { 0.0f, 0.0f, 0.0f, 1.0f }, + }; + VkRenderPassBeginInfo rp_begin = { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = render_pass, + .framebuffer = framebuffer, + .renderArea.extent.width = width, + .renderArea.extent.height = 
height, + .clearValueCount = 1, + .pClearValues = &clear_color, + }; + + vkCmdBeginRenderPass(r->command_buffer, &rp_begin, + VK_SUBPASS_CONTENTS_INLINE); + + vkCmdBindPipeline(r->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + tri_pipeline); + vkCmdDraw(r->command_buffer, 3, 1, 0, 0); + vkCmdBindPipeline(r->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + strip_pipeline); + vkCmdDraw(r->command_buffer, 4, 1, 3, 0); + vkCmdBindPipeline(r->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + fan_pipeline); + vkCmdDraw(r->command_buffer, 4, 1, 7, 0); + + vkCmdEndRenderPass(r->command_buffer); + + // Synchronize and transition framebuffer for copying to CPU + pgraph_vk_transition_image_layout(pg, r->command_buffer, offscreen_image, + image_create_info.format, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + + // Copy framebuffer to CPU memory + VkBufferImageCopy region = { + .bufferOffset = 0, + .bufferRowLength = 0, // tightly packed + .bufferImageHeight = 0, + + .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .imageSubresource.mipLevel = 0, + .imageSubresource.baseArrayLayer = 0, + .imageSubresource.layerCount = 1, + + .imageOffset = { 0, 0, 0 }, + .imageExtent = { width, height, 1 }, + }; + + vkCmdCopyImageToBuffer(r->command_buffer, offscreen_image, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, cpu_buffer, 1, + &region); + + VK_CHECK(vkEndCommandBuffer(r->command_buffer)); + + VkSubmitInfo submit_info = { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = &r->command_buffer, + }; + + VK_CHECK(vkQueueSubmit(r->queue, 1, &submit_info, VK_NULL_HANDLE)); + VK_CHECK(vkQueueWaitIdle(r->queue)); + + void *data; + VK_CHECK( + vkMapMemory(r->device, cpu_buffer_memory, 0, VK_WHOLE_SIZE, 0, &data)); + void *pixels = g_malloc(width * height * 4); + assert(pixels != NULL); + memcpy(pixels, data, width * height * 4); + vkUnmapMemory(r->device, cpu_buffer_memory); + + vkDestroyPipeline(r->device,
strip_pipeline, NULL); + vkDestroyPipeline(r->device, fan_pipeline, NULL); + vkDestroyPipeline(r->device, tri_pipeline, NULL); + vkDestroyPipelineLayout(r->device, pipeline_layout, NULL); + vkDestroyFramebuffer(r->device, framebuffer, NULL); + vkDestroyRenderPass(r->device, render_pass, NULL); + vkDestroyImageView(r->device, offscreen_image_view, NULL); + vkDestroyBuffer(r->device, cpu_buffer, NULL); + vkFreeMemory(r->device, cpu_buffer_memory, NULL); + vkDestroyImage(r->device, offscreen_image, NULL); + vkFreeMemory(r->device, image_memory, NULL); + + return (uint8_t *)pixels; +} + +static bool colors_match(int r1, int g1, int b1, int r2, int g2, int b2) +{ + int dr = r1 - r2; + int dg = g1 - g2; + int db = b1 - b2; + + return (dr * dr + dg * dg + db * db) <= 16; +} + +static int get_color_index(uint8_t *pixel) +{ + int r = pixel[0]; + int g = pixel[1]; + int b = pixel[2]; + + if (colors_match(r, g, b, 0, 0, 255)) { + return 0; + } else if (colors_match(r, g, b, 0, 255, 0)) { + return 1; + } else if (colors_match(r, g, b, 0, 255, 255)) { + return 2; + } else if (colors_match(r, g, b, 255, 0, 0)) { + return 3; + } else { + return -1; + } +} + +static int calc_offset_from_ndc(float x, float y, int width, int height) +{ + int x0 = (int)((x + 1.0f) * width * 0.5f); + int y0 = (int)((y + 1.0f) * height * 0.5f); + + x0 = MAX(x0, 0); + y0 = MAX(y0, 0); + x0 = MIN(x0, width - 1); + y0 = MIN(y0, height - 1); + + return y0 * width + x0; +} + +static void determine_triangle_winding_order(uint8_t *pixels, int width, + int height, GPUProperties *props) +{ + uint8_t *tri_pix = + pixels + calc_offset_from_ndc(-0.5f, -0.5f, width, height) * 4; + uint8_t *strip0_pix = + pixels + calc_offset_from_ndc(0.417f, -0.417f, width, height) * 4; + uint8_t *strip1_pix = + pixels + calc_offset_from_ndc(0.583f, -0.583f, width, height) * 4; + uint8_t *fan_pix = + pixels + calc_offset_from_ndc(-0.583f, 0.417f, width, height) * 4; + uint8_t *fan2_pix = + pixels + calc_offset_from_ndc(-0.417f, 
0.583f, width, height) * 4; + + int tri_rot = get_color_index(tri_pix); + if (tri_rot < 0 || tri_rot > 2) { + fprintf(stderr, + "Could not determine triangle rotation, got color: R=%d, G=%d, " + "B=%d\n", + tri_pix[0], tri_pix[1], tri_pix[2]); + tri_rot = 0; + } + props->geom_shader_winding.tri = tri_rot; + + int strip0_rot = get_color_index(strip0_pix); + if (strip0_rot < 0 || strip0_rot > 2) { + fprintf(stderr, + "Could not determine triangle strip0 rotation, got color: " + "R=%d, G=%d, B=%d\n", + strip0_pix[0], strip0_pix[1], strip0_pix[2]); + strip0_rot = 0; + } + int strip1_rot = get_color_index(strip1_pix) - 1; + if (strip1_rot < 0 || strip1_rot > 2) { + fprintf(stderr, + "Could not determine triangle strip1 rotation, got color: " + "R=%d, G=%d, B=%d\n", + strip1_pix[0], strip1_pix[1], strip1_pix[2]); + strip1_rot = 0; + } + props->geom_shader_winding.tri_strip0 = strip0_rot; + props->geom_shader_winding.tri_strip1 = (3 - strip1_rot) % 3; + + int fan_rot = get_color_index(fan_pix); + int fan2_rot = get_color_index(fan2_pix); + if (fan2_rot == 0) { + fan2_rot = 1; + } + fan2_rot--; + if (fan_rot != fan2_rot) { + fprintf(stderr, + "Unexpected inconsistency in triangle fan winding, got colors: " + "R=%d, G=%d, B=%d and R=%d, G=%d, B=%d\n", + fan_pix[0], fan_pix[1], fan_pix[2], fan2_pix[0], fan2_pix[1], + fan2_pix[2]); + fan_rot = 1; + } + if (fan_rot < 0 || fan_rot > 2) { + fprintf(stderr, + "Could not determine triangle fan rotation, got color: R=%d, " + "G=%d, B=%d\n", + fan_pix[0], fan_pix[1], fan_pix[2]); + fan_rot = 1; + } + props->geom_shader_winding.tri_fan = (fan_rot + 2) % 3; +} + +void pgraph_vk_determine_gpu_properties(NV2AState *d) +{ + const int width = 640; + const int height = 480; + + uint8_t *pixels = render_geom_shader_triangles(d, width, height); + determine_triangle_winding_order(pixels, width, height, + &pgraph_vk_gpu_properties); + g_free(pixels); + + fprintf(stderr, "VK geometry shader winding: %d, %d, %d, %d\n", + 
pgraph_vk_gpu_properties.geom_shader_winding.tri, + pgraph_vk_gpu_properties.geom_shader_winding.tri_strip0, + pgraph_vk_gpu_properties.geom_shader_winding.tri_strip1, + pgraph_vk_gpu_properties.geom_shader_winding.tri_fan); +} + +GPUProperties *pgraph_vk_get_gpu_properties(void) +{ + return &pgraph_vk_gpu_properties; +} diff --git a/hw/xbox/nv2a/pgraph/vk/instance.c b/hw/xbox/nv2a/pgraph/vk/instance.c index 3d993cb735..fcada9e29b 100644 --- a/hw/xbox/nv2a/pgraph/vk/instance.c +++ b/hw/xbox/nv2a/pgraph/vk/instance.c @@ -401,10 +401,6 @@ static void add_optional_device_extension_names( add_extension_if_available(available_extensions, enabled_extension_names, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); - r->provoking_vertex_extension_enabled = - add_extension_if_available(available_extensions, enabled_extension_names, - VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); - r->memory_budget_extension_enabled = add_extension_if_available( available_extensions, enabled_extension_names, VK_EXT_MEMORY_BUDGET_EXTENSION_NAME); @@ -570,17 +566,6 @@ static bool create_logical_device(PGRAPHState *pg, Error **errp) void *next_struct = NULL; - VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex_features; - if (r->provoking_vertex_extension_enabled) { - provoking_vertex_features = (VkPhysicalDeviceProvokingVertexFeaturesEXT){ - .sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT, - .provokingVertexLast = VK_TRUE, - .pNext = next_struct, - }; - next_struct = &provoking_vertex_features; - } - VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border_features; if (r->custom_border_color_extension_enabled) { custom_border_features = (VkPhysicalDeviceCustomBorderColorFeaturesEXT){ diff --git a/hw/xbox/nv2a/pgraph/vk/meson.build b/hw/xbox/nv2a/pgraph/vk/meson.build index 24c2474cb9..b5d75e2faa 100644 --- a/hw/xbox/nv2a/pgraph/vk/meson.build +++ b/hw/xbox/nv2a/pgraph/vk/meson.build @@ -9,6 +9,7 @@ specific_ss.add([sdl, volk, libglslang, vma, vulkan, spirv_reflect, 
gloffscreen, 'display.c', 'draw.c', 'glsl.c', + 'gpuprops.c', 'image.c', 'instance.c', 'renderer.c', diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.c b/hw/xbox/nv2a/pgraph/vk/renderer.c index 3dbc724b95..4272bbceb6 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.c +++ b/hw/xbox/nv2a/pgraph/vk/renderer.c @@ -62,6 +62,8 @@ static void pgraph_vk_init(NV2AState *d, Error **errp) pgraph_vk_update_vertex_ram_buffer(&d->pgraph, 0, d->vram_ptr, memory_region_size(d->vram)); + + pgraph_vk_determine_gpu_properties(d); } static void pgraph_vk_finalize(NV2AState *d) @@ -227,6 +229,7 @@ static PGRAPHRenderer pgraph_vk_renderer = { .set_surface_scale_factor = pgraph_vk_set_surface_scale_factor, .get_surface_scale_factor = pgraph_vk_get_surface_scale_factor, .get_framebuffer_surface = pgraph_vk_get_framebuffer_surface, + .get_gpu_properties = pgraph_vk_get_gpu_properties, } }; diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h index c2233a7b17..9f87114187 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -325,7 +325,6 @@ typedef struct PGRAPHVkState { bool debug_utils_extension_enabled; bool custom_border_color_extension_enabled; - bool provoking_vertex_extension_enabled; bool memory_budget_extension_enabled; VkPhysicalDevice physical_device; @@ -594,4 +593,8 @@ void pgraph_vk_end_nondraw_commands(PGRAPHState *pg, VkCommandBuffer cmd); // blit.c void pgraph_vk_image_blit(NV2AState *d); +// gpuprops.c +void pgraph_vk_determine_gpu_properties(NV2AState *d); +GPUProperties *pgraph_vk_get_gpu_properties(void); + #endif From 8c3c27c5b10ac4d7d516cc01e8d37b9252e4894c Mon Sep 17 00:00:00 2001 From: coldhex Date: Wed, 9 Jul 2025 13:11:55 +0300 Subject: [PATCH 5/8] nv2a/gl: Work around an Nvidia geometry shader compiler bug This adds redundant computation to the simple geometry shader for winding testing based on the assumption that Nvidia GeForce compiler has a bug which may incorrectly detect a simple geometry shader as a 
passthrough shader. --- hw/xbox/nv2a/pgraph/gl/gpuprops.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/gl/gpuprops.c b/hw/xbox/nv2a/pgraph/gl/gpuprops.c index 29cb1e7b0e..58b195e8c3 100644 --- a/hw/xbox/nv2a/pgraph/gl/gpuprops.c +++ b/hw/xbox/nv2a/pgraph/gl/gpuprops.c @@ -68,16 +68,21 @@ static const char *geometry_shader_source = "out vec3 fragColor;\n" "in vec3 v_fragColor[];\n" "\n" - "void emit_vertex(int index) {\n" - " gl_Position = gl_in[index].gl_Position;\n" - " fragColor = v_fragColor[0];\n" - " EmitVertex();\n" - "}\n" - "\n" "void main() {\n" - " emit_vertex(0);\n" - " emit_vertex(1);\n" - " emit_vertex(2);\n" + " for (int i = 0; i < 3; i++) {\n" + // This should be just: + // gl_Position = gl_in[i].gl_Position; + // fragColor = v_fragColor[0]; + // but we work around an Nvidia Cg compiler bug which seems to + // misdetect above as a passthrough shader and effectively + // replaces the last line with "fragColor = v_fragColor[i];". + // Doing redundant computation seems to fix it. + // TODO: what is the minimal way to avoid the bug? + " gl_Position = gl_in[i].gl_Position + vec4(1.0/16384.0, 1.0/16384.0, 0.0, 0.0);\n" + " precise vec3 color = v_fragColor[0]*(0.999 + gl_in[i].gl_Position.x/16384.0) + v_fragColor[1]*0.00005 + v_fragColor[2]*0.00005;\n" + " fragColor = color;\n" + " EmitVertex();\n" + " }\n" " EndPrimitive();\n" "}\n"; From 9a030379e655c4e80a1cdbf006e2fba3b8c7df10 Mon Sep 17 00:00:00 2001 From: coldhex Date: Wed, 9 Jul 2025 18:55:53 +0300 Subject: [PATCH 6/8] nv2a/vk: Apply the Nvidia geometry shader bug work around with Vulkan too This is possibly not needed with Vulkan, but apply it anyway just in case. 
--- hw/xbox/nv2a/pgraph/vk/gpuprops.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/gpuprops.c b/hw/xbox/nv2a/pgraph/vk/gpuprops.c index 1562be0993..ecc8bb3852 100644 --- a/hw/xbox/nv2a/pgraph/vk/gpuprops.c +++ b/hw/xbox/nv2a/pgraph/vk/gpuprops.c @@ -66,16 +66,19 @@ static const char *geometry_shader_source = "layout(location = 0) out vec3 fragColor;\n" "layout(location = 0) in vec3 v_fragColor[];\n" "\n" - "void emit_vertex(int index) {\n" - " gl_Position = gl_in[index].gl_Position;\n" - " fragColor = v_fragColor[0];\n" - " EmitVertex();\n" - "}\n" - "\n" "void main() {\n" - " emit_vertex(0);\n" - " emit_vertex(1);\n" - " emit_vertex(2);\n" + " for (int i = 0; i < 3; i++) {\n" + // This should be just: + // gl_Position = gl_in[i].gl_Position; + // fragColor = v_fragColor[0]; + // but we apply the same Nvidia bug work around from gl/gpuprops.c + // to be on the safe side even if the compilers involved with + // Vulkan are different. + " gl_Position = gl_in[i].gl_Position + vec4(1.0/16384.0, 1.0/16384.0, 0.0, 0.0);\n" + " precise vec3 color = v_fragColor[0]*(0.999 + gl_in[i].gl_Position.x/16384.0) + v_fragColor[1]*0.00005 + v_fragColor[2]*0.00005;\n" + " fragColor = color;\n" + " EmitVertex();\n" + " }\n" " EndPrimitive();\n" "}\n"; From 46ea36b8e3e9d05d0f6e63e2d6935e02d8f0600f Mon Sep 17 00:00:00 2001 From: coldhex Date: Fri, 11 Jul 2025 20:57:09 +0300 Subject: [PATCH 7/8] nv2a/glsl: Delete deprecated FIXME comment about vertex order --- hw/xbox/nv2a/pgraph/glsl/geom.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/glsl/geom.c b/hw/xbox/nv2a/pgraph/glsl/geom.c index ec4d19fe5f..53a8e3405d 100644 --- a/hw/xbox/nv2a/pgraph/glsl/geom.c +++ b/hw/xbox/nv2a/pgraph/glsl/geom.c @@ -125,10 +125,6 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) /* TODO: frontface/backface culling for polygon modes POLY_MODE_LINE and * POLY_MODE_POINT.
- * FIXME: OpenGL/Vulkan does not specify absolute vertex order when input - * is a triangle list, triangle strip or fan. Only vertex winding order - * is specified. Currently we assume input triangle vertex order follows - * the last provoking vertex convention. */ switch (state->primitive_mode) { case PRIM_TYPE_POINTS: return NULL; From e1b34521cd0b6614b6b2116dd4371efd363119ea Mon Sep 17 00:00:00 2001 From: coldhex Date: Sun, 13 Jul 2025 17:42:41 +0300 Subject: [PATCH 8/8] nv2a: Emit separate triangles and line segment from geometry shader Mesa OpenGL radeonsi driver has a bug where triangles in triangle strips emitted from geometry shader may have provoking vertices not corresponding to either first or last vertex convention when GL_FIRST_VERTEX_CONVENTION is used. This commit changes geometry shader such that it always emits separate triangles and line segments and it doesn't matter what vertex OpenGL or Vulkan implementation chooses as provoking. --- hw/xbox/nv2a/pgraph/gl/draw.c | 5 +- hw/xbox/nv2a/pgraph/glsl/geom.c | 103 ++++++++++++-------------------- 2 files changed, 38 insertions(+), 70 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/gl/draw.c b/hw/xbox/nv2a/pgraph/gl/draw.c index 9562ea30d8..11fc6ffc7d 100644 --- a/hw/xbox/nv2a/pgraph/gl/draw.c +++ b/hw/xbox/nv2a/pgraph/gl/draw.c @@ -227,10 +227,7 @@ void pgraph_gl_draw_begin(NV2AState *d) glEnable(GL_DEPTH_CLAMP); - /* Set first vertex convention to match Vulkan default. This is needed - * because geometry shader outputs line strips with data for fragment - * shader. 
- */ + /* Set first vertex convention to match Vulkan default */ glProvokingVertex(GL_FIRST_VERTEX_CONVENTION); if (stencil_test) { diff --git a/hw/xbox/nv2a/pgraph/glsl/geom.c b/hw/xbox/nv2a/pgraph/glsl/geom.c index 53a8e3405d..afe57d65d9 100644 --- a/hw/xbox/nv2a/pgraph/glsl/geom.c +++ b/hw/xbox/nv2a/pgraph/glsl/geom.c @@ -135,10 +135,7 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) need_linez = true; layout_in = "layout(lines) in;\n"; layout_out = "layout(line_strip, max_vertices = 2) out;\n"; - body = " mat4 pz = calc_linez(0, 1);\n" - " emit_vertex(0, pz);\n" - " emit_vertex(1, pz);\n" - " EndPrimitive();\n"; + body = " emit_line(0, 1, 0.0);\n"; break; case PRIM_TYPE_TRIANGLES: case PRIM_TYPE_TRIANGLE_STRIP: @@ -163,19 +160,11 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) " EndPrimitive();\n"; } else if (polygon_mode == POLY_MODE_LINE) { need_linez = true; - layout_out = "layout(line_strip, max_vertices = 4) out;\n"; + layout_out = "layout(line_strip, max_vertices = 6) out;\n"; body = " float dz = calc_triz(v[0], v[1], v[2])[3].x;\n" - " mat4 pz1 = calc_linez(v[0], v[1]);\n" - " pz1[3].x = dz;\n" - " mat4 pz2 = calc_linez(v[1], v[2]);\n" - " pz2[3].x = dz;\n" - " mat4 pz3 = calc_linez(v[2], v[0]);\n" - " pz3[3].x = dz;\n" - " emit_vertex(v[0], pz1);\n" - " emit_vertex(v[1], pz2);\n" - " emit_vertex(v[2], pz3);\n" - " emit_vertex(v[0], pz3);\n" - " EndPrimitive();\n"; + " emit_line(v[0], v[1], dz);\n" + " emit_line(v[1], v[2], dz);\n" + " emit_line(v[2], v[0], dz);\n"; } else { assert(polygon_mode == POLY_MODE_POINT); layout_out = "layout(points, max_vertices = 3) out;\n"; @@ -193,33 +182,26 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) need_quadz = true; layout_in = "layout(lines_adjacency) in;\n"; if (polygon_mode == POLY_MODE_FILL) { - layout_out = "layout(triangle_strip, max_vertices = 4) out;\n"; + layout_out = "layout(triangle_strip, max_vertices = 
6) out;\n"; body = " mat4 pz, pz2;\n" " calc_quadz(0, 1, 2, 3, pz, pz2);\n" " emit_vertex(1, pz);\n" - " emit_vertex(2, pz2);\n" + " emit_vertex(2, pz);\n" " emit_vertex(0, pz);\n" + " EndPrimitive();\n" + " emit_vertex(2, pz2);\n" " emit_vertex(3, pz2);\n" + " emit_vertex(0, pz2);\n" " EndPrimitive();\n"; } else if (polygon_mode == POLY_MODE_LINE) { need_linez = true; - layout_out = "layout(line_strip, max_vertices = 5) out;\n"; + layout_out = "layout(line_strip, max_vertices = 8) out;\n"; body = " mat4 pz, pzs;\n" " calc_quadz(0, 1, 2, 3, pz, pzs);\n" - " mat4 pz1 = calc_linez(0, 1);\n" - " pz1[3].x = pz[3].x;\n" - " mat4 pz2 = calc_linez(1, 2);\n" - " pz2[3].x = pz[3].x;\n" - " mat4 pz3 = calc_linez(2, 3);\n" - " pz3[3].x = pzs[3].x;\n" - " mat4 pz4 = calc_linez(3, 0);\n" - " pz4[3].x = pzs[3].x;\n" - " emit_vertex(0, pz1);\n" - " emit_vertex(1, pz2);\n" - " emit_vertex(2, pz3);\n" - " emit_vertex(3, pz4);\n" - " emit_vertex(0, pz4);\n" - " EndPrimitive();\n"; + " emit_line(0, 1, pz[3].x);\n" + " emit_line(1, 2, pz[3].x);\n" + " emit_line(2, 3, pzs[3].x);\n" + " emit_line(3, 0, pzs[3].x);\n"; } else { assert(polygon_mode == POLY_MODE_POINT); layout_out = "layout(points, max_vertices = 4) out;\n"; @@ -240,35 +222,28 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) need_quadz = true; layout_in = "layout(lines_adjacency) in;\n"; if (polygon_mode == POLY_MODE_FILL) { - layout_out = "layout(triangle_strip, max_vertices = 4) out;\n"; + layout_out = "layout(triangle_strip, max_vertices = 6) out;\n"; body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n" " mat4 pz, pz2;\n" " calc_quadz(2, 0, 1, 3, pz, pz2);\n" " emit_vertex(0, pz);\n" - " emit_vertex(1, pz2);\n" + " emit_vertex(1, pz);\n" " emit_vertex(2, pz);\n" + " EndPrimitive();\n" + " emit_vertex(2, pz2);\n" + " emit_vertex(1, pz2);\n" " emit_vertex(3, pz2);\n" " EndPrimitive();\n"; } else if (polygon_mode == POLY_MODE_LINE) { need_linez = true; - layout_out = "layout(line_strip, 
max_vertices = 5) out;\n"; + layout_out = "layout(line_strip, max_vertices = 8) out;\n"; body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n" " mat4 pz, pzs;\n" " calc_quadz(2, 0, 1, 3, pz, pzs);\n" - " mat4 pz1 = calc_linez(0, 1);\n" - " pz1[3].x = pz[3].x;\n" - " mat4 pz2 = calc_linez(1, 3);\n" - " pz2[3].x = pzs[3].x;\n" - " mat4 pz3 = calc_linez(3, 2);\n" - " pz3[3].x = pzs[3].x;\n" - " mat4 pz4 = calc_linez(2, 0);\n" - " pz4[3].x = pz[3].x;\n" - " emit_vertex(0, pz1);\n" - " emit_vertex(1, pz2);\n" - " emit_vertex(3, pz3);\n" - " emit_vertex(2, pz4);\n" - " emit_vertex(0, pz4);\n" - " EndPrimitive();\n"; + " emit_line(0, 1, pz[3].x);\n" + " emit_line(1, 3, pzs[3].x);\n" + " emit_line(3, 2, pzs[3].x);\n" + " emit_line(2, 0, pz[3].x);\n"; } else { assert(polygon_mode == POLY_MODE_POINT); layout_out = "layout(points, max_vertices = 4) out;\n"; @@ -306,10 +281,7 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) */ layout_in = "layout(lines) in;\n"; layout_out = "layout(line_strip, max_vertices = 2) out;\n"; - body = " mat4 pz = calc_linez(0, 1);\n" - " emit_vertex(0, pz);\n" - " emit_vertex(1, pz);\n" - " EndPrimitive();\n"; + body = " emit_line(0, 1, 0.0);\n"; } else { assert(false); return NULL; @@ -408,10 +380,8 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) " precise float res = fma(a, b, -cd) + err;\n" " return res;\n" "}\n"); - } - if (state->z_perspective) { - if (need_triz || need_quadz) { + if (state->z_perspective) { mstring_append( output, "mat4 calc_triz(int i0, int i1, int i2) {\n" @@ -425,12 +395,10 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) " float det = kahan_det(m[0].x, m[1].y, m[1].x, m[0].y);\n" " float dzx = kahan_det(b.x, m[1].y, b.y, m[0].y) / det;\n" " float dzy = kahan_det(b.y, m[0].x, b.x, m[1].x) / det;\n" - " float triMZ = max(abs(dzx), abs(dzy));\n" - " return mat4(v_vtxPos[i0], v_vtxPos[i1], v_vtxPos[i2], triMZ, vec3(0.0));\n" 
+ " float dz = max(abs(dzx), abs(dzy));\n" + " return mat4(v_vtxPos[i0], v_vtxPos[i1], v_vtxPos[i2], dz, vec3(0.0));\n" "}\n"); - } - } else { - if (need_triz || need_quadz) { + } else { mstring_append( output, "mat4 calc_triz(int i0, int i1, int i2) {\n" @@ -443,8 +411,8 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) " float det = kahan_det(m[0].x, m[1].y, m[1].x, m[0].y);\n" " float dzx = kahan_det(b.x, m[1].y, b.y, m[0].y) / det;\n" " float dzy = kahan_det(b.y, m[0].x, b.x, m[1].x) / det;\n" - " float triMZ = max(abs(dzx), abs(dzy));\n" - " return mat4(v_vtxPos[i0], v_vtxPos[i1], v_vtxPos[i2], triMZ, vec3(0.0));\n" + " float dz = max(abs(dzx), abs(dzy));\n" + " return mat4(v_vtxPos[i0], v_vtxPos[i1], v_vtxPos[i2], dz, vec3(0.0));\n" "}\n"); } } @@ -454,10 +422,13 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) output, // Calculate a third vertex by rotating 90 degrees so that triangle // interpolation in fragment shader can be used as is for lines. - "mat4 calc_linez(int i0, int i1) {\n" + "void emit_line(int i0, int i1, float dz) {\n" " vec2 delta = v_vtxPos[i1].xy - v_vtxPos[i0].xy;\n" " vec2 v2 = vec2(-delta.y, delta.x) + v_vtxPos[i0].xy;\n" - " return mat4(v_vtxPos[i0], v_vtxPos[i1], v2, v_vtxPos[i0].zw, vec4(0.0));\n" + " mat4 pz = mat4(v_vtxPos[i0], v_vtxPos[i1], v2, v_vtxPos[i0].zw, dz, vec3(0.0));\n" + " emit_vertex(i0, pz);\n" + " emit_vertex(i1, pz);\n" + " EndPrimitive();\n" "}\n"); }