diff --git a/hw/xbox/nv2a/nv2a_regs.h b/hw/xbox/nv2a/nv2a_regs.h index 206e858b23..837f422409 100644 --- a/hw/xbox/nv2a/nv2a_regs.h +++ b/hw/xbox/nv2a/nv2a_regs.h @@ -471,6 +471,9 @@ # define NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INCR 7 # define NV_PGRAPH_CONTROL_2_STENCIL_OP_V_DECR 8 #define NV_PGRAPH_CONTROL_3 0x00001958 +# define NV_PGRAPH_CONTROL_3_PROVOKING_VERTEX (1 << 0) +# define NV_PGRAPH_CONTROL_3_PROVOKING_VERTEX_LAST 0 +# define NV_PGRAPH_CONTROL_3_PROVOKING_VERTEX_FIRST 1 # define NV_PGRAPH_CONTROL_3_SHADEMODE (1 << 7) # define NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT 0 # define NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH 1 @@ -1062,6 +1065,9 @@ # define NV097_SET_TEXGEN_VIEW_MODEL_INFINITE_VIEWER 1 # define NV097_SET_FOG_PLANE 0x000009D0 # define NV097_SET_SPECULAR_PARAMS 0x000009E0 +# define NV097_SET_PROVOKING_VERTEX 0x000009FC +# define NV097_SET_PROVOKING_VERTEX_LAST 0 +# define NV097_SET_PROVOKING_VERTEX_FIRST 1 # define NV097_SET_SCENE_AMBIENT_COLOR 0x00000A10 # define NV097_SET_VIEWPORT_OFFSET 0x00000A20 # define NV097_SET_POINT_PARAMS 0x00000A30 diff --git a/hw/xbox/nv2a/pgraph/gl/draw.c b/hw/xbox/nv2a/pgraph/gl/draw.c index 79c18040f9..11fc6ffc7d 100644 --- a/hw/xbox/nv2a/pgraph/gl/draw.c +++ b/hw/xbox/nv2a/pgraph/gl/draw.c @@ -208,38 +208,10 @@ void pgraph_gl_draw_begin(NV2AState *d) & NV_PGRAPH_SETUPRASTER_FRONTFACE ? GL_CW : GL_CCW); - /* Polygon offset */ - /* FIXME: GL implementation-specific, maybe do this in VS? 
*/ - if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & - NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE) { - glEnable(GL_POLYGON_OFFSET_FILL); - } else { - glDisable(GL_POLYGON_OFFSET_FILL); - } - if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & - NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE) { - glEnable(GL_POLYGON_OFFSET_LINE); - } else { - glDisable(GL_POLYGON_OFFSET_LINE); - } - if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & - NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE) { - glEnable(GL_POLYGON_OFFSET_POINT); - } else { - glDisable(GL_POLYGON_OFFSET_POINT); - } - if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & - (NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE | - NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE | - NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) { - uint32_t zfactor_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR); - GLfloat zfactor = *(float*)&zfactor_u32; - uint32_t zbias_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS); - GLfloat zbias = *(float*)&zbias_u32; - // FIXME: with Linux and Mesa, zbias must be multiplied by 0.5 in - // order to have the same depth value offset as Xbox. 
- glPolygonOffset(zfactor, zbias); - } + /* Polygon offset is handled in geometry and fragment shaders explicitly */ + glDisable(GL_POLYGON_OFFSET_FILL); + glDisable(GL_POLYGON_OFFSET_LINE); + glDisable(GL_POLYGON_OFFSET_POINT); /* Depth testing */ if (depth_test) { @@ -255,11 +227,8 @@ void pgraph_gl_draw_begin(NV2AState *d) glEnable(GL_DEPTH_CLAMP); - if (GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3), - NV_PGRAPH_CONTROL_3_SHADEMODE) == - NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT) { - glProvokingVertex(GL_FIRST_VERTEX_CONVENTION); - } + /* Set first vertex convention to match Vulkan default */ + glProvokingVertex(GL_FIRST_VERTEX_CONVENTION); if (stencil_test) { glEnable(GL_STENCIL_TEST); diff --git a/hw/xbox/nv2a/pgraph/gl/gpuprops.c b/hw/xbox/nv2a/pgraph/gl/gpuprops.c new file mode 100644 index 0000000000..58b195e8c3 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/gpuprops.c @@ -0,0 +1,362 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2025 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "debug.h" +#include "renderer.h" + +static GPUProperties pgraph_gl_gpu_properties; + +static const char *vertex_shader_source = + "#version 400\n" + "out vec3 v_fragColor;\n" + "\n" + "vec2 positions[11] = vec2[](\n" + " vec2(-0.5, -0.75),\n" + " vec2(-0.25, -0.25),\n" + " vec2(-0.75, -0.25),\n" + " vec2(0.25, -0.25),\n" + " vec2(0.25, -0.75),\n" + " vec2(0.75, -0.25),\n" + " vec2(0.75, -0.75),\n" + " vec2(-0.75, 0.75),\n" + " vec2(-0.75, 0.25),\n" + " vec2(-0.25, 0.25),\n" + " vec2(-0.25, 0.75)\n" + ");\n" + "\n" + "vec3 colors[11] = vec3[](\n" + " vec3(0.0, 0.0, 1.0),\n" + " vec3(0.0, 1.0, 0.0),\n" + " vec3(0.0, 1.0, 1.0),\n" + " vec3(0.0, 0.0, 1.0),\n" + " vec3(0.0, 1.0, 0.0),\n" + " vec3(0.0, 1.0, 1.0),\n" + " vec3(1.0, 0.0, 0.0),\n" + " vec3(0.0, 0.0, 1.0),\n" + " vec3(0.0, 1.0, 0.0),\n" + " vec3(0.0, 1.0, 1.0),\n" + " vec3(1.0, 0.0, 0.0)\n" + ");\n" + "\n" + "void main() {\n" + " gl_Position = vec4(positions[gl_VertexID], 0.0, 1.0);\n" + " v_fragColor = colors[gl_VertexID];\n" + "}\n"; + +static const char *geometry_shader_source = + "#version 400\n" + "layout(triangles) in;\n" + "layout(triangle_strip, max_vertices = 3) out;\n" + "out vec3 fragColor;\n" + "in vec3 v_fragColor[];\n" + "\n" + "void main() {\n" + " for (int i = 0; i < 3; i++) {\n" + // This should be just: + // gl_Position = gl_in[i].gl_Position; + // fragColor = v_fragColor[0]; + // but we work around an Nvidia Cg compiler bug which seems to + // misdetect above as a passthrough shader and effectively + // replaces the last line with "fragColor = v_fragColor[i];". + // Doing redundant computation seems to fix it. + // TODO: what is the minimal way to avoid the bug? 
+ " gl_Position = gl_in[i].gl_Position + vec4(1.0/16384.0, 1.0/16384.0, 0.0, 0.0);\n" + " precise vec3 color = v_fragColor[0]*(0.999 + gl_in[i].gl_Position.x/16384.0) + v_fragColor[1]*0.00005 + v_fragColor[2]*0.00005;\n" + " fragColor = color;\n" + " EmitVertex();\n" + " }\n" + " EndPrimitive();\n" + "}\n"; + +static const char *fragment_shader_source = + "#version 400\n" + "out vec4 outColor;\n" + "in vec3 fragColor;\n" + "\n" + "void main() {\n" + " outColor = vec4(fragColor, 1.0);\n" + "}\n"; + +static GLuint compile_shader(GLenum type, const char *source) +{ + GLuint shader = glCreateShader(type); + glShaderSource(shader, 1, &source, NULL); + glCompileShader(shader); + + GLint success; + glGetShaderiv(shader, GL_COMPILE_STATUS, &success); + if (!success) { + char log[512]; + glGetShaderInfoLog(shader, sizeof(log), NULL, log); + log[sizeof(log) - 1] = '\0'; + fprintf(stderr, "GL shader type %d compilation failed: %s\n", type, + log); + assert(false); + } + + return shader; +} + +static GLuint create_program(const char *vert_source, const char *geom_source, + const char *frag_source) +{ + GLuint vert_shader = compile_shader(GL_VERTEX_SHADER, vert_source); + GLuint geom_shader = compile_shader(GL_GEOMETRY_SHADER, geom_source); + GLuint frag_shader = compile_shader(GL_FRAGMENT_SHADER, frag_source); + + GLuint shader_prog = glCreateProgram(); + glAttachShader(shader_prog, vert_shader); + glAttachShader(shader_prog, geom_shader); + glAttachShader(shader_prog, frag_shader); + glLinkProgram(shader_prog); + + GLint success; + glGetProgramiv(shader_prog, GL_LINK_STATUS, &success); + if (!success) { + char log[512]; + glGetProgramInfoLog(shader_prog, sizeof(log), NULL, log); + log[sizeof(log) - 1] = '\0'; + fprintf(stderr, "GL shader linking failed: %s\n", log); + assert(false); + } + + glDeleteShader(vert_shader); + glDeleteShader(geom_shader); + glDeleteShader(frag_shader); + + return shader_prog; +} + +static void check_gl_error(const char *context) +{ + GLenum err; + 
int limit = 10; + + while ((err = glGetError()) != GL_NO_ERROR) { + fprintf(stderr, "GPU properties OpenGL error 0x%X in %s\n", err, + context); + if (--limit <= 0) { + fprintf( + stderr, + "Too many OpenGL errors in %s — possible infinite error loop\n", + context); + break; + } + } +} + +static uint8_t *render_geom_shader_triangles(int width, int height) +{ + // Create the framebuffer and renderbuffer for it + GLuint fbo, rbo; + glGenFramebuffers(1, &fbo); + glBindFramebuffer(GL_FRAMEBUFFER, fbo); + glGenRenderbuffers(1, &rbo); + glBindRenderbuffer(GL_RENDERBUFFER, rbo); + glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, width, height); + check_gl_error("glRenderbufferStorage"); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_RENDERBUFFER, rbo); + check_gl_error("glFramebufferRenderbuffer"); + + assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); + + GLuint shader_prog = create_program( + vertex_shader_source, geometry_shader_source, fragment_shader_source); + assert(shader_prog != 0); + + glUseProgram(shader_prog); + check_gl_error("glUseProgram"); + glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glClearColor(0.0f, 0.0f, 0.0f, 1.0f); + glClear(GL_COLOR_BUFFER_BIT); + check_gl_error("glClear"); + + glDisable(GL_CULL_FACE); + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); + glProvokingVertex(GL_FIRST_VERTEX_CONVENTION); + glViewport(0, 0, width, height); + check_gl_error("state setup"); + + GLuint vao; + glGenVertexArrays(1, &vao); + glBindVertexArray(vao); + check_gl_error("glBindVertexArray"); + glDrawArrays(GL_TRIANGLES, 0, 3); + glDrawArrays(GL_TRIANGLE_STRIP, 3, 4); + glDrawArrays(GL_TRIANGLE_FAN, 7, 4); + check_gl_error("glDrawArrays"); + glFinish(); // glFinish should be unnecessary + + void *pixels = g_malloc(width * height * 4); + assert(pixels != NULL); + glReadBuffer(GL_COLOR_ATTACHMENT0); + glReadPixels(0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, pixels); + check_gl_error("glReadPixels"); 
+ + glBindVertexArray(0); + glDeleteVertexArrays(1, &vao); + glUseProgram(0); + glDeleteProgram(shader_prog); + glBindFramebuffer(GL_FRAMEBUFFER, 0); + glDeleteFramebuffers(1, &fbo); + glBindRenderbuffer(GL_RENDERBUFFER, 0); + glDeleteRenderbuffers(1, &rbo); + + return (uint8_t *)pixels; +} + +static bool colors_match(int r1, int g1, int b1, int r2, int g2, int b2) +{ + int dr = r1 - r2; + int dg = g1 - g2; + int db = b1 - b2; + + return (dr * dr + dg * dg + db * db) <= 16; +} + +static int get_color_index(uint8_t *pixel) +{ + int r = pixel[0]; + int g = pixel[1]; + int b = pixel[2]; + + if (colors_match(r, g, b, 0, 0, 255)) { + return 0; + } else if (colors_match(r, g, b, 0, 255, 0)) { + return 1; + } else if (colors_match(r, g, b, 0, 255, 255)) { + return 2; + } else if (colors_match(r, g, b, 255, 0, 0)) { + return 3; + } else { + return -1; + } +} + +static int calc_offset_from_ndc(float x, float y, int width, int height) +{ + int x0 = (int)((x + 1.0f) * width * 0.5f); + int y0 = (int)((y + 1.0f) * height * 0.5f); + + x0 = MAX(x0, 0); + y0 = MAX(y0, 0); + x0 = MIN(x0, width - 1); + y0 = MIN(y0, height - 1); + + return y0 * width + x0; +} + +static void determine_triangle_winding_order(uint8_t *pixels, int width, + int height, GPUProperties *props) +{ + uint8_t *tri_pix = + pixels + calc_offset_from_ndc(-0.5f, -0.5f, width, height) * 4; + uint8_t *strip0_pix = + pixels + calc_offset_from_ndc(0.417f, -0.417f, width, height) * 4; + uint8_t *strip1_pix = + pixels + calc_offset_from_ndc(0.583f, -0.583f, width, height) * 4; + uint8_t *fan_pix = + pixels + calc_offset_from_ndc(-0.583f, 0.417f, width, height) * 4; + uint8_t *fan2_pix = + pixels + calc_offset_from_ndc(-0.417f, 0.583f, width, height) * 4; + + int tri_rot = get_color_index(tri_pix); + if (tri_rot < 0 || tri_rot > 2) { + fprintf(stderr, + "Could not determine triangle rotation, got color: R=%d, G=%d, " + "B=%d\n", + tri_pix[0], tri_pix[1], tri_pix[2]); + tri_rot = 0; + } + props->geom_shader_winding.tri = 
tri_rot; + + int strip0_rot = get_color_index(strip0_pix); + if (strip0_rot < 0 || strip0_rot > 2) { + fprintf(stderr, + "Could not determine triangle strip0 rotation, got color: " + "R=%d, G=%d, B=%d\n", + strip0_pix[0], strip0_pix[1], strip0_pix[2]); + strip0_rot = 0; + } + int strip1_rot = get_color_index(strip1_pix) - 1; + if (strip1_rot < 0 || strip1_rot > 2) { + fprintf(stderr, + "Could not determine triangle strip1 rotation, got color: " + "R=%d, G=%d, B=%d\n", + strip1_pix[0], strip1_pix[1], strip1_pix[2]); + strip1_rot = 0; + } + props->geom_shader_winding.tri_strip0 = strip0_rot; + props->geom_shader_winding.tri_strip1 = (3 - strip1_rot) % 3; + + int fan_rot = get_color_index(fan_pix); + int fan2_rot = get_color_index(fan2_pix); + if (fan2_rot == 0) { + fan2_rot = 1; + } + fan2_rot--; + if (fan_rot != fan2_rot) { + fprintf(stderr, + "Unexpected inconsistency in triangle fan winding, got colors: " + "R=%d, G=%d, B=%d and R=%d, G=%d, B=%d\n", + fan_pix[0], fan_pix[1], fan_pix[2], fan2_pix[0], fan2_pix[1], + fan2_pix[2]); + fan_rot = 1; + } + if (fan_rot < 0 || fan_rot > 2) { + fprintf(stderr, + "Could not determine triangle fan rotation, got color: R=%d, " + "G=%d, B=%d\n", + fan_pix[0], fan_pix[1], fan_pix[2]); + fan_rot = 1; + } + props->geom_shader_winding.tri_fan = (fan_rot + 2) % 3; +} + +void pgraph_gl_determine_gpu_properties(NV2AState *d) +{ + const int width = 640; + const int height = 480; + + GloContext *g_context = glo_context_create(); + glo_set_current(g_context); + + uint8_t *pixels = render_geom_shader_triangles(width, height); + determine_triangle_winding_order(pixels, width, height, + &pgraph_gl_gpu_properties); + g_free(pixels); + + fprintf(stderr, "GL geometry shader winding: %d, %d, %d, %d\n", + pgraph_gl_gpu_properties.geom_shader_winding.tri, + pgraph_gl_gpu_properties.geom_shader_winding.tri_strip0, + pgraph_gl_gpu_properties.geom_shader_winding.tri_strip1, + pgraph_gl_gpu_properties.geom_shader_winding.tri_fan); + + 
glo_context_destroy(g_context); + glo_set_current(g_nv2a_context_render); +} + +GPUProperties *pgraph_gl_get_gpu_properties(void) +{ + return &pgraph_gl_gpu_properties; +} diff --git a/hw/xbox/nv2a/pgraph/gl/meson.build b/hw/xbox/nv2a/pgraph/gl/meson.build index ab25eacb7d..c19a542adb 100644 --- a/hw/xbox/nv2a/pgraph/gl/meson.build +++ b/hw/xbox/nv2a/pgraph/gl/meson.build @@ -3,6 +3,7 @@ specific_ss.add([sdl, gloffscreen, files( 'debug.c', 'display.c', 'draw.c', + 'gpuprops.c', 'renderer.c', 'reports.c', 'shaders.c', diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.c b/hw/xbox/nv2a/pgraph/gl/renderer.c index 36b8029439..74ece1c5f9 100644 --- a/hw/xbox/nv2a/pgraph/gl/renderer.c +++ b/hw/xbox/nv2a/pgraph/gl/renderer.c @@ -66,6 +66,8 @@ static void pgraph_gl_init(NV2AState *d, Error **errp) pg->uniform_attrs = 0; pg->swizzle_attrs = 0; + + pgraph_gl_determine_gpu_properties(d); } static void pgraph_gl_finalize(NV2AState *d) @@ -195,6 +197,7 @@ static PGRAPHRenderer pgraph_gl_renderer = { .set_surface_scale_factor = pgraph_gl_set_surface_scale_factor, .get_surface_scale_factor = pgraph_gl_get_surface_scale_factor, .get_framebuffer_surface = pgraph_gl_get_framebuffer_surface, + .get_gpu_properties = pgraph_gl_get_gpu_properties, } }; diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.h b/hw/xbox/nv2a/pgraph/gl/renderer.h index 28277fcdf5..5a2524bfbb 100644 --- a/hw/xbox/nv2a/pgraph/gl/renderer.h +++ b/hw/xbox/nv2a/pgraph/gl/renderer.h @@ -286,5 +286,7 @@ void pgraph_gl_shader_write_cache_reload_list(PGRAPHState *pg); void pgraph_gl_set_surface_scale_factor(NV2AState *d, unsigned int scale); unsigned int pgraph_gl_get_surface_scale_factor(NV2AState *d); int pgraph_gl_get_framebuffer_surface(NV2AState *d); +void pgraph_gl_determine_gpu_properties(NV2AState *d); +GPUProperties *pgraph_gl_get_gpu_properties(void); #endif diff --git a/hw/xbox/nv2a/pgraph/gl/shaders.c b/hw/xbox/nv2a/pgraph/gl/shaders.c index 30b4c5cbba..4400133434 100644 --- a/hw/xbox/nv2a/pgraph/gl/shaders.c +++ 
b/hw/xbox/nv2a/pgraph/gl/shaders.c @@ -31,10 +31,6 @@ static GLenum get_gl_primitive_mode(enum ShaderPolygonMode polygon_mode, enum ShaderPrimitiveMode primitive_mode) { - if (polygon_mode == POLY_MODE_POINT) { - return GL_POINTS; - } - switch (primitive_mode) { case PRIM_TYPE_POINTS: return GL_POINTS; case PRIM_TYPE_LINES: return GL_LINES; @@ -705,6 +701,9 @@ static void apply_uniform_updates(const UniformInfo *info, int *locs, case UniformElementType_int: glUniform1iv(locs[i], info[i].count, value); break; + case UniformElementType_ivec2: + glUniform2iv(locs[i], info[i].count, value); + break; case UniformElementType_ivec4: glUniform4iv(locs[i], info[i].count, value); break; diff --git a/hw/xbox/nv2a/pgraph/glsl/common.c b/hw/xbox/nv2a/pgraph/glsl/common.c index 338f58ab9a..887d2a2e23 100644 --- a/hw/xbox/nv2a/pgraph/glsl/common.c +++ b/hw/xbox/nv2a/pgraph/glsl/common.c @@ -48,6 +48,10 @@ MString *pgraph_glsl_get_vtx_header(MString *out, bool location, bool smooth, { smooth_s, vec4_s, "vtxT1" }, { smooth_s, vec4_s, "vtxT2" }, { smooth_s, vec4_s, "vtxT3" }, + { flat_s, vec4_s, "vtxPos0" }, + { flat_s, vec4_s, "vtxPos1" }, + { flat_s, vec4_s, "vtxPos2" }, + { flat_s, float_s, "triMZ" }, }; for (int i = 0; i < ARRAY_SIZE(attr); i++) { diff --git a/hw/xbox/nv2a/pgraph/glsl/common.h b/hw/xbox/nv2a/pgraph/glsl/common.h index 9dc1fa0347..4b327421be 100644 --- a/hw/xbox/nv2a/pgraph/glsl/common.h +++ b/hw/xbox/nv2a/pgraph/glsl/common.h @@ -25,6 +25,7 @@ #include "qemu/osdep.h" #include "qemu/mstring.h" +typedef int ivec2[2]; typedef int ivec4[4]; typedef float mat2[2 * 2]; typedef unsigned int uint; @@ -35,6 +36,7 @@ typedef float vec4[4]; #define UNIFORM_ELEMENT_TYPE_X(DECL) \ DECL(float) \ DECL(int) \ + DECL(ivec2) \ DECL(ivec4) \ DECL(mat2) \ DECL(uint) \ diff --git a/hw/xbox/nv2a/pgraph/glsl/geom.c b/hw/xbox/nv2a/pgraph/glsl/geom.c index ca46a5f3af..afe57d65d9 100644 --- a/hw/xbox/nv2a/pgraph/glsl/geom.c +++ b/hw/xbox/nv2a/pgraph/glsl/geom.c @@ -37,6 +37,47 @@ void 
pgraph_glsl_set_geom_state(PGRAPHState *pg, GeomState *state) state->smooth_shading = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3), NV_PGRAPH_CONTROL_3_SHADEMODE) == NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH; + + state->first_vertex_is_provoking = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3), + NV_PGRAPH_CONTROL_3_PROVOKING_VERTEX) == + NV_PGRAPH_CONTROL_3_PROVOKING_VERTEX_FIRST; + + state->z_perspective = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & + NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE; + + if (pg->renderer->ops.get_gpu_properties) { + GPUProperties *gpu_props = pg->renderer->ops.get_gpu_properties(); + + switch (state->primitive_mode) { + case PRIM_TYPE_TRIANGLES: + state->tri_rot0 = gpu_props->geom_shader_winding.tri; + state->tri_rot1 = state->tri_rot0; + break; + case PRIM_TYPE_TRIANGLE_STRIP: + state->tri_rot0 = gpu_props->geom_shader_winding.tri_strip0; + state->tri_rot1 = gpu_props->geom_shader_winding.tri_strip1; + break; + case PRIM_TYPE_TRIANGLE_FAN: + case PRIM_TYPE_POLYGON: + state->tri_rot0 = gpu_props->geom_shader_winding.tri_fan; + state->tri_rot1 = state->tri_rot0; + break; + default: + break; + } + } +} + +static const char *get_vertex_order(int rot) +{ + if (rot == 0) { + return "ivec3(0, 1, 2)"; + } else if (rot == 1) { + return "ivec3(2, 0, 1)"; + } else { + return "ivec3(1, 2, 0)"; + } } bool pgraph_glsl_need_geom(const GeomState *state) @@ -45,63 +86,24 @@ bool pgraph_glsl_need_geom(const GeomState *state) assert(state->polygon_front_mode == state->polygon_back_mode); enum ShaderPolygonMode polygon_mode = state->polygon_front_mode; - /* POINT mode shouldn't require any special work */ - if (polygon_mode == POLY_MODE_POINT) { - return false; - } - switch (state->primitive_mode) { + case PRIM_TYPE_POINTS: + return false; + case PRIM_TYPE_LINES: + case PRIM_TYPE_LINE_LOOP: + case PRIM_TYPE_LINE_STRIP: case PRIM_TYPE_TRIANGLES: - if (polygon_mode == POLY_MODE_FILL) { - return false; - } - return true; case PRIM_TYPE_TRIANGLE_STRIP: - if 
(polygon_mode == POLY_MODE_FILL) { - return false; - } - assert(polygon_mode == POLY_MODE_LINE); - return true; case PRIM_TYPE_TRIANGLE_FAN: - if (polygon_mode == POLY_MODE_FILL) { - return false; - } - assert(polygon_mode == POLY_MODE_LINE); - return true; case PRIM_TYPE_QUADS: - if (polygon_mode == POLY_MODE_LINE) { - return true; - } else if (polygon_mode == POLY_MODE_FILL) { - return true; - } else { - assert(false); - return false; - } - break; case PRIM_TYPE_QUAD_STRIP: - if (polygon_mode == POLY_MODE_LINE) { - return true; - } else if (polygon_mode == POLY_MODE_FILL) { - return true; - } else { - assert(false); - return false; - } - break; + return true; case PRIM_TYPE_POLYGON: - if (polygon_mode == POLY_MODE_LINE) { - return false; - } - if (polygon_mode == POLY_MODE_FILL) { - if (state->smooth_shading) { - return false; - } - return true; - } else { + if (polygon_mode == POLY_MODE_POINT) { assert(false); return false; } - break; + return true; default: return false; } @@ -113,127 +115,173 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) assert(state->polygon_front_mode == state->polygon_back_mode); enum ShaderPolygonMode polygon_mode = state->polygon_front_mode; - /* POINT mode shouldn't require any special work */ - if (polygon_mode == POLY_MODE_POINT) { - return NULL; - } - - /* Handle LINE and FILL mode */ + bool need_triz = false; + bool need_quadz = false; + bool need_linez = false; const char *layout_in = NULL; const char *layout_out = NULL; const char *body = NULL; + const char *provoking_index = "0"; + + /* TODO: frontface/backface culling for polygon modes POLY_MODE_LINE and + * POLY_MODE_POINT. 
+ */ switch (state->primitive_mode) { case PRIM_TYPE_POINTS: return NULL; - case PRIM_TYPE_LINES: return NULL; - case PRIM_TYPE_LINE_LOOP: return NULL; - case PRIM_TYPE_LINE_STRIP: return NULL; + case PRIM_TYPE_LINES: + case PRIM_TYPE_LINE_LOOP: + case PRIM_TYPE_LINE_STRIP: + provoking_index = state->first_vertex_is_provoking ? "0" : "1"; + need_linez = true; + layout_in = "layout(lines) in;\n"; + layout_out = "layout(line_strip, max_vertices = 2) out;\n"; + body = " emit_line(0, 1, 0.0);\n"; + break; case PRIM_TYPE_TRIANGLES: - if (polygon_mode == POLY_MODE_FILL) { return NULL; } - assert(polygon_mode == POLY_MODE_LINE); - layout_in = "layout(triangles) in;\n"; - layout_out = "layout(line_strip, max_vertices = 4) out;\n"; - body = " emit_vertex(0, 0);\n" - " emit_vertex(1, 0);\n" - " emit_vertex(2, 0);\n" - " emit_vertex(0, 0);\n" - " EndPrimitive();\n"; - break; case PRIM_TYPE_TRIANGLE_STRIP: - if (polygon_mode == POLY_MODE_FILL) { return NULL; } - assert(polygon_mode == POLY_MODE_LINE); - layout_in = "layout(triangles) in;\n"; - layout_out = "layout(line_strip, max_vertices = 4) out;\n"; - /* Imagine a quad made of a tristrip, the comments tell you which - * vertex we are using */ - body = " if ((gl_PrimitiveIDIn & 1) == 0) {\n" - " if (gl_PrimitiveIDIn == 0) {\n" - " emit_vertex(0, 0);\n" /* bottom right */ - " }\n" - " emit_vertex(1, 0);\n" /* top right */ - " emit_vertex(2, 0);\n" /* bottom left */ - " emit_vertex(0, 0);\n" /* bottom right */ - " } else {\n" - " emit_vertex(2, 0);\n" /* bottom left */ - " emit_vertex(1, 0);\n" /* top left */ - " emit_vertex(0, 0);\n" /* top right */ - " }\n" - " EndPrimitive();\n"; - break; case PRIM_TYPE_TRIANGLE_FAN: - if (polygon_mode == POLY_MODE_FILL) { return NULL; } - assert(polygon_mode == POLY_MODE_LINE); + if (state->first_vertex_is_provoking) { + provoking_index = "v[0]"; + } else if (state->primitive_mode == PRIM_TYPE_TRIANGLE_STRIP) { + provoking_index = "v[2 - (gl_PrimitiveIDIn & 1)]"; + } else if 
(state->primitive_mode == PRIM_TYPE_TRIANGLE_FAN) { + provoking_index = "v[1]"; + } else { + provoking_index = "v[2]"; + } + need_triz = true; layout_in = "layout(triangles) in;\n"; - layout_out = "layout(line_strip, max_vertices = 4) out;\n"; - body = " if (gl_PrimitiveIDIn == 0) {\n" - " emit_vertex(0, 0);\n" - " }\n" - " emit_vertex(1, 0);\n" - " emit_vertex(2, 0);\n" - " emit_vertex(0, 0);\n" - " EndPrimitive();\n"; + if (polygon_mode == POLY_MODE_FILL) { + layout_out = "layout(triangle_strip, max_vertices = 3) out;\n"; + body = " mat4 pz = calc_triz(v[0], v[1], v[2]);\n" + " emit_vertex(v[0], pz);\n" + " emit_vertex(v[1], pz);\n" + " emit_vertex(v[2], pz);\n" + " EndPrimitive();\n"; + } else if (polygon_mode == POLY_MODE_LINE) { + need_linez = true; + layout_out = "layout(line_strip, max_vertices = 6) out;\n"; + body = " float dz = calc_triz(v[0], v[1], v[2])[3].x;\n" + " emit_line(v[0], v[1], dz);\n" + " emit_line(v[1], v[2], dz);\n" + " emit_line(v[2], v[0], dz);\n"; + } else { + assert(polygon_mode == POLY_MODE_POINT); + layout_out = "layout(points, max_vertices = 3) out;\n"; + body = " mat4 pz = calc_triz(v[0], v[1], v[2]);\n" + " emit_vertex(v[0], mat4(pz[0], pz[0], pz[0], pz[3]));\n" + " EndPrimitive();\n" + " emit_vertex(v[1], mat4(pz[1], pz[1], pz[1], pz[3]));\n" + " EndPrimitive();\n" + " emit_vertex(v[2], mat4(pz[2], pz[2], pz[2], pz[3]));\n" + " EndPrimitive();\n"; + } break; case PRIM_TYPE_QUADS: + provoking_index = "3"; + need_quadz = true; layout_in = "layout(lines_adjacency) in;\n"; - if (polygon_mode == POLY_MODE_LINE) { - layout_out = "layout(line_strip, max_vertices = 5) out;\n"; - body = " emit_vertex(0, 3);\n" - " emit_vertex(1, 3);\n" - " emit_vertex(2, 3);\n" - " emit_vertex(3, 3);\n" - " emit_vertex(0, 3);\n" - " EndPrimitive();\n"; - } else if (polygon_mode == POLY_MODE_FILL) { - layout_out = "layout(triangle_strip, max_vertices = 4) out;\n"; - body = " emit_vertex(3, 3);\n" - " emit_vertex(0, 3);\n" - " emit_vertex(2, 3);\n" - " 
emit_vertex(1, 3);\n" + if (polygon_mode == POLY_MODE_FILL) { + layout_out = "layout(triangle_strip, max_vertices = 6) out;\n"; + body = " mat4 pz, pz2;\n" + " calc_quadz(0, 1, 2, 3, pz, pz2);\n" + " emit_vertex(1, pz);\n" + " emit_vertex(2, pz);\n" + " emit_vertex(0, pz);\n" + " EndPrimitive();\n" + " emit_vertex(2, pz2);\n" + " emit_vertex(3, pz2);\n" + " emit_vertex(0, pz2);\n" " EndPrimitive();\n"; + } else if (polygon_mode == POLY_MODE_LINE) { + need_linez = true; + layout_out = "layout(line_strip, max_vertices = 8) out;\n"; + body = " mat4 pz, pzs;\n" + " calc_quadz(0, 1, 2, 3, pz, pzs);\n" + " emit_line(0, 1, pz[3].x);\n" + " emit_line(1, 2, pz[3].x);\n" + " emit_line(2, 3, pzs[3].x);\n" + " emit_line(3, 0, pzs[3].x);\n"; } else { - assert(false); - return NULL; + assert(polygon_mode == POLY_MODE_POINT); + layout_out = "layout(points, max_vertices = 4) out;\n"; + body = " mat4 pz, pz2;\n" + " calc_quadz(0, 1, 2, 3, pz, pz2);\n" + " emit_vertex(0, mat4(pz[0], pz[0], pz[0], pz[3]));\n" + " EndPrimitive();\n" + " emit_vertex(1, mat4(pz[1], pz[1], pz[1], pz[3]));\n" + " EndPrimitive();\n" + " emit_vertex(2, mat4(pz[2], pz[2], pz[2], pz[3]));\n" + " EndPrimitive();\n" + " emit_vertex(3, mat4(pz2[2], pz2[2], pz2[2], pz2[3]));\n" + " EndPrimitive();\n"; } break; case PRIM_TYPE_QUAD_STRIP: + provoking_index = "3"; + need_quadz = true; layout_in = "layout(lines_adjacency) in;\n"; - if (polygon_mode == POLY_MODE_LINE) { - layout_out = "layout(line_strip, max_vertices = 5) out;\n"; + if (polygon_mode == POLY_MODE_FILL) { + layout_out = "layout(triangle_strip, max_vertices = 6) out;\n"; body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n" - " if (gl_PrimitiveIDIn == 0) {\n" - " emit_vertex(0, 3);\n" - " }\n" - " emit_vertex(1, 3);\n" - " emit_vertex(3, 3);\n" - " emit_vertex(2, 3);\n" - " emit_vertex(0, 3);\n" + " mat4 pz, pz2;\n" + " calc_quadz(2, 0, 1, 3, pz, pz2);\n" + " emit_vertex(0, pz);\n" + " emit_vertex(1, pz);\n" + " emit_vertex(2, pz);\n" + " 
EndPrimitive();\n" + " emit_vertex(2, pz2);\n" + " emit_vertex(1, pz2);\n" + " emit_vertex(3, pz2);\n" " EndPrimitive();\n"; - } else if (polygon_mode == POLY_MODE_FILL) { - layout_out = "layout(triangle_strip, max_vertices = 4) out;\n"; + } else if (polygon_mode == POLY_MODE_LINE) { + need_linez = true; + layout_out = "layout(line_strip, max_vertices = 8) out;\n"; body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n" - " emit_vertex(0, 3);\n" - " emit_vertex(1, 3);\n" - " emit_vertex(2, 3);\n" - " emit_vertex(3, 3);\n" - " EndPrimitive();\n"; + " mat4 pz, pzs;\n" + " calc_quadz(2, 0, 1, 3, pz, pzs);\n" + " emit_line(0, 1, pz[3].x);\n" + " emit_line(1, 3, pzs[3].x);\n" + " emit_line(3, 2, pzs[3].x);\n" + " emit_line(2, 0, pz[3].x);\n"; } else { - assert(false); - return NULL; + assert(polygon_mode == POLY_MODE_POINT); + layout_out = "layout(points, max_vertices = 4) out;\n"; + body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n" + " mat4 pz, pz2;\n" + " calc_quadz(2, 0, 1, 3, pz, pz2);\n" + " emit_vertex(0, mat4(pz[1], pz[1], pz[1], pz[3]));\n" + " EndPrimitive();\n" + " emit_vertex(1, mat4(pz[2], pz[2], pz[2], pz[3]));\n" + " EndPrimitive();\n" + " emit_vertex(2, mat4(pz[0], pz[0], pz[0], pz[3]));\n" + " EndPrimitive();\n" + " emit_vertex(3, mat4(pz2[2], pz2[2], pz2[2], pz2[3]));\n" + " EndPrimitive();\n"; } break; case PRIM_TYPE_POLYGON: - if (polygon_mode == POLY_MODE_LINE) { - return NULL; - } if (polygon_mode == POLY_MODE_FILL) { - if (state->smooth_shading) { - return NULL; - } + provoking_index = "v[2]"; + need_triz = true; layout_in = "layout(triangles) in;\n"; layout_out = "layout(triangle_strip, max_vertices = 3) out;\n"; - body = " emit_vertex(0, 2);\n" - " emit_vertex(1, 2);\n" - " emit_vertex(2, 2);\n" + body = " mat4 pz = calc_triz(v[0], v[1], v[2]);\n" + " emit_vertex(v[0], pz);\n" + " emit_vertex(v[1], pz);\n" + " emit_vertex(v[2], pz);\n" " EndPrimitive();\n"; + } else if (polygon_mode == POLY_MODE_LINE) { + provoking_index = "0"; + 
need_linez = true; + /* FIXME: input here is lines and not triangles so we cannot + * calculate triangle plane slope. Also, the first vertex of the + * polygon is unavailable so flat shading provoking vertex is + * wrong. + */ + layout_in = "layout(lines) in;\n"; + layout_out = "layout(line_strip, max_vertices = 2) out;\n"; + body = " emit_line(0, 1, 0.0);\n"; } else { assert(false); return NULL; @@ -253,6 +301,8 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) mstring_from_fmt("#version %d\n\n" "%s" "%s" + "\n" + "#define v_vtxPos v_vtxPos0\n" "\n", opts.vulkan ? 450 : 400, layout_in, layout_out); pgraph_glsl_get_vtx_header(output, opts.vulkan, state->smooth_shading, true, @@ -260,46 +310,144 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) pgraph_glsl_get_vtx_header(output, opts.vulkan, state->smooth_shading, false, false, false); + char vertex_order_buf[80]; + const char *vertex_order_body = ""; + + if (need_triz) { + /* Input triangle absolute vertex order is not guaranteed by OpenGL + * or Vulkan, only winding order is. Reorder vertices here to first + * vertex convention which we assumed above when setting + * provoking_index. This mostly only matters with flat shading, but + * we reorder always to get consistent results across GPU vendors + * regarding floating-point rounding when calculating with vtxPos0/1/2. + */ + mstring_append(output, "ivec3 v;\n"); + if (state->tri_rot0 == state->tri_rot1) { + snprintf(vertex_order_buf, sizeof(vertex_order_buf), " v = %s;\n", + get_vertex_order(state->tri_rot0)); + } else { + snprintf(vertex_order_buf, sizeof(vertex_order_buf), + " v = (gl_PrimitiveIDIn & 1) == 0 ? 
%s : %s;\n", + get_vertex_order(state->tri_rot0), + get_vertex_order(state->tri_rot1)); + } + vertex_order_body = vertex_order_buf; + } + if (state->smooth_shading) { - mstring_append(output, - "void emit_vertex(int index, int _unused) {\n" - " gl_Position = gl_in[index].gl_Position;\n" - " gl_PointSize = gl_in[index].gl_PointSize;\n" - " vtxD0 = v_vtxD0[index];\n" - " vtxD1 = v_vtxD1[index];\n" - " vtxB0 = v_vtxB0[index];\n" - " vtxB1 = v_vtxB1[index];\n" - " vtxFog = v_vtxFog[index];\n" - " vtxT0 = v_vtxT0[index];\n" - " vtxT1 = v_vtxT1[index];\n" - " vtxT2 = v_vtxT2[index];\n" - " vtxT3 = v_vtxT3[index];\n" - " EmitVertex();\n" - "}\n"); - } else { - mstring_append(output, - "void emit_vertex(int index, int provoking_index) {\n" - " gl_Position = gl_in[index].gl_Position;\n" - " gl_PointSize = gl_in[index].gl_PointSize;\n" - " vtxD0 = v_vtxD0[provoking_index];\n" - " vtxD1 = v_vtxD1[provoking_index];\n" - " vtxB0 = v_vtxB0[provoking_index];\n" - " vtxB1 = v_vtxB1[provoking_index];\n" - " vtxFog = v_vtxFog[index];\n" - " vtxT0 = v_vtxT0[index];\n" - " vtxT1 = v_vtxT1[index];\n" - " vtxT2 = v_vtxT2[index];\n" - " vtxT3 = v_vtxT3[index];\n" - " EmitVertex();\n" - "}\n"); + provoking_index = "index"; + } + + mstring_append_fmt( + output, + "void emit_vertex(int index, mat4 pz) {\n" + " gl_Position = gl_in[index].gl_Position;\n" + " gl_PointSize = gl_in[index].gl_PointSize;\n" + " vtxD0 = v_vtxD0[%s];\n" + " vtxD1 = v_vtxD1[%s];\n" + " vtxB0 = v_vtxB0[%s];\n" + " vtxB1 = v_vtxB1[%s];\n" + " vtxFog = v_vtxFog[index];\n" + " vtxT0 = v_vtxT0[index];\n" + " vtxT1 = v_vtxT1[index];\n" + " vtxT2 = v_vtxT2[index];\n" + " vtxT3 = v_vtxT3[index];\n" + " vtxPos0 = pz[0];\n" + " vtxPos1 = pz[1];\n" + " vtxPos2 = pz[2];\n" + " triMZ = (isnan(pz[3].x) || isinf(pz[3].x)) ? 
0.0 : pz[3].x;\n" + " EmitVertex();\n" + "}\n", + provoking_index, + provoking_index, + provoking_index, + provoking_index); + + if (need_triz || need_quadz) { + mstring_append( + output, + // Kahan's algorithm for computing a*b - c*d using FMA for higher + // precision. See e.g.: + // Muller et al, "Handbook of Floating-Point Arithmetic", 2nd ed. + // or + // Claude-Pierre Jeannerod, Nicolas Louvet, and Jean-Michel Muller, + // Further analysis of Kahan's algorithm for the accurate + // computation of 2x2 determinants, + // Mathematics of Computation 82(284), October 2013. + "float kahan_det(float a, float b, float c, float d) {\n" + " precise float cd = c*d;\n" + " precise float err = fma(-c, d, cd);\n" + " precise float res = fma(a, b, -cd) + err;\n" + " return res;\n" + "}\n"); + + if (state->z_perspective) { + mstring_append( + output, + "mat4 calc_triz(int i0, int i1, int i2) {\n" + " mat2 m = mat2(v_vtxPos[i1].xy - v_vtxPos[i0].xy,\n" + " v_vtxPos[i2].xy - v_vtxPos[i0].xy);\n" + " precise vec2 b = vec2(v_vtxPos[i0].w - v_vtxPos[i1].w,\n" + " v_vtxPos[i0].w - v_vtxPos[i2].w);\n" + " b /= vec2(v_vtxPos[i1].w, v_vtxPos[i2].w) * v_vtxPos[i0].w;\n" + // The following computes dzx and dzy same as + // vec2 dz = b * inverse(m); + " float det = kahan_det(m[0].x, m[1].y, m[1].x, m[0].y);\n" + " float dzx = kahan_det(b.x, m[1].y, b.y, m[0].y) / det;\n" + " float dzy = kahan_det(b.y, m[0].x, b.x, m[1].x) / det;\n" + " float dz = max(abs(dzx), abs(dzy));\n" + " return mat4(v_vtxPos[i0], v_vtxPos[i1], v_vtxPos[i2], dz, vec3(0.0));\n" + "}\n"); + } else { + mstring_append( + output, + "mat4 calc_triz(int i0, int i1, int i2) {\n" + " mat2 m = mat2(v_vtxPos[i1].xy - v_vtxPos[i0].xy,\n" + " v_vtxPos[i2].xy - v_vtxPos[i0].xy);\n" + " precise vec2 b = vec2(v_vtxPos[i1].z - v_vtxPos[i0].z,\n" + " v_vtxPos[i2].z - v_vtxPos[i0].z);\n" + // The following computes dzx and dzy same as + // vec2 dz = b * inverse(m); + " float det = kahan_det(m[0].x, m[1].y, m[1].x, m[0].y);\n" + " 
float dzx = kahan_det(b.x, m[1].y, b.y, m[0].y) / det;\n" + " float dzy = kahan_det(b.y, m[0].x, b.x, m[1].x) / det;\n" + " float dz = max(abs(dzx), abs(dzy));\n" + " return mat4(v_vtxPos[i0], v_vtxPos[i1], v_vtxPos[i2], dz, vec3(0.0));\n" + "}\n"); + } + } + + if (need_linez) { + mstring_append( + output, + // Calculate a third vertex by rotating 90 degrees so that triangle + // interpolation in fragment shader can be used as is for lines. + "void emit_line(int i0, int i1, float dz) {\n" + " vec2 delta = v_vtxPos[i1].xy - v_vtxPos[i0].xy;\n" + " vec2 v2 = vec2(-delta.y, delta.x) + v_vtxPos[i0].xy;\n" + " mat4 pz = mat4(v_vtxPos[i0], v_vtxPos[i1], v2, v_vtxPos[i0].zw, dz, vec3(0.0));\n" + " emit_vertex(i0, pz);\n" + " emit_vertex(i1, pz);\n" + " EndPrimitive();\n" + "}\n"); + } + + if (need_quadz) { + mstring_append( + output, + "void calc_quadz(int i0, int i1, int i2, int i3, out mat4 triz1, out mat4 triz2) {\n" + " triz1 = calc_triz(i0, i1, i2);\n" + " triz2 = calc_triz(i0, i2, i3);\n" + "}\n"); } mstring_append_fmt(output, "\n" "void main() {\n" "%s" + "%s" "}\n", - body); + vertex_order_body, body); return output; } diff --git a/hw/xbox/nv2a/pgraph/glsl/geom.h b/hw/xbox/nv2a/pgraph/glsl/geom.h index 41ff255161..a00302f39b 100644 --- a/hw/xbox/nv2a/pgraph/glsl/geom.h +++ b/hw/xbox/nv2a/pgraph/glsl/geom.h @@ -30,6 +30,10 @@ typedef struct { enum ShaderPolygonMode polygon_front_mode; enum ShaderPolygonMode polygon_back_mode; bool smooth_shading; + bool first_vertex_is_provoking; + bool z_perspective; + short tri_rot0; + short tri_rot1; } GeomState; typedef struct GenGeomGlslOptions { diff --git a/hw/xbox/nv2a/pgraph/glsl/psh.c b/hw/xbox/nv2a/pgraph/glsl/psh.c index 7484b2aa10..ddf04be25c 100644 --- a/hw/xbox/nv2a/pgraph/glsl/psh.c +++ b/hw/xbox/nv2a/pgraph/glsl/psh.c @@ -209,6 +209,26 @@ void pgraph_glsl_set_psh_state(PGRAPHState *pg, PshState *state) state->conv_tex[i] = kernel; } + + state->surface_zeta_format = pg->surface_shape.zeta_format; + unsigned int 
z_format = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER), + NV_PGRAPH_SETUPRASTER_Z_FORMAT); + + switch (pg->surface_shape.zeta_format) { + case NV097_SET_SURFACE_FORMAT_ZETA_Z16: + state->depth_format = + z_format ? DEPTH_FORMAT_F16 : DEPTH_FORMAT_D16; + break; + case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8: + state->depth_format = + z_format ? DEPTH_FORMAT_F24 : DEPTH_FORMAT_D24; + break; + default: + fprintf(stderr, "Unknown zeta surface format: 0x%x\n", + pg->surface_shape.zeta_format); + assert(false); + break; + } } struct InputInfo { @@ -875,6 +895,23 @@ static MString* psh_convert(struct PixelShader *ps) "vec3 dotmap_hilo_hemisphere(vec4 col) {\n" " return col.rgb;\n" // FIXME "}\n" + // Kahan's algorithm for computing determinant using FMA for higher + // precision. See e.g.: + // Muller et al, "Handbook of Floating-Point Arithmetic", 2nd ed. + // or + // Claude-Pierre Jeannerod, Nicolas Louvet, and Jean-Michel Muller, + // Further analysis of Kahan's algorithm for the accurate + // computation of 2x2 determinants, + // Mathematics of Computation 82(284), October 2013. 
+ "float kahan_det(vec2 a, vec2 b) {\n" + " precise float cd = a.y*b.x;\n" + " precise float err = fma(-a.y, b.x, cd);\n" + " precise float res = fma(a.x, b.y, -cd) + err;\n" + " return res;\n" + "}\n" + "float area(vec2 a, vec2 b, vec2 c) {\n" + " return kahan_det(b - a, c - a);\n" + "}\n" "const float[9] gaussian3x3 = float[9](\n" " 1.0/16.0, 2.0/16.0, 1.0/16.0,\n" " 2.0/16.0, 4.0/16.0, 2.0/16.0,\n" @@ -911,45 +948,69 @@ static MString* psh_convert(struct PixelShader *ps) "}\n"); } + if (ps->state->z_perspective) { + mstring_append( + clip, + "vec2 unscaled_xy = gl_FragCoord.xy / surfaceScale;\n" + "precise float bc0 = area(unscaled_xy, vtxPos1.xy, vtxPos2.xy);\n" + "precise float bc1 = area(unscaled_xy, vtxPos2.xy, vtxPos0.xy);\n" + "precise float bc2 = area(unscaled_xy, vtxPos0.xy, vtxPos1.xy);\n" + "bc0 /= vtxPos0.w;\n" + "bc1 /= vtxPos1.w;\n" + "bc2 /= vtxPos2.w;\n" + "float inv_bcsum = 1.0 / (bc0 + bc1 + bc2);\n" + // Denominator can be zero in case the rasterized primitive is a + // point or a degenerate line or triangle. + "if (isinf(inv_bcsum)) {\n" + " inv_bcsum = 0.0;\n" + "}\n" + "bc1 *= inv_bcsum;\n" + "bc2 *= inv_bcsum;\n" + "precise float zvalue = vtxPos0.w + (bc1*(vtxPos1.w - vtxPos0.w) + bc2*(vtxPos2.w - vtxPos0.w));\n" + // If GPU clipping is inaccurate, the point gl_FragCoord.xy might + // be above the horizon of the plane of a rasterized triangle + // making the interpolated w-coordinate above zero or negative. We + // should prevent such wrapping through infinity by clamping to + // infinity. 
+ "if (zvalue > 0.0) {\n" + " float zslopeofs = depthFactor*triMZ*zvalue*zvalue;\n" + " zvalue += depthOffset;\n" + " zvalue += zslopeofs;\n" + "} else {\n" + " zvalue = uintBitsToFloat(0x7F7FFFFFu);\n" + "}\n" + "if (isnan(zvalue)) {\n" + " zvalue = uintBitsToFloat(0x7F7FFFFFu);\n" + "}\n"); + } else { + mstring_append( + clip, + "vec2 unscaled_xy = gl_FragCoord.xy / surfaceScale;\n" + "precise float bc0 = area(unscaled_xy, vtxPos1.xy, vtxPos2.xy);\n" + "precise float bc1 = area(unscaled_xy, vtxPos2.xy, vtxPos0.xy);\n" + "precise float bc2 = area(unscaled_xy, vtxPos0.xy, vtxPos1.xy);\n" + "float inv_bcsum = 1.0 / (bc0 + bc1 + bc2);\n" + // Denominator can be zero in case the rasterized primitive is a + // point or a degenerate line or triangle. + "if (isinf(inv_bcsum)) {\n" + " inv_bcsum = 0.0;\n" + "}\n" + "bc1 *= inv_bcsum;\n" + "bc2 *= inv_bcsum;\n" + "precise float zvalue = vtxPos0.z + (bc1*(vtxPos1.z - vtxPos0.z) + bc2*(vtxPos2.z - vtxPos0.z));\n" + "zvalue += depthOffset;\n" + "zvalue += depthFactor*triMZ;\n"); + } + + /* Depth clipping */ if (ps->state->depth_clipping) { - if (ps->state->z_perspective) { - mstring_append( - clip, "float zvalue = 1.0/gl_FragCoord.w + depthOffset;\n" - "if (zvalue < clipRange.z || clipRange.w < zvalue) {\n" - " discard;\n" - "}\n"); - } else { - /* Take care of floating point precision problems. MS dashboard - * outputs exactly 0.0 z-coordinates and then our fixed function - * vertex shader outputs -w as the z-coordinate when OpenGL is - * used. Since -w/w = -1, this should give us exactly 0.0 as - * gl_FragCoord.z here. Unfortunately, with AMD Radeon RX 6600 the - * result is slightly greater than 0. MS dashboard sets the clip - * range to [0.0, 0.0] and so the imprecision causes unwanted - * clipping. Note that since Vulkan uses NDC range [0,1] it - * doesn't suffer from this problem with Radeon. Also, despite the - * imprecision OpenGL Radeon writes the correct value 0 to the depth - * buffer (if writing is enabled.) 
Radeon appears to write floored - * values. To compare, Intel integrated UHD 770 has gl_FragCoord.z - * exactly 0 (and writes rounded to closest integer values to the - * depth buffer.) Radeon OpenGL problem could also be fixed by using - * glClipControl(), but it requires OpenGL 4.5. - * Above is based on experiments with Linux and Mesa. - */ - if (ps->opts.vulkan) { - mstring_append( - clip, "if (gl_FragCoord.z*clipRange.y < clipRange.z ||\n" - " gl_FragCoord.z*clipRange.y > clipRange.w) {\n" - " discard;\n" - "}\n"); - } else { - mstring_append( - clip, "if ((gl_FragCoord.z + 1.0f/16777216.0f)*clipRange.y < clipRange.z ||\n" - " (gl_FragCoord.z - 1.0f/16777216.0f)*clipRange.y > clipRange.w) {\n" - " discard;\n" - "}\n"); - } - } + mstring_append( + clip, "if (zvalue < clipRange.z || clipRange.w < zvalue) {\n" + " discard;\n" + "}\n"); + } else { + mstring_append( + clip, "zvalue = clamp(zvalue, clipRange.z, clipRange.w);\n"); } MString *vars = mstring_new(); @@ -1334,21 +1395,33 @@ static MString* psh_convert(struct PixelShader *ps) } } - if (ps->state->z_perspective) { - if (!ps->state->depth_clipping) { - mstring_append(ps->code, - "float zvalue = 1.0/gl_FragCoord.w + depthOffset;\n"); - } - /* TODO: With integer depth buffers Xbox hardware floors values and so - * does Radeon, but Intel UHD 770 rounds to nearest. Should probably - * floor here explicitly (in some way that doesn't also cause - * imprecision issues due to division by clipRange.y) - */ - mstring_append(ps->code, - "gl_FragDepth = clamp(zvalue, clipRange.z, clipRange.w)/clipRange.y;\n"); - } else if (!ps->state->depth_clipping) { - mstring_append(ps->code, - "gl_FragDepth = clamp(gl_FragCoord.z, clipRange.z/clipRange.y, clipRange.w/clipRange.y);\n"); + /* With integer depth buffers Xbox hardware floors values. For gl_FragDepth + * range [0,1] Radeon floors values to integer depth buffer, but Intel UHD + * 770 rounds to nearest. 
For 24-bit OpenGL/Vulkan integer depth buffer, + * we divide the desired depth integer value by 16777216.0, then add 1 in + * integer bit representation to get the same result as dividing the + * desired depth integer by 16777215.0 would give. (GPUs can't divide by + * 16777215.0, only multiply by 1.0/16777215.0 which gives different results + * due to rounding.) + */ + + switch (ps->state->depth_format) { + case DEPTH_FORMAT_D16: + // 16-bit unsigned int + mstring_append( + ps->code, + "gl_FragDepth = floor(zvalue) / 65535.0;\n"); + break; + case DEPTH_FORMAT_D24: + // 24-bit unsigned int + mstring_append( + ps->code, + "gl_FragDepth = uintBitsToFloat(floatBitsToUint(floor(zvalue) / 16777216.0) + 1u);\n"); + break; + default: + // TODO: handle floating-point depth buffers properly + mstring_append(ps->code, "gl_FragDepth = zvalue / clipRange.y;\n"); + break; } MString *final = mstring_new(); @@ -1542,31 +1615,62 @@ void pgraph_glsl_set_psh_uniform_values(PGRAPHState *pg, pgraph_glsl_set_clip_range_uniform_value(pg, values->clipRange[0]); } + bool polygon_offset_enabled = false; + if (pg->primitive_mode >= PRIM_TYPE_TRIANGLES) { + uint32_t raster = pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER); + uint32_t polygon_mode = + GET_MASK(raster, NV_PGRAPH_SETUPRASTER_FRONTFACEMODE); + + if ((polygon_mode == NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_FILL && + (raster & NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE)) || + (polygon_mode == NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_LINE && + (raster & NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE)) || + (polygon_mode == NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_POINT && + (raster & NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE))) { + polygon_offset_enabled = true; + } + } + if (locs[PshUniform_depthOffset] != -1) { float zbias = 0.0f; - if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & - (NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE | - NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE | - NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) { + if (polygon_offset_enabled) { uint32_t zbias_u32 
= pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS); zbias = *(float *)&zbias_u32; - - if (pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR) != 0 && - (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & - NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE)) { - /* TODO: emulate zfactor when z_perspective true, i.e. - * w-buffering. Perhaps calculate an additional offset based on - * triangle orientation in geometry shader and pass the result - * to fragment shader and add it to gl_FragDepth as well. - */ - NV2A_UNIMPLEMENTED("NV_PGRAPH_ZOFFSETFACTOR for w-buffering"); - } } values->depthOffset[0] = zbias; } + if (locs[PshUniform_depthFactor] != -1) { + float zfactor = 0.0f; + + if (polygon_offset_enabled) { + uint32_t zfactor_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR); + zfactor = *(float *)&zfactor_u32; + if (zfactor != 0.0f && + (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & + NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE)) { + /* FIXME: for w-buffering, polygon slope in screen-space is + * computed per-pixel, but Xbox appears to use constant that + * is the polygon slope at the first visible pixel in top-left + * order. 
+ */ + NV2A_UNIMPLEMENTED("NV_PGRAPH_ZOFFSETFACTOR only partially implemented for w-buffering"); + } + } + + values->depthFactor[0] = zfactor; + } + + if (locs[PshUniform_surfaceScale] != -1) { + unsigned int wscale = 1, hscale = 1; + pgraph_apply_anti_aliasing_factor(pg, &wscale, &hscale); + pgraph_apply_scaling_factor(pg, &wscale, &hscale); + values->surfaceScale[0][0] = wscale; + values->surfaceScale[0][1] = hscale; + } + unsigned int max_gl_width = pg->surface_binding_dim.width; unsigned int max_gl_height = pg->surface_binding_dim.height; pgraph_apply_scaling_factor(pg, &max_gl_width, &max_gl_height); diff --git a/hw/xbox/nv2a/pgraph/glsl/psh.h b/hw/xbox/nv2a/pgraph/glsl/psh.h index 1a04c53dff..84d3137a0a 100644 --- a/hw/xbox/nv2a/pgraph/glsl/psh.h +++ b/hw/xbox/nv2a/pgraph/glsl/psh.h @@ -27,6 +27,13 @@ typedef struct PGRAPHState PGRAPHState; +enum PshDepthFormat { + DEPTH_FORMAT_D24, + DEPTH_FORMAT_D16, + DEPTH_FORMAT_F24, + DEPTH_FORMAT_F16, +}; + typedef struct PshState { uint32_t combiner_control; uint32_t shader_stage_program; @@ -61,6 +68,9 @@ typedef struct PshState { bool smooth_shading; bool depth_clipping; bool z_perspective; + + unsigned int surface_zeta_format; + enum PshDepthFormat depth_format; } PshState; void pgraph_glsl_set_psh_state(PGRAPHState *pg, PshState *state); @@ -75,8 +85,10 @@ void pgraph_glsl_set_psh_state(PGRAPHState *pg, PshState *state); DECL(S, colorKey, uint, 4) \ DECL(S, colorKeyMask, uint, 4) \ DECL(S, consts, vec4, 18) \ + DECL(S, depthFactor, float, 1) \ DECL(S, depthOffset, float, 1) \ DECL(S, fogColor, vec4, 1) \ + DECL(S, surfaceScale, ivec2, 1) \ DECL(S, texScale, float, 4) DECL_UNIFORM_TYPES(PshUniform, PSH_UNIFORM_DECL_X) diff --git a/hw/xbox/nv2a/pgraph/glsl/shaders.c b/hw/xbox/nv2a/pgraph/glsl/shaders.c index 44ed5437fc..2d6cfaf7d2 100644 --- a/hw/xbox/nv2a/pgraph/glsl/shaders.c +++ b/hw/xbox/nv2a/pgraph/glsl/shaders.c @@ -73,7 +73,8 @@ bool pgraph_glsl_check_shader_state_dirty(PGRAPHState *pg, pg->swizzle_attrs != 
state->vsh.swizzle_attrs || pg->compressed_attrs != state->vsh.compressed_attrs || pg->primitive_mode != state->geom.primitive_mode || - pg->surface_scale_factor != state->vsh.surface_scale_factor) { + pg->surface_scale_factor != state->vsh.surface_scale_factor || + pg->surface_shape.zeta_format != state->psh.surface_zeta_format) { return true; } diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c index a892c2001b..703c1595c5 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c @@ -479,11 +479,12 @@ GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz mstring_append(body, " oPos = tPosition * compositeMat;\n" - " oPos.z = oPos.z / clipRange.y;\n" " oPos.w = clampAwayZeroInf(oPos.w);\n" " oPos.xy /= oPos.w;\n" " oPos.xy += c[" stringify(NV_IGRAPH_XF_XFCTX_VPOFF) "].xy;\n" " oPos.xy = roundScreenCoords(oPos.xy);\n" + " vec4 vtxPos = vec4(oPos.xy, oPos.z / oPos.w, oPos.w);\n" + " oPos.z = oPos.z / clipRange.y;\n" " oPos.xy = (2.0f * oPos.xy - surfaceSize) / surfaceSize;\n" " oPos.xy *= oPos.w;\n" ); diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c index 582194af89..4a1d57b1f1 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c @@ -755,10 +755,10 @@ void pgraph_glsl_gen_vsh_prog(uint16_t version, const uint32_t *tokens, * in clip space. */ " oPos.xy = roundScreenCoords(oPos.xy);\n" - " oPos.xy = (2.0f * oPos.xy - surfaceSize) / surfaceSize;\n" - - " oPos.z = oPos.z / clipRange.y;\n" " oPos.w = clampAwayZeroInf(oPos.w);\n" + " vec4 vtxPos = oPos;\n" + " oPos.xy = (2.0f * oPos.xy - surfaceSize) / surfaceSize;\n" + " oPos.z = oPos.z / clipRange.y;\n" /* Undo perspective divide by w. 
* Note that games may also have vertex shaders that do diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh.c b/hw/xbox/nv2a/pgraph/glsl/vsh.c index 9bce9f30bf..5b0857dc75 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh.c @@ -245,6 +245,10 @@ MString *pgraph_glsl_gen_vsh(const VshState *state, GenVshGlslOptions opts) "#define vtxT1 v_vtxT1\n" "#define vtxT2 v_vtxT2\n" "#define vtxT3 v_vtxT3\n" + "#define vtxPos0 v_vtxPos0\n" + "#define vtxPos1 v_vtxPos1\n" + "#define vtxPos2 v_vtxPos2\n" + "#define triMZ v_triMZ\n" ); } mstring_append(header, "\n"); @@ -393,6 +397,10 @@ MString *pgraph_glsl_gen_vsh(const VshState *state, GenVshGlslOptions opts) " vtxT1 = oT1;\n" " vtxT2 = oT2;\n" " vtxT3 = oT3;\n" + " vtxPos0 = vtxPos;\n" + " vtxPos1 = vtxPos;\n" + " vtxPos2 = vtxPos;\n" + " triMZ = 0.0;\n" " gl_PointSize = oPts.x;\n" ); diff --git a/hw/xbox/nv2a/pgraph/methods.h.inc b/hw/xbox/nv2a/pgraph/methods.h.inc index d475f9b4b1..3f9e026103 100644 --- a/hw/xbox/nv2a/pgraph/methods.h.inc +++ b/hw/xbox/nv2a/pgraph/methods.h.inc @@ -68,6 +68,7 @@ DEF_METHOD(NV097, SET_STENCIL_OP_FAIL) DEF_METHOD(NV097, SET_STENCIL_OP_ZFAIL) DEF_METHOD(NV097, SET_STENCIL_OP_ZPASS) DEF_METHOD(NV097, SET_SHADE_MODE) +DEF_METHOD(NV097, SET_PROVOKING_VERTEX) DEF_METHOD(NV097, SET_POLYGON_OFFSET_SCALE_FACTOR) DEF_METHOD(NV097, SET_POLYGON_OFFSET_BIAS) DEF_METHOD(NV097, SET_FRONT_POLYGON_MODE) diff --git a/hw/xbox/nv2a/pgraph/pgraph.c b/hw/xbox/nv2a/pgraph/pgraph.c index e3ecb09d12..2e93d77d9c 100644 --- a/hw/xbox/nv2a/pgraph/pgraph.c +++ b/hw/xbox/nv2a/pgraph/pgraph.c @@ -1534,6 +1534,13 @@ DEF_METHOD(NV097, SET_SHADE_MODE) } } +DEF_METHOD(NV097, SET_PROVOKING_VERTEX) +{ + assert((parameter & ~1) == 0); + PG_SET_MASK(NV_PGRAPH_CONTROL_3, NV_PGRAPH_CONTROL_3_PROVOKING_VERTEX, + parameter); +} + DEF_METHOD(NV097, SET_POLYGON_OFFSET_SCALE_FACTOR) { pgraph_reg_w(pg, NV_PGRAPH_ZOFFSETFACTOR, parameter); diff --git a/hw/xbox/nv2a/pgraph/pgraph.h b/hw/xbox/nv2a/pgraph/pgraph.h index 
49f941d450..0449270b55 100644 --- a/hw/xbox/nv2a/pgraph/pgraph.h +++ b/hw/xbox/nv2a/pgraph/pgraph.h @@ -96,6 +96,15 @@ typedef struct BetaState { uint32_t beta; } BetaState; +typedef struct GPUProperties { + struct { + short tri; + short tri_strip0; + short tri_strip1; + short tri_fan; + } geom_shader_winding; +} GPUProperties; + typedef struct PGRAPHRenderer { CONFIG_DISPLAY_RENDERER type; const char *name; @@ -122,6 +131,7 @@ typedef struct PGRAPHRenderer { void (*set_surface_scale_factor)(NV2AState *d, unsigned int scale); unsigned int (*get_surface_scale_factor)(NV2AState *d); int (*get_framebuffer_surface)(NV2AState *d); + GPUProperties *(*get_gpu_properties)(void); } ops; } PGRAPHRenderer; diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 28b8194468..d78087e56b 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -54,10 +54,6 @@ static VkPrimitiveTopology get_primitive_topology(PGRAPHState *pg) int polygon_mode = r->shader_binding->state.geom.polygon_front_mode; int primitive_mode = r->shader_binding->state.geom.primitive_mode; - if (polygon_mode == POLY_MODE_POINT) { - return VK_PRIMITIVE_TOPOLOGY_POINT_LIST; - } - // FIXME: Replace with LUT switch (primitive_mode) { case PRIM_TYPE_POINTS: @@ -792,27 +788,6 @@ static void create_pipeline(PGRAPHState *pg) void *rasterizer_next_struct = NULL; - VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_state; - - if (r->provoking_vertex_extension_enabled) { - VkProvokingVertexModeEXT provoking_mode = - GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3), - NV_PGRAPH_CONTROL_3_SHADEMODE) == - NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT ? 
- VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT : - VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT; - - provoking_state = - (VkPipelineRasterizationProvokingVertexStateCreateInfoEXT){ - .sType = - VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, - .provokingVertexMode = provoking_mode, - }; - rasterizer_next_struct = &provoking_state; - } else { - // FIXME: Handle in shader? - } - VkPipelineRasterizationStateCreateInfo rasterizer = { .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, .depthClampEnable = VK_TRUE, @@ -968,27 +943,6 @@ static void create_pipeline(PGRAPHState *pg) .pDynamicStates = dynamic_states, }; - // /* Polygon offset */ - // /* FIXME: GL implementation-specific, maybe do this in VS? */ - // if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & - // NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE) - // if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & - // NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE) - // if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & - // NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE) - if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & - (NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE | - NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE | - NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) { - uint32_t zfactor_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR); - float zfactor = *(float *)&zfactor_u32; - uint32_t zbias_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS); - float zbias = *(float *)&zbias_u32; - rasterizer.depthBiasEnable = VK_TRUE; - rasterizer.depthBiasSlopeFactor = zfactor; - rasterizer.depthBiasConstantFactor = zbias; - } - // FIXME: Dither // if (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & // NV_PGRAPH_CONTROL_0_DITHERENABLE)) diff --git a/hw/xbox/nv2a/pgraph/vk/gpuprops.c b/hw/xbox/nv2a/pgraph/vk/gpuprops.c new file mode 100644 index 0000000000..ecc8bb3852 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/gpuprops.c @@ -0,0 +1,608 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024-2025 Matt Borgerson + * + * This library 
is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "debug.h" +#include "renderer.h" + +static GPUProperties pgraph_vk_gpu_properties; + +static const char *vertex_shader_source = + "#version 450\n" + "layout(location = 0) out vec3 v_fragColor;\n" + "\n" + "vec2 positions[11] = vec2[](\n" + " vec2(-0.5, -0.75),\n" + " vec2(-0.25, -0.25),\n" + " vec2(-0.75, -0.25),\n" + " vec2(0.25, -0.25),\n" + " vec2(0.25, -0.75),\n" + " vec2(0.75, -0.25),\n" + " vec2(0.75, -0.75),\n" + " vec2(-0.75, 0.75),\n" + " vec2(-0.75, 0.25),\n" + " vec2(-0.25, 0.25),\n" + " vec2(-0.25, 0.75)\n" + ");\n" + "\n" + "vec3 colors[11] = vec3[](\n" + " vec3(0.0, 0.0, 1.0),\n" + " vec3(0.0, 1.0, 0.0),\n" + " vec3(0.0, 1.0, 1.0),\n" + " vec3(0.0, 0.0, 1.0),\n" + " vec3(0.0, 1.0, 0.0),\n" + " vec3(0.0, 1.0, 1.0),\n" + " vec3(1.0, 0.0, 0.0),\n" + " vec3(0.0, 0.0, 1.0),\n" + " vec3(0.0, 1.0, 0.0),\n" + " vec3(0.0, 1.0, 1.0),\n" + " vec3(1.0, 0.0, 0.0)\n" + ");\n" + "\n" + "void main() {\n" + " gl_Position = vec4(positions[gl_VertexIndex], 0.0, 1.0);\n" + " v_fragColor = colors[gl_VertexIndex];\n" + "}\n"; + +static const char *geometry_shader_source = + "#version 450\n" + "layout(triangles) in;\n" + "layout(triangle_strip, max_vertices = 3) out;\n" + "layout(location = 0) out vec3 fragColor;\n" + "layout(location = 0) in vec3 v_fragColor[];\n" + "\n" + "void main() {\n" + " for (int i = 0; i < 3; i++) {\n" + // This 
should be just: + // gl_Position = gl_in[i].gl_Position; + // fragColor = v_fragColor[0]; + // but we apply the same Nvidia bug work around from gl/gpuprops.c + // to be on the safe side even if the compilers involved with + // Vulkan are different. + " gl_Position = gl_in[i].gl_Position + vec4(1.0/16384.0, 1.0/16384.0, 0.0, 0.0);\n" + " precise vec3 color = v_fragColor[0]*(0.999 + gl_in[i].gl_Position.x/16384.0) + v_fragColor[1]*0.00005 + v_fragColor[2]*0.00005;\n" + " fragColor = color;\n" + " EmitVertex();\n" + " }\n" + " EndPrimitive();\n" + "}\n"; + +static const char *fragment_shader_source = + "#version 450\n" + "layout(location = 0) out vec4 outColor;\n" + "layout(location = 0) in vec3 fragColor;\n" + "\n" + "void main() {\n" + " outColor = vec4(fragColor, 1.0);\n" + "}\n"; + +static VkPipeline create_test_pipeline( + NV2AState *d, VkPrimitiveTopology primitive_topology, + VkShaderModule vert_shader_module, VkShaderModule geom_shader_module, + VkShaderModule frag_shader_module, VkPipelineLayout pipeline_layout, + VkRenderPass render_pass, int width, int height) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + VkPipelineShaderStageCreateInfo shader_stages[] = { + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = vert_shader_module, + .pName = "main", + }, + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_GEOMETRY_BIT, + .module = geom_shader_module, + .pName = "main", + }, + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = frag_shader_module, + .pName = "main", + }, + }; + + VkPipelineVertexInputStateCreateInfo vertex_input_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 0, + 
.vertexAttributeDescriptionCount = 0, + }; + + VkPipelineInputAssemblyStateCreateInfo input_assembly = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = primitive_topology, + .primitiveRestartEnable = VK_FALSE, + }; + + VkViewport viewport = { + .x = 0.0f, + .y = 0.0f, + .width = (float)width, + .height = (float)height, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }; + VkRect2D scissor = { + .offset = { 0, 0 }, + .extent.width = width, + .extent.height = height, + }; + VkPipelineViewportStateCreateInfo viewport_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .pViewports = &viewport, + .scissorCount = 1, + .pScissors = &scissor, + }; + + VkPipelineRasterizationStateCreateInfo rasterizer = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .depthClampEnable = VK_FALSE, + .rasterizerDiscardEnable = VK_FALSE, + .polygonMode = VK_POLYGON_MODE_FILL, + .lineWidth = 1.0f, + .cullMode = VK_CULL_MODE_BACK_BIT, + .frontFace = VK_FRONT_FACE_CLOCKWISE, + .depthBiasEnable = VK_FALSE, + }; + + VkPipelineMultisampleStateCreateInfo multisampling = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .sampleShadingEnable = VK_FALSE, + .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, + }; + + VkPipelineColorBlendAttachmentState color_blend_attachment = { + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, + .blendEnable = VK_FALSE, + }; + + VkPipelineColorBlendStateCreateInfo color_blending = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_COPY, + .attachmentCount = 1, + .pAttachments = &color_blend_attachment, + .blendConstants = { 0.0f, 0.0f, 0.0f, 0.0f }, + }; + + VkGraphicsPipelineCreateInfo pipeline_info = { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = 
ARRAY_SIZE(shader_stages), + .pStages = shader_stages, + .pVertexInputState = &vertex_input_info, + .pInputAssemblyState = &input_assembly, + .pViewportState = &viewport_state, + .pRasterizationState = &rasterizer, + .pMultisampleState = &multisampling, + .pColorBlendState = &color_blending, + .layout = pipeline_layout, + .renderPass = render_pass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + }; + + VkPipeline pipeline; + VK_CHECK(vkCreateGraphicsPipelines(r->device, VK_NULL_HANDLE, 1, + &pipeline_info, NULL, &pipeline)); + + return pipeline; +} + +static uint8_t *render_geom_shader_triangles(NV2AState *d, int width, + int height) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + // Create image + VkImageCreateInfo image_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .extent.width = width, + .extent.height = height, + .extent.depth = 1, + .mipLevels = 1, + .arrayLayers = 1, + .format = VK_FORMAT_R8G8B8A8_UNORM, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT, + .samples = VK_SAMPLE_COUNT_1_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + }; + + VkImage offscreen_image; + VK_CHECK( + vkCreateImage(r->device, &image_create_info, NULL, &offscreen_image)); + + // Allocate and bind image memory + VkMemoryRequirements memory_requirements; + vkGetImageMemoryRequirements(r->device, offscreen_image, + &memory_requirements); + + VkMemoryAllocateInfo alloc_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = memory_requirements.size, + .memoryTypeIndex = + pgraph_vk_get_memory_type(pg, memory_requirements.memoryTypeBits, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT), + }; + + VkDeviceMemory image_memory; + VK_CHECK(vkAllocateMemory(r->device, &alloc_info, NULL, &image_memory)); + VK_CHECK(vkBindImageMemory(r->device, offscreen_image, 
image_memory, 0)); + + // Create Image View + VkImageViewCreateInfo image_view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = offscreen_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = image_create_info.format, + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }; + + VkImageView offscreen_image_view; + VK_CHECK(vkCreateImageView(r->device, &image_view_create_info, NULL, + &offscreen_image_view)); + + // Buffer for image CPU access + VkBufferCreateInfo buffer_info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .size = width * height * 4, // RGBA8 = 4 bytes per pixel + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + }; + + VkBuffer cpu_buffer; + VK_CHECK(vkCreateBuffer(r->device, &buffer_info, NULL, &cpu_buffer)); + + // Allocate and bind memory for image CPU access + VkMemoryRequirements host_mem_requirements; + vkGetBufferMemoryRequirements(r->device, cpu_buffer, + &host_mem_requirements); + + VkMemoryAllocateInfo host_alloc_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = host_mem_requirements.size, + .memoryTypeIndex = + pgraph_vk_get_memory_type(pg, host_mem_requirements.memoryTypeBits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT), + }; + + VkDeviceMemory cpu_buffer_memory; + VK_CHECK(vkAllocateMemory(r->device, &host_alloc_info, NULL, + &cpu_buffer_memory)); + VK_CHECK(vkBindBufferMemory(r->device, cpu_buffer, cpu_buffer_memory, 0)); + + + VkAttachmentDescription color_attachment = { + .format = VK_FORMAT_R8G8B8A8_UNORM, + .samples = VK_SAMPLE_COUNT_1_BIT, + .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + }; + + VkAttachmentReference color_ref = { + 0, 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL + }; + + VkSubpassDescription subpass = { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .colorAttachmentCount = 1, + .pColorAttachments = &color_ref, + }; + + VkRenderPassCreateInfo render_pass_info = { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &color_attachment, + .subpassCount = 1, + .pSubpasses = &subpass, + }; + + VkRenderPass render_pass; + VK_CHECK( + vkCreateRenderPass(r->device, &render_pass_info, NULL, &render_pass)); + + VkFramebufferCreateInfo fb_info = { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .renderPass = render_pass, + .attachmentCount = 1, + .pAttachments = &offscreen_image_view, + .width = width, + .height = height, + .layers = 1, + }; + + VkFramebuffer framebuffer; + VK_CHECK(vkCreateFramebuffer(r->device, &fb_info, NULL, &framebuffer)); + + ShaderModuleInfo *vsh_info = pgraph_vk_create_shader_module_from_glsl( + r, VK_SHADER_STAGE_VERTEX_BIT, vertex_shader_source); + ShaderModuleInfo *geom_info = pgraph_vk_create_shader_module_from_glsl( + r, VK_SHADER_STAGE_GEOMETRY_BIT, geometry_shader_source); + ShaderModuleInfo *psh_info = pgraph_vk_create_shader_module_from_glsl( + r, VK_SHADER_STAGE_FRAGMENT_BIT, fragment_shader_source); + + VkShaderModule vert_shader_module = vsh_info->module; + VkShaderModule geom_shader_module = geom_info->module; + VkShaderModule frag_shader_module = psh_info->module; + + VkPipelineLayoutCreateInfo pipeline_layout_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 0, + .pushConstantRangeCount = 0, + }; + + VkPipelineLayout pipeline_layout; + VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL, + &pipeline_layout)); + + VkPipeline tri_pipeline = create_test_pipeline( + d, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, vert_shader_module, + geom_shader_module, frag_shader_module, pipeline_layout, render_pass, + width, height); + + VkPipeline strip_pipeline 
= create_test_pipeline( + d, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, vert_shader_module, + geom_shader_module, frag_shader_module, pipeline_layout, render_pass, + width, height); + + VkPipeline fan_pipeline = create_test_pipeline( + d, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN, vert_shader_module, + geom_shader_module, frag_shader_module, pipeline_layout, render_pass, + width, height); + + pgraph_vk_destroy_shader_module(r, psh_info); + pgraph_vk_destroy_shader_module(r, geom_info); + pgraph_vk_destroy_shader_module(r, vsh_info); + + VkCommandBufferBeginInfo begin_info = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + }; + VK_CHECK(vkBeginCommandBuffer(r->command_buffer, &begin_info)); + + // Begin render pass + VkClearValue clear_color = { + .color.float32 = { 0.0f, 0.0f, 0.0f, 1.0f }, + }; + VkRenderPassBeginInfo rp_begin = { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = render_pass, + .framebuffer = framebuffer, + .renderArea.extent.width = width, + .renderArea.extent.height = height, + .clearValueCount = 1, + .pClearValues = &clear_color, + }; + + vkCmdBeginRenderPass(r->command_buffer, &rp_begin, + VK_SUBPASS_CONTENTS_INLINE); + + vkCmdBindPipeline(r->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + tri_pipeline); + vkCmdDraw(r->command_buffer, 3, 1, 0, 0); + vkCmdBindPipeline(r->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + strip_pipeline); + vkCmdDraw(r->command_buffer, 4, 1, 3, 0); + vkCmdBindPipeline(r->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + fan_pipeline); + vkCmdDraw(r->command_buffer, 4, 1, 7, 0); + + vkCmdEndRenderPass(r->command_buffer); + + // Synchronize and transition framebuffer for copying to CPU + pgraph_vk_transition_image_layout(pg, r->command_buffer, offscreen_image, + image_create_info.format, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + + // Copy framebuffer to CPU memory + VkBufferImageCopy region = { + .bufferOffset = 0, + .bufferRowLength = 0, // 
tightly packed + .bufferImageHeight = 0, + + .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .imageSubresource.mipLevel = 0, + .imageSubresource.baseArrayLayer = 0, + .imageSubresource.layerCount = 1, + + .imageOffset = { 0, 0, 0 }, + .imageExtent = { width, height, 1 }, + }; + + vkCmdCopyImageToBuffer(r->command_buffer, offscreen_image, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, cpu_buffer, 1, + &region); + + VK_CHECK(vkEndCommandBuffer(r->command_buffer)); + + VkSubmitInfo submit_info = { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = &r->command_buffer, + }; + + VK_CHECK(vkQueueSubmit(r->queue, 1, &submit_info, VK_NULL_HANDLE)); + VK_CHECK(vkQueueWaitIdle(r->queue)); + + void *data; + VK_CHECK( + vkMapMemory(r->device, cpu_buffer_memory, 0, VK_WHOLE_SIZE, 0, &data)); + void *pixels = g_malloc(width * height * 4); + assert(pixels != NULL); + memcpy(pixels, data, width * height * 4); + vkUnmapMemory(r->device, cpu_buffer_memory); + + vkDestroyPipeline(r->device, strip_pipeline, NULL); + vkDestroyPipeline(r->device, fan_pipeline, NULL); + vkDestroyPipeline(r->device, tri_pipeline, NULL); + vkDestroyPipelineLayout(r->device, pipeline_layout, NULL); + vkDestroyFramebuffer(r->device, framebuffer, NULL); + vkDestroyRenderPass(r->device, render_pass, NULL); + vkDestroyImageView(r->device, offscreen_image_view, NULL); + vkDestroyBuffer(r->device, cpu_buffer, NULL); + vkFreeMemory(r->device, cpu_buffer_memory, NULL); + vkDestroyImage(r->device, offscreen_image, NULL); + vkFreeMemory(r->device, image_memory, NULL); + + return (uint8_t *)pixels; +} + +static bool colors_match(int r1, int g1, int b1, int r2, int g2, int b2) +{ + int dr = r1 - r2; + int dg = g1 - g2; + int db = b1 - b2; + + return (dr * dr + dg * dg + db * db) <= 16; +} + +static int get_color_index(uint8_t *pixel) +{ + int r = pixel[0]; + int g = pixel[1]; + int b = pixel[2]; + + if (colors_match(r, g, b, 0, 0, 255)) { + return 0; + } else if (colors_match(r, 
g, b, 0, 255, 0)) { + return 1; + } else if (colors_match(r, g, b, 0, 255, 255)) { + return 2; + } else if (colors_match(r, g, b, 255, 0, 0)) { + return 3; + } else { + return -1; + } +} + +static int calc_offset_from_ndc(float x, float y, int width, int height) +{ + int x0 = (int)((x + 1.0f) * width * 0.5f); + int y0 = (int)((y + 1.0f) * height * 0.5f); + + x0 = MAX(x0, 0); + y0 = MAX(y0, 0); + x0 = MIN(x0, width - 1); + y0 = MIN(y0, height - 1); + + return y0 * width + x0; +} + +static void determine_triangle_winding_order(uint8_t *pixels, int width, + int height, GPUProperties *props) +{ + uint8_t *tri_pix = + pixels + calc_offset_from_ndc(-0.5f, -0.5f, width, height) * 4; + uint8_t *strip0_pix = + pixels + calc_offset_from_ndc(0.417f, -0.417f, width, height) * 4; + uint8_t *strip1_pix = + pixels + calc_offset_from_ndc(0.583f, -0.583f, width, height) * 4; + uint8_t *fan_pix = + pixels + calc_offset_from_ndc(-0.583f, 0.417f, width, height) * 4; + uint8_t *fan2_pix = + pixels + calc_offset_from_ndc(-0.417f, 0.583f, width, height) * 4; + + int tri_rot = get_color_index(tri_pix); + if (tri_rot < 0 || tri_rot > 2) { + fprintf(stderr, + "Could not determine triangle rotation, got color: R=%d, G=%d, " + "B=%d\n", + tri_pix[0], tri_pix[1], tri_pix[2]); + tri_rot = 0; + } + props->geom_shader_winding.tri = tri_rot; + + int strip0_rot = get_color_index(strip0_pix); + if (strip0_rot < 0 || strip0_rot > 2) { + fprintf(stderr, + "Could not determine triangle strip0 rotation, got color: " + "R=%d, G=%d, B=%d\n", + strip0_pix[0], strip0_pix[1], strip0_pix[2]); + strip0_rot = 0; + } + int strip1_rot = get_color_index(strip1_pix) - 1; + if (strip1_rot < 0 || strip1_rot > 2) { + fprintf(stderr, + "Could not determine triangle strip1 rotation, got color: " + "R=%d, G=%d, B=%d\n", + strip1_pix[0], strip1_pix[1], strip1_pix[2]); + strip1_rot = 0; + } + props->geom_shader_winding.tri_strip0 = strip0_rot; + props->geom_shader_winding.tri_strip1 = (3 - strip1_rot) % 3; + + int fan_rot 
= get_color_index(fan_pix); + int fan2_rot = get_color_index(fan2_pix); + if (fan2_rot == 0) { + fan2_rot = 1; + } + fan2_rot--; + if (fan_rot != fan2_rot) { + fprintf(stderr, + "Unexpected inconsistency in triangle fan winding, got colors: " + "R=%d, G=%d, B=%d and R=%d, G=%d, B=%d\n", + fan_pix[0], fan_pix[1], fan_pix[2], fan2_pix[0], fan2_pix[1], + fan2_pix[2]); + fan_rot = 1; + } + if (fan_rot < 0 || fan_rot > 2) { + fprintf(stderr, + "Could not determine triangle fan rotation, got color: R=%d, " + "G=%d, B=%d\n", + fan_pix[0], fan_pix[1], fan_pix[2]); + fan_rot = 1; + } + props->geom_shader_winding.tri_fan = (fan_rot + 2) % 3; +} + +void pgraph_vk_determine_gpu_properties(NV2AState *d) +{ + const int width = 640; + const int height = 480; + + uint8_t *pixels = render_geom_shader_triangles(d, width, height); + determine_triangle_winding_order(pixels, width, height, + &pgraph_vk_gpu_properties); + g_free(pixels); + + fprintf(stderr, "VK geometry shader winding: %d, %d, %d, %d\n", + pgraph_vk_gpu_properties.geom_shader_winding.tri, + pgraph_vk_gpu_properties.geom_shader_winding.tri_strip0, + pgraph_vk_gpu_properties.geom_shader_winding.tri_strip1, + pgraph_vk_gpu_properties.geom_shader_winding.tri_fan); +} + +GPUProperties *pgraph_vk_get_gpu_properties(void) +{ + return &pgraph_vk_gpu_properties; +} diff --git a/hw/xbox/nv2a/pgraph/vk/instance.c b/hw/xbox/nv2a/pgraph/vk/instance.c index 3d993cb735..fcada9e29b 100644 --- a/hw/xbox/nv2a/pgraph/vk/instance.c +++ b/hw/xbox/nv2a/pgraph/vk/instance.c @@ -401,10 +401,6 @@ static void add_optional_device_extension_names( add_extension_if_available(available_extensions, enabled_extension_names, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); - r->provoking_vertex_extension_enabled = - add_extension_if_available(available_extensions, enabled_extension_names, - VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); - r->memory_budget_extension_enabled = add_extension_if_available( available_extensions, enabled_extension_names, 
VK_EXT_MEMORY_BUDGET_EXTENSION_NAME); @@ -570,17 +566,6 @@ static bool create_logical_device(PGRAPHState *pg, Error **errp) void *next_struct = NULL; - VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex_features; - if (r->provoking_vertex_extension_enabled) { - provoking_vertex_features = (VkPhysicalDeviceProvokingVertexFeaturesEXT){ - .sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT, - .provokingVertexLast = VK_TRUE, - .pNext = next_struct, - }; - next_struct = &provoking_vertex_features; - } - VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border_features; if (r->custom_border_color_extension_enabled) { custom_border_features = (VkPhysicalDeviceCustomBorderColorFeaturesEXT){ diff --git a/hw/xbox/nv2a/pgraph/vk/meson.build b/hw/xbox/nv2a/pgraph/vk/meson.build index 24c2474cb9..b5d75e2faa 100644 --- a/hw/xbox/nv2a/pgraph/vk/meson.build +++ b/hw/xbox/nv2a/pgraph/vk/meson.build @@ -9,6 +9,7 @@ specific_ss.add([sdl, volk, libglslang, vma, vulkan, spirv_reflect, gloffscreen, 'display.c', 'draw.c', 'glsl.c', + 'gpuprops.c', 'image.c', 'instance.c', 'renderer.c', diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.c b/hw/xbox/nv2a/pgraph/vk/renderer.c index 3dbc724b95..4272bbceb6 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.c +++ b/hw/xbox/nv2a/pgraph/vk/renderer.c @@ -62,6 +62,8 @@ static void pgraph_vk_init(NV2AState *d, Error **errp) pgraph_vk_update_vertex_ram_buffer(&d->pgraph, 0, d->vram_ptr, memory_region_size(d->vram)); + + pgraph_vk_determine_gpu_properties(d); } static void pgraph_vk_finalize(NV2AState *d) @@ -227,6 +229,7 @@ static PGRAPHRenderer pgraph_vk_renderer = { .set_surface_scale_factor = pgraph_vk_set_surface_scale_factor, .get_surface_scale_factor = pgraph_vk_get_surface_scale_factor, .get_framebuffer_surface = pgraph_vk_get_framebuffer_surface, + .get_gpu_properties = pgraph_vk_get_gpu_properties, } }; diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h index c2233a7b17..9f87114187 
100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -325,7 +325,6 @@ typedef struct PGRAPHVkState { bool debug_utils_extension_enabled; bool custom_border_color_extension_enabled; - bool provoking_vertex_extension_enabled; bool memory_budget_extension_enabled; VkPhysicalDevice physical_device; @@ -594,4 +593,8 @@ void pgraph_vk_end_nondraw_commands(PGRAPHState *pg, VkCommandBuffer cmd); // blit.c void pgraph_vk_image_blit(NV2AState *d); +// gpuprops.c +void pgraph_vk_determine_gpu_properties(NV2AState *d); +GPUProperties *pgraph_vk_get_gpu_properties(void); + #endif diff --git a/hw/xbox/nv2a/pgraph/vk/surface-compute.c b/hw/xbox/nv2a/pgraph/vk/surface-compute.c index 54cf610402..50180eb6b9 100644 --- a/hw/xbox/nv2a/pgraph/vk/surface-compute.c +++ b/hw/xbox/nv2a/pgraph/vk/surface-compute.c @@ -106,7 +106,8 @@ const char *unpack_z24s8_to_d32_sfloat_s8_uint_glsl = "void main() {\n" " uint idx_out = gl_GlobalInvocationID.x;\n" " uint idx_in = get_input_idx(idx_out);\n" - " depth_out[idx_out] = float(depth_stencil_in[idx_in] >> 8) / float(0xffffff);\n" + // Conversion to float depth must be the same as in fragment shader + " depth_out[idx_out] = uintBitsToFloat(floatBitsToUint(float(depth_stencil_in[idx_in] >> 8) / 16777216.0) + 1u);\n" " if (idx_out % 4 == 0) {\n" " uint stencil_value = 0;\n" " for (int i = 0; i < 4; i++) {\n" // Include next 3 pixels