From 0008aaa63981df7a39981a46721093b61524a970 Mon Sep 17 00:00:00 2001 From: coldhex Date: Sun, 29 Jun 2025 15:32:50 +0300 Subject: [PATCH] nv2a: Vertex reordering for flat shading in geometry shader Test OpenGL/Vulkan geometry shader triangle, strip and fan vertex ordering during backend initialization. OpenGL/Vulkan does not guarantee absolute vertex order for geometry shader input triangles. The test results are used to reorder input triangle vertices into the first vertex convention order so that correct provoking vertex can be chosen for flat shading. Also, this removes use of the Vulkan provoking vertex extension. The default first vertex convention is now used when emitting line strips in geometry shader. (It would of course be possible to always emit only separate line segments and then the convention wouldn't matter at all.) --- hw/xbox/nv2a/pgraph/gl/draw.c | 6 + hw/xbox/nv2a/pgraph/gl/gpuprops.c | 357 +++++++++++++++++ hw/xbox/nv2a/pgraph/gl/meson.build | 1 + hw/xbox/nv2a/pgraph/gl/renderer.c | 3 + hw/xbox/nv2a/pgraph/gl/renderer.h | 2 + hw/xbox/nv2a/pgraph/glsl/geom.c | 135 +++++-- hw/xbox/nv2a/pgraph/glsl/geom.h | 2 + hw/xbox/nv2a/pgraph/pgraph.h | 10 + hw/xbox/nv2a/pgraph/vk/draw.c | 17 - hw/xbox/nv2a/pgraph/vk/gpuprops.c | 605 +++++++++++++++++++++++++++++ hw/xbox/nv2a/pgraph/vk/instance.c | 15 - hw/xbox/nv2a/pgraph/vk/meson.build | 1 + hw/xbox/nv2a/pgraph/vk/renderer.c | 3 + hw/xbox/nv2a/pgraph/vk/renderer.h | 5 +- 14 files changed, 1090 insertions(+), 72 deletions(-) create mode 100644 hw/xbox/nv2a/pgraph/gl/gpuprops.c create mode 100644 hw/xbox/nv2a/pgraph/vk/gpuprops.c diff --git a/hw/xbox/nv2a/pgraph/gl/draw.c b/hw/xbox/nv2a/pgraph/gl/draw.c index 7af27d845c..9562ea30d8 100644 --- a/hw/xbox/nv2a/pgraph/gl/draw.c +++ b/hw/xbox/nv2a/pgraph/gl/draw.c @@ -227,6 +227,12 @@ void pgraph_gl_draw_begin(NV2AState *d) glEnable(GL_DEPTH_CLAMP); + /* Set first vertex convention to match Vulkan default. 
This is needed + * because geometry shader outputs line strips with data for fragment + * shader. + */ + glProvokingVertex(GL_FIRST_VERTEX_CONVENTION); + if (stencil_test) { glEnable(GL_STENCIL_TEST); diff --git a/hw/xbox/nv2a/pgraph/gl/gpuprops.c b/hw/xbox/nv2a/pgraph/gl/gpuprops.c new file mode 100644 index 0000000000..29cb1e7b0e --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/gpuprops.c @@ -0,0 +1,357 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2025 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "debug.h" +#include "renderer.h" + +static GPUProperties pgraph_gl_gpu_properties; + +static const char *vertex_shader_source = + "#version 400\n" + "out vec3 v_fragColor;\n" + "\n" + "vec2 positions[11] = vec2[](\n" + " vec2(-0.5, -0.75),\n" + " vec2(-0.25, -0.25),\n" + " vec2(-0.75, -0.25),\n" + " vec2(0.25, -0.25),\n" + " vec2(0.25, -0.75),\n" + " vec2(0.75, -0.25),\n" + " vec2(0.75, -0.75),\n" + " vec2(-0.75, 0.75),\n" + " vec2(-0.75, 0.25),\n" + " vec2(-0.25, 0.25),\n" + " vec2(-0.25, 0.75)\n" + ");\n" + "\n" + "vec3 colors[11] = vec3[](\n" + " vec3(0.0, 0.0, 1.0),\n" + " vec3(0.0, 1.0, 0.0),\n" + " vec3(0.0, 1.0, 1.0),\n" + " vec3(0.0, 0.0, 1.0),\n" + " vec3(0.0, 1.0, 0.0),\n" + " vec3(0.0, 1.0, 1.0),\n" + " vec3(1.0, 0.0, 0.0),\n" + " vec3(0.0, 0.0, 1.0),\n" + " vec3(0.0, 1.0, 0.0),\n" + " vec3(0.0, 1.0, 1.0),\n" + " vec3(1.0, 0.0, 0.0)\n" + ");\n" + "\n" + "void main() {\n" + " gl_Position = vec4(positions[gl_VertexID], 0.0, 1.0);\n" + " v_fragColor = colors[gl_VertexID];\n" + "}\n"; + +static const char *geometry_shader_source = + "#version 400\n" + "layout(triangles) in;\n" + "layout(triangle_strip, max_vertices = 3) out;\n" + "out vec3 fragColor;\n" + "in vec3 v_fragColor[];\n" + "\n" + "void emit_vertex(int index) {\n" + " gl_Position = gl_in[index].gl_Position;\n" + " fragColor = v_fragColor[0];\n" + " EmitVertex();\n" + "}\n" + "\n" + "void main() {\n" + " emit_vertex(0);\n" + " emit_vertex(1);\n" + " emit_vertex(2);\n" + " EndPrimitive();\n" + "}\n"; + +static const char *fragment_shader_source = + "#version 400\n" + "out vec4 outColor;\n" + "in vec3 fragColor;\n" + "\n" + "void main() {\n" + " outColor = vec4(fragColor, 1.0);\n" + "}\n"; + +static GLuint compile_shader(GLenum type, const char *source) +{ + GLuint shader = glCreateShader(type); + glShaderSource(shader, 1, &source, NULL); + glCompileShader(shader); + + GLint success; + glGetShaderiv(shader, GL_COMPILE_STATUS, &success); + if (!success) { + char log[512]; + 
glGetShaderInfoLog(shader, sizeof(log), NULL, log); + log[sizeof(log) - 1] = '\0'; + fprintf(stderr, "GL shader type %d compilation failed: %s\n", type, + log); + assert(false); + } + + return shader; +} + +static GLuint create_program(const char *vert_source, const char *geom_source, + const char *frag_source) +{ + GLuint vert_shader = compile_shader(GL_VERTEX_SHADER, vert_source); + GLuint geom_shader = compile_shader(GL_GEOMETRY_SHADER, geom_source); + GLuint frag_shader = compile_shader(GL_FRAGMENT_SHADER, frag_source); + + GLuint shader_prog = glCreateProgram(); + glAttachShader(shader_prog, vert_shader); + glAttachShader(shader_prog, geom_shader); + glAttachShader(shader_prog, frag_shader); + glLinkProgram(shader_prog); + + GLint success; + glGetProgramiv(shader_prog, GL_LINK_STATUS, &success); + if (!success) { + char log[512]; + glGetProgramInfoLog(shader_prog, sizeof(log), NULL, log); + log[sizeof(log) - 1] = '\0'; + fprintf(stderr, "GL shader linking failed: %s\n", log); + assert(false); + } + + glDeleteShader(vert_shader); + glDeleteShader(geom_shader); + glDeleteShader(frag_shader); + + return shader_prog; +} + +static void check_gl_error(const char *context) +{ + GLenum err; + int limit = 10; + + while ((err = glGetError()) != GL_NO_ERROR) { + fprintf(stderr, "GPU properties OpenGL error 0x%X in %s\n", err, + context); + if (--limit <= 0) { + fprintf( + stderr, + "Too many OpenGL errors in %s — possible infinite error loop\n", + context); + break; + } + } +} + +static uint8_t *render_geom_shader_triangles(int width, int height) +{ + // Create the framebuffer and renderbuffer for it + GLuint fbo, rbo; + glGenFramebuffers(1, &fbo); + glBindFramebuffer(GL_FRAMEBUFFER, fbo); + glGenRenderbuffers(1, &rbo); + glBindRenderbuffer(GL_RENDERBUFFER, rbo); + glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, width, height); + check_gl_error("glRenderbufferStorage"); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_RENDERBUFFER, rbo); + 
check_gl_error("glFramebufferRenderbuffer"); + + assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); + + GLuint shader_prog = create_program( + vertex_shader_source, geometry_shader_source, fragment_shader_source); + assert(shader_prog != 0); + + glUseProgram(shader_prog); + check_gl_error("glUseProgram"); + glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glClearColor(0.0f, 0.0f, 0.0f, 1.0f); + glClear(GL_COLOR_BUFFER_BIT); + check_gl_error("glClear"); + + glDisable(GL_CULL_FACE); + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); + glProvokingVertex(GL_FIRST_VERTEX_CONVENTION); + glViewport(0, 0, width, height); + check_gl_error("state setup"); + + GLuint vao; + glGenVertexArrays(1, &vao); + glBindVertexArray(vao); + check_gl_error("glBindVertexArray"); + glDrawArrays(GL_TRIANGLES, 0, 3); + glDrawArrays(GL_TRIANGLE_STRIP, 3, 4); + glDrawArrays(GL_TRIANGLE_FAN, 7, 4); + check_gl_error("glDrawArrays"); + glFinish(); // glFinish should be unnecessary + + void *pixels = g_malloc(width * height * 4); + assert(pixels != NULL); + glReadBuffer(GL_COLOR_ATTACHMENT0); + glReadPixels(0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, pixels); + check_gl_error("glReadPixels"); + + glBindVertexArray(0); + glDeleteVertexArrays(1, &vao); + glUseProgram(0); + glDeleteProgram(shader_prog); + glBindFramebuffer(GL_FRAMEBUFFER, 0); + glDeleteFramebuffers(1, &fbo); + glBindRenderbuffer(GL_RENDERBUFFER, 0); + glDeleteRenderbuffers(1, &rbo); + + return (uint8_t *)pixels; +} + +static bool colors_match(int r1, int g1, int b1, int r2, int g2, int b2) +{ + int dr = r1 - r2; + int dg = g1 - g2; + int db = b1 - b2; + + return (dr * dr + dg * dg + db * db) <= 16; +} + +static int get_color_index(uint8_t *pixel) +{ + int r = pixel[0]; + int g = pixel[1]; + int b = pixel[2]; + + if (colors_match(r, g, b, 0, 0, 255)) { + return 0; + } else if (colors_match(r, g, b, 0, 255, 0)) { + return 1; + } else if (colors_match(r, g, b, 0, 255, 255)) { + return 2; + } else if 
(colors_match(r, g, b, 255, 0, 0)) { + return 3; + } else { + return -1; + } +} + +static int calc_offset_from_ndc(float x, float y, int width, int height) +{ + int x0 = (int)((x + 1.0f) * width * 0.5f); + int y0 = (int)((y + 1.0f) * height * 0.5f); + + x0 = MAX(x0, 0); + y0 = MAX(y0, 0); + x0 = MIN(x0, width - 1); + y0 = MIN(y0, height - 1); + + return y0 * width + x0; +} + +static void determine_triangle_winding_order(uint8_t *pixels, int width, + int height, GPUProperties *props) +{ + uint8_t *tri_pix = + pixels + calc_offset_from_ndc(-0.5f, -0.5f, width, height) * 4; + uint8_t *strip0_pix = + pixels + calc_offset_from_ndc(0.417f, -0.417f, width, height) * 4; + uint8_t *strip1_pix = + pixels + calc_offset_from_ndc(0.583f, -0.583f, width, height) * 4; + uint8_t *fan_pix = + pixels + calc_offset_from_ndc(-0.583f, 0.417f, width, height) * 4; + uint8_t *fan2_pix = + pixels + calc_offset_from_ndc(-0.417f, 0.583f, width, height) * 4; + + int tri_rot = get_color_index(tri_pix); + if (tri_rot < 0 || tri_rot > 2) { + fprintf(stderr, + "Could not determine triangle rotation, got color: R=%d, G=%d, " + "B=%d\n", + tri_pix[0], tri_pix[1], tri_pix[2]); + tri_rot = 0; + } + props->geom_shader_winding.tri = tri_rot; + + int strip0_rot = get_color_index(strip0_pix); + if (strip0_rot < 0 || strip0_rot > 2) { + fprintf(stderr, + "Could not determine triangle strip0 rotation, got color: " + "R=%d, G=%d, B=%d\n", + strip0_pix[0], strip0_pix[1], strip0_pix[2]); + strip0_rot = 0; + } + int strip1_rot = get_color_index(strip1_pix) - 1; + if (strip1_rot < 0 || strip1_rot > 2) { + fprintf(stderr, + "Could not determine triangle strip1 rotation, got color: " + "R=%d, G=%d, B=%d\n", + strip1_pix[0], strip1_pix[1], strip1_pix[2]); + strip1_rot = 0; + } + props->geom_shader_winding.tri_strip0 = strip0_rot; + props->geom_shader_winding.tri_strip1 = (3 - strip1_rot) % 3; + + int fan_rot = get_color_index(fan_pix); + int fan2_rot = get_color_index(fan2_pix); + if (fan2_rot == 0) { + fan2_rot = 
1; + } + fan2_rot--; + if (fan_rot != fan2_rot) { + fprintf(stderr, + "Unexpected inconsistency in triangle fan winding, got colors: " + "R=%d, G=%d, B=%d and R=%d, G=%d, B=%d\n", + fan_pix[0], fan_pix[1], fan_pix[2], fan2_pix[0], fan2_pix[1], + fan2_pix[2]); + fan_rot = 1; + } + if (fan_rot < 0 || fan_rot > 2) { + fprintf(stderr, + "Could not determine triangle fan rotation, got color: R=%d, " + "G=%d, B=%d\n", + fan_pix[0], fan_pix[1], fan_pix[2]); + fan_rot = 1; + } + props->geom_shader_winding.tri_fan = (fan_rot + 2) % 3; +} + +void pgraph_gl_determine_gpu_properties(NV2AState *d) +{ + const int width = 640; + const int height = 480; + + GloContext *g_context = glo_context_create(); + glo_set_current(g_context); + + uint8_t *pixels = render_geom_shader_triangles(width, height); + determine_triangle_winding_order(pixels, width, height, + &pgraph_gl_gpu_properties); + g_free(pixels); + + fprintf(stderr, "GL geometry shader winding: %d, %d, %d, %d\n", + pgraph_gl_gpu_properties.geom_shader_winding.tri, + pgraph_gl_gpu_properties.geom_shader_winding.tri_strip0, + pgraph_gl_gpu_properties.geom_shader_winding.tri_strip1, + pgraph_gl_gpu_properties.geom_shader_winding.tri_fan); + + glo_context_destroy(g_context); + glo_set_current(g_nv2a_context_render); +} + +GPUProperties *pgraph_gl_get_gpu_properties(void) +{ + return &pgraph_gl_gpu_properties; +} diff --git a/hw/xbox/nv2a/pgraph/gl/meson.build b/hw/xbox/nv2a/pgraph/gl/meson.build index ab25eacb7d..c19a542adb 100644 --- a/hw/xbox/nv2a/pgraph/gl/meson.build +++ b/hw/xbox/nv2a/pgraph/gl/meson.build @@ -3,6 +3,7 @@ specific_ss.add([sdl, gloffscreen, files( 'debug.c', 'display.c', 'draw.c', + 'gpuprops.c', 'renderer.c', 'reports.c', 'shaders.c', diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.c b/hw/xbox/nv2a/pgraph/gl/renderer.c index 36b8029439..74ece1c5f9 100644 --- a/hw/xbox/nv2a/pgraph/gl/renderer.c +++ b/hw/xbox/nv2a/pgraph/gl/renderer.c @@ -66,6 +66,8 @@ static void pgraph_gl_init(NV2AState *d, Error **errp) 
pg->uniform_attrs = 0; pg->swizzle_attrs = 0; + + pgraph_gl_determine_gpu_properties(d); } static void pgraph_gl_finalize(NV2AState *d) @@ -195,6 +197,7 @@ static PGRAPHRenderer pgraph_gl_renderer = { .set_surface_scale_factor = pgraph_gl_set_surface_scale_factor, .get_surface_scale_factor = pgraph_gl_get_surface_scale_factor, .get_framebuffer_surface = pgraph_gl_get_framebuffer_surface, + .get_gpu_properties = pgraph_gl_get_gpu_properties, } }; diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.h b/hw/xbox/nv2a/pgraph/gl/renderer.h index 28277fcdf5..5a2524bfbb 100644 --- a/hw/xbox/nv2a/pgraph/gl/renderer.h +++ b/hw/xbox/nv2a/pgraph/gl/renderer.h @@ -286,5 +286,7 @@ void pgraph_gl_shader_write_cache_reload_list(PGRAPHState *pg); void pgraph_gl_set_surface_scale_factor(NV2AState *d, unsigned int scale); unsigned int pgraph_gl_get_surface_scale_factor(NV2AState *d); int pgraph_gl_get_framebuffer_surface(NV2AState *d); +void pgraph_gl_determine_gpu_properties(NV2AState *d); +GPUProperties *pgraph_gl_get_gpu_properties(void); #endif diff --git a/hw/xbox/nv2a/pgraph/glsl/geom.c b/hw/xbox/nv2a/pgraph/glsl/geom.c index dd5dc363f5..ec4d19fe5f 100644 --- a/hw/xbox/nv2a/pgraph/glsl/geom.c +++ b/hw/xbox/nv2a/pgraph/glsl/geom.c @@ -45,6 +45,39 @@ void pgraph_glsl_set_geom_state(PGRAPHState *pg, GeomState *state) state->z_perspective = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE; + + if (pg->renderer->ops.get_gpu_properties) { + GPUProperties *gpu_props = pg->renderer->ops.get_gpu_properties(); + + switch (state->primitive_mode) { + case PRIM_TYPE_TRIANGLES: + state->tri_rot0 = gpu_props->geom_shader_winding.tri; + state->tri_rot1 = state->tri_rot0; + break; + case PRIM_TYPE_TRIANGLE_STRIP: + state->tri_rot0 = gpu_props->geom_shader_winding.tri_strip0; + state->tri_rot1 = gpu_props->geom_shader_winding.tri_strip1; + break; + case PRIM_TYPE_TRIANGLE_FAN: + case PRIM_TYPE_POLYGON: + state->tri_rot0 = gpu_props->geom_shader_winding.tri_fan; + 
state->tri_rot1 = state->tri_rot0; + break; + default: + break; + } + } +} + +static const char *get_vertex_order(int rot) +{ + if (rot == 0) { + return "ivec3(0, 1, 2)"; + } else if (rot == 1) { + return "ivec3(2, 0, 1)"; + } else { + return "ivec3(1, 2, 0)"; + } } bool pgraph_glsl_need_geom(const GeomState *state) @@ -115,49 +148,47 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) case PRIM_TYPE_TRIANGLE_STRIP: case PRIM_TYPE_TRIANGLE_FAN: if (state->first_vertex_is_provoking) { - if (state->primitive_mode == PRIM_TYPE_TRIANGLE_STRIP) { - provoking_index = "gl_PrimitiveIDIn & 1"; - } else if (state->primitive_mode == PRIM_TYPE_TRIANGLE_FAN) { - provoking_index = "1"; - } else { - provoking_index = "0"; - } + provoking_index = "v[0]"; + } else if (state->primitive_mode == PRIM_TYPE_TRIANGLE_STRIP) { + provoking_index = "v[2 - (gl_PrimitiveIDIn & 1)]"; + } else if (state->primitive_mode == PRIM_TYPE_TRIANGLE_FAN) { + provoking_index = "v[1]"; } else { - provoking_index = "2"; + provoking_index = "v[2]"; } need_triz = true; layout_in = "layout(triangles) in;\n"; if (polygon_mode == POLY_MODE_FILL) { layout_out = "layout(triangle_strip, max_vertices = 3) out;\n"; - body = " mat4 pz = calc_triz(0, 1, 2);\n" - " emit_vertex(0, pz);\n" - " emit_vertex(1, pz);\n" - " emit_vertex(2, pz);\n" + body = " mat4 pz = calc_triz(v[0], v[1], v[2]);\n" + " emit_vertex(v[0], pz);\n" + " emit_vertex(v[1], pz);\n" + " emit_vertex(v[2], pz);\n" " EndPrimitive();\n"; } else if (polygon_mode == POLY_MODE_LINE) { need_linez = true; layout_out = "layout(line_strip, max_vertices = 4) out;\n"; - body = " float triMZ = calc_triz(0, 1, 2)[3].x;\n" - " mat4 pz1 = calc_linez(0, 1);\n" - " pz1[3].x = triMZ;\n" - " mat4 pz2 = calc_linez(1, 2);\n" - " pz2[3].x = triMZ;\n" - " mat4 pz3 = calc_linez(2, 0);\n" - " pz3[3].x = triMZ;\n" - " emit_vertex(0, pz1);\n" - " emit_vertex(1, pz1);\n" - " emit_vertex(2, pz2);\n" - " emit_vertex(0, pz3);\n" + body = " float dz = 
calc_triz(v[0], v[1], v[2])[3].x;\n" + " mat4 pz1 = calc_linez(v[0], v[1]);\n" + " pz1[3].x = dz;\n" + " mat4 pz2 = calc_linez(v[1], v[2]);\n" + " pz2[3].x = dz;\n" + " mat4 pz3 = calc_linez(v[2], v[0]);\n" + " pz3[3].x = dz;\n" + " emit_vertex(v[0], pz1);\n" + " emit_vertex(v[1], pz2);\n" + " emit_vertex(v[2], pz3);\n" + " emit_vertex(v[0], pz3);\n" " EndPrimitive();\n"; } else { assert(polygon_mode == POLY_MODE_POINT); layout_out = "layout(points, max_vertices = 3) out;\n"; - body = " mat4 pz = calc_triz(0, 1, 2);\n" - " emit_vertex(0, mat4(pz[0], pz[0], pz[0], pz[3]));\n" + body = " mat4 pz = calc_triz(v[0], v[1], v[2]);\n" + " emit_vertex(v[0], mat4(pz[0], pz[0], pz[0], pz[3]));\n" " EndPrimitive();\n" - " emit_vertex(1, mat4(pz[1], pz[1], pz[1], pz[3]));\n" + " emit_vertex(v[1], mat4(pz[1], pz[1], pz[1], pz[3]));\n" " EndPrimitive();\n" - " emit_vertex(2, mat4(pz[2], pz[2], pz[2], pz[3]));\n" + " emit_vertex(v[2], mat4(pz[2], pz[2], pz[2], pz[3]));\n" " EndPrimitive();\n"; } break; @@ -188,9 +219,9 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) " mat4 pz4 = calc_linez(3, 0);\n" " pz4[3].x = pzs[3].x;\n" " emit_vertex(0, pz1);\n" - " emit_vertex(1, pz1);\n" - " emit_vertex(2, pz2);\n" - " emit_vertex(3, pz3);\n" + " emit_vertex(1, pz2);\n" + " emit_vertex(2, pz3);\n" + " emit_vertex(3, pz4);\n" " emit_vertex(0, pz4);\n" " EndPrimitive();\n"; } else { @@ -237,9 +268,9 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) " mat4 pz4 = calc_linez(2, 0);\n" " pz4[3].x = pz[3].x;\n" " emit_vertex(0, pz1);\n" - " emit_vertex(1, pz1);\n" - " emit_vertex(3, pz2);\n" - " emit_vertex(2, pz3);\n" + " emit_vertex(1, pz2);\n" + " emit_vertex(3, pz3);\n" + " emit_vertex(2, pz4);\n" " emit_vertex(0, pz4);\n" " EndPrimitive();\n"; } else { @@ -259,17 +290,18 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) } break; case PRIM_TYPE_POLYGON: - provoking_index = "0"; if (polygon_mode == 
POLY_MODE_FILL) { + provoking_index = "v[2]"; need_triz = true; layout_in = "layout(triangles) in;\n"; layout_out = "layout(triangle_strip, max_vertices = 3) out;\n"; - body = " mat4 pz = calc_triz(0, 1, 2);\n" - " emit_vertex(0, pz);\n" - " emit_vertex(1, pz);\n" - " emit_vertex(2, pz);\n" + body = " mat4 pz = calc_triz(v[0], v[1], v[2]);\n" + " emit_vertex(v[0], pz);\n" + " emit_vertex(v[1], pz);\n" + " emit_vertex(v[2], pz);\n" " EndPrimitive();\n"; } else if (polygon_mode == POLY_MODE_LINE) { + provoking_index = "0"; need_linez = true; /* FIXME: input here is lines and not triangles so we cannot * calculate triangle plane slope. Also, the first vertex of the @@ -310,6 +342,30 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) pgraph_glsl_get_vtx_header(output, opts.vulkan, state->smooth_shading, false, false, false); + char vertex_order_buf[80]; + const char *vertex_order_body = ""; + + if (need_triz) { + /* Input triangle absolute vertex order is not guaranteed by OpenGL + * or Vulkan, only winding order is. Reorder vertices here to first + * vertex convention which we assumed above when setting + * provoking_index. This mostly only matters with flat shading, but + * we reorder always to get consistent results across GPU vendors + * regarding floating-point rounding when calculating with vtxPos0/1/2. + */ + mstring_append(output, "ivec3 v;\n"); + if (state->tri_rot0 == state->tri_rot1) { + snprintf(vertex_order_buf, sizeof(vertex_order_buf), " v = %s;\n", + get_vertex_order(state->tri_rot0)); + } else { + snprintf(vertex_order_buf, sizeof(vertex_order_buf), + " v = (gl_PrimitiveIDIn & 1) == 0 ? 
%s : %s;\n", + get_vertex_order(state->tri_rot0), + get_vertex_order(state->tri_rot1)); + } + vertex_order_body = vertex_order_buf; + } + if (state->smooth_shading) { provoking_index = "index"; } @@ -422,8 +478,9 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts) "\n" "void main() {\n" "%s" + "%s" "}\n", - body); + vertex_order_body, body); return output; } diff --git a/hw/xbox/nv2a/pgraph/glsl/geom.h b/hw/xbox/nv2a/pgraph/glsl/geom.h index 55229d2e46..a00302f39b 100644 --- a/hw/xbox/nv2a/pgraph/glsl/geom.h +++ b/hw/xbox/nv2a/pgraph/glsl/geom.h @@ -32,6 +32,8 @@ typedef struct { bool smooth_shading; bool first_vertex_is_provoking; bool z_perspective; + short tri_rot0; + short tri_rot1; } GeomState; typedef struct GenGeomGlslOptions { diff --git a/hw/xbox/nv2a/pgraph/pgraph.h b/hw/xbox/nv2a/pgraph/pgraph.h index 49f941d450..0449270b55 100644 --- a/hw/xbox/nv2a/pgraph/pgraph.h +++ b/hw/xbox/nv2a/pgraph/pgraph.h @@ -96,6 +96,15 @@ typedef struct BetaState { uint32_t beta; } BetaState; +typedef struct GPUProperties { + struct { + short tri; + short tri_strip0; + short tri_strip1; + short tri_fan; + } geom_shader_winding; +} GPUProperties; + typedef struct PGRAPHRenderer { CONFIG_DISPLAY_RENDERER type; const char *name; @@ -122,6 +131,7 @@ typedef struct PGRAPHRenderer { void (*set_surface_scale_factor)(NV2AState *d, unsigned int scale); unsigned int (*get_surface_scale_factor)(NV2AState *d); int (*get_framebuffer_surface)(NV2AState *d); + GPUProperties *(*get_gpu_properties)(void); } ops; } PGRAPHRenderer; diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index e4bd562149..d78087e56b 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -788,23 +788,6 @@ static void create_pipeline(PGRAPHState *pg) void *rasterizer_next_struct = NULL; - VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_state; - - if (r->provoking_vertex_extension_enabled) { - // Use last provoking 
vertex convention to match geometry shader - // assumption, because Vulkan default is first vertex convention. - VkProvokingVertexModeEXT provoking_mode = - VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT; - - provoking_state = - (VkPipelineRasterizationProvokingVertexStateCreateInfoEXT){ - .sType = - VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, - .provokingVertexMode = provoking_mode, - }; - rasterizer_next_struct = &provoking_state; - } - VkPipelineRasterizationStateCreateInfo rasterizer = { .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, .depthClampEnable = VK_TRUE, diff --git a/hw/xbox/nv2a/pgraph/vk/gpuprops.c b/hw/xbox/nv2a/pgraph/vk/gpuprops.c new file mode 100644 index 0000000000..1562be0993 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/gpuprops.c @@ -0,0 +1,605 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024-2025 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "debug.h" +#include "renderer.h" + +static GPUProperties pgraph_vk_gpu_properties; + +static const char *vertex_shader_source = + "#version 450\n" + "layout(location = 0) out vec3 v_fragColor;\n" + "\n" + "vec2 positions[11] = vec2[](\n" + " vec2(-0.5, -0.75),\n" + " vec2(-0.25, -0.25),\n" + " vec2(-0.75, -0.25),\n" + " vec2(0.25, -0.25),\n" + " vec2(0.25, -0.75),\n" + " vec2(0.75, -0.25),\n" + " vec2(0.75, -0.75),\n" + " vec2(-0.75, 0.75),\n" + " vec2(-0.75, 0.25),\n" + " vec2(-0.25, 0.25),\n" + " vec2(-0.25, 0.75)\n" + ");\n" + "\n" + "vec3 colors[11] = vec3[](\n" + " vec3(0.0, 0.0, 1.0),\n" + " vec3(0.0, 1.0, 0.0),\n" + " vec3(0.0, 1.0, 1.0),\n" + " vec3(0.0, 0.0, 1.0),\n" + " vec3(0.0, 1.0, 0.0),\n" + " vec3(0.0, 1.0, 1.0),\n" + " vec3(1.0, 0.0, 0.0),\n" + " vec3(0.0, 0.0, 1.0),\n" + " vec3(0.0, 1.0, 0.0),\n" + " vec3(0.0, 1.0, 1.0),\n" + " vec3(1.0, 0.0, 0.0)\n" + ");\n" + "\n" + "void main() {\n" + " gl_Position = vec4(positions[gl_VertexIndex], 0.0, 1.0);\n" + " v_fragColor = colors[gl_VertexIndex];\n" + "}\n"; + +static const char *geometry_shader_source = + "#version 450\n" + "layout(triangles) in;\n" + "layout(triangle_strip, max_vertices = 3) out;\n" + "layout(location = 0) out vec3 fragColor;\n" + "layout(location = 0) in vec3 v_fragColor[];\n" + "\n" + "void emit_vertex(int index) {\n" + " gl_Position = gl_in[index].gl_Position;\n" + " fragColor = v_fragColor[0];\n" + " EmitVertex();\n" + "}\n" + "\n" + "void main() {\n" + " emit_vertex(0);\n" + " emit_vertex(1);\n" + " emit_vertex(2);\n" + " EndPrimitive();\n" + "}\n"; + +static const char *fragment_shader_source = + "#version 450\n" + "layout(location = 0) out vec4 outColor;\n" + "layout(location = 0) in vec3 fragColor;\n" + "\n" + "void main() {\n" + " outColor = vec4(fragColor, 1.0);\n" + "}\n"; + +static VkPipeline create_test_pipeline( + NV2AState *d, VkPrimitiveTopology primitive_topology, + VkShaderModule vert_shader_module, VkShaderModule geom_shader_module, + VkShaderModule 
frag_shader_module, VkPipelineLayout pipeline_layout, + VkRenderPass render_pass, int width, int height) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + VkPipelineShaderStageCreateInfo shader_stages[] = { + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = vert_shader_module, + .pName = "main", + }, + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_GEOMETRY_BIT, + .module = geom_shader_module, + .pName = "main", + }, + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = frag_shader_module, + .pName = "main", + }, + }; + + VkPipelineVertexInputStateCreateInfo vertex_input_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 0, + .vertexAttributeDescriptionCount = 0, + }; + + VkPipelineInputAssemblyStateCreateInfo input_assembly = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = primitive_topology, + .primitiveRestartEnable = VK_FALSE, + }; + + VkViewport viewport = { + .x = 0.0f, + .y = 0.0f, + .width = (float)width, + .height = (float)height, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }; + VkRect2D scissor = { + .offset = { 0, 0 }, + .extent.width = width, + .extent.height = height, + }; + VkPipelineViewportStateCreateInfo viewport_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .pViewports = &viewport, + .scissorCount = 1, + .pScissors = &scissor, + }; + + VkPipelineRasterizationStateCreateInfo rasterizer = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .depthClampEnable = VK_FALSE, + .rasterizerDiscardEnable = VK_FALSE, + .polygonMode = VK_POLYGON_MODE_FILL, + .lineWidth = 1.0f, + 
.cullMode = VK_CULL_MODE_BACK_BIT, + .frontFace = VK_FRONT_FACE_CLOCKWISE, + .depthBiasEnable = VK_FALSE, + }; + + VkPipelineMultisampleStateCreateInfo multisampling = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .sampleShadingEnable = VK_FALSE, + .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, + }; + + VkPipelineColorBlendAttachmentState color_blend_attachment = { + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, + .blendEnable = VK_FALSE, + }; + + VkPipelineColorBlendStateCreateInfo color_blending = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_COPY, + .attachmentCount = 1, + .pAttachments = &color_blend_attachment, + .blendConstants = { 0.0f, 0.0f, 0.0f, 0.0f }, + }; + + VkGraphicsPipelineCreateInfo pipeline_info = { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = ARRAY_SIZE(shader_stages), + .pStages = shader_stages, + .pVertexInputState = &vertex_input_info, + .pInputAssemblyState = &input_assembly, + .pViewportState = &viewport_state, + .pRasterizationState = &rasterizer, + .pMultisampleState = &multisampling, + .pColorBlendState = &color_blending, + .layout = pipeline_layout, + .renderPass = render_pass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + }; + + VkPipeline pipeline; + VK_CHECK(vkCreateGraphicsPipelines(r->device, VK_NULL_HANDLE, 1, + &pipeline_info, NULL, &pipeline)); + + return pipeline; +} + +static uint8_t *render_geom_shader_triangles(NV2AState *d, int width, + int height) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + // Create image + VkImageCreateInfo image_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .extent.width = width, + .extent.height = height, + .extent.depth = 1, + .mipLevels = 1, + .arrayLayers = 1, + .format = 
VK_FORMAT_R8G8B8A8_UNORM, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT, + .samples = VK_SAMPLE_COUNT_1_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + }; + + VkImage offscreen_image; + VK_CHECK( + vkCreateImage(r->device, &image_create_info, NULL, &offscreen_image)); + + // Allocate and bind image memory + VkMemoryRequirements memory_requirements; + vkGetImageMemoryRequirements(r->device, offscreen_image, + &memory_requirements); + + VkMemoryAllocateInfo alloc_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = memory_requirements.size, + .memoryTypeIndex = + pgraph_vk_get_memory_type(pg, memory_requirements.memoryTypeBits, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT), + }; + + VkDeviceMemory image_memory; + VK_CHECK(vkAllocateMemory(r->device, &alloc_info, NULL, &image_memory)); + VK_CHECK(vkBindImageMemory(r->device, offscreen_image, image_memory, 0)); + + // Create Image View + VkImageViewCreateInfo image_view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = offscreen_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = image_create_info.format, + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }; + + VkImageView offscreen_image_view; + VK_CHECK(vkCreateImageView(r->device, &image_view_create_info, NULL, + &offscreen_image_view)); + + // Buffer for image CPU access + VkBufferCreateInfo buffer_info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .size = width * height * 4, // RGBA8 = 4 bytes per pixel + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + }; + + VkBuffer cpu_buffer; + VK_CHECK(vkCreateBuffer(r->device, &buffer_info, NULL, &cpu_buffer)); + + // Allocate and bind memory for image CPU access + VkMemoryRequirements host_mem_requirements; + 
vkGetBufferMemoryRequirements(r->device, cpu_buffer, + &host_mem_requirements); + + VkMemoryAllocateInfo host_alloc_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = host_mem_requirements.size, + .memoryTypeIndex = + pgraph_vk_get_memory_type(pg, host_mem_requirements.memoryTypeBits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT), + }; + + VkDeviceMemory cpu_buffer_memory; + VK_CHECK(vkAllocateMemory(r->device, &host_alloc_info, NULL, + &cpu_buffer_memory)); + VK_CHECK(vkBindBufferMemory(r->device, cpu_buffer, cpu_buffer_memory, 0)); + + + VkAttachmentDescription color_attachment = { + .format = VK_FORMAT_R8G8B8A8_UNORM, + .samples = VK_SAMPLE_COUNT_1_BIT, + .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + }; + + VkAttachmentReference color_ref = { + 0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL + }; + + VkSubpassDescription subpass = { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .colorAttachmentCount = 1, + .pColorAttachments = &color_ref, + }; + + VkRenderPassCreateInfo render_pass_info = { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &color_attachment, + .subpassCount = 1, + .pSubpasses = &subpass, + }; + + VkRenderPass render_pass; + VK_CHECK( + vkCreateRenderPass(r->device, &render_pass_info, NULL, &render_pass)); + + VkFramebufferCreateInfo fb_info = { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .renderPass = render_pass, + .attachmentCount = 1, + .pAttachments = &offscreen_image_view, + .width = width, + .height = height, + .layers = 1, + }; + + VkFramebuffer framebuffer; + VK_CHECK(vkCreateFramebuffer(r->device, &fb_info, NULL, &framebuffer)); + + ShaderModuleInfo *vsh_info = pgraph_vk_create_shader_module_from_glsl( + r, VK_SHADER_STAGE_VERTEX_BIT, vertex_shader_source); + 
ShaderModuleInfo *geom_info = pgraph_vk_create_shader_module_from_glsl( + r, VK_SHADER_STAGE_GEOMETRY_BIT, geometry_shader_source); + ShaderModuleInfo *psh_info = pgraph_vk_create_shader_module_from_glsl( + r, VK_SHADER_STAGE_FRAGMENT_BIT, fragment_shader_source); + + VkShaderModule vert_shader_module = vsh_info->module; + VkShaderModule geom_shader_module = geom_info->module; + VkShaderModule frag_shader_module = psh_info->module; + + VkPipelineLayoutCreateInfo pipeline_layout_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 0, + .pushConstantRangeCount = 0, + }; + + VkPipelineLayout pipeline_layout; + VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL, + &pipeline_layout)); + + VkPipeline tri_pipeline = create_test_pipeline( + d, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, vert_shader_module, + geom_shader_module, frag_shader_module, pipeline_layout, render_pass, + width, height); + + VkPipeline strip_pipeline = create_test_pipeline( + d, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, vert_shader_module, + geom_shader_module, frag_shader_module, pipeline_layout, render_pass, + width, height); + + VkPipeline fan_pipeline = create_test_pipeline( + d, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN, vert_shader_module, + geom_shader_module, frag_shader_module, pipeline_layout, render_pass, + width, height); + + pgraph_vk_destroy_shader_module(r, psh_info); + pgraph_vk_destroy_shader_module(r, geom_info); + pgraph_vk_destroy_shader_module(r, vsh_info); + + VkCommandBufferBeginInfo begin_info = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + }; + VK_CHECK(vkBeginCommandBuffer(r->command_buffer, &begin_info)); + + // Begin render pass + VkClearValue clear_color = { + .color.float32 = { 0.0f, 0.0f, 0.0f, 1.0f }, + }; + VkRenderPassBeginInfo rp_begin = { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = render_pass, + .framebuffer = framebuffer, + .renderArea.extent.width = width, + .renderArea.extent.height = 
height, + .clearValueCount = 1, + .pClearValues = &clear_color, + }; + + vkCmdBeginRenderPass(r->command_buffer, &rp_begin, + VK_SUBPASS_CONTENTS_INLINE); + + vkCmdBindPipeline(r->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + tri_pipeline); + vkCmdDraw(r->command_buffer, 3, 1, 0, 0); + vkCmdBindPipeline(r->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + strip_pipeline); + vkCmdDraw(r->command_buffer, 4, 1, 3, 0); + vkCmdBindPipeline(r->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + fan_pipeline); + vkCmdDraw(r->command_buffer, 4, 1, 7, 0); + + vkCmdEndRenderPass(r->command_buffer); + + // Synchronize and transition framebuffer for copying to CPU + pgraph_vk_transition_image_layout(pg, r->command_buffer, offscreen_image, + image_create_info.format, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + + // Copy framebuffer to CPU memory + VkBufferImageCopy region = { + .bufferOffset = 0, + .bufferRowLength = 0, // tightly packed + .bufferImageHeight = 0, + + .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .imageSubresource.mipLevel = 0, + .imageSubresource.baseArrayLayer = 0, + .imageSubresource.layerCount = 1, + + .imageOffset = { 0, 0, 0 }, + .imageExtent = { width, height, 1 }, + }; + + vkCmdCopyImageToBuffer(r->command_buffer, offscreen_image, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, cpu_buffer, 1, + &region); + + VK_CHECK(vkEndCommandBuffer(r->command_buffer)); + + VkSubmitInfo submit_info = { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = &r->command_buffer, + }; + + VK_CHECK(vkQueueSubmit(r->queue, 1, &submit_info, VK_NULL_HANDLE)); + VK_CHECK(vkQueueWaitIdle(r->queue)); + + void *data; + VK_CHECK( + vkMapMemory(r->device, cpu_buffer_memory, 0, VK_WHOLE_SIZE, 0, &data)); + void *pixels = g_malloc(width * height * 4); + assert(pixels != NULL); + memcpy(pixels, data, width * height * 4); + vkUnmapMemory(r->device, cpu_buffer_memory); + + vkDestroyPipeline(r->device, 
strip_pipeline, NULL); + vkDestroyPipeline(r->device, fan_pipeline, NULL); + vkDestroyPipeline(r->device, tri_pipeline, NULL); + vkDestroyPipelineLayout(r->device, pipeline_layout, NULL); + vkDestroyFramebuffer(r->device, framebuffer, NULL); + vkDestroyRenderPass(r->device, render_pass, NULL); + vkDestroyImageView(r->device, offscreen_image_view, NULL); + vkDestroyBuffer(r->device, cpu_buffer, NULL); + vkFreeMemory(r->device, cpu_buffer_memory, NULL); + vkDestroyImage(r->device, offscreen_image, NULL); + vkFreeMemory(r->device, image_memory, NULL); + + return (uint8_t *)pixels; +} + +static bool colors_match(int r1, int g1, int b1, int r2, int g2, int b2) +{ + int dr = r1 - r2; + int dg = g1 - g2; + int db = b1 - b2; + + return (dr * dr + dg * dg + db * db) <= 16; +} + +static int get_color_index(uint8_t *pixel) +{ + int r = pixel[0]; + int g = pixel[1]; + int b = pixel[2]; + + if (colors_match(r, g, b, 0, 0, 255)) { + return 0; + } else if (colors_match(r, g, b, 0, 255, 0)) { + return 1; + } else if (colors_match(r, g, b, 0, 255, 255)) { + return 2; + } else if (colors_match(r, g, b, 255, 0, 0)) { + return 3; + } else { + return -1; + } +} + +static int calc_offset_from_ndc(float x, float y, int width, int height) +{ + int x0 = (int)((x + 1.0f) * width * 0.5f); + int y0 = (int)((y + 1.0f) * height * 0.5f); + + x0 = MAX(x0, 0); + y0 = MAX(y0, 0); + x0 = MIN(x0, width - 1); + y0 = MIN(y0, height - 1); + + return y0 * width + x0; +} + +static void determine_triangle_winding_order(uint8_t *pixels, int width, + int height, GPUProperties *props) +{ + uint8_t *tri_pix = + pixels + calc_offset_from_ndc(-0.5f, -0.5f, width, height) * 4; + uint8_t *strip0_pix = + pixels + calc_offset_from_ndc(0.417f, -0.417f, width, height) * 4; + uint8_t *strip1_pix = + pixels + calc_offset_from_ndc(0.583f, -0.583f, width, height) * 4; + uint8_t *fan_pix = + pixels + calc_offset_from_ndc(-0.583f, 0.417f, width, height) * 4; + uint8_t *fan2_pix = + pixels + calc_offset_from_ndc(-0.417f, 
0.583f, width, height) * 4; + + int tri_rot = get_color_index(tri_pix); + if (tri_rot < 0 || tri_rot > 2) { + fprintf(stderr, + "Could not determine triangle rotation, got color: R=%d, G=%d, " + "B=%d\n", + tri_pix[0], tri_pix[1], tri_pix[2]); + tri_rot = 0; + } + props->geom_shader_winding.tri = tri_rot; + + int strip0_rot = get_color_index(strip0_pix); + if (strip0_rot < 0 || strip0_rot > 2) { + fprintf(stderr, + "Could not determine triangle strip0 rotation, got color: " + "R=%d, G=%d, B=%d\n", + strip0_pix[0], strip0_pix[1], strip0_pix[2]); + strip0_rot = 0; + } + int strip1_rot = get_color_index(strip1_pix) - 1; + if (strip1_rot < 0 || strip1_rot > 2) { + fprintf(stderr, + "Could not determine triangle strip1 rotation, got color: " + "R=%d, G=%d, B=%d\n", + strip1_pix[0], strip1_pix[1], strip1_pix[2]); + strip1_rot = 0; + } + props->geom_shader_winding.tri_strip0 = strip0_rot; + props->geom_shader_winding.tri_strip1 = (3 - strip1_rot) % 3; + + int fan_rot = get_color_index(fan_pix); + int fan2_rot = get_color_index(fan2_pix); + if (fan2_rot == 0) { + fan2_rot = 1; + } + fan2_rot--; + if (fan_rot != fan2_rot) { + fprintf(stderr, + "Unexpected inconsistency in triangle fan winding, got colors: " + "R=%d, G=%d, B=%d and R=%d, G=%d, B=%d\n", + fan_pix[0], fan_pix[1], fan_pix[2], fan2_pix[0], fan2_pix[1], + fan2_pix[2]); + fan_rot = 1; + } + if (fan_rot < 0 || fan_rot > 2) { + fprintf(stderr, + "Could not determine triangle fan rotation, got color: R=%d, " + "G=%d, B=%d\n", + fan_pix[0], fan_pix[1], fan_pix[2]); + fan_rot = 1; + } + props->geom_shader_winding.tri_fan = (fan_rot + 2) % 3; +} + +void pgraph_vk_determine_gpu_properties(NV2AState *d) +{ + const int width = 640; + const int height = 480; + + uint8_t *pixels = render_geom_shader_triangles(d, width, height); + determine_triangle_winding_order(pixels, width, height, + &pgraph_vk_gpu_properties); + g_free(pixels); + + fprintf(stderr, "VK geometry shader winding: %d, %d, %d, %d\n", + 
pgraph_vk_gpu_properties.geom_shader_winding.tri, + pgraph_vk_gpu_properties.geom_shader_winding.tri_strip0, + pgraph_vk_gpu_properties.geom_shader_winding.tri_strip1, + pgraph_vk_gpu_properties.geom_shader_winding.tri_fan); +} + +GPUProperties *pgraph_vk_get_gpu_properties(void) +{ + return &pgraph_vk_gpu_properties; +} diff --git a/hw/xbox/nv2a/pgraph/vk/instance.c b/hw/xbox/nv2a/pgraph/vk/instance.c index 3d993cb735..fcada9e29b 100644 --- a/hw/xbox/nv2a/pgraph/vk/instance.c +++ b/hw/xbox/nv2a/pgraph/vk/instance.c @@ -401,10 +401,6 @@ static void add_optional_device_extension_names( add_extension_if_available(available_extensions, enabled_extension_names, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); - r->provoking_vertex_extension_enabled = - add_extension_if_available(available_extensions, enabled_extension_names, - VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); - r->memory_budget_extension_enabled = add_extension_if_available( available_extensions, enabled_extension_names, VK_EXT_MEMORY_BUDGET_EXTENSION_NAME); @@ -570,17 +566,6 @@ static bool create_logical_device(PGRAPHState *pg, Error **errp) void *next_struct = NULL; - VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex_features; - if (r->provoking_vertex_extension_enabled) { - provoking_vertex_features = (VkPhysicalDeviceProvokingVertexFeaturesEXT){ - .sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT, - .provokingVertexLast = VK_TRUE, - .pNext = next_struct, - }; - next_struct = &provoking_vertex_features; - } - VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border_features; if (r->custom_border_color_extension_enabled) { custom_border_features = (VkPhysicalDeviceCustomBorderColorFeaturesEXT){ diff --git a/hw/xbox/nv2a/pgraph/vk/meson.build b/hw/xbox/nv2a/pgraph/vk/meson.build index 24c2474cb9..b5d75e2faa 100644 --- a/hw/xbox/nv2a/pgraph/vk/meson.build +++ b/hw/xbox/nv2a/pgraph/vk/meson.build @@ -9,6 +9,7 @@ specific_ss.add([sdl, volk, libglslang, vma, vulkan, spirv_reflect, 
gloffscreen, 'display.c', 'draw.c', 'glsl.c', + 'gpuprops.c', 'image.c', 'instance.c', 'renderer.c', diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.c b/hw/xbox/nv2a/pgraph/vk/renderer.c index 3dbc724b95..4272bbceb6 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.c +++ b/hw/xbox/nv2a/pgraph/vk/renderer.c @@ -62,6 +62,8 @@ static void pgraph_vk_init(NV2AState *d, Error **errp) pgraph_vk_update_vertex_ram_buffer(&d->pgraph, 0, d->vram_ptr, memory_region_size(d->vram)); + + pgraph_vk_determine_gpu_properties(d); } static void pgraph_vk_finalize(NV2AState *d) @@ -227,6 +229,7 @@ static PGRAPHRenderer pgraph_vk_renderer = { .set_surface_scale_factor = pgraph_vk_set_surface_scale_factor, .get_surface_scale_factor = pgraph_vk_get_surface_scale_factor, .get_framebuffer_surface = pgraph_vk_get_framebuffer_surface, + .get_gpu_properties = pgraph_vk_get_gpu_properties, } }; diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h index c2233a7b17..9f87114187 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -325,7 +325,6 @@ typedef struct PGRAPHVkState { bool debug_utils_extension_enabled; bool custom_border_color_extension_enabled; - bool provoking_vertex_extension_enabled; bool memory_budget_extension_enabled; VkPhysicalDevice physical_device; @@ -594,4 +593,8 @@ void pgraph_vk_end_nondraw_commands(PGRAPHState *pg, VkCommandBuffer cmd); // blit.c void pgraph_vk_image_blit(NV2AState *d); +// gpuprops.c +void pgraph_vk_determine_gpu_properties(NV2AState *d); +GPUProperties *pgraph_vk_get_gpu_properties(void); + #endif