diff --git a/hw/xbox/nv2a/pgraph/gl/draw.c b/hw/xbox/nv2a/pgraph/gl/draw.c
index 7af27d845c..9562ea30d8 100644
--- a/hw/xbox/nv2a/pgraph/gl/draw.c
+++ b/hw/xbox/nv2a/pgraph/gl/draw.c
@@ -227,6 +227,12 @@ void pgraph_gl_draw_begin(NV2AState *d)
glEnable(GL_DEPTH_CLAMP);
+ /* Set first vertex convention to match the Vulkan default. This is
+ * needed because the geometry shader outputs line strips that carry
+ * data for the fragment shader.
+ */
+ glProvokingVertex(GL_FIRST_VERTEX_CONVENTION);
+
if (stencil_test) {
glEnable(GL_STENCIL_TEST);
diff --git a/hw/xbox/nv2a/pgraph/gl/gpuprops.c b/hw/xbox/nv2a/pgraph/gl/gpuprops.c
new file mode 100644
index 0000000000..29cb1e7b0e
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/gl/gpuprops.c
@@ -0,0 +1,357 @@
+/*
+ * Geforce NV2A PGRAPH OpenGL Renderer
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2025 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "debug.h"
+#include "renderer.h"
+
+/*
+ * Probe results for the OpenGL backend. Written by
+ * pgraph_gl_determine_gpu_properties() and handed out by
+ * pgraph_gl_get_gpu_properties().
+ */
+static GPUProperties pgraph_gl_gpu_properties;
+
+/*
+ * Vertex shader of the winding-order probe. It uses no vertex attributes:
+ * position and color are looked up in constant tables by gl_VertexID.
+ * render_geom_shader_triangles() draws vertices 0-2 as a triangle list,
+ * 3-6 as a triangle strip and 7-10 as a triangle fan. Each vertex of a
+ * probe triangle has a different color (blue, green, cyan or red) so the
+ * vertex can be identified from the rendered color.
+ */
+static const char *vertex_shader_source =
+ "#version 400\n"
+ "out vec3 v_fragColor;\n"
+ "\n"
+ "vec2 positions[11] = vec2[](\n"
+ " vec2(-0.5, -0.75),\n"
+ " vec2(-0.25, -0.25),\n"
+ " vec2(-0.75, -0.25),\n"
+ " vec2(0.25, -0.25),\n"
+ " vec2(0.25, -0.75),\n"
+ " vec2(0.75, -0.25),\n"
+ " vec2(0.75, -0.75),\n"
+ " vec2(-0.75, 0.75),\n"
+ " vec2(-0.75, 0.25),\n"
+ " vec2(-0.25, 0.25),\n"
+ " vec2(-0.25, 0.75)\n"
+ ");\n"
+ "\n"
+ "vec3 colors[11] = vec3[](\n"
+ " vec3(0.0, 0.0, 1.0),\n"
+ " vec3(0.0, 1.0, 0.0),\n"
+ " vec3(0.0, 1.0, 1.0),\n"
+ " vec3(0.0, 0.0, 1.0),\n"
+ " vec3(0.0, 1.0, 0.0),\n"
+ " vec3(0.0, 1.0, 1.0),\n"
+ " vec3(1.0, 0.0, 0.0),\n"
+ " vec3(0.0, 0.0, 1.0),\n"
+ " vec3(0.0, 1.0, 0.0),\n"
+ " vec3(0.0, 1.0, 1.0),\n"
+ " vec3(1.0, 0.0, 0.0)\n"
+ ");\n"
+ "\n"
+ "void main() {\n"
+ " gl_Position = vec4(positions[gl_VertexID], 0.0, 1.0);\n"
+ " v_fragColor = colors[gl_VertexID];\n"
+ "}\n";
+
+/*
+ * Geometry shader of the winding-order probe. Positions are passed through
+ * unchanged, but every emitted vertex gets v_fragColor[0], i.e. the color
+ * of whichever input vertex is presented to the shader at index 0. The
+ * color of a rendered triangle therefore tells which vertex the geometry
+ * shader saw first.
+ */
+static const char *geometry_shader_source =
+ "#version 400\n"
+ "layout(triangles) in;\n"
+ "layout(triangle_strip, max_vertices = 3) out;\n"
+ "out vec3 fragColor;\n"
+ "in vec3 v_fragColor[];\n"
+ "\n"
+ "void emit_vertex(int index) {\n"
+ " gl_Position = gl_in[index].gl_Position;\n"
+ " fragColor = v_fragColor[0];\n"
+ " EmitVertex();\n"
+ "}\n"
+ "\n"
+ "void main() {\n"
+ " emit_vertex(0);\n"
+ " emit_vertex(1);\n"
+ " emit_vertex(2);\n"
+ " EndPrimitive();\n"
+ "}\n";
+
+/*
+ * Fragment shader of the winding-order probe: writes the color received
+ * from the geometry shader with alpha forced to 1.0.
+ */
+static const char *fragment_shader_source =
+ "#version 400\n"
+ "out vec4 outColor;\n"
+ "in vec3 fragColor;\n"
+ "\n"
+ "void main() {\n"
+ " outColor = vec4(fragColor, 1.0);\n"
+ "}\n";
+
+/*
+ * Compile one GLSL shader stage.
+ *
+ * type:   shader stage enum handed to glCreateShader().
+ * source: NUL-terminated GLSL source (passed with a NULL length array).
+ *
+ * Returns the GL shader object. If compilation fails, up to 511 characters
+ * of the info log are printed to stderr and assert(false) is hit.
+ */
+static GLuint compile_shader(GLenum type, const char *source)
+{
+ GLuint shader = glCreateShader(type);
+ glShaderSource(shader, 1, &source, NULL);
+ glCompileShader(shader);
+
+ GLint success;
+ glGetShaderiv(shader, GL_COMPILE_STATUS, &success);
+ if (!success) {
+ char log[512];
+ glGetShaderInfoLog(shader, sizeof(log), NULL, log);
+ /* Force termination regardless of what the driver wrote. */
+ log[sizeof(log) - 1] = '\0';
+ fprintf(stderr, "GL shader type %d compilation failed: %s\n", type,
+ log);
+ assert(false);
+ }
+
+ return shader;
+}
+
+/*
+ * Build a GL program from vertex, geometry and fragment shader sources.
+ *
+ * Each source is compiled with compile_shader(), attached and linked. If
+ * linking fails the info log is printed to stderr and assert(false) is hit.
+ * The three shader objects are flagged for deletion once the program is
+ * linked; only the program object is returned.
+ */
+static GLuint create_program(const char *vert_source, const char *geom_source,
+ const char *frag_source)
+{
+ GLuint vert_shader = compile_shader(GL_VERTEX_SHADER, vert_source);
+ GLuint geom_shader = compile_shader(GL_GEOMETRY_SHADER, geom_source);
+ GLuint frag_shader = compile_shader(GL_FRAGMENT_SHADER, frag_source);
+
+ GLuint shader_prog = glCreateProgram();
+ glAttachShader(shader_prog, vert_shader);
+ glAttachShader(shader_prog, geom_shader);
+ glAttachShader(shader_prog, frag_shader);
+ glLinkProgram(shader_prog);
+
+ GLint success;
+ glGetProgramiv(shader_prog, GL_LINK_STATUS, &success);
+ if (!success) {
+ char log[512];
+ glGetProgramInfoLog(shader_prog, sizeof(log), NULL, log);
+ log[sizeof(log) - 1] = '\0';
+ fprintf(stderr, "GL shader linking failed: %s\n", log);
+ assert(false);
+ }
+
+ glDeleteShader(vert_shader);
+ glDeleteShader(geom_shader);
+ glDeleteShader(frag_shader);
+
+ return shader_prog;
+}
+
+/*
+ * Drain the GL error queue, printing every pending error to stderr.
+ *
+ * context: label included in each message to identify the call site.
+ *
+ * Errors are only reported, never treated as fatal. At most 10 errors are
+ * printed per call so that a glGetError() that keeps returning an error
+ * cannot make this loop forever.
+ */
+static void check_gl_error(const char *context)
+{
+ GLenum err;
+ int limit = 10;
+
+ while ((err = glGetError()) != GL_NO_ERROR) {
+ fprintf(stderr, "GPU properties OpenGL error 0x%X in %s\n", err,
+ context);
+ if (--limit <= 0) {
+ fprintf(
+ stderr,
+ "Too many OpenGL errors in %s — possible infinite error loop\n",
+ context);
+ break;
+ }
+ }
+}
+
+/*
+ * Render the winding-order probe into an offscreen RGBA8 framebuffer and
+ * read the result back.
+ *
+ * The probe consists of one triangle list, one triangle strip and one
+ * triangle fan drawn through the probe geometry shader with the first
+ * vertex provoking convention selected.
+ *
+ * width, height: size of the offscreen render target in pixels.
+ *
+ * Returns a g_malloc()ed buffer of width * height * 4 bytes filled by
+ * glReadPixels(GL_RGBA, GL_UNSIGNED_BYTE); the caller releases it (see
+ * g_free() in pgraph_gl_determine_gpu_properties()). All GL objects created
+ * here are deleted before returning. The GL state changed here (color mask,
+ * clear color, cull/depth/stencil enables, provoking vertex, viewport) is
+ * not restored; the visible caller runs this on a temporary context.
+ */
+static uint8_t *render_geom_shader_triangles(int width, int height)
+{
+ // Create the framebuffer and renderbuffer for it
+ GLuint fbo, rbo;
+ glGenFramebuffers(1, &fbo);
+ glBindFramebuffer(GL_FRAMEBUFFER, fbo);
+ glGenRenderbuffers(1, &rbo);
+ glBindRenderbuffer(GL_RENDERBUFFER, rbo);
+ glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, width, height);
+ check_gl_error("glRenderbufferStorage");
+ glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
+ GL_RENDERBUFFER, rbo);
+ check_gl_error("glFramebufferRenderbuffer");
+
+ assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
+
+ GLuint shader_prog = create_program(
+ vertex_shader_source, geometry_shader_source, fragment_shader_source);
+ assert(shader_prog != 0);
+
+ // Clear to opaque black, which matches none of the probe colors
+ glUseProgram(shader_prog);
+ check_gl_error("glUseProgram");
+ glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
+ glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
+ glClear(GL_COLOR_BUFFER_BIT);
+ check_gl_error("glClear");
+
+ glDisable(GL_CULL_FACE);
+ glDisable(GL_DEPTH_TEST);
+ glDisable(GL_STENCIL_TEST);
+ glProvokingVertex(GL_FIRST_VERTEX_CONVENTION);
+ glViewport(0, 0, width, height);
+ check_gl_error("state setup");
+
+ // No vertex buffers are needed: the vertex shader generates all data
+ // from gl_VertexID, so an empty VAO is bound.
+ GLuint vao;
+ glGenVertexArrays(1, &vao);
+ glBindVertexArray(vao);
+ check_gl_error("glBindVertexArray");
+ // Vertices 0-2: triangle list, 3-6: strip, 7-10: fan
+ glDrawArrays(GL_TRIANGLES, 0, 3);
+ glDrawArrays(GL_TRIANGLE_STRIP, 3, 4);
+ glDrawArrays(GL_TRIANGLE_FAN, 7, 4);
+ check_gl_error("glDrawArrays");
+ glFinish(); // glFinish should be unnecessary
+
+ void *pixels = g_malloc(width * height * 4);
+ assert(pixels != NULL);
+ glReadBuffer(GL_COLOR_ATTACHMENT0);
+ glReadPixels(0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, pixels);
+ check_gl_error("glReadPixels");
+
+ glBindVertexArray(0);
+ glDeleteVertexArrays(1, &vao);
+ glUseProgram(0);
+ glDeleteProgram(shader_prog);
+ glBindFramebuffer(GL_FRAMEBUFFER, 0);
+ glDeleteFramebuffers(1, &fbo);
+ glBindRenderbuffer(GL_RENDERBUFFER, 0);
+ glDeleteRenderbuffers(1, &rbo);
+
+ return (uint8_t *)pixels;
+}
+
+/*
+ * Compare two RGB colors with a small tolerance.
+ *
+ * Returns true when the squared Euclidean distance between (r1, g1, b1)
+ * and (r2, g2, b2) is at most 16.
+ */
+static bool colors_match(int r1, int g1, int b1, int r2, int g2, int b2)
+{
+ int dr = r1 - r2;
+ int dg = g1 - g2;
+ int db = b1 - b2;
+
+ return (dr * dr + dg * dg + db * db) <= 16;
+}
+
+/*
+ * Classify a probe pixel.
+ *
+ * pixel: points at the R, G, B bytes of one pixel (the fourth byte is not
+ *        read).
+ *
+ * Returns 0 for blue, 1 for green, 2 for cyan, 3 for red, or -1 when the
+ * pixel matches none of them within the colors_match() tolerance.
+ */
+static int get_color_index(uint8_t *pixel)
+{
+ int r = pixel[0];
+ int g = pixel[1];
+ int b = pixel[2];
+
+ if (colors_match(r, g, b, 0, 0, 255)) {
+ return 0;
+ } else if (colors_match(r, g, b, 0, 255, 0)) {
+ return 1;
+ } else if (colors_match(r, g, b, 0, 255, 255)) {
+ return 2;
+ } else if (colors_match(r, g, b, 255, 0, 0)) {
+ return 3;
+ } else {
+ return -1;
+ }
+}
+
+/*
+ * Convert a normalized device coordinate to a pixel index.
+ *
+ * x, y in [-1, 1] are scaled to [0, width] / [0, height], truncated to
+ * integers and clamped to the image. The return value is y0 * width + x0,
+ * counted in pixels, not bytes; callers multiply by the pixel size.
+ */
+static int calc_offset_from_ndc(float x, float y, int width, int height)
+{
+ int x0 = (int)((x + 1.0f) * width * 0.5f);
+ int y0 = (int)((y + 1.0f) * height * 0.5f);
+
+ x0 = MAX(x0, 0);
+ y0 = MAX(y0, 0);
+ x0 = MIN(x0, width - 1);
+ y0 = MIN(y0, height - 1);
+
+ return y0 * width + x0;
+}
+
+/*
+ * Derive the geometry shader input vertex rotation for each triangle
+ * primitive type from the probe image.
+ *
+ * pixels:        RGBA8 probe image, 4 bytes per pixel, as produced by
+ *                render_geom_shader_triangles().
+ * width, height: image size in pixels.
+ * props:         receives geom_shader_winding.tri, .tri_strip0,
+ *                .tri_strip1 and .tri_fan.
+ *
+ * One pixel is sampled inside each probe triangle. Its color identifies the
+ * source vertex that the geometry shader received at input index 0 (see
+ * get_color_index() for the color numbering). When a sample cannot be
+ * interpreted, a message is printed to stderr and a default is used.
+ */
+static void determine_triangle_winding_order(uint8_t *pixels, int width,
+                                             int height, GPUProperties *props)
+{
+    uint8_t *tri_pix =
+        pixels + calc_offset_from_ndc(-0.5f, -0.5f, width, height) * 4;
+    uint8_t *strip0_pix =
+        pixels + calc_offset_from_ndc(0.417f, -0.417f, width, height) * 4;
+    uint8_t *strip1_pix =
+        pixels + calc_offset_from_ndc(0.583f, -0.583f, width, height) * 4;
+    uint8_t *fan_pix =
+        pixels + calc_offset_from_ndc(-0.583f, 0.417f, width, height) * 4;
+    uint8_t *fan2_pix =
+        pixels + calc_offset_from_ndc(-0.417f, 0.583f, width, height) * 4;
+
+    /* Triangle list: vertices 0, 1, 2 are blue, green, cyan -> 0, 1, 2. */
+    int tri_rot = get_color_index(tri_pix);
+    if (tri_rot < 0 || tri_rot > 2) {
+        fprintf(stderr,
+                "Could not determine triangle rotation, got color: R=%d, G=%d, "
+                "B=%d\n",
+                tri_pix[0], tri_pix[1], tri_pix[2]);
+        tri_rot = 0;
+    }
+    props->geom_shader_winding.tri = tri_rot;
+
+    /* First strip triangle: vertices 3, 4, 5 are blue, green, cyan. */
+    int strip0_rot = get_color_index(strip0_pix);
+    if (strip0_rot < 0 || strip0_rot > 2) {
+        fprintf(stderr,
+                "Could not determine triangle strip0 rotation, got color: "
+                "R=%d, G=%d, B=%d\n",
+                strip0_pix[0], strip0_pix[1], strip0_pix[2]);
+        strip0_rot = 0;
+    }
+    /*
+     * Second strip triangle: vertices 4, 5, 6 are green, cyan, red, so the
+     * color index is shifted down by one to get a 0..2 value.
+     */
+    int strip1_rot = get_color_index(strip1_pix) - 1;
+    if (strip1_rot < 0 || strip1_rot > 2) {
+        fprintf(stderr,
+                "Could not determine triangle strip1 rotation, got color: "
+                "R=%d, G=%d, B=%d\n",
+                strip1_pix[0], strip1_pix[1], strip1_pix[2]);
+        strip1_rot = 0;
+    }
+    props->geom_shader_winding.tri_strip0 = strip0_rot;
+    props->geom_shader_winding.tri_strip1 = (3 - strip1_rot) % 3;
+
+    /*
+     * The fan is sampled in both of its triangles. The first one is built
+     * from vertices 7, 8, 9 (blue, green, cyan -> 0, 1, 2). The second one
+     * is built from vertices 7, 9, 10 (blue, cyan, red), so its color index
+     * is remapped onto the same 0..2 scale. Green or an unrecognized color
+     * cannot come from the second triangle; those map to -2, a value
+     * get_color_index() never returns, which forces the inconsistency
+     * fallback below instead of being mistaken for rotation 0.
+     */
+    int fan_rot = get_color_index(fan_pix);
+    int fan2_rot;
+    switch (get_color_index(fan2_pix)) {
+    case 0:
+        fan2_rot = 0;
+        break;
+    case 2:
+        fan2_rot = 1;
+        break;
+    case 3:
+        fan2_rot = 2;
+        break;
+    default:
+        fan2_rot = -2;
+        break;
+    }
+    if (fan_rot != fan2_rot) {
+        fprintf(stderr,
+                "Unexpected inconsistency in triangle fan winding, got colors: "
+                "R=%d, G=%d, B=%d and R=%d, G=%d, B=%d\n",
+                fan_pix[0], fan_pix[1], fan_pix[2], fan2_pix[0], fan2_pix[1],
+                fan2_pix[2]);
+        fan_rot = 1;
+    }
+    if (fan_rot < 0 || fan_rot > 2) {
+        fprintf(stderr,
+                "Could not determine triangle fan rotation, got color: R=%d, "
+                "G=%d, B=%d\n",
+                fan_pix[0], fan_pix[1], fan_pix[2]);
+        fan_rot = 1;
+    }
+    props->geom_shader_winding.tri_fan = (fan_rot + 2) % 3;
+}
+
+/*
+ * Probe the host GL implementation and store the result in
+ * pgraph_gl_gpu_properties.
+ *
+ * A temporary GL context is created and made current, the 640x480 probe
+ * image is rendered and analyzed, the detected values are printed to
+ * stderr, and finally g_nv2a_context_render is made current again. The
+ * parameter d is not used by this function.
+ */
+void pgraph_gl_determine_gpu_properties(NV2AState *d)
+{
+ const int width = 640;
+ const int height = 480;
+
+ GloContext *g_context = glo_context_create();
+ glo_set_current(g_context);
+
+ uint8_t *pixels = render_geom_shader_triangles(width, height);
+ determine_triangle_winding_order(pixels, width, height,
+ &pgraph_gl_gpu_properties);
+ g_free(pixels);
+
+ fprintf(stderr, "GL geometry shader winding: %d, %d, %d, %d\n",
+ pgraph_gl_gpu_properties.geom_shader_winding.tri,
+ pgraph_gl_gpu_properties.geom_shader_winding.tri_strip0,
+ pgraph_gl_gpu_properties.geom_shader_winding.tri_strip1,
+ pgraph_gl_gpu_properties.geom_shader_winding.tri_fan);
+
+ /* NOTE(review): the temporary context is still current when it is
+ * destroyed - confirm glo_context_destroy() tolerates that. */
+ glo_context_destroy(g_context);
+ glo_set_current(g_nv2a_context_render);
+}
+
+/*
+ * Return a pointer to the file-scope GL probe results. The storage is
+ * static; its contents are filled in by
+ * pgraph_gl_determine_gpu_properties().
+ */
+GPUProperties *pgraph_gl_get_gpu_properties(void)
+{
+ return &pgraph_gl_gpu_properties;
+}
diff --git a/hw/xbox/nv2a/pgraph/gl/meson.build b/hw/xbox/nv2a/pgraph/gl/meson.build
index ab25eacb7d..c19a542adb 100644
--- a/hw/xbox/nv2a/pgraph/gl/meson.build
+++ b/hw/xbox/nv2a/pgraph/gl/meson.build
@@ -3,6 +3,7 @@ specific_ss.add([sdl, gloffscreen, files(
'debug.c',
'display.c',
'draw.c',
+ 'gpuprops.c',
'renderer.c',
'reports.c',
'shaders.c',
diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.c b/hw/xbox/nv2a/pgraph/gl/renderer.c
index 36b8029439..74ece1c5f9 100644
--- a/hw/xbox/nv2a/pgraph/gl/renderer.c
+++ b/hw/xbox/nv2a/pgraph/gl/renderer.c
@@ -66,6 +66,8 @@ static void pgraph_gl_init(NV2AState *d, Error **errp)
pg->uniform_attrs = 0;
pg->swizzle_attrs = 0;
+
+ pgraph_gl_determine_gpu_properties(d);
}
static void pgraph_gl_finalize(NV2AState *d)
@@ -195,6 +197,7 @@ static PGRAPHRenderer pgraph_gl_renderer = {
.set_surface_scale_factor = pgraph_gl_set_surface_scale_factor,
.get_surface_scale_factor = pgraph_gl_get_surface_scale_factor,
.get_framebuffer_surface = pgraph_gl_get_framebuffer_surface,
+ .get_gpu_properties = pgraph_gl_get_gpu_properties,
}
};
diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.h b/hw/xbox/nv2a/pgraph/gl/renderer.h
index 28277fcdf5..5a2524bfbb 100644
--- a/hw/xbox/nv2a/pgraph/gl/renderer.h
+++ b/hw/xbox/nv2a/pgraph/gl/renderer.h
@@ -286,5 +286,7 @@ void pgraph_gl_shader_write_cache_reload_list(PGRAPHState *pg);
void pgraph_gl_set_surface_scale_factor(NV2AState *d, unsigned int scale);
unsigned int pgraph_gl_get_surface_scale_factor(NV2AState *d);
int pgraph_gl_get_framebuffer_surface(NV2AState *d);
+void pgraph_gl_determine_gpu_properties(NV2AState *d);
+GPUProperties *pgraph_gl_get_gpu_properties(void);
#endif
diff --git a/hw/xbox/nv2a/pgraph/glsl/geom.c b/hw/xbox/nv2a/pgraph/glsl/geom.c
index dd5dc363f5..ec4d19fe5f 100644
--- a/hw/xbox/nv2a/pgraph/glsl/geom.c
+++ b/hw/xbox/nv2a/pgraph/glsl/geom.c
@@ -45,6 +45,39 @@ void pgraph_glsl_set_geom_state(PGRAPHState *pg, GeomState *state)
state->z_perspective = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) &
NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE;
+
+ if (pg->renderer->ops.get_gpu_properties) {
+ GPUProperties *gpu_props = pg->renderer->ops.get_gpu_properties();
+
+ switch (state->primitive_mode) {
+ case PRIM_TYPE_TRIANGLES:
+ state->tri_rot0 = gpu_props->geom_shader_winding.tri;
+ state->tri_rot1 = state->tri_rot0;
+ break;
+ case PRIM_TYPE_TRIANGLE_STRIP:
+ state->tri_rot0 = gpu_props->geom_shader_winding.tri_strip0;
+ state->tri_rot1 = gpu_props->geom_shader_winding.tri_strip1;
+ break;
+ case PRIM_TYPE_TRIANGLE_FAN:
+ case PRIM_TYPE_POLYGON:
+ state->tri_rot0 = gpu_props->geom_shader_winding.tri_fan;
+ state->tri_rot1 = state->tri_rot0;
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+/*
+ * Return the GLSL ivec3 literal used to initialize the vertex index
+ * vector "v" of the generated geometry shader for a given rotation:
+ * 0 keeps the input order, 1 yields (2, 0, 1), and any other value
+ * yields (1, 2, 0). The returned string is a constant literal.
+ */
+static const char *get_vertex_order(int rot)
+{
+ if (rot == 0) {
+ return "ivec3(0, 1, 2)";
+ } else if (rot == 1) {
+ return "ivec3(2, 0, 1)";
+ } else {
+ return "ivec3(1, 2, 0)";
+ }
}
bool pgraph_glsl_need_geom(const GeomState *state)
@@ -115,49 +148,47 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts)
case PRIM_TYPE_TRIANGLE_STRIP:
case PRIM_TYPE_TRIANGLE_FAN:
if (state->first_vertex_is_provoking) {
- if (state->primitive_mode == PRIM_TYPE_TRIANGLE_STRIP) {
- provoking_index = "gl_PrimitiveIDIn & 1";
- } else if (state->primitive_mode == PRIM_TYPE_TRIANGLE_FAN) {
- provoking_index = "1";
- } else {
- provoking_index = "0";
- }
+ provoking_index = "v[0]";
+ } else if (state->primitive_mode == PRIM_TYPE_TRIANGLE_STRIP) {
+ provoking_index = "v[2 - (gl_PrimitiveIDIn & 1)]";
+ } else if (state->primitive_mode == PRIM_TYPE_TRIANGLE_FAN) {
+ provoking_index = "v[1]";
} else {
- provoking_index = "2";
+ provoking_index = "v[2]";
}
need_triz = true;
layout_in = "layout(triangles) in;\n";
if (polygon_mode == POLY_MODE_FILL) {
layout_out = "layout(triangle_strip, max_vertices = 3) out;\n";
- body = " mat4 pz = calc_triz(0, 1, 2);\n"
- " emit_vertex(0, pz);\n"
- " emit_vertex(1, pz);\n"
- " emit_vertex(2, pz);\n"
+ body = " mat4 pz = calc_triz(v[0], v[1], v[2]);\n"
+ " emit_vertex(v[0], pz);\n"
+ " emit_vertex(v[1], pz);\n"
+ " emit_vertex(v[2], pz);\n"
" EndPrimitive();\n";
} else if (polygon_mode == POLY_MODE_LINE) {
need_linez = true;
layout_out = "layout(line_strip, max_vertices = 4) out;\n";
- body = " float triMZ = calc_triz(0, 1, 2)[3].x;\n"
- " mat4 pz1 = calc_linez(0, 1);\n"
- " pz1[3].x = triMZ;\n"
- " mat4 pz2 = calc_linez(1, 2);\n"
- " pz2[3].x = triMZ;\n"
- " mat4 pz3 = calc_linez(2, 0);\n"
- " pz3[3].x = triMZ;\n"
- " emit_vertex(0, pz1);\n"
- " emit_vertex(1, pz1);\n"
- " emit_vertex(2, pz2);\n"
- " emit_vertex(0, pz3);\n"
+ body = " float dz = calc_triz(v[0], v[1], v[2])[3].x;\n"
+ " mat4 pz1 = calc_linez(v[0], v[1]);\n"
+ " pz1[3].x = dz;\n"
+ " mat4 pz2 = calc_linez(v[1], v[2]);\n"
+ " pz2[3].x = dz;\n"
+ " mat4 pz3 = calc_linez(v[2], v[0]);\n"
+ " pz3[3].x = dz;\n"
+ " emit_vertex(v[0], pz1);\n"
+ " emit_vertex(v[1], pz2);\n"
+ " emit_vertex(v[2], pz3);\n"
+ " emit_vertex(v[0], pz3);\n"
" EndPrimitive();\n";
} else {
assert(polygon_mode == POLY_MODE_POINT);
layout_out = "layout(points, max_vertices = 3) out;\n";
- body = " mat4 pz = calc_triz(0, 1, 2);\n"
- " emit_vertex(0, mat4(pz[0], pz[0], pz[0], pz[3]));\n"
+ body = " mat4 pz = calc_triz(v[0], v[1], v[2]);\n"
+ " emit_vertex(v[0], mat4(pz[0], pz[0], pz[0], pz[3]));\n"
" EndPrimitive();\n"
- " emit_vertex(1, mat4(pz[1], pz[1], pz[1], pz[3]));\n"
+ " emit_vertex(v[1], mat4(pz[1], pz[1], pz[1], pz[3]));\n"
" EndPrimitive();\n"
- " emit_vertex(2, mat4(pz[2], pz[2], pz[2], pz[3]));\n"
+ " emit_vertex(v[2], mat4(pz[2], pz[2], pz[2], pz[3]));\n"
" EndPrimitive();\n";
}
break;
@@ -188,9 +219,9 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts)
" mat4 pz4 = calc_linez(3, 0);\n"
" pz4[3].x = pzs[3].x;\n"
" emit_vertex(0, pz1);\n"
- " emit_vertex(1, pz1);\n"
- " emit_vertex(2, pz2);\n"
- " emit_vertex(3, pz3);\n"
+ " emit_vertex(1, pz2);\n"
+ " emit_vertex(2, pz3);\n"
+ " emit_vertex(3, pz4);\n"
" emit_vertex(0, pz4);\n"
" EndPrimitive();\n";
} else {
@@ -237,9 +268,9 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts)
" mat4 pz4 = calc_linez(2, 0);\n"
" pz4[3].x = pz[3].x;\n"
" emit_vertex(0, pz1);\n"
- " emit_vertex(1, pz1);\n"
- " emit_vertex(3, pz2);\n"
- " emit_vertex(2, pz3);\n"
+ " emit_vertex(1, pz2);\n"
+ " emit_vertex(3, pz3);\n"
+ " emit_vertex(2, pz4);\n"
" emit_vertex(0, pz4);\n"
" EndPrimitive();\n";
} else {
@@ -259,17 +290,18 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts)
}
break;
case PRIM_TYPE_POLYGON:
- provoking_index = "0";
if (polygon_mode == POLY_MODE_FILL) {
+ provoking_index = "v[2]";
need_triz = true;
layout_in = "layout(triangles) in;\n";
layout_out = "layout(triangle_strip, max_vertices = 3) out;\n";
- body = " mat4 pz = calc_triz(0, 1, 2);\n"
- " emit_vertex(0, pz);\n"
- " emit_vertex(1, pz);\n"
- " emit_vertex(2, pz);\n"
+ body = " mat4 pz = calc_triz(v[0], v[1], v[2]);\n"
+ " emit_vertex(v[0], pz);\n"
+ " emit_vertex(v[1], pz);\n"
+ " emit_vertex(v[2], pz);\n"
" EndPrimitive();\n";
} else if (polygon_mode == POLY_MODE_LINE) {
+ provoking_index = "0";
need_linez = true;
/* FIXME: input here is lines and not triangles so we cannot
* calculate triangle plane slope. Also, the first vertex of the
@@ -310,6 +342,30 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts)
pgraph_glsl_get_vtx_header(output, opts.vulkan, state->smooth_shading,
false, false, false);
+ char vertex_order_buf[80];
+ const char *vertex_order_body = "";
+
+ if (need_triz) {
+ /* Input triangle absolute vertex order is not guaranteed by OpenGL
+ * or Vulkan, only winding order is. Reorder vertices here to first
+ * vertex convention which we assumed above when setting
+ * provoking_index. This mostly only matters with flat shading, but
+ * we reorder always to get consistent results across GPU vendors
+ * regarding floating-point rounding when calculating with vtxPos0/1/2.
+ */
+ mstring_append(output, "ivec3 v;\n");
+ if (state->tri_rot0 == state->tri_rot1) {
+ snprintf(vertex_order_buf, sizeof(vertex_order_buf), " v = %s;\n",
+ get_vertex_order(state->tri_rot0));
+ } else {
+ snprintf(vertex_order_buf, sizeof(vertex_order_buf),
+ " v = (gl_PrimitiveIDIn & 1) == 0 ? %s : %s;\n",
+ get_vertex_order(state->tri_rot0),
+ get_vertex_order(state->tri_rot1));
+ }
+ vertex_order_body = vertex_order_buf;
+ }
+
if (state->smooth_shading) {
provoking_index = "index";
}
@@ -422,8 +478,9 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts)
"\n"
"void main() {\n"
"%s"
+ "%s"
"}\n",
- body);
+ vertex_order_body, body);
return output;
}
diff --git a/hw/xbox/nv2a/pgraph/glsl/geom.h b/hw/xbox/nv2a/pgraph/glsl/geom.h
index 55229d2e46..a00302f39b 100644
--- a/hw/xbox/nv2a/pgraph/glsl/geom.h
+++ b/hw/xbox/nv2a/pgraph/glsl/geom.h
@@ -32,6 +32,8 @@ typedef struct {
bool smooth_shading;
bool first_vertex_is_provoking;
bool z_perspective;
+ short tri_rot0;
+ short tri_rot1;
} GeomState;
typedef struct GenGeomGlslOptions {
diff --git a/hw/xbox/nv2a/pgraph/pgraph.h b/hw/xbox/nv2a/pgraph/pgraph.h
index 49f941d450..0449270b55 100644
--- a/hw/xbox/nv2a/pgraph/pgraph.h
+++ b/hw/xbox/nv2a/pgraph/pgraph.h
@@ -96,6 +96,15 @@ typedef struct BetaState {
uint32_t beta;
} BetaState;
+/*
+ * Host GPU properties reported by a renderer through the
+ * get_gpu_properties op.
+ *
+ * geom_shader_winding holds, per triangle primitive type, the rotation
+ * that pgraph_glsl_set_geom_state() copies into GeomState.tri_rot0 and
+ * tri_rot1 to reorder geometry shader input vertices.
+ */
+typedef struct GPUProperties {
+ struct {
+ /* Rotation for triangle lists. */
+ short tri;
+ /* Rotation for triangle strips, selected when
+ * (gl_PrimitiveIDIn & 1) == 0. */
+ short tri_strip0;
+ /* Rotation for triangle strips, selected otherwise. */
+ short tri_strip1;
+ /* Rotation for triangle fans; also used for polygons. */
+ short tri_fan;
+ } geom_shader_winding;
+} GPUProperties;
+
typedef struct PGRAPHRenderer {
CONFIG_DISPLAY_RENDERER type;
const char *name;
@@ -122,6 +131,7 @@ typedef struct PGRAPHRenderer {
void (*set_surface_scale_factor)(NV2AState *d, unsigned int scale);
unsigned int (*get_surface_scale_factor)(NV2AState *d);
int (*get_framebuffer_surface)(NV2AState *d);
+ GPUProperties *(*get_gpu_properties)(void);
} ops;
} PGRAPHRenderer;
diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c
index e4bd562149..d78087e56b 100644
--- a/hw/xbox/nv2a/pgraph/vk/draw.c
+++ b/hw/xbox/nv2a/pgraph/vk/draw.c
@@ -788,23 +788,6 @@ static void create_pipeline(PGRAPHState *pg)
void *rasterizer_next_struct = NULL;
- VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_state;
-
- if (r->provoking_vertex_extension_enabled) {
- // Use last provoking vertex convention to match geometry shader
- // assumption, because Vulkan default is first vertex convention.
- VkProvokingVertexModeEXT provoking_mode =
- VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT;
-
- provoking_state =
- (VkPipelineRasterizationProvokingVertexStateCreateInfoEXT){
- .sType =
- VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT,
- .provokingVertexMode = provoking_mode,
- };
- rasterizer_next_struct = &provoking_state;
- }
-
VkPipelineRasterizationStateCreateInfo rasterizer = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.depthClampEnable = VK_TRUE,
diff --git a/hw/xbox/nv2a/pgraph/vk/gpuprops.c b/hw/xbox/nv2a/pgraph/vk/gpuprops.c
new file mode 100644
index 0000000000..1562be0993
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/vk/gpuprops.c
@@ -0,0 +1,605 @@
+/*
+ * Geforce NV2A PGRAPH Vulkan Renderer
+ *
+ * Copyright (c) 2024-2025 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "debug.h"
+#include "renderer.h"
+
+static GPUProperties pgraph_vk_gpu_properties;
+
+/*
+ * Vertex shader of the Vulkan winding-order probe. It uses no vertex
+ * attributes: position and color are looked up in constant tables by
+ * gl_VertexIndex. The tables hold 11 vertices colored blue, green, cyan or
+ * red.
+ */
+static const char *vertex_shader_source =
+ "#version 450\n"
+ "layout(location = 0) out vec3 v_fragColor;\n"
+ "\n"
+ "vec2 positions[11] = vec2[](\n"
+ " vec2(-0.5, -0.75),\n"
+ " vec2(-0.25, -0.25),\n"
+ " vec2(-0.75, -0.25),\n"
+ " vec2(0.25, -0.25),\n"
+ " vec2(0.25, -0.75),\n"
+ " vec2(0.75, -0.25),\n"
+ " vec2(0.75, -0.75),\n"
+ " vec2(-0.75, 0.75),\n"
+ " vec2(-0.75, 0.25),\n"
+ " vec2(-0.25, 0.25),\n"
+ " vec2(-0.25, 0.75)\n"
+ ");\n"
+ "\n"
+ "vec3 colors[11] = vec3[](\n"
+ " vec3(0.0, 0.0, 1.0),\n"
+ " vec3(0.0, 1.0, 0.0),\n"
+ " vec3(0.0, 1.0, 1.0),\n"
+ " vec3(0.0, 0.0, 1.0),\n"
+ " vec3(0.0, 1.0, 0.0),\n"
+ " vec3(0.0, 1.0, 1.0),\n"
+ " vec3(1.0, 0.0, 0.0),\n"
+ " vec3(0.0, 0.0, 1.0),\n"
+ " vec3(0.0, 1.0, 0.0),\n"
+ " vec3(0.0, 1.0, 1.0),\n"
+ " vec3(1.0, 0.0, 0.0)\n"
+ ");\n"
+ "\n"
+ "void main() {\n"
+ " gl_Position = vec4(positions[gl_VertexIndex], 0.0, 1.0);\n"
+ " v_fragColor = colors[gl_VertexIndex];\n"
+ "}\n";
+
+/*
+ * Geometry shader of the Vulkan winding-order probe. Positions are passed
+ * through unchanged, but every emitted vertex gets v_fragColor[0], i.e. the
+ * color of whichever input vertex is presented to the shader at index 0.
+ */
+static const char *geometry_shader_source =
+ "#version 450\n"
+ "layout(triangles) in;\n"
+ "layout(triangle_strip, max_vertices = 3) out;\n"
+ "layout(location = 0) out vec3 fragColor;\n"
+ "layout(location = 0) in vec3 v_fragColor[];\n"
+ "\n"
+ "void emit_vertex(int index) {\n"
+ " gl_Position = gl_in[index].gl_Position;\n"
+ " fragColor = v_fragColor[0];\n"
+ " EmitVertex();\n"
+ "}\n"
+ "\n"
+ "void main() {\n"
+ " emit_vertex(0);\n"
+ " emit_vertex(1);\n"
+ " emit_vertex(2);\n"
+ " EndPrimitive();\n"
+ "}\n";
+
+/*
+ * Fragment shader of the Vulkan winding-order probe: writes the color
+ * received from the geometry shader with alpha forced to 1.0.
+ */
+static const char *fragment_shader_source =
+ "#version 450\n"
+ "layout(location = 0) out vec4 outColor;\n"
+ "layout(location = 0) in vec3 fragColor;\n"
+ "\n"
+ "void main() {\n"
+ " outColor = vec4(fragColor, 1.0);\n"
+ "}\n";
+
+/*
+ * Create a graphics pipeline for one probe draw.
+ *
+ * d:                  device state; the Vulkan device is taken from
+ *                     d->pgraph.vk_renderer_state.
+ * primitive_topology: input assembly topology for this pipeline.
+ * vert/geom/frag_shader_module: shader stages, all with entry point "main".
+ * pipeline_layout, render_pass: objects the pipeline is created against
+ *                     (subpass 0).
+ * width, height:      size of the static viewport and scissor rectangle.
+ *
+ * The pipeline has no vertex input bindings, fills polygons, culls back
+ * faces with clockwise front faces, uses one sample per pixel and writes
+ * RGBA without blending. Creation failure is handled by VK_CHECK. The
+ * returned pipeline is not destroyed here.
+ */
+static VkPipeline create_test_pipeline(
+ NV2AState *d, VkPrimitiveTopology primitive_topology,
+ VkShaderModule vert_shader_module, VkShaderModule geom_shader_module,
+ VkShaderModule frag_shader_module, VkPipelineLayout pipeline_layout,
+ VkRenderPass render_pass, int width, int height)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ VkPipelineShaderStageCreateInfo shader_stages[] = {
+ (VkPipelineShaderStageCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = vert_shader_module,
+ .pName = "main",
+ },
+ (VkPipelineShaderStageCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_GEOMETRY_BIT,
+ .module = geom_shader_module,
+ .pName = "main",
+ },
+ (VkPipelineShaderStageCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = frag_shader_module,
+ .pName = "main",
+ },
+ };
+
+ // No vertex buffers: the vertex shader generates its own data
+ VkPipelineVertexInputStateCreateInfo vertex_input_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 0,
+ .vertexAttributeDescriptionCount = 0,
+ };
+
+ VkPipelineInputAssemblyStateCreateInfo input_assembly = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = primitive_topology,
+ .primitiveRestartEnable = VK_FALSE,
+ };
+
+ VkViewport viewport = {
+ .x = 0.0f,
+ .y = 0.0f,
+ .width = (float)width,
+ .height = (float)height,
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f,
+ };
+ VkRect2D scissor = {
+ .offset = { 0, 0 },
+ .extent.width = width,
+ .extent.height = height,
+ };
+ VkPipelineViewportStateCreateInfo viewport_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .pViewports = &viewport,
+ .scissorCount = 1,
+ .pScissors = &scissor,
+ };
+
+ VkPipelineRasterizationStateCreateInfo rasterizer = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .depthClampEnable = VK_FALSE,
+ .rasterizerDiscardEnable = VK_FALSE,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .lineWidth = 1.0f,
+ .cullMode = VK_CULL_MODE_BACK_BIT,
+ .frontFace = VK_FRONT_FACE_CLOCKWISE,
+ .depthBiasEnable = VK_FALSE,
+ };
+
+ VkPipelineMultisampleStateCreateInfo multisampling = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .sampleShadingEnable = VK_FALSE,
+ .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
+ };
+
+ VkPipelineColorBlendAttachmentState color_blend_attachment = {
+ .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
+ .blendEnable = VK_FALSE,
+ };
+
+ VkPipelineColorBlendStateCreateInfo color_blending = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .logicOpEnable = VK_FALSE,
+ .logicOp = VK_LOGIC_OP_COPY,
+ .attachmentCount = 1,
+ .pAttachments = &color_blend_attachment,
+ .blendConstants = { 0.0f, 0.0f, 0.0f, 0.0f },
+ };
+
+ // No depth/stencil or dynamic state is specified
+ VkGraphicsPipelineCreateInfo pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = ARRAY_SIZE(shader_stages),
+ .pStages = shader_stages,
+ .pVertexInputState = &vertex_input_info,
+ .pInputAssemblyState = &input_assembly,
+ .pViewportState = &viewport_state,
+ .pRasterizationState = &rasterizer,
+ .pMultisampleState = &multisampling,
+ .pColorBlendState = &color_blending,
+ .layout = pipeline_layout,
+ .renderPass = render_pass,
+ .subpass = 0,
+ .basePipelineHandle = VK_NULL_HANDLE,
+ };
+
+ VkPipeline pipeline;
+ VK_CHECK(vkCreateGraphicsPipelines(r->device, VK_NULL_HANDLE, 1,
+ &pipeline_info, NULL, &pipeline));
+
+ return pipeline;
+}
+
+static uint8_t *render_geom_shader_triangles(NV2AState *d, int width,
+ int height)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ // Create image
+ VkImageCreateInfo image_create_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .imageType = VK_IMAGE_TYPE_2D,
+ .extent.width = width,
+ .extent.height = height,
+ .extent.depth = 1,
+ .mipLevels = 1,
+ .arrayLayers = 1,
+ .format = VK_FORMAT_R8G8B8A8_UNORM,
+ .tiling = VK_IMAGE_TILING_OPTIMAL,
+ .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+ .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
+ VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ };
+
+ VkImage offscreen_image;
+ VK_CHECK(
+ vkCreateImage(r->device, &image_create_info, NULL, &offscreen_image));
+
+ // Allocate and bind image memory
+ VkMemoryRequirements memory_requirements;
+ vkGetImageMemoryRequirements(r->device, offscreen_image,
+ &memory_requirements);
+
+ VkMemoryAllocateInfo alloc_info = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .allocationSize = memory_requirements.size,
+ .memoryTypeIndex =
+ pgraph_vk_get_memory_type(pg, memory_requirements.memoryTypeBits,
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
+ };
+
+ VkDeviceMemory image_memory;
+ VK_CHECK(vkAllocateMemory(r->device, &alloc_info, NULL, &image_memory));
+ VK_CHECK(vkBindImageMemory(r->device, offscreen_image, image_memory, 0));
+
+ // Create Image View
+ VkImageViewCreateInfo image_view_create_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = offscreen_image,
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = image_create_info.format,
+ .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .subresourceRange.levelCount = 1,
+ .subresourceRange.layerCount = 1,
+ };
+
+ VkImageView offscreen_image_view;
+ VK_CHECK(vkCreateImageView(r->device, &image_view_create_info, NULL,
+ &offscreen_image_view));
+
+ // Buffer for image CPU access
+ VkBufferCreateInfo buffer_info = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .size = width * height * 4, // RGBA8 = 4 bytes per pixel
+ .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ };
+
+ VkBuffer cpu_buffer;
+ VK_CHECK(vkCreateBuffer(r->device, &buffer_info, NULL, &cpu_buffer));
+
+ // Allocate and bind memory for image CPU access
+ VkMemoryRequirements host_mem_requirements;
+ vkGetBufferMemoryRequirements(r->device, cpu_buffer,
+ &host_mem_requirements);
+
+ VkMemoryAllocateInfo host_alloc_info = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .allocationSize = host_mem_requirements.size,
+ .memoryTypeIndex =
+ pgraph_vk_get_memory_type(pg, host_mem_requirements.memoryTypeBits,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
+ };
+
+ VkDeviceMemory cpu_buffer_memory;
+ VK_CHECK(vkAllocateMemory(r->device, &host_alloc_info, NULL,
+ &cpu_buffer_memory));
+ VK_CHECK(vkBindBufferMemory(r->device, cpu_buffer, cpu_buffer_memory, 0));
+
+
+ VkAttachmentDescription color_attachment = {
+ .format = VK_FORMAT_R8G8B8A8_UNORM,
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+ .finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ };
+
+ VkAttachmentReference color_ref = {
+ 0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
+ };
+
+ VkSubpassDescription subpass = {
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .colorAttachmentCount = 1,
+ .pColorAttachments = &color_ref,
+ };
+
+ VkRenderPassCreateInfo render_pass_info = {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = &color_attachment,
+ .subpassCount = 1,
+ .pSubpasses = &subpass,
+ };
+
+ VkRenderPass render_pass;
+ VK_CHECK(
+ vkCreateRenderPass(r->device, &render_pass_info, NULL, &render_pass));
+
+ VkFramebufferCreateInfo fb_info = {
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .renderPass = render_pass,
+ .attachmentCount = 1,
+ .pAttachments = &offscreen_image_view,
+ .width = width,
+ .height = height,
+ .layers = 1,
+ };
+
+ VkFramebuffer framebuffer;
+ VK_CHECK(vkCreateFramebuffer(r->device, &fb_info, NULL, &framebuffer));
+
+ ShaderModuleInfo *vsh_info = pgraph_vk_create_shader_module_from_glsl(
+ r, VK_SHADER_STAGE_VERTEX_BIT, vertex_shader_source);
+ ShaderModuleInfo *geom_info = pgraph_vk_create_shader_module_from_glsl(
+ r, VK_SHADER_STAGE_GEOMETRY_BIT, geometry_shader_source);
+ ShaderModuleInfo *psh_info = pgraph_vk_create_shader_module_from_glsl(
+ r, VK_SHADER_STAGE_FRAGMENT_BIT, fragment_shader_source);
+
+ VkShaderModule vert_shader_module = vsh_info->module;
+ VkShaderModule geom_shader_module = geom_info->module;
+ VkShaderModule frag_shader_module = psh_info->module;
+
+ VkPipelineLayoutCreateInfo pipeline_layout_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 0,
+ .pushConstantRangeCount = 0,
+ };
+
+ VkPipelineLayout pipeline_layout;
+ VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL,
+ &pipeline_layout));
+
+ VkPipeline tri_pipeline = create_test_pipeline(
+ d, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, vert_shader_module,
+ geom_shader_module, frag_shader_module, pipeline_layout, render_pass,
+ width, height);
+
+ VkPipeline strip_pipeline = create_test_pipeline(
+ d, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, vert_shader_module,
+ geom_shader_module, frag_shader_module, pipeline_layout, render_pass,
+ width, height);
+
+ VkPipeline fan_pipeline = create_test_pipeline(
+ d, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN, vert_shader_module,
+ geom_shader_module, frag_shader_module, pipeline_layout, render_pass,
+ width, height);
+
+ pgraph_vk_destroy_shader_module(r, psh_info);
+ pgraph_vk_destroy_shader_module(r, geom_info);
+ pgraph_vk_destroy_shader_module(r, vsh_info);
+
+ VkCommandBufferBeginInfo begin_info = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ };
+ VK_CHECK(vkBeginCommandBuffer(r->command_buffer, &begin_info));
+
+ // Begin render pass
+ VkClearValue clear_color = {
+ .color.float32 = { 0.0f, 0.0f, 0.0f, 1.0f },
+ };
+ VkRenderPassBeginInfo rp_begin = {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = render_pass,
+ .framebuffer = framebuffer,
+ .renderArea.extent.width = width,
+ .renderArea.extent.height = height,
+ .clearValueCount = 1,
+ .pClearValues = &clear_color,
+ };
+
+ vkCmdBeginRenderPass(r->command_buffer, &rp_begin,
+ VK_SUBPASS_CONTENTS_INLINE);
+
+ vkCmdBindPipeline(r->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ tri_pipeline);
+ vkCmdDraw(r->command_buffer, 3, 1, 0, 0);
+ vkCmdBindPipeline(r->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ strip_pipeline);
+ vkCmdDraw(r->command_buffer, 4, 1, 3, 0);
+ vkCmdBindPipeline(r->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ fan_pipeline);
+ vkCmdDraw(r->command_buffer, 4, 1, 7, 0);
+
+ vkCmdEndRenderPass(r->command_buffer);
+
+ // Synchronize and transition framebuffer for copying to CPU
+ pgraph_vk_transition_image_layout(pg, r->command_buffer, offscreen_image,
+ image_create_info.format,
+ VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
+
+ // Copy framebuffer to CPU memory
+ VkBufferImageCopy region = {
+ .bufferOffset = 0,
+ .bufferRowLength = 0, // tightly packed
+ .bufferImageHeight = 0,
+
+ .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .imageSubresource.mipLevel = 0,
+ .imageSubresource.baseArrayLayer = 0,
+ .imageSubresource.layerCount = 1,
+
+ .imageOffset = { 0, 0, 0 },
+ .imageExtent = { width, height, 1 },
+ };
+
+ vkCmdCopyImageToBuffer(r->command_buffer, offscreen_image,
+ VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, cpu_buffer, 1,
+ &region);
+
+ VK_CHECK(vkEndCommandBuffer(r->command_buffer));
+
+ VkSubmitInfo submit_info = {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .commandBufferCount = 1,
+ .pCommandBuffers = &r->command_buffer,
+ };
+
+ VK_CHECK(vkQueueSubmit(r->queue, 1, &submit_info, VK_NULL_HANDLE));
+ VK_CHECK(vkQueueWaitIdle(r->queue));
+
+ void *data;
+ VK_CHECK(
+ vkMapMemory(r->device, cpu_buffer_memory, 0, VK_WHOLE_SIZE, 0, &data));
+ void *pixels = g_malloc(width * height * 4);
+ assert(pixels != NULL);
+ memcpy(pixels, data, width * height * 4);
+ vkUnmapMemory(r->device, cpu_buffer_memory);
+
+ vkDestroyPipeline(r->device, strip_pipeline, NULL);
+ vkDestroyPipeline(r->device, fan_pipeline, NULL);
+ vkDestroyPipeline(r->device, tri_pipeline, NULL);
+ vkDestroyPipelineLayout(r->device, pipeline_layout, NULL);
+ vkDestroyFramebuffer(r->device, framebuffer, NULL);
+ vkDestroyRenderPass(r->device, render_pass, NULL);
+ vkDestroyImageView(r->device, offscreen_image_view, NULL);
+ vkDestroyBuffer(r->device, cpu_buffer, NULL);
+ vkFreeMemory(r->device, cpu_buffer_memory, NULL);
+ vkDestroyImage(r->device, offscreen_image, NULL);
+ vkFreeMemory(r->device, image_memory, NULL);
+
+ return (uint8_t *)pixels;
+}
+
+static bool colors_match(int r1, int g1, int b1, int r2, int g2, int b2)
+{ // Approximate equality test between (r1, g1, b1) and (r2, g2, b2).
+ int dr = r1 - r2; // Signed per-channel differences.
+ int dg = g1 - g2;
+ int db = b1 - b2;
+ // Match when the sum of squared channel differences is at most 16.
+ return (dr * dr + dg * dg + db * db) <= 16;
+}
+
+static int get_color_index(uint8_t *pixel)
+{ // Classify a pixel as one of four reference colors; -1 if none matches.
+ int r = pixel[0]; // Only the first three bytes are read, as R, G, B.
+ int g = pixel[1];
+ int b = pixel[2];
+ // Reference colors are tested in index order using colors_match().
+ if (colors_match(r, g, b, 0, 0, 255)) {
+ return 0; // blue
+ } else if (colors_match(r, g, b, 0, 255, 0)) {
+ return 1; // green
+ } else if (colors_match(r, g, b, 0, 255, 255)) {
+ return 2; // cyan
+ } else if (colors_match(r, g, b, 255, 0, 0)) {
+ return 3; // red
+ } else {
+ return -1; // no reference color matched
+ }
+}
+
+static int calc_offset_from_ndc(float x, float y, int width, int height)
+{ // Map a coordinate in [-1, 1] x [-1, 1] to a row-major pixel index.
+ int x0 = (int)((x + 1.0f) * width * 0.5f); // -1 -> 0, +1 -> width
+ int y0 = (int)((y + 1.0f) * height * 0.5f); // -1 -> 0, +1 -> height
+ // Clamp so that out-of-range inputs still select a valid pixel.
+ x0 = MAX(x0, 0);
+ y0 = MAX(y0, 0);
+ x0 = MIN(x0, width - 1);
+ y0 = MIN(y0, height - 1);
+ // The result counts pixels, not bytes.
+ return y0 * width + x0;
+}
+
+static void determine_triangle_winding_order(uint8_t *pixels, int width,
+ int height, GPUProperties *props)
+{ // Sample five pixels (4 bytes each) and fill props->geom_shader_winding.
+ uint8_t *tri_pix =
+ pixels + calc_offset_from_ndc(-0.5f, -0.5f, width, height) * 4;
+ uint8_t *strip0_pix =
+ pixels + calc_offset_from_ndc(0.417f, -0.417f, width, height) * 4;
+ uint8_t *strip1_pix =
+ pixels + calc_offset_from_ndc(0.583f, -0.583f, width, height) * 4;
+ uint8_t *fan_pix =
+ pixels + calc_offset_from_ndc(-0.583f, 0.417f, width, height) * 4;
+ uint8_t *fan2_pix =
+ pixels + calc_offset_from_ndc(-0.417f, 0.583f, width, height) * 4;
+ // Triangle sample: only color indices 0..2 are accepted; fallback is 0.
+ int tri_rot = get_color_index(tri_pix);
+ if (tri_rot < 0 || tri_rot > 2) {
+ fprintf(stderr,
+ "Could not determine triangle rotation, got color: R=%d, G=%d, "
+ "B=%d\n",
+ tri_pix[0], tri_pix[1], tri_pix[2]);
+ tri_rot = 0;
+ }
+ props->geom_shader_winding.tri = tri_rot;
+ // Strip samples: strip0 is used as-is, strip1 is shifted down by one.
+ int strip0_rot = get_color_index(strip0_pix);
+ if (strip0_rot < 0 || strip0_rot > 2) {
+ fprintf(stderr,
+ "Could not determine triangle strip0 rotation, got color: "
+ "R=%d, G=%d, B=%d\n",
+ strip0_pix[0], strip0_pix[1], strip0_pix[2]);
+ strip0_rot = 0;
+ }
+ int strip1_rot = get_color_index(strip1_pix) - 1; // index 0 or -1 fails
+ if (strip1_rot < 0 || strip1_rot > 2) {
+ fprintf(stderr,
+ "Could not determine triangle strip1 rotation, got color: "
+ "R=%d, G=%d, B=%d\n",
+ strip1_pix[0], strip1_pix[1], strip1_pix[2]);
+ strip1_rot = 0;
+ }
+ props->geom_shader_winding.tri_strip0 = strip0_rot;
+ props->geom_shader_winding.tri_strip1 = (3 - strip1_rot) % 3; // 0,1,2 -> 0,2,1
+ // Fan samples: fan_rot must equal fan2's index after normalization.
+ int fan_rot = get_color_index(fan_pix);
+ int fan2_rot = get_color_index(fan2_pix);
+ if (fan2_rot == 0) { // indices 0 and 1 both normalize to 0 below
+ fan2_rot = 1;
+ }
+ fan2_rot--;
+ if (fan_rot != fan2_rot) {
+ fprintf(stderr,
+ "Unexpected inconsistency in triangle fan winding, got colors: "
+ "R=%d, G=%d, B=%d and R=%d, G=%d, B=%d\n",
+ fan_pix[0], fan_pix[1], fan_pix[2], fan2_pix[0], fan2_pix[1],
+ fan2_pix[2]);
+ fan_rot = 1; // fallback; stored below as (1 + 2) % 3 == 0
+ }
+ if (fan_rot < 0 || fan_rot > 2) {
+ fprintf(stderr,
+ "Could not determine triangle fan rotation, got color: R=%d, "
+ "G=%d, B=%d\n",
+ fan_pix[0], fan_pix[1], fan_pix[2]);
+ fan_rot = 1;
+ }
+ props->geom_shader_winding.tri_fan = (fan_rot + 2) % 3; // 0,1,2 -> 2,0,1
+}
+
+void pgraph_vk_determine_gpu_properties(NV2AState *d)
+{ // Run a test render and record winding in pgraph_vk_gpu_properties.
+ const int width = 640;
+ const int height = 480;
+ // Render the test image, classify its sampled pixels, free the buffer.
+ uint8_t *pixels = render_geom_shader_triangles(d, width, height);
+ determine_triangle_winding_order(pixels, width, height,
+ &pgraph_vk_gpu_properties);
+ g_free(pixels);
+ // Log the four values: tri, tri_strip0, tri_strip1, tri_fan.
+ fprintf(stderr, "VK geometry shader winding: %d, %d, %d, %d\n",
+ pgraph_vk_gpu_properties.geom_shader_winding.tri,
+ pgraph_vk_gpu_properties.geom_shader_winding.tri_strip0,
+ pgraph_vk_gpu_properties.geom_shader_winding.tri_strip1,
+ pgraph_vk_gpu_properties.geom_shader_winding.tri_fan);
+}
+
+GPUProperties *pgraph_vk_get_gpu_properties(void)
+{ // Accessor for the file-level pgraph_vk_gpu_properties object.
+ return &pgraph_vk_gpu_properties; // pointer to shared state, not a copy
+}
diff --git a/hw/xbox/nv2a/pgraph/vk/instance.c b/hw/xbox/nv2a/pgraph/vk/instance.c
index 3d993cb735..fcada9e29b 100644
--- a/hw/xbox/nv2a/pgraph/vk/instance.c
+++ b/hw/xbox/nv2a/pgraph/vk/instance.c
@@ -401,10 +401,6 @@ static void add_optional_device_extension_names(
add_extension_if_available(available_extensions, enabled_extension_names,
VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
- r->provoking_vertex_extension_enabled =
- add_extension_if_available(available_extensions, enabled_extension_names,
- VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME);
-
r->memory_budget_extension_enabled = add_extension_if_available(
available_extensions, enabled_extension_names,
VK_EXT_MEMORY_BUDGET_EXTENSION_NAME);
@@ -570,17 +566,6 @@ static bool create_logical_device(PGRAPHState *pg, Error **errp)
void *next_struct = NULL;
- VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex_features;
- if (r->provoking_vertex_extension_enabled) {
- provoking_vertex_features = (VkPhysicalDeviceProvokingVertexFeaturesEXT){
- .sType =
- VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT,
- .provokingVertexLast = VK_TRUE,
- .pNext = next_struct,
- };
- next_struct = &provoking_vertex_features;
- }
-
VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border_features;
if (r->custom_border_color_extension_enabled) {
custom_border_features = (VkPhysicalDeviceCustomBorderColorFeaturesEXT){
diff --git a/hw/xbox/nv2a/pgraph/vk/meson.build b/hw/xbox/nv2a/pgraph/vk/meson.build
index 24c2474cb9..b5d75e2faa 100644
--- a/hw/xbox/nv2a/pgraph/vk/meson.build
+++ b/hw/xbox/nv2a/pgraph/vk/meson.build
@@ -9,6 +9,7 @@ specific_ss.add([sdl, volk, libglslang, vma, vulkan, spirv_reflect, gloffscreen,
'display.c',
'draw.c',
'glsl.c',
+ 'gpuprops.c',
'image.c',
'instance.c',
'renderer.c',
diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.c b/hw/xbox/nv2a/pgraph/vk/renderer.c
index 3dbc724b95..4272bbceb6 100644
--- a/hw/xbox/nv2a/pgraph/vk/renderer.c
+++ b/hw/xbox/nv2a/pgraph/vk/renderer.c
@@ -62,6 +62,8 @@ static void pgraph_vk_init(NV2AState *d, Error **errp)
pgraph_vk_update_vertex_ram_buffer(&d->pgraph, 0, d->vram_ptr,
memory_region_size(d->vram));
+
+ pgraph_vk_determine_gpu_properties(d);
}
static void pgraph_vk_finalize(NV2AState *d)
@@ -227,6 +229,7 @@ static PGRAPHRenderer pgraph_vk_renderer = {
.set_surface_scale_factor = pgraph_vk_set_surface_scale_factor,
.get_surface_scale_factor = pgraph_vk_get_surface_scale_factor,
.get_framebuffer_surface = pgraph_vk_get_framebuffer_surface,
+ .get_gpu_properties = pgraph_vk_get_gpu_properties,
}
};
diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h
index c2233a7b17..9f87114187 100644
--- a/hw/xbox/nv2a/pgraph/vk/renderer.h
+++ b/hw/xbox/nv2a/pgraph/vk/renderer.h
@@ -325,7 +325,6 @@ typedef struct PGRAPHVkState {
bool debug_utils_extension_enabled;
bool custom_border_color_extension_enabled;
- bool provoking_vertex_extension_enabled;
bool memory_budget_extension_enabled;
VkPhysicalDevice physical_device;
@@ -594,4 +593,8 @@ void pgraph_vk_end_nondraw_commands(PGRAPHState *pg, VkCommandBuffer cmd);
// blit.c
void pgraph_vk_image_blit(NV2AState *d);
+// gpuprops.c
+void pgraph_vk_determine_gpu_properties(NV2AState *d);
+GPUProperties *pgraph_vk_get_gpu_properties(void);
+
#endif