From 687bf629720a84d68b489043ad1e63f4ebc954fa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 May 2025 03:31:37 +0000 Subject: [PATCH 01/12] ci: bump astral-sh/setup-uv from 6.0.0 to 6.0.1 Bumps [astral-sh/setup-uv](https://github.com/astral-sh/setup-uv) from 6.0.0 to 6.0.1. - [Release notes](https://github.com/astral-sh/setup-uv/releases) - [Commits](https://github.com/astral-sh/setup-uv/compare/c7f87aa956e4c323abf06d5dec078e358f6b4d04...6b9c6063abd6010835644d4c2e1bef4cf5cd0fca) --- updated-dependencies: - dependency-name: astral-sh/setup-uv dependency-version: 6.0.1 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/bump-subproject-wraps.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/bump-subproject-wraps.yml b/.github/workflows/bump-subproject-wraps.yml index 163d55e259..ff7740d78f 100644 --- a/.github/workflows/bump-subproject-wraps.yml +++ b/.github/workflows/bump-subproject-wraps.yml @@ -17,7 +17,7 @@ jobs: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - name: Install the latest version of uv - uses: astral-sh/setup-uv@c7f87aa956e4c323abf06d5dec078e358f6b4d04 # v6 + uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6 with: enable-cache: false From f7e40b2b80ad08745d6ecbdd3ffca0b655d6d6fb Mon Sep 17 00:00:00 2001 From: xemu-robot Date: Mon, 12 May 2025 06:03:35 +0000 Subject: [PATCH 02/12] meson: Bump SPIRV-Reflect to vulkan-sdk-1.4.313.0 --- subprojects/SPIRV-Reflect.wrap | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subprojects/SPIRV-Reflect.wrap b/subprojects/SPIRV-Reflect.wrap index 8107b8b7a7..43ec680c93 100644 --- a/subprojects/SPIRV-Reflect.wrap +++ b/subprojects/SPIRV-Reflect.wrap @@ -1,6 +1,6 @@ [wrap-git] url = https://github.com/KhronosGroup/SPIRV-Reflect -revision = 
c637858562fbce1b6f5dc7ca48d4e8a5bd117b70 +revision = c6c0f5c9796bdef40c55065d82e0df67c38a29a4 depth = 1 [update] From 428c975f098cf3c8914e07219be02bed56a0b8d1 Mon Sep 17 00:00:00 2001 From: Erik Abair Date: Fri, 4 Apr 2025 10:32:49 -0700 Subject: [PATCH 03/12] nv2a: Allow multiframe RenderDoc captures with nv2a traces Allows multiple frames to be captured at once by holding shift while pressing F10. Temporarily toggles nv2a trace messages if control is held while pressing F10. --- hw/xbox/nv2a/debug.h | 3 ++- hw/xbox/nv2a/pgraph/debug_renderdoc.c | 4 +++- hw/xbox/nv2a/pgraph/gl/debug.c | 26 +++++++++++++++++++------- hw/xbox/nv2a/pgraph/vk/debug.c | 16 ++++++++++++++-- ui/xui/main.cc | 4 ++-- ui/xui/menubar.cc | 4 +++- 6 files changed, 43 insertions(+), 14 deletions(-) diff --git a/hw/xbox/nv2a/debug.h b/hw/xbox/nv2a/debug.h index 3873f94239..2dc4bece96 100644 --- a/hw/xbox/nv2a/debug.h +++ b/hw/xbox/nv2a/debug.h @@ -155,8 +155,9 @@ static inline void nv2a_profile_inc_counter(enum NV2A_PROF_COUNTERS_ENUM cnt) void nv2a_dbg_renderdoc_init(void); void *nv2a_dbg_renderdoc_get_api(void); bool nv2a_dbg_renderdoc_available(void); -void nv2a_dbg_renderdoc_capture_frames(int num_frames); +void nv2a_dbg_renderdoc_capture_frames(int num_frames, bool trace); extern int renderdoc_capture_frames; +extern bool renderdoc_trace_frames; #endif #ifdef __cplusplus diff --git a/hw/xbox/nv2a/pgraph/debug_renderdoc.c b/hw/xbox/nv2a/pgraph/debug_renderdoc.c index 273e307973..667f01a0c9 100644 --- a/hw/xbox/nv2a/pgraph/debug_renderdoc.c +++ b/hw/xbox/nv2a/pgraph/debug_renderdoc.c @@ -36,6 +36,7 @@ static RENDERDOC_API_1_6_0 *rdoc_api = NULL; int renderdoc_capture_frames = 0; +bool renderdoc_trace_frames = false; void nv2a_dbg_renderdoc_init(void) { @@ -89,7 +90,8 @@ bool nv2a_dbg_renderdoc_available(void) return rdoc_api != NULL; } -void nv2a_dbg_renderdoc_capture_frames(int num_frames) +void nv2a_dbg_renderdoc_capture_frames(int num_frames, bool trace) { renderdoc_capture_frames += 
num_frames; + renderdoc_trace_frames = trace; } diff --git a/hw/xbox/nv2a/pgraph/gl/debug.c b/hw/xbox/nv2a/pgraph/gl/debug.c index 968941dc7e..97803bc301 100644 --- a/hw/xbox/nv2a/pgraph/gl/debug.c +++ b/hw/xbox/nv2a/pgraph/gl/debug.c @@ -29,6 +29,8 @@ #include #ifdef CONFIG_RENDERDOC +#include "trace/control.h" + #pragma GCC diagnostic ignored "-Wstrict-prototypes" #include "thirdparty/renderdoc_app.h" #endif @@ -154,7 +156,8 @@ void gl_debug_frame_terminator(void) RENDERDOC_API_1_6_0 *rdoc_api = nv2a_dbg_renderdoc_get_api(); if (rdoc_api->IsTargetControlConnected()) { - if (rdoc_api->IsFrameCapturing()) { + bool capturing = rdoc_api->IsFrameCapturing(); + if (capturing && renderdoc_capture_frames == 0) { rdoc_api->EndFrameCapture(NULL, NULL); GLenum error = glGetError(); if (error != GL_NO_ERROR) { @@ -162,14 +165,23 @@ void gl_debug_frame_terminator(void) "Renderdoc EndFrameCapture triggered GL error 0x%X - ignoring\n", error); } + if (renderdoc_trace_frames) { + trace_enable_events("-nv2a_pgraph_*"); + renderdoc_trace_frames = false; + } } if (renderdoc_capture_frames > 0) { - rdoc_api->StartFrameCapture(NULL, NULL); - GLenum error = glGetError(); - if (error != GL_NO_ERROR) { - fprintf(stderr, - "Renderdoc StartFrameCapture triggered GL error 0x%X - ignoring\n", - error); + if (!capturing) { + if (renderdoc_trace_frames) { + trace_enable_events("nv2a_pgraph_*"); + } + rdoc_api->StartFrameCapture(NULL, NULL); + GLenum error = glGetError(); + if (error != GL_NO_ERROR) { + fprintf(stderr, + "Renderdoc StartFrameCapture triggered GL error 0x%X - ignoring\n", + error); + } } --renderdoc_capture_frames; } diff --git a/hw/xbox/nv2a/pgraph/vk/debug.c b/hw/xbox/nv2a/pgraph/vk/debug.c index 5c31c9f119..88327b0cca 100644 --- a/hw/xbox/nv2a/pgraph/vk/debug.c +++ b/hw/xbox/nv2a/pgraph/vk/debug.c @@ -25,6 +25,8 @@ #endif #ifdef CONFIG_RENDERDOC +#include "trace/control.h" + #pragma GCC diagnostic ignored "-Wstrict-prototypes" #include "thirdparty/renderdoc_app.h" #endif @@ 
-46,11 +48,21 @@ void pgraph_vk_debug_frame_terminator(void) PGRAPHVkState *r = g_nv2a->pgraph.vk_renderer_state; if (rdoc_api->IsTargetControlConnected()) { - if (rdoc_api->IsFrameCapturing()) { + bool capturing = rdoc_api->IsFrameCapturing(); + if (capturing && renderdoc_capture_frames == 0) { rdoc_api->EndFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(r->instance), 0); + if (renderdoc_trace_frames) { + trace_enable_events("-nv2a_pgraph_*"); + renderdoc_trace_frames = false; + } } if (renderdoc_capture_frames > 0) { - rdoc_api->StartFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(r->instance), 0); + if (!capturing) { + if (renderdoc_trace_frames) { + trace_enable_events("nv2a_pgraph_*"); + } + rdoc_api->StartFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(r->instance), 0); + } --renderdoc_capture_frames; } } diff --git a/ui/xui/main.cc b/ui/xui/main.cc index 699805f113..a0e57820e1 100644 --- a/ui/xui/main.cc +++ b/ui/xui/main.cc @@ -218,7 +218,7 @@ void xemu_hud_render(void) #if defined(CONFIG_RENDERDOC) if (g_capture_renderdoc_frame) { - nv2a_dbg_renderdoc_capture_frames(1); + nv2a_dbg_renderdoc_capture_frames(1, false); g_capture_renderdoc_frame = false; } #endif @@ -291,7 +291,7 @@ void xemu_hud_render(void) !ImGui::IsAnyItemFocused() && !ImGui::IsAnyItemHovered())) { g_scene_mgr.PushScene(g_popup_menu); } - + bool mod_key_down = ImGui::IsKeyDown(ImGuiKey_ModShift); for (int f_key = 0; f_key < 4; ++f_key) { if (ImGui::IsKeyPressed((enum ImGuiKey)(ImGuiKey_F5 + f_key))) { diff --git a/ui/xui/menubar.cc b/ui/xui/menubar.cc index f0b6c1d5c2..83a0b08ae2 100644 --- a/ui/xui/menubar.cc +++ b/ui/xui/menubar.cc @@ -73,7 +73,9 @@ void ProcessKeyboardShortcuts(void) #ifdef CONFIG_RENDERDOC if (ImGui::IsKeyPressed(ImGuiKey_F10) && nv2a_dbg_renderdoc_available()) { - nv2a_dbg_renderdoc_capture_frames(1); + ImGuiIO& io = ImGui::GetIO(); + int num_frames = io.KeyShift ? 
5 : 1; + nv2a_dbg_renderdoc_capture_frames(num_frames, io.KeyCtrl); } #endif } From bd3cd78ae47498e03dae947a8c35a8dde66143db Mon Sep 17 00:00:00 2001 From: Erik Abair Date: Tue, 22 Apr 2025 21:30:21 -0700 Subject: [PATCH 04/12] ui: Toggle fullscreen on mouse double click --- ui/xui/main.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ui/xui/main.cc b/ui/xui/main.cc index a0e57820e1..07441f3a64 100644 --- a/ui/xui/main.cc +++ b/ui/xui/main.cc @@ -290,6 +290,8 @@ void xemu_hud_render(void) (ImGui::IsMouseClicked(ImGuiMouseButton_Right) && !ImGui::IsAnyItemFocused() && !ImGui::IsAnyItemHovered())) { g_scene_mgr.PushScene(g_popup_menu); + } else if (ImGui::IsMouseDoubleClicked(ImGuiMouseButton_Left)) { + xemu_toggle_fullscreen(); } bool mod_key_down = ImGui::IsKeyDown(ImGuiKey_ModShift); From c720af00bb9db153fc26e982ae379006fa7dbbc2 Mon Sep 17 00:00:00 2001 From: Erik Abair Date: Thu, 15 May 2025 12:54:56 -0700 Subject: [PATCH 05/12] nv2a/vsh: Replace NaN with 1.0 for Bx, Dx, Fog outputs and MUL zero-check --- hw/xbox/nv2a/pgraph/glsl/vsh-prog.c | 2 +- hw/xbox/nv2a/pgraph/glsl/vsh.c | 17 +++++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c index 0530e7ea7b..3e0ab5fbba 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c @@ -639,7 +639,7 @@ static const char* vsh_header = // Unfortunately mix() falls victim to the same handling of exceptional // (inf/NaN) handling as a multiply, so per-component comparisons are used // to guarantee HW behavior (anything * 0 must == 0). 
- " vec4 zero_components = sign(src0) * sign(src1);\n" + " vec4 zero_components = sign(NaNToOne(src0)) * sign(NaNToOne(src1));\n" " vec4 ret = src0 * src1;\n" " if (zero_components.x == 0.0) { ret.x = 0.0; }\n" " if (zero_components.y == 0.0) { ret.y = 0.0; }\n" diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh.c b/hw/xbox/nv2a/pgraph/glsl/vsh.c index 25c846bbde..f3c5dd5a43 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh.c @@ -81,6 +81,10 @@ MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs) " t = clamp(t, uintBitsToFloat(0xDF800000), uintBitsToFloat(0x9F800000));\n" " }\n" " return t;\n" + "}\n" + "\n" + "vec4 NaNToOne(vec4 src) {\n" + " return mix(src, vec4(1.0), isnan(src));\n" "}\n"); pgraph_get_glsl_vtx_header(header, state->vulkan, state->smooth_shading, @@ -128,6 +132,7 @@ MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs) } } } + mstring_append(header, "\n"); MString *body = mstring_from_str("void main() {\n"); @@ -232,17 +237,17 @@ MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs) break; } - mstring_append(body, " oFog.xyzw = vec4(fogFactor);\n"); + mstring_append(body, " oFog = NaNToOne(vec4(fogFactor));\n"); } else { /* FIXME: Is the fog still calculated / passed somehow?! 
*/ - mstring_append(body, " oFog.xyzw = vec4(1.0);\n"); + mstring_append(body, " oFog = vec4(1.0);\n"); } /* Set outputs */ mstring_append(body, "\n" - " vtxD0 = clamp(oD0, 0.0, 1.0);\n" - " vtxB0 = clamp(oB0, 0.0, 1.0);\n" + " vtxD0 = clamp(NaNToOne(oD0), 0.0, 1.0);\n" + " vtxB0 = clamp(NaNToOne(oB0), 0.0, 1.0);\n" " vtxFog = oFog.x;\n" " vtxT0 = oT0;\n" " vtxT1 = oT1;\n" @@ -253,8 +258,8 @@ MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs) if (state->specular_enable) { mstring_append(body, - " vtxD1 = clamp(oD1, 0.0, 1.0);\n" - " vtxB1 = clamp(oB1, 0.0, 1.0);\n" + " vtxD1 = clamp(NaNToOne(oD1), 0.0, 1.0);\n" + " vtxB1 = clamp(NaNToOne(oB1), 0.0, 1.0);\n" ); if (state->ignore_specular_alpha) { From d8b1cae1fd59a28d4106a82d0feaec235288c3c4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 May 2025 04:00:39 +0000 Subject: [PATCH 06/12] ci: bump docker/build-push-action from 6.16.0 to 6.17.0 Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 6.16.0 to 6.17.0. - [Release notes](https://github.com/docker/build-push-action/releases) - [Commits](https://github.com/docker/build-push-action/compare/14487ce63c7a62a4a324b0bfb37086795e31c6c1...1dc73863535b631f98b2378be8619f83b136f4a0) --- updated-dependencies: - dependency-name: docker/build-push-action dependency-version: 6.17.0 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- .github/workflows/build-xemu-win64-toolchain.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-xemu-win64-toolchain.yml b/.github/workflows/build-xemu-win64-toolchain.yml index 346e8fad8d..4257895759 100644 --- a/.github/workflows/build-xemu-win64-toolchain.yml +++ b/.github/workflows/build-xemu-win64-toolchain.yml @@ -44,7 +44,7 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Build and push image - uses: docker/build-push-action@14487ce63c7a62a4a324b0bfb37086795e31c6c1 # v5 + uses: docker/build-push-action@1dc73863535b631f98b2378be8619f83b136f4a0 # v5 with: context: ubuntu-win64-cross push: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} From c9cdd76102ae6345b4f3df20a8090186034e8bbb Mon Sep 17 00:00:00 2001 From: xemu-robot Date: Mon, 19 May 2025 06:03:58 +0000 Subject: [PATCH 07/12] meson: Bump VulkanMemoryAllocator to v3.3.0 --- subprojects/VulkanMemoryAllocator.wrap | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subprojects/VulkanMemoryAllocator.wrap b/subprojects/VulkanMemoryAllocator.wrap index 56550f5c96..321b8c7bed 100644 --- a/subprojects/VulkanMemoryAllocator.wrap +++ b/subprojects/VulkanMemoryAllocator.wrap @@ -1,4 +1,4 @@ [wrap-git] url = https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator -revision = c788c52156f3ef7bc7ab769cb03c110a53ac8fcb +revision = 1d8f600fd424278486eade7ed3e877c99f0846b1 depth = 1 From ef1b08d79dd780bd4b019306996a6b8cdb7e92aa Mon Sep 17 00:00:00 2001 From: Shiralyn <68858896+ShiralynDev@users.noreply.github.com> Date: Tue, 20 May 2025 20:37:29 +0200 Subject: [PATCH 08/12] ui: Add "allow vibration" input setting --- config_spec.yml | 3 +++ ui/xemu-input.c | 2 +- ui/xui/main-menu.cc | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/config_spec.yml b/config_spec.yml index d013c806ac..e95e6bddf7 100644 --- a/config_spec.yml +++ b/config_spec.yml 
@@ -54,6 +54,9 @@ input: auto_bind: type: bool default: true + allow_vibration: + type: bool + default: true background_input_capture: bool keyboard_controller_scancode_map: # Scancode reference : https://github.com/libsdl-org/SDL/blob/main/include/SDL_scancode.h diff --git a/ui/xemu-input.c b/ui/xemu-input.c index 31a51eda9d..de1db08f21 100644 --- a/ui/xemu-input.c +++ b/ui/xemu-input.c @@ -490,7 +490,7 @@ void xemu_input_update_sdl_controller_state(ControllerState *state) void xemu_input_update_rumble(ControllerState *state) { - if (!state->rumble_enabled) { + if (!state->rumble_enabled || !g_config.input.allow_vibration) { return; } diff --git a/ui/xui/main-menu.cc b/ui/xui/main-menu.cc index 9bb5dcf33f..ef3bb05c89 100644 --- a/ui/xui/main-menu.cc +++ b/ui/xui/main-menu.cc @@ -485,6 +485,8 @@ void MainMenuInputView::Draw() SectionTitle("Options"); Toggle("Auto-bind controllers", &g_config.input.auto_bind, "Bind newly connected controllers to any open port"); + Toggle("Controller vibration", &g_config.input.allow_vibration, + "Allows the controllers to vibrate"); Toggle("Background controller input capture", &g_config.input.background_input_capture, "Capture even if window is unfocused (requires restart)"); From 11dcae01b9c14c49426accbc167b431cff89fc3a Mon Sep 17 00:00:00 2001 From: coldhex Date: Mon, 13 Jan 2025 18:52:25 +0200 Subject: [PATCH 09/12] nv2a: implement screen coordinate rounding to 4 bit fractional precision Xbox triangle rasterization appears to follow the usual top-left rule. However, since Xemu renders to an OpenGL framebuffer object (FBO) instead of directly to the default framebuffer, Xemu actually has what could be called the bottom-left triangle rasterization rule. I'll address that in another commit. Also, note that the ProjAdjacentGeometry_0.5625 test in nxdk_pgraph_tests is very sensitive to floating point rounding errors. 
For example, the nxdk_pgraph_tests commit 66b32a0b1feba32a0db7a95d6358e84f7a6246ad changed the math library which caused the test result to change also on real Xbox hardware due to floating point rounding error differences in matrix inverse computation. Apart from the bottom-left rasterization issue, the differing result between Xbox and the rounding I am proposing here for Xemu seems to stem from floating point rounding that happens in screen coordinate calculations before the rounding to 4 bit precision takes place. Fixing such rounding issues would require carrying out all preceding floating point computations in exactly the same order and with the same precision as the Xbox. Note that the Xbox Direct3D library seems to add 0.03125 (1/32) to screen coordinates by default. Likely the idea there was to make floating point screen coordinates round to the nearest screen coordinates in 4 bit fixed point precision. So the Xbox Direct3D library (and therefore games) already mitigates precarious rounding when exactly half-integer coordinates are used by games. Actually they would use integer coordinates because it is Direct3D 8, but since nv2a appears to rasterize at half-integer coordinates like OpenGL, Xbox Direct3D also adds 0.5 to screen coordinates in addition to 1/32.
--- hw/xbox/nv2a/pgraph/gl/renderer.h | 1 - hw/xbox/nv2a/pgraph/gl/shaders.c | 23 ----------------------- hw/xbox/nv2a/pgraph/glsl/vsh-ff.c | 17 ++++++++++------- hw/xbox/nv2a/pgraph/glsl/vsh-prog.c | 24 ++++++++++-------------- hw/xbox/nv2a/pgraph/vk/renderer.h | 1 - hw/xbox/nv2a/pgraph/vk/shaders.c | 23 ----------------------- 6 files changed, 20 insertions(+), 69 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.h b/hw/xbox/nv2a/pgraph/gl/renderer.h index d1a64bb024..b9074d9644 100644 --- a/hw/xbox/nv2a/pgraph/gl/renderer.h +++ b/hw/xbox/nv2a/pgraph/gl/renderer.h @@ -111,7 +111,6 @@ typedef struct ShaderBinding { GLint vsh_constant_loc[NV2A_VERTEXSHADER_CONSTANTS]; uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4]; - GLint inv_viewport_loc; GLint ltctxa_loc[NV2A_LTCTXA_COUNT]; GLint ltctxb_loc[NV2A_LTCTXB_COUNT]; GLint ltc1_loc[NV2A_LTC1_COUNT]; diff --git a/hw/xbox/nv2a/pgraph/gl/shaders.c b/hw/xbox/nv2a/pgraph/gl/shaders.c index af3cfd2f40..dbf555a621 100644 --- a/hw/xbox/nv2a/pgraph/gl/shaders.c +++ b/hw/xbox/nv2a/pgraph/gl/shaders.c @@ -158,7 +158,6 @@ static void update_shader_constant_locations(ShaderBinding *binding) binding->fog_color_loc = glGetUniformLocation(binding->gl_program, "fogColor"); binding->fog_param_loc = glGetUniformLocation(binding->gl_program, "fogParam"); - binding->inv_viewport_loc = glGetUniformLocation(binding->gl_program, "invViewport"); for (int i = 0; i < NV2A_LTCTXA_COUNT; i++) { snprintf(tmp, sizeof(tmp), "ltctxa[%d]", i); binding->ltctxa_loc[i] = glGetUniformLocation(binding->gl_program, tmp); @@ -847,28 +846,6 @@ static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, if (binding->specular_power_loc != -1) { glUniform1f(binding->specular_power_loc, pg->specular_power); } - - /* estimate the viewport by assuming it matches the surface ... 
*/ - unsigned int aa_width = 1, aa_height = 1; - pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height); - - float m11 = 0.5 * (pg->surface_binding_dim.width/aa_width); - float m22 = -0.5 * (pg->surface_binding_dim.height/aa_height); - float m33 = zmax; - float m41 = *(float*)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][0]; - float m42 = *(float*)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][1]; - - float invViewport[16] = { - 1.0/m11, 0, 0, 0, - 0, 1.0/m22, 0, 0, - 0, 0, 1.0/m33, 0, - -1.0+m41/m11, 1.0+m42/m22, 0, 1.0 - }; - - if (binding->inv_viewport_loc != -1) { - glUniformMatrix4fv(binding->inv_viewport_loc, - 1, GL_FALSE, &invViewport[0]); - } } /* update vertex program constants */ diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c index 6bc637d582..02b1a8fda8 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c @@ -115,8 +115,6 @@ GLSL_DEFINE(sceneAmbientColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_FR_AMB) ".xyz") GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz") "\n" ); - mstring_append_fmt(uniforms, -"%smat4 invViewport;\n", u); /* Skinning */ unsigned int count; @@ -471,13 +469,18 @@ GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz } mstring_append(body, - " oPos = tPosition * compositeMat;\n" - " oPos.w = clampAwayZeroInf(oPos.w);\n" - " oPos = invViewport * oPos;\n" + " oPos = tPosition * compositeMat;\n" + " oPos.z = oPos.z / clipRange.y;\n" + " oPos.w = clampAwayZeroInf(oPos.w);\n" + " oPos.xy /= oPos.w;\n" + " oPos.xy += c[" stringify(NV_IGRAPH_XF_XFCTX_VPOFF) "].xy;\n" + " oPos.xy = floor(oPos.xy * 16.0f) / 16.0f;\n" + " oPos.xy = (2.0f * oPos.xy - surfaceSize) / surfaceSize;\n" + " oPos.xy *= oPos.w;\n" ); - if (state->vulkan) { - mstring_append(body, " oPos.y *= -1;\n"); + if (!state->vulkan) { + mstring_append(body, " oPos.y = -oPos.y;\n"); } /* FIXME: Testing */ diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c 
b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c index 3e0ab5fbba..e26d0c0304 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c @@ -821,22 +821,14 @@ void pgraph_gen_vsh_prog_glsl(uint16_t version, assert(has_final); mstring_append(body, - /* the shaders leave the result in screen space, while - * opengl expects it in clip space. - * TODO: the pixel-center co-ordinate differences should handled + /* The shaders leave the result in screen space, while OpenGL expects it + * in clip space. Xbox NV2A rasterizer appears to have 4 bit precision + * fixed point fractional part and to convert floating point coordinates + * by flooring. */ - " oPos.x = 2.0 * (oPos.x - surfaceSize.x * 0.5) / surfaceSize.x;\n" - ); + " oPos.xy = floor(oPos.xy * 16.0f) / 16.0f;\n" + " oPos.xy = (2.0f * oPos.xy - surfaceSize) / surfaceSize;\n" - if (vulkan) { - mstring_append(body, - " oPos.y = 2.0 * oPos.y / surfaceSize.y - 1.0;\n"); - } else { - mstring_append(body, " oPos.y = -2.0 * (oPos.y - surfaceSize.y * 0.5) " - "/ surfaceSize.y;\n"); - } - - mstring_append(body, " oPos.z = oPos.z / clipRange.y;\n" " oPos.w = clampAwayZeroInf(oPos.w);\n" @@ -849,4 +841,8 @@ void pgraph_gen_vsh_prog_glsl(uint16_t version, */ " oPos.xyz *= oPos.w;\n" ); + + if (!vulkan) { + mstring_append(body, " oPos.y = -oPos.y;\n"); + } } diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h index 4112517e58..91bbf0f31d 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -179,7 +179,6 @@ typedef struct ShaderBinding { int vsh_constant_loc; uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4]; - int inv_viewport_loc; int ltctxa_loc; int ltctxb_loc; int ltc1_loc; diff --git a/hw/xbox/nv2a/pgraph/vk/shaders.c b/hw/xbox/nv2a/pgraph/vk/shaders.c index 78122c701d..0a6e8a2b5c 100644 --- a/hw/xbox/nv2a/pgraph/vk/shaders.c +++ b/hw/xbox/nv2a/pgraph/vk/shaders.c @@ -283,8 +283,6 @@ static void 
update_shader_constant_locations(ShaderBinding *binding) binding->fog_param_loc = uniform_index(&binding->vertex->uniforms, "fogParam"); - binding->inv_viewport_loc = - uniform_index(&binding->vertex->uniforms, "invViewport"); binding->ltctxa_loc = uniform_index(&binding->vertex->uniforms, "ltctxa"); binding->ltctxb_loc = uniform_index(&binding->vertex->uniforms, "ltctxb"); binding->ltc1_loc = uniform_index(&binding->vertex->uniforms, "ltc1"); @@ -617,27 +615,6 @@ static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, uniform1f(&binding->vertex->uniforms, binding->specular_power_loc, pg->specular_power); } - - /* estimate the viewport by assuming it matches the surface ... */ - unsigned int aa_width = 1, aa_height = 1; - pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height); - - float m11 = 0.5 * (pg->surface_binding_dim.width / aa_width); - float m22 = -0.5 * (pg->surface_binding_dim.height / aa_height); - float m33 = zmax; - float m41 = *(float *)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][0]; - float m42 = *(float *)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][1]; - - float invViewport[16] = { - 1.0 / m11, 0, 0, 0, 0, 1.0 / m22, 0, - 0, 0, 0, 1.0 / m33, 0, -1.0 + m41 / m11, 1.0 + m42 / m22, - 0, 1.0 - }; - - if (binding->inv_viewport_loc != -1) { - uniformMatrix4fv(&binding->vertex->uniforms, - binding->inv_viewport_loc, &invViewport[0]); - } } /* update vertex program constants */ From a316d7487219e0412360097836c17b65ff877953 Mon Sep 17 00:00:00 2001 From: coldhex Date: Mon, 5 May 2025 21:56:31 +0300 Subject: [PATCH 10/12] nv2a: Use trunc in vertex rounding instead of floor Xbox seems to truncate instead of flooring, which can be inferred from interpolated depth buffer values. 
--- hw/xbox/nv2a/pgraph/glsl/vsh-ff.c | 2 +- hw/xbox/nv2a/pgraph/glsl/vsh-prog.c | 6 ++---- hw/xbox/nv2a/pgraph/glsl/vsh.c | 7 +++++++ 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c index 02b1a8fda8..8cee360997 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c @@ -474,7 +474,7 @@ GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz " oPos.w = clampAwayZeroInf(oPos.w);\n" " oPos.xy /= oPos.w;\n" " oPos.xy += c[" stringify(NV_IGRAPH_XF_XFCTX_VPOFF) "].xy;\n" - " oPos.xy = floor(oPos.xy * 16.0f) / 16.0f;\n" + " oPos.xy = roundScreenCoords(oPos.xy);\n" " oPos.xy = (2.0f * oPos.xy - surfaceSize) / surfaceSize;\n" " oPos.xy *= oPos.w;\n" ); diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c index e26d0c0304..3068684d32 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c @@ -822,11 +822,9 @@ void pgraph_gen_vsh_prog_glsl(uint16_t version, mstring_append(body, /* The shaders leave the result in screen space, while OpenGL expects it - * in clip space. Xbox NV2A rasterizer appears to have 4 bit precision - * fixed point fractional part and to convert floating point coordinates - * by flooring. + * in clip space. 
*/ - " oPos.xy = floor(oPos.xy * 16.0f) / 16.0f;\n" + " oPos.xy = roundScreenCoords(oPos.xy);\n" " oPos.xy = (2.0f * oPos.xy - surfaceSize) / surfaceSize;\n" " oPos.z = oPos.z / clipRange.y;\n" diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh.c b/hw/xbox/nv2a/pgraph/glsl/vsh.c index f3c5dd5a43..b6b7045186 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh.c @@ -85,6 +85,13 @@ MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs) "\n" "vec4 NaNToOne(vec4 src) {\n" " return mix(src, vec4(1.0), isnan(src));\n" + "}\n" + "\n" + // Xbox NV2A rasterizer appears to have 4 bit precision fixed-point + // fractional part and to convert floating-point coordinates by + // truncating (not flooring). + "vec2 roundScreenCoords(vec2 pos) {\n" + " return trunc(pos * 16.0f) / 16.0f;\n" "}\n"); pgraph_get_glsl_vtx_header(header, state->vulkan, state->smooth_shading, From ce936bccdd70a18c28f35ffada8cb8c934fa0d74 Mon Sep 17 00:00:00 2001 From: coldhex Date: Tue, 20 May 2025 22:19:33 +0300 Subject: [PATCH 11/12] nv2a/gl: y-flipped rendering to framebuffer object Render scenes upside-down to framebuffer objects (FBO). The strange thing about rendering to OpenGL FBO is that it follows the bottom-left triangle rasterization rule with common PC GPUs. At least Intel and AMD. NVIDIA to be tested. My raster-rule-test github gist demonstrates this. This commit flips coordinates in y-direction, which effectively turns the bottom-left rule into top-left rule needed for Xbox compatibility. This (together with the previous commit) fixes Midtown Madness 3 Seine water rectangular seam rendering artifacts (and the remaining seams are present with Xbox hardware too.) May fix similar artifacts in other games.
--- hw/xbox/nv2a/pgraph/gl/display.c | 2 +- hw/xbox/nv2a/pgraph/gl/draw.c | 5 ++-- hw/xbox/nv2a/pgraph/gl/shaders.c | 6 +---- hw/xbox/nv2a/pgraph/gl/surface.c | 42 ++++++++++++++++------------- hw/xbox/nv2a/pgraph/glsl/vsh-ff.c | 4 --- hw/xbox/nv2a/pgraph/glsl/vsh-prog.c | 4 --- 6 files changed, 27 insertions(+), 36 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/gl/display.c b/hw/xbox/nv2a/pgraph/gl/display.c index 6d52a5c3b3..47400cbbd0 100644 --- a/hw/xbox/nv2a/pgraph/gl/display.c +++ b/hw/xbox/nv2a/pgraph/gl/display.c @@ -68,7 +68,7 @@ void pgraph_gl_init_display(NV2AState *d) "{\n" " vec2 texCoord = gl_FragCoord.xy/display_size;\n" " float rel = display_size.y/textureSize(tex, 0).y/line_offset;\n" - " texCoord.y = 1 + rel*(texCoord.y - 1);" + " texCoord.y = rel*(1.0f - texCoord.y);" " out_Color.rgba = texture(tex, texCoord);\n" " if (pvideo_enable) {\n" " vec2 screenCoord = gl_FragCoord.xy - 0.5;\n" diff --git a/hw/xbox/nv2a/pgraph/gl/draw.c b/hw/xbox/nv2a/pgraph/gl/draw.c index bfa92662e7..79c18040f9 100644 --- a/hw/xbox/nv2a/pgraph/gl/draw.c +++ b/hw/xbox/nv2a/pgraph/gl/draw.c @@ -92,7 +92,6 @@ void pgraph_gl_clear_surface(NV2AState *d, uint32_t parameter) scissor_height = ymax - ymin + 1; pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin); pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height); - ymin = pg->surface_binding_dim.height - (ymin + scissor_height); NV2A_DPRINTF("Translated clear rect to %d,%d - %d,%d\n", xmin, ymin, xmin + scissor_width - 1, ymin + scissor_height - 1); @@ -204,9 +203,10 @@ void pgraph_gl_draw_begin(NV2AState *d) } /* Front-face select */ + /* Winding is reverse here because clip-space y-coordinates are inverted */ glFrontFace(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & NV_PGRAPH_SETUPRASTER_FRONTFACE - ? GL_CCW : GL_CW); + ? GL_CW : GL_CCW); /* Polygon offset */ /* FIXME: GL implementation-specific, maybe do this in VS? 
*/ @@ -340,7 +340,6 @@ void pgraph_gl_draw_begin(NV2AState *d) pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin); pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height); - ymin = pg->surface_binding_dim.height - (ymin + scissor_height); pgraph_apply_scaling_factor(pg, &xmin, &ymin); pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height); diff --git a/hw/xbox/nv2a/pgraph/gl/shaders.c b/hw/xbox/nv2a/pgraph/gl/shaders.c index dbf555a621..742e3c2881 100644 --- a/hw/xbox/nv2a/pgraph/gl/shaders.c +++ b/hw/xbox/nv2a/pgraph/gl/shaders.c @@ -924,12 +924,8 @@ static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, pgraph_apply_scaling_factor(pg, &x_min, &y_min); pgraph_apply_scaling_factor(pg, &x_max, &y_max); - /* Translate for the GL viewport origin */ - int y_min_xlat = MAX((int)max_gl_height - (int)y_max, 0); - int y_max_xlat = MIN((int)max_gl_height - (int)y_min, max_gl_height); - glUniform4i(r->shader_binding->clip_region_loc[i], - x_min, y_min_xlat, x_max, y_max_xlat); + x_min, y_min, x_max, y_max); } for (i = 0; i < 8; ++i) { diff --git a/hw/xbox/nv2a/pgraph/gl/surface.c b/hw/xbox/nv2a/pgraph/gl/surface.c index 53df185130..ab63dd4e3b 100644 --- a/hw/xbox/nv2a/pgraph/gl/surface.c +++ b/hw/xbox/nv2a/pgraph/gl/surface.c @@ -137,11 +137,7 @@ static void init_render_to_texture(PGRAPHState *pg) "layout(location = 0) out vec4 out_Color;\n" "void main()\n" "{\n" - " vec2 texCoord;\n" - " texCoord.x = gl_FragCoord.x;\n" - " texCoord.y = (surface_size.y - gl_FragCoord.y)\n" - " + (textureSize(tex,0).y - surface_size.y);\n" - " texCoord /= textureSize(tex,0).xy;\n" + " vec2 texCoord = gl_FragCoord.xy / textureSize(tex, 0).xy;\n" " out_Color.rgba = texture(tex, texCoord);\n" "}\n"; @@ -298,7 +294,7 @@ static void render_surface_to_texture_slow(NV2AState *d, size_t bufsize = width * height * surface->fmt.bytes_per_pixel; uint8_t *buf = g_malloc(bufsize); - surface_download_to_buffer(d, surface, false, true, false, buf); + 
surface_download_to_buffer(d, surface, false, false, false, buf); width = texture_shape->width; height = texture_shape->height; @@ -738,7 +734,7 @@ static void surface_download(NV2AState *d, SurfaceBinding *surface, bool force) nv2a_profile_inc_counter(NV2A_PROF_SURF_DOWNLOAD); - surface_download_to_buffer(d, surface, true, true, true, + surface_download_to_buffer(d, surface, true, false, true, d->vram_ptr + surface->vram_addr); memory_region_set_client_dirty(d->vram, surface->vram_addr, @@ -875,20 +871,26 @@ void pgraph_gl_upload_surface_data(NV2AState *d, SurfaceBinding *surface, surface->fmt.bytes_per_pixel); } - /* FIXME: Replace this flip/scaling */ + /* FIXME: Replace this scaling */ // This is VRAM so we can't do this inplace! - uint8_t *flipped_buf = (uint8_t *)g_malloc( - surface->height * surface->width * surface->fmt.bytes_per_pixel); - unsigned int irow; - for (irow = 0; irow < surface->height; irow++) { - memcpy(&flipped_buf[surface->width * (surface->height - irow - 1) - * surface->fmt.bytes_per_pixel], - &buf[surface->pitch * irow], - surface->width * surface->fmt.bytes_per_pixel); + uint8_t *optimal_buf = buf; + unsigned int optimal_pitch = surface->width * surface->fmt.bytes_per_pixel; + + if (surface->pitch != optimal_pitch) { + optimal_buf = (uint8_t *)g_malloc(surface->height * optimal_pitch); + + uint8_t *src = buf; + uint8_t *dst = optimal_buf; + unsigned int irow; + for (irow = 0; irow < surface->height; irow++) { + memcpy(dst, src, optimal_pitch); + src += surface->pitch; + dst += optimal_pitch; + } } - uint8_t *gl_read_buf = flipped_buf; + uint8_t *gl_read_buf = optimal_buf; unsigned int width = surface->width, height = surface->height; if (pg->surface_scale_factor > 1) { @@ -896,7 +898,7 @@ void pgraph_gl_upload_surface_data(NV2AState *d, SurfaceBinding *surface, pg->scale_buf = (uint8_t *)g_realloc( pg->scale_buf, width * height * surface->fmt.bytes_per_pixel); gl_read_buf = pg->scale_buf; - uint8_t *out = gl_read_buf, *in = flipped_buf; 
+ uint8_t *out = gl_read_buf, *in = optimal_buf; surface_copy_expand(out, in, surface->width, surface->height, surface->fmt.bytes_per_pixel, d->pgraph.surface_scale_factor); @@ -915,7 +917,9 @@ void pgraph_gl_upload_surface_data(NV2AState *d, SurfaceBinding *surface, height, 0, surface->fmt.gl_format, surface->fmt.gl_type, gl_read_buf); glPixelStorei(GL_UNPACK_ALIGNMENT, prev_unpack_alignment); - g_free(flipped_buf); + if (optimal_buf != buf) { + g_free(optimal_buf); + } if (surface->swizzle) { g_free(buf); } diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c index 8cee360997..fb07a12a4b 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c @@ -479,10 +479,6 @@ GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz " oPos.xy *= oPos.w;\n" ); - if (!state->vulkan) { - mstring_append(body, " oPos.y = -oPos.y;\n"); - } - /* FIXME: Testing */ if (state->point_params_enable) { mstring_append_fmt(uniforms, "%sfloat pointParams[8];\n", u); diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c index 3068684d32..fd48979447 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c @@ -839,8 +839,4 @@ void pgraph_gen_vsh_prog_glsl(uint16_t version, */ " oPos.xyz *= oPos.w;\n" ); - - if (!vulkan) { - mstring_append(body, " oPos.y = -oPos.y;\n"); - } } From 8667193001e4d9f20f9d5237d941f6f0ff3a2672 Mon Sep 17 00:00:00 2001 From: Erik Abair Date: Tue, 20 May 2025 13:02:00 -0700 Subject: [PATCH 12/12] nv2a: Prevent NaN in specular power factor calculation --- hw/xbox/nv2a/pgraph/glsl/vsh-ff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c index fb07a12a4b..daf821f3f1 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c @@ -358,7 +358,7 @@ GLSL_DEFINE(materialEmissionColor, 
GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz mstring_append_fmt(body, " float pf;\n" - " if (nDotVP == 0.0) {\n" + " if (nDotVP == 0.0 || nDotHV == 0.0) {\n" " pf = 0.0;\n" " } else {\n" " pf = pow(nDotHV, specularPower);\n"