From ce936bccdd70a18c28f35ffada8cb8c934fa0d74 Mon Sep 17 00:00:00 2001 From: coldhex Date: Tue, 20 May 2025 22:19:33 +0300 Subject: [PATCH] nv2a/gl: y-flipped rendering to framebuffer object Render scenes upside-down to framebuffer objects (FBO). The strange thing about rendering to an OpenGL FBO is that it follows the bottom-left triangle rasterization rule on common PC GPUs (at least Intel and AMD; NVIDIA is still to be tested). My raster-rule-test GitHub gist demonstrates this. This commit flips coordinates in the y-direction, which effectively turns the bottom-left rule into the top-left rule needed for Xbox compatibility. This (together with the previous commit) fixes the rectangular seam rendering artifacts in the Seine water in Midtown Madness 3 (the remaining seams are present on Xbox hardware too). May fix similar artifacts in other games. --- hw/xbox/nv2a/pgraph/gl/display.c | 2 +- hw/xbox/nv2a/pgraph/gl/draw.c | 5 ++-- hw/xbox/nv2a/pgraph/gl/shaders.c | 6 +---- hw/xbox/nv2a/pgraph/gl/surface.c | 42 ++++++++++++++++------------- hw/xbox/nv2a/pgraph/glsl/vsh-ff.c | 4 --- hw/xbox/nv2a/pgraph/glsl/vsh-prog.c | 4 --- 6 files changed, 27 insertions(+), 36 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/gl/display.c b/hw/xbox/nv2a/pgraph/gl/display.c index 6d52a5c3b3..47400cbbd0 100644 --- a/hw/xbox/nv2a/pgraph/gl/display.c +++ b/hw/xbox/nv2a/pgraph/gl/display.c @@ -68,7 +68,7 @@ void pgraph_gl_init_display(NV2AState *d) "{\n" " vec2 texCoord = gl_FragCoord.xy/display_size;\n" " float rel = display_size.y/textureSize(tex, 0).y/line_offset;\n" - " texCoord.y = 1 + rel*(texCoord.y - 1);" + " texCoord.y = rel*(1.0f - texCoord.y);" " out_Color.rgba = texture(tex, texCoord);\n" " if (pvideo_enable) {\n" " vec2 screenCoord = gl_FragCoord.xy - 0.5;\n" diff --git a/hw/xbox/nv2a/pgraph/gl/draw.c b/hw/xbox/nv2a/pgraph/gl/draw.c index bfa92662e7..79c18040f9 100644 --- a/hw/xbox/nv2a/pgraph/gl/draw.c +++ b/hw/xbox/nv2a/pgraph/gl/draw.c @@ -92,7 +92,6 @@ void pgraph_gl_clear_surface(NV2AState *d, 
uint32_t parameter) scissor_height = ymax - ymin + 1; pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin); pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height); - ymin = pg->surface_binding_dim.height - (ymin + scissor_height); NV2A_DPRINTF("Translated clear rect to %d,%d - %d,%d\n", xmin, ymin, xmin + scissor_width - 1, ymin + scissor_height - 1); @@ -204,9 +203,10 @@ void pgraph_gl_draw_begin(NV2AState *d) } /* Front-face select */ + /* Winding is reverse here because clip-space y-coordinates are inverted */ glFrontFace(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & NV_PGRAPH_SETUPRASTER_FRONTFACE - ? GL_CCW : GL_CW); + ? GL_CW : GL_CCW); /* Polygon offset */ /* FIXME: GL implementation-specific, maybe do this in VS? */ @@ -340,7 +340,6 @@ void pgraph_gl_draw_begin(NV2AState *d) pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin); pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height); - ymin = pg->surface_binding_dim.height - (ymin + scissor_height); pgraph_apply_scaling_factor(pg, &xmin, &ymin); pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height); diff --git a/hw/xbox/nv2a/pgraph/gl/shaders.c b/hw/xbox/nv2a/pgraph/gl/shaders.c index dbf555a621..742e3c2881 100644 --- a/hw/xbox/nv2a/pgraph/gl/shaders.c +++ b/hw/xbox/nv2a/pgraph/gl/shaders.c @@ -924,12 +924,8 @@ static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, pgraph_apply_scaling_factor(pg, &x_min, &y_min); pgraph_apply_scaling_factor(pg, &x_max, &y_max); - /* Translate for the GL viewport origin */ - int y_min_xlat = MAX((int)max_gl_height - (int)y_max, 0); - int y_max_xlat = MIN((int)max_gl_height - (int)y_min, max_gl_height); - glUniform4i(r->shader_binding->clip_region_loc[i], - x_min, y_min_xlat, x_max, y_max_xlat); + x_min, y_min, x_max, y_max); } for (i = 0; i < 8; ++i) { diff --git a/hw/xbox/nv2a/pgraph/gl/surface.c b/hw/xbox/nv2a/pgraph/gl/surface.c index 53df185130..ab63dd4e3b 100644 --- a/hw/xbox/nv2a/pgraph/gl/surface.c +++ 
b/hw/xbox/nv2a/pgraph/gl/surface.c @@ -137,11 +137,7 @@ static void init_render_to_texture(PGRAPHState *pg) "layout(location = 0) out vec4 out_Color;\n" "void main()\n" "{\n" - " vec2 texCoord;\n" - " texCoord.x = gl_FragCoord.x;\n" - " texCoord.y = (surface_size.y - gl_FragCoord.y)\n" - " + (textureSize(tex,0).y - surface_size.y);\n" - " texCoord /= textureSize(tex,0).xy;\n" + " vec2 texCoord = gl_FragCoord.xy / textureSize(tex, 0).xy;\n" " out_Color.rgba = texture(tex, texCoord);\n" "}\n"; @@ -298,7 +294,7 @@ static void render_surface_to_texture_slow(NV2AState *d, size_t bufsize = width * height * surface->fmt.bytes_per_pixel; uint8_t *buf = g_malloc(bufsize); - surface_download_to_buffer(d, surface, false, true, false, buf); + surface_download_to_buffer(d, surface, false, false, false, buf); width = texture_shape->width; height = texture_shape->height; @@ -738,7 +734,7 @@ static void surface_download(NV2AState *d, SurfaceBinding *surface, bool force) nv2a_profile_inc_counter(NV2A_PROF_SURF_DOWNLOAD); - surface_download_to_buffer(d, surface, true, true, true, + surface_download_to_buffer(d, surface, true, false, true, d->vram_ptr + surface->vram_addr); memory_region_set_client_dirty(d->vram, surface->vram_addr, @@ -875,20 +871,26 @@ void pgraph_gl_upload_surface_data(NV2AState *d, SurfaceBinding *surface, surface->fmt.bytes_per_pixel); } - /* FIXME: Replace this flip/scaling */ + /* FIXME: Replace this scaling */ // This is VRAM so we can't do this inplace! 
- uint8_t *flipped_buf = (uint8_t *)g_malloc( - surface->height * surface->width * surface->fmt.bytes_per_pixel); - unsigned int irow; - for (irow = 0; irow < surface->height; irow++) { - memcpy(&flipped_buf[surface->width * (surface->height - irow - 1) - * surface->fmt.bytes_per_pixel], - &buf[surface->pitch * irow], - surface->width * surface->fmt.bytes_per_pixel); + uint8_t *optimal_buf = buf; + unsigned int optimal_pitch = surface->width * surface->fmt.bytes_per_pixel; + + if (surface->pitch != optimal_pitch) { + optimal_buf = (uint8_t *)g_malloc(surface->height * optimal_pitch); + + uint8_t *src = buf; + uint8_t *dst = optimal_buf; + unsigned int irow; + for (irow = 0; irow < surface->height; irow++) { + memcpy(dst, src, optimal_pitch); + src += surface->pitch; + dst += optimal_pitch; + } } - uint8_t *gl_read_buf = flipped_buf; + uint8_t *gl_read_buf = optimal_buf; unsigned int width = surface->width, height = surface->height; if (pg->surface_scale_factor > 1) { @@ -896,7 +898,7 @@ void pgraph_gl_upload_surface_data(NV2AState *d, SurfaceBinding *surface, pg->scale_buf = (uint8_t *)g_realloc( pg->scale_buf, width * height * surface->fmt.bytes_per_pixel); gl_read_buf = pg->scale_buf; - uint8_t *out = gl_read_buf, *in = flipped_buf; + uint8_t *out = gl_read_buf, *in = optimal_buf; surface_copy_expand(out, in, surface->width, surface->height, surface->fmt.bytes_per_pixel, d->pgraph.surface_scale_factor); @@ -915,7 +917,9 @@ void pgraph_gl_upload_surface_data(NV2AState *d, SurfaceBinding *surface, height, 0, surface->fmt.gl_format, surface->fmt.gl_type, gl_read_buf); glPixelStorei(GL_UNPACK_ALIGNMENT, prev_unpack_alignment); - g_free(flipped_buf); + if (optimal_buf != buf) { + g_free(optimal_buf); + } if (surface->swizzle) { g_free(buf); } diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c index 8cee360997..fb07a12a4b 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c @@ -479,10 +479,6 @@ 
GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz " oPos.xy *= oPos.w;\n" ); - if (!state->vulkan) { - mstring_append(body, " oPos.y = -oPos.y;\n"); - } - /* FIXME: Testing */ if (state->point_params_enable) { mstring_append_fmt(uniforms, "%sfloat pointParams[8];\n", u); diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c index 3068684d32..fd48979447 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c @@ -839,8 +839,4 @@ void pgraph_gen_vsh_prog_glsl(uint16_t version, */ " oPos.xyz *= oPos.w;\n" ); - - if (!vulkan) { - mstring_append(body, " oPos.y = -oPos.y;\n"); - } }