From 0d84befb82ec862b22b117958f0641857d9e1590 Mon Sep 17 00:00:00 2001 From: Erik Abair Date: Wed, 1 Jun 2022 20:56:38 -0700 Subject: [PATCH] nv2a: Implement support for border textures --- hw/xbox/nv2a/nv2a_int.h | 1 + hw/xbox/nv2a/pgraph.c | 123 +++++++++++++++++++++++++++++++++++----- hw/xbox/nv2a/psh.c | 69 +++++++++++++++++----- hw/xbox/nv2a/psh.h | 3 + 4 files changed, 167 insertions(+), 29 deletions(-) diff --git a/hw/xbox/nv2a/nv2a_int.h b/hw/xbox/nv2a/nv2a_int.h index c46a42db05..17c4730198 100644 --- a/hw/xbox/nv2a/nv2a_int.h +++ b/hw/xbox/nv2a/nv2a_int.h @@ -164,6 +164,7 @@ typedef struct TextureShape { unsigned int color_format; unsigned int levels; unsigned int width, height, depth; + bool border; unsigned int min_mipmap_level, max_mipmap_level; unsigned int pitch; diff --git a/hw/xbox/nv2a/pgraph.c b/hw/xbox/nv2a/pgraph.c index 9f7ccfb33d..5d815683de 100644 --- a/hw/xbox/nv2a/pgraph.c +++ b/hw/xbox/nv2a/pgraph.c @@ -4526,12 +4526,45 @@ static void pgraph_bind_shaders(PGRAPHState *pg) state.psh.alphakill[i] = ctl_0 & NV_PGRAPH_TEXCTL0_0_ALPHAKILLEN; - unsigned int color_format = - GET_MASK(pg->regs[NV_PGRAPH_TEXFMT0 + i*4], - NV_PGRAPH_TEXFMT0_COLOR); + uint32_t tex_fmt = pg->regs[NV_PGRAPH_TEXFMT0 + i*4]; + unsigned int color_format = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_COLOR); ColorFormatInfo f = kelvin_color_format_map[color_format]; state.psh.rect_tex[i] = f.linear; + uint32_t border_source = GET_MASK(tex_fmt, + NV_PGRAPH_TEXFMT0_BORDER_SOURCE); + bool cubemap = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE); + state.psh.border_logical_size[i][0] = 0.0f; + state.psh.border_logical_size[i][1] = 0.0f; + if (border_source != NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR) { + if (!f.linear && !cubemap) { + // The actual texture will be (at least) double the reported + // size and shifted by a 4 texel border but texture coordinates + // will still be relative to the reported size. 
+ unsigned int reported_width = + 1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_U); + unsigned int reported_height = + 1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_V); + state.psh.border_logical_size[i][0] = reported_width; + state.psh.border_logical_size[i][1] = reported_height; + if (reported_width < 8) { + state.psh.border_inv_real_size[i][0] = 0.0625f; + } else { + state.psh.border_inv_real_size[i][0] = + 1.0f / (reported_width * 2.0f); + } + if (reported_height < 8) { + state.psh.border_inv_real_size[i][1] = 0.0625f; + } else { + state.psh.border_inv_real_size[i][1] = + 1.0f / (reported_height * 2.0f); + } + } else { + NV2A_UNIMPLEMENTED("Border source texture with linear %d cubemap %d", + f.linear, cubemap); + } + } + /* Keep track of whether texture data has been loaded as signed * normalized integers or not. This dictates whether or not we will need * to re-map in fragment shader for certain texture modes (e.g. @@ -6540,6 +6573,7 @@ static void pgraph_bind_textures(NV2AState *d) state.min_mipmap_level = min_mipmap_level; state.max_mipmap_level = max_mipmap_level; state.pitch = pitch; + state.border = border_source != NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR; /* * Check active surfaces to see if this texture was a render target @@ -6677,7 +6711,7 @@ static void pgraph_bind_textures(NV2AState *d) } /* FIXME: Only upload if necessary? 
[s, t or r = GL_CLAMP_TO_BORDER] */ - if (border_source == NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR) { + if (!state.border) { GLfloat gl_border_color[] = { /* FIXME: Color channels might be wrong order */ ((border_color >> 16) & 0xFF) / 255.0f, /* red */ @@ -7093,6 +7127,15 @@ static void upload_gl_texture(GLenum gl_target, ColorFormatInfo f = kelvin_color_format_map[s.color_format]; nv2a_profile_inc_counter(NV2A_PROF_TEX_UPLOAD); + unsigned int adjusted_width = s.width; + unsigned int adjusted_height = s.height; + unsigned int adjusted_pitch = s.pitch; + if (!f.linear && s.border) { + adjusted_width = MAX(16, adjusted_width * 2); + adjusted_height = MAX(16, adjusted_height * 2); + adjusted_pitch = adjusted_width * (s.pitch / s.width); + } + switch(gl_target) { case GL_TEXTURE_1D: assert(false); @@ -7103,11 +7146,13 @@ static void upload_gl_texture(GLenum gl_target, uint8_t *converted = convert_texture_data(s, texture_data, palette_data, - s.width, s.height, 1, - s.pitch, 0); - glPixelStorei(GL_UNPACK_ROW_LENGTH, converted ? 0 : s.pitch / f.bytes_per_pixel); + adjusted_width, + adjusted_height, 1, + adjusted_pitch, 0); + glPixelStorei(GL_UNPACK_ROW_LENGTH, + converted ? 0 : adjusted_pitch / f.bytes_per_pixel); glTexImage2D(gl_target, 0, f.gl_internal_format, - s.width, s.height, 0, + adjusted_width, adjusted_height, 0, f.gl_format, f.gl_type, converted ? 
converted : texture_data); @@ -7126,7 +7171,7 @@ static void upload_gl_texture(GLenum gl_target, case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: { - unsigned int width = s.width, height = s.height; + unsigned int width = adjusted_width, height = adjusted_height; int level; for (level = 0; level < s.levels; level++) { @@ -7146,12 +7191,35 @@ static void upload_gl_texture(GLenum gl_target, uint8_t *converted = decompress_2d_texture_data( f.gl_internal_format, texture_data, physical_width, physical_height); - glTexImage2D(gl_target, level, GL_RGBA, width, height, 0, + unsigned int tex_width = width; + unsigned int tex_height = height; + + if (s.cubemap && adjusted_width != s.width) { + // FIXME: Consider preserving the border. + // There does not seem to be a way to reference the border + // texels in a cubemap, so they are discarded. + glPixelStorei(GL_UNPACK_SKIP_PIXELS, 4); + glPixelStorei(GL_UNPACK_SKIP_ROWS, 4); + tex_width = s.width; + tex_height = s.height; + if (physical_width == width) { + glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width); + } + } + + glTexImage2D(gl_target, level, GL_RGBA, tex_width, tex_height, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, converted); g_free(converted); if (physical_width != width) { glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); } + if (s.cubemap && adjusted_width != s.width) { + glPixelStorei(GL_UNPACK_SKIP_PIXELS, 0); + glPixelStorei(GL_UNPACK_SKIP_ROWS, 0); + if (physical_width == width) { + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + } + } texture_data += physical_width / 4 * physical_height / 4 * block_size; } else { @@ -7163,9 +7231,26 @@ static void upload_gl_texture(GLenum gl_target, palette_data, width, height, 1, pitch, 0); - glTexImage2D(gl_target, level, f.gl_internal_format, width, - height, 0, f.gl_format, f.gl_type, - converted ? converted : unswizzled); + uint8_t *pixel_data = converted ? 
converted : unswizzled; + unsigned int tex_width = width; + unsigned int tex_height = height; + + if (s.cubemap && adjusted_width != s.width) { + // FIXME: Consider preserving the border. + // There does not seem to be a way to reference the border + // texels in a cubemap, so they are discarded. + glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width); + tex_width = s.width; + tex_height = s.height; + pixel_data += 4 * f.bytes_per_pixel + 4 * pitch; + } + + glTexImage2D(gl_target, level, f.gl_internal_format, tex_width, + tex_height, 0, f.gl_format, f.gl_type, + pixel_data); + if (s.cubemap && s.border) { + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + } if (converted) { g_free(converted); } @@ -7182,7 +7267,9 @@ static void upload_gl_texture(GLenum gl_target, } case GL_TEXTURE_3D: { - unsigned int width = s.width, height = s.height, depth = s.depth; + unsigned int width = adjusted_width; + unsigned int height = adjusted_height; + unsigned int depth = s.depth; assert(f.linear == false); @@ -7315,7 +7402,13 @@ static TextureBinding* generate_texture(const TextureShape s, } size_t length = 0; - unsigned int w = s.width, h = s.height; + unsigned int w = s.width; + unsigned int h = s.height; + if (!f.linear && s.border) { + w = MAX(16, w * 2); + h = MAX(16, h * 2); + } + int level; for (level = 0; level < s.levels; level++) { if (f.gl_format == 0) { diff --git a/hw/xbox/nv2a/psh.c b/hw/xbox/nv2a/psh.c index a5148106a4..79cf61f67b 100644 --- a/hw/xbox/nv2a/psh.c +++ b/hw/xbox/nv2a/psh.c @@ -665,6 +665,26 @@ static void psh_append_shadowmap(const struct PixelShader *ps, int i, bool compa } } +// Adjust the s, t coordinates in the given VAR to account for the 4 texel +// border supported by the hardware. 
+static void apply_border_adjustment(const struct PixelShader *ps, MString *vars, int tex_index, const char *var_template) +{ + int i = tex_index; + if (ps->state.border_logical_size[i][0] == 0.0f) { + return; + } + + char var_name[32] = {0}; + snprintf(var_name, sizeof(var_name), var_template, i); + + mstring_append_fmt( + vars, + "vec2 t%dLogicalSize = vec2(%f, %f);\n" + "%s.xy = (%s.xy * t%dLogicalSize + vec2(4, 4)) * vec2(%f, %f);\n", + i, ps->state.border_logical_size[i][0], ps->state.border_logical_size[i][1], + var_name, var_name, i, ps->state.border_inv_real_size[i][0], ps->state.border_inv_real_size[i][1]); +} + static MString* psh_convert(struct PixelShader *ps) { int i; @@ -836,6 +856,7 @@ static MString* psh_convert(struct PixelShader *ps) NV2A_UNIMPLEMENTED("Convolution for 2D textures"); } } + apply_border_adjustment(ps, vars, i, "pT%d"); mstring_append_fmt(vars, "pT%d.xy = texScale%d * pT%d.xy;\n", i, i, i); mstring_append_fmt(vars, "vec4 t%d = %s(texSamp%d, pT%d.xyw);\n", i, lookup, i, i); @@ -846,6 +867,7 @@ static MString* psh_convert(struct PixelShader *ps) if (ps->state.shadow_map[i]) { psh_append_shadowmap(ps, i, true, vars); } else { + apply_border_adjustment(ps, vars, i, "pT%d"); mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, pT%d.xyzw);\n", i, i, i); } @@ -855,6 +877,7 @@ static MString* psh_convert(struct PixelShader *ps) i, i, i, i); break; case PS_TEXTUREMODES_PASSTHRU: + assert(ps->state.border_logical_size[i][0] == 0.0f && "Unexpected border texture on passthru"); mstring_append_fmt(vars, "vec4 t%d = pT%d;\n", i, i); break; case PS_TEXTUREMODES_CLIPPLANE: { @@ -919,10 +942,14 @@ static MString* psh_convert(struct PixelShader *ps) case PS_TEXTUREMODES_DOT_ST: assert(i >= 2); mstring_append_fmt(vars, "/* PS_TEXTUREMODES_DOT_ST */\n"); - mstring_append_fmt(vars, "float dot%d = dot(pT%d.xyz, %s(t%d.rgb));\n", - i, i, dotmap_func, ps->input_tex[i]); - mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, texScale%d * 
vec2(dot%d, dot%d));\n", - i, i, i, i-1, i); + mstring_append_fmt(vars, + "float dot%d = dot(pT%d.xyz, %s(t%d.rgb));\n" + "vec2 dotST%d = vec2(dot%d, dot%d);\n", + i, i, dotmap_func, ps->input_tex[i], i, i-1, i); + + apply_border_adjustment(ps, vars, i, "dotST%d"); + mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, texScale%d * dotST%d);\n", + i, i, i, i); break; case PS_TEXTUREMODES_DOT_ZW: assert(i >= 2); @@ -942,6 +969,7 @@ static MString* psh_convert(struct PixelShader *ps) i, i+1, dotmap_funcs[ps->dot_map[i+1]], ps->input_tex[i+1]); mstring_append_fmt(vars, "vec3 n_%d = vec3(dot%d, dot%d, dot%d_n);\n", i, i-1, i, i); + apply_border_adjustment(ps, vars, i, "n_%d"); mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, n_%d);\n", i, i, i); break; @@ -956,36 +984,49 @@ static MString* psh_convert(struct PixelShader *ps) i, i-2, i-1, i); mstring_append_fmt(vars, "vec3 rv_%d = 2*n_%d*dot(n_%d,e_%d)/dot(n_%d,n_%d) - e_%d;\n", i, i, i, i, i, i, i); + apply_border_adjustment(ps, vars, i, "rv_%d"); mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, rv_%d);\n", i, i, i); break; case PS_TEXTUREMODES_DOT_STR_3D: assert(i == 3); mstring_append_fmt(vars, "/* PS_TEXTUREMODES_DOT_STR_3D */\n"); - mstring_append_fmt(vars, "float dot%d = dot(pT%d.xyz, %s(t%d.rgb));\n", - i, i, dotmap_func, ps->input_tex[i]); - mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, vec3(dot%d, dot%d, dot%d));\n", - i, i, i-2, i-1, i); + mstring_append_fmt(vars, + "float dot%d = dot(pT%d.xyz, %s(t%d.rgb));\n" + "vec3 dotSTR%d = vec3(dot%d, dot%d, dot%d);\n", + i, i, dotmap_func, ps->input_tex[i], + i, i-2, i-1, i); + + apply_border_adjustment(ps, vars, i, "dotSTR%d"); + mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, dotSTR%d);\n", + i, i, i); break; case PS_TEXTUREMODES_DOT_STR_CUBE: assert(i == 3); mstring_append_fmt(vars, "/* PS_TEXTUREMODES_DOT_STR_CUBE */\n"); mstring_append_fmt(vars, "float dot%d = dot(pT%d.xyz, %s(t%d.rgb));\n", i, i, dotmap_func, ps->input_tex[i]); 
- mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, vec3(dot%d, dot%d, dot%d));\n", - i, i, i-2, i-1, i); + mstring_append_fmt(vars, "vec3 dotSTR%dCube = vec3(dot%d, dot%d, dot%d);\n", + i, i-2, i-1, i); + apply_border_adjustment(ps, vars, i, "dotSTR%dCube"); + mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, dotSTR%dCube);\n", + i, i, i); break; case PS_TEXTUREMODES_DPNDNT_AR: assert(i >= 1); assert(!ps->state.rect_tex[i]); - mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, t%d.ar);\n", - i, i, ps->input_tex[i]); + mstring_append_fmt(vars, "vec2 t%dAR = t%d.ar;\n", i, ps->input_tex[i]); + apply_border_adjustment(ps, vars, i, "t%dAR"); + mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, t%dAR);\n", + i, i, i); break; case PS_TEXTUREMODES_DPNDNT_GB: assert(i >= 1); assert(!ps->state.rect_tex[i]); - mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, t%d.gb);\n", - i, i, ps->input_tex[i]); + mstring_append_fmt(vars, "vec2 t%dGB = t%d.gb;\n", i, ps->input_tex[i]); + apply_border_adjustment(ps, vars, i, "t%dGB"); + mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, t%dGB);\n", + i, i, i); break; case PS_TEXTUREMODES_DOTPRODUCT: assert(i == 1 || i == 2); diff --git a/hw/xbox/nv2a/psh.h b/hw/xbox/nv2a/psh.h index 1d7ccc7bdc..815e775ca7 100644 --- a/hw/xbox/nv2a/psh.h +++ b/hw/xbox/nv2a/psh.h @@ -68,6 +68,9 @@ typedef struct PshState { bool alphakill[4]; enum ConvolutionFilter conv_tex[4]; + float border_logical_size[4][2]; + float border_inv_real_size[4][2]; + bool shadow_map[4]; enum PshShadowDepthFunc shadow_depth_func;