From 2497e2d7c4c0eac24d350b9bb8fa89d599cf68fb Mon Sep 17 00:00:00 2001
From: Matt Borgerson
Date: Thu, 14 Jun 2018 15:13:54 -0700
Subject: [PATCH] nv2a: Add support for window clipping

Fixes several games which utilize clipping, such as Halo:CE which uses
clipping in multiplayer to render multiple player screens.

Research and original PoC work done by Jannik Vogel:
- JayFoxRox/xqemu-espes@c05d91b
- JayFoxRox/xqemu-espes@a254830
---
 hw/xbox/nv2a/nv2a_int.h     | 26 ++++++++++++
 hw/xbox/nv2a/nv2a_pgraph.c  | 85 ++++++++++++++++++++++++++++++++++++-
 hw/xbox/nv2a/nv2a_psh.c     | 37 ++++++++++++++++
 hw/xbox/nv2a/nv2a_psh.h     |  3 ++
 hw/xbox/nv2a/nv2a_shaders.c |  4 ++
 hw/xbox/nv2a/nv2a_shaders.h |  1 +
 6 files changed, 155 insertions(+), 1 deletion(-)

diff --git a/hw/xbox/nv2a/nv2a_int.h b/hw/xbox/nv2a/nv2a_int.h
index 570f0f18e7..6d150f9311 100644
--- a/hw/xbox/nv2a/nv2a_int.h
+++ b/hw/xbox/nv2a/nv2a_int.h
@@ -462,6 +462,7 @@
 #   define NV_PGRAPH_SETUPRASTER_FRONTFACE                      (1 << 23)
 #   define NV_PGRAPH_SETUPRASTER_CULLENABLE                     (1 << 28)
 #   define NV_PGRAPH_SETUPRASTER_Z_FORMAT                       (1 << 29)
+#   define NV_PGRAPH_SETUPRASTER_WINDOWCLIPTYPE                 (1 << 31)
 #define NV_PGRAPH_SHADERCLIPMODE                         0x00001994
 #define NV_PGRAPH_SHADERCTL                              0x00001998
 #define NV_PGRAPH_SHADERPROG                             0x0000199C
@@ -554,6 +555,26 @@
 #define NV_PGRAPH_TEXPALETTE1                            0x00001A38
 #define NV_PGRAPH_TEXPALETTE2                            0x00001A3C
 #define NV_PGRAPH_TEXPALETTE3                            0x00001A40
+#define NV_PGRAPH_WINDOWCLIPX0                           0x00001A44
+#   define NV_PGRAPH_WINDOWCLIPX0_XMIN                          0x00000FFF
+#   define NV_PGRAPH_WINDOWCLIPX0_XMAX                          0x0FFF0000
+#define NV_PGRAPH_WINDOWCLIPX1                           0x00001A48
+#define NV_PGRAPH_WINDOWCLIPX2                           0x00001A4C
+#define NV_PGRAPH_WINDOWCLIPX3                           0x00001A50
+#define NV_PGRAPH_WINDOWCLIPX4                           0x00001A54
+#define NV_PGRAPH_WINDOWCLIPX5                           0x00001A58
+#define NV_PGRAPH_WINDOWCLIPX6                           0x00001A5C
+#define NV_PGRAPH_WINDOWCLIPX7                           0x00001A60
+#define NV_PGRAPH_WINDOWCLIPY0                           0x00001A64
+#   define NV_PGRAPH_WINDOWCLIPY0_YMIN                          0x00000FFF
+#   define NV_PGRAPH_WINDOWCLIPY0_YMAX                          0x0FFF0000
+#define NV_PGRAPH_WINDOWCLIPY1                           0x00001A68
+#define NV_PGRAPH_WINDOWCLIPY2                           0x00001A6C
+#define NV_PGRAPH_WINDOWCLIPY3                           0x00001A70
+#define NV_PGRAPH_WINDOWCLIPY4                           0x00001A74
+#define NV_PGRAPH_WINDOWCLIPY5                           0x00001A78
+#define NV_PGRAPH_WINDOWCLIPY6                           0x00001A7C
+#define NV_PGRAPH_WINDOWCLIPY7                           0x00001A80
 #define NV_PGRAPH_ZSTENCILCLEARVALUE                     0x00001A88
 #define NV_PGRAPH_ZCLIPMIN                               0x00001A90
 #define NV_PGRAPH_ZOFFSETBIAS                            0x00001AA4
@@ -794,6 +815,11 @@
 #       define NV097_SET_FOG_COLOR_GREEN                            0x0000FF00
 #       define NV097_SET_FOG_COLOR_BLUE                             0x00FF0000
 #       define NV097_SET_FOG_COLOR_ALPHA                            0xFF000000
+#   define NV097_SET_WINDOW_CLIP_TYPE                           0x000002B4
+#   define NV097_SET_WINDOW_CLIP_HORIZONTAL                     0x000002C0
+#       define NV097_SET_WINDOW_CLIP_HORIZONTAL_XMIN                0x00000FFF
+#       define NV097_SET_WINDOW_CLIP_HORIZONTAL_XMAX                0x0FFF0000
+#   define NV097_SET_WINDOW_CLIP_VERTICAL                       0x000002E0
 #   define NV097_SET_ALPHA_TEST_ENABLE                          0x00000300
 #   define NV097_SET_BLEND_ENABLE                               0x00000304
 #   define NV097_SET_CULL_FACE_ENABLE                           0x00000308
diff --git a/hw/xbox/nv2a/nv2a_pgraph.c b/hw/xbox/nv2a/nv2a_pgraph.c
index dd7ffb91db..e9be6eb386 100644
--- a/hw/xbox/nv2a/nv2a_pgraph.c
+++ b/hw/xbox/nv2a/nv2a_pgraph.c
@@ -874,6 +874,20 @@ static void pgraph_method(NV2AState *d,
         SET_MASK(pg->regs[NV_PGRAPH_FOGCOLOR], NV_PGRAPH_FOGCOLOR_ALPHA, alpha);
         break;
     }
+    case NV097_SET_WINDOW_CLIP_TYPE:
+        SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
+                 NV_PGRAPH_SETUPRASTER_WINDOWCLIPTYPE, parameter);
+        break;
+    case NV097_SET_WINDOW_CLIP_HORIZONTAL ...
+            NV097_SET_WINDOW_CLIP_HORIZONTAL + 0x1c: /* 8 slots, 4 bytes apart */
+        slot = (method - NV097_SET_WINDOW_CLIP_HORIZONTAL) / 4;
+        pg->regs[NV_PGRAPH_WINDOWCLIPX0 + slot * 4] = parameter;
+        break;
+    case NV097_SET_WINDOW_CLIP_VERTICAL ...
+            NV097_SET_WINDOW_CLIP_VERTICAL + 0x1c: /* 8 slots, 4 bytes apart */
+        slot = (method - NV097_SET_WINDOW_CLIP_VERTICAL) / 4;
+        pg->regs[NV_PGRAPH_WINDOWCLIPY0 + slot * 4] = parameter;
+        break;
     case NV097_SET_ALPHA_TEST_ENABLE:
         SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
                  NV_PGRAPH_CONTROL_0_ALPHATESTENABLE, parameter);
@@ -2409,6 +2423,8 @@ static void pgraph_method(NV2AState *d,
         /* FIXME: Should this really be inverted instead of ymin? */
         glScissor(scissor_x, scissor_y, scissor_width, scissor_height);
 
+        /* FIXME: Should the clear also respect the window clip regions? */
+
         NV2A_DPRINTF("------------------CLEAR 0x%x %d,%d - %d,%d %x---------------\n",
             parameter, xmin, ymin, xmax, ymax, d->pgraph.regs[NV_PGRAPH_COLORCLEARVALUE]);
 
@@ -2917,7 +2933,6 @@ static void pgraph_shader_update_constants(PGRAPHState *pg,
     if (binding->clip_range_loc != -1) {
         glUniform2f(binding->clip_range_loc, zclip_min, zclip_max);
     }
-
 }
 
 static void pgraph_bind_shaders(PGRAPHState *pg)
@@ -2942,6 +2957,8 @@ static void pgraph_bind_shaders(PGRAPHState *pg)
     ShaderState state = {
         .psh = (PshState){
             /* register combier stuff */
+            .window_clip_exclusive = pg->regs[NV_PGRAPH_SETUPRASTER]
+                                     & NV_PGRAPH_SETUPRASTER_WINDOWCLIPTYPE,
             .combiner_control = pg->regs[NV_PGRAPH_COMBINECTL],
             .shader_stage_program = pg->regs[NV_PGRAPH_SHADERPROG],
             .other_stage_input = pg->regs[NV_PGRAPH_SHADERCTL],
@@ -3037,6 +3054,45 @@ static void pgraph_bind_shaders(PGRAPHState *pg)
         }
     }
 
+    /* Window clip
+     *
+     * Optimization note: very quickly check to ignore any repeated or zero-size
+     * clipping regions. The count is the last accepted region's index plus one,
+     * so if region number 7 is valid, but the rest are not, we will still add
+     * all of them. Clip regions seem to be typically front-loaded (the first
+     * one or two are populated, and the following are zeroed-out), so let's
+     * avoid more complicated masking or copying logic here for now.
+     */
+    assert(!state.psh.window_clip_exclusive); /* FIXME: Untested */
+    state.psh.window_clip_count = 0;
+    uint32_t last_x = 0, last_y = 0;
+
+    for (i = 0; i < 8; i++) {
+        const uint32_t x = pg->regs[NV_PGRAPH_WINDOWCLIPX0 + i * 4];
+        const uint32_t y = pg->regs[NV_PGRAPH_WINDOWCLIPY0 + i * 4];
+        const uint32_t x_min = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMIN);
+        const uint32_t x_max = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMAX);
+        const uint32_t y_min = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMIN);
+        const uint32_t y_max = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMAX);
+
+        /* Check for zero width or height clipping region */
+        if ((x_min == x_max) || (y_min == y_max)) {
+            continue;
+        }
+
+        /* Check for in-order duplicate regions */
+        if ((x == last_x) && (y == last_y)) {
+            continue;
+        }
+
+        NV2A_DPRINTF("Clipping Region %d: min=(%d, %d) max=(%d, %d)\n",
+            i, x_min, y_min, x_max, y_max);
+
+        state.psh.window_clip_count = i + 1;
+        last_x = x;
+        last_y = y;
+    }
+
     for (i = 0; i < 8; i++) {
         state.psh.rgb_inputs[i] = pg->regs[NV_PGRAPH_COMBINECOLORI0 + i * 4];
         state.psh.rgb_outputs[i] = pg->regs[NV_PGRAPH_COMBINECOLORO0 + i * 4];
@@ -3083,6 +3139,33 @@ static void pgraph_bind_shaders(PGRAPHState *pg)
 
     glUseProgram(pg->shader_binding->gl_program);
 
+    /* Clipping regions */
+    for (i = 0; i < state.psh.window_clip_count; i++) {
+        if (pg->shader_binding->clip_region_loc[i] == -1) {
+            continue;
+        }
+
+        uint32_t x = pg->regs[NV_PGRAPH_WINDOWCLIPX0 + i * 4];
+        GLuint x_min = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMIN);
+        GLuint x_max = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMAX);
+
+        /* Adjust y-coordinates for the OpenGL viewport: translate coordinates
+         * to have the origin at the bottom-left of the surface (as opposed to
+         * top-left), and flip y-min and y-max accordingly.
+         */
+        uint32_t y = pg->regs[NV_PGRAPH_WINDOWCLIPY0 + i * 4];
+        GLuint y_min = (pg->surface_shape.clip_height - 1) -
+                       GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMAX);
+        GLuint y_max = (pg->surface_shape.clip_height - 1) -
+                       GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMIN);
+
+        pgraph_apply_anti_aliasing_factor(pg, &x_min, &y_min);
+        pgraph_apply_anti_aliasing_factor(pg, &x_max, &y_max);
+
+        glUniform4i(pg->shader_binding->clip_region_loc[i],
+                    x_min, y_min, x_max, y_max);
+    }
+
     pgraph_shader_update_constants(pg, pg->shader_binding, binding_changed,
                                    vertex_program, fixed_function);
 
diff --git a/hw/xbox/nv2a/nv2a_psh.c b/hw/xbox/nv2a/nv2a_psh.c
index 4c39748310..e31533d1b1 100644
--- a/hw/xbox/nv2a/nv2a_psh.c
+++ b/hw/xbox/nv2a/nv2a_psh.c
@@ -539,6 +539,42 @@ static QString* psh_convert(struct PixelShader *ps)
     qstring_append(preflight, "\n");
     qstring_append(preflight, "uniform vec4 fogColor;\n");
 
+    /* Window Clipping: GLSL prepended to the body of main() */
+    QString *clip = qstring_new();
+    if (ps->state.window_clip_count != 0) {
+        qstring_append_fmt(preflight, "uniform ivec4 clipRegion[%d];\n",
+                           ps->state.window_clip_count);
+        qstring_append_fmt(clip, "/* Window-clip (%s) */\n",
+                           ps->state.window_clip_exclusive ?
+                               "Exclusive" : "Inclusive");
+        if (!ps->state.window_clip_exclusive) {
+            qstring_append(clip, "bool clipContained = false;\n");
+        }
+        qstring_append_fmt(clip, "for (int i = 0; i < %d; i++) {\n",
+                           ps->state.window_clip_count);
+        qstring_append(clip, " bvec4 clipTest = bvec4(lessThan(gl_FragCoord.xy, clipRegion[i].xy),\n"
+                             " greaterThan(gl_FragCoord.xy, clipRegion[i].zw));\n"
+                             " if (!any(clipTest)) {\n");
+        if (ps->state.window_clip_exclusive) {
+            /* Pixel in clip region = exclude by discarding */
+            qstring_append(clip, " discard;\n");
+            assert(false); /* Untested */
+        } else {
+            /* Pixel in clip region = mark pixel as contained and leave */
+            qstring_append(clip, " clipContained = true;\n"
+                                 " break;\n");
+        }
+        qstring_append(clip, " }\n"
+                             "}\n");
+        /* Check for inclusive window clip */
+        if (!ps->state.window_clip_exclusive) {
+            qstring_append(clip, "if (!clipContained) { discard; }\n");
+        }
+    } else if (ps->state.window_clip_exclusive) {
+        /* Clip everything */
+        qstring_append(clip, "discard;\n");
+    }
+
     /* calculate perspective-correct inputs */
     QString *vars = qstring_new();
     qstring_append(vars, "vec4 pD0 = vtx.D0 / vtx.inv_w;\n");
@@ -746,6 +782,7 @@ static QString* psh_convert(struct PixelShader *ps)
     qstring_append(final, "#version 330\n\n");
     qstring_append(final, qstring_get_str(preflight));
     qstring_append(final, "void main() {\n");
+    qstring_append(final, qstring_get_str(clip));
     qstring_append(final, qstring_get_str(vars));
     qstring_append(final, qstring_get_str(ps->code));
     qstring_append(final, "fragColor = r0;\n");
diff --git a/hw/xbox/nv2a/nv2a_psh.h b/hw/xbox/nv2a/nv2a_psh.h
index cb55ba775c..31c19779a7 100644
--- a/hw/xbox/nv2a/nv2a_psh.h
+++ b/hw/xbox/nv2a/nv2a_psh.h
@@ -52,6 +52,9 @@ typedef struct PshState {
 
     bool alpha_test;
     enum PshAlphaFunc alpha_func;
+
+    bool window_clip_exclusive; /* false: inclusive, true: exclusive */
+    unsigned int window_clip_count; /* size of the clipRegion[] uniform */
 } PshState;
 
 QString *psh_translate(const PshState state);
diff --git a/hw/xbox/nv2a/nv2a_shaders.c b/hw/xbox/nv2a/nv2a_shaders.c
index f568c6e48a..e0e4f26466 100644
--- a/hw/xbox/nv2a/nv2a_shaders.c
+++ b/hw/xbox/nv2a/nv2a_shaders.c
@@ -985,6 +985,10 @@ ShaderBinding* generate_shaders(const ShaderState state)
         snprintf(tmp, sizeof(tmp), "lightLocalAttenuation%d", i);
         ret->light_local_attenuation_loc[i] = glGetUniformLocation(program, tmp);
     }
+    for (i = 0; i < 8; i++) {
+        snprintf(tmp, sizeof(tmp), "clipRegion[%d]", i);
+        ret->clip_region_loc[i] = glGetUniformLocation(program, tmp);
+    }
 
     return ret;
 }
diff --git a/hw/xbox/nv2a/nv2a_shaders.h b/hw/xbox/nv2a/nv2a_shaders.h
index b2a41aaf31..10cacc1149 100644
--- a/hw/xbox/nv2a/nv2a_shaders.h
+++ b/hw/xbox/nv2a/nv2a_shaders.h
@@ -108,6 +108,7 @@ typedef struct ShaderBinding {
     GLint light_local_position_loc[NV2A_MAX_LIGHTS];
     GLint light_local_attenuation_loc[NV2A_MAX_LIGHTS];
 
+    GLint clip_region_loc[8]; /* locations of the clipRegion[i] uniforms */
 } ShaderBinding;
 
 ShaderBinding* generate_shaders(const ShaderState state);