From 0169caadd09b8199596acca061845813984aea47 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH] nv2a/vk: Fallback to UBO if maxPushConstantsSize is insufficient --- hw/xbox/nv2a/pgraph/glsl/vsh.c | 29 +++++++++++----- hw/xbox/nv2a/pgraph/pgraph.h | 3 ++ hw/xbox/nv2a/pgraph/shaders.c | 4 +-- hw/xbox/nv2a/pgraph/shaders.h | 1 + hw/xbox/nv2a/pgraph/vertex.c | 20 +++++++++++ hw/xbox/nv2a/pgraph/vk/draw.c | 57 +++++++++++++++++++------------ hw/xbox/nv2a/pgraph/vk/instance.c | 3 -- hw/xbox/nv2a/pgraph/vk/renderer.h | 2 ++ hw/xbox/nv2a/pgraph/vk/shaders.c | 30 ++++++++++++++++ hw/xbox/nv2a/pgraph/vk/vertex.c | 2 +- 10 files changed, 114 insertions(+), 37 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh.c b/hw/xbox/nv2a/pgraph/glsl/vsh.c index 4fcc09cac5..84609e18df 100644 --- a/hw/xbox/nv2a/pgraph/glsl/vsh.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh.c @@ -93,15 +93,19 @@ MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs) ); } mstring_append(header, "\n"); - for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + int num_uniform_attrs = 0; + + for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { bool is_uniform = state->uniform_attrs & (1 << i); bool is_compressed = state->compressed_attrs & (1 << i); assert(!(is_uniform && is_compressed)); if (is_uniform) { - mstring_append_fmt(header, "vec4 v%d = inlineValue[%d];\n", i, i); + mstring_append_fmt(header, "vec4 v%d = inlineValue[%d];\n", i, + num_uniform_attrs); + num_uniform_attrs += 1; } else { if (state->compressed_attrs & (1 << i)) { mstring_append_fmt(header, @@ -249,17 +253,24 @@ MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs) shade_model_mult, shade_model_mult); - /* Return combined header + source */ if (state->vulkan) { + // FIXME: Optimize uniforms + if (state->use_push_constants_for_uniform_attrs) { + mstring_append_fmt(output, + "layout(push_constant) uniform PushConstants {\n" + " vec4 inlineValue[%d];\n" + "};\n\n", num_uniform_attrs); + } else { + mstring_append_fmt(uniforms, " vec4 inlineValue[%d];\n", + num_uniform_attrs); + } mstring_append_fmt( - output, "layout(binding = %d, std140) uniform VshUniforms {\n%s};\n\n", + output, + "layout(binding = %d, std140) uniform VshUniforms {\n" + "%s" + "};\n\n", VSH_UBO_BINDING, mstring_get_str(uniforms)); - // FIXME: Only needed for vk, for gl we use glVertexAttrib - mstring_append_fmt(output, - "layout(push_constant) uniform PushConstants {\n" - "vec4 inlineValue[" stringify(NV2A_VERTEXSHADER_ATTRIBUTES) "];\n" - "};\n\n"); } else { mstring_append( output, mstring_get_str(uniforms)); diff --git a/hw/xbox/nv2a/pgraph/pgraph.h b/hw/xbox/nv2a/pgraph/pgraph.h index 4ecc1c0d25..634f15c8ad 100644 --- a/hw/xbox/nv2a/pgraph/pgraph.h +++ b/hw/xbox/nv2a/pgraph/pgraph.h @@ -373,6 +373,9 @@ void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg); void pgraph_reset_inline_buffers(PGRAPHState *pg); void pgraph_reset_draw_arrays(PGRAPHState *pg); void pgraph_update_inline_value(VertexAttribute *attr, const uint8_t *data); +void pgraph_get_inline_values(PGRAPHState *pg, uint16_t attrs, + float values[NV2A_VERTEXSHADER_ATTRIBUTES][4], + int *count); /* RDI */ uint32_t pgraph_rdi_read(PGRAPHState *pg, unsigned int select, diff --git a/hw/xbox/nv2a/pgraph/shaders.c b/hw/xbox/nv2a/pgraph/shaders.c index 82737b44f4..285d24f439 100644 --- a/hw/xbox/nv2a/pgraph/shaders.c +++ b/hw/xbox/nv2a/pgraph/shaders.c @@ -39,10 +39,9 @@ ShaderState pgraph_get_shader_state(PGRAPHState *pg) ShaderState state; - // We will hash it, so make sure any padding is zerod + // We will hash it, so make sure any padding is zeroed memset(&state, 0, sizeof(ShaderState)); - state.vulkan = pg->renderer->type == CONFIG_DISPLAY_RENDERER_VULKAN; state.surface_scale_factor = pg->surface_scale_factor; state.compressed_attrs = pg->compressed_attrs; @@ -50,7 +49,6 @@ ShaderState pgraph_get_shader_state(PGRAPHState *pg) state.swizzle_attrs = pg->swizzle_attrs; /* register combiner stuff */ - state.psh.vulkan = state.vulkan; state.psh.window_clip_exclusive = pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & NV_PGRAPH_SETUPRASTER_WINDOWCLIPTYPE; state.psh.combiner_control = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL); diff --git a/hw/xbox/nv2a/pgraph/shaders.h b/hw/xbox/nv2a/pgraph/shaders.h index 842658f808..71febe2e2f 100644 --- a/hw/xbox/nv2a/pgraph/shaders.h +++ b/hw/xbox/nv2a/pgraph/shaders.h @@ -55,6 +55,7 @@ enum MaterialColorSource { typedef struct ShaderState { bool vulkan; + bool use_push_constants_for_uniform_attrs; unsigned int surface_scale_factor; PshState psh; diff --git a/hw/xbox/nv2a/pgraph/vertex.c b/hw/xbox/nv2a/pgraph/vertex.c index 47f7cb5688..31076896e7 100644 --- a/hw/xbox/nv2a/pgraph/vertex.c +++ b/hw/xbox/nv2a/pgraph/vertex.c @@ -82,6 +82,26 @@ void pgraph_update_inline_value(VertexAttribute *attr, const uint8_t *data) } } +void pgraph_get_inline_values(PGRAPHState *pg, uint16_t attrs, + float values[NV2A_VERTEXSHADER_ATTRIBUTES][4], + int *count) +{ + int num_attributes = 0; + + for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + if (attrs & (1 << i)) { + memcpy(values[num_attributes], + pg->vertex_attributes[i].inline_value, 4 * sizeof(float)); + num_attributes += 1; + } + } + + if (count) { + *count = num_attributes; + } +} + + void pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg, unsigned int attr) { VertexAttribute *attribute = &pg->vertex_attributes[attr]; diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index f1261c412b..8500852101 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -982,19 +982,29 @@ static void create_pipeline(PGRAPHState *pg) // FIXME: No direct analog. Just do it with MSAA. // } - VkPushConstantRange push_constant_range = { - .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, - .offset = 0, - // FIXME: Minimize push constants - .size = NV2A_VERTEXSHADER_ATTRIBUTES * 4 * sizeof(float), - }; + VkPipelineLayoutCreateInfo pipeline_layout_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .setLayoutCount = 1, .pSetLayouts = &r->descriptor_set_layout, - .pushConstantRangeCount = 1, - .pPushConstantRanges = &push_constant_range, }; + + VkPushConstantRange push_constant_range; + if (r->shader_binding->state.use_push_constants_for_uniform_attrs) { + int num_uniform_attributes = + __builtin_popcount(r->shader_binding->state.uniform_attrs); + if (num_uniform_attributes) { + push_constant_range = (VkPushConstantRange){ + .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, + .offset = 0, + // FIXME: Minimize push constants + .size = num_uniform_attributes * 4 * sizeof(float), + }; + pipeline_layout_info.pushConstantRangeCount = 1; + pipeline_layout_info.pPushConstantRanges = &push_constant_range; + } + } + VkPipelineLayout layout; VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL, &layout)); @@ -1031,23 +1041,28 @@ static void create_pipeline(PGRAPHState *pg) NV2A_VK_DGROUP_END(); } -static void push_vertex_attrib_values(PGRAPHState *pg) +static void push_vertex_attr_values(PGRAPHState *pg) { PGRAPHVkState *r = pg->vk_renderer_state; - // FIXME: Do partial updates - - float attrib_values[NV2A_VERTEXSHADER_ATTRIBUTES * 4]; - for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { - attrib_values[i * 4 + 0] = pg->vertex_attributes[i].inline_value[0]; - attrib_values[i * 4 + 1] = pg->vertex_attributes[i].inline_value[1]; - attrib_values[i * 4 + 2] = pg->vertex_attributes[i].inline_value[2]; - attrib_values[i * 4 + 3] = pg->vertex_attributes[i].inline_value[3]; + if (!r->shader_binding->state.use_push_constants_for_uniform_attrs) { + return; } - vkCmdPushConstants(r->command_buffer, r->pipeline_binding->layout, - VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(attrib_values), - &attrib_values); + // FIXME: Partial updates + + float values[NV2A_VERTEXSHADER_ATTRIBUTES][4]; + int num_uniform_attrs = 0; + + pgraph_get_inline_values(pg, r->shader_binding->state.uniform_attrs, values, + &num_uniform_attrs); + + if (num_uniform_attrs > 0) { + vkCmdPushConstants(r->command_buffer, r->pipeline_binding->layout, + VK_SHADER_STAGE_VERTEX_BIT, 0, + num_uniform_attrs * 4 * sizeof(float), + &values); + } } static void bind_descriptor_sets(PGRAPHState *pg) @@ -1405,7 +1420,7 @@ static void begin_draw(PGRAPHState *pg) if (!pg->clearing) { bind_descriptor_sets(pg); - push_vertex_attrib_values(pg); + push_vertex_attr_values(pg); } r->in_draw = true; diff --git a/hw/xbox/nv2a/pgraph/vk/instance.c b/hw/xbox/nv2a/pgraph/vk/instance.c index d9504402db..cb7c677e45 100644 --- a/hw/xbox/nv2a/pgraph/vk/instance.c +++ b/hw/xbox/nv2a/pgraph/vk/instance.c @@ -478,9 +478,6 @@ static bool select_physical_device(PGRAPHState *pg, Error **errp) VK_VERSION_MINOR(r->device_props.driverVersion), VK_VERSION_PATCH(r->device_props.driverVersion)); - size_t vsh_attr_values_size = - NV2A_VERTEXSHADER_ATTRIBUTES * 4 * sizeof(float); - assert(r->device_props.limits.maxPushConstantsSize >= vsh_attr_values_size); return true; } diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h index 787f3df8a5..c889626bb9 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -192,6 +192,8 @@ typedef struct ShaderBinding { int clip_region_loc; int material_alpha_loc; + + int uniform_attrs_loc; } ShaderBinding; typedef struct TextureKey { diff --git a/hw/xbox/nv2a/pgraph/vk/shaders.c b/hw/xbox/nv2a/pgraph/vk/shaders.c index fde19242a5..6ba540a511 100644 --- a/hw/xbox/nv2a/pgraph/vk/shaders.c +++ b/hw/xbox/nv2a/pgraph/vk/shaders.c @@ -34,6 +34,8 @@ #include "renderer.h" #include +const size_t MAX_UNIFORM_ATTR_VALUES_SIZE = NV2A_VERTEXSHADER_ATTRIBUTES * 4 * sizeof(float); + static void create_descriptor_pool(PGRAPHState *pg) { PGRAPHVkState *r = pg->vk_renderer_state; @@ -305,6 +307,9 @@ static void update_shader_constant_locations(ShaderBinding *binding) binding->material_alpha_loc = uniform_index(&binding->vertex->uniforms, "material_alpha"); + + binding->uniform_attrs_loc = + uniform_index(&binding->vertex->uniforms, "inlineValue"); } static void shader_cache_entry_init(Lru *lru, LruNode *node, void *state) @@ -430,11 +435,26 @@ static ShaderBinding *gen_shaders(PGRAPHState *pg, ShaderState *state) return snode; } +static void update_uniform_attr_values(PGRAPHState *pg, ShaderBinding *binding) +{ + float values[NV2A_VERTEXSHADER_ATTRIBUTES][4]; + int num_uniform_attrs = 0; + + pgraph_get_inline_values(pg, binding->state.uniform_attrs, values, + &num_uniform_attrs); + + if (num_uniform_attrs > 0) { + uniform1fv(&binding->vertex->uniforms, binding->uniform_attrs_loc, + num_uniform_attrs * 4, &values[0][0]); + } +} + // FIXME: Move to common static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, bool binding_changed, bool vertex_program, bool fixed_function) { + ShaderState *state = &binding->state; int i, j; /* update combiner constants */ @@ -662,6 +682,10 @@ static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, uniform1f(&binding->vertex->uniforms, binding->material_alpha_loc, pg->material_alpha); } + + if (!state->use_push_constants_for_uniform_attrs && state->uniform_attrs) { + update_uniform_attr_values(pg, binding); + } } // Quickly check PGRAPH state to see if any registers have changed that @@ -742,6 +766,12 @@ void pgraph_vk_bind_shaders(PGRAPHState *pg) ShaderState new_state; memset(&new_state, 0, sizeof(ShaderState)); new_state = pgraph_get_shader_state(pg); + new_state.vulkan = true; + new_state.psh.vulkan = true; + new_state.use_push_constants_for_uniform_attrs = + (r->device_props.limits.maxPushConstantsSize >= + MAX_UNIFORM_ATTR_VALUES_SIZE); + if (!r->shader_binding || memcmp(&r->shader_binding->state, &new_state, sizeof(ShaderState))) { r->shader_binding = gen_shaders(pg, &new_state); r->shader_bindings_changed = true; diff --git a/hw/xbox/nv2a/pgraph/vk/vertex.c b/hw/xbox/nv2a/pgraph/vk/vertex.c index 6625520c65..5c4580aaa4 100644 --- a/hw/xbox/nv2a/pgraph/vk/vertex.c +++ b/hw/xbox/nv2a/pgraph/vk/vertex.c @@ -309,4 +309,4 @@ void pgraph_vk_bind_vertex_attributes_inline(NV2AState *d) pg->uniform_attrs |= 1 << i; } } -} \ No newline at end of file +}