nv2a/vk: Fall back to UBO if maxPushConstantsSize is insufficient

This commit is contained in:
Matt Borgerson 2024-07-26 17:21:01 -07:00 committed by mborgerson
parent 22674f782c
commit 0169caadd0
10 changed files with 114 additions and 37 deletions

View File

@ -93,15 +93,19 @@ MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs)
);
}
mstring_append(header, "\n");
for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
int num_uniform_attrs = 0;
for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
bool is_uniform = state->uniform_attrs & (1 << i);
bool is_compressed = state->compressed_attrs & (1 << i);
assert(!(is_uniform && is_compressed));
if (is_uniform) {
mstring_append_fmt(header, "vec4 v%d = inlineValue[%d];\n", i, i);
mstring_append_fmt(header, "vec4 v%d = inlineValue[%d];\n", i,
num_uniform_attrs);
num_uniform_attrs += 1;
} else {
if (state->compressed_attrs & (1 << i)) {
mstring_append_fmt(header,
@ -249,17 +253,24 @@ MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs)
shade_model_mult,
shade_model_mult);
/* Return combined header + source */
if (state->vulkan) {
// FIXME: Optimize uniforms
if (state->use_push_constants_for_uniform_attrs) {
mstring_append_fmt(output,
"layout(push_constant) uniform PushConstants {\n"
" vec4 inlineValue[%d];\n"
"};\n\n", num_uniform_attrs);
} else {
mstring_append_fmt(uniforms, " vec4 inlineValue[%d];\n",
num_uniform_attrs);
}
mstring_append_fmt(
output, "layout(binding = %d, std140) uniform VshUniforms {\n%s};\n\n",
output,
"layout(binding = %d, std140) uniform VshUniforms {\n"
"%s"
"};\n\n",
VSH_UBO_BINDING, mstring_get_str(uniforms));
// FIXME: Only needed for vk, for gl we use glVertexAttrib
mstring_append_fmt(output,
"layout(push_constant) uniform PushConstants {\n"
"vec4 inlineValue[" stringify(NV2A_VERTEXSHADER_ATTRIBUTES) "];\n"
"};\n\n");
} else {
mstring_append(
output, mstring_get_str(uniforms));

View File

@ -373,6 +373,9 @@ void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg);
void pgraph_reset_inline_buffers(PGRAPHState *pg);
void pgraph_reset_draw_arrays(PGRAPHState *pg);
void pgraph_update_inline_value(VertexAttribute *attr, const uint8_t *data);
/*
 * Collect the inline values of the attributes selected by the `attrs`
 * bitmask (bit i selects attribute i) into `values`, packed contiguously in
 * ascending attribute order. If `count` is non-NULL it receives the number
 * of attributes that were copied.
 */
void pgraph_get_inline_values(PGRAPHState *pg, uint16_t attrs,
float values[NV2A_VERTEXSHADER_ATTRIBUTES][4],
int *count);
/* RDI */
uint32_t pgraph_rdi_read(PGRAPHState *pg, unsigned int select,

View File

@ -39,10 +39,9 @@ ShaderState pgraph_get_shader_state(PGRAPHState *pg)
ShaderState state;
// We will hash it, so make sure any padding is zerod
// We will hash it, so make sure any padding is zeroed
memset(&state, 0, sizeof(ShaderState));
state.vulkan = pg->renderer->type == CONFIG_DISPLAY_RENDERER_VULKAN;
state.surface_scale_factor = pg->surface_scale_factor;
state.compressed_attrs = pg->compressed_attrs;
@ -50,7 +49,6 @@ ShaderState pgraph_get_shader_state(PGRAPHState *pg)
state.swizzle_attrs = pg->swizzle_attrs;
/* register combiner stuff */
state.psh.vulkan = state.vulkan;
state.psh.window_clip_exclusive =
pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & NV_PGRAPH_SETUPRASTER_WINDOWCLIPTYPE;
state.psh.combiner_control = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL);

View File

@ -55,6 +55,7 @@ enum MaterialColorSource {
typedef struct ShaderState {
bool vulkan;
bool use_push_constants_for_uniform_attrs;
unsigned int surface_scale_factor;
PshState psh;

View File

@ -82,6 +82,26 @@ void pgraph_update_inline_value(VertexAttribute *attr, const uint8_t *data)
}
}
/*
 * Gather the inline values of selected vertex attributes into a densely
 * packed array.
 *
 * pg:     state whose vertex_attributes[i].inline_value entries are read.
 * attrs:  bitmask; bit i set means attribute i is selected.
 * values: output array. The n-th selected attribute (in ascending index
 *         order) is copied to values[n] as 4 floats; rows beyond the last
 *         selected attribute are not written.
 * count:  optional output. When non-NULL, receives the number of rows
 *         written to `values`.
 */
void pgraph_get_inline_values(PGRAPHState *pg, uint16_t attrs,
                              float values[NV2A_VERTEXSHADER_ATTRIBUTES][4],
                              int *count)
{
    int packed = 0;

    for (int attr = 0; attr < NV2A_VERTEXSHADER_ATTRIBUTES; attr++) {
        /* Skip attributes that are not selected by the mask. */
        if (!(attrs & (1 << attr))) {
            continue;
        }

        float *dst = values[packed];
        memcpy(dst, pg->vertex_attributes[attr].inline_value,
               4 * sizeof(float));
        packed++;
    }

    if (count) {
        *count = packed;
    }
}
void pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg, unsigned int attr)
{
VertexAttribute *attribute = &pg->vertex_attributes[attr];

View File

@ -982,19 +982,29 @@ static void create_pipeline(PGRAPHState *pg)
// FIXME: No direct analog. Just do it with MSAA.
// }
VkPushConstantRange push_constant_range = {
.stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
.offset = 0,
// FIXME: Minimize push constants
.size = NV2A_VERTEXSHADER_ATTRIBUTES * 4 * sizeof(float),
};
VkPipelineLayoutCreateInfo pipeline_layout_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &r->descriptor_set_layout,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &push_constant_range,
};
VkPushConstantRange push_constant_range;
if (r->shader_binding->state.use_push_constants_for_uniform_attrs) {
int num_uniform_attributes =
__builtin_popcount(r->shader_binding->state.uniform_attrs);
if (num_uniform_attributes) {
push_constant_range = (VkPushConstantRange){
.stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
.offset = 0,
// FIXME: Minimize push constants
.size = num_uniform_attributes * 4 * sizeof(float),
};
pipeline_layout_info.pushConstantRangeCount = 1;
pipeline_layout_info.pPushConstantRanges = &push_constant_range;
}
}
VkPipelineLayout layout;
VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL,
&layout));
@ -1031,23 +1041,28 @@ static void create_pipeline(PGRAPHState *pg)
NV2A_VK_DGROUP_END();
}
static void push_vertex_attrib_values(PGRAPHState *pg)
static void push_vertex_attr_values(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
// FIXME: Do partial updates
float attrib_values[NV2A_VERTEXSHADER_ATTRIBUTES * 4];
for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
attrib_values[i * 4 + 0] = pg->vertex_attributes[i].inline_value[0];
attrib_values[i * 4 + 1] = pg->vertex_attributes[i].inline_value[1];
attrib_values[i * 4 + 2] = pg->vertex_attributes[i].inline_value[2];
attrib_values[i * 4 + 3] = pg->vertex_attributes[i].inline_value[3];
if (!r->shader_binding->state.use_push_constants_for_uniform_attrs) {
return;
}
vkCmdPushConstants(r->command_buffer, r->pipeline_binding->layout,
VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(attrib_values),
&attrib_values);
// FIXME: Partial updates
float values[NV2A_VERTEXSHADER_ATTRIBUTES][4];
int num_uniform_attrs = 0;
pgraph_get_inline_values(pg, r->shader_binding->state.uniform_attrs, values,
&num_uniform_attrs);
if (num_uniform_attrs > 0) {
vkCmdPushConstants(r->command_buffer, r->pipeline_binding->layout,
VK_SHADER_STAGE_VERTEX_BIT, 0,
num_uniform_attrs * 4 * sizeof(float),
&values);
}
}
static void bind_descriptor_sets(PGRAPHState *pg)
@ -1405,7 +1420,7 @@ static void begin_draw(PGRAPHState *pg)
if (!pg->clearing) {
bind_descriptor_sets(pg);
push_vertex_attrib_values(pg);
push_vertex_attr_values(pg);
}
r->in_draw = true;

View File

@ -478,9 +478,6 @@ static bool select_physical_device(PGRAPHState *pg, Error **errp)
VK_VERSION_MINOR(r->device_props.driverVersion),
VK_VERSION_PATCH(r->device_props.driverVersion));
size_t vsh_attr_values_size =
NV2A_VERTEXSHADER_ATTRIBUTES * 4 * sizeof(float);
assert(r->device_props.limits.maxPushConstantsSize >= vsh_attr_values_size);
return true;
}

View File

@ -192,6 +192,8 @@ typedef struct ShaderBinding {
int clip_region_loc;
int material_alpha_loc;
int uniform_attrs_loc;
} ShaderBinding;
typedef struct TextureKey {

View File

@ -34,6 +34,8 @@
#include "renderer.h"
#include <locale.h>
// Size in bytes of one vec4 (4 floats) for each of the
// NV2A_VERTEXSHADER_ATTRIBUTES vertex shader attributes. This is the
// worst-case amount of inline attribute data; it is compared against the
// device's maxPushConstantsSize limit when choosing between push constants
// and the UBO path.
const size_t MAX_UNIFORM_ATTR_VALUES_SIZE = NV2A_VERTEXSHADER_ATTRIBUTES * 4 * sizeof(float);
static void create_descriptor_pool(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
@ -305,6 +307,9 @@ static void update_shader_constant_locations(ShaderBinding *binding)
binding->material_alpha_loc =
uniform_index(&binding->vertex->uniforms, "material_alpha");
binding->uniform_attrs_loc =
uniform_index(&binding->vertex->uniforms, "inlineValue");
}
static void shader_cache_entry_init(Lru *lru, LruNode *node, void *state)
@ -430,11 +435,26 @@ static ShaderBinding *gen_shaders(PGRAPHState *pg, ShaderState *state)
return snode;
}
/*
 * Write the inline values of the binding's uniform attributes into the
 * vertex shader uniform located at binding->uniform_attrs_loc.
 *
 * The values are fetched packed (one row of 4 floats per attribute set in
 * binding->state.uniform_attrs) and uploaded as a flat float array. Nothing
 * is uploaded when no attribute is selected.
 */
static void update_uniform_attr_values(PGRAPHState *pg, ShaderBinding *binding)
{
    float packed[NV2A_VERTEXSHADER_ATTRIBUTES][4];
    int count = 0;

    pgraph_get_inline_values(pg, binding->state.uniform_attrs, packed,
                             &count);
    if (count <= 0) {
        return;
    }

    /* 4 floats per packed attribute row. */
    uniform1fv(&binding->vertex->uniforms, binding->uniform_attrs_loc,
               count * 4, &packed[0][0]);
}
// FIXME: Move to common
static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding,
bool binding_changed, bool vertex_program,
bool fixed_function)
{
ShaderState *state = &binding->state;
int i, j;
/* update combiner constants */
@ -662,6 +682,10 @@ static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding,
uniform1f(&binding->vertex->uniforms, binding->material_alpha_loc,
pg->material_alpha);
}
if (!state->use_push_constants_for_uniform_attrs && state->uniform_attrs) {
update_uniform_attr_values(pg, binding);
}
}
// Quickly check PGRAPH state to see if any registers have changed that
@ -742,6 +766,12 @@ void pgraph_vk_bind_shaders(PGRAPHState *pg)
ShaderState new_state;
memset(&new_state, 0, sizeof(ShaderState));
new_state = pgraph_get_shader_state(pg);
new_state.vulkan = true;
new_state.psh.vulkan = true;
new_state.use_push_constants_for_uniform_attrs =
(r->device_props.limits.maxPushConstantsSize >=
MAX_UNIFORM_ATTR_VALUES_SIZE);
if (!r->shader_binding || memcmp(&r->shader_binding->state, &new_state, sizeof(ShaderState))) {
r->shader_binding = gen_shaders(pg, &new_state);
r->shader_bindings_changed = true;

View File

@ -309,4 +309,4 @@ void pgraph_vk_bind_vertex_attributes_inline(NV2AState *d)
pg->uniform_attrs |= 1 << i;
}
}
}
}