From da1e72a39a3c4e9bedd9f64e52fbbfb6b88fef38 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Fri, 26 Jul 2024 17:21:01 -0700 Subject: [PATCH] nv2a/vk: Use additional descriptor sets in compute ops --- hw/xbox/nv2a/pgraph/vk/draw.c | 2 ++ hw/xbox/nv2a/pgraph/vk/renderer.h | 5 ++- hw/xbox/nv2a/pgraph/vk/surface-compute.c | 39 ++++++++++++++++++------ hw/xbox/nv2a/pgraph/vk/surface.c | 13 +++++--- hw/xbox/nv2a/pgraph/vk/texture.c | 14 ++++++--- 5 files changed, 55 insertions(+), 18 deletions(-) diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c index 0816835113..91aa6d1345 100644 --- a/hw/xbox/nv2a/pgraph/vk/draw.c +++ b/hw/xbox/nv2a/pgraph/vk/draw.c @@ -1322,6 +1322,8 @@ void pgraph_vk_finish(PGRAPHState *pg, FinishReason finish_reason) NV2AState *d = container_of(pg, NV2AState, pgraph); pgraph_vk_process_pending_reports_internal(d); + + pgraph_vk_compute_finish_complete(r); } void pgraph_vk_begin_command_buffer(PGRAPHState *pg) diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h index 0b835e1c5c..c66e354193 100644 --- a/hw/xbox/nv2a/pgraph/vk/renderer.h +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -260,7 +260,8 @@ typedef struct PGRAPHVkDisplayState { typedef struct PGRAPHVkComputeState { VkDescriptorPool descriptor_pool; VkDescriptorSetLayout descriptor_set_layout; - VkDescriptorSet descriptor_sets[1]; + VkDescriptorSet descriptor_sets[1024]; + int descriptor_set_index; VkPipelineLayout pipeline_layout; VkPipeline pipeline_pack_d24s8; VkPipeline pipeline_unpack_d24s8; @@ -458,6 +459,8 @@ void pgraph_vk_reload_surface_scale_factor(PGRAPHState *pg); // surface-compute.c void pgraph_vk_init_compute(PGRAPHState *pg); +bool pgraph_vk_compute_needs_finish(PGRAPHVkState *r); +void pgraph_vk_compute_finish_complete(PGRAPHVkState *r); void pgraph_vk_finalize_compute(PGRAPHState *pg); void pgraph_vk_pack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, VkCommandBuffer cmd, VkBuffer src, diff --git a/hw/xbox/nv2a/pgraph/vk/surface-compute.c b/hw/xbox/nv2a/pgraph/vk/surface-compute.c index 045f8231b8..dc14840dd1 100644 --- a/hw/xbox/nv2a/pgraph/vk/surface-compute.c +++ b/hw/xbox/nv2a/pgraph/vk/surface-compute.c @@ -189,7 +189,7 @@ static void create_descriptor_sets(PGRAPHState *pg) { PGRAPHVkState *r = pg->vk_renderer_state; - VkDescriptorSetLayout layouts[ARRAY_SIZE(r->descriptor_sets)]; + VkDescriptorSetLayout layouts[ARRAY_SIZE(r->compute.descriptor_sets)]; for (int i = 0; i < ARRAY_SIZE(layouts); i++) { layouts[i] = r->compute.descriptor_set_layout; } @@ -269,12 +269,15 @@ static void update_descriptor_sets(PGRAPHState *pg, assert(count == 3); VkWriteDescriptorSet descriptor_writes[3]; - const int descriptor_set_index = 0; + + assert(r->compute.descriptor_set_index < + ARRAY_SIZE(r->compute.descriptor_sets)); for (int i = 0; i < count; i++) { descriptor_writes[i] = (VkWriteDescriptorSet){ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstSet = r->compute.descriptor_sets[descriptor_set_index], + .dstSet = + r->compute.descriptor_sets[r->compute.descriptor_set_index], .dstBinding = i, .dstArrayElement = 0, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, @@ -283,6 +286,21 @@ static void update_descriptor_sets(PGRAPHState *pg, }; } vkUpdateDescriptorSets(r->device, count, descriptor_writes, 0, NULL); + + r->compute.descriptor_set_index += 1; +} + +bool pgraph_vk_compute_needs_finish(PGRAPHVkState *r) +{ + bool need_descriptor_write_reset = (r->compute.descriptor_set_index >= + ARRAY_SIZE(r->compute.descriptor_sets)); + + return need_descriptor_write_reset; +} + +void pgraph_vk_compute_finish_complete(PGRAPHVkState *r) +{ + r->compute.descriptor_set_index = 0; } // @@ -329,6 +347,7 @@ void pgraph_vk_pack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, .range = output_size, }, }; + update_descriptor_sets(pg, buffers, ARRAY_SIZE(buffers)); if (surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT) { @@ -340,9 +359,10 @@ void pgraph_vk_pack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, } else { assert(!"Unsupported pack format"); } - vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, - r->compute.pipeline_layout, 0, 1, - &r->compute.descriptor_sets[0], 0, NULL); + vkCmdBindDescriptorSets( + cmd, VK_PIPELINE_BIND_POINT_COMPUTE, r->compute.pipeline_layout, 0, 1, + &r->compute.descriptor_sets[r->compute.descriptor_set_index - 1], 0, + NULL); uint32_t push_constants[2] = { input_width, output_width }; assert(sizeof(push_constants) == 8); @@ -408,9 +428,10 @@ void pgraph_vk_unpack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, } else { assert(!"Unsupported pack format"); } - vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, - r->compute.pipeline_layout, 0, 1, - &r->compute.descriptor_sets[0], 0, NULL); + vkCmdBindDescriptorSets( + cmd, VK_PIPELINE_BIND_POINT_COMPUTE, r->compute.pipeline_layout, 0, 1, + &r->compute.descriptor_sets[r->compute.descriptor_set_index - 1], 0, + NULL); assert(output_width >= input_width); uint32_t push_constants[2] = { input_width, output_width }; diff --git a/hw/xbox/nv2a/pgraph/vk/surface.c b/hw/xbox/nv2a/pgraph/vk/surface.c index 76a5e44a31..f45a129525 100644 --- a/hw/xbox/nv2a/pgraph/vk/surface.c +++ b/hw/xbox/nv2a/pgraph/vk/surface.c @@ -130,9 +130,18 @@ static void download_surface_to_buffer(NV2AState *d, SurfaceBinding *surface, nv2a_profile_inc_counter(NV2A_PROF_SURF_DOWNLOAD); + bool use_compute_to_convert_depth_stencil_format = + surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT || + surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT; + + bool compute_needs_finish = (use_compute_to_convert_depth_stencil_format && + pgraph_vk_compute_needs_finish(r)); + if (r->in_command_buffer && surface->draw_time >= r->command_buffer_start_time) { pgraph_vk_finish(pg, VK_FINISH_REASON_SURFACE_DOWN); + } else if (compute_needs_finish) { + pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE); } bool downscale = (pg->surface_scale_factor != 1); @@ -175,10 +184,6 @@ static void download_surface_to_buffer(NV2AState *d, SurfaceBinding *surface, .imageSubresource.layerCount = 1, }; - bool use_compute_to_convert_depth_stencil_format = - surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT || - surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT; - VkImage surface_image_loc; if (downscale && !use_compute_to_convert_depth_stencil_format) { copy_regions[0].imageExtent = diff --git a/hw/xbox/nv2a/pgraph/vk/texture.c b/hw/xbox/nv2a/pgraph/vk/texture.c index 34e903d958..2478f61d71 100644 --- a/hw/xbox/nv2a/pgraph/vk/texture.c +++ b/hw/xbox/nv2a/pgraph/vk/texture.c @@ -591,6 +591,16 @@ static void copy_zeta_surface_to_texture(PGRAPHState *pg, SurfaceBinding *surfac TextureShape *state = &texture->key.state; VkColorFormatInfo vkf = kelvin_color_format_vk_map[state->color_format]; + bool use_compute_to_convert_depth_stencil = + surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT || + surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT; + + bool compute_needs_finish = use_compute_to_convert_depth_stencil && + pgraph_vk_compute_needs_finish(r); + if (compute_needs_finish) { + pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE); + } + nv2a_profile_inc_counter(NV2A_PROF_SURF_TO_TEX); trace_nv2a_pgraph_surface_render_to_texture( @@ -644,10 +654,6 @@ static void copy_zeta_surface_to_texture(PGRAPHState *pg, SurfaceBinding *surfac .imageExtent = (VkExtent3D){scaled_width, scaled_height, 1}, }; } - - bool use_compute_to_convert_depth_stencil = - surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT || - surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT; assert(use_compute_to_convert_depth_stencil && "Unimplemented"); StorageBuffer *dst_storage_buffer = &r->storage_buffers[BUFFER_COMPUTE_DST];