From 3d96fe79cc1d51830b4d26a2ca5134f3b00ae77e Mon Sep 17 00:00:00 2001 From: kd-11 Date: Tue, 14 Jan 2020 16:21:44 +0300 Subject: [PATCH] vk: Implement dynamic sized compute heap - Implements a dynamically sized compute heap so that the heap can grow when it is too small. --- rpcs3/Emu/RSX/VK/VKHelpers.cpp | 14 +++++++++++--- rpcs3/Emu/RSX/VK/VKHelpers.h | 2 +- rpcs3/Emu/RSX/VK/VKTexture.cpp | 27 +++++++++++++++++++-------- rpcs3/Emu/RSX/VK/VKTextureCache.h | 6 +++--- 4 files changed, 34 insertions(+), 15 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index d515a09f16..7f09780432 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -338,12 +338,20 @@ namespace vk return ptr.get(); } - vk::buffer* get_scratch_buffer() + vk::buffer* get_scratch_buffer(u32 min_required_size) { + if (g_scratch_buffer && g_scratch_buffer->size() < min_required_size) + { + // Scratch heap cannot fit requirements. Discard it and allocate a new one. 
+ vk::get_resource_manager()->dispose(g_scratch_buffer); + } + if (!g_scratch_buffer) { - // 128M disposable scratch memory - g_scratch_buffer = std::make_unique<vk::buffer>(*g_current_renderer, 128 * 0x100000, + // Choose optimal size + const u64 alloc_size = std::max<u64>(128 * 0x100000, align(min_required_size, 0x100000)); + + g_scratch_buffer = std::make_unique<vk::buffer>(*g_current_renderer, alloc_size, g_current_renderer->get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 0); } diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index cadc86ec89..a3d081ba8d 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -144,7 +144,7 @@ namespace vk VkSampler null_sampler(); image_view* null_image_view(vk::command_buffer&); image* get_typeless_helper(VkFormat format, u32 requested_width, u32 requested_height); - buffer* get_scratch_buffer(); + buffer* get_scratch_buffer(u32 min_required_size = 0); data_heap* get_upload_heap(); memory_type_mapping get_memory_mapping(const physical_device& dev); diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp index b2b8c736e9..d87205cd8d 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -221,7 +221,6 @@ namespace vk src->push_layout(cmd, VK_IMAGE_LAYOUT_GENERAL); } - auto scratch_buf = vk::get_scratch_buffer(); VkBufferImageCopy src_copy{}, dst_copy{}; src_copy.imageExtent = { u32(src_rect.x2 - src_rect.x1), u32(src_rect.y2 - src_rect.y1), 1 }; src_copy.imageOffset = { src_rect.x1, src_rect.y1, 0 }; @@ -231,6 +230,21 @@ namespace vk dst_copy.imageOffset = { dst_rect.x1, dst_rect.y1, 0 }; dst_copy.imageSubresource = { dst_aspect & dst_transfer_mask, 0, 0, 1 }; + const auto src_texel_size = vk::get_format_texel_width(src->info.format); + const auto src_length = src_texel_size * src_copy.imageExtent.width * src_copy.imageExtent.height; 
+ u64 min_scratch_size = src_length; + + // Check for DS manipulation which will affect scratch memory requirements + if (const VkFlags combined_aspect = src->aspect() | dst->aspect(); + (combined_aspect & VK_IMAGE_ASPECT_STENCIL_BIT) != 0) + { + // At least one depth-stencil merge/extract required; requirements change to 2(w*h*bpp) + (w*h) + min_scratch_size = (src_length * 2) + (src_length / src_texel_size); + } + + // Initialize scratch memory + auto scratch_buf = vk::get_scratch_buffer(min_scratch_size); + for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level) { vk::copy_image_to_buffer(cmd, src, scratch_buf, src_copy); @@ -247,10 +261,7 @@ namespace vk } else { - const auto elem_size = vk::get_format_texel_width(src->info.format); - const auto length = elem_size * src_copy.imageExtent.width * src_copy.imageExtent.height; - - insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, length, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, src_length, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); vk::cs_shuffle_base *shuffle_kernel = nullptr; @@ -275,9 +286,9 @@ namespace vk } } - shuffle_kernel->run(cmd, scratch_buf, length); + shuffle_kernel->run(cmd, scratch_buf, src_length); - insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, length, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, src_length, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT); } } @@ -742,7 +753,7 @@ namespace vk { if (!scratch_buf) { - scratch_buf = vk::get_scratch_buffer(); + scratch_buf = vk::get_scratch_buffer(image_linear_size * 2); buffer_copies.reserve(subresource_layout.size()); } diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h 
index c3679daa24..67e67d2ecb 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -197,7 +197,7 @@ namespace vk const auto transfer_pitch = real_pitch; const auto task_length = transfer_pitch * src_area.height(); - auto working_buffer = vk::get_scratch_buffer(); + auto working_buffer = vk::get_scratch_buffer(task_length); auto final_mapping = vk::map_dma(cmd, valid_range.start, section_length); VkBufferImageCopy region = {}; @@ -678,10 +678,10 @@ namespace vk copy.imageOffset = { src_x, src_y, 0 }; copy.imageSubresource = { src_image->aspect(), 0, 0, 1 }; - auto scratch_buf = vk::get_scratch_buffer(); + const auto mem_length = src_w * src_h * dst_bpp; + auto scratch_buf = vk::get_scratch_buffer(mem_length); vkCmdCopyImageToBuffer(cmd, src_image->value, src_image->current_layout, scratch_buf->value, 1, &copy); - const auto mem_length = src_w * src_h * dst_bpp; vk::insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, mem_length, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);