vk: Implement dynamically sized compute heap

- Implements a dynamically sized compute heap so that it can grow when the
current allocation is too small to satisfy a request.
This commit is contained in:
kd-11 2020-01-14 16:21:44 +03:00 committed by kd-11
parent 1ccb3c4492
commit 3d96fe79cc
4 changed files with 34 additions and 15 deletions

View File

@ -338,12 +338,20 @@ namespace vk
return ptr.get(); return ptr.get();
} }
vk::buffer* get_scratch_buffer() vk::buffer* get_scratch_buffer(u32 min_required_size)
{ {
if (g_scratch_buffer && g_scratch_buffer->size() < min_required_size)
{
// Scratch heap cannot fit requirements. Discard it and allocate a new one.
vk::get_resource_manager()->dispose(g_scratch_buffer);
}
if (!g_scratch_buffer) if (!g_scratch_buffer)
{ {
// 128M disposable scratch memory // Choose optimal size
g_scratch_buffer = std::make_unique<vk::buffer>(*g_current_renderer, 128 * 0x100000, const u64 alloc_size = std::max<u64>(128 * 0x100000, align(min_required_size, 0x100000));
g_scratch_buffer = std::make_unique<vk::buffer>(*g_current_renderer, alloc_size,
g_current_renderer->get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, g_current_renderer->get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 0); VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 0);
} }

View File

@ -144,7 +144,7 @@ namespace vk
VkSampler null_sampler(); VkSampler null_sampler();
image_view* null_image_view(vk::command_buffer&); image_view* null_image_view(vk::command_buffer&);
image* get_typeless_helper(VkFormat format, u32 requested_width, u32 requested_height); image* get_typeless_helper(VkFormat format, u32 requested_width, u32 requested_height);
buffer* get_scratch_buffer(); buffer* get_scratch_buffer(u32 min_required_size = 0);
data_heap* get_upload_heap(); data_heap* get_upload_heap();
memory_type_mapping get_memory_mapping(const physical_device& dev); memory_type_mapping get_memory_mapping(const physical_device& dev);

View File

@ -221,7 +221,6 @@ namespace vk
src->push_layout(cmd, VK_IMAGE_LAYOUT_GENERAL); src->push_layout(cmd, VK_IMAGE_LAYOUT_GENERAL);
} }
auto scratch_buf = vk::get_scratch_buffer();
VkBufferImageCopy src_copy{}, dst_copy{}; VkBufferImageCopy src_copy{}, dst_copy{};
src_copy.imageExtent = { u32(src_rect.x2 - src_rect.x1), u32(src_rect.y2 - src_rect.y1), 1 }; src_copy.imageExtent = { u32(src_rect.x2 - src_rect.x1), u32(src_rect.y2 - src_rect.y1), 1 };
src_copy.imageOffset = { src_rect.x1, src_rect.y1, 0 }; src_copy.imageOffset = { src_rect.x1, src_rect.y1, 0 };
@ -231,6 +230,21 @@ namespace vk
dst_copy.imageOffset = { dst_rect.x1, dst_rect.y1, 0 }; dst_copy.imageOffset = { dst_rect.x1, dst_rect.y1, 0 };
dst_copy.imageSubresource = { dst_aspect & dst_transfer_mask, 0, 0, 1 }; dst_copy.imageSubresource = { dst_aspect & dst_transfer_mask, 0, 0, 1 };
const auto src_texel_size = vk::get_format_texel_width(src->info.format);
const auto src_length = src_texel_size * src_copy.imageExtent.width * src_copy.imageExtent.height;
u64 min_scratch_size = src_length;
// Check for DS manipulation which will affect scratch memory requirements
if (const VkFlags combined_aspect = src->aspect() | dst->aspect();
(combined_aspect & VK_IMAGE_ASPECT_STENCIL_BIT) != 0)
{
// At least one depth-stencil merge/extract required; requirements change to 2(w*h*bpp) + (w*h)
min_scratch_size = (src_length * 2) + (src_length / src_texel_size);
}
// Initialize scratch memory
auto scratch_buf = vk::get_scratch_buffer(min_scratch_size);
for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level) for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level)
{ {
vk::copy_image_to_buffer(cmd, src, scratch_buf, src_copy); vk::copy_image_to_buffer(cmd, src, scratch_buf, src_copy);
@ -247,10 +261,7 @@ namespace vk
} }
else else
{ {
const auto elem_size = vk::get_format_texel_width(src->info.format); insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, src_length, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
const auto length = elem_size * src_copy.imageExtent.width * src_copy.imageExtent.height;
insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, length, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
vk::cs_shuffle_base *shuffle_kernel = nullptr; vk::cs_shuffle_base *shuffle_kernel = nullptr;
@ -275,9 +286,9 @@ namespace vk
} }
} }
shuffle_kernel->run(cmd, scratch_buf, length); shuffle_kernel->run(cmd, scratch_buf, src_length);
insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, length, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, src_length, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT); VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
} }
} }
@ -742,7 +753,7 @@ namespace vk
{ {
if (!scratch_buf) if (!scratch_buf)
{ {
scratch_buf = vk::get_scratch_buffer(); scratch_buf = vk::get_scratch_buffer(image_linear_size * 2);
buffer_copies.reserve(subresource_layout.size()); buffer_copies.reserve(subresource_layout.size());
} }

View File

@ -197,7 +197,7 @@ namespace vk
const auto transfer_pitch = real_pitch; const auto transfer_pitch = real_pitch;
const auto task_length = transfer_pitch * src_area.height(); const auto task_length = transfer_pitch * src_area.height();
auto working_buffer = vk::get_scratch_buffer(); auto working_buffer = vk::get_scratch_buffer(task_length);
auto final_mapping = vk::map_dma(cmd, valid_range.start, section_length); auto final_mapping = vk::map_dma(cmd, valid_range.start, section_length);
VkBufferImageCopy region = {}; VkBufferImageCopy region = {};
@ -678,10 +678,10 @@ namespace vk
copy.imageOffset = { src_x, src_y, 0 }; copy.imageOffset = { src_x, src_y, 0 };
copy.imageSubresource = { src_image->aspect(), 0, 0, 1 }; copy.imageSubresource = { src_image->aspect(), 0, 0, 1 };
auto scratch_buf = vk::get_scratch_buffer(); const auto mem_length = src_w * src_h * dst_bpp;
auto scratch_buf = vk::get_scratch_buffer(mem_length);
vkCmdCopyImageToBuffer(cmd, src_image->value, src_image->current_layout, scratch_buf->value, 1, &copy); vkCmdCopyImageToBuffer(cmd, src_image->value, src_image->current_layout, scratch_buf->value, 1, &copy);
const auto mem_length = src_w * src_h * dst_bpp;
vk::insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, mem_length, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, mem_length, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);