vk: Implement dynamically sized compute heap

- Implements a dynamically sized compute heap so that it can be discarded and
reallocated at a larger size when it is too small for a request.
This commit is contained in:
kd-11 2020-01-14 16:21:44 +03:00 committed by kd-11
parent 1ccb3c4492
commit 3d96fe79cc
4 changed files with 34 additions and 15 deletions

View File

@ -338,12 +338,20 @@ namespace vk
return ptr.get();
}
vk::buffer* get_scratch_buffer()
vk::buffer* get_scratch_buffer(u32 min_required_size)
{
if (g_scratch_buffer && g_scratch_buffer->size() < min_required_size)
{
// Scratch heap cannot fit requirements. Discard it and allocate a new one.
vk::get_resource_manager()->dispose(g_scratch_buffer);
}
if (!g_scratch_buffer)
{
// 128M disposable scratch memory
g_scratch_buffer = std::make_unique<vk::buffer>(*g_current_renderer, 128 * 0x100000,
// Choose optimal size
const u64 alloc_size = std::max<u64>(128 * 0x100000, align(min_required_size, 0x100000));
g_scratch_buffer = std::make_unique<vk::buffer>(*g_current_renderer, alloc_size,
g_current_renderer->get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 0);
}

View File

@ -144,7 +144,7 @@ namespace vk
VkSampler null_sampler();
image_view* null_image_view(vk::command_buffer&);
image* get_typeless_helper(VkFormat format, u32 requested_width, u32 requested_height);
buffer* get_scratch_buffer();
buffer* get_scratch_buffer(u32 min_required_size = 0);
data_heap* get_upload_heap();
memory_type_mapping get_memory_mapping(const physical_device& dev);

View File

@ -221,7 +221,6 @@ namespace vk
src->push_layout(cmd, VK_IMAGE_LAYOUT_GENERAL);
}
auto scratch_buf = vk::get_scratch_buffer();
VkBufferImageCopy src_copy{}, dst_copy{};
src_copy.imageExtent = { u32(src_rect.x2 - src_rect.x1), u32(src_rect.y2 - src_rect.y1), 1 };
src_copy.imageOffset = { src_rect.x1, src_rect.y1, 0 };
@ -231,6 +230,21 @@ namespace vk
dst_copy.imageOffset = { dst_rect.x1, dst_rect.y1, 0 };
dst_copy.imageSubresource = { dst_aspect & dst_transfer_mask, 0, 0, 1 };
const auto src_texel_size = vk::get_format_texel_width(src->info.format);
const auto src_length = src_texel_size * src_copy.imageExtent.width * src_copy.imageExtent.height;
u64 min_scratch_size = src_length;
// Check for DS manipulation which will affect scratch memory requirements
if (const VkFlags combined_aspect = src->aspect() | dst->aspect();
(combined_aspect & VK_IMAGE_ASPECT_STENCIL_BIT) != 0)
{
// At least one depth-stencil merge/extract required; requirements change to 2(w*h*bpp) + (w*h)
min_scratch_size = (src_length * 2) + (src_length / src_texel_size);
}
// Initialize scratch memory
auto scratch_buf = vk::get_scratch_buffer(min_scratch_size);
for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level)
{
vk::copy_image_to_buffer(cmd, src, scratch_buf, src_copy);
@ -247,10 +261,7 @@ namespace vk
}
else
{
const auto elem_size = vk::get_format_texel_width(src->info.format);
const auto length = elem_size * src_copy.imageExtent.width * src_copy.imageExtent.height;
insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, length, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, src_length, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
vk::cs_shuffle_base *shuffle_kernel = nullptr;
@ -275,9 +286,9 @@ namespace vk
}
}
shuffle_kernel->run(cmd, scratch_buf, length);
shuffle_kernel->run(cmd, scratch_buf, src_length);
insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, length, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, src_length, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
}
}
@ -742,7 +753,7 @@ namespace vk
{
if (!scratch_buf)
{
scratch_buf = vk::get_scratch_buffer();
scratch_buf = vk::get_scratch_buffer(image_linear_size * 2);
buffer_copies.reserve(subresource_layout.size());
}

View File

@ -197,7 +197,7 @@ namespace vk
const auto transfer_pitch = real_pitch;
const auto task_length = transfer_pitch * src_area.height();
auto working_buffer = vk::get_scratch_buffer();
auto working_buffer = vk::get_scratch_buffer(task_length);
auto final_mapping = vk::map_dma(cmd, valid_range.start, section_length);
VkBufferImageCopy region = {};
@ -678,10 +678,10 @@ namespace vk
copy.imageOffset = { src_x, src_y, 0 };
copy.imageSubresource = { src_image->aspect(), 0, 0, 1 };
auto scratch_buf = vk::get_scratch_buffer();
const auto mem_length = src_w * src_h * dst_bpp;
auto scratch_buf = vk::get_scratch_buffer(mem_length);
vkCmdCopyImageToBuffer(cmd, src_image->value, src_image->current_layout, scratch_buf->value, 1, &copy);
const auto mem_length = src_w * src_h * dst_bpp;
vk::insert_buffer_memory_barrier(cmd, scratch_buf->value, 0, mem_length, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);