vk: Tighten some memory bounds checks

This commit is contained in:
kd-11 2024-02-06 02:07:04 +03:00 committed by kd-11
parent e5c831a800
commit 041daab068
2 changed files with 25 additions and 23 deletions

View File

@ -601,20 +601,20 @@ namespace vk
this->in_offset = config.src_offset;
this->out_offset = config.dst_offset;
const auto tiled_height = std::min(
const auto tile_aligned_height = std::min(
utils::align<u32>(config.image_height, 64),
utils::aligned_div(config.tile_size - config.tile_base_offset, config.tile_pitch)
);
if constexpr (Op == RSX_detiler_op::decode)
{
this->in_block_length = tiled_height * config.tile_pitch;
this->in_block_length = tile_aligned_height * config.tile_pitch;
this->out_block_length = config.image_height * config.image_pitch;
}
else
{
this->in_block_length = config.image_height * config.image_pitch;
this->out_block_length = tiled_height* config.tile_pitch;
this->out_block_length = tile_aligned_height * config.tile_pitch;
}
auto get_prime_factor = [](u32 pitch) -> std::pair<u32, u32>
@ -650,7 +650,7 @@ namespace vk
params.tile_pitch = config.tile_pitch;
params.tile_bank = config.bank;
params.image_width = config.image_width;
params.image_height = tiled_height;
params.image_height = (Op == RSX_detiler_op::decode) ? tile_aligned_height : config.image_height;
params.image_pitch = config.image_pitch;
params.image_bpp = config.image_bpp;
set_parameters(cmd);

View File

@ -115,7 +115,6 @@ namespace vk
if (require_gpu_transform)
{
auto section_length = valid_range.length();
const auto transfer_pitch = real_pitch;
const auto task_length = transfer_pitch * src_area.height();
auto working_buffer_length = calculate_working_buffer_size(task_length, src->aspect());
@ -251,25 +250,28 @@ namespace vk
real_pitch = tiled_region.tile->pitch; // We're always copying the full image. In case of partials we're "filling in" blocks, not doing partial 2D copies.
require_rw_barrier = true;
#if 0
vk::insert_buffer_memory_barrier(cmd, working_buffer->value, result_offset, working_buffer_length,
VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
#if VISUALIZE_GPU_TILING
if (g_cfg.video.renderdoc_compatiblity)
{
vk::insert_buffer_memory_barrier(cmd, working_buffer->value, result_offset, working_buffer_length,
VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
// Debug write
auto scratch_img = vk::get_typeless_helper(VK_FORMAT_B8G8R8A8_UNORM, RSX_FORMAT_CLASS_COLOR, tiled_region.tile->pitch / 4, 768);
scratch_img->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
// Debug write
auto scratch_img = vk::get_typeless_helper(VK_FORMAT_B8G8R8A8_UNORM, RSX_FORMAT_CLASS_COLOR, tiled_region.tile->pitch / 4, 768);
scratch_img->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
VkBufferImageCopy dbg_copy{};
dbg_copy.bufferOffset = config.dst_offset;
dbg_copy.imageExtent.width = width;
dbg_copy.imageExtent.height = height;
dbg_copy.imageExtent.depth = 1;
dbg_copy.bufferRowLength = tiled_region.tile->pitch / 4;
dbg_copy.imageSubresource = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .mipLevel = 0, .baseArrayLayer = 0, .layerCount = 1 };
vk::copy_buffer_to_image(cmd, working_buffer, scratch_img, dbg_copy);
VkBufferImageCopy dbg_copy{};
dbg_copy.bufferOffset = config.dst_offset;
dbg_copy.imageExtent.width = width;
dbg_copy.imageExtent.height = height;
dbg_copy.imageExtent.depth = 1;
dbg_copy.bufferRowLength = tiled_region.tile->pitch / 4;
dbg_copy.imageSubresource = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .mipLevel = 0, .baseArrayLayer = 0, .layerCount = 1 };
vk::copy_buffer_to_image(cmd, working_buffer, scratch_img, dbg_copy);
scratch_img->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
scratch_img->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
}
#endif
#endif
@ -277,7 +279,7 @@ namespace vk
if (require_rw_barrier)
{
vk::insert_buffer_memory_barrier(cmd, working_buffer->value, result_offset, working_buffer_length,
vk::insert_buffer_memory_barrier(cmd, working_buffer->value, result_offset, dma_sync_region.length(),
VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
}
@ -289,7 +291,7 @@ namespace vk
VkBufferCopy copy = {};
copy.srcOffset = result_offset;
copy.dstOffset = dma_mapping.first;
copy.size = section_length;
copy.size = dma_sync_region.length();
vkCmdCopyBuffer(cmd, working_buffer->value, dma_mapping.second->value, 1, &copy);
}
else