rsx/vk: Support mismatched pitch (padded) and fix the overscan problem

This commit is contained in:
kd-11 2023-09-12 03:38:59 +03:00 committed by kd-11
parent 647f7ddeec
commit ffc61e01f7
5 changed files with 45 additions and 16 deletions

View File

@ -19,6 +19,7 @@ namespace rsx
uint32_t tile_bank; uint32_t tile_bank;
uint32_t image_width; uint32_t image_width;
uint32_t image_height; uint32_t image_height;
uint32_t image_pitch;
uint32_t image_bpp; uint32_t image_bpp;
}; };
@ -98,7 +99,7 @@ namespace rsx
tile_address ^= ((tile_address >> 11) & 1) << 10; tile_address ^= ((tile_address >> 11) & 1) << 10;
// Calculate relative addresses and sample // Calculate relative addresses and sample
uint32_t linear_image_offset = (row * conf.tile_pitch) + (col * conf.image_bpp); uint32_t linear_image_offset = (row * conf.image_pitch) + (col * conf.image_bpp);
uint32_t tile_data_offset = tile_address - (conf.tile_base_address + conf.tile_offset); uint32_t tile_data_offset = tile_address - (conf.tile_base_address + conf.tile_offset);
if (tile_data_offset >= conf.tile_size) if (tile_data_offset >= conf.tile_size)
@ -160,6 +161,7 @@ namespace rsx
.tile_bank = bank_sense, .tile_bank = bank_sense,
.image_width = image_width, .image_width = image_width,
.image_height = image_height, .image_height = image_height,
.image_pitch = row_pitch_in_bytes,
.image_bpp = sizeof(T) .image_bpp = sizeof(T)
}; };

View File

@ -39,6 +39,7 @@ layout(%push_block) uniform Configuration
uint tile_bank; uint tile_bank;
uint image_width; uint image_width;
uint image_height; uint image_height;
uint image_pitch;
uint image_bpp; uint image_bpp;
}; };
#else #else
@ -52,6 +53,7 @@ layout(%push_block) uniform Configuration
uniform uint tile_bank; uniform uint tile_bank;
uniform uint image_width; uniform uint image_width;
uniform uint image_height; uniform uint image_height;
uniform uint image_pitch;
uniform uint image_bpp; uniform uint image_bpp;
#endif #endif
@ -306,7 +308,7 @@ void do_memory_op(const in uint row, const in uint col)
tile_address ^= ((tile_address >> 11) & 1) << 10; tile_address ^= ((tile_address >> 11) & 1) << 10;
// Calculate relative addresses and sample // Calculate relative addresses and sample
uint linear_image_offset = (row * tile_pitch) + (col * image_bpp); uint linear_image_offset = (row * image_pitch) + (col * image_bpp);
uint tile_data_offset = tile_address - (tile_base_address + tile_offset); uint tile_data_offset = tile_address - (tile_base_address + tile_offset);
if (tile_data_offset >= tile_size) if (tile_data_offset >= tile_size)
@ -335,7 +337,7 @@ void main()
const uint row = gl_GlobalInvocationID.y; const uint row = gl_GlobalInvocationID.y;
const uint col0 = gl_GlobalInvocationID.x; const uint col0 = gl_GlobalInvocationID.x;
// for (uint col = col0; col < (col0 + num_iterations); ++col) for (uint col = col0; col < (col0 + num_iterations); ++col)
{ {
if (row >= image_height || col0 >= image_width) if (row >= image_height || col0 >= image_width)
{ {

View File

@ -524,6 +524,7 @@ namespace vk
u16 image_width; u16 image_width;
u16 image_height; u16 image_height;
u32 image_pitch; u32 image_pitch;
u8 image_bpp;
}; };
template <RSX_detiler_op Op> template <RSX_detiler_op Op>
@ -542,6 +543,7 @@ namespace vk
u32 tile_bank; u32 tile_bank;
u32 image_width; u32 image_width;
u32 image_height; u32 image_height;
u32 image_pitch;
u32 image_bpp; u32 image_bpp;
} params; } params;
#pragma pack (pop) #pragma pack (pop)
@ -557,7 +559,7 @@ namespace vk
{ {
ssbo_count = 2; ssbo_count = 2;
use_push_constants = true; use_push_constants = true;
push_constants_size = 44; push_constants_size = 48;
create(); create();
@ -565,7 +567,6 @@ namespace vk
#include "../Program/GLSLSnippets/RSXMemoryTiling.glsl" #include "../Program/GLSLSnippets/RSXMemoryTiling.glsl"
; ;
optimal_group_size = 1;
const std::pair<std::string_view, std::string> syntax_replace[] = const std::pair<std::string_view, std::string> syntax_replace[] =
{ {
{ "%loc", "0" }, { "%loc", "0" },
@ -646,11 +647,14 @@ namespace vk
params.tile_pitch = config.tile_pitch; params.tile_pitch = config.tile_pitch;
params.tile_bank = config.bank; params.tile_bank = config.bank;
params.image_width = config.image_width; params.image_width = config.image_width;
params.image_height = config.image_height; params.image_height = tiled_height;
params.image_bpp = config.image_pitch / config.image_width; params.image_pitch = config.image_pitch;
params.image_bpp = config.image_bpp;
set_parameters(cmd); set_parameters(cmd);
const u32 invocations_x = utils::aligned_div(config.image_width, optimal_group_size); const u32 subtexels_per_invocation = (config.image_bpp < 4) ? (4 / config.image_bpp) : 1;
const u32 virtual_width = config.image_width / subtexels_per_invocation;
const u32 invocations_x = utils::aligned_div(virtual_width, optimal_group_size);
compute_task::run(cmd, invocations_x, config.image_height, 1); compute_task::run(cmd, invocations_x, config.image_height, 1);
} }
}; };

View File

@ -99,13 +99,24 @@ namespace vk
if (require_gpu_transform) if (require_gpu_transform)
{ {
const auto section_length = valid_range.length(); auto section_length = valid_range.length();
const auto transfer_pitch = real_pitch; const auto transfer_pitch = real_pitch;
const auto task_length = transfer_pitch * src_area.height(); const auto task_length = transfer_pitch * src_area.height();
auto working_buffer_length = calculate_working_buffer_size(task_length, src->aspect()); auto working_buffer_length = calculate_working_buffer_size(task_length, src->aspect());
if (require_tiling) { if (require_tiling)
{
working_buffer_length += tiled_region.tile->size; working_buffer_length += tiled_region.tile->size;
// Calculate actual section length
const auto available_tile_size = tiled_region.tile->size - (valid_range.start - tiled_region.base_address);
const auto max_content_size = tiled_region.tile->pitch * utils::align(height, 64);
section_length = std::min(max_content_size, available_tile_size);
if (section_length > valid_range.length()) [[ likely ]]
{
dma_mapping = vk::map_dma(valid_range.start, section_length);
}
} }
auto working_buffer = vk::get_scratch_buffer(cmd, working_buffer_length); auto working_buffer = vk::get_scratch_buffer(cmd, working_buffer_length);
@ -185,14 +196,17 @@ namespace vk
.image_width = width, .image_width = width,
.image_height = height, .image_height = height,
.image_pitch = real_pitch .image_pitch = real_pitch,
.image_bpp = rsx::get_format_block_size_in_bytes(gcm_format)
}; };
// Execute // Execute
const auto job = vk::get_compute_task<vk::cs_tile_memcpy<RSX_detiler_op::encode>>(); const auto job = vk::get_compute_task<vk::cs_tile_memcpy<RSX_detiler_op::encode>>();
job->run(cmd, config); job->run(cmd, config);
// Update internal variables
result_offset = task_length; result_offset = task_length;
real_pitch = tiled_region.tile->pitch;
require_rw_barrier = true; require_rw_barrier = true;
#endif #endif
} }

View File

@ -291,7 +291,17 @@ namespace vk
// Calculate smallest range to flush - for framebuffers, the raster region is enough // Calculate smallest range to flush - for framebuffers, the raster region is enough
const auto range = (context == rsx::texture_upload_context::framebuffer_storage) ? get_section_range() : get_confirmed_range(); const auto range = (context == rsx::texture_upload_context::framebuffer_storage) ? get_section_range() : get_confirmed_range();
vk::flush_dma(range.start, range.length()); auto flush_length = range.length();
const auto tiled_region = rsx::get_current_renderer()->get_tiled_memory_region(range);
if (tiled_region)
{
const auto available_tile_size = tiled_region.tile->size - (range.start - tiled_region.base_address);
const auto max_content_size = tiled_region.tile->pitch * utils::align(height, 64);
flush_length = std::min(max_content_size, available_tile_size);
}
vk::flush_dma(range.start, flush_length);
#if DEBUG_DMA_TILING #if DEBUG_DMA_TILING
// Are we a tiled region? // Are we a tiled region?
@ -310,10 +320,7 @@ namespace vk
width, width,
height height
); );
const auto available_tile_size = tiled_region.tile->size - (range.start - tiled_region.base_address); std::memcpy(real_data, out_data.data(), flush_length);
const auto max_content_size = tiled_region.tile->pitch * utils::align(height, 64);
const auto write_length = std::min(max_content_size, available_tile_size);
std::memcpy(real_data, out_data.data(), write_length);
} }
#endif #endif