diff --git a/rpcs3/Emu/RSX/Common/tiled_dma_copy.hpp b/rpcs3/Emu/RSX/Common/tiled_dma_copy.hpp index 39e88a36f6..28f4577085 100644 --- a/rpcs3/Emu/RSX/Common/tiled_dma_copy.hpp +++ b/rpcs3/Emu/RSX/Common/tiled_dma_copy.hpp @@ -19,6 +19,7 @@ namespace rsx uint32_t tile_bank; uint32_t image_width; uint32_t image_height; + uint32_t image_pitch; uint32_t image_bpp; }; @@ -98,7 +99,7 @@ namespace rsx tile_address ^= ((tile_address >> 11) & 1) << 10; // Calculate relative addresses and sample - uint32_t linear_image_offset = (row * conf.tile_pitch) + (col * conf.image_bpp); + uint32_t linear_image_offset = (row * conf.image_pitch) + (col * conf.image_bpp); uint32_t tile_data_offset = tile_address - (conf.tile_base_address + conf.tile_offset); if (tile_data_offset >= conf.tile_size) @@ -160,6 +161,7 @@ namespace rsx .tile_bank = bank_sense, .image_width = image_width, .image_height = image_height, + .image_pitch = row_pitch_in_bytes, .image_bpp = sizeof(T) }; diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXMemoryTiling.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXMemoryTiling.glsl index 9b3dd62f73..eebed180c6 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXMemoryTiling.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXMemoryTiling.glsl @@ -39,6 +39,7 @@ layout(%push_block) uniform Configuration uint tile_bank; uint image_width; uint image_height; + uint image_pitch; uint image_bpp; }; #else @@ -52,6 +53,7 @@ layout(%push_block) uniform Configuration uniform uint tile_bank; uniform uint image_width; uniform uint image_height; + uniform uint image_pitch; uniform uint image_bpp; #endif @@ -306,7 +308,7 @@ void do_memory_op(const in uint row, const in uint col) tile_address ^= ((tile_address >> 11) & 1) << 10; // Calculate relative addresses and sample - uint linear_image_offset = (row * tile_pitch) + (col * image_bpp); + uint linear_image_offset = (row * image_pitch) + (col * image_bpp); uint tile_data_offset = tile_address - (tile_base_address + tile_offset); if (tile_data_offset >= tile_size) @@ -335,7 +337,7 @@ void main() const uint row = gl_GlobalInvocationID.y; const uint col0 = gl_GlobalInvocationID.x; - // for (uint col = col0; col < (col0 + num_iterations); ++col) + for (uint col = col0; col < (col0 + num_iterations); ++col) { if (row >= image_height || col0 >= image_width) { diff --git a/rpcs3/Emu/RSX/VK/VKCompute.h b/rpcs3/Emu/RSX/VK/VKCompute.h index c48ab89d76..94604020bf 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.h +++ b/rpcs3/Emu/RSX/VK/VKCompute.h @@ -524,6 +524,7 @@ namespace vk u16 image_width; u16 image_height; u32 image_pitch; + u8 image_bpp; }; template @@ -542,6 +543,7 @@ namespace vk u32 tile_bank; u32 image_width; u32 image_height; + u32 image_pitch; u32 image_bpp; } params; #pragma pack (pop) @@ -557,7 +559,7 @@ namespace vk { ssbo_count = 2; use_push_constants = true; - push_constants_size = 44; + push_constants_size = 48; create(); @@ -565,7 +567,6 @@ namespace vk #include "../Program/GLSLSnippets/RSXMemoryTiling.glsl" ; - optimal_group_size = 1; const std::pair syntax_replace[] = { { "%loc", "0" }, @@ -646,11 +647,14 @@ namespace vk params.tile_pitch = config.tile_pitch; params.tile_bank = config.bank; params.image_width = config.image_width; - params.image_height = config.image_height; - params.image_bpp = config.image_pitch / config.image_width; + params.image_height = tiled_height; + params.image_pitch = config.image_pitch; + params.image_bpp = config.image_bpp; set_parameters(cmd); - const u32 invocations_x = utils::aligned_div(config.image_width, optimal_group_size); + const u32 subtexels_per_invocation = (config.image_bpp < 4) ? (4 / config.image_bpp) : 1; + const u32 virtual_width = config.image_width / subtexels_per_invocation; + const u32 invocations_x = utils::aligned_div(virtual_width, optimal_group_size); compute_task::run(cmd, invocations_x, config.image_height, 1); } }; diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.cpp b/rpcs3/Emu/RSX/VK/VKTextureCache.cpp index 212c5852af..a4abcc4d93 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.cpp +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.cpp @@ -99,13 +99,24 @@ namespace vk if (require_gpu_transform) { - const auto section_length = valid_range.length(); + auto section_length = valid_range.length(); const auto transfer_pitch = real_pitch; const auto task_length = transfer_pitch * src_area.height(); auto working_buffer_length = calculate_working_buffer_size(task_length, src->aspect()); - if (require_tiling) { + if (require_tiling) + { working_buffer_length += tiled_region.tile->size; + + // Calculate actual section length + const auto available_tile_size = tiled_region.tile->size - (valid_range.start - tiled_region.base_address); + const auto max_content_size = tiled_region.tile->pitch * utils::align(height, 64); + section_length = std::min(max_content_size, available_tile_size); + + if (section_length > valid_range.length()) [[ likely ]] + { + dma_mapping = vk::map_dma(valid_range.start, section_length); + } } auto working_buffer = vk::get_scratch_buffer(cmd, working_buffer_length); @@ -185,14 +196,17 @@ namespace vk .image_width = width, .image_height = height, - .image_pitch = real_pitch + .image_pitch = real_pitch, + .image_bpp = rsx::get_format_block_size_in_bytes(gcm_format) }; // Execute const auto job = vk::get_compute_task>(); job->run(cmd, config); + // Update internal variables result_offset = task_length; + real_pitch = tiled_region.tile->pitch; require_rw_barrier = true; #endif } diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 09df70d67d..f418f761a7 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -291,7 +291,17 @@ namespace vk // Calculate smallest range to flush - for framebuffers, the raster region is enough const auto range = (context == rsx::texture_upload_context::framebuffer_storage) ? get_section_range() : get_confirmed_range(); - vk::flush_dma(range.start, range.length()); + auto flush_length = range.length(); + + const auto tiled_region = rsx::get_current_renderer()->get_tiled_memory_region(range); + if (tiled_region) + { + const auto available_tile_size = tiled_region.tile->size - (range.start - tiled_region.base_address); + const auto max_content_size = tiled_region.tile->pitch * utils::align(height, 64); + flush_length = std::min(max_content_size, available_tile_size); + } + + vk::flush_dma(range.start, flush_length); #if DEBUG_DMA_TILING // Are we a tiled region? @@ -310,10 +320,7 @@ namespace vk width, height ); - const auto available_tile_size = tiled_region.tile->size - (range.start - tiled_region.base_address); - const auto max_content_size = tiled_region.tile->pitch * utils::align(height, 64); - const auto write_length = std::min(max_content_size, available_tile_size); - std::memcpy(real_data, out_data.data(), write_length); + std::memcpy(real_data, out_data.data(), flush_length); } #endif