From 44f2cacf7ba06a07405ce29a2ae84ae8369bb065 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 23 Jan 2020 23:20:22 +0300 Subject: [PATCH] rsx: Blit engine tuning - Attempt to identify blit operations that will be flushed immediately after and just do them on CPU instead if the transformation is trivial. - If only a single blit section is contributing to an atlas merge op, the threshold should be 100%. The only acceptable result here is a truncation. --- rpcs3/Emu/RSX/Common/texture_cache.h | 54 ++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index f0406d6010..938f3e5a42 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -213,7 +213,8 @@ namespace rsx } } - bool atlas_covers_target_area() const + // Returns true if at least threshold% is covered in pixels + bool atlas_covers_target_area(u32 threshold) const { if (external_subresource_desc.op != deferred_request_command::atlas_gather) return true; @@ -222,7 +223,7 @@ namespace rsx max_x = 0, max_y = 0; // Require at least 90% coverage - const u32 target_area = ((min_x * min_y) * 90u) / 100u; + const u32 target_area = ((min_x * min_y) * threshold) / 100u; for (const auto §ion : external_subresource_desc.sections_to_copy) { @@ -1654,8 +1655,8 @@ namespace rsx return {}; } - if (!result.external_subresource_desc.sections_to_copy.empty() && - (_pool == 0 || result.atlas_covers_target_area())) + if (const auto section_count = result.external_subresource_desc.sections_to_copy.size(); + section_count > 0 && result.atlas_covers_target_area(section_count == 1? 99 : 90)) { // TODO: Overlapped section persistance is required for framebuffer resources to work with this! // Yellow filter in SCV is because of a 384x384 surface being reused as 160x90 (and likely not getting written to) @@ -1921,6 +1922,7 @@ namespace rsx const bool is_copy_op = (fcmp(scale_x, 1.f) && fcmp(scale_y, 1.f)); const bool is_format_convert = (dst_is_argb8 != src_is_argb8); + bool skip_if_collision_exists = false; // Offset in x and y for src is 0 (it is already accounted for when getting pixels_src) // Reproject final clip onto source... @@ -2075,6 +2077,9 @@ namespace rsx return false; } } + + // If a matching section exists with a different use-case, fall back to CPU memcpy + skip_if_collision_exists = true; } if (!g_cfg.video.use_gpu_texture_scaling) @@ -2176,9 +2181,20 @@ namespace rsx if (!dst_is_render_target) { // Check for any available region that will fit this one - const auto required_type = (use_null_region) ? texture_upload_context::dma : texture_upload_context::blit_engine_dst; + u32 required_type_mask; + if (use_null_region) + { + required_type_mask = texture_upload_context::dma; + } + else + { + required_type_mask = texture_upload_context::blit_engine_dst; + if (skip_if_collision_exists) required_type_mask |= texture_upload_context::shader_read; + } + const auto dst_range = address_range::start_length(dst_address, dst_payload_length); - auto overlapping_surfaces = find_texture_from_range(dst_range, dst.pitch, required_type); + auto overlapping_surfaces = find_texture_from_range(dst_range, dst.pitch, required_type_mask); + for (const auto &surface : overlapping_surfaces) { if (!surface->is_locked()) @@ -2194,23 +2210,30 @@ namespace rsx continue; } + if (!dst_range.inside(surface->get_section_range())) + { + // Hit test failed + continue; + } + if (use_null_region) { - if (dst_range.inside(surface->get_section_range())) - { - // Attach to existing region - cached_dest = surface; - } + + // Attach to existing region + cached_dest = surface; // Technically it is totally possible to just extend a pre-existing section // Will leave this as a TODO continue; } - const auto this_address = surface->get_section_base(); - if (this_address > dst_address) + if (UNLIKELY(skip_if_collision_exists)) { - continue; + if (surface->get_context() != texture_upload_context::blit_engine_dst) + { + // This section is likely to be 'flushed' to CPU for reupload soon anyway + return false; + } } switch (surface->get_gcm_format()) @@ -2227,7 +2250,8 @@ namespace rsx continue; } - if (const u32 address_offset = dst_address - this_address) + if (const auto this_address = surface->get_section_base(); + const u32 address_offset = dst_address - this_address) { const u16 offset_y = address_offset / dst.pitch; const u16 offset_x = address_offset % dst.pitch;