rsx: Blit engine tuning

- Attempt to identify blit operations that will be flushed immediately
after and just do them on CPU instead if the transformation is trivial.
- If only a single blit section is contributing to an atlas merge op, the
threshold should be 100%. The only acceptable result here is a
truncation.
This commit is contained in:
kd-11 2020-01-23 23:20:22 +03:00 committed by kd-11
parent 7a275eaa3a
commit 44f2cacf7b
1 changed files with 39 additions and 15 deletions

View File

@ -213,7 +213,8 @@ namespace rsx
}
}
bool atlas_covers_target_area() const
// Returns true if at least threshold% is covered in pixels
bool atlas_covers_target_area(u32 threshold) const
{
if (external_subresource_desc.op != deferred_request_command::atlas_gather)
return true;
@ -222,7 +223,7 @@ namespace rsx
max_x = 0, max_y = 0;
// Require at least 90% coverage
const u32 target_area = ((min_x * min_y) * 90u) / 100u;
const u32 target_area = ((min_x * min_y) * threshold) / 100u;
for (const auto &section : external_subresource_desc.sections_to_copy)
{
@ -1654,8 +1655,8 @@ namespace rsx
return {};
}
if (!result.external_subresource_desc.sections_to_copy.empty() &&
(_pool == 0 || result.atlas_covers_target_area()))
if (const auto section_count = result.external_subresource_desc.sections_to_copy.size();
section_count > 0 && result.atlas_covers_target_area(section_count == 1? 99 : 90))
{
// TODO: Overlapped section persistance is required for framebuffer resources to work with this!
// Yellow filter in SCV is because of a 384x384 surface being reused as 160x90 (and likely not getting written to)
@ -1921,6 +1922,7 @@ namespace rsx
const bool is_copy_op = (fcmp(scale_x, 1.f) && fcmp(scale_y, 1.f));
const bool is_format_convert = (dst_is_argb8 != src_is_argb8);
bool skip_if_collision_exists = false;
// Offset in x and y for src is 0 (it is already accounted for when getting pixels_src)
// Reproject final clip onto source...
@ -2075,6 +2077,9 @@ namespace rsx
return false;
}
}
// If a matching section exists with a different use-case, fall back to CPU memcpy
skip_if_collision_exists = true;
}
if (!g_cfg.video.use_gpu_texture_scaling)
@ -2176,9 +2181,20 @@ namespace rsx
if (!dst_is_render_target)
{
// Check for any available region that will fit this one
const auto required_type = (use_null_region) ? texture_upload_context::dma : texture_upload_context::blit_engine_dst;
u32 required_type_mask;
if (use_null_region)
{
required_type_mask = texture_upload_context::dma;
}
else
{
required_type_mask = texture_upload_context::blit_engine_dst;
if (skip_if_collision_exists) required_type_mask |= texture_upload_context::shader_read;
}
const auto dst_range = address_range::start_length(dst_address, dst_payload_length);
auto overlapping_surfaces = find_texture_from_range(dst_range, dst.pitch, required_type);
auto overlapping_surfaces = find_texture_from_range(dst_range, dst.pitch, required_type_mask);
for (const auto &surface : overlapping_surfaces)
{
if (!surface->is_locked())
@ -2194,23 +2210,30 @@ namespace rsx
continue;
}
if (!dst_range.inside(surface->get_section_range()))
{
// Hit test failed
continue;
}
if (use_null_region)
{
if (dst_range.inside(surface->get_section_range()))
{
// Attach to existing region
cached_dest = surface;
}
// Technically it is totally possible to just extend a pre-existing section
// Will leave this as a TODO
continue;
}
const auto this_address = surface->get_section_base();
if (this_address > dst_address)
if (UNLIKELY(skip_if_collision_exists))
{
continue;
if (surface->get_context() != texture_upload_context::blit_engine_dst)
{
// This section is likely to be 'flushed' to CPU for reupload soon anyway
return false;
}
}
switch (surface->get_gcm_format())
@ -2227,7 +2250,8 @@ namespace rsx
continue;
}
if (const u32 address_offset = dst_address - this_address)
if (const auto this_address = surface->get_section_base();
const u32 address_offset = dst_address - this_address)
{
const u16 offset_y = address_offset / dst.pitch;
const u16 offset_x = address_offset % dst.pitch;