rsx: Tweak behaviour of the "Use GPU texture scaling" option

- If either the source or the destination is a render target, do image operations on the GPU, same as before
- If swizzle is desired, use CPU fallback
- If no scaling and no format conversion is required, use CPU fallback
- If scaling is desired and the transfer target is in local memory, use the GPU
- When doing trivial copies, use the routine in rsx_methods instead of
  duplicating code. This also has the benefit of better range checking.
This commit is contained in:
kd-11 2019-10-20 15:43:25 +03:00 committed by kd-11
parent 868547aec8
commit 09de3b7974
1 changed file with 27 additions and 18 deletions

View File

@ -1911,6 +1911,9 @@ namespace rsx
const f32 scale_x = fabsf(dst.scale_x);
const f32 scale_y = fabsf(dst.scale_y);
const bool is_copy_op = (fcmp(scale_x, 1.f) && fcmp(scale_y, 1.f));
const bool is_format_convert = (dst_is_argb8 != src_is_argb8);
// Offset in x and y for src is 0 (it is already accounted for when getting pixels_src)
// Reproject final clip onto source...
u16 src_w = (u16)((f32)dst.clip_width / scale_x);
@ -2041,7 +2044,7 @@ namespace rsx
// 1. Invalidate surfaces in range
// 2. Proceed as normal, blit into a 'normal' surface and any upload routines should catch it
m_rtts.invalidate_range(utils::address_range::start_length(dst_address, dst.pitch * dst_h));
use_null_region = (fcmp(scale_x, 1.f) && fcmp(scale_y, 1.f));
use_null_region = (is_copy_op && !is_format_convert);
}
// TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate
@ -2049,28 +2052,34 @@ namespace rsx
src_is_render_target = src_subres.surface != nullptr;
// Always use GPU blit if src or dst is in the surface store
if (!g_cfg.video.use_gpu_texture_scaling && !(src_is_render_target || dst_is_render_target))
return false;
if (!src_is_render_target && !dst_is_render_target)
{
const bool is_trivial_copy = is_copy_op && !is_format_convert && !dst.swizzled;
if (is_trivial_copy)
{
// Check if trivial memcpy can perform the same task
// Used to copy programs and arbitrary data to the GPU in some cases
if (!src_is_render_target && !dst_is_render_target && dst_is_argb8 == src_is_argb8 && !dst.swizzled)
{
// NOTE: This case overrides the GPU texture scaling option
if ((src_h == 1 && dst_h == 1) || (dst_w == src_w && dst_h == src_h && src.pitch == dst.pitch))
{
if (dst.scale_x > 0.f && dst.scale_y > 0.f)
{
const u32 memcpy_bytes_length = dst.clip_width * dst_bpp * dst.clip_height;
std::lock_guard lock(m_cache_mutex);
invalidate_range_impl_base(cmd, address_range::start_length(src_address, memcpy_bytes_length), invalidation_cause::read, std::forward<Args>(extras)...);
invalidate_range_impl_base(cmd, address_range::start_length(dst_address, memcpy_bytes_length), invalidation_cause::write, std::forward<Args>(extras)...);
memcpy(dst.pixels, src.pixels, memcpy_bytes_length);
return true;
return false;
}
else
}
}
if (!g_cfg.video.use_gpu_texture_scaling)
{
// Rotation transform applied, use fallback
if (dst.swizzled)
{
// Swizzle operation requested. Use fallback
return false;
}
if (is_trivial_copy && get_location(dst_address) != CELL_GCM_LOCATION_LOCAL)
{
// Trivial copy and the destination is in XDR memory
return false;
}
}