GPU/HW: Improve heuristics for draw/write when copying

This commit is contained in:
Stenzek 2023-12-15 16:05:39 +10:00
parent a499e21453
commit 87a7c09466
No known key found for this signature in database
2 changed files with 38 additions and 14 deletions

View File

@ -1,8 +1,9 @@
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
#include <algorithm>
#include <cmath>
#include <cstring>
#include <limits>
#include <tuple>
@ -136,6 +137,24 @@ struct Rectangle
return (left <= rhs.left && right >= rhs.right && top <= rhs.top && bottom >= rhs.bottom);
}
/// Returns the middle point of the rectangle.
constexpr T GetCenterX() const
{
  // Expressed as the left edge plus half the horizontal extent, so the sum
  // (left + right) is never formed.
  const auto half_width = (right - left) / 2;
  return left + half_width;
}
/// Returns the vertical midpoint: the top edge plus half of (bottom - top).
constexpr T GetCenterY() const
{
  const auto half_height = (bottom - top) / 2;
  return top + half_height;
}
/// Returns the distance between two rectangles.
///
/// The distance is measured between the center points of this rectangle and
/// @p rhs (see GetCenterX()/GetCenterY()), not between their nearest edges, so
/// overlapping rectangles can still report a non-zero distance. The Euclidean
/// length is converted to T on return, which drops any fractional part when T
/// is an integer type.
T GetDistance(const Rectangle& rhs) const
{
  // Take the deltas in double rather than in T: with an unsigned T, a center
  // that lies left of/above the other would wrap around on subtraction, and
  // squaring the deltas in a narrow or heavily-populated T can overflow.
  const double dx = static_cast<double>(GetCenterX()) - static_cast<double>(rhs.GetCenterX());
  const double dy = static_cast<double>(GetCenterY()) - static_cast<double>(rhs.GetCenterY());

  // sqrt of the exact squared length keeps integer-valued distances exact
  // (e.g. a 3/4 offset yields exactly 5) before the conversion back to T.
  const double dist = std::sqrt((dx * dx) + (dy * dy));
  return static_cast<T>(dist);
}
/// Expands the bounds of the rectangle to contain the specified point.
constexpr void Include(T x, T y)
{

View File

@ -2493,13 +2493,13 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32
(m_GPUSTAT.IsMaskingEnabled() || ((src_x % VRAM_WIDTH) + width) > VRAM_WIDTH ||
((src_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT || ((dst_x % VRAM_WIDTH) + width) > VRAM_WIDTH ||
((dst_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT);
const Common::Rectangle<u32> src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height);
const Common::Rectangle<u32> dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height);
const bool intersect_with_draw = m_vram_dirty_draw_rect.Intersects(src_bounds);
const bool intersect_with_write = m_vram_dirty_write_rect.Intersects(src_bounds);
if (use_shader || IsUsingMultisampling())
{
const Common::Rectangle<u32> src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height);
const Common::Rectangle<u32> dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height);
const bool intersect_with_draw = m_vram_dirty_draw_rect.Intersects(src_bounds);
const bool intersect_with_write = m_vram_dirty_write_rect.Intersects(src_bounds);
if (intersect_with_draw || intersect_with_write)
UpdateVRAMReadTexture(intersect_with_draw, intersect_with_write);
IncludeVRAMDirtyRectangle(m_vram_dirty_draw_rect, dst_bounds);
@ -2545,22 +2545,27 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32
}
GPUTexture* src_tex = m_vram_texture.get();
const bool overlaps_with_self = Common::Rectangle<u32>::FromExtents(src_x, src_y, width, height)
.Intersects(Common::Rectangle<u32>::FromExtents(dst_x, dst_y, width, height));
const bool overlaps_with_self = src_bounds.Intersects(dst_bounds);
if (!g_gpu_device->GetFeatures().texture_copy_to_self || overlaps_with_self)
{
src_tex = m_vram_read_texture.get();
const Common::Rectangle<u32> src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height);
const bool intersect_with_draw = m_vram_dirty_draw_rect.Intersects(src_bounds);
const bool intersect_with_write = m_vram_dirty_write_rect.Intersects(src_bounds);
if (intersect_with_draw || intersect_with_write)
UpdateVRAMReadTexture(intersect_with_draw, intersect_with_write);
}
IncludeVRAMDirtyRectangle(
m_vram_dirty_draw_rect,
Common::Rectangle<u32>::FromExtents(dst_x, dst_y, width, height).Clamped(0, 0, VRAM_WIDTH, VRAM_HEIGHT));
Common::Rectangle<u32>* update_rect;
if (intersect_with_draw || intersect_with_write)
{
update_rect = intersect_with_draw ? &m_vram_dirty_draw_rect : &m_vram_dirty_write_rect;
}
else
{
const bool use_write =
(m_vram_dirty_write_rect.Valid() && m_vram_dirty_draw_rect.Valid() &&
m_vram_dirty_write_rect.GetDistance(dst_bounds) < m_vram_dirty_draw_rect.GetDistance(dst_bounds));
update_rect = use_write ? &m_vram_dirty_write_rect : &m_vram_dirty_draw_rect;
}
IncludeVRAMDirtyRectangle(*update_rect, dst_bounds);
if (m_GPUSTAT.check_mask_before_draw)
{