GS/HW: Don't align dirty rectangles to block sizes when updating

Don't align the area we write to the target to the block size. If the format matches, the writes don't need
to be block aligned. We still read the whole thing in, because that's the granularity that ReadTexture
operates at, but discard those pixels when updating the framebuffer. Onimusha 2 does this dance where it
uploads the left 4 pixels to the middle of the image, then moves it to the left, and because we process
the move in hardware, local memory never gets updated, and thus is stale.
This commit is contained in:
Stenzek 2023-10-17 23:45:59 +10:00 committed by Connor McLaughlin
parent b90c2ec385
commit 3e8e1ed596
5 changed files with 36 additions and 29 deletions

View File

@ -60,7 +60,7 @@ def compare_frames(path1, path2):
def extract_stats(file):
stats = {}
try:
with open(file, "r") as f:
with open(file, "r", errors="ignore") as f:
for line in f.readlines():
m = re.match(".*@HWSTAT@ ([^:]+): (.*) \(avg ([^)]+)\)$", line)
if m is None:

View File

@ -35,7 +35,7 @@ GSDirtyRect::GSDirtyRect(GSVector4i& r, u32 psm, u32 bw, RGBAMask rgba, bool req
{
}
GSVector4i GSDirtyRect::GetDirtyRect(GIFRegTEX0 TEX0) const
GSVector4i GSDirtyRect::GetDirtyRect(GIFRegTEX0 TEX0, bool align) const
{
GSVector4i _r;
@ -48,14 +48,13 @@ GSVector4i GSDirtyRect::GetDirtyRect(GIFRegTEX0 TEX0) const
_r.top = (r.top * dst.y) / src.y;
_r.right = (r.right * dst.x) / src.x;
_r.bottom = (r.bottom * dst.y) / src.y;
_r = _r.ralign<Align_Outside>(src);
}
else
{
_r = r.ralign<Align_Outside>(src);
_r = r;
}
return _r;
return align ? _r.ralign<Align_Outside>(src) : _r;
}
GSVector4i GSDirtyRectList::GetTotalRect(GIFRegTEX0 TEX0, const GSVector2i& size) const
@ -66,7 +65,7 @@ GSVector4i GSDirtyRectList::GetTotalRect(GIFRegTEX0 TEX0, const GSVector2i& size
for (auto& dirty_rect : *this)
{
r = r.runion(dirty_rect.GetDirtyRect(TEX0));
r = r.runion(dirty_rect.GetDirtyRect(TEX0, true));
}
const GSVector2i& bs = GSLocalMemory::m_psm[TEX0.PSM].bs;
@ -92,12 +91,13 @@ u32 GSDirtyRectList::GetDirtyChannels()
return channels;
}
GSVector4i GSDirtyRectList::GetDirtyRect(size_t index, GIFRegTEX0 TEX0, const GSVector4i& clamp) const
GSVector4i GSDirtyRectList::GetDirtyRect(size_t index, GIFRegTEX0 TEX0, const GSVector4i& clamp, bool align) const
{
const GSVector4i r = (*this)[index].GetDirtyRect(TEX0);
GSVector4i r = (*this)[index].GetDirtyRect(TEX0, align);
const GSVector2i& bs = GSLocalMemory::m_psm[TEX0.PSM].bs;
if (align)
r = r.ralign<Align_Outside>(bs);
GSVector2i bs = GSLocalMemory::m_psm[TEX0.PSM].bs;
return r.ralign<Align_Outside>(bs).rintersect(clamp);
return r.rintersect(clamp);
}

View File

@ -40,7 +40,7 @@ public:
GSDirtyRect();
GSDirtyRect(GSVector4i& r, u32 psm, u32 bw, RGBAMask rgba, bool req_linear);
GSVector4i GetDirtyRect(GIFRegTEX0 TEX0) const;
GSVector4i GetDirtyRect(GIFRegTEX0 TEX0, bool align) const;
};
class GSDirtyRectList : public std::vector<GSDirtyRect>
@ -49,5 +49,5 @@ public:
GSDirtyRectList() {}
GSVector4i GetTotalRect(GIFRegTEX0 TEX0, const GSVector2i& size) const;
u32 GetDirtyChannels();
GSVector4i GetDirtyRect(size_t index, GIFRegTEX0 TEX0, const GSVector4i& clamp) const;
GSVector4i GetDirtyRect(size_t index, GIFRegTEX0 TEX0, const GSVector4i& clamp, bool align) const;
};

View File

@ -5497,7 +5497,7 @@ GSRendererHW::CLUTDrawTestResult GSRendererHW::PossibleCLUTDraw()
bool is_dirty = false;
for (const GSDirtyRect& rc : tgt->m_dirty)
{
if (!rc.GetDirtyRect(m_cached_ctx.TEX0).rintersect(r).rempty())
if (!rc.GetDirtyRect(m_cached_ctx.TEX0, false).rintersect(r).rempty())
{
is_dirty = true;
break;
@ -5597,7 +5597,7 @@ bool GSRendererHW::CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_t
const GSVector4i tr(GetTextureMinMax(m_cached_ctx.TEX0, m_cached_ctx.CLAMP, m_vt.IsLinear(), false).coverage);
for (GSDirtyRect& rc : src_target->m_dirty)
{
if (!rc.GetDirtyRect(m_cached_ctx.TEX0).rintersect(tr).rempty())
if (!rc.GetDirtyRect(m_cached_ctx.TEX0, false).rintersect(tr).rempty())
return true;
}
}

View File

@ -1169,7 +1169,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
{
for (auto& dirty : t->m_dirty)
{
GSVector4i dirty_rect = dirty.GetDirtyRect(t->m_TEX0);
GSVector4i dirty_rect = dirty.GetDirtyRect(t->m_TEX0, true);
if (!dirty_rect.rintersect(new_rect).rempty())
{
rect_clean = false;
@ -2000,7 +2000,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
// Don't bother copying the old target in if the whole thing is dirty.
if (dst->m_dirty.empty() || (~dst->m_dirty.GetDirtyChannels() & GSUtil::GetChannelMask(TEX0.PSM)) != 0 ||
!dst->m_dirty.GetDirtyRect(0, TEX0, dst->GetUnscaledRect()).eq(dst->GetUnscaledRect()))
!dst->m_dirty.GetDirtyRect(0, TEX0, dst->GetUnscaledRect(), false).eq(dst->GetUnscaledRect()))
{
// If the old target was cleared, simply propagate that through.
if (dst_match->m_texture->GetState() == GSTexture::State::Cleared)
@ -2508,7 +2508,7 @@ bool GSTextureCache::CopyRGBFromDepthToColor(Target* dst, Target* depth_src)
for (u32 i = 0; i < dst->m_dirty.size(); i++)
{
GSDirtyRect& dr = dst->m_dirty[i];
const GSVector4i drc = dr.GetDirtyRect(dst->m_TEX0);
const GSVector4i drc = dr.GetDirtyRect(dst->m_TEX0, false);
if (!drc.rintersect(clear_dirty_rc).rempty())
{
if ((dr.rgba._u32 &= ~0x7) == 0)
@ -4785,7 +4785,7 @@ GSTexture* GSTextureCache::LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVec
bool is_dirty = false;
for (GSDirtyRect& dirty : t->m_dirty)
{
if (!dirty.GetDirtyRect(t->m_TEX0).rintersect(clut_rc).rempty())
if (!dirty.GetDirtyRect(t->m_TEX0, false).rintersect(clut_rc).rempty())
{
GL_INS("Dirty rectangle overlaps CLUT rectangle, skipping");
is_dirty = true;
@ -5414,34 +5414,41 @@ void GSTextureCache::Target::Update()
const GSOffset off(g_gs_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM));
for (size_t i = 0; i < m_dirty.size(); i++)
{
const GSVector4i r(m_dirty.GetDirtyRect(i, m_TEX0, total_rect));
if (r.rempty())
// Don't align the area we write to the target to the block size. If the format matches, the writes don't need
// to be block aligned. We still read the whole thing in, because that's the granularity that ReadTexture
// operates at, but discard those pixels when updating the framebuffer. Onimusha 2 does this dance where it
// uploads the left 4 pixels to the middle of the image, then moves it to the left, and because we process
// the move in hardware, local memory never gets updated, and thus is stale.
const GSVector4i update_r = m_dirty.GetDirtyRect(i, m_TEX0, total_rect, false);
if (update_r.rempty())
continue;
const GSVector4i t_r(r - t_offset);
const GSVector4i read_r = m_dirty.GetDirtyRect(i, m_TEX0, total_rect, true);
const GSVector4i t_r(read_r - t_offset);
if (mapped)
{
g_gs_renderer->m_mem.ReadTexture(
off, r, m.bits + t_r.y * static_cast<u32>(m.pitch) + (t_r.x * sizeof(u32)), m.pitch, TEXA);
off, read_r, m.bits + t_r.y * static_cast<u32>(m.pitch) + (t_r.x * sizeof(u32)), m.pitch, TEXA);
}
else
{
const int pitch = VectorAlign(r.width() * sizeof(u32));
g_gs_renderer->m_mem.ReadTexture(off, r, s_unswizzle_buffer, pitch, TEXA);
const int pitch = VectorAlign(read_r.width() * sizeof(u32));
g_gs_renderer->m_mem.ReadTexture(off, read_r, s_unswizzle_buffer, pitch, TEXA);
t->Update(t_r, s_unswizzle_buffer, pitch);
}
GSDevice::MultiStretchRect& drect = drects[ndrects++];
drect.src = t;
drect.src_rect = GSVector4(r - t_offset) / t_sizef;
drect.dst_rect = GSVector4(r) * GSVector4(m_scale);
drect.src_rect = GSVector4(update_r - t_offset) / t_sizef;
drect.dst_rect = GSVector4(update_r) * GSVector4(m_scale);
drect.linear = linear && (m_dirty[i].req_linear || override_linear);
// Copy the new GS memory content into the destination texture.
if (m_type == RenderTarget)
{
GL_INS("ERROR: Update RenderTarget 0x%x bw:%d (%d,%d => %d,%d)", m_TEX0.TBP0, m_TEX0.TBW, r.x, r.y, r.z, r.w);
GL_INS("ERROR: Update RenderTarget 0x%x bw:%d (%d,%d => %d,%d)", m_TEX0.TBP0, m_TEX0.TBW,
update_r.x, update_r.y, update_r.z, update_r.w);
drect.wmask = static_cast<u8>(m_dirty[i].rgba._u32);
}
else if (m_type == DepthStencil)
@ -5498,7 +5505,7 @@ void GSTextureCache::Target::UpdateIfDirtyIntersects(const GSVector4i& rc)
for (auto& dirty : m_dirty)
{
const GSVector4i dirty_rc(dirty.GetDirtyRect(m_TEX0));
const GSVector4i dirty_rc(dirty.GetDirtyRect(m_TEX0, false));
if (dirty_rc.rintersect(rc).rempty())
continue;