From 3e8e1ed596ae254d8309c4fc6c4deebbb5d5c035 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Tue, 17 Oct 2023 23:45:59 +1000 Subject: [PATCH] GS/HW: Don't align dirty rectangles to block sizes when updating Don't align the area we write to the target to the block size. If the format matches, the writes don't need to be block aligned. We still read the whole thing in, because that's the granularity that ReadTexture operates at, but discard those pixels when updating the framebuffer. Onimusha 2 does this dance where it uploads the left 4 pixels to the middle of the image, then moves it to the left, and because we process the move in hardware, local memory never gets updated, and thus is stale. --- pcsx2-gsrunner/test_check_dumps.py | 2 +- pcsx2/GS/Renderers/Common/GSDirtyRect.cpp | 20 ++++++------- pcsx2/GS/Renderers/Common/GSDirtyRect.h | 4 +-- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 4 +-- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 35 ++++++++++++++--------- 5 files changed, 36 insertions(+), 29 deletions(-) diff --git a/pcsx2-gsrunner/test_check_dumps.py b/pcsx2-gsrunner/test_check_dumps.py index d8b487c533..0b5d811a17 100644 --- a/pcsx2-gsrunner/test_check_dumps.py +++ b/pcsx2-gsrunner/test_check_dumps.py @@ -60,7 +60,7 @@ def compare_frames(path1, path2): def extract_stats(file): stats = {} try: - with open(file, "r") as f: + with open(file, "r", errors="ignore") as f: for line in f.readlines(): m = re.match(".*@HWSTAT@ ([^:]+): (.*) \(avg ([^)]+)\)$", line) if m is None: diff --git a/pcsx2/GS/Renderers/Common/GSDirtyRect.cpp b/pcsx2/GS/Renderers/Common/GSDirtyRect.cpp index 14616446b5..2e8263adc2 100644 --- a/pcsx2/GS/Renderers/Common/GSDirtyRect.cpp +++ b/pcsx2/GS/Renderers/Common/GSDirtyRect.cpp @@ -35,7 +35,7 @@ GSDirtyRect::GSDirtyRect(GSVector4i& r, u32 psm, u32 bw, RGBAMask rgba, bool req { } -GSVector4i GSDirtyRect::GetDirtyRect(GIFRegTEX0 TEX0) const +GSVector4i GSDirtyRect::GetDirtyRect(GIFRegTEX0 TEX0, bool align) const { GSVector4i _r; @@ -48,14 +48,13 @@ GSVector4i GSDirtyRect::GetDirtyRect(GIFRegTEX0 TEX0) const _r.top = (r.top * dst.y) / src.y; _r.right = (r.right * dst.x) / src.x; _r.bottom = (r.bottom * dst.y) / src.y; - _r = _r.ralign(src); } else { - _r = r.ralign(src); + _r = r; } - return _r; + return align ? _r.ralign(src) : _r; } GSVector4i GSDirtyRectList::GetTotalRect(GIFRegTEX0 TEX0, const GSVector2i& size) const @@ -66,7 +65,7 @@ GSVector4i GSDirtyRectList::GetTotalRect(GIFRegTEX0 TEX0, const GSVector2i& size for (auto& dirty_rect : *this) { - r = r.runion(dirty_rect.GetDirtyRect(TEX0)); + r = r.runion(dirty_rect.GetDirtyRect(TEX0, true)); } const GSVector2i& bs = GSLocalMemory::m_psm[TEX0.PSM].bs; @@ -92,12 +91,13 @@ u32 GSDirtyRectList::GetDirtyChannels() return channels; } -GSVector4i GSDirtyRectList::GetDirtyRect(size_t index, GIFRegTEX0 TEX0, const GSVector4i& clamp) const +GSVector4i GSDirtyRectList::GetDirtyRect(size_t index, GIFRegTEX0 TEX0, const GSVector4i& clamp, bool align) const { - const GSVector4i r = (*this)[index].GetDirtyRect(TEX0); + GSVector4i r = (*this)[index].GetDirtyRect(TEX0, align); + const GSVector2i& bs = GSLocalMemory::m_psm[TEX0.PSM].bs; + if (align) + r = r.ralign(bs); - GSVector2i bs = GSLocalMemory::m_psm[TEX0.PSM].bs; - - return r.ralign(bs).rintersect(clamp); + return r.rintersect(clamp); } diff --git a/pcsx2/GS/Renderers/Common/GSDirtyRect.h b/pcsx2/GS/Renderers/Common/GSDirtyRect.h index e1be80ff6b..f7ba51322b 100644 --- a/pcsx2/GS/Renderers/Common/GSDirtyRect.h +++ b/pcsx2/GS/Renderers/Common/GSDirtyRect.h @@ -40,7 +40,7 @@ public: GSDirtyRect(); GSDirtyRect(GSVector4i& r, u32 psm, u32 bw, RGBAMask rgba, bool req_linear); - GSVector4i GetDirtyRect(GIFRegTEX0 TEX0) const; + GSVector4i GetDirtyRect(GIFRegTEX0 TEX0, bool align) const; }; class GSDirtyRectList : public std::vector @@ -49,5 +49,5 @@ public: GSDirtyRectList() {} GSVector4i GetTotalRect(GIFRegTEX0 TEX0, const GSVector2i& size) const; u32 GetDirtyChannels(); - GSVector4i GetDirtyRect(size_t index, GIFRegTEX0 TEX0, const GSVector4i& clamp) const; + GSVector4i GetDirtyRect(size_t index, GIFRegTEX0 TEX0, const GSVector4i& clamp, bool align) const; }; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 4371022a30..19a7515636 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -5497,7 +5497,7 @@ GSRendererHW::CLUTDrawTestResult GSRendererHW::PossibleCLUTDraw() bool is_dirty = false; for (const GSDirtyRect& rc : tgt->m_dirty) { - if (!rc.GetDirtyRect(m_cached_ctx.TEX0).rintersect(r).rempty()) + if (!rc.GetDirtyRect(m_cached_ctx.TEX0, false).rintersect(r).rempty()) { is_dirty = true; break; @@ -5597,7 +5597,7 @@ bool GSRendererHW::CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_t const GSVector4i tr(GetTextureMinMax(m_cached_ctx.TEX0, m_cached_ctx.CLAMP, m_vt.IsLinear(), false).coverage); for (GSDirtyRect& rc : src_target->m_dirty) { - if (!rc.GetDirtyRect(m_cached_ctx.TEX0).rintersect(tr).rempty()) + if (!rc.GetDirtyRect(m_cached_ctx.TEX0, false).rintersect(tr).rempty()) return true; } } diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 8d04454c94..4ce5380f86 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -1169,7 +1169,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con { for (auto& dirty : t->m_dirty) { - GSVector4i dirty_rect = dirty.GetDirtyRect(t->m_TEX0); + GSVector4i dirty_rect = dirty.GetDirtyRect(t->m_TEX0, true); if (!dirty_rect.rintersect(new_rect).rempty()) { rect_clean = false; @@ -2000,7 +2000,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe // Don't bother copying the old target in if the whole thing is dirty. if (dst->m_dirty.empty() || (~dst->m_dirty.GetDirtyChannels() & GSUtil::GetChannelMask(TEX0.PSM)) != 0 || - !dst->m_dirty.GetDirtyRect(0, TEX0, dst->GetUnscaledRect()).eq(dst->GetUnscaledRect())) + !dst->m_dirty.GetDirtyRect(0, TEX0, dst->GetUnscaledRect(), false).eq(dst->GetUnscaledRect())) { // If the old target was cleared, simply propagate that through. if (dst_match->m_texture->GetState() == GSTexture::State::Cleared) @@ -2508,7 +2508,7 @@ bool GSTextureCache::CopyRGBFromDepthToColor(Target* dst, Target* depth_src) for (u32 i = 0; i < dst->m_dirty.size(); i++) { GSDirtyRect& dr = dst->m_dirty[i]; - const GSVector4i drc = dr.GetDirtyRect(dst->m_TEX0); + const GSVector4i drc = dr.GetDirtyRect(dst->m_TEX0, false); if (!drc.rintersect(clear_dirty_rc).rempty()) { if ((dr.rgba._u32 &= ~0x7) == 0) @@ -4785,7 +4785,7 @@ GSTexture* GSTextureCache::LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVec bool is_dirty = false; for (GSDirtyRect& dirty : t->m_dirty) { - if (!dirty.GetDirtyRect(t->m_TEX0).rintersect(clut_rc).rempty()) + if (!dirty.GetDirtyRect(t->m_TEX0, false).rintersect(clut_rc).rempty()) { GL_INS("Dirty rectangle overlaps CLUT rectangle, skipping"); is_dirty = true; @@ -5414,34 +5414,41 @@ void GSTextureCache::Target::Update() const GSOffset off(g_gs_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM)); for (size_t i = 0; i < m_dirty.size(); i++) { - const GSVector4i r(m_dirty.GetDirtyRect(i, m_TEX0, total_rect)); - if (r.rempty()) + // Don't align the area we write to the target to the block size. If the format matches, the writes don't need + // to be block aligned. We still read the whole thing in, because that's the granularity that ReadTexture + // operates at, but discard those pixels when updating the framebuffer. Onimusha 2 does this dance where it + // uploads the left 4 pixels to the middle of the image, then moves it to the left, and because we process + // the move in hardware, local memory never gets updated, and thus is stale. + const GSVector4i update_r = m_dirty.GetDirtyRect(i, m_TEX0, total_rect, false); + if (update_r.rempty()) continue; - const GSVector4i t_r(r - t_offset); + const GSVector4i read_r = m_dirty.GetDirtyRect(i, m_TEX0, total_rect, true); + const GSVector4i t_r(read_r - t_offset); if (mapped) { g_gs_renderer->m_mem.ReadTexture( - off, r, m.bits + t_r.y * static_cast(m.pitch) + (t_r.x * sizeof(u32)), m.pitch, TEXA); + off, read_r, m.bits + t_r.y * static_cast(m.pitch) + (t_r.x * sizeof(u32)), m.pitch, TEXA); } else { - const int pitch = VectorAlign(r.width() * sizeof(u32)); - g_gs_renderer->m_mem.ReadTexture(off, r, s_unswizzle_buffer, pitch, TEXA); + const int pitch = VectorAlign(read_r.width() * sizeof(u32)); + g_gs_renderer->m_mem.ReadTexture(off, read_r, s_unswizzle_buffer, pitch, TEXA); t->Update(t_r, s_unswizzle_buffer, pitch); } GSDevice::MultiStretchRect& drect = drects[ndrects++]; drect.src = t; - drect.src_rect = GSVector4(r - t_offset) / t_sizef; - drect.dst_rect = GSVector4(r) * GSVector4(m_scale); + drect.src_rect = GSVector4(update_r - t_offset) / t_sizef; + drect.dst_rect = GSVector4(update_r) * GSVector4(m_scale); drect.linear = linear && (m_dirty[i].req_linear || override_linear); // Copy the new GS memory content into the destination texture. if (m_type == RenderTarget) { - GL_INS("ERROR: Update RenderTarget 0x%x bw:%d (%d,%d => %d,%d)", m_TEX0.TBP0, m_TEX0.TBW, r.x, r.y, r.z, r.w); + GL_INS("ERROR: Update RenderTarget 0x%x bw:%d (%d,%d => %d,%d)", m_TEX0.TBP0, m_TEX0.TBW, + update_r.x, update_r.y, update_r.z, update_r.w); drect.wmask = static_cast(m_dirty[i].rgba._u32); } else if (m_type == DepthStencil) @@ -5498,7 +5505,7 @@ void GSTextureCache::Target::UpdateIfDirtyIntersects(const GSVector4i& rc) for (auto& dirty : m_dirty) { - const GSVector4i dirty_rc(dirty.GetDirtyRect(m_TEX0)); + const GSVector4i dirty_rc(dirty.GetDirtyRect(m_TEX0, false)); if (dirty_rc.rintersect(rc).rempty()) continue;