From 5bb3d8e60d99d3d772a889c5db0b44430c5202e6 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Thu, 23 Feb 2023 10:12:35 +0000 Subject: [PATCH] GS-HW: Improve GS read target detection, avoid reading dirty targets. --- pcsx2/GS/GSUtil.cpp | 7 ++++ pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 49 +++++++++++++++++++++++- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 19 +++++++-- 3 files changed, 71 insertions(+), 4 deletions(-) diff --git a/pcsx2/GS/GSUtil.cpp b/pcsx2/GS/GSUtil.cpp index 426953c27b..b195f0b196 100644 --- a/pcsx2/GS/GSUtil.cpp +++ b/pcsx2/GS/GSUtil.cpp @@ -130,16 +130,23 @@ bool GSUtil::HasSharedBits(u32 spsm, const u32* RESTRICT ptr) return (ptr[spsm >> 5] & (1 << (spsm & 0x1f))) == 0; } +// Pixels can NOT coexist in the same 32bits of space. +// Example: Using PSMT8H or PSMT4HL/HH with CT24 would fail this check. bool GSUtil::HasSharedBits(u32 spsm, u32 dpsm) { return (s_maps.SharedBitsField[dpsm][spsm >> 5] & (1 << (spsm & 0x1f))) == 0; } +// Pixels can NOT coexist in the same 32bits of space. +// Example: Using PSMT8H or PSMT4HL/HH with CT24 would fail this check. +// SBP and DBP must match. bool GSUtil::HasSharedBits(u32 sbp, u32 spsm, u32 dbp, u32 dpsm) { return ((sbp ^ dbp) | (s_maps.SharedBitsField[dpsm][spsm >> 5] & (1 << (spsm & 0x1f)))) == 0; } +// Shares bit depths, only detects 16/24/32 bit formats. +// 24/32bit cross compatible, 16bit compatible with 16bit. 
bool GSUtil::HasCompatibleBits(u32 spsm, u32 dpsm) { return (s_maps.CompatibleBitsField[spsm][dpsm >> 5] & (1 << (dpsm & 0x1f))) != 0; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index b8c280005e..dcfcd65471 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -795,7 +795,37 @@ void GSRendererHW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS { // printf("[%d] InvalidateVideoMem %d,%d - %d,%d %05x (%d)\n", static_cast<int>(g_perfmon.GetFrame()), r.left, r.top, r.right, r.bottom, static_cast<int>(BITBLTBUF.DBP), static_cast<int>(BITBLTBUF.DPSM)); - m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), r, eewrite); + // This is gross, but if the EE write loops, we need to split it on the 2048 border. + GSVector4i rect = r; + bool loop_h = false; + bool loop_w = false; + if (r.w > 2048) + { + rect.w = 2048; + loop_h = true; + } + if (r.z > 2048) + { + rect.z = 2048; + loop_w = true; + } + if (loop_h || loop_w) + { + m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), rect, eewrite); + if (loop_h) + { + rect.y = 0; + rect.w = r.w - 2048; + } + if (loop_w) + { + rect.x = 0; + rect.z = r.z - 2048; + } + m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), rect, eewrite); + } + else + m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), r, eewrite); } void GSRendererHW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut) @@ -805,6 +835,23 @@ void GSRendererHW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS if (clut) return; // FIXME + u32 incoming_end = GSLocalMemory::m_psm[BITBLTBUF.SPSM].info.bn(r.z - 1, r.w - 1, BITBLTBUF.SBP, BITBLTBUF.SBW); + auto iter = GSRendererHW::GetInstance()->m_draw_transfers.end(); + + // If the EE write overlaps the readback and was done since 
the last draw, there's no need to read it back. + // Dog's life and Ratchet Gladiator do this. + while (iter != GSRendererHW::GetInstance()->m_draw_transfers.begin()) + { + --iter; + u32 ee_write_end = GSLocalMemory::m_psm[iter->blit.DPSM].info.bn(iter->rect.z - 1, iter->rect.w - 1, iter->blit.DBP, iter->blit.DBW); + // Skip the readback if this upload's block range overlaps the download's, the formats share bits, and it was recorded for draw s_n. + if (iter->blit.DBP < incoming_end && GSUtil::HasSharedBits(iter->blit.DPSM, BITBLTBUF.SPSM) && ee_write_end > BITBLTBUF.SBP && iter->draw == s_n) + { + DevCon.Warning("Download from same draw as write address %x, skipping invalidation", BITBLTBUF.SBP); + return; + } + } + m_tc->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r); } diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index a57e5e0c27..e80b1b9dbc 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -1494,18 +1494,31 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r // (Busen0: Wizardry and Chaos Legion). // Also in a few games the below code ran the Grandia3 case when it shouldn't :p auto& rts = m_dst[RenderTarget]; - for (auto it = rts.rbegin(); it != rts.rend(); ++it) // Iterate targets from LRU to MRU. + for (auto it = rts.rbegin(); it != rts.rend(); it++) // Iterate targets from LRU to MRU. { Target* t = *it; if (t->m_TEX0.PSM != PSM_PSMZ32 && t->m_TEX0.PSM != PSM_PSMZ24 && t->m_TEX0.PSM != PSM_PSMZ16 && t->m_TEX0.PSM != PSM_PSMZ16S) { - if (!t->Overlaps(bp, bw, psm, r) || !GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) || t->m_age >= 30) + const u32 read_start = GSLocalMemory::m_psm[psm].info.bn(r.x, r.y, bp, bw); + // Check the offset of the read, if they're not pointing at or inside this texture, it's probably not what we want. 
+ const bool expecting_this_tex = (bp < t->m_TEX0.TBP0 && read_start >= t->m_TEX0.TBP0) || bp >= t->m_TEX0.TBP0; + + if (!expecting_this_tex || !t->Overlaps(bp, bw, psm, r) || !GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) || t->m_age >= 30) continue; const bool bpp_match = GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == GSLocalMemory::m_psm[psm].bpp; const bool format_match = (bp == t->m_TEX0.TBP0 && bw == t->m_TEX0.TBW && bpp_match); + SurfaceOffsetKey sok; + sok.elems[0].bp = bp; + sok.elems[0].bw = bw; + sok.elems[0].psm = psm; + sok.elems[0].rect = r; + sok.elems[1].bp = t->m_TEX0.TBP0; + sok.elems[1].bw = t->m_TEX0.TBW; + sok.elems[1].psm = t->m_TEX0.PSM; + sok.elems[1].rect = t->m_valid; // Calculate the rect offset if the BP doesn't match. - const GSVector4i targetr = (format_match) ? r.rintersect(t->m_valid) : ComputeSurfaceOffset(bp, bw, psm, r, t).b2a_offset; + const GSVector4i targetr = (format_match) ? r.rintersect(t->m_valid) : ComputeSurfaceOffset(sok).b2a_offset; // Some games like to offset their GS download memory addresses by // using overly big source Y position values.