From a62737b2441fc575a2f671acace6a4b06e48eefa Mon Sep 17 00:00:00 2001
From: Stenzek
Date: Wed, 5 Jul 2023 22:49:33 +1000
Subject: [PATCH] GS/HW: Be more strict with double half clear detection

And less strict with letting depth mem clears go through.
---
Review note (not part of the commit message):
The line structure of this patch has been restored so that every hunk matches
the line counts in its header. The two removed static_cast lines are shown
with a <u32> template argument; that argument is presumed from the declared
type of the variables and should be confirmed against the pre-image file.
One added line differs from the patch as first written: in TryGSMemClear()
the depth guard was `clear_z && (ZBUF.ZMSK != 0 && !m_vt.m_eq.z)`. clear_z is
defined two lines earlier as `ZBUF.ZMSK == 0`, so that guard could never be
true and a draw with non-constant Z would still be mem-cleared using the Z of
a single vertex. The guard is now `clear_z && !m_vt.m_eq.z`.

 pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 61 ++++++++++++--------------
 pcsx2/GS/Renderers/HW/GSRendererHW.h   |  2 +-
 2 files changed, 29 insertions(+), 34 deletions(-)

diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp
index f8dcf3f754..4ed53156dd 100644
--- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp
+++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp
@@ -1934,24 +1934,10 @@ void GSRendererHW::Draw()
 	}
 
 	const bool is_zero_clear = (GetConstantDirectWriteMemClearColor() == 0 && !preserve_rt_color);
-	const bool req_z = m_cached_ctx.FRAME.FBP != m_cached_ctx.ZBUF.ZBP && !m_cached_ctx.ZBUF.ZMSK;
-	bool no_target_found = false;
-
-	// This is behind the if just to reduce lookups.
-	if (is_zero_clear && !clear_height_valid)
-	{
-		const u32 fbw = m_cached_ctx.FRAME.FBW;
-		const u32 frame_start = m_cached_ctx.FRAME.Block();
-		const u32 frame_end = GSLocalMemory::GetEndBlockAddress(frame_start, fbw, m_cached_ctx.FRAME.PSM, m_r);
-		no_target_found =
-			!g_texture_cache->GetExactTarget(frame_start, fbw, GSTextureCache::RenderTarget, frame_end) &&
-			!g_texture_cache->GetExactTarget(frame_start, fbw, GSTextureCache::DepthStencil, frame_end);
-	}
 
 	// If it's an invalid-sized draw, do the mem clear on the CPU, we don't want to create huge targets.
 	// If clearing to zero, don't bother creating the target. Games tend to clear more than they use, wasting VRAM/bandwidth.
-	if ((is_zero_clear || clear_height_valid) && TryGSMemClear(no_rt, no_ds) &&
-		(clear_height_valid || (!req_z && no_target_found)))
+	if ((is_zero_clear || clear_height_valid) && TryGSMemClear())
 	{
 		GL_INS("Skipping (%d,%d=>%d,%d) draw at FBP %x/ZBP %x due to invalid height or zero clear.",
 			m_r.x, m_r.y, m_r.z, m_r.w, m_cached_ctx.FRAME.Block(), m_cached_ctx.ZBUF.Block());
@@ -2147,7 +2133,7 @@ void GSRendererHW::Draw()
 		{
 			GL_INS("Clear draw with no target, skipping.");
 			cleanup_cancelled_draw();
-			TryGSMemClear(no_rt, no_ds);
+			TryGSMemClear();
 			return;
 		}
 
@@ -5337,14 +5323,25 @@ bool GSRendererHW::DetectDoubleHalfClear(bool& no_rt, bool& no_ds)
 	const u32 half = clear_depth ? m_cached_ctx.FRAME.FBP : m_cached_ctx.ZBUF.ZBP;
 
 	// Size of the current draw
-	const u32 w_pages = static_cast<u32>(roundf(m_vt.m_max.p.x / frame_psm.pgs.x));
-	const u32 h_pages = static_cast<u32>(roundf(m_vt.m_max.p.y / frame_psm.pgs.y));
+	const u32 w_pages = (m_r.z + (frame_psm.pgs.x - 1)) / frame_psm.pgs.x;
+	const u32 h_pages = (m_r.w + (frame_psm.pgs.y - 1)) / frame_psm.pgs.y;
 	const u32 written_pages = w_pages * h_pages;
 
 	// If both buffers are side by side we can expect a fast clear in on-going
 	if (half != (base + written_pages))
 		return false;
 
+	// Don't allow double half clear to go through when the number of bits written through FRAME and Z are different.
+	// GTA: LCS does this setup, along with a few other games. Thankfully if it's a zero clear, we'll clear both
+	// separately, and the end result is the same because it gets invalidated. That's better than falsely detecting
+	// double half clears, and ending up with 1024 high render targets which really shouldn't be.
+	if (frame_psm.fmt != zbuf_psm.fmt && m_cached_ctx.FRAME.FBMSK != ((zbuf_psm.fmt == 1) ? 0xFF000000u : 0))
+	{
+		GL_INS("Inconsistent FRAME [%s, %08x] and ZBUF [%s] formats, not using double-half clear.",
+			psm_str(m_cached_ctx.FRAME.PSM), m_cached_ctx.FRAME.FBMSK, psm_str(m_cached_ctx.ZBUF.PSM));
+		return false;
+	}
+
 	// Try peeking ahead to confirm whether this is a "normal" clear, where the two buffers just happen to be
 	// bang up next to each other, or a double half clear. The two are really difficult to differentiate.
 	// Have to check both contexts, because God of War 2 likes to do this in-between setting TRXDIR, which
@@ -5389,13 +5386,6 @@ bool GSRendererHW::DetectDoubleHalfClear(bool& no_rt, bool& no_ds)
 		clear_depth ? "depth" : "color", m_cached_ctx.FRAME.Block(), m_cached_ctx.ZBUF.Block(), written_pages,
 		base * BLOCKS_PER_PAGE, half * BLOCKS_PER_PAGE, m_r.x, m_r.y, m_r.z, m_r.w);
 
-	// Warn, but not fatal if the clear is inconsistent across FRAME and Z pages.
-	if (frame_psm.fmt != zbuf_psm.fmt && m_cached_ctx.FRAME.FBMSK != ((zbuf_psm.fmt == 1) ? 0xFF000000u : 0))
-	{
-		GL_INS("Inconsistent FRAME [%s, %08x] and ZBUF [%s] formats in double-half clear.",
-			psm_str(m_cached_ctx.FRAME.PSM), m_cached_ctx.FRAME.FBMSK, psm_str(m_cached_ctx.ZBUF.PSM));
-	}
-
 	// Double the clear rect.
 	if (horizontal)
 		m_r.z += m_r.x + m_r.width();
@@ -5464,7 +5454,7 @@ bool GSRendererHW::TryTargetClear(GSTextureCache::Target* rt, GSTextureCache::Ta
 	return skip;
 }
 
-bool GSRendererHW::TryGSMemClear(bool no_rt, bool no_ds)
+bool GSRendererHW::TryGSMemClear()
 {
 	if (!PrimitiveCoversWithoutGaps())
 		return false;
@@ -5475,17 +5465,21 @@ bool GSRendererHW::TryGSMemClear(bool no_rt, bool no_ds)
 		return false;
 
 	// Don't mem clear one of frame or z, only do both.
-	const u32 fbmsk = (m_cached_ctx.FRAME.FBMSK & GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk);
-	if ((!no_rt && (fbmsk != 0 || m_vt.m_eq.rgba != 0xFFFF)) ||
-		(!no_ds && (m_cached_ctx.ZBUF.ZMSK != 0 || !m_vt.m_eq.z)))
+	const u32 fmsk = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk;
+	const u32 fbmsk = (m_cached_ctx.FRAME.FBMSK & fmsk);
+	const bool clear_rt = (fbmsk & fmsk) != fmsk;
+	const bool clear_z = (m_cached_ctx.ZBUF.ZMSK == 0);
+	if ((clear_rt && ((fbmsk != 0 && (m_cached_ctx.FRAME.PSM != PSMCT32 || fbmsk != 0xFF000000u)) ||
+			m_vt.m_eq.rgba != 0xFFFF)) ||
+		(clear_z && !m_vt.m_eq.z))
 	{
 		return false;
 	}
 
-	if (!no_rt)
+	if (clear_rt)
 		ClearGSLocalMemory(m_context->offset.fb, m_r, GetConstantDirectWriteMemClearColor());
 
-	if (!no_ds)
+	if (clear_z)
 		ClearGSLocalMemory(m_context->offset.zb, m_r, m_vertex.buff[1].XYZ.Z);
 
 	return true;
@@ -5496,7 +5490,8 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r,
 	GL_INS(
 		"ClearGSLocalMemory(): %08X %d,%d => %d,%d @ BP %x BW %u", vert_color, r.x, r.y, r.z, r.w, off.bp(), off.bw());
 
-	const int format = GSLocalMemory::m_psm[off.psm()].fmt;
+	const u32 psm = (off.psm() == PSMCT32 && m_cached_ctx.FRAME.FBMSK == 0xFF000000u) ? PSMCT24 : off.psm();
+	const int format = GSLocalMemory::m_psm[psm].fmt;
 
 	const int left = r.left;
 	const int right = r.right;
@@ -5509,7 +5504,7 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r,
 	const u32 fbw = m_cached_ctx.FRAME.FBW;
 	const u32 pages_wide = r.z / 64u;
 
-	const GSVector2i& pgs = GSLocalMemory::m_psm[off.psm()].pgs;
+	const GSVector2i& pgs = GSLocalMemory::m_psm[psm].pgs;
 	if (left == 0 && top == 0 && (right & (pgs.x - 1)) == 0 && pages_wide <= fbw)
 	{
 		const u32 pixels_per_page = pgs.x * pgs.y;
diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.h b/pcsx2/GS/Renderers/HW/GSRendererHW.h
index 98a8f772d1..18c1637bca 100644
--- a/pcsx2/GS/Renderers/HW/GSRendererHW.h
+++ b/pcsx2/GS/Renderers/HW/GSRendererHW.h
@@ -44,7 +44,7 @@ private:
 
 	// Require special argument
 	bool OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Source* t, const GSVector4i& r_draw);
-	bool TryGSMemClear(bool no_rt, bool no_ds);
+	bool TryGSMemClear();
 	void ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r, u32 vert_color);
 	bool DetectDoubleHalfClear(bool& no_rt, bool& no_ds);
 	bool DetectStripedDoubleClear(bool& no_rt, bool& no_ds);