GS/HW: Be more strict with double half clear detection

And less strict with letting depth mem clears go through.
This commit is contained in:
Stenzek 2023-07-05 22:49:33 +10:00 committed by Connor McLaughlin
parent 1fa3111e67
commit a62737b244
2 changed files with 29 additions and 34 deletions

View File

@ -1934,24 +1934,10 @@ void GSRendererHW::Draw()
} }
const bool is_zero_clear = (GetConstantDirectWriteMemClearColor() == 0 && !preserve_rt_color); const bool is_zero_clear = (GetConstantDirectWriteMemClearColor() == 0 && !preserve_rt_color);
const bool req_z = m_cached_ctx.FRAME.FBP != m_cached_ctx.ZBUF.ZBP && !m_cached_ctx.ZBUF.ZMSK;
bool no_target_found = false;
// This is behind the if just to reduce lookups.
if (is_zero_clear && !clear_height_valid)
{
const u32 fbw = m_cached_ctx.FRAME.FBW;
const u32 frame_start = m_cached_ctx.FRAME.Block();
const u32 frame_end = GSLocalMemory::GetEndBlockAddress(frame_start, fbw, m_cached_ctx.FRAME.PSM, m_r);
no_target_found =
!g_texture_cache->GetExactTarget(frame_start, fbw, GSTextureCache::RenderTarget, frame_end) &&
!g_texture_cache->GetExactTarget(frame_start, fbw, GSTextureCache::DepthStencil, frame_end);
}
// If it's an invalid-sized draw, do the mem clear on the CPU, we don't want to create huge targets. // If it's an invalid-sized draw, do the mem clear on the CPU, we don't want to create huge targets.
// If clearing to zero, don't bother creating the target. Games tend to clear more than they use, wasting VRAM/bandwidth. // If clearing to zero, don't bother creating the target. Games tend to clear more than they use, wasting VRAM/bandwidth.
if ((is_zero_clear || clear_height_valid) && TryGSMemClear(no_rt, no_ds) && if ((is_zero_clear || clear_height_valid) && TryGSMemClear())
(clear_height_valid || (!req_z && no_target_found)))
{ {
GL_INS("Skipping (%d,%d=>%d,%d) draw at FBP %x/ZBP %x due to invalid height or zero clear.", m_r.x, m_r.y, GL_INS("Skipping (%d,%d=>%d,%d) draw at FBP %x/ZBP %x due to invalid height or zero clear.", m_r.x, m_r.y,
m_r.z, m_r.w, m_cached_ctx.FRAME.Block(), m_cached_ctx.ZBUF.Block()); m_r.z, m_r.w, m_cached_ctx.FRAME.Block(), m_cached_ctx.ZBUF.Block());
@ -2147,7 +2133,7 @@ void GSRendererHW::Draw()
{ {
GL_INS("Clear draw with no target, skipping."); GL_INS("Clear draw with no target, skipping.");
cleanup_cancelled_draw(); cleanup_cancelled_draw();
TryGSMemClear(no_rt, no_ds); TryGSMemClear();
return; return;
} }
@ -5337,14 +5323,25 @@ bool GSRendererHW::DetectDoubleHalfClear(bool& no_rt, bool& no_ds)
const u32 half = clear_depth ? m_cached_ctx.FRAME.FBP : m_cached_ctx.ZBUF.ZBP; const u32 half = clear_depth ? m_cached_ctx.FRAME.FBP : m_cached_ctx.ZBUF.ZBP;
// Size of the current draw // Size of the current draw
const u32 w_pages = static_cast<u32>(roundf(m_vt.m_max.p.x / frame_psm.pgs.x)); const u32 w_pages = (m_r.z + (frame_psm.pgs.x - 1)) / frame_psm.pgs.x;
const u32 h_pages = static_cast<u32>(roundf(m_vt.m_max.p.y / frame_psm.pgs.y)); const u32 h_pages = (m_r.w + (frame_psm.pgs.y - 1)) / frame_psm.pgs.y;
const u32 written_pages = w_pages * h_pages; const u32 written_pages = w_pages * h_pages;
// If both buffers are side by side we can expect a fast clear in on-going // If both buffers are side by side we can expect a fast clear in on-going
if (half != (base + written_pages)) if (half != (base + written_pages))
return false; return false;
// Don't allow double half clear to go through when the number of bits written through FRAME and Z are different.
// GTA: LCS does this setup, along with a few other games. Thankfully if it's a zero clear, we'll clear both
// separately, and the end result is the same because it gets invalidated. That's better than falsely detecting
// double half clears, and ending up with 1024 high render targets which really shouldn't be.
if (frame_psm.fmt != zbuf_psm.fmt && m_cached_ctx.FRAME.FBMSK != ((zbuf_psm.fmt == 1) ? 0xFF000000u : 0))
{
GL_INS("Inconsistent FRAME [%s, %08x] and ZBUF [%s] formats, not using double-half clear.",
psm_str(m_cached_ctx.FRAME.PSM), m_cached_ctx.FRAME.FBMSK, psm_str(m_cached_ctx.ZBUF.PSM));
return false;
}
// Try peeking ahead to confirm whether this is a "normal" clear, where the two buffers just happen to be // Try peeking ahead to confirm whether this is a "normal" clear, where the two buffers just happen to be
// bang up next to each other, or a double half clear. The two are really difficult to differentiate. // bang up next to each other, or a double half clear. The two are really difficult to differentiate.
// Have to check both contexts, because God of War 2 likes to do this in-between setting TRXDIR, which // Have to check both contexts, because God of War 2 likes to do this in-between setting TRXDIR, which
@ -5389,13 +5386,6 @@ bool GSRendererHW::DetectDoubleHalfClear(bool& no_rt, bool& no_ds)
clear_depth ? "depth" : "color", m_cached_ctx.FRAME.Block(), m_cached_ctx.ZBUF.Block(), written_pages, clear_depth ? "depth" : "color", m_cached_ctx.FRAME.Block(), m_cached_ctx.ZBUF.Block(), written_pages,
base * BLOCKS_PER_PAGE, half * BLOCKS_PER_PAGE, m_r.x, m_r.y, m_r.z, m_r.w); base * BLOCKS_PER_PAGE, half * BLOCKS_PER_PAGE, m_r.x, m_r.y, m_r.z, m_r.w);
// Warn, but not fatal if the clear is inconsistent across FRAME and Z pages.
if (frame_psm.fmt != zbuf_psm.fmt && m_cached_ctx.FRAME.FBMSK != ((zbuf_psm.fmt == 1) ? 0xFF000000u : 0))
{
GL_INS("Inconsistent FRAME [%s, %08x] and ZBUF [%s] formats in double-half clear.",
psm_str(m_cached_ctx.FRAME.PSM), m_cached_ctx.FRAME.FBMSK, psm_str(m_cached_ctx.ZBUF.PSM));
}
// Double the clear rect. // Double the clear rect.
if (horizontal) if (horizontal)
m_r.z += m_r.x + m_r.width(); m_r.z += m_r.x + m_r.width();
@ -5464,7 +5454,7 @@ bool GSRendererHW::TryTargetClear(GSTextureCache::Target* rt, GSTextureCache::Ta
return skip; return skip;
} }
bool GSRendererHW::TryGSMemClear(bool no_rt, bool no_ds) bool GSRendererHW::TryGSMemClear()
{ {
if (!PrimitiveCoversWithoutGaps()) if (!PrimitiveCoversWithoutGaps())
return false; return false;
@ -5475,17 +5465,21 @@ bool GSRendererHW::TryGSMemClear(bool no_rt, bool no_ds)
return false; return false;
// Don't mem clear one of frame or z, only do both. // Don't mem clear one of frame or z, only do both.
const u32 fbmsk = (m_cached_ctx.FRAME.FBMSK & GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk); const u32 fmsk = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk;
if ((!no_rt && (fbmsk != 0 || m_vt.m_eq.rgba != 0xFFFF)) || const u32 fbmsk = (m_cached_ctx.FRAME.FBMSK & fmsk);
(!no_ds && (m_cached_ctx.ZBUF.ZMSK != 0 || !m_vt.m_eq.z))) const bool clear_rt = (fbmsk & fmsk) != fmsk;
const bool clear_z = (m_cached_ctx.ZBUF.ZMSK == 0);
if ((clear_rt && ((fbmsk != 0 && (m_cached_ctx.FRAME.PSM != PSMCT32 || fbmsk != 0xFF000000u)) ||
m_vt.m_eq.rgba != 0xFFFF)) ||
(clear_z && (m_cached_ctx.ZBUF.ZMSK != 0 && !m_vt.m_eq.z)))
{ {
return false; return false;
} }
if (!no_rt) if (clear_rt)
ClearGSLocalMemory(m_context->offset.fb, m_r, GetConstantDirectWriteMemClearColor()); ClearGSLocalMemory(m_context->offset.fb, m_r, GetConstantDirectWriteMemClearColor());
if (!no_ds) if (clear_z)
ClearGSLocalMemory(m_context->offset.zb, m_r, m_vertex.buff[1].XYZ.Z); ClearGSLocalMemory(m_context->offset.zb, m_r, m_vertex.buff[1].XYZ.Z);
return true; return true;
@ -5496,7 +5490,8 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r,
GL_INS( GL_INS(
"ClearGSLocalMemory(): %08X %d,%d => %d,%d @ BP %x BW %u", vert_color, r.x, r.y, r.z, r.w, off.bp(), off.bw()); "ClearGSLocalMemory(): %08X %d,%d => %d,%d @ BP %x BW %u", vert_color, r.x, r.y, r.z, r.w, off.bp(), off.bw());
const int format = GSLocalMemory::m_psm[off.psm()].fmt; const u32 psm = (off.psm() == PSMCT32 && m_cached_ctx.FRAME.FBMSK == 0xFF000000u) ? PSMCT24 : off.psm();
const int format = GSLocalMemory::m_psm[psm].fmt;
const int left = r.left; const int left = r.left;
const int right = r.right; const int right = r.right;
@ -5509,7 +5504,7 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r,
const u32 fbw = m_cached_ctx.FRAME.FBW; const u32 fbw = m_cached_ctx.FRAME.FBW;
const u32 pages_wide = r.z / 64u; const u32 pages_wide = r.z / 64u;
const GSVector2i& pgs = GSLocalMemory::m_psm[off.psm()].pgs; const GSVector2i& pgs = GSLocalMemory::m_psm[psm].pgs;
if (left == 0 && top == 0 && (right & (pgs.x - 1)) == 0 && pages_wide <= fbw) if (left == 0 && top == 0 && (right & (pgs.x - 1)) == 0 && pages_wide <= fbw)
{ {
const u32 pixels_per_page = pgs.x * pgs.y; const u32 pixels_per_page = pgs.x * pgs.y;

View File

@ -44,7 +44,7 @@ private:
// Require special argument // Require special argument
bool OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Source* t, const GSVector4i& r_draw); bool OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Source* t, const GSVector4i& r_draw);
bool TryGSMemClear(bool no_rt, bool no_ds); bool TryGSMemClear();
void ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r, u32 vert_color); void ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r, u32 vert_color);
bool DetectDoubleHalfClear(bool& no_rt, bool& no_ds); bool DetectDoubleHalfClear(bool& no_rt, bool& no_ds);
bool DetectStripedDoubleClear(bool& no_rt, bool& no_ds); bool DetectStripedDoubleClear(bool& no_rt, bool& no_ds);