mirror of https://github.com/PCSX2/pcsx2.git
GS/HW: Further improve no_rt heuristics
Reduces copies by almost 500 in Crash and Burn, few hundred drawcall reductions in other games.
This commit is contained in:
parent
23b72d08d2
commit
8818cd0285
|
@ -1210,7 +1210,38 @@ void GSRendererHW::FinishSplitClear()
|
|||
m_split_clear_color = 0;
|
||||
}
|
||||
|
||||
bool GSRendererHW::IsTBPFrameOrZ(u32 tbp) const
|
||||
bool GSRendererHW::IsRTWritten()
|
||||
{
|
||||
const u32 written_bits = (~m_cached_ctx.FRAME.FBMSK & GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk);
|
||||
const GIFRegALPHA ALPHA = m_context->ALPHA;
|
||||
return (
|
||||
// A not masked
|
||||
(written_bits & 0xFF000000u) != 0) ||
|
||||
(
|
||||
// RGB not entirely masked
|
||||
((written_bits & 0x00FFFFFFu) != 0) &&
|
||||
// RGB written through no-blending, or blend result being non-zero
|
||||
(!PRIM->ABE || // not blending
|
||||
ALPHA.D != 1 || // additive to Cs
|
||||
(ALPHA.A != ALPHA.B && // left side is not zero
|
||||
(ALPHA.C == 1 || // multiply by Ad
|
||||
(ALPHA.C == 2 && ALPHA.FIX != 0) || // multiply by 0
|
||||
(ALPHA.C == 0 && GetAlphaMinMax().max != 0)))));
|
||||
}
|
||||
|
||||
bool GSRendererHW::IsUsingCsInBlend()
|
||||
{
|
||||
const GIFRegALPHA ALPHA = m_context->ALPHA;
|
||||
const bool blend_zero = (ALPHA.A == ALPHA.B || (ALPHA.C == 2 && ALPHA.FIX == 0) || (ALPHA.C == 0 && GetAlphaMinMax().max == 0));
|
||||
return (PRIM->ABE && ((ALPHA.IsUsingCs() && !blend_zero) || m_context->ALPHA.D == 0));
|
||||
}
|
||||
|
||||
bool GSRendererHW::IsUsingAsInBlend()
|
||||
{
|
||||
return (PRIM->ABE && m_context->ALPHA.IsUsingAs() && GetAlphaMinMax().max != 0);
|
||||
}
|
||||
|
||||
bool GSRendererHW::IsTBPFrameOrZ(u32 tbp)
|
||||
{
|
||||
const bool is_frame = (m_cached_ctx.FRAME.Block() == tbp);
|
||||
const bool is_z = (m_cached_ctx.ZBUF.Block() == tbp);
|
||||
|
@ -1222,15 +1253,16 @@ bool GSRendererHW::IsTBPFrameOrZ(u32 tbp) const
|
|||
const u32 fm_mask = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk;
|
||||
|
||||
const u32 max_z = (0xFFFFFFFF >> (GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].fmt * 8));
|
||||
const bool no_rt = (m_context->ALPHA.IsCd() && PRIM->ABE && (m_cached_ctx.FRAME.PSM == 1))
|
||||
|| (!m_cached_ctx.TEST.DATE && (fm & GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk) == GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk);
|
||||
const bool no_rt = (!IsRTWritten() && !m_cached_ctx.TEST.DATE);
|
||||
const bool no_ds = (
|
||||
// Depth is always pass/fail (no read) and write are discarded.
|
||||
(zm != 0 && m_cached_ctx.TEST.ZTST <= ZTST_ALWAYS) ||
|
||||
// Depth test will always pass
|
||||
(zm != 0 && m_cached_ctx.TEST.ZTST == ZTST_GEQUAL && m_vt.m_eq.z && std::min(m_vertex.buff[0].XYZ.Z, max_z) == max_z) ||
|
||||
// Depth will be written through the RT
|
||||
(!no_rt && m_cached_ctx.FRAME.FBP == m_cached_ctx.ZBUF.ZBP && !PRIM->TME && zm == 0 && (fm & fm_mask) == 0 && m_cached_ctx.TEST.ZTE));
|
||||
// Depth is always pass/fail (no read) and write are discarded.
|
||||
(zm != 0 && m_cached_ctx.TEST.ZTST <= ZTST_ALWAYS) ||
|
||||
// Depth test will always pass
|
||||
(zm != 0 && m_cached_ctx.TEST.ZTST == ZTST_GEQUAL && m_vt.m_eq.z && std::min(m_vertex.buff[0].XYZ.Z, max_z) == max_z) ||
|
||||
// Depth will be written through the RT
|
||||
(!no_rt && m_cached_ctx.FRAME.FBP == m_cached_ctx.ZBUF.ZBP && !PRIM->TME && zm == 0 && (fm & fm_mask) == 0 && m_cached_ctx.TEST.ZTE)) ||
|
||||
// No color or Z being written.
|
||||
(no_rt && zm != 0);
|
||||
|
||||
// Relying a lot on the optimizer here... I don't like it.
|
||||
return (is_frame && !no_rt) || (is_z && !no_ds);
|
||||
|
@ -1857,16 +1889,17 @@ void GSRendererHW::Draw()
|
|||
// 3/ 50cents really draws (0,0,0,128) color and a (0) 24 bits depth
|
||||
// Note: FF DoC has both buffer at same location but disable the depth test (write?) with ZTE = 0
|
||||
const u32 max_z = (0xFFFFFFFF >> (GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].fmt * 8));
|
||||
bool no_rt = (context->ALPHA.IsCd() && PRIM->ABE && (m_cached_ctx.FRAME.PSM == 1))
|
||||
|| (!m_cached_ctx.TEST.DATE && (fm & GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk) == GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk);
|
||||
bool no_rt = (!IsRTWritten() && !m_cached_ctx.TEST.DATE);
|
||||
const bool all_depth_tests_pass =
|
||||
// Depth is always pass/fail (no read) and write are discarded.
|
||||
(!m_cached_ctx.TEST.ZTE || m_cached_ctx.TEST.ZTST <= ZTST_ALWAYS) ||
|
||||
// Depth test will always pass
|
||||
(m_cached_ctx.TEST.ZTST == ZTST_GEQUAL && m_vt.m_eq.z && std::min(m_vertex.buff[0].XYZ.Z, max_z) == max_z);
|
||||
bool no_ds = (zm != 0 && all_depth_tests_pass) ||
|
||||
// Depth will be written through the RT
|
||||
(!no_rt && m_cached_ctx.FRAME.FBP == m_cached_ctx.ZBUF.ZBP && !PRIM->TME && zm == 0 && (fm & fm_mask) == 0 && m_cached_ctx.TEST.ZTE);
|
||||
// Depth will be written through the RT
|
||||
(!no_rt && m_cached_ctx.FRAME.FBP == m_cached_ctx.ZBUF.ZBP && !PRIM->TME && zm == 0 && (fm & fm_mask) == 0 && m_cached_ctx.TEST.ZTE) ||
|
||||
// No color or Z being written.
|
||||
(no_rt && zm != 0);
|
||||
|
||||
// No Z test if no z buffer.
|
||||
if (no_ds || all_depth_tests_pass)
|
||||
|
@ -2259,10 +2292,10 @@ void GSRendererHW::Draw()
|
|||
|
||||
tgt = nullptr;
|
||||
}
|
||||
const bool possible_shuffle = ((shuffle_target && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0 && ((m_cached_ctx.TEX0.PSM & 0x6) || m_cached_ctx.FRAME.PSM != m_cached_ctx.TEX0.PSM))) || IsPossibleChannelShuffle();
|
||||
const bool possible_shuffle = !no_rt && (((shuffle_target && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0 && ((m_cached_ctx.TEX0.PSM & 0x6) || m_cached_ctx.FRAME.PSM != m_cached_ctx.TEX0.PSM))) || IsPossibleChannelShuffle());
|
||||
const bool need_aem_color = GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].trbpp <= 24 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].pal == 0 && m_context->ALPHA.C == 0 && m_env.TEXA.AEM;
|
||||
const bool req_color = (!PRIM->ABE || (PRIM->ABE && (m_context->ALPHA.IsUsingCs() || need_aem_color))) && (possible_shuffle || (m_cached_ctx.FRAME.FBMSK & (fm_mask & 0x00FFFFFF)) != (fm_mask & 0x00FFFFFF));
|
||||
const bool alpha_used = m_context->TEX0.TCC && ((PRIM->ABE && m_context->ALPHA.IsUsingAs()) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST > ATST_ALWAYS) || (possible_shuffle || (m_cached_ctx.FRAME.FBMSK & (fm_mask & 0xFF000000)) != (fm_mask & 0xFF000000)));
|
||||
const bool req_color = (!PRIM->ABE || (PRIM->ABE && (IsUsingCsInBlend() || need_aem_color))) && (possible_shuffle || (m_cached_ctx.FRAME.FBMSK & (fm_mask & 0x00FFFFFF)) != (fm_mask & 0x00FFFFFF));
|
||||
const bool alpha_used = m_context->TEX0.TCC && ((PRIM->ABE && IsUsingAsInBlend()) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST > ATST_ALWAYS) || (possible_shuffle || (m_cached_ctx.FRAME.FBMSK & (fm_mask & 0xFF000000)) != (fm_mask & 0xFF000000)));
|
||||
const bool req_alpha = (GSUtil::GetChannelMask(m_context->TEX0.PSM) & 0x8) && alpha_used;
|
||||
|
||||
// TODO: Be able to send an alpha of 1.0 (blended with vertex alpha maybe?) so we can avoid sending the texture, since we don't always need it.
|
||||
|
|
|
@ -111,6 +111,10 @@ private:
|
|||
bool ContinueSplitClear();
|
||||
void FinishSplitClear();
|
||||
|
||||
bool IsRTWritten();
|
||||
bool IsUsingCsInBlend();
|
||||
bool IsUsingAsInBlend();
|
||||
|
||||
GSVector4i m_r = {};
|
||||
|
||||
// We modify some of the context registers to optimize away unnecessary operations.
|
||||
|
@ -215,7 +219,7 @@ public:
|
|||
bool TestChannelShuffle(GSTextureCache::Target* src);
|
||||
|
||||
/// Returns true if the specified texture address matches the frame or Z buffer.
|
||||
bool IsTBPFrameOrZ(u32 tbp) const;
|
||||
bool IsTBPFrameOrZ(u32 tbp);
|
||||
|
||||
/// Offsets the current draw, used for RT-in-RT. Offsets are relative to the *current* FBP, not the new FBP.
|
||||
void OffsetDraw(s32 fbp_offset, s32 zbp_offset, s32 xoffset, s32 yoffset);
|
||||
|
|
Loading…
Reference in New Issue