GS/HW: Disable texture when not required

This commit is contained in:
refractionpcsx2 2023-10-25 20:34:40 +01:00
parent b53ffb65af
commit 27d71f530a
3 changed files with 73 additions and 48 deletions

View File

@ -545,6 +545,7 @@ REG_END2
// output will be Cd, Cs is discarded
__forceinline bool IsCdOutput() const { return (C == 2 && D != 1 && FIX == 0x00); }
__forceinline bool IsUsingCs() const { return (A == 0 || B == 0 || D == 0); }
__forceinline bool IsUsingAs() const { return (A != B && C == 0); }
__forceinline bool IsBlack() const { return ((C == 2 && FIX == 0) || (A == 2 && A == B)) && D == 2; }
REG_END2

View File

@ -39,6 +39,7 @@ private:
protected:
GSVector2i m_real_size{0, 0};
bool m_texture_shuffle = false;
bool m_process_texture = false;
bool m_copy_16bit_to_target_shuffle = false;
bool m_same_group_texture_shuffle = false;

View File

@ -1899,18 +1899,22 @@ void GSRendererHW::Draw()
DetectDoubleHalfClear(no_rt, no_ds);
}
const bool process_texture = PRIM->TME && !(PRIM->ABE && m_context->ALPHA.IsBlack() && !m_cached_ctx.TEX0.TCC);
const u32 frame_end_bp = GSLocalMemory::GetUnwrappedEndBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r);
const bool tex_is_rt = (process_texture && m_cached_ctx.TEX0.TBP0 >= m_cached_ctx.FRAME.Block() &&
m_cached_ctx.TEX0.TBP0 < frame_end_bp);
m_process_texture = PRIM->TME && !(PRIM->ABE && m_context->ALPHA.IsBlack() && !m_cached_ctx.TEX0.TCC);
const bool not_writing_to_all = (!PrimitiveCoversWithoutGaps() || AreAnyPixelsDiscarded() || !all_depth_tests_pass);
const bool preserve_rt_rgb = (!no_rt && (!IsDiscardingDstRGB() || not_writing_to_all || tex_is_rt));
const bool preserve_rt_alpha =
bool preserve_depth =
not_writing_to_all || (!no_ds && (!all_depth_tests_pass || !m_cached_ctx.DepthWrite() || m_cached_ctx.TEST.ATE));
const u32 frame_end_bp = GSLocalMemory::GetUnwrappedEndBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r);
// This is a first pass, but it could be disabled further down.
bool tex_is_rt = (m_process_texture && m_cached_ctx.TEX0.TBP0 >= m_cached_ctx.FRAME.Block() &&
m_cached_ctx.TEX0.TBP0 < frame_end_bp);
bool preserve_rt_rgb = (!no_rt && (!IsDiscardingDstRGB() || not_writing_to_all || tex_is_rt));
bool preserve_rt_alpha =
(!no_rt && (!IsDiscardingDstAlpha() || not_writing_to_all ||
(tex_is_rt && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].trbpp != 24)));
bool preserve_rt_color = preserve_rt_rgb || preserve_rt_alpha;
bool preserve_depth =
not_writing_to_all || (!no_ds && (!all_depth_tests_pass || !m_cached_ctx.DepthWrite() || m_cached_ctx.TEST.ATE));
// SW CLUT Render enable.
bool force_preload = GSConfig.PreloadFrameWithGSData;
@ -2082,7 +2086,7 @@ void GSRendererHW::Draw()
TextureMinMaxResult tmm;
// Disable texture mapping if the blend is black and using alpha from vertex.
if (process_texture)
if (m_process_texture)
{
GIFRegCLAMP MIP_CLAMP = m_cached_ctx.CLAMP;
GSVector2i hash_lod_range(0, 0);
@ -2216,42 +2220,50 @@ void GSRendererHW::Draw()
const bool possible_shuffle = ((rt_32bit && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0) || IsPossibleChannelShuffle();
const bool need_aem_color = GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].trbpp <= 24 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].pal == 0 && m_context->ALPHA.C == 0 && m_env.TEXA.AEM;
const bool req_color = (!PRIM->ABE || (PRIM->ABE && (m_context->ALPHA.IsUsingCs() || need_aem_color))) && (possible_shuffle || (m_cached_ctx.FRAME.FBMSK & (fm_mask & 0x00FFFFFF)) != (fm_mask & 0x00FFFFFF));
const bool req_alpha = (GSUtil::GetChannelMask(m_context->TEX0.PSM) & 0x8) && m_context->TEX0.TCC && ((m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST > ATST_ALWAYS) || (possible_shuffle || (m_cached_ctx.FRAME.FBMSK & (fm_mask & 0xFF000000)) != (fm_mask & 0xFF000000)));
const bool alpha_used = m_context->TEX0.TCC && ((PRIM->ABE && m_context->ALPHA.IsUsingAs()) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST > ATST_ALWAYS) || (possible_shuffle || (m_cached_ctx.FRAME.FBMSK & (fm_mask & 0xFF000000)) != (fm_mask & 0xFF000000)));
const bool req_alpha = (GSUtil::GetChannelMask(m_context->TEX0.PSM) & 0x8) && alpha_used;
src = tex_psm.depth ? g_texture_cache->LookupDepthSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, possible_shuffle, m_vt.IsLinear(), m_cached_ctx.FRAME.Block(), req_color, req_alpha) :
g_texture_cache->LookupSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, (GSConfig.HWMipmap >= HWMipmapLevel::Basic || GSConfig.TriFilter == TriFiltering::Forced) ? &hash_lod_range : nullptr,
possible_shuffle, m_vt.IsLinear(), m_cached_ctx.FRAME.Block(), req_color, req_alpha);
if (unlikely(!src))
// TODO: Be able to send an alpha of 1.0 (blended with vertex alpha maybe?) so we can avoid sending the texture, since we don't always need it.
// Example games: Evolution Snowboarding, Final Fantasy Dirge of Cerberus, Red Dead Revolver, Stuntman, Tony Hawk's Underground 2, Ultimate Spider-Man.
if (!req_color && !alpha_used)
m_process_texture = false;
else
{
GL_INS("ERROR: Source lookup failed, skipping.");
CleanupDraw(true);
return;
}
src = tex_psm.depth ? g_texture_cache->LookupDepthSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, possible_shuffle, m_vt.IsLinear(), m_cached_ctx.FRAME.Block(), req_color, req_alpha) :
g_texture_cache->LookupSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, (GSConfig.HWMipmap >= HWMipmapLevel::Basic || GSConfig.TriFilter == TriFiltering::Forced) ? &hash_lod_range : nullptr,
possible_shuffle, m_vt.IsLinear(), m_cached_ctx.FRAME.Block(), req_color, req_alpha);
// We don't know the alpha range of direct sources when we first tried to optimize the alpha test.
// Moving the texture lookup before the ATST optimization complicates things a lot, so instead,
// recompute it, and everything derived from it again if it changes.
if (GSLocalMemory::m_psm[src->m_TEX0.PSM].pal == 0)
{
CalcAlphaMinMax(src->m_alpha_minmax.first, src->m_alpha_minmax.second);
u32 new_fm = m_context->FRAME.FBMSK;
u32 new_zm = m_context->ZBUF.ZMSK || m_context->TEST.ZTE == 0 ? 0xffffffff : 0;
if (m_cached_ctx.TEST.ATE && GSRenderer::TryAlphaTest(new_fm, new_zm))
if (unlikely(!src))
{
m_cached_ctx.TEST.ATE = false;
m_cached_ctx.FRAME.FBMSK = new_fm;
m_cached_ctx.ZBUF.ZMSK = (new_zm != 0);
fm = new_fm;
zm = new_zm;
no_rt = no_rt || (!m_cached_ctx.TEST.DATE && (fm & GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk) == GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk);
no_ds = no_ds || (zm != 0 && all_depth_tests_pass);
if (no_rt && no_ds)
GL_INS("ERROR: Source lookup failed, skipping.");
CleanupDraw(true);
return;
}
// We don't know the alpha range of direct sources when we first tried to optimize the alpha test.
// Moving the texture lookup before the ATST optimization complicates things a lot, so instead,
// recompute it, and everything derived from it again if it changes.
if (GSLocalMemory::m_psm[src->m_TEX0.PSM].pal == 0)
{
CalcAlphaMinMax(src->m_alpha_minmax.first, src->m_alpha_minmax.second);
u32 new_fm = m_context->FRAME.FBMSK;
u32 new_zm = m_context->ZBUF.ZMSK || m_context->TEST.ZTE == 0 ? 0xffffffff : 0;
if (m_cached_ctx.TEST.ATE && GSRenderer::TryAlphaTest(new_fm, new_zm))
{
GL_INS("Late draw cancel because no pixels pass alpha test.");
CleanupDraw(true);
return;
m_cached_ctx.TEST.ATE = false;
m_cached_ctx.FRAME.FBMSK = new_fm;
m_cached_ctx.ZBUF.ZMSK = (new_zm != 0);
fm = new_fm;
zm = new_zm;
no_rt = no_rt || (!m_cached_ctx.TEST.DATE && (fm & GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk) == GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk);
no_ds = no_ds || (zm != 0 && all_depth_tests_pass);
if (no_rt && no_ds)
{
GL_INS("Late draw cancel because no pixels pass alpha test.");
CleanupDraw(true);
return;
}
}
}
}
@ -2279,6 +2291,17 @@ void GSRendererHW::Draw()
target_scale = 1.0f;
}
if (!m_process_texture && tex_is_rt)
{
tex_is_rt = (m_process_texture && m_cached_ctx.TEX0.TBP0 >= m_cached_ctx.FRAME.Block() &&
m_cached_ctx.TEX0.TBP0 < frame_end_bp);
preserve_rt_rgb = (!no_rt && (!IsDiscardingDstRGB() || not_writing_to_all || tex_is_rt));
preserve_rt_alpha =
(!no_rt && (!IsDiscardingDstAlpha() || not_writing_to_all ||
(tex_is_rt && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].trbpp != 24)));
preserve_rt_color = preserve_rt_rgb || preserve_rt_alpha;
}
GSTextureCache::Target* rt = nullptr;
GIFRegTEX0 FRAME_TEX0;
if (!no_rt)
@ -2354,7 +2377,7 @@ void GSRendererHW::Draw()
}
}
if (process_texture)
if (m_process_texture)
{
GIFRegCLAMP MIP_CLAMP = m_cached_ctx.CLAMP;
const GSVertex* v = &m_vertex.buff[0];
@ -2939,7 +2962,7 @@ void GSRendererHW::SetupIA(float target_scale, float sx, float sy)
{
GL_PUSH("IA");
if (GSConfig.UserHacks_WildHack && !m_isPackedUV_HackFlag && PRIM->TME && PRIM->FST)
if (GSConfig.UserHacks_WildHack && !m_isPackedUV_HackFlag && m_process_texture && PRIM->FST)
{
for (u32 i = 0; i < m_vertex.next; i++)
m_vertex.buff[i].UV &= 0x3FEF3FEF;
@ -5089,7 +5112,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
// vs
m_conf.vs.tme = PRIM->TME;
m_conf.vs.tme = m_process_texture;
m_conf.vs.fst = PRIM->FST;
// FIXME D3D11 and GL support half pixel center. Code could be easier!!!
@ -5431,15 +5454,15 @@ GSRendererHW::CLUTDrawTestResult GSRendererHW::PossibleCLUTDraw()
// Writing to the framebuffer for output. We're not interested. - Note: This stops NFS HP2 Busted screens working, but they're glitchy anyway
// what NFS HP2 really needs is a kind of shuffle with mask, 32bit target is interpreted as 16bit and masked.
if ((m_regs->DISP[0].DISPFB.Block() == m_cached_ctx.FRAME.Block()) || (m_regs->DISP[1].DISPFB.Block() == m_cached_ctx.FRAME.Block()) ||
(PRIM->TME && ((m_regs->DISP[0].DISPFB.Block() == m_cached_ctx.TEX0.TBP0) || (m_regs->DISP[1].DISPFB.Block() == m_cached_ctx.TEX0.TBP0)) && !(m_mem.m_clut.IsInvalid() & 2)))
(m_process_texture && ((m_regs->DISP[0].DISPFB.Block() == m_cached_ctx.TEX0.TBP0) || (m_regs->DISP[1].DISPFB.Block() == m_cached_ctx.TEX0.TBP0)) && !(m_mem.m_clut.IsInvalid() & 2)))
return CLUTDrawTestResult::NotCLUTDraw;
// Ignore large render targets, make sure it's staying in page width.
if (PRIM->TME && (m_cached_ctx.FRAME.FBW != 1 && m_cached_ctx.TEX0.TBW == m_cached_ctx.FRAME.FBW))
if (m_process_texture && (m_cached_ctx.FRAME.FBW != 1 && m_cached_ctx.TEX0.TBW == m_cached_ctx.FRAME.FBW))
return CLUTDrawTestResult::NotCLUTDraw;
// Hopefully no games draw a CLUT with a CLUT, that would be evil, most likely a channel shuffle.
if (PRIM->TME && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].pal > 0)
if (m_process_texture && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].pal > 0)
return CLUTDrawTestResult::NotCLUTDraw;
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM];
@ -5481,7 +5504,7 @@ GSRendererHW::CLUTDrawTestResult GSRendererHW::PossibleCLUTDraw()
if (!valid_size)
return CLUTDrawTestResult::NotCLUTDraw;
if (PRIM->TME)
if (m_process_texture)
{
// If we're using a texture to draw our CLUT/whatever, we need the GPU to write back dirty data we need.
const GSVector4i r = GetTextureMinMax(m_cached_ctx.TEX0, m_cached_ctx.CLAMP, m_vt.IsLinear(), false).coverage;
@ -6081,7 +6104,7 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r,
bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Source* tex, const GSVector4i& r_draw)
{
if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && PRIM->TME && !PRIM->ABE && tex && !tex->m_target && m_cached_ctx.TEX0.TBW > 0)
if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && m_process_texture && !PRIM->ABE && tex && !tex->m_target && m_cached_ctx.TEX0.TBW > 0)
{
GL_PUSH("OI_BlitFMV");