mirror of https://github.com/PCSX2/pcsx2.git
GS/HW: Disable texture when not required
This commit is contained in:
parent
b53ffb65af
commit
27d71f530a
|
@ -545,6 +545,7 @@ REG_END2
|
||||||
// output will be Cd, Cs is discarded
|
// output will be Cd, Cs is discarded
|
||||||
__forceinline bool IsCdOutput() const { return (C == 2 && D != 1 && FIX == 0x00); }
|
__forceinline bool IsCdOutput() const { return (C == 2 && D != 1 && FIX == 0x00); }
|
||||||
__forceinline bool IsUsingCs() const { return (A == 0 || B == 0 || D == 0); }
|
__forceinline bool IsUsingCs() const { return (A == 0 || B == 0 || D == 0); }
|
||||||
|
__forceinline bool IsUsingAs() const { return (A != B && C == 0); }
|
||||||
|
|
||||||
__forceinline bool IsBlack() const { return ((C == 2 && FIX == 0) || (A == 2 && A == B)) && D == 2; }
|
__forceinline bool IsBlack() const { return ((C == 2 && FIX == 0) || (A == 2 && A == B)) && D == 2; }
|
||||||
REG_END2
|
REG_END2
|
||||||
|
|
|
@ -39,6 +39,7 @@ private:
|
||||||
protected:
|
protected:
|
||||||
GSVector2i m_real_size{0, 0};
|
GSVector2i m_real_size{0, 0};
|
||||||
bool m_texture_shuffle = false;
|
bool m_texture_shuffle = false;
|
||||||
|
bool m_process_texture = false;
|
||||||
bool m_copy_16bit_to_target_shuffle = false;
|
bool m_copy_16bit_to_target_shuffle = false;
|
||||||
bool m_same_group_texture_shuffle = false;
|
bool m_same_group_texture_shuffle = false;
|
||||||
|
|
||||||
|
|
|
@ -1899,18 +1899,22 @@ void GSRendererHW::Draw()
|
||||||
DetectDoubleHalfClear(no_rt, no_ds);
|
DetectDoubleHalfClear(no_rt, no_ds);
|
||||||
}
|
}
|
||||||
|
|
||||||
const bool process_texture = PRIM->TME && !(PRIM->ABE && m_context->ALPHA.IsBlack() && !m_cached_ctx.TEX0.TCC);
|
m_process_texture = PRIM->TME && !(PRIM->ABE && m_context->ALPHA.IsBlack() && !m_cached_ctx.TEX0.TCC);
|
||||||
const u32 frame_end_bp = GSLocalMemory::GetUnwrappedEndBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r);
|
|
||||||
const bool tex_is_rt = (process_texture && m_cached_ctx.TEX0.TBP0 >= m_cached_ctx.FRAME.Block() &&
|
|
||||||
m_cached_ctx.TEX0.TBP0 < frame_end_bp);
|
|
||||||
const bool not_writing_to_all = (!PrimitiveCoversWithoutGaps() || AreAnyPixelsDiscarded() || !all_depth_tests_pass);
|
const bool not_writing_to_all = (!PrimitiveCoversWithoutGaps() || AreAnyPixelsDiscarded() || !all_depth_tests_pass);
|
||||||
const bool preserve_rt_rgb = (!no_rt && (!IsDiscardingDstRGB() || not_writing_to_all || tex_is_rt));
|
bool preserve_depth =
|
||||||
const bool preserve_rt_alpha =
|
not_writing_to_all || (!no_ds && (!all_depth_tests_pass || !m_cached_ctx.DepthWrite() || m_cached_ctx.TEST.ATE));
|
||||||
|
|
||||||
|
const u32 frame_end_bp = GSLocalMemory::GetUnwrappedEndBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r);
|
||||||
|
|
||||||
|
// This is a first pass, but it could be disabled further down.
|
||||||
|
bool tex_is_rt = (m_process_texture && m_cached_ctx.TEX0.TBP0 >= m_cached_ctx.FRAME.Block() &&
|
||||||
|
m_cached_ctx.TEX0.TBP0 < frame_end_bp);
|
||||||
|
bool preserve_rt_rgb = (!no_rt && (!IsDiscardingDstRGB() || not_writing_to_all || tex_is_rt));
|
||||||
|
bool preserve_rt_alpha =
|
||||||
(!no_rt && (!IsDiscardingDstAlpha() || not_writing_to_all ||
|
(!no_rt && (!IsDiscardingDstAlpha() || not_writing_to_all ||
|
||||||
(tex_is_rt && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].trbpp != 24)));
|
(tex_is_rt && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].trbpp != 24)));
|
||||||
bool preserve_rt_color = preserve_rt_rgb || preserve_rt_alpha;
|
bool preserve_rt_color = preserve_rt_rgb || preserve_rt_alpha;
|
||||||
bool preserve_depth =
|
|
||||||
not_writing_to_all || (!no_ds && (!all_depth_tests_pass || !m_cached_ctx.DepthWrite() || m_cached_ctx.TEST.ATE));
|
|
||||||
|
|
||||||
// SW CLUT Render enable.
|
// SW CLUT Render enable.
|
||||||
bool force_preload = GSConfig.PreloadFrameWithGSData;
|
bool force_preload = GSConfig.PreloadFrameWithGSData;
|
||||||
|
@ -2082,7 +2086,7 @@ void GSRendererHW::Draw()
|
||||||
TextureMinMaxResult tmm;
|
TextureMinMaxResult tmm;
|
||||||
|
|
||||||
// Disable texture mapping if the blend is black and using alpha from vertex.
|
// Disable texture mapping if the blend is black and using alpha from vertex.
|
||||||
if (process_texture)
|
if (m_process_texture)
|
||||||
{
|
{
|
||||||
GIFRegCLAMP MIP_CLAMP = m_cached_ctx.CLAMP;
|
GIFRegCLAMP MIP_CLAMP = m_cached_ctx.CLAMP;
|
||||||
GSVector2i hash_lod_range(0, 0);
|
GSVector2i hash_lod_range(0, 0);
|
||||||
|
@ -2216,42 +2220,50 @@ void GSRendererHW::Draw()
|
||||||
const bool possible_shuffle = ((rt_32bit && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0) || IsPossibleChannelShuffle();
|
const bool possible_shuffle = ((rt_32bit && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0) || IsPossibleChannelShuffle();
|
||||||
const bool need_aem_color = GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].trbpp <= 24 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].pal == 0 && m_context->ALPHA.C == 0 && m_env.TEXA.AEM;
|
const bool need_aem_color = GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].trbpp <= 24 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].pal == 0 && m_context->ALPHA.C == 0 && m_env.TEXA.AEM;
|
||||||
const bool req_color = (!PRIM->ABE || (PRIM->ABE && (m_context->ALPHA.IsUsingCs() || need_aem_color))) && (possible_shuffle || (m_cached_ctx.FRAME.FBMSK & (fm_mask & 0x00FFFFFF)) != (fm_mask & 0x00FFFFFF));
|
const bool req_color = (!PRIM->ABE || (PRIM->ABE && (m_context->ALPHA.IsUsingCs() || need_aem_color))) && (possible_shuffle || (m_cached_ctx.FRAME.FBMSK & (fm_mask & 0x00FFFFFF)) != (fm_mask & 0x00FFFFFF));
|
||||||
const bool req_alpha = (GSUtil::GetChannelMask(m_context->TEX0.PSM) & 0x8) && m_context->TEX0.TCC && ((m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST > ATST_ALWAYS) || (possible_shuffle || (m_cached_ctx.FRAME.FBMSK & (fm_mask & 0xFF000000)) != (fm_mask & 0xFF000000)));
|
const bool alpha_used = m_context->TEX0.TCC && ((PRIM->ABE && m_context->ALPHA.IsUsingAs()) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST > ATST_ALWAYS) || (possible_shuffle || (m_cached_ctx.FRAME.FBMSK & (fm_mask & 0xFF000000)) != (fm_mask & 0xFF000000)));
|
||||||
|
const bool req_alpha = (GSUtil::GetChannelMask(m_context->TEX0.PSM) & 0x8) && alpha_used;
|
||||||
|
|
||||||
src = tex_psm.depth ? g_texture_cache->LookupDepthSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, possible_shuffle, m_vt.IsLinear(), m_cached_ctx.FRAME.Block(), req_color, req_alpha) :
|
// TODO: Be able to send an alpha of 1.0 (blended with vertex alpha maybe?) so we can avoid sending the texture, since we don't always need it.
|
||||||
g_texture_cache->LookupSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, (GSConfig.HWMipmap >= HWMipmapLevel::Basic || GSConfig.TriFilter == TriFiltering::Forced) ? &hash_lod_range : nullptr,
|
// Example games: Evolution Snowboarding, Final Fantasy Dirge of Cerberus, Red Dead Revolver, Stuntman, Tony Hawk's Underground 2, Ultimate Spider-Man.
|
||||||
possible_shuffle, m_vt.IsLinear(), m_cached_ctx.FRAME.Block(), req_color, req_alpha);
|
if (!req_color && !alpha_used)
|
||||||
|
m_process_texture = false;
|
||||||
if (unlikely(!src))
|
else
|
||||||
{
|
{
|
||||||
GL_INS("ERROR: Source lookup failed, skipping.");
|
src = tex_psm.depth ? g_texture_cache->LookupDepthSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, possible_shuffle, m_vt.IsLinear(), m_cached_ctx.FRAME.Block(), req_color, req_alpha) :
|
||||||
CleanupDraw(true);
|
g_texture_cache->LookupSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, (GSConfig.HWMipmap >= HWMipmapLevel::Basic || GSConfig.TriFilter == TriFiltering::Forced) ? &hash_lod_range : nullptr,
|
||||||
return;
|
possible_shuffle, m_vt.IsLinear(), m_cached_ctx.FRAME.Block(), req_color, req_alpha);
|
||||||
}
|
|
||||||
|
|
||||||
// We don't know the alpha range of direct sources when we first tried to optimize the alpha test.
|
if (unlikely(!src))
|
||||||
// Moving the texture lookup before the ATST optimization complicates things a lot, so instead,
|
|
||||||
// recompute it, and everything derived from it again if it changes.
|
|
||||||
if (GSLocalMemory::m_psm[src->m_TEX0.PSM].pal == 0)
|
|
||||||
{
|
|
||||||
CalcAlphaMinMax(src->m_alpha_minmax.first, src->m_alpha_minmax.second);
|
|
||||||
|
|
||||||
u32 new_fm = m_context->FRAME.FBMSK;
|
|
||||||
u32 new_zm = m_context->ZBUF.ZMSK || m_context->TEST.ZTE == 0 ? 0xffffffff : 0;
|
|
||||||
if (m_cached_ctx.TEST.ATE && GSRenderer::TryAlphaTest(new_fm, new_zm))
|
|
||||||
{
|
{
|
||||||
m_cached_ctx.TEST.ATE = false;
|
GL_INS("ERROR: Source lookup failed, skipping.");
|
||||||
m_cached_ctx.FRAME.FBMSK = new_fm;
|
CleanupDraw(true);
|
||||||
m_cached_ctx.ZBUF.ZMSK = (new_zm != 0);
|
return;
|
||||||
fm = new_fm;
|
}
|
||||||
zm = new_zm;
|
|
||||||
no_rt = no_rt || (!m_cached_ctx.TEST.DATE && (fm & GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk) == GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk);
|
// We don't know the alpha range of direct sources when we first tried to optimize the alpha test.
|
||||||
no_ds = no_ds || (zm != 0 && all_depth_tests_pass);
|
// Moving the texture lookup before the ATST optimization complicates things a lot, so instead,
|
||||||
if (no_rt && no_ds)
|
// recompute it, and everything derived from it again if it changes.
|
||||||
|
if (GSLocalMemory::m_psm[src->m_TEX0.PSM].pal == 0)
|
||||||
|
{
|
||||||
|
CalcAlphaMinMax(src->m_alpha_minmax.first, src->m_alpha_minmax.second);
|
||||||
|
|
||||||
|
u32 new_fm = m_context->FRAME.FBMSK;
|
||||||
|
u32 new_zm = m_context->ZBUF.ZMSK || m_context->TEST.ZTE == 0 ? 0xffffffff : 0;
|
||||||
|
if (m_cached_ctx.TEST.ATE && GSRenderer::TryAlphaTest(new_fm, new_zm))
|
||||||
{
|
{
|
||||||
GL_INS("Late draw cancel because no pixels pass alpha test.");
|
m_cached_ctx.TEST.ATE = false;
|
||||||
CleanupDraw(true);
|
m_cached_ctx.FRAME.FBMSK = new_fm;
|
||||||
return;
|
m_cached_ctx.ZBUF.ZMSK = (new_zm != 0);
|
||||||
|
fm = new_fm;
|
||||||
|
zm = new_zm;
|
||||||
|
no_rt = no_rt || (!m_cached_ctx.TEST.DATE && (fm & GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk) == GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk);
|
||||||
|
no_ds = no_ds || (zm != 0 && all_depth_tests_pass);
|
||||||
|
if (no_rt && no_ds)
|
||||||
|
{
|
||||||
|
GL_INS("Late draw cancel because no pixels pass alpha test.");
|
||||||
|
CleanupDraw(true);
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2279,6 +2291,17 @@ void GSRendererHW::Draw()
|
||||||
target_scale = 1.0f;
|
target_scale = 1.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!m_process_texture && tex_is_rt)
|
||||||
|
{
|
||||||
|
tex_is_rt = (m_process_texture && m_cached_ctx.TEX0.TBP0 >= m_cached_ctx.FRAME.Block() &&
|
||||||
|
m_cached_ctx.TEX0.TBP0 < frame_end_bp);
|
||||||
|
preserve_rt_rgb = (!no_rt && (!IsDiscardingDstRGB() || not_writing_to_all || tex_is_rt));
|
||||||
|
preserve_rt_alpha =
|
||||||
|
(!no_rt && (!IsDiscardingDstAlpha() || not_writing_to_all ||
|
||||||
|
(tex_is_rt && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].trbpp != 24)));
|
||||||
|
preserve_rt_color = preserve_rt_rgb || preserve_rt_alpha;
|
||||||
|
}
|
||||||
|
|
||||||
GSTextureCache::Target* rt = nullptr;
|
GSTextureCache::Target* rt = nullptr;
|
||||||
GIFRegTEX0 FRAME_TEX0;
|
GIFRegTEX0 FRAME_TEX0;
|
||||||
if (!no_rt)
|
if (!no_rt)
|
||||||
|
@ -2354,7 +2377,7 @@ void GSRendererHW::Draw()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (process_texture)
|
if (m_process_texture)
|
||||||
{
|
{
|
||||||
GIFRegCLAMP MIP_CLAMP = m_cached_ctx.CLAMP;
|
GIFRegCLAMP MIP_CLAMP = m_cached_ctx.CLAMP;
|
||||||
const GSVertex* v = &m_vertex.buff[0];
|
const GSVertex* v = &m_vertex.buff[0];
|
||||||
|
@ -2939,7 +2962,7 @@ void GSRendererHW::SetupIA(float target_scale, float sx, float sy)
|
||||||
{
|
{
|
||||||
GL_PUSH("IA");
|
GL_PUSH("IA");
|
||||||
|
|
||||||
if (GSConfig.UserHacks_WildHack && !m_isPackedUV_HackFlag && PRIM->TME && PRIM->FST)
|
if (GSConfig.UserHacks_WildHack && !m_isPackedUV_HackFlag && m_process_texture && PRIM->FST)
|
||||||
{
|
{
|
||||||
for (u32 i = 0; i < m_vertex.next; i++)
|
for (u32 i = 0; i < m_vertex.next; i++)
|
||||||
m_vertex.buff[i].UV &= 0x3FEF3FEF;
|
m_vertex.buff[i].UV &= 0x3FEF3FEF;
|
||||||
|
@ -5089,7 +5112,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
|
||||||
|
|
||||||
// vs
|
// vs
|
||||||
|
|
||||||
m_conf.vs.tme = PRIM->TME;
|
m_conf.vs.tme = m_process_texture;
|
||||||
m_conf.vs.fst = PRIM->FST;
|
m_conf.vs.fst = PRIM->FST;
|
||||||
|
|
||||||
// FIXME D3D11 and GL support half pixel center. Code could be easier!!!
|
// FIXME D3D11 and GL support half pixel center. Code could be easier!!!
|
||||||
|
@ -5431,15 +5454,15 @@ GSRendererHW::CLUTDrawTestResult GSRendererHW::PossibleCLUTDraw()
|
||||||
// Writing to the framebuffer for output. We're not interested. - Note: This stops NFS HP2 Busted screens working, but they're glitchy anyway
|
// Writing to the framebuffer for output. We're not interested. - Note: This stops NFS HP2 Busted screens working, but they're glitchy anyway
|
||||||
// what NFS HP2 really needs is a kind of shuffle with mask, 32bit target is interpreted as 16bit and masked.
|
// what NFS HP2 really needs is a kind of shuffle with mask, 32bit target is interpreted as 16bit and masked.
|
||||||
if ((m_regs->DISP[0].DISPFB.Block() == m_cached_ctx.FRAME.Block()) || (m_regs->DISP[1].DISPFB.Block() == m_cached_ctx.FRAME.Block()) ||
|
if ((m_regs->DISP[0].DISPFB.Block() == m_cached_ctx.FRAME.Block()) || (m_regs->DISP[1].DISPFB.Block() == m_cached_ctx.FRAME.Block()) ||
|
||||||
(PRIM->TME && ((m_regs->DISP[0].DISPFB.Block() == m_cached_ctx.TEX0.TBP0) || (m_regs->DISP[1].DISPFB.Block() == m_cached_ctx.TEX0.TBP0)) && !(m_mem.m_clut.IsInvalid() & 2)))
|
(m_process_texture && ((m_regs->DISP[0].DISPFB.Block() == m_cached_ctx.TEX0.TBP0) || (m_regs->DISP[1].DISPFB.Block() == m_cached_ctx.TEX0.TBP0)) && !(m_mem.m_clut.IsInvalid() & 2)))
|
||||||
return CLUTDrawTestResult::NotCLUTDraw;
|
return CLUTDrawTestResult::NotCLUTDraw;
|
||||||
|
|
||||||
// Ignore large render targets, make sure it's staying in page width.
|
// Ignore large render targets, make sure it's staying in page width.
|
||||||
if (PRIM->TME && (m_cached_ctx.FRAME.FBW != 1 && m_cached_ctx.TEX0.TBW == m_cached_ctx.FRAME.FBW))
|
if (m_process_texture && (m_cached_ctx.FRAME.FBW != 1 && m_cached_ctx.TEX0.TBW == m_cached_ctx.FRAME.FBW))
|
||||||
return CLUTDrawTestResult::NotCLUTDraw;
|
return CLUTDrawTestResult::NotCLUTDraw;
|
||||||
|
|
||||||
// Hopefully no games draw a CLUT with a CLUT, that would be evil, most likely a channel shuffle.
|
// Hopefully no games draw a CLUT with a CLUT, that would be evil, most likely a channel shuffle.
|
||||||
if (PRIM->TME && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].pal > 0)
|
if (m_process_texture && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].pal > 0)
|
||||||
return CLUTDrawTestResult::NotCLUTDraw;
|
return CLUTDrawTestResult::NotCLUTDraw;
|
||||||
|
|
||||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM];
|
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM];
|
||||||
|
@ -5481,7 +5504,7 @@ GSRendererHW::CLUTDrawTestResult GSRendererHW::PossibleCLUTDraw()
|
||||||
if (!valid_size)
|
if (!valid_size)
|
||||||
return CLUTDrawTestResult::NotCLUTDraw;
|
return CLUTDrawTestResult::NotCLUTDraw;
|
||||||
|
|
||||||
if (PRIM->TME)
|
if (m_process_texture)
|
||||||
{
|
{
|
||||||
// If we're using a texture to draw our CLUT/whatever, we need the GPU to write back dirty data we need.
|
// If we're using a texture to draw our CLUT/whatever, we need the GPU to write back dirty data we need.
|
||||||
const GSVector4i r = GetTextureMinMax(m_cached_ctx.TEX0, m_cached_ctx.CLAMP, m_vt.IsLinear(), false).coverage;
|
const GSVector4i r = GetTextureMinMax(m_cached_ctx.TEX0, m_cached_ctx.CLAMP, m_vt.IsLinear(), false).coverage;
|
||||||
|
@ -6081,7 +6104,7 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r,
|
||||||
|
|
||||||
bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Source* tex, const GSVector4i& r_draw)
|
bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Source* tex, const GSVector4i& r_draw)
|
||||||
{
|
{
|
||||||
if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && PRIM->TME && !PRIM->ABE && tex && !tex->m_target && m_cached_ctx.TEX0.TBW > 0)
|
if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && m_process_texture && !PRIM->ABE && tex && !tex->m_target && m_cached_ctx.TEX0.TBW > 0)
|
||||||
{
|
{
|
||||||
GL_PUSH("OI_BlitFMV");
|
GL_PUSH("OI_BlitFMV");
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue