From f4aa5410c91bfcc871fdf6bb3695689d3a976eaf Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Wed, 13 Mar 2024 13:42:34 +0000 Subject: [PATCH] GS: Improve optimizing scissoring texture when REPEAT sampling --- pcsx2/GS/GSState.cpp | 89 ++++++++++--------- pcsx2/GS/GSState.h | 2 +- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 13 +-- pcsx2/GS/Renderers/HW/GSRendererHW.h | 1 + .../GS/Renderers/HW/GSRendererHWMultiISA.cpp | 4 +- pcsx2/GS/Renderers/SW/GSRendererSW.cpp | 4 +- 6 files changed, 60 insertions(+), 53 deletions(-) diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index 2e825c0500..1bb32b8dfd 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -3643,7 +3643,7 @@ static bool UsesRegionRepeat(int fix, int msk, int min, int max, int* min_out, i return sets_bits || clears_bits; } -GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCLAMP CLAMP, bool linear, bool clamp_to_tsize) +GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCLAMP CLAMP, bool linear, bool clamp_to_tsize, bool no_gaps) { // TODO: some of the +1s can be removed if linear == false @@ -3752,7 +3752,7 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCL // Adjust texture range when sprites get scissor clipped. Since we linearly interpolate, this // optimization doesn't work when perspective correction is enabled. - if (m_vt.m_primclass == GS_SPRITE_CLASS && PRIM->FST == 1 && m_index.tail < 3) + if (m_vt.m_primclass == GS_SPRITE_CLASS && PRIM->FST == 1 && no_gaps) { // When coordinates are fractional, GS appears to draw to the right/bottom (effectively // taking the ceiling), not to the top/left (taking the floor). @@ -3768,50 +3768,55 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCL const GSVertex* vert_first = &m_vertex.buff[m_index.buff[0]]; const GSVertex* vert_second = &m_vertex.buff[m_index.buff[1]]; - // we need to check that it's not going to repeat over the non-clipped part - if (wms != CLAMP_REGION_REPEAT && (wms != CLAMP_REPEAT || (static_cast(st.x) & ~tw_mask) == (static_cast(st.z) & ~tw_mask))) - { - // Check if the UV coords are going in a different direction to the verts, if they match direction, no need to swap - const bool u_forward = vert_first->U < vert_second->U; - const bool x_forward = vert_first->XYZ.X < vert_second->XYZ.X; - const bool swap_x = u_forward != x_forward; + GSVector4 new_st = st; + // Check if the UV coords are going in a different direction to the verts, if they match direction, no need to swap + const bool u_forward = vert_first->U < vert_second->U; + const bool x_forward = vert_first->XYZ.X < vert_second->XYZ.X; + const bool swap_x = u_forward != x_forward; - if (int_rc.left < scissored_rc.left) - { - if(!swap_x) - st.x += floor(static_cast(scissored_rc.left - int_rc.left) * grad.x); - else - st.z -= floor(static_cast(scissored_rc.left - int_rc.left) * grad.x); - } - if (int_rc.right > scissored_rc.right) - { - if (!swap_x) - st.z -= floor(static_cast(int_rc.right - scissored_rc.right) * grad.x); - else - st.x += floor(static_cast(int_rc.right - scissored_rc.right) * grad.x); - } + if (int_rc.left < scissored_rc.left) + { + if (!swap_x) + new_st.x += floor(static_cast(scissored_rc.left - int_rc.left) * grad.x); + else + new_st.z -= floor(static_cast(scissored_rc.left - int_rc.left) * grad.x); } - if (wmt != CLAMP_REGION_REPEAT && (wmt != CLAMP_REPEAT || (static_cast(st.y) & ~th_mask) == (static_cast(st.w) & ~th_mask))) + if (int_rc.right > scissored_rc.right) { - // Check if the UV coords are going in a different direction to the verts, if they match direction, no need to swap - const bool v_forward = vert_first->V < vert_second->V; - const bool y_forward = vert_first->XYZ.Y < vert_second->XYZ.Y; - const bool swap_y = v_forward != y_forward; + if (!swap_x) + new_st.z -= floor(static_cast(int_rc.right - scissored_rc.right) * grad.x); + else + new_st.x += floor(static_cast(int_rc.right - scissored_rc.right) * grad.x); + } + // we need to check that it's not going to repeat over the non-clipped part + if (wms != CLAMP_REGION_REPEAT && (wms != CLAMP_REPEAT || (static_cast(new_st.x) & ~tw_mask) == (static_cast(new_st.z) & ~tw_mask))) + { + st.x = new_st.x; + st.z = new_st.z; + } - if (int_rc.top < scissored_rc.top) - { - if (!swap_y) - st.y += floor(static_cast(scissored_rc.top - int_rc.top) * grad.y); - else - st.w -= floor(static_cast(scissored_rc.top - int_rc.top) * grad.y); - } - if (int_rc.bottom > scissored_rc.bottom) - { - if (!swap_y) - st.w -= floor(static_cast(int_rc.bottom - scissored_rc.bottom) * grad.y); - else - st.y += floor(static_cast(int_rc.bottom - scissored_rc.bottom) * grad.y); - } + const bool v_forward = vert_first->V < vert_second->V; + const bool y_forward = vert_first->XYZ.Y < vert_second->XYZ.Y; + const bool swap_y = v_forward != y_forward; + + if (int_rc.top < scissored_rc.top) + { + if (!swap_y) + new_st.y += floor(static_cast(scissored_rc.top - int_rc.top) * grad.y); + else + new_st.w -= floor(static_cast(scissored_rc.top - int_rc.top) * grad.y); + } + if (int_rc.bottom > scissored_rc.bottom) + { + if (!swap_y) + new_st.w -= floor(static_cast(int_rc.bottom - scissored_rc.bottom) * grad.y); + else + new_st.y += floor(static_cast(int_rc.bottom - scissored_rc.bottom) * grad.y); + } + if (wmt != CLAMP_REGION_REPEAT && (wmt != CLAMP_REPEAT || (static_cast(new_st.y) & ~th_mask) == (static_cast(new_st.w) & ~th_mask))) + { + st.y = new_st.y; + st.w = new_st.w; } } } diff --git a/pcsx2/GS/GSState.h b/pcsx2/GS/GSState.h index 7496a27726..c5fc341bcd 100644 --- a/pcsx2/GS/GSState.h +++ b/pcsx2/GS/GSState.h @@ -180,7 +180,7 @@ protected: GSVector4i coverage; ///< Part of the texture used u8 uses_boundary; ///< Whether or not the usage touches the left, top, right, or bottom edge (and therefore needs wrap modes preserved) }; - TextureMinMaxResult GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCLAMP CLAMP, bool linear, bool clamp_to_tsize); + TextureMinMaxResult GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCLAMP CLAMP, bool linear, bool clamp_to_tsize, bool no_gaps); bool TryAlphaTest(u32& fm, u32& zm); bool IsOpaque(); bool IsMipMapDraw(); diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index b2f5fe860f..509d7f4079 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -2100,6 +2100,7 @@ void GSRendererHW::Draw() // -------------------------------------- m_r = GSVector4i(m_vt.m_min.p.upld(m_vt.m_max.p) + GSVector4::cxpr(0.5f)); m_r = m_r.blend8(m_r + GSVector4i::cxpr(0, 0, 1, 1), (m_r.xyxy() == m_r.zwzw())); + m_r_no_scissor = m_r; m_r = m_r.rintersect(context->scissor.in); // Draw is too small, just skip it. @@ -2404,7 +2405,7 @@ void GSRendererHW::Draw() TEX0 = m_cached_ctx.TEX0; } - tmm = GetTextureMinMax(TEX0, MIP_CLAMP, m_vt.IsLinear(), false); + tmm = GetTextureMinMax(TEX0, MIP_CLAMP, m_vt.IsLinear(), false, no_gaps); // Snowblind games set TW/TH to 1024, and use UVs for smaller textures inside that. // Such textures usually contain junk in local memory, so try to make them smaller based on UVs. @@ -2795,7 +2796,7 @@ void GSRendererHW::Draw() m_vt.m_min.t *= 0.5f; m_vt.m_max.t *= 0.5f; - tmm = GetTextureMinMax(MIP_TEX0, MIP_CLAMP, m_vt.IsLinear(), false); + tmm = GetTextureMinMax(MIP_TEX0, MIP_CLAMP, m_vt.IsLinear(), false, no_gaps); src->UpdateLayer(MIP_TEX0, tmm.coverage, layer - m_lod.x); } @@ -5947,7 +5948,7 @@ GSRendererHW::CLUTDrawTestResult GSRendererHW::PossibleCLUTDraw() if (m_process_texture) { // If we're using a texture to draw our CLUT/whatever, we need the GPU to write back dirty data we need. - const GSVector4i r = GetTextureMinMax(m_cached_ctx.TEX0, m_cached_ctx.CLAMP, m_vt.IsLinear(), false).coverage; + const GSVector4i r = GetTextureMinMax(m_cached_ctx.TEX0, m_cached_ctx.CLAMP, m_vt.IsLinear(), false, m_vt.m_primclass == GS_SPRITE_CLASS && PrimitiveCoversWithoutGaps()).coverage; // If we have GPU CLUT enabled, don't do a CPU draw when it would result in a download. if (GSConfig.UserHacks_GPUTargetCLUTMode != GSGPUTargetCLUTMode::Disabled) @@ -6060,7 +6061,7 @@ bool GSRendererHW::CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_t // If the EE has written over our sample area, we're fine to do this on the CPU, despite the target. if (!src_target->m_dirty.empty()) { - const GSVector4i tr(GetTextureMinMax(m_cached_ctx.TEX0, m_cached_ctx.CLAMP, m_vt.IsLinear(), false).coverage); + const GSVector4i tr(GetTextureMinMax(m_cached_ctx.TEX0, m_cached_ctx.CLAMP, m_vt.IsLinear(), false, m_vt.m_primclass == GS_SPRITE_CLASS && PrimitiveCoversWithoutGaps()).coverage); for (GSDirtyRect& rc : src_target->m_dirty) { if (!rc.GetDirtyRect(m_cached_ctx.TEX0, false).rintersect(tr).rempty()) @@ -6773,7 +6774,7 @@ bool GSRendererHW::PrimitiveCoversWithoutGaps() const int first_dpX = v[1].XYZ.X - v[0].XYZ.X; // Horizontal Match. - if ((first_dpX >> 4) == m_r.z) + if ((first_dpX >> 4) == m_r_no_scissor.z) { // Borrowed from MergeSprite() modified to calculate heights. for (u32 i = 2; i < m_vertex.next; i += 2) @@ -6792,7 +6793,7 @@ bool GSRendererHW::PrimitiveCoversWithoutGaps() } // Vertical Match. - if ((first_dpY >> 4) == m_r.w) + if ((first_dpY >> 4) == m_r_no_scissor.w) { // Borrowed from MergeSprite(). const int offset_X = m_context->XYOFFSET.OFX; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.h b/pcsx2/GS/Renderers/HW/GSRendererHW.h index 4919b790f3..878090eed1 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.h +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.h @@ -124,6 +124,7 @@ private: bool IsUsingAsInBlend(); GSVector4i m_r = {}; + GSVector4i m_r_no_scissor = {}; // We modify some of the context registers to optimize away unnecessary operations. // Instead of messing with the real context, we copy them and use those instead. diff --git a/pcsx2/GS/Renderers/HW/GSRendererHWMultiISA.cpp b/pcsx2/GS/Renderers/HW/GSRendererHWMultiISA.cpp index 9381d4b25b..f19005b810 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHWMultiISA.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHWMultiISA.cpp @@ -186,7 +186,7 @@ bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw, bool invalidate_tc, b GIFRegTEX0 TEX0 = context->GetSizeFixedTEX0(vt.m_min.t.xyxy(vt.m_max.t), vt.IsLinear(), mipmap); - const GSVector4i r = hw.GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true).coverage; + const GSVector4i r = hw.GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true, hw.m_index.tail < 3).coverage; if (!hw.m_sw_texture[0]) hw.m_sw_texture[0] = std::make_unique(0, TEX0, env.TEXA); @@ -287,7 +287,7 @@ bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw, bool invalidate_tc, b else hw.m_sw_texture[i]->Reset(gd.sel.tw + 3, MIP_TEX0, env.TEXA); - GSVector4i r = hw.GetTextureMinMax(MIP_TEX0, MIP_CLAMP, gd.sel.ltf, true).coverage; + GSVector4i r = hw.GetTextureMinMax(MIP_TEX0, MIP_CLAMP, gd.sel.ltf, true, hw.m_index.tail < 3).coverage; hw.m_sw_texture[i]->Update(r); gd.tex[i] = hw.m_sw_texture[i]->m_buff; } diff --git a/pcsx2/GS/Renderers/SW/GSRendererSW.cpp b/pcsx2/GS/Renderers/SW/GSRendererSW.cpp index b20a8971c8..bdd4742df6 100644 --- a/pcsx2/GS/Renderers/SW/GSRendererSW.cpp +++ b/pcsx2/GS/Renderers/SW/GSRendererSW.cpp @@ -1054,7 +1054,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) GIFRegTEX0 TEX0 = m_context->GetSizeFixedTEX0(m_vt.m_min.t.xyxy(m_vt.m_max.t), m_vt.IsLinear(), mipmap); - GSVector4i r = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true).coverage; + GSVector4i r = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true, m_index.tail < 3).coverage; GSTextureCacheSW::Texture* t = m_tc->Lookup(TEX0, env.TEXA); @@ -1160,7 +1160,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) return false; } - GSVector4i r = GetTextureMinMax(MIP_TEX0, MIP_CLAMP, gd.sel.ltf, true).coverage; + GSVector4i r = GetTextureMinMax(MIP_TEX0, MIP_CLAMP, gd.sel.ltf, true, m_index.tail < 3).coverage; data->SetSource(t, r, i); }