GS: Improve the scissored-texture optimization when using REPEAT sampling

This commit is contained in:
refractionpcsx2 2024-03-13 13:42:34 +00:00
parent 8dce187746
commit f3a75f55e7
6 changed files with 60 additions and 53 deletions

View File

@ -3643,7 +3643,7 @@ static bool UsesRegionRepeat(int fix, int msk, int min, int max, int* min_out, i
return sets_bits || clears_bits; return sets_bits || clears_bits;
} }
GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCLAMP CLAMP, bool linear, bool clamp_to_tsize) GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCLAMP CLAMP, bool linear, bool clamp_to_tsize, bool no_gaps)
{ {
// TODO: some of the +1s can be removed if linear == false // TODO: some of the +1s can be removed if linear == false
@ -3752,7 +3752,7 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCL
// Adjust texture range when sprites get scissor clipped. Since we linearly interpolate, this // Adjust texture range when sprites get scissor clipped. Since we linearly interpolate, this
// optimization doesn't work when perspective correction is enabled. // optimization doesn't work when perspective correction is enabled.
if (m_vt.m_primclass == GS_SPRITE_CLASS && PRIM->FST == 1 && m_index.tail < 3) if (m_vt.m_primclass == GS_SPRITE_CLASS && PRIM->FST == 1 && no_gaps)
{ {
// When coordinates are fractional, GS appears to draw to the right/bottom (effectively // When coordinates are fractional, GS appears to draw to the right/bottom (effectively
// taking the ceiling), not to the top/left (taking the floor). // taking the ceiling), not to the top/left (taking the floor).
@ -3768,50 +3768,55 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCL
const GSVertex* vert_first = &m_vertex.buff[m_index.buff[0]]; const GSVertex* vert_first = &m_vertex.buff[m_index.buff[0]];
const GSVertex* vert_second = &m_vertex.buff[m_index.buff[1]]; const GSVertex* vert_second = &m_vertex.buff[m_index.buff[1]];
// we need to check that it's not going to repeat over the non-clipped part GSVector4 new_st = st;
if (wms != CLAMP_REGION_REPEAT && (wms != CLAMP_REPEAT || (static_cast<int>(st.x) & ~tw_mask) == (static_cast<int>(st.z) & ~tw_mask))) // Check if the UV coords are going in a different direction to the verts, if they match direction, no need to swap
{ const bool u_forward = vert_first->U < vert_second->U;
// Check if the UV coords are going in a different direction to the verts, if they match direction, no need to swap const bool x_forward = vert_first->XYZ.X < vert_second->XYZ.X;
const bool u_forward = vert_first->U < vert_second->U; const bool swap_x = u_forward != x_forward;
const bool x_forward = vert_first->XYZ.X < vert_second->XYZ.X;
const bool swap_x = u_forward != x_forward;
if (int_rc.left < scissored_rc.left) if (int_rc.left < scissored_rc.left)
{ {
if(!swap_x) if (!swap_x)
st.x += floor(static_cast<float>(scissored_rc.left - int_rc.left) * grad.x); new_st.x += floor(static_cast<float>(scissored_rc.left - int_rc.left) * grad.x);
else else
st.z -= floor(static_cast<float>(scissored_rc.left - int_rc.left) * grad.x); new_st.z -= floor(static_cast<float>(scissored_rc.left - int_rc.left) * grad.x);
}
if (int_rc.right > scissored_rc.right)
{
if (!swap_x)
st.z -= floor(static_cast<float>(int_rc.right - scissored_rc.right) * grad.x);
else
st.x += floor(static_cast<float>(int_rc.right - scissored_rc.right) * grad.x);
}
} }
if (wmt != CLAMP_REGION_REPEAT && (wmt != CLAMP_REPEAT || (static_cast<int>(st.y) & ~th_mask) == (static_cast<int>(st.w) & ~th_mask))) if (int_rc.right > scissored_rc.right)
{ {
// Check if the UV coords are going in a different direction to the verts, if they match direction, no need to swap if (!swap_x)
const bool v_forward = vert_first->V < vert_second->V; new_st.z -= floor(static_cast<float>(int_rc.right - scissored_rc.right) * grad.x);
const bool y_forward = vert_first->XYZ.Y < vert_second->XYZ.Y; else
const bool swap_y = v_forward != y_forward; new_st.x += floor(static_cast<float>(int_rc.right - scissored_rc.right) * grad.x);
}
// we need to check that it's not going to repeat over the non-clipped part
if (wms != CLAMP_REGION_REPEAT && (wms != CLAMP_REPEAT || (static_cast<int>(new_st.x) & ~tw_mask) == (static_cast<int>(new_st.z) & ~tw_mask)))
{
st.x = new_st.x;
st.z = new_st.z;
}
if (int_rc.top < scissored_rc.top) const bool v_forward = vert_first->V < vert_second->V;
{ const bool y_forward = vert_first->XYZ.Y < vert_second->XYZ.Y;
if (!swap_y) const bool swap_y = v_forward != y_forward;
st.y += floor(static_cast<float>(scissored_rc.top - int_rc.top) * grad.y);
else if (int_rc.top < scissored_rc.top)
st.w -= floor(static_cast<float>(scissored_rc.top - int_rc.top) * grad.y); {
} if (!swap_y)
if (int_rc.bottom > scissored_rc.bottom) new_st.y += floor(static_cast<float>(scissored_rc.top - int_rc.top) * grad.y);
{ else
if (!swap_y) new_st.w -= floor(static_cast<float>(scissored_rc.top - int_rc.top) * grad.y);
st.w -= floor(static_cast<float>(int_rc.bottom - scissored_rc.bottom) * grad.y); }
else if (int_rc.bottom > scissored_rc.bottom)
st.y += floor(static_cast<float>(int_rc.bottom - scissored_rc.bottom) * grad.y); {
} if (!swap_y)
new_st.w -= floor(static_cast<float>(int_rc.bottom - scissored_rc.bottom) * grad.y);
else
new_st.y += floor(static_cast<float>(int_rc.bottom - scissored_rc.bottom) * grad.y);
}
if (wmt != CLAMP_REGION_REPEAT && (wmt != CLAMP_REPEAT || (static_cast<int>(new_st.y) & ~th_mask) == (static_cast<int>(new_st.w) & ~th_mask)))
{
st.y = new_st.y;
st.w = new_st.w;
} }
} }
} }

View File

@ -182,7 +182,7 @@ protected:
GSVector4i coverage; ///< Part of the texture used GSVector4i coverage; ///< Part of the texture used
u8 uses_boundary; ///< Whether or not the usage touches the left, top, right, or bottom edge (and therefore needs wrap modes preserved) u8 uses_boundary; ///< Whether or not the usage touches the left, top, right, or bottom edge (and therefore needs wrap modes preserved)
}; };
TextureMinMaxResult GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCLAMP CLAMP, bool linear, bool clamp_to_tsize); TextureMinMaxResult GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCLAMP CLAMP, bool linear, bool clamp_to_tsize, bool no_gaps);
bool TryAlphaTest(u32& fm, u32& zm); bool TryAlphaTest(u32& fm, u32& zm);
bool IsOpaque(); bool IsOpaque();
bool IsMipMapDraw(); bool IsMipMapDraw();

View File

@ -2100,6 +2100,7 @@ void GSRendererHW::Draw()
// -------------------------------------- // --------------------------------------
m_r = GSVector4i(m_vt.m_min.p.upld(m_vt.m_max.p) + GSVector4::cxpr(0.5f)); m_r = GSVector4i(m_vt.m_min.p.upld(m_vt.m_max.p) + GSVector4::cxpr(0.5f));
m_r = m_r.blend8(m_r + GSVector4i::cxpr(0, 0, 1, 1), (m_r.xyxy() == m_r.zwzw())); m_r = m_r.blend8(m_r + GSVector4i::cxpr(0, 0, 1, 1), (m_r.xyxy() == m_r.zwzw()));
m_r_no_scissor = m_r;
m_r = m_r.rintersect(context->scissor.in); m_r = m_r.rintersect(context->scissor.in);
// Draw is too small, just skip it. // Draw is too small, just skip it.
@ -2404,7 +2405,7 @@ void GSRendererHW::Draw()
TEX0 = m_cached_ctx.TEX0; TEX0 = m_cached_ctx.TEX0;
} }
tmm = GetTextureMinMax(TEX0, MIP_CLAMP, m_vt.IsLinear(), false); tmm = GetTextureMinMax(TEX0, MIP_CLAMP, m_vt.IsLinear(), false, no_gaps);
// Snowblind games set TW/TH to 1024, and use UVs for smaller textures inside that. // Snowblind games set TW/TH to 1024, and use UVs for smaller textures inside that.
// Such textures usually contain junk in local memory, so try to make them smaller based on UVs. // Such textures usually contain junk in local memory, so try to make them smaller based on UVs.
@ -2795,7 +2796,7 @@ void GSRendererHW::Draw()
m_vt.m_min.t *= 0.5f; m_vt.m_min.t *= 0.5f;
m_vt.m_max.t *= 0.5f; m_vt.m_max.t *= 0.5f;
tmm = GetTextureMinMax(MIP_TEX0, MIP_CLAMP, m_vt.IsLinear(), false); tmm = GetTextureMinMax(MIP_TEX0, MIP_CLAMP, m_vt.IsLinear(), false, no_gaps);
src->UpdateLayer(MIP_TEX0, tmm.coverage, layer - m_lod.x); src->UpdateLayer(MIP_TEX0, tmm.coverage, layer - m_lod.x);
} }
@ -5947,7 +5948,7 @@ GSRendererHW::CLUTDrawTestResult GSRendererHW::PossibleCLUTDraw()
if (m_process_texture) if (m_process_texture)
{ {
// If we're using a texture to draw our CLUT/whatever, we need the GPU to write back dirty data we need. // If we're using a texture to draw our CLUT/whatever, we need the GPU to write back dirty data we need.
const GSVector4i r = GetTextureMinMax(m_cached_ctx.TEX0, m_cached_ctx.CLAMP, m_vt.IsLinear(), false).coverage; const GSVector4i r = GetTextureMinMax(m_cached_ctx.TEX0, m_cached_ctx.CLAMP, m_vt.IsLinear(), false, m_vt.m_primclass == GS_SPRITE_CLASS && PrimitiveCoversWithoutGaps()).coverage;
// If we have GPU CLUT enabled, don't do a CPU draw when it would result in a download. // If we have GPU CLUT enabled, don't do a CPU draw when it would result in a download.
if (GSConfig.UserHacks_GPUTargetCLUTMode != GSGPUTargetCLUTMode::Disabled) if (GSConfig.UserHacks_GPUTargetCLUTMode != GSGPUTargetCLUTMode::Disabled)
@ -6060,7 +6061,7 @@ bool GSRendererHW::CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_t
// If the EE has written over our sample area, we're fine to do this on the CPU, despite the target. // If the EE has written over our sample area, we're fine to do this on the CPU, despite the target.
if (!src_target->m_dirty.empty()) if (!src_target->m_dirty.empty())
{ {
const GSVector4i tr(GetTextureMinMax(m_cached_ctx.TEX0, m_cached_ctx.CLAMP, m_vt.IsLinear(), false).coverage); const GSVector4i tr(GetTextureMinMax(m_cached_ctx.TEX0, m_cached_ctx.CLAMP, m_vt.IsLinear(), false, m_vt.m_primclass == GS_SPRITE_CLASS && PrimitiveCoversWithoutGaps()).coverage);
for (GSDirtyRect& rc : src_target->m_dirty) for (GSDirtyRect& rc : src_target->m_dirty)
{ {
if (!rc.GetDirtyRect(m_cached_ctx.TEX0, false).rintersect(tr).rempty()) if (!rc.GetDirtyRect(m_cached_ctx.TEX0, false).rintersect(tr).rempty())
@ -6773,7 +6774,7 @@ bool GSRendererHW::PrimitiveCoversWithoutGaps()
const int first_dpX = v[1].XYZ.X - v[0].XYZ.X; const int first_dpX = v[1].XYZ.X - v[0].XYZ.X;
// Horizontal Match. // Horizontal Match.
if ((first_dpX >> 4) == m_r.z) if ((first_dpX >> 4) == m_r_no_scissor.z)
{ {
// Borrowed from MergeSprite() modified to calculate heights. // Borrowed from MergeSprite() modified to calculate heights.
for (u32 i = 2; i < m_vertex.next; i += 2) for (u32 i = 2; i < m_vertex.next; i += 2)
@ -6792,7 +6793,7 @@ bool GSRendererHW::PrimitiveCoversWithoutGaps()
} }
// Vertical Match. // Vertical Match.
if ((first_dpY >> 4) == m_r.w) if ((first_dpY >> 4) == m_r_no_scissor.w)
{ {
// Borrowed from MergeSprite(). // Borrowed from MergeSprite().
const int offset_X = m_context->XYOFFSET.OFX; const int offset_X = m_context->XYOFFSET.OFX;

View File

@ -124,6 +124,7 @@ private:
bool IsUsingAsInBlend(); bool IsUsingAsInBlend();
GSVector4i m_r = {}; GSVector4i m_r = {};
GSVector4i m_r_no_scissor = {};
// We modify some of the context registers to optimize away unnecessary operations. // We modify some of the context registers to optimize away unnecessary operations.
// Instead of messing with the real context, we copy them and use those instead. // Instead of messing with the real context, we copy them and use those instead.

View File

@ -186,7 +186,7 @@ bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw, bool invalidate_tc, b
GIFRegTEX0 TEX0 = context->GetSizeFixedTEX0(vt.m_min.t.xyxy(vt.m_max.t), vt.IsLinear(), mipmap); GIFRegTEX0 TEX0 = context->GetSizeFixedTEX0(vt.m_min.t.xyxy(vt.m_max.t), vt.IsLinear(), mipmap);
const GSVector4i r = hw.GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true).coverage; const GSVector4i r = hw.GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true, hw.m_index.tail < 3).coverage;
if (!hw.m_sw_texture[0]) if (!hw.m_sw_texture[0])
hw.m_sw_texture[0] = std::make_unique<GSTextureCacheSW::Texture>(0, TEX0, env.TEXA); hw.m_sw_texture[0] = std::make_unique<GSTextureCacheSW::Texture>(0, TEX0, env.TEXA);
@ -287,7 +287,7 @@ bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw, bool invalidate_tc, b
else else
hw.m_sw_texture[i]->Reset(gd.sel.tw + 3, MIP_TEX0, env.TEXA); hw.m_sw_texture[i]->Reset(gd.sel.tw + 3, MIP_TEX0, env.TEXA);
GSVector4i r = hw.GetTextureMinMax(MIP_TEX0, MIP_CLAMP, gd.sel.ltf, true).coverage; GSVector4i r = hw.GetTextureMinMax(MIP_TEX0, MIP_CLAMP, gd.sel.ltf, true, hw.m_index.tail < 3).coverage;
hw.m_sw_texture[i]->Update(r); hw.m_sw_texture[i]->Update(r);
gd.tex[i] = hw.m_sw_texture[i]->m_buff; gd.tex[i] = hw.m_sw_texture[i]->m_buff;
} }

View File

@ -1054,7 +1054,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
GIFRegTEX0 TEX0 = m_context->GetSizeFixedTEX0(m_vt.m_min.t.xyxy(m_vt.m_max.t), m_vt.IsLinear(), mipmap); GIFRegTEX0 TEX0 = m_context->GetSizeFixedTEX0(m_vt.m_min.t.xyxy(m_vt.m_max.t), m_vt.IsLinear(), mipmap);
GSVector4i r = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true).coverage; GSVector4i r = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true, m_index.tail < 3).coverage;
GSTextureCacheSW::Texture* t = m_tc->Lookup(TEX0, env.TEXA); GSTextureCacheSW::Texture* t = m_tc->Lookup(TEX0, env.TEXA);
@ -1160,7 +1160,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
return false; return false;
} }
GSVector4i r = GetTextureMinMax(MIP_TEX0, MIP_CLAMP, gd.sel.ltf, true).coverage; GSVector4i r = GetTextureMinMax(MIP_TEX0, MIP_CLAMP, gd.sel.ltf, true, m_index.tail < 3).coverage;
data->SetSource(t, r, i); data->SetSource(t, r, i);
} }