GS: Improve the scissor-clipped texture range optimization when using REPEAT sampling

This commit is contained in:
refractionpcsx2 2024-03-13 13:42:34 +00:00
parent 8dce187746
commit f3a75f55e7
6 changed files with 60 additions and 53 deletions

View File

@ -3643,7 +3643,7 @@ static bool UsesRegionRepeat(int fix, int msk, int min, int max, int* min_out, i
return sets_bits || clears_bits;
}
GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCLAMP CLAMP, bool linear, bool clamp_to_tsize)
GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCLAMP CLAMP, bool linear, bool clamp_to_tsize, bool no_gaps)
{
// TODO: some of the +1s can be removed if linear == false
@ -3752,7 +3752,7 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCL
// Adjust texture range when sprites get scissor clipped. Since we linearly interpolate, this
// optimization doesn't work when perspective correction is enabled.
if (m_vt.m_primclass == GS_SPRITE_CLASS && PRIM->FST == 1 && m_index.tail < 3)
if (m_vt.m_primclass == GS_SPRITE_CLASS && PRIM->FST == 1 && no_gaps)
{
// When coordinates are fractional, GS appears to draw to the right/bottom (effectively
// taking the ceiling), not to the top/left (taking the floor).
@ -3768,50 +3768,55 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCL
const GSVertex* vert_first = &m_vertex.buff[m_index.buff[0]];
const GSVertex* vert_second = &m_vertex.buff[m_index.buff[1]];
// we need to check that it's not going to repeat over the non-clipped part
if (wms != CLAMP_REGION_REPEAT && (wms != CLAMP_REPEAT || (static_cast<int>(st.x) & ~tw_mask) == (static_cast<int>(st.z) & ~tw_mask)))
{
// Check if the UV coords are going in a different direction to the verts, if they match direction, no need to swap
const bool u_forward = vert_first->U < vert_second->U;
const bool x_forward = vert_first->XYZ.X < vert_second->XYZ.X;
const bool swap_x = u_forward != x_forward;
GSVector4 new_st = st;
// Check if the UV coords are going in a different direction to the verts, if they match direction, no need to swap
const bool u_forward = vert_first->U < vert_second->U;
const bool x_forward = vert_first->XYZ.X < vert_second->XYZ.X;
const bool swap_x = u_forward != x_forward;
if (int_rc.left < scissored_rc.left)
{
if(!swap_x)
st.x += floor(static_cast<float>(scissored_rc.left - int_rc.left) * grad.x);
else
st.z -= floor(static_cast<float>(scissored_rc.left - int_rc.left) * grad.x);
}
if (int_rc.right > scissored_rc.right)
{
if (!swap_x)
st.z -= floor(static_cast<float>(int_rc.right - scissored_rc.right) * grad.x);
else
st.x += floor(static_cast<float>(int_rc.right - scissored_rc.right) * grad.x);
}
if (int_rc.left < scissored_rc.left)
{
if (!swap_x)
new_st.x += floor(static_cast<float>(scissored_rc.left - int_rc.left) * grad.x);
else
new_st.z -= floor(static_cast<float>(scissored_rc.left - int_rc.left) * grad.x);
}
if (wmt != CLAMP_REGION_REPEAT && (wmt != CLAMP_REPEAT || (static_cast<int>(st.y) & ~th_mask) == (static_cast<int>(st.w) & ~th_mask)))
if (int_rc.right > scissored_rc.right)
{
// Check if the UV coords are going in a different direction to the verts, if they match direction, no need to swap
const bool v_forward = vert_first->V < vert_second->V;
const bool y_forward = vert_first->XYZ.Y < vert_second->XYZ.Y;
const bool swap_y = v_forward != y_forward;
if (!swap_x)
new_st.z -= floor(static_cast<float>(int_rc.right - scissored_rc.right) * grad.x);
else
new_st.x += floor(static_cast<float>(int_rc.right - scissored_rc.right) * grad.x);
}
// we need to check that it's not going to repeat over the non-clipped part
if (wms != CLAMP_REGION_REPEAT && (wms != CLAMP_REPEAT || (static_cast<int>(new_st.x) & ~tw_mask) == (static_cast<int>(new_st.z) & ~tw_mask)))
{
st.x = new_st.x;
st.z = new_st.z;
}
if (int_rc.top < scissored_rc.top)
{
if (!swap_y)
st.y += floor(static_cast<float>(scissored_rc.top - int_rc.top) * grad.y);
else
st.w -= floor(static_cast<float>(scissored_rc.top - int_rc.top) * grad.y);
}
if (int_rc.bottom > scissored_rc.bottom)
{
if (!swap_y)
st.w -= floor(static_cast<float>(int_rc.bottom - scissored_rc.bottom) * grad.y);
else
st.y += floor(static_cast<float>(int_rc.bottom - scissored_rc.bottom) * grad.y);
}
const bool v_forward = vert_first->V < vert_second->V;
const bool y_forward = vert_first->XYZ.Y < vert_second->XYZ.Y;
const bool swap_y = v_forward != y_forward;
if (int_rc.top < scissored_rc.top)
{
if (!swap_y)
new_st.y += floor(static_cast<float>(scissored_rc.top - int_rc.top) * grad.y);
else
new_st.w -= floor(static_cast<float>(scissored_rc.top - int_rc.top) * grad.y);
}
if (int_rc.bottom > scissored_rc.bottom)
{
if (!swap_y)
new_st.w -= floor(static_cast<float>(int_rc.bottom - scissored_rc.bottom) * grad.y);
else
new_st.y += floor(static_cast<float>(int_rc.bottom - scissored_rc.bottom) * grad.y);
}
if (wmt != CLAMP_REGION_REPEAT && (wmt != CLAMP_REPEAT || (static_cast<int>(new_st.y) & ~th_mask) == (static_cast<int>(new_st.w) & ~th_mask)))
{
st.y = new_st.y;
st.w = new_st.w;
}
}
}

View File

@ -182,7 +182,7 @@ protected:
GSVector4i coverage; ///< Part of the texture used
u8 uses_boundary; ///< Whether or not the usage touches the left, top, right, or bottom edge (and therefore needs wrap modes preserved)
};
TextureMinMaxResult GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCLAMP CLAMP, bool linear, bool clamp_to_tsize);
TextureMinMaxResult GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCLAMP CLAMP, bool linear, bool clamp_to_tsize, bool no_gaps);
bool TryAlphaTest(u32& fm, u32& zm);
bool IsOpaque();
bool IsMipMapDraw();

View File

@ -2100,6 +2100,7 @@ void GSRendererHW::Draw()
// --------------------------------------
m_r = GSVector4i(m_vt.m_min.p.upld(m_vt.m_max.p) + GSVector4::cxpr(0.5f));
m_r = m_r.blend8(m_r + GSVector4i::cxpr(0, 0, 1, 1), (m_r.xyxy() == m_r.zwzw()));
m_r_no_scissor = m_r;
m_r = m_r.rintersect(context->scissor.in);
// Draw is too small, just skip it.
@ -2404,7 +2405,7 @@ void GSRendererHW::Draw()
TEX0 = m_cached_ctx.TEX0;
}
tmm = GetTextureMinMax(TEX0, MIP_CLAMP, m_vt.IsLinear(), false);
tmm = GetTextureMinMax(TEX0, MIP_CLAMP, m_vt.IsLinear(), false, no_gaps);
// Snowblind games set TW/TH to 1024, and use UVs for smaller textures inside that.
// Such textures usually contain junk in local memory, so try to make them smaller based on UVs.
@ -2795,7 +2796,7 @@ void GSRendererHW::Draw()
m_vt.m_min.t *= 0.5f;
m_vt.m_max.t *= 0.5f;
tmm = GetTextureMinMax(MIP_TEX0, MIP_CLAMP, m_vt.IsLinear(), false);
tmm = GetTextureMinMax(MIP_TEX0, MIP_CLAMP, m_vt.IsLinear(), false, no_gaps);
src->UpdateLayer(MIP_TEX0, tmm.coverage, layer - m_lod.x);
}
@ -5947,7 +5948,7 @@ GSRendererHW::CLUTDrawTestResult GSRendererHW::PossibleCLUTDraw()
if (m_process_texture)
{
// If we're using a texture to draw our CLUT/whatever, we need the GPU to write back dirty data we need.
const GSVector4i r = GetTextureMinMax(m_cached_ctx.TEX0, m_cached_ctx.CLAMP, m_vt.IsLinear(), false).coverage;
const GSVector4i r = GetTextureMinMax(m_cached_ctx.TEX0, m_cached_ctx.CLAMP, m_vt.IsLinear(), false, m_vt.m_primclass == GS_SPRITE_CLASS && PrimitiveCoversWithoutGaps()).coverage;
// If we have GPU CLUT enabled, don't do a CPU draw when it would result in a download.
if (GSConfig.UserHacks_GPUTargetCLUTMode != GSGPUTargetCLUTMode::Disabled)
@ -6060,7 +6061,7 @@ bool GSRendererHW::CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_t
// If the EE has written over our sample area, we're fine to do this on the CPU, despite the target.
if (!src_target->m_dirty.empty())
{
const GSVector4i tr(GetTextureMinMax(m_cached_ctx.TEX0, m_cached_ctx.CLAMP, m_vt.IsLinear(), false).coverage);
const GSVector4i tr(GetTextureMinMax(m_cached_ctx.TEX0, m_cached_ctx.CLAMP, m_vt.IsLinear(), false, m_vt.m_primclass == GS_SPRITE_CLASS && PrimitiveCoversWithoutGaps()).coverage);
for (GSDirtyRect& rc : src_target->m_dirty)
{
if (!rc.GetDirtyRect(m_cached_ctx.TEX0, false).rintersect(tr).rempty())
@ -6773,7 +6774,7 @@ bool GSRendererHW::PrimitiveCoversWithoutGaps()
const int first_dpX = v[1].XYZ.X - v[0].XYZ.X;
// Horizontal Match.
if ((first_dpX >> 4) == m_r.z)
if ((first_dpX >> 4) == m_r_no_scissor.z)
{
// Borrowed from MergeSprite() modified to calculate heights.
for (u32 i = 2; i < m_vertex.next; i += 2)
@ -6792,7 +6793,7 @@ bool GSRendererHW::PrimitiveCoversWithoutGaps()
}
// Vertical Match.
if ((first_dpY >> 4) == m_r.w)
if ((first_dpY >> 4) == m_r_no_scissor.w)
{
// Borrowed from MergeSprite().
const int offset_X = m_context->XYOFFSET.OFX;

View File

@ -124,6 +124,7 @@ private:
bool IsUsingAsInBlend();
GSVector4i m_r = {};
GSVector4i m_r_no_scissor = {};
// We modify some of the context registers to optimize away unnecessary operations.
// Instead of messing with the real context, we copy them and use those instead.

View File

@ -186,7 +186,7 @@ bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw, bool invalidate_tc, b
GIFRegTEX0 TEX0 = context->GetSizeFixedTEX0(vt.m_min.t.xyxy(vt.m_max.t), vt.IsLinear(), mipmap);
const GSVector4i r = hw.GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true).coverage;
const GSVector4i r = hw.GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true, hw.m_index.tail < 3).coverage;
if (!hw.m_sw_texture[0])
hw.m_sw_texture[0] = std::make_unique<GSTextureCacheSW::Texture>(0, TEX0, env.TEXA);
@ -287,7 +287,7 @@ bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw, bool invalidate_tc, b
else
hw.m_sw_texture[i]->Reset(gd.sel.tw + 3, MIP_TEX0, env.TEXA);
GSVector4i r = hw.GetTextureMinMax(MIP_TEX0, MIP_CLAMP, gd.sel.ltf, true).coverage;
GSVector4i r = hw.GetTextureMinMax(MIP_TEX0, MIP_CLAMP, gd.sel.ltf, true, hw.m_index.tail < 3).coverage;
hw.m_sw_texture[i]->Update(r);
gd.tex[i] = hw.m_sw_texture[i]->m_buff;
}

View File

@ -1054,7 +1054,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
GIFRegTEX0 TEX0 = m_context->GetSizeFixedTEX0(m_vt.m_min.t.xyxy(m_vt.m_max.t), m_vt.IsLinear(), mipmap);
GSVector4i r = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true).coverage;
GSVector4i r = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true, m_index.tail < 3).coverage;
GSTextureCacheSW::Texture* t = m_tc->Lookup(TEX0, env.TEXA);
@ -1160,7 +1160,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
return false;
}
GSVector4i r = GetTextureMinMax(MIP_TEX0, MIP_CLAMP, gd.sel.ltf, true).coverage;
GSVector4i r = GetTextureMinMax(MIP_TEX0, MIP_CLAMP, gd.sel.ltf, true, m_index.tail < 3).coverage;
data->SetSource(t, r, i);
}