GS:TC: Track whether or not textures use their clamp modes

This commit is contained in:
TellowKrinkle 2022-01-10 23:47:38 -06:00 committed by refractionpcsx2
parent f5fba1cbd1
commit e87d8da1fb
4 changed files with 84 additions and 78 deletions

View File

@ -2660,7 +2660,44 @@ __forceinline void GSState::VertexKick(u32 skip)
} }
} }
void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear) /// Checks if region repeat is used (applying it does something to at least one of the values in min...max)
/// Also calculates the real min and max values seen after applying the region repeat to all values in min...max
/// @param fix      Bits that are OR'd into every coordinate.
/// @param msk      Bits that every coordinate is AND'd with before `fix` is applied.
/// @param min      First coordinate of the range (inclusive).
/// @param max      Last coordinate of the range (inclusive).
/// @param min_out  Receives the smallest value of `(x & msk) | fix` over the range.
/// @param max_out  Receives the largest value of `(x & msk) | fix` over the range, plus one.
/// @return true if applying msk and fix changes at least one value in min...max.
static bool UsesRegionRepeat(int fix, int msk, int min, int max, int* min_out, int* max_out)
{
	if (min < 0 && max >= 0)
	{
		// The bit analysis below models counting up from min to max and relies on the sign bit
		// staying constant; crossing from -1 to 0 flips every bit at once and would also make
		// `1 << msb` shift into the sign bit. Evaluate each side of the border on its own and
		// merge the results. Both calls are made unconditionally so all outputs are initialized.
		int neg_min, neg_max, pos_min, pos_max;
		const bool neg_used = UsesRegionRepeat(fix, msk, min, -1, &neg_min, &neg_max);
		const bool pos_used = UsesRegionRepeat(fix, msk, 0, max, &pos_min, &pos_max);
		*min_out = std::min(neg_min, pos_min);
		*max_out = std::max(neg_max, pos_max);
		return neg_used || pos_used;
	}

	const int cleared_bits = ~msk & ~fix; // Bits that are always cleared by applying msk and fix
	const int set_bits = fix; // Bits that are always set by applying msk and fix
	unsigned long msb;
	int variable_bits = min ^ max;
	if (_BitScanReverse(&msb, variable_bits))
		variable_bits |= (1 << msb) - 1; // Fill in all lower bits

	const int always_set = min & ~variable_bits; // Bits that are set in every value in min...max
	const int sometimes_set = min | variable_bits; // Bits that are set in at least one value in min...max

	const bool sets_bits = (set_bits | always_set) != always_set; // At least one bit in min...max is set by applying msk and fix
	const bool clears_bits = (cleared_bits & sometimes_set) != 0; // At least one bit in min...max is cleared by applying msk and fix

	const int overwritten_variable_bits = (cleared_bits | set_bits) & variable_bits;

	// A variable bit that's `0` in `min` will at some point switch to a `1` (because it's variable)
	// When it does, all bits below it will switch to a `0` (that's how incrementing works)
	// If the 0 to 1 switch is reflected in the final output (not masked and not replaced by a fixed value),
	// the final value would be larger than the previous. Otherwise, the final value will be less.
	// The true minimum value is `min` with all bits below the most significant replaced variable `0` bit cleared
	const int min_overwritten_variable_zeros = ~min & overwritten_variable_bits;
	if (_BitScanReverse(&msb, min_overwritten_variable_zeros))
		min &= ~((1 << msb) - 1); // Same mask as `~0 << msb`, without left-shifting a negative value

	// Similar thing for max, but the first masked `1` bit
	const int max_overwritten_variable_ones = max & overwritten_variable_bits;
	if (_BitScanReverse(&msb, max_overwritten_variable_ones))
		max |= (1 << msb) - 1;

	*min_out = (msk & min) | fix;
	*max_out = ((msk & max) | fix) + 1; // Exclusive upper bound
	return sets_bits || clears_bits;
}
GSState::TextureMinMaxResult GSState::GetTextureMinMax(const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear)
{ {
// TODO: some of the +1s can be removed if linear == false // TODO: some of the +1s can be removed if linear == false
@ -2669,17 +2706,11 @@ void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFR
const int w = 1 << tw; const int w = 1 << tw;
const int h = 1 << th; const int h = 1 << th;
const int tw_mask = w - 1;
const int th_mask = h - 1;
GSVector4i tr(0, 0, w, h); GSVector4i tr(0, 0, w, h);
// don't bother checking when preload is on, since we're going to test the whole thing anyway
if (GSConfig.PreloadTexture && GSConfig.UseHardwareRenderer() &&
CanPreloadTextureSize(static_cast<u32>(tw), static_cast<u32>(th)))
{
r = tr;
return;
}
const int wms = CLAMP.WMS; const int wms = CLAMP.WMS;
const int wmt = CLAMP.WMT; const int wmt = CLAMP.WMT;
@ -2730,69 +2761,35 @@ void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFR
__assume(0); __assume(0);
} }
bool skipClamp = false; u8 uses_border = 0;
// If any of the min/max values are +-FLT_MAX we can't rely on them
// so just assume full texture.
if (m_vt.m_max.t.x >= FLT_MAX || m_vt.m_min.t.x <= -FLT_MAX || if (m_vt.m_max.t.x >= FLT_MAX || m_vt.m_min.t.x <= -FLT_MAX ||
m_vt.m_max.t.y >= FLT_MAX || m_vt.m_min.t.y <= -FLT_MAX) m_vt.m_max.t.y >= FLT_MAX || m_vt.m_min.t.y <= -FLT_MAX)
skipClamp = true;
if (wms == CLAMP_REGION_REPEAT && wmt == CLAMP_REGION_REPEAT)
skipClamp = true;
// Optimisation aims to reduce the amount of texture loaded to only the bit which will be read
if (!skipClamp)
{ {
// If any of the min/max values are +-FLT_MAX we can't rely on them
// so just assume full texture.
uses_border = 0xF;
}
else
{
// Optimisation aims to reduce the amount of texture loaded to only the bit which will be read
GSVector4 st = m_vt.m_min.t.xyxy(m_vt.m_max.t); GSVector4 st = m_vt.m_min.t.xyxy(m_vt.m_max.t);
if (linear) if (linear)
st += GSVector4(-0.5f, 0.5f).xxyy(); st += GSVector4(-0.5f, 0.5f).xxyy();
GSVector4i uv = GSVector4i(st.floor()); GSVector4i uv = GSVector4i(st.floor());
GSVector4i u, v, uu, vv; uses_border = GSVector4::cast((uv < vr).blend32<0xc>(uv >= vr)).mask();
// Checks for UV's going above the size of the texture (for wrapping)
if (wms == CLAMP_REPEAT)
{
// See commented code below for the meaning of mask
u = uv & GSVector4i::xffffffff().srl32(32 - tw);
uu = uv.sra32(tw);
}
if (wmt == CLAMP_REPEAT)
{
// See commented code below for the meaning of mask
v = uv & GSVector4i::xffffffff().srl32(32 - th);
vv = uv.sra32(th);
}
const int mask = (uu.upl32(vv) == uu.uph32(vv)).mask();
// if values don't match it means that the texture will wrap so it needs the whole thing
// vy uy vx ux
// ==
// vw uw vz uz
// Roughly cut out the min/max of the read (Clamp) // Roughly cut out the min/max of the read (Clamp)
// Intersect on vr because it will have already cut it on region clamp
uv = uv.rintersect(vr);
switch (wms) switch (wms)
{ {
case CLAMP_REPEAT: case CLAMP_REPEAT:
// This commented code cannot be used directly because it needs uv before the intersection if ((uv.x & ~tw_mask) == (uv.z & ~tw_mask))
//if (uv_.x >> tw == uv_.z >> tw)
//{
// vr.x = std::max(vr.x, (uv_.x & ((1 << tw) - 1)));
// vr.z = std::min(vr.z, (uv_.z & ((1 << tw) - 1)) + 1);
//}
//vx == vz
if (mask & 0x000f)
{ {
if (vr.x < u.x) vr.x = std::max(vr.x, uv.x & tw_mask);
vr.x = u.x; vr.z = std::min(vr.z, (uv.z & tw_mask) + 1);
if (vr.z > u.z + 1)
vr.z = u.z + 1;
} }
break; break;
case CLAMP_CLAMP: case CLAMP_CLAMP:
@ -2802,22 +2799,19 @@ void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFR
if (vr.z > (uv.z + 1)) if (vr.z > (uv.z + 1))
vr.z = uv.z + 1; vr.z = uv.z + 1;
break; break;
} case CLAMP_REGION_REPEAT:
if (UsesRegionRepeat(maxu, minu, uv.x, uv.z, &vr.x, &vr.z) || maxu >= tw)
uses_border |= TextureMinMaxResult::USES_BOUNDARY_U;
break;
}
switch (wmt) switch (wmt)
{ {
case CLAMP_REPEAT: case CLAMP_REPEAT:
//if (uv_.y >> th == uv_.w >> th) if ((uv.y & ~th_mask) == (uv.w & ~th_mask))
//{
// vr.y = max(vr.y, (uv_.y & ((1 << th) - 1)));
// vr.w = min(vr.w, (uv_.w & ((1 << th) - 1)) + 1);
//}
if (mask & 0xf000)
{ {
if (vr.y < v.y) vr.y = std::max(vr.y, uv.y & th_mask);
vr.y = v.y; vr.w = std::min(vr.w, (uv.w & th_mask) + 1);
if (vr.w > v.w + 1)
vr.w = v.w + 1;
} }
break; break;
case CLAMP_CLAMP: case CLAMP_CLAMP:
@ -2827,6 +2821,10 @@ void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFR
if (vr.w > (uv.w + 1)) if (vr.w > (uv.w + 1))
vr.w = uv.w + 1; vr.w = uv.w + 1;
break; break;
case CLAMP_REGION_REPEAT:
if (UsesRegionRepeat(maxv, minv, uv.y, uv.w, &vr.y, &vr.w) || maxv >= th)
uses_border |= TextureMinMaxResult::USES_BOUNDARY_V;
break;
} }
} }
@ -2846,7 +2844,7 @@ void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFR
vr = (vr + GSVector4i(-1, +1).xxyy()).rintersect(tr); vr = (vr + GSVector4i(-1, +1).xxyy()).rintersect(tr);
} }
r = vr; return { vr, uses_border };
} }
void GSState::CalcAlphaMinMax() void GSState::CalcAlphaMinMax()

View File

@ -198,7 +198,21 @@ protected:
CalcAlphaMinMax(); CalcAlphaMinMax();
return m_vt.m_alpha; return m_vt.m_alpha;
} }
void GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear); struct TextureMinMaxResult
{
/// Value returned by GetTextureMinMax: the part of the texture that is used,
/// together with flags describing which texture edges that usage touches.
/// Bit flags that are combined into `uses_boundary`, one bit per texture edge.
enum UsesBoundary
{
USES_BOUNDARY_LEFT = 1 << 0, ///< Usage touches the left edge
USES_BOUNDARY_TOP = 1 << 1, ///< Usage touches the top edge
USES_BOUNDARY_RIGHT = 1 << 2, ///< Usage touches the right edge
USES_BOUNDARY_BOTTOM = 1 << 3, ///< Usage touches the bottom edge
USES_BOUNDARY_U = USES_BOUNDARY_LEFT | USES_BOUNDARY_RIGHT, ///< Both horizontal edges (left and right)
USES_BOUNDARY_V = USES_BOUNDARY_TOP | USES_BOUNDARY_BOTTOM, ///< Both vertical edges (top and bottom)
};
GSVector4i coverage; ///< Part of the texture used
u8 uses_boundary; ///< Whether or not the usage touches the left, top, right, or bottom edge (and therefore needs wrap modes preserved)
};
TextureMinMaxResult GetTextureMinMax(const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear);
bool TryAlphaTest(u32& fm, u32& zm); bool TryAlphaTest(u32& fm, u32& zm);
bool IsOpaque(); bool IsOpaque();
bool IsMipMapDraw(); bool IsMipMapDraw();

View File

@ -1384,9 +1384,7 @@ void GSRendererHW::Draw()
m_context->offset.tex = m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); m_context->offset.tex = m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
GSVector4i r; GSVector4i r = GetTextureMinMax(TEX0, MIP_CLAMP, m_vt.IsLinear()).coverage;
GetTextureMinMax(r, TEX0, MIP_CLAMP, m_vt.IsLinear());
m_src = tex_psm.depth ? m_tc->LookupDepthSource(TEX0, env.TEXA, r) : m_src = tex_psm.depth ? m_tc->LookupDepthSource(TEX0, env.TEXA, r) :
m_tc->LookupSource(TEX0, env.TEXA, r, m_hw_mipmap >= HWMipmapLevel::Basic || m_tc->LookupSource(TEX0, env.TEXA, r, m_hw_mipmap >= HWMipmapLevel::Basic ||
@ -1413,7 +1411,7 @@ void GSRendererHW::Draw()
m_vt.m_min.t *= 0.5f; m_vt.m_min.t *= 0.5f;
m_vt.m_max.t *= 0.5f; m_vt.m_max.t *= 0.5f;
GetTextureMinMax(r, MIP_TEX0, MIP_CLAMP, m_vt.IsLinear()); r = GetTextureMinMax(MIP_TEX0, MIP_CLAMP, m_vt.IsLinear()).coverage;
m_src->UpdateLayer(MIP_TEX0, r, layer - m_lod.x); m_src->UpdateLayer(MIP_TEX0, r, layer - m_lod.x);
} }

View File

@ -1057,9 +1057,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
GIFRegTEX0 TEX0 = m_context->GetSizeFixedTEX0(m_vt.m_min.t.xyxy(m_vt.m_max.t), m_vt.IsLinear(), mipmap); GIFRegTEX0 TEX0 = m_context->GetSizeFixedTEX0(m_vt.m_min.t.xyxy(m_vt.m_max.t), m_vt.IsLinear(), mipmap);
GSVector4i r; GSVector4i r = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf).coverage;
GetTextureMinMax(r, TEX0, context->CLAMP, gd.sel.ltf);
GSTextureCacheSW::Texture* t = m_tc->Lookup(TEX0, env.TEXA); GSTextureCacheSW::Texture* t = m_tc->Lookup(TEX0, env.TEXA);
@ -1167,9 +1165,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
return false; return false;
} }
GSVector4i r; GSVector4i r = GetTextureMinMax(MIP_TEX0, MIP_CLAMP, gd.sel.ltf).coverage;
GetTextureMinMax(r, MIP_TEX0, MIP_CLAMP, gd.sel.ltf);
data->SetSource(t, r, i); data->SetSource(t, r, i);
} }