GS: Redo the Texture min/max opt

This commit is contained in:
refractionpcsx2 2021-11-03 01:35:40 +00:00
parent d29121a871
commit d88f638fbc
1 changed files with 91 additions and 36 deletions

View File

@ -2599,37 +2599,71 @@ void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFR
__assume(0);
}
// Small optimisation left over from a much bigger (and broken) optimisation
// This makes Baldurs Gate 2 go brr. Without it, it's pretty slow due to cutting
// things up in to tiny pieces and the GS allocating a huge texture
if (wms == CLAMP_REPEAT && wmt == CLAMP_REPEAT)
bool skipClamp = false;
// If any of the min/max values are +-FLT_MAX we can't rely on them
// so just assume full texture.
if (m_vt.m_max.t.x >= FLT_MAX || m_vt.m_min.t.x <= -FLT_MAX ||
m_vt.m_max.t.y >= FLT_MAX || m_vt.m_min.t.y <= -FLT_MAX)
skipClamp = true;
// Optimisation aims to reduce the amount of texture loaded to only the bit which will be read
if (!skipClamp)
{
GSVector4 st = m_vt.m_min.t.xyxy(m_vt.m_max.t);
if (linear)
{
st += GSVector4(-0.5f, 0.5f).xxyy();
}
GSVector4i uv = GSVector4i(st.floor());
GSVector4i u, v, uu, vv;
// Checks for UV's going above the size of the texture (for wrapping)
if (wms == CLAMP_REGION_REPEAT)
{
u = uv & (maxu | minu);
uu = uv & ~(maxu | minu);
}
else if (wms == CLAMP_REPEAT)
{
// See commented code below for the meaning of mask
const GSVector4i u = uv & GSVector4i::xffffffff().srl32(32 - tw);
const GSVector4i v = uv & GSVector4i::xffffffff().srl32(32 - th);
u = uv & GSVector4i::xffffffff().srl32(32 - tw);
uu = uv.sra32(tw);
}
GSVector4i uu = uv.sra32(tw);
GSVector4i vv = uv.sra32(th);
if (wmt == CLAMP_REGION_REPEAT)
{
v = uv & (maxv | minv);
vv = uv & ~(maxv | minv);
}
else if (wmt == CLAMP_REPEAT)
{
// See commented code below for the meaning of mask
v = uv & GSVector4i::xffffffff().srl32(32 - th);
vv = uv.sra32(th);
}
const int mask = (uu.upl32(vv) == uu.uph32(vv)).mask();
// if values don't match it means that the texture will wrap so it needs the whole thing
// vy uy vx ux
// ==
// vw uw vz uz
uv = uv.rintersect(tr);
// Roughly cut out the min/max of the read (Clamp)
// Intersect on vr because it will have already cut it on region clamp
uv = uv.rintersect(vr);
switch (wms)
{
case CLAMP_REPEAT:
case CLAMP_REGION_REPEAT:
// This commented code cannot be used directly because it needs uv before the intersection
//if (uv_.x >> tw == uv_.z >> tw)
//{
// vr.x = std::max(vr.x, (uv_.x & ((1 << tw) - 1)));
// vr.z = std::min(vr.z, (uv_.z & ((1 << tw) - 1)) + 1);
//}
//vx == vz
if (mask & 0x000f)
{
if (vr.x < u.x)
@ -2637,7 +2671,20 @@ void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFR
if (vr.z > u.z + 1)
vr.z = u.z + 1;
}
break;
case CLAMP_CLAMP:
case CLAMP_REGION_CLAMP:
if (vr.x < uv.x)
vr.x = uv.x;
if (vr.z > (uv.z + 1))
vr.z = uv.z + 1;
break;
}
switch (wmt)
{
case CLAMP_REPEAT:
case CLAMP_REGION_REPEAT:
//if (uv_.y >> th == uv_.w >> th)
//{
// vr.y = max(vr.y, (uv_.y & ((1 << th) - 1)));
@ -2650,13 +2697,21 @@ void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFR
if (vr.w > v.w + 1)
vr.w = v.w + 1;
}
break;
case CLAMP_CLAMP:
case CLAMP_REGION_CLAMP:
if (vr.y < uv.y)
vr.y = uv.y;
if (vr.w > (uv.w + 1))
vr.w = uv.w + 1;
break;
}
}
vr = vr.rintersect(tr);
// This really shouldn't happen now except with the clamping region set entirely outside the texture,
// special handling should be written for that case.
if (vr.rempty())
{
// NOTE: this can happen when texcoords are all outside the texture or clamping area is zero, but we can't