VU: Don't clamp VF00 or I Reg

This commit is contained in:
refractionpcsx2 2022-06-26 05:49:52 +01:00
parent cdd9b1fa3b
commit 391bd119b8
4 changed files with 23 additions and 11 deletions

View File

@ -34,9 +34,9 @@ alignas(16) const u32 sse4_maxvals[2][4] = {
// gotten a NaN value, then something went wrong; and the NaN's sign // gotten a NaN value, then something went wrong; and the NaN's sign
// is not to be trusted. Games like positive values better usually, // is not to be trusted. Games like positive values better usually,
// and it's faster... so just always make NaNs into positive infinity. // and it's faster... so just always make NaNs into positive infinity.
void mVUclamp1(const xmm& reg, const xmm& regT1, int xyzw, bool bClampE = 0) void mVUclamp1(microVU& mVU, const xmm& reg, const xmm& regT1, int xyzw, bool bClampE = 0)
{ {
if ((!clampE && CHECK_VU_OVERFLOW) || (clampE && bClampE)) if (((!clampE && CHECK_VU_OVERFLOW) || (clampE && bClampE)) && mVU.regAlloc->checkVFClamp(reg.Id))
{ {
switch (xyzw) switch (xyzw)
{ {
@ -59,7 +59,7 @@ void mVUclamp1(const xmm& reg, const xmm& regT1, int xyzw, bool bClampE = 0)
// so we just use a temporary mem location for our backup for now... (non-sse4 version only) // so we just use a temporary mem location for our backup for now... (non-sse4 version only)
void mVUclamp2(microVU& mVU, const xmm& reg, const xmm& regT1in, int xyzw, bool bClampE = 0) void mVUclamp2(microVU& mVU, const xmm& reg, const xmm& regT1in, int xyzw, bool bClampE = 0)
{ {
if ((!clampE && CHECK_VU_SIGN_OVERFLOW) || (clampE && bClampE && CHECK_VU_SIGN_OVERFLOW)) if (((!clampE && CHECK_VU_SIGN_OVERFLOW) || (clampE && bClampE && CHECK_VU_SIGN_OVERFLOW)) && mVU.regAlloc->checkVFClamp(reg.Id))
{ {
int i = (xyzw == 1 || xyzw == 2 || xyzw == 4 || xyzw == 8) ? 0 : 1; int i = (xyzw == 1 || xyzw == 2 || xyzw == 4 || xyzw == 8) ? 0 : 1;
xPMIN.SD(reg, ptr128[&sse4_maxvals[i][0]]); xPMIN.SD(reg, ptr128[&sse4_maxvals[i][0]]);
@ -67,13 +67,13 @@ void mVUclamp2(microVU& mVU, const xmm& reg, const xmm& regT1in, int xyzw, bool
return; return;
} }
else else
mVUclamp1(reg, regT1in, xyzw, bClampE); mVUclamp1(mVU, reg, regT1in, xyzw, bClampE);
} }
// Used for operand clamping on every SSE instruction (add/sub/mul/div) // Used for operand clamping on every SSE instruction (add/sub/mul/div)
void mVUclamp3(microVU& mVU, const xmm& reg, const xmm& regT1, int xyzw) void mVUclamp3(microVU& mVU, const xmm& reg, const xmm& regT1, int xyzw)
{ {
if (clampE) if (clampE && mVU.regAlloc->checkVFClamp(reg.Id))
mVUclamp2(mVU, reg, regT1, xyzw, 1); mVUclamp2(mVU, reg, regT1, xyzw, 1);
} }
@ -83,8 +83,8 @@ void mVUclamp3(microVU& mVU, const xmm& reg, const xmm& regT1, int xyzw)
// emulated opcodes (causing crashes). Since we're clamping the operands // emulated opcodes (causing crashes). Since we're clamping the operands
// with mVUclamp3, we should almost never be getting a NaN result, // with mVUclamp3, we should almost never be getting a NaN result,
// but this clamp is just a precaution just-in-case. // but this clamp is just a precaution just-in-case.
void mVUclamp4(const xmm& reg, const xmm& regT1, int xyzw) void mVUclamp4(microVU& mVU, const xmm& reg, const xmm& regT1, int xyzw)
{ {
if (clampE && !CHECK_VU_SIGN_OVERFLOW) if (clampE && !CHECK_VU_SIGN_OVERFLOW && mVU.regAlloc->checkVFClamp(reg.Id))
mVUclamp1(reg, regT1, xyzw, 1); mVUclamp1(mVU, reg, regT1, xyzw, 1);
} }

View File

@ -220,6 +220,7 @@ struct microMapXMM
int xyzw; // xyzw to write back (0 = Don't write back anything AND cached vfReg has all vectors valid) int xyzw; // xyzw to write back (0 = Don't write back anything AND cached vfReg has all vectors valid)
int count; // Count of when last used int count; // Count of when last used
bool isNeeded; // Is needed for current instruction bool isNeeded; // Is needed for current instruction
bool isZero; // Register was loaded from VF00 and doesn't need clamping
}; };
class microRegAlloc class microRegAlloc
@ -330,6 +331,14 @@ public:
} }
} }
// Reports whether the value cached in XMM slot `regId` should be clamped.
//
// Returns false (skip clamping) when either:
//   - the slot is flagged isZero, i.e. it was loaded from VF00, or
//   - the slot maps VFreg id 33 and the IbitHack gamefix is disabled
//     (id 33 is presumably the I register - TODO confirm).
// Returns true in every other case.
//
// NOTE(review): regId indexes xmmMap directly with no range check, whereas
// checkCachedReg guards with `regId < xmmTotal`. Confirm that callers only
// ever pass ids that are valid xmmMap indices.
bool checkVFClamp(int regId)
{
	const auto& slot = xmmMap[regId];

	// Loaded from VF00: no clamp needed.
	if (slot.isZero)
		return false;

	// VFreg id 33 is left unclamped unless the IbitHack gamefix is enabled.
	if (slot.VFreg == 33 && !EmuConfig.Gamefixes.IbitHack)
		return false;

	return true;
}
bool checkCachedReg(int regId) bool checkCachedReg(int regId)
{ {
if (regId < xmmTotal) if (regId < xmmTotal)
@ -346,6 +355,7 @@ public:
clear.count = 0; clear.count = 0;
clear.xyzw = 0; clear.xyzw = 0;
clear.isNeeded = 0; clear.isNeeded = 0;
clear.isZero = 0;
} }
void clearRegVF(int VFreg) void clearRegVF(int VFreg)
@ -513,6 +523,7 @@ public:
} }
xmmMap[z].VFreg = vfWriteReg; xmmMap[z].VFreg = vfWriteReg;
xmmMap[z].xyzw = xyzw; xmmMap[z].xyzw = xyzw;
xmmMap[z].isZero = (vfLoadReg == 0);
} }
xmmMap[z].count = counter; xmmMap[z].count = counter;
xmmMap[z].isNeeded = true; xmmMap[z].isNeeded = true;
@ -550,6 +561,7 @@ public:
xmmMap[x].VFreg = vfLoadReg; xmmMap[x].VFreg = vfLoadReg;
xmmMap[x].xyzw = 0; xmmMap[x].xyzw = 0;
} }
xmmMap[x].isZero = (vfLoadReg == 0);
xmmMap[x].count = counter; xmmMap[x].count = counter;
xmmMap[x].isNeeded = true; xmmMap[x].isNeeded = true;
return xmmX; return xmmX;

View File

@ -72,7 +72,7 @@ mVUop(mVU_DIV)
cjmp.SetTarget(); cjmp.SetTarget();
xMOV(ptr32[&mVU.divFlag], 0); // Clear I/D flags xMOV(ptr32[&mVU.divFlag], 0); // Clear I/D flags
SSE_DIVSS(mVU, Fs, Ft); SSE_DIVSS(mVU, Fs, Ft);
mVUclamp1(Fs, t1, 8, true); mVUclamp1(mVU, Fs, t1, 8, true);
djmp.SetTarget(); djmp.SetTarget();
writeQreg(Fs, mVUinfo.writeQ); writeQreg(Fs, mVUinfo.writeQ);
@ -148,7 +148,7 @@ mVUop(mVU_RSQRT)
xForwardJump8 djmp; xForwardJump8 djmp;
ajmp.SetTarget(); ajmp.SetTarget();
SSE_DIVSS(mVU, Fs, Ft); SSE_DIVSS(mVU, Fs, Ft);
mVUclamp1(Fs, t1, 8, true); mVUclamp1(mVU, Fs, t1, 8, true);
djmp.SetTarget(); djmp.SetTarget();
writeQreg(Fs, mVUinfo.writeQ); writeQreg(Fs, mVUinfo.writeQ);

View File

@ -458,7 +458,7 @@ void ADD_SS_TriAceHack(microVU& mVU, const xmm& to, const xmm& from)
mVUclamp3(mVU, to, t1, (isPS) ? 0xf : 0x8); \ mVUclamp3(mVU, to, t1, (isPS) ? 0xf : 0x8); \
mVUclamp3(mVU, from, t1, (isPS) ? 0xf : 0x8); \ mVUclamp3(mVU, from, t1, (isPS) ? 0xf : 0x8); \
opX(to, from); \ opX(to, from); \
mVUclamp4(to, t1, (isPS) ? 0xf : 0x8); \ mVUclamp4(mVU, to, t1, (isPS) ? 0xf : 0x8); \
} while (0) } while (0)
void SSE_MAXPS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg) void SSE_MAXPS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)