- Rewrote and simplified the TriAce gamefix
VU interpreter:
- Implemented a TriAce gamefix for vu0 interpreter

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4960 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2011-11-07 10:20:56 +00:00
parent c234e1f6dd
commit f423a9c41d
2 changed files with 150 additions and 80 deletions

View File

@ -339,6 +339,36 @@ static __fi float vuDouble(u32 f)
} }
#endif #endif
// ADD helper used by the VU interpreter when the TriAce add/sub gamefix is active.
// Takes the raw 32-bit patterns of both operands and returns their sum as a float.
//
// On VU0, TriAce games use ADDi and expect bit-perfect results. Operand pairs of
// interest (expected result on the right); only the third one is overridden here,
// the others are listed for reference and stay disabled:
//   0xb3e2a619 + 0x42546666 -> 0x42546666   (disabled)
//   0x8b5b19e9 + 0xc7f079b3 -> 0xc7f079b3   (disabled)
//   0x4b1ed4a8 + 0x43a02666 -> 0x4b1ed5e7   (enabled)
//   0x7d1ca47b + 0x42f23333 -> 0x7d1ca47b   (disabled)
//
// The enabled pair is needed because some other rounding error hands us a wrong
// 'a' operand, and therefore a wrong result:
//   we are getting:       0x4b1ed4a8 + 0x43a02666 = 0x4b1ed5e8
//   we should be getting: 0x4b1ed4a7 + 0x43a02666 = 0x4b1ed5e7
// microVU gets the correct operands and result. The interpreters likely don't,
// due to rounding towards nearest in other calculations.
static __fi float vuADD_TriAceHack(u32 a, u32 b) {
	const bool isKnownBadPair = (a == 0x4b1ed4a8) && (b == 0x43a02666);
	if (isKnownBadPair) return vuDouble(0x4b1ed5e7);

	if (0) {
		// Disabled reference path: microVU uses something like this to get
		// TriAce games working, but the VU interpreters don't seem to need it
		// currently. When the exponents differ by 25 or more, the smaller
		// operand is reduced to its sign bit before adding.
		s32 aExp = (a >> 23) & 0xff;
		s32 bExp = (b >> 23) & 0xff;
		s32 expDiff = aExp - bExp;
		if (expDiff >=  25) b &= 0x80000000;
		if (expDiff <= -25) a &= 0x80000000;
		float sum = vuDouble(a) + vuDouble(b);
		DevCon.WriteLn("aExp = %d, bExp = %d", aExp, bExp);
		DevCon.WriteLn("0x%08x + 0x%08x = 0x%08x", a, b, (u32&)sum);
		DevCon.WriteLn("%f + %f = %f", vuDouble(a), vuDouble(b), sum);
		return sum;
	}

	// Every other operand pair: ordinary addition.
	return vuDouble(a) + vuDouble(b);
}
void _vuABS(VURegs * VU) { void _vuABS(VURegs * VU) {
if (_Ft_ == 0) return; if (_Ft_ == 0) return;
@ -367,11 +397,21 @@ static __fi void _vuADDi(VURegs * VU) {
if (_Fd_ == 0) dst = &RDzero; if (_Fd_ == 0) dst = &RDzero;
else dst = &VU->VF[_Fd_]; else dst = &VU->VF[_Fd_];
if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VI[REG_I].UL));} else VU_MACx_CLEAR(VU); if (!CHECK_VUADDSUBHACK) {
if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VI[REG_I].UL));} else VU_MACy_CLEAR(VU); if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VI[REG_I].UL));} else VU_MACx_CLEAR(VU);
if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VI[REG_I].UL));} else VU_MACz_CLEAR(VU); if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VI[REG_I].UL));} else VU_MACy_CLEAR(VU);
if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + vuDouble(VU->VI[REG_I].UL));} else VU_MACw_CLEAR(VU); if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VI[REG_I].UL));} else VU_MACz_CLEAR(VU);
VU_STAT_UPDATE(VU); if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + vuDouble(VU->VI[REG_I].UL));} else VU_MACw_CLEAR(VU);
VU_STAT_UPDATE(VU);
}
else {
if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuADD_TriAceHack(VU->VF[_Fs_].i.x, VU->VI[REG_I].UL));} else VU_MACx_CLEAR(VU);
if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuADD_TriAceHack(VU->VF[_Fs_].i.y, VU->VI[REG_I].UL));} else VU_MACy_CLEAR(VU);
if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuADD_TriAceHack(VU->VF[_Fs_].i.z, VU->VI[REG_I].UL));} else VU_MACz_CLEAR(VU);
if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuADD_TriAceHack(VU->VF[_Fs_].i.w, VU->VI[REG_I].UL));} else VU_MACw_CLEAR(VU);
VU_STAT_UPDATE(VU);
}
}/*Reworked from define to function. asadr*/ }/*Reworked from define to function. asadr*/
static __fi void _vuADDq(VURegs * VU) { static __fi void _vuADDq(VURegs * VU) {

View File

@ -214,8 +214,7 @@ void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW)
//------------------------------------------------------------------ //------------------------------------------------------------------
// Backup Volatile Regs (EAX, ECX, EDX, MM0~7, XMM0~7, are all volatile according to 32bit Win/Linux ABI) // Backup Volatile Regs (EAX, ECX, EDX, MM0~7, XMM0~7, are all volatile according to 32bit Win/Linux ABI)
__fi void mVUbackupRegs(microVU& mVU, bool toMemory = false) __fi void mVUbackupRegs(microVU& mVU, bool toMemory = false) {
{
if (toMemory) { if (toMemory) {
for(int i = 0; i < 8; i++) { for(int i = 0; i < 8; i++) {
xMOVAPS(ptr128[&mVU.xmmBackup[i][0]], xmm(i)); xMOVAPS(ptr128[&mVU.xmmBackup[i][0]], xmm(i));
@ -228,8 +227,7 @@ __fi void mVUbackupRegs(microVU& mVU, bool toMemory = false)
} }
// Restore Volatile Regs // Restore Volatile Regs
__fi void mVUrestoreRegs(microVU& mVU, bool fromMemory = false) __fi void mVUrestoreRegs(microVU& mVU, bool fromMemory = false) {
{
if (fromMemory) { if (fromMemory) {
for(int i = 0; i < 8; i++) { for(int i = 0; i < 8; i++) {
xMOVAPS(xmm(i), ptr128[&mVU.xmmBackup[i][0]]); xMOVAPS(xmm(i), ptr128[&mVU.xmmBackup[i][0]]);
@ -238,6 +236,20 @@ __fi void mVUrestoreRegs(microVU& mVU, bool fromMemory = false)
else xMOVAPS(xmmPQ, ptr128[&mVU.xmmBackup[xmmPQ.Id][0]]); else xMOVAPS(xmmPQ, ptr128[&mVU.xmmBackup[xmmPQ.Id][0]]);
} }
// Debug helper: writes the eight entries of mVU.xmmBackup to the console,
// first with each of the four elements shown as a hex word, then with the
// same four elements reinterpreted as floats.
// This prints the saved copies held in xmmBackup, not the live XMM registers.
// NOTE(review): presumably intended to run after mVUbackupRegs(mVU, true) has
// filled xmmBackup -- confirm against the call sites.
_mVUt void __fc mVUprintRegs() {
	microVU& mVU = mVUx;
	const int regCount = 8;

	// Pass 1: raw bit patterns.
	for (int reg = 0; reg < regCount; ++reg) {
		Console.WriteLn("xmm%d = [0x%08x,0x%08x,0x%08x,0x%08x]", reg,
			mVU.xmmBackup[reg][0], mVU.xmmBackup[reg][1],
			mVU.xmmBackup[reg][2], mVU.xmmBackup[reg][3]);
	}

	// Pass 2: the same storage viewed as floats.
	for (int reg = 0; reg < regCount; ++reg) {
		Console.WriteLn("xmm%d = [%f,%f,%f,%f]", reg,
			(float&)mVU.xmmBackup[reg][0], (float&)mVU.xmmBackup[reg][1],
			(float&)mVU.xmmBackup[reg][2], (float&)mVU.xmmBackup[reg][3]);
	}
}
// Gets called by mVUaddrFix at execution-time // Gets called by mVUaddrFix at execution-time
static void __fc mVUwarningRegAccess(u32 prog, u32 pc) { static void __fc mVUwarningRegAccess(u32 prog, u32 pc) {
Console.Error("microVU0 Warning: Accessing VU1 Regs! [%04x] [%x]", pc, prog); Console.Error("microVU0 Warning: Accessing VU1 Regs! [%04x] [%x]", pc, prog);
@ -288,12 +300,13 @@ __fi void mVUaddrFix(mV, const x32& gprReg)
// Micro VU - Custom SSE Instructions // Micro VU - Custom SSE Instructions
//------------------------------------------------------------------ //------------------------------------------------------------------
struct SSEMaskPair { u32 mask1[4], mask2[4]; }; struct SSEMasks { u32 MIN_MAX_1[4], MIN_MAX_2[4], ADD_SS[4]; };
static const __aligned16 SSEMaskPair MIN_MAX = static const __aligned16 SSEMasks sseMasks =
{ {
{0xffffffff, 0x80000000, 0xffffffff, 0x80000000}, {0xffffffff, 0x80000000, 0xffffffff, 0x80000000},
{0x00000000, 0x40000000, 0x00000000, 0x40000000} {0x00000000, 0x40000000, 0x00000000, 0x40000000},
{0x80000000, 0xffffffff, 0xffffffff, 0xffffffff}
}; };
@ -306,21 +319,21 @@ void MIN_MAX_PS(microVU& mVU, const xmm& to, const xmm& from, const xmm& t1in, c
if (0) { // use double comparison if (0) { // use double comparison
// ZW // ZW
xPSHUF.D(t1, to, 0xfa); xPSHUF.D(t1, to, 0xfa);
xPAND (t1, ptr128[MIN_MAX.mask1]); xPAND (t1, ptr128[sseMasks.MIN_MAX_1]);
xPOR (t1, ptr128[MIN_MAX.mask2]); xPOR (t1, ptr128[sseMasks.MIN_MAX_2]);
xPSHUF.D(t2, from, 0xfa); xPSHUF.D(t2, from, 0xfa);
xPAND (t2, ptr128[MIN_MAX.mask1]); xPAND (t2, ptr128[sseMasks.MIN_MAX_1]);
xPOR (t2, ptr128[MIN_MAX.mask2]); xPOR (t2, ptr128[sseMasks.MIN_MAX_2]);
if (min) xMIN.PD(t1, t2); if (min) xMIN.PD(t1, t2);
else xMAX.PD(t1, t2); else xMAX.PD(t1, t2);
// XY // XY
xPSHUF.D(t2, from, 0x50); xPSHUF.D(t2, from, 0x50);
xPAND (t2, ptr128[MIN_MAX.mask1]); xPAND (t2, ptr128[sseMasks.MIN_MAX_1]);
xPOR (t2, ptr128[MIN_MAX.mask2]); xPOR (t2, ptr128[sseMasks.MIN_MAX_2]);
xPSHUF.D(to, to, 0x50); xPSHUF.D(to, to, 0x50);
xPAND (to, ptr128[MIN_MAX.mask1]); xPAND (to, ptr128[sseMasks.MIN_MAX_1]);
xPOR (to, ptr128[MIN_MAX.mask2]); xPOR (to, ptr128[sseMasks.MIN_MAX_2]);
if (min) xMIN.PD(to, t2); if (min) xMIN.PD(to, t2);
else xMAX.PD(to, t2); else xMAX.PD(to, t2);
@ -355,83 +368,100 @@ void MIN_MAX_SS(mV, const xmm& to, const xmm& from, const xmm& t1in, bool min)
{ {
const xmm& t1 = t1in.IsEmpty() ? mVU.regAlloc->allocReg() : t1in; const xmm& t1 = t1in.IsEmpty() ? mVU.regAlloc->allocReg() : t1in;
xSHUF.PS(to, from, 0); xSHUF.PS(to, from, 0);
xPAND (to, ptr128[MIN_MAX.mask1]); xPAND (to, ptr128[sseMasks.MIN_MAX_1]);
xPOR (to, ptr128[MIN_MAX.mask2]); xPOR (to, ptr128[sseMasks.MIN_MAX_2]);
xPSHUF.D(t1, to, 0xee); xPSHUF.D(t1, to, 0xee);
if (min) xMIN.PD(to, t1); if (min) xMIN.PD(to, t1);
else xMAX.PD(to, t1); else xMAX.PD(to, t1);
if (t1 != t1in) mVU.regAlloc->clearNeeded(t1); if (t1 != t1in) mVU.regAlloc->clearNeeded(t1);
} }
// Warning: Modifies all vectors in 'to' and 'from', and Modifies xmmT1 and xmmT2 // Not Used! - TriAce games only need a portion of this code to boot (see function below)
void ADD_SS(microVU& mVU, const xmm& to, const xmm& from, const xmm& t1in, const xmm& t2in) // What this code attempts to do is do a floating point ADD with only 1 guard bit,
// whereas FPU calculations that follow the IEEE standard have 3 guard bits (guard|round|sticky)
// Warning: Modifies all vectors in 'to' and 'from', and Modifies t1in
void ADD_SS_Single_Guard_Bit(microVU& mVU, const xmm& to, const xmm& from, const xmm& t1in)
{ {
const xmm& t1 = t1in.IsEmpty() ? mVU.regAlloc->allocReg() : t1in; const xmm& t1 = t1in.IsEmpty() ? mVU.regAlloc->allocReg() : t1in;
const xmm& t2 = t2in.IsEmpty() ? mVU.regAlloc->allocReg() : t2in;
xMOVAPS(t1, to); xMOVD(eax, to);
xMOVAPS(t2, from); xMOVD(ecx, from);
xMOVD(ecx, to); xSHR (eax, 23);
xSHR(ecx, 23); xSHR (ecx, 23);
xMOVD(eax, from); xAND (eax, 0xff);
xSHR(eax, 23); xAND (ecx, 0xff);
xAND(ecx, 0xff); xSUB (ecx, eax); // Exponent Difference
xAND(eax, 0xff);
xSUB(ecx, eax); //ecx = exponent difference
xCMP(ecx, 25); xForwardJL8 case_neg;
xForwardJGE8 case2; xForwardJE8 case_end1;
xCMP(ecx, 0);
xForwardJG8 case3;
xForwardJE8 toend1;
xCMP(ecx, -25);
xForwardJLE8 case4;
// negative small xCMP (ecx, 24);
xNOT(ecx); // -ecx - 1 xForwardJLE8 case_pos_small;
xMOV(eax, 0xffffffff);
xSHL(eax, cl);
xPCMP.EQB(to, to);
xMOVDZX(from, eax);
xMOVSS(to, from);
xPCMP.EQB(from, from);
xForwardJump8 toend2;
case2.SetTarget(); // positive large // case_pos_big:
xMOV(eax, 0x80000000); xPAND(to, ptr128[sseMasks.ADD_SS]);
xPCMP.EQB(from, from); xForwardJump8 case_end2;
xMOVDZX(to, eax);
xMOVSS(from, to);
xPCMP.EQB(to, to);
xForwardJump8 toend3;
case3.SetTarget(); // positive small case_pos_small.SetTarget();
xDEC(ecx); xDEC (ecx);
xMOV(eax, 0xffffffff); xMOV (eax, 0xffffffff);
xSHL(eax, cl); xSHL (eax, cl);
xPCMP.EQB(from, from); xMOVDZX(t1, eax);
xMOVDZX(to, eax); xPAND (to, t1);
xMOVSS(from, to); xForwardJump8 case_end3;
xPCMP.EQB(to, to);
xForwardJump8 toend4;
case4.SetTarget(); // negative large case_neg.SetTarget();
xMOV(eax, 0x80000000); xCMP (ecx, -24);
xPCMP.EQB(to, to); xForwardJGE8 case_neg_small;
xMOVDZX(from, eax);
xMOVSS(to, from);
xPCMP.EQB(from, from);
toend1.SetTarget(); // case_neg_big:
toend2.SetTarget(); xPAND(from, ptr128[sseMasks.ADD_SS]);
toend3.SetTarget(); xForwardJump8 case_end4;
toend4.SetTarget();
case_neg_small.SetTarget();
xNOT (ecx); // -ecx - 1
xMOV (eax, 0xffffffff);
xSHL (eax, cl);
xMOVDZX(t1, eax);
xPAND (from, t1);
case_end1.SetTarget();
case_end2.SetTarget();
case_end3.SetTarget();
case_end4.SetTarget();
xAND.PS(to, t1); // to contains mask
xAND.PS(from, t2); // from contains mask
xADD.SS(to, from); xADD.SS(to, from);
if (t1 != t1in) mVU.regAlloc->clearNeeded(t1); if (t1 != t1in) mVU.regAlloc->clearNeeded(t1);
if (t2 != t2in) mVU.regAlloc->clearNeeded(t2); }
// Turns out only this is needed to get TriAce games booting with mVU
// Modifies from's lower vector
//
// Emits a scalar add of the lowest lanes (to += from) that first throws away the
// smaller operand when the two exponent fields differ by 25 or more: the lowest
// lane of that operand is ANDed with sseMasks.ADD_SS, whose first word is
// 0x80000000 and whose other three words are 0xffffffff, so only the sign bit of
// the lowest lane survives and the upper lanes are untouched.
// Besides 'from', the lowest lane of 'to' is masked in the opposite case, and
// eax/ecx are overwritten as scratch.
void ADD_SS_TriAceHack(microVU& mVU, const xmm& to, const xmm& from)
{
xMOVD(eax, to);   // eax = lowest 32 bits of 'to'
xMOVD(ecx, from); // ecx = lowest 32 bits of 'from'
xSHR (eax, 23);   // drop the 23 mantissa bits
xSHR (ecx, 23);
xAND (eax, 0xff); // keep the 8-bit exponent field (sign bit removed)
xAND (ecx, 0xff);
xSUB (ecx, eax); // Exponent Difference (ecx = exponent of 'from' - exponent of 'to')
xCMP (ecx, -25);
xForwardJLE8 case_neg_big; // 'from' exponent is at least 25 below 'to'
xCMP (ecx, 25);
xForwardJL8 case_end1;     // difference inside (-25, 25): plain add, no masking
// case_pos_big:
// 'to' exponent is at least 25 below 'from': reduce to's lowest lane to its sign bit
xPAND(to, ptr128[sseMasks.ADD_SS]);
xForwardJump8 case_end2;
case_neg_big.SetTarget();
// reduce from's lowest lane to its sign bit
xPAND(from, ptr128[sseMasks.ADD_SS]);
case_end1.SetTarget();
case_end2.SetTarget();
xADD.SS(to, from); // all three paths join here for the actual scalar add
} }
#define clampOp(opX, isPS) { \ #define clampOp(opX, isPS) { \
@ -464,7 +494,7 @@ void SSE_MINSS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, co
void SSE_ADD2SS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg) void SSE_ADD2SS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
{ {
if (!CHECK_VUADDSUBHACK) { clampOp(xADD.SS, 0); } if (!CHECK_VUADDSUBHACK) { clampOp(xADD.SS, 0); }
else { ADD_SS(mVU, to, from, t1, t2); } else { ADD_SS_TriAceHack(mVU, to, from); }
} }
// Does same as SSE_ADDPS since tri-ace games only need SS implementation of VUADDSUBHACK... // Does same as SSE_ADDPS since tri-ace games only need SS implementation of VUADDSUBHACK...