microVU:

- Rewrote and simplified the TriAce gamefix

VU interpreter:
- Implemented a TriAce gamefix for vu0 interpreter

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4960 96395faa-99c1-11dd-bbfe-3dabce05a288
2011-11-07 10:20:56 +00:00 · 2011-11-07 10:20:56 +00:00 · f423a9c41d
parent c234e1f6dd
commit f423a9c41d
2 changed files with 150 additions and 80 deletions
--- a/pcsx2/VUops.cpp
+++ b/pcsx2/VUops.cpp
@ -339,6 +339,36 @@ static __fi float vuDouble(u32 f)
 }
 #endif
 // Adds two VU floats given as raw 32-bit patterns, with one hard-coded
 // override for an operand pair that TriAce games depend on.
 static __fi float vuADD_TriAceHack(u32 a, u32 b) {
 	// TriAce games issue ADDi on VU0 and expect bit-perfect sums. Operand
 	// pairs observed so far, with the result the game expects:
 	//   1) 0xb3e2a619 + 0x42546666 -> 0x42546666   (check disabled)
 	//   2) 0x8b5b19e9 + 0xc7f079b3 -> 0xc7f079b3   (check disabled)
 	//   3) 0x4b1ed4a8 + 0x43a02666 -> 0x4b1ed5e7   (overridden below)
 	//   4) 0x7d1ca47b + 0x42f23333 -> 0x7d1ca47b   (check disabled)
 	//
 	// Pair 3 needs the override because some other rounding error hands us
 	// an incorrect operand ('a' is wrong), and therefore an incorrect sum:
 	//   what we get:    0x4b1ed4a8 + 0x43a02666 = 0x4b1ed5e8
 	//   what we should: 0x4b1ed4a7 + 0x43a02666 = 0x4b1ed5e7
 	// microVU gets the correct operands and result. The interpreters likely
 	// do not, due to rounding towards nearest in other calculations.
 	const bool isKnownBadPair = (a == 0x4b1ed4a8) && (b == 0x43a02666);
 	if (isKnownBadPair) {
 		return vuDouble(0x4b1ed5e7);
 	}
 	// microVU uses something like the branch below to get TriAce games
 	// working, but the VU interpreters don't seem to need it currently,
 	// so it is kept compiled-in but permanently disabled.
 	const bool useExponentClamp = false;
 	if (useExponentClamp) {
 		const s32 expA = (a >> 23) & 0xff;
 		const s32 expB = (b >> 23) & 0xff;
 		const s32 expDelta = expA - expB;
 		// When the exponents are 25 or more apart, reduce the smaller
 		// operand to just its sign bit before adding.
 		if (expDelta >= 25)       b &= 0x80000000;
 		else if (expDelta <= -25) a &= 0x80000000;
 		float sum = vuDouble(a) + vuDouble(b);
 		DevCon.WriteLn("aExp = %d, bExp = %d", expA, expB);
 		DevCon.WriteLn("0x%08x + 0x%08x = 0x%08x", a, b, (u32&)sum);
 		DevCon.WriteLn("%f + %f = %f", vuDouble(a), vuDouble(b), sum);
 		return sum;
 	}
 	return vuDouble(a) + vuDouble(b);
 }
 void _vuABS(VURegs * VU) {
 	if (_Ft_ == 0) return;
@ -367,11 +397,21 @@ static __fi void _vuADDi(VURegs * VU) {
 	if (_Fd_ == 0) dst = &RDzero;
 	else dst = &VU->VF[_Fd_];
-	if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VI[REG_I].UL));} else VU_MACx_CLEAR(VU);
+	if (!CHECK_VUADDSUBHACK) {
-	if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VI[REG_I].UL));} else VU_MACy_CLEAR(VU);
+		if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VI[REG_I].UL));} else VU_MACx_CLEAR(VU);
-	if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VI[REG_I].UL));} else VU_MACz_CLEAR(VU);
+		if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VI[REG_I].UL));} else VU_MACy_CLEAR(VU);
-	if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + vuDouble(VU->VI[REG_I].UL));} else VU_MACw_CLEAR(VU);
+		if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VI[REG_I].UL));} else VU_MACz_CLEAR(VU);
-	VU_STAT_UPDATE(VU);
+		if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + vuDouble(VU->VI[REG_I].UL));} else VU_MACw_CLEAR(VU);
 		VU_STAT_UPDATE(VU);
 	}
 	else {
 		if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuADD_TriAceHack(VU->VF[_Fs_].i.x, VU->VI[REG_I].UL));} else VU_MACx_CLEAR(VU);
 		if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuADD_TriAceHack(VU->VF[_Fs_].i.y, VU->VI[REG_I].UL));} else VU_MACy_CLEAR(VU);
 		if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuADD_TriAceHack(VU->VF[_Fs_].i.z, VU->VI[REG_I].UL));} else VU_MACz_CLEAR(VU);
 		if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuADD_TriAceHack(VU->VF[_Fs_].i.w, VU->VI[REG_I].UL));} else VU_MACw_CLEAR(VU);
 		VU_STAT_UPDATE(VU);
 	}
 }/*Reworked from define to function. asadr*/
 static __fi void _vuADDq(VURegs * VU) {
--- a/pcsx2/x86/microVU_Misc.inl
+++ b/pcsx2/x86/microVU_Misc.inl
@ -214,8 +214,7 @@ void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW)
 //------------------------------------------------------------------
 // Backup Volatile Regs (EAX, ECX, EDX, MM0~7, XMM0~7, are all volatile according to 32bit Win/Linux ABI)
-__fi void mVUbackupRegs(microVU& mVU, bool toMemory = false)
+__fi void mVUbackupRegs(microVU& mVU, bool toMemory = false) {
 {
 	if (toMemory) {
 		for(int i = 0; i < 8; i++) {
 			xMOVAPS(ptr128[&mVU.xmmBackup[i][0]], xmm(i));
@ -228,8 +227,7 @@ __fi void mVUbackupRegs(microVU& mVU, bool toMemory = false)
 }
 // Restore Volatile Regs
-__fi void mVUrestoreRegs(microVU& mVU, bool fromMemory = false)
+__fi void mVUrestoreRegs(microVU& mVU, bool fromMemory = false) {
 {
 	if (fromMemory) {
 		for(int i = 0; i < 8; i++) {
 			xMOVAPS(xmm(i), ptr128[&mVU.xmmBackup[i][0]]);
@ -238,6 +236,20 @@ __fi void mVUrestoreRegs(microVU& mVU, bool fromMemory = false)
 	else xMOVAPS(xmmPQ, ptr128[&mVU.xmmBackup[xmmPQ.Id][0]]);
 }
 // Debug helper: writes the first eight entries of mVU.xmmBackup to the
 // console, once as raw hex words and once reinterpreted as floats.
 // NOTE(review): presumably meant to be called after mVUbackupRegs(mVU, true)
 // has stored xmm0-7 into xmmBackup - confirm against callers.
 _mVUt void __fc mVUprintRegs() {
 	microVU& mVU = mVUx;
 	const int numRegs = 8;
 	// Pass 1: raw bit patterns of the four words of each entry.
 	for (int reg = 0; reg < numRegs; ++reg) {
 		Console.WriteLn("xmm%d = [0x%08x,0x%08x,0x%08x,0x%08x]",
 			reg,
 			mVU.xmmBackup[reg][0],
 			mVU.xmmBackup[reg][1],
 			mVU.xmmBackup[reg][2],
 			mVU.xmmBackup[reg][3]);
 	}
 	// Pass 2: the same words viewed as floats.
 	for (int reg = 0; reg < numRegs; ++reg) {
 		Console.WriteLn("xmm%d = [%f,%f,%f,%f]",
 			reg,
 			(float&)mVU.xmmBackup[reg][0],
 			(float&)mVU.xmmBackup[reg][1],
 			(float&)mVU.xmmBackup[reg][2],
 			(float&)mVU.xmmBackup[reg][3]);
 	}
 }
 // Gets called by mVUaddrFix at execution-time
 static void __fc mVUwarningRegAccess(u32 prog, u32 pc) {
 	Console.Error("microVU0 Warning: Accessing VU1 Regs! [%04x] [%x]", pc, prog);
@ -288,12 +300,13 @@ __fi void mVUaddrFix(mV, const x32& gprReg)
 // Micro VU - Custom SSE Instructions
 //------------------------------------------------------------------
-struct SSEMaskPair { u32 mask1[4], mask2[4]; };
+struct SSEMasks { u32 MIN_MAX_1[4], MIN_MAX_2[4], ADD_SS[4]; };
-static const __aligned16 SSEMaskPair MIN_MAX =
+static const __aligned16 SSEMasks sseMasks =
 {
 	{0xffffffff, 0x80000000, 0xffffffff, 0x80000000},
-	{0x00000000, 0x40000000, 0x00000000, 0x40000000}
+	{0x00000000, 0x40000000, 0x00000000, 0x40000000},
 	{0x80000000, 0xffffffff, 0xffffffff, 0xffffffff}
 };
@ -306,21 +319,21 @@ void MIN_MAX_PS(microVU& mVU, const xmm& to, const xmm& from, const xmm& t1in, c
 	if (0) { // use double comparison
 		// ZW
 		xPSHUF.D(t1, to, 0xfa);
-		xPAND   (t1, ptr128[MIN_MAX.mask1]);
+		xPAND   (t1, ptr128[sseMasks.MIN_MAX_1]);
-		xPOR    (t1, ptr128[MIN_MAX.mask2]);
+		xPOR    (t1, ptr128[sseMasks.MIN_MAX_2]);
 		xPSHUF.D(t2, from, 0xfa);
-		xPAND   (t2, ptr128[MIN_MAX.mask1]);
+		xPAND   (t2, ptr128[sseMasks.MIN_MAX_1]);
-		xPOR    (t2, ptr128[MIN_MAX.mask2]);
+		xPOR    (t2, ptr128[sseMasks.MIN_MAX_2]);
 		if (min) xMIN.PD(t1, t2);
 		else     xMAX.PD(t1, t2);
 		// XY
 		xPSHUF.D(t2, from, 0x50);
-		xPAND   (t2, ptr128[MIN_MAX.mask1]);
+		xPAND   (t2, ptr128[sseMasks.MIN_MAX_1]);
-		xPOR    (t2, ptr128[MIN_MAX.mask2]);
+		xPOR    (t2, ptr128[sseMasks.MIN_MAX_2]);
 		xPSHUF.D(to, to, 0x50);
-		xPAND   (to, ptr128[MIN_MAX.mask1]);
+		xPAND   (to, ptr128[sseMasks.MIN_MAX_1]);
-		xPOR    (to, ptr128[MIN_MAX.mask2]);
+		xPOR    (to, ptr128[sseMasks.MIN_MAX_2]);
 		if (min) xMIN.PD(to, t2);
 		else     xMAX.PD(to, t2);
@ -355,83 +368,100 @@ void MIN_MAX_SS(mV, const xmm& to, const xmm& from, const xmm& t1in, bool min)
 {
 	const xmm& t1 = t1in.IsEmpty() ? mVU.regAlloc->allocReg() : t1in;
 	xSHUF.PS(to, from, 0);
-	xPAND	(to, ptr128[MIN_MAX.mask1]);
+	xPAND	(to, ptr128[sseMasks.MIN_MAX_1]);
-	xPOR	(to, ptr128[MIN_MAX.mask2]);
+	xPOR	(to, ptr128[sseMasks.MIN_MAX_2]);
 	xPSHUF.D(t1, to, 0xee);
 	if (min) xMIN.PD(to, t1);
 	else	 xMAX.PD(to, t1);
 	if (t1 != t1in) mVU.regAlloc->clearNeeded(t1);
 }
-// Warning: Modifies all vectors in 'to' and 'from', and Modifies xmmT1 and xmmT2
+// Not Used! - TriAce games only need a portion of this code to boot (see function below)
-void ADD_SS(microVU& mVU, const xmm& to, const xmm& from, const xmm& t1in, const xmm& t2in)
+// What this code attempts to do is do a floating point ADD with only 1 guard bit,
 // whereas FPU calculations that follow the IEEE standard have 3 guard bits (guard|round|sticky)
 // Warning: Modifies all vectors in 'to' and 'from', and Modifies t1in
 void ADD_SS_Single_Guard_Bit(microVU& mVU, const xmm& to, const xmm& from, const xmm& t1in)
 {
 	const xmm& t1 = t1in.IsEmpty() ? mVU.regAlloc->allocReg() : t1in;
 	const xmm& t2 = t2in.IsEmpty() ? mVU.regAlloc->allocReg() : t2in;
-	xMOVAPS(t1, to);
+	xMOVD(eax, to);
-	xMOVAPS(t2, from);
+	xMOVD(ecx, from);
-	xMOVD(ecx, to);
+	xSHR (eax, 23);
-	xSHR(ecx, 23);
+	xSHR (ecx, 23);
-	xMOVD(eax, from);
+	xAND (eax, 0xff);
-	xSHR(eax, 23);
+	xAND (ecx, 0xff);
-	xAND(ecx, 0xff);
+	xSUB (ecx, eax); // Exponent Difference
 	xAND(eax, 0xff);
 	xSUB(ecx, eax); //ecx = exponent difference
-	xCMP(ecx, 25);
+	xForwardJL8 case_neg;
-	xForwardJGE8 case2;
+	xForwardJE8 case_end1;
 	xCMP(ecx, 0);
 	xForwardJG8 case3;
 	xForwardJE8 toend1;
 	xCMP(ecx, -25);
 	xForwardJLE8 case4;
-	// negative small
+	xCMP (ecx, 24);
-		xNOT(ecx); // -ecx - 1
+	xForwardJLE8 case_pos_small;
 		xMOV(eax, 0xffffffff);
 		xSHL(eax, cl);
 		xPCMP.EQB(to, to);
 		xMOVDZX(from, eax);
 		xMOVSS(to, from);
 		xPCMP.EQB(from, from);
 	xForwardJump8 toend2;
-	case2.SetTarget(); // positive large
+	// case_pos_big:
-		xMOV(eax, 0x80000000);
+	xPAND(to, ptr128[sseMasks.ADD_SS]);
-		xPCMP.EQB(from, from);
+	xForwardJump8 case_end2;
 		xMOVDZX(to, eax);
 		xMOVSS(from, to);
 		xPCMP.EQB(to, to);
 	xForwardJump8 toend3;
-	case3.SetTarget(); // positive small
+	case_pos_small.SetTarget();
-		xDEC(ecx);
+	xDEC   (ecx);
-		xMOV(eax, 0xffffffff);
+	xMOV   (eax, 0xffffffff);
-		xSHL(eax, cl);
+	xSHL   (eax, cl);
-		xPCMP.EQB(from, from);
+	xMOVDZX(t1, eax);
-		xMOVDZX(to, eax);
+	xPAND  (to, t1);
-		xMOVSS(from, to);
+	xForwardJump8 case_end3;
 		xPCMP.EQB(to, to);
 	xForwardJump8 toend4;
-	case4.SetTarget(); // negative large
+	case_neg.SetTarget();
-		xMOV(eax, 0x80000000);
+	xCMP (ecx, -24);
-		xPCMP.EQB(to, to);
+	xForwardJGE8 case_neg_small;
 		xMOVDZX(from, eax);
 		xMOVSS(to, from);
 		xPCMP.EQB(from, from);
-	toend1.SetTarget();
+	// case_neg_big:
-	toend2.SetTarget();
+	xPAND(from, ptr128[sseMasks.ADD_SS]);
-	toend3.SetTarget();
+	xForwardJump8 case_end4;
-	toend4.SetTarget();
+
 	case_neg_small.SetTarget();
 	xNOT   (ecx); // -ecx - 1
 	xMOV   (eax, 0xffffffff);
 	xSHL   (eax, cl);
 	xMOVDZX(t1, eax);
 	xPAND  (from, t1);
 	case_end1.SetTarget();
 	case_end2.SetTarget();
 	case_end3.SetTarget();
 	case_end4.SetTarget();
 	xAND.PS(to,   t1); // to   contains mask
 	xAND.PS(from, t2); // from contains mask
 	xADD.SS(to, from);
 	if (t1 != t1in) mVU.regAlloc->clearNeeded(t1);
-	if (t2 != t2in) mVU.regAlloc->clearNeeded(t2);
+}
 // Emits a scalar add (to += from on the lowest float) with the TriAce gamefix applied.
 // Turns out only this is needed to get TriAce games booting with mVU
 //
 // The emitted code takes bits 23..30 of the low dword of each operand and
 // computes diff = field(from) - field(to), then:
 //   diff <= -25 : 'from' is ANDed with sseMasks.ADD_SS
 //   diff >=  25 : 'to'   is ANDed with sseMasks.ADD_SS
 //   otherwise   : both operands are left untouched
 // and finally emits ADD.SS(to, from).
 // NOTE(review): sseMasks.ADD_SS appears to be {0x80000000, 0xffffffff x3}, i.e. it
 // would keep only the sign bit of the lowest lane - confirm against the table definition.
 //
 // Modifies from's lower vector (and the emitted code overwrites eax and ecx).
 // The mVU parameter is not referenced in this body.
 void ADD_SS_TriAceHack(microVU& mVU, const xmm& to, const xmm& from)
 {
 	// Pull the low 32 bits of each operand into a GPR.
 	xMOVD(eax, to);
 	xMOVD(ecx, from);
 	// Shift out the low 23 bits and keep the next 8 bits (the exponent field).
 	xSHR (eax, 23);
 	xSHR (ecx, 23);
 	xAND (eax, 0xff);
 	xAND (ecx, 0xff);
 	xSUB (ecx, eax); // Exponent Difference (from's field minus to's field)
 	xCMP (ecx, -25);
 	xForwardJLE8 case_neg_big; // diff <= -25
 	xCMP (ecx,  25);
 	xForwardJL8  case_end1; // -25 < diff < 25: no masking, go straight to the add
 	// case_pos_big: (fall-through, diff >= 25) mask 'to'
 	xPAND(to, ptr128[sseMasks.ADD_SS]);
 	xForwardJump8 case_end2; // skip over the case_neg_big code
 	case_neg_big.SetTarget();
 	xPAND(from, ptr128[sseMasks.ADD_SS]);
 	// All paths rejoin here for the actual scalar add.
 	case_end1.SetTarget();
 	case_end2.SetTarget();
 	xADD.SS(to, from);
 }
 #define clampOp(opX, isPS) {					\
@ -464,7 +494,7 @@ void SSE_MINSS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, co
 // Scalar add wrapper: when CHECK_VUADDSUBHACK is not set it goes through
 // clampOp(xADD.SS, 0); when it is set it delegates to the TriAce-specific
 // add helper instead. After this change the hack path no longer forwards t1/t2.
 void SSE_ADD2SS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
 {
 	if (!CHECK_VUADDSUBHACK) { clampOp(xADD.SS, 0); }
-	else					 { ADD_SS(mVU, to, from, t1, t2); }
+	else					 { ADD_SS_TriAceHack(mVU, to, from); }
 }
 // Does same as SSE_ADDPS since tri-ace games only need SS implementation of VUADDSUBHACK...