Added Nneeve's fix for recMADDU: it was using the signed IMUL instruction instead of the unsigned MUL instruction, which could have produced incorrect results in rare cases.

Cleaned up some of the signed/unsigned ambiguity surrounding MULT/MULTU instructions.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@533 96395faa-99c1-11dd-bbfe-3dabce05a288
2009-02-19 09:59:02 +00:00 · 2009-02-19 09:59:02 +00:00 · 3532ebada0
parent 4bd685485d
commit 3532ebada0
5 changed files with 70 additions and 19 deletions
--- a/pcsx2/FPU.cpp
+++ b/pcsx2/FPU.cpp
@@ -58,6 +58,12 @@ using namespace std;			// for min / max
 #define _FdValUl_    fpuRegs.fpr[ _Fd_ ].UL
 #define _FAValUl_    fpuRegs.ACC.UL

+// S32's - useful for ensuring sign extension when needed.
+#define _FtValSl_    fpuRegs.fpr[ _Ft_ ].SL
+#define _FsValSl_    fpuRegs.fpr[ _Fs_ ].SL
+#define _FdValSl_    fpuRegs.fpr[ _Fd_ ].SL
+#define _FAValSl_    fpuRegs.ACC.SL
+
 // FPU Control Reg (FCR31)
 #define _ContVal_    fpuRegs.fprc[ 31 ]

@@ -225,7 +231,7 @@ void C_LT() {

 void CFC1() {
 	if ( !_Rt_ || ( (_Fs_ != 0) && (_Fs_ != 31) ) ) return;
-	cpuRegs.GPR.r[_Rt_].SD[0] = (s64)(s32)fpuRegs.fprc[_Fs_];
+	cpuRegs.GPR.r[_Rt_].SD[0] = (s32)fpuRegs.fprc[_Fs_];	// force sign extension to 64 bit
 }

 void CTC1() {
@@ -234,12 +240,12 @@ void CTC1() {
 }

 void CVT_S() {
-	_FdValf_ = (float)(*(s32*)&_FsValUl_);
+	_FdValf_ = (float)_FsValSl_;
 	_FdValf_ = fpuDouble( _FdValUl_ );
 }

 void CVT_W() {
-	if ( ( _FsValUl_ & 0x7F800000 ) <= 0x4E800000 ) { _FdValUl_ = (s32)_FsValf_; }
+	if ( ( _FsValUl_ & 0x7F800000 ) <= 0x4E800000 ) { _FdValSl_ = (s32)_FsValf_; }
 	else if ( ( _FsValUl_ & 0x80000000 ) == 0 ) { _FdValUl_ = 0x7fffffff; }
 	else { _FdValUl_ = 0x80000000; }
 }
@@ -276,7 +282,7 @@ void MAX_S() {

 void MFC1() {
 	if ( !_Rt_ ) return;
-	cpuRegs.GPR.r[_Rt_].SD[0] = (s64)(s32)_FsValUl_;
+	cpuRegs.GPR.r[_Rt_].SD[0] = _FsValSl_;		// sign extension into 64bit
 }

 void MIN_S() {
@@ -373,15 +379,15 @@ void SUBA_S() {

 void LWC1() {
 	u32 addr;
-	addr = cpuRegs.GPR.r[_Rs_].UL[0] + (s32)(s16)(cpuRegs.code & 0xffff);
-	if (addr & 0x00000003) { Console::Error( "FPU (LWC1 Opcode): Invalid Memory Address" ); return; }  // Should signal an exception?
+	addr = cpuRegs.GPR.r[_Rs_].UL[0] + (s16)(cpuRegs.code & 0xffff);	// force sign extension to 32bit
+	if (addr & 0x00000003) { Console::Error( "FPU (LWC1 Opcode): Invalid Unaligned Memory Address" ); return; }  // Should signal an exception?
 	memRead32(addr, &fpuRegs.fpr[_Rt_].UL);
 }

 void SWC1() {
 	u32 addr;
-	addr = cpuRegs.GPR.r[_Rs_].UL[0] + (s32)(s16)(cpuRegs.code & 0xffff);
-	if (addr & 0x00000003) { Console::Error( "FPU (SWC1 Opcode): Invalid Memory Address" ); return; }  // Should signal an exception?
+	addr = cpuRegs.GPR.r[_Rs_].UL[0] + (s16)(cpuRegs.code & 0xffff);	// force sign extension to 32bit
+	if (addr & 0x00000003) { Console::Error( "FPU (SWC1 Opcode): Invalid Unaligned Memory Address" ); return; }  // Should signal an exception?
 	memWrite32(addr,  fpuRegs.fpr[_Rt_].UL); 
 }

--- a/pcsx2/MMI.cpp
+++ b/pcsx2/MMI.cpp
@@ -96,16 +96,28 @@ namespace OpcodeImpl {
 	void MULT1() {
 		s64 temp = (s64)cpuRegs.GPR.r[_Rs_].SL[0] * (s64)cpuRegs.GPR.r[_Rt_].SL[0];

-		cpuRegs.LO.UD[1] = (s64)(s32)(temp & 0xffffffff);
-		cpuRegs.HI.UD[1] = (s64)(s32)(temp >> 32);
+		// Sign-extend into 64 bits:
+		cpuRegs.LO.SD[1] = (s32)(temp & 0xffffffff);
+		cpuRegs.HI.SD[1] = (s32)(temp >> 32);

-		/* Modified a bit . asadr */
 		if (_Rd_) cpuRegs.GPR.r[_Rd_].UD[0] = cpuRegs.LO.UD[1];
 	}

 	void MULTU1() {
 		u64 tempu = (u64)cpuRegs.GPR.r[_Rs_].UL[0] * (u64)cpuRegs.GPR.r[_Rt_].UL[0];

+		// The EE says that results are "undefined" if the source operands are not correctly
+		// sign extended into the full 64 bits.  Since this is InterpreterLand, let's put a
+		// check in and issue a message if it ever happens.
+		// Could be a clue to something else someday.
+
+		if( cpuRegs.GPR.r[_Rs_].SL[0] != cpuRegs.GPR.r[_Rs_].SD[0] )
+			DevCon::Notice( "MULTU1 > Non-extended sign bit on Rs: %8.8x", params cpuRegs.GPR.r[_Rs_].SL[0] );
+
+		if( cpuRegs.GPR.r[_Rt_].SL[0] != cpuRegs.GPR.r[_Rt_].SD[0] )
+			DevCon::Notice( "MULTU1 > Non-extended sign bit on Rt: %8.8x", params cpuRegs.GPR.r[_Rt_].SL[0] );
+
+		// According to docs, sign-extend into 64 bits even though it's an unsigned mult.
 		cpuRegs.LO.UD[1] = (s32)(tempu & 0xffffffff);
 		cpuRegs.HI.UD[1] = (s32)(tempu >> 32);

@@ -121,6 +133,16 @@ namespace OpcodeImpl {

 	void DIVU1() {
 		if (cpuRegs.GPR.r[_Rt_].UL[0] != 0) {
+
+			// See MULTU above for notes on the following sanity check
+
+			if( cpuRegs.GPR.r[_Rs_].SL[0] != cpuRegs.GPR.r[_Rs_].SD[0] )
+				DevCon::Notice( "DIVU1 > Non-extended sign bit on Rs: %8.8x", params cpuRegs.GPR.r[_Rs_].SL[0] );
+
+			if( cpuRegs.GPR.r[_Rt_].SL[0] != cpuRegs.GPR.r[_Rt_].SD[0] )
+				DevCon::Notice( "DIVU1 > Non-extended sign bit on Rt: %8.8x", params cpuRegs.GPR.r[_Rt_].SL[0] );
+
+			// note: DIVU has no sign extension when assigning back to 64 bits
 			cpuRegs.LO.UD[1] = cpuRegs.GPR.r[_Rs_].UL[0] / cpuRegs.GPR.r[_Rt_].UL[0];
 			cpuRegs.HI.UD[1] = cpuRegs.GPR.r[_Rs_].UL[0] % cpuRegs.GPR.r[_Rt_].UL[0];
 		}
--- a/pcsx2/R5900.h
+++ b/pcsx2/R5900.h
@@ -128,6 +128,7 @@ union GPR_reg64 {
 union FPRreg {
 	float f;
 	u32 UL;
+	s32 SL;				// signed 32bit used for sign extension in interpreters.
 };

 struct fpuRegisters {
--- a/pcsx2/R5900OpcodeImpl.cpp
+++ b/pcsx2/R5900OpcodeImpl.cpp
@@ -152,6 +152,16 @@ void DIV() {

 void DIVU() {
 	if (cpuRegs.GPR.r[_Rt_].UL[0] != 0) {
+	
+		// See MULTU below for notes on the following sanity check
+
+		if( cpuRegs.GPR.r[_Rs_].SL[0] != cpuRegs.GPR.r[_Rs_].SD[0] )
+			DevCon::Notice( "DIVU > Non-extended sign bit on Rs: %8.8x", params cpuRegs.GPR.r[_Rs_].SL[0] );
+
+		if( cpuRegs.GPR.r[_Rt_].SL[0] != cpuRegs.GPR.r[_Rt_].SD[0] )
+			DevCon::Notice( "DIVU > Non-extended sign bit on Rt: %8.8x", params cpuRegs.GPR.r[_Rt_].SL[0] );
+
+		// note: DIVU has no sign extension when assigning back to 64 bits
 		cpuRegs.LO.SD[0] = cpuRegs.GPR.r[_Rs_].UL[0] / cpuRegs.GPR.r[_Rt_].UL[0];
 		cpuRegs.HI.SD[0] = cpuRegs.GPR.r[_Rs_].UL[0] % cpuRegs.GPR.r[_Rt_].UL[0];
 	}
@@ -160,21 +170,32 @@ void DIVU() {
 void MULT() { //different in ps2...
 	s64 res = (s64)cpuRegs.GPR.r[_Rs_].SL[0] * (s64)cpuRegs.GPR.r[_Rt_].SL[0];

+	// Sign-extend into 64 bits:
 	cpuRegs.LO.UD[0] = (s32)(res & 0xffffffff);
 	cpuRegs.HI.UD[0] = (s32)(res >> 32);

-	if (!_Rd_) return;
-	cpuRegs.GPR.r[_Rd_].UD[0]= cpuRegs.LO.UD[0]; //that is the difference
+	if( _Rd_ ) cpuRegs.GPR.r[_Rd_].UD[0] = cpuRegs.LO.UD[0]; //that is the difference
 }

 void MULTU() { //different in ps2..
 	u64 res = (u64)cpuRegs.GPR.r[_Rs_].UL[0] * (u64)cpuRegs.GPR.r[_Rt_].UL[0];

+	// The EE says that results are "undefined" if the source operands are not correctly
+	// sign extended into the full 64 bits.  Since this is InterpreterLand, let's put a
+	// check in and issue a message if it ever happens.
+	// Could be a clue to something else someday.
+
+	if( cpuRegs.GPR.r[_Rs_].SL[0] != cpuRegs.GPR.r[_Rs_].SD[0] )
+		DevCon::Notice( "MULTU > Non-extended sign bit on Rs: %8.8x", params cpuRegs.GPR.r[_Rs_].SL[0] );
+
+	if( cpuRegs.GPR.r[_Rt_].SL[0] != cpuRegs.GPR.r[_Rt_].SD[0] )
+		DevCon::Notice( "MULTU > Non-extended sign bit on Rt: %8.8x", params cpuRegs.GPR.r[_Rt_].SL[0] );
+
+	// According to docs, sign-extend into 64 bits even though it's an unsigned mult.
 	cpuRegs.LO.UD[0] = (s32)(res & 0xffffffff);
 	cpuRegs.HI.UD[0] = (s32)(res >> 32);

-	if (!_Rd_) return;
-	cpuRegs.GPR.r[_Rd_].UD[0]= cpuRegs.LO.UD[0]; //that is the difference
+	if( _Rd_ ) cpuRegs.GPR.r[_Rd_].UD[0] = cpuRegs.LO.UD[0]; //that is the difference
 }

 /*********************************************************
--- a/pcsx2/x86/ix86-32/iR5900MultDiv.cpp
+++ b/pcsx2/x86/ix86-32/iR5900MultDiv.cpp
@@ -749,7 +749,6 @@ static PCSX2_ALIGNED16(u32 s_MaddMask[]) = { 0x80000000, 0, 0x80000000, 0 };

 void recMADDU()
 {
-	_eeOnWriteReg(_Rd_, 1);
 	EEINST_SETSIGNEXT(_Rs_);
 	EEINST_SETSIGNEXT(_Rt_);

@@ -765,6 +764,7 @@ void recMADDU()
 		ADC32ItoR( ECX, (u32)(result>>32) );
 		CDQ();
 		if( _Rd_) {
+			_eeOnWriteReg(_Rd_, 1);
 			_deleteEEreg(_Rd_, 0);
 			MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ], EAX );
 			MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ], EDX );
@@ -789,15 +789,15 @@ void recMADDU()

 	if( GPR_IS_CONST1(_Rs_) ) {
 		MOV32ItoR( EAX, g_cpuConstRegs[_Rs_].UL[0] );
-		IMUL32M( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] );
+		MUL32M( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] );
 	}
 	else if ( GPR_IS_CONST1(_Rt_) ) {
 		MOV32ItoR( EAX, g_cpuConstRegs[_Rt_].UL[0] );
-		IMUL32M( (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] );
+		MUL32M( (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] );
 	}
 	else {
 		MOV32MtoR( EAX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] );
-		IMUL32M( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] );
+		MUL32M( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] );
 	}

 	MOV32RtoR( ECX, EDX );
@@ -805,6 +805,7 @@ void recMADDU()
 	ADC32MtoR( ECX, (u32)&cpuRegs.HI.UL[0] );
 	CDQ();
 	if( _Rd_ ) {
+		_eeOnWriteReg(_Rd_, 1);
 		_deleteEEreg(_Rd_, 0);
 		MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ], EAX );
 		MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ], EDX );