Further optimized recQFSRV, thanks to some suggestions by Jake.

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@599 a6443dda-0b58-4228-96e9-037be469359c
2009-01-16 08:01:44 +00:00 · 2009-01-16 08:01:44 +00:00 · 87271cb5b0
parent 66125f0c95
commit 87271cb5b0
3 changed files with 43 additions and 21 deletions
--- a/pcsx2/x86/iMMI.cpp
+++ b/pcsx2/x86/iMMI.cpp
@ -1713,52 +1713,57 @@ CPU_SSE_XMMCACHE_END
 }

 ////////////////////////////////////////////////////
-// 15 bytes
-#define QFSRV_bytes 15
+// Both macros emit exactly 16 bytes of code per case, so the jump offset can be computed with a shift instead of a MUL instruction
+#define QFSRVhelper0() {  \
+	ajmp[0] = JMP32(0);  \
+	x86Ptr += 11;  \
+}
+
 #define QFSRVhelper(shift1, shift2) {  \
 	SSE2_PSRLDQ_I8_to_XMM(EEREC_D, shift1);  \
 	SSE2_PSLLDQ_I8_to_XMM(t0reg, shift2);  \
 	ajmp[shift1] = JMP32(0);  \
+	x86Ptr += 1;  \
 }

 void recQFSRV()
 {
-	if ( ! _Rd_ ) return;
+	if ( !_Rd_ ) return;
 	//SysPrintf("recQFSRV()\n");

-	CPU_SSE2_XMMCACHE_START( XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED)
+	CPU_SSE2_XMMCACHE_START( XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED )

 		u32 *ajmp[16];
 		int i, j;
 		int t0reg = _allocTempXMMreg(XMMT_INT, -1);
-		int x86temp	= _allocX86reg(EDX, X86TYPE_TEMP, 0, 0); // Use EDX because MUL32R modifies EDX and EAX

 		SSE2_MOVDQA_XMM_to_XMM(t0reg, EEREC_S);
 		SSE2_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);

-		MOV32MtoR(x86temp, (uptr)&cpuRegs.sa);
-		SHR32ItoR(x86temp, 3);
-		AND32I8toR(x86temp, 0xf); // This can possibly be removed but keeping it incase theres garbage in SA (cottonvibes)
-		MOV32ItoR(EAX, QFSRV_bytes);
-		MUL32R(x86temp);
-		ADD32ItoR(EAX, (uptr)x86Ptr + 7); // ADD32 = 5 bytes, JMPR = 2 bytes
+		MOV32MtoR(EAX, (uptr)&cpuRegs.sa);
+		SHL32ItoR(EAX, 1); // The old code's SHR 3 implies SA holds byte-shift * 8, so doubling it gives byte-shift * 16 (16 = size in bytes of each QFSRVhelper() case)
+		AND32I8toR(EAX, 0xf0); // This can possibly be removed, but keeping it in case there's garbage in SA (cottonvibes)
+		ADD32ItoEAX((uptr)x86Ptr + 7); // ADD32 = 5 bytes, JMPR = 2 bytes
 		JMPR(EAX); // Jumps to a QFSRVhelper() case below (a total of 16 different cases)
 	
-		// Cases 0 to 15:
-		for (i = 0, j = 16; i < 16; i++, j--) {
+		// Case 0:
+		QFSRVhelper0();
+
+		// Cases 1 to 15:
+		for (i = 1, j = 15; i < 16; i++, j--) {
 			QFSRVhelper(i, j);
 		}

 		// Set jump addresses for the JMP32's in QFSRVhelper()
-		for (i = 0; i < 16; i++) {
+		for (i = 1; i < 16; i++) {
 			x86SetJ32(ajmp[i]);
 		}

 		// Concatenate the regs after appropriate shifts have been made
 		SSE2_POR_XMM_to_XMM(EEREC_D, t0reg);
 		
+		x86SetJ32(ajmp[0]); // Case 0 jumps to here (to skip the POR)
 		_freeXMMreg(t0reg);
-		_freeX86reg(x86temp);

 	CPU_SSE_XMMCACHE_END
 	//recCall( Interp::QFSRV, _Rd_ );
--- a/pcsx2/x86/ix86/ix86.cpp
+++ b/pcsx2/x86/ix86/ix86.cpp
@ -272,6 +272,13 @@ __forceinline void CLC( void )
 	write8( 0xF8 );
 }

+// NOP: emits the 1-byte no-op opcode (0x90)
+__forceinline void NOP( void )
+{
+	write8(0x90);
+}
+
+
 ////////////////////////////////////
 // mov instructions				/
 ////////////////////////////////////
@ -1017,6 +1024,13 @@ __forceinline void ADD64RtoR( x86IntRegType to, x86IntRegType from )
 	ModRM( 3, from, to );
 }

+/* add imm32 to EAX: emits opcode 0x05 followed by the 32-bit immediate */
+void ADD32ItoEAX( u32 from )
+{
+	write8( 0x05 );
+	write32( from );
+}
+
 /* add imm32 to r32 */
 __forceinline void ADD32ItoR( x86IntRegType to, u32 from ) 
 {
@ -1029,15 +1043,15 @@ __forceinline void ADD32ItoR( x86IntRegType to, u32 from )
 	}
 	else
 	{
-		if ( to == EAX) {
-			write8( 0x05 ); 
+		if ( to == EAX ) {
+			ADD32ItoEAX(from);
 		}
 		else {
 			write8( 0x81 ); 
 			ModRM( 3, 0, to );
-		}
 			write32( from );
 		}
+	}
 }

 /* add imm32 to m32 */
--- a/pcsx2/x86/ix86/ix86.h
+++ b/pcsx2/x86/ix86/ix86.h
@ -222,6 +222,7 @@ extern void CMOV32MtoR( int cc, int to, uptr from );

 extern void STC( void );
 extern void CLC( void );
+extern void NOP( void );

 ////////////////////////////////////
 // mov instructions               //
@ -419,8 +420,10 @@ extern void ADD64ItoR( x86IntRegType to, u32 from );
 // add m64 to r64 
 extern void ADD64MtoR( x86IntRegType to, uptr from );

+// add imm32 to EAX
+extern void ADD32ItoEAX( u32 from );
 // add imm32 to r32 
-extern void ADD32ItoR( x86IntRegType to, u32 from );
+ extern void ADD32ItoR( x86IntRegType to, u32 from );
 // add imm32 to m32 
 extern void ADD32ItoM( uptr to, u32 from );
 // add imm32 to [r32+off]