Further optimized recQFSRV, thanks to some suggestions by Jake.

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@599 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
cottonvibes 2009-01-16 08:01:44 +00:00 committed by Gregory Hainaut
parent 66125f0c95
commit 87271cb5b0
3 changed files with 43 additions and 21 deletions

View File

@ -1713,52 +1713,57 @@ CPU_SSE_XMMCACHE_END
}
////////////////////////////////////////////////////
// 15 bytes
#define QFSRV_bytes 15
// Both Macros are 16 bytes so we can use a shift instead of a Mul instruction
#define QFSRVhelper0() { \
ajmp[0] = JMP32(0); \
x86Ptr += 11; \
}
#define QFSRVhelper(shift1, shift2) { \
SSE2_PSRLDQ_I8_to_XMM(EEREC_D, shift1); \
SSE2_PSLLDQ_I8_to_XMM(t0reg, shift2); \
ajmp[shift1] = JMP32(0); \
x86Ptr += 1; \
}
void recQFSRV()
{
if ( ! _Rd_ ) return;
if ( !_Rd_ ) return;
//SysPrintf("recQFSRV()\n");
CPU_SSE2_XMMCACHE_START( XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED)
CPU_SSE2_XMMCACHE_START( XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED )
u32 *ajmp[16];
int i, j;
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
int x86temp = _allocX86reg(EDX, X86TYPE_TEMP, 0, 0); // Use EDX because MUL32R modifies EDX and EAX
SSE2_MOVDQA_XMM_to_XMM(t0reg, EEREC_S);
SSE2_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T);
MOV32MtoR(x86temp, (uptr)&cpuRegs.sa);
SHR32ItoR(x86temp, 3);
AND32I8toR(x86temp, 0xf); // This can possibly be removed, but keeping it in case there's garbage in SA (cottonvibes)
MOV32ItoR(EAX, QFSRV_bytes);
MUL32R(x86temp);
ADD32ItoR(EAX, (uptr)x86Ptr + 7); // ADD32 = 5 bytes, JMPR = 2 bytes
MOV32MtoR(EAX, (uptr)&cpuRegs.sa);
SHL32ItoR(EAX, 1); // Multiply SA bytes by 16 bytes (the amount of bytes in QFSRVhelper() macros)
AND32I8toR(EAX, 0xf0); // This can possibly be removed, but keeping it in case there's garbage in SA (cottonvibes)
ADD32ItoEAX((uptr)x86Ptr + 7); // ADD32 = 5 bytes, JMPR = 2 bytes
JMPR(EAX); // Jumps to a QFSRVhelper() case below (a total of 16 different cases)
// Cases 0 to 15:
for (i = 0, j = 16; i < 16; i++, j--) {
// Case 0:
QFSRVhelper0();
// Cases 1 to 15:
for (i = 1, j = 15; i < 16; i++, j--) {
QFSRVhelper(i, j);
}
// Set jump addresses for the JMP32's in QFSRVhelper()
for (i = 0; i < 16; i++) {
for (i = 1; i < 16; i++) {
x86SetJ32(ajmp[i]);
}
// Concatenate the regs after appropriate shifts have been made
SSE2_POR_XMM_to_XMM(EEREC_D, t0reg);
x86SetJ32(ajmp[0]); // Case 0 jumps to here (to skip the POR)
_freeXMMreg(t0reg);
_freeX86reg(x86temp);
CPU_SSE_XMMCACHE_END
//recCall( Interp::QFSRV, _Rd_ );

View File

@ -272,6 +272,13 @@ __forceinline void CLC( void )
write8( 0xF8 );
}
// NOP 1-byte
__forceinline void NOP( void )
{
write8(0x90);
}
////////////////////////////////////
// mov instructions /
////////////////////////////////////
@ -1017,6 +1024,13 @@ __forceinline void ADD64RtoR( x86IntRegType to, x86IntRegType from )
ModRM( 3, from, to );
}
/* add imm32 to EAX */
void ADD32ItoEAX( u32 from )
{
write8( 0x05 );
write32( from );
}
/* add imm32 to r32 */
__forceinline void ADD32ItoR( x86IntRegType to, u32 from )
{
@ -1029,15 +1043,15 @@ __forceinline void ADD32ItoR( x86IntRegType to, u32 from )
}
else
{
if ( to == EAX) {
write8( 0x05 );
if ( to == EAX ) {
ADD32ItoEAX(from);
}
else {
write8( 0x81 );
ModRM( 3, 0, to );
}
write32( from );
}
}
}
/* add imm32 to m32 */

View File

@ -222,6 +222,7 @@ extern void CMOV32MtoR( int cc, int to, uptr from );
extern void STC( void );
extern void CLC( void );
extern void NOP( void );
////////////////////////////////////
// mov instructions //
@ -419,8 +420,10 @@ extern void ADD64ItoR( x86IntRegType to, u32 from );
// add m64 to r64
extern void ADD64MtoR( x86IntRegType to, uptr from );
// add imm32 to EAX
extern void ADD32ItoEAX( u32 from );
// add imm32 to r32
extern void ADD32ItoR( x86IntRegType to, u32 from );
extern void ADD32ItoR( x86IntRegType to, u32 from );
// add imm32 to m32
extern void ADD32ItoM( uptr to, u32 from );
// add imm32 to [r32+off]