a few changes, some stuff is more optimized.

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@43 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
cottonvibes 2008-08-17 11:09:19 +00:00 committed by Gregory Hainaut
parent 39284a86c6
commit 81c4d1d095
1 changed files with 50 additions and 51 deletions

View File

@ -1153,8 +1153,7 @@ void vuFloat2(int regd, int regTemp, int XYZW) {
// Clamps infinities to max/min non-infinity number (doesn't use any temp regs) // Clamps infinities to max/min non-infinity number (doesn't use any temp regs)
void vuFloat( int info, int regd, int XYZW) { void vuFloat( int info, int regd, int XYZW) {
if( CHECK_OVERFLOW ) { if( CHECK_OVERFLOW ) {
//Faster, doesnt break games, but seems unsafe to enable /*if ( (XYZW != 0) && (XYZW != 8) && (XYZW != 0xF) ) {
/*if ( !(CHECK_EXTRA_OVERFLOW) && (XYZW != 0) && (XYZW != 8) && (XYZW != 0xF) ) {
int t1reg = _vuGetTempXMMreg2(info, regd); int t1reg = _vuGetTempXMMreg2(info, regd);
if (t1reg >= 0) { if (t1reg >= 0) {
vuFloat2( regd, t1reg, XYZW ); vuFloat2( regd, t1reg, XYZW );
@ -1380,7 +1379,7 @@ void recUpdateFlags(VURegs * VU, int reg, int info)
u32 flagmask; u32 flagmask;
u8* pjmp; u8* pjmp;
u32 macaddr, stataddr, prevstataddr; u32 macaddr, stataddr, prevstataddr;
int x86macflag, x86newflag, x86oldflag; int x86macflag, x86newflag, x86temp;
const static u8 macarr[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 }; const static u8 macarr[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 };
if( !(info & PROCESS_VU_UPDATEFLAGS) ) if( !(info & PROCESS_VU_UPDATEFLAGS) )
@ -1399,15 +1398,13 @@ void recUpdateFlags(VURegs * VU, int reg, int info)
// 20 insts // 20 insts
x86newflag = ALLOCTEMPX86(MODE_8BITREG); x86newflag = ALLOCTEMPX86(MODE_8BITREG);
x86macflag = ALLOCTEMPX86(0); x86macflag = ALLOCTEMPX86(0);
x86oldflag = ALLOCTEMPX86(0); x86temp = ALLOCTEMPX86(0);
// can do with 8 bits since only computing zero/sign flags // can do with 8 bits since only computing zero/sign flags
if( EEREC_TEMP != reg ) { if( EEREC_TEMP != reg ) {
SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x1B); // Flip wzyx to xyzw SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x1B); // Flip wzyx to xyzw
MOV32MtoR(x86oldflag, prevstataddr); // Load the previous status in to x86oldflag
//-------------------------Check for Overflow flags------------------------------ //-------------------------Check for Overflow flags------------------------------
SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // Clear EEREC_TEMP SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // Clear EEREC_TEMP
@ -1417,11 +1414,11 @@ void recUpdateFlags(VURegs * VU, int reg, int info)
SSE_MOVMSKPS_XMM_to_R32(x86newflag, EEREC_TEMP); // Move the sign bits of the previous calculation SSE_MOVMSKPS_XMM_to_R32(x86newflag, EEREC_TEMP); // Move the sign bits of the previous calculation
XOR32RtoR(EAX, EAX); //Clear EAX XOR32RtoR(x86temp, x86temp); //Clear x86temp
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Overflowed" bits from the previous calculation (also make sure we're only grabbing from the XYZW being modified) AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Overflowed" bits from the previous calculation (also make sure we're only grabbing from the XYZW being modified)
pjmp = JZ8(0); // Skip if none are pjmp = JZ8(0); // Skip if none are
OR32ItoR(EAX, 8); // Set if they are OR32ItoR(x86temp, 8); // Set if they are
x86SetJ8(pjmp); x86SetJ8(pjmp);
OR32RtoR(x86macflag, x86newflag); OR32RtoR(x86macflag, x86newflag);
@ -1442,7 +1439,7 @@ void recUpdateFlags(VURegs * VU, int reg, int info)
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Underflowed" bits from the previous calculation AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Underflowed" bits from the previous calculation
pjmp = JZ8(0); // Skip if none are pjmp = JZ8(0); // Skip if none are
OR32ItoR(EAX, 4); // Set if they are OR32ItoR(x86temp, 4); // Set if they are
x86SetJ8(pjmp); x86SetJ8(pjmp);
OR32RtoR(x86macflag, x86newflag); OR32RtoR(x86macflag, x86newflag);
@ -1474,7 +1471,7 @@ void recUpdateFlags(VURegs * VU, int reg, int info)
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Is Signed" bits from the previous calculation AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Is Signed" bits from the previous calculation
pjmp = JZ8(0); // Skip if none are pjmp = JZ8(0); // Skip if none are
OR32ItoR(EAX, 2); // Set if they are OR32ItoR(x86temp, 2); // Set if they are
x86SetJ8(pjmp); x86SetJ8(pjmp);
OR32RtoR(x86macflag, x86newflag); OR32RtoR(x86macflag, x86newflag);
@ -1495,7 +1492,7 @@ void recUpdateFlags(VURegs * VU, int reg, int info)
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Is Zero" bits from the previous calculation AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Is Zero" bits from the previous calculation
pjmp = JZ8(0); // Skip if none are pjmp = JZ8(0); // Skip if none are
OR32ItoR(EAX, 1); // Set if they are OR32ItoR(x86temp, 1); // Set if they are
x86SetJ8(pjmp); x86SetJ8(pjmp);
OR32RtoR(x86macflag, x86newflag); OR32RtoR(x86macflag, x86newflag);
@ -1506,7 +1503,7 @@ void recUpdateFlags(VURegs * VU, int reg, int info)
if( macaddr != 0 ) if( macaddr != 0 )
MOV16RtoM(macaddr, x86macflag); MOV16RtoM(macaddr, x86macflag);
else else
SysPrintf( "VU ALLOCATION ERROR: Macaddr == EAX!!! Can't set Mac Flags!\n" ); SysPrintf( "VU ALLOCATION ERROR: Can't set Mac Flags!\n" );
} }
//-------------------------Flag Setting if (reg == EEREC_TEMP)------------------------------ //-------------------------Flag Setting if (reg == EEREC_TEMP)------------------------------
else { else {
@ -1522,8 +1519,6 @@ void recUpdateFlags(VURegs * VU, int reg, int info)
SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x1B); // Flip wzyx to xyzw SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x1B); // Flip wzyx to xyzw
MOV32MtoR(x86oldflag, prevstataddr); // Load the previous status in to x86oldflag
//-------------------------Check for Overflow flags------------------------------ //-------------------------Check for Overflow flags------------------------------
SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg
@ -1533,11 +1528,11 @@ void recUpdateFlags(VURegs * VU, int reg, int info)
SSE_MOVMSKPS_XMM_to_R32(x86newflag, t1reg); // Move the sign bits of the previous calculation SSE_MOVMSKPS_XMM_to_R32(x86newflag, t1reg); // Move the sign bits of the previous calculation
XOR32RtoR(EAX, EAX); //Clear EAX XOR32RtoR(x86temp, x86temp); //Clear x86temp
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Overflowed" bits from the previous calculation (also make sure we're only grabbing from the XYZW being modified) AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Overflowed" bits from the previous calculation (also make sure we're only grabbing from the XYZW being modified)
pjmp = JZ8(0); // Skip if none are pjmp = JZ8(0); // Skip if none are
OR32ItoR(EAX, 8); // Set if they are OR32ItoR(x86temp, 8); // Set if they are
x86SetJ8(pjmp); x86SetJ8(pjmp);
OR32RtoR(x86macflag, x86newflag); OR32RtoR(x86macflag, x86newflag);
@ -1558,7 +1553,7 @@ void recUpdateFlags(VURegs * VU, int reg, int info)
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Underflowed" bits from the previous calculation AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Underflowed" bits from the previous calculation
pjmp = JZ8(0); // Skip if none are pjmp = JZ8(0); // Skip if none are
OR32ItoR(EAX, 4); // Set if they are OR32ItoR(x86temp, 4); // Set if they are
x86SetJ8(pjmp); x86SetJ8(pjmp);
OR32RtoR(x86macflag, x86newflag); OR32RtoR(x86macflag, x86newflag);
@ -1590,7 +1585,7 @@ void recUpdateFlags(VURegs * VU, int reg, int info)
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Is Signed" bits from the previous calculation AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Is Signed" bits from the previous calculation
pjmp = JZ8(0); // Skip if none are pjmp = JZ8(0); // Skip if none are
OR32ItoR(EAX, 2); // Set if they are OR32ItoR(x86temp, 2); // Set if they are
x86SetJ8(pjmp); x86SetJ8(pjmp);
OR32RtoR(x86macflag, x86newflag); OR32RtoR(x86macflag, x86newflag);
@ -1611,7 +1606,7 @@ void recUpdateFlags(VURegs * VU, int reg, int info)
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Is Zero" bits from the previous calculation AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Is Zero" bits from the previous calculation
pjmp = JZ8(0); // Skip if none are pjmp = JZ8(0); // Skip if none are
OR32ItoR(EAX, 1); // Set if they are OR32ItoR(x86temp, 1); // Set if they are
x86SetJ8(pjmp); x86SetJ8(pjmp);
OR32RtoR(x86macflag, x86newflag); OR32RtoR(x86macflag, x86newflag);
@ -1627,18 +1622,19 @@ void recUpdateFlags(VURegs * VU, int reg, int info)
if( macaddr != 0 ) if( macaddr != 0 )
MOV16RtoM(macaddr, x86macflag); MOV16RtoM(macaddr, x86macflag);
else else
SysPrintf( "VU ALLOCATION ERROR: Macaddr == EAX!!! Can't set Mac Flags!\n" ); SysPrintf( "VU ALLOCATION ERROR: Can't set Mac Flags!\n" );
} }
SHR32ItoR(x86oldflag, 6); MOV32MtoR(x86macflag, prevstataddr); // Load the previous status in to x86macflag
OR32RtoR(x86oldflag, EAX); AND32ItoR(x86macflag, 0xff0); // Keep Sticky and D/I flags
SHL32ItoR(x86oldflag, 6); OR32RtoR(x86macflag, x86temp);
OR32RtoR(x86oldflag, EAX); SHL32ItoR(x86temp, 6);
MOV32RtoM(stataddr, x86oldflag); OR32RtoR(x86macflag, x86temp);
MOV32RtoM(stataddr, x86macflag);
_freeX86reg(x86macflag); _freeX86reg(x86macflag);
_freeX86reg(x86newflag); _freeX86reg(x86newflag);
_freeX86reg(x86oldflag); _freeX86reg(x86temp);
} }
/******************************/ /******************************/
@ -3664,7 +3660,7 @@ void recVUMI_CLIP(VURegs *VU, int info)
{ {
int t1reg = EEREC_D; int t1reg = EEREC_D;
int t2reg = EEREC_ACC; int t2reg = EEREC_ACC;
int x86temp0, x86temp1; int x86temp1, x86temp2;
u32 clipaddr = VU_VI_ADDR(REG_CLIP_FLAG, 0); u32 clipaddr = VU_VI_ADDR(REG_CLIP_FLAG, 0);
u32 prevclipaddr = VU_VI_ADDR(REG_CLIP_FLAG, 2); u32 prevclipaddr = VU_VI_ADDR(REG_CLIP_FLAG, 2);
@ -3678,52 +3674,51 @@ void recVUMI_CLIP(VURegs *VU, int info)
assert( t1reg != t2reg && t1reg != EEREC_TEMP && t2reg != EEREC_TEMP ); assert( t1reg != t2reg && t1reg != EEREC_TEMP && t2reg != EEREC_TEMP );
x86temp1 = ALLOCTEMPX86(MODE_8BITREG); x86temp1 = ALLOCTEMPX86(MODE_8BITREG);
x86temp0 = ALLOCTEMPX86(0); x86temp2 = ALLOCTEMPX86(MODE_8BITREG);
if ( (x86temp1 == 0) || (x86temp2 == 0) ) SysPrintf("VU CLIP Allocation Error: EAX being allocated!");
if( _Ft_ == 0 ) { if( _Ft_ == 0 ) {
// all 1s // all 1s
SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, (uptr)&s_fones[0]); SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, (uptr)&s_fones[0]);
SSE_MOVAPS_M128_to_XMM(t1reg, (uptr)&s_fones[4]); SSE_MOVAPS_M128_to_XMM(t1reg, (uptr)&s_fones[4]);
MOV32MtoR(EAX, prevclipaddr);
} }
else { else {
_unpackVF_xyzw(EEREC_TEMP, EEREC_T, 3); _unpackVF_xyzw(EEREC_TEMP, EEREC_T, 3);
SSE_XORPS_XMM_to_XMM(t1reg, t1reg); SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[0]);
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (int)const_clip);
MOV32MtoR(EAX, prevclipaddr);
SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_TEMP); SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_TEMP);
SSE_ORPS_M128_to_XMM(t1reg, (uptr)&const_clip[4]); SSE_ORPS_M128_to_XMM(t1reg, (uptr)&const_clip[4]);
} }
SSE_CMPNLEPS_XMM_to_XMM(t1reg, EEREC_S); //-w, -z, -y, -x MOV32MtoR(EAX, prevclipaddr);
SSE_CMPLTPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); //+w, +z, +y, +x
SSE_CMPNLTPS_XMM_to_XMM(t1reg, EEREC_S); //-w, -z, -y, -x
SSE_CMPLEPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); //+w, +z, +y, +x
SHL32ItoR(EAX, 6); SHL32ItoR(EAX, 6);
SSE_MOVAPS_XMM_to_XMM(t2reg, EEREC_TEMP); //t2 = +w, +z, +y, +x SSE_MOVAPS_XMM_to_XMM(t2reg, EEREC_TEMP); //t2 = +w, +z, +y, +x
SSE_UNPCKLPS_XMM_to_XMM(EEREC_TEMP, t1reg); //EEREC_TEMP = -y,+y,-x,+x SSE_UNPCKLPS_XMM_to_XMM(EEREC_TEMP, t1reg); //EEREC_TEMP = -y,+y,-x,+x
SSE_UNPCKHPS_XMM_to_XMM(t2reg, t1reg); //t2reg = -w,+w,-z,+z SSE_UNPCKHPS_XMM_to_XMM(t2reg, t1reg); //t2reg = -w,+w,-z,+z
SSE_MOVMSKPS_XMM_to_R32(x86temp0, EEREC_TEMP); // -y,+y,-x,+x SSE_MOVMSKPS_XMM_to_R32(x86temp2, EEREC_TEMP); // -y,+y,-x,+x
SSE_MOVMSKPS_XMM_to_R32(x86temp1, t2reg); // -w,+w,-z,+z SSE_MOVMSKPS_XMM_to_R32(x86temp1, t2reg); // -w,+w,-z,+z
AND8ItoR(x86temp1, 0x3);
SHL8ItoR(x86temp1, 4);
OR8RtoR(EAX, x86temp1);
AND8ItoR(x86temp2, 0xf);
OR8RtoR(EAX, x86temp2);
AND32ItoR(EAX, 0xffffff); AND32ItoR(EAX, 0xffffff);
AND8ItoR(x86temp1, 0x3);
SHL32ItoR(x86temp1, 4);
OR32RtoR(EAX, x86temp0);
OR32RtoR(EAX, x86temp1);
MOV32RtoM(clipaddr, EAX); MOV32RtoM(clipaddr, EAX);
/*if( !(info&(PROCESS_VU_SUPER|PROCESS_VU_COP2)) )*/ MOV32RtoM((uptr)&VU->VI[REG_CLIP_FLAG], EAX); MOV32RtoM((uptr)&VU->VI[REG_CLIP_FLAG], EAX);
_freeXMMreg(t1reg); //if( !(info&(PROCESS_VU_SUPER|PROCESS_VU_COP2)) ) MOV32RtoM((uptr)&VU->VI[REG_CLIP_FLAG], EAX);
_freeXMMreg(t2reg); //_freeXMMreg(t1reg); // We Never Allocated these regs, so no need to free them
//_freeXMMreg(t2reg);
_freeX86reg(x86temp0);
_freeX86reg(x86temp1); _freeX86reg(x86temp1);
_freeX86reg(x86temp2);
} }
/******************************/ /******************************/
@ -3740,6 +3735,8 @@ void recVUMI_DIV(VURegs *VU, int info)
u32* pjmp2; u32* pjmp2;
u32* pjmp32; u32* pjmp32;
AND32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0xFCF); // Clear D/I flags
if( _Fs_ == 0 ) { if( _Fs_ == 0 ) {
if( _Ft_ == 0 ) { if( _Ft_ == 0 ) {
@ -4210,7 +4207,7 @@ void recVUMI_SQRT( VURegs *VU, int info )
u8* pjmp; u8* pjmp;
SysPrintf("SQRT Opcode \n"); SysPrintf("SQRT Opcode \n");
AND32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0xFDF); //Divide flag cleared regardless of result AND32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0xFCF); // Clear D/I flags
if( xmmregs[EEREC_T].mode & MODE_WRITE ) if( xmmregs[EEREC_T].mode & MODE_WRITE )
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, _Ftf_); _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, _Ftf_);
@ -4245,6 +4242,8 @@ void recVUMI_RSQRT(VURegs *VU, int info)
u32* ajmp32; u32* ajmp32;
u32* bjmp32; u32* bjmp32;
AND32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0xFCF); // Clear D/I flags
if( _Ftf_ ) if( _Ftf_ )
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, _Ftf_); _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, _Ftf_);
else else