A few optimizations to ivumicro stuff

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@80 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
ramapcsx2 2008-09-01 11:18:43 +00:00 committed by Gregory Hainaut
parent f1ac20f12a
commit 787513a327
1 changed files with 50 additions and 50 deletions

View File

@ -100,7 +100,7 @@ static s32 bpc;
_VURegsNum* g_VUregs = NULL; _VURegsNum* g_VUregs = NULL;
u8 g_MACFlagTransform[256] = {0}; // used to flip xyzw bits u8 g_MACFlagTransform[256] = {0}; // used to flip xyzw bits
const static int SSEmovMask[ 16 ][ 4 ] = const static PCSX2_ALIGNED16 (int SSEmovMask[ 16 ][ 4 ]) =
{ {
{ 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
{ 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF }, { 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF },
@ -1131,7 +1131,7 @@ void testWhenOverflow(int info, int regd, int t0reg) {
} }
// Clamps infinities to max/min non-infinity number (uses a temp reg) // Clamps infinities to max/min non-infinity number (uses a temp reg)
void vuFloat2(int regd, int regTemp, int XYZW) { void vuFloat2(int regd, int regTemp, u8 XYZW) {
if( CHECK_OVERFLOW ) { if( CHECK_OVERFLOW ) {
if (XYZW == 8) { if (XYZW == 8) {
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
@ -1151,7 +1151,7 @@ void vuFloat2(int regd, int regTemp, int XYZW) {
} }
// Clamps infinities to max/min non-infinity number (doesn't use any temp regs) // Clamps infinities to max/min non-infinity number (doesn't use any temp regs)
void vuFloat( int info, int regd, int XYZW) { void vuFloat( int regd, u8 XYZW) {
if( CHECK_OVERFLOW ) { if( CHECK_OVERFLOW ) {
/*if ( (XYZW != 0) && (XYZW != 8) && (XYZW != 0xF) ) { /*if ( (XYZW != 0) && (XYZW != 8) && (XYZW != 0xF) ) {
int t1reg = _vuGetTempXMMreg2(info, regd); int t1reg = _vuGetTempXMMreg2(info, regd);
@ -1165,10 +1165,22 @@ void vuFloat( int info, int regd, int XYZW) {
case 0: // Don't do anything if no vectors are being modified. case 0: // Don't do anything if no vectors are being modified.
break; break;
case 15: //1111 case 15: //1111 //15 and 14 happen most often
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals); SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals); SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
break; break;
case 14: //0111 //15 and 14 happen most often
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc9);
break;
case 1: //1000 case 1: //1000
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
@ -1289,18 +1301,6 @@ void vuFloat( int info, int regd, int XYZW) {
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x2d); SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x2d);
break; break;
case 14: //0111
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc9);
break;
} }
} }
} }
@ -1325,14 +1325,14 @@ void vuFloat3(uptr x86ptr)
void CheckForOverflow(VURegs *VU, int info, int regd) void CheckForOverflow(VURegs *VU, int info, int regd)
{ {
testWhenOverflow(info, regd, EEREC_TEMP); //testWhenOverflow(info, regd, EEREC_TEMP); //Enable if you need the debug output. It costs fps
//CheckForOverflow_(regd, EEREC_TEMP, _X_Y_Z_W); //CheckForOverflow_(regd, EEREC_TEMP, _X_Y_Z_W);
if (EEREC_TEMP != regd) { if (EEREC_TEMP != regd) {
//testWhenOverflow(info, regd, EEREC_TEMP); //testWhenOverflow(info, regd, EEREC_TEMP);
vuFloat2(regd, EEREC_TEMP, _X_Y_Z_W); vuFloat2(regd, EEREC_TEMP, _X_Y_Z_W);
} }
else else
vuFloat( info, regd, _X_Y_Z_W); vuFloat(regd, _X_Y_Z_W);
} }
// if unordered replaces with 0x7f7fffff // if unordered replaces with 0x7f7fffff
@ -1366,7 +1366,7 @@ void recUpdateFlags(VURegs * VU, int reg, int info)
u8* pjmp; u8* pjmp;
u32 macaddr, stataddr, prevstataddr; u32 macaddr, stataddr, prevstataddr;
int x86macflag, x86newflag, x86temp; int x86macflag, x86newflag, x86temp;
const static u8 macarr[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 }; const static PCSX2_ALIGNED16 (u8 macarr[16]) = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 };
if( !(info & PROCESS_VU_UPDATEFLAGS) ) if( !(info & PROCESS_VU_UPDATEFLAGS) )
return; return;
@ -1679,8 +1679,8 @@ void recVUMI_ADD(VURegs *VU, int info)
} }
else { else {
if (CHECK_EXTRA_OVERFLOW) { if (CHECK_EXTRA_OVERFLOW) {
vuFloat( info, EEREC_S, _X_Y_Z_W); vuFloat(EEREC_S, _X_Y_Z_W);
vuFloat( info, EEREC_T, _X_Y_Z_W); vuFloat(EEREC_T, _X_Y_Z_W);
} }
if( _X_Y_Z_W == 8 ) { // If only adding x, then we can do a Scalar Add if( _X_Y_Z_W == 8 ) { // If only adding x, then we can do a Scalar Add
if (EEREC_D == EEREC_S) SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_T); if (EEREC_D == EEREC_S) SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_T);
@ -2407,9 +2407,9 @@ void recVUMI_MUL_toD(VURegs *VU, int regd, int info)
{ {
if (CHECK_EXTRA_OVERFLOW) { if (CHECK_EXTRA_OVERFLOW) {
//using vuFloat instead of vuFloat2 incase regd == EEREC_TEMP //using vuFloat instead of vuFloat2 incase regd == EEREC_TEMP
vuFloat( info, EEREC_S, _X_Y_Z_W); vuFloat(EEREC_S, _X_Y_Z_W);
vuFloat( info, EEREC_T, _X_Y_Z_W); vuFloat(EEREC_T, _X_Y_Z_W);
vuFloat( info, regd, _X_Y_Z_W); vuFloat(regd, _X_Y_Z_W);
} }
if (_X_Y_Z_W == 1 && (_Ft_ == 0 || _Fs_==0) ) { // W if (_X_Y_Z_W == 1 && (_Ft_ == 0 || _Fs_==0) ) { // W
@ -2449,8 +2449,8 @@ void recVUMI_MUL_iq_toD(VURegs *VU, uptr addr, int regd, int info)
{ {
if (CHECK_EXTRA_OVERFLOW) { if (CHECK_EXTRA_OVERFLOW) {
vuFloat3(addr); vuFloat3(addr);
vuFloat( info, EEREC_S, _X_Y_Z_W); vuFloat(EEREC_S, _X_Y_Z_W);
vuFloat( info, regd, _X_Y_Z_W); vuFloat(regd, _X_Y_Z_W);
} }
if( _XYZW_SS ) { if( _XYZW_SS ) {
@ -2503,11 +2503,11 @@ void recVUMI_MUL_iq_toD(VURegs *VU, uptr addr, int regd, int info)
void recVUMI_MUL_xyzw_toD(VURegs *VU, int xyzw, int regd, int info) void recVUMI_MUL_xyzw_toD(VURegs *VU, int xyzw, int regd, int info)
{ {
if (CHECK_EXTRA_OVERFLOW) { if (CHECK_EXTRA_OVERFLOW) {
vuFloat( info, regd, _X_Y_Z_W); vuFloat( regd, _X_Y_Z_W);
vuFloat( info, EEREC_T, ( 1 << (3 - xyzw) ) ); vuFloat( EEREC_T, ( 1 << (3 - xyzw) ) );
} }
// This is needed for alot of games // This is needed for alot of games
vuFloat( info, EEREC_S, _X_Y_Z_W); vuFloat( EEREC_S, _X_Y_Z_W);
if( _Ft_ == 0 ) { if( _Ft_ == 0 ) {
if( xyzw < 3 ) { if( xyzw < 3 ) {
@ -2634,9 +2634,9 @@ void recVUMI_MULAw(VURegs *VU, int info) { recVUMI_MULA_xyzw(VU, 3, info); }
void recVUMI_MADD_toD(VURegs *VU, int regd, int info) void recVUMI_MADD_toD(VURegs *VU, int regd, int info)
{ {
if (CHECK_EXTRA_OVERFLOW) { if (CHECK_EXTRA_OVERFLOW) {
vuFloat( info, EEREC_S, _X_Y_Z_W); vuFloat( EEREC_S, _X_Y_Z_W);
vuFloat( info, EEREC_T, _X_Y_Z_W); vuFloat( EEREC_T, _X_Y_Z_W);
vuFloat( info, regd, _X_Y_Z_W); vuFloat( regd, _X_Y_Z_W);
} }
if( _X_Y_Z_W == 8 ) { if( _X_Y_Z_W == 8 ) {
@ -2692,8 +2692,8 @@ void recVUMI_MADD_iq_toD(VURegs *VU, uptr addr, int regd, int info)
{ {
if (CHECK_EXTRA_OVERFLOW) { if (CHECK_EXTRA_OVERFLOW) {
vuFloat3(addr); vuFloat3(addr);
vuFloat( info, EEREC_S, _X_Y_Z_W); vuFloat( EEREC_S, _X_Y_Z_W);
vuFloat( info, regd, _X_Y_Z_W); vuFloat( regd, _X_Y_Z_W);
} }
if( _X_Y_Z_W == 8 ) { if( _X_Y_Z_W == 8 ) {
@ -2770,12 +2770,12 @@ void recVUMI_MADD_iq_toD(VURegs *VU, uptr addr, int regd, int info)
void recVUMI_MADD_xyzw_toD(VURegs *VU, int xyzw, int regd, int info) void recVUMI_MADD_xyzw_toD(VURegs *VU, int xyzw, int regd, int info)
{ {
if (CHECK_EXTRA_OVERFLOW) { if (CHECK_EXTRA_OVERFLOW) {
vuFloat( info, EEREC_T, ( 1 << (3 - xyzw) ) ); vuFloat( EEREC_T, ( 1 << (3 - xyzw) ) );
vuFloat( info, EEREC_ACC, _X_Y_Z_W); vuFloat( EEREC_ACC, _X_Y_Z_W);
vuFloat( info, regd, _X_Y_Z_W); vuFloat( regd, _X_Y_Z_W);
} }
// This is needed for alot of games // This is needed for alot of games
vuFloat( info, EEREC_S, _X_Y_Z_W); vuFloat( EEREC_S, _X_Y_Z_W);
if( _Ft_ == 0 ) { if( _Ft_ == 0 ) {
@ -2959,9 +2959,9 @@ void recVUMI_MADDAw( VURegs *VU , int info)
void recVUMI_MSUB_toD(VURegs *VU, int regd, int info) void recVUMI_MSUB_toD(VURegs *VU, int regd, int info)
{ {
if (CHECK_EXTRA_OVERFLOW) { if (CHECK_EXTRA_OVERFLOW) {
vuFloat( info, EEREC_S, _X_Y_Z_W); vuFloat( EEREC_S, _X_Y_Z_W);
vuFloat( info, EEREC_T, _X_Y_Z_W); vuFloat( EEREC_T, _X_Y_Z_W);
vuFloat( info, regd, _X_Y_Z_W); vuFloat( regd, _X_Y_Z_W);
} }
if (_X_Y_Z_W != 0xf) { if (_X_Y_Z_W != 0xf) {
@ -3014,9 +3014,9 @@ void recVUMI_MSUB_toD(VURegs *VU, int regd, int info)
void recVUMI_MSUB_temp_toD(VURegs *VU, int regd, int info) void recVUMI_MSUB_temp_toD(VURegs *VU, int regd, int info)
{ {
if (CHECK_EXTRA_OVERFLOW) { if (CHECK_EXTRA_OVERFLOW) {
vuFloat( info, EEREC_S, _X_Y_Z_W); vuFloat( EEREC_S, _X_Y_Z_W);
vuFloat( info, EEREC_ACC, _X_Y_Z_W); vuFloat( EEREC_ACC, _X_Y_Z_W);
vuFloat( info, regd, _X_Y_Z_W); vuFloat( regd, _X_Y_Z_W);
} }
if (_X_Y_Z_W != 0xf) { if (_X_Y_Z_W != 0xf) {
@ -3215,7 +3215,7 @@ void recVUMI_MAX_iq(VURegs *VU, uptr addr, int info)
{ {
if ( _Fd_ == 0 ) return; if ( _Fd_ == 0 ) return;
if (CHECK_EXTRA_OVERFLOW) { if (CHECK_EXTRA_OVERFLOW) {
vuFloat( info, EEREC_S, _X_Y_Z_W); vuFloat( EEREC_S, _X_Y_Z_W);
vuFloat3(addr); vuFloat3(addr);
} }
@ -3273,8 +3273,8 @@ void recVUMI_MAX_xyzw(VURegs *VU, int xyzw, int info)
{ {
if ( _Fd_ == 0 ) return; if ( _Fd_ == 0 ) return;
if (CHECK_EXTRA_OVERFLOW) { if (CHECK_EXTRA_OVERFLOW) {
vuFloat( info, EEREC_S, _X_Y_Z_W); vuFloat( EEREC_S, _X_Y_Z_W);
vuFloat( info, EEREC_T, ( 1 << (3 - xyzw) ) ); vuFloat( EEREC_T, ( 1 << (3 - xyzw) ) );
} }
if( _X_Y_Z_W == 8 && (EEREC_D != EEREC_TEMP)) { if( _X_Y_Z_W == 8 && (EEREC_D != EEREC_TEMP)) {
@ -3389,7 +3389,7 @@ void recVUMI_MINI_iq(VURegs *VU, uptr addr, int info)
{ {
if ( _Fd_ == 0 ) return; if ( _Fd_ == 0 ) return;
if (CHECK_EXTRA_OVERFLOW) { if (CHECK_EXTRA_OVERFLOW) {
vuFloat( info, EEREC_S, _X_Y_Z_W); vuFloat( EEREC_S, _X_Y_Z_W);
vuFloat3(addr); vuFloat3(addr);
} }
@ -3447,8 +3447,8 @@ void recVUMI_MINI_xyzw(VURegs *VU, int xyzw, int info)
{ {
if ( _Fd_ == 0 ) return; if ( _Fd_ == 0 ) return;
if (CHECK_EXTRA_OVERFLOW) { if (CHECK_EXTRA_OVERFLOW) {
vuFloat( info, EEREC_S, _X_Y_Z_W); vuFloat( EEREC_S, _X_Y_Z_W);
vuFloat( info, EEREC_T, ( 1 << (3 - xyzw) ) ); vuFloat( EEREC_T, ( 1 << (3 - xyzw) ) );
} }
if( _X_Y_Z_W == 8 && (EEREC_D != EEREC_TEMP)) { if( _X_Y_Z_W == 8 && (EEREC_D != EEREC_TEMP)) {