Cleaned up some of the VU clamp functions for SSE4, and reverted my previous useless commit :p

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@576 96395faa-99c1-11dd-bbfe-3dabce05a288
tmkkmac 2009-02-23 11:54:24 +00:00
parent 44d1ff527c
commit 5870bb43e9
3 changed files with 52 additions and 140 deletions
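
Note for readers of the first file below: each vFloatN routine clamps the VU vector components selected by N (the xyzw write mask spelled out in the //1000-style comments, x being the highest bit) into the finite float range, so overflowed PS2 math never reaches later SSE ops as INF/NaN. Here is a minimal scalar model of the non-SSE4 clamp path — a sketch only, assuming g_maxvals/g_minvals hold +FLT_MAX/-FLT_MAX and that x maps to mask bit 3; the negated comparisons mimic how MINSS/MAXSS replace a NaN source operand:

#include <float.h>

/* Sketch only; the real functions emit SSE, shuffling the selected
   lanes into place, clamping against g_maxvals/g_minvals, then
   shuffling back. */
static void vfloat_model(float vec[4], int xyzw_mask)
{
    for (int i = 0; i < 4; i++) {
        if (xyzw_mask & (8 >> i)) {                       /* x=bit3 ... w=bit0 (assumed) */
            if (!(vec[i] <=  FLT_MAX)) vec[i] =  FLT_MAX; /* +INF/NaN -> +FLT_MAX, like MINSS */
            if (!(vec[i] >= -FLT_MAX)) vec[i] = -FLT_MAX; /* -INF -> -FLT_MAX, like MAXSS */
        }
    }
}

Judging by the names, the c variants (CLAMP_SIGN_SSE4, the vFloats4 table) additionally preserve the sign of a clamped NaN. The cleanup drops the CLAMP_NORMAL_SSE4 branch from the b variants and deletes outright the b variants that appear to have been used only by the removed vFloats2_MUL_MADD table.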

View File

@@ -1000,17 +1000,6 @@ void vFloat1(int regd, int regTemp) { //1000
     SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
     SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
 }
-void vFloat1b(int regd, int regTemp) { //1000 //regTemp is Modified
-    if ( cpucaps.hasStreamingSIMD4Extensions ) {
-        CLAMP_NORMAL_SSE4(1);
-    }
-    else {
-        SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
-        SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-        SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-        SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
-    }
-}
 void vFloat1c(int regd, int regTemp) { //1000
     if ( cpucaps.hasStreamingSIMD4Extensions ) {
         CLAMP_SIGN_SSE4(1);
@@ -1031,17 +1020,6 @@ void vFloat2(int regd, int regTemp) { //0100
     SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
     SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
 }
-void vFloat2b(int regd, int regTemp) { //0100 //regTemp is Modified
-    if ( cpucaps.hasStreamingSIMD4Extensions ) {
-        CLAMP_NORMAL_SSE4(2);
-    }
-    else {
-        SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
-        SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-        SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-        SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
-    }
-}
 void vFloat2c(int regd, int regTemp) { //0100
     if ( cpucaps.hasStreamingSIMD4Extensions ) {
         CLAMP_SIGN_SSE4(2);
@@ -1066,15 +1044,10 @@ void vFloat3(int regd, int regTemp) { //1100
     SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x36);
 }
 void vFloat3b(int regd, int regTemp) { //1100 //regTemp is Modified
-    if ( cpucaps.hasStreamingSIMD4Extensions ) {
-        CLAMP_NORMAL_SSE4(3);
-    }
-    else {
-        SSE2_MOVSD_XMM_to_XMM(regTemp, regd);
-        SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
-        SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
-        SSE2_MOVSD_XMM_to_XMM(regd, regTemp);
-    }
+    SSE2_MOVSD_XMM_to_XMM(regTemp, regd);
+    SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
+    SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
+    SSE2_MOVSD_XMM_to_XMM(regd, regTemp);
 }
 void vFloat3c(int regd, int regTemp) { //1100
     if ( cpucaps.hasStreamingSIMD4Extensions ) {
@@ -1099,17 +1072,6 @@ void vFloat4(int regd, int regTemp) { //0010
     SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
     SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
 }
-void vFloat4b(int regd, int regTemp) { //0010 //regTemp is Modified
-    if ( cpucaps.hasStreamingSIMD4Extensions ) {
-        CLAMP_NORMAL_SSE4(4);
-    }
-    else {
-        SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
-        SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-        SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-        SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
-    }
-}
 void vFloat4c(int regd, int regTemp) { //0010
     if ( cpucaps.hasStreamingSIMD4Extensions ) {
         CLAMP_SIGN_SSE4(4);
@@ -1229,15 +1191,10 @@ void vFloat7_useEAX(int regd, int regTemp) { //1110 //EAX is Modified
     }
 }
 void vFloat7b(int regd, int regTemp) { //1110 //regTemp is Modified
-    if ( cpucaps.hasStreamingSIMD4Extensions ) {
-        CLAMP_NORMAL_SSE4(7);
-    }
-    else {
-        SSE_MOVSS_XMM_to_XMM(regTemp, regd);
-        SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
-        SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
-        SSE_MOVSS_XMM_to_XMM(regd, regTemp);
-    }
+    SSE_MOVSS_XMM_to_XMM(regTemp, regd);
+    SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
+    SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
+    SSE_MOVSS_XMM_to_XMM(regd, regTemp);
 }
 void vFloat7c(int regd, int regTemp) { //1110
     if ( cpucaps.hasStreamingSIMD4Extensions ) {
@@ -1278,15 +1235,6 @@ void vFloat8(int regd, int regTemp) { //0001
     SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
     SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
 }
-void vFloat8b(int regd, int regTemp) { //0001 //regTemp is Modified
-    if ( cpucaps.hasStreamingSIMD4Extensions ) {
-        CLAMP_NORMAL_SSE4(8);
-    }
-    else {
-        SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-        SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-    }
-}
 void vFloat8c(int regd, int regTemp) { //0001
     if ( cpucaps.hasStreamingSIMD4Extensions ) {
         CLAMP_SIGN_SSE4(8);
@@ -1399,16 +1347,11 @@ void vFloat11_useEAX(int regd, int regTemp) { //1101 //EAX is Modified
     SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
 }
 void vFloat11b(int regd, int regTemp) { //1101 //regTemp is Modified
-    if ( cpucaps.hasStreamingSIMD4Extensions ) {
-        CLAMP_NORMAL_SSE4(11);
-    }
-    else {
-        SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
-        SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
-        SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
-        SSE_MOVSS_XMM_to_XMM(regTemp, regd);
-        SSE2_MOVSD_XMM_to_XMM(regd, regTemp);
-    }
+    SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
+    SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
+    SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
+    SSE_MOVSS_XMM_to_XMM(regTemp, regd);
+    SSE2_MOVSD_XMM_to_XMM(regd, regTemp);
 }
 void vFloat11c(int regd, int regTemp) { //1101
     if ( cpucaps.hasStreamingSIMD4Extensions ) {
@@ -1455,15 +1398,10 @@ void vFloat12(int regd, int regTemp) { //0011
     SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
 }
 void vFloat12b(int regd, int regTemp) { //0011 //regTemp is Modified
-    if ( cpucaps.hasStreamingSIMD4Extensions ) {
-        CLAMP_NORMAL_SSE4(12);
-    }
-    else {
-        SSE_MOVHLPS_XMM_to_XMM(regTemp, regd);
-        SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
-        SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
-        SSE2_PUNPCKLQDQ_XMM_to_XMM(regd, regTemp);
-    }
+    SSE_MOVHLPS_XMM_to_XMM(regTemp, regd);
+    SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
+    SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
+    SSE2_PUNPCKLQDQ_XMM_to_XMM(regd, regTemp);
 }
 void vFloat12c(int regd, int regTemp) { //0011
     if ( cpucaps.hasStreamingSIMD4Extensions ) {
@@ -1507,16 +1445,11 @@ void vFloat13_useEAX(int regd, int regTemp) { //1011 // EAX is modified
     SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
 }
 void vFloat13b(int regd, int regTemp) { //1011 //regTemp is Modified
-    if ( cpucaps.hasStreamingSIMD4Extensions ) {
-        CLAMP_NORMAL_SSE4(13);
-    }
-    else {
-        SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
-        SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
-        SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
-        SSE_MOVHLPS_XMM_to_XMM(regTemp, regd);
-        SSE_SHUFPS_XMM_to_XMM(regd, regTemp, 0x64);
-    }
+    SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
+    SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
+    SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
+    SSE_MOVHLPS_XMM_to_XMM(regTemp, regd);
+    SSE_SHUFPS_XMM_to_XMM(regd, regTemp, 0x64);
 }
 void vFloat13c(int regd, int regTemp) { //1011
     if ( cpucaps.hasStreamingSIMD4Extensions ) {
@@ -1580,16 +1513,11 @@ void vFloat14_useEAX(int regd, int regTemp) { //0111 // EAX is modified
     SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
 }
 void vFloat14b(int regd, int regTemp) { //0111 //regTemp is Modified
-    if ( cpucaps.hasStreamingSIMD4Extensions ) {
-        CLAMP_NORMAL_SSE4(14);
-    }
-    else {
-        SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
-        SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
-        SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
-        SSE_MOVHLPS_XMM_to_XMM(regTemp, regd);
-        SSE_SHUFPS_XMM_to_XMM(regd, regTemp, 0xc4);
-    }
+    SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
+    SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
+    SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
+    SSE_MOVHLPS_XMM_to_XMM(regTemp, regd);
+    SSE_SHUFPS_XMM_to_XMM(regd, regTemp, 0xc4);
 }
 void vFloat14c(int regd, int regTemp) { //0111
     if ( cpucaps.hasStreamingSIMD4Extensions ) {
@@ -1631,15 +1559,6 @@ void vFloat15(int regd, int regTemp) { //1111
     SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
     SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
 }
-void vFloat15b(int regd, int regTemp) { //1111
-    if ( cpucaps.hasStreamingSIMD4Extensions ) {
-        CLAMP_NORMAL_SSE4(15);
-    }
-    else {
-        SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
-        SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
-    }
-}
 void vFloat15c(int regd, int regTemp) { //1111
     if ( cpucaps.hasStreamingSIMD4Extensions ) {
         CLAMP_SIGN_SSE4(15);
@@ -1671,12 +1590,6 @@ vFloat vFloats2[16] = { //regTemp is modified
     vFloat8, vFloat9b, vFloat10b, vFloat11b,
     vFloat12b, vFloat13b, vFloat14b, vFloat15 };
-vFloat vFloats2_MUL_MADD[16] = { //regTemp is modified (Faster than vFloats2 if dealing with Denormals and using SSE4)
-    vFloat0, vFloat1b, vFloat2b, vFloat3b,
-    vFloat4b, vFloat5b, vFloat6b, vFloat7b,
-    vFloat8b, vFloat9b, vFloat10b, vFloat11b,
-    vFloat12b, vFloat13b, vFloat14b, vFloat15b };
 vFloat vFloats4[16] = { //regTemp is modified
     vFloat0, vFloat1c, vFloat2c, vFloat3c,
     vFloat4c, vFloat5c, vFloat6c, vFloat7c,

View File

@@ -72,7 +72,6 @@ typedef void (*vFloat)(int regd, int regTemp);
 extern vFloat vFloats1[16];
 extern vFloat vFloats1_useEAX[16];
 extern vFloat vFloats2[16];
-extern vFloat vFloats2_MUL_MADD[16];
 extern vFloat vFloats4[16];
 extern vFloat vFloats4_useEAX[16];
 extern PCSX2_ALIGNED16(float s_fones[8]);

View File

@@ -820,7 +820,7 @@ void recVUMI_ADD(VURegs *VU, int info)
     if ( _Fs_ == 0 && _Ft_ == 0 ) { // if adding VF00 with VF00, then the result is always 0,0,0,2
         if ( _X_Y_Z_W != 0xf ) {
             SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, (uptr)s_two);
-            if( EEREC_D != EEREC_TEMP ) VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
+            VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
         }
         else SSE_MOVAPS_M128_to_XMM(EEREC_D, (uptr)s_two);
     }
@@ -840,7 +840,7 @@ void recVUMI_ADD(VURegs *VU, int info)
     else if (_X_Y_Z_W != 0xf) { // If xyzw != 1111, then we have to use a temp reg
         SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
         SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
-        if( EEREC_D != EEREC_TEMP ) VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
+        VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
     }
     else { // All xyzw being modified (xyzw == 1111)
         if (EEREC_D == EEREC_S) SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_T);
@@ -899,7 +899,7 @@ void recVUMI_ADD_iq(VURegs *VU, uptr addr, int info)
     if (_X_Y_Z_W != 0xf) {
         SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
-        if( EEREC_D != EEREC_TEMP ) VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
+        VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
     }
     else {
         if ( EEREC_D == EEREC_TEMP ) SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S);
@@ -929,7 +929,7 @@ void recVUMI_ADD_xyzw(VURegs *VU, int xyzw, int info)
     if ( _X_Y_Z_W == 0x8 ) { VU_MERGE_REGS(EEREC_D, EEREC_S); }
     else if ( _X_Y_Z_W != 0xf ) {
         SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
-        if( EEREC_D != EEREC_TEMP ) VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
+        VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
     }
     else SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_S);
 }
@@ -949,13 +949,13 @@ void recVUMI_ADD_xyzw(VURegs *VU, int xyzw, int info)
     }
     else if( _Fs_ == 0 && !_W ) { // just move
         _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw);
-        if( EEREC_D != EEREC_TEMP ) VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
+        VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
     }
     else {
         if ( _X_Y_Z_W != 0xf ) {
             _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw);
             SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
-            if( EEREC_D != EEREC_TEMP ) VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
+            VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
         }
         else {
             if( EEREC_D == EEREC_TEMP ) { _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S); }
@@ -1164,7 +1164,7 @@ void recVUMI_SUB(VURegs *VU, int info)
         SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
         if( ( _Ft_ > 0 ) || _W ) SSE_SUBPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
-        if( EEREC_D != EEREC_TEMP ) VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
+        VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
     }
     else {
         if (EEREC_D == EEREC_S) SSE_SUBPS_XMM_to_XMM(EEREC_D, EEREC_T);
@@ -1238,7 +1238,7 @@ void recVUMI_SUB_iq(VURegs *VU, uptr addr, int info)
             // negate
             SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]);
             SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
-            if( EEREC_D != EEREC_TEMP ) VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
+            VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
         }
     }
     else {
@@ -1304,7 +1304,7 @@ void recVUMI_SUB_xyzw(VURegs *VU, int xyzw, int info)
             // negate
             SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]);
             SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
-            if( EEREC_D != EEREC_TEMP ) VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
+            VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
         }
     }
     else {
@@ -1507,7 +1507,7 @@ void recVUMI_MUL_toD(VURegs *VU, int regd, int info)
     if (_X_Y_Z_W == 1 && (_Ft_ == 0 || _Fs_==0) ) { // W
         SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, _Ft_ ? EEREC_T : EEREC_S);
-        if( regd != EEREC_TEMP ) VU_MERGE_REGS(regd, EEREC_TEMP);
+        VU_MERGE_REGS(regd, EEREC_TEMP);
     }
     else if( _Fd_ == _Fs_ && _Fs_ == _Ft_ && _XYZW_SS ) {
        _vuFlipRegSS(VU, EEREC_D);
@@ -1526,7 +1526,7 @@ void recVUMI_MUL_toD(VURegs *VU, int regd, int info)
         SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
         SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
-        if( regd != EEREC_TEMP ) VU_MERGE_REGS(regd, EEREC_TEMP);
+        VU_MERGE_REGS(regd, EEREC_TEMP);
     }
     else {
         if (regd == EEREC_S) SSE_MULPS_XMM_to_XMM(regd, EEREC_T);
@@ -1579,7 +1579,7 @@ void recVUMI_MUL_iq_toD(VURegs *VU, uptr addr, int regd, int info)
     if (_X_Y_Z_W != 0xf) {
         SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
-        if( regd != EEREC_TEMP ) VU_MERGE_REGS(regd, EEREC_TEMP);
+        VU_MERGE_REGS(regd, EEREC_TEMP);
     }
     else {
         if( regd == EEREC_TEMP ) SSE_MULPS_XMM_to_XMM(regd, EEREC_S);
@@ -1601,13 +1601,13 @@ void recVUMI_MUL_xyzw_toD(VURegs *VU, int xyzw, int regd, int info)
     }
     if (_Fs_) { // This is needed for alot of games; so always clamp this operand
         if (CHECK_VU_SIGN_OVERFLOW) vFloats4_useEAX[_X_Y_Z_W]( EEREC_S, EEREC_TEMP ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set
-        else vFloats2_MUL_MADD[_X_Y_Z_W]( EEREC_S, EEREC_TEMP ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set
+        else vFloats2[_X_Y_Z_W]( EEREC_S, EEREC_TEMP ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set
     }
     if( _Ft_ == 0 ) {
         if( xyzw < 3 ) {
             if (_X_Y_Z_W != 0xf) {
                 SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
-                if( regd != EEREC_TEMP ) VU_MERGE_REGS(regd, EEREC_TEMP);
+                VU_MERGE_REGS(regd, EEREC_TEMP);
             }
             else SSE_XORPS_XMM_to_XMM(regd, regd);
         }
@@ -1615,7 +1615,7 @@ void recVUMI_MUL_xyzw_toD(VURegs *VU, int xyzw, int regd, int info)
             assert(xyzw==3);
             if (_X_Y_Z_W != 0xf) {
                 SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
-                if( regd != EEREC_TEMP ) VU_MERGE_REGS(regd, EEREC_TEMP);
+                VU_MERGE_REGS(regd, EEREC_TEMP);
             }
             else SSE_MOVAPS_XMM_to_XMM(regd, EEREC_S);
         }
@@ -1648,7 +1648,7 @@ void recVUMI_MUL_xyzw_toD(VURegs *VU, int xyzw, int regd, int info)
         if (_X_Y_Z_W != 0xf) {
             SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
-            if( regd != EEREC_TEMP ) VU_MERGE_REGS(regd, EEREC_TEMP);
+            VU_MERGE_REGS(regd, EEREC_TEMP);
         }
         else {
             if( regd == EEREC_TEMP ) SSE_MULPS_XMM_to_XMM(regd, EEREC_S);
@@ -1770,7 +1770,7 @@ void recVUMI_MADD_toD(VURegs *VU, int regd, int info)
         if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); }
         SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
-        if( regd != EEREC_TEMP ) VU_MERGE_REGS(regd, EEREC_TEMP);
+        VU_MERGE_REGS(regd, EEREC_TEMP);
     }
     else {
         if( regd == EEREC_ACC ) {
@@ -1857,7 +1857,7 @@ void recVUMI_MADD_iq_toD(VURegs *VU, uptr addr, int regd, int info)
         if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); }
         SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
-        if( regd != EEREC_TEMP ) VU_MERGE_REGS(regd, EEREC_TEMP);
+        VU_MERGE_REGS(regd, EEREC_TEMP);
     }
     else {
         if( regd == EEREC_ACC ) {
@@ -1895,7 +1895,7 @@ void recVUMI_MADD_xyzw_toD(VURegs *VU, int xyzw, int regd, int info)
     }
     if (_Fs_) { // This is needed for alot of games; so always clamp this operand
         if (CHECK_VU_SIGN_OVERFLOW) vFloats4_useEAX[_X_Y_Z_W]( EEREC_S, EEREC_TEMP ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set
-        else vFloats2_MUL_MADD[_X_Y_Z_W]( EEREC_S, EEREC_TEMP ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set
+        else vFloats2[_X_Y_Z_W]( EEREC_S, EEREC_TEMP ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set
     }
     if( _Ft_ == 0 ) {
@@ -1913,7 +1913,7 @@ void recVUMI_MADD_xyzw_toD(VURegs *VU, int xyzw, int regd, int info)
             SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
             SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
-            if( regd != EEREC_TEMP ) VU_MERGE_REGS(regd, EEREC_TEMP);
+            VU_MERGE_REGS(regd, EEREC_TEMP);
         }
         else {
             if( regd == EEREC_S ) SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC);
@@ -1928,7 +1928,7 @@ void recVUMI_MADD_xyzw_toD(VURegs *VU, int xyzw, int regd, int info)
         // just move acc to regd
         if( _X_Y_Z_W != 0xf ) {
             SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
-            if( regd != EEREC_TEMP ) VU_MERGE_REGS(regd, EEREC_TEMP);
+            VU_MERGE_REGS(regd, EEREC_TEMP);
         }
         else SSE_MOVAPS_XMM_to_XMM(regd, EEREC_ACC);
     }
@@ -1971,7 +1971,7 @@ void recVUMI_MADD_xyzw_toD(VURegs *VU, int xyzw, int regd, int info)
         if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); }
         SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
-        if( regd != EEREC_TEMP ) VU_MERGE_REGS(regd, EEREC_TEMP);
+        VU_MERGE_REGS(regd, EEREC_TEMP);
     }
     else {
         if( regd == EEREC_ACC ) {
@@ -2116,7 +2116,7 @@ void recVUMI_MSUB_toD(VURegs *VU, int regd, int info)
         else {
             SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]);
             SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
-            if( regd != EEREC_TEMP ) VU_MERGE_REGS(regd, EEREC_TEMP);
+            VU_MERGE_REGS(regd, EEREC_TEMP);
         }
     }
     else {
@@ -2173,7 +2173,7 @@ void recVUMI_MSUB_temp_toD(VURegs *VU, int regd, int info)
         else {
             SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]);
             SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
-            if( regd != EEREC_TEMP ) VU_MERGE_REGS(regd, EEREC_TEMP);
+            VU_MERGE_REGS(regd, EEREC_TEMP);
        }
     }
     else {
@@ -2832,7 +2832,7 @@ void recVUMI_OPMSUB( VURegs *VU, int info )
     // negate and add
     SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]);
     SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
-    if( EEREC_D != EEREC_TEMP ) VU_MERGE_REGS_CUSTOM(EEREC_D, EEREC_TEMP, 14);
+    VU_MERGE_REGS_CUSTOM(EEREC_D, EEREC_TEMP, 14);
     // revert EEREC_T
     if( EEREC_T != EEREC_D ) SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0xC9);
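
A closing note on this last file: aside from the two call sites switched from the deleted vFloats2_MUL_MADD table to vFloats2, every hunk is the same one-line revert — the "if( EEREC_D != EEREC_TEMP )" / "if( regd != EEREC_TEMP )" guards are dropped, restoring the unconditional merge (this is the "previous useless commit" from the log message). A hypothetical scalar model of what the merge computes, assuming VU_MERGE_REGS(dst, src) blends the write-masked components of src into dst:

/* Sketch only; the real macro emits SSE move/shuffle sequences for the
   current _X_Y_Z_W mask, and the x=bit3 ... w=bit0 order is an assumption. */
static void vu_merge_model(float dst[4], const float src[4], int xyzw_mask)
{
    for (int i = 0; i < 4; i++)
        if (xyzw_mask & (8 >> i))
            dst[i] = src[i];
}

Under that model the guard only mattered if the destination could alias the temp register, so presumably that case either cannot occur at these call sites or is already handled inside the macro, which would make the extra check dead weight.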