mirror of https://github.com/PCSX2/pcsx2.git
Code cleanup of some VU clamp functions for SSE4, and reverted my previous useless commit :p
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@576 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
44d1ff527c
commit
5870bb43e9
|
@ -1000,17 +1000,6 @@ void vFloat1(int regd, int regTemp) { //1000
|
||||||
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
|
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
|
||||||
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
|
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
|
||||||
}
|
}
|
||||||
void vFloat1b(int regd, int regTemp) { //1000 //regTemp is Modified
|
|
||||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
|
||||||
CLAMP_NORMAL_SSE4(1);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
|
|
||||||
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
|
|
||||||
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
|
|
||||||
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
void vFloat1c(int regd, int regTemp) { //1000
|
void vFloat1c(int regd, int regTemp) { //1000
|
||||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
||||||
CLAMP_SIGN_SSE4(1);
|
CLAMP_SIGN_SSE4(1);
|
||||||
|
@ -1031,17 +1020,6 @@ void vFloat2(int regd, int regTemp) { //0100
|
||||||
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
|
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
|
||||||
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
|
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
|
||||||
}
|
}
|
||||||
void vFloat2b(int regd, int regTemp) { //0100 //regTemp is Modified
|
|
||||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
|
||||||
CLAMP_NORMAL_SSE4(2);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
|
|
||||||
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
|
|
||||||
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
|
|
||||||
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
void vFloat2c(int regd, int regTemp) { //0100
|
void vFloat2c(int regd, int regTemp) { //0100
|
||||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
||||||
CLAMP_SIGN_SSE4(2);
|
CLAMP_SIGN_SSE4(2);
|
||||||
|
@ -1066,15 +1044,10 @@ void vFloat3(int regd, int regTemp) { //1100
|
||||||
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x36);
|
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x36);
|
||||||
}
|
}
|
||||||
void vFloat3b(int regd, int regTemp) { //1100 //regTemp is Modified
|
void vFloat3b(int regd, int regTemp) { //1100 //regTemp is Modified
|
||||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
SSE2_MOVSD_XMM_to_XMM(regTemp, regd);
|
||||||
CLAMP_NORMAL_SSE4(3);
|
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
|
||||||
}
|
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
|
||||||
else {
|
SSE2_MOVSD_XMM_to_XMM(regd, regTemp);
|
||||||
SSE2_MOVSD_XMM_to_XMM(regTemp, regd);
|
|
||||||
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
|
|
||||||
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
|
|
||||||
SSE2_MOVSD_XMM_to_XMM(regd, regTemp);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
void vFloat3c(int regd, int regTemp) { //1100
|
void vFloat3c(int regd, int regTemp) { //1100
|
||||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
||||||
|
@ -1099,17 +1072,6 @@ void vFloat4(int regd, int regTemp) { //0010
|
||||||
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
|
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
|
||||||
SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
|
SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
|
||||||
}
|
}
|
||||||
void vFloat4b(int regd, int regTemp) { //0010 //regTemp is Modified
|
|
||||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
|
||||||
CLAMP_NORMAL_SSE4(4);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
|
|
||||||
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
|
|
||||||
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
|
|
||||||
SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
void vFloat4c(int regd, int regTemp) { //0010
|
void vFloat4c(int regd, int regTemp) { //0010
|
||||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
||||||
CLAMP_SIGN_SSE4(4);
|
CLAMP_SIGN_SSE4(4);
|
||||||
|
@ -1229,15 +1191,10 @@ void vFloat7_useEAX(int regd, int regTemp) { //1110 //EAX is Modified
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void vFloat7b(int regd, int regTemp) { //1110 //regTemp is Modified
|
void vFloat7b(int regd, int regTemp) { //1110 //regTemp is Modified
|
||||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
SSE_MOVSS_XMM_to_XMM(regTemp, regd);
|
||||||
CLAMP_NORMAL_SSE4(7);
|
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
|
||||||
}
|
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
|
||||||
else {
|
SSE_MOVSS_XMM_to_XMM(regd, regTemp);
|
||||||
SSE_MOVSS_XMM_to_XMM(regTemp, regd);
|
|
||||||
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
|
|
||||||
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
|
|
||||||
SSE_MOVSS_XMM_to_XMM(regd, regTemp);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
void vFloat7c(int regd, int regTemp) { //1110
|
void vFloat7c(int regd, int regTemp) { //1110
|
||||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
||||||
|
@ -1278,15 +1235,6 @@ void vFloat8(int regd, int regTemp) { //0001
|
||||||
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
|
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
|
||||||
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
|
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
|
||||||
}
|
}
|
||||||
void vFloat8b(int regd, int regTemp) { //0001 //regTemp is Modified
|
|
||||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
|
||||||
CLAMP_NORMAL_SSE4(8);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
|
|
||||||
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
void vFloat8c(int regd, int regTemp) { //0001
|
void vFloat8c(int regd, int regTemp) { //0001
|
||||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
||||||
CLAMP_SIGN_SSE4(8);
|
CLAMP_SIGN_SSE4(8);
|
||||||
|
@ -1399,16 +1347,11 @@ void vFloat11_useEAX(int regd, int regTemp) { //1101 //EAX is Modified
|
||||||
SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
|
SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
|
||||||
}
|
}
|
||||||
void vFloat11b(int regd, int regTemp) { //1101 //regTemp is Modified
|
void vFloat11b(int regd, int regTemp) { //1101 //regTemp is Modified
|
||||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
|
||||||
CLAMP_NORMAL_SSE4(11);
|
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
|
||||||
}
|
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
|
||||||
else {
|
SSE_MOVSS_XMM_to_XMM(regTemp, regd);
|
||||||
SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
|
SSE2_MOVSD_XMM_to_XMM(regd, regTemp);
|
||||||
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
|
|
||||||
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
|
|
||||||
SSE_MOVSS_XMM_to_XMM(regTemp, regd);
|
|
||||||
SSE2_MOVSD_XMM_to_XMM(regd, regTemp);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
void vFloat11c(int regd, int regTemp) { //1101
|
void vFloat11c(int regd, int regTemp) { //1101
|
||||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
||||||
|
@ -1455,15 +1398,10 @@ void vFloat12(int regd, int regTemp) { //0011
|
||||||
SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
|
SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
|
||||||
}
|
}
|
||||||
void vFloat12b(int regd, int regTemp) { //0011 //regTemp is Modified
|
void vFloat12b(int regd, int regTemp) { //0011 //regTemp is Modified
|
||||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
SSE_MOVHLPS_XMM_to_XMM(regTemp, regd);
|
||||||
CLAMP_NORMAL_SSE4(12);
|
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
|
||||||
}
|
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
|
||||||
else {
|
SSE2_PUNPCKLQDQ_XMM_to_XMM(regd, regTemp);
|
||||||
SSE_MOVHLPS_XMM_to_XMM(regTemp, regd);
|
|
||||||
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
|
|
||||||
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
|
|
||||||
SSE2_PUNPCKLQDQ_XMM_to_XMM(regd, regTemp);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
void vFloat12c(int regd, int regTemp) { //0011
|
void vFloat12c(int regd, int regTemp) { //0011
|
||||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
||||||
|
@ -1507,16 +1445,11 @@ void vFloat13_useEAX(int regd, int regTemp) { //1011 // EAX is modified
|
||||||
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
|
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
|
||||||
}
|
}
|
||||||
void vFloat13b(int regd, int regTemp) { //1011 //regTemp is Modified
|
void vFloat13b(int regd, int regTemp) { //1011 //regTemp is Modified
|
||||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
|
||||||
CLAMP_NORMAL_SSE4(13);
|
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
|
||||||
}
|
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
|
||||||
else {
|
SSE_MOVHLPS_XMM_to_XMM(regTemp, regd);
|
||||||
SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
|
SSE_SHUFPS_XMM_to_XMM(regd, regTemp, 0x64);
|
||||||
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
|
|
||||||
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
|
|
||||||
SSE_MOVHLPS_XMM_to_XMM(regTemp, regd);
|
|
||||||
SSE_SHUFPS_XMM_to_XMM(regd, regTemp, 0x64);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
void vFloat13c(int regd, int regTemp) { //1011
|
void vFloat13c(int regd, int regTemp) { //1011
|
||||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
||||||
|
@ -1580,16 +1513,11 @@ void vFloat14_useEAX(int regd, int regTemp) { //0111 // EAX is modified
|
||||||
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
|
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
|
||||||
}
|
}
|
||||||
void vFloat14b(int regd, int regTemp) { //0111 //regTemp is Modified
|
void vFloat14b(int regd, int regTemp) { //0111 //regTemp is Modified
|
||||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
|
||||||
CLAMP_NORMAL_SSE4(14);
|
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
|
||||||
}
|
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
|
||||||
else {
|
SSE_MOVHLPS_XMM_to_XMM(regTemp, regd);
|
||||||
SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
|
SSE_SHUFPS_XMM_to_XMM(regd, regTemp, 0xc4);
|
||||||
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
|
|
||||||
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
|
|
||||||
SSE_MOVHLPS_XMM_to_XMM(regTemp, regd);
|
|
||||||
SSE_SHUFPS_XMM_to_XMM(regd, regTemp, 0xc4);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
void vFloat14c(int regd, int regTemp) { //0111
|
void vFloat14c(int regd, int regTemp) { //0111
|
||||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
||||||
|
@ -1631,15 +1559,6 @@ void vFloat15(int regd, int regTemp) { //1111
|
||||||
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
|
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
|
||||||
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
|
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
|
||||||
}
|
}
|
||||||
void vFloat15b(int regd, int regTemp) { //1111
|
|
||||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
|
||||||
CLAMP_NORMAL_SSE4(15);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
|
|
||||||
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
void vFloat15c(int regd, int regTemp) { //1111
|
void vFloat15c(int regd, int regTemp) { //1111
|
||||||
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
if ( cpucaps.hasStreamingSIMD4Extensions ) {
|
||||||
CLAMP_SIGN_SSE4(15);
|
CLAMP_SIGN_SSE4(15);
|
||||||
|
@ -1671,12 +1590,6 @@ vFloat vFloats2[16] = { //regTemp is modified
|
||||||
vFloat8, vFloat9b, vFloat10b, vFloat11b,
|
vFloat8, vFloat9b, vFloat10b, vFloat11b,
|
||||||
vFloat12b, vFloat13b, vFloat14b, vFloat15 };
|
vFloat12b, vFloat13b, vFloat14b, vFloat15 };
|
||||||
|
|
||||||
vFloat vFloats2_MUL_MADD[16] = { //regTemp is modified (Faster than vFloats2 if dealing with Denormals and using SSE4)
|
|
||||||
vFloat0, vFloat1b, vFloat2b, vFloat3b,
|
|
||||||
vFloat4b, vFloat5b, vFloat6b, vFloat7b,
|
|
||||||
vFloat8b, vFloat9b, vFloat10b, vFloat11b,
|
|
||||||
vFloat12b, vFloat13b, vFloat14b, vFloat15b };
|
|
||||||
|
|
||||||
vFloat vFloats4[16] = { //regTemp is modified
|
vFloat vFloats4[16] = { //regTemp is modified
|
||||||
vFloat0, vFloat1c, vFloat2c, vFloat3c,
|
vFloat0, vFloat1c, vFloat2c, vFloat3c,
|
||||||
vFloat4c, vFloat5c, vFloat6c, vFloat7c,
|
vFloat4c, vFloat5c, vFloat6c, vFloat7c,
|
||||||
|
|
|
@ -72,7 +72,6 @@ typedef void (*vFloat)(int regd, int regTemp);
|
||||||
extern vFloat vFloats1[16];
|
extern vFloat vFloats1[16];
|
||||||
extern vFloat vFloats1_useEAX[16];
|
extern vFloat vFloats1_useEAX[16];
|
||||||
extern vFloat vFloats2[16];
|
extern vFloat vFloats2[16];
|
||||||
extern vFloat vFloats2_MUL_MADD[16];
|
|
||||||
extern vFloat vFloats4[16];
|
extern vFloat vFloats4[16];
|
||||||
extern vFloat vFloats4_useEAX[16];
|
extern vFloat vFloats4_useEAX[16];
|
||||||
extern PCSX2_ALIGNED16(float s_fones[8]);
|
extern PCSX2_ALIGNED16(float s_fones[8]);
|
||||||
|
|
|
@ -820,7 +820,7 @@ void recVUMI_ADD(VURegs *VU, int info)
|
||||||
if ( _Fs_ == 0 && _Ft_ == 0 ) { // if adding VF00 with VF00, then the result is always 0,0,0,2
|
if ( _Fs_ == 0 && _Ft_ == 0 ) { // if adding VF00 with VF00, then the result is always 0,0,0,2
|
||||||
if ( _X_Y_Z_W != 0xf ) {
|
if ( _X_Y_Z_W != 0xf ) {
|
||||||
SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, (uptr)s_two);
|
SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, (uptr)s_two);
|
||||||
if( EEREC_D != EEREC_TEMP ) VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
||||||
}
|
}
|
||||||
else SSE_MOVAPS_M128_to_XMM(EEREC_D, (uptr)s_two);
|
else SSE_MOVAPS_M128_to_XMM(EEREC_D, (uptr)s_two);
|
||||||
}
|
}
|
||||||
|
@ -840,7 +840,7 @@ void recVUMI_ADD(VURegs *VU, int info)
|
||||||
else if (_X_Y_Z_W != 0xf) { // If xyzw != 1111, then we have to use a temp reg
|
else if (_X_Y_Z_W != 0xf) { // If xyzw != 1111, then we have to use a temp reg
|
||||||
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
||||||
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
||||||
if( EEREC_D != EEREC_TEMP ) VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
||||||
}
|
}
|
||||||
else { // All xyzw being modified (xyzw == 1111)
|
else { // All xyzw being modified (xyzw == 1111)
|
||||||
if (EEREC_D == EEREC_S) SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_T);
|
if (EEREC_D == EEREC_S) SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||||
|
@ -899,7 +899,7 @@ void recVUMI_ADD_iq(VURegs *VU, uptr addr, int info)
|
||||||
|
|
||||||
if (_X_Y_Z_W != 0xf) {
|
if (_X_Y_Z_W != 0xf) {
|
||||||
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
||||||
if( EEREC_D != EEREC_TEMP ) VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if ( EEREC_D == EEREC_TEMP ) SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
if ( EEREC_D == EEREC_TEMP ) SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||||
|
@ -929,7 +929,7 @@ void recVUMI_ADD_xyzw(VURegs *VU, int xyzw, int info)
|
||||||
if ( _X_Y_Z_W == 0x8 ) { VU_MERGE_REGS(EEREC_D, EEREC_S); }
|
if ( _X_Y_Z_W == 0x8 ) { VU_MERGE_REGS(EEREC_D, EEREC_S); }
|
||||||
else if ( _X_Y_Z_W != 0xf ) {
|
else if ( _X_Y_Z_W != 0xf ) {
|
||||||
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
||||||
if( EEREC_D != EEREC_TEMP ) VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
||||||
}
|
}
|
||||||
else SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
else SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||||
}
|
}
|
||||||
|
@ -949,13 +949,13 @@ void recVUMI_ADD_xyzw(VURegs *VU, int xyzw, int info)
|
||||||
}
|
}
|
||||||
else if( _Fs_ == 0 && !_W ) { // just move
|
else if( _Fs_ == 0 && !_W ) { // just move
|
||||||
_unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
_unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
||||||
if( EEREC_D != EEREC_TEMP ) VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if ( _X_Y_Z_W != 0xf ) {
|
if ( _X_Y_Z_W != 0xf ) {
|
||||||
_unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
_unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
||||||
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
||||||
if( EEREC_D != EEREC_TEMP ) VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if( EEREC_D == EEREC_TEMP ) { _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S); }
|
if( EEREC_D == EEREC_TEMP ) { _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S); }
|
||||||
|
@ -1164,7 +1164,7 @@ void recVUMI_SUB(VURegs *VU, int info)
|
||||||
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
||||||
if( ( _Ft_ > 0 ) || _W ) SSE_SUBPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
if( ( _Ft_ > 0 ) || _W ) SSE_SUBPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
||||||
|
|
||||||
if( EEREC_D != EEREC_TEMP ) VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (EEREC_D == EEREC_S) SSE_SUBPS_XMM_to_XMM(EEREC_D, EEREC_T);
|
if (EEREC_D == EEREC_S) SSE_SUBPS_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||||
|
@ -1238,7 +1238,7 @@ void recVUMI_SUB_iq(VURegs *VU, uptr addr, int info)
|
||||||
// negate
|
// negate
|
||||||
SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]);
|
SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]);
|
||||||
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
||||||
if( EEREC_D != EEREC_TEMP ) VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -1304,7 +1304,7 @@ void recVUMI_SUB_xyzw(VURegs *VU, int xyzw, int info)
|
||||||
// negate
|
// negate
|
||||||
SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]);
|
SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]);
|
||||||
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
||||||
if( EEREC_D != EEREC_TEMP ) VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -1507,7 +1507,7 @@ void recVUMI_MUL_toD(VURegs *VU, int regd, int info)
|
||||||
|
|
||||||
if (_X_Y_Z_W == 1 && (_Ft_ == 0 || _Fs_==0) ) { // W
|
if (_X_Y_Z_W == 1 && (_Ft_ == 0 || _Fs_==0) ) { // W
|
||||||
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, _Ft_ ? EEREC_T : EEREC_S);
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, _Ft_ ? EEREC_T : EEREC_S);
|
||||||
if( regd != EEREC_TEMP ) VU_MERGE_REGS(regd, EEREC_TEMP);
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
||||||
}
|
}
|
||||||
else if( _Fd_ == _Fs_ && _Fs_ == _Ft_ && _XYZW_SS ) {
|
else if( _Fd_ == _Fs_ && _Fs_ == _Ft_ && _XYZW_SS ) {
|
||||||
_vuFlipRegSS(VU, EEREC_D);
|
_vuFlipRegSS(VU, EEREC_D);
|
||||||
|
@ -1526,7 +1526,7 @@ void recVUMI_MUL_toD(VURegs *VU, int regd, int info)
|
||||||
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
||||||
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
||||||
|
|
||||||
if( regd != EEREC_TEMP ) VU_MERGE_REGS(regd, EEREC_TEMP);
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (regd == EEREC_S) SSE_MULPS_XMM_to_XMM(regd, EEREC_T);
|
if (regd == EEREC_S) SSE_MULPS_XMM_to_XMM(regd, EEREC_T);
|
||||||
|
@ -1579,7 +1579,7 @@ void recVUMI_MUL_iq_toD(VURegs *VU, uptr addr, int regd, int info)
|
||||||
|
|
||||||
if (_X_Y_Z_W != 0xf) {
|
if (_X_Y_Z_W != 0xf) {
|
||||||
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
||||||
if( regd != EEREC_TEMP ) VU_MERGE_REGS(regd, EEREC_TEMP);
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if( regd == EEREC_TEMP ) SSE_MULPS_XMM_to_XMM(regd, EEREC_S);
|
if( regd == EEREC_TEMP ) SSE_MULPS_XMM_to_XMM(regd, EEREC_S);
|
||||||
|
@ -1601,13 +1601,13 @@ void recVUMI_MUL_xyzw_toD(VURegs *VU, int xyzw, int regd, int info)
|
||||||
}
|
}
|
||||||
if (_Fs_) { // This is needed for alot of games; so always clamp this operand
|
if (_Fs_) { // This is needed for alot of games; so always clamp this operand
|
||||||
if (CHECK_VU_SIGN_OVERFLOW) vFloats4_useEAX[_X_Y_Z_W]( EEREC_S, EEREC_TEMP ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set
|
if (CHECK_VU_SIGN_OVERFLOW) vFloats4_useEAX[_X_Y_Z_W]( EEREC_S, EEREC_TEMP ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set
|
||||||
else vFloats2_MUL_MADD[_X_Y_Z_W]( EEREC_S, EEREC_TEMP ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set
|
else vFloats2[_X_Y_Z_W]( EEREC_S, EEREC_TEMP ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set
|
||||||
}
|
}
|
||||||
if( _Ft_ == 0 ) {
|
if( _Ft_ == 0 ) {
|
||||||
if( xyzw < 3 ) {
|
if( xyzw < 3 ) {
|
||||||
if (_X_Y_Z_W != 0xf) {
|
if (_X_Y_Z_W != 0xf) {
|
||||||
SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
||||||
if( regd != EEREC_TEMP ) VU_MERGE_REGS(regd, EEREC_TEMP);
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
||||||
}
|
}
|
||||||
else SSE_XORPS_XMM_to_XMM(regd, regd);
|
else SSE_XORPS_XMM_to_XMM(regd, regd);
|
||||||
}
|
}
|
||||||
|
@ -1615,7 +1615,7 @@ void recVUMI_MUL_xyzw_toD(VURegs *VU, int xyzw, int regd, int info)
|
||||||
assert(xyzw==3);
|
assert(xyzw==3);
|
||||||
if (_X_Y_Z_W != 0xf) {
|
if (_X_Y_Z_W != 0xf) {
|
||||||
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
||||||
if( regd != EEREC_TEMP ) VU_MERGE_REGS(regd, EEREC_TEMP);
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
||||||
}
|
}
|
||||||
else SSE_MOVAPS_XMM_to_XMM(regd, EEREC_S);
|
else SSE_MOVAPS_XMM_to_XMM(regd, EEREC_S);
|
||||||
}
|
}
|
||||||
|
@ -1648,7 +1648,7 @@ void recVUMI_MUL_xyzw_toD(VURegs *VU, int xyzw, int regd, int info)
|
||||||
|
|
||||||
if (_X_Y_Z_W != 0xf) {
|
if (_X_Y_Z_W != 0xf) {
|
||||||
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
||||||
if( regd != EEREC_TEMP ) VU_MERGE_REGS(regd, EEREC_TEMP);
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if( regd == EEREC_TEMP ) SSE_MULPS_XMM_to_XMM(regd, EEREC_S);
|
if( regd == EEREC_TEMP ) SSE_MULPS_XMM_to_XMM(regd, EEREC_S);
|
||||||
|
@ -1770,7 +1770,7 @@ void recVUMI_MADD_toD(VURegs *VU, int regd, int info)
|
||||||
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); }
|
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); }
|
||||||
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
|
||||||
|
|
||||||
if( regd != EEREC_TEMP ) VU_MERGE_REGS(regd, EEREC_TEMP);
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if( regd == EEREC_ACC ) {
|
if( regd == EEREC_ACC ) {
|
||||||
|
@ -1857,7 +1857,7 @@ void recVUMI_MADD_iq_toD(VURegs *VU, uptr addr, int regd, int info)
|
||||||
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); }
|
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); }
|
||||||
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
|
||||||
|
|
||||||
if( regd != EEREC_TEMP ) VU_MERGE_REGS(regd, EEREC_TEMP);
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if( regd == EEREC_ACC ) {
|
if( regd == EEREC_ACC ) {
|
||||||
|
@ -1895,7 +1895,7 @@ void recVUMI_MADD_xyzw_toD(VURegs *VU, int xyzw, int regd, int info)
|
||||||
}
|
}
|
||||||
if (_Fs_) { // This is needed for alot of games; so always clamp this operand
|
if (_Fs_) { // This is needed for alot of games; so always clamp this operand
|
||||||
if (CHECK_VU_SIGN_OVERFLOW) vFloats4_useEAX[_X_Y_Z_W]( EEREC_S, EEREC_TEMP ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set
|
if (CHECK_VU_SIGN_OVERFLOW) vFloats4_useEAX[_X_Y_Z_W]( EEREC_S, EEREC_TEMP ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set
|
||||||
else vFloats2_MUL_MADD[_X_Y_Z_W]( EEREC_S, EEREC_TEMP ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set
|
else vFloats2[_X_Y_Z_W]( EEREC_S, EEREC_TEMP ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set
|
||||||
}
|
}
|
||||||
if( _Ft_ == 0 ) {
|
if( _Ft_ == 0 ) {
|
||||||
|
|
||||||
|
@ -1913,7 +1913,7 @@ void recVUMI_MADD_xyzw_toD(VURegs *VU, int xyzw, int regd, int info)
|
||||||
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
||||||
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
|
||||||
|
|
||||||
if( regd != EEREC_TEMP ) VU_MERGE_REGS(regd, EEREC_TEMP);
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if( regd == EEREC_S ) SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC);
|
if( regd == EEREC_S ) SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC);
|
||||||
|
@ -1928,7 +1928,7 @@ void recVUMI_MADD_xyzw_toD(VURegs *VU, int xyzw, int regd, int info)
|
||||||
// just move acc to regd
|
// just move acc to regd
|
||||||
if( _X_Y_Z_W != 0xf ) {
|
if( _X_Y_Z_W != 0xf ) {
|
||||||
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
|
||||||
if( regd != EEREC_TEMP ) VU_MERGE_REGS(regd, EEREC_TEMP);
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
||||||
}
|
}
|
||||||
else SSE_MOVAPS_XMM_to_XMM(regd, EEREC_ACC);
|
else SSE_MOVAPS_XMM_to_XMM(regd, EEREC_ACC);
|
||||||
}
|
}
|
||||||
|
@ -1971,7 +1971,7 @@ void recVUMI_MADD_xyzw_toD(VURegs *VU, int xyzw, int regd, int info)
|
||||||
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); }
|
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); }
|
||||||
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
|
||||||
|
|
||||||
if( regd != EEREC_TEMP ) VU_MERGE_REGS(regd, EEREC_TEMP);
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if( regd == EEREC_ACC ) {
|
if( regd == EEREC_ACC ) {
|
||||||
|
@ -2116,7 +2116,7 @@ void recVUMI_MSUB_toD(VURegs *VU, int regd, int info)
|
||||||
else {
|
else {
|
||||||
SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]);
|
SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]);
|
||||||
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
|
||||||
if( regd != EEREC_TEMP ) VU_MERGE_REGS(regd, EEREC_TEMP);
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -2173,7 +2173,7 @@ void recVUMI_MSUB_temp_toD(VURegs *VU, int regd, int info)
|
||||||
else {
|
else {
|
||||||
SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]);
|
SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]);
|
||||||
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
|
||||||
if( regd != EEREC_TEMP ) VU_MERGE_REGS(regd, EEREC_TEMP);
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -2832,7 +2832,7 @@ void recVUMI_OPMSUB( VURegs *VU, int info )
|
||||||
// negate and add
|
// negate and add
|
||||||
SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]);
|
SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]);
|
||||||
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
|
||||||
if( EEREC_D != EEREC_TEMP ) VU_MERGE_REGS_CUSTOM(EEREC_D, EEREC_TEMP, 14);
|
VU_MERGE_REGS_CUSTOM(EEREC_D, EEREC_TEMP, 14);
|
||||||
|
|
||||||
// revert EEREC_T
|
// revert EEREC_T
|
||||||
if( EEREC_T != EEREC_D ) SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0xC9);
|
if( EEREC_T != EEREC_D ) SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0xC9);
|
||||||
|
|
Loading…
Reference in New Issue