Cleanup work on the faster clamping code, also by tmkk :)

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@637 a6443dda-0b58-4228-96e9-037be469359c
ramapcsx2 2009-01-26 13:19:26 +00:00 committed by Gregory Hainaut
parent e8ec3e1269
commit 4cd211d8ad
4 changed files with 320 additions and 218 deletions


@ -656,8 +656,6 @@ void VU_MERGE3(int dest, int src) { // 1100s
}
void VU_MERGE4(int dest, int src) { // 0010
SSE_MOVSS_XMM_to_XMM(src, dest);
//SSE_SHUFPS_XMM_to_XMM(src, dest, 0xe4);
//SSE_MOVAPS_XMM_to_XMM(dest, src);
SSE2_MOVSD_XMM_to_XMM(dest, src);
}
void VU_MERGE4b(int dest, int src) { // 0010s
@ -909,12 +907,6 @@ void vFloat5(int regd, int regTemp) { //1010
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x2d);
}
void vFloat5b(int regd, int regTemp) { //1010
SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
SSE4_BLENDPS_XMM_to_XMM(regd, regTemp, 0x5);
}
void vFloat5c(int regd, int regTemp) { //1010
SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
@ -936,12 +928,6 @@ void vFloat6(int regd, int regTemp) { //0110
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc9);
}
void vFloat6b(int regd, int regTemp) { //0110
SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
SSE4_BLENDPS_XMM_to_XMM(regd, regTemp, 0x9);
}
void vFloat6c(int regd, int regTemp) { //0110
SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
@ -955,23 +941,27 @@ void vFloat6c(int regd, int regTemp) { //0110
SSE_ORPS_XMM_to_XMM(regd, regTemp);
}
void vFloat7(int regd, int regTemp) { //1110
if ( cpucaps.hasStreamingSIMD4Extensions ) {
SSE2_MOVD_XMM_to_R(EAX, regd);
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x39);
}
void vFloat7_useEAX(int regd, int regTemp) { //1110 //EAX is Modified
SSE2_MOVD_XMM_to_R(EAX, regd);
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
if ( cpucaps.hasStreamingSIMD4Extensions )
SSE4_PINSRD_R32_to_XMM(regd, EAX, 0x00);
}
else {
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x39);
SSE_PINSRW_R32_to_XMM(regd, EAX, 0);
SHR32ItoR(EAX, 16);
SSE_PINSRW_R32_to_XMM(regd, EAX, 1);
}
}
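The *_useEAX variants above show the trick this commit trades on: the one lane that must stay untouched is copied out to EAX with MOVD, all four lanes are clamped with packed MINPS/MAXPS, and the saved lane is written back with PINSRD on SSE4.1-capable CPUs, or with two PINSRW word inserts otherwise. A minimal intrinsics sketch of the same idea (the helper name and the bool flag are illustrative only; the emitter chooses one restore sequence at code-generation time via cpucaps.hasStreamingSIMD4Extensions):

#include <emmintrin.h>   // SSE2: _mm_cvtsi128_si32, _mm_insert_epi16
#include <smmintrin.h>   // SSE4.1: _mm_insert_epi32

// Sketch: clamp lanes y/z/w of v to [-fMax, +fMax] while keeping lane x bit-exact,
// mirroring what vFloat7_useEAX (write mask 1110) emits at runtime.
static __m128 clamp_keep_lane_x(__m128 v, __m128 maxvals, __m128 minvals, bool has_sse4_1)
{
    int saved = _mm_cvtsi128_si32(_mm_castps_si128(v)); // MOVD EAX, xmm : save lane x bits
    v = _mm_min_ps(v, maxvals);                         // clamp all four lanes at once
    v = _mm_max_ps(v, minvals);
    __m128i vi = _mm_castps_si128(v);
    if (has_sse4_1) {
        vi = _mm_insert_epi32(vi, saved, 0);             // PINSRD: restore lane x in one instruction
    } else {
        vi = _mm_insert_epi16(vi, saved & 0xffff, 0);                 // PINSRW: low 16 bits
        vi = _mm_insert_epi16(vi, (int)((unsigned)saved >> 16), 1);   // PINSRW: high 16 bits (the SHR EAX, 16)
    }
    return _mm_castsi128_ps(vi);
}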
void vFloat7b(int regd, int regTemp) { //1110 //regTemp is Modified
@ -981,6 +971,21 @@ void vFloat7b(int regd, int regTemp) { //1110 //regTemp is Modified
SSE_MOVSS_XMM_to_XMM(regd, regTemp);
}
void vFloat7c(int regd, int regTemp) { //1110
SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x39);
SSE_ORPS_XMM_to_XMM(regd, regTemp);
}
void vFloat7c_useEAX(int regd, int regTemp) { //1110 //EAX is Modified
SSE2_MOVD_XMM_to_R(EAX, regd);
SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
@ -1013,12 +1018,6 @@ void vFloat9(int regd, int regTemp) { //1001
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
}
void vFloat9b(int regd, int regTemp) { //1001
SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
SSE4_BLENDPS_XMM_to_XMM(regd, regTemp, 0x6);
}
void vFloat9c(int regd, int regTemp) { //1001
SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
@ -1038,12 +1037,6 @@ void vFloat10(int regd, int regTemp) { //0101
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
}
void vFloat10b(int regd, int regTemp) { //0101
SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
SSE4_BLENDPS_XMM_to_XMM(regd, regTemp, 0xa);
}
void vFloat10c(int regd, int regTemp) { //0101
SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
@ -1056,25 +1049,29 @@ void vFloat10c(int regd, int regTemp) { //0101
SSE_ORPS_XMM_to_XMM(regd, regTemp);
}
void vFloat11(int regd, int regTemp) { //1101
if ( cpucaps.hasStreamingSIMD4Extensions ) {
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1);
SSE2_MOVD_XMM_to_R(EAX, regd);
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x36);
}
void vFloat11_useEAX(int regd, int regTemp) { //1101 //EAX is Modified
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1);
SSE2_MOVD_XMM_to_R(EAX, regd);
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
if ( cpucaps.hasStreamingSIMD4Extensions )
SSE4_PINSRD_R32_to_XMM(regd, EAX, 0x00);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1);
}
else {
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x36);
SSE_PINSRW_R32_to_XMM(regd, EAX, 0);
SHR32ItoR(EAX, 16);
SSE_PINSRW_R32_to_XMM(regd, EAX, 1);
}
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1);
}
void vFloat11b(int regd, int regTemp) { //1101 //regTemp is Modified
SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
@ -1088,6 +1085,20 @@ void vFloat11b(int regd, int regTemp) { //1101 //regTemp is Modified
}
}
void vFloat11c(int regd, int regTemp) { //1101
SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x36);
SSE_ORPS_XMM_to_XMM(regd, regTemp);
}
void vFloat11c_useEAX(int regd, int regTemp) { //1101 // EAX is modified
SSE2_PSHUFD_XMM_to_XMM(regTemp, regd, 0xe1);
SSE2_MOVD_XMM_to_R(EAX, regTemp);
SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
@ -1131,25 +1142,29 @@ void vFloat12c(int regd, int regTemp) { //0011
SSE_ORPS_XMM_to_XMM(regd, regTemp);
}
void vFloat13(int regd, int regTemp) { //1011
if ( cpucaps.hasStreamingSIMD4Extensions ) {
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
SSE2_MOVD_XMM_to_R(EAX, regd);
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x2d);
}
void vFloat13_useEAX(int regd, int regTemp) { //1011 // EAX is modified
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
SSE2_MOVD_XMM_to_R(EAX, regd);
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
if ( cpucaps.hasStreamingSIMD4Extensions )
SSE4_PINSRD_R32_to_XMM(regd, EAX, 0x00);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
}
else {
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x2d);
SSE_PINSRW_R32_to_XMM(regd, EAX, 0);
SHR32ItoR(EAX, 16);
SSE_PINSRW_R32_to_XMM(regd, EAX, 1);
}
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
}
void vFloat13b(int regd, int regTemp) { //1011 //regTemp is Modified
SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
@ -1163,6 +1178,20 @@ void vFloat13b(int regd, int regTemp) { //1011 //regTemp is Modified
}
}
void vFloat13c(int regd, int regTemp) { //1011
SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x2d);
SSE_ORPS_XMM_to_XMM(regd, regTemp);
}
void vFloat13c_useEAX(int regd, int regTemp) { //1011 // EAX is modified
SSE2_PSHUFD_XMM_to_XMM(regTemp, regd, 0xd2);
SSE2_MOVD_XMM_to_R(EAX, regTemp);
SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
@ -1179,25 +1208,29 @@ void vFloat13c(int regd, int regTemp) { //1011
}
}
void vFloat14(int regd, int regTemp) { //0111
if ( cpucaps.hasStreamingSIMD4Extensions ) {
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
SSE2_MOVD_XMM_to_R(EAX, regd);
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc9);
}
void vFloat14_useEAX(int regd, int regTemp) { //0111 // EAX is modified
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
SSE2_MOVD_XMM_to_R(EAX, regd);
SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
if ( cpucaps.hasStreamingSIMD4Extensions )
SSE4_PINSRD_R32_to_XMM(regd, EAX, 0x00);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
}
else {
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc9);
SSE_PINSRW_R32_to_XMM(regd, EAX, 0);
SHR32ItoR(EAX, 16);
SSE_PINSRW_R32_to_XMM(regd, EAX, 1);
}
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
}
void vFloat14b(int regd, int regTemp) { //0111 //regTemp is Modified
SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
@ -1211,6 +1244,20 @@ void vFloat14b(int regd, int regTemp) { //0111 //regTemp is Modified
}
}
void vFloat14c(int regd, int regTemp) { //0111
SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc9);
SSE_ORPS_XMM_to_XMM(regd, regTemp);
}
void vFloat14c_useEAX(int regd, int regTemp) { //0111 // EAX is modified
SSE2_PSHUFD_XMM_to_XMM(regTemp, regd, 0x93);
SSE2_MOVD_XMM_to_R(EAX, regTemp);
SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
@ -1244,6 +1291,12 @@ vFloat vFloats1[16] = { //regTemp is not modified
vFloat8, vFloat9, vFloat10, vFloat11,
vFloat12, vFloat13, vFloat14, vFloat15 };
vFloat vFloats1_useEAX[16] = { //regTemp is not modified but EAX is used
vFloat0, vFloat1, vFloat2, vFloat3,
vFloat4, vFloat5, vFloat6, vFloat7_useEAX,
vFloat8, vFloat9, vFloat10, vFloat11_useEAX,
vFloat12, vFloat13_useEAX, vFloat14_useEAX, vFloat15 };
vFloat vFloats2[16] = { //regTemp is modified
vFloat0, vFloat1, vFloat2, vFloat3b,
vFloat4, vFloat5, vFloat6, vFloat7b,
@ -1256,24 +1309,28 @@ vFloat vFloats4[16] = { //regTemp is modified
vFloat8c, vFloat9c, vFloat10c, vFloat11c,
vFloat1c, vFloat13c, vFloat14c, vFloat15c };
PCSX2_ALIGNED16(u64 vuFloatData[2]);
PCSX2_ALIGNED16(u64 vuFloatData2[2]);
// Makes NaN == 0, Infinities stay the same; Very Slow - Use only for debugging
void vuFloatExtra( int regd, int XYZW) {
int t1reg = (regd == 0) ? (regd + 1) : (regd - 1);
int t2reg = (regd <= 1) ? (regd + 2) : (regd - 2);
SSE_MOVAPS_XMM_to_M128( (uptr)vuFloatData, t1reg );
SSE_MOVAPS_XMM_to_M128( (uptr)vuFloatData2, t2reg );
vFloat vFloats4_useEAX[16] = { //regTemp is modified and EAX is used
vFloat0, vFloat1c, vFloat2c, vFloat3c,
vFloat4c, vFloat5c, vFloat6c, vFloat7c_useEAX,
vFloat8c, vFloat9c, vFloat10c, vFloat11c_useEAX,
vFloat1c, vFloat13c_useEAX, vFloat14c_useEAX, vFloat15c };
SSE_XORPS_XMM_to_XMM(t1reg, t1reg);
SSE_CMPORDPS_XMM_to_XMM(t1reg, regd);
SSE_MOVAPS_XMM_to_XMM(t2reg, regd);
SSE_ANDPS_XMM_to_XMM(t2reg, t1reg);
VU_MERGE_REGS_CUSTOM(regd, t2reg, XYZW);
SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)vuFloatData );
SSE_MOVAPS_M128_to_XMM( t2reg, (uptr)vuFloatData2 );
}
//------------------------------------------------------------------
// Clamping Functions (wrapper for vFloat* functions)
// vuFloat : "normal" clamping
// vuFloat_useEAX : "normal" clamping (faster but EAX is modified)
// vuFloat2 : "normal" clamping (fastest but regTemp is modified)
// vuFloat3 : "preserve sign" clamping for pointer
// vuFloat4 : "preserve sign" clamping (regTemp is modified)
// vuFloat4_useEAX : "preserve sign" clamping (faster but regTemp and EAX are modified)
// vuFloat5 : wrapper function for vuFloat2 and vuFloat4
// vuFloat5_useEAX : wrapper function for vuFloat2 and vuFloat4_useEAX
// vuFloatExtra : for debugging
//
// Notice 1: vuFloat*_useEAX may be slower on AMD CPUs, which have an independent execution pipeline for
// vector and scalar instructions (needs verification)
// Notice 2: recVUMI_MUL_xyzw_toD and recVUMI_MADD_xyzw_toD use vFloats directly!
//------------------------------------------------------------------
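For orientation, the two clamping flavours listed above differ only in what happens to the sign of a NaN: the "normal" path clamps +/-NaN to +fMax and +/-Inf to +/-fMax via MIN/MAX against g_maxvals/g_minvals, while the "preserve sign" path (the *c / vuFloat4 family) first masks off the sign bits and ORs them back afterwards, mirroring the ANDPS/ORPS against const_clip[4] in the functions above. A minimal intrinsics sketch, assuming FLT_MAX stands in for the g_maxvals/g_minvals constants (hypothetical helpers, not part of the file):

#include <xmmintrin.h>
#include <cfloat>

// "normal" clamping: +/-NaN -> +fMax, +/-Inf -> +/-fMax.
// MINPS returns its second operand when the first is NaN, which is what
// turns every NaN lane into +fMax here.
static __m128 clamp_normal(__m128 v)
{
    const __m128 maxvals = _mm_set1_ps( FLT_MAX);
    const __m128 minvals = _mm_set1_ps(-FLT_MAX);
    v = _mm_min_ps(v, maxvals);
    return _mm_max_ps(v, minvals);
}

// "preserve sign" clamping: same as above, but the original sign bit is kept,
// so a negative NaN clamps to -fMax instead of +fMax.
static __m128 clamp_preserve_sign(__m128 v)
{
    const __m128 signbits = _mm_and_ps(v, _mm_set1_ps(-0.0f)); // isolate sign bits
    return _mm_or_ps(clamp_normal(v), signbits);
}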
// Clamps +/-NaN to +fMax and +/-Inf to +/-fMax (doesn't use any temp regs)
void vuFloat( int info, int regd, int XYZW) {
@ -1291,6 +1348,13 @@ void vuFloat( int info, int regd, int XYZW) {
}
}
// Clamps +/-NaN to +fMax and +/-Inf to +/-fMax (uses EAX as a temp register; faster but **destroys EAX**)
void vuFloat_useEAX( int info, int regd, int XYZW) {
if( CHECK_VU_OVERFLOW ) {
vFloats1_useEAX[XYZW](regd, regd);
}
}
// Clamps +/-NaN to +fMax and +/-Inf to +/-fMax (uses a temp reg)
void vuFloat2(int regd, int regTemp, int XYZW) {
if( CHECK_VU_OVERFLOW ) {
@ -1306,6 +1370,13 @@ void vuFloat4(int regd, int regTemp, int XYZW) {
}
}
// Clamps +/-NaN and +/-Inf to +/-fMax (uses a temp reg and EAX as a second temp register; faster but **destroys EAX**)
void vuFloat4_useEAX(int regd, int regTemp, int XYZW) {
if( CHECK_VU_OVERFLOW ) {
vFloats4_useEAX[XYZW](regd, regTemp);
}
}
// Uses vuFloat4 or vuFloat2 depending on the CHECK_VU_SIGN_OVERFLOW setting
void vuFloat5(int regd, int regTemp, int XYZW) {
if (CHECK_VU_SIGN_OVERFLOW) {
@ -1314,6 +1385,14 @@ void vuFloat5(int regd, int regTemp, int XYZW) {
else vuFloat2(regd, regTemp, XYZW);
}
// Uses vuFloat4_useEAX or vuFloat2 depending on the CHECK_VU_SIGN_OVERFLOW setting (uses EAX as a temp register; faster but **destroys EAX**)
void vuFloat5_useEAX(int regd, int regTemp, int XYZW) {
if (CHECK_VU_SIGN_OVERFLOW) {
vuFloat4_useEAX(regd, regTemp, XYZW);
}
else vuFloat2(regd, regTemp, XYZW);
}
// Clamps +/-infs to +/-fMax, and +/-NaNs to +/-fMax
void vuFloat3(uptr x86ptr) {
u8* pjmp;
@ -1331,6 +1410,25 @@ void vuFloat3(uptr x86ptr) {
}
}
PCSX2_ALIGNED16(u64 vuFloatData[2]);
PCSX2_ALIGNED16(u64 vuFloatData2[2]);
// Makes NaN == 0, Infinities stay the same; Very Slow - Use only for debugging
void vuFloatExtra( int regd, int XYZW) {
int t1reg = (regd == 0) ? (regd + 1) : (regd - 1);
int t2reg = (regd <= 1) ? (regd + 2) : (regd - 2);
SSE_MOVAPS_XMM_to_M128( (uptr)vuFloatData, t1reg );
SSE_MOVAPS_XMM_to_M128( (uptr)vuFloatData2, t2reg );
SSE_XORPS_XMM_to_XMM(t1reg, t1reg);
SSE_CMPORDPS_XMM_to_XMM(t1reg, regd);
SSE_MOVAPS_XMM_to_XMM(t2reg, regd);
SSE_ANDPS_XMM_to_XMM(t2reg, t1reg);
VU_MERGE_REGS_CUSTOM(regd, t2reg, XYZW);
SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)vuFloatData );
SSE_MOVAPS_M128_to_XMM( t2reg, (uptr)vuFloatData2 );
}
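The debug-only vuFloatExtra above relies on the CMPORDPS trick: comparing a zeroed register against the value for "ordered" yields an all-ones mask in every lane that is not NaN, and ANDing the value with that mask zeroes exactly the NaN lanes while leaving infinities untouched. A minimal intrinsics sketch of the core (hypothetical helper, for illustration only):

#include <xmmintrin.h>

// NaN lanes become 0; infinities and ordinary values pass through unchanged,
// matching the XORPS/CMPORDPS/ANDPS sequence in vuFloatExtra.
static __m128 zero_nan_lanes(__m128 v)
{
    __m128 ordered = _mm_cmpord_ps(_mm_setzero_ps(), v); // all-ones where v is not NaN
    return _mm_and_ps(v, ordered);                       // NaN lanes -> 0
}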
static PCSX2_ALIGNED16(u32 tempRegX[]) = {0x00000000, 0x00000000, 0x00000000, 0x00000000};
// Called by testWhenOverflow() function


@ -70,6 +70,7 @@ void VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr); // used for MTGS in XGKICK
extern int vucycle;
typedef void (*vFloat)(int regd, int regTemp);
extern vFloat vFloats1[16];
extern vFloat vFloats1_useEAX[16];
extern vFloat vFloats2[16];
extern PCSX2_ALIGNED16(float s_fones[8]);
extern PCSX2_ALIGNED16(u32 s_mask[4]);
@ -81,10 +82,13 @@ extern PCSX2_ALIGNED16(u32 const_clip[8]);
u32 GetVIAddr(VURegs * VU, int reg, int read, int info);
int _vuGetTempXMMreg(int info);
void vuFloat(int info, int regd, int XYZW);
void vuFloat_useEAX(int info, int regd, int XYZW);
void vuFloat2(int regd, int regTemp, int XYZW);
void vuFloat3(uptr x86ptr);
void vuFloat4(int regd, int regTemp, int XYZW);
void vuFloat4_useEAX(int regd, int regTemp, int XYZW);
void vuFloat5(int regd, int regTemp, int XYZW);
void vuFloat5_useEAX(int regd, int regTemp, int XYZW);
void _vuFlipRegSS(VURegs * VU, int reg);
void _vuFlipRegSS_xyzw(int reg, int xyzw);
void _vuMoveSS(VURegs * VU, int dstreg, int srcreg);
@ -279,4 +283,4 @@ void recVUMI_XTOP(VURegs *vuRegs, int info);
void recVUMI_XITOP(VURegs *vuRegs, int info);
void recVUMI_XTOP( VURegs *VU , int info);
#endif /* __IVUMICRO_H__ */
#endif /* __IVUMICRO_H__ */


@ -130,8 +130,8 @@ void recVUMI_DIV(VURegs *VU, int info)
x86SetJ32(ajmp32);
if (CHECK_VU_EXTRA_OVERFLOW) {
vuFloat5(EEREC_S, EEREC_TEMP, (1 << (3-_Fsf_)));
vuFloat5(EEREC_T, EEREC_TEMP, (1 << (3-_Ftf_)));
vuFloat5_useEAX(EEREC_S, EEREC_TEMP, (1 << (3-_Fsf_)));
vuFloat5_useEAX(EEREC_T, EEREC_TEMP, (1 << (3-_Ftf_)));
}
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_);
@ -1722,7 +1722,7 @@ void recVUMI_ESUM( VURegs *VU, int info )
SSE_ADDSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); // x+y+z+w, w+y, w+y, w+y
}
vuFloat(info, EEREC_TEMP, 8);
vuFloat_useEAX(info, EEREC_TEMP, 8);
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP);
}
//------------------------------------------------------------------
@ -1739,34 +1739,34 @@ void recVUMI_ERCPR( VURegs *VU, int info )
// don't use RCPSS (very bad precision)
switch ( _Fsf_ ) {
case 0: //0001
if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5(EEREC_S, EEREC_TEMP, 8);
if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5_useEAX(EEREC_S, EEREC_TEMP, 8);
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); // temp <- 1
SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
break;
case 1: //0010
SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0xe1);
if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5(EEREC_S, EEREC_TEMP, 8);
if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5_useEAX(EEREC_S, EEREC_TEMP, 8);
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); // temp <- 1
SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0xe1);
break;
case 2: //0100
SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0xc6);
if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5(EEREC_S, EEREC_TEMP, 8);
if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5_useEAX(EEREC_S, EEREC_TEMP, 8);
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); // temp <- 1
SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0xc6);
break;
case 3: //1000
SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0x27);
if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5(EEREC_S, EEREC_TEMP, 8);
if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5_useEAX(EEREC_S, EEREC_TEMP, 8);
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); // temp <- 1
SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0x27);
break;
}
vuFloat(info, EEREC_TEMP, 8);
vuFloat_useEAX(info, EEREC_TEMP, 8);
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP);
}
//------------------------------------------------------------------
@ -1809,7 +1809,7 @@ void recVUMI_ERSQRT( VURegs *VU, int info )
{
SSE_MOVSS_M32_to_XMM(t1reg, (uptr)VU_ONE);
SSE_DIVSS_XMM_to_XMM(t1reg, EEREC_TEMP);
vuFloat(info, t1reg, 8);
vuFloat_useEAX(info, t1reg, 8);
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), t1reg);
_freeXMMreg(t1reg);
}
@ -1818,7 +1818,7 @@ void recVUMI_ERSQRT( VURegs *VU, int info )
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP);
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE);
SSE_DIVSS_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(REG_P, 0));
vuFloat(info, EEREC_TEMP, 8);
vuFloat_useEAX(info, EEREC_TEMP, 8);
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP);
}
}


@ -827,8 +827,8 @@ void recVUMI_ADD(VURegs *VU, int info)
}
else {
if (CHECK_VU_EXTRA_OVERFLOW) {
vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
vuFloat5( EEREC_T, EEREC_TEMP, _X_Y_Z_W);
vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W);
}
if( _X_Y_Z_W == 8 ) { // If only adding x, then we can do a Scalar Add
if (EEREC_D == EEREC_S) SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_T);
@ -863,7 +863,7 @@ void recVUMI_ADD_iq(VURegs *VU, uptr addr, int info)
if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP);
if (CHECK_VU_EXTRA_OVERFLOW) {
vuFloat3(addr);
if (_Fs_) vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
}
if ( _XYZW_SS ) {
@ -922,8 +922,8 @@ void recVUMI_ADD_xyzw(VURegs *VU, int xyzw, int info)
if ( _X_Y_Z_W == 0 ) goto flagUpdate;
if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP);
if (CHECK_VU_EXTRA_OVERFLOW) {
if (_Fs_) vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
if (_Ft_) vuFloat5( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) );
if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) );
}
if ( _Ft_ == 0 && xyzw < 3 ) { // just move since adding zero
@ -985,8 +985,8 @@ void recVUMI_ADDA(VURegs *VU, int info)
//SysPrintf("recVUMI_ADDA()\n");
if ( _X_Y_Z_W == 0 ) goto flagUpdate;
if (CHECK_VU_EXTRA_OVERFLOW) {
if (_Fs_) vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
if (_Ft_) vuFloat5( EEREC_T, EEREC_TEMP, _X_Y_Z_W);
if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W);
}
if( _X_Y_Z_W == 8 ) {
@ -1021,7 +1021,7 @@ void recVUMI_ADDA_iq(VURegs *VU, uptr addr, int info)
if ( _X_Y_Z_W == 0 ) goto flagUpdate;
if (CHECK_VU_EXTRA_OVERFLOW) {
vuFloat3(addr);
if (_Fs_) vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
}
if( _XYZW_SS ) {
@ -1072,8 +1072,8 @@ void recVUMI_ADDA_xyzw(VURegs *VU, int xyzw, int info)
//SysPrintf("recVUMI_ADDA_xyzw()\n");
if ( _X_Y_Z_W == 0 ) goto flagUpdate;
if (CHECK_VU_EXTRA_OVERFLOW) {
if (_Fs_) vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
if (_Ft_) vuFloat5( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) );
if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) );
}
if( _X_Y_Z_W == 8 ) {
@ -1137,8 +1137,8 @@ void recVUMI_SUB(VURegs *VU, int info)
}
else if( _X_Y_Z_W == 8 ) {
if (CHECK_VU_EXTRA_OVERFLOW) {
if (_Fs_) vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
if (_Ft_) vuFloat5( EEREC_T, EEREC_TEMP, _X_Y_Z_W);
if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W);
}
if (EEREC_D == EEREC_S) {
if (_Ft_) SSE_SUBSS_XMM_to_XMM(EEREC_D, EEREC_T);
@ -1158,8 +1158,8 @@ void recVUMI_SUB(VURegs *VU, int info)
}
else {
if (CHECK_VU_EXTRA_OVERFLOW) {
if (_Fs_) vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
if (_Ft_) vuFloat5( EEREC_T, EEREC_TEMP, _X_Y_Z_W);
if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W);
}
if (_X_Y_Z_W != 0xf) {
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
@ -1190,7 +1190,7 @@ void recVUMI_SUB_iq(VURegs *VU, uptr addr, int info)
if ( _X_Y_Z_W == 0 ) goto flagUpdate;
if (CHECK_VU_EXTRA_OVERFLOW) {
vuFloat3(addr);
if (_Fs_) vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
}
if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP);
@ -1263,8 +1263,8 @@ void recVUMI_SUB_xyzw(VURegs *VU, int xyzw, int info)
if ( _X_Y_Z_W == 0 ) goto flagUpdate;
if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP);
if (CHECK_VU_EXTRA_OVERFLOW) {
if (_Fs_) vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
if (_Ft_) vuFloat5( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) );
if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) );
}
if ( _X_Y_Z_W == 8 ) {
@ -1340,8 +1340,8 @@ void recVUMI_SUBA(VURegs *VU, int info)
//SysPrintf("recVUMI_SUBA()\n");
if ( _X_Y_Z_W == 0 ) goto flagUpdate;
if (CHECK_VU_EXTRA_OVERFLOW) {
if (_Fs_) vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
if (_Ft_) vuFloat5( EEREC_T, EEREC_TEMP, _X_Y_Z_W);
if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W);
}
if( EEREC_S == EEREC_T ) {
@ -1387,7 +1387,7 @@ void recVUMI_SUBA_iq(VURegs *VU, uptr addr, int info)
//SysPrintf ("recVUMI_SUBA_iq \n");
if (CHECK_VU_EXTRA_OVERFLOW) {
vuFloat3(addr);
if (_Fs_) vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
}
if( _XYZW_SS ) {
@ -1443,8 +1443,8 @@ void recVUMI_SUBA_xyzw(VURegs *VU, int xyzw, int info)
{
//SysPrintf ("recVUMI_SUBA_xyzw \n");
if (CHECK_VU_EXTRA_OVERFLOW) {
if (_Fs_) vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
if (_Ft_) vuFloat5( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) );
if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) );
}
if( _X_Y_Z_W == 8 ) {
@ -1503,8 +1503,8 @@ void recVUMI_MUL_toD(VURegs *VU, int regd, int info)
//SysPrintf ("recVUMI_MUL_toD \n");
if (CHECK_VU_EXTRA_OVERFLOW) {
//using vuFloat instead of vuFloat2 in case regd == EEREC_TEMP
if (_Fs_) vuFloat( info, EEREC_S, _X_Y_Z_W);
if (_Ft_) vuFloat( info, EEREC_T, _X_Y_Z_W);
if (_Fs_) vuFloat_useEAX( info, EEREC_S, _X_Y_Z_W);
if (_Ft_) vuFloat_useEAX( info, EEREC_T, _X_Y_Z_W);
}
if (_X_Y_Z_W == 1 && (_Ft_ == 0 || _Fs_==0) ) { // W
@ -1545,7 +1545,7 @@ void recVUMI_MUL_iq_toD(VURegs *VU, uptr addr, int regd, int info)
//SysPrintf ("recVUMI_MUL_iq_toD \n");
if (CHECK_VU_EXTRA_OVERFLOW) {
vuFloat3(addr);
if (_Fs_) vuFloat( info, EEREC_S, _X_Y_Z_W);
if (_Fs_) vuFloat_useEAX( info, EEREC_S, _X_Y_Z_W);
}
if( _XYZW_SS ) {
@ -1599,10 +1599,10 @@ void recVUMI_MUL_xyzw_toD(VURegs *VU, int xyzw, int regd, int info)
{
//SysPrintf ("recVUMI_MUL_xyzw_toD \n");
if (CHECK_VU_EXTRA_OVERFLOW) {
if (_Ft_) vuFloat( info, EEREC_T, ( 1 << (3 - xyzw) ) );
if (_Ft_) vuFloat_useEAX( info, EEREC_T, ( 1 << (3 - xyzw) ) );
}
// This is needed for a lot of games
vFloats1[_X_Y_Z_W]( EEREC_S, EEREC_S ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set
vFloats1_useEAX[_X_Y_Z_W]( EEREC_S, EEREC_S ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set
if( _Ft_ == 0 ) {
if( xyzw < 3 ) {
@ -1736,39 +1736,39 @@ void recVUMI_MADD_toD(VURegs *VU, int regd, int info)
{
//SysPrintf ("recVUMI_MADD_toD \n");
if (CHECK_VU_EXTRA_OVERFLOW) {
vuFloat( info, EEREC_S, _X_Y_Z_W);
vuFloat( info, EEREC_T, _X_Y_Z_W);
vuFloat( info, regd, _X_Y_Z_W);
vuFloat_useEAX( info, EEREC_S, _X_Y_Z_W);
vuFloat_useEAX( info, EEREC_T, _X_Y_Z_W);
vuFloat_useEAX( info, regd, _X_Y_Z_W);
}
if( _X_Y_Z_W == 8 ) {
if( regd == EEREC_ACC ) {
SSE_MOVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE_MULSS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, 8); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, 8); }
SSE_ADDSS_XMM_to_XMM(regd, EEREC_TEMP);
}
else if (regd == EEREC_T) {
SSE_MULSS_XMM_to_XMM(regd, EEREC_S);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, 8); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, 8); }
SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC);
}
else if (regd == EEREC_S) {
SSE_MULSS_XMM_to_XMM(regd, EEREC_T);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, 8); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, 8); }
SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC);
}
else {
SSE_MOVSS_XMM_to_XMM(regd, EEREC_S);
SSE_MULSS_XMM_to_XMM(regd, EEREC_T);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, 8); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, 8); }
SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC);
}
}
else if (_X_Y_Z_W != 0xf) {
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W); }
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
VU_MERGE_REGS(regd, EEREC_TEMP);
@ -1777,23 +1777,23 @@ void recVUMI_MADD_toD(VURegs *VU, int regd, int info)
if( regd == EEREC_ACC ) {
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W); }
SSE_ADDPS_XMM_to_XMM(regd, EEREC_TEMP);
}
else if (regd == EEREC_T) {
SSE_MULPS_XMM_to_XMM(regd, EEREC_S);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); }
SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC);
}
else if (regd == EEREC_S) {
SSE_MULPS_XMM_to_XMM(regd, EEREC_T);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); }
SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC);
}
else {
SSE_MOVAPS_XMM_to_XMM(regd, EEREC_S);
SSE_MULPS_XMM_to_XMM(regd, EEREC_T);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); }
SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC);
}
}
@ -1804,8 +1804,8 @@ void recVUMI_MADD_iq_toD(VURegs *VU, uptr addr, int regd, int info)
//SysPrintf ("recVUMI_MADD_iq_toD \n");
if (CHECK_VU_EXTRA_OVERFLOW) {
vuFloat3(addr);
vuFloat( info, EEREC_S, _X_Y_Z_W);
vuFloat( info, regd, _X_Y_Z_W);
vuFloat_useEAX( info, EEREC_S, _X_Y_Z_W);
vuFloat_useEAX( info, regd, _X_Y_Z_W);
}
if( _X_Y_Z_W == 8 ) {
@ -1820,19 +1820,19 @@ void recVUMI_MADD_iq_toD(VURegs *VU, uptr addr, int regd, int info)
assert( EEREC_TEMP < XMMREGS );
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr);
SSE_MULSS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, 8); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, 8); }
SSE_ADDSS_XMM_to_XMM(regd, EEREC_TEMP);
}
}
else if( regd == EEREC_S ) {
SSE_MULSS_M32_to_XMM(regd, addr);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); }
SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC);
}
else {
SSE_MOVSS_XMM_to_XMM(regd, EEREC_S);
SSE_MULSS_M32_to_XMM(regd, addr);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); }
SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC);
}
}
@ -1855,7 +1855,7 @@ void recVUMI_MADD_iq_toD(VURegs *VU, uptr addr, int regd, int info)
if (_X_Y_Z_W != 0xf) {
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W); }
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
VU_MERGE_REGS(regd, EEREC_TEMP);
@ -1863,24 +1863,24 @@ void recVUMI_MADD_iq_toD(VURegs *VU, uptr addr, int regd, int info)
else {
if( regd == EEREC_ACC ) {
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W); }
SSE_ADDPS_XMM_to_XMM(regd, EEREC_TEMP);
}
else if( regd == EEREC_S ) {
SSE_MULPS_XMM_to_XMM(regd, EEREC_TEMP);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); }
SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC);
}
else if( regd == EEREC_TEMP ) {
SSE_MULPS_XMM_to_XMM(regd, EEREC_S);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); }
SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC);
}
else {
SSE_MOVSS_M32_to_XMM(regd, addr);
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x00);
SSE_MULPS_XMM_to_XMM(regd, EEREC_S);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); }
SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC);
}
}
@ -1891,12 +1891,12 @@ void recVUMI_MADD_xyzw_toD(VURegs *VU, int xyzw, int regd, int info)
{
//SysPrintf ("recVUMI_MADD_xyzw_toD \n");
if (CHECK_VU_EXTRA_OVERFLOW) {
vuFloat( info, EEREC_T, ( 1 << (3 - xyzw) ) );
vuFloat( info, EEREC_ACC, _X_Y_Z_W);
vuFloat( info, regd, _X_Y_Z_W);
vuFloat_useEAX( info, EEREC_T, ( 1 << (3 - xyzw) ) );
vuFloat_useEAX( info, EEREC_ACC, _X_Y_Z_W);
vuFloat_useEAX( info, regd, _X_Y_Z_W);
}
// This is needed for a lot of games
vFloats1[_X_Y_Z_W]( EEREC_S, EEREC_S ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set
vFloats1_useEAX[_X_Y_Z_W]( EEREC_S, EEREC_S ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set
if( _Ft_ == 0 ) {
@ -1942,23 +1942,23 @@ void recVUMI_MADD_xyzw_toD(VURegs *VU, int xyzw, int regd, int info)
if( regd == EEREC_ACC ) {
SSE_MULSS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, 8); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, 8); }
SSE_ADDSS_XMM_to_XMM(regd, EEREC_TEMP);
}
else if( regd == EEREC_S ) {
SSE_MULSS_XMM_to_XMM(regd, EEREC_TEMP);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, 8); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, 8); }
SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC);
}
else if( regd == EEREC_TEMP ) {
SSE_MULSS_XMM_to_XMM(regd, EEREC_S);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, 8); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, 8); }
SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC);
}
else {
SSE_MOVSS_XMM_to_XMM(regd, EEREC_ACC);
SSE_MULSS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, 8); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, 8); }
SSE_ADDSS_XMM_to_XMM(regd, EEREC_TEMP);
}
}
@ -1969,7 +1969,7 @@ void recVUMI_MADD_xyzw_toD(VURegs *VU, int xyzw, int regd, int info)
if (_X_Y_Z_W != 0xf) {
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W); }
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
VU_MERGE_REGS(regd, EEREC_TEMP);
@ -1977,23 +1977,23 @@ void recVUMI_MADD_xyzw_toD(VURegs *VU, int xyzw, int regd, int info)
else {
if( regd == EEREC_ACC ) {
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W); }
SSE_ADDPS_XMM_to_XMM(regd, EEREC_TEMP);
}
else if( regd == EEREC_S ) {
SSE_MULPS_XMM_to_XMM(regd, EEREC_TEMP);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); }
SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC);
}
else if( regd == EEREC_TEMP ) {
SSE_MULPS_XMM_to_XMM(regd, EEREC_S);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); }
SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC);
}
else {
_unpackVF_xyzw(regd, EEREC_T, xyzw);
SSE_MULPS_XMM_to_XMM(regd, EEREC_S);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); }
SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC);
}
}
@ -2095,9 +2095,9 @@ void recVUMI_MSUB_toD(VURegs *VU, int regd, int info)
{
//SysPrintf ("recVUMI_MSUB_toD \n");
if (CHECK_VU_EXTRA_OVERFLOW) {
vuFloat( info, EEREC_S, _X_Y_Z_W);
vuFloat( info, EEREC_T, _X_Y_Z_W);
vuFloat( info, regd, _X_Y_Z_W);
vuFloat_useEAX( info, EEREC_S, _X_Y_Z_W);
vuFloat_useEAX( info, EEREC_T, _X_Y_Z_W);
vuFloat_useEAX( info, regd, _X_Y_Z_W);
}
if (_X_Y_Z_W != 0xf) {
@ -2105,7 +2105,7 @@ void recVUMI_MSUB_toD(VURegs *VU, int regd, int info)
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W); }
if( t1reg >= 0 ) {
SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_ACC);
@ -2124,21 +2124,21 @@ void recVUMI_MSUB_toD(VURegs *VU, int regd, int info)
if( regd == EEREC_S ) {
assert( regd != EEREC_ACC );
SSE_MULPS_XMM_to_XMM(regd, EEREC_T);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); }
SSE_SUBPS_XMM_to_XMM(regd, EEREC_ACC);
SSE_XORPS_M128_to_XMM(regd, (uptr)&const_clip[4]);
}
else if( regd == EEREC_T ) {
assert( regd != EEREC_ACC );
SSE_MULPS_XMM_to_XMM(regd, EEREC_S);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); }
SSE_SUBPS_XMM_to_XMM(regd, EEREC_ACC);
SSE_XORPS_M128_to_XMM(regd, (uptr)&const_clip[4]);
}
else if( regd == EEREC_TEMP ) {
SSE_MOVAPS_XMM_to_XMM(regd, EEREC_S);
SSE_MULPS_XMM_to_XMM(regd, EEREC_T);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); }
SSE_SUBPS_XMM_to_XMM(regd, EEREC_ACC);
SSE_XORPS_M128_to_XMM(regd, (uptr)&const_clip[4]);
}
@ -2146,7 +2146,7 @@ void recVUMI_MSUB_toD(VURegs *VU, int regd, int info)
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE_MOVAPS_XMM_to_XMM(regd, EEREC_ACC);
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W); }
SSE_SUBPS_XMM_to_XMM(regd, EEREC_TEMP);
}
}
@ -2156,16 +2156,16 @@ void recVUMI_MSUB_temp_toD(VURegs *VU, int regd, int info)
{
//SysPrintf ("recVUMI_MSUB_temp_toD \n");
if (CHECK_VU_EXTRA_OVERFLOW) {
vuFloat( info, EEREC_S, _X_Y_Z_W);
vuFloat( info, EEREC_ACC, _X_Y_Z_W);
vuFloat( info, regd, _X_Y_Z_W);
vuFloat_useEAX( info, EEREC_S, _X_Y_Z_W);
vuFloat_useEAX( info, EEREC_ACC, _X_Y_Z_W);
vuFloat_useEAX( info, regd, _X_Y_Z_W);
}
if (_X_Y_Z_W != 0xf) {
int t1reg = _vuGetTempXMMreg(info);
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W); }
if( t1reg >= 0 ) {
SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_ACC);
@ -2185,25 +2185,25 @@ void recVUMI_MSUB_temp_toD(VURegs *VU, int regd, int info)
else {
if( regd == EEREC_ACC ) {
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W); }
SSE_SUBPS_XMM_to_XMM(regd, EEREC_TEMP);
}
else if( regd == EEREC_S ) {
SSE_MULPS_XMM_to_XMM(regd, EEREC_TEMP);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); }
SSE_SUBPS_XMM_to_XMM(regd, EEREC_ACC);
SSE_XORPS_M128_to_XMM(regd, (uptr)&const_clip[4]);
}
else if( regd == EEREC_TEMP ) {
SSE_MULPS_XMM_to_XMM(regd, EEREC_S);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); }
SSE_SUBPS_XMM_to_XMM(regd, EEREC_ACC);
SSE_XORPS_M128_to_XMM(regd, (uptr)&const_clip[4]);
}
else {
SSE_MOVAPS_XMM_to_XMM(regd, EEREC_ACC);
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W); }
SSE_SUBPS_XMM_to_XMM(regd, EEREC_TEMP);
}
}
@ -2339,8 +2339,8 @@ void recVUMI_MAX(VURegs *VU, int info)
//SysPrintf ("recVUMI_MAX \n");
if (CHECK_VU_EXTRA_OVERFLOW) {
vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
vuFloat5( EEREC_T, EEREC_TEMP, _X_Y_Z_W);
vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W);
}
if( _X_Y_Z_W == 8 ) {
@ -2373,7 +2373,7 @@ void recVUMI_MAX_iq(VURegs *VU, uptr addr, int info)
//SysPrintf ("recVUMI_MAX_iq \n");
if (CHECK_VU_EXTRA_OVERFLOW) {
vuFloat( info, EEREC_S, _X_Y_Z_W);
vuFloat_useEAX( info, EEREC_S, _X_Y_Z_W);
vuFloat3(addr);
}
@ -2433,8 +2433,8 @@ void recVUMI_MAX_xyzw(VURegs *VU, int xyzw, int info)
//SysPrintf ("recVUMI_MAX_xyzw \n");
if (CHECK_VU_EXTRA_OVERFLOW) {
vuFloat( info, EEREC_S, _X_Y_Z_W);
vuFloat( info, EEREC_T, ( 1 << (3 - xyzw) ) );
vuFloat_useEAX( info, EEREC_S, _X_Y_Z_W);
vuFloat_useEAX( info, EEREC_T, ( 1 << (3 - xyzw) ) );
}
if( _X_Y_Z_W == 8 && (EEREC_D != EEREC_TEMP)) {
@ -2513,7 +2513,7 @@ void recVUMI_MINI(VURegs *VU, int info)
//SysPrintf ("recVUMI_MINI \n");
if( _X_Y_Z_W == 8 ) {
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat5( EEREC_S, EEREC_TEMP, 8); vuFloat5( EEREC_T, EEREC_TEMP, 8); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat5_useEAX( EEREC_S, EEREC_TEMP, 8); vuFloat5_useEAX( EEREC_T, EEREC_TEMP, 8); }
if (EEREC_D == EEREC_S) SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_T);
else if (EEREC_D == EEREC_T) SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_S);
else {
@ -2522,14 +2522,14 @@ void recVUMI_MINI(VURegs *VU, int info)
}
}
else if (_X_Y_Z_W != 0xf) {
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W); vuFloat5( EEREC_T, EEREC_TEMP, _X_Y_Z_W); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W); vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W); }
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE_MINPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
}
else {
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat5( EEREC_S, EEREC_TEMP, 0xf); vuFloat5( EEREC_T, EEREC_TEMP, 0xf); }
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat5_useEAX( EEREC_S, EEREC_TEMP, 0xf); vuFloat5_useEAX( EEREC_T, EEREC_TEMP, 0xf); }
if( EEREC_D == EEREC_S ) {
//ClampUnordered(EEREC_T, EEREC_TEMP, 0); // needed for GT4 vu0rec
SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_T);
@ -2551,7 +2551,7 @@ void recVUMI_MINI_iq(VURegs *VU, uptr addr, int info)
//SysPrintf ("recVUMI_MINI_iq \n");
if (CHECK_VU_EXTRA_OVERFLOW) {
vuFloat( info, EEREC_S, _X_Y_Z_W);
vuFloat_useEAX( info, EEREC_S, _X_Y_Z_W);
vuFloat3(addr);
}
@ -2611,8 +2611,8 @@ void recVUMI_MINI_xyzw(VURegs *VU, int xyzw, int info)
//SysPrintf ("recVUMI_MINI_xyzw \n");
if (CHECK_VU_EXTRA_OVERFLOW) {
vuFloat( info, EEREC_S, _X_Y_Z_W);
vuFloat( info, EEREC_T, ( 1 << (3 - xyzw) ) );
vuFloat_useEAX( info, EEREC_S, _X_Y_Z_W);
vuFloat_useEAX( info, EEREC_T, ( 1 << (3 - xyzw) ) );
}
if( _X_Y_Z_W == 8 && (EEREC_D != EEREC_TEMP)) {
@ -2662,8 +2662,8 @@ void recVUMI_OPMULA( VURegs *VU, int info )
{
//SysPrintf ("recVUMI_OPMULA \n");
if (CHECK_VU_EXTRA_OVERFLOW) {
vuFloat5( EEREC_S, EEREC_TEMP, 0xE);
vuFloat5( EEREC_T, EEREC_TEMP, 0xE);
vuFloat5_useEAX( EEREC_S, EEREC_TEMP, 0xE);
vuFloat5_useEAX( EEREC_T, EEREC_TEMP, 0xE);
}
SSE_MOVAPS_XMM_to_XMM( EEREC_TEMP, EEREC_S );
@ -2689,8 +2689,8 @@ void recVUMI_OPMSUB( VURegs *VU, int info )
{
//SysPrintf ("recVUMI_OPMSUB \n");
if (CHECK_VU_EXTRA_OVERFLOW) {
vuFloat5( EEREC_S, EEREC_TEMP, 0xE);
vuFloat5( EEREC_T, EEREC_TEMP, 0xE);
vuFloat5_useEAX( EEREC_S, EEREC_TEMP, 0xE);
vuFloat5_useEAX( EEREC_T, EEREC_TEMP, 0xE);
}
if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP);
@ -2768,7 +2768,7 @@ void recVUMI_FTOI0(VURegs *VU, int info)
if (_X_Y_Z_W != 0xf) {
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
vuFloat( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax)
vuFloat_useEAX( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax)
SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
t1reg = _vuGetTempXMMreg(info);
@ -2803,7 +2803,7 @@ void recVUMI_FTOI0(VURegs *VU, int info)
else {
if (EEREC_T != EEREC_S) {
SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_S);
vuFloat( info, EEREC_T, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax)
vuFloat_useEAX( info, EEREC_T, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax)
SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_T);
t1reg = _vuGetTempXMMreg(info);
@ -2824,7 +2824,7 @@ void recVUMI_FTOI0(VURegs *VU, int info)
}
else {
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
vuFloat( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax)
vuFloat_useEAX( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax)
SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
t1reg = _vuGetTempXMMreg(info);
@ -2869,7 +2869,7 @@ void recVUMI_FTOIX(VURegs *VU, int addr, int info)
if (_X_Y_Z_W != 0xf) {
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE_MULPS_M128_to_XMM(EEREC_TEMP, addr);
vuFloat( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax)
vuFloat_useEAX( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax)
SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
t1reg = _vuGetTempXMMreg(info);
@ -2905,7 +2905,7 @@ void recVUMI_FTOIX(VURegs *VU, int addr, int info)
if (EEREC_T != EEREC_S) {
SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_S);
SSE_MULPS_M128_to_XMM(EEREC_T, addr);
vuFloat( info, EEREC_T, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax)
vuFloat_useEAX( info, EEREC_T, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax)
SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_T);
t1reg = _vuGetTempXMMreg(info);
@ -2927,7 +2927,7 @@ void recVUMI_FTOIX(VURegs *VU, int addr, int info)
else {
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE_MULPS_M128_to_XMM(EEREC_TEMP, addr);
vuFloat( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax)
vuFloat_useEAX( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax)
SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
t1reg = _vuGetTempXMMreg(info);
@ -2978,7 +2978,7 @@ void recVUMI_ITOF0( VURegs *VU, int info )
//SysPrintf ("recVUMI_ITOF0 \n");
if (_X_Y_Z_W != 0xf) {
SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
vuFloat( info, EEREC_TEMP, 15); // Clamp infinities
vuFloat_useEAX( info, EEREC_TEMP, 15); // Clamp infinities
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
xmmregs[EEREC_T].mode |= MODE_WRITE;
}
@ -2996,7 +2996,7 @@ void recVUMI_ITOFX(VURegs *VU, int addr, int info)
if (_X_Y_Z_W != 0xf) {
SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE_MULPS_M128_to_XMM(EEREC_TEMP, addr);
vuFloat( info, EEREC_TEMP, 15); // Clamp infinities
vuFloat_useEAX( info, EEREC_TEMP, 15); // Clamp infinities
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
xmmregs[EEREC_T].mode |= MODE_WRITE;
}