From 4cd211d8adaccc98be7d997781b13840702e7aca Mon Sep 17 00:00:00 2001 From: ramapcsx2 Date: Mon, 26 Jan 2009 13:19:26 +0000 Subject: [PATCH] Cleanup work on the faster clamping code, also by tmkk :) git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@637 a6443dda-0b58-4228-96e9-037be469359c --- pcsx2/x86/iVUmicro.cpp | 310 ++++++++++++++++++++++++------------ pcsx2/x86/iVUmicro.h | 6 +- pcsx2/x86/iVUmicroLower.cpp | 20 +-- pcsx2/x86/iVUmicroUpper.cpp | 202 +++++++++++------------ 4 files changed, 320 insertions(+), 218 deletions(-) diff --git a/pcsx2/x86/iVUmicro.cpp b/pcsx2/x86/iVUmicro.cpp index e5f9be2fe5..e0ebdd6a21 100644 --- a/pcsx2/x86/iVUmicro.cpp +++ b/pcsx2/x86/iVUmicro.cpp @@ -656,8 +656,6 @@ void VU_MERGE3(int dest, int src) { // 1100s } void VU_MERGE4(int dest, int src) { // 0010 SSE_MOVSS_XMM_to_XMM(src, dest); - //SSE_SHUFPS_XMM_to_XMM(src, dest, 0xe4); - //SSE_MOVAPS_XMM_to_XMM(dest, src); SSE2_MOVSD_XMM_to_XMM(dest, src); } void VU_MERGE4b(int dest, int src) { // 0010s @@ -909,12 +907,6 @@ void vFloat5(int regd, int regTemp) { //1010 SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x2d); } -void vFloat5b(int regd, int regTemp) { //1010 - SSE_MOVAPS_XMM_to_XMM(regTemp, regd); - SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals); - SSE4_BLENDPS_XMM_to_XMM(regd, regTemp, 0x5); -} void vFloat5c(int regd, int regTemp) { //1010 SSE_MOVAPS_XMM_to_XMM(regTemp, regd); SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]); @@ -936,12 +928,6 @@ void vFloat6(int regd, int regTemp) { //0110 SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc9); } -void vFloat6b(int regd, int regTemp) { //0110 - SSE_MOVAPS_XMM_to_XMM(regTemp, regd); - SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals); - SSE4_BLENDPS_XMM_to_XMM(regd, regTemp, 0x9); -} void vFloat6c(int regd, int regTemp) { //0110 SSE_MOVAPS_XMM_to_XMM(regTemp, regd); SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]); @@ -955,23 +941,27 @@ void vFloat6c(int regd, int regTemp) { //0110 SSE_ORPS_XMM_to_XMM(regd, regTemp); } void vFloat7(int regd, int regTemp) { //1110 - if ( cpucaps.hasStreamingSIMD4Extensions ) { - SSE2_MOVD_XMM_to_R(EAX, regd); - SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x39); +} +void vFloat7_useEAX(int regd, int regTemp) { //1110 //EAX is Modified + SSE2_MOVD_XMM_to_R(EAX, regd); + SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals); + if ( cpucaps.hasStreamingSIMD4Extensions ) SSE4_PINSRD_R32_to_XMM(regd, EAX, 0x00); - } else { - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1); - SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); - SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); - SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); - 
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x39); + SSE_PINSRW_R32_to_XMM(regd, EAX, 0); + SHR32ItoR(EAX, 16); + SSE_PINSRW_R32_to_XMM(regd, EAX, 1); } } void vFloat7b(int regd, int regTemp) { //1110 //regTemp is Modified @@ -981,6 +971,21 @@ void vFloat7b(int regd, int regTemp) { //1110 //regTemp is Modified SSE_MOVSS_XMM_to_XMM(regd, regTemp); } void vFloat7c(int regd, int regTemp) { //1110 + SSE_MOVAPS_XMM_to_XMM(regTemp, regd); + SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x39); + SSE_ORPS_XMM_to_XMM(regd, regTemp); +} +void vFloat7c_useEAX(int regd, int regTemp) { //1110 //EAX is Modified SSE2_MOVD_XMM_to_R(EAX, regd); SSE_MOVAPS_XMM_to_XMM(regTemp, regd); SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]); @@ -1013,12 +1018,6 @@ void vFloat9(int regd, int regTemp) { //1001 SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); } -void vFloat9b(int regd, int regTemp) { //1001 - SSE_MOVAPS_XMM_to_XMM(regTemp, regd); - SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals); - SSE4_BLENDPS_XMM_to_XMM(regd, regTemp, 0x6); -} void vFloat9c(int regd, int regTemp) { //1001 SSE_MOVAPS_XMM_to_XMM(regTemp, regd); SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]); @@ -1038,12 +1037,6 @@ void vFloat10(int regd, int regTemp) { //0101 SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); } -void vFloat10b(int regd, int regTemp) { //0101 - SSE_MOVAPS_XMM_to_XMM(regTemp, regd); - SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals); - SSE4_BLENDPS_XMM_to_XMM(regd, regTemp, 0xa); -} void vFloat10c(int regd, int regTemp) { //0101 SSE_MOVAPS_XMM_to_XMM(regTemp, regd); SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]); @@ -1056,25 +1049,29 @@ void vFloat10c(int regd, int regTemp) { //0101 SSE_ORPS_XMM_to_XMM(regd, regTemp); } void vFloat11(int regd, int regTemp) { //1101 - if ( cpucaps.hasStreamingSIMD4Extensions ) { - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1); - SSE2_MOVD_XMM_to_R(EAX, regd); - SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x36); +} +void vFloat11_useEAX(int regd, int regTemp) { //1101 //EAX is Modified + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1); + SSE2_MOVD_XMM_to_R(EAX, regd); + SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals); + if ( cpucaps.hasStreamingSIMD4Extensions ) SSE4_PINSRD_R32_to_XMM(regd, EAX, 0x00); - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1); - } else { - SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); - 
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); - SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x36); + SSE_PINSRW_R32_to_XMM(regd, EAX, 0); + SHR32ItoR(EAX, 16); + SSE_PINSRW_R32_to_XMM(regd, EAX, 1); } + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1); } void vFloat11b(int regd, int regTemp) { //1101 //regTemp is Modified SSE_MOVAPS_XMM_to_XMM(regTemp, regd); @@ -1088,6 +1085,20 @@ void vFloat11b(int regd, int regTemp) { //1101 //regTemp is Modified } } void vFloat11c(int regd, int regTemp) { //1101 + SSE_MOVAPS_XMM_to_XMM(regTemp, regd); + SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x36); + SSE_ORPS_XMM_to_XMM(regd, regTemp); +} +void vFloat11c_useEAX(int regd, int regTemp) { //1101 // EAX is modified SSE2_PSHUFD_XMM_to_XMM(regTemp, regd, 0xe1); SSE2_MOVD_XMM_to_R(EAX, regTemp); SSE_MOVAPS_XMM_to_XMM(regTemp, regd); @@ -1131,25 +1142,29 @@ void vFloat12c(int regd, int regTemp) { //0011 SSE_ORPS_XMM_to_XMM(regd, regTemp); } void vFloat13(int regd, int regTemp) { //1011 - if ( cpucaps.hasStreamingSIMD4Extensions ) { - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); - SSE2_MOVD_XMM_to_R(EAX, regd); - SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x2d); +} +void vFloat13_useEAX(int regd, int regTemp) { //1011 // EAX is modified + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); + SSE2_MOVD_XMM_to_R(EAX, regd); + SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals); + if ( cpucaps.hasStreamingSIMD4Extensions ) SSE4_PINSRD_R32_to_XMM(regd, EAX, 0x00); - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); - } else { - SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1); - SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); - SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x2d); + SSE_PINSRW_R32_to_XMM(regd, EAX, 0); + SHR32ItoR(EAX, 16); + SSE_PINSRW_R32_to_XMM(regd, EAX, 1); } + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); } void vFloat13b(int regd, int regTemp) { //1011 //regTemp is Modified SSE_MOVAPS_XMM_to_XMM(regTemp, regd); @@ -1163,6 +1178,20 @@ void vFloat13b(int regd, int regTemp) { //1011 //regTemp is Modified } } void vFloat13c(int regd, int regTemp) { //1011 + SSE_MOVAPS_XMM_to_XMM(regTemp, regd); + SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + 
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x2d); + SSE_ORPS_XMM_to_XMM(regd, regTemp); +} +void vFloat13c_useEAX(int regd, int regTemp) { //1011 // EAX is modified SSE2_PSHUFD_XMM_to_XMM(regTemp, regd, 0xd2); SSE2_MOVD_XMM_to_R(EAX, regTemp); SSE_MOVAPS_XMM_to_XMM(regTemp, regd); @@ -1179,25 +1208,29 @@ void vFloat13c(int regd, int regTemp) { //1011 } } void vFloat14(int regd, int regTemp) { //0111 - if ( cpucaps.hasStreamingSIMD4Extensions ) { - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); - SSE2_MOVD_XMM_to_R(EAX, regd); - SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc9); +} +void vFloat14_useEAX(int regd, int regTemp) { //0111 // EAX is modified + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); + SSE2_MOVD_XMM_to_R(EAX, regd); + SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals); + if ( cpucaps.hasStreamingSIMD4Extensions ) SSE4_PINSRD_R32_to_XMM(regd, EAX, 0x00); - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); - } else { - SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1); - SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); - SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc9); + SSE_PINSRW_R32_to_XMM(regd, EAX, 0); + SHR32ItoR(EAX, 16); + SSE_PINSRW_R32_to_XMM(regd, EAX, 1); } + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); } void vFloat14b(int regd, int regTemp) { //0111 //regTemp is Modified SSE_MOVAPS_XMM_to_XMM(regTemp, regd); @@ -1211,6 +1244,20 @@ void vFloat14b(int regd, int regTemp) { //0111 //regTemp is Modified } } void vFloat14c(int regd, int regTemp) { //0111 + SSE_MOVAPS_XMM_to_XMM(regTemp, regd); + SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc9); + SSE_ORPS_XMM_to_XMM(regd, regTemp); +} +void vFloat14c_useEAX(int regd, int regTemp) { //0111 // EAX is modified SSE2_PSHUFD_XMM_to_XMM(regTemp, regd, 0x93); SSE2_MOVD_XMM_to_R(EAX, regTemp); SSE_MOVAPS_XMM_to_XMM(regTemp, regd); @@ -1244,6 +1291,12 @@ vFloat vFloats1[16] = { //regTemp is not modified vFloat8, vFloat9, vFloat10, vFloat11, vFloat12, vFloat13, vFloat14, vFloat15 }; +vFloat vFloats1_useEAX[16] = { //regTemp is not modified but EAX is used + vFloat0, vFloat1, vFloat2, vFloat3, + 
vFloat4, vFloat5, vFloat6, vFloat7_useEAX, + vFloat8, vFloat9, vFloat10, vFloat11_useEAX, + vFloat12, vFloat13_useEAX, vFloat14_useEAX, vFloat15 }; + vFloat vFloats2[16] = { //regTemp is modified vFloat0, vFloat1, vFloat2, vFloat3b, vFloat4, vFloat5, vFloat6, vFloat7b, @@ -1256,24 +1309,28 @@ vFloat vFloats4[16] = { //regTemp is modified vFloat8c, vFloat9c, vFloat10c, vFloat11c, vFloat1c, vFloat13c, vFloat14c, vFloat15c }; -PCSX2_ALIGNED16(u64 vuFloatData[2]); -PCSX2_ALIGNED16(u64 vuFloatData2[2]); - // Makes NaN == 0, Infinities stay the same; Very Slow - Use only for debugging -void vuFloatExtra( int regd, int XYZW) { - int t1reg = (regd == 0) ? (regd + 1) : (regd - 1); - int t2reg = (regd <= 1) ? (regd + 2) : (regd - 2); - SSE_MOVAPS_XMM_to_M128( (uptr)vuFloatData, t1reg ); - SSE_MOVAPS_XMM_to_M128( (uptr)vuFloatData2, t2reg ); +vFloat vFloats4_useEAX[16] = { //regTemp is modified and EAX is used + vFloat0, vFloat1c, vFloat2c, vFloat3c, + vFloat4c, vFloat5c, vFloat6c, vFloat7c_useEAX, + vFloat8c, vFloat9c, vFloat10c, vFloat11c_useEAX, + vFloat1c, vFloat13c_useEAX, vFloat14c_useEAX, vFloat15c }; - SSE_XORPS_XMM_to_XMM(t1reg, t1reg); - SSE_CMPORDPS_XMM_to_XMM(t1reg, regd); - SSE_MOVAPS_XMM_to_XMM(t2reg, regd); - SSE_ANDPS_XMM_to_XMM(t2reg, t1reg); - VU_MERGE_REGS_CUSTOM(regd, t2reg, XYZW); - - SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)vuFloatData ); - SSE_MOVAPS_M128_to_XMM( t2reg, (uptr)vuFloatData2 ); -} +//------------------------------------------------------------------ +// Clamping Functions (wrapper for vFloat* functions) +// vuFloat : "normal" clamping +// vuFloat_useEAX : "normal" clamping (faster but EAX is modified) +// vuFloat2 : "normal" clamping (fastest but regTemp is modified) +// vuFloat3 : "preserve sign" clamping for pointer +// vuFloat4 : "preserve sign" clamping (regTemp is modified) +// vuFloat4_useEAX : "preserve sign" clamping (faster but regTemp and EAX are modified) +// vuFloat5 : wrapper function for vuFloat2 and vuFloat4 +// vuFloat5_useEAX : wrapper function for vuFloat2 and vuFloat4_useEAX +// vuFloatExtra : for debugging +// +// Notice 1: vuFloat*_useEAX may be slower on AMD CPUs, which have independent execution pipeline for +// vector and scalar instructions (need checks) +// Notice 2: recVUMI_MUL_xyzw_toD and recVUMI_MADD_xyzw_toD use vFloats directly! 
+//------------------------------------------------------------------ // Clamps +/-NaN to +fMax and +/-Inf to +/-fMax (doesn't use any temp regs) void vuFloat( int info, int regd, int XYZW) { @@ -1291,6 +1348,13 @@ void vuFloat( int info, int regd, int XYZW) { } } +// Clamps +/-NaN to +fMax and +/-Inf to +/-fMax (uses EAX as a temp register; faster but **destroyes EAX**) +void vuFloat_useEAX( int info, int regd, int XYZW) { + if( CHECK_VU_OVERFLOW ) { + vFloats1_useEAX[XYZW](regd, regd); + } +} + // Clamps +/-NaN to +fMax and +/-Inf to +/-fMax (uses a temp reg) void vuFloat2(int regd, int regTemp, int XYZW) { if( CHECK_VU_OVERFLOW ) { @@ -1306,6 +1370,13 @@ void vuFloat4(int regd, int regTemp, int XYZW) { } } +// Clamps +/-NaN and +/-Inf to +/-fMax (uses a temp reg, and uses EAX as a temp register; faster but **destroyes EAX**) +void vuFloat4_useEAX(int regd, int regTemp, int XYZW) { + if( CHECK_VU_OVERFLOW ) { + vFloats4_useEAX[XYZW](regd, regTemp); + } +} + // Uses vuFloat4 or vuFloat2 depending on the CHECK_VU_SIGN_OVERFLOW setting void vuFloat5(int regd, int regTemp, int XYZW) { if (CHECK_VU_SIGN_OVERFLOW) { @@ -1314,6 +1385,14 @@ void vuFloat5(int regd, int regTemp, int XYZW) { else vuFloat2(regd, regTemp, XYZW); } +// Uses vuFloat4_useEAX or vuFloat2 depending on the CHECK_VU_SIGN_OVERFLOW setting (uses EAX as a temp register; faster but **destoroyes EAX**) +void vuFloat5_useEAX(int regd, int regTemp, int XYZW) { + if (CHECK_VU_SIGN_OVERFLOW) { + vuFloat4_useEAX(regd, regTemp, XYZW); + } + else vuFloat2(regd, regTemp, XYZW); +} + // Clamps +/-infs to +/-fMax, and +/-NaNs to +/-fMax void vuFloat3(uptr x86ptr) { u8* pjmp; @@ -1331,6 +1410,25 @@ void vuFloat3(uptr x86ptr) { } } +PCSX2_ALIGNED16(u64 vuFloatData[2]); +PCSX2_ALIGNED16(u64 vuFloatData2[2]); +// Makes NaN == 0, Infinities stay the same; Very Slow - Use only for debugging +void vuFloatExtra( int regd, int XYZW) { + int t1reg = (regd == 0) ? (regd + 1) : (regd - 1); + int t2reg = (regd <= 1) ? 
(regd + 2) : (regd - 2); + SSE_MOVAPS_XMM_to_M128( (uptr)vuFloatData, t1reg ); + SSE_MOVAPS_XMM_to_M128( (uptr)vuFloatData2, t2reg ); + + SSE_XORPS_XMM_to_XMM(t1reg, t1reg); + SSE_CMPORDPS_XMM_to_XMM(t1reg, regd); + SSE_MOVAPS_XMM_to_XMM(t2reg, regd); + SSE_ANDPS_XMM_to_XMM(t2reg, t1reg); + VU_MERGE_REGS_CUSTOM(regd, t2reg, XYZW); + + SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)vuFloatData ); + SSE_MOVAPS_M128_to_XMM( t2reg, (uptr)vuFloatData2 ); +} + static PCSX2_ALIGNED16(u32 tempRegX[]) = {0x00000000, 0x00000000, 0x00000000, 0x00000000}; // Called by testWhenOverflow() function diff --git a/pcsx2/x86/iVUmicro.h b/pcsx2/x86/iVUmicro.h index c44e1ff291..f5f15d1cc0 100644 --- a/pcsx2/x86/iVUmicro.h +++ b/pcsx2/x86/iVUmicro.h @@ -70,6 +70,7 @@ void VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr); // used for MTGS in XGKICK extern int vucycle; typedef void (*vFloat)(int regd, int regTemp); extern vFloat vFloats1[16]; +extern vFloat vFloats1_useEAX[16]; extern vFloat vFloats2[16]; extern PCSX2_ALIGNED16(float s_fones[8]); extern PCSX2_ALIGNED16(u32 s_mask[4]); @@ -81,10 +82,13 @@ extern PCSX2_ALIGNED16(u32 const_clip[8]); u32 GetVIAddr(VURegs * VU, int reg, int read, int info); int _vuGetTempXMMreg(int info); void vuFloat(int info, int regd, int XYZW); +void vuFloat_useEAX(int regd, int regTemp, int XYZW); void vuFloat2(int regd, int regTemp, int XYZW); void vuFloat3(uptr x86ptr); void vuFloat4(int regd, int regTemp, int XYZW); +void vuFloat4_useEAX(int regd, int regTemp, int XYZW); void vuFloat5(int regd, int regTemp, int XYZW); +void vuFloat5_useEAX(int regd, int regTemp, int XYZW); void _vuFlipRegSS(VURegs * VU, int reg); void _vuFlipRegSS_xyzw(int reg, int xyzw); void _vuMoveSS(VURegs * VU, int dstreg, int srcreg); @@ -279,4 +283,4 @@ void recVUMI_XTOP(VURegs *vuRegs, int info); void recVUMI_XITOP(VURegs *vuRegs, int info); void recVUMI_XTOP( VURegs *VU , int info); -#endif /* __IVUMICRO_H__ */ +#endif /* __IVUMICRO_H__ */ \ No newline at end of file diff --git a/pcsx2/x86/iVUmicroLower.cpp b/pcsx2/x86/iVUmicroLower.cpp index 4216abea5d..42e8a22eb1 100644 --- a/pcsx2/x86/iVUmicroLower.cpp +++ b/pcsx2/x86/iVUmicroLower.cpp @@ -130,8 +130,8 @@ void recVUMI_DIV(VURegs *VU, int info) x86SetJ32(ajmp32); if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat5(EEREC_S, EEREC_TEMP, (1 << (3-_Fsf_))); - vuFloat5(EEREC_T, EEREC_TEMP, (1 << (3-_Ftf_))); + vuFloat5_useEAX(EEREC_S, EEREC_TEMP, (1 << (3-_Fsf_))); + vuFloat5_useEAX(EEREC_T, EEREC_TEMP, (1 << (3-_Ftf_))); } _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); @@ -1722,7 +1722,7 @@ void recVUMI_ESUM( VURegs *VU, int info ) SSE_ADDSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); // x+y+z+w, w+y, w+y, w+y } - vuFloat(info, EEREC_TEMP, 8); + vuFloat_useEAX(info, EEREC_TEMP, 8); SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); } //------------------------------------------------------------------ @@ -1739,34 +1739,34 @@ void recVUMI_ERCPR( VURegs *VU, int info ) // don't use RCPSS (very bad precision) switch ( _Fsf_ ) { case 0: //0001 - if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5(EEREC_S, EEREC_TEMP, 8); + if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5_useEAX(EEREC_S, EEREC_TEMP, 8); SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); // temp <- 1 SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); break; case 1: //0010 SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0xe1); - if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5(EEREC_S, EEREC_TEMP, 8); + if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5_useEAX(EEREC_S, EEREC_TEMP, 8); SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); // temp <- 1 SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); 
SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0xe1); break; case 2: //0100 SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0xc6); - if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5(EEREC_S, EEREC_TEMP, 8); + if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5_useEAX(EEREC_S, EEREC_TEMP, 8); SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); // temp <- 1 SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0xc6); break; case 3: //1000 SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0x27); - if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5(EEREC_S, EEREC_TEMP, 8); + if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5_useEAX(EEREC_S, EEREC_TEMP, 8); SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); // temp <- 1 SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0x27); break; } - vuFloat(info, EEREC_TEMP, 8); + vuFloat_useEAX(info, EEREC_TEMP, 8); SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); } //------------------------------------------------------------------ @@ -1809,7 +1809,7 @@ void recVUMI_ERSQRT( VURegs *VU, int info ) { SSE_MOVSS_M32_to_XMM(t1reg, (uptr)VU_ONE); SSE_DIVSS_XMM_to_XMM(t1reg, EEREC_TEMP); - vuFloat(info, t1reg, 8); + vuFloat_useEAX(info, t1reg, 8); SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), t1reg); _freeXMMreg(t1reg); } @@ -1818,7 +1818,7 @@ void recVUMI_ERSQRT( VURegs *VU, int info ) SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); SSE_DIVSS_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(REG_P, 0)); - vuFloat(info, EEREC_TEMP, 8); + vuFloat_useEAX(info, EEREC_TEMP, 8); SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); } } diff --git a/pcsx2/x86/iVUmicroUpper.cpp b/pcsx2/x86/iVUmicroUpper.cpp index 396281ea5f..4936f6e1a8 100644 --- a/pcsx2/x86/iVUmicroUpper.cpp +++ b/pcsx2/x86/iVUmicroUpper.cpp @@ -827,8 +827,8 @@ void recVUMI_ADD(VURegs *VU, int info) } else { if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W); - vuFloat5( EEREC_T, EEREC_TEMP, _X_Y_Z_W); + vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W); + vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W); } if( _X_Y_Z_W == 8 ) { // If only adding x, then we can do a Scalar Add if (EEREC_D == EEREC_S) SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_T); @@ -863,7 +863,7 @@ void recVUMI_ADD_iq(VURegs *VU, uptr addr, int info) if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP); if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat3(addr); - if (_Fs_) vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W); + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W); } if ( _XYZW_SS ) { @@ -922,8 +922,8 @@ void recVUMI_ADD_xyzw(VURegs *VU, int xyzw, int info) if ( _X_Y_Z_W == 0 ) goto flagUpdate; if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP); if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W); - if (_Ft_) vuFloat5( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W); + if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); } if ( _Ft_ == 0 && xyzw < 3 ) { // just move since adding zero @@ -985,8 +985,8 @@ void recVUMI_ADDA(VURegs *VU, int info) //SysPrintf("recVUMI_ADDA()\n"); if ( _X_Y_Z_W == 0 ) goto flagUpdate; if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W); - if (_Ft_) vuFloat5( EEREC_T, EEREC_TEMP, _X_Y_Z_W); + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W); + if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W); } if( _X_Y_Z_W == 8 ) { @@ -1021,7 +1021,7 @@ void 
recVUMI_ADDA_iq(VURegs *VU, uptr addr, int info) if ( _X_Y_Z_W == 0 ) goto flagUpdate; if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat3(addr); - if (_Fs_) vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W); + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W); } if( _XYZW_SS ) { @@ -1072,8 +1072,8 @@ void recVUMI_ADDA_xyzw(VURegs *VU, int xyzw, int info) //SysPrintf("recVUMI_ADDA_xyzw()\n"); if ( _X_Y_Z_W == 0 ) goto flagUpdate; if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W); - if (_Ft_) vuFloat5( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W); + if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); } if( _X_Y_Z_W == 8 ) { @@ -1137,8 +1137,8 @@ void recVUMI_SUB(VURegs *VU, int info) } else if( _X_Y_Z_W == 8 ) { if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W); - if (_Ft_) vuFloat5( EEREC_T, EEREC_TEMP, _X_Y_Z_W); + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W); + if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W); } if (EEREC_D == EEREC_S) { if (_Ft_) SSE_SUBSS_XMM_to_XMM(EEREC_D, EEREC_T); @@ -1158,8 +1158,8 @@ void recVUMI_SUB(VURegs *VU, int info) } else { if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W); - if (_Ft_) vuFloat5( EEREC_T, EEREC_TEMP, _X_Y_Z_W); + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W); + if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W); } if (_X_Y_Z_W != 0xf) { SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); @@ -1190,7 +1190,7 @@ void recVUMI_SUB_iq(VURegs *VU, uptr addr, int info) if ( _X_Y_Z_W == 0 ) goto flagUpdate; if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat3(addr); - if (_Fs_) vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W); + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W); } if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP); @@ -1263,8 +1263,8 @@ void recVUMI_SUB_xyzw(VURegs *VU, int xyzw, int info) if ( _X_Y_Z_W == 0 ) goto flagUpdate; if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP); if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W); - if (_Ft_) vuFloat5( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W); + if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); } if ( _X_Y_Z_W == 8 ) { @@ -1340,8 +1340,8 @@ void recVUMI_SUBA(VURegs *VU, int info) //SysPrintf("recVUMI_SUBA()\n"); if ( _X_Y_Z_W == 0 ) goto flagUpdate; if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W); - if (_Ft_) vuFloat5( EEREC_T, EEREC_TEMP, _X_Y_Z_W); + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W); + if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W); } if( EEREC_S == EEREC_T ) { @@ -1387,7 +1387,7 @@ void recVUMI_SUBA_iq(VURegs *VU, uptr addr, int info) //SysPrintf ("recVUMI_SUBA_iq \n"); if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat3(addr); - if (_Fs_) vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W); + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W); } if( _XYZW_SS ) { @@ -1443,8 +1443,8 @@ void recVUMI_SUBA_xyzw(VURegs *VU, int xyzw, int info) { //SysPrintf ("recVUMI_SUBA_xyzw \n"); if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W); - if (_Ft_) vuFloat5( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W); + if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); } if( 
_X_Y_Z_W == 8 ) { @@ -1503,8 +1503,8 @@ void recVUMI_MUL_toD(VURegs *VU, int regd, int info) //SysPrintf ("recVUMI_MUL_toD \n"); if (CHECK_VU_EXTRA_OVERFLOW) { //using vuFloat instead of vuFloat2 incase regd == EEREC_TEMP - if (_Fs_) vuFloat( info, EEREC_S, _X_Y_Z_W); - if (_Ft_) vuFloat( info, EEREC_T, _X_Y_Z_W); + if (_Fs_) vuFloat_useEAX( info, EEREC_S, _X_Y_Z_W); + if (_Ft_) vuFloat_useEAX( info, EEREC_T, _X_Y_Z_W); } if (_X_Y_Z_W == 1 && (_Ft_ == 0 || _Fs_==0) ) { // W @@ -1545,7 +1545,7 @@ void recVUMI_MUL_iq_toD(VURegs *VU, uptr addr, int regd, int info) //SysPrintf ("recVUMI_MUL_iq_toD \n"); if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat3(addr); - if (_Fs_) vuFloat( info, EEREC_S, _X_Y_Z_W); + if (_Fs_) vuFloat_useEAX( info, EEREC_S, _X_Y_Z_W); } if( _XYZW_SS ) { @@ -1599,10 +1599,10 @@ void recVUMI_MUL_xyzw_toD(VURegs *VU, int xyzw, int regd, int info) { //SysPrintf ("recVUMI_MUL_xyzw_toD \n"); if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Ft_) vuFloat( info, EEREC_T, ( 1 << (3 - xyzw) ) ); + if (_Ft_) vuFloat_useEAX( info, EEREC_T, ( 1 << (3 - xyzw) ) ); } // This is needed for alot of games - vFloats1[_X_Y_Z_W]( EEREC_S, EEREC_S ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set + vFloats1_useEAX[_X_Y_Z_W]( EEREC_S, EEREC_S ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set if( _Ft_ == 0 ) { if( xyzw < 3 ) { @@ -1736,39 +1736,39 @@ void recVUMI_MADD_toD(VURegs *VU, int regd, int info) { //SysPrintf ("recVUMI_MADD_toD \n"); if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat( info, EEREC_S, _X_Y_Z_W); - vuFloat( info, EEREC_T, _X_Y_Z_W); - vuFloat( info, regd, _X_Y_Z_W); + vuFloat_useEAX( info, EEREC_S, _X_Y_Z_W); + vuFloat_useEAX( info, EEREC_T, _X_Y_Z_W); + vuFloat_useEAX( info, regd, _X_Y_Z_W); } if( _X_Y_Z_W == 8 ) { if( regd == EEREC_ACC ) { SSE_MOVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); SSE_MULSS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, 8); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, 8); } SSE_ADDSS_XMM_to_XMM(regd, EEREC_TEMP); } else if (regd == EEREC_T) { SSE_MULSS_XMM_to_XMM(regd, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, 8); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, 8); } SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC); } else if (regd == EEREC_S) { SSE_MULSS_XMM_to_XMM(regd, EEREC_T); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, 8); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, 8); } SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC); } else { SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); SSE_MULSS_XMM_to_XMM(regd, EEREC_T); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, 8); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, 8); } SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC); } } else if (_X_Y_Z_W != 0xf) { SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W); } SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC); VU_MERGE_REGS(regd, EEREC_TEMP); @@ -1777,23 +1777,23 @@ void recVUMI_MADD_toD(VURegs *VU, int regd, int info) if( regd == EEREC_ACC ) { SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W); } SSE_ADDPS_XMM_to_XMM(regd, EEREC_TEMP); } else if (regd == EEREC_T) { 
SSE_MULPS_XMM_to_XMM(regd, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); } SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); } else if (regd == EEREC_S) { SSE_MULPS_XMM_to_XMM(regd, EEREC_T); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); } SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); } else { SSE_MOVAPS_XMM_to_XMM(regd, EEREC_S); SSE_MULPS_XMM_to_XMM(regd, EEREC_T); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); } SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); } } @@ -1804,8 +1804,8 @@ void recVUMI_MADD_iq_toD(VURegs *VU, uptr addr, int regd, int info) //SysPrintf ("recVUMI_MADD_iq_toD \n"); if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat3(addr); - vuFloat( info, EEREC_S, _X_Y_Z_W); - vuFloat( info, regd, _X_Y_Z_W); + vuFloat_useEAX( info, EEREC_S, _X_Y_Z_W); + vuFloat_useEAX( info, regd, _X_Y_Z_W); } if( _X_Y_Z_W == 8 ) { @@ -1820,19 +1820,19 @@ void recVUMI_MADD_iq_toD(VURegs *VU, uptr addr, int regd, int info) assert( EEREC_TEMP < XMMREGS ); SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); SSE_MULSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, 8); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, 8); } SSE_ADDSS_XMM_to_XMM(regd, EEREC_TEMP); } } else if( regd == EEREC_S ) { SSE_MULSS_M32_to_XMM(regd, addr); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); } SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC); } else { SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); SSE_MULSS_M32_to_XMM(regd, addr); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); } SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC); } } @@ -1855,7 +1855,7 @@ void recVUMI_MADD_iq_toD(VURegs *VU, uptr addr, int regd, int info) if (_X_Y_Z_W != 0xf) { SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W); } SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC); VU_MERGE_REGS(regd, EEREC_TEMP); @@ -1863,24 +1863,24 @@ void recVUMI_MADD_iq_toD(VURegs *VU, uptr addr, int regd, int info) else { if( regd == EEREC_ACC ) { SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W); } SSE_ADDPS_XMM_to_XMM(regd, EEREC_TEMP); } else if( regd == EEREC_S ) { SSE_MULPS_XMM_to_XMM(regd, EEREC_TEMP); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); } SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); } else if( regd == EEREC_TEMP ) { SSE_MULPS_XMM_to_XMM(regd, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); } SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); } else { SSE_MOVSS_M32_to_XMM(regd, addr); SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x00); SSE_MULPS_XMM_to_XMM(regd, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); } SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); } } @@ -1891,12 
+1891,12 @@ void recVUMI_MADD_xyzw_toD(VURegs *VU, int xyzw, int regd, int info) { //SysPrintf ("recVUMI_MADD_xyzw_toD \n"); if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat( info, EEREC_T, ( 1 << (3 - xyzw) ) ); - vuFloat( info, EEREC_ACC, _X_Y_Z_W); - vuFloat( info, regd, _X_Y_Z_W); + vuFloat_useEAX( info, EEREC_T, ( 1 << (3 - xyzw) ) ); + vuFloat_useEAX( info, EEREC_ACC, _X_Y_Z_W); + vuFloat_useEAX( info, regd, _X_Y_Z_W); } // This is needed for alot of games - vFloats1[_X_Y_Z_W]( EEREC_S, EEREC_S ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set + vFloats1_useEAX[_X_Y_Z_W]( EEREC_S, EEREC_S ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set if( _Ft_ == 0 ) { @@ -1942,23 +1942,23 @@ void recVUMI_MADD_xyzw_toD(VURegs *VU, int xyzw, int regd, int info) if( regd == EEREC_ACC ) { SSE_MULSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, 8); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, 8); } SSE_ADDSS_XMM_to_XMM(regd, EEREC_TEMP); } else if( regd == EEREC_S ) { SSE_MULSS_XMM_to_XMM(regd, EEREC_TEMP); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, 8); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, 8); } SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC); } else if( regd == EEREC_TEMP ) { SSE_MULSS_XMM_to_XMM(regd, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, 8); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, 8); } SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC); } else { SSE_MOVSS_XMM_to_XMM(regd, EEREC_ACC); SSE_MULSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, 8); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, 8); } SSE_ADDSS_XMM_to_XMM(regd, EEREC_TEMP); } } @@ -1969,7 +1969,7 @@ void recVUMI_MADD_xyzw_toD(VURegs *VU, int xyzw, int regd, int info) if (_X_Y_Z_W != 0xf) { SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W); } SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC); VU_MERGE_REGS(regd, EEREC_TEMP); @@ -1977,23 +1977,23 @@ void recVUMI_MADD_xyzw_toD(VURegs *VU, int xyzw, int regd, int info) else { if( regd == EEREC_ACC ) { SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W); } SSE_ADDPS_XMM_to_XMM(regd, EEREC_TEMP); } else if( regd == EEREC_S ) { SSE_MULPS_XMM_to_XMM(regd, EEREC_TEMP); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); } SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); } else if( regd == EEREC_TEMP ) { SSE_MULPS_XMM_to_XMM(regd, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); } SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); } else { _unpackVF_xyzw(regd, EEREC_T, xyzw); SSE_MULPS_XMM_to_XMM(regd, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); } SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); } } @@ -2095,9 +2095,9 @@ void recVUMI_MSUB_toD(VURegs *VU, int regd, int info) { //SysPrintf ("recVUMI_MSUB_toD \n"); if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat( info, EEREC_S, _X_Y_Z_W); - vuFloat( info, EEREC_T, _X_Y_Z_W); - vuFloat( info, regd, 
_X_Y_Z_W); + vuFloat_useEAX( info, EEREC_S, _X_Y_Z_W); + vuFloat_useEAX( info, EEREC_T, _X_Y_Z_W); + vuFloat_useEAX( info, regd, _X_Y_Z_W); } if (_X_Y_Z_W != 0xf) { @@ -2105,7 +2105,7 @@ void recVUMI_MSUB_toD(VURegs *VU, int regd, int info) SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W); } if( t1reg >= 0 ) { SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_ACC); @@ -2124,21 +2124,21 @@ void recVUMI_MSUB_toD(VURegs *VU, int regd, int info) if( regd == EEREC_S ) { assert( regd != EEREC_ACC ); SSE_MULPS_XMM_to_XMM(regd, EEREC_T); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); } SSE_SUBPS_XMM_to_XMM(regd, EEREC_ACC); SSE_XORPS_M128_to_XMM(regd, (uptr)&const_clip[4]); } else if( regd == EEREC_T ) { assert( regd != EEREC_ACC ); SSE_MULPS_XMM_to_XMM(regd, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); } SSE_SUBPS_XMM_to_XMM(regd, EEREC_ACC); SSE_XORPS_M128_to_XMM(regd, (uptr)&const_clip[4]); } else if( regd == EEREC_TEMP ) { SSE_MOVAPS_XMM_to_XMM(regd, EEREC_S); SSE_MULPS_XMM_to_XMM(regd, EEREC_T); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); } SSE_SUBPS_XMM_to_XMM(regd, EEREC_ACC); SSE_XORPS_M128_to_XMM(regd, (uptr)&const_clip[4]); } @@ -2146,7 +2146,7 @@ void recVUMI_MSUB_toD(VURegs *VU, int regd, int info) SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); SSE_MOVAPS_XMM_to_XMM(regd, EEREC_ACC); SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W); } SSE_SUBPS_XMM_to_XMM(regd, EEREC_TEMP); } } @@ -2156,16 +2156,16 @@ void recVUMI_MSUB_temp_toD(VURegs *VU, int regd, int info) { //SysPrintf ("recVUMI_MSUB_temp_toD \n"); if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat( info, EEREC_S, _X_Y_Z_W); - vuFloat( info, EEREC_ACC, _X_Y_Z_W); - vuFloat( info, regd, _X_Y_Z_W); + vuFloat_useEAX( info, EEREC_S, _X_Y_Z_W); + vuFloat_useEAX( info, EEREC_ACC, _X_Y_Z_W); + vuFloat_useEAX( info, regd, _X_Y_Z_W); } if (_X_Y_Z_W != 0xf) { int t1reg = _vuGetTempXMMreg(info); SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W); } if( t1reg >= 0 ) { SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_ACC); @@ -2185,25 +2185,25 @@ void recVUMI_MSUB_temp_toD(VURegs *VU, int regd, int info) else { if( regd == EEREC_ACC ) { SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W); } SSE_SUBPS_XMM_to_XMM(regd, EEREC_TEMP); } else if( regd == EEREC_S ) { SSE_MULPS_XMM_to_XMM(regd, EEREC_TEMP); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W); } SSE_SUBPS_XMM_to_XMM(regd, EEREC_ACC); SSE_XORPS_M128_to_XMM(regd, (uptr)&const_clip[4]); } else if( regd == EEREC_TEMP ) { SSE_MULPS_XMM_to_XMM(regd, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, regd, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { 
vuFloat_useEAX( info, regd, _X_Y_Z_W); } SSE_SUBPS_XMM_to_XMM(regd, EEREC_ACC); SSE_XORPS_M128_to_XMM(regd, (uptr)&const_clip[4]); } else { SSE_MOVAPS_XMM_to_XMM(regd, EEREC_ACC); SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat( info, EEREC_TEMP, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W); } SSE_SUBPS_XMM_to_XMM(regd, EEREC_TEMP); } } @@ -2339,8 +2339,8 @@ void recVUMI_MAX(VURegs *VU, int info) //SysPrintf ("recVUMI_MAX \n"); if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W); - vuFloat5( EEREC_T, EEREC_TEMP, _X_Y_Z_W); + vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W); + vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W); } if( _X_Y_Z_W == 8 ) { @@ -2373,7 +2373,7 @@ void recVUMI_MAX_iq(VURegs *VU, uptr addr, int info) //SysPrintf ("recVUMI_MAX_iq \n"); if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat( info, EEREC_S, _X_Y_Z_W); + vuFloat_useEAX( info, EEREC_S, _X_Y_Z_W); vuFloat3(addr); } @@ -2433,8 +2433,8 @@ void recVUMI_MAX_xyzw(VURegs *VU, int xyzw, int info) //SysPrintf ("recVUMI_MAX_xyzw \n"); if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat( info, EEREC_S, _X_Y_Z_W); - vuFloat( info, EEREC_T, ( 1 << (3 - xyzw) ) ); + vuFloat_useEAX( info, EEREC_S, _X_Y_Z_W); + vuFloat_useEAX( info, EEREC_T, ( 1 << (3 - xyzw) ) ); } if( _X_Y_Z_W == 8 && (EEREC_D != EEREC_TEMP)) { @@ -2513,7 +2513,7 @@ void recVUMI_MINI(VURegs *VU, int info) //SysPrintf ("recVUMI_MINI \n"); if( _X_Y_Z_W == 8 ) { - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat5( EEREC_S, EEREC_TEMP, 8); vuFloat5( EEREC_T, EEREC_TEMP, 8); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat5_useEAX( EEREC_S, EEREC_TEMP, 8); vuFloat5_useEAX( EEREC_T, EEREC_TEMP, 8); } if (EEREC_D == EEREC_S) SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_T); else if (EEREC_D == EEREC_T) SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_S); else { @@ -2522,14 +2522,14 @@ void recVUMI_MINI(VURegs *VU, int info) } } else if (_X_Y_Z_W != 0xf) { - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W); vuFloat5( EEREC_T, EEREC_TEMP, _X_Y_Z_W); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W); vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W); } SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); SSE_MINPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); VU_MERGE_REGS(EEREC_D, EEREC_TEMP); } else { - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat5( EEREC_S, EEREC_TEMP, 0xf); vuFloat5( EEREC_T, EEREC_TEMP, 0xf); } + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat5_useEAX( EEREC_S, EEREC_TEMP, 0xf); vuFloat5_useEAX( EEREC_T, EEREC_TEMP, 0xf); } if( EEREC_D == EEREC_S ) { //ClampUnordered(EEREC_T, EEREC_TEMP, 0); // need for GT4 vu0rec SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_T); @@ -2551,7 +2551,7 @@ void recVUMI_MINI_iq(VURegs *VU, uptr addr, int info) //SysPrintf ("recVUMI_MINI_iq \n"); if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat( info, EEREC_S, _X_Y_Z_W); + vuFloat_useEAX( info, EEREC_S, _X_Y_Z_W); vuFloat3(addr); } @@ -2611,8 +2611,8 @@ void recVUMI_MINI_xyzw(VURegs *VU, int xyzw, int info) //SysPrintf ("recVUMI_MINI_xyzw \n"); if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat( info, EEREC_S, _X_Y_Z_W); - vuFloat( info, EEREC_T, ( 1 << (3 - xyzw) ) ); + vuFloat_useEAX( info, EEREC_S, _X_Y_Z_W); + vuFloat_useEAX( info, EEREC_T, ( 1 << (3 - xyzw) ) ); } if( _X_Y_Z_W == 8 && (EEREC_D != EEREC_TEMP)) { @@ -2662,8 +2662,8 @@ void recVUMI_OPMULA( VURegs *VU, int info ) { //SysPrintf ("recVUMI_OPMULA \n"); if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat5( EEREC_S, EEREC_TEMP, 0xE); - vuFloat5( EEREC_T, EEREC_TEMP, 0xE); + 
vuFloat5_useEAX( EEREC_S, EEREC_TEMP, 0xE); + vuFloat5_useEAX( EEREC_T, EEREC_TEMP, 0xE); } SSE_MOVAPS_XMM_to_XMM( EEREC_TEMP, EEREC_S ); @@ -2689,8 +2689,8 @@ void recVUMI_OPMSUB( VURegs *VU, int info ) { //SysPrintf ("recVUMI_OPMSUB \n"); if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat5( EEREC_S, EEREC_TEMP, 0xE); - vuFloat5( EEREC_T, EEREC_TEMP, 0xE); + vuFloat5_useEAX( EEREC_S, EEREC_TEMP, 0xE); + vuFloat5_useEAX( EEREC_T, EEREC_TEMP, 0xE); } if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); @@ -2768,7 +2768,7 @@ void recVUMI_FTOI0(VURegs *VU, int info) if (_X_Y_Z_W != 0xf) { SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - vuFloat( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) + vuFloat_useEAX( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); t1reg = _vuGetTempXMMreg(info); @@ -2803,7 +2803,7 @@ void recVUMI_FTOI0(VURegs *VU, int info) else { if (EEREC_T != EEREC_S) { SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_S); - vuFloat( info, EEREC_T, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) + vuFloat_useEAX( info, EEREC_T, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_T); t1reg = _vuGetTempXMMreg(info); @@ -2824,7 +2824,7 @@ void recVUMI_FTOI0(VURegs *VU, int info) } else { SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - vuFloat( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) + vuFloat_useEAX( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); t1reg = _vuGetTempXMMreg(info); @@ -2869,7 +2869,7 @@ void recVUMI_FTOIX(VURegs *VU, int addr, int info) if (_X_Y_Z_W != 0xf) { SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); SSE_MULPS_M128_to_XMM(EEREC_TEMP, addr); - vuFloat( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) + vuFloat_useEAX( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); t1reg = _vuGetTempXMMreg(info); @@ -2905,7 +2905,7 @@ void recVUMI_FTOIX(VURegs *VU, int addr, int info) if (EEREC_T != EEREC_S) { SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_S); SSE_MULPS_M128_to_XMM(EEREC_T, addr); - vuFloat( info, EEREC_T, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) + vuFloat_useEAX( info, EEREC_T, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_T); t1reg = _vuGetTempXMMreg(info); @@ -2927,7 +2927,7 @@ void recVUMI_FTOIX(VURegs *VU, int addr, int info) else { SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); SSE_MULPS_M128_to_XMM(EEREC_TEMP, addr); - vuFloat( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) + vuFloat_useEAX( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); t1reg = _vuGetTempXMMreg(info); @@ -2978,7 +2978,7 @@ void recVUMI_ITOF0( VURegs *VU, int info ) //SysPrintf ("recVUMI_ITOF0 \n"); if (_X_Y_Z_W != 0xf) { SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - vuFloat( info, EEREC_TEMP, 15); // Clamp infinities + vuFloat_useEAX( info, EEREC_TEMP, 15); // Clamp infinities VU_MERGE_REGS(EEREC_T, EEREC_TEMP); xmmregs[EEREC_T].mode |= MODE_WRITE; } @@ 
-2996,7 +2996,7 @@ void recVUMI_ITOFX(VURegs *VU, int addr, int info) if (_X_Y_Z_W != 0xf) { SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_TEMP, EEREC_S); SSE_MULPS_M128_to_XMM(EEREC_TEMP, addr); - vuFloat( info, EEREC_TEMP, 15); // Clamp infinities + vuFloat_useEAX( info, EEREC_TEMP, 15); // Clamp infinities VU_MERGE_REGS(EEREC_T, EEREC_TEMP); xmmregs[EEREC_T].mode |= MODE_WRITE; }
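
For reference, the two clamping behaviours that the vuFloat*/vuFloat4* wrappers in this patch document ("normal" vs. "preserve sign") can be modelled in scalar C++ as below. This is an illustrative sketch only: the names fMax, clamp_normal and clamp_preserve_sign are invented here, and the recompiler emits SSE through the SSE_* wrappers rather than running code like this.

#include <cmath>
#include <cstdint>
#include <cstring>
#include <limits>

static const float fMax = std::numeric_limits<float>::max(); // stand-in for g_maxvals/g_minvals

// "normal" clamping (vuFloat / vuFloat_useEAX / vuFloat2):
// +/-NaN -> +fMax, +Inf -> +fMax, -Inf -> -fMax.
// MINSS/MAXSS return the second operand when the first is NaN, which is what drops the NaN's sign.
static float clamp_normal(float v)
{
	if (std::isnan(v)) return fMax;
	if (v >  fMax)     return  fMax;
	if (v < -fMax)     return -fMax;
	return v;
}

// "preserve sign" clamping (vuFloat4 / vuFloat4_useEAX, i.e. the vFloat*c variants):
// the sign bit is saved first (ANDPS with const_clip[4]) and OR-ed back afterwards,
// so -NaN clamps to -fMax instead of +fMax.
static float clamp_preserve_sign(float v)
{
	uint32_t bits;
	std::memcpy(&bits, &v, sizeof bits);
	const uint32_t sign = bits & 0x80000000u;

	float r = clamp_normal(v);
	uint32_t rb;
	std::memcpy(&rb, &r, sizeof rb);
	rb |= sign;
	std::memcpy(&r, &rb, sizeof r);
	return r;
}

CHECK_VU_SIGN_OVERFLOW selects between the two behaviours at recompile time, which is exactly what the vuFloat5/vuFloat5_useEAX wrappers in the patch dispatch on.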
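The *_useEAX variants added by this patch avoid the per-component MINSS/MAXSS shuffle chains by clamping the whole register with MINPS/MAXPS and then restoring the one lane that must stay untouched from a general-purpose register. A rough intrinsics sketch of that idea for the 1110 case (clamp y/z/w, keep x), following the SSE2 PINSRW fallback path, is shown below; the function and parameter names (clamp_yzw_keep_x, vmax, vmin) are assumptions for illustration, not the recompiler's emitter calls.

#include <emmintrin.h>   // SSE2

// Clamp lanes y/z/w of v to [vmin, vmax] while leaving lane x untouched,
// mirroring the MOVD -> MINPS/MAXPS -> PINSRW sequence of vFloat7_useEAX.
static __m128 clamp_yzw_keep_x(__m128 v, __m128 vmax, __m128 vmin)
{
	// MOVD: stash the untouched lane in a GPR ("EAX" in the emitted code)
	const int x = _mm_cvtsi128_si32(_mm_castps_si128(v));

	// MINPS/MAXPS: clamp all four lanes at once
	v = _mm_max_ps(_mm_min_ps(v, vmax), vmin);

	// Two PINSRW stores put the saved lane back (a single PINSRD would do on SSE4.1)
	__m128i vi = _mm_castps_si128(v);
	vi = _mm_insert_epi16(vi, x & 0xffff, 0);
	vi = _mm_insert_epi16(vi, (x >> 16) & 0xffff, 1);
	return _mm_castsi128_ps(vi);
}

The non-SSE4 path needs the two PINSRW halves (plus the SHR by 16) because PINSRD only exists with SSE4.1, which is precisely the cpucaps.hasStreamingSIMD4Extensions branch in vFloat7_useEAX and friends; Notice 1 in the patch flags that this GPR round-trip may not pay off on every CPU.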