diff --git a/pcsx2/Misc.h b/pcsx2/Misc.h index fd5dec12c8..49f44a57f8 100644 --- a/pcsx2/Misc.h +++ b/pcsx2/Misc.h @@ -55,6 +55,8 @@ //------------ SPEED/MISC HACKS!!! --------------- #define CHECK_OVERFLOW (!(Config.Hacks & 0x2)) #define CHECK_EXTRA_OVERFLOW (Config.Hacks & 0x40) // If enabled, Operands are checked for infinities before being used in the VU recs +#define CHECK_FPU_OVERFLOW (!(Config.Hacks & 0x800)) +#define CHECK_FPU_EXTRA_OVERFLOW (Config.Hacks & 0x1000) // If enabled, Operands are checked for infinities before being used in the FPU recs #define CHECK_EESYNC_HACK (Config.Hacks & 0x1) #define CHECK_IOPSYNC_HACK (Config.Hacks & 0x10) #define CHECK_EE_IOP_EXTRA (Config.Hacks & 0x20) diff --git a/pcsx2/windows/WinMain.c b/pcsx2/windows/WinMain.c index 8692b8293c..138d1d028d 100644 --- a/pcsx2/windows/WinMain.c +++ b/pcsx2/windows/WinMain.c @@ -722,16 +722,19 @@ BOOL APIENTRY HacksProc(HWND hDlg, UINT message, WPARAM wParam, LPARAM lParam) { switch (message) { case WM_INITDIALOG: if(Config.Hacks & 0x1) CheckDlgButton(hDlg, IDC_SYNCHACK, TRUE); - if(Config.Hacks & 0x2) CheckDlgButton(hDlg, IDC_OVERFLOWHACK, TRUE); + if(Config.Hacks & 0x2) CheckDlgButton(hDlg, IDC_VU_OVERFLOWHACK, TRUE); if(Config.Hacks & 0x4) CheckDlgButton(hDlg, IDC_SOUNDHACK, TRUE); if(Config.Hacks & 0x8) CheckDlgButton(hDlg, IDC_DENORMALS, TRUE); if(Config.Hacks & 0x10) CheckDlgButton(hDlg, IDC_SYNCHACK2, TRUE); if(Config.Hacks & 0x20) CheckDlgButton(hDlg, IDC_SYNCHACK3, TRUE); - if(Config.Hacks & 0x40) CheckDlgButton(hDlg, IDC_OVERFLOWHACK_EXTRA, TRUE); + if(Config.Hacks & 0x40) CheckDlgButton(hDlg, IDC_VU_OVERFLOWHACK, 2); if(Config.Hacks & 0x80) CheckDlgButton(hDlg, IDC_FASTBRANCHES, TRUE); if(Config.Hacks & 0x100) CheckDlgButton(hDlg, IDC_VUCLIPHACK, TRUE); if(Config.Hacks & 0x200) CheckDlgButton(hDlg, IDC_FPUCLAMPHACK, TRUE); if(Config.Hacks & 0x400) CheckDlgButton(hDlg, IDC_DENORMALS, 2); + if(Config.Hacks & 0x800) CheckDlgButton(hDlg, IDC_FPU_OVERFLOWHACK, TRUE); + if(Config.Hacks & 0x1000) CheckDlgButton(hDlg, IDC_FPU_OVERFLOWHACK, 2); + return TRUE; @@ -739,16 +742,16 @@ BOOL APIENTRY HacksProc(HWND hDlg, UINT message, WPARAM wParam, LPARAM lParam) { if (LOWORD(wParam) == IDOK) { Config.Hacks = 0; Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_SYNCHACK) ? 0x1 : 0; - Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_OVERFLOWHACK) ? 0x2 : 0; + Config.Hacks |= ( IsDlgButtonChecked(hDlg, IDC_VU_OVERFLOWHACK) == 2 ) ? 0x40 : (IsDlgButtonChecked(hDlg, IDC_VU_OVERFLOWHACK) ? 0x2 : 0); // 0x40 == greyed checkbox (extra overflow checking); 0x2 == checked (disable overflow checking) Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_SOUNDHACK) ? 0x4 : 0; Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_SYNCHACK2) ? 0x10 : 0; Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_SYNCHACK3) ? 0x20 : 0; - Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_OVERFLOWHACK_EXTRA) ? 0x40 : 0; Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_FASTBRANCHES) ? 0x80 : 0; Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_VUCLIPHACK) ? 0x100 : 0; Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_FPUCLAMPHACK) ? 0x200 : 0; Config.Hacks |= ( IsDlgButtonChecked(hDlg, IDC_DENORMALS) == 2 ) ? 0x408 : (IsDlgButtonChecked(hDlg, IDC_DENORMALS) ? 0x8 : 0); // 0x408 == greyed checkbox (DaZ SSE flag; so the CPU sets denormals to zero) - + Config.Hacks |= ( IsDlgButtonChecked(hDlg, IDC_FPU_OVERFLOWHACK) == 2 ) ? 0x1000 : (IsDlgButtonChecked(hDlg, IDC_FPU_OVERFLOWHACK) ? 0x800 : 0); // 0x1000 == greyed checkbox (extra overflow checking); 0x800 == checked (disable overflow checking) + g_sseVUMXCSR = CHECK_DENORMALS; SetCPUState(g_sseMXCSR, g_sseVUMXCSR); diff --git a/pcsx2/windows/pcsx2.rc b/pcsx2/windows/pcsx2.rc index eb1d23461b..ec446a6625 100644 --- a/pcsx2/windows/pcsx2.rc +++ b/pcsx2/windows/pcsx2.rc @@ -938,38 +938,38 @@ BEGIN CONTROL 132,IDC_PS2SILVER_RECT,"Static",SS_BITMAP,0,167,70,74 END -IDD_HACKS DIALOGEX 0, 0, 406, 273 +IDD_HACKS DIALOGEX 0, 0, 511, 275 STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSMENU CAPTION "PCSX2 Speed Hacks" FONT 8, "MS Shell Dlg", 400, 0, 0x1 BEGIN - DEFPUSHBUTTON "OK",IDOK,146,252,50,14 - PUSHBUTTON "Cancel",IDCANCEL,202,252,50,14 - CONTROL "EE Sync Hack (x2) - Doubles the cycle rate of the EE.",IDC_SYNCHACK, - "Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,114,252,10 - CONTROL "Disable All Overflow Checks - Doesn't check for overflow at all in the VU Recs.",IDC_OVERFLOWHACK, - "Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,63,373,10 - CTEXT "These hacks will effect the speed of PCSX2 but possibly comprimise on compatability",IDC_HACKDESC,7,7,392,8 + DEFPUSHBUTTON "OK",IDOK,195,254,50,14 + PUSHBUTTON "Cancel",IDCANCEL,251,254,50,14 + CONTROL "EE Sync Hack (x2) - Doubles the cycle rate of the EE. ( Big Speedup in most games! )",IDC_SYNCHACK, + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,111,418,10 + CONTROL "Disable VU Overflow Checks - *Checked = Disables overflow checks. ( Speedup! ) *Greyed = Extra overflow checks. ( Helps SPS, Slow! )",IDC_VU_OVERFLOWHACK, + "Button",BS_AUTO3STATE | WS_TABSTOP,14,49,475,10 + CTEXT "These hacks will effect the speed of PCSX2 but possibly comprimise on compatability",IDC_HACKDESC,7,7,497,8 CONTROL "Tighter SPU2 Sync ( FFXII vids) - Slower, not very useful anymore.",IDC_SOUNDHACK, - "Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,197,323,10 - CONTROL "IOP Sync Hack (x2) - Doubles the cycle rate of the IOP.",IDC_SYNCHACK2, - "Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,128,270,10 - CONTROL "EE/IOP Sync Hack (x3) - Makes EE and IOP hacks triple the cycle rate ( Not Recommended! )",IDC_SYNCHACK3, - "Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,142,359,10 - CONTROL "Enable Extra Overflow Checks - Enable extra overflow checks used to help stop SPS. ( Slow! )",IDC_OVERFLOWHACK_EXTRA, - "Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,50,377,10 + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,194,421,10 + CONTROL "IOP Sync Hack (x2) - Doubles the cycle rate of the IOP. ( Speedup but breaks some games. )",IDC_SYNCHACK2, + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,125,410,10 + CONTROL "EE/IOP Sync Hack (x3) - Makes EE and IOP hacks triple the cycle rate. ( Sometimes speeds games a bit more, but can break games. )",IDC_SYNCHACK3, + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,139,464,11 + CONTROL "Disable FPU Overflow Checks - *Checked = Disables overflow checks. ( Speedup! ) *Greyed = Extra overflow checks. ( Helps SPS, Slow! )",IDC_FPU_OVERFLOWHACK, + "Button",BS_AUTO3STATE | WS_TABSTOP,14,63,483,10 CONTROL "EE/IOP Fast Branches - Quick branching ( Very small speedup; Not Recommended! )",IDC_FASTBRANCHES, - "Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,183,351,10 - CTEXT "If you have problems, disable all these and try again",IDC_STATIC,7,22,392,8 - GROUPBOX "Overflow and Underflow",IDC_STATIC,7,36,392,60 - CONTROL "Disable Underflow Checks - ( Checked = Small Speedup. ) ( Grey = DaZ Flag; Big Speedup for Intel CPU's! )",IDC_DENORMALS, - "Button",BS_AUTO3STATE | WS_TABSTOP,14,76,377,10 - GROUPBOX "Sync Hacks",IDC_STATIC,7,101,392,59 - GROUPBOX "Miscellaneous / Special Game Fixes",IDC_STATIC,7,168,392,76 + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,180,423,10 + CTEXT "If you have problems, disable all these and try again",IDC_STATIC,7,22,497,8 + GROUPBOX "Overflow and Underflow",IDC_STATIC,7,36,497,58 + CONTROL "Disable Underflow Checks - *Checked = Disables underflow checks. ( Speedup! ) *Greyed = DaZ flag. ( Big Speedup on Intel CPU's )",IDC_DENORMALS, + "Button",BS_AUTO3STATE | WS_TABSTOP,14,77,483,10 + GROUPBOX "Sync Hacks",IDC_STATIC,7,98,497,63 + GROUPBOX "Miscellaneous / Special Game Fixes",IDC_STATIC,7,165,497,76 CONTROL "VU Clip Hack - Special fix for God of War; Breaks Rockstar games!",IDC_VUCLIPHACK, - "Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,227,323,10 - CONTROL "FPU Clamp Hack - Special fix for Gran Turismo 4 and possibly other games",IDC_FPUCLAMPHACK, - "Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,212,323,10 + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,224,421,10 + CONTROL "FPU Clamp Hack - Special fix for Gran Turismo 4 and possibly other games.",IDC_FPUCLAMPHACK, + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,209,428,10 END @@ -990,9 +990,9 @@ BEGIN IDD_HACKS, DIALOG BEGIN LEFTMARGIN, 7 - RIGHTMARGIN, 399 + RIGHTMARGIN, 504 TOPMARGIN, 7 - BOTTOMMARGIN, 266 + BOTTOMMARGIN, 268 END END #endif // APSTUDIO_INVOKED diff --git a/pcsx2/windows/resource.h b/pcsx2/windows/resource.h index 3334d95ede..475a4a2732 100644 --- a/pcsx2/windows/resource.h +++ b/pcsx2/windows/resource.h @@ -569,6 +569,7 @@ #define IDC_CONVERTEDCODE 1278 #define IDC_CUSTOM_CONSECUTIVE_FRAMES 1278 #define IDC_OVERFLOWHACK 1278 +#define IDC_VU_OVERFLOWHACK 1278 #define IDC_HACKDESC 1279 #define IDC_CONVERT 1279 #define IDC_EDITPATCH 1279 @@ -577,6 +578,7 @@ #define IDC_ADDPATCH 1280 #define IDC_FRAMESKIP_LABEL2 1280 #define IDC_OVERFLOWHACK_EXTRA 1280 +#define IDC_FPU_OVERFLOWHACK 1280 #define IDC_GROUP 1281 #define IDC_ADDRAW 1281 #define IDC_FRAMESKIP_LABEL3 1281 @@ -608,7 +610,6 @@ #define IDC_DENORMALS 1301 #define IDC_VUCLIPHACK 1302 #define IDC_FRAMELIMIT_OPTIONS 1303 -#define IDC_VUCLAMPHACK 1303 #define IDC_FPUCLAMPHACK 1303 #define IDC_LOG 1500 #define IDC_CPULOG 1500 diff --git a/pcsx2/x86/iFPU.c b/pcsx2/x86/iFPU.c index b9913c0ef4..705aa540df 100644 --- a/pcsx2/x86/iFPU.c +++ b/pcsx2/x86/iFPU.c @@ -752,13 +752,15 @@ FPURECOMPILE_CONSTCODE(C_LE, XMMINFO_READS|XMMINFO_READT); // Doesnt seem to like negatives - Ruins katamari graphics // I REPEAT THE SIGN BIT (THATS 0x80000000) MUST *NOT* BE SET, jeez. -static PCSX2_ALIGNED16(u32 s_overflowmask[]) = {0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff}; +static PCSX2_ALIGNED16(u32 s_overflowmask[]) = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}; static u32 s_signbit = 0x80000000; extern int g_VuNanHandling; void ClampValues(regd) { - SSE_MAXSS_M32_to_XMM(regd, (uptr)&g_minvals[0]); - SSE_MINSS_M32_to_XMM(regd, (uptr)&g_maxvals[0]); + if (CHECK_FPU_OVERFLOW) { + SSE_MAXSS_M32_to_XMM(regd, (uptr)&g_minvals[0]); + SSE_MINSS_M32_to_XMM(regd, (uptr)&g_maxvals[0]); + } /* int t5reg = _allocTempXMMreg(XMMT_FPS, -1); @@ -788,15 +790,17 @@ void ClampValues2(regd) { SSE_ANDPS_XMM_to_XMM(regd, t5reg); - // not necessary since above ORPS handles that (i think) Lets enable it for now ;) - SSE_MAXSS_M32_to_XMM(regd, (uptr)&g_minvals[0]); - SSE_MINSS_M32_to_XMM(regd, (uptr)&g_maxvals[0]); + // clamp infinities + //SSE_MAXSS_M32_to_XMM(regd, (uptr)&g_minvals[0]); + SSE_MINSS_M32_to_XMM(regd, (uptr)&g_maxvals[0]); // after above calculations, unordered floats will be positive _freeXMMreg(t5reg); } else { - SSE_MAXSS_M32_to_XMM(regd, (uptr)&g_minvals[0]); - SSE_MINSS_M32_to_XMM(regd, (uptr)&g_maxvals[0]); + if (CHECK_FPU_OVERFLOW) { + SSE_MAXSS_M32_to_XMM(regd, (uptr)&g_minvals[0]); + SSE_MINSS_M32_to_XMM(regd, (uptr)&g_maxvals[0]); + } } } @@ -807,23 +811,67 @@ static void (*recComOpM32_to_XMM[] )(x86SSERegType, uptr) = { SSE_ADDSS_M32_to_XMM, SSE_MULSS_M32_to_XMM, SSE_MAXSS_M32_to_XMM, SSE_MINSS_M32_to_XMM }; int recCommutativeOp(int info, int regd, int op) { + if (CHECK_FPU_EXTRA_OVERFLOW) { + int t0reg = _allocTempXMMreg(XMMT_FPS, -1); + if (t0reg < 0) goto allocationError; + + switch(info & (PROCESS_EE_S|PROCESS_EE_T) ) { + case PROCESS_EE_S: + if (regd != EEREC_S) SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); + SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]); + ClampValues (regd); + ClampValues (t0reg); + recComOpXMM_to_XMM[op](regd, t0reg); + break; + case PROCESS_EE_T: + if (regd != EEREC_T) SSE_MOVSS_XMM_to_XMM(regd, EEREC_T); + SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Fs_]); + ClampValues (regd); + ClampValues (t0reg); + recComOpXMM_to_XMM[op](regd, t0reg); + break; + case (PROCESS_EE_S|PROCESS_EE_T): + if (regd == EEREC_S) { + ClampValues (regd); + ClampValues (EEREC_T); + recComOpXMM_to_XMM[op](regd, EEREC_T); + } + else if (regd == EEREC_T) { + ClampValues (regd); + ClampValues (EEREC_S); + recComOpXMM_to_XMM[op](regd, EEREC_S); + } + else { + ClampValues (EEREC_S); + ClampValues (EEREC_T); + SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); + recComOpXMM_to_XMM[op](regd, EEREC_T); + } + break; + default: + SSE_MOVSS_M32_to_XMM(regd, (uptr)&fpuRegs.fpr[_Fs_]); + SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]); + ClampValues (regd); + ClampValues (t0reg); + recComOpXMM_to_XMM[op](regd, t0reg); + break; + } + _freeXMMreg(t0reg); + + return regd; +allocationError: + SysPrintf("recCommutativeOp() allocation error! Skipping Pre-Opcode Overflow checks! \n"); + } // End of pre-opcode overflow checking switch(info & (PROCESS_EE_S|PROCESS_EE_T) ) { case PROCESS_EE_S: - if (regd == EEREC_S) recComOpM32_to_XMM[op](regd, (uptr)&fpuRegs.fpr[_Ft_]); - else { - SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); - recComOpM32_to_XMM[op](regd, (uptr)&fpuRegs.fpr[_Ft_]); - } + if (regd != EEREC_S) SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); + recComOpM32_to_XMM[op](regd, (uptr)&fpuRegs.fpr[_Ft_]); break; case PROCESS_EE_T: - if (regd == EEREC_T) recComOpM32_to_XMM[op](regd, (uptr)&fpuRegs.fpr[_Fs_]); - else { - SSE_MOVSS_XMM_to_XMM(regd, EEREC_T); - recComOpM32_to_XMM[op](regd, (uptr)&fpuRegs.fpr[_Fs_]); - } + if (regd != EEREC_T) SSE_MOVSS_XMM_to_XMM(regd, EEREC_T); + recComOpM32_to_XMM[op](regd, (uptr)&fpuRegs.fpr[_Fs_]); break; case (PROCESS_EE_S|PROCESS_EE_T): - // SysPrintf("Hello2 :)\n"); if (regd == EEREC_S) recComOpXMM_to_XMM[op](regd, EEREC_T); else if (regd == EEREC_T) recComOpXMM_to_XMM[op](regd, EEREC_S); else { @@ -832,17 +880,6 @@ int recCommutativeOp(int info, int regd, int op) { } break; default: - SysPrintf("But we dont have regs2 :(\n"); - /*if (regd == EEREC_S) { - recComOpXMM_to_XMM[op](regd, EEREC_T); - } - else if (regd == EEREC_T) { - recComOpXMM_to_XMM[op](regd, EEREC_S); - } - else { - SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); - recComOpXMM_to_XMM[op](regd, EEREC_T); - }*/ SSE_MOVSS_M32_to_XMM(regd, (uptr)&fpuRegs.fpr[_Fs_]); recComOpM32_to_XMM[op](regd, (uptr)&fpuRegs.fpr[_Ft_]); break; @@ -959,32 +996,18 @@ void recSQRT_S_xmm(int info) { SysPrintf("FPU: SQRT \n"); if( info & PROCESS_EE_T ) { - //if( CHECK_OVERFLOW ) { - if( EEREC_D == EEREC_T ) SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_pos[0]); - else { - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_T); - SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_pos[0]); - - } - + if (CHECK_FPU_EXTRA_OVERFLOW) { ClampValues(EEREC_T); } + if( EEREC_D != EEREC_T ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_T); + SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_pos[0]); SSE_SQRTSS_XMM_to_XMM(EEREC_D, EEREC_D); - //} - /*else { - SSE_SQRTSS_XMM_to_XMM(EEREC_D, EEREC_T); - }*/ } else { - //if( CHECK_OVERFLOW ) { SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Ft_]); SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_pos[0]); - + if (CHECK_FPU_EXTRA_OVERFLOW) { ClampValues(EEREC_D); } SSE_SQRTSS_XMM_to_XMM(EEREC_D, EEREC_D); - /*} - else { - SSE_SQRTSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Ft_]); - }*/ } - ClampValues(EEREC_D); + //ClampValues(EEREC_D); // No need to clamp since sqrt of a number is always smaller than that number } FPURECOMPILE_CONSTCODE(SQRT_S, XMMINFO_WRITED|XMMINFO_READT); @@ -999,7 +1022,6 @@ void recABS_S_xmm(int info) else { SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]); SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_pos[0]); - //xmmregs[EEREC_D].mode &= ~MODE_WRITE; } ClampValues(EEREC_D); } @@ -1037,43 +1059,51 @@ void recRSQRT_S_xmm(int info) int t0reg = _allocTempXMMreg(XMMT_FPS, -1); switch(info & (PROCESS_EE_S|PROCESS_EE_T) ) { case PROCESS_EE_S: - if( EEREC_D == EEREC_S ) { - SSE_SQRTSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]); - SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg); - } - else { - SSE_SQRTSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]); - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg); + SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]); + SSE_ANDPS_M128_to_XMM(t0reg, (uptr)&s_pos[0]); + if (CHECK_FPU_EXTRA_OVERFLOW) { + ClampValues(EEREC_S); + SSE_MINSS_M32_to_XMM(t0reg, (uptr)&g_maxvals[0]); } + if( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_SQRTSS_XMM_to_XMM(t0reg, t0reg); + SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg); break; - case PROCESS_EE_T: - SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T); - SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]); - + case PROCESS_EE_T: + SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]); + SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_T); + SSE_ANDPS_M128_to_XMM(t0reg, (uptr)&s_pos[0]); + if (CHECK_FPU_EXTRA_OVERFLOW) { + ClampValues(EEREC_D); + SSE_MINSS_M32_to_XMM(t0reg, (uptr)&g_maxvals[0]); + } + SSE_SQRTSS_XMM_to_XMM(t0reg, t0reg); SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg); + + break; + case (PROCESS_EE_S | PROCESS_EE_T): + SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_T); + SSE_ANDPS_M128_to_XMM(t0reg, (uptr)&s_pos[0]); + if (CHECK_FPU_EXTRA_OVERFLOW) { + ClampValues(EEREC_S); + SSE_MINSS_M32_to_XMM(t0reg, (uptr)&g_maxvals[0]); + } + if( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_SQRTSS_XMM_to_XMM(t0reg, t0reg); + SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg); + break; default: - if( (info & PROCESS_EE_T) && (info & PROCESS_EE_S) ) { - if( EEREC_D == EEREC_T ){ - SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T); - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg); - } - else if( EEREC_D == EEREC_S ){ - SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T); - SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg); - } else { - SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T); - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg); - } - }else{ - SSE_SQRTSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]); - SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]); - SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg); + SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]); + SSE_ANDPS_M128_to_XMM(t0reg, (uptr)&s_pos[0]); + SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]); + if (CHECK_FPU_EXTRA_OVERFLOW) { + ClampValues(EEREC_D); + SSE_MINSS_M32_to_XMM(t0reg, (uptr)&g_maxvals[0]); } + SSE_SQRTSS_XMM_to_XMM(t0reg, t0reg); + SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg); break; }