diff --git a/pcsx2/Misc.h b/pcsx2/Misc.h index e251b4f9eb..f6f2cfcdb3 100644 --- a/pcsx2/Misc.h +++ b/pcsx2/Misc.h @@ -63,6 +63,9 @@ #define CHECK_UNDERFLOW (!(Config.Hacks & 0x8)) //#define CHECK_DENORMALS ((Config.Hacks & 0x400) ? 0xffc0 : 0x7f80) //If enabled, Denormals are Zero for the recs and flush to zero is enabled as well #define CHECK_FASTBRANCHES (Config.Hacks & 0x80) +#define CHECK_VU_EXTRA_FLAGS (!(Config.Hacks & 0x100)) // Sets correct flags in the VU recs +#define CHECK_FPU_EXTRA_FLAGS (!(Config.Hacks & 0x200)) // Sets correct flags in the FPU recs + //------------ SPECIAL GAME FIXES!!! --------------- #define CHECK_FPUCLAMPHACK (Config.GameFixes & 0x1) // Special Fix for GT4, different clamping for FPU (Note: sets negative infinity to positive fMax when clamping, which the real ps2 doesn't do) #define CHECK_VUCLIPHACK (Config.GameFixes & 0x2) // Special Fix for GoW, updates the clipflag differently in recVUMI_CLIP() (note: turning this hack on, breaks Rockstar games) diff --git a/pcsx2/windows/WinMain.c b/pcsx2/windows/WinMain.c index 9c8353a4c9..9424cd8fea 100644 --- a/pcsx2/windows/WinMain.c +++ b/pcsx2/windows/WinMain.c @@ -759,8 +759,8 @@ BOOL APIENTRY HacksProc(HWND hDlg, UINT message, WPARAM wParam, LPARAM lParam) { if(Config.Hacks & 0x20) CheckDlgButton(hDlg, IDC_SYNCHACK3, TRUE); if(Config.Hacks & 0x40) CheckDlgButton(hDlg, IDC_VU_OVERFLOWHACK, 2); if(Config.Hacks & 0x80) CheckDlgButton(hDlg, IDC_FASTBRANCHES, TRUE); - //if(Config.Hacks & 0x100) CheckDlgButton(hDlg, IDC_VUCLIPHACK, TRUE); - //if(Config.Hacks & 0x200) CheckDlgButton(hDlg, IDC_FPUCLAMPHACK, TRUE); + if(Config.Hacks & 0x100) CheckDlgButton(hDlg, IDC_VU_FLAGS, TRUE); + if(Config.Hacks & 0x200) CheckDlgButton(hDlg, IDC_FPU_FLAGS, TRUE); //if(Config.Hacks & 0x400) CheckDlgButton(hDlg, IDC_DENORMALS, 2); if(Config.Hacks & 0x800) CheckDlgButton(hDlg, IDC_FPU_OVERFLOWHACK, TRUE); if(Config.Hacks & 0x1000) CheckDlgButton(hDlg, IDC_FPU_OVERFLOWHACK, 2); @@ -778,8 +778,8 @@ BOOL APIENTRY HacksProc(HWND hDlg, UINT message, WPARAM wParam, LPARAM lParam) { Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_SYNCHACK2) ? 0x10 : 0; Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_SYNCHACK3) ? 0x20 : 0; Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_FASTBRANCHES) ? 0x80 : 0; - //Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_VUCLIPHACK) ? 0x100 : 0; - //Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_FPUCLAMPHACK) ? 0x200 : 0; + Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_VU_FLAGS) ? 0x100 : 0; + Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_FPU_FLAGS) ? 0x200 : 0; //Config.Hacks |= ( IsDlgButtonChecked(hDlg, IDC_DENORMALS) == 2 ) ? 0x408 : (IsDlgButtonChecked(hDlg, IDC_DENORMALS) ? 0x8 : 0); // 0x408 == greyed checkbox (DaZ SSE flag; so the CPU sets denormals to zero) Config.Hacks |= ( IsDlgButtonChecked(hDlg, IDC_FPU_OVERFLOWHACK) == 2 ) ? 0x1000 : (IsDlgButtonChecked(hDlg, IDC_FPU_OVERFLOWHACK) ? 0x800 : 0); // 0x1000 == greyed checkbox (extra overflow checking); 0x800 == checked (disable overflow checking) diff --git a/pcsx2/windows/pcsx2.rc b/pcsx2/windows/pcsx2.rc index 72e4675f58..678f0a0eed 100644 --- a/pcsx2/windows/pcsx2.rc +++ b/pcsx2/windows/pcsx2.rc @@ -1030,34 +1030,39 @@ BEGIN CONTROL 132,IDC_PS2SILVER_RECT,"Static",SS_BITMAP,0,167,70,74 END -IDD_HACKS DIALOGEX 0, 0, 511, 243 +IDD_HACKS DIALOGEX 0, 0, 511, 295 STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSMENU CAPTION "PCSX2 Speed Hacks" FONT 8, "MS Shell Dlg", 400, 0, 0x1 BEGIN - DEFPUSHBUTTON "OK",IDOK,205,222,50,14 - PUSHBUTTON "Cancel",IDCANCEL,261,222,50,14 + DEFPUSHBUTTON "OK",IDOK,205,274,50,14 + PUSHBUTTON "Cancel",IDCANCEL,261,274,50,14 CONTROL "EE Sync Hack (x2) - Doubles the cycle rate of the EE. ( Big Speedup in most games! )",IDC_SYNCHACK, - "Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,111,418,10 + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,162,418,10 CONTROL "Disable VU Overflow Checks - *Checked = Disables overflow checks. ( Speedup! ) *Greyed = Extra overflow checks. ( Helps SPS, Slow! )",IDC_VU_OVERFLOWHACK, - "Button",BS_AUTO3STATE | WS_TABSTOP,14,49,475,10 + "Button",BS_AUTO3STATE | WS_TABSTOP,15,49,475,10 CTEXT "These hacks will effect the speed of PCSX2 but possibly comprimise on compatability",IDC_HACKDESC,7,7,497,8 CONTROL "Tighter SPU2 Sync ( FFXII vids) - Slower, not very useful anymore.",IDC_SOUNDHACK, - "Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,194,421,10 + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,245,421,10 CONTROL "IOP Sync Hack (x2) - Doubles the cycle rate of the IOP. ( Speedup but breaks some games. )",IDC_SYNCHACK2, - "Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,125,410,10 + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,176,410,10 CONTROL "EE/IOP Sync Hack (x3) - Makes EE and IOP hacks triple the cycle rate. ( Sometimes speeds games a bit more, but can break games. )",IDC_SYNCHACK3, - "Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,139,464,11 + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,190,464,11 CONTROL "Disable FPU Overflow Checks - *Checked = Disables overflow checks. ( Speedup! ) *Greyed = Extra overflow checks. ( Helps SPS, Slow! )",IDC_FPU_OVERFLOWHACK, - "Button",BS_AUTO3STATE | WS_TABSTOP,14,63,483,10 + "Button",BS_AUTO3STATE | WS_TABSTOP,15,63,483,10 CONTROL "EE/IOP Fast Branches - Quick branching ( Very small speedup; Not Recommended! )",IDC_FASTBRANCHES, - "Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,180,423,10 + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,231,423,10 CTEXT "If you have problems, disable all these and try again",IDC_STATIC,7,22,497,8 GROUPBOX "Overflow and Underflow",IDC_STATIC,7,36,497,58 CONTROL "Disable Underflow Checks - *Checked = Disables underflow checks. ( Speedup! )",IDC_DENORMALS, - "Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,77,319,10 - GROUPBOX "Sync Hacks",IDC_STATIC,7,98,497,63 - GROUPBOX "Miscellaneous",IDC_STATIC,7,165,497,50 + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,77,319,10 + GROUPBOX "Sync Hacks",IDC_STATIC,7,149,497,63 + GROUPBOX "Miscellaneous",IDC_STATIC,7,216,497,50 + GROUPBOX "Flag Setting",IDC_STATIC,7,100,497,41 + CONTROL "Disable Extra VU Flags - When checked, PCSX2 doesn't set some flags that are rarely used by games. ( Speedup! )",IDC_VU_FLAGS, + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,113,442,10 + CONTROL "Disable Extra FPU Flags - When checked, PCSX2 doesn't set some flags that are rarely used by games. ( Speedup! )",IDC_FPU_FLAGS, + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,126,414,10 END @@ -1079,8 +1084,9 @@ BEGIN BEGIN LEFTMARGIN, 7 RIGHTMARGIN, 504 + VERTGUIDE, 15 TOPMARGIN, 7 - BOTTOMMARGIN, 236 + BOTTOMMARGIN, 288 END END #endif // APSTUDIO_INVOKED diff --git a/pcsx2/windows/resource.h b/pcsx2/windows/resource.h index 3582b5961f..8ee4d72237 100644 --- a/pcsx2/windows/resource.h +++ b/pcsx2/windows/resource.h @@ -614,13 +614,15 @@ #define IDC_GAMEFIX1 1300 #define IDC_DENORMALS 1301 #define IDC_EE_CHECK2 1301 -#define IDC_CHECK3 1301 #define IDC_GAMEFIX2 1301 #define IDC_VUCLIPHACK 1302 #define IDC_VU_CHECK1 1302 +#define IDC_VU_FLAGS 1302 #define IDC_FRAMELIMIT_OPTIONS 1303 #define IDC_FPUCLAMPHACK 1303 #define IDC_VU_CHECK2 1303 +#define IDC_VU_FLAGS2 1303 +#define IDC_FPU_FLAGS 1303 #define IDC_ROUNDMODE 1304 #define IDC_EE_ROUNDMODE0 1305 #define IDC_EE_ROUNDMODE1 1306 diff --git a/pcsx2/x86/iFPU.c b/pcsx2/x86/iFPU.c index 999baedc94..07351194f5 100644 --- a/pcsx2/x86/iFPU.c +++ b/pcsx2/x86/iFPU.c @@ -978,25 +978,27 @@ void recSQRT_S_xmm(int info) int tempReg; u8* pjmp; - SysPrintf("FPU: SQRT \n"); + SysPrintf("FPU: SQRT\n"); tempReg = _allocX86reg(-1, X86TYPE_TEMP, 0, 0); - if (tempReg == -1) {SysPrintf("FPU: SQRT Allocation Error! \n"); tempReg = EAX;} + if (tempReg == -1) {SysPrintf("FPU: SQRT Allocation Error!\n"); tempReg = EAX;} if( info & PROCESS_EE_T ) { if ( EEREC_D != EEREC_T ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_T); } else SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Ft_]); - AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagI|FPUflagD)); // Clear I and D flags + if (CHECK_FPU_EXTRA_FLAGS) { + AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagI|FPUflagD)); // Clear I and D flags - /*--- Check for negative SQRT ---*/ - XOR32RtoR(tempReg, tempReg); - SSE_MOVMSKPS_XMM_to_R32(tempReg, EEREC_D); - AND32ItoR(tempReg, 1); //Check sign - pjmp = JZ8(0); //Skip if none are - OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagI|FPUflagSI); // Set I and SI flags - SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_pos[0]); // Make EEREC_D Positive - x86SetJ8(pjmp); + /*--- Check for negative SQRT ---*/ + SSE_MOVMSKPS_XMM_to_R32(tempReg, EEREC_D); + AND32ItoR(tempReg, 1); //Check sign + pjmp = JZ8(0); //Skip if none are + OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagI|FPUflagSI); // Set I and SI flags + SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_pos[0]); // Make EEREC_D Positive + x86SetJ8(pjmp); + } + else SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_pos[0]); // Make EEREC_D Positive if (CHECK_FPU_OVERFLOW) // Only need to do positive clamp, since EEREC_D is positive SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)&g_maxvals[0]); @@ -1051,54 +1053,105 @@ void recNEG_S_xmm(int info) { FPURECOMPILE_CONSTCODE(NEG_S, XMMINFO_WRITED|XMMINFO_READS); -void recRSQRT_S_xmm(int info) -{ - int t0reg = _allocTempXMMreg(XMMT_FPS, -1); +// Preforms the RSQRT function when regd <- Fs and t0reg <- Ft (Sets correct flags) +void recRSQRThelper1(int regd, int t0reg) +{ + u8* pjmp1; + u8* pjmp2; + u32* pjmp32; + int t1reg = _allocTempXMMreg(XMMT_FPS, -1); + int tempReg = _allocX86reg(-1, X86TYPE_TEMP, 0, 0); + if (t1reg == -1) {SysPrintf("FPU: RSQRT Allocation Error!\n");} + if (tempReg == -1) {SysPrintf("FPU: RSQRT Allocation Error!\n"); tempReg = EAX;} + AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagI|FPUflagD)); // Clear I and D flags + + /*--- Check for zero ---*/ + SSE_XORPS_XMM_to_XMM(t1reg, t1reg); + SSE_CMPEQSS_XMM_to_XMM(t1reg, t0reg); + SSE_MOVMSKPS_XMM_to_R32(tempReg, t1reg); + AND32ItoR(tempReg, 1); //Check sign (if t0reg == zero, sign will be set) + pjmp1 = JZ8(0); //Skip if not set + OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagD|FPUflagSD); // Set D and SD flags + SSE_XORPS_XMM_to_XMM(regd, t0reg); // Make regd Positive or Negative + SSE_ANDPS_M128_to_XMM(regd, (uptr)&s_neg[0]); // Get the sign bit + SSE_ORPS_M128_to_XMM(regd, (uptr)&g_maxvals[0]); // regd = +/- Maximum + pjmp32 = JMP32(0); + x86SetJ8(pjmp1); + + /*--- Check for negative SQRT ---*/ + SSE_MOVMSKPS_XMM_to_R32(tempReg, t0reg); + AND32ItoR(tempReg, 1); //Check sign + pjmp2 = JZ8(0); //Skip if not set + OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagI|FPUflagSI); // Set I and SI flags + SSE_ANDPS_M128_to_XMM(t0reg, (uptr)&s_pos[0]); // Make t0reg Positive + x86SetJ8(pjmp2); + + if (CHECK_FPU_EXTRA_OVERFLOW) { + SSE_MINSS_M32_to_XMM(t0reg, (uptr)&g_maxvals[0]); // Only need to do positive clamp, since t0reg is positive + ClampValues(regd); + } + + SSE_SQRTSS_XMM_to_XMM(t0reg, t0reg); + SSE_DIVSS_XMM_to_XMM(regd, t0reg); + + ClampValues(regd); + x86SetJ32(pjmp32); + + _freeXMMreg(t1reg); + _freeX86reg(tempReg); +} + +// Preforms the RSQRT function when regd <- Fs and t0reg <- Ft (Doesn't set flags) +void recRSQRThelper2(int regd, int t0reg) +{ + SSE_ANDPS_M128_to_XMM(t0reg, (uptr)&s_pos[0]); // Make t0reg Positive + if (CHECK_FPU_EXTRA_OVERFLOW) { + SSE_MINSS_M32_to_XMM(t0reg, (uptr)&g_maxvals[0]); // Only need to do positive clamp, since t0reg is positive + ClampValues(regd); + } + SSE_SQRTSS_XMM_to_XMM(t0reg, t0reg); + SSE_DIVSS_XMM_to_XMM(regd, t0reg); + ClampValues(regd); +} + +void recRSQRT_S_xmm(int info) +{ + int t0reg = _allocTempXMMreg(XMMT_FPS, -1); + SysPrintf("FPU: RSQRT\n"); + if (t0reg == -1) {SysPrintf("FPU: RSQRT Allocation Error!\n");} + switch(info & (PROCESS_EE_S|PROCESS_EE_T) ) { case PROCESS_EE_S: - if( EEREC_D == EEREC_S ) { - SSE_SQRTSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]); - SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg); - } - else { - SSE_SQRTSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]); - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg); - } - + //SysPrintf("FPU: RSQRT case 1\n"); + if( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]); + if (CHECK_FPU_EXTRA_FLAGS) recRSQRThelper1(EEREC_D, t0reg); + else recRSQRThelper2(EEREC_D, t0reg); break; - case PROCESS_EE_T: - SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T); - SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]); - - SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg); + case PROCESS_EE_T: + //SysPrintf("FPU: RSQRT case 2\n"); + SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_T); + SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]); + if (CHECK_FPU_EXTRA_FLAGS) recRSQRThelper1(EEREC_D, t0reg); + else recRSQRThelper2(EEREC_D, t0reg); + break; + case (PROCESS_EE_S|PROCESS_EE_T): + //SysPrintf("FPU: RSQRT case 3\n"); + SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_T); + if( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + if (CHECK_FPU_EXTRA_FLAGS) recRSQRThelper1(EEREC_D, t0reg); + else recRSQRThelper2(EEREC_D, t0reg); break; default: - if( (info & PROCESS_EE_T) && (info & PROCESS_EE_S) ) { - if( EEREC_D == EEREC_T ){ - SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T); - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg); - } - else if( EEREC_D == EEREC_S ){ - SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T); - SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg); - } else { - SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T); - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg); - } - }else{ - SSE_SQRTSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]); - SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]); - SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg); - } - + //SysPrintf("FPU: RSQRT case 4\n"); + SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]); + SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]); + if (CHECK_FPU_EXTRA_FLAGS) recRSQRThelper1(EEREC_D, t0reg); + else recRSQRThelper2(EEREC_D, t0reg); break; } _freeXMMreg(t0reg); - ClampValues(EEREC_D); } FPURECOMPILE_CONSTCODE(RSQRT_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); diff --git a/pcsx2/x86/iVUmicro.c b/pcsx2/x86/iVUmicro.c index a62eb471cf..b745c01fd6 100644 --- a/pcsx2/x86/iVUmicro.c +++ b/pcsx2/x86/iVUmicro.c @@ -1398,53 +1398,53 @@ void recUpdateFlags(VURegs * VU, int reg, int info) if( EEREC_TEMP != reg ) { SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x1B); // Flip wzyx to xyzw - - //-------------------------Check for Overflow flags------------------------------ - - SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // Clear EEREC_TEMP - SSE_CMPUNORDPS_XMM_to_XMM(EEREC_TEMP, reg); // If reg == NaN then set Vector to 0xFFFFFFFF - XOR32RtoR(x86macflag, x86macflag); // Clear Mac Flag - - SSE_MOVMSKPS_XMM_to_R32(x86newflag, EEREC_TEMP); // Move the sign bits of the previous calculation - XOR32RtoR(x86temp, x86temp); //Clear x86temp - AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Overflowed" bits from the previous calculation (also make sure we're only grabbing from the XYZW being modified) - pjmp = JZ8(0); // Skip if none are - OR32ItoR(x86temp, 8); // Set if they are - x86SetJ8(pjmp); + if (CHECK_VU_EXTRA_FLAGS) { + //-------------------------Check for Overflow flags------------------------------ - OR32RtoR(x86macflag, x86newflag); - SHL32ItoR(x86macflag, 4); // Shift the Overflow flags left 4 + SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // Clear EEREC_TEMP + SSE_CMPUNORDPS_XMM_to_XMM(EEREC_TEMP, reg); // If reg == NaN then set Vector to 0xFFFFFFFF - //-------------------------Check for Underflow flags------------------------------ + SSE_MOVMSKPS_XMM_to_R32(x86newflag, EEREC_TEMP); // Move the sign bits of the previous calculation - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, reg); // EEREC_TEMP <- reg + AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Overflowed" bits from the previous calculation (also make sure we're only grabbing from the XYZW being modified) + pjmp = JZ8(0); // Skip if none are + OR32ItoR(x86temp, 8); // Set if they are + x86SetJ8(pjmp); - SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Underflow_Mask1[ 0 ]); - SSE_CMPEQPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Zero_Mask[ 0 ]); // If (EEREC_TEMP == zero exponent) then set Vector to 0xFFFFFFFF + OR32RtoR(x86macflag, x86newflag); + SHL32ItoR(x86macflag, 4); // Shift the Overflow flags left 4 - SSE_ANDPS_XMM_to_XMM(EEREC_TEMP, reg); - SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Underflow_Mask2[ 0 ]); - SSE_CMPNEPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Zero_Mask[ 0 ]); // If (EEREC_TEMP != zero mantisa) then set Vector to 0xFFFFFFFF + //-------------------------Check for Underflow flags------------------------------ - SSE_MOVMSKPS_XMM_to_R32(x86newflag, EEREC_TEMP); // Move the sign bits of the previous calculation + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, reg); // EEREC_TEMP <- reg - AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Underflowed" bits from the previous calculation - pjmp = JZ8(0); // Skip if none are - OR32ItoR(x86temp, 4); // Set if they are - x86SetJ8(pjmp); + SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Underflow_Mask1[ 0 ]); + SSE_CMPEQPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Zero_Mask[ 0 ]); // If (EEREC_TEMP == zero exponent) then set Vector to 0xFFFFFFFF - OR32RtoR(x86macflag, x86newflag); - SHL32ItoR(x86macflag, 4); // Shift the Overflow and Underflow flags left 4 + SSE_ANDPS_XMM_to_XMM(EEREC_TEMP, reg); + SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Underflow_Mask2[ 0 ]); + SSE_CMPNEPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Zero_Mask[ 0 ]); // If (EEREC_TEMP != zero mantisa) then set Vector to 0xFFFFFFFF - //-------------------------Optional Code: Denormals Are Zero------------------------------ - if (CHECK_UNDERFLOW) { // Sets underflow/denormals to zero - SSE_ANDNPS_XMM_to_XMM(EEREC_TEMP, reg); // EEREC_TEMP = !EEREC_TEMP & reg - // Now we have Denormals are Positive Zero in EEREC_TEMP; the next two lines take Signed Zero into account - SSE_ANDPS_M128_to_XMM(reg, (uptr)&VU_Signed_Zero_Mask[ 0 ]); - SSE_ORPS_XMM_to_XMM(reg, EEREC_TEMP); + SSE_MOVMSKPS_XMM_to_R32(x86newflag, EEREC_TEMP); // Move the sign bits of the previous calculation + + AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Underflowed" bits from the previous calculation + pjmp = JZ8(0); // Skip if none are + OR32ItoR(x86temp, 4); // Set if they are + x86SetJ8(pjmp); + + OR32RtoR(x86macflag, x86newflag); + SHL32ItoR(x86macflag, 4); // Shift the Overflow and Underflow flags left 4 + + //-------------------------Optional Code: Denormals Are Zero------------------------------ + if (CHECK_UNDERFLOW) { // Sets underflow/denormals to zero + SSE_ANDNPS_XMM_to_XMM(EEREC_TEMP, reg); // EEREC_TEMP = !EEREC_TEMP & reg + // Now we have Denormals are Positive Zero in EEREC_TEMP; the next two lines take Signed Zero into account + SSE_ANDPS_M128_to_XMM(reg, (uptr)&VU_Signed_Zero_Mask[ 0 ]); + SSE_ORPS_XMM_to_XMM(reg, EEREC_TEMP); + } } //-------------------------Check for Signed flags------------------------------ @@ -1512,53 +1512,53 @@ void recUpdateFlags(VURegs * VU, int reg, int info) } SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x1B); // Flip wzyx to xyzw - - //-------------------------Check for Overflow flags------------------------------ - - SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg - SSE_CMPUNORDPS_XMM_to_XMM(t1reg, reg); // If reg == NaN then set Vector to 0xFFFFFFFF - XOR32RtoR(x86macflag, x86macflag); // Clear Mac Flag - - SSE_MOVMSKPS_XMM_to_R32(x86newflag, t1reg); // Move the sign bits of the previous calculation - XOR32RtoR(x86temp, x86temp); //Clear x86temp - AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Overflowed" bits from the previous calculation (also make sure we're only grabbing from the XYZW being modified) - pjmp = JZ8(0); // Skip if none are - OR32ItoR(x86temp, 8); // Set if they are - x86SetJ8(pjmp); + if (CHECK_VU_EXTRA_FLAGS) { + //-------------------------Check for Overflow flags------------------------------ - OR32RtoR(x86macflag, x86newflag); - SHL32ItoR(x86macflag, 4); // Shift the Overflow flags left 4 + SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg + SSE_CMPUNORDPS_XMM_to_XMM(t1reg, reg); // If reg == NaN then set Vector to 0xFFFFFFFF - //-------------------------Check for Underflow flags------------------------------ + SSE_MOVMSKPS_XMM_to_R32(x86newflag, t1reg); // Move the sign bits of the previous calculation - SSE_MOVAPS_XMM_to_XMM(t1reg, reg); // t1reg <- reg + AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Overflowed" bits from the previous calculation (also make sure we're only grabbing from the XYZW being modified) + pjmp = JZ8(0); // Skip if none are + OR32ItoR(x86temp, 8); // Set if they are + x86SetJ8(pjmp); - SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Underflow_Mask1[ 0 ]); - SSE_CMPEQPS_M128_to_XMM(t1reg, (uptr)&VU_Zero_Mask[ 0 ]); // If (t1reg == zero exponent) then set Vector to 0xFFFFFFFF + OR32RtoR(x86macflag, x86newflag); + SHL32ItoR(x86macflag, 4); // Shift the Overflow flags left 4 - SSE_ANDPS_XMM_to_XMM(t1reg, reg); - SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Underflow_Mask2[ 0 ]); - SSE_CMPNEPS_M128_to_XMM(t1reg, (uptr)&VU_Zero_Mask[ 0 ]); // If (t1reg != zero mantisa) then set Vector to 0xFFFFFFFF + //-------------------------Check for Underflow flags------------------------------ - SSE_MOVMSKPS_XMM_to_R32(x86newflag, t1reg); // Move the sign bits of the previous calculation + SSE_MOVAPS_XMM_to_XMM(t1reg, reg); // t1reg <- reg - AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Underflowed" bits from the previous calculation - pjmp = JZ8(0); // Skip if none are - OR32ItoR(x86temp, 4); // Set if they are - x86SetJ8(pjmp); + SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Underflow_Mask1[ 0 ]); + SSE_CMPEQPS_M128_to_XMM(t1reg, (uptr)&VU_Zero_Mask[ 0 ]); // If (t1reg == zero exponent) then set Vector to 0xFFFFFFFF - OR32RtoR(x86macflag, x86newflag); - SHL32ItoR(x86macflag, 4); // Shift the Overflow and Underflow flags left 4 + SSE_ANDPS_XMM_to_XMM(t1reg, reg); + SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Underflow_Mask2[ 0 ]); + SSE_CMPNEPS_M128_to_XMM(t1reg, (uptr)&VU_Zero_Mask[ 0 ]); // If (t1reg != zero mantisa) then set Vector to 0xFFFFFFFF - //-------------------------Optional Code: Denormals Are Zero------------------------------ - if (CHECK_UNDERFLOW) { // Sets underflow/denormals to zero - SSE_ANDNPS_XMM_to_XMM(t1reg, reg); // t1reg = !t1reg & reg - // Now we have Denormals are Positive Zero in t1reg; the next two lines take Signed Zero into account - SSE_ANDPS_M128_to_XMM(reg, (uptr)&VU_Signed_Zero_Mask[ 0 ]); - SSE_ORPS_XMM_to_XMM(reg, t1reg); + SSE_MOVMSKPS_XMM_to_R32(x86newflag, t1reg); // Move the sign bits of the previous calculation + + AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Underflowed" bits from the previous calculation + pjmp = JZ8(0); // Skip if none are + OR32ItoR(x86temp, 4); // Set if they are + x86SetJ8(pjmp); + + OR32RtoR(x86macflag, x86newflag); + SHL32ItoR(x86macflag, 4); // Shift the Overflow and Underflow flags left 4 + + //-------------------------Optional Code: Denormals Are Zero------------------------------ + if (CHECK_UNDERFLOW) { // Sets underflow/denormals to zero + SSE_ANDNPS_XMM_to_XMM(t1reg, reg); // t1reg = !t1reg & reg + // Now we have Denormals are Positive Zero in t1reg; the next two lines take Signed Zero into account + SSE_ANDPS_M128_to_XMM(reg, (uptr)&VU_Signed_Zero_Mask[ 0 ]); + SSE_ORPS_XMM_to_XMM(reg, t1reg); + } } //-------------------------Check for Signed flags------------------------------