Recoded some more FPU opcodes and added two new speedhacks: "disable extra VU flags" and "disable extra FPU flags".

On the PS2, certain "flags" are set to indicate different statuses. There are flags for overflow, underflow, invalid operation, divide by zero, is-zero, is-negative, etc.
Some of these flags are rarely checked by games, so these speedhacks skip the extra code that sets the rarely used flags.

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@95 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
cottonvibes 2008-09-05 21:39:38 +00:00 committed by Gregory Hainaut
parent bfc387a024
commit 2d805b4987
6 changed files with 203 additions and 139 deletions

View File

@ -63,6 +63,9 @@
#define CHECK_UNDERFLOW (!(Config.Hacks & 0x8))
//#define CHECK_DENORMALS ((Config.Hacks & 0x400) ? 0xffc0 : 0x7f80) //If enabled, Denormals are Zero for the recs and flush to zero is enabled as well
#define CHECK_FASTBRANCHES (Config.Hacks & 0x80)
// NOTE: both macros below are INVERTED with respect to their hack bit: they evaluate
// to true when the corresponding "Disable Extra ... Flags" speedhack bit in Config.Hacks
// is clear, i.e. when the recompilers should still generate the extra flag-setting code.
#define CHECK_VU_EXTRA_FLAGS (!(Config.Hacks & 0x100)) // Sets correct flags in the VU recs (true unless hack bit 0x100 is set)
#define CHECK_FPU_EXTRA_FLAGS (!(Config.Hacks & 0x200)) // Sets correct flags in the FPU recs (true unless hack bit 0x200 is set)
//------------ SPECIAL GAME FIXES!!! ---------------
#define CHECK_FPUCLAMPHACK (Config.GameFixes & 0x1) // Special Fix for GT4, different clamping for FPU (Note: sets negative infinity to positive fMax when clamping, which the real ps2 doesn't do)
#define CHECK_VUCLIPHACK (Config.GameFixes & 0x2) // Special Fix for GoW, updates the clipflag differently in recVUMI_CLIP() (note: turning this hack on, breaks Rockstar games)

View File

@ -759,8 +759,8 @@ BOOL APIENTRY HacksProc(HWND hDlg, UINT message, WPARAM wParam, LPARAM lParam) {
if(Config.Hacks & 0x20) CheckDlgButton(hDlg, IDC_SYNCHACK3, TRUE);
if(Config.Hacks & 0x40) CheckDlgButton(hDlg, IDC_VU_OVERFLOWHACK, 2);
if(Config.Hacks & 0x80) CheckDlgButton(hDlg, IDC_FASTBRANCHES, TRUE);
//if(Config.Hacks & 0x100) CheckDlgButton(hDlg, IDC_VUCLIPHACK, TRUE);
//if(Config.Hacks & 0x200) CheckDlgButton(hDlg, IDC_FPUCLAMPHACK, TRUE);
if(Config.Hacks & 0x100) CheckDlgButton(hDlg, IDC_VU_FLAGS, TRUE);
if(Config.Hacks & 0x200) CheckDlgButton(hDlg, IDC_FPU_FLAGS, TRUE);
//if(Config.Hacks & 0x400) CheckDlgButton(hDlg, IDC_DENORMALS, 2);
if(Config.Hacks & 0x800) CheckDlgButton(hDlg, IDC_FPU_OVERFLOWHACK, TRUE);
if(Config.Hacks & 0x1000) CheckDlgButton(hDlg, IDC_FPU_OVERFLOWHACK, 2);
@ -778,8 +778,8 @@ BOOL APIENTRY HacksProc(HWND hDlg, UINT message, WPARAM wParam, LPARAM lParam) {
Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_SYNCHACK2) ? 0x10 : 0;
Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_SYNCHACK3) ? 0x20 : 0;
Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_FASTBRANCHES) ? 0x80 : 0;
//Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_VUCLIPHACK) ? 0x100 : 0;
//Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_FPUCLAMPHACK) ? 0x200 : 0;
Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_VU_FLAGS) ? 0x100 : 0;
Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_FPU_FLAGS) ? 0x200 : 0;
//Config.Hacks |= ( IsDlgButtonChecked(hDlg, IDC_DENORMALS) == 2 ) ? 0x408 : (IsDlgButtonChecked(hDlg, IDC_DENORMALS) ? 0x8 : 0); // 0x408 == greyed checkbox (DaZ SSE flag; so the CPU sets denormals to zero)
Config.Hacks |= ( IsDlgButtonChecked(hDlg, IDC_FPU_OVERFLOWHACK) == 2 ) ? 0x1000 : (IsDlgButtonChecked(hDlg, IDC_FPU_OVERFLOWHACK) ? 0x800 : 0); // 0x1000 == greyed checkbox (extra overflow checking); 0x800 == checked (disable overflow checking)

View File

@ -1030,34 +1030,39 @@ BEGIN
CONTROL 132,IDC_PS2SILVER_RECT,"Static",SS_BITMAP,0,167,70,74
END
IDD_HACKS DIALOGEX 0, 0, 511, 243
IDD_HACKS DIALOGEX 0, 0, 511, 295
STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSMENU
CAPTION "PCSX2 Speed Hacks"
FONT 8, "MS Shell Dlg", 400, 0, 0x1
BEGIN
DEFPUSHBUTTON "OK",IDOK,205,222,50,14
PUSHBUTTON "Cancel",IDCANCEL,261,222,50,14
DEFPUSHBUTTON "OK",IDOK,205,274,50,14
PUSHBUTTON "Cancel",IDCANCEL,261,274,50,14
CONTROL "EE Sync Hack (x2) - Doubles the cycle rate of the EE. ( Big Speedup in most games! )",IDC_SYNCHACK,
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,111,418,10
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,162,418,10
CONTROL "Disable VU Overflow Checks - *Checked = Disables overflow checks. ( Speedup! ) *Greyed = Extra overflow checks. ( Helps SPS, Slow! )",IDC_VU_OVERFLOWHACK,
"Button",BS_AUTO3STATE | WS_TABSTOP,14,49,475,10
"Button",BS_AUTO3STATE | WS_TABSTOP,15,49,475,10
CTEXT "These hacks will effect the speed of PCSX2 but possibly comprimise on compatability",IDC_HACKDESC,7,7,497,8
CONTROL "Tighter SPU2 Sync ( FFXII vids) - Slower, not very useful anymore.",IDC_SOUNDHACK,
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,194,421,10
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,245,421,10
CONTROL "IOP Sync Hack (x2) - Doubles the cycle rate of the IOP. ( Speedup but breaks some games. )",IDC_SYNCHACK2,
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,125,410,10
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,176,410,10
CONTROL "EE/IOP Sync Hack (x3) - Makes EE and IOP hacks triple the cycle rate. ( Sometimes speeds games a bit more, but can break games. )",IDC_SYNCHACK3,
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,139,464,11
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,190,464,11
CONTROL "Disable FPU Overflow Checks - *Checked = Disables overflow checks. ( Speedup! ) *Greyed = Extra overflow checks. ( Helps SPS, Slow! )",IDC_FPU_OVERFLOWHACK,
"Button",BS_AUTO3STATE | WS_TABSTOP,14,63,483,10
"Button",BS_AUTO3STATE | WS_TABSTOP,15,63,483,10
CONTROL "EE/IOP Fast Branches - Quick branching ( Very small speedup; Not Recommended! )",IDC_FASTBRANCHES,
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,180,423,10
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,231,423,10
CTEXT "If you have problems, disable all these and try again",IDC_STATIC,7,22,497,8
GROUPBOX "Overflow and Underflow",IDC_STATIC,7,36,497,58
CONTROL "Disable Underflow Checks - *Checked = Disables underflow checks. ( Speedup! )",IDC_DENORMALS,
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,77,319,10
GROUPBOX "Sync Hacks",IDC_STATIC,7,98,497,63
GROUPBOX "Miscellaneous",IDC_STATIC,7,165,497,50
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,77,319,10
GROUPBOX "Sync Hacks",IDC_STATIC,7,149,497,63
GROUPBOX "Miscellaneous",IDC_STATIC,7,216,497,50
GROUPBOX "Flag Setting",IDC_STATIC,7,100,497,41
CONTROL "Disable Extra VU Flags - When checked, PCSX2 doesn't set some flags that are rarely used by games. ( Speedup! )",IDC_VU_FLAGS,
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,113,442,10
CONTROL "Disable Extra FPU Flags - When checked, PCSX2 doesn't set some flags that are rarely used by games. ( Speedup! )",IDC_FPU_FLAGS,
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,126,414,10
END
@ -1079,8 +1084,9 @@ BEGIN
BEGIN
LEFTMARGIN, 7
RIGHTMARGIN, 504
VERTGUIDE, 15
TOPMARGIN, 7
BOTTOMMARGIN, 236
BOTTOMMARGIN, 288
END
END
#endif // APSTUDIO_INVOKED

View File

@ -614,13 +614,15 @@
#define IDC_GAMEFIX1 1300
#define IDC_DENORMALS 1301
#define IDC_EE_CHECK2 1301
#define IDC_CHECK3 1301
#define IDC_GAMEFIX2 1301
#define IDC_VUCLIPHACK 1302
#define IDC_VU_CHECK1 1302
#define IDC_VU_FLAGS 1302
#define IDC_FRAMELIMIT_OPTIONS 1303
#define IDC_FPUCLAMPHACK 1303
#define IDC_VU_CHECK2 1303
#define IDC_VU_FLAGS2 1303
#define IDC_FPU_FLAGS 1303
#define IDC_ROUNDMODE 1304
#define IDC_EE_ROUNDMODE0 1305
#define IDC_EE_ROUNDMODE1 1306

View File

@ -978,25 +978,27 @@ void recSQRT_S_xmm(int info)
int tempReg;
u8* pjmp;
SysPrintf("FPU: SQRT \n");
SysPrintf("FPU: SQRT\n");
tempReg = _allocX86reg(-1, X86TYPE_TEMP, 0, 0);
if (tempReg == -1) {SysPrintf("FPU: SQRT Allocation Error! \n"); tempReg = EAX;}
if (tempReg == -1) {SysPrintf("FPU: SQRT Allocation Error!\n"); tempReg = EAX;}
if( info & PROCESS_EE_T ) {
if ( EEREC_D != EEREC_T ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_T);
}
else SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Ft_]);
AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagI|FPUflagD)); // Clear I and D flags
if (CHECK_FPU_EXTRA_FLAGS) {
AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagI|FPUflagD)); // Clear I and D flags
/*--- Check for negative SQRT ---*/
XOR32RtoR(tempReg, tempReg);
SSE_MOVMSKPS_XMM_to_R32(tempReg, EEREC_D);
AND32ItoR(tempReg, 1); //Check sign
pjmp = JZ8(0); //Skip if none are
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagI|FPUflagSI); // Set I and SI flags
SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_pos[0]); // Make EEREC_D Positive
x86SetJ8(pjmp);
/*--- Check for negative SQRT ---*/
SSE_MOVMSKPS_XMM_to_R32(tempReg, EEREC_D);
AND32ItoR(tempReg, 1); //Check sign
pjmp = JZ8(0); //Skip if none are
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagI|FPUflagSI); // Set I and SI flags
SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_pos[0]); // Make EEREC_D Positive
x86SetJ8(pjmp);
}
else SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_pos[0]); // Make EEREC_D Positive
if (CHECK_FPU_OVERFLOW) // Only need to do positive clamp, since EEREC_D is positive
SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)&g_maxvals[0]);
@ -1051,54 +1053,105 @@ void recNEG_S_xmm(int info) {
FPURECOMPILE_CONSTCODE(NEG_S, XMMINFO_WRITED|XMMINFO_READS);
void recRSQRT_S_xmm(int info)
{
int t0reg = _allocTempXMMreg(XMMT_FPS, -1);
// Performs the RSQRT function when regd <- Fs and t0reg <- Ft (Sets correct flags)
//
// regd  - XMM register that holds Fs on entry and receives the result.
// t0reg - XMM register that holds Ft on entry; it is modified (made positive, optionally
//         clamped, then square-rooted) by the generated code.
//
// Sequence of the generated code:
//   1. Clear the I and D flags in fpuRegs.fprc[31].
//   2. If t0reg compares equal to zero: set the D and SD flags, load regd with a
//      signed maximum value, and jump straight to the end (skipping the sqrt/divide).
//   3. Otherwise, if t0reg's sign bit is set: set the I and SI flags and make t0reg positive.
//   4. Optionally clamp the operands (CHECK_FPU_EXTRA_OVERFLOW), then compute
//      regd = regd / sqrt(t0reg) and clamp the result.
//
// A temporary XMM register and a temporary x86 register are allocated here and freed
// before returning.
void recRSQRThelper1(int regd, int t0reg)
{
u8* pjmp1;
u8* pjmp2;
u32* pjmp32;
int t1reg = _allocTempXMMreg(XMMT_FPS, -1);
int tempReg = _allocX86reg(-1, X86TYPE_TEMP, 0, 0);
// NOTE(review): on XMM allocation failure only a message is printed; t1reg (-1) is still used below.
if (t1reg == -1) {SysPrintf("FPU: RSQRT Allocation Error!\n");}
// On x86 allocation failure, fall back to EAX (which is also passed to _freeX86reg at the end).
if (tempReg == -1) {SysPrintf("FPU: RSQRT Allocation Error!\n"); tempReg = EAX;}
AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagI|FPUflagD)); // Clear I and D flags
/*--- Check for zero ---*/
// t1reg = 0, then compare-equal against t0reg; the compare result's sign bit is extracted into tempReg.
SSE_XORPS_XMM_to_XMM(t1reg, t1reg);
SSE_CMPEQSS_XMM_to_XMM(t1reg, t0reg);
SSE_MOVMSKPS_XMM_to_R32(tempReg, t1reg);
AND32ItoR(tempReg, 1); //Check sign (if t0reg == zero, sign will be set)
pjmp1 = JZ8(0); //Skip if not set
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagD|FPUflagSD); // Set D and SD flags
SSE_XORPS_XMM_to_XMM(regd, t0reg); // Make regd Positive or Negative
SSE_ANDPS_M128_to_XMM(regd, (uptr)&s_neg[0]); // Get the sign bit
SSE_ORPS_M128_to_XMM(regd, (uptr)&g_maxvals[0]); // regd = +/- Maximum
pjmp32 = JMP32(0); // Divide-by-zero result is final: jump past the sqrt/divide (patched by x86SetJ32 below)
x86SetJ8(pjmp1); // Landing point for the "t0reg is not zero" path
/*--- Check for negative SQRT ---*/
SSE_MOVMSKPS_XMM_to_R32(tempReg, t0reg);
AND32ItoR(tempReg, 1); //Check sign
pjmp2 = JZ8(0); //Skip if not set
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagI|FPUflagSI); // Set I and SI flags
SSE_ANDPS_M128_to_XMM(t0reg, (uptr)&s_pos[0]); // Make t0reg Positive
x86SetJ8(pjmp2); // Landing point for the "t0reg is not negative" path
// Extra pre-clamping of both operands, generated only when CHECK_FPU_EXTRA_OVERFLOW is true.
if (CHECK_FPU_EXTRA_OVERFLOW) {
SSE_MINSS_M32_to_XMM(t0reg, (uptr)&g_maxvals[0]); // Only need to do positive clamp, since t0reg is positive
ClampValues(regd);
}
// regd = regd / sqrt(t0reg), then clamp the result.
SSE_SQRTSS_XMM_to_XMM(t0reg, t0reg);
SSE_DIVSS_XMM_to_XMM(regd, t0reg);
ClampValues(regd);
x86SetJ32(pjmp32); // Landing point for the divide-by-zero path
_freeXMMreg(t1reg);
_freeX86reg(tempReg);
}
// Performs the RSQRT function when regd <- Fs and t0reg <- Ft (Doesn't set flags)
//
// regd  - XMM register that holds Fs on entry and receives the result.
// t0reg - XMM register that holds Ft on entry; it is modified by the generated code.
//
// Unlike recRSQRThelper1, this variant never touches fpuRegs.fprc[31], generates no
// zero check and no branches, and allocates no temporary registers: t0reg is
// unconditionally made positive and the sqrt/divide always runs.
void recRSQRThelper2(int regd, int t0reg)
{
SSE_ANDPS_M128_to_XMM(t0reg, (uptr)&s_pos[0]); // Make t0reg Positive
// Extra pre-clamping of both operands, generated only when CHECK_FPU_EXTRA_OVERFLOW is true.
if (CHECK_FPU_EXTRA_OVERFLOW) {
SSE_MINSS_M32_to_XMM(t0reg, (uptr)&g_maxvals[0]); // Only need to do positive clamp, since t0reg is positive
ClampValues(regd);
}
// regd = regd / sqrt(t0reg), then clamp the result.
SSE_SQRTSS_XMM_to_XMM(t0reg, t0reg);
SSE_DIVSS_XMM_to_XMM(regd, t0reg);
ClampValues(regd);
}
void recRSQRT_S_xmm(int info)
{
int t0reg = _allocTempXMMreg(XMMT_FPS, -1);
SysPrintf("FPU: RSQRT\n");
if (t0reg == -1) {SysPrintf("FPU: RSQRT Allocation Error!\n");}
switch(info & (PROCESS_EE_S|PROCESS_EE_T) ) {
case PROCESS_EE_S:
if( EEREC_D == EEREC_S ) {
SSE_SQRTSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg);
}
else {
SSE_SQRTSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg);
}
//SysPrintf("FPU: RSQRT case 1\n");
if( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
if (CHECK_FPU_EXTRA_FLAGS) recRSQRThelper1(EEREC_D, t0reg);
else recRSQRThelper2(EEREC_D, t0reg);
break;
case PROCESS_EE_T:
SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T);
SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]);
SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg);
case PROCESS_EE_T:
//SysPrintf("FPU: RSQRT case 2\n");
SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_T);
SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]);
if (CHECK_FPU_EXTRA_FLAGS) recRSQRThelper1(EEREC_D, t0reg);
else recRSQRThelper2(EEREC_D, t0reg);
break;
case (PROCESS_EE_S|PROCESS_EE_T):
//SysPrintf("FPU: RSQRT case 3\n");
SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_T);
if( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
if (CHECK_FPU_EXTRA_FLAGS) recRSQRThelper1(EEREC_D, t0reg);
else recRSQRThelper2(EEREC_D, t0reg);
break;
default:
if( (info & PROCESS_EE_T) && (info & PROCESS_EE_S) ) {
if( EEREC_D == EEREC_T ){
SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T);
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg);
}
else if( EEREC_D == EEREC_S ){
SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T);
SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg);
} else {
SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T);
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg);
}
}else{
SSE_SQRTSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]);
SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg);
}
//SysPrintf("FPU: RSQRT case 4\n");
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]);
if (CHECK_FPU_EXTRA_FLAGS) recRSQRThelper1(EEREC_D, t0reg);
else recRSQRThelper2(EEREC_D, t0reg);
break;
}
_freeXMMreg(t0reg);
ClampValues(EEREC_D);
}
FPURECOMPILE_CONSTCODE(RSQRT_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);

View File

@ -1398,53 +1398,53 @@ void recUpdateFlags(VURegs * VU, int reg, int info)
if( EEREC_TEMP != reg ) {
SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x1B); // Flip wzyx to xyzw
//-------------------------Check for Overflow flags------------------------------
SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // Clear EEREC_TEMP
SSE_CMPUNORDPS_XMM_to_XMM(EEREC_TEMP, reg); // If reg == NaN then set Vector to 0xFFFFFFFF
XOR32RtoR(x86macflag, x86macflag); // Clear Mac Flag
SSE_MOVMSKPS_XMM_to_R32(x86newflag, EEREC_TEMP); // Move the sign bits of the previous calculation
XOR32RtoR(x86temp, x86temp); //Clear x86temp
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Overflowed" bits from the previous calculation (also make sure we're only grabbing from the XYZW being modified)
pjmp = JZ8(0); // Skip if none are
OR32ItoR(x86temp, 8); // Set if they are
x86SetJ8(pjmp);
if (CHECK_VU_EXTRA_FLAGS) {
//-------------------------Check for Overflow flags------------------------------
OR32RtoR(x86macflag, x86newflag);
SHL32ItoR(x86macflag, 4); // Shift the Overflow flags left 4
SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // Clear EEREC_TEMP
SSE_CMPUNORDPS_XMM_to_XMM(EEREC_TEMP, reg); // If reg == NaN then set Vector to 0xFFFFFFFF
//-------------------------Check for Underflow flags------------------------------
SSE_MOVMSKPS_XMM_to_R32(x86newflag, EEREC_TEMP); // Move the sign bits of the previous calculation
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, reg); // EEREC_TEMP <- reg
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Overflowed" bits from the previous calculation (also make sure we're only grabbing from the XYZW being modified)
pjmp = JZ8(0); // Skip if none are
OR32ItoR(x86temp, 8); // Set if they are
x86SetJ8(pjmp);
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Underflow_Mask1[ 0 ]);
SSE_CMPEQPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Zero_Mask[ 0 ]); // If (EEREC_TEMP == zero exponent) then set Vector to 0xFFFFFFFF
OR32RtoR(x86macflag, x86newflag);
SHL32ItoR(x86macflag, 4); // Shift the Overflow flags left 4
SSE_ANDPS_XMM_to_XMM(EEREC_TEMP, reg);
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Underflow_Mask2[ 0 ]);
SSE_CMPNEPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Zero_Mask[ 0 ]); // If (EEREC_TEMP != zero mantisa) then set Vector to 0xFFFFFFFF
//-------------------------Check for Underflow flags------------------------------
SSE_MOVMSKPS_XMM_to_R32(x86newflag, EEREC_TEMP); // Move the sign bits of the previous calculation
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, reg); // EEREC_TEMP <- reg
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Underflowed" bits from the previous calculation
pjmp = JZ8(0); // Skip if none are
OR32ItoR(x86temp, 4); // Set if they are
x86SetJ8(pjmp);
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Underflow_Mask1[ 0 ]);
SSE_CMPEQPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Zero_Mask[ 0 ]); // If (EEREC_TEMP == zero exponent) then set Vector to 0xFFFFFFFF
OR32RtoR(x86macflag, x86newflag);
SHL32ItoR(x86macflag, 4); // Shift the Overflow and Underflow flags left 4
SSE_ANDPS_XMM_to_XMM(EEREC_TEMP, reg);
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Underflow_Mask2[ 0 ]);
SSE_CMPNEPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Zero_Mask[ 0 ]); // If (EEREC_TEMP != zero mantisa) then set Vector to 0xFFFFFFFF
//-------------------------Optional Code: Denormals Are Zero------------------------------
if (CHECK_UNDERFLOW) { // Sets underflow/denormals to zero
SSE_ANDNPS_XMM_to_XMM(EEREC_TEMP, reg); // EEREC_TEMP = !EEREC_TEMP & reg
// Now we have Denormals are Positive Zero in EEREC_TEMP; the next two lines take Signed Zero into account
SSE_ANDPS_M128_to_XMM(reg, (uptr)&VU_Signed_Zero_Mask[ 0 ]);
SSE_ORPS_XMM_to_XMM(reg, EEREC_TEMP);
SSE_MOVMSKPS_XMM_to_R32(x86newflag, EEREC_TEMP); // Move the sign bits of the previous calculation
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Underflowed" bits from the previous calculation
pjmp = JZ8(0); // Skip if none are
OR32ItoR(x86temp, 4); // Set if they are
x86SetJ8(pjmp);
OR32RtoR(x86macflag, x86newflag);
SHL32ItoR(x86macflag, 4); // Shift the Overflow and Underflow flags left 4
//-------------------------Optional Code: Denormals Are Zero------------------------------
if (CHECK_UNDERFLOW) { // Sets underflow/denormals to zero
SSE_ANDNPS_XMM_to_XMM(EEREC_TEMP, reg); // EEREC_TEMP = !EEREC_TEMP & reg
// Now we have Denormals are Positive Zero in EEREC_TEMP; the next two lines take Signed Zero into account
SSE_ANDPS_M128_to_XMM(reg, (uptr)&VU_Signed_Zero_Mask[ 0 ]);
SSE_ORPS_XMM_to_XMM(reg, EEREC_TEMP);
}
}
//-------------------------Check for Signed flags------------------------------
@ -1512,53 +1512,53 @@ void recUpdateFlags(VURegs * VU, int reg, int info)
}
SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x1B); // Flip wzyx to xyzw
//-------------------------Check for Overflow flags------------------------------
SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg
SSE_CMPUNORDPS_XMM_to_XMM(t1reg, reg); // If reg == NaN then set Vector to 0xFFFFFFFF
XOR32RtoR(x86macflag, x86macflag); // Clear Mac Flag
SSE_MOVMSKPS_XMM_to_R32(x86newflag, t1reg); // Move the sign bits of the previous calculation
XOR32RtoR(x86temp, x86temp); //Clear x86temp
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Overflowed" bits from the previous calculation (also make sure we're only grabbing from the XYZW being modified)
pjmp = JZ8(0); // Skip if none are
OR32ItoR(x86temp, 8); // Set if they are
x86SetJ8(pjmp);
if (CHECK_VU_EXTRA_FLAGS) {
//-------------------------Check for Overflow flags------------------------------
OR32RtoR(x86macflag, x86newflag);
SHL32ItoR(x86macflag, 4); // Shift the Overflow flags left 4
SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg
SSE_CMPUNORDPS_XMM_to_XMM(t1reg, reg); // If reg == NaN then set Vector to 0xFFFFFFFF
//-------------------------Check for Underflow flags------------------------------
SSE_MOVMSKPS_XMM_to_R32(x86newflag, t1reg); // Move the sign bits of the previous calculation
SSE_MOVAPS_XMM_to_XMM(t1reg, reg); // t1reg <- reg
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Overflowed" bits from the previous calculation (also make sure we're only grabbing from the XYZW being modified)
pjmp = JZ8(0); // Skip if none are
OR32ItoR(x86temp, 8); // Set if they are
x86SetJ8(pjmp);
SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Underflow_Mask1[ 0 ]);
SSE_CMPEQPS_M128_to_XMM(t1reg, (uptr)&VU_Zero_Mask[ 0 ]); // If (t1reg == zero exponent) then set Vector to 0xFFFFFFFF
OR32RtoR(x86macflag, x86newflag);
SHL32ItoR(x86macflag, 4); // Shift the Overflow flags left 4
SSE_ANDPS_XMM_to_XMM(t1reg, reg);
SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Underflow_Mask2[ 0 ]);
SSE_CMPNEPS_M128_to_XMM(t1reg, (uptr)&VU_Zero_Mask[ 0 ]); // If (t1reg != zero mantisa) then set Vector to 0xFFFFFFFF
//-------------------------Check for Underflow flags------------------------------
SSE_MOVMSKPS_XMM_to_R32(x86newflag, t1reg); // Move the sign bits of the previous calculation
SSE_MOVAPS_XMM_to_XMM(t1reg, reg); // t1reg <- reg
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Underflowed" bits from the previous calculation
pjmp = JZ8(0); // Skip if none are
OR32ItoR(x86temp, 4); // Set if they are
x86SetJ8(pjmp);
SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Underflow_Mask1[ 0 ]);
SSE_CMPEQPS_M128_to_XMM(t1reg, (uptr)&VU_Zero_Mask[ 0 ]); // If (t1reg == zero exponent) then set Vector to 0xFFFFFFFF
OR32RtoR(x86macflag, x86newflag);
SHL32ItoR(x86macflag, 4); // Shift the Overflow and Underflow flags left 4
SSE_ANDPS_XMM_to_XMM(t1reg, reg);
SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Underflow_Mask2[ 0 ]);
SSE_CMPNEPS_M128_to_XMM(t1reg, (uptr)&VU_Zero_Mask[ 0 ]); // If (t1reg != zero mantisa) then set Vector to 0xFFFFFFFF
//-------------------------Optional Code: Denormals Are Zero------------------------------
if (CHECK_UNDERFLOW) { // Sets underflow/denormals to zero
SSE_ANDNPS_XMM_to_XMM(t1reg, reg); // t1reg = !t1reg & reg
// Now we have Denormals are Positive Zero in t1reg; the next two lines take Signed Zero into account
SSE_ANDPS_M128_to_XMM(reg, (uptr)&VU_Signed_Zero_Mask[ 0 ]);
SSE_ORPS_XMM_to_XMM(reg, t1reg);
SSE_MOVMSKPS_XMM_to_R32(x86newflag, t1reg); // Move the sign bits of the previous calculation
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Underflowed" bits from the previous calculation
pjmp = JZ8(0); // Skip if none are
OR32ItoR(x86temp, 4); // Set if they are
x86SetJ8(pjmp);
OR32RtoR(x86macflag, x86newflag);
SHL32ItoR(x86macflag, 4); // Shift the Overflow and Underflow flags left 4
//-------------------------Optional Code: Denormals Are Zero------------------------------
if (CHECK_UNDERFLOW) { // Sets underflow/denormals to zero
SSE_ANDNPS_XMM_to_XMM(t1reg, reg); // t1reg = !t1reg & reg
// Now we have Denormals are Positive Zero in t1reg; the next two lines take Signed Zero into account
SSE_ANDPS_M128_to_XMM(reg, (uptr)&VU_Signed_Zero_Mask[ 0 ]);
SSE_ORPS_XMM_to_XMM(reg, t1reg);
}
}
//-------------------------Check for Signed flags------------------------------