mirror of https://github.com/PCSX2/pcsx2.git
Recoded some more FPU opcodes and added two new speedhacks: "Disable Extra VU Flags" and "Disable Extra FPU Flags".
On the PS2, certain "flags" are set to indicate different statuses: overflow, underflow, invalid operation, divide by zero, is-zero, is-negative, etc. Some of these flags are rarely checked by games, so these speedhacks skip the extra code that computes those rarely used flags. git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@95 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
parent
bfc387a024
commit
2d805b4987
|
@ -63,6 +63,9 @@
|
|||
#define CHECK_UNDERFLOW (!(Config.Hacks & 0x8))
|
||||
//#define CHECK_DENORMALS ((Config.Hacks & 0x400) ? 0xffc0 : 0x7f80) //If enabled, Denormals are Zero for the recs and flush to zero is enabled as well
|
||||
#define CHECK_FASTBRANCHES (Config.Hacks & 0x80)
|
||||
#define CHECK_VU_EXTRA_FLAGS (!(Config.Hacks & 0x100)) // Sets correct flags in the VU recs
|
||||
#define CHECK_FPU_EXTRA_FLAGS (!(Config.Hacks & 0x200)) // Sets correct flags in the FPU recs
|
||||
|
||||
//------------ SPECIAL GAME FIXES!!! ---------------
|
||||
#define CHECK_FPUCLAMPHACK (Config.GameFixes & 0x1) // Special Fix for GT4, different clamping for FPU (Note: sets negative infinity to positive fMax when clamping, which the real ps2 doesn't do)
|
||||
#define CHECK_VUCLIPHACK (Config.GameFixes & 0x2) // Special Fix for GoW, updates the clipflag differently in recVUMI_CLIP() (note: turning this hack on, breaks Rockstar games)
|
||||
|
|
|
@ -759,8 +759,8 @@ BOOL APIENTRY HacksProc(HWND hDlg, UINT message, WPARAM wParam, LPARAM lParam) {
|
|||
if(Config.Hacks & 0x20) CheckDlgButton(hDlg, IDC_SYNCHACK3, TRUE);
|
||||
if(Config.Hacks & 0x40) CheckDlgButton(hDlg, IDC_VU_OVERFLOWHACK, 2);
|
||||
if(Config.Hacks & 0x80) CheckDlgButton(hDlg, IDC_FASTBRANCHES, TRUE);
|
||||
//if(Config.Hacks & 0x100) CheckDlgButton(hDlg, IDC_VUCLIPHACK, TRUE);
|
||||
//if(Config.Hacks & 0x200) CheckDlgButton(hDlg, IDC_FPUCLAMPHACK, TRUE);
|
||||
if(Config.Hacks & 0x100) CheckDlgButton(hDlg, IDC_VU_FLAGS, TRUE);
|
||||
if(Config.Hacks & 0x200) CheckDlgButton(hDlg, IDC_FPU_FLAGS, TRUE);
|
||||
//if(Config.Hacks & 0x400) CheckDlgButton(hDlg, IDC_DENORMALS, 2);
|
||||
if(Config.Hacks & 0x800) CheckDlgButton(hDlg, IDC_FPU_OVERFLOWHACK, TRUE);
|
||||
if(Config.Hacks & 0x1000) CheckDlgButton(hDlg, IDC_FPU_OVERFLOWHACK, 2);
|
||||
|
@ -778,8 +778,8 @@ BOOL APIENTRY HacksProc(HWND hDlg, UINT message, WPARAM wParam, LPARAM lParam) {
|
|||
Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_SYNCHACK2) ? 0x10 : 0;
|
||||
Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_SYNCHACK3) ? 0x20 : 0;
|
||||
Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_FASTBRANCHES) ? 0x80 : 0;
|
||||
//Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_VUCLIPHACK) ? 0x100 : 0;
|
||||
//Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_FPUCLAMPHACK) ? 0x200 : 0;
|
||||
Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_VU_FLAGS) ? 0x100 : 0;
|
||||
Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_FPU_FLAGS) ? 0x200 : 0;
|
||||
//Config.Hacks |= ( IsDlgButtonChecked(hDlg, IDC_DENORMALS) == 2 ) ? 0x408 : (IsDlgButtonChecked(hDlg, IDC_DENORMALS) ? 0x8 : 0); // 0x408 == greyed checkbox (DaZ SSE flag; so the CPU sets denormals to zero)
|
||||
Config.Hacks |= ( IsDlgButtonChecked(hDlg, IDC_FPU_OVERFLOWHACK) == 2 ) ? 0x1000 : (IsDlgButtonChecked(hDlg, IDC_FPU_OVERFLOWHACK) ? 0x800 : 0); // 0x1000 == greyed checkbox (extra overflow checking); 0x800 == checked (disable overflow checking)
|
||||
|
||||
|
|
|
@ -1030,34 +1030,39 @@ BEGIN
|
|||
CONTROL 132,IDC_PS2SILVER_RECT,"Static",SS_BITMAP,0,167,70,74
|
||||
END
|
||||
|
||||
IDD_HACKS DIALOGEX 0, 0, 511, 243
|
||||
IDD_HACKS DIALOGEX 0, 0, 511, 295
|
||||
STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSMENU
|
||||
CAPTION "PCSX2 Speed Hacks"
|
||||
FONT 8, "MS Shell Dlg", 400, 0, 0x1
|
||||
BEGIN
|
||||
DEFPUSHBUTTON "OK",IDOK,205,222,50,14
|
||||
PUSHBUTTON "Cancel",IDCANCEL,261,222,50,14
|
||||
DEFPUSHBUTTON "OK",IDOK,205,274,50,14
|
||||
PUSHBUTTON "Cancel",IDCANCEL,261,274,50,14
|
||||
CONTROL "EE Sync Hack (x2) - Doubles the cycle rate of the EE. ( Big Speedup in most games! )",IDC_SYNCHACK,
|
||||
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,111,418,10
|
||||
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,162,418,10
|
||||
CONTROL "Disable VU Overflow Checks - *Checked = Disables overflow checks. ( Speedup! ) *Greyed = Extra overflow checks. ( Helps SPS, Slow! )",IDC_VU_OVERFLOWHACK,
|
||||
"Button",BS_AUTO3STATE | WS_TABSTOP,14,49,475,10
|
||||
"Button",BS_AUTO3STATE | WS_TABSTOP,15,49,475,10
|
||||
CTEXT "These hacks will effect the speed of PCSX2 but possibly comprimise on compatability",IDC_HACKDESC,7,7,497,8
|
||||
CONTROL "Tighter SPU2 Sync ( FFXII vids) - Slower, not very useful anymore.",IDC_SOUNDHACK,
|
||||
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,194,421,10
|
||||
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,245,421,10
|
||||
CONTROL "IOP Sync Hack (x2) - Doubles the cycle rate of the IOP. ( Speedup but breaks some games. )",IDC_SYNCHACK2,
|
||||
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,125,410,10
|
||||
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,176,410,10
|
||||
CONTROL "EE/IOP Sync Hack (x3) - Makes EE and IOP hacks triple the cycle rate. ( Sometimes speeds games a bit more, but can break games. )",IDC_SYNCHACK3,
|
||||
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,139,464,11
|
||||
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,190,464,11
|
||||
CONTROL "Disable FPU Overflow Checks - *Checked = Disables overflow checks. ( Speedup! ) *Greyed = Extra overflow checks. ( Helps SPS, Slow! )",IDC_FPU_OVERFLOWHACK,
|
||||
"Button",BS_AUTO3STATE | WS_TABSTOP,14,63,483,10
|
||||
"Button",BS_AUTO3STATE | WS_TABSTOP,15,63,483,10
|
||||
CONTROL "EE/IOP Fast Branches - Quick branching ( Very small speedup; Not Recommended! )",IDC_FASTBRANCHES,
|
||||
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,180,423,10
|
||||
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,231,423,10
|
||||
CTEXT "If you have problems, disable all these and try again",IDC_STATIC,7,22,497,8
|
||||
GROUPBOX "Overflow and Underflow",IDC_STATIC,7,36,497,58
|
||||
CONTROL "Disable Underflow Checks - *Checked = Disables underflow checks. ( Speedup! )",IDC_DENORMALS,
|
||||
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,77,319,10
|
||||
GROUPBOX "Sync Hacks",IDC_STATIC,7,98,497,63
|
||||
GROUPBOX "Miscellaneous",IDC_STATIC,7,165,497,50
|
||||
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,77,319,10
|
||||
GROUPBOX "Sync Hacks",IDC_STATIC,7,149,497,63
|
||||
GROUPBOX "Miscellaneous",IDC_STATIC,7,216,497,50
|
||||
GROUPBOX "Flag Setting",IDC_STATIC,7,100,497,41
|
||||
CONTROL "Disable Extra VU Flags - When checked, PCSX2 doesn't set some flags that are rarely used by games. ( Speedup! )",IDC_VU_FLAGS,
|
||||
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,113,442,10
|
||||
CONTROL "Disable Extra FPU Flags - When checked, PCSX2 doesn't set some flags that are rarely used by games. ( Speedup! )",IDC_FPU_FLAGS,
|
||||
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,15,126,414,10
|
||||
END
|
||||
|
||||
|
||||
|
@ -1079,8 +1084,9 @@ BEGIN
|
|||
BEGIN
|
||||
LEFTMARGIN, 7
|
||||
RIGHTMARGIN, 504
|
||||
VERTGUIDE, 15
|
||||
TOPMARGIN, 7
|
||||
BOTTOMMARGIN, 236
|
||||
BOTTOMMARGIN, 288
|
||||
END
|
||||
END
|
||||
#endif // APSTUDIO_INVOKED
|
||||
|
|
|
@ -614,13 +614,15 @@
|
|||
#define IDC_GAMEFIX1 1300
|
||||
#define IDC_DENORMALS 1301
|
||||
#define IDC_EE_CHECK2 1301
|
||||
#define IDC_CHECK3 1301
|
||||
#define IDC_GAMEFIX2 1301
|
||||
#define IDC_VUCLIPHACK 1302
|
||||
#define IDC_VU_CHECK1 1302
|
||||
#define IDC_VU_FLAGS 1302
|
||||
#define IDC_FRAMELIMIT_OPTIONS 1303
|
||||
#define IDC_FPUCLAMPHACK 1303
|
||||
#define IDC_VU_CHECK2 1303
|
||||
#define IDC_VU_FLAGS2 1303
|
||||
#define IDC_FPU_FLAGS 1303
|
||||
#define IDC_ROUNDMODE 1304
|
||||
#define IDC_EE_ROUNDMODE0 1305
|
||||
#define IDC_EE_ROUNDMODE1 1306
|
||||
|
|
147
pcsx2/x86/iFPU.c
147
pcsx2/x86/iFPU.c
|
@ -978,25 +978,27 @@ void recSQRT_S_xmm(int info)
|
|||
int tempReg;
|
||||
u8* pjmp;
|
||||
|
||||
SysPrintf("FPU: SQRT \n");
|
||||
SysPrintf("FPU: SQRT\n");
|
||||
tempReg = _allocX86reg(-1, X86TYPE_TEMP, 0, 0);
|
||||
if (tempReg == -1) {SysPrintf("FPU: SQRT Allocation Error! \n"); tempReg = EAX;}
|
||||
if (tempReg == -1) {SysPrintf("FPU: SQRT Allocation Error!\n"); tempReg = EAX;}
|
||||
|
||||
if( info & PROCESS_EE_T ) {
|
||||
if ( EEREC_D != EEREC_T ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_T);
|
||||
}
|
||||
else SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
|
||||
AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagI|FPUflagD)); // Clear I and D flags
|
||||
if (CHECK_FPU_EXTRA_FLAGS) {
|
||||
AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagI|FPUflagD)); // Clear I and D flags
|
||||
|
||||
/*--- Check for negative SQRT ---*/
|
||||
XOR32RtoR(tempReg, tempReg);
|
||||
SSE_MOVMSKPS_XMM_to_R32(tempReg, EEREC_D);
|
||||
AND32ItoR(tempReg, 1); //Check sign
|
||||
pjmp = JZ8(0); //Skip if none are
|
||||
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagI|FPUflagSI); // Set I and SI flags
|
||||
SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_pos[0]); // Make EEREC_D Positive
|
||||
x86SetJ8(pjmp);
|
||||
/*--- Check for negative SQRT ---*/
|
||||
SSE_MOVMSKPS_XMM_to_R32(tempReg, EEREC_D);
|
||||
AND32ItoR(tempReg, 1); //Check sign
|
||||
pjmp = JZ8(0); //Skip if none are
|
||||
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagI|FPUflagSI); // Set I and SI flags
|
||||
SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_pos[0]); // Make EEREC_D Positive
|
||||
x86SetJ8(pjmp);
|
||||
}
|
||||
else SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_pos[0]); // Make EEREC_D Positive
|
||||
|
||||
if (CHECK_FPU_OVERFLOW) // Only need to do positive clamp, since EEREC_D is positive
|
||||
SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)&g_maxvals[0]);
|
||||
|
@ -1051,54 +1053,105 @@ void recNEG_S_xmm(int info) {
|
|||
|
||||
FPURECOMPILE_CONSTCODE(NEG_S, XMMINFO_WRITED|XMMINFO_READS);
|
||||
|
||||
// Performs the RSQRT function when regd <- Fs and t0reg <- Ft (Sets correct flags)
// Flag-setting variant: clears the I and D bits in fpuRegs.fprc[31], then sets D|SD when
// t0reg compares equal to zero, or I|SI when t0reg has its sign bit set.
// On the zero path regd becomes g_maxvals carrying the sign of (regd XOR t0reg) and the
// sqrt/divide/clamp sequence is jumped over; otherwise regd = regd / sqrt(|t0reg|), clamped.
// Both regd and t0reg are overwritten. The caller selects this helper when
// CHECK_FPU_EXTRA_FLAGS is true (see recRSQRT_S_xmm).
// NOTE(review): the SSE_*/AND32*/OR32*/JZ8/JMP32 calls presumably emit x86 code into the
// recompiler buffer rather than executing immediately -- confirm against the emitter headers.
|
||||
void recRSQRThelper1(int regd, int t0reg)
|
||||
{
|
||||
u8* pjmp1; // patch location of the "t0reg is not zero" short jump
|
||||
u8* pjmp2; // patch location of the "t0reg is not negative" short jump
|
||||
u32* pjmp32; // patch location of the 32-bit jump taken at the end of the zero path
|
||||
int t1reg = _allocTempXMMreg(XMMT_FPS, -1);
|
||||
int tempReg = _allocX86reg(-1, X86TYPE_TEMP, 0, 0);
|
||||
if (t1reg == -1) {SysPrintf("FPU: RSQRT Allocation Error!\n");} // NOTE(review): only logs; t1reg is still used (and freed) below as -1
|
||||
if (tempReg == -1) {SysPrintf("FPU: RSQRT Allocation Error!\n"); tempReg = EAX;} // NOTE(review): falls back to EAX, which is later passed to _freeX86reg()
|
||||
|
||||
AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagI|FPUflagD)); // Clear I and D flags
|
||||
|
||||
/*--- Check for zero ---*/
|
||||
SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // t1reg = 0
|
||||
SSE_CMPEQSS_XMM_to_XMM(t1reg, t0reg); // Compare the zeroed t1reg against t0reg
|
||||
SSE_MOVMSKPS_XMM_to_R32(tempReg, t1reg);
|
||||
AND32ItoR(tempReg, 1); //Check sign (if t0reg == zero, sign will be set)
|
||||
pjmp1 = JZ8(0); //Skip if not set
|
||||
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagD|FPUflagSD); // Set D and SD flags
|
||||
SSE_XORPS_XMM_to_XMM(regd, t0reg); // Make regd Positive or Negative
|
||||
SSE_ANDPS_M128_to_XMM(regd, (uptr)&s_neg[0]); // Get the sign bit
|
||||
SSE_ORPS_M128_to_XMM(regd, (uptr)&g_maxvals[0]); // regd = +/- Maximum
|
||||
pjmp32 = JMP32(0); // Zero path is done; target is patched by x86SetJ32() after the final clamp
|
||||
x86SetJ8(pjmp1);
|
||||
|
||||
/*--- Check for negative SQRT ---*/
|
||||
SSE_MOVMSKPS_XMM_to_R32(tempReg, t0reg);
|
||||
AND32ItoR(tempReg, 1); //Check sign
|
||||
pjmp2 = JZ8(0); //Skip if not set
|
||||
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagI|FPUflagSI); // Set I and SI flags
|
||||
SSE_ANDPS_M128_to_XMM(t0reg, (uptr)&s_pos[0]); // Make t0reg Positive
|
||||
x86SetJ8(pjmp2);
|
||||
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { // Macro defined outside this view; pre-clamps both operands before the math
|
||||
SSE_MINSS_M32_to_XMM(t0reg, (uptr)&g_maxvals[0]); // Only need to do positive clamp, since t0reg is positive
|
||||
ClampValues(regd);
|
||||
}
|
||||
|
||||
SSE_SQRTSS_XMM_to_XMM(t0reg, t0reg); // t0reg = sqrt(t0reg)
|
||||
SSE_DIVSS_XMM_to_XMM(regd, t0reg); // regd = regd / t0reg
|
||||
|
||||
ClampValues(regd); // Clamp the result (not applied on the zero path, which jumps past it)
|
||||
x86SetJ32(pjmp32); // Landing point for the zero path
|
||||
|
||||
_freeXMMreg(t1reg);
|
||||
_freeX86reg(tempReg);
|
||||
}
|
||||
|
||||
// Performs the RSQRT function when regd <- Fs and t0reg <- Ft (Doesn't set flags)
// Cheaper variant: never touches fpuRegs.fprc[31] and has no zero-divisor special case.
// It takes the absolute value of t0reg, then computes regd = regd / sqrt(t0reg) and clamps.
// Both regd and t0reg are overwritten. The caller selects this helper when
// CHECK_FPU_EXTRA_FLAGS is false (see recRSQRT_S_xmm).
|
||||
void recRSQRThelper2(int regd, int t0reg)
|
||||
{
|
||||
SSE_ANDPS_M128_to_XMM(t0reg, (uptr)&s_pos[0]); // Make t0reg Positive
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { // Macro defined outside this view; pre-clamps both operands before the math
|
||||
SSE_MINSS_M32_to_XMM(t0reg, (uptr)&g_maxvals[0]); // Only need to do positive clamp, since t0reg is positive
|
||||
ClampValues(regd);
|
||||
}
|
||||
SSE_SQRTSS_XMM_to_XMM(t0reg, t0reg); // t0reg = sqrt(t0reg)
|
||||
SSE_DIVSS_XMM_to_XMM(regd, t0reg); // regd = regd / t0reg
|
||||
ClampValues(regd); // Clamp the result
|
||||
}
|
||||
|
||||
void recRSQRT_S_xmm(int info)
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_FPS, -1);
|
||||
AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagI|FPUflagD)); // Clear I and D flags
|
||||
SysPrintf("FPU: RSQRT\n");
|
||||
if (t0reg == -1) {SysPrintf("FPU: RSQRT Allocation Error!\n");}
|
||||
|
||||
switch(info & (PROCESS_EE_S|PROCESS_EE_T) ) {
|
||||
case PROCESS_EE_S:
|
||||
if( EEREC_D == EEREC_S ) {
|
||||
SSE_SQRTSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg);
|
||||
}
|
||||
else {
|
||||
SSE_SQRTSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg);
|
||||
}
|
||||
|
||||
//SysPrintf("FPU: RSQRT case 1\n");
|
||||
if( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
if (CHECK_FPU_EXTRA_FLAGS) recRSQRThelper1(EEREC_D, t0reg);
|
||||
else recRSQRThelper2(EEREC_D, t0reg);
|
||||
break;
|
||||
case PROCESS_EE_T:
|
||||
SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T);
|
||||
SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||
|
||||
SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg);
|
||||
//SysPrintf("FPU: RSQRT case 2\n");
|
||||
SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_T);
|
||||
SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||
if (CHECK_FPU_EXTRA_FLAGS) recRSQRThelper1(EEREC_D, t0reg);
|
||||
else recRSQRThelper2(EEREC_D, t0reg);
|
||||
break;
|
||||
case (PROCESS_EE_S|PROCESS_EE_T):
|
||||
//SysPrintf("FPU: RSQRT case 3\n");
|
||||
SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_T);
|
||||
if( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
if (CHECK_FPU_EXTRA_FLAGS) recRSQRThelper1(EEREC_D, t0reg);
|
||||
else recRSQRThelper2(EEREC_D, t0reg);
|
||||
break;
|
||||
default:
|
||||
if( (info & PROCESS_EE_T) && (info & PROCESS_EE_S) ) {
|
||||
if( EEREC_D == EEREC_T ){
|
||||
SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T);
|
||||
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg);
|
||||
}
|
||||
else if( EEREC_D == EEREC_S ){
|
||||
SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T);
|
||||
SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg);
|
||||
} else {
|
||||
SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T);
|
||||
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
||||
SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg);
|
||||
}
|
||||
}else{
|
||||
SSE_SQRTSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||
SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg);
|
||||
}
|
||||
|
||||
//SysPrintf("FPU: RSQRT case 4\n");
|
||||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||
if (CHECK_FPU_EXTRA_FLAGS) recRSQRThelper1(EEREC_D, t0reg);
|
||||
else recRSQRThelper2(EEREC_D, t0reg);
|
||||
break;
|
||||
}
|
||||
_freeXMMreg(t0reg);
|
||||
ClampValues(EEREC_D);
|
||||
}
|
||||
|
||||
FPURECOMPILE_CONSTCODE(RSQRT_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
|
||||
|
|
|
@ -1398,53 +1398,53 @@ void recUpdateFlags(VURegs * VU, int reg, int info)
|
|||
if( EEREC_TEMP != reg ) {
|
||||
|
||||
SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x1B); // Flip wzyx to xyzw
|
||||
|
||||
//-------------------------Check for Overflow flags------------------------------
|
||||
|
||||
SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // Clear EEREC_TEMP
|
||||
SSE_CMPUNORDPS_XMM_to_XMM(EEREC_TEMP, reg); // If reg == NaN then set Vector to 0xFFFFFFFF
|
||||
|
||||
XOR32RtoR(x86macflag, x86macflag); // Clear Mac Flag
|
||||
|
||||
SSE_MOVMSKPS_XMM_to_R32(x86newflag, EEREC_TEMP); // Move the sign bits of the previous calculation
|
||||
|
||||
XOR32RtoR(x86temp, x86temp); //Clear x86temp
|
||||
|
||||
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Overflowed" bits from the previous calculation (also make sure we're only grabbing from the XYZW being modified)
|
||||
pjmp = JZ8(0); // Skip if none are
|
||||
OR32ItoR(x86temp, 8); // Set if they are
|
||||
x86SetJ8(pjmp);
|
||||
if (CHECK_VU_EXTRA_FLAGS) {
|
||||
//-------------------------Check for Overflow flags------------------------------
|
||||
|
||||
OR32RtoR(x86macflag, x86newflag);
|
||||
SHL32ItoR(x86macflag, 4); // Shift the Overflow flags left 4
|
||||
SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // Clear EEREC_TEMP
|
||||
SSE_CMPUNORDPS_XMM_to_XMM(EEREC_TEMP, reg); // If reg == NaN then set Vector to 0xFFFFFFFF
|
||||
|
||||
//-------------------------Check for Underflow flags------------------------------
|
||||
SSE_MOVMSKPS_XMM_to_R32(x86newflag, EEREC_TEMP); // Move the sign bits of the previous calculation
|
||||
|
||||
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, reg); // EEREC_TEMP <- reg
|
||||
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Overflowed" bits from the previous calculation (also make sure we're only grabbing from the XYZW being modified)
|
||||
pjmp = JZ8(0); // Skip if none are
|
||||
OR32ItoR(x86temp, 8); // Set if they are
|
||||
x86SetJ8(pjmp);
|
||||
|
||||
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Underflow_Mask1[ 0 ]);
|
||||
SSE_CMPEQPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Zero_Mask[ 0 ]); // If (EEREC_TEMP == zero exponent) then set Vector to 0xFFFFFFFF
|
||||
OR32RtoR(x86macflag, x86newflag);
|
||||
SHL32ItoR(x86macflag, 4); // Shift the Overflow flags left 4
|
||||
|
||||
SSE_ANDPS_XMM_to_XMM(EEREC_TEMP, reg);
|
||||
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Underflow_Mask2[ 0 ]);
|
||||
SSE_CMPNEPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Zero_Mask[ 0 ]); // If (EEREC_TEMP != zero mantisa) then set Vector to 0xFFFFFFFF
|
||||
//-------------------------Check for Underflow flags------------------------------
|
||||
|
||||
SSE_MOVMSKPS_XMM_to_R32(x86newflag, EEREC_TEMP); // Move the sign bits of the previous calculation
|
||||
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, reg); // EEREC_TEMP <- reg
|
||||
|
||||
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Underflowed" bits from the previous calculation
|
||||
pjmp = JZ8(0); // Skip if none are
|
||||
OR32ItoR(x86temp, 4); // Set if they are
|
||||
x86SetJ8(pjmp);
|
||||
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Underflow_Mask1[ 0 ]);
|
||||
SSE_CMPEQPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Zero_Mask[ 0 ]); // If (EEREC_TEMP == zero exponent) then set Vector to 0xFFFFFFFF
|
||||
|
||||
OR32RtoR(x86macflag, x86newflag);
|
||||
SHL32ItoR(x86macflag, 4); // Shift the Overflow and Underflow flags left 4
|
||||
SSE_ANDPS_XMM_to_XMM(EEREC_TEMP, reg);
|
||||
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Underflow_Mask2[ 0 ]);
|
||||
SSE_CMPNEPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Zero_Mask[ 0 ]); // If (EEREC_TEMP != zero mantisa) then set Vector to 0xFFFFFFFF
|
||||
|
||||
//-------------------------Optional Code: Denormals Are Zero------------------------------
|
||||
if (CHECK_UNDERFLOW) { // Sets underflow/denormals to zero
|
||||
SSE_ANDNPS_XMM_to_XMM(EEREC_TEMP, reg); // EEREC_TEMP = !EEREC_TEMP & reg
|
||||
// Now we have Denormals are Positive Zero in EEREC_TEMP; the next two lines take Signed Zero into account
|
||||
SSE_ANDPS_M128_to_XMM(reg, (uptr)&VU_Signed_Zero_Mask[ 0 ]);
|
||||
SSE_ORPS_XMM_to_XMM(reg, EEREC_TEMP);
|
||||
SSE_MOVMSKPS_XMM_to_R32(x86newflag, EEREC_TEMP); // Move the sign bits of the previous calculation
|
||||
|
||||
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Underflowed" bits from the previous calculation
|
||||
pjmp = JZ8(0); // Skip if none are
|
||||
OR32ItoR(x86temp, 4); // Set if they are
|
||||
x86SetJ8(pjmp);
|
||||
|
||||
OR32RtoR(x86macflag, x86newflag);
|
||||
SHL32ItoR(x86macflag, 4); // Shift the Overflow and Underflow flags left 4
|
||||
|
||||
//-------------------------Optional Code: Denormals Are Zero------------------------------
|
||||
if (CHECK_UNDERFLOW) { // Sets underflow/denormals to zero
|
||||
SSE_ANDNPS_XMM_to_XMM(EEREC_TEMP, reg); // EEREC_TEMP = !EEREC_TEMP & reg
|
||||
// Now we have Denormals are Positive Zero in EEREC_TEMP; the next two lines take Signed Zero into account
|
||||
SSE_ANDPS_M128_to_XMM(reg, (uptr)&VU_Signed_Zero_Mask[ 0 ]);
|
||||
SSE_ORPS_XMM_to_XMM(reg, EEREC_TEMP);
|
||||
}
|
||||
}
|
||||
//-------------------------Check for Signed flags------------------------------
|
||||
|
||||
|
@ -1512,53 +1512,53 @@ void recUpdateFlags(VURegs * VU, int reg, int info)
|
|||
}
|
||||
|
||||
SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x1B); // Flip wzyx to xyzw
|
||||
|
||||
//-------------------------Check for Overflow flags------------------------------
|
||||
|
||||
SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg
|
||||
SSE_CMPUNORDPS_XMM_to_XMM(t1reg, reg); // If reg == NaN then set Vector to 0xFFFFFFFF
|
||||
|
||||
XOR32RtoR(x86macflag, x86macflag); // Clear Mac Flag
|
||||
|
||||
SSE_MOVMSKPS_XMM_to_R32(x86newflag, t1reg); // Move the sign bits of the previous calculation
|
||||
|
||||
XOR32RtoR(x86temp, x86temp); //Clear x86temp
|
||||
|
||||
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Overflowed" bits from the previous calculation (also make sure we're only grabbing from the XYZW being modified)
|
||||
pjmp = JZ8(0); // Skip if none are
|
||||
OR32ItoR(x86temp, 8); // Set if they are
|
||||
x86SetJ8(pjmp);
|
||||
if (CHECK_VU_EXTRA_FLAGS) {
|
||||
//-------------------------Check for Overflow flags------------------------------
|
||||
|
||||
OR32RtoR(x86macflag, x86newflag);
|
||||
SHL32ItoR(x86macflag, 4); // Shift the Overflow flags left 4
|
||||
SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg
|
||||
SSE_CMPUNORDPS_XMM_to_XMM(t1reg, reg); // If reg == NaN then set Vector to 0xFFFFFFFF
|
||||
|
||||
//-------------------------Check for Underflow flags------------------------------
|
||||
SSE_MOVMSKPS_XMM_to_R32(x86newflag, t1reg); // Move the sign bits of the previous calculation
|
||||
|
||||
SSE_MOVAPS_XMM_to_XMM(t1reg, reg); // t1reg <- reg
|
||||
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Overflowed" bits from the previous calculation (also make sure we're only grabbing from the XYZW being modified)
|
||||
pjmp = JZ8(0); // Skip if none are
|
||||
OR32ItoR(x86temp, 8); // Set if they are
|
||||
x86SetJ8(pjmp);
|
||||
|
||||
SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Underflow_Mask1[ 0 ]);
|
||||
SSE_CMPEQPS_M128_to_XMM(t1reg, (uptr)&VU_Zero_Mask[ 0 ]); // If (t1reg == zero exponent) then set Vector to 0xFFFFFFFF
|
||||
OR32RtoR(x86macflag, x86newflag);
|
||||
SHL32ItoR(x86macflag, 4); // Shift the Overflow flags left 4
|
||||
|
||||
SSE_ANDPS_XMM_to_XMM(t1reg, reg);
|
||||
SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Underflow_Mask2[ 0 ]);
|
||||
SSE_CMPNEPS_M128_to_XMM(t1reg, (uptr)&VU_Zero_Mask[ 0 ]); // If (t1reg != zero mantisa) then set Vector to 0xFFFFFFFF
|
||||
//-------------------------Check for Underflow flags------------------------------
|
||||
|
||||
SSE_MOVMSKPS_XMM_to_R32(x86newflag, t1reg); // Move the sign bits of the previous calculation
|
||||
SSE_MOVAPS_XMM_to_XMM(t1reg, reg); // t1reg <- reg
|
||||
|
||||
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Underflowed" bits from the previous calculation
|
||||
pjmp = JZ8(0); // Skip if none are
|
||||
OR32ItoR(x86temp, 4); // Set if they are
|
||||
x86SetJ8(pjmp);
|
||||
SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Underflow_Mask1[ 0 ]);
|
||||
SSE_CMPEQPS_M128_to_XMM(t1reg, (uptr)&VU_Zero_Mask[ 0 ]); // If (t1reg == zero exponent) then set Vector to 0xFFFFFFFF
|
||||
|
||||
OR32RtoR(x86macflag, x86newflag);
|
||||
SHL32ItoR(x86macflag, 4); // Shift the Overflow and Underflow flags left 4
|
||||
SSE_ANDPS_XMM_to_XMM(t1reg, reg);
|
||||
SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Underflow_Mask2[ 0 ]);
|
||||
SSE_CMPNEPS_M128_to_XMM(t1reg, (uptr)&VU_Zero_Mask[ 0 ]); // If (t1reg != zero mantisa) then set Vector to 0xFFFFFFFF
|
||||
|
||||
//-------------------------Optional Code: Denormals Are Zero------------------------------
|
||||
if (CHECK_UNDERFLOW) { // Sets underflow/denormals to zero
|
||||
SSE_ANDNPS_XMM_to_XMM(t1reg, reg); // t1reg = !t1reg & reg
|
||||
// Now we have Denormals are Positive Zero in t1reg; the next two lines take Signed Zero into account
|
||||
SSE_ANDPS_M128_to_XMM(reg, (uptr)&VU_Signed_Zero_Mask[ 0 ]);
|
||||
SSE_ORPS_XMM_to_XMM(reg, t1reg);
|
||||
SSE_MOVMSKPS_XMM_to_R32(x86newflag, t1reg); // Move the sign bits of the previous calculation
|
||||
|
||||
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Underflowed" bits from the previous calculation
|
||||
pjmp = JZ8(0); // Skip if none are
|
||||
OR32ItoR(x86temp, 4); // Set if they are
|
||||
x86SetJ8(pjmp);
|
||||
|
||||
OR32RtoR(x86macflag, x86newflag);
|
||||
SHL32ItoR(x86macflag, 4); // Shift the Overflow and Underflow flags left 4
|
||||
|
||||
//-------------------------Optional Code: Denormals Are Zero------------------------------
|
||||
if (CHECK_UNDERFLOW) { // Sets underflow/denormals to zero
|
||||
SSE_ANDNPS_XMM_to_XMM(t1reg, reg); // t1reg = !t1reg & reg
|
||||
// Now we have Denormals are Positive Zero in t1reg; the next two lines take Signed Zero into account
|
||||
SSE_ANDPS_M128_to_XMM(reg, (uptr)&VU_Signed_Zero_Mask[ 0 ]);
|
||||
SSE_ORPS_XMM_to_XMM(reg, t1reg);
|
||||
}
|
||||
}
|
||||
//-------------------------Check for Signed flags------------------------------
|
||||
|
||||
|
|
Loading…
Reference in New Issue