mirror of https://github.com/PCSX2/pcsx2.git
nneeve improves the software-emulated FPU accuracy ("Full" mode in Advanced Dialog).
Appended notes: * ADD in iFPUd should be bit accurate (unless it isn't; needs TESTING) * MUL in iFPUd with Software Emulate MUL is as close to bit accurate as I could get (probably not close enough; needs TESTING) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@728 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
5dc76238bf
commit
aef3af8a29
|
@ -77,6 +77,7 @@ extern SessionOverrideFlags g_Session;
|
|||
#define CHECK_FPU_EXTRA_OVERFLOW (Config.eeOptions & 0x2) // If enabled, Operands are checked for infinities before being used in the FPU recs
|
||||
#define CHECK_FPU_EXTRA_FLAGS 1 // Always enabled now // Sets D/I flags on FPU instructions
|
||||
#define CHECK_FPU_FULL (Config.eeOptions & 0x4)
|
||||
#define CHECK_FPU_ATTEMPT_MUL (Config.eeOptions & 0x8)
|
||||
#define DEFAULT_eeOptions 0x01
|
||||
#define DEFAULT_vuOptions 0x01
|
||||
//------------ DEFAULT sseMXCSR VALUES!!! ---------------
|
||||
|
|
|
@ -35,6 +35,8 @@ static void InitRoundClampModes( HWND hDlg, u32 new_eeopt, u32 new_vuopt )
|
|||
else if (new_vuopt & 0x1) CheckRadioButton(hDlg, IDC_VU_CLAMPMODE0, IDC_VU_CLAMPMODE3, IDC_VU_CLAMPMODE0 + 1);
|
||||
else CheckRadioButton(hDlg, IDC_VU_CLAMPMODE0, IDC_VU_CLAMPMODE3, IDC_VU_CLAMPMODE0 + 0);
|
||||
|
||||
CheckDlgButton(hDlg, IDC_EE_CHECK3, (new_eeopt & 0x8) ? TRUE : FALSE);
|
||||
|
||||
if (new_eeopt & 0x4) CheckRadioButton(hDlg, IDC_EE_CLAMPMODE0, IDC_EE_CLAMPMODE3, IDC_EE_CLAMPMODE0 + 3);
|
||||
else if (new_eeopt & 0x2) CheckRadioButton(hDlg, IDC_EE_CLAMPMODE0, IDC_EE_CLAMPMODE3, IDC_EE_CLAMPMODE0 + 2);
|
||||
else if (new_eeopt & 0x1) CheckRadioButton(hDlg, IDC_EE_CLAMPMODE0, IDC_EE_CLAMPMODE3, IDC_EE_CLAMPMODE0 + 1);
|
||||
|
@ -94,6 +96,8 @@ BOOL APIENTRY AdvancedOptionsProc(HWND hDlg, UINT message, WPARAM wParam, LPARAM
|
|||
new_eeopt |= IsDlgButtonChecked(hDlg, IDC_EE_CLAMPMODE2) ? 0x3 : 0;
|
||||
new_eeopt |= IsDlgButtonChecked(hDlg, IDC_EE_CLAMPMODE3) ? 0x7 : 0;
|
||||
|
||||
new_eeopt |= IsDlgButtonChecked(hDlg, IDC_EE_CHECK3) ? 0x8 : 0;
|
||||
|
||||
new_vuopt |= IsDlgButtonChecked(hDlg, IDC_VU_CLAMPMODE0) ? 0x0 : 0;
|
||||
new_vuopt |= IsDlgButtonChecked(hDlg, IDC_VU_CLAMPMODE1) ? 0x1 : 0;
|
||||
new_vuopt |= IsDlgButtonChecked(hDlg, IDC_VU_CLAMPMODE2) ? 0x3 : 0;
|
||||
|
|
|
@ -195,30 +195,31 @@ BEGIN
|
|||
RADIOBUTTON "Chop / Zero",IDC_EE_ROUNDMODE3,156,36,54,16
|
||||
CONTROL " Flush to Zero",IDC_EE_CHECK1,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,20,105,58,13
|
||||
CONTROL " Denormals are Zero",IDC_EE_CHECK2,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,111,105,79,13
|
||||
CONTROL " Flush to Zero",IDC_VU_CHECK1,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,20,214,58,13
|
||||
CONTROL " Denormals are Zero",IDC_VU_CHECK2,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,111,214,81,13
|
||||
RADIOBUTTON "Nearest",IDC_VU_ROUNDMODE0,20,154,44,12
|
||||
RADIOBUTTON "Negative",IDC_VU_ROUNDMODE1,64,154,47,12
|
||||
RADIOBUTTON "Positive",IDC_VU_ROUNDMODE2,111,154,45,12
|
||||
RADIOBUTTON "Chop / Zero",IDC_VU_ROUNDMODE3,156,154,52,12
|
||||
CONTROL " Flush to Zero",IDC_VU_CHECK1,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,20,232,58,13
|
||||
CONTROL " Denormals are Zero",IDC_VU_CHECK2,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,111,232,81,13
|
||||
RADIOBUTTON "Nearest",IDC_VU_ROUNDMODE0,20,172,44,12
|
||||
RADIOBUTTON "Negative",IDC_VU_ROUNDMODE1,64,172,47,12
|
||||
RADIOBUTTON "Positive",IDC_VU_ROUNDMODE2,111,172,45,12
|
||||
RADIOBUTTON "Chop / Zero",IDC_VU_ROUNDMODE3,156,172,52,12
|
||||
PUSHBUTTON "Defaults",IDDEFAULT,346,254,50,14
|
||||
GROUPBOX "VU Recs Options",IDC_STATIC,7,128,250,122,BS_CENTER
|
||||
GROUPBOX "EE Recs Options",IDC_STATIC,7,12,251,111,BS_CENTER
|
||||
GROUPBOX "Round Mode",IDC_STATIC,11,141,236,32
|
||||
GROUPBOX "VU Recs Options",IDC_STATIC,7,146,250,122,BS_CENTER
|
||||
GROUPBOX "EE Recs Options",IDC_STATIC,7,12,250,129,BS_CENTER
|
||||
GROUPBOX "Round Mode",IDC_STATIC,11,159,236,32
|
||||
GROUPBOX "Round Mode",IDC_STATIC,11,26,236,36
|
||||
GROUPBOX "Help",IDC_STATIC,271,12,251,238,BS_CENTER
|
||||
GROUPBOX "Clamp Mode",IDC_STATIC,11,178,236,31
|
||||
RADIOBUTTON "None",IDC_VU_CLAMPMODE0,20,189,44,12
|
||||
RADIOBUTTON "Normal",IDC_VU_CLAMPMODE1,64,189,47,12
|
||||
RADIOBUTTON "Extra",IDC_VU_CLAMPMODE2,111,189,45,12
|
||||
RADIOBUTTON "Extra + Preserve Sign",IDC_VU_CLAMPMODE3,156,189,85,12
|
||||
CONTROL " Set O and U Flags",IDC_VU_CHECK3,"Button",BS_AUTOCHECKBOX | WS_DISABLED | WS_TABSTOP,20,232,91,13
|
||||
CONTROL " Software Emulate DaZ",IDC_VU_CHECK4,"Button",BS_AUTOCHECKBOX | WS_DISABLED | WS_TABSTOP,111,232,116,13
|
||||
GROUPBOX "Clamp Mode",IDC_STATIC,11,196,236,31
|
||||
RADIOBUTTON "None",IDC_VU_CLAMPMODE0,20,207,44,12
|
||||
RADIOBUTTON "Normal",IDC_VU_CLAMPMODE1,64,207,47,12
|
||||
RADIOBUTTON "Extra",IDC_VU_CLAMPMODE2,111,207,45,12
|
||||
RADIOBUTTON "Extra + Preserve Sign",IDC_VU_CLAMPMODE3,156,207,85,12
|
||||
CONTROL " Set O and U Flags",IDC_VU_CHECK3,"Button",BS_AUTOCHECKBOX | WS_DISABLED | WS_TABSTOP,20,250,91,13
|
||||
CONTROL " Software Emulate DaZ",IDC_VU_CHECK4,"Button",BS_AUTOCHECKBOX | WS_DISABLED | WS_TABSTOP,111,250,116,13
|
||||
GROUPBOX "Clamp Mode",IDC_STATIC,11,67,236,31
|
||||
RADIOBUTTON "None",IDC_EE_CLAMPMODE0,20,76,44,16
|
||||
RADIOBUTTON "Normal",IDC_EE_CLAMPMODE1,64,76,47,16
|
||||
RADIOBUTTON "Extra + Preserve Sign",IDC_EE_CLAMPMODE2,111,76,91,16
|
||||
RADIOBUTTON "Full",IDC_EE_CLAMPMODE3,202,76,38,16
|
||||
CONTROL " Software Emulate MUL",IDC_EE_CHECK3,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,20,123,116,13
|
||||
LTEXT "These options specify how your CPU rounds floating point values.\n\nTry changing the roundmode for EE if your game hangs, it could make it work again.",IDC_STATIC,287,33,216,35
|
||||
GROUPBOX "Round Mode",IDC_STATIC,281,22,235,51,BS_LEFT
|
||||
GROUPBOX "Clamp Mode",IDC_STATIC,281,80,236,127,BS_LEFT
|
||||
|
|
|
@ -271,6 +271,7 @@
|
|||
#define IDC_MCD_LABEL1 1324
|
||||
#define IDC_MCD_LABEL2 1325
|
||||
#define IDC_INTCSTATHACK 1326
|
||||
#define IDC_EE_CHECK3 1327
|
||||
#define IDC_CPULOG 1500
|
||||
#define IDC_MEMLOG 1501
|
||||
#define IDC_HWLOG 1502
|
||||
|
|
|
@ -588,12 +588,92 @@ void FPU_SUB(int regd, int regt) {
|
|||
else SSE_SUBSS_XMM_to_XMM(regd, regt);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// FPU_MUL (Used to approximate PS2's FPU mul behavior)
|
||||
//------------------------------------------------------------------
|
||||
// PS2's multiplication uses some modification (possibly not the one used in this function)
|
||||
// of booth multiplication with wallace trees (not used in this function)
|
||||
// it cuts of some bits, resulting in inaccurate and non-commutative results.
|
||||
// This function attempts to replicate this. It is currently inaccurate. But still not too bad.
|
||||
//------------------------------------------------------------------
|
||||
// Tales of Destiny hangs in a (very) certain place without this function. Probably its only use.
|
||||
// Can be optimized, of course.
|
||||
// shouldn't be compiled with SSE/MMX optimizations (but none of PCSX2 should be, right?)
|
||||
u32 __fastcall FPU_MUL_MANTISSA(u32 s, u32 t)
|
||||
{
|
||||
s = (s & 0x7fffff) | 0x800000;
|
||||
t = (t & 0x7fffff) | 0x800000;
|
||||
t<<=1;
|
||||
u32 part[13]; //partial products
|
||||
u32 bit[13]; //more partial products. 0 or 1.
|
||||
for (int i = 0; i <= 12; i++, t>>=2)
|
||||
{
|
||||
u32 test = t & 7;
|
||||
if (test == 0 || test == 7)
|
||||
{
|
||||
part[i] = 0;
|
||||
bit[i] = 0;
|
||||
}
|
||||
else if (test == 3)
|
||||
{
|
||||
part[i] = (s<<1);
|
||||
bit[i] = 0;
|
||||
}
|
||||
else if (test == 4)
|
||||
{
|
||||
part[i] = ~(s<<1);
|
||||
bit[i] = 1;
|
||||
}
|
||||
else if (test < 4)
|
||||
{
|
||||
part[i] = s;
|
||||
bit[i] = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
part[i] = ~s;
|
||||
bit[i] = 1;
|
||||
}
|
||||
}
|
||||
s64 res = 0;
|
||||
u64 mask = 0;
|
||||
mask = (~mask) << 12; //mask
|
||||
for (int i=0; i<=12; i++)
|
||||
{
|
||||
res += (s64)(s32)part[i]<<(i*2);
|
||||
res &= mask;
|
||||
res += bit[i]<<(i*2);
|
||||
}
|
||||
u32 man_res = (res >> 23);
|
||||
if (man_res & (1 << 24))
|
||||
man_res >>= 1;
|
||||
man_res &= 0x7fffff;
|
||||
return man_res;
|
||||
}
|
||||
|
||||
void FPU_MUL(int regd, int regt)
|
||||
{
|
||||
if (CHECK_FPU_ATTEMPT_MUL)
|
||||
{
|
||||
SSE2_MOVD_XMM_to_R(ECX, regd);
|
||||
SSE2_MOVD_XMM_to_R(EDX, regt);
|
||||
SSE_MULSS_XMM_to_XMM(regd, regt);
|
||||
CALLFunc( (uptr)&FPU_MUL_MANTISSA );
|
||||
SSE2_MOVD_XMM_to_R(ECX, regd);
|
||||
AND32ItoR(ECX, 0xff800000);
|
||||
OR32RtoR(EAX, ECX);
|
||||
SSE2_MOVD_R_to_XMM(regd, EAX);
|
||||
}
|
||||
else
|
||||
SSE_MULSS_XMM_to_XMM(regd, regt);
|
||||
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// CommutativeOp XMM (used for ADD, MUL, MAX, and MIN opcodes)
|
||||
//------------------------------------------------------------------
|
||||
static void (*recComOpXMM_to_XMM[] )(x86SSERegType, x86SSERegType) = {
|
||||
FPU_ADD, SSE_MULSS_XMM_to_XMM, SSE_MAXSS_XMM_to_XMM, SSE_MINSS_XMM_to_XMM };
|
||||
FPU_ADD, FPU_MUL, SSE_MAXSS_XMM_to_XMM, SSE_MINSS_XMM_to_XMM };
|
||||
|
||||
//static void (*recComOpM32_to_XMM[] )(x86SSERegType, uptr) = {
|
||||
// SSE_ADDSS_M32_to_XMM, SSE_MULSS_M32_to_XMM, SSE_MAXSS_M32_to_XMM, SSE_MINSS_M32_to_XMM };
|
||||
|
@ -1125,6 +1205,7 @@ FPURECOMPILE_CONSTCODE(DIV_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
|
|||
//------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// MADD XMM
|
||||
//------------------------------------------------------------------
|
||||
|
@ -1138,7 +1219,7 @@ void recMADDtemp(int info, int regd)
|
|||
if(regd == EEREC_S) {
|
||||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(t0reg); }
|
||||
SSE_MULSS_XMM_to_XMM(regd, t0reg);
|
||||
FPU_MUL(regd, t0reg);
|
||||
if (info & PROCESS_EE_ACC) {
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(EEREC_ACC); fpuFloat(regd); }
|
||||
FPU_ADD(regd, EEREC_ACC);
|
||||
|
@ -1152,14 +1233,14 @@ void recMADDtemp(int info, int regd)
|
|||
else if (regd == EEREC_ACC){
|
||||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(EEREC_S); fpuFloat2(t0reg); }
|
||||
SSE_MULSS_XMM_to_XMM(t0reg, EEREC_S);
|
||||
FPU_MUL(t0reg, EEREC_S);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
FPU_ADD(regd, t0reg);
|
||||
}
|
||||
else {
|
||||
SSE_MOVSS_M32_to_XMM(regd, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_S); }
|
||||
SSE_MULSS_XMM_to_XMM(regd, EEREC_S);
|
||||
FPU_MUL(regd, EEREC_S);
|
||||
if (info & PROCESS_EE_ACC) {
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(EEREC_ACC); fpuFloat(regd); }
|
||||
FPU_ADD(regd, EEREC_ACC);
|
||||
|
@ -1175,7 +1256,7 @@ void recMADDtemp(int info, int regd)
|
|||
if(regd == EEREC_T) {
|
||||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(t0reg); }
|
||||
SSE_MULSS_XMM_to_XMM(regd, t0reg);
|
||||
FPU_MUL(regd, t0reg);
|
||||
if (info & PROCESS_EE_ACC) {
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(EEREC_ACC); fpuFloat(regd); }
|
||||
FPU_ADD(regd, EEREC_ACC);
|
||||
|
@ -1189,14 +1270,14 @@ void recMADDtemp(int info, int regd)
|
|||
else if (regd == EEREC_ACC){
|
||||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(EEREC_T); fpuFloat2(t0reg); }
|
||||
SSE_MULSS_XMM_to_XMM(t0reg, EEREC_T);
|
||||
FPU_MUL(t0reg, EEREC_T);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
FPU_ADD(regd, t0reg);
|
||||
}
|
||||
else {
|
||||
SSE_MOVSS_M32_to_XMM(regd, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_T); }
|
||||
SSE_MULSS_XMM_to_XMM(regd, EEREC_T);
|
||||
FPU_MUL(regd, EEREC_T);
|
||||
if (info & PROCESS_EE_ACC) {
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(EEREC_ACC); fpuFloat(regd); }
|
||||
FPU_ADD(regd, EEREC_ACC);
|
||||
|
@ -1211,7 +1292,7 @@ void recMADDtemp(int info, int regd)
|
|||
case (PROCESS_EE_S|PROCESS_EE_T):
|
||||
if(regd == EEREC_S) {
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_T); }
|
||||
SSE_MULSS_XMM_to_XMM(regd, EEREC_T);
|
||||
FPU_MUL(regd, EEREC_T);
|
||||
if (info & PROCESS_EE_ACC) {
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(EEREC_ACC); }
|
||||
FPU_ADD(regd, EEREC_ACC);
|
||||
|
@ -1224,7 +1305,7 @@ void recMADDtemp(int info, int regd)
|
|||
}
|
||||
else if(regd == EEREC_T) {
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_S); }
|
||||
SSE_MULSS_XMM_to_XMM(regd, EEREC_S);
|
||||
FPU_MUL(regd, EEREC_S);
|
||||
if (info & PROCESS_EE_ACC) {
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(EEREC_ACC); }
|
||||
FPU_ADD(regd, EEREC_ACC);
|
||||
|
@ -1238,14 +1319,14 @@ void recMADDtemp(int info, int regd)
|
|||
else if(regd == EEREC_ACC) {
|
||||
SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_S);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(t0reg); fpuFloat2(EEREC_T); }
|
||||
SSE_MULSS_XMM_to_XMM(t0reg, EEREC_T);
|
||||
FPU_MUL(t0reg, EEREC_T);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
FPU_ADD(regd, t0reg);
|
||||
}
|
||||
else {
|
||||
SSE_MOVSS_XMM_to_XMM(regd, EEREC_S);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_T); }
|
||||
SSE_MULSS_XMM_to_XMM(regd, EEREC_T);
|
||||
FPU_MUL(regd, EEREC_T);
|
||||
if (info & PROCESS_EE_ACC) {
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(EEREC_ACC); }
|
||||
FPU_ADD(regd, EEREC_ACC);
|
||||
|
@ -1263,7 +1344,7 @@ void recMADDtemp(int info, int regd)
|
|||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||
SSE_MOVSS_M32_to_XMM(t1reg, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(t0reg); fpuFloat2(t1reg); }
|
||||
SSE_MULSS_XMM_to_XMM(t0reg, t1reg);
|
||||
FPU_MUL(t0reg, t1reg);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
FPU_ADD(regd, t0reg);
|
||||
_freeXMMreg(t1reg);
|
||||
|
@ -1273,7 +1354,7 @@ void recMADDtemp(int info, int regd)
|
|||
SSE_MOVSS_M32_to_XMM(regd, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(t0reg); }
|
||||
SSE_MULSS_XMM_to_XMM(regd, t0reg);
|
||||
FPU_MUL(regd, t0reg);
|
||||
if (info & PROCESS_EE_ACC) {
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(EEREC_ACC); }
|
||||
FPU_ADD(regd, EEREC_ACC);
|
||||
|
@ -1356,7 +1437,7 @@ int t1reg;
|
|||
if(regd == EEREC_S) {
|
||||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(t0reg); }
|
||||
SSE_MULSS_XMM_to_XMM(regd, t0reg);
|
||||
FPU_MUL(regd, t0reg);
|
||||
if (info & PROCESS_EE_ACC) { SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_ACC); }
|
||||
else { SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.ACC); }
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
|
@ -1366,14 +1447,14 @@ int t1reg;
|
|||
else if (regd == EEREC_ACC){
|
||||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(EEREC_S); fpuFloat2(t0reg); }
|
||||
SSE_MULSS_XMM_to_XMM(t0reg, EEREC_S);
|
||||
FPU_MUL(t0reg, EEREC_S);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
FPU_SUB(regd, t0reg);
|
||||
}
|
||||
else {
|
||||
SSE_MOVSS_M32_to_XMM(regd, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_S); }
|
||||
SSE_MULSS_XMM_to_XMM(regd, EEREC_S);
|
||||
FPU_MUL(regd, EEREC_S);
|
||||
if (info & PROCESS_EE_ACC) { SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_ACC); }
|
||||
else { SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.ACC); }
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
|
@ -1385,7 +1466,7 @@ int t1reg;
|
|||
if(regd == EEREC_T) {
|
||||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(t0reg); }
|
||||
SSE_MULSS_XMM_to_XMM(regd, t0reg);
|
||||
FPU_MUL(regd, t0reg);
|
||||
if (info & PROCESS_EE_ACC) { SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_ACC); }
|
||||
else { SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.ACC); }
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
|
@ -1395,14 +1476,14 @@ int t1reg;
|
|||
else if (regd == EEREC_ACC){
|
||||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(EEREC_T); fpuFloat2(t0reg); }
|
||||
SSE_MULSS_XMM_to_XMM(t0reg, EEREC_T);
|
||||
FPU_MUL(t0reg, EEREC_T);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
FPU_SUB(regd, t0reg);
|
||||
}
|
||||
else {
|
||||
SSE_MOVSS_M32_to_XMM(regd, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_T); }
|
||||
SSE_MULSS_XMM_to_XMM(regd, EEREC_T);
|
||||
FPU_MUL(regd, EEREC_T);
|
||||
if (info & PROCESS_EE_ACC) { SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_ACC); }
|
||||
else { SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.ACC); }
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
|
@ -1413,7 +1494,7 @@ int t1reg;
|
|||
case (PROCESS_EE_S|PROCESS_EE_T):
|
||||
if(regd == EEREC_S) {
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_T); }
|
||||
SSE_MULSS_XMM_to_XMM(regd, EEREC_T);
|
||||
FPU_MUL(regd, EEREC_T);
|
||||
if (info & PROCESS_EE_ACC) { SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_ACC); }
|
||||
else { SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.ACC); }
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
|
@ -1422,7 +1503,7 @@ int t1reg;
|
|||
}
|
||||
else if(regd == EEREC_T) {
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_S); }
|
||||
SSE_MULSS_XMM_to_XMM(regd, EEREC_S);
|
||||
FPU_MUL(regd, EEREC_S);
|
||||
if (info & PROCESS_EE_ACC) { SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_ACC); }
|
||||
else { SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.ACC); }
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
|
@ -1432,14 +1513,14 @@ int t1reg;
|
|||
else if(regd == EEREC_ACC) {
|
||||
SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_S);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(t0reg); fpuFloat2(EEREC_T); }
|
||||
SSE_MULSS_XMM_to_XMM(t0reg, EEREC_T);
|
||||
FPU_MUL(t0reg, EEREC_T);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
FPU_SUB(regd, t0reg);
|
||||
}
|
||||
else {
|
||||
SSE_MOVSS_XMM_to_XMM(regd, EEREC_S);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_T); }
|
||||
SSE_MULSS_XMM_to_XMM(regd, EEREC_T);
|
||||
FPU_MUL(regd, EEREC_T);
|
||||
if (info & PROCESS_EE_ACC) { SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_ACC); }
|
||||
else { SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.ACC); }
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
|
@ -1453,7 +1534,7 @@ int t1reg;
|
|||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||
SSE_MOVSS_M32_to_XMM(t1reg, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(t0reg); fpuFloat2(t1reg); }
|
||||
SSE_MULSS_XMM_to_XMM(t0reg, t1reg);
|
||||
FPU_MUL(t0reg, t1reg);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
FPU_SUB(regd, t0reg);
|
||||
_freeXMMreg(t1reg);
|
||||
|
@ -1463,7 +1544,7 @@ int t1reg;
|
|||
SSE_MOVSS_M32_to_XMM(regd, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(t0reg); }
|
||||
SSE_MULSS_XMM_to_XMM(regd, t0reg);
|
||||
FPU_MUL(regd, t0reg);
|
||||
if (info & PROCESS_EE_ACC) { SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_ACC); }
|
||||
else { SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.ACC); }
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
|
|
|
@ -76,6 +76,9 @@ namespace R5900 {
|
|||
namespace Dynarec {
|
||||
namespace OpcodeImpl {
|
||||
namespace COP1 {
|
||||
|
||||
u32 __fastcall FPU_MUL_MANTISSA(u32 s, u32 t);
|
||||
|
||||
namespace DOUBLE {
|
||||
|
||||
//------------------------------------------------------------------
|
||||
|
@ -408,6 +411,8 @@ static u64 PCSX2_ALIGNED16(dbl_s_neg[2]) = {0x8000000000000000ULL, 0};
|
|||
|
||||
// converts small normal numbers to double equivalent
|
||||
// converts large normal numbers (which represent NaN/inf in IEEE) to double equivalent
|
||||
|
||||
//mustn't use EAX/ECX/EDX/x86regs (MUL)
|
||||
void ToDouble(int reg)
|
||||
{
|
||||
SSE_UCOMISS_M32_to_XMM(reg, (uptr)&pos_inf); //sets ZF if equal or uncomparable
|
||||
|
@ -439,6 +444,7 @@ void ToDouble(int reg)
|
|||
otherwise, results are still usually better than iFPU.cpp.
|
||||
*/
|
||||
|
||||
//mustn't use EAX/ECX/EDX/x86regs (MUL)
|
||||
|
||||
// converts small normal numbers to PS2 equivalent
|
||||
// converts large normal numbers to PS2 equivalent (which represent NaN/inf in IEEE)
|
||||
|
@ -501,6 +507,7 @@ void ToPS2FPU_Full(int reg, bool flags, int absreg, bool acc)
|
|||
x86SetJ8(end3);
|
||||
}
|
||||
|
||||
//mustn't use EAX/ECX/EDX/x86regs (MUL)
|
||||
void ToPS2FPU(int reg, bool flags, int absreg, bool acc)
|
||||
{
|
||||
if (FPU_RESULT)
|
||||
|
@ -642,12 +649,36 @@ void FPU_ADD_SUB(int tempd, int tempt) //tempd and tempt are overwritten, they a
|
|||
}
|
||||
|
||||
|
||||
|
||||
void FPU_MUL(int info, int regd, int sreg, int treg, bool acc)
|
||||
{
|
||||
if (CHECK_FPU_ATTEMPT_MUL)
|
||||
{
|
||||
SSE2_MOVD_XMM_to_R(ECX, sreg);
|
||||
SSE2_MOVD_XMM_to_R(EDX, treg);
|
||||
CALLFunc( (uptr)&FPU_MUL_MANTISSA );
|
||||
ToDouble(sreg); ToDouble(treg);
|
||||
SSE2_MULSD_XMM_to_XMM(sreg, treg);
|
||||
ToPS2FPU(sreg, true, treg, acc);
|
||||
SSE_MOVSS_XMM_to_XMM(regd, sreg);
|
||||
SSE2_MOVD_XMM_to_R(ECX, regd);
|
||||
AND32ItoR(ECX, 0xff800000);
|
||||
OR32RtoR(EAX, ECX);
|
||||
SSE2_MOVD_R_to_XMM(regd, EAX);
|
||||
}
|
||||
else
|
||||
{
|
||||
ToDouble(sreg); ToDouble(treg);
|
||||
SSE2_MULSD_XMM_to_XMM(sreg, treg);
|
||||
ToPS2FPU(sreg, true, treg, acc);
|
||||
SSE_MOVSS_XMM_to_XMM(regd, sreg);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// CommutativeOp XMM (used for ADD, MUL, MAX, MIN and SUB opcodes)
|
||||
//------------------------------------------------------------------
|
||||
static void (*recFPUOpXMM_to_XMM[] )(x86SSERegType, x86SSERegType) = {
|
||||
SSE2_ADDSD_XMM_to_XMM, SSE2_MULSD_XMM_to_XMM, SSE2_MAXSD_XMM_to_XMM, SSE2_MINSD_XMM_to_XMM, SSE2_SUBSD_XMM_to_XMM };
|
||||
SSE2_ADDSD_XMM_to_XMM, NULL, SSE2_MAXSD_XMM_to_XMM, SSE2_MINSD_XMM_to_XMM, SSE2_SUBSD_XMM_to_XMM };
|
||||
|
||||
void recFPUOp(int info, int regd, int op, bool acc)
|
||||
{
|
||||
|
@ -951,13 +982,10 @@ FPURECOMPILE_CONSTCODE(DIV_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
|
|||
void recMaddsub(int info, int regd, int op, bool acc)
|
||||
{
|
||||
int sreg, treg;
|
||||
|
||||
ALLOC_S(sreg); ALLOC_T(treg);
|
||||
ToDouble(sreg); ToDouble(treg);
|
||||
|
||||
SSE2_MULSD_XMM_to_XMM(sreg, treg);
|
||||
FPU_MUL(info, sreg, sreg, treg, false);
|
||||
|
||||
ToPS2FPU(sreg, true, treg, false);
|
||||
GET_ACC(treg);
|
||||
|
||||
if (FPU_ADD_SUB_HACK) //ADD or SUB
|
||||
|
@ -1077,14 +1105,22 @@ FPURECOMPILE_CONSTCODE(MSUBA_S, XMMINFO_WRITEACC|XMMINFO_READACC|XMMINFO_READS|X
|
|||
//------------------------------------------------------------------
|
||||
void recMUL_S_xmm(int info)
|
||||
{
|
||||
recFPUOp(info, EEREC_D, 1, false);
|
||||
int sreg, treg;
|
||||
ALLOC_S(sreg); ALLOC_T(treg);
|
||||
|
||||
FPU_MUL(info, EEREC_D, sreg, treg, false);
|
||||
_freeXMMreg(sreg); _freeXMMreg(treg);
|
||||
}
|
||||
|
||||
FPURECOMPILE_CONSTCODE(MUL_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
|
||||
|
||||
void recMULA_S_xmm(int info)
|
||||
{
|
||||
recFPUOp(info, EEREC_ACC, 1, true);
|
||||
int sreg, treg;
|
||||
ALLOC_S(sreg); ALLOC_T(treg);
|
||||
|
||||
FPU_MUL(info, EEREC_ACC, sreg, treg, true);
|
||||
_freeXMMreg(sreg); _freeXMMreg(treg);
|
||||
}
|
||||
|
||||
FPURECOMPILE_CONSTCODE(MULA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT);
|
||||
|
|
Loading…
Reference in New Issue