mirror of https://github.com/PCSX2/pcsx2.git
Changed the fpu MUL hack for tales of destiny. (Doesn't break under most settings now).
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2324 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
611444b9d5
commit
00f14b5760
|
@ -75,7 +75,8 @@ void recRSQRT_S_xmm(int info);
|
|||
#define FPUflagSO 0X00000010
|
||||
#define FPUflagSU 0X00000008
|
||||
|
||||
#define FPU_ADD_SUB_HACK 1 // Add/Sub opcodes produce more ps2-like results if set to 1
|
||||
// Add/Sub opcodes produce the same results as the ps2
|
||||
#define FPU_CORRECT_ADD_SUB 1
|
||||
|
||||
static const __aligned16 u32 s_neg[4] = { 0x80000000, 0xffffffff, 0xffffffff, 0xffffffff };
|
||||
static const __aligned16 u32 s_pos[4] = { 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff };
|
||||
|
@ -546,95 +547,54 @@ void FPU_ADD_SUB(int regd, int regt, int issub)
|
|||
}
|
||||
|
||||
void FPU_ADD(int regd, int regt) {
|
||||
if (FPU_ADD_SUB_HACK) FPU_ADD_SUB(regd, regt, 0);
|
||||
if (FPU_CORRECT_ADD_SUB) FPU_ADD_SUB(regd, regt, 0);
|
||||
else SSE_ADDSS_XMM_to_XMM(regd, regt);
|
||||
}
|
||||
|
||||
void FPU_SUB(int regd, int regt) {
|
||||
if (FPU_ADD_SUB_HACK) FPU_ADD_SUB(regd, regt, 1);
|
||||
if (FPU_CORRECT_ADD_SUB) FPU_ADD_SUB(regd, regt, 1);
|
||||
else SSE_SUBSS_XMM_to_XMM(regd, regt);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// FPU_MUL (Used to approximate PS2's FPU mul behavior)
|
||||
// Note: PS2's multiplication uses some variant of booth multiplication with wallace trees:
|
||||
// It cuts off some bits, resulting in inaccurate and non-commutative results.
|
||||
// The PS2's result mantissa is either equal to x86's rounding to zero result mantissa
|
||||
// or SMALLER (by 0x1). (this means that x86's other rounding modes are only less similar to PS2's mul)
|
||||
//------------------------------------------------------------------
|
||||
// PS2's multiplication uses some modification (possibly not the one used in this function)
|
||||
// of booth multiplication with wallace trees (not used in this function)
|
||||
// it cuts off some bits, resulting in inaccurate and non-commutative results.
|
||||
// This function attempts to replicate this. It is currently inaccurate. But still not too bad.
|
||||
//------------------------------------------------------------------
|
||||
// Tales of Destiny hangs in a (very) certain place without this function. Probably its only use.
|
||||
// Can be optimized, of course.
|
||||
// shouldn't be compiled with SSE/MMX optimizations (but none of PCSX2 should be, right?)
|
||||
u32 __fastcall FPU_MUL_MANTISSA(u32 s, u32 t)
|
||||
|
||||
u32 __fastcall FPU_MUL_HACK(u32 s, u32 t)
|
||||
{
|
||||
s = (s & 0x7fffff) | 0x800000;
|
||||
t = (t & 0x7fffff) | 0x800000;
|
||||
t<<=1;
|
||||
u32 part[13]; //partial products
|
||||
u32 bit[13]; //more partial products. 0 or 1.
|
||||
for (int i = 0; i <= 12; i++, t>>=2)
|
||||
{
|
||||
u32 test = t & 7;
|
||||
if (test == 0 || test == 7)
|
||||
{
|
||||
part[i] = 0;
|
||||
bit[i] = 0;
|
||||
}
|
||||
else if (test == 3)
|
||||
{
|
||||
part[i] = (s<<1);
|
||||
bit[i] = 0;
|
||||
}
|
||||
else if (test == 4)
|
||||
{
|
||||
part[i] = ~(s<<1);
|
||||
bit[i] = 1;
|
||||
}
|
||||
else if (test < 4)
|
||||
{
|
||||
part[i] = s;
|
||||
bit[i] = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
part[i] = ~s;
|
||||
bit[i] = 1;
|
||||
}
|
||||
}
|
||||
s64 res = 0;
|
||||
u64 mask = 0;
|
||||
mask = (~mask) << 12; //mask
|
||||
for (int i=0; i<=12; i++)
|
||||
{
|
||||
res += (s64)(s32)part[i]<<(i*2);
|
||||
res &= mask;
|
||||
res += bit[i]<<(i*2);
|
||||
}
|
||||
u32 man_res = (res >> 23);
|
||||
if (man_res & (1 << 24))
|
||||
man_res >>= 1;
|
||||
man_res &= 0x7fffff;
|
||||
return man_res;
|
||||
if ((s == 0x3e800000) && (t == 0x40490fdb))
|
||||
return 0x3f490fda; // needed for Tales of Destiny Remake (only in a very specific room late-game)
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
void FPU_MUL(int regd, int regt)
|
||||
void FPU_MUL(int regd, int regt, bool reverseOperands)
|
||||
{
|
||||
u8 *noHack, *endMul;
|
||||
|
||||
if (CHECK_FPUMULHACK)
|
||||
{
|
||||
SSE2_MOVD_XMM_to_R(ECX, regd);
|
||||
SSE2_MOVD_XMM_to_R(EDX, regt);
|
||||
SSE_MULSS_XMM_to_XMM(regd, regt);
|
||||
CALLFunc( (uptr)&FPU_MUL_MANTISSA );
|
||||
SSE2_MOVD_XMM_to_R(ECX, regd);
|
||||
AND32ItoR(ECX, 0xff800000);
|
||||
OR32RtoR(EAX, ECX);
|
||||
SSE2_MOVD_R_to_XMM(regd, EAX);
|
||||
SSE2_MOVD_XMM_to_R(ECX, reverseOperands ? regt : regd);
|
||||
SSE2_MOVD_XMM_to_R(EDX, reverseOperands ? regd : regt);
|
||||
CALLFunc( (uptr)&FPU_MUL_HACK ); //returns the hacked result or 0
|
||||
TEST32RtoR(EAX, EAX);
|
||||
noHack = JZ8(0);
|
||||
SSE2_MOVD_R_to_XMM(regd, EAX);
|
||||
endMul = JMP8(0);
|
||||
x86SetJ8(noHack);
|
||||
}
|
||||
else
|
||||
SSE_MULSS_XMM_to_XMM(regd, regt);
|
||||
|
||||
SSE_MULSS_XMM_to_XMM(regd, regt);
|
||||
|
||||
if (CHECK_FPUMULHACK)
|
||||
x86SetJ8(endMul);
|
||||
}
|
||||
|
||||
void FPU_MUL(int regd, int regt) { FPU_MUL(regd, regt, false); }
|
||||
void FPU_MUL_REV(int regd, int regt) { FPU_MUL(regd, regt, true); } //reversed operands
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// CommutativeOp XMM (used for ADD, MUL, MAX, and MIN opcodes)
|
||||
|
@ -642,6 +602,9 @@ void FPU_MUL(int regd, int regt)
|
|||
static void (*recComOpXMM_to_XMM[] )(x86SSERegType, x86SSERegType) = {
|
||||
FPU_ADD, FPU_MUL, SSE_MAXSS_XMM_to_XMM, SSE_MINSS_XMM_to_XMM };
|
||||
|
||||
static void (*recComOpXMM_to_XMM_REV[] )(x86SSERegType, x86SSERegType) = { //reversed operands
|
||||
FPU_ADD, FPU_MUL_REV, SSE_MAXSS_XMM_to_XMM, SSE_MINSS_XMM_to_XMM };
|
||||
|
||||
//static void (*recComOpM32_to_XMM[] )(x86SSERegType, uptr) = {
|
||||
// SSE_ADDSS_M32_to_XMM, SSE_MULSS_M32_to_XMM, SSE_MAXSS_M32_to_XMM, SSE_MINSS_M32_to_XMM };
|
||||
|
||||
|
@ -660,14 +623,14 @@ int recCommutativeOp(int info, int regd, int op)
|
|||
else {
|
||||
SSE_MOVSS_M32_to_XMM(regd, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW || (op >= 2)) { fpuFloat2(regd); fpuFloat2(EEREC_S); }
|
||||
recComOpXMM_to_XMM[op](regd, EEREC_S);
|
||||
recComOpXMM_to_XMM_REV[op](regd, EEREC_S);
|
||||
}
|
||||
break;
|
||||
case PROCESS_EE_T:
|
||||
if (regd == EEREC_T) {
|
||||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW || (op >= 2)) { fpuFloat2(regd); fpuFloat2(t0reg); }
|
||||
recComOpXMM_to_XMM[op](regd, t0reg);
|
||||
recComOpXMM_to_XMM_REV[op](regd, t0reg);
|
||||
}
|
||||
else {
|
||||
SSE_MOVSS_M32_to_XMM(regd, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||
|
@ -678,7 +641,7 @@ int recCommutativeOp(int info, int regd, int op)
|
|||
case (PROCESS_EE_S|PROCESS_EE_T):
|
||||
if (regd == EEREC_T) {
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW || (op >= 2)) { fpuFloat2(regd); fpuFloat2(EEREC_S); }
|
||||
recComOpXMM_to_XMM[op](regd, EEREC_S);
|
||||
recComOpXMM_to_XMM_REV[op](regd, EEREC_S);
|
||||
}
|
||||
else {
|
||||
SSE_MOVSS_XMM_to_XMM(regd, EEREC_S);
|
||||
|
@ -1204,7 +1167,7 @@ void recMADDtemp(int info, int regd)
|
|||
if(regd == EEREC_S) {
|
||||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(t0reg); }
|
||||
FPU_MUL(regd, t0reg);
|
||||
SSE_MULSS_XMM_to_XMM(regd, t0reg);
|
||||
if (info & PROCESS_EE_ACC) {
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(EEREC_ACC); fpuFloat(regd); }
|
||||
FPU_ADD(regd, EEREC_ACC);
|
||||
|
@ -1218,14 +1181,14 @@ void recMADDtemp(int info, int regd)
|
|||
else if (regd == EEREC_ACC){
|
||||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(EEREC_S); fpuFloat2(t0reg); }
|
||||
FPU_MUL(t0reg, EEREC_S);
|
||||
SSE_MULSS_XMM_to_XMM(t0reg, EEREC_S);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
FPU_ADD(regd, t0reg);
|
||||
}
|
||||
else {
|
||||
SSE_MOVSS_M32_to_XMM(regd, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_S); }
|
||||
FPU_MUL(regd, EEREC_S);
|
||||
SSE_MULSS_XMM_to_XMM(regd, EEREC_S);
|
||||
if (info & PROCESS_EE_ACC) {
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(EEREC_ACC); fpuFloat(regd); }
|
||||
FPU_ADD(regd, EEREC_ACC);
|
||||
|
@ -1241,7 +1204,7 @@ void recMADDtemp(int info, int regd)
|
|||
if(regd == EEREC_T) {
|
||||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(t0reg); }
|
||||
FPU_MUL(regd, t0reg);
|
||||
SSE_MULSS_XMM_to_XMM(regd, t0reg);
|
||||
if (info & PROCESS_EE_ACC) {
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(EEREC_ACC); fpuFloat(regd); }
|
||||
FPU_ADD(regd, EEREC_ACC);
|
||||
|
@ -1255,14 +1218,14 @@ void recMADDtemp(int info, int regd)
|
|||
else if (regd == EEREC_ACC){
|
||||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(EEREC_T); fpuFloat2(t0reg); }
|
||||
FPU_MUL(t0reg, EEREC_T);
|
||||
SSE_MULSS_XMM_to_XMM(t0reg, EEREC_T);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
FPU_ADD(regd, t0reg);
|
||||
}
|
||||
else {
|
||||
SSE_MOVSS_M32_to_XMM(regd, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_T); }
|
||||
FPU_MUL(regd, EEREC_T);
|
||||
SSE_MULSS_XMM_to_XMM(regd, EEREC_T);
|
||||
if (info & PROCESS_EE_ACC) {
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(EEREC_ACC); fpuFloat(regd); }
|
||||
FPU_ADD(regd, EEREC_ACC);
|
||||
|
@ -1277,7 +1240,7 @@ void recMADDtemp(int info, int regd)
|
|||
case (PROCESS_EE_S|PROCESS_EE_T):
|
||||
if(regd == EEREC_S) {
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_T); }
|
||||
FPU_MUL(regd, EEREC_T);
|
||||
SSE_MULSS_XMM_to_XMM(regd, EEREC_T);
|
||||
if (info & PROCESS_EE_ACC) {
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(EEREC_ACC); }
|
||||
FPU_ADD(regd, EEREC_ACC);
|
||||
|
@ -1290,7 +1253,7 @@ void recMADDtemp(int info, int regd)
|
|||
}
|
||||
else if(regd == EEREC_T) {
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_S); }
|
||||
FPU_MUL(regd, EEREC_S);
|
||||
SSE_MULSS_XMM_to_XMM(regd, EEREC_S);
|
||||
if (info & PROCESS_EE_ACC) {
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(EEREC_ACC); }
|
||||
FPU_ADD(regd, EEREC_ACC);
|
||||
|
@ -1304,14 +1267,14 @@ void recMADDtemp(int info, int regd)
|
|||
else if(regd == EEREC_ACC) {
|
||||
SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_S);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(t0reg); fpuFloat2(EEREC_T); }
|
||||
FPU_MUL(t0reg, EEREC_T);
|
||||
SSE_MULSS_XMM_to_XMM(t0reg, EEREC_T);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
FPU_ADD(regd, t0reg);
|
||||
}
|
||||
else {
|
||||
SSE_MOVSS_XMM_to_XMM(regd, EEREC_S);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_T); }
|
||||
FPU_MUL(regd, EEREC_T);
|
||||
SSE_MULSS_XMM_to_XMM(regd, EEREC_T);
|
||||
if (info & PROCESS_EE_ACC) {
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(EEREC_ACC); }
|
||||
FPU_ADD(regd, EEREC_ACC);
|
||||
|
@ -1329,7 +1292,7 @@ void recMADDtemp(int info, int regd)
|
|||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||
SSE_MOVSS_M32_to_XMM(t1reg, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(t0reg); fpuFloat2(t1reg); }
|
||||
FPU_MUL(t0reg, t1reg);
|
||||
SSE_MULSS_XMM_to_XMM(t0reg, t1reg);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
FPU_ADD(regd, t0reg);
|
||||
_freeXMMreg(t1reg);
|
||||
|
@ -1339,7 +1302,7 @@ void recMADDtemp(int info, int regd)
|
|||
SSE_MOVSS_M32_to_XMM(regd, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(t0reg); }
|
||||
FPU_MUL(regd, t0reg);
|
||||
SSE_MULSS_XMM_to_XMM(regd, t0reg);
|
||||
if (info & PROCESS_EE_ACC) {
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(EEREC_ACC); }
|
||||
FPU_ADD(regd, EEREC_ACC);
|
||||
|
@ -1422,7 +1385,7 @@ int t1reg;
|
|||
if(regd == EEREC_S) {
|
||||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(t0reg); }
|
||||
FPU_MUL(regd, t0reg);
|
||||
SSE_MULSS_XMM_to_XMM(regd, t0reg);
|
||||
if (info & PROCESS_EE_ACC) { SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_ACC); }
|
||||
else { SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.ACC); }
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
|
@ -1432,14 +1395,14 @@ int t1reg;
|
|||
else if (regd == EEREC_ACC){
|
||||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(EEREC_S); fpuFloat2(t0reg); }
|
||||
FPU_MUL(t0reg, EEREC_S);
|
||||
SSE_MULSS_XMM_to_XMM(t0reg, EEREC_S);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
FPU_SUB(regd, t0reg);
|
||||
}
|
||||
else {
|
||||
SSE_MOVSS_M32_to_XMM(regd, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_S); }
|
||||
FPU_MUL(regd, EEREC_S);
|
||||
SSE_MULSS_XMM_to_XMM(regd, EEREC_S);
|
||||
if (info & PROCESS_EE_ACC) { SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_ACC); }
|
||||
else { SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.ACC); }
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
|
@ -1451,7 +1414,7 @@ int t1reg;
|
|||
if(regd == EEREC_T) {
|
||||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(t0reg); }
|
||||
FPU_MUL(regd, t0reg);
|
||||
SSE_MULSS_XMM_to_XMM(regd, t0reg);
|
||||
if (info & PROCESS_EE_ACC) { SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_ACC); }
|
||||
else { SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.ACC); }
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
|
@ -1461,14 +1424,14 @@ int t1reg;
|
|||
else if (regd == EEREC_ACC){
|
||||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(EEREC_T); fpuFloat2(t0reg); }
|
||||
FPU_MUL(t0reg, EEREC_T);
|
||||
SSE_MULSS_XMM_to_XMM(t0reg, EEREC_T);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
FPU_SUB(regd, t0reg);
|
||||
}
|
||||
else {
|
||||
SSE_MOVSS_M32_to_XMM(regd, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_T); }
|
||||
FPU_MUL(regd, EEREC_T);
|
||||
SSE_MULSS_XMM_to_XMM(regd, EEREC_T);
|
||||
if (info & PROCESS_EE_ACC) { SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_ACC); }
|
||||
else { SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.ACC); }
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
|
@ -1479,7 +1442,7 @@ int t1reg;
|
|||
case (PROCESS_EE_S|PROCESS_EE_T):
|
||||
if(regd == EEREC_S) {
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_T); }
|
||||
FPU_MUL(regd, EEREC_T);
|
||||
SSE_MULSS_XMM_to_XMM(regd, EEREC_T);
|
||||
if (info & PROCESS_EE_ACC) { SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_ACC); }
|
||||
else { SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.ACC); }
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
|
@ -1488,7 +1451,7 @@ int t1reg;
|
|||
}
|
||||
else if(regd == EEREC_T) {
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_S); }
|
||||
FPU_MUL(regd, EEREC_S);
|
||||
SSE_MULSS_XMM_to_XMM(regd, EEREC_S);
|
||||
if (info & PROCESS_EE_ACC) { SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_ACC); }
|
||||
else { SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.ACC); }
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
|
@ -1498,14 +1461,14 @@ int t1reg;
|
|||
else if(regd == EEREC_ACC) {
|
||||
SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_S);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(t0reg); fpuFloat2(EEREC_T); }
|
||||
FPU_MUL(t0reg, EEREC_T);
|
||||
SSE_MULSS_XMM_to_XMM(t0reg, EEREC_T);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
FPU_SUB(regd, t0reg);
|
||||
}
|
||||
else {
|
||||
SSE_MOVSS_XMM_to_XMM(regd, EEREC_S);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(EEREC_T); }
|
||||
FPU_MUL(regd, EEREC_T);
|
||||
SSE_MULSS_XMM_to_XMM(regd, EEREC_T);
|
||||
if (info & PROCESS_EE_ACC) { SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_ACC); }
|
||||
else { SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.ACC); }
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
|
@ -1519,7 +1482,7 @@ int t1reg;
|
|||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||
SSE_MOVSS_M32_to_XMM(t1reg, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(t0reg); fpuFloat2(t1reg); }
|
||||
FPU_MUL(t0reg, t1reg);
|
||||
SSE_MULSS_XMM_to_XMM(t0reg, t1reg);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
FPU_SUB(regd, t0reg);
|
||||
_freeXMMreg(t1reg);
|
||||
|
@ -1529,7 +1492,7 @@ int t1reg;
|
|||
SSE_MOVSS_M32_to_XMM(regd, (uptr)&fpuRegs.fpr[_Fs_]);
|
||||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(regd); fpuFloat2(t0reg); }
|
||||
FPU_MUL(regd, t0reg);
|
||||
SSE_MULSS_XMM_to_XMM(regd, t0reg);
|
||||
if (info & PROCESS_EE_ACC) { SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_ACC); }
|
||||
else { SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.ACC); }
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(t0reg); }
|
||||
|
|
|
@ -22,24 +22,38 @@
|
|||
#include "iR5900.h"
|
||||
#include "iFPU.h"
|
||||
|
||||
/* Version of the FPU that emulates an exponent of 0xff and overflow/underflow flags */
|
||||
/* This is a version of the FPU that emulates an exponent of 0xff and overflow/underflow flags */
|
||||
|
||||
/* Can be made faster by not converting stuff back and forth between instructions. */
|
||||
|
||||
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// FPU emulation status:
|
||||
// ADD, SUB (incl. accumulation stage of MADD/MSUB) - no known problems.
|
||||
// Mul (incl. multiplication stage of MADD/MSUB) - incorrect. PS2's result mantissa is sometimes
|
||||
// smaller by 0x1 than IEEE's result (with round to zero).
|
||||
// DIV, SQRT, RSQRT - incorrect. PS2's result varies between IEEE's result with round to zero
|
||||
// and IEEE's result with round to +/-infinity.
|
||||
// other stuff - no known problems.
|
||||
//----------------------------------------------------------------
|
||||
|
||||
|
||||
using namespace x86Emitter;
|
||||
|
||||
//set overflow flag (set only if FPU_RESULT is 1)
|
||||
// Set overflow flag (define only if FPU_RESULT is 1)
|
||||
#define FPU_FLAGS_OVERFLOW 1
|
||||
//set underflow flag (set only if FPU_RESULT is 1)
|
||||
// Set underflow flag (define only if FPU_RESULT is 1)
|
||||
#define FPU_FLAGS_UNDERFLOW 1
|
||||
|
||||
//if 1, result is not clamped (Gives correct results as in PS2,
|
||||
//but can cause problems due to insufficient clamping levels in the VUs)
|
||||
// If 1, result is not clamped (Gives correct results as in PS2,
|
||||
// but can cause problems due to insufficient clamping levels in the VUs)
|
||||
#define FPU_RESULT 1
|
||||
|
||||
//set I&D flags. also impacts other aspects of DIV/R/SQRT correctness
|
||||
// Set I&D flags. Also impacts other aspects of DIV/R/SQRT correctness
|
||||
#define FPU_FLAGS_ID 1
|
||||
|
||||
// Add/Sub opcodes produce the same results as the ps2
|
||||
#define FPU_CORRECT_ADD_SUB 1
|
||||
|
||||
#ifdef FPU_RECOMPILE
|
||||
|
||||
|
@ -48,8 +62,8 @@ namespace R5900 {
|
|||
namespace Dynarec {
|
||||
namespace OpcodeImpl {
|
||||
namespace COP1 {
|
||||
|
||||
u32 __fastcall FPU_MUL_MANTISSA(u32 s, u32 t);
|
||||
|
||||
u32 __fastcall FPU_MUL_HACK(u32 s, u32 t);
|
||||
|
||||
namespace DOUBLE {
|
||||
|
||||
|
@ -71,8 +85,6 @@ namespace DOUBLE {
|
|||
#define FPUflagSO 0X00000010
|
||||
#define FPUflagSU 0X00000008
|
||||
|
||||
#define FPU_ADD_SUB_HACK 1 // Add/Sub opcodes produce more ps2-like results if set to 1
|
||||
|
||||
#define REC_FPUBRANCH(f) \
|
||||
void f(); \
|
||||
void rec##f() { \
|
||||
|
@ -145,26 +157,25 @@ static const __aligned(32) FPUd_Globals s_const =
|
|||
};
|
||||
|
||||
|
||||
// converts small normal numbers to double equivalent
|
||||
// converts large normal numbers (which represent NaN/inf in IEEE) to double equivalent
|
||||
// ToDouble : converts single-precision PS2 float to double-precision IEEE float
|
||||
|
||||
//mustn't use EAX/ECX/EDX/x86regs (MUL)
|
||||
void ToDouble(int reg)
|
||||
{
|
||||
SSE_UCOMISS_M32_to_XMM(reg, (uptr)s_const.pos_inf); //sets ZF if equal or incomparable
|
||||
u8 *to_complex = JE8(0); //complex conversion if positive infinity or NaN
|
||||
SSE_UCOMISS_M32_to_XMM(reg, (uptr)s_const.pos_inf); // Sets ZF if reg is equal or incomparable to pos_inf
|
||||
u8 *to_complex = JE8(0); // Complex conversion if positive infinity or NaN
|
||||
SSE_UCOMISS_M32_to_XMM(reg, (uptr)s_const.neg_inf);
|
||||
u8 *to_complex2 = JE8(0); //complex conversion if negative infinity
|
||||
u8 *to_complex2 = JE8(0); // Complex conversion if negative infinity
|
||||
|
||||
SSE2_CVTSS2SD_XMM_to_XMM(reg, reg); //simply convert
|
||||
SSE2_CVTSS2SD_XMM_to_XMM(reg, reg); // Simply convert
|
||||
u8 *end = JMP8(0);
|
||||
|
||||
x86SetJ8(to_complex);
|
||||
x86SetJ8(to_complex2);
|
||||
|
||||
SSE2_PSUBD_M128_to_XMM(reg, (uptr)s_const.one_exp); //lower exponent
|
||||
// Special conversion for when IEEE sees the value in reg as an INF/NaN
|
||||
SSE2_PSUBD_M128_to_XMM(reg, (uptr)s_const.one_exp); // Lower exponent by one
|
||||
SSE2_CVTSS2SD_XMM_to_XMM(reg, reg);
|
||||
SSE2_PADDQ_M128_to_XMM(reg, (uptr)s_const.dbl_one_exp); //raise exponent
|
||||
SSE2_PADDQ_M128_to_XMM(reg, (uptr)s_const.dbl_one_exp); // Raise exponent by one
|
||||
|
||||
x86SetJ8(end);
|
||||
}
|
||||
|
@ -174,19 +185,20 @@ void ToDouble(int reg)
|
|||
//------------------------------------------------------------------
|
||||
|
||||
/*
|
||||
if FPU_RESULT, results are more like the real PS2's FPU. But if the VU doesn't clamp all operands,
|
||||
new issues may happen if a game transfers the FPU results into the VU and continues operations there.
|
||||
ar tonelico 1 does this with the result from DIV/RSQRT (when a division by zero occurs)
|
||||
if FPU_RESULT is defined, results are more like the real PS2's FPU. But new issues may happen if
|
||||
the VU isn't clamping all operands since games may transfer FPU results into the VU.
|
||||
Ar tonelico 1 does this with the result from DIV/RSQRT (when a division by zero occurs)
|
||||
otherwise, results are still usually better than iFPU.cpp.
|
||||
*/
|
||||
|
||||
//mustn't use EAX/ECX/EDX/x86regs (MUL)
|
||||
|
||||
// ToPS2FPU_Full - converts double-precision IEEE float to single-precision PS2 float
|
||||
|
||||
// converts small normal numbers to PS2 equivalent
|
||||
// converts large normal numbers to PS2 equivalent (which represent NaN/inf in IEEE)
|
||||
// converts really large normal numbers to PS2 signed max
|
||||
// converts really small normal numbers to zero (flush)
|
||||
// doesn't handle inf/nan/denormal
|
||||
|
||||
void ToPS2FPU_Full(int reg, bool flags, int absreg, bool acc, bool addsub)
|
||||
{
|
||||
if (flags)
|
||||
|
@ -259,7 +271,6 @@ void ToPS2FPU_Full(int reg, bool flags, int absreg, bool acc, bool addsub)
|
|||
x86SetJ8(end4);
|
||||
}
|
||||
|
||||
//mustn't use EAX/ECX/EDX/x86regs (MUL)
|
||||
void ToPS2FPU(int reg, bool flags, int absreg, bool acc, bool addsub = false)
|
||||
{
|
||||
if (FPU_RESULT)
|
||||
|
@ -399,31 +410,31 @@ void FPU_ADD_SUB(int tempd, int tempt) //tempd and tempt are overwritten, they a
|
|||
_freeX86reg(temp2);
|
||||
_freeX86reg(tempecx);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void FPU_MUL(int info, int regd, int sreg, int treg, bool acc)
|
||||
{
|
||||
u8 *noHack;
|
||||
u32 *endMul;
|
||||
|
||||
if (CHECK_FPUMULHACK)
|
||||
{
|
||||
SSE2_MOVD_XMM_to_R(ECX, sreg);
|
||||
SSE2_MOVD_XMM_to_R(EDX, treg);
|
||||
CALLFunc( (uptr)&FPU_MUL_MANTISSA );
|
||||
ToDouble(sreg); ToDouble(treg);
|
||||
SSE2_MULSD_XMM_to_XMM(sreg, treg);
|
||||
ToPS2FPU(sreg, true, treg, acc);
|
||||
SSE_MOVSS_XMM_to_XMM(regd, sreg);
|
||||
SSE2_MOVD_XMM_to_R(ECX, regd);
|
||||
AND32ItoR(ECX, 0xff800000);
|
||||
OR32RtoR(EAX, ECX);
|
||||
SSE2_MOVD_R_to_XMM(regd, EAX);
|
||||
}
|
||||
else
|
||||
{
|
||||
ToDouble(sreg); ToDouble(treg);
|
||||
SSE2_MULSD_XMM_to_XMM(sreg, treg);
|
||||
ToPS2FPU(sreg, true, treg, acc);
|
||||
SSE_MOVSS_XMM_to_XMM(regd, sreg);
|
||||
CALLFunc( (uptr)&FPU_MUL_HACK ); //returns the hacked result or 0
|
||||
TEST32RtoR(EAX, EAX);
|
||||
noHack = JZ8(0);
|
||||
SSE2_MOVD_R_to_XMM(regd, EAX);
|
||||
endMul = JMP32(0);
|
||||
x86SetJ8(noHack);
|
||||
}
|
||||
|
||||
ToDouble(sreg); ToDouble(treg);
|
||||
SSE2_MULSD_XMM_to_XMM(sreg, treg);
|
||||
ToPS2FPU(sreg, true, treg, acc);
|
||||
SSE_MOVSS_XMM_to_XMM(regd, sreg);
|
||||
|
||||
if (CHECK_FPUMULHACK)
|
||||
x86SetJ32(endMul);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
|
@ -437,7 +448,7 @@ void recFPUOp(int info, int regd, int op, bool acc)
|
|||
int sreg, treg;
|
||||
ALLOC_S(sreg); ALLOC_T(treg);
|
||||
|
||||
if (FPU_ADD_SUB_HACK) //ADD or SUB
|
||||
if (FPU_CORRECT_ADD_SUB)
|
||||
FPU_ADD_SUB(sreg, treg);
|
||||
|
||||
ToDouble(sreg); ToDouble(treg);
|
||||
|
@ -709,7 +720,7 @@ void recMaddsub(int info, int regd, int op, bool acc)
|
|||
|
||||
GET_ACC(treg);
|
||||
|
||||
if (FPU_ADD_SUB_HACK) //ADD or SUB
|
||||
if (FPU_CORRECT_ADD_SUB)
|
||||
FPU_ADD_SUB(treg, sreg); //might be problematic for something!!!!
|
||||
|
||||
// TEST FOR ACC/MUL OVERFLOWS, PROPAGATE THEM IF THEY OCCUR
|
||||
|
|
Loading…
Reference in New Issue