Merge pull request #2381 from Tilka/fma
Interpreter: fix NaN handling in FMA instructions
This commit is contained in:
commit
e04a457b31
|
@ -11,9 +11,6 @@
|
||||||
#include "Core/PowerPC/Gekko.h"
|
#include "Core/PowerPC/Gekko.h"
|
||||||
#include "Core/PowerPC/Interpreter/Interpreter.h"
|
#include "Core/PowerPC/Interpreter/Interpreter.h"
|
||||||
|
|
||||||
// warning! very slow! This setting fixes NAN
|
|
||||||
//#define VERY_ACCURATE_FP
|
|
||||||
|
|
||||||
#define MIN_SINGLE 0xc7efffffe0000000ull
|
#define MIN_SINGLE 0xc7efffffe0000000ull
|
||||||
#define MAX_SINGLE 0x47efffffe0000000ull
|
#define MAX_SINGLE 0x47efffffe0000000ull
|
||||||
|
|
||||||
|
@ -84,9 +81,8 @@ inline double Force25Bit(double d)
|
||||||
// these functions allow globally modifying the behaviour of operations
|
// these functions allow globally modifying the behaviour of operations
|
||||||
// also, these may be used to set flags like FR, FI, OX, UX
|
// also, these may be used to set flags like FR, FI, OX, UX
|
||||||
|
|
||||||
inline double NI_mul(const double a, const double b)
|
inline double NI_mul(double a, double b)
|
||||||
{
|
{
|
||||||
#ifdef VERY_ACCURATE_FP
|
|
||||||
if (a != a) return a;
|
if (a != a) return a;
|
||||||
if (b != b) return b;
|
if (b != b) return b;
|
||||||
double t = a * b;
|
double t = a * b;
|
||||||
|
@ -96,14 +92,10 @@ inline double NI_mul(const double a, const double b)
|
||||||
return PPC_NAN;
|
return PPC_NAN;
|
||||||
}
|
}
|
||||||
return t;
|
return t;
|
||||||
#else
|
|
||||||
return a * b;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline double NI_add(const double a, const double b)
|
inline double NI_add(double a, double b)
|
||||||
{
|
{
|
||||||
#ifdef VERY_ACCURATE_FP
|
|
||||||
if (a != a) return a;
|
if (a != a) return a;
|
||||||
if (b != b) return b;
|
if (b != b) return b;
|
||||||
double t = a + b;
|
double t = a + b;
|
||||||
|
@ -113,14 +105,10 @@ inline double NI_add(const double a, const double b)
|
||||||
return PPC_NAN;
|
return PPC_NAN;
|
||||||
}
|
}
|
||||||
return t;
|
return t;
|
||||||
#else
|
|
||||||
return a + b;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline double NI_sub(const double a, const double b)
|
inline double NI_sub(double a, double b)
|
||||||
{
|
{
|
||||||
#ifdef VERY_ACCURATE_FP
|
|
||||||
if (a != a) return a;
|
if (a != a) return a;
|
||||||
if (b != b) return b;
|
if (b != b) return b;
|
||||||
double t = a - b;
|
double t = a - b;
|
||||||
|
@ -130,62 +118,47 @@ inline double NI_sub(const double a, const double b)
|
||||||
return PPC_NAN;
|
return PPC_NAN;
|
||||||
}
|
}
|
||||||
return t;
|
return t;
|
||||||
#else
|
|
||||||
return a - b;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline double NI_madd(const double a, const double b, const double c)
|
inline double NI_madd(double a, double c, double b, bool negate = false)
|
||||||
{
|
{
|
||||||
#ifdef VERY_ACCURATE_FP
|
|
||||||
if (a != a) return a;
|
if (a != a) return a;
|
||||||
if (c != c) return c;
|
|
||||||
if (b != b) return b;
|
if (b != b) return b;
|
||||||
double t = a * b;
|
if (c != c) return c;
|
||||||
|
double t = a * c;
|
||||||
if (t != t)
|
if (t != t)
|
||||||
{
|
{
|
||||||
SetFPException(FPSCR_VXIMZ);
|
SetFPException(FPSCR_VXIMZ);
|
||||||
return PPC_NAN;
|
return PPC_NAN;
|
||||||
}
|
}
|
||||||
t = t + c;
|
t = t + b;
|
||||||
if (t != t)
|
if (t != t)
|
||||||
{
|
{
|
||||||
SetFPException(FPSCR_VXISI);
|
SetFPException(FPSCR_VXISI);
|
||||||
return PPC_NAN;
|
return PPC_NAN;
|
||||||
}
|
}
|
||||||
return t;
|
return negate ? -t : t;
|
||||||
#else
|
|
||||||
return NI_add(NI_mul(a, b), c);
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline double NI_msub(const double a, const double b, const double c)
|
inline double NI_msub(double a, double c, double b, bool negate = false)
|
||||||
{
|
{
|
||||||
//#ifdef VERY_ACCURATE_FP
|
if (a != a) return a;
|
||||||
// This code does not produce accurate fp! NAN's are not calculated correctly, nor negative zero.
|
if (b != b) return b;
|
||||||
// The code is kept here for reference.
|
if (c != c) return c;
|
||||||
//
|
double t = a * c;
|
||||||
// if (a != a) return a;
|
if (t != t)
|
||||||
// if (c != c) return c;
|
{
|
||||||
// if (b != b) return b;
|
SetFPException(FPSCR_VXIMZ);
|
||||||
// double t = a * b;
|
return PPC_NAN;
|
||||||
// if (t != t)
|
}
|
||||||
// {
|
|
||||||
// SetFPException(FPSCR_VXIMZ);
|
t = t - b;
|
||||||
// return PPC_NAN;
|
if (t != t)
|
||||||
// }
|
{
|
||||||
//
|
SetFPException(FPSCR_VXISI);
|
||||||
// t = t - c;
|
return PPC_NAN;
|
||||||
// if (t != t)
|
}
|
||||||
// {
|
return negate ? -t : t;
|
||||||
// SetFPException(FPSCR_VXISI);
|
|
||||||
// return PPC_NAN;
|
|
||||||
// }
|
|
||||||
// return t;
|
|
||||||
//#else
|
|
||||||
// This code does not calculate QNAN's correctly but calculates negative zero correctly.
|
|
||||||
return NI_sub(NI_mul(a, b), c);
|
|
||||||
// #endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// used by stfsXX instructions and ps_rsqrte
|
// used by stfsXX instructions and ps_rsqrte
|
||||||
|
|
|
@ -310,7 +310,7 @@ void Interpreter::fmulsx(UGeckoInstruction _inst)
|
||||||
|
|
||||||
void Interpreter::fmaddx(UGeckoInstruction _inst)
|
void Interpreter::fmaddx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
double result = ForceDouble(NI_madd( rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB) ));
|
double result = ForceDouble(NI_madd(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB)));
|
||||||
rPS0(_inst.FD) = result;
|
rPS0(_inst.FD) = result;
|
||||||
UpdateFPRF(result);
|
UpdateFPRF(result);
|
||||||
|
|
||||||
|
@ -490,7 +490,7 @@ void Interpreter::fmsubsx(UGeckoInstruction _inst)
|
||||||
|
|
||||||
void Interpreter::fnmaddx(UGeckoInstruction _inst)
|
void Interpreter::fnmaddx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
rPS0(_inst.FD) = ForceDouble(-NI_madd(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB)));
|
rPS0(_inst.FD) = ForceDouble(NI_madd(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB), true));
|
||||||
UpdateFPRF(rPS0(_inst.FD));
|
UpdateFPRF(rPS0(_inst.FD));
|
||||||
|
|
||||||
if (_inst.Rc)
|
if (_inst.Rc)
|
||||||
|
@ -500,7 +500,7 @@ void Interpreter::fnmaddx(UGeckoInstruction _inst)
|
||||||
void Interpreter::fnmaddsx(UGeckoInstruction _inst)
|
void Interpreter::fnmaddsx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
double c_value = Force25Bit(rPS0(_inst.FC));
|
double c_value = Force25Bit(rPS0(_inst.FC));
|
||||||
rPS0(_inst.FD) = rPS1(_inst.FD) = ForceSingle(-NI_madd(rPS0(_inst.FA), c_value, rPS0(_inst.FB)));
|
rPS0(_inst.FD) = rPS1(_inst.FD) = ForceSingle(NI_madd(rPS0(_inst.FA), c_value, rPS0(_inst.FB), true));
|
||||||
UpdateFPRF(rPS0(_inst.FD));
|
UpdateFPRF(rPS0(_inst.FD));
|
||||||
|
|
||||||
if (_inst.Rc)
|
if (_inst.Rc)
|
||||||
|
@ -509,18 +509,17 @@ void Interpreter::fnmaddsx(UGeckoInstruction _inst)
|
||||||
|
|
||||||
void Interpreter::fnmsubx(UGeckoInstruction _inst)
|
void Interpreter::fnmsubx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
rPS0(_inst.FD) = ForceDouble(-NI_msub(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB)));
|
rPS0(_inst.FD) = ForceDouble(NI_msub(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB), true));
|
||||||
UpdateFPRF(rPS0(_inst.FD));
|
UpdateFPRF(rPS0(_inst.FD));
|
||||||
|
|
||||||
if (_inst.Rc)
|
if (_inst.Rc)
|
||||||
Helper_UpdateCR1();
|
Helper_UpdateCR1();
|
||||||
}
|
}
|
||||||
|
|
||||||
// fnmsubsx does not handle QNAN properly - see NI_msub
|
|
||||||
void Interpreter::fnmsubsx(UGeckoInstruction _inst)
|
void Interpreter::fnmsubsx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
double c_value = Force25Bit(rPS0(_inst.FC));
|
double c_value = Force25Bit(rPS0(_inst.FC));
|
||||||
rPS0(_inst.FD) = rPS1(_inst.FD) = ForceSingle(-NI_msub(rPS0(_inst.FA), c_value, rPS0(_inst.FB)));
|
rPS0(_inst.FD) = rPS1(_inst.FD) = ForceSingle(NI_msub(rPS0(_inst.FA), c_value, rPS0(_inst.FB), true));
|
||||||
UpdateFPRF(rPS0(_inst.FD));
|
UpdateFPRF(rPS0(_inst.FD));
|
||||||
|
|
||||||
if (_inst.Rc)
|
if (_inst.Rc)
|
||||||
|
|
|
@ -303,8 +303,8 @@ void Interpreter::ps_nmsub(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
double c0 = Force25Bit(rPS0(_inst.FC));
|
double c0 = Force25Bit(rPS0(_inst.FC));
|
||||||
double c1 = Force25Bit(rPS1(_inst.FC));
|
double c1 = Force25Bit(rPS1(_inst.FC));
|
||||||
rPS0(_inst.FD) = ForceSingle(-NI_msub(rPS0(_inst.FA), c0, rPS0(_inst.FB)));
|
rPS0(_inst.FD) = ForceSingle(NI_msub(rPS0(_inst.FA), c0, rPS0(_inst.FB), true));
|
||||||
rPS1(_inst.FD) = ForceSingle(-NI_msub(rPS1(_inst.FA), c1, rPS1(_inst.FB)));
|
rPS1(_inst.FD) = ForceSingle(NI_msub(rPS1(_inst.FA), c1, rPS1(_inst.FB), true));
|
||||||
UpdateFPRF(rPS0(_inst.FD));
|
UpdateFPRF(rPS0(_inst.FD));
|
||||||
|
|
||||||
if (_inst.Rc)
|
if (_inst.Rc)
|
||||||
|
@ -315,8 +315,8 @@ void Interpreter::ps_nmadd(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
double c0 = Force25Bit(rPS0(_inst.FC));
|
double c0 = Force25Bit(rPS0(_inst.FC));
|
||||||
double c1 = Force25Bit(rPS1(_inst.FC));
|
double c1 = Force25Bit(rPS1(_inst.FC));
|
||||||
rPS0(_inst.FD) = ForceSingle(-NI_madd(rPS0(_inst.FA), c0, rPS0(_inst.FB)));
|
rPS0(_inst.FD) = ForceSingle(NI_madd(rPS0(_inst.FA), c0, rPS0(_inst.FB), true));
|
||||||
rPS1(_inst.FD) = ForceSingle(-NI_madd(rPS1(_inst.FA), c1, rPS1(_inst.FB)));
|
rPS1(_inst.FD) = ForceSingle(NI_madd(rPS1(_inst.FA), c1, rPS1(_inst.FB), true));
|
||||||
UpdateFPRF(rPS0(_inst.FD));
|
UpdateFPRF(rPS0(_inst.FD));
|
||||||
|
|
||||||
if (_inst.Rc)
|
if (_inst.Rc)
|
||||||
|
|
Loading…
Reference in New Issue