Interpreter: fix NaN handling in FMA

This commit is contained in:
Tillmann Karras 2015-05-09 07:23:10 +02:00
parent 0f7f8f8774
commit 68eb83ea83
3 changed files with 40 additions and 45 deletions

View File

@ -84,7 +84,7 @@ inline double Force25Bit(double d)
// these functions allow the behaviour of operations to be modified globally // these functions allow the behaviour of operations to be modified globally
// also, these may be used to set flags like FR, FI, OX, UX // also, these may be used to set flags like FR, FI, OX, UX
inline double NI_mul(const double a, const double b) inline double NI_mul(double a, double b)
{ {
#ifdef VERY_ACCURATE_FP #ifdef VERY_ACCURATE_FP
if (a != a) return a; if (a != a) return a;
@ -101,7 +101,7 @@ inline double NI_mul(const double a, const double b)
#endif #endif
} }
inline double NI_add(const double a, const double b) inline double NI_add(double a, double b)
{ {
#ifdef VERY_ACCURATE_FP #ifdef VERY_ACCURATE_FP
if (a != a) return a; if (a != a) return a;
@ -118,7 +118,7 @@ inline double NI_add(const double a, const double b)
#endif #endif
} }
inline double NI_sub(const double a, const double b) inline double NI_sub(double a, double b)
{ {
#ifdef VERY_ACCURATE_FP #ifdef VERY_ACCURATE_FP
if (a != a) return a; if (a != a) return a;
@ -135,57 +135,53 @@ inline double NI_sub(const double a, const double b)
#endif #endif
} }
inline double NI_madd(const double a, const double b, const double c) inline double NI_madd(double a, double c, double b, bool negate = false)
{ {
#ifdef VERY_ACCURATE_FP #ifdef VERY_ACCURATE_FP
if (a != a) return a; if (a != a) return a;
if (c != c) return c;
if (b != b) return b; if (b != b) return b;
double t = a * b; if (c != c) return c;
double t = a * c;
if (t != t) if (t != t)
{ {
SetFPException(FPSCR_VXIMZ); SetFPException(FPSCR_VXIMZ);
return PPC_NAN; return PPC_NAN;
} }
t = t + c; t = t + b;
if (t != t) if (t != t)
{ {
SetFPException(FPSCR_VXISI); SetFPException(FPSCR_VXISI);
return PPC_NAN; return PPC_NAN;
} }
return t;
#else #else
return NI_add(NI_mul(a, b), c); double t = NI_add(NI_mul(a, c), b);
#endif #endif
return negate ? -t : t;
} }
inline double NI_msub(const double a, const double b, const double c) inline double NI_msub(double a, double c, double b, bool negate = false)
{ {
//#ifdef VERY_ACCURATE_FP #ifdef VERY_ACCURATE_FP
// This code does not produce accurate fp! NAN's are not calculated correctly, nor negative zero. if (a != a) return a;
// The code is kept here for reference. if (b != b) return b;
// if (c != c) return c;
// if (a != a) return a; double t = a * c;
// if (c != c) return c; if (t != t)
// if (b != b) return b; {
// double t = a * b; SetFPException(FPSCR_VXIMZ);
// if (t != t) return PPC_NAN;
// { }
// SetFPException(FPSCR_VXIMZ);
// return PPC_NAN; t = t - b;
// } if (t != t)
// {
// t = t - c; SetFPException(FPSCR_VXISI);
// if (t != t) return PPC_NAN;
// { }
// SetFPException(FPSCR_VXISI); #else
// return PPC_NAN; double t = NI_sub(NI_mul(a, c), b);
// } #endif
// return t; return negate ? -t : t;
//#else
// This code does not calculate QNAN's correctly but calculates negative zero correctly.
return NI_sub(NI_mul(a, b), c);
// #endif
} }
// used by stfsXX instructions and ps_rsqrte // used by stfsXX instructions and ps_rsqrte

View File

@ -310,7 +310,7 @@ void Interpreter::fmulsx(UGeckoInstruction _inst)
void Interpreter::fmaddx(UGeckoInstruction _inst) void Interpreter::fmaddx(UGeckoInstruction _inst)
{ {
double result = ForceDouble(NI_madd( rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB) )); double result = ForceDouble(NI_madd(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB)));
rPS0(_inst.FD) = result; rPS0(_inst.FD) = result;
UpdateFPRF(result); UpdateFPRF(result);
@ -490,7 +490,7 @@ void Interpreter::fmsubsx(UGeckoInstruction _inst)
void Interpreter::fnmaddx(UGeckoInstruction _inst) void Interpreter::fnmaddx(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = ForceDouble(-NI_madd(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB))); rPS0(_inst.FD) = ForceDouble(NI_madd(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB), true));
UpdateFPRF(rPS0(_inst.FD)); UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) if (_inst.Rc)
@ -500,7 +500,7 @@ void Interpreter::fnmaddx(UGeckoInstruction _inst)
void Interpreter::fnmaddsx(UGeckoInstruction _inst) void Interpreter::fnmaddsx(UGeckoInstruction _inst)
{ {
double c_value = Force25Bit(rPS0(_inst.FC)); double c_value = Force25Bit(rPS0(_inst.FC));
rPS0(_inst.FD) = rPS1(_inst.FD) = ForceSingle(-NI_madd(rPS0(_inst.FA), c_value, rPS0(_inst.FB))); rPS0(_inst.FD) = rPS1(_inst.FD) = ForceSingle(NI_madd(rPS0(_inst.FA), c_value, rPS0(_inst.FB), true));
UpdateFPRF(rPS0(_inst.FD)); UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) if (_inst.Rc)
@ -509,18 +509,17 @@ void Interpreter::fnmaddsx(UGeckoInstruction _inst)
void Interpreter::fnmsubx(UGeckoInstruction _inst) void Interpreter::fnmsubx(UGeckoInstruction _inst)
{ {
rPS0(_inst.FD) = ForceDouble(-NI_msub(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB))); rPS0(_inst.FD) = ForceDouble(NI_msub(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB), true));
UpdateFPRF(rPS0(_inst.FD)); UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) if (_inst.Rc)
Helper_UpdateCR1(); Helper_UpdateCR1();
} }
// fnmsubsx does not handle QNAN properly - see NI_msub
void Interpreter::fnmsubsx(UGeckoInstruction _inst) void Interpreter::fnmsubsx(UGeckoInstruction _inst)
{ {
double c_value = Force25Bit(rPS0(_inst.FC)); double c_value = Force25Bit(rPS0(_inst.FC));
rPS0(_inst.FD) = rPS1(_inst.FD) = ForceSingle(-NI_msub(rPS0(_inst.FA), c_value, rPS0(_inst.FB))); rPS0(_inst.FD) = rPS1(_inst.FD) = ForceSingle(NI_msub(rPS0(_inst.FA), c_value, rPS0(_inst.FB), true));
UpdateFPRF(rPS0(_inst.FD)); UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) if (_inst.Rc)

View File

@ -303,8 +303,8 @@ void Interpreter::ps_nmsub(UGeckoInstruction _inst)
{ {
double c0 = Force25Bit(rPS0(_inst.FC)); double c0 = Force25Bit(rPS0(_inst.FC));
double c1 = Force25Bit(rPS1(_inst.FC)); double c1 = Force25Bit(rPS1(_inst.FC));
rPS0(_inst.FD) = ForceSingle(-NI_msub(rPS0(_inst.FA), c0, rPS0(_inst.FB))); rPS0(_inst.FD) = ForceSingle(NI_msub(rPS0(_inst.FA), c0, rPS0(_inst.FB), true));
rPS1(_inst.FD) = ForceSingle(-NI_msub(rPS1(_inst.FA), c1, rPS1(_inst.FB))); rPS1(_inst.FD) = ForceSingle(NI_msub(rPS1(_inst.FA), c1, rPS1(_inst.FB), true));
UpdateFPRF(rPS0(_inst.FD)); UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) if (_inst.Rc)
@ -315,8 +315,8 @@ void Interpreter::ps_nmadd(UGeckoInstruction _inst)
{ {
double c0 = Force25Bit(rPS0(_inst.FC)); double c0 = Force25Bit(rPS0(_inst.FC));
double c1 = Force25Bit(rPS1(_inst.FC)); double c1 = Force25Bit(rPS1(_inst.FC));
rPS0(_inst.FD) = ForceSingle(-NI_madd(rPS0(_inst.FA), c0, rPS0(_inst.FB))); rPS0(_inst.FD) = ForceSingle(NI_madd(rPS0(_inst.FA), c0, rPS0(_inst.FB), true));
rPS1(_inst.FD) = ForceSingle(-NI_madd(rPS1(_inst.FA), c1, rPS1(_inst.FB))); rPS1(_inst.FD) = ForceSingle(NI_madd(rPS1(_inst.FA), c1, rPS1(_inst.FB), true));
UpdateFPRF(rPS0(_inst.FD)); UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) if (_inst.Rc)