From 68eb83ea83e5e53d0ae486a2f0f67d8dad93151d Mon Sep 17 00:00:00 2001 From: Tillmann Karras Date: Sat, 9 May 2015 07:23:10 +0200 Subject: [PATCH] Interpreter: fix NaN handling in FMA --- .../PowerPC/Interpreter/Interpreter_FPUtils.h | 66 +++++++++---------- .../Interpreter/Interpreter_FloatingPoint.cpp | 11 ++-- .../Interpreter/Interpreter_Paired.cpp | 8 +-- 3 files changed, 40 insertions(+), 45 deletions(-) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h index 24ae1b1775..0aa022c0d5 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h @@ -84,7 +84,7 @@ inline double Force25Bit(double d) // these functions allow globally modify operations behaviour // also, these may be used to set flags like FR, FI, OX, UX -inline double NI_mul(const double a, const double b) +inline double NI_mul(double a, double b) { #ifdef VERY_ACCURATE_FP if (a != a) return a; @@ -101,7 +101,7 @@ inline double NI_mul(const double a, const double b) #endif } -inline double NI_add(const double a, const double b) +inline double NI_add(double a, double b) { #ifdef VERY_ACCURATE_FP if (a != a) return a; @@ -118,7 +118,7 @@ inline double NI_add(const double a, const double b) #endif } -inline double NI_sub(const double a, const double b) +inline double NI_sub(double a, double b) { #ifdef VERY_ACCURATE_FP if (a != a) return a; @@ -135,57 +135,53 @@ inline double NI_sub(const double a, const double b) #endif } -inline double NI_madd(const double a, const double b, const double c) +inline double NI_madd(double a, double c, double b, bool negate = false) { #ifdef VERY_ACCURATE_FP if (a != a) return a; - if (c != c) return c; if (b != b) return b; - double t = a * b; + if (c != c) return c; + double t = a * c; if (t != t) { SetFPException(FPSCR_VXIMZ); return PPC_NAN; } - t = t + c; + t = t + b; if (t != t) { SetFPException(FPSCR_VXISI); return PPC_NAN; } - return t; #else - return NI_add(NI_mul(a, b), c); + double t = NI_add(NI_mul(a, c), b); #endif + return negate ? -t : t; } -inline double NI_msub(const double a, const double b, const double c) +inline double NI_msub(double a, double c, double b, bool negate = false) { -//#ifdef VERY_ACCURATE_FP -// This code does not produce accurate fp! NAN's are not calculated correctly, nor negative zero. -// The code is kept here for reference. -// -// if (a != a) return a; -// if (c != c) return c; -// if (b != b) return b; -// double t = a * b; -// if (t != t) -// { -// SetFPException(FPSCR_VXIMZ); -// return PPC_NAN; -// } -// -// t = t - c; -// if (t != t) -// { -// SetFPException(FPSCR_VXISI); -// return PPC_NAN; -// } -// return t; -//#else -// This code does not calculate QNAN's correctly but calculates negative zero correctly. - return NI_sub(NI_mul(a, b), c); -// #endif +#ifdef VERY_ACCURATE_FP + if (a != a) return a; + if (b != b) return b; + if (c != c) return c; + double t = a * c; + if (t != t) + { + SetFPException(FPSCR_VXIMZ); + return PPC_NAN; + } + + t = t - b; + if (t != t) + { + SetFPException(FPSCR_VXISI); + return PPC_NAN; + } +#else + double t = NI_sub(NI_mul(a, c), b); +#endif + return negate ? -t : t; } // used by stfsXX instructions and ps_rsqrte diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp index fc0ceb57fd..c5eee4d0ea 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp @@ -310,7 +310,7 @@ void Interpreter::fmulsx(UGeckoInstruction _inst) void Interpreter::fmaddx(UGeckoInstruction _inst) { - double result = ForceDouble(NI_madd( rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB) )); + double result = ForceDouble(NI_madd(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB))); rPS0(_inst.FD) = result; UpdateFPRF(result); @@ -490,7 +490,7 @@ void Interpreter::fmsubsx(UGeckoInstruction _inst) void Interpreter::fnmaddx(UGeckoInstruction _inst) { - rPS0(_inst.FD) = ForceDouble(-NI_madd(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB))); + rPS0(_inst.FD) = ForceDouble(NI_madd(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB), true)); UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) @@ -500,7 +500,7 @@ void Interpreter::fnmaddx(UGeckoInstruction _inst) void Interpreter::fnmaddsx(UGeckoInstruction _inst) { double c_value = Force25Bit(rPS0(_inst.FC)); - rPS0(_inst.FD) = rPS1(_inst.FD) = ForceSingle(-NI_madd(rPS0(_inst.FA), c_value, rPS0(_inst.FB))); + rPS0(_inst.FD) = rPS1(_inst.FD) = ForceSingle(NI_madd(rPS0(_inst.FA), c_value, rPS0(_inst.FB), true)); UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) @@ -509,18 +509,17 @@ void Interpreter::fnmaddsx(UGeckoInstruction _inst) void Interpreter::fnmsubx(UGeckoInstruction _inst) { - rPS0(_inst.FD) = ForceDouble(-NI_msub(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB))); + rPS0(_inst.FD) = ForceDouble(NI_msub(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB), true)); UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(); } -// fnmsubsx does not handle QNAN properly - see NI_msub void Interpreter::fnmsubsx(UGeckoInstruction _inst) { double c_value = Force25Bit(rPS0(_inst.FC)); - rPS0(_inst.FD) = rPS1(_inst.FD) = ForceSingle(-NI_msub(rPS0(_inst.FA), c_value, rPS0(_inst.FB))); + rPS0(_inst.FD) = rPS1(_inst.FD) = ForceSingle(NI_msub(rPS0(_inst.FA), c_value, rPS0(_inst.FB), true)); UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp index 612b6753ea..2c9656bd8f 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp @@ -303,8 +303,8 @@ void Interpreter::ps_nmsub(UGeckoInstruction _inst) { double c0 = Force25Bit(rPS0(_inst.FC)); double c1 = Force25Bit(rPS1(_inst.FC)); - rPS0(_inst.FD) = ForceSingle(-NI_msub(rPS0(_inst.FA), c0, rPS0(_inst.FB))); - rPS1(_inst.FD) = ForceSingle(-NI_msub(rPS1(_inst.FA), c1, rPS1(_inst.FB))); + rPS0(_inst.FD) = ForceSingle(NI_msub(rPS0(_inst.FA), c0, rPS0(_inst.FB), true)); + rPS1(_inst.FD) = ForceSingle(NI_msub(rPS1(_inst.FA), c1, rPS1(_inst.FB), true)); UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) @@ -315,8 +315,8 @@ void Interpreter::ps_nmadd(UGeckoInstruction _inst) { double c0 = Force25Bit(rPS0(_inst.FC)); double c1 = Force25Bit(rPS1(_inst.FC)); - rPS0(_inst.FD) = ForceSingle(-NI_madd(rPS0(_inst.FA), c0, rPS0(_inst.FB))); - rPS1(_inst.FD) = ForceSingle(-NI_madd(rPS1(_inst.FA), c1, rPS1(_inst.FB))); + rPS0(_inst.FD) = ForceSingle(NI_madd(rPS0(_inst.FA), c0, rPS0(_inst.FB), true)); + rPS1(_inst.FD) = ForceSingle(NI_madd(rPS1(_inst.FA), c1, rPS1(_inst.FB), true)); UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc)