From 68eb83ea83e5e53d0ae486a2f0f67d8dad93151d Mon Sep 17 00:00:00 2001
From: Tillmann Karras <tilkax@gmail.com>
Date: Sat, 9 May 2015 07:23:10 +0200
Subject: [PATCH] Interpreter: fix NaN handling in FMA

---
 .../PowerPC/Interpreter/Interpreter_FPUtils.h | 66 +++++++++----------
 .../Interpreter/Interpreter_FloatingPoint.cpp | 11 ++--
 .../Interpreter/Interpreter_Paired.cpp        |  8 +--
 3 files changed, 40 insertions(+), 45 deletions(-)

diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h
index 24ae1b1775..0aa022c0d5 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h
@@ -84,7 +84,7 @@ inline double Force25Bit(double d)
 // these functions allow globally modify operations behaviour
 // also, these may be used to set flags like FR, FI, OX, UX
 
-inline double NI_mul(const double a, const double b)
+inline double NI_mul(double a, double b)
 {
 #ifdef VERY_ACCURATE_FP
 	if (a != a) return a;
@@ -101,7 +101,7 @@ inline double NI_mul(const double a, const double b)
 #endif
 }
 
-inline double NI_add(const double a, const double b)
+inline double NI_add(double a, double b)
 {
 #ifdef VERY_ACCURATE_FP
 	if (a != a) return a;
@@ -118,7 +118,7 @@ inline double NI_add(const double a, const double b)
 #endif
 }
 
-inline double NI_sub(const double a, const double b)
+inline double NI_sub(double a, double b)
 {
 #ifdef VERY_ACCURATE_FP
 	if (a != a) return a;
@@ -135,57 +135,53 @@ inline double NI_sub(const double a, const double b)
 #endif
 }
 
-inline double NI_madd(const double a, const double b, const double c)
+inline double NI_madd(double a, double c, double b, bool negate = false) // a * c + b (operands arrive in the order a, c, b); negate asks for the negated result
 {
 #ifdef VERY_ACCURATE_FP
 	if (a != a) return a;
-	if (c != c) return c;
 	if (b != b) return b;
-	double t = a * b;
+	if (c != c) return c; // NaN operands are handed back unchanged (checked in the order a, b, c) and skip the negation at the end
+	double t = a * c; // neither factor is NaN here, so a NaN product is reported below via FPSCR_VXIMZ
 	if (t != t)
 	{
 		SetFPException(FPSCR_VXIMZ);
 		return PPC_NAN;
 	}
-	t = t + c;
+	t = t + b; // a NaN created by this addition is reported below via FPSCR_VXISI
 	if (t != t)
 	{
 		SetFPException(FPSCR_VXISI);
 		return PPC_NAN;
 	}
-	return t;
 #else
-	return NI_add(NI_mul(a, b), c);
+	double t = NI_add(NI_mul(a, c), b); // NOTE(review): nothing on this path filters a NaN before the negation below - confirm that is intended
 #endif
+	return negate ? -t : t; // shared tail of both paths; every early return above bypasses it
 }
 
-inline double NI_msub(const double a, const double b, const double c)
+inline double NI_msub(double a, double c, double b, bool negate = false) // a * c - b (operands arrive in the order a, c, b); negate asks for the negated result
 {
-//#ifdef VERY_ACCURATE_FP
-//  This code does not produce accurate fp!  NAN's are not calculated correctly, nor negative zero.
-//	The code is kept here for reference.
-//
-//	if (a != a) return a;
-//	if (c != c) return c;
-//	if (b != b) return b;
-//	double t = a * b;
-//	if (t != t)
-//	{
-//		SetFPException(FPSCR_VXIMZ);
-//		return PPC_NAN;
-//	}
-//
-//	t = t - c;
-//	if (t != t)
-//	{
-//		SetFPException(FPSCR_VXISI);
-//		return PPC_NAN;
-//	}
-//	return t;
-//#else
-//	This code does not calculate QNAN's correctly but calculates negative zero correctly.
-	return NI_sub(NI_mul(a, b), c);
-// #endif
+#ifdef VERY_ACCURATE_FP
+	if (a != a) return a;
+	if (b != b) return b;
+	if (c != c) return c; // NaN operands are handed back unchanged (checked in the order a, b, c) and skip the negation at the end
+	double t = a * c;
+	if (t != t) // neither factor was NaN, so the multiplication itself produced this NaN
+	{
+		SetFPException(FPSCR_VXIMZ);
+		return PPC_NAN;
+	}
+
+	t = t - b;
+	if (t != t) // the subtraction itself produced this NaN
+	{
+		SetFPException(FPSCR_VXISI);
+		return PPC_NAN;
+	}
+#else
+	double t = NI_sub(NI_mul(a, c), b); // NOTE(review): nothing on this path filters a NaN before the negation below - confirm that is intended
+#endif
+	return negate ? -t : t; // shared tail of both paths; every early return above bypasses it
 }
 
 // used by stfsXX instructions and ps_rsqrte
diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp
index fc0ceb57fd..c5eee4d0ea 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp
@@ -310,7 +310,7 @@ void Interpreter::fmulsx(UGeckoInstruction _inst)
 
 void Interpreter::fmaddx(UGeckoInstruction _inst)
 {
-	double result = ForceDouble(NI_madd( rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB) ));
+	double result = ForceDouble(NI_madd(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB)));
 	rPS0(_inst.FD) = result;
 	UpdateFPRF(result);
 
@@ -490,7 +490,7 @@ void Interpreter::fmsubsx(UGeckoInstruction _inst)
 
 void Interpreter::fnmaddx(UGeckoInstruction _inst)
 {
-	rPS0(_inst.FD) = ForceDouble(-NI_madd(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB)));
+	rPS0(_inst.FD) = ForceDouble(NI_madd(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB), true));
 	UpdateFPRF(rPS0(_inst.FD));
 
 	if (_inst.Rc)
@@ -500,7 +500,7 @@ void Interpreter::fnmaddx(UGeckoInstruction _inst)
 void Interpreter::fnmaddsx(UGeckoInstruction _inst)
 {
 	double c_value = Force25Bit(rPS0(_inst.FC));
-	rPS0(_inst.FD) = rPS1(_inst.FD) = ForceSingle(-NI_madd(rPS0(_inst.FA), c_value, rPS0(_inst.FB)));
+	rPS0(_inst.FD) = rPS1(_inst.FD) = ForceSingle(NI_madd(rPS0(_inst.FA), c_value, rPS0(_inst.FB), true));
 	UpdateFPRF(rPS0(_inst.FD));
 
 	if (_inst.Rc)
@@ -509,18 +509,17 @@ void Interpreter::fnmaddsx(UGeckoInstruction _inst)
 
 void Interpreter::fnmsubx(UGeckoInstruction _inst)
 {
-	rPS0(_inst.FD) = ForceDouble(-NI_msub(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB)));
+	rPS0(_inst.FD) = ForceDouble(NI_msub(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB), true)); // negate = true: NI_msub negates internally, so the values it returns early are left un-negated
 	UpdateFPRF(rPS0(_inst.FD));
 
 	if (_inst.Rc)
 		Helper_UpdateCR1();
 }
 
-// fnmsubsx does not handle QNAN properly - see NI_msub
 void Interpreter::fnmsubsx(UGeckoInstruction _inst)
 {
 	double c_value = Force25Bit(rPS0(_inst.FC));
-	rPS0(_inst.FD) = rPS1(_inst.FD) = ForceSingle(-NI_msub(rPS0(_inst.FA), c_value, rPS0(_inst.FB)));
+	rPS0(_inst.FD) = rPS1(_inst.FD) = ForceSingle(NI_msub(rPS0(_inst.FA), c_value, rPS0(_inst.FB), true));
 	UpdateFPRF(rPS0(_inst.FD));
 
 	if (_inst.Rc)
diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp
index 612b6753ea..2c9656bd8f 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp
@@ -303,8 +303,8 @@ void Interpreter::ps_nmsub(UGeckoInstruction _inst)
 {
 	double c0 = Force25Bit(rPS0(_inst.FC));
 	double c1 = Force25Bit(rPS1(_inst.FC));
-	rPS0(_inst.FD) = ForceSingle(-NI_msub(rPS0(_inst.FA), c0, rPS0(_inst.FB)));
-	rPS1(_inst.FD) = ForceSingle(-NI_msub(rPS1(_inst.FA), c1, rPS1(_inst.FB)));
+	rPS0(_inst.FD) = ForceSingle(NI_msub(rPS0(_inst.FA), c0, rPS0(_inst.FB), true));
+	rPS1(_inst.FD) = ForceSingle(NI_msub(rPS1(_inst.FA), c1, rPS1(_inst.FB), true));
 	UpdateFPRF(rPS0(_inst.FD));
 
 	if (_inst.Rc)
@@ -315,8 +315,8 @@ void Interpreter::ps_nmadd(UGeckoInstruction _inst)
 {
 	double c0 = Force25Bit(rPS0(_inst.FC));
 	double c1 = Force25Bit(rPS1(_inst.FC));
-	rPS0(_inst.FD) = ForceSingle(-NI_madd(rPS0(_inst.FA), c0, rPS0(_inst.FB)));
-	rPS1(_inst.FD) = ForceSingle(-NI_madd(rPS1(_inst.FA), c1, rPS1(_inst.FB)));
+	rPS0(_inst.FD) = ForceSingle(NI_madd(rPS0(_inst.FA), c0, rPS0(_inst.FB), true));
+	rPS1(_inst.FD) = ForceSingle(NI_madd(rPS1(_inst.FA), c1, rPS1(_inst.FB), true));
 	UpdateFPRF(rPS0(_inst.FD));
 
 	if (_inst.Rc)