From bcdafc7e34077c3f196009da93777d8e3af2de74 Mon Sep 17 00:00:00 2001
From: Tillmann Karras <tilkax@gmail.com>
Date: Wed, 24 Jun 2015 17:36:44 +0200
Subject: [PATCH] Interpreter: fix rounding of FNMA instructions

x86:     round(-a*c +/- b)
PowerPC: -round(a*c +/- b)

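If the rounding mode is set to round toward +infinity or -infinity,
rounding and negation do not commute, so the order of round/negate
matters. Negate only after rounding; NaN results are passed through
without negation.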
---
 .../Core/PowerPC/Interpreter/Interpreter_FPUtils.h   |  8 ++++----
 .../Interpreter/Interpreter_FloatingPoint.cpp        | 12 ++++++++----
 .../Core/PowerPC/Interpreter/Interpreter_Paired.cpp  | 12 ++++++++----
 3 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h
index 0c210fcc7d..6b22e7472a 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h
@@ -150,7 +150,7 @@ inline double NI_sub(double a, double b)
 // FMA instructions on PowerPC are weird:
 // They calculate (a * c) + b, but the order in which
 // inputs are checked for NaN is still a, b, c.
-inline double NI_madd(double a, double c, double b, bool negate = false)
+inline double NI_madd(double a, double c, double b)
 {
 	double t = a * c;
 	if (std::isnan(t))
@@ -168,10 +168,10 @@ inline double NI_madd(double a, double c, double b, bool negate = false)
 		SetFPException(FPSCR_VXISI);
 		return PPC_NAN;
 	}
-	return negate ? -t : t;
+	return t;
 }
 
-inline double NI_msub(double a, double c, double b, bool negate = false)
+inline double NI_msub(double a, double c, double b)
 {
 	double t = a * c;
 	if (std::isnan(t))
@@ -190,7 +190,7 @@ inline double NI_msub(double a, double c, double b, bool negate = false)
 		SetFPException(FPSCR_VXISI);
 		return PPC_NAN;
 	}
-	return negate ? -t : t;
+	return t;
 }
 
 // used by stfsXX instructions and ps_rsqrte
diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp
index fa4132c4ee..8b5715a70b 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp
@@ -425,7 +425,8 @@ void Interpreter::fmsubsx(UGeckoInstruction _inst)
 
 void Interpreter::fnmaddx(UGeckoInstruction _inst)
 {
-	rPS0(_inst.FD) = ForceDouble(NI_madd(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB), true));
+	double result = ForceDouble(NI_madd(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB)));
+	rPS0(_inst.FD) = std::isnan(result) ? result : -result;
 	UpdateFPRF(rPS0(_inst.FD));
 
 	if (_inst.Rc)
@@ -435,7 +436,8 @@ void Interpreter::fnmaddx(UGeckoInstruction _inst)
 void Interpreter::fnmaddsx(UGeckoInstruction _inst)
 {
 	double c_value = Force25Bit(rPS0(_inst.FC));
-	rPS0(_inst.FD) = rPS1(_inst.FD) = ForceSingle(NI_madd(rPS0(_inst.FA), c_value, rPS0(_inst.FB), true));
+	double result = ForceSingle(NI_madd(rPS0(_inst.FA), c_value, rPS0(_inst.FB)));
+	rPS0(_inst.FD) = rPS1(_inst.FD) = std::isnan(result) ? result : -result;
 	UpdateFPRF(rPS0(_inst.FD));
 
 	if (_inst.Rc)
@@ -444,7 +446,8 @@ void Interpreter::fnmaddsx(UGeckoInstruction _inst)
 
 void Interpreter::fnmsubx(UGeckoInstruction _inst)
 {
-	rPS0(_inst.FD) = ForceDouble(NI_msub(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB), true));
+	double result = ForceDouble(NI_msub(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB)));
+	rPS0(_inst.FD) = std::isnan(result) ? result : -result;
 	UpdateFPRF(rPS0(_inst.FD));
 
 	if (_inst.Rc)
@@ -454,7 +457,8 @@ void Interpreter::fnmsubx(UGeckoInstruction _inst)
 void Interpreter::fnmsubsx(UGeckoInstruction _inst)
 {
 	double c_value = Force25Bit(rPS0(_inst.FC));
-	rPS0(_inst.FD) = rPS1(_inst.FD) = ForceSingle(NI_msub(rPS0(_inst.FA), c_value, rPS0(_inst.FB), true));
+	double result = ForceSingle(NI_msub(rPS0(_inst.FA), c_value, rPS0(_inst.FB)));
+	rPS0(_inst.FD) = rPS1(_inst.FD) = std::isnan(result) ? result : -result;
 	UpdateFPRF(rPS0(_inst.FD));
 
 	if (_inst.Rc)
diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp
index fc56a4a33d..e4fd42ab41 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp
@@ -214,8 +214,10 @@ void Interpreter::ps_nmsub(UGeckoInstruction _inst)
 {
 	double c0 = Force25Bit(rPS0(_inst.FC));
 	double c1 = Force25Bit(rPS1(_inst.FC));
-	rPS0(_inst.FD) = ForceSingle(NI_msub(rPS0(_inst.FA), c0, rPS0(_inst.FB), true));
-	rPS1(_inst.FD) = ForceSingle(NI_msub(rPS1(_inst.FA), c1, rPS1(_inst.FB), true));
+	double result0 = ForceSingle(NI_msub(rPS0(_inst.FA), c0, rPS0(_inst.FB)));
+	double result1 = ForceSingle(NI_msub(rPS1(_inst.FA), c1, rPS1(_inst.FB)));
+	rPS0(_inst.FD) = std::isnan(result0) ? result0 : -result0;
+	rPS1(_inst.FD) = std::isnan(result1) ? result1 : -result1;
 	UpdateFPRF(rPS0(_inst.FD));
 
 	if (_inst.Rc)
@@ -226,8 +228,10 @@ void Interpreter::ps_nmadd(UGeckoInstruction _inst)
 {
 	double c0 = Force25Bit(rPS0(_inst.FC));
 	double c1 = Force25Bit(rPS1(_inst.FC));
-	rPS0(_inst.FD) = ForceSingle(NI_madd(rPS0(_inst.FA), c0, rPS0(_inst.FB), true));
-	rPS1(_inst.FD) = ForceSingle(NI_madd(rPS1(_inst.FA), c1, rPS1(_inst.FB), true));
+	double result0 = ForceSingle(NI_madd(rPS0(_inst.FA), c0, rPS0(_inst.FB)));
+	double result1 = ForceSingle(NI_madd(rPS1(_inst.FA), c1, rPS1(_inst.FB)));
+	rPS0(_inst.FD) = std::isnan(result0) ? result0 : -result0;
+	rPS1(_inst.FD) = std::isnan(result1) ? result1 : -result1;
 	UpdateFPRF(rPS0(_inst.FD));
 
 	if (_inst.Rc)