From b3b7ed2a2faf8af8c5ad680354e08c2a3af59b32 Mon Sep 17 00:00:00 2001 From: skidau Date: Sat, 16 Jan 2010 03:43:16 +0000 Subject: [PATCH] FPU NaN and negative zero fixes. Verified against the real hardware. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4847 8ced0084-cf51-0410-be5f-012b33b47a6e --- .../PowerPC/Interpreter/Interpreter_FPUtils.h | 45 ++++++++++--------- .../Interpreter/Interpreter_FloatingPoint.cpp | 13 +++--- .../Interpreter/Interpreter_Paired.cpp | 8 ++-- 3 files changed, 36 insertions(+), 30 deletions(-) diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FPUtils.h b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FPUtils.h index 693d317a8d..0f54bd9060 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FPUtils.h +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FPUtils.h @@ -24,7 +24,7 @@ using namespace MathUtil; -// warining! very slow! +// warning! very slow! This setting fixes NaN handling //#define VERY_ACCURATE_FP #define MIN_SINGLE 0xc7efffffe0000000ull @@ -188,26 +188,31 @@ inline double NI_madd(const double a, const double b, const double c) inline double NI_msub(const double a, const double b, const double c) { -#ifdef VERY_ACCURATE_FP - if (a != a) return a; - if (c != c) return c; - if (b != b) return b; - double t = a * b; - if (t != t) - { - SetFPException(FPSCR_VXIMZ); - return PPC_NAN; - } - t = t - c; - if (t != t) - { - SetFPException(FPSCR_VXISI); - return PPC_NAN; - } - return t; -#else +//#ifdef VERY_ACCURATE_FP +// This code does not produce accurate FP results! NaNs are not calculated correctly, nor is negative zero. +// The code is kept here for reference. 
+// +// if (a != a) return a; +// if (c != c) return c; +// if (b != b) return b; +// double t = a * b; +// if (t != t) +// { +// SetFPException(FPSCR_VXIMZ); +// return PPC_NAN; +// } +// +// t = t - c; +// if (t != t) +// { +// SetFPException(FPSCR_VXISI); +// return PPC_NAN; +// } +// return t; +//#else +// This code does not calculate QNaNs correctly, but it does calculate negative zero correctly. return NI_sub(NI_mul(a, b), c); -#endif +// #endif } // used by stfsXX instructions and ps_rsqrte diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp index 3693aef7b1..2db80f8195 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp @@ -232,7 +232,7 @@ void fnabsx(UGeckoInstruction _inst) { riPS0(_inst.FD) = riPS0(_inst.FB) | (1ULL << 63); // This is a binary instruction. Does not alter FPSCR - if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); + if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void fnegx(UGeckoInstruction _inst) @@ -436,27 +436,28 @@ void fmsubsx(UGeckoInstruction _inst) if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } - void fnmaddx(UGeckoInstruction _inst) { - rPS0(_inst.FD) = ForceDouble(0.0-NI_madd(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB))); + rPS0(_inst.FD) = ForceDouble(-NI_madd(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB))); UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void fnmaddsx(UGeckoInstruction _inst) { rPS0(_inst.FD) = rPS1(_inst.FD) = - ForceSingle(0.0-NI_madd(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB))); + ForceSingle(-NI_madd(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB))); UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void fnmsubx(UGeckoInstruction _inst) { - rPS0(_inst.FD) = ForceDouble(0.0-NI_msub(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB))); + 
rPS0(_inst.FD) = ForceDouble(-NI_msub(rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB))); UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } + +// fnmsubsx does not handle QNAN properly - see NI_msub void fnmsubsx(UGeckoInstruction _inst) { rPS0(_inst.FD) = rPS1(_inst.FD) = @@ -465,13 +466,13 @@ void fnmsubsx(UGeckoInstruction _inst) if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } - void fsubx(UGeckoInstruction _inst) { rPS0(_inst.FD) = ForceDouble(NI_sub(rPS0(_inst.FA), rPS0(_inst.FB))); UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } + void fsubsx(UGeckoInstruction _inst) { rPS0(_inst.FD) = rPS1(_inst.FD) = ForceSingle(NI_sub(rPS0(_inst.FA), rPS0(_inst.FB))); diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Paired.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Paired.cpp index b172aae979..7222d390ac 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Paired.cpp +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_Paired.cpp @@ -296,16 +296,16 @@ void ps_madd(UGeckoInstruction _inst) void ps_nmsub(UGeckoInstruction _inst) { - rPS0(_inst.FD) = ForceSingle( 0.0-NI_msub( rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB) ) ); - rPS1(_inst.FD) = ForceSingle( 0.0-NI_msub( rPS1(_inst.FA), rPS1(_inst.FC), rPS1(_inst.FB) ) ); + rPS0(_inst.FD) = ForceSingle( -NI_msub( rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB) ) ); + rPS1(_inst.FD) = ForceSingle( -NI_msub( rPS1(_inst.FA), rPS1(_inst.FC), rPS1(_inst.FB) ) ); UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void ps_nmadd(UGeckoInstruction _inst) { - rPS0(_inst.FD) = ForceSingle( 0.0-NI_madd( rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB) ) ); - rPS1(_inst.FD) = ForceSingle( 0.0-NI_madd( rPS1(_inst.FA), rPS1(_inst.FC), rPS1(_inst.FB) ) ); + rPS0(_inst.FD) = ForceSingle( -NI_madd( rPS0(_inst.FA), rPS0(_inst.FC), rPS0(_inst.FB) ) ); + rPS1(_inst.FD) = ForceSingle( -NI_madd( rPS1(_inst.FA), 
rPS1(_inst.FC), rPS1(_inst.FB) ) ); UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); }