diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h index 3971999fa8..6c112eac66 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h @@ -6,6 +6,7 @@ #include "Common/CPUDetect.h" #include "Common/MathUtil.h" +#include "Core/PowerPC/LUT_frsqrtex.h" #include "Core/PowerPC/Interpreter/Interpreter.h" using namespace MathUtil; @@ -329,3 +330,34 @@ inline double ApproximateReciprocal(double val) vali |= (long long)(expected_base[i / 1024] - (expected_dec[i / 1024] * (i % 1024) + 1) / 2) << 29; return valf; } + +inline double ApproximateReciprocalSquareRoot(double val) +{ /* Estimates 1/sqrt(val): a negative input yields PPC_NAN, zero yields INFINITY, and any other input is assembled from a frsqrtex_lut entry plus an exponent derived from val's exponent field. */ + if (val < 0) + return PPC_NAN; + if (val == 0.0) + return INFINITY; /* NOTE(review): -0.0 == 0.0 is true, so negative zero also gets +INFINITY; NaN and +infinity fail both tests above and reach the table path below - confirm against hardware behavior. */ + + union + { + double valf; + u64 vali; + }; + valf = val; + + u32 fsa = vali >> 32; /* upper 32 bits of the double: sign, exponent field, top of the mantissa */ + u32 idx = (fsa >> 5) % (sizeof(frsqrtex_lut) / sizeof(frsqrtex_lut[0])); /* the modulo keeps the index inside frsqrtex_lut */ + + s32 e = fsa >> (32 - 12); + e &= 2047; + e -= 1023; /* e now holds the 11-bit exponent field with the 1023 bias removed */ + s32 oe = -((e + 1) / 2); + oe -= ((e + 1) & 1); + + u32 outb = frsqrtex_lut[idx] << 20; + u32 outa = ((oe + 1023) & 2047) << 20; + outa |= frsqrtex_lut[idx] >> 12; /* the table entry is split: entry >> 12 is OR-ed into the high word below the re-biased exponent, entry << 20 becomes the low word */ + + vali = ((u64)outa << 32) + (u64)outb; + return valf; +} diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp index 980e0e7078..f214a53447 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp @@ -18,7 +18,6 @@ #endif #include "Common/MathUtil.h" -#include "Core/PowerPC/LUT_frsqrtex.h" #include "Core/PowerPC/Interpreter/Interpreter.h" #include "Core/PowerPC/Interpreter/Interpreter_FPUtils.h" @@ -403,33 +402,12 @@ void Interpreter::frsqrtex(UGeckoInstruction _inst) if (b < 0.0) { SetFPException(FPSCR_VXSQRT); - rPS0(_inst.FD) = PPC_NAN; } - else + else if (b == 0.0) { - if (b == 0.0) - { - 
SetFPException(FPSCR_ZX); - riPS0(_inst.FD) = 0x7ff0000000000000; - } - else - { - u32 fsa = Common::swap32(Common::swap64(riPS0(_inst.FB))); - u32 fsb = Common::swap32(Common::swap64(riPS0(_inst.FB)) >> 32); - u32 idx=(fsa >> 5) % (sizeof(frsqrtex_lut) / sizeof(frsqrtex_lut[0])); - - s32 e = fsa >> (32-12); - e &= 2047; - e -= 1023; - s32 oe =- ((e + 1) / 2); - oe -= ((e + 1) & 1); - - u32 outb = frsqrtex_lut[idx] << 20; - u32 outa = ((oe + 1023) & 2047) << 20; - outa |= frsqrtex_lut[idx] >> 12; - riPS0(_inst.FD) = ((u64)outa << 32) + (u64)outb; - } + SetFPException(FPSCR_ZX); } + rPS0(_inst.FD) = ApproximateReciprocalSquareRoot(b); /* the branches above now only raise exception flags; the helper supplies the stored result for every b */ UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) Helper_UpdateCR1(); } diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp index babf80db09..bfd8007ead 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp @@ -186,36 +186,19 @@ void Interpreter::ps_res(UGeckoInstruction _inst) void Interpreter::ps_rsqrte(UGeckoInstruction _inst) { - // this code is based on the real hardware tests if (rPS0(_inst.FB) == 0.0 || rPS1(_inst.FB) == 0.0) { SetFPException(FPSCR_ZX); } - // PS0 - if (rPS0(_inst.FB) < 0.0) + + if (rPS0(_inst.FB) < 0.0 || rPS1(_inst.FB) < 0.0) { SetFPException(FPSCR_VXSQRT); - rPS0(_inst.FD) = PPC_NAN; - } - else - { - rPS0(_inst.FD) = 1.0 / sqrt(rPS0(_inst.FB)); - u32 t = ConvertToSingle(riPS0(_inst.FD)); - rPS0(_inst.FD) = *(float*)&t; - } - // PS1 - if (rPS1(_inst.FB) < 0.0) - { - SetFPException(FPSCR_VXSQRT); - rPS1(_inst.FD) = PPC_NAN; - } - else - { - rPS1(_inst.FD) = 1.0 / sqrt(rPS1(_inst.FB)); - u32 t = ConvertToSingle(riPS1(_inst.FD)); - rPS1(_inst.FD) = *(float*)&t; } + rPS0(_inst.FD) = ApproximateReciprocalSquareRoot(rPS0(_inst.FB)); + rPS1(_inst.FD) = ApproximateReciprocalSquareRoot(rPS1(_inst.FB)); /* NOTE(review): the removed lines narrowed each lane through ConvertToSingle; the helper's double is stored as-is here - confirm that is intended. */ + UpdateFPRF(rPS0(_inst.FD)); if (_inst.Rc) 
Helper_UpdateCR1(); }