Interpreter: refactor the rsqrte code, and use it for ps_rsqrte.

2014-05-22 01:53:22 -07:00 · 2014-05-22 01:53:22 -07:00 · 129e76e60d
parent 567724b2f8
commit 129e76e60d
3 changed files with 40 additions and 47 deletions
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h
@ -6,6 +6,7 @@

 #include "Common/CPUDetect.h"
 #include "Common/MathUtil.h"
+#include "Core/PowerPC/LUT_frsqrtex.h"
 #include "Core/PowerPC/Interpreter/Interpreter.h"

 using namespace MathUtil;
@ -329,3 +330,34 @@ inline double ApproximateReciprocal(double val)
 	vali |= (long long)(expected_base[i / 1024] - (expected_dec[i / 1024] * (i % 1024) + 1) / 2) << 29;
 	return valf;
 }
+
+inline double ApproximateReciprocalSquareRoot(double val)  // table-driven estimate of 1/sqrt(val), built from frsqrtex_lut
+{
+	if (val < 0)  // negative input: the result is PPC_NAN
+		return PPC_NAN;
+	if (val == 0.0)  // this comparison also matches -0.0; both zeros return positive INFINITY
+		return INFINITY;

+	union  // anonymous union: lets the same 64 bits be accessed as a double or as an integer
+	{
+		double valf;
+		u64 vali;
+	};
+	valf = val;  // a NaN input fails both comparisons above, so it also reaches this table path

+	u32 fsa = vali >> 32;  // upper word of the double: sign, 11-bit exponent, top 20 mantissa bits
+	u32 idx = (fsa >> 5) % (sizeof(frsqrtex_lut) / sizeof(frsqrtex_lut[0]));  // table index: fsa >> 5, wrapped to the table's element count

+	s32 e = fsa >> (32 - 12);  // top 12 bits: sign + biased exponent
+	e &= 2047;  // keep the 11 exponent bits, dropping the sign
+	e -= 1023;  // remove the exponent bias
+	s32 oe = -((e + 1) / 2);  // output exponent: negated half of (e + 1)...
+	oe -= ((e + 1) & 1);  // ...lowered by one more when (e + 1) is odd

+	u32 outb = frsqrtex_lut[idx] << 20;  // low word of the result: table entry shifted up by 20
+	u32 outa = ((oe + 1023) & 2047) << 20;  // high word: re-biased exponent in bits 20..30, sign bit left clear
+	outa |= frsqrtex_lut[idx] >> 12;  // remaining upper bits of the table entry go below the exponent field

+	vali = ((u64)outa << 32) + (u64)outb;  // reassemble the 64-bit pattern from the two words
+	return valf;  // read the assembled bits back through the double member
+}
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp
@ -18,7 +18,6 @@
 #endif

 #include "Common/MathUtil.h"
-#include "Core/PowerPC/LUT_frsqrtex.h"
 #include "Core/PowerPC/Interpreter/Interpreter.h"
 #include "Core/PowerPC/Interpreter/Interpreter_FPUtils.h"

@ -403,33 +402,12 @@ void Interpreter::frsqrtex(UGeckoInstruction _inst)
 	if (b < 0.0)
 	{
 		SetFPException(FPSCR_VXSQRT);
-		rPS0(_inst.FD) = PPC_NAN;
 	}
-	else
+	else if (b == 0.0)
 	{
-		if (b == 0.0)
-		{
-			SetFPException(FPSCR_ZX);
-			riPS0(_inst.FD) = 0x7ff0000000000000;
-		}
-		else
-		{
-			u32 fsa = Common::swap32(Common::swap64(riPS0(_inst.FB)));
-			u32 fsb = Common::swap32(Common::swap64(riPS0(_inst.FB)) >> 32);
-			u32 idx=(fsa >> 5) % (sizeof(frsqrtex_lut) / sizeof(frsqrtex_lut[0]));
-
-			s32 e = fsa >> (32-12);
-			e &= 2047;
-			e -= 1023;
-			s32 oe =- ((e + 1) / 2);
-			oe -= ((e + 1) & 1);
-
-			u32 outb = frsqrtex_lut[idx] << 20;
-			u32 outa = ((oe + 1023) & 2047) << 20;
-			outa |= frsqrtex_lut[idx] >> 12;
-			riPS0(_inst.FD) = ((u64)outa << 32) + (u64)outb;
-		}
+		SetFPException(FPSCR_ZX);
 	}
+	rPS0(_inst.FD) = ApproximateReciprocalSquareRoot(b);
 	UpdateFPRF(rPS0(_inst.FD));
 	if (_inst.Rc) Helper_UpdateCR1();
 }
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp
@ -186,36 +186,19 @@ void Interpreter::ps_res(UGeckoInstruction _inst)

 void Interpreter::ps_rsqrte(UGeckoInstruction _inst)
 {
-	// this code is based on the real hardware tests
 	if (rPS0(_inst.FB) == 0.0 || rPS1(_inst.FB) == 0.0)
 	{
 		SetFPException(FPSCR_ZX);
 	}
-	// PS0
-	if (rPS0(_inst.FB) < 0.0)
+
+	if (rPS0(_inst.FB) < 0.0 || rPS1(_inst.FB) < 0.0)  // a negative value in either slot raises VXSQRT; the flag is set once for the pair
 	{
 		SetFPException(FPSCR_VXSQRT);
-		rPS0(_inst.FD) = PPC_NAN;
-	}
-	else
-	{
-		rPS0(_inst.FD) = 1.0 / sqrt(rPS0(_inst.FB));
-		u32 t = ConvertToSingle(riPS0(_inst.FD));
-		rPS0(_inst.FD) = *(float*)&t;
-	}
-	// PS1
-	if (rPS1(_inst.FB) < 0.0)
-	{
-		SetFPException(FPSCR_VXSQRT);
-		rPS1(_inst.FD) = PPC_NAN;
-	}
-	else
-	{
-		rPS1(_inst.FD) = 1.0 / sqrt(rPS1(_inst.FB));
-		u32 t = ConvertToSingle(riPS1(_inst.FD));
-		rPS1(_inst.FD) = *(float*)&t;
 	}
 
+	rPS0(_inst.FD) = ApproximateReciprocalSquareRoot(rPS0(_inst.FB));  // slot 0: estimate stored as returned; the removed ConvertToSingle step has no counterpart here
+	rPS1(_inst.FD) = ApproximateReciprocalSquareRoot(rPS1(_inst.FB));  // slot 1: same helper, same handling
+
 	UpdateFPRF(rPS0(_inst.FD));
 	if (_inst.Rc) Helper_UpdateCR1();
 }