From 567724b2f8027b4b99e8101af705ff5ceec9c9e7 Mon Sep 17 00:00:00 2001
From: magumagu <magumagu9@gmail.com>
Date: Fri, 23 May 2014 14:57:34 -0700
Subject: [PATCH] Jit: get rid of incorrect implementations of fres and
 frsqrte.

The existing JIT implementations (frsqrte, ps_res and ps_rsqrte) produce
answers which aren't consistent with the hardware, and games care about
correct floating-point math. These instructions now fall back to the
interpreter.

These can be reimplemented at some point in the future, if someone cares
enough, but the general case is probably too much code to inline.

(I'm leaving the ARM implementations in place by request, even though they
suffer from the same issues.)
---
 Source/Core/Core/PowerPC/Jit64/Jit.h          |  2 --
 .../Core/Core/PowerPC/Jit64/Jit64_Tables.cpp  |  6 ++--
 .../Core/PowerPC/Jit64/Jit_FloatingPoint.cpp  | 15 --------
 Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp | 35 -------------------
 Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp   |  9 -----
 .../Core/PowerPC/Jit64IL/JitIL_Tables.cpp     |  2 +-
 Source/Core/Core/PowerPC/JitArmIL/IR_Arm.cpp  |  1 -
 Source/Core/Core/PowerPC/JitILCommon/IR.cpp   |  2 +-
 Source/Core/Core/PowerPC/JitILCommon/IR.h     |  4 ---
 .../JitILCommon/JitILBase_FloatingPoint.cpp   |  9 +----
 10 files changed, 6 insertions(+), 79 deletions(-)

diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h
index 0125eeae86..d273d32aaf 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.h
@@ -168,12 +168,10 @@ public:
 	void ps_arith(UGeckoInstruction inst); //aggregate
 	void ps_mergeXX(UGeckoInstruction inst);
 	void ps_maddXX(UGeckoInstruction inst);
-	void ps_recip(UGeckoInstruction inst);
 	void ps_sum(UGeckoInstruction inst);
 	void ps_muls(UGeckoInstruction inst);
 
 	void fp_arith(UGeckoInstruction inst);
-	void frsqrtex(UGeckoInstruction inst);
 
 	void fcmpx(UGeckoInstruction inst);
 	void fmrx(UGeckoInstruction inst);
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp
index 1375d926c0..2a8c3d6072 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp
@@ -138,9 +138,9 @@ static GekkoOPTemplate table4_2[] =
 	{20, &Jit64::ps_arith},  //"ps_sub",    OPTYPE_PS, 0}},
 	{21, &Jit64::ps_arith},  //"ps_add",    OPTYPE_PS, 0}},
 	{23, &Jit64::ps_sel},    //"ps_sel",    OPTYPE_PS, 0}},
-	{24, &Jit64::ps_recip},  //"ps_res",    OPTYPE_PS, 0}},
+	{24, &Jit64::FallBackToInterpreter},    //"ps_res",    OPTYPE_PS, 0}},
 	{25, &Jit64::ps_arith},  //"ps_mul",    OPTYPE_PS, 0}},
-	{26, &Jit64::ps_recip},  //"ps_rsqrte", OPTYPE_PS, 0, 1}},
+	{26, &Jit64::FallBackToInterpreter},    //"ps_rsqrte", OPTYPE_PS, 0, 1}},
 	{28, &Jit64::ps_maddXX}, //"ps_msub",   OPTYPE_PS, 0}},
 	{29, &Jit64::ps_maddXX}, //"ps_madd",   OPTYPE_PS, 0}},
 	{30, &Jit64::ps_maddXX}, //"ps_nmsub",  OPTYPE_PS, 0}},
@@ -360,7 +360,7 @@ static GekkoOPTemplate table63_2[] =
 	{22, &Jit64::FallBackToInterpreter}, //"fsqrtx",   OPTYPE_FPU, FL_RC_BIT_F}},
 	{23, &Jit64::FallBackToInterpreter}, //"fselx",    OPTYPE_FPU, FL_RC_BIT_F}},
 	{25, &Jit64::fp_arith},              //"fmulx",    OPTYPE_FPU, FL_RC_BIT_F}},
-	{26, &Jit64::frsqrtex},              //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
+	{26, &Jit64::FallBackToInterpreter}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
 	{28, &Jit64::fmaddXX},               //"fmsubx",   OPTYPE_FPU, FL_RC_BIT_F}},
 	{29, &Jit64::fmaddXX},               //"fmaddx",   OPTYPE_FPU, FL_RC_BIT_F}},
 	{30, &Jit64::fmaddXX},               //"fnmsubx",  OPTYPE_FPU, FL_RC_BIT_F}},
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
index e733c3f56a..b5cd8833a6 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
@@ -101,21 +101,6 @@ void Jit64::fp_arith(UGeckoInstruction inst)
 	}
 }
 
-void Jit64::frsqrtex(UGeckoInstruction inst)
-{
-	INSTRUCTION_START
-	JITDISABLE(bJITFloatingPointOff)
-	int d = inst.FD;
-	int b = inst.FB;
-	fpr.Lock(b, d);
-	fpr.BindToRegister(d, true, true);
-	MOVSD(XMM0, M((void *)&one_const));
-	SQRTSD(XMM1, fpr.R(b));
-	DIVSD(XMM0, R(XMM1));
-	MOVSD(fpr.R(d), XMM0);
-	fpr.UnlockAll();
-}
-
 void Jit64::fmaddXX(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp
index 58d73a845e..cd71dbbd9e 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp
@@ -112,41 +112,6 @@ void Jit64::ps_sign(UGeckoInstruction inst)
 	fpr.UnlockAll();
 }
 
-// ps_res and ps_rsqrte
-void Jit64::ps_recip(UGeckoInstruction inst)
-{
-	INSTRUCTION_START
-	JITDISABLE(bJITPairedOff)
-
-	if (inst.Rc)
-	{
-		FallBackToInterpreter(inst);
-		return;
-	}
-
-	OpArg divisor;
-	int d = inst.FD;
-	int b = inst.FB;
-	fpr.Lock(d, b);
-	fpr.BindToRegister(d, (d == b));
-	switch (inst.SUBOP5)
-	{
-	case 24:
-		// ps_res
-		divisor = fpr.R(b);
-		break;
-	case 26:
-		// ps_rsqrte
-		SQRTPD(XMM0, fpr.R(b));
-		divisor = R(XMM0);
-		break;
-	}
-	MOVAPD(XMM1, M((void*)&psOneOne));
-	DIVPD(XMM1, divisor);
-	MOVAPD(fpr.R(d), XMM1);
-	fpr.UnlockAll();
-}
-
 //add a, b, c
 
 //mov a, b
diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp
index 53d0323c71..cd31cedb2d 100644
--- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp
+++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp
@@ -674,7 +674,6 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
 		case FSMul:
 		case FSAdd:
 		case FSSub:
-		case FSRSqrt:
 		case FDMul:
 		case FDAdd:
 		case FDSub:
@@ -1435,14 +1434,6 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
 			fregEmitBinInst(RI, I, &JitIL::SUBSS);
 			break;
 		}
-		case FSRSqrt: {
-			if (!thisUsed) break;
-			X64Reg reg = fregURegWithoutMov(RI, I);
-			Jit->RSQRTSS(reg, fregLocForInst(RI, getOp1(I)));
-			RI.fregs[reg] = I;
-			fregNormalRegClear(RI, I);
-			break;
-		}
 		case FDMul: {
 			if (!thisUsed) break;
 			fregEmitBinInst(RI, I, &JitIL::MULSD);
diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL_Tables.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL_Tables.cpp
index 59d5a3e581..c3214f6e18 100644
--- a/Source/Core/Core/PowerPC/Jit64IL/JitIL_Tables.cpp
+++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL_Tables.cpp
@@ -361,7 +361,7 @@ static GekkoOPTemplate table63_2[] =
 	{22, &JitIL::FallBackToInterpreter}, //"fsqrtx",   OPTYPE_FPU, FL_RC_BIT_F}},
 	{23, &JitIL::FallBackToInterpreter}, //"fselx",    OPTYPE_FPU, FL_RC_BIT_F}},
 	{25, &JitIL::fp_arith_s},            //"fmulx",    OPTYPE_FPU, FL_RC_BIT_F}},
-	{26, &JitIL::fp_arith_s},            //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
+	{26, &JitIL::FallBackToInterpreter}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
 	{28, &JitIL::fmaddXX},               //"fmsubx",   OPTYPE_FPU, FL_RC_BIT_F}},
 	{29, &JitIL::fmaddXX},               //"fmaddx",   OPTYPE_FPU, FL_RC_BIT_F}},
 	{30, &JitIL::fmaddXX},               //"fnmsubx",  OPTYPE_FPU, FL_RC_BIT_F}},
diff --git a/Source/Core/Core/PowerPC/JitArmIL/IR_Arm.cpp b/Source/Core/Core/PowerPC/JitArmIL/IR_Arm.cpp
index d85260d15a..931056f592 100644
--- a/Source/Core/Core/PowerPC/JitArmIL/IR_Arm.cpp
+++ b/Source/Core/Core/PowerPC/JitArmIL/IR_Arm.cpp
@@ -394,7 +394,6 @@ static void DoWriteCode(IRBuilder* ibuild, JitArmIL* Jit, u32 exitAddress) {
 		case FSMul:
 		case FSAdd:
 		case FSSub:
-		case FSRSqrt:
 		case FDMul:
 		case FDAdd:
 		case FDSub:
diff --git a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp
index 3684e4e514..f9bd2a5820 100644
--- a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp
+++ b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp
@@ -1128,7 +1128,7 @@ unsigned IRBuilder::getNumberOfOperands(InstLoc I) const {
 		numberOfOperands[CInt32] = 0;
 
 		static unsigned ZeroOp[] = {LoadCR, LoadLink, LoadMSR, LoadGReg, LoadCTR, InterpreterBranch, LoadCarry, RFIExit, LoadFReg, LoadFRegDENToZero, LoadGQR, Int3, };
-		static unsigned UOp[] = {StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FSRSqrt, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, };
+		static unsigned UOp[] = {StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, };
 		static unsigned BiOp[] = {BranchCond, IdleBranch, And, Xor, Sub, Or, Add, Mul, Rol, Shl, Shrl, Sarl, ICmpEq, ICmpNe, ICmpUgt, ICmpUlt, ICmpSgt, ICmpSlt, ICmpSge, ICmpSle, Store8, Store16, Store32, ICmpCRSigned, ICmpCRUnsigned, FallBackToInterpreter, StoreSingle, StoreDouble, StorePaired, InsertDoubleInMReg, FSMul, FSAdd, FSSub, FDMul, FDAdd, FDSub, FPAdd, FPMul, FPSub, FPMerge00, FPMerge01, FPMerge10, FPMerge11, FDCmpCR, };
 		for (auto& op : ZeroOp) {
 			numberOfOperands[op] = 0;
diff --git a/Source/Core/Core/PowerPC/JitILCommon/IR.h b/Source/Core/Core/PowerPC/JitILCommon/IR.h
index 062738edc6..f84f29fe02 100644
--- a/Source/Core/Core/PowerPC/JitILCommon/IR.h
+++ b/Source/Core/Core/PowerPC/JitILCommon/IR.h
@@ -113,7 +113,6 @@ enum Opcode {
 	FSAdd,
 	FSSub,
 	FSNeg,
-	FSRSqrt,
 	FPAdd,
 	FPMul,
 	FPSub,
@@ -464,9 +463,6 @@ public:
 	InstLoc EmitFSNeg(InstLoc op1) {
 		return FoldUOp(FSNeg, op1);
 	}
-	InstLoc EmitFSRSqrt(InstLoc op1) {
-		return FoldUOp(FSRSqrt, op1);
-	}
 	InstLoc EmitFDMul(InstLoc op1, InstLoc op2) {
 		return FoldBiOp(FDMul, op1, op2);
 	}
diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp
index 464e133c20..ec3d01e663 100644
--- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp
@@ -9,8 +9,7 @@ void JitILBase::fp_arith_s(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(bJITFloatingPointOff)
-	if (inst.Rc || (inst.SUBOP5 != 25 && inst.SUBOP5 != 20 &&
-	                inst.SUBOP5 != 21 && inst.SUBOP5 != 26))
+	if (inst.Rc || (inst.SUBOP5 != 25 && inst.SUBOP5 != 20 && inst.SUBOP5 != 21))
 	{
 		FallBackToInterpreter(inst);
 		return;
@@ -35,12 +34,6 @@ void JitILBase::fp_arith_s(UGeckoInstruction inst)
 	case 25: //mul
 		val = ibuild.EmitFDMul(val, ibuild.EmitLoadFReg(inst.FC));
 		break;
-	case 26: //rsqrte
-		val = ibuild.EmitLoadFReg(inst.FB);
-		val = ibuild.EmitDoubleToSingle(val);
-		val = ibuild.EmitFSRSqrt(val);
-		val = ibuild.EmitDupSingleToMReg(val);
-		break;
 	default:
 		_assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!");
 	}