From 567724b2f8027b4b99e8101af705ff5ceec9c9e7 Mon Sep 17 00:00:00 2001 From: magumagu Date: Fri, 23 May 2014 14:57:34 -0700 Subject: [PATCH] Jit: get rid of incorrect implementations of fres and frsqrte. The existing implementations produce answers which aren't consistent with the hardware, and games care about correct floating point math. These can be reimplemented at some point in the future, if someone cares enough, but the general case is probably too much code to inline. (I'm leaving the ARM implementations in place by request, even though they suffer the same issues.) --- Source/Core/Core/PowerPC/Jit64/Jit.h | 2 -- .../Core/Core/PowerPC/Jit64/Jit64_Tables.cpp | 6 ++-- .../Core/PowerPC/Jit64/Jit_FloatingPoint.cpp | 15 -------- Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp | 35 ------------------- Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp | 9 ----- .../Core/PowerPC/Jit64IL/JitIL_Tables.cpp | 2 +- Source/Core/Core/PowerPC/JitArmIL/IR_Arm.cpp | 1 - Source/Core/Core/PowerPC/JitILCommon/IR.cpp | 2 +- Source/Core/Core/PowerPC/JitILCommon/IR.h | 4 --- .../JitILCommon/JitILBase_FloatingPoint.cpp | 9 +---- 10 files changed, 6 insertions(+), 79 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index 0125eeae86..d273d32aaf 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -168,12 +168,10 @@ public: void ps_arith(UGeckoInstruction inst); //aggregate void ps_mergeXX(UGeckoInstruction inst); void ps_maddXX(UGeckoInstruction inst); - void ps_recip(UGeckoInstruction inst); void ps_sum(UGeckoInstruction inst); void ps_muls(UGeckoInstruction inst); void fp_arith(UGeckoInstruction inst); - void frsqrtex(UGeckoInstruction inst); void fcmpx(UGeckoInstruction inst); void fmrx(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp index 1375d926c0..2a8c3d6072 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp @@ -138,9 +138,9 @@ static GekkoOPTemplate table4_2[] = {20, &Jit64::ps_arith}, //"ps_sub", OPTYPE_PS, 0}}, {21, &Jit64::ps_arith}, //"ps_add", OPTYPE_PS, 0}}, {23, &Jit64::ps_sel}, //"ps_sel", OPTYPE_PS, 0}}, - {24, &Jit64::ps_recip}, //"ps_res", OPTYPE_PS, 0}}, + {24, &Jit64::FallBackToInterpreter}, //"ps_res", OPTYPE_PS, 0}}, {25, &Jit64::ps_arith}, //"ps_mul", OPTYPE_PS, 0}}, - {26, &Jit64::ps_recip}, //"ps_rsqrte", OPTYPE_PS, 0, 1}}, + {26, &Jit64::FallBackToInterpreter}, //"ps_rsqrte", OPTYPE_PS, 0, 1}}, {28, &Jit64::ps_maddXX}, //"ps_msub", OPTYPE_PS, 0}}, {29, &Jit64::ps_maddXX}, //"ps_madd", OPTYPE_PS, 0}}, {30, &Jit64::ps_maddXX}, //"ps_nmsub", OPTYPE_PS, 0}}, @@ -360,7 +360,7 @@ static GekkoOPTemplate table63_2[] = {22, &Jit64::FallBackToInterpreter}, //"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}}, {23, &Jit64::FallBackToInterpreter}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}}, {25, &Jit64::fp_arith}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}}, - {26, &Jit64::frsqrtex}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}}, + {26, &Jit64::FallBackToInterpreter}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}}, {28, &Jit64::fmaddXX}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}}, {29, &Jit64::fmaddXX}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}}, {30, &Jit64::fmaddXX}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}}, diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp index e733c3f56a..b5cd8833a6 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -101,21 +101,6 @@ void Jit64::fp_arith(UGeckoInstruction inst) } } -void Jit64::frsqrtex(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff) - int d = inst.FD; - int b = inst.FB; - fpr.Lock(b, d); - fpr.BindToRegister(d, true, true); - MOVSD(XMM0, M((void *)&one_const)); - SQRTSD(XMM1, fpr.R(b)); - DIVSD(XMM0, R(XMM1)); - MOVSD(fpr.R(d), XMM0); - fpr.UnlockAll(); -} - void Jit64::fmaddXX(UGeckoInstruction inst) { INSTRUCTION_START diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp index 58d73a845e..cd71dbbd9e 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp @@ -112,41 +112,6 @@ void Jit64::ps_sign(UGeckoInstruction inst) fpr.UnlockAll(); } -// ps_res and ps_rsqrte -void Jit64::ps_recip(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff) - - if (inst.Rc) - { - FallBackToInterpreter(inst); - return; - } - - OpArg divisor; - int d = inst.FD; - int b = inst.FB; - fpr.Lock(d, b); - fpr.BindToRegister(d, (d == b)); - switch (inst.SUBOP5) - { - case 24: - // ps_res - divisor = fpr.R(b); - break; - case 26: - // ps_rsqrte - SQRTPD(XMM0, fpr.R(b)); - divisor = R(XMM0); - break; - } - MOVAPD(XMM1, M((void*)&psOneOne)); - DIVPD(XMM1, divisor); - MOVAPD(fpr.R(d), XMM1); - fpr.UnlockAll(); -} - //add a, b, c //mov a, b diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp index 53d0323c71..cd31cedb2d 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp @@ -674,7 +674,6 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { case FSMul: case FSAdd: case FSSub: - case FSRSqrt: case FDMul: case FDAdd: case FDSub: @@ -1435,14 +1434,6 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { fregEmitBinInst(RI, I, &JitIL::SUBSS); break; } - case FSRSqrt: { - if (!thisUsed) break; - X64Reg reg = fregURegWithoutMov(RI, I); - Jit->RSQRTSS(reg, fregLocForInst(RI, getOp1(I))); - RI.fregs[reg] = I; - fregNormalRegClear(RI, I); - break; - } case FDMul: { if (!thisUsed) break; fregEmitBinInst(RI, I, &JitIL::MULSD); diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL_Tables.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL_Tables.cpp index 59d5a3e581..c3214f6e18 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/JitIL_Tables.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL_Tables.cpp @@ -361,7 +361,7 @@ static GekkoOPTemplate table63_2[] = {22, &JitIL::FallBackToInterpreter}, //"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}}, {23, &JitIL::FallBackToInterpreter}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}}, {25, &JitIL::fp_arith_s}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}}, - {26, &JitIL::fp_arith_s}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}}, + {26, &JitIL::FallBackToInterpreter}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}}, {28, &JitIL::fmaddXX}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}}, {29, &JitIL::fmaddXX}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}}, {30, &JitIL::fmaddXX}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}}, diff --git a/Source/Core/Core/PowerPC/JitArmIL/IR_Arm.cpp b/Source/Core/Core/PowerPC/JitArmIL/IR_Arm.cpp index d85260d15a..931056f592 100644 --- a/Source/Core/Core/PowerPC/JitArmIL/IR_Arm.cpp +++ b/Source/Core/Core/PowerPC/JitArmIL/IR_Arm.cpp @@ -394,7 +394,6 @@ static void DoWriteCode(IRBuilder* ibuild, JitArmIL* Jit, u32 exitAddress) { case FSMul: case FSAdd: case FSSub: - case FSRSqrt: case FDMul: case FDAdd: case FDSub: diff --git a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp index 3684e4e514..f9bd2a5820 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp @@ -1128,7 +1128,7 @@ unsigned IRBuilder::getNumberOfOperands(InstLoc I) const { numberOfOperands[CInt32] = 0; static unsigned ZeroOp[] = {LoadCR, LoadLink, LoadMSR, LoadGReg, LoadCTR, InterpreterBranch, LoadCarry, RFIExit, LoadFReg, LoadFRegDENToZero, LoadGQR, Int3, }; - static unsigned UOp[] = {StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FSRSqrt, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, }; + static unsigned UOp[] = {StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, }; static unsigned BiOp[] = {BranchCond, IdleBranch, And, Xor, Sub, Or, Add, Mul, Rol, Shl, Shrl, Sarl, ICmpEq, ICmpNe, ICmpUgt, ICmpUlt, ICmpSgt, ICmpSlt, ICmpSge, ICmpSle, Store8, Store16, Store32, ICmpCRSigned, ICmpCRUnsigned, FallBackToInterpreter, StoreSingle, StoreDouble, StorePaired, InsertDoubleInMReg, FSMul, FSAdd, FSSub, FDMul, FDAdd, FDSub, FPAdd, FPMul, FPSub, FPMerge00, FPMerge01, FPMerge10, FPMerge11, FDCmpCR, }; for (auto& op : ZeroOp) { numberOfOperands[op] = 0; diff --git a/Source/Core/Core/PowerPC/JitILCommon/IR.h b/Source/Core/Core/PowerPC/JitILCommon/IR.h index 062738edc6..f84f29fe02 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/IR.h +++ b/Source/Core/Core/PowerPC/JitILCommon/IR.h @@ -113,7 +113,6 @@ enum Opcode { FSAdd, FSSub, FSNeg, - FSRSqrt, FPAdd, FPMul, FPSub, @@ -464,9 +463,6 @@ public: InstLoc EmitFSNeg(InstLoc op1) { return FoldUOp(FSNeg, op1); } - InstLoc EmitFSRSqrt(InstLoc op1) { - return FoldUOp(FSRSqrt, op1); - } InstLoc EmitFDMul(InstLoc op1, InstLoc op2) { return FoldBiOp(FDMul, op1, op2); } diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp index 464e133c20..ec3d01e663 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_FloatingPoint.cpp @@ -9,8 +9,7 @@ void JitILBase::fp_arith_s(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITFloatingPointOff) - if (inst.Rc || (inst.SUBOP5 != 25 && inst.SUBOP5 != 20 && - inst.SUBOP5 != 21 && inst.SUBOP5 != 26)) + if (inst.Rc || (inst.SUBOP5 != 25 && inst.SUBOP5 != 20 && inst.SUBOP5 != 21)) { FallBackToInterpreter(inst); return; @@ -35,12 +34,6 @@ void JitILBase::fp_arith_s(UGeckoInstruction inst) case 25: //mul val = ibuild.EmitFDMul(val, ibuild.EmitLoadFReg(inst.FC)); break; - case 26: //rsqrte - val = ibuild.EmitLoadFReg(inst.FB); - val = ibuild.EmitDoubleToSingle(val); - val = ibuild.EmitFSRSqrt(val); - val = ibuild.EmitDupSingleToMReg(val); - break; default: _assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!"); }