Jit: get rid of incorrect implementations of fres and frsqrte.
The existing implementations produce results that aren't consistent with the hardware, and games care about correct floating-point math. These can be reimplemented at some point in the future, if someone cares enough, but the general case is probably too much code to inline. (I'm leaving the ARM implementations in place by request, even though they suffer from the same issues.)
This commit is contained in:
parent
2f8a147eda
commit
567724b2f8
|
@ -168,12 +168,10 @@ public:
|
|||
void ps_arith(UGeckoInstruction inst); //aggregate
|
||||
void ps_mergeXX(UGeckoInstruction inst);
|
||||
void ps_maddXX(UGeckoInstruction inst);
|
||||
void ps_recip(UGeckoInstruction inst);
|
||||
void ps_sum(UGeckoInstruction inst);
|
||||
void ps_muls(UGeckoInstruction inst);
|
||||
|
||||
void fp_arith(UGeckoInstruction inst);
|
||||
void frsqrtex(UGeckoInstruction inst);
|
||||
|
||||
void fcmpx(UGeckoInstruction inst);
|
||||
void fmrx(UGeckoInstruction inst);
|
||||
|
|
|
@ -138,9 +138,9 @@ static GekkoOPTemplate table4_2[] =
|
|||
{20, &Jit64::ps_arith}, //"ps_sub", OPTYPE_PS, 0}},
|
||||
{21, &Jit64::ps_arith}, //"ps_add", OPTYPE_PS, 0}},
|
||||
{23, &Jit64::ps_sel}, //"ps_sel", OPTYPE_PS, 0}},
|
||||
{24, &Jit64::ps_recip}, //"ps_res", OPTYPE_PS, 0}},
|
||||
{24, &Jit64::FallBackToInterpreter}, //"ps_res", OPTYPE_PS, 0}},
|
||||
{25, &Jit64::ps_arith}, //"ps_mul", OPTYPE_PS, 0}},
|
||||
{26, &Jit64::ps_recip}, //"ps_rsqrte", OPTYPE_PS, 0, 1}},
|
||||
{26, &Jit64::FallBackToInterpreter}, //"ps_rsqrte", OPTYPE_PS, 0, 1}},
|
||||
{28, &Jit64::ps_maddXX}, //"ps_msub", OPTYPE_PS, 0}},
|
||||
{29, &Jit64::ps_maddXX}, //"ps_madd", OPTYPE_PS, 0}},
|
||||
{30, &Jit64::ps_maddXX}, //"ps_nmsub", OPTYPE_PS, 0}},
|
||||
|
@ -360,7 +360,7 @@ static GekkoOPTemplate table63_2[] =
|
|||
{22, &Jit64::FallBackToInterpreter}, //"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||
{23, &Jit64::FallBackToInterpreter}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||
{25, &Jit64::fp_arith}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||
{26, &Jit64::frsqrtex}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||
{26, &Jit64::FallBackToInterpreter}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||
{28, &Jit64::fmaddXX}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||
{29, &Jit64::fmaddXX}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||
{30, &Jit64::fmaddXX}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||
|
|
|
@ -101,21 +101,6 @@ void Jit64::fp_arith(UGeckoInstruction inst)
|
|||
}
|
||||
}
|
||||
|
||||
// Emits x86 code for the frsqrtex instruction: FD = one_const / sqrt(FB),
// computed with a scalar double-precision square root followed by a divide.
// NOTE: the commit message above describes this implementation as producing
// answers that are not consistent with the hardware; this commit deletes it
// and routes opcode 26 to FallBackToInterpreter instead.
// Any inst.Rc handling is not visible in this body.
void Jit64::frsqrtex(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITFloatingPointOff)
|
||||
// Destination (FD) and source (FB) register indices taken from the instruction.
int d = inst.FD;
|
||||
int b = inst.FB;
|
||||
fpr.Lock(b, d);
|
||||
fpr.BindToRegister(d, true, true);
|
||||
// XMM0 = one_const (presumably 1.0 -- confirm against its definition).
MOVSD(XMM0, M((void *)&one_const));
|
||||
// XMM1 = sqrt(FB), scalar double.
SQRTSD(XMM1, fpr.R(b));
|
||||
// XMM0 = one_const / sqrt(FB).
DIVSD(XMM0, R(XMM1));
|
||||
// Write the result into FD.
MOVSD(fpr.R(d), XMM0);
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
void Jit64::fmaddXX(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
|
|
|
@ -112,41 +112,6 @@ void Jit64::ps_sign(UGeckoInstruction inst)
|
|||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
// ps_res and ps_rsqrte
// Shared emitter for both paired-single instructions: it divides psOneOne by
// either FB (ps_res, SUBOP5 == 24) or the packed square root of FB
// (ps_rsqrte, SUBOP5 == 26) and stores the packed result in FD.
// NOTE: the commit message above describes this implementation as not being
// consistent with the hardware; this commit deletes it and routes both
// opcodes to FallBackToInterpreter instead.
|
||||
void Jit64::ps_recip(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITPairedOff)
|
||||
|
||||
// The record (Rc) form is not handled here; defer it to the interpreter.
if (inst.Rc)
|
||||
{
|
||||
FallBackToInterpreter(inst);
|
||||
return;
|
||||
}
|
||||
|
||||
// Operand that psOneOne is divided by; assigned in the switch below.
OpArg divisor;
|
||||
int d = inst.FD;
|
||||
int b = inst.FB;
|
||||
fpr.Lock(d, b);
|
||||
// NOTE(review): the second argument is true only when FD and FB are the same
// register -- presumably "load the current value"; confirm against BindToRegister.
fpr.BindToRegister(d, (d == b));
|
||||
// No default case: the visible table4_2 entries dispatch only SUBOP5 24 and 26 here.
switch (inst.SUBOP5)
|
||||
{
|
||||
case 24:
|
||||
// ps_res: divide by FB directly
|
||||
divisor = fpr.R(b);
|
||||
break;
|
||||
case 26:
|
||||
// ps_rsqrte: divide by the packed square root of FB, held in XMM0
|
||||
SQRTPD(XMM0, fpr.R(b));
|
||||
divisor = R(XMM0);
|
||||
break;
|
||||
}
|
||||
// XMM1 = psOneOne (presumably the packed pair {1.0, 1.0} -- confirm against its definition).
MOVAPD(XMM1, M((void*)&psOneOne));
|
||||
// XMM1 = psOneOne / divisor, both lanes.
DIVPD(XMM1, divisor);
|
||||
// Write the packed result into FD.
MOVAPD(fpr.R(d), XMM1);
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
//add a, b, c
|
||||
|
||||
//mov a, b
|
||||
|
|
|
@ -674,7 +674,6 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
|
|||
case FSMul:
|
||||
case FSAdd:
|
||||
case FSSub:
|
||||
case FSRSqrt:
|
||||
case FDMul:
|
||||
case FDAdd:
|
||||
case FDSub:
|
||||
|
@ -1435,14 +1434,6 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
|
|||
fregEmitBinInst(RI, I, &JitIL::SUBSS);
|
||||
break;
|
||||
}
|
||||
case FSRSqrt: {
|
||||
if (!thisUsed) break;
|
||||
X64Reg reg = fregURegWithoutMov(RI, I);
|
||||
Jit->RSQRTSS(reg, fregLocForInst(RI, getOp1(I)));
|
||||
RI.fregs[reg] = I;
|
||||
fregNormalRegClear(RI, I);
|
||||
break;
|
||||
}
|
||||
case FDMul: {
|
||||
if (!thisUsed) break;
|
||||
fregEmitBinInst(RI, I, &JitIL::MULSD);
|
||||
|
|
|
@ -361,7 +361,7 @@ static GekkoOPTemplate table63_2[] =
|
|||
{22, &JitIL::FallBackToInterpreter}, //"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||
{23, &JitIL::FallBackToInterpreter}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||
{25, &JitIL::fp_arith_s}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||
{26, &JitIL::fp_arith_s}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||
{26, &JitIL::FallBackToInterpreter}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||
{28, &JitIL::fmaddXX}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||
{29, &JitIL::fmaddXX}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||
{30, &JitIL::fmaddXX}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||
|
|
|
@ -394,7 +394,6 @@ static void DoWriteCode(IRBuilder* ibuild, JitArmIL* Jit, u32 exitAddress) {
|
|||
case FSMul:
|
||||
case FSAdd:
|
||||
case FSSub:
|
||||
case FSRSqrt:
|
||||
case FDMul:
|
||||
case FDAdd:
|
||||
case FDSub:
|
||||
|
|
|
@ -1128,7 +1128,7 @@ unsigned IRBuilder::getNumberOfOperands(InstLoc I) const {
|
|||
numberOfOperands[CInt32] = 0;
|
||||
|
||||
static unsigned ZeroOp[] = {LoadCR, LoadLink, LoadMSR, LoadGReg, LoadCTR, InterpreterBranch, LoadCarry, RFIExit, LoadFReg, LoadFRegDENToZero, LoadGQR, Int3, };
|
||||
static unsigned UOp[] = {StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FSRSqrt, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, };
|
||||
static unsigned UOp[] = {StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, };
|
||||
static unsigned BiOp[] = {BranchCond, IdleBranch, And, Xor, Sub, Or, Add, Mul, Rol, Shl, Shrl, Sarl, ICmpEq, ICmpNe, ICmpUgt, ICmpUlt, ICmpSgt, ICmpSlt, ICmpSge, ICmpSle, Store8, Store16, Store32, ICmpCRSigned, ICmpCRUnsigned, FallBackToInterpreter, StoreSingle, StoreDouble, StorePaired, InsertDoubleInMReg, FSMul, FSAdd, FSSub, FDMul, FDAdd, FDSub, FPAdd, FPMul, FPSub, FPMerge00, FPMerge01, FPMerge10, FPMerge11, FDCmpCR, };
|
||||
for (auto& op : ZeroOp) {
|
||||
numberOfOperands[op] = 0;
|
||||
|
|
|
@ -113,7 +113,6 @@ enum Opcode {
|
|||
FSAdd,
|
||||
FSSub,
|
||||
FSNeg,
|
||||
FSRSqrt,
|
||||
FPAdd,
|
||||
FPMul,
|
||||
FPSub,
|
||||
|
@ -464,9 +463,6 @@ public:
|
|||
// Builds an FSNeg IR instruction with op1 as its single operand by forwarding
// to FoldUOp, and returns the InstLoc that FoldUOp produces.
InstLoc EmitFSNeg(InstLoc op1) {
|
||||
return FoldUOp(FSNeg, op1);
|
||||
}
|
||||
// Builds an FSRSqrt IR instruction with op1 as its single operand by forwarding
// to FoldUOp, and returns the InstLoc that FoldUOp produces.
// NOTE: this commit appears to delete this helper along with the FSRSqrt opcode.
InstLoc EmitFSRSqrt(InstLoc op1) {
|
||||
return FoldUOp(FSRSqrt, op1);
|
||||
}
|
||||
// Builds an FDMul IR instruction with operands op1 and op2 by forwarding to
// FoldBiOp, and returns the InstLoc that FoldBiOp produces.
InstLoc EmitFDMul(InstLoc op1, InstLoc op2) {
|
||||
return FoldBiOp(FDMul, op1, op2);
|
||||
}
|
||||
|
|
|
@ -9,8 +9,7 @@ void JitILBase::fp_arith_s(UGeckoInstruction inst)
|
|||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITFloatingPointOff)
|
||||
if (inst.Rc || (inst.SUBOP5 != 25 && inst.SUBOP5 != 20 &&
|
||||
inst.SUBOP5 != 21 && inst.SUBOP5 != 26))
|
||||
if (inst.Rc || (inst.SUBOP5 != 25 && inst.SUBOP5 != 20 && inst.SUBOP5 != 21))
|
||||
{
|
||||
FallBackToInterpreter(inst);
|
||||
return;
|
||||
|
@ -35,12 +34,6 @@ void JitILBase::fp_arith_s(UGeckoInstruction inst)
|
|||
case 25: //mul
|
||||
val = ibuild.EmitFDMul(val, ibuild.EmitLoadFReg(inst.FC));
|
||||
break;
|
||||
case 26: //rsqrte
|
||||
val = ibuild.EmitLoadFReg(inst.FB);
|
||||
val = ibuild.EmitDoubleToSingle(val);
|
||||
val = ibuild.EmitFSRSqrt(val);
|
||||
val = ibuild.EmitDupSingleToMReg(val);
|
||||
break;
|
||||
default:
|
||||
_assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!");
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue