Jit: get rid of incorrect implementations of fres and frsqrte.
The existing implementations produce results that aren't consistent with the hardware, and games care about correct floating-point math. The JIT versions of these instructions can be reimplemented at some point in the future, if someone cares enough, but the general case is probably too much code to inline. (I'm leaving the ARM implementations in place by request, even though they suffer from the same issues.)
This commit is contained in:
parent
2f8a147eda
commit
567724b2f8
|
@ -168,12 +168,10 @@ public:
|
||||||
void ps_arith(UGeckoInstruction inst); //aggregate
|
void ps_arith(UGeckoInstruction inst); //aggregate
|
||||||
void ps_mergeXX(UGeckoInstruction inst);
|
void ps_mergeXX(UGeckoInstruction inst);
|
||||||
void ps_maddXX(UGeckoInstruction inst);
|
void ps_maddXX(UGeckoInstruction inst);
|
||||||
void ps_recip(UGeckoInstruction inst);
|
|
||||||
void ps_sum(UGeckoInstruction inst);
|
void ps_sum(UGeckoInstruction inst);
|
||||||
void ps_muls(UGeckoInstruction inst);
|
void ps_muls(UGeckoInstruction inst);
|
||||||
|
|
||||||
void fp_arith(UGeckoInstruction inst);
|
void fp_arith(UGeckoInstruction inst);
|
||||||
void frsqrtex(UGeckoInstruction inst);
|
|
||||||
|
|
||||||
void fcmpx(UGeckoInstruction inst);
|
void fcmpx(UGeckoInstruction inst);
|
||||||
void fmrx(UGeckoInstruction inst);
|
void fmrx(UGeckoInstruction inst);
|
||||||
|
|
|
@ -138,9 +138,9 @@ static GekkoOPTemplate table4_2[] =
|
||||||
{20, &Jit64::ps_arith}, //"ps_sub", OPTYPE_PS, 0}},
|
{20, &Jit64::ps_arith}, //"ps_sub", OPTYPE_PS, 0}},
|
||||||
{21, &Jit64::ps_arith}, //"ps_add", OPTYPE_PS, 0}},
|
{21, &Jit64::ps_arith}, //"ps_add", OPTYPE_PS, 0}},
|
||||||
{23, &Jit64::ps_sel}, //"ps_sel", OPTYPE_PS, 0}},
|
{23, &Jit64::ps_sel}, //"ps_sel", OPTYPE_PS, 0}},
|
||||||
{24, &Jit64::ps_recip}, //"ps_res", OPTYPE_PS, 0}},
|
{24, &Jit64::FallBackToInterpreter}, //"ps_res", OPTYPE_PS, 0}},
|
||||||
{25, &Jit64::ps_arith}, //"ps_mul", OPTYPE_PS, 0}},
|
{25, &Jit64::ps_arith}, //"ps_mul", OPTYPE_PS, 0}},
|
||||||
{26, &Jit64::ps_recip}, //"ps_rsqrte", OPTYPE_PS, 0, 1}},
|
{26, &Jit64::FallBackToInterpreter}, //"ps_rsqrte", OPTYPE_PS, 0, 1}},
|
||||||
{28, &Jit64::ps_maddXX}, //"ps_msub", OPTYPE_PS, 0}},
|
{28, &Jit64::ps_maddXX}, //"ps_msub", OPTYPE_PS, 0}},
|
||||||
{29, &Jit64::ps_maddXX}, //"ps_madd", OPTYPE_PS, 0}},
|
{29, &Jit64::ps_maddXX}, //"ps_madd", OPTYPE_PS, 0}},
|
||||||
{30, &Jit64::ps_maddXX}, //"ps_nmsub", OPTYPE_PS, 0}},
|
{30, &Jit64::ps_maddXX}, //"ps_nmsub", OPTYPE_PS, 0}},
|
||||||
|
@ -360,7 +360,7 @@ static GekkoOPTemplate table63_2[] =
|
||||||
{22, &Jit64::FallBackToInterpreter}, //"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}},
|
{22, &Jit64::FallBackToInterpreter}, //"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||||
{23, &Jit64::FallBackToInterpreter}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}},
|
{23, &Jit64::FallBackToInterpreter}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||||
{25, &Jit64::fp_arith}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}},
|
{25, &Jit64::fp_arith}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||||
{26, &Jit64::frsqrtex}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
|
{26, &Jit64::FallBackToInterpreter}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||||
{28, &Jit64::fmaddXX}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
|
{28, &Jit64::fmaddXX}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||||
{29, &Jit64::fmaddXX}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
|
{29, &Jit64::fmaddXX}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||||
{30, &Jit64::fmaddXX}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
|
{30, &Jit64::fmaddXX}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||||
|
|
|
@ -101,21 +101,6 @@ void Jit64::fp_arith(UGeckoInstruction inst)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::frsqrtex(UGeckoInstruction inst)
|
|
||||||
{
|
|
||||||
INSTRUCTION_START
|
|
||||||
JITDISABLE(bJITFloatingPointOff)
|
|
||||||
int d = inst.FD;
|
|
||||||
int b = inst.FB;
|
|
||||||
fpr.Lock(b, d);
|
|
||||||
fpr.BindToRegister(d, true, true);
|
|
||||||
MOVSD(XMM0, M((void *)&one_const));
|
|
||||||
SQRTSD(XMM1, fpr.R(b));
|
|
||||||
DIVSD(XMM0, R(XMM1));
|
|
||||||
MOVSD(fpr.R(d), XMM0);
|
|
||||||
fpr.UnlockAll();
|
|
||||||
}
|
|
||||||
|
|
||||||
void Jit64::fmaddXX(UGeckoInstruction inst)
|
void Jit64::fmaddXX(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
|
|
|
@ -112,41 +112,6 @@ void Jit64::ps_sign(UGeckoInstruction inst)
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
// ps_res and ps_rsqrte
|
|
||||||
void Jit64::ps_recip(UGeckoInstruction inst)
|
|
||||||
{
|
|
||||||
INSTRUCTION_START
|
|
||||||
JITDISABLE(bJITPairedOff)
|
|
||||||
|
|
||||||
if (inst.Rc)
|
|
||||||
{
|
|
||||||
FallBackToInterpreter(inst);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
OpArg divisor;
|
|
||||||
int d = inst.FD;
|
|
||||||
int b = inst.FB;
|
|
||||||
fpr.Lock(d, b);
|
|
||||||
fpr.BindToRegister(d, (d == b));
|
|
||||||
switch (inst.SUBOP5)
|
|
||||||
{
|
|
||||||
case 24:
|
|
||||||
// ps_res
|
|
||||||
divisor = fpr.R(b);
|
|
||||||
break;
|
|
||||||
case 26:
|
|
||||||
// ps_rsqrte
|
|
||||||
SQRTPD(XMM0, fpr.R(b));
|
|
||||||
divisor = R(XMM0);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
MOVAPD(XMM1, M((void*)&psOneOne));
|
|
||||||
DIVPD(XMM1, divisor);
|
|
||||||
MOVAPD(fpr.R(d), XMM1);
|
|
||||||
fpr.UnlockAll();
|
|
||||||
}
|
|
||||||
|
|
||||||
//add a, b, c
|
//add a, b, c
|
||||||
|
|
||||||
//mov a, b
|
//mov a, b
|
||||||
|
|
|
@ -674,7 +674,6 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
|
||||||
case FSMul:
|
case FSMul:
|
||||||
case FSAdd:
|
case FSAdd:
|
||||||
case FSSub:
|
case FSSub:
|
||||||
case FSRSqrt:
|
|
||||||
case FDMul:
|
case FDMul:
|
||||||
case FDAdd:
|
case FDAdd:
|
||||||
case FDSub:
|
case FDSub:
|
||||||
|
@ -1435,14 +1434,6 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
|
||||||
fregEmitBinInst(RI, I, &JitIL::SUBSS);
|
fregEmitBinInst(RI, I, &JitIL::SUBSS);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case FSRSqrt: {
|
|
||||||
if (!thisUsed) break;
|
|
||||||
X64Reg reg = fregURegWithoutMov(RI, I);
|
|
||||||
Jit->RSQRTSS(reg, fregLocForInst(RI, getOp1(I)));
|
|
||||||
RI.fregs[reg] = I;
|
|
||||||
fregNormalRegClear(RI, I);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case FDMul: {
|
case FDMul: {
|
||||||
if (!thisUsed) break;
|
if (!thisUsed) break;
|
||||||
fregEmitBinInst(RI, I, &JitIL::MULSD);
|
fregEmitBinInst(RI, I, &JitIL::MULSD);
|
||||||
|
|
|
@ -361,7 +361,7 @@ static GekkoOPTemplate table63_2[] =
|
||||||
{22, &JitIL::FallBackToInterpreter}, //"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}},
|
{22, &JitIL::FallBackToInterpreter}, //"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||||
{23, &JitIL::FallBackToInterpreter}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}},
|
{23, &JitIL::FallBackToInterpreter}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||||
{25, &JitIL::fp_arith_s}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}},
|
{25, &JitIL::fp_arith_s}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||||
{26, &JitIL::fp_arith_s}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
|
{26, &JitIL::FallBackToInterpreter}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||||
{28, &JitIL::fmaddXX}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
|
{28, &JitIL::fmaddXX}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||||
{29, &JitIL::fmaddXX}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
|
{29, &JitIL::fmaddXX}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||||
{30, &JitIL::fmaddXX}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
|
{30, &JitIL::fmaddXX}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||||
|
|
|
@ -394,7 +394,6 @@ static void DoWriteCode(IRBuilder* ibuild, JitArmIL* Jit, u32 exitAddress) {
|
||||||
case FSMul:
|
case FSMul:
|
||||||
case FSAdd:
|
case FSAdd:
|
||||||
case FSSub:
|
case FSSub:
|
||||||
case FSRSqrt:
|
|
||||||
case FDMul:
|
case FDMul:
|
||||||
case FDAdd:
|
case FDAdd:
|
||||||
case FDSub:
|
case FDSub:
|
||||||
|
|
|
@ -1128,7 +1128,7 @@ unsigned IRBuilder::getNumberOfOperands(InstLoc I) const {
|
||||||
numberOfOperands[CInt32] = 0;
|
numberOfOperands[CInt32] = 0;
|
||||||
|
|
||||||
static unsigned ZeroOp[] = {LoadCR, LoadLink, LoadMSR, LoadGReg, LoadCTR, InterpreterBranch, LoadCarry, RFIExit, LoadFReg, LoadFRegDENToZero, LoadGQR, Int3, };
|
static unsigned ZeroOp[] = {LoadCR, LoadLink, LoadMSR, LoadGReg, LoadCTR, InterpreterBranch, LoadCarry, RFIExit, LoadFReg, LoadFRegDENToZero, LoadGQR, Int3, };
|
||||||
static unsigned UOp[] = {StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FSRSqrt, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, };
|
static unsigned UOp[] = {StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, };
|
||||||
static unsigned BiOp[] = {BranchCond, IdleBranch, And, Xor, Sub, Or, Add, Mul, Rol, Shl, Shrl, Sarl, ICmpEq, ICmpNe, ICmpUgt, ICmpUlt, ICmpSgt, ICmpSlt, ICmpSge, ICmpSle, Store8, Store16, Store32, ICmpCRSigned, ICmpCRUnsigned, FallBackToInterpreter, StoreSingle, StoreDouble, StorePaired, InsertDoubleInMReg, FSMul, FSAdd, FSSub, FDMul, FDAdd, FDSub, FPAdd, FPMul, FPSub, FPMerge00, FPMerge01, FPMerge10, FPMerge11, FDCmpCR, };
|
static unsigned BiOp[] = {BranchCond, IdleBranch, And, Xor, Sub, Or, Add, Mul, Rol, Shl, Shrl, Sarl, ICmpEq, ICmpNe, ICmpUgt, ICmpUlt, ICmpSgt, ICmpSlt, ICmpSge, ICmpSle, Store8, Store16, Store32, ICmpCRSigned, ICmpCRUnsigned, FallBackToInterpreter, StoreSingle, StoreDouble, StorePaired, InsertDoubleInMReg, FSMul, FSAdd, FSSub, FDMul, FDAdd, FDSub, FPAdd, FPMul, FPSub, FPMerge00, FPMerge01, FPMerge10, FPMerge11, FDCmpCR, };
|
||||||
for (auto& op : ZeroOp) {
|
for (auto& op : ZeroOp) {
|
||||||
numberOfOperands[op] = 0;
|
numberOfOperands[op] = 0;
|
||||||
|
|
|
@ -113,7 +113,6 @@ enum Opcode {
|
||||||
FSAdd,
|
FSAdd,
|
||||||
FSSub,
|
FSSub,
|
||||||
FSNeg,
|
FSNeg,
|
||||||
FSRSqrt,
|
|
||||||
FPAdd,
|
FPAdd,
|
||||||
FPMul,
|
FPMul,
|
||||||
FPSub,
|
FPSub,
|
||||||
|
@ -464,9 +463,6 @@ public:
|
||||||
InstLoc EmitFSNeg(InstLoc op1) {
|
InstLoc EmitFSNeg(InstLoc op1) {
|
||||||
return FoldUOp(FSNeg, op1);
|
return FoldUOp(FSNeg, op1);
|
||||||
}
|
}
|
||||||
InstLoc EmitFSRSqrt(InstLoc op1) {
|
|
||||||
return FoldUOp(FSRSqrt, op1);
|
|
||||||
}
|
|
||||||
InstLoc EmitFDMul(InstLoc op1, InstLoc op2) {
|
InstLoc EmitFDMul(InstLoc op1, InstLoc op2) {
|
||||||
return FoldBiOp(FDMul, op1, op2);
|
return FoldBiOp(FDMul, op1, op2);
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,8 +9,7 @@ void JitILBase::fp_arith_s(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(bJITFloatingPointOff)
|
JITDISABLE(bJITFloatingPointOff)
|
||||||
if (inst.Rc || (inst.SUBOP5 != 25 && inst.SUBOP5 != 20 &&
|
if (inst.Rc || (inst.SUBOP5 != 25 && inst.SUBOP5 != 20 && inst.SUBOP5 != 21))
|
||||||
inst.SUBOP5 != 21 && inst.SUBOP5 != 26))
|
|
||||||
{
|
{
|
||||||
FallBackToInterpreter(inst);
|
FallBackToInterpreter(inst);
|
||||||
return;
|
return;
|
||||||
|
@ -35,12 +34,6 @@ void JitILBase::fp_arith_s(UGeckoInstruction inst)
|
||||||
case 25: //mul
|
case 25: //mul
|
||||||
val = ibuild.EmitFDMul(val, ibuild.EmitLoadFReg(inst.FC));
|
val = ibuild.EmitFDMul(val, ibuild.EmitLoadFReg(inst.FC));
|
||||||
break;
|
break;
|
||||||
case 26: //rsqrte
|
|
||||||
val = ibuild.EmitLoadFReg(inst.FB);
|
|
||||||
val = ibuild.EmitDoubleToSingle(val);
|
|
||||||
val = ibuild.EmitFSRSqrt(val);
|
|
||||||
val = ibuild.EmitDupSingleToMReg(val);
|
|
||||||
break;
|
|
||||||
default:
|
default:
|
||||||
_assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!");
|
_assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!");
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue