Jit: get rid of incorrect implementations of fres and frsqrte.

The existing implementations produce answers which aren't consistent with
the hardware, and games care about correct floating point math.

These can be reimplemented at some point in the future, if someone cares
enough, but the general case is probably too much code to inline.

(I'm leaving the ARM implementations in place by request, even though they
suffer the same issues.)
This commit is contained in:
magumagu 2014-05-23 14:57:34 -07:00
parent 2f8a147eda
commit 567724b2f8
10 changed files with 6 additions and 79 deletions

View File

@ -168,12 +168,10 @@ public:
void ps_arith(UGeckoInstruction inst); //aggregate
void ps_mergeXX(UGeckoInstruction inst);
void ps_maddXX(UGeckoInstruction inst);
void ps_recip(UGeckoInstruction inst);
void ps_sum(UGeckoInstruction inst);
void ps_muls(UGeckoInstruction inst);
void fp_arith(UGeckoInstruction inst);
void frsqrtex(UGeckoInstruction inst);
void fcmpx(UGeckoInstruction inst);
void fmrx(UGeckoInstruction inst);

View File

@ -138,9 +138,9 @@ static GekkoOPTemplate table4_2[] =
{20, &Jit64::ps_arith}, //"ps_sub", OPTYPE_PS, 0}},
{21, &Jit64::ps_arith}, //"ps_add", OPTYPE_PS, 0}},
{23, &Jit64::ps_sel}, //"ps_sel", OPTYPE_PS, 0}},
{24, &Jit64::ps_recip}, //"ps_res", OPTYPE_PS, 0}},
{24, &Jit64::FallBackToInterpreter}, //"ps_res", OPTYPE_PS, 0}},
{25, &Jit64::ps_arith}, //"ps_mul", OPTYPE_PS, 0}},
{26, &Jit64::ps_recip}, //"ps_rsqrte", OPTYPE_PS, 0, 1}},
{26, &Jit64::FallBackToInterpreter}, //"ps_rsqrte", OPTYPE_PS, 0, 1}},
{28, &Jit64::ps_maddXX}, //"ps_msub", OPTYPE_PS, 0}},
{29, &Jit64::ps_maddXX}, //"ps_madd", OPTYPE_PS, 0}},
{30, &Jit64::ps_maddXX}, //"ps_nmsub", OPTYPE_PS, 0}},
@ -360,7 +360,7 @@ static GekkoOPTemplate table63_2[] =
{22, &Jit64::FallBackToInterpreter}, //"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}},
{23, &Jit64::FallBackToInterpreter}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}},
{25, &Jit64::fp_arith}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}},
{26, &Jit64::frsqrtex}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
{26, &Jit64::FallBackToInterpreter}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
{28, &Jit64::fmaddXX}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
{29, &Jit64::fmaddXX}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
{30, &Jit64::fmaddXX}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}},

View File

@ -101,21 +101,6 @@ void Jit64::fp_arith(UGeckoInstruction inst)
}
}
// JIT handler for frsqrtex: emits x86 code that writes
// one_const / sqrt(FB) into FD using scalar double-precision SSE2 ops.
// NOTE(review): the result is produced with a real square root and a real
// divide, so it is not the value the emulated hardware would return for this
// instruction (see the commit description); the Rc bit is also not examined here.
void Jit64::frsqrtex(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff)
// Destination and source floating-point register indices from the opcode.
int d = inst.FD;
int b = inst.FB;
// Keep both guest registers pinned in the register cache while code is emitted.
fpr.Lock(b, d);
// NOTE(review): the two 'true' flags presumably request load + dirty; confirm
// against the BindToRegister declaration.
fpr.BindToRegister(d, true, true);
// XMM0 = one_const (presumably 1.0 -- defined outside this view).
MOVSD(XMM0, M((void *)&one_const));
// XMM1 = sqrt(FB)
SQRTSD(XMM1, fpr.R(b));
// XMM0 = one_const / sqrt(FB)
DIVSD(XMM0, R(XMM1));
// Store the low double of the result into FD.
MOVSD(fpr.R(d), XMM0);
fpr.UnlockAll();
}
void Jit64::fmaddXX(UGeckoInstruction inst)
{
INSTRUCTION_START

View File

@ -112,41 +112,6 @@ void Jit64::ps_sign(UGeckoInstruction inst)
fpr.UnlockAll();
}
// ps_res and ps_rsqrte
// Shared JIT handler for the two paired-single "estimate" opcodes. It emits
// packed-double code computing psOneOne / FB (subop 24) or
// psOneOne / sqrt(FB) (subop 26) and stores the pair into FD.
// Instructions with the Rc bit set are handed to the interpreter instead.
// NOTE(review): the result comes from a real divide (and real square root),
// so it is not the value the emulated hardware returns (see the commit description).
void Jit64::ps_recip(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff)
// Record-form variants are not handled by the emitted code.
if (inst.Rc)
{
FallBackToInterpreter(inst);
return;
}
// Operand that will be used as the divisor of the final DIVPD.
OpArg divisor;
int d = inst.FD;
int b = inst.FB;
fpr.Lock(d, b);
// The second argument is true only when source and destination are the same
// register. NOTE(review): presumably this controls whether the old value is
// loaded; confirm against the BindToRegister declaration.
fpr.BindToRegister(d, (d == b));
// Only subops 24 and 26 are routed here by the opcode table; any other value
// would leave 'divisor' unset because there is no default case.
switch (inst.SUBOP5)
{
case 24:
// ps_res
// Divide directly by the source pair.
divisor = fpr.R(b);
break;
case 26:
// ps_rsqrte
// Divide by the packed square root of the source pair, held in XMM0.
SQRTPD(XMM0, fpr.R(b));
divisor = R(XMM0);
break;
}
// XMM1 = psOneOne (presumably the pair {1.0, 1.0} -- defined outside this view).
MOVAPD(XMM1, M((void*)&psOneOne));
// XMM1 = psOneOne / divisor, element-wise.
DIVPD(XMM1, divisor);
// Write both lanes of the result to FD.
MOVAPD(fpr.R(d), XMM1);
fpr.UnlockAll();
}
//add a, b, c
//mov a, b

View File

@ -674,7 +674,6 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
case FSMul:
case FSAdd:
case FSSub:
case FSRSqrt:
case FDMul:
case FDAdd:
case FDSub:
@ -1435,14 +1434,6 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
fregEmitBinInst(RI, I, &JitIL::SUBSS);
break;
}
case FSRSqrt: {
if (!thisUsed) break;
X64Reg reg = fregURegWithoutMov(RI, I);
Jit->RSQRTSS(reg, fregLocForInst(RI, getOp1(I)));
RI.fregs[reg] = I;
fregNormalRegClear(RI, I);
break;
}
case FDMul: {
if (!thisUsed) break;
fregEmitBinInst(RI, I, &JitIL::MULSD);

View File

@ -361,7 +361,7 @@ static GekkoOPTemplate table63_2[] =
{22, &JitIL::FallBackToInterpreter}, //"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}},
{23, &JitIL::FallBackToInterpreter}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}},
{25, &JitIL::fp_arith_s}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}},
{26, &JitIL::fp_arith_s}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
{26, &JitIL::FallBackToInterpreter}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
{28, &JitIL::fmaddXX}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
{29, &JitIL::fmaddXX}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
{30, &JitIL::fmaddXX}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}},

View File

@ -394,7 +394,6 @@ static void DoWriteCode(IRBuilder* ibuild, JitArmIL* Jit, u32 exitAddress) {
case FSMul:
case FSAdd:
case FSSub:
case FSRSqrt:
case FDMul:
case FDAdd:
case FDSub:

View File

@ -1128,7 +1128,7 @@ unsigned IRBuilder::getNumberOfOperands(InstLoc I) const {
numberOfOperands[CInt32] = 0;
static unsigned ZeroOp[] = {LoadCR, LoadLink, LoadMSR, LoadGReg, LoadCTR, InterpreterBranch, LoadCarry, RFIExit, LoadFReg, LoadFRegDENToZero, LoadGQR, Int3, };
static unsigned UOp[] = {StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FSRSqrt, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, };
static unsigned UOp[] = {StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, };
static unsigned BiOp[] = {BranchCond, IdleBranch, And, Xor, Sub, Or, Add, Mul, Rol, Shl, Shrl, Sarl, ICmpEq, ICmpNe, ICmpUgt, ICmpUlt, ICmpSgt, ICmpSlt, ICmpSge, ICmpSle, Store8, Store16, Store32, ICmpCRSigned, ICmpCRUnsigned, FallBackToInterpreter, StoreSingle, StoreDouble, StorePaired, InsertDoubleInMReg, FSMul, FSAdd, FSSub, FDMul, FDAdd, FDSub, FPAdd, FPMul, FPSub, FPMerge00, FPMerge01, FPMerge10, FPMerge11, FDCmpCR, };
for (auto& op : ZeroOp) {
numberOfOperands[op] = 0;

View File

@ -113,7 +113,6 @@ enum Opcode {
FSAdd,
FSSub,
FSNeg,
FSRSqrt,
FPAdd,
FPMul,
FPSub,
@ -464,9 +463,6 @@ public:
// Emits a one-operand FSNeg IR instruction over op1 by delegating to
// FoldUOp, and returns the location FoldUOp hands back.
InstLoc EmitFSNeg(InstLoc op1) {
	InstLoc negated = FoldUOp(FSNeg, op1);
	return negated;
}
// Emits a one-operand FSRSqrt IR instruction over op1 by delegating to
// FoldUOp, and returns the location FoldUOp hands back.
InstLoc EmitFSRSqrt(InstLoc op1) {
	InstLoc rsqrtLoc = FoldUOp(FSRSqrt, op1);
	return rsqrtLoc;
}
// Emits a two-operand FDMul IR instruction over (op1, op2) by delegating to
// FoldBiOp, and returns the location FoldBiOp hands back.
InstLoc EmitFDMul(InstLoc op1, InstLoc op2) {
	InstLoc product = FoldBiOp(FDMul, op1, op2);
	return product;
}

View File

@ -9,8 +9,7 @@ void JitILBase::fp_arith_s(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff)
if (inst.Rc || (inst.SUBOP5 != 25 && inst.SUBOP5 != 20 &&
inst.SUBOP5 != 21 && inst.SUBOP5 != 26))
if (inst.Rc || (inst.SUBOP5 != 25 && inst.SUBOP5 != 20 && inst.SUBOP5 != 21))
{
FallBackToInterpreter(inst);
return;
@ -35,12 +34,6 @@ void JitILBase::fp_arith_s(UGeckoInstruction inst)
case 25: //mul
val = ibuild.EmitFDMul(val, ibuild.EmitLoadFReg(inst.FC));
break;
case 26: //rsqrte
val = ibuild.EmitLoadFReg(inst.FB);
val = ibuild.EmitDoubleToSingle(val);
val = ibuild.EmitFSRSqrt(val);
val = ibuild.EmitDupSingleToMReg(val);
break;
default:
_assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!");
}