JIT: add ps_res and ps_rsqrte using scalar implementations

This commit is contained in:
Fiora 2014-09-18 12:30:05 -07:00
parent 1b425dedd1
commit af8772700a
3 changed files with 57 additions and 2 deletions

View File

@ -198,6 +198,8 @@ public:
void ps_arith(UGeckoInstruction inst); //aggregate
void ps_mergeXX(UGeckoInstruction inst);
void ps_maddXX(UGeckoInstruction inst);
void ps_res(UGeckoInstruction inst);
void ps_rsqrte(UGeckoInstruction inst);
void ps_sum(UGeckoInstruction inst);
void ps_muls(UGeckoInstruction inst);
void ps_cmpXX(UGeckoInstruction inst);

View File

@ -138,9 +138,9 @@ static GekkoOPTemplate table4_2[] =
{20, &Jit64::ps_arith}, //"ps_sub", OPTYPE_PS, 0}},
{21, &Jit64::ps_arith}, //"ps_add", OPTYPE_PS, 0}},
{23, &Jit64::ps_sel}, //"ps_sel", OPTYPE_PS, 0}},
{24, &Jit64::FallBackToInterpreter}, //"ps_res", OPTYPE_PS, 0}},
{24, &Jit64::ps_res}, //"ps_res", OPTYPE_PS, 0}},
{25, &Jit64::ps_arith}, //"ps_mul", OPTYPE_PS, 0}},
{26, &Jit64::FallBackToInterpreter}, //"ps_rsqrte", OPTYPE_PS, 0, 1}},
{26, &Jit64::ps_rsqrte}, //"ps_rsqrte", OPTYPE_PS, 0, 1}},
{28, &Jit64::ps_maddXX}, //"ps_msub", OPTYPE_PS, 0}},
{29, &Jit64::ps_maddXX}, //"ps_madd", OPTYPE_PS, 0}},
{30, &Jit64::ps_maddXX}, //"ps_nmsub", OPTYPE_PS, 0}},

View File

@ -282,6 +282,59 @@ void Jit64::ps_mergeXX(UGeckoInstruction inst)
fpr.UnlockAll();
}
void Jit64::ps_rsqrte(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
int b = inst.FB;
int d = inst.FD;
gpr.FlushLockX(RSCRATCH_EXTRA);
fpr.Lock(b, d);
fpr.BindToRegister(b, true, false);
fpr.BindToRegister(d, false);
MOVSD(XMM0, fpr.R(b));
CALL((void *)asm_routines.frsqrte);
MOVSD(fpr.R(d), XMM0);
MOVHLPS(XMM0, fpr.RX(b));
CALL((void *)asm_routines.frsqrte);
MOVLHPS(fpr.RX(d), XMM0);
ForceSinglePrecisionP(fpr.RX(d));
SetFPRFIfNeeded(inst, fpr.RX(d));
fpr.UnlockAll();
gpr.UnlockAllX();
}
void Jit64::ps_res(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
int b = inst.FB;
int d = inst.FD;
gpr.FlushLockX(RSCRATCH_EXTRA);
fpr.Lock(b, d);
fpr.BindToRegister(b, true, false);
fpr.BindToRegister(d, false);
MOVSD(XMM0, fpr.R(b));
CALL((void *)asm_routines.fres);
MOVSD(fpr.R(d), XMM0);
MOVHLPS(XMM0, fpr.RX(b));
CALL((void *)asm_routines.fres);
MOVLHPS(fpr.RX(d), XMM0);
ForceSinglePrecisionP(fpr.RX(d));
SetFPRFIfNeeded(inst, fpr.RX(d));
fpr.UnlockAll();
gpr.UnlockAllX();
}
//TODO: add optimized cases
void Jit64::ps_maddXX(UGeckoInstruction inst)