[ARM] Implement ps_msub. Minor optimizations.
This commit is contained in:
parent
afdac224cb
commit
43f2313ef9
|
@ -220,6 +220,7 @@ public:
|
|||
void ps_sum0(UGeckoInstruction _inst);
|
||||
void ps_sum1(UGeckoInstruction _inst);
|
||||
void ps_madd(UGeckoInstruction _inst);
|
||||
void ps_msub(UGeckoInstruction _inst);
|
||||
void ps_madds0(UGeckoInstruction _inst);
|
||||
void ps_madds1(UGeckoInstruction _inst);
|
||||
void ps_sub(UGeckoInstruction _inst);
|
||||
|
|
|
@ -69,14 +69,41 @@ void JitArm::ps_madd(UGeckoInstruction inst)
|
|||
ARMReg V0 = fpr.GetReg();
|
||||
ARMReg V1 = fpr.GetReg();
|
||||
|
||||
VMOV(V0, vB0);
|
||||
VMOV(V1, vB1);
|
||||
VMUL(V0, vA0, vC0);
|
||||
VMUL(V1, vA1, vC1);
|
||||
VADD(vD0, V0, vB0);
|
||||
VADD(vD1, V1, vB1);
|
||||
|
||||
VMLA(V0, vA0, vC0);
|
||||
VMLA(V1, vA1, vC1);
|
||||
fpr.Unlock(V0);
|
||||
fpr.Unlock(V1);
|
||||
}
|
||||
|
||||
VMOV(vD0, V0);
|
||||
VMOV(vD1, V1);
|
||||
void JitArm::ps_msub(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITPairedOff)
|
||||
|
||||
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
|
||||
|
||||
if (inst.Rc) {
|
||||
Default(inst); return;
|
||||
}
|
||||
ARMReg vA0 = fpr.R0(a);
|
||||
ARMReg vA1 = fpr.R1(a);
|
||||
ARMReg vB0 = fpr.R0(b);
|
||||
ARMReg vB1 = fpr.R1(b);
|
||||
ARMReg vC0 = fpr.R0(c);
|
||||
ARMReg vC1 = fpr.R1(c);
|
||||
ARMReg vD0 = fpr.R0(d, false);
|
||||
ARMReg vD1 = fpr.R1(d, false);
|
||||
|
||||
ARMReg V0 = fpr.GetReg();
|
||||
ARMReg V1 = fpr.GetReg();
|
||||
|
||||
VMUL(V0, vA0, vC0);
|
||||
VMUL(V1, vA1, vC1);
|
||||
VSUB(vD0, V0, vB0);
|
||||
VSUB(vD1, V1, vB1);
|
||||
|
||||
fpr.Unlock(V0);
|
||||
fpr.Unlock(V1);
|
||||
|
@ -103,14 +130,11 @@ void JitArm::ps_madds0(UGeckoInstruction inst)
|
|||
ARMReg V0 = fpr.GetReg();
|
||||
ARMReg V1 = fpr.GetReg();
|
||||
|
||||
VMOV(V0, vB0);
|
||||
VMOV(V1, vB1);
|
||||
VMUL(V0, vA0, vC0);
|
||||
VMUL(V1, vA1, vC0);
|
||||
|
||||
VMLA(V0, vA0, vC0);
|
||||
VMLA(V1, vA1, vC0);
|
||||
|
||||
VMOV(vD0, V0);
|
||||
VMOV(vD1, V1);
|
||||
VADD(vD0, V0, vB0);
|
||||
VADD(vD1, V1, vB1);
|
||||
|
||||
fpr.Unlock(V0);
|
||||
fpr.Unlock(V1);
|
||||
|
@ -137,14 +161,10 @@ void JitArm::ps_madds1(UGeckoInstruction inst)
|
|||
ARMReg V0 = fpr.GetReg();
|
||||
ARMReg V1 = fpr.GetReg();
|
||||
|
||||
VMOV(V0, vB0);
|
||||
VMOV(V1, vB1);
|
||||
|
||||
VMLA(V0, vA0, vC1);
|
||||
VMLA(V1, vA1, vC1);
|
||||
|
||||
VMOV(vD0, V0);
|
||||
VMOV(vD1, V1);
|
||||
VMUL(V0, vA0, vC1);
|
||||
VMUL(V1, vA1, vC1);
|
||||
VADD(vD0, V0, vB0);
|
||||
VADD(vD1, V1, vB1);
|
||||
|
||||
fpr.Unlock(V0);
|
||||
fpr.Unlock(V1);
|
||||
|
|
|
@ -156,7 +156,7 @@ static GekkoOPTemplate table4_2[] =
|
|||
{24, &JitArm::Default}, //"ps_res", OPTYPE_PS, 0}},
|
||||
{25, &JitArm::ps_mul}, //"ps_mul", OPTYPE_PS, 0}},
|
||||
{26, &JitArm::Default}, //"ps_rsqrte", OPTYPE_PS, 0, 1}},
|
||||
{28, &JitArm::Default}, //"ps_msub", OPTYPE_PS, 0}},
|
||||
{28, &JitArm::ps_msub}, //"ps_msub", OPTYPE_PS, 0}},
|
||||
{29, &JitArm::ps_madd}, //"ps_madd", OPTYPE_PS, 0}},
|
||||
{30, &JitArm::Default}, //"ps_nmsub", OPTYPE_PS, 0}},
|
||||
{31, &JitArm::Default}, //"ps_nmadd", OPTYPE_PS, 0}},
|
||||
|
|
Loading…
Reference in New Issue