JitArm64: Merge ps_madd, ps_maddsX, ps_msub, ps_nmadd and ps_nmsub into a single ps_maddXX emitter.

This commit is contained in:
degasus 2016-02-11 00:50:13 +01:00
parent 8b32cd0738
commit f259a8d6cf
3 changed files with 44 additions and 107 deletions

View File

@ -145,13 +145,9 @@ public:
void fctiwzx(UGeckoInstruction inst); void fctiwzx(UGeckoInstruction inst);
// Paired // Paired
void ps_madd(UGeckoInstruction inst); void ps_maddXX(UGeckoInstruction inst);
void ps_maddsX(UGeckoInstruction inst);
void ps_mergeXX(UGeckoInstruction inst); void ps_mergeXX(UGeckoInstruction inst);
void ps_msub(UGeckoInstruction inst);
void ps_mulsX(UGeckoInstruction inst); void ps_mulsX(UGeckoInstruction inst);
void ps_nmadd(UGeckoInstruction inst);
void ps_nmsub(UGeckoInstruction inst);
void ps_res(UGeckoInstruction inst); void ps_res(UGeckoInstruction inst);
void ps_sel(UGeckoInstruction inst); void ps_sel(UGeckoInstruction inst);
void ps_sumX(UGeckoInstruction inst); void ps_sumX(UGeckoInstruction inst);

View File

@ -83,7 +83,8 @@ void JitArm64::ps_mulsX(UGeckoInstruction inst)
fpr.FixSinglePrecision(d); fpr.FixSinglePrecision(d);
fpr.Unlock(V0); fpr.Unlock(V0);
} }
void JitArm64::ps_madd(UGeckoInstruction inst)
void JitArm64::ps_maddXX(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(bJITPairedOff); JITDISABLE(bJITPairedOff);
@ -91,6 +92,7 @@ void JitArm64::ps_madd(UGeckoInstruction inst)
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
u32 op5 = inst.SUBOP5;
ARM64Reg VA = fpr.R(a, REG_REG); ARM64Reg VA = fpr.R(a, REG_REG);
ARM64Reg VB = fpr.R(b, REG_REG); ARM64Reg VB = fpr.R(b, REG_REG);
@ -98,101 +100,40 @@ void JitArm64::ps_madd(UGeckoInstruction inst)
ARM64Reg VD = fpr.RW(d, REG_REG); ARM64Reg VD = fpr.RW(d, REG_REG);
ARM64Reg V0 = fpr.GetReg(); ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC); switch (op5)
m_float_emit.FADD(64, VD, V0, VB); {
fpr.FixSinglePrecision(d); case 14: // ps_madds0
m_float_emit.DUP(64, V0, VC, 0);
fpr.Unlock(V0); m_float_emit.FMUL(64, V0, V0, VA);
} m_float_emit.FADD(64, VD, V0, VB);
break;
void JitArm64::ps_maddsX(UGeckoInstruction inst) case 15: // ps_madds1
{ m_float_emit.DUP(64, V0, VC, 1);
INSTRUCTION_START m_float_emit.FMUL(64, V0, V0, VA);
JITDISABLE(bJITPairedOff); m_float_emit.FADD(64, VD, V0, VB);
FALLBACK_IF(inst.Rc); break;
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); case 28: // ps_msub
m_float_emit.FMUL(64, V0, VA, VC);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; m_float_emit.FSUB(64, VD, V0, VB);
break;
bool upper = inst.SUBOP5 == 15; case 29: // ps_madd
m_float_emit.FMUL(64, V0, VA, VC);
ARM64Reg VA = fpr.R(a, REG_REG); m_float_emit.FADD(64, VD, V0, VB);
ARM64Reg VB = fpr.R(b, REG_REG); break;
ARM64Reg VC = fpr.R(c, REG_REG); case 30: // ps_nmsub
ARM64Reg VD = fpr.RW(d, REG_REG); m_float_emit.FMUL(64, V0, VA, VC);
ARM64Reg V0 = fpr.GetReg(); m_float_emit.FSUB(64, VD, V0, VB);
m_float_emit.FNEG(64, VD, VD);
m_float_emit.DUP(64, V0, VC, upper ? 1 : 0); break;
m_float_emit.FMUL(64, V0, V0, VA); case 31: // ps_nmadd
m_float_emit.FADD(64, VD, V0, VB); m_float_emit.FMUL(64, V0, VA, VC);
fpr.FixSinglePrecision(d); m_float_emit.FADD(64, VD, V0, VB);
m_float_emit.FNEG(64, VD, VD);
fpr.Unlock(V0); break;
} default:
_assert_msg_(DYNA_REC, 0, "ps_madd - invalid op");
void JitArm64::ps_msub(UGeckoInstruction inst) break;
{ }
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
ARM64Reg VA = fpr.R(a, REG_REG);
ARM64Reg VB = fpr.R(b, REG_REG);
ARM64Reg VC = fpr.R(c, REG_REG);
ARM64Reg VD = fpr.RW(d, REG_REG);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FSUB(64, VD, V0, VB);
fpr.FixSinglePrecision(d);
fpr.Unlock(V0);
}
void JitArm64::ps_nmadd(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
ARM64Reg VA = fpr.R(a, REG_REG);
ARM64Reg VB = fpr.R(b, REG_REG);
ARM64Reg VC = fpr.R(c, REG_REG);
ARM64Reg VD = fpr.RW(d, REG_REG);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FADD(64, VD, V0, VB);
m_float_emit.FNEG(64, VD, VD);
fpr.FixSinglePrecision(d);
fpr.Unlock(V0);
}
void JitArm64::ps_nmsub(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITPairedOff);
FALLBACK_IF(inst.Rc);
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
ARM64Reg VA = fpr.R(a, REG_REG);
ARM64Reg VB = fpr.R(b, REG_REG);
ARM64Reg VC = fpr.R(c, REG_REG);
ARM64Reg VD = fpr.RW(d, REG_REG);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
m_float_emit.FSUB(64, VD, V0, VB);
m_float_emit.FNEG(64, VD, VD);
fpr.FixSinglePrecision(d); fpr.FixSinglePrecision(d);
fpr.Unlock(V0); fpr.Unlock(V0);

View File

@ -124,8 +124,8 @@ static GekkoOPTemplate table4_2[] =
{11, &JitArm64::ps_sumX}, // ps_sum1 {11, &JitArm64::ps_sumX}, // ps_sum1
{12, &JitArm64::ps_mulsX}, // ps_muls0 {12, &JitArm64::ps_mulsX}, // ps_muls0
{13, &JitArm64::ps_mulsX}, // ps_muls1 {13, &JitArm64::ps_mulsX}, // ps_muls1
{14, &JitArm64::ps_maddsX}, // ps_madds0 {14, &JitArm64::ps_maddXX}, // ps_madds0
{15, &JitArm64::ps_maddsX}, // ps_madds1 {15, &JitArm64::ps_maddXX}, // ps_madds1
{18, &JitArm64::fp_arith}, // ps_div {18, &JitArm64::fp_arith}, // ps_div
{20, &JitArm64::fp_arith}, // ps_sub {20, &JitArm64::fp_arith}, // ps_sub
{21, &JitArm64::fp_arith}, // ps_add {21, &JitArm64::fp_arith}, // ps_add
@ -133,10 +133,10 @@ static GekkoOPTemplate table4_2[] =
{24, &JitArm64::ps_res}, // ps_res {24, &JitArm64::ps_res}, // ps_res
{25, &JitArm64::fp_arith}, // ps_mul {25, &JitArm64::fp_arith}, // ps_mul
{26, &JitArm64::FallBackToInterpreter}, // ps_rsqrte {26, &JitArm64::FallBackToInterpreter}, // ps_rsqrte
{28, &JitArm64::ps_msub}, // ps_msub {28, &JitArm64::ps_maddXX}, // ps_msub
{29, &JitArm64::ps_madd}, // ps_madd {29, &JitArm64::ps_maddXX}, // ps_madd
{30, &JitArm64::ps_nmsub}, // ps_nmsub {30, &JitArm64::ps_maddXX}, // ps_nmsub
{31, &JitArm64::ps_nmadd}, // ps_nmadd {31, &JitArm64::ps_maddXX}, // ps_nmadd
}; };