JitArm64: Also merge 3-way FP-PS instructions.
This commit is contained in:
parent
83eb1d8c31
commit
9c048bbc36
|
@ -157,8 +157,6 @@ public:
|
||||||
|
|
||||||
// Paired
|
// Paired
|
||||||
void ps_abs(UGeckoInstruction inst);
|
void ps_abs(UGeckoInstruction inst);
|
||||||
void ps_add(UGeckoInstruction inst);
|
|
||||||
void ps_div(UGeckoInstruction inst);
|
|
||||||
void ps_madd(UGeckoInstruction inst);
|
void ps_madd(UGeckoInstruction inst);
|
||||||
void ps_madds0(UGeckoInstruction inst);
|
void ps_madds0(UGeckoInstruction inst);
|
||||||
void ps_madds1(UGeckoInstruction inst);
|
void ps_madds1(UGeckoInstruction inst);
|
||||||
|
@ -168,7 +166,6 @@ public:
|
||||||
void ps_merge11(UGeckoInstruction inst);
|
void ps_merge11(UGeckoInstruction inst);
|
||||||
void ps_mr(UGeckoInstruction inst);
|
void ps_mr(UGeckoInstruction inst);
|
||||||
void ps_msub(UGeckoInstruction inst);
|
void ps_msub(UGeckoInstruction inst);
|
||||||
void ps_mul(UGeckoInstruction inst);
|
|
||||||
void ps_muls0(UGeckoInstruction inst);
|
void ps_muls0(UGeckoInstruction inst);
|
||||||
void ps_muls1(UGeckoInstruction inst);
|
void ps_muls1(UGeckoInstruction inst);
|
||||||
void ps_nabs(UGeckoInstruction inst);
|
void ps_nabs(UGeckoInstruction inst);
|
||||||
|
@ -177,7 +174,6 @@ public:
|
||||||
void ps_neg(UGeckoInstruction inst);
|
void ps_neg(UGeckoInstruction inst);
|
||||||
void ps_res(UGeckoInstruction inst);
|
void ps_res(UGeckoInstruction inst);
|
||||||
void ps_sel(UGeckoInstruction inst);
|
void ps_sel(UGeckoInstruction inst);
|
||||||
void ps_sub(UGeckoInstruction inst);
|
|
||||||
void ps_sum0(UGeckoInstruction inst);
|
void ps_sum0(UGeckoInstruction inst);
|
||||||
void ps_sum1(UGeckoInstruction inst);
|
void ps_sum1(UGeckoInstruction inst);
|
||||||
|
|
||||||
|
|
|
@ -40,31 +40,41 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
|
||||||
u32 a = inst.FA, d = inst.FD;
|
u32 a = inst.FA, d = inst.FD;
|
||||||
u32 b = inst.SUBOP5 == 25 ? inst.FC : inst.FB;
|
u32 b = inst.SUBOP5 == 25 ? inst.FC : inst.FB;
|
||||||
|
|
||||||
bool single = inst.OPCD == 4 || inst.OPCD == 59;
|
bool single = inst.OPCD == 59;
|
||||||
|
bool packed = inst.OPCD == 4;
|
||||||
|
|
||||||
ARM64Reg VA = EncodeRegToDouble(fpr.R(a, REG_IS_LOADED));
|
if (packed)
|
||||||
ARM64Reg VB = EncodeRegToDouble(fpr.R(b, REG_IS_LOADED));
|
|
||||||
ARM64Reg VD = EncodeRegToDouble(fpr.RW(d, single ? REG_DUP : REG_LOWER_PAIR));
|
|
||||||
|
|
||||||
switch (inst.SUBOP5)
|
|
||||||
{
|
{
|
||||||
case 18:
|
ARM64Reg VA = fpr.R(a, REG_REG);
|
||||||
m_float_emit.FDIV(VD, VA, VB);
|
ARM64Reg VB = fpr.R(b, REG_REG);
|
||||||
break;
|
ARM64Reg VD = fpr.RW(d, REG_REG);
|
||||||
case 20:
|
|
||||||
m_float_emit.FSUB(VD, VA, VB);
|
switch (inst.SUBOP5)
|
||||||
break;
|
{
|
||||||
case 21:
|
case 18: m_float_emit.FDIV(64, VD, VA, VB); break;
|
||||||
m_float_emit.FADD(VD, VA, VB);
|
case 20: m_float_emit.FSUB(64, VD, VA, VB); break;
|
||||||
break;
|
case 21: m_float_emit.FADD(64, VD, VA, VB); break;
|
||||||
case 25:
|
case 25: m_float_emit.FMUL(64, VD, VA, VB); break;
|
||||||
m_float_emit.FMUL(VD, VA, VB);
|
default: _assert_msg_(DYNA_REC, 0, "fp_arith WTF!!!");
|
||||||
break;
|
}
|
||||||
default:
|
}
|
||||||
_assert_msg_(DYNA_REC, 0, "fp_arith WTF!!!");
|
else
|
||||||
|
{
|
||||||
|
ARM64Reg VA = EncodeRegToDouble(fpr.R(a, REG_IS_LOADED));
|
||||||
|
ARM64Reg VB = EncodeRegToDouble(fpr.R(b, REG_IS_LOADED));
|
||||||
|
ARM64Reg VD = EncodeRegToDouble(fpr.RW(d, single ? REG_DUP : REG_LOWER_PAIR));
|
||||||
|
|
||||||
|
switch (inst.SUBOP5)
|
||||||
|
{
|
||||||
|
case 18: m_float_emit.FDIV(VD, VA, VB); break;
|
||||||
|
case 20: m_float_emit.FSUB(VD, VA, VB); break;
|
||||||
|
case 21: m_float_emit.FADD(VD, VA, VB); break;
|
||||||
|
case 25: m_float_emit.FMUL(VD, VA, VB); break;
|
||||||
|
default: _assert_msg_(DYNA_REC, 0, "fp_arith WTF!!!");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (single)
|
if (single || packed)
|
||||||
fpr.FixSinglePrecision(d);
|
fpr.FixSinglePrecision(d);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -31,40 +31,6 @@ void JitArm64::ps_abs(UGeckoInstruction inst)
|
||||||
m_float_emit.FABS(64, VD, VB);
|
m_float_emit.FABS(64, VD, VB);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::ps_add(UGeckoInstruction inst)
|
|
||||||
{
|
|
||||||
INSTRUCTION_START
|
|
||||||
JITDISABLE(bJITPairedOff);
|
|
||||||
FALLBACK_IF(inst.Rc);
|
|
||||||
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);
|
|
||||||
|
|
||||||
u32 a = inst.FA, b = inst.FB, d = inst.FD;
|
|
||||||
|
|
||||||
ARM64Reg VA = fpr.R(a, REG_REG);
|
|
||||||
ARM64Reg VB = fpr.R(b, REG_REG);
|
|
||||||
ARM64Reg VD = fpr.RW(d, REG_REG);
|
|
||||||
|
|
||||||
m_float_emit.FADD(64, VD, VA, VB);
|
|
||||||
fpr.FixSinglePrecision(d);
|
|
||||||
}
|
|
||||||
|
|
||||||
void JitArm64::ps_div(UGeckoInstruction inst)
|
|
||||||
{
|
|
||||||
INSTRUCTION_START
|
|
||||||
JITDISABLE(bJITPairedOff);
|
|
||||||
FALLBACK_IF(inst.Rc);
|
|
||||||
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);
|
|
||||||
|
|
||||||
u32 a = inst.FA, b = inst.FB, d = inst.FD;
|
|
||||||
|
|
||||||
ARM64Reg VA = fpr.R(a, REG_REG);
|
|
||||||
ARM64Reg VB = fpr.R(b, REG_REG);
|
|
||||||
ARM64Reg VD = fpr.RW(d, REG_REG);
|
|
||||||
|
|
||||||
m_float_emit.FDIV(64, VD, VA, VB);
|
|
||||||
fpr.FixSinglePrecision(d);
|
|
||||||
}
|
|
||||||
|
|
||||||
void JitArm64::ps_madd(UGeckoInstruction inst)
|
void JitArm64::ps_madd(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
|
@ -223,23 +189,6 @@ void JitArm64::ps_mr(UGeckoInstruction inst)
|
||||||
m_float_emit.ORR(VD, VB, VB);
|
m_float_emit.ORR(VD, VB, VB);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::ps_mul(UGeckoInstruction inst)
|
|
||||||
{
|
|
||||||
INSTRUCTION_START
|
|
||||||
JITDISABLE(bJITPairedOff);
|
|
||||||
FALLBACK_IF(inst.Rc);
|
|
||||||
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);
|
|
||||||
|
|
||||||
u32 a = inst.FA, c = inst.FC, d = inst.FD;
|
|
||||||
|
|
||||||
ARM64Reg VA = fpr.R(a, REG_REG);
|
|
||||||
ARM64Reg VC = fpr.R(c, REG_REG);
|
|
||||||
ARM64Reg VD = fpr.RW(d, REG_REG);
|
|
||||||
|
|
||||||
m_float_emit.FMUL(64, VD, VA, VC);
|
|
||||||
fpr.FixSinglePrecision(d);
|
|
||||||
}
|
|
||||||
|
|
||||||
void JitArm64::ps_muls0(UGeckoInstruction inst)
|
void JitArm64::ps_muls0(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
|
@ -421,23 +370,6 @@ void JitArm64::ps_sel(UGeckoInstruction inst)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::ps_sub(UGeckoInstruction inst)
|
|
||||||
{
|
|
||||||
INSTRUCTION_START
|
|
||||||
JITDISABLE(bJITPairedOff);
|
|
||||||
FALLBACK_IF(inst.Rc);
|
|
||||||
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);
|
|
||||||
|
|
||||||
u32 a = inst.FA, b = inst.FB, d = inst.FD;
|
|
||||||
|
|
||||||
ARM64Reg VA = fpr.R(a, REG_REG);
|
|
||||||
ARM64Reg VB = fpr.R(b, REG_REG);
|
|
||||||
ARM64Reg VD = fpr.RW(d, REG_REG);
|
|
||||||
|
|
||||||
m_float_emit.FSUB(64, VD, VA, VB);
|
|
||||||
fpr.FixSinglePrecision(d);
|
|
||||||
}
|
|
||||||
|
|
||||||
void JitArm64::ps_sum0(UGeckoInstruction inst)
|
void JitArm64::ps_sum0(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
|
|
|
@ -126,12 +126,12 @@ static GekkoOPTemplate table4_2[] =
|
||||||
{13, &JitArm64::ps_muls1}, // ps_muls1
|
{13, &JitArm64::ps_muls1}, // ps_muls1
|
||||||
{14, &JitArm64::ps_madds0}, // ps_madds0
|
{14, &JitArm64::ps_madds0}, // ps_madds0
|
||||||
{15, &JitArm64::ps_madds1}, // ps_madds1
|
{15, &JitArm64::ps_madds1}, // ps_madds1
|
||||||
{18, &JitArm64::ps_div}, // ps_div
|
{18, &JitArm64::fp_arith}, // ps_div
|
||||||
{20, &JitArm64::ps_sub}, // ps_sub
|
{20, &JitArm64::fp_arith}, // ps_sub
|
||||||
{21, &JitArm64::ps_add}, // ps_add
|
{21, &JitArm64::fp_arith}, // ps_add
|
||||||
{23, &JitArm64::ps_sel}, // ps_sel
|
{23, &JitArm64::ps_sel}, // ps_sel
|
||||||
{24, &JitArm64::ps_res}, // ps_res
|
{24, &JitArm64::ps_res}, // ps_res
|
||||||
{25, &JitArm64::ps_mul}, // ps_mul
|
{25, &JitArm64::fp_arith}, // ps_mul
|
||||||
{26, &JitArm64::FallBackToInterpreter}, // ps_rsqrte
|
{26, &JitArm64::FallBackToInterpreter}, // ps_rsqrte
|
||||||
{28, &JitArm64::ps_msub}, // ps_msub
|
{28, &JitArm64::ps_msub}, // ps_msub
|
||||||
{29, &JitArm64::ps_madd}, // ps_madd
|
{29, &JitArm64::ps_madd}, // ps_madd
|
||||||
|
|
Loading…
Reference in New Issue