[AArch64] Implement fctiwzx
Improves the povray benchmark time by 5.6%
This commit is contained in:
parent
4fa23abbe1
commit
561744819e
|
@ -147,6 +147,7 @@ public:
|
||||||
void fsubx(UGeckoInstruction inst);
|
void fsubx(UGeckoInstruction inst);
|
||||||
void fcmpx(UGeckoInstruction inst);
|
void fcmpx(UGeckoInstruction inst);
|
||||||
void frspx(UGeckoInstruction inst);
|
void frspx(UGeckoInstruction inst);
|
||||||
|
void fctiwzx(UGeckoInstruction inst);
|
||||||
|
|
||||||
// Paired
|
// Paired
|
||||||
void ps_abs(UGeckoInstruction inst);
|
void ps_abs(UGeckoInstruction inst);
|
||||||
|
|
|
@ -558,3 +558,38 @@ void JitArm64::fcmpx(UGeckoInstruction inst)
|
||||||
|
|
||||||
gpr.Unlock(WA);
|
gpr.Unlock(WA);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits AArch64 code for the fctiwzx instruction: the value in FPR `FB` is
// converted to a 32-bit integer (ROUND_Z mode) and the result, OR-ed with the
// constant 0xFFF8000000000000, is placed in the lower 64 bits of FPR `FD`.
//
// Instructions with the Rc bit set are not handled here (see FALLBACK_IF).
//
// NOTE(review): the source is narrowed from double to single (FCVTN) before
// the integer conversion; this looks like it could lose precision for values
// that need more than single-precision mantissa bits -- confirm against the
// interpreter's behaviour.
void JitArm64::fctiwzx(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITFloatingPointOff);
    FALLBACK_IF(inst.Rc);

    const u32 src_index = inst.FB;
    const u32 dst_index = inst.FD;

    // Only ask for the destination's current contents when it is also the source.
    fpr.BindToRegister(dst_index, dst_index == src_index);

    const ARM64Reg src_reg = fpr.R(src_index);
    const ARM64Reg dst_reg = fpr.R(dst_index);

    // Generate 0xFFF8000000000000ULL: load 0xFFFF in the top 16-bit lane, then
    // clear the low three bits of every 16-bit lane.
    const ARM64Reg mask_reg = fpr.GetReg();
    m_float_emit.MOVI(64, EncodeRegToDouble(mask_reg), 0xFFFF000000000000ULL);
    m_float_emit.BIC(16, EncodeRegToDouble(mask_reg), 0x7);

    // When the destination is tracked as "lower only" the result is built
    // directly in it; otherwise it is built in a scratch register and inserted
    // into element 0 so the rest of the destination is left alone.
    const bool build_in_place = fpr.IsLower(dst_index);
    const ARM64Reg work_reg = build_in_place ? dst_reg : fpr.GetReg();
    const ARM64Reg work_double = EncodeRegToDouble(work_reg);
    const ARM64Reg work_single = EncodeRegToSingle(work_reg);

    m_float_emit.FCVTN(32, work_double, EncodeRegToDouble(src_reg));
    m_float_emit.FCVTS(work_single, work_single, ROUND_Z);
    m_float_emit.ORR(work_double, work_double, EncodeRegToDouble(mask_reg));

    if (!build_in_place)
    {
        m_float_emit.INS(64, dst_reg, 0, work_reg, 0);
        fpr.Unlock(work_reg);
    }

    fpr.Unlock(mask_reg);
}
|
||||||
|
|
|
@ -330,7 +330,7 @@ static GekkoOPTemplate table63[] =
|
||||||
{32, &JitArm64::fcmpx}, // fcmpo
|
{32, &JitArm64::fcmpx}, // fcmpo
|
||||||
{0, &JitArm64::fcmpx}, // fcmpu
|
{0, &JitArm64::fcmpx}, // fcmpu
|
||||||
{14, &JitArm64::FallBackToInterpreter}, // fctiwx
|
{14, &JitArm64::FallBackToInterpreter}, // fctiwx
|
||||||
{15, &JitArm64::FallBackToInterpreter}, // fctiwzx
|
{15, &JitArm64::fctiwzx}, // fctiwzx
|
||||||
{72, &JitArm64::fmrx}, // fmrx
|
{72, &JitArm64::fmrx}, // fmrx
|
||||||
{136, &JitArm64::fnabsx}, // fnabsx
|
{136, &JitArm64::fnabsx}, // fnabsx
|
||||||
{40, &JitArm64::fnegx}, // fnegx
|
{40, &JitArm64::fnegx}, // fnegx
|
||||||
|
|
Loading…
Reference in New Issue