Merge pull request #10202 from merryhime/fctiwx
JitArm64: Implement fctiwx
This commit is contained in:
commit
58f8c6e529
|
@ -2913,6 +2913,10 @@ void ARM64FloatEmitter::FSQRT(ARM64Reg Rd, ARM64Reg Rn)
|
|||
{
|
||||
EmitScalar1Source(0, 0, IsDouble(Rd), 3, Rd, Rn);
|
||||
}
|
||||
// Emits the scalar FRINTI instruction with Rn as source and Rd as destination by delegating to
// EmitScalar1Source. IsDouble(Rd) supplies the precision argument, and 15 is passed as the fourth
// argument (presumably the opcode selector; FSQRT passes 3 in the same position).
// NOTE(review): per the Arm ISA, FRINTI rounds to an integral floating-point value using the
// current FPCR rounding mode - confirm against the architecture manual.
void ARM64FloatEmitter::FRINTI(ARM64Reg Rd, ARM64Reg Rn)
|
||||
{
|
||||
EmitScalar1Source(0, 0, IsDouble(Rd), 15, Rd, Rn);
|
||||
}
|
||||
|
||||
void ARM64FloatEmitter::FRECPE(ARM64Reg Rd, ARM64Reg Rn)
|
||||
{
|
||||
|
|
|
@ -1230,6 +1230,7 @@ public:
|
|||
void FABS(ARM64Reg Rd, ARM64Reg Rn);
|
||||
void FNEG(ARM64Reg Rd, ARM64Reg Rn);
|
||||
void FSQRT(ARM64Reg Rd, ARM64Reg Rn);
|
||||
void FRINTI(ARM64Reg Rd, ARM64Reg Rn);
|
||||
void FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top = false); // Also generalized move between GPR/FP
|
||||
void FRECPE(ARM64Reg Rd, ARM64Reg Rn);
|
||||
void FRSQRTE(ARM64Reg Rd, ARM64Reg Rn);
|
||||
|
|
|
@ -144,7 +144,7 @@ public:
|
|||
void fselx(UGeckoInstruction inst);
|
||||
void fcmpX(UGeckoInstruction inst);
|
||||
void frspx(UGeckoInstruction inst);
|
||||
void fctiwzx(UGeckoInstruction inst);
|
||||
void fctiwx(UGeckoInstruction inst);
|
||||
void fresx(UGeckoInstruction inst);
|
||||
void frsqrtex(UGeckoInstruction inst);
|
||||
|
||||
|
|
|
@ -507,7 +507,7 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
|
|||
FloatCompare(inst);
|
||||
}
|
||||
|
||||
void JitArm64::fctiwzx(UGeckoInstruction inst)
|
||||
void JitArm64::fctiwx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITFloatingPointOff);
|
||||
|
@ -518,19 +518,32 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
|
|||
const u32 d = inst.FD;
|
||||
|
||||
const bool single = fpr.IsSingle(b, true);
|
||||
const bool is_fctiwzx = inst.SUBOP10 == 15;
|
||||
|
||||
const ARM64Reg VB = fpr.R(b, single ? RegType::LowerPairSingle : RegType::LowerPair);
|
||||
const ARM64Reg VD = fpr.RW(d, RegType::LowerPair);
|
||||
|
||||
// TODO: The upper 32 bits of the result are set to 0xfff80000, except for -0.0 where they should be
|
||||
// set to 0xfff80001 (TODO).
|
||||
|
||||
if (single)
|
||||
{
|
||||
const ARM64Reg V0 = fpr.GetReg();
|
||||
|
||||
if (is_fctiwzx)
|
||||
{
|
||||
m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VB), RoundingMode::Z);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_float_emit.FRINTI(EncodeRegToSingle(VD), EncodeRegToSingle(VB));
|
||||
m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VD), RoundingMode::Z);
|
||||
}
|
||||
|
||||
// Generate 0xFFF8'0000'0000'0000ULL
|
||||
m_float_emit.MOVI(64, EncodeRegToDouble(V0), 0xFFFF'0000'0000'0000ULL);
|
||||
m_float_emit.BIC(16, EncodeRegToDouble(V0), 0x7);
|
||||
|
||||
m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VB), RoundingMode::Z);
|
||||
m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0));
|
||||
|
||||
fpr.Unlock(V0);
|
||||
|
@ -539,7 +552,16 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
|
|||
{
|
||||
const ARM64Reg WA = gpr.GetReg();
|
||||
|
||||
m_float_emit.FCVTS(WA, EncodeRegToDouble(VB), RoundingMode::Z);
|
||||
if (is_fctiwzx)
|
||||
{
|
||||
m_float_emit.FCVTS(WA, EncodeRegToDouble(VB), RoundingMode::Z);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_float_emit.FRINTI(EncodeRegToDouble(VD), EncodeRegToDouble(VB));
|
||||
m_float_emit.FCVTS(WA, EncodeRegToDouble(VD), RoundingMode::Z);
|
||||
}
|
||||
|
||||
ORR(EncodeRegTo64(WA), EncodeRegTo64(WA), LogicalImm(0xFFF8'0000'0000'0000ULL, 64));
|
||||
m_float_emit.FMOV(EncodeRegToDouble(VD), EncodeRegTo64(WA));
|
||||
|
||||
|
|
|
@ -304,15 +304,15 @@ constexpr std::array<GekkoOPTemplate, 9> table59{{
|
|||
}};
|
||||
|
||||
constexpr std::array<GekkoOPTemplate, 15> table63{{
|
||||
{264, &JitArm64::fp_logic}, // fabsx
|
||||
{32, &JitArm64::fcmpX}, // fcmpo
|
||||
{0, &JitArm64::fcmpX}, // fcmpu
|
||||
{14, &JitArm64::FallBackToInterpreter}, // fctiwx
|
||||
{15, &JitArm64::fctiwzx}, // fctiwzx
|
||||
{72, &JitArm64::fp_logic}, // fmrx
|
||||
{136, &JitArm64::fp_logic}, // fnabsx
|
||||
{40, &JitArm64::fp_logic}, // fnegx
|
||||
{12, &JitArm64::frspx}, // frspx
|
||||
{264, &JitArm64::fp_logic}, // fabsx
|
||||
{32, &JitArm64::fcmpX}, // fcmpo
|
||||
{0, &JitArm64::fcmpX}, // fcmpu
|
||||
{14, &JitArm64::fctiwx}, // fctiwx
|
||||
{15, &JitArm64::fctiwx}, // fctiwzx
|
||||
{72, &JitArm64::fp_logic}, // fmrx
|
||||
{136, &JitArm64::fp_logic}, // fnabsx
|
||||
{40, &JitArm64::fp_logic}, // fnegx
|
||||
{12, &JitArm64::frspx}, // frspx
|
||||
|
||||
{64, &JitArm64::mcrfs}, // mcrfs
|
||||
{583, &JitArm64::mffsx}, // mffsx
|
||||
|
|
Loading…
Reference in New Issue