Merge pull request #2899 from Sonicadvance1/aarch64_fctiwzx

[AArch64] Implement fctiwzx
Ryan Houdek 2015-08-24 13:22:27 -05:00
commit d96be9250c
5 changed files with 143 additions and 1 deletion


@@ -2505,6 +2505,23 @@ void ARM64FloatEmitter::EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64
		Rm.GetData() | (1 << 11) | (Rn << 5) | Rt);
}

void ARM64FloatEmitter::EncodeModImm(bool Q, u8 op, u8 cmode, u8 o2, ARM64Reg Rd, u8 abcdefgh)
{
	union
	{
		u8 hex;
		struct
		{
			unsigned defgh : 5;
			unsigned abc : 3;
		};
	} v;
	v.hex = abcdefgh;

	Rd = DecodeReg(Rd);
	Write32((Q << 30) | (op << 29) | (0xF << 24) | (v.abc << 16) | (cmode << 12) |
	        (o2 << 11) | (1 << 10) | (v.defgh << 5) | Rd);
}
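
The union splits the 8-bit immediate into the abc (top 3 bits) and defgh (low 5 bits) fields, which the AArch64 "Advanced SIMD modified immediate" format stores in separate places in the instruction word. As a sanity check, here is a minimal standalone sketch of the same packing; EncodeModImmSketch is a made-up name for illustration, not emitter code, and the expected word is the architectural encoding of MOVI V0.2D, #0xFF00FF00FF00FF00:

#include <cassert>
#include <cstdint>

// Field layout: 31=0 | 30=Q | 29=op | 28..19=0111100000 | 18..16=abc |
//               15..12=cmode | 11=o2 | 10=1 | 9..5=defgh | 4..0=Rd
static uint32_t EncodeModImmSketch(bool Q, uint8_t op, uint8_t cmode,
                                   uint8_t o2, uint8_t Rd, uint8_t abcdefgh)
{
	uint32_t abc = abcdefgh >> 5;      // top 3 immediate bits
	uint32_t defgh = abcdefgh & 0x1F;  // low 5 immediate bits
	return (Q << 30) | (op << 29) | (0xFu << 24) | (abc << 16) |
	       (cmode << 12) | (o2 << 11) | (1u << 10) | (defgh << 5) | Rd;
}

int main()
{
	// MOVI V0.2D, #0xFF00FF00FF00FF00: Q=1, op=1, cmode=0xE; abcdefgh has
	// bit i set for each 0xFF byte i, giving 0b10101010 = 0xAA.
	assert(EncodeModImmSketch(true, 1, 0xE, 0, 0, 0xAA) == 0x6F05E540u);
}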

void ARM64FloatEmitter::LDR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
	EmitLoadStoreImmediate(size, 1, type, Rt, Rn, imm);
@@ -3630,6 +3647,90 @@ void ARM64FloatEmitter::FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8
	EmitVectorxElement(0, 2 | (size >> 6), L, 1, H, Rd, Rn, Rm);
}

// Modified Immediate
void ARM64FloatEmitter::MOVI(u8 size, ARM64Reg Rd, u64 imm, u8 shift)
{
	bool Q = IsQuad(Rd);
	u8 cmode = 0;
	u8 op = 0;
	u8 abcdefgh = imm & 0xFF;

	if (size == 8)
	{
		_assert_msg_(DYNA_REC, shift == 0, "%s(size8) doesn't support shift!", __FUNCTION__);
		_assert_msg_(DYNA_REC, !(imm & ~0xFFULL), "%s(size8) only supports 8bit values!", __FUNCTION__);
	}
	else if (size == 16)
	{
		_assert_msg_(DYNA_REC, shift == 0 || shift == 8, "%s(size16) only supports shift of {0, 8}!", __FUNCTION__);
		_assert_msg_(DYNA_REC, !(imm & ~0xFFULL), "%s(size16) only supports 8bit values!", __FUNCTION__);

		if (shift == 8)
			cmode |= 2;
	}
	else if (size == 32)
	{
		_assert_msg_(DYNA_REC,
			shift == 0 || shift == 8 || shift == 16 || shift == 24,
			"%s(size32) only supports shift of {0, 8, 16, 24}!", __FUNCTION__);
		// XXX: Implement support for MOVI - shifting ones variant
		_assert_msg_(DYNA_REC, !(imm & ~0xFFULL), "%s(size32) only supports 8bit values!", __FUNCTION__);

		switch (shift)
		{
		case 8: cmode |= 2; break;
		case 16: cmode |= 4; break;
		case 24: cmode |= 6; break;
		default: break;
		}
	}
	else // 64
	{
		_assert_msg_(DYNA_REC, shift == 0, "%s(size64) doesn't support shift!", __FUNCTION__);

		op = 1;
		cmode = 0xE;
		abcdefgh = 0;
		for (int i = 0; i < 8; ++i)
		{
			u8 tmp = (imm >> (i << 3)) & 0xFF;
			_assert_msg_(DYNA_REC, tmp == 0xFF || tmp == 0, "%s(size64) Invalid immediate!", __FUNCTION__);
			if (tmp == 0xFF)
				abcdefgh |= (1 << i);
		}
	}

	EncodeModImm(Q, op, cmode, 0, Rd, abcdefgh);
}
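
The 64-bit path works because the architecture's 64-bit MOVI can only materialize immediates whose eight bytes are each 0x00 or 0xFF; the loop folds each 0xFF byte into one bit of abcdefgh. A small sketch of that encodability check, assuming nothing beyond the standard library (MoviImm64 is a hypothetical helper, not part of the emitter):

#include <cstdint>
#include <optional>

// Returns the abcdefgh control byte for a 64-bit MOVI immediate, or
// std::nullopt if the value is not encodable (some byte is neither
// 0x00 nor 0xFF).
static std::optional<uint8_t> MoviImm64(uint64_t imm)
{
	uint8_t abcdefgh = 0;
	for (int i = 0; i < 8; ++i)
	{
		uint8_t byte = (imm >> (i * 8)) & 0xFF;
		if (byte == 0xFF)
			abcdefgh |= 1 << i;
		else if (byte != 0)
			return std::nullopt;  // e.g. 0xFFF8000000000000 needs a BIC fixup
	}
	return abcdefgh;
}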

void ARM64FloatEmitter::BIC(u8 size, ARM64Reg Rd, u8 imm, u8 shift)
{
	bool Q = IsQuad(Rd);
	u8 cmode = 1;
	u8 op = 1;

	if (size == 16)
	{
		_assert_msg_(DYNA_REC, shift == 0 || shift == 8, "%s(size16) only supports shift of {0, 8}!", __FUNCTION__);

		if (shift == 8)
			cmode |= 2;
	}
	else if (size == 32)
	{
		_assert_msg_(DYNA_REC,
			shift == 0 || shift == 8 || shift == 16 || shift == 24,
			"%s(size32) only supports shift of {0, 8, 16, 24}!", __FUNCTION__);
		// XXX: Implement support for MOVI - shifting ones variant

		switch (shift)
		{
		case 8: cmode |= 2; break;
		case 16: cmode |= 4; break;
		case 24: cmode |= 6; break;
		default: break;
		}
	}
	else
		_assert_msg_(DYNA_REC, false, "%s only supports size of {16, 32}!", __FUNCTION__);

	EncodeModImm(Q, op, cmode, 0, Rd, imm);
}
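
BIC (vector, immediate) clears the immediate's bits in every element of the given size, which is what makes it useful as a fixup after MOVI. For instance, BIC(16, V0, 0x7) turns a 0xFFFF half-word lane into 0xFFF8; a one-line sanity check of that lane arithmetic:

#include <cassert>
#include <cstdint>

int main()
{
	// Per 16-bit lane, BIC computes lane &= ~imm.
	uint16_t lane = 0xFFFF;
	lane &= static_cast<uint16_t>(~0x7);  // 0xFFFF & 0xFFF8
	assert(lane == 0xFFF8);
}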

void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers, ARM64Reg tmp)
{
	bool bundled_loadstore = false;


@@ -901,6 +901,10 @@ public:
	void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
	void FMLA(u8 esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);

	// Modified Immediate
	void MOVI(u8 size, ARM64Reg Rd, u64 imm, u8 shift = 0);
	void BIC(u8 size, ARM64Reg Rd, u8 imm, u8 shift = 0);

	void MOVI2F(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG, bool negate = false);
	void MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG);
@@ -938,6 +942,7 @@ private:
	void EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode);
	void EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
	void EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
	void EncodeModImm(bool Q, u8 op, u8 cmode, u8 o2, ARM64Reg Rd, u8 abcdefgh);
	void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
	void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);


@@ -153,6 +153,7 @@ public:
	void fsubx(UGeckoInstruction inst);
	void fcmpx(UGeckoInstruction inst);
	void frspx(UGeckoInstruction inst);
	void fctiwzx(UGeckoInstruction inst);

	// Paired
	void ps_abs(UGeckoInstruction inst);


@@ -558,3 +558,38 @@ void JitArm64::fcmpx(UGeckoInstruction inst)
	gpr.Unlock(WA);
}

void JitArm64::fctiwzx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	u32 b = inst.FB, d = inst.FD;

	fpr.BindToRegister(d, d == b);

	ARM64Reg VB = fpr.R(b);
	ARM64Reg VD = fpr.R(d);
	ARM64Reg V0 = fpr.GetReg();

	// Generate the constant 0xFFF8000000000000ULL in two steps, since it
	// isn't directly encodable: MOVI sets the top two bytes to 0xFF, then
	// BIC clears the low three bits of each 16-bit element.
	m_float_emit.MOVI(64, EncodeRegToDouble(V0), 0xFFFF000000000000ULL);
	m_float_emit.BIC(16, EncodeRegToDouble(V0), 0x7);

	// Narrow to single precision, convert toward zero to a 32-bit integer,
	// then OR in the high-word constant.
	if (fpr.IsLower(d))
	{
		m_float_emit.FCVTN(32, EncodeRegToDouble(VD), EncodeRegToDouble(VB));
		m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VD), ROUND_Z);
		m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0));
	}
	else
	{
		ARM64Reg V1 = fpr.GetReg();
		m_float_emit.FCVTN(32, EncodeRegToDouble(V1), EncodeRegToDouble(VB));
		m_float_emit.FCVTS(EncodeRegToSingle(V1), EncodeRegToSingle(V1), ROUND_Z);
		m_float_emit.ORR(EncodeRegToDouble(V1), EncodeRegToDouble(V1), EncodeRegToDouble(V0));
		m_float_emit.INS(64, VD, 0, V1, 0);
		fpr.Unlock(V1);
	}
	fpr.Unlock(V0);
}
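
The two-instruction constant setup is needed because 0xFFF8000000000000 is not a valid 64-bit MOVI immediate: its 0xF8 byte is neither 0x00 nor 0xFF. MOVI first materializes 0xFFFF000000000000, then BIC(16, 0x7) clears bits 0..2 of every 16-bit lane, which only changes the top lane. ORRing the result over the truncated integer mirrors the 0xFFF80000 high word the interpreter stores for fctiwz. A sketch of the constant arithmetic, in plain C++:

#include <cassert>
#include <cstdint>

int main()
{
	// Step 1: MOVI(64) with a byte-granular pattern.
	uint64_t v = 0xFFFF000000000000ULL;

	// Step 2: BIC(16, 0x7) clears bits 0..2 in each 16-bit lane;
	// only the top lane (0xFFFF -> 0xFFF8) is affected.
	for (int lane = 0; lane < 4; ++lane)
		v &= ~(0x7ULL << (lane * 16));

	assert(v == 0xFFF8000000000000ULL);
}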


@@ -330,7 +330,7 @@ static GekkoOPTemplate table63[] =
	{32, &JitArm64::fcmpx},                 // fcmpo
	{0, &JitArm64::fcmpx},                  // fcmpu
	{14, &JitArm64::FallBackToInterpreter}, // fctiwx
-	{15, &JitArm64::FallBackToInterpreter}, // fctiwzx
+	{15, &JitArm64::fctiwzx},               // fctiwzx
	{72, &JitArm64::fmrx},                  // fmrx
	{136, &JitArm64::fnabsx},               // fnabsx
	{40, &JitArm64::fnegx},                 // fnegx