[AArch64] Upstream PPSSPP's emitter changes.
Requires a minor change in the JIT to make sure everything still works.
This commit is contained in:
parent
cf7178b4c2
commit
05b72c5d31
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +1,4 @@
|
||||||
// Copyright 2014 Dolphin Emulator Project
|
// Copyright 2015 Dolphin Emulator Project
|
||||||
// Licensed under GPLv2+
|
// Licensed under GPLv2+
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
@ -74,19 +74,34 @@ enum ARM64Reg
|
||||||
PLTL2KEEP, PLTL2STRM,
|
PLTL2KEEP, PLTL2STRM,
|
||||||
PLTL3KEEP, PLTL3STRM,
|
PLTL3KEEP, PLTL3STRM,
|
||||||
|
|
||||||
|
WZR = WSP,
|
||||||
|
ZR = SP,
|
||||||
|
|
||||||
INVALID_REG = 0xFFFFFFFF
|
INVALID_REG = 0xFFFFFFFF
|
||||||
};
|
};
|
||||||
|
|
||||||
inline bool Is64Bit(ARM64Reg reg) { return reg & 0x20; }
|
// Reports whether bit 0x20 of the register id is set (the bit EncodeRegTo64 turns on).
inline bool Is64Bit(ARM64Reg reg)
{
	return (reg & 0x20) == 0x20;
}
|
||||||
inline bool IsSingle(ARM64Reg reg) { return (reg & 0xC0) == 0x40; }
|
// True when the class bits of the register id (mask 0xC0) are exactly 0x40.
inline bool IsSingle(ARM64Reg reg)
{
	const auto reg_class = reg & 0xC0;
	return reg_class == 0x40;
}
|
||||||
inline bool IsDouble(ARM64Reg reg) { return (reg & 0xC0) == 0x80; }
|
// True when the class bits of the register id (mask 0xC0) are exactly 0x80.
inline bool IsDouble(ARM64Reg reg)
{
	const auto reg_class = reg & 0xC0;
	return reg_class == 0x80;
}
|
||||||
|
// A register counts as scalar when it is either a single or a double register.
inline bool IsScalar(ARM64Reg reg)
{
	if (IsSingle(reg))
		return true;
	return IsDouble(reg);
}
|
||||||
inline bool IsQuad(ARM64Reg reg) { return (reg & 0xC0) == 0xC0; }
|
// True when both class bits of the register id (mask 0xC0) are set.
inline bool IsQuad(ARM64Reg reg)
{
	const auto reg_class = reg & 0xC0;
	return reg_class == 0xC0;
}
|
||||||
inline bool IsVector(ARM64Reg reg) { return (reg & 0xC0) != 0; }
|
// True when at least one of the class bits (mask 0xC0) is set, i.e. for
// anything IsSingle, IsDouble or IsQuad would accept.
inline bool IsVector(ARM64Reg reg)
{
	const bool no_class_bits = (reg & 0xC0) == 0;
	return !no_class_bits;
}
|
||||||
|
// True when the register id, read as a signed int, is below 0x40
// (i.e. none of the 0xC0 class bits are set for ordinary ids).
inline bool IsGPR(ARM64Reg reg)
{
	return static_cast<int>(reg) < 0x40;
}
|
||||||
|
|
||||||
inline ARM64Reg DecodeReg(ARM64Reg reg) { return (ARM64Reg)(reg & 0x1F); }
|
// Strips everything but the low five bits of the register id.
inline ARM64Reg DecodeReg(ARM64Reg reg)
{
	return static_cast<ARM64Reg>(reg & 0x1F);
}
|
||||||
inline ARM64Reg EncodeRegTo64(ARM64Reg reg) { return (ARM64Reg)(reg | 0x20); }
|
// Returns the register id with bit 0x20 set (the bit Is64Bit tests).
inline ARM64Reg EncodeRegTo64(ARM64Reg reg)
{
	return static_cast<ARM64Reg>(reg | 0x20);
}
|
||||||
|
// Maps a register id onto the S-register range: low five bits of the id, offset by S0.
inline ARM64Reg EncodeRegToSingle(ARM64Reg reg)
{
	const ARM64Reg index = DecodeReg(reg);
	return static_cast<ARM64Reg>(index + S0);
}
|
||||||
inline ARM64Reg EncodeRegToDouble(ARM64Reg reg) { return (ARM64Reg)((reg & ~0xC0) | 0x80); }
|
// Clears the class bits (mask 0xC0) and sets them to 0x80, the pattern IsDouble tests for.
inline ARM64Reg EncodeRegToDouble(ARM64Reg reg)
{
	const auto without_class = reg & ~0xC0;
	return static_cast<ARM64Reg>(without_class | 0x80);
}
|
||||||
inline ARM64Reg EncodeRegToQuad(ARM64Reg reg) { return (ARM64Reg)(reg | 0xC0); }
|
// Sets both class bits (0xC0) on the register id, the pattern IsQuad tests for.
inline ARM64Reg EncodeRegToQuad(ARM64Reg reg)
{
	return static_cast<ARM64Reg>(reg | 0xC0);
}
|
||||||
|
|
||||||
|
// For AND/TST/ORR/EOR etc
|
||||||
|
bool IsImmLogical(uint64_t value, unsigned int width, unsigned int *n, unsigned int *imm_s, unsigned int *imm_r);
|
||||||
|
// For ADD/SUB
|
||||||
|
bool IsImmArithmetic(uint64_t input, u32 *val, bool *shift);
|
||||||
|
|
||||||
|
float FPImm8ToFloat(uint8_t bits);
|
||||||
|
bool FPImm8FromFloat(float value, uint8_t *immOut);
|
||||||
|
|
||||||
enum OpType
|
enum OpType
|
||||||
{
|
{
|
||||||
TYPE_IMM = 0,
|
TYPE_IMM = 0,
|
||||||
|
@ -109,8 +124,7 @@ enum IndexType
|
||||||
INDEX_UNSIGNED,
|
INDEX_UNSIGNED,
|
||||||
INDEX_POST,
|
INDEX_POST,
|
||||||
INDEX_PRE,
|
INDEX_PRE,
|
||||||
// Only for VFP loadstore paired
|
INDEX_SIGNED, // used in LDP/STP
|
||||||
INDEX_SIGNED,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
enum ShiftAmount
|
enum ShiftAmount
|
||||||
|
@ -121,12 +135,12 @@ enum ShiftAmount
|
||||||
SHIFT_48 = 3,
|
SHIFT_48 = 3,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum ExtendType
|
enum RoundingMode {
|
||||||
{
|
ROUND_A, // round to nearest, ties to away
|
||||||
EXTEND_UXTW = 2,
|
ROUND_M, // round towards -inf
|
||||||
EXTEND_LSL = 3, // Default for zero shift amount
|
ROUND_N, // round to nearest, ties to even
|
||||||
EXTEND_SXTW = 6,
|
ROUND_P, // round towards +inf
|
||||||
EXTEND_SXTX = 7,
|
ROUND_Z, // round towards zero
|
||||||
};
|
};
|
||||||
|
|
||||||
struct FixupBranch
|
struct FixupBranch
|
||||||
|
@ -157,6 +171,9 @@ enum PStateField
|
||||||
FIELD_SPSel = 0,
|
FIELD_SPSel = 0,
|
||||||
FIELD_DAIFSet,
|
FIELD_DAIFSet,
|
||||||
FIELD_DAIFClr,
|
FIELD_DAIFClr,
|
||||||
|
FIELD_NZCV, // The only system registers accessible from EL0 (user space)
|
||||||
|
FIELD_FPCR = 0x340,
|
||||||
|
FIELD_FPSR = 0x341,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum SystemHint
|
enum SystemHint
|
||||||
|
@ -252,6 +269,7 @@ public:
|
||||||
m_width = WIDTH_32BIT;
|
m_width = WIDTH_32BIT;
|
||||||
m_extend = EXTEND_UXTW;
|
m_extend = EXTEND_UXTW;
|
||||||
}
|
}
|
||||||
|
m_shifttype = ST_LSL;
|
||||||
}
|
}
|
||||||
ArithOption(ARM64Reg Rd, ShiftType shift_type, u32 shift)
|
ArithOption(ARM64Reg Rd, ShiftType shift_type, u32 shift)
|
||||||
{
|
{
|
||||||
|
@ -333,7 +351,7 @@ private:
|
||||||
void EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
|
void EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
|
||||||
void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
|
void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
|
||||||
void EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd);
|
void EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd);
|
||||||
void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
|
void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, int n);
|
||||||
void EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
|
void EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
|
||||||
void EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm);
|
void EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm);
|
||||||
void EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
|
void EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
|
||||||
|
@ -398,7 +416,7 @@ public:
|
||||||
// Unconditional Branch (register)
|
// Unconditional Branch (register)
|
||||||
void BR(ARM64Reg Rn);
|
void BR(ARM64Reg Rn);
|
||||||
void BLR(ARM64Reg Rn);
|
void BLR(ARM64Reg Rn);
|
||||||
void RET(ARM64Reg Rn);
|
void RET(ARM64Reg Rn = X30);
|
||||||
void ERET();
|
void ERET();
|
||||||
void DRPS();
|
void DRPS();
|
||||||
|
|
||||||
|
@ -414,6 +432,10 @@ public:
|
||||||
|
|
||||||
// System
|
// System
|
||||||
void _MSR(PStateField field, u8 imm);
|
void _MSR(PStateField field, u8 imm);
|
||||||
|
|
||||||
|
void _MSR(PStateField field, ARM64Reg Rt);
|
||||||
|
void MRS(ARM64Reg Rt, PStateField field);
|
||||||
|
|
||||||
void HINT(SystemHint op);
|
void HINT(SystemHint op);
|
||||||
void CLREX();
|
void CLREX();
|
||||||
void DSB(BarrierType type);
|
void DSB(BarrierType type);
|
||||||
|
@ -454,6 +476,17 @@ public:
|
||||||
void CSINV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
|
void CSINV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
|
||||||
void CSNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
|
void CSNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
|
||||||
|
|
||||||
|
// Aliases
|
||||||
|
// CSET alias: emitted as CSINC Rd, zr, zr with the lowest bit of the
// condition code flipped. The zero register is picked to match Rd's width.
void CSET(ARM64Reg Rd, CCFlags cond)
{
	const ARM64Reg zero = Is64Bit(Rd) ? ZR : WZR;
	const CCFlags flipped = (CCFlags)((u32)cond ^ 1);
	CSINC(Rd, zero, zero, flipped);
}
|
||||||
|
// NEG alias: emitted as SUB Rd, zr, Rs, with ZR or WZR chosen from Rd's width.
void NEG(ARM64Reg Rd, ARM64Reg Rs)
{
	const ARM64Reg zero = Is64Bit(Rd) ? ZR : WZR;
	SUB(Rd, zero, Rs);
}
|
||||||
|
|
||||||
// Data-Processing 1 source
|
// Data-Processing 1 source
|
||||||
void RBIT(ARM64Reg Rd, ARM64Reg Rn);
|
void RBIT(ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void REV16(ARM64Reg Rd, ARM64Reg Rn);
|
void REV16(ARM64Reg Rd, ARM64Reg Rn);
|
||||||
|
@ -500,15 +533,34 @@ public:
|
||||||
void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
|
void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
|
||||||
void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
|
void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
|
||||||
void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
|
void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
|
||||||
|
|
||||||
|
// Wrap the above for saner syntax
|
||||||
|
void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { AND(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
|
||||||
|
void BIC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { BIC(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
|
||||||
|
void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ORR(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
|
||||||
|
void ORN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ORN(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
|
||||||
|
void EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { EOR(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
|
||||||
|
void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { EON(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
|
||||||
|
void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ANDS(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
|
||||||
|
void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { BICS(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
|
||||||
|
|
||||||
|
// Convenience wrappers around ORR. These match the official convenience syntax.
|
||||||
|
void MOV(ARM64Reg Rd, ARM64Reg Rm, ArithOption Shift);
|
||||||
void MOV(ARM64Reg Rd, ARM64Reg Rm);
|
void MOV(ARM64Reg Rd, ARM64Reg Rm);
|
||||||
void MVN(ARM64Reg Rd, ARM64Reg Rm);
|
void MVN(ARM64Reg Rd, ARM64Reg Rm);
|
||||||
|
|
||||||
|
// TODO: These are "slow" as they use arith+shift, should be replaced with UBFM/EXTR variants.
|
||||||
|
void LSR(ARM64Reg Rd, ARM64Reg Rm, int shift);
|
||||||
|
void LSL(ARM64Reg Rd, ARM64Reg Rm, int shift);
|
||||||
|
void ASR(ARM64Reg Rd, ARM64Reg Rm, int shift);
|
||||||
|
void ROR(ARM64Reg Rd, ARM64Reg Rm, int shift);
|
||||||
|
|
||||||
// Logical (immediate)
|
// Logical (immediate)
|
||||||
void AND(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
|
void AND(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
|
||||||
void ANDS(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
|
void ANDS(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
|
||||||
void EOR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
|
void EOR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
|
||||||
void ORR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
|
void ORR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
|
||||||
void TST(ARM64Reg Rn, u32 immr, u32 imms);
|
void TST(ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
|
||||||
|
|
||||||
// Add/subtract (immediate)
|
// Add/subtract (immediate)
|
||||||
void ADD(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
|
void ADD(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
|
||||||
|
@ -526,12 +578,22 @@ public:
|
||||||
void BFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
|
void BFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
|
||||||
void SBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
|
void SBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
|
||||||
void UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
|
void UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
|
||||||
|
|
||||||
|
// Extract register (ROR with two inputs, if same then faster on A67)
|
||||||
|
void EXTR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u32 shift);
|
||||||
|
|
||||||
|
// Aliases
|
||||||
void SXTB(ARM64Reg Rd, ARM64Reg Rn);
|
void SXTB(ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void SXTH(ARM64Reg Rd, ARM64Reg Rn);
|
void SXTH(ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void SXTW(ARM64Reg Rd, ARM64Reg Rn);
|
void SXTW(ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void UXTB(ARM64Reg Rd, ARM64Reg Rn);
|
void UXTB(ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void UXTH(ARM64Reg Rd, ARM64Reg Rn);
|
void UXTH(ARM64Reg Rd, ARM64Reg Rn);
|
||||||
|
|
||||||
|
// UBFX alias: copies `width` bits of Rn, starting at bit `lsb`, into the low
// bits of Rd. Architecturally UBFX Rd, Rn, #lsb, #width is encoded as
// UBFM Rd, Rn, #lsb, #(lsb + width - 1), so imms must be the index of the
// highest extracted bit, not the result of a range comparison (which would
// only ever be 0 or 1).
// Callers must keep width >= 1 and lsb + width within the register size.
void UBFX(ARM64Reg Rd, ARM64Reg Rn, int lsb, int width)
{
	UBFM(Rd, Rn, lsb, lsb + width - 1);
}
|
||||||
|
|
||||||
// Load Register (Literal)
|
// Load Register (Literal)
|
||||||
void LDR(ARM64Reg Rt, u32 imm);
|
void LDR(ARM64Reg Rt, u32 imm);
|
||||||
void LDRSW(ARM64Reg Rt, u32 imm);
|
void LDRSW(ARM64Reg Rt, u32 imm);
|
||||||
|
@ -610,6 +672,32 @@ public:
|
||||||
|
|
||||||
// Wrapper around MOVZ+MOVK
|
// Wrapper around MOVZ+MOVK
|
||||||
void MOVI2R(ARM64Reg Rd, u64 imm, bool optimize = true);
|
void MOVI2R(ARM64Reg Rd, u64 imm, bool optimize = true);
|
||||||
|
template <class P>
|
||||||
|
void MOVP2R(ARM64Reg Rd, P *ptr)
|
||||||
|
{
|
||||||
|
_assert_msg_(DYNA_REC, Is64Bit(Rd), "Can't store pointers in 32-bit registers");
|
||||||
|
MOVI2R(Rd, (uintptr_t)ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wrapper around AND x, y, imm etc. If you are sure the imm will work, no need to pass a scratch register.
|
||||||
|
void ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
|
||||||
|
void ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
|
||||||
|
// Immediate test: forwards to ANDSI2R with ZR or WZR (chosen by Is64Bit(Rn))
// as the destination, passing the optional scratch register through.
void TSTI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG)
{
	const ARM64Reg discard = Is64Bit(Rn) ? ZR : WZR;
	ANDSI2R(discard, Rn, imm, scratch);
}
|
||||||
|
void ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
|
||||||
|
void EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
|
||||||
|
void CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
|
||||||
|
|
||||||
|
void ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
|
||||||
|
void SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
|
||||||
|
void SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
|
||||||
|
|
||||||
|
bool TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
|
||||||
|
bool TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
|
||||||
|
bool TryCMPI2R(ARM64Reg Rn, u32 imm);
|
||||||
|
|
||||||
|
bool TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
|
||||||
|
bool TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
|
||||||
|
bool TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
|
||||||
|
|
||||||
// ABI related
|
// ABI related
|
||||||
void ABI_PushRegisters(BitSet32 registers);
|
void ABI_PushRegisters(BitSet32 registers);
|
||||||
|
@ -633,10 +721,17 @@ public:
|
||||||
ARM64Reg ABI_SetupLambda(const std::function<T(Args...)>* f)
|
ARM64Reg ABI_SetupLambda(const std::function<T(Args...)>* f)
|
||||||
{
|
{
|
||||||
auto trampoline = &ARM64XEmitter::CallLambdaTrampoline<T, Args...>;
|
auto trampoline = &ARM64XEmitter::CallLambdaTrampoline<T, Args...>;
|
||||||
MOVI2R(X30, (u64)trampoline);
|
MOVI2R(X30, (uintptr_t)trampoline);
|
||||||
MOVI2R(X0, (u64)const_cast<void*>((const void*)f));
|
MOVI2R(X0, (uintptr_t)const_cast<void*>((const void*)f));
|
||||||
return X30;
|
return X30;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Plain function call
|
||||||
|
void QuickCallFunction(ARM64Reg scratchreg, const void *func);
|
||||||
|
// Typed convenience overload: converts `func` to const void* and forwards to
// the untyped QuickCallFunction(ARM64Reg, const void*).
template <typename T>
void QuickCallFunction(ARM64Reg scratchreg, T func)
{
	const void *target = (const void *)func;
	QuickCallFunction(scratchreg, target);
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class ARM64FloatEmitter
|
class ARM64FloatEmitter
|
||||||
|
@ -671,14 +766,28 @@ public:
|
||||||
// Scalar - 1 Source
|
// Scalar - 1 Source
|
||||||
void FABS(ARM64Reg Rd, ARM64Reg Rn);
|
void FABS(ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void FNEG(ARM64Reg Rd, ARM64Reg Rn);
|
void FNEG(ARM64Reg Rd, ARM64Reg Rn);
|
||||||
|
void FSQRT(ARM64Reg Rd, ARM64Reg Rn);
|
||||||
|
void FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top = false); // Also generalized move between GPR/FP
|
||||||
|
|
||||||
// Scalar - 2 Source
|
// Scalar - 2 Source
|
||||||
void FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
void FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
void FMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
void FMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
void FSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
void FSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
|
void FDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
|
void FMAX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
|
void FMIN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
|
void FMAXNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
|
void FMINNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
|
void FNMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
|
|
||||||
|
// Scalar - 3 Source. Note - the accumulator is last on ARM!
|
||||||
|
void FMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
|
||||||
|
void FMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
|
||||||
|
void FNMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
|
||||||
|
void FNMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
|
||||||
|
|
||||||
// Scalar floating point immediate
|
// Scalar floating point immediate
|
||||||
void FMOV(ARM64Reg Rd, u32 imm);
|
void FMOV(ARM64Reg Rd, uint8_t imm8);
|
||||||
|
|
||||||
// Vector
|
// Vector
|
||||||
void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
|
@ -686,7 +795,10 @@ public:
|
||||||
void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
|
void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
|
||||||
void FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
void FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
void FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
|
void FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
|
void FMLS(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
void FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
void FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
|
void FCVTL2(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
|
void FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
void FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
void FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
|
@ -697,11 +809,17 @@ public:
|
||||||
void FSUB(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
void FSUB(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
void NOT(ARM64Reg Rd, ARM64Reg Rn);
|
void NOT(ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
|
// Register move expressed as ORR Rd, Rn, Rn.
void MOV(ARM64Reg Rd, ARM64Reg Rn)
{
	const ARM64Reg source = Rn;
	ORR(Rd, source, source);
}
|
||||||
void REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
void REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
void REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
void REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
|
void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
|
||||||
|
void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
|
||||||
void XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
|
void XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
|
|
||||||
// Move
|
// Move
|
||||||
|
@ -714,11 +832,20 @@ public:
|
||||||
// One source
|
// One source
|
||||||
void FCVT(u8 size_to, u8 size_from, ARM64Reg Rd, ARM64Reg Rn);
|
void FCVT(u8 size_to, u8 size_from, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
|
|
||||||
// Conversion between float and integer
|
// Scalar convert float to int, in a lot of variants.
|
||||||
void FMOV(u8 size, bool top, ARM64Reg Rd, ARM64Reg Rn);
|
// Note that the scalar version of this operation has two encodings, one that goes to an integer register
|
||||||
|
// and one that outputs to a scalar fp register.
|
||||||
|
void FCVTS(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round);
|
||||||
|
void FCVTU(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round);
|
||||||
|
|
||||||
|
// Scalar convert int to float. No rounding mode specifier necessary.
|
||||||
void SCVTF(ARM64Reg Rd, ARM64Reg Rn);
|
void SCVTF(ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void UCVTF(ARM64Reg Rd, ARM64Reg Rn);
|
void UCVTF(ARM64Reg Rd, ARM64Reg Rn);
|
||||||
|
|
||||||
|
// Scalar fixed point to float. scale is the number of fractional bits.
|
||||||
|
void SCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale);
|
||||||
|
void UCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale);
|
||||||
|
|
||||||
// Float comparison
|
// Float comparison
|
||||||
void FCMP(ARM64Reg Rn, ARM64Reg Rm);
|
void FCMP(ARM64Reg Rn, ARM64Reg Rm);
|
||||||
void FCMP(ARM64Reg Rn);
|
void FCMP(ARM64Reg Rn);
|
||||||
|
@ -746,13 +873,22 @@ public:
|
||||||
|
|
||||||
// Shift by immediate
|
// Shift by immediate
|
||||||
void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
|
void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
|
||||||
|
void SSHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
|
||||||
void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
|
void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
|
||||||
|
void USHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
|
||||||
void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
|
void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
|
||||||
|
void SHRN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
|
||||||
void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
|
void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
|
void SXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
|
void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
|
void UXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
|
|
||||||
// vector x indexed element
|
// vector x indexed element
|
||||||
void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
|
void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
|
||||||
|
void FMLA(u8 esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
|
||||||
|
|
||||||
|
void MOVI2F(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG, bool negate = false);
|
||||||
|
void MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG);
|
||||||
|
|
||||||
// ABI related
|
// ABI related
|
||||||
void ABI_PushRegisters(BitSet32 registers, ARM64Reg tmp = INVALID_REG);
|
void ABI_PushRegisters(BitSet32 registers, ARM64Reg tmp = INVALID_REG);
|
||||||
|
@ -764,25 +900,35 @@ private:
|
||||||
|
|
||||||
// Emitting functions
|
// Emitting functions
|
||||||
void EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
|
void EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
|
||||||
void Emit2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
void EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
void EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
void EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
void EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn);
|
void EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void Emit2RegMisc(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
|
void Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn);
|
void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn);
|
||||||
void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
|
void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
void Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
|
void Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void EmitConversion(bool sf, bool S, u32 type, u32 rmode, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
|
void EmitConversion(bool sf, bool S, u32 type, u32 rmode, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
|
void EmitConversion2(bool sf, bool S, bool direction, u32 type, u32 rmode, u32 opcode, int scale, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm);
|
void EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
void EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
void EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
void EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
void EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
void EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm);
|
void EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm8);
|
||||||
void EmitShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
|
void EmitShiftImm(bool Q, bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
|
void EmitScalarShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn);
|
void EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn);
|
||||||
void EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
|
void EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
void EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
|
void EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
void EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||||
void EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
|
void EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
|
||||||
|
void EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round, bool sign);
|
||||||
|
void EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode);
|
||||||
void EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
|
void EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
|
||||||
|
|
||||||
|
void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
|
||||||
|
void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
|
||||||
|
void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
|
||||||
|
void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
|
||||||
|
void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
|
||||||
};
|
};
|
||||||
|
|
||||||
class ARM64CodeBlock : public CodeBlock<ARM64XEmitter>
|
class ARM64CodeBlock : public CodeBlock<ARM64XEmitter>
|
||||||
|
|
|
@ -68,7 +68,7 @@ void JitArm64::psq_l(UGeckoInstruction inst)
|
||||||
|
|
||||||
fpr.BindToRegister(inst.RS, false);
|
fpr.BindToRegister(inst.RS, false);
|
||||||
ARM64Reg VS = fpr.R(inst.RS);
|
ARM64Reg VS = fpr.R(inst.RS);
|
||||||
m_float_emit.FCVTL(64, EncodeRegToDouble(VS), D0);
|
m_float_emit.FCVTL(64, VS, D0);
|
||||||
if (inst.W)
|
if (inst.W)
|
||||||
{
|
{
|
||||||
m_float_emit.FMOV(D0, 0x70); // 1.0 as a Double
|
m_float_emit.FMOV(D0, 0x70); // 1.0 as a Double
|
||||||
|
|
Loading…
Reference in New Issue