Merge pull request #2555 from Sonicadvance1/aarch64_rebase_ppsspp_emitter

[AArch64] Upstream PPSSPP's emitter changes.

Commit: 8c2e5e2860
File diff suppressed because it is too large; only the loaded hunks are shown below.
@@ -1,4 +1,4 @@
-// Copyright 2014 Dolphin Emulator Project
+// Copyright 2015 Dolphin Emulator Project
 // Licensed under GPLv2+
 // Refer to the license.txt file included.
 
@@ -74,19 +74,34 @@ enum ARM64Reg
   PLTL2KEEP, PLTL2STRM,
   PLTL3KEEP, PLTL3STRM,
 
+  WZR = WSP,
+  ZR = SP,
+
   INVALID_REG = 0xFFFFFFFF
 };
 
-inline bool Is64Bit(ARM64Reg reg) { return reg & 0x20; }
+inline bool Is64Bit(ARM64Reg reg) { return (reg & 0x20) != 0; }
+inline bool IsSingle(ARM64Reg reg) { return (reg & 0xC0) == 0x40; }
+inline bool IsDouble(ARM64Reg reg) { return (reg & 0xC0) == 0x80; }
+inline bool IsScalar(ARM64Reg reg) { return IsSingle(reg) || IsDouble(reg); }
 inline bool IsQuad(ARM64Reg reg) { return (reg & 0xC0) == 0xC0; }
 inline bool IsVector(ARM64Reg reg) { return (reg & 0xC0) != 0; }
 inline bool IsGPR(ARM64Reg reg) { return (int)reg < 0x40; }
 
 inline ARM64Reg DecodeReg(ARM64Reg reg) { return (ARM64Reg)(reg & 0x1F); }
 inline ARM64Reg EncodeRegTo64(ARM64Reg reg) { return (ARM64Reg)(reg | 0x20); }
+inline ARM64Reg EncodeRegToSingle(ARM64Reg reg) { return (ARM64Reg)(DecodeReg(reg) + S0); }
 inline ARM64Reg EncodeRegToDouble(ARM64Reg reg) { return (ARM64Reg)((reg & ~0xC0) | 0x80); }
 inline ARM64Reg EncodeRegToQuad(ARM64Reg reg) { return (ARM64Reg)(reg | 0xC0); }
 
+// For AND/TST/ORR/EOR etc
+bool IsImmLogical(uint64_t value, unsigned int width, unsigned int *n, unsigned int *imm_s, unsigned int *imm_r);
+// For ADD/SUB
+bool IsImmArithmetic(uint64_t input, u32 *val, bool *shift);
+
+float FPImm8ToFloat(uint8_t bits);
+bool FPImm8FromFloat(float value, uint8_t *immOut);
+
 enum OpType
 {
   TYPE_IMM = 0,
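
Note on the encoding scheme above: each ARM64Reg value packs both the 5-bit register number and the register class, so the Is*/Encode*/Decode helpers are pure bit tests. A standalone sketch of the layout follows; the values mirror the helpers shown here, and the abbreviated enum is illustrative only:

// Standalone sketch: W regs live at 0x00-0x1F, X at 0x20-0x3F, S at 0x40-,
// D at 0x80-, Q at 0xC0- (bit 5 = 64-bit GPR, bits 6-7 = scalar/vector width).
#include <cassert>

enum Reg : unsigned { W0 = 0x00, X0 = 0x20, S0 = 0x40, D0 = 0x80, Q0 = 0xC0 };

static bool Is64Bit(unsigned reg)  { return (reg & 0x20) != 0; }
static bool IsSingle(unsigned reg) { return (reg & 0xC0) == 0x40; }
static bool IsDouble(unsigned reg) { return (reg & 0xC0) == 0x80; }
static unsigned DecodeReg(unsigned reg) { return reg & 0x1F; } // 5-bit field in the instruction word
static unsigned EncodeRegToDouble(unsigned reg) { return (reg & ~0xC0u) | 0x80; }

int main()
{
    assert(Is64Bit(X0 + 5));                     // X5 is a 64-bit GPR
    assert(IsSingle(S0 + 3));                    // S3 is a 32-bit scalar
    assert(IsDouble(EncodeRegToDouble(S0 + 3))); // S3 reinterpreted as D3
    assert(DecodeReg(Q0 + 17) == 17);            // every view of reg 17 shares its low 5 bits
    return 0;
}
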
@@ -109,8 +124,7 @@ enum IndexType
   INDEX_UNSIGNED,
   INDEX_POST,
   INDEX_PRE,
-  // Only for VFP loadstore paired
-  INDEX_SIGNED,
+  INDEX_SIGNED, // used in LDP/STP
 };
 
 enum ShiftAmount
@@ -121,12 +135,12 @@ enum ShiftAmount
   SHIFT_48 = 3,
 };
 
-enum ExtendType
-{
-  EXTEND_UXTW = 2,
-  EXTEND_LSL = 3, // Default for zero shift amount
-  EXTEND_SXTW = 6,
-  EXTEND_SXTX = 7,
+enum RoundingMode {
+  ROUND_A, // round to nearest, ties to away
+  ROUND_M, // round towards -inf
+  ROUND_N, // round to nearest, ties to even
+  ROUND_P, // round towards +inf
+  ROUND_Z, // round towards zero
 };
 
 struct FixupBranch
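
Note: the five RoundingMode values correspond one-to-one with the letters in the AArch64 convert mnemonics (FCVTAS, FCVTMS, FCVTNS, FCVTPS, FCVTZS and their unsigned twins). A small self-contained sketch of that correspondence; the emitter's internal encoding of the enum is not shown in this diff, so treat the mapping function as illustrative:

#include <cstdio>

enum RoundingMode { ROUND_A, ROUND_M, ROUND_N, ROUND_P, ROUND_Z };

static char MnemonicLetter(RoundingMode round)
{
    switch (round)
    {
    case ROUND_A: return 'A'; // to nearest, ties away from zero
    case ROUND_M: return 'M'; // toward minus infinity (floor)
    case ROUND_N: return 'N'; // to nearest, ties to even (IEEE default)
    case ROUND_P: return 'P'; // toward plus infinity (ceil)
    case ROUND_Z: return 'Z'; // toward zero (C-style truncation)
    }
    return '?';
}

int main() { std::printf("FCVT%cS\n", MnemonicLetter(ROUND_Z)); } // prints FCVTZS
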
@@ -157,6 +171,9 @@ enum PStateField
   FIELD_SPSel = 0,
   FIELD_DAIFSet,
   FIELD_DAIFClr,
+  FIELD_NZCV, // The only system registers accessible from EL0 (user space)
+  FIELD_FPCR = 0x340,
+  FIELD_FPSR = 0x341,
 };
 
 enum SystemHint
@@ -252,6 +269,7 @@ public:
       m_width = WIDTH_32BIT;
       m_extend = EXTEND_UXTW;
     }
+    m_shifttype = ST_LSL;
   }
   ArithOption(ARM64Reg Rd, ShiftType shift_type, u32 shift)
   {
@@ -333,7 +351,7 @@ private:
   void EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
   void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
   void EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd);
-  void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
+  void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, int n);
   void EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
   void EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm);
   void EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
@@ -398,7 +416,7 @@ public:
   // Unconditional Branch (register)
   void BR(ARM64Reg Rn);
   void BLR(ARM64Reg Rn);
-  void RET(ARM64Reg Rn);
+  void RET(ARM64Reg Rn = X30);
   void ERET();
   void DRPS();
 
@@ -414,6 +432,10 @@ public:
 
   // System
   void _MSR(PStateField field, u8 imm);
+
+  void _MSR(PStateField field, ARM64Reg Rt);
+  void MRS(ARM64Reg Rt, PStateField field);
+
   void HINT(SystemHint op);
   void CLREX();
   void DSB(BarrierType type);
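
Note: with FIELD_FPCR/FIELD_FPSR plus the new register-operand _MSR and MRS, JIT code can read and write the FP control/status registers from EL0. A hypothetical usage sketch, assuming an ARM64XEmitter instance named code, the ANDI2R helper added further down, and the usual FPSR layout (cumulative exception flags in bits 0-4):

code.MRS(X0, FIELD_FPSR);      // MRS X0, FPSR: copy FP status flags to a GPR
code.ANDI2R(X0, X0, ~0x1Full); // clear the cumulative exception bits IOC..IXC
code._MSR(FIELD_FPSR, X0);     // MSR FPSR, X0: write the GPR back
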
@@ -454,6 +476,17 @@ public:
   void CSINV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
   void CSNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
 
+  // Aliases
+  void CSET(ARM64Reg Rd, CCFlags cond)
+  {
+    ARM64Reg zr = Is64Bit(Rd) ? ZR : WZR;
+    CSINC(Rd, zr, zr, (CCFlags)((u32)cond ^ 1));
+  }
+  void NEG(ARM64Reg Rd, ARM64Reg Rs)
+  {
+    SUB(Rd, Is64Bit(Rd) ? ZR : WZR, Rs);
+  }
+
   // Data-Processing 1 source
   void RBIT(ARM64Reg Rd, ARM64Reg Rn);
   void REV16(ARM64Reg Rd, ARM64Reg Rn);
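
Note: CSET works by flipping the condition's low bit (AArch64 condition codes come in complementary pairs, e.g. EQ = 0b0000 and NE = 0b0001, which is what the cond ^ 1 does) and emitting CSINC from the zero register. Worked example, assuming the usual CMP alias is present and an emitter instance named code:

code.CMP(W1, W2);     // sets NZCV (CMP alias assumed available in the emitter)
code.CSET(W0, CC_EQ); // emits CSINC W0, WZR, WZR, NE -> W0 = (W1 == W2) ? 1 : 0
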
@@ -500,15 +533,34 @@ public:
   void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
   void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
   void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
 
+  // Wrap the above for saner syntax
+  void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { AND(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
+  void BIC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { BIC(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
+  void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ORR(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
+  void ORN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ORN(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
+  void EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { EOR(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
+  void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { EON(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
+  void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ANDS(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
+  void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { BICS(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
+
   // Convenience wrappers around ORR. These match the official convenience syntax.
+  void MOV(ARM64Reg Rd, ARM64Reg Rm, ArithOption Shift);
   void MOV(ARM64Reg Rd, ARM64Reg Rm);
   void MVN(ARM64Reg Rd, ARM64Reg Rm);
 
+  // TODO: These are "slow" as they use arith+shift, should be replaced with UBFM/EXTR variants.
+  void LSR(ARM64Reg Rd, ARM64Reg Rm, int shift);
+  void LSL(ARM64Reg Rd, ARM64Reg Rm, int shift);
+  void ASR(ARM64Reg Rd, ARM64Reg Rm, int shift);
+  void ROR(ARM64Reg Rd, ARM64Reg Rm, int shift);
+
   // Logical (immediate)
-  void AND(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
-  void ANDS(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
-  void EOR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
-  void ORR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
-  void TST(ARM64Reg Rn, u32 immr, u32 imms);
+  void AND(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
+  void ANDS(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
+  void EOR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
+  void ORR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
+  void TST(ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
 
   // Add/subtract (immediate)
   void ADD(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
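
Note on the TODO: the UBFM forms it refers to are the standard AArch64 shift aliases. For 64-bit operands, LSR Rd, Rn, #s is UBFM Rd, Rn, #s, #63, and LSL Rd, Rn, #s is UBFM Rd, Rn, #((64 - s) % 64), #(63 - s). Sketch using the UBFM declared below, assuming an emitter instance named code; this is not the emitter's actual implementation:

code.UBFM(X0, X1, (64 - 8) % 64, 63 - 8); // UBFM X0, X1, #56, #55: same result as LSL(X0, X1, 8)
code.UBFM(X0, X1, 8, 63);                 // same result as LSR(X0, X1, 8)
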
@@ -526,12 +578,22 @@ public:
   void BFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
   void SBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
   void UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
 
   // Extract register (ROR with two inputs, if same then faster on A67)
   void EXTR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u32 shift);
 
+  // Aliases
+  void SXTB(ARM64Reg Rd, ARM64Reg Rn);
+  void SXTH(ARM64Reg Rd, ARM64Reg Rn);
+  void SXTW(ARM64Reg Rd, ARM64Reg Rn);
+  void UXTB(ARM64Reg Rd, ARM64Reg Rn);
+  void UXTH(ARM64Reg Rd, ARM64Reg Rn);
+
+  void UBFX(ARM64Reg Rd, ARM64Reg Rn, int lsb, int width)
+  {
+    UBFM(Rd, Rn, lsb, lsb + width - 1);
+  }
+
   // Load Register (Literal)
   void LDR(ARM64Reg Rt, u32 imm);
   void LDRSW(ARM64Reg Rt, u32 imm);
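
Note: UBFX Rd, Rn, #lsb, #width is the alias for UBFM Rd, Rn, #lsb, #(lsb + width - 1); it extracts width bits starting at lsb and zero-extends them into Rd. Worked example, assuming an emitter instance named code:

// Extract bits [15:8] of W1 into W0, zero-extended:
code.UBFX(W0, W1, 8, 8); // emits UBFM W0, W1, #8, #15
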
@@ -610,6 +672,32 @@ public:
 
   // Wrapper around MOVZ+MOVK
   void MOVI2R(ARM64Reg Rd, u64 imm, bool optimize = true);
+  template <class P>
+  void MOVP2R(ARM64Reg Rd, P *ptr)
+  {
+    _assert_msg_(DYNA_REC, Is64Bit(Rd), "Can't store pointers in 32-bit registers");
+    MOVI2R(Rd, (uintptr_t)ptr);
+  }
+
+  // Wrapper around AND x, y, imm etc. If you are sure the imm will work, no need to pass a scratch register.
+  void ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
+  void ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
+  void TSTI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG) { ANDSI2R(Is64Bit(Rn) ? ZR : WZR, Rn, imm, scratch); }
+  void ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
+  void EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
+  void CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
+
+  void ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
+  void SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
+  void SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
+
+  bool TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
+  bool TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
+  bool TryCMPI2R(ARM64Reg Rn, u32 imm);
+
+  bool TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
+  bool TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
+  bool TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
+
   // ABI related
   void ABI_PushRegisters(BitSet32 registers);
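
Note: MOVI2R's MOVZ+MOVK strategy builds a 64-bit constant 16 bits at a time; with optimize enabled, all-zero chunks after the initial MOVZ cost nothing. A minimal standalone sketch of the chunking; EmitMOVZ/EmitMOVK here are hypothetical stand-ins for the real encoders, which also take a destination register:

#include <cstdint>
#include <cstdio>

static void EmitMOVZ(uint16_t imm, int shift) { std::printf("MOVZ #%u, LSL #%d\n", imm, shift); }
static void EmitMOVK(uint16_t imm, int shift) { std::printf("MOVK #%u, LSL #%d\n", imm, shift); }

static void LoadConstSketch(uint64_t imm)
{
    if (imm == 0)
    {
        EmitMOVZ(0, 0); // an all-zero constant still takes one MOVZ
        return;
    }
    bool initialized = false;
    for (int shift = 0; shift < 64; shift += 16)
    {
        uint16_t chunk = static_cast<uint16_t>(imm >> shift);
        if (chunk == 0)
            continue;                 // MOVZ already zeroed this part
        if (!initialized)
            EmitMOVZ(chunk, shift);   // Rd = chunk << shift, other bits zeroed
        else
            EmitMOVK(chunk, shift);   // insert chunk, keep the other bits
        initialized = true;
    }
}

int main() { LoadConstSketch(0x00000000DEADBEEFull); } // two instructions
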
@@ -633,10 +721,17 @@ public:
   ARM64Reg ABI_SetupLambda(const std::function<T(Args...)>* f)
   {
     auto trampoline = &ARM64XEmitter::CallLambdaTrampoline<T, Args...>;
-    MOVI2R(X30, (u64)trampoline);
-    MOVI2R(X0, (u64)const_cast<void*>((const void*)f));
+    MOVI2R(X30, (uintptr_t)trampoline);
+    MOVI2R(X0, (uintptr_t)const_cast<void*>((const void*)f));
     return X30;
   }
 
+  // Plain function call
+  void QuickCallFunction(ARM64Reg scratchreg, const void *func);
+  template <typename T> void QuickCallFunction(ARM64Reg scratchreg, T func)
+  {
+    QuickCallFunction(scratchreg, (const void *)func);
+  }
 };
 
 class ARM64FloatEmitter
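
Note: the lambda support parks the address of the std::function object in X0 and the address of a plain static trampoline in X30; generated code then branches via BLR X30, and the trampoline performs the actual std::function call. A minimal standalone sketch of the trampoline's shape (the real one is the class's CallLambdaTrampoline; this version only illustrates the idea):

#include <functional>

template <typename T, typename... Args>
static T CallLambdaTrampolineSketch(const std::function<T(Args...)>* f, Args... args)
{
    return (*f)(args...); // an ordinary C-ABI entry point the JIT can reach with BLR
}
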
@@ -671,14 +766,28 @@ public:
   // Scalar - 1 Source
   void FABS(ARM64Reg Rd, ARM64Reg Rn);
   void FNEG(ARM64Reg Rd, ARM64Reg Rn);
+  void FSQRT(ARM64Reg Rd, ARM64Reg Rn);
+  void FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top = false); // Also generalized move between GPR/FP
 
   // Scalar - 2 Source
   void FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
   void FMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
   void FSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+  void FDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+  void FMAX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+  void FMIN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+  void FMAXNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+  void FMINNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+  void FNMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+
+  // Scalar - 3 Source. Note - the accumulator is last on ARM!
+  void FMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
+  void FMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
+  void FNMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
+  void FNMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
 
   // Scalar floating point immediate
-  void FMOV(ARM64Reg Rd, u32 imm);
+  void FMOV(ARM64Reg Rd, uint8_t imm8);
 
   // Vector
   void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
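
Note on the operand-order warning: for the 3-source scalar ops the accumulator Ra is the last operand, so FMADD Rd, Rn, Rm, Ra computes Rd = Ra + Rn * Rm, and FMSUB computes Rd = Ra - Rn * Rm. Usage sketch, assuming an ARM64FloatEmitter instance named fp:

fp.FMADD(D0, D1, D2, D3); // D0 = D3 + D1 * D2, i.e. "d = a + b * c" puts a last
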
@@ -686,7 +795,10 @@ public:
   void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
   void FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
   void FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+  void FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+  void FMLS(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
   void FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn);
+  void FCVTL2(u8 size, ARM64Reg Rd, ARM64Reg Rn);
   void FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
   void FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
   void FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn);
@@ -697,11 +809,17 @@ public:
   void FSUB(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
   void NOT(ARM64Reg Rd, ARM64Reg Rn);
   void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+  void MOV(ARM64Reg Rd, ARM64Reg Rn)
+  {
+    ORR(Rd, Rn, Rn);
+  }
   void REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn);
   void REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn);
   void REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn);
   void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
   void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
+  void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
+  void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
   void XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
 
   // Move
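
Note: the vector MOV alias relies on the architectural idiom that ORing a register with itself is a copy (MOV Vd, Vn is ORR Vd.16B, Vn.16B, Vn.16B), hence the ORR(Rd, Rn, Rn) body. Usage sketch with an assumed instance fp:

fp.MOV(Q0, Q1); // emits ORR with both source operands = Q1, i.e. a full 128-bit copy
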
@@ -714,11 +832,20 @@ public:
   // One source
   void FCVT(u8 size_to, u8 size_from, ARM64Reg Rd, ARM64Reg Rn);
 
   // Conversion between float and integer
   void FMOV(u8 size, bool top, ARM64Reg Rd, ARM64Reg Rn);
+  // Scalar convert float to int, in a lot of variants.
+  // Note that the scalar version of this operation has two encodings, one that goes to an integer register
+  // and one that outputs to a scalar fp register.
+  void FCVTS(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round);
+  void FCVTU(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round);
+
+  // Scalar convert int to float. No rounding mode specifier necessary.
+  void SCVTF(ARM64Reg Rd, ARM64Reg Rn);
+  void UCVTF(ARM64Reg Rd, ARM64Reg Rn);
+
+  // Scalar fixed point to float. scale is the number of fractional bits.
+  void SCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale);
+  void UCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale);
 
   // Float comparison
   void FCMP(ARM64Reg Rn, ARM64Reg Rm);
   void FCMP(ARM64Reg Rn);
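
Note: for FCVTS/FCVTU the destination register class selects between the two encodings mentioned above, and the RoundingMode picks the mnemonic variant. Usage sketch, assuming an instance fp; C-style truncation is ROUND_Z, IEEE round-to-nearest-even is ROUND_N:

fp.FCVTS(W0, S0, ROUND_Z); // truncate toward zero, emitted as FCVTZS W0, S0
fp.FCVTS(W0, S0, ROUND_N); // round to nearest, ties to even, emitted as FCVTNS
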
@@ -746,13 +873,22 @@ public:
 
   // Shift by immediate
   void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
+  void SSHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
   void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
+  void USHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
   void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
+  void SHRN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
   void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
+  void SXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
   void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
+  void UXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
 
   // vector x indexed element
   void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
+  void FMLA(u8 esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
+
+  void MOVI2F(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG, bool negate = false);
+  void MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG);
 
   // ABI related
   void ABI_PushRegisters(BitSet32 registers, ARM64Reg tmp = INVALID_REG);
@@ -764,25 +900,35 @@ private:
 
   // Emitting functions
   void EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
-  void Emit2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+  void EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
   void EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
   void EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn);
-  void Emit2RegMisc(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
+  void Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
   void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn);
+  void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
   void Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
   void EmitConversion(bool sf, bool S, u32 type, u32 rmode, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
+  void EmitConversion2(bool sf, bool S, bool direction, u32 type, u32 rmode, u32 opcode, int scale, ARM64Reg Rd, ARM64Reg Rn);
   void EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm);
   void EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
   void EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
-  void EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm);
-  void EmitShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
+  void EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm8);
+  void EmitShiftImm(bool Q, bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
+  void EmitScalarShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
   void EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn);
+  void EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
   void EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
   void EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+  void EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
+  void EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round, bool sign);
+  void EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode);
+  void EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
+
+  void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
+  void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
+  void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
+  void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
+  void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
 };
 
 class ARM64CodeBlock : public CodeBlock<ARM64XEmitter>
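
Note: SXTL/UXTL are the architectural aliases for SSHLL/USHLL with a zero shift, which is presumably why the public pairs funnel into these private "upper" helpers (upper selecting the 2-suffixed, upper-half variants). A sketch of that reduction; this is an assumption about the implementation consistent with the signatures, not the actual source:

// Assumed forwarding, consistent with the alias SXTL Vd, Vn == SSHLL Vd, Vn, #0:
void ARM64FloatEmitter::SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper)
{
    SSHLL(src_size, Rd, Rn, 0, upper); // zero-shift widening = plain sign-extension
}
void ARM64FloatEmitter::UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper)
{
    USHLL(src_size, Rd, Rn, 0, upper); // unsigned variant
}
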
@@ -69,7 +69,7 @@ void JitArm64::psq_l(UGeckoInstruction inst)
 
   fpr.BindToRegister(inst.RS, false);
   ARM64Reg VS = fpr.R(inst.RS);
-  m_float_emit.FCVTL(64, EncodeRegToDouble(VS), D0);
+  m_float_emit.FCVTL(64, VS, D0);
   if (inst.W)
   {
     m_float_emit.FMOV(D0, 0x70); // 1.0 as a Double