Merge pull request #2555 from Sonicadvance1/aarch64_rebase_ppsspp_emitter

[AArch64] Upstream PPSSPP's emitter changes.
Ryan Houdek 2015-06-07 23:23:04 -04:00
commit 8c2e5e2860
3 changed files with 1275 additions and 226 deletions

File diff suppressed because it is too large


@@ -1,4 +1,4 @@
-// Copyright 2014 Dolphin Emulator Project
+// Copyright 2015 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
@@ -74,19 +74,34 @@ enum ARM64Reg
PLTL2KEEP, PLTL2STRM,
PLTL3KEEP, PLTL3STRM,
+WZR = WSP,
+ZR = SP,
INVALID_REG = 0xFFFFFFFF
};
-inline bool Is64Bit(ARM64Reg reg) { return reg & 0x20; }
+inline bool Is64Bit(ARM64Reg reg) { return (reg & 0x20) != 0; }
inline bool IsSingle(ARM64Reg reg) { return (reg & 0xC0) == 0x40; }
inline bool IsDouble(ARM64Reg reg) { return (reg & 0xC0) == 0x80; }
+inline bool IsScalar(ARM64Reg reg) { return IsSingle(reg) || IsDouble(reg); }
inline bool IsQuad(ARM64Reg reg) { return (reg & 0xC0) == 0xC0; }
inline bool IsVector(ARM64Reg reg) { return (reg & 0xC0) != 0; }
+inline bool IsGPR(ARM64Reg reg) { return (int)reg < 0x40; }
inline ARM64Reg DecodeReg(ARM64Reg reg) { return (ARM64Reg)(reg & 0x1F); }
inline ARM64Reg EncodeRegTo64(ARM64Reg reg) { return (ARM64Reg)(reg | 0x20); }
+inline ARM64Reg EncodeRegToSingle(ARM64Reg reg) { return (ARM64Reg)(DecodeReg(reg) + S0); }
inline ARM64Reg EncodeRegToDouble(ARM64Reg reg) { return (ARM64Reg)((reg & ~0xC0) | 0x80); }
inline ARM64Reg EncodeRegToQuad(ARM64Reg reg) { return (ARM64Reg)(reg | 0xC0); }
+// For AND/TST/ORR/EOR etc
+bool IsImmLogical(uint64_t value, unsigned int width, unsigned int *n, unsigned int *imm_s, unsigned int *imm_r);
+// For ADD/SUB
+bool IsImmArithmetic(uint64_t input, u32 *val, bool *shift);
+float FPImm8ToFloat(uint8_t bits);
+bool FPImm8FromFloat(float value, uint8_t *immOut);
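These helpers let an emitter test whether a constant fits an instruction's immediate field before picking an encoding. A minimal usage sketch (illustrative only: `emit`, `scratch`, and the register-operand ADD overload from the full header are assumptions; this is essentially the pattern the ADDI2R helper further down presumably implements):

    u32 val;
    bool shift;
    if (IsImmArithmetic(imm, &val, &shift))
        emit.ADD(Rd, Rn, val, shift);  // fits ADD's 12-bit, optionally shifted immediate
    else
    {
        emit.MOVI2R(scratch, imm);     // too wide: materialize the constant first
        emit.ADD(Rd, Rn, scratch);     // then use the register form
    }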
enum OpType
{
TYPE_IMM = 0,
@@ -109,8 +124,7 @@ enum IndexType
INDEX_UNSIGNED,
INDEX_POST,
INDEX_PRE,
-// Only for VFP loadstore paired
-INDEX_SIGNED,
+INDEX_SIGNED, // used in LDP/STP
};
enum ShiftAmount
@@ -121,12 +135,12 @@ enum ShiftAmount
SHIFT_48 = 3,
};
-enum ExtendType
-{
-EXTEND_UXTW = 2,
-EXTEND_LSL = 3, // Default for zero shift amount
-EXTEND_SXTW = 6,
-EXTEND_SXTX = 7,
+enum RoundingMode {
+ROUND_A, // round to nearest, ties to away
+ROUND_M, // round towards -inf
+ROUND_N, // round to nearest, ties to even
+ROUND_P, // round towards +inf
+ROUND_Z, // round towards zero
};
struct FixupBranch
@@ -157,6 +171,9 @@ enum PStateField
FIELD_SPSel = 0,
FIELD_DAIFSet,
FIELD_DAIFClr,
+FIELD_NZCV, // The only system registers accessible from EL0 (user space)
+FIELD_FPCR = 0x340,
+FIELD_FPSR = 0x341,
};
enum SystemHint
@@ -252,6 +269,7 @@ public:
m_width = WIDTH_32BIT;
m_extend = EXTEND_UXTW;
}
+m_shifttype = ST_LSL;
}
ArithOption(ARM64Reg Rd, ShiftType shift_type, u32 shift)
{
@@ -333,7 +351,7 @@ private:
void EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd);
-void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
+void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, int n);
void EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
void EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm);
void EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
@@ -398,7 +416,7 @@ public:
// Unconditional Branch (register)
void BR(ARM64Reg Rn);
void BLR(ARM64Reg Rn);
-void RET(ARM64Reg Rn);
+void RET(ARM64Reg Rn = X30);
void ERET();
void DRPS();
@@ -414,6 +432,10 @@ public:
// System
void _MSR(PStateField field, u8 imm);
+void _MSR(PStateField field, ARM64Reg Rt);
+void MRS(ARM64Reg Rt, PStateField field);
void HINT(SystemHint op);
void CLREX();
void DSB(BarrierType type);
@@ -454,6 +476,17 @@ public:
void CSINV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
void CSNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
+// Aliases
+void CSET(ARM64Reg Rd, CCFlags cond)
+{
+ARM64Reg zr = Is64Bit(Rd) ? ZR : WZR;
+CSINC(Rd, zr, zr, (CCFlags)((u32)cond ^ 1));
+}
+void NEG(ARM64Reg Rd, ARM64Reg Rs)
+{
+SUB(Rd, Is64Bit(Rd) ? ZR : WZR, Rs);
+}
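CSET works because CSINC with the inverted condition selects ZR + 1 = 1 exactly when the original condition holds, and ZR = 0 otherwise. A usage sketch (the two-operand CMP wrapper is assumed from the full header, not shown in this diff):

    emit.CMP(W1, W2);      // set NZCV
    emit.CSET(W0, CC_EQ);  // W0 = (W1 == W2) ? 1 : 0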
// Data-Processing 1 source
void RBIT(ARM64Reg Rd, ARM64Reg Rn);
void REV16(ARM64Reg Rd, ARM64Reg Rn);
@@ -500,15 +533,34 @@ public:
void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
+// Wrap the above for saner syntax
+void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { AND(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
+void BIC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { BIC(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
+void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ORR(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
+void ORN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ORN(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
+void EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { EOR(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
+void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { EON(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
+void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ANDS(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
+void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { BICS(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
+// Convenience wrappers around ORR. These match the official convenience syntax.
+void MOV(ARM64Reg Rd, ARM64Reg Rm, ArithOption Shift);
void MOV(ARM64Reg Rd, ARM64Reg Rm);
void MVN(ARM64Reg Rd, ARM64Reg Rm);
+// TODO: These are "slow" as they use arith+shift, should be replaced with UBFM/EXTR variants.
+void LSR(ARM64Reg Rd, ARM64Reg Rm, int shift);
+void LSL(ARM64Reg Rd, ARM64Reg Rm, int shift);
+void ASR(ARM64Reg Rd, ARM64Reg Rm, int shift);
+void ROR(ARM64Reg Rd, ARM64Reg Rm, int shift);
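The TODO refers to the standard AArch64 alias encodings: immediate LSR/LSL/ASR are UBFM/SBFM bitfield forms, and immediate ROR is EXTR with both source operands equal. For 64-bit registers the identities are (substitute 31 for 63 in the 32-bit case; `s` is the shift amount):

    // LSR Rd, Rm, #s  ==  UBFM(Rd, Rm, s, 63)
    // ASR Rd, Rm, #s  ==  SBFM(Rd, Rm, s, 63)
    // LSL Rd, Rm, #s  ==  UBFM(Rd, Rm, (64 - s) % 64, 63 - s)
    // ROR Rd, Rm, #s  ==  EXTR(Rd, Rm, Rm, s)
    emit.UBFM(X0, X1, 60, 63);  // X0 = X1 >> 60, without the ORR+shift path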
// Logical (immediate)
-void AND(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
-void ANDS(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
-void EOR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
-void ORR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
-void TST(ARM64Reg Rn, u32 immr, u32 imms);
+void AND(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
+void ANDS(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
+void EOR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
+void ORR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
+void TST(ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
// Add/subtract (immediate)
void ADD(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
@@ -526,12 +578,22 @@ public:
void BFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
void SBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
void UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
+// Extract register (ROR with two inputs, if same then faster on A67)
+void EXTR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u32 shift);
+// Aliases
void SXTB(ARM64Reg Rd, ARM64Reg Rn);
void SXTH(ARM64Reg Rd, ARM64Reg Rn);
void SXTW(ARM64Reg Rd, ARM64Reg Rn);
void UXTB(ARM64Reg Rd, ARM64Reg Rn);
void UXTH(ARM64Reg Rd, ARM64Reg Rn);
+void UBFX(ARM64Reg Rd, ARM64Reg Rn, int lsb, int width)
+{
+UBFM(Rd, Rn, lsb, lsb + width - 1);
+}
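UBFX extracts `width` bits starting at bit `lsb` into the low bits of Rd, so the UBFM alias sets imms = lsb + width - 1. For example:

    emit.UBFX(W0, W1, 8, 8);  // W0 = bits [15:8] of W1, same as UBFM(W0, W1, 8, 15)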
// Load Register (Literal)
void LDR(ARM64Reg Rt, u32 imm);
void LDRSW(ARM64Reg Rt, u32 imm);
@@ -610,6 +672,32 @@ public:
// Wrapper around MOVZ+MOVK
void MOVI2R(ARM64Reg Rd, u64 imm, bool optimize = true);
+template <class P>
+void MOVP2R(ARM64Reg Rd, P *ptr)
+{
+_assert_msg_(DYNA_REC, Is64Bit(Rd), "Can't store pointers in 32-bit registers");
+MOVI2R(Rd, (uintptr_t)ptr);
+}
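MOVP2R is the pointer-sized MOVI2R: it asserts that the destination is a 64-bit register and emits the MOVZ/MOVK sequence for the address. A usage sketch (`emit` and `table` are illustrative):

    static u32 table[256];
    emit.MOVP2R(X8, table);  // X8 = &table[0]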
+// Wrapper around AND x, y, imm etc. If you are sure the imm will work, no need to pass a scratch register.
+void ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
+void ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
+void TSTI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG) { ANDSI2R(Is64Bit(Rn) ? ZR : WZR, Rn, imm, scratch); }
+void ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
+void EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
+void CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
+void ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
+void SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
+void SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
+bool TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
+bool TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
+bool TryCMPI2R(ARM64Reg Rn, u32 imm);
+bool TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
+bool TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
+bool TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm);
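The scratch-register contract: when the immediate can be encoded directly the scratch goes unused; otherwise the constant is presumably materialized there via MOVI2R, and passing INVALID_REG turns an unencodable immediate into a hard failure. The Try variants instead report failure so the caller can fall back. Sketch (the register-operand ADD overload is assumed from the full header):

    emit.ANDI2R(W0, W1, 0xFFF00FFF, W9);  // always succeeds; W9 used only if needed
    if (!emit.TryADDI2R(W0, W1, imm))     // no scratch available: check first
    {
        emit.MOVI2R(W9, imm);             // manual fallback
        emit.ADD(W0, W1, W9);
    }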
// ABI related
void ABI_PushRegisters(BitSet32 registers);
@@ -633,10 +721,17 @@ public:
ARM64Reg ABI_SetupLambda(const std::function<T(Args...)>* f)
{
auto trampoline = &ARM64XEmitter::CallLambdaTrampoline<T, Args...>;
-MOVI2R(X30, (u64)trampoline);
-MOVI2R(X0, (u64)const_cast<void*>((const void*)f));
+MOVI2R(X30, (uintptr_t)trampoline);
+MOVI2R(X0, (uintptr_t)const_cast<void*>((const void*)f));
return X30;
}
+// Plain function call
+void QuickCallFunction(ARM64Reg scratchreg, const void *func);
+template <typename T> void QuickCallFunction(ARM64Reg scratchreg, T func)
+{
+QuickCallFunction(scratchreg, (const void *)func);
+}
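QuickCallFunction presumably emits a direct BL when the target is within branch range and otherwise loads the address into scratchreg and uses BLR; the template overload just casts any function pointer to const void*. Usage sketch (`Hook` is an illustrative host-side function):

    static void Hook() { /* host-side helper */ }
    emit.QuickCallFunction(X8, &Hook);  // X8 is clobbered only if an indirect call is needed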
};
class ARM64FloatEmitter
@@ -671,14 +766,28 @@ public:
// Scalar - 1 Source
void FABS(ARM64Reg Rd, ARM64Reg Rn);
void FNEG(ARM64Reg Rd, ARM64Reg Rn);
+void FSQRT(ARM64Reg Rd, ARM64Reg Rn);
+void FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top = false); // Also generalized move between GPR/FP
// Scalar - 2 Source
void FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+void FDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+void FMAX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+void FMIN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+void FMAXNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+void FMINNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+void FNMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+// Scalar - 3 Source. Note - the accumulator is last on ARM!
+void FMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
+void FMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
+void FNMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
+void FNMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
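Concretely, the accumulator warning means FMADD computes Rd = Ra + Rn * Rm, with Ra last, not first. For example (`emit_fp` is an ARM64FloatEmitter):

    emit_fp.FMADD(S0, S1, S2, S3);  // S0 = S3 + S1*S2, NOT S0 = S1 + S2*S3
    emit_fp.FMSUB(S0, S1, S2, S3);  // S0 = S3 - S1*S2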
// Scalar floating point immediate
-void FMOV(ARM64Reg Rd, u32 imm);
+void FMOV(ARM64Reg Rd, uint8_t imm8);
// Vector
void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
@@ -686,7 +795,10 @@ public:
void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
void FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+void FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+void FMLS(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn);
+void FCVTL2(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
void FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn);
@@ -697,11 +809,17 @@ public:
void FSUB(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void NOT(ARM64Reg Rd, ARM64Reg Rn);
void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+void MOV(ARM64Reg Rd, ARM64Reg Rn)
+{
+ORR(Rd, Rn, Rn);
+}
void REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
+void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
+void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
void XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
// Move
@@ -714,11 +832,20 @@ public:
// One source
void FCVT(u8 size_to, u8 size_from, ARM64Reg Rd, ARM64Reg Rn);
-// Conversion between float and integer
-void FMOV(u8 size, bool top, ARM64Reg Rd, ARM64Reg Rn);
+// Scalar convert float to int, in a lot of variants.
+// Note that the scalar version of this operation has two encodings, one that goes to an integer register
+// and one that outputs to a scalar fp register.
+void FCVTS(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round);
+void FCVTU(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round);
+// Scalar convert int to float. No rounding mode specifier necessary.
void SCVTF(ARM64Reg Rd, ARM64Reg Rn);
void UCVTF(ARM64Reg Rd, ARM64Reg Rn);
+// Scalar fixed point to float. scale is the number of fractional bits.
+void SCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale);
+void UCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale);
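With `scale` fractional bits, the fixed-point forms divide the integer input by 2^scale on the way to float. For example, converting a Q16.16 value:

    emit_fp.SCVTF(S0, W1, 16);  // S0 = (float)(s32)W1 / 65536.0f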
// Float comparison
void FCMP(ARM64Reg Rn, ARM64Reg Rm);
void FCMP(ARM64Reg Rn);
@@ -746,13 +873,22 @@ public:
// Shift by immediate
void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
+void SSHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
+void USHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
+void SHRN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
+void SXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
+void UXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
// vector x indexed element
void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
+void FMLA(u8 esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
+void MOVI2F(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG, bool negate = false);
+void MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG);
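MOVI2F presumably tries the 8-bit FMOV immediate encoding (see FPImm8FromFloat above) and falls back to loading the bit pattern through the scratch GPR when the value does not fit, hence the optional scratch argument. Sketch:

    emit_fp.MOVI2F(S0, 1.0f);      // 1.0f is FMOV-imm8 encodable; no scratch needed
    emit_fp.MOVI2F(S1, 0.1f, W9);  // 0.1f is not: routed through W9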
// ABI related
void ABI_PushRegisters(BitSet32 registers, ARM64Reg tmp = INVALID_REG);
@@ -764,25 +900,35 @@ private:
// Emitting functions
void EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
-void Emit2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+void EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn);
-void Emit2RegMisc(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
+void Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn);
void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
void Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitConversion(bool sf, bool S, u32 type, u32 rmode, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
+void EmitConversion2(bool sf, bool S, bool direction, u32 type, u32 rmode, u32 opcode, int scale, ARM64Reg Rd, ARM64Reg Rn);
void EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm);
void EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
-void EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm);
+void EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm8);
-void EmitShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
+void EmitShiftImm(bool Q, bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
+void EmitScalarShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn);
void EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
void EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
+void EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round, bool sign);
+void EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode);
void EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
+void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
+void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
+void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
+void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
+void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
};
class ARM64CodeBlock : public CodeBlock<ARM64XEmitter>


@@ -69,7 +69,7 @@ void JitArm64::psq_l(UGeckoInstruction inst)
fpr.BindToRegister(inst.RS, false);
ARM64Reg VS = fpr.R(inst.RS);
-m_float_emit.FCVTL(64, EncodeRegToDouble(VS), D0);
+m_float_emit.FCVTL(64, VS, D0);
if (inst.W)
{
m_float_emit.FMOV(D0, 0x70); // 1.0 as a Double