Merge pull request #1911 from Sonicadvance1/AArch64_paired_loadstores
[AArch64] Implements paired loadstores
This commit is contained in:
commit
cb967a6ff2
|
@ -250,10 +250,12 @@ void ARM64XEmitter::EncodeSystemInst(u32 op0, u32 op1, u32 CRn, u32 CRm, u32 op2
|
|||
|
||||
void ARM64XEmitter::EncodeArithmeticInst(u32 instenc, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option)
|
||||
{
|
||||
bool b64Bit = Is64Bit(Rd);
|
||||
|
||||
Rd = DecodeReg(Rd);
|
||||
Rn = DecodeReg(Rn);
|
||||
Rm = DecodeReg(Rm);
|
||||
Write32((flags << 29) | (ArithEnc[instenc] << 21) | \
|
||||
Write32((b64Bit << 31) | (flags << 29) | (ArithEnc[instenc] << 21) | \
|
||||
(Option.GetType() == ArithOption::TYPE_EXTENDEDREG ? 1 << 21 : 0) | (Rm << 16) | Option.GetData() | (Rn << 5) | Rd);
|
||||
}
|
||||
|
||||
|
@ -342,10 +344,12 @@ void ARM64XEmitter::EncodeData3SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, AR
|
|||
|
||||
void ARM64XEmitter::EncodeLogicalInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift)
|
||||
{
|
||||
bool b64Bit = Is64Bit(Rd);
|
||||
|
||||
Rd = DecodeReg(Rd);
|
||||
Rm = DecodeReg(Rm);
|
||||
Rn = DecodeReg(Rn);
|
||||
Write32((LogicalEnc[instenc][0] << 29) | (0x50 << 21) | (LogicalEnc[instenc][1] << 21) | \
|
||||
Write32((b64Bit << 31) | (LogicalEnc[instenc][0] << 29) | (0x50 << 21) | (LogicalEnc[instenc][1] << 21) | \
|
||||
Shift.GetData() | (Rm << 16) | (Rn << 5) | Rd);
|
||||
}
|
||||
|
||||
|
@ -457,14 +461,14 @@ void ARM64XEmitter::EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32
|
|||
(immr << 16) | (imms << 10) | (Rn << 5) | Rd);
|
||||
}
|
||||
|
||||
void ARM64XEmitter::EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend)
|
||||
void ARM64XEmitter::EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
|
||||
{
|
||||
Rt = DecodeReg(Rt);
|
||||
Rn = DecodeReg(Rn);
|
||||
Rm = DecodeReg(Rm);
|
||||
ARM64Reg decoded_Rm = DecodeReg(Rm.GetReg());
|
||||
|
||||
Write32((size << 30) | (opc << 22) | (0x1C1 << 21) | (Rm << 16) | \
|
||||
(extend << 13) | (1 << 11) | (Rn << 5) | Rt);
|
||||
Write32((size << 30) | (opc << 22) | (0x1C1 << 21) | (decoded_Rm << 16) | \
|
||||
Rm.GetData() | (1 << 11) | (Rn << 5) | Rt);
|
||||
}
|
||||
|
||||
void ARM64XEmitter::EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd)
|
||||
|
@ -1158,6 +1162,14 @@ void ARM64XEmitter::SXTW(ARM64Reg Rd, ARM64Reg Rn)
|
|||
|
||||
SBFM(Rd, Rn, 0, 31);
|
||||
}
|
||||
// UXTB: emitted as an unsigned bitfield move (UBFM) with the two immediates 0 and 7.
void ARM64XEmitter::UXTB(ARM64Reg Rd, ARM64Reg Rn)
{
	const int immr = 0;
	const int imms = 7;
	UBFM(Rd, Rn, immr, imms);
}
|
||||
// UXTH: emitted as an unsigned bitfield move (UBFM) with the two immediates 0 and 15.
void ARM64XEmitter::UXTH(ARM64Reg Rd, ARM64Reg Rn)
{
	const int immr = 0;
	const int imms = 15;
	UBFM(Rd, Rn, immr, imms);
}
|
||||
|
||||
// Load Register (Literal)
|
||||
void ARM64XEmitter::LDR(ARM64Reg Rt, u32 imm)
|
||||
|
@ -1363,49 +1375,49 @@ void ARM64XEmitter::LDRSW(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
|
|||
}
|
||||
|
||||
// Load/Store register (register offset)
|
||||
void ARM64XEmitter::STRB(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend)
|
||||
void ARM64XEmitter::STRB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
|
||||
{
|
||||
EncodeLoadStoreRegisterOffset(0, 0, Rt, Rn, Rm, extend);
|
||||
EncodeLoadStoreRegisterOffset(0, 0, Rt, Rn, Rm);
|
||||
}
|
||||
void ARM64XEmitter::LDRB(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend)
|
||||
void ARM64XEmitter::LDRB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
|
||||
{
|
||||
EncodeLoadStoreRegisterOffset(0, 1, Rt, Rn, Rm, extend);
|
||||
EncodeLoadStoreRegisterOffset(0, 1, Rt, Rn, Rm);
|
||||
}
|
||||
void ARM64XEmitter::LDRSB(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend)
|
||||
void ARM64XEmitter::LDRSB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
|
||||
{
|
||||
bool b64Bit = Is64Bit(Rt);
|
||||
EncodeLoadStoreRegisterOffset(0, 3 - b64Bit, Rt, Rn, Rm, extend);
|
||||
EncodeLoadStoreRegisterOffset(0, 3 - b64Bit, Rt, Rn, Rm);
|
||||
}
|
||||
void ARM64XEmitter::STRH(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend)
|
||||
void ARM64XEmitter::STRH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
|
||||
{
|
||||
EncodeLoadStoreRegisterOffset(1, 0, Rt, Rn, Rm, extend);
|
||||
EncodeLoadStoreRegisterOffset(1, 0, Rt, Rn, Rm);
|
||||
}
|
||||
void ARM64XEmitter::LDRH(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend)
|
||||
void ARM64XEmitter::LDRH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
|
||||
{
|
||||
EncodeLoadStoreRegisterOffset(1, 1, Rt, Rn, Rm, extend);
|
||||
EncodeLoadStoreRegisterOffset(1, 1, Rt, Rn, Rm);
|
||||
}
|
||||
void ARM64XEmitter::LDRSH(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend)
|
||||
void ARM64XEmitter::LDRSH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
|
||||
{
|
||||
bool b64Bit = Is64Bit(Rt);
|
||||
EncodeLoadStoreRegisterOffset(1, 3 - b64Bit, Rt, Rn, Rm, extend);
|
||||
EncodeLoadStoreRegisterOffset(1, 3 - b64Bit, Rt, Rn, Rm);
|
||||
}
|
||||
void ARM64XEmitter::STR(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend)
|
||||
void ARM64XEmitter::STR(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
|
||||
{
|
||||
bool b64Bit = Is64Bit(Rt);
|
||||
EncodeLoadStoreRegisterOffset(2 + b64Bit, 0, Rt, Rn, Rm, extend);
|
||||
EncodeLoadStoreRegisterOffset(2 + b64Bit, 0, Rt, Rn, Rm);
|
||||
}
|
||||
void ARM64XEmitter::LDR(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend)
|
||||
void ARM64XEmitter::LDR(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
|
||||
{
|
||||
bool b64Bit = Is64Bit(Rt);
|
||||
EncodeLoadStoreRegisterOffset(2 + b64Bit, 1, Rt, Rn, Rm, extend);
|
||||
EncodeLoadStoreRegisterOffset(2 + b64Bit, 1, Rt, Rn, Rm);
|
||||
}
|
||||
void ARM64XEmitter::LDRSW(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend)
|
||||
void ARM64XEmitter::LDRSW(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
|
||||
{
|
||||
EncodeLoadStoreRegisterOffset(2, 2, Rt, Rn, Rm, extend);
|
||||
EncodeLoadStoreRegisterOffset(2, 2, Rt, Rn, Rm);
|
||||
}
|
||||
void ARM64XEmitter::PRFM(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend)
|
||||
void ARM64XEmitter::PRFM(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
|
||||
{
|
||||
EncodeLoadStoreRegisterOffset(3, 2, Rt, Rn, Rm, extend);
|
||||
EncodeLoadStoreRegisterOffset(3, 2, Rt, Rn, Rm);
|
||||
}
|
||||
|
||||
// Address of label/page PC-relative
|
||||
|
@ -1679,7 +1691,6 @@ void ARM64FloatEmitter::EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd,
|
|||
|
||||
void ARM64FloatEmitter::EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn)
|
||||
{
|
||||
_assert_msg_(DYNA_REC, Rn <= SP, "%s only supports VFP registers!", __FUNCTION__);
|
||||
Rd = DecodeReg(Rd);
|
||||
Rn = DecodeReg(Rn);
|
||||
|
||||
|
@ -1709,6 +1720,18 @@ void ARM64FloatEmitter::EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode,
|
|||
(S << 12) | (size << 10) | (Rn << 5) | Rt);
|
||||
}
|
||||
|
||||
void ARM64FloatEmitter::EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)
|
||||
{
|
||||
_assert_msg_(DYNA_REC, IsSingle(Rt), "%s doesn't support singles!", __FUNCTION__);
|
||||
bool quad = IsQuad(Rt);
|
||||
Rt = DecodeReg(Rt);
|
||||
Rn = DecodeReg(Rn);
|
||||
Rm = DecodeReg(Rm);
|
||||
|
||||
Write32((quad << 30) | (0b11011 << 23) | (L << 22) | (R << 21) | (Rm << 16) | \
|
||||
(opcode << 13) | (S << 12) | (size << 10) | (Rn << 5) | Rt);
|
||||
}
|
||||
|
||||
void ARM64FloatEmitter::Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
|
||||
{
|
||||
_assert_msg_(DYNA_REC, IsQuad(Rd), "%s doesn't support vector!", __FUNCTION__);
|
||||
|
@ -1774,6 +1797,49 @@ void ARM64FloatEmitter::EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn,
|
|||
(1 << 11) | (Rn << 5) | Rd);
|
||||
}
|
||||
|
||||
void ARM64FloatEmitter::EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm)
|
||||
{
|
||||
_assert_msg_(DYNA_REC, IsQuad(Rd), "%s doesn't support vector!", __FUNCTION__);
|
||||
|
||||
bool is_double = !IsSingle(Rd);
|
||||
|
||||
Rd = DecodeReg(Rd);
|
||||
|
||||
Write32((M << 31) | (S << 29) | (0b11110001 << 21) | (is_double << 22) | (type << 22) | \
|
||||
(imm << 13) | (1 << 12) | (imm5 << 5) | Rd);
|
||||
}
|
||||
|
||||
void ARM64FloatEmitter::EmitShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
|
||||
{
|
||||
bool quad = IsQuad(Rd);
|
||||
|
||||
_assert_msg_(DYNA_REC, !immh, "%s bad encoding! Can't have zero immh", __FUNCTION__);
|
||||
|
||||
Rd = DecodeReg(Rd);
|
||||
Rn = DecodeReg(Rn);
|
||||
|
||||
Write32((quad << 30) | (U << 29) | (0b1111 << 24) | (immh << 19) | (immb << 16) | \
|
||||
(opcode << 11) | (1 << 10) | (Rn << 5) | Rd);
|
||||
}
|
||||
// Writes one multiple-structure load/store instruction word.
//  size    - element width in bits; 16, 32 and 64 map to size fields 1, 2 and 3,
//            every other value maps to 0
//  L       - raw encoding bit placed at bit 22
//  opcode  - field placed at bit 12
//  Rt, Rn  - registers; IsQuad(Rt) sets bit 30
void ARM64FloatEmitter::EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn)
{
	u32 size_field;
	switch (size)
	{
	case 16:
		size_field = 1;
		break;
	case 32:
		size_field = 2;
		break;
	case 64:
		size_field = 3;
		break;
	default:
		size_field = 0;
		break;
	}

	// Sample the quad flag before DecodeReg strips the register class bits.
	const bool is_quad = IsQuad(Rt);
	Rt = DecodeReg(Rt);
	Rn = DecodeReg(Rn);

	const u32 high_fields = (is_quad << 30) | (3 << 26) | (L << 22) | (opcode << 12);
	const u32 low_fields = (size_field << 10) | (Rn << 5) | Rt;
	Write32(high_fields | low_fields);
}
|
||||
|
||||
void ARM64FloatEmitter::LDR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
|
||||
{
|
||||
EmitLoadStoreImmediate(size, 1, type, Rt, Rn, imm);
|
||||
|
@ -1784,17 +1850,251 @@ void ARM64FloatEmitter::STR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s
|
|||
}
|
||||
|
||||
// Loadstore single structure
|
||||
// LD1 (single structure): forwards to EmitLoadStoreSingleStructure with L=1, R=0.
//  size  - element width in bits: 8, 16, 32 or 64
//  index - element index; its bits are spread over the S flag, the size field and
//          the choice between the quad and the double view of Rt
// Any other size leaves all fields at 0 and forwards INVALID_REG as the register.
void ARM64FloatEmitter::LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn)
{
	bool s_flag = false;
	bool use_quad = false;
	bool size_known = true;
	u32 op = 0;
	u32 size_field = 0;

	switch (size)
	{
	case 8:
		s_flag = (index & 4) != 0;
		size_field = index & 3;
		use_quad = (index & 8) != 0;
		break;
	case 16:
		op = 2;
		s_flag = (index & 2) != 0;
		size_field = (index & 1) << 1;
		use_quad = (index & 4) != 0;
		break;
	case 32:
		op = 4;
		s_flag = (index & 1) != 0;
		use_quad = (index & 2) != 0;
		break;
	case 64:
		op = 4;
		size_field = 1;
		use_quad = (index == 1);
		break;
	default:
		size_known = false;
		break;
	}

	ARM64Reg target = INVALID_REG;
	if (size_known)
		target = use_quad ? EncodeRegToQuad(Rt) : EncodeRegToDouble(Rt);

	EmitLoadStoreSingleStructure(1, 0, op, s_flag, size_field, target, Rn);
}
|
||||
|
||||
// LD1 (single structure, with Rm): forwards to the Rm overload of
// EmitLoadStoreSingleStructure with L=1, R=0.
//  size  - element width in bits: 8, 16, 32 or 64
//  index - element index; its bits are spread over the S flag, the size field and
//          the choice between the quad and the double view of Rt
// Any other size leaves all fields at 0 and forwards INVALID_REG as the register.
void ARM64FloatEmitter::LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm)
{
	bool s_flag = false;
	bool use_quad = false;
	bool size_known = true;
	u32 op = 0;
	u32 size_field = 0;

	switch (size)
	{
	case 8:
		s_flag = (index & 4) != 0;
		size_field = index & 3;
		use_quad = (index & 8) != 0;
		break;
	case 16:
		op = 2;
		s_flag = (index & 2) != 0;
		size_field = (index & 1) << 1;
		use_quad = (index & 4) != 0;
		break;
	case 32:
		op = 4;
		s_flag = (index & 1) != 0;
		use_quad = (index & 2) != 0;
		break;
	case 64:
		op = 4;
		size_field = 1;
		use_quad = (index == 1);
		break;
	default:
		size_known = false;
		break;
	}

	ARM64Reg target = INVALID_REG;
	if (size_known)
		target = use_quad ? EncodeRegToQuad(Rt) : EncodeRegToDouble(Rt);

	EmitLoadStoreSingleStructure(1, 0, op, s_flag, size_field, target, Rn, Rm);
}
|
||||
|
||||
// LD1R: forwards to EmitLoadStoreSingleStructure with L=1, R=0, opcode 0b110, S=0.
//  size - element width in bits (8, 16, 32 or 64), converted to the 2-bit size field
void ARM64FloatEmitter::LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn)
{
	// size >> 4 maps 8/16/32 to 0/1/2 but maps 64 to 4, which does not fit the
	// 2-bit size field and would spill into the neighbouring S bit. 64-bit
	// elements use size field 3.
	const u32 encoded_size = (size == 64) ? 3 : (size >> 4);
	EmitLoadStoreSingleStructure(1, 0, 0b110, 0, encoded_size, Rt, Rn);
}
|
||||
|
||||
// ST1 (single structure): forwards to EmitLoadStoreSingleStructure with L=0, R=0.
//  size  - element width in bits: 8, 16, 32 or 64
//  index - element index; its bits are spread over the S flag, the size field and
//          the choice between the quad and the double view of Rt
// Any other size leaves all fields at 0 and forwards INVALID_REG as the register.
void ARM64FloatEmitter::ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn)
{
	bool s_flag = false;
	bool use_quad = false;
	bool size_known = true;
	u32 op = 0;
	u32 size_field = 0;

	switch (size)
	{
	case 8:
		s_flag = (index & 4) != 0;
		size_field = index & 3;
		use_quad = (index & 8) != 0;
		break;
	case 16:
		op = 2;
		s_flag = (index & 2) != 0;
		size_field = (index & 1) << 1;
		use_quad = (index & 4) != 0;
		break;
	case 32:
		op = 4;
		s_flag = (index & 1) != 0;
		use_quad = (index & 2) != 0;
		break;
	case 64:
		op = 4;
		size_field = 1;
		use_quad = (index == 1);
		break;
	default:
		size_known = false;
		break;
	}

	ARM64Reg target = INVALID_REG;
	if (size_known)
		target = use_quad ? EncodeRegToQuad(Rt) : EncodeRegToDouble(Rt);

	EmitLoadStoreSingleStructure(0, 0, op, s_flag, size_field, target, Rn);
}
|
||||
|
||||
// ST1 (single structure, with Rm): forwards to the Rm overload of
// EmitLoadStoreSingleStructure with L=0, R=0.
//  size  - element width in bits: 8, 16, 32 or 64
//  index - element index; its bits are spread over the S flag, the size field and
//          the choice between the quad and the double view of Rt
// Any other size leaves all fields at 0 and forwards INVALID_REG as the register.
void ARM64FloatEmitter::ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm)
{
	bool s_flag = false;
	bool use_quad = false;
	bool size_known = true;
	u32 op = 0;
	u32 size_field = 0;

	switch (size)
	{
	case 8:
		s_flag = (index & 4) != 0;
		size_field = index & 3;
		use_quad = (index & 8) != 0;
		break;
	case 16:
		op = 2;
		s_flag = (index & 2) != 0;
		size_field = (index & 1) << 1;
		use_quad = (index & 4) != 0;
		break;
	case 32:
		op = 4;
		s_flag = (index & 1) != 0;
		use_quad = (index & 2) != 0;
		break;
	case 64:
		op = 4;
		size_field = 1;
		use_quad = (index == 1);
		break;
	default:
		size_known = false;
		break;
	}

	ARM64Reg target = INVALID_REG;
	if (size_known)
		target = use_quad ? EncodeRegToQuad(Rt) : EncodeRegToDouble(Rt);

	EmitLoadStoreSingleStructure(0, 0, op, s_flag, size_field, target, Rn, Rm);
}
|
||||
|
||||
// Loadstore multiple structure
|
||||
// LD1 (multiple structures): forwards to EmitLoadStoreMultipleStructure with L=1.
//  size   - element width in bits, passed through unchanged
//  count  - number of registers, 1 to 4; selects the opcode field
//  Rt, Rn - registers, passed through unchanged
void ARM64FloatEmitter::LD1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn)
{
	// Assumes _assert_msg_ reports when its condition is false (conventional assert),
	// so the condition has to describe the VALID case: count lies in 1..4.
	_assert_msg_(DYNA_REC, !(count == 0 || count > 4), "%s must have a count of 1 to 4 registers!", __FUNCTION__);

	u32 opcode = 0;
	switch (count)
	{
	case 1:
		opcode = 0b111;
		break;
	case 2:
		opcode = 0b1010;
		break;
	case 3:
		opcode = 0b0110;
		break;
	case 4:
		opcode = 0b0010;
		break;
	default:
		// Out-of-range counts keep opcode 0, as before.
		break;
	}

	EmitLoadStoreMultipleStructure(size, 1, opcode, Rt, Rn);
}
|
||||
|
||||
// Scalar - 2 Source
|
||||
// Scalar FMUL: forwards to Emit2Source; the third argument is IsDouble(Rd),
// all other leading fields are 0.
void ARM64FloatEmitter::FMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	const bool dest_is_double = IsDouble(Rd);
	Emit2Source(0, 0, dest_is_double, 0, Rd, Rn, Rm);
}
|
||||
|
||||
// Scalar floating point immediate
|
||||
// Scalar FMOV (immediate): forwards to EmitScalarImm with M, S, type and imm5 all 0.
void ARM64FloatEmitter::FMOV(ARM64Reg Rd, u32 imm)
{
	const bool m_bit = 0;
	const bool s_bit = 0;
	const u32 type_field = 0;
	const u32 imm5_field = 0;
	EmitScalarImm(m_bit, s_bit, type_field, imm5_field, Rd, imm);
}
|
||||
|
||||
// Vector
|
||||
void ARM64FloatEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
|
||||
{
|
||||
|
@ -1843,6 +2143,18 @@ void ARM64FloatEmitter::FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn)
|
|||
{
|
||||
Emit2RegMisc(0, size >> 6, 0b10111, Rd, Rn);
|
||||
}
|
||||
// Vector FCVTN: forwards to Emit2RegMisc with U=0 and opcode 0b10110.
// The size field is dest_size >> 5.
void ARM64FloatEmitter::FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
{
	const u32 size_field = dest_size >> 5;
	Emit2RegMisc(0, size_field, 0b10110, Rd, Rn);
}
|
||||
// Vector FCVTZS: forwards to Emit2RegMisc with U=0 and opcode 0b11011.
// The size field is 2 OR-ed with size >> 6.
void ARM64FloatEmitter::FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn)
{
	const u32 size_field = 2 | (size >> 6);
	Emit2RegMisc(0, size_field, 0b11011, Rd, Rn);
}
|
||||
// Vector FCVTZU: forwards to Emit2RegMisc with U=1 and opcode 0b11011.
// The size field is 2 OR-ed with size >> 6.
void ARM64FloatEmitter::FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn)
{
	const u32 size_field = 2 | (size >> 6);
	Emit2RegMisc(1, size_field, 0b11011, Rd, Rn);
}
|
||||
void ARM64FloatEmitter::FDIV(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
|
||||
{
|
||||
EmitThreeSame(1, size >> 6, 0b11111, Rd, Rn, Rm);
|
||||
|
@ -1873,7 +2185,7 @@ void ARM64FloatEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
|
|||
}
|
||||
void ARM64FloatEmitter::REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn)
|
||||
{
|
||||
Emit2RegMisc(0, 1 | (size >> 4), 0, Rd, Rn);
|
||||
Emit2RegMisc(0, size >> 4, 1, Rd, Rn);
|
||||
}
|
||||
void ARM64FloatEmitter::REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn)
|
||||
{
|
||||
|
@ -1883,6 +2195,18 @@ void ARM64FloatEmitter::REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn)
|
|||
{
|
||||
Emit2RegMisc(0, size >> 4, 0, Rd, Rn);
|
||||
}
|
||||
// Vector SCVTF: forwards to Emit2RegMisc with U=0 and opcode 0b11101.
// The size field is size >> 6.
void ARM64FloatEmitter::SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn)
{
	const u32 size_field = size >> 6;
	Emit2RegMisc(0, size_field, 0b11101, Rd, Rn);
}
|
||||
// Vector UCVTF: forwards to Emit2RegMisc with U=1 and opcode 0b11101.
// The size field is size >> 6.
void ARM64FloatEmitter::UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn)
{
	const u32 size_field = size >> 6;
	Emit2RegMisc(1, size_field, 0b11101, Rd, Rn);
}
|
||||
// XTN: forwards to Emit2RegMisc with U=0 and opcode 0b10010.
// The size field is dest_size >> 4.
void ARM64FloatEmitter::XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
{
	const u32 size_field = dest_size >> 4;
	Emit2RegMisc(0, size_field, 0b10010, Rd, Rn);
}
|
||||
|
||||
// Move
|
||||
void ARM64FloatEmitter::DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn)
|
||||
|
@ -1960,6 +2284,62 @@ void ARM64FloatEmitter::INS(u8 size, ARM64Reg Rd, u8 index1, ARM64Reg Rn, u8 ind
|
|||
EmitCopy(1, 1, imm5, imm4, Rd, Rn);
|
||||
}
|
||||
|
||||
// UMOV: forwards to EmitCopy with Q = Is64Bit(Rd), op 0 and imm4 0b0111.
//  size  - element width in bits (8, 16, 32 or 64); any other value leaves imm5 at 0
//  Rd    - destination; asserted to be a general-purpose register.
//          A 64-bit Rd requires size == 64.
//  Rn    - source register, passed through unchanged
//  index - element index, shifted in above the size marker bit of imm5
void ARM64FloatEmitter::UMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index)
{
	const bool b64Bit = Is64Bit(Rd);

	// Assumes _assert_msg_ reports when its condition is false (conventional assert),
	// so each condition has to describe the VALID case.
	// NOTE(review): "GPR" is taken to mean register ids up to and including SP — confirm
	// against the ARM64Reg enum.
	_assert_msg_(DYNA_REC, Rd <= SP, "%s destination must be a GPR!", __FUNCTION__);
	_assert_msg_(DYNA_REC, !(b64Bit && size != 64), "%s must have a size of 64 when destination is 64bit!", __FUNCTION__);

	// imm5 = lowest set bit marks the element size, the index sits directly above it.
	u32 imm5 = 0;
	switch (size)
	{
	case 8:
		imm5 = 1 | (index << 1);
		break;
	case 16:
		imm5 = 2 | (index << 2);
		break;
	case 32:
		imm5 = 4 | (index << 3);
		break;
	case 64:
		imm5 = 8 | (index << 4);
		break;
	default:
		break;
	}

	EmitCopy(b64Bit, 0, imm5, 0b0111, Rd, Rn);
}
|
||||
// SMOV: forwards to EmitCopy with Q = Is64Bit(Rd), op 0 and imm4 0b0101.
//  size  - element width in bits (8, 16 or 32); 64 is rejected, any other value
//          leaves imm5 at 0
//  Rd    - destination; asserted to be a general-purpose register
//  Rn    - source register, passed through unchanged
//  index - element index, shifted in above the size marker bit of imm5
void ARM64FloatEmitter::SMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index)
{
	const bool b64Bit = Is64Bit(Rd);

	// Assumes _assert_msg_ reports when its condition is false (conventional assert),
	// so each condition has to describe the VALID case.
	// NOTE(review): "GPR" is taken to mean register ids up to and including SP — confirm
	// against the ARM64Reg enum.
	_assert_msg_(DYNA_REC, Rd <= SP, "%s destination must be a GPR!", __FUNCTION__);
	_assert_msg_(DYNA_REC, size != 64, "%s doesn't support 64bit destination. Use UMOV!", __FUNCTION__);

	// imm5 = lowest set bit marks the element size, the index sits directly above it.
	u32 imm5 = 0;
	switch (size)
	{
	case 8:
		imm5 = 1 | (index << 1);
		break;
	case 16:
		imm5 = 2 | (index << 2);
		break;
	case 32:
		imm5 = 4 | (index << 3);
		break;
	default:
		break;
	}

	EmitCopy(b64Bit, 0, imm5, 0b0101, Rd, Rn);
}
|
||||
|
||||
// One source
|
||||
void ARM64FloatEmitter::FCVT(u8 size_to, u8 size_from, ARM64Reg Rd, ARM64Reg Rn)
|
||||
{
|
||||
|
@ -2000,6 +2380,26 @@ void ARM64FloatEmitter::FMOV(u8 size, bool top, ARM64Reg Rd, ARM64Reg Rn)
|
|||
EmitConversion(sf, 0, type, rmode, IsVector(Rd) ? 0b111 : 0b110, Rd, Rn);
|
||||
}
|
||||
|
||||
// Scalar SCVTF: forwards to EmitConversion with S=0, rmode 0 and opcode 0b010.
// sf is Is64Bit(Rn); type is 1 when IsDouble(Rd) holds, otherwise 0.
void ARM64FloatEmitter::SCVTF(ARM64Reg Rd, ARM64Reg Rn)
{
	const bool source_is_64bit = Is64Bit(Rn);
	const u32 type_field = IsDouble(Rd) ? 1 : 0;

	EmitConversion(source_is_64bit, 0, type_field, 0, 0b010, Rd, Rn);
}
|
||||
|
||||
// Scalar UCVTF: forwards to EmitConversion with S=0, rmode 0 and opcode 0b011.
// sf is Is64Bit(Rn); type is 1 when IsDouble(Rd) holds, otherwise 0.
void ARM64FloatEmitter::UCVTF(ARM64Reg Rd, ARM64Reg Rn)
{
	const bool source_is_64bit = Is64Bit(Rn);
	const u32 type_field = IsDouble(Rd) ? 1 : 0;

	EmitConversion(source_is_64bit, 0, type_field, 0, 0b011, Rd, Rn);
}
|
||||
|
||||
void ARM64FloatEmitter::FCMP(ARM64Reg Rn, ARM64Reg Rm)
|
||||
{
|
||||
EmitCompare(0, 0, 0, 0, Rn, Rm);
|
||||
|
@ -2080,6 +2480,80 @@ void ARM64FloatEmitter::ZIP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
|
|||
EmitPermute(size, 0b111, Rd, Rn, Rm);
|
||||
}
|
||||
|
||||
// Shift by immediate
|
||||
// SSHLL: forwards to EmitShiftImm with U=0 and opcode 0b10100.
//  src_size - source element width in bits (8, 16 or 32); any other value leaves immh at 0
//  Rd, Rn   - registers, passed through unchanged
//  shift    - shift amount; must be smaller than src_size
// immh carries the element-size marker bit plus the shift bits from bit 3 upward;
// immb carries the low three shift bits.
void ARM64FloatEmitter::SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
{
	// Assumes _assert_msg_ reports when its condition is false (conventional assert),
	// so the condition has to describe the VALID case: shift < src_size.
	_assert_msg_(DYNA_REC, shift < src_size, "%s shift amount must less than the element size!", __FUNCTION__);

	u32 immh = 0;
	// immb is shifted to bit 16 and immh to bit 19 by EmitShiftImm, so immb has
	// room for three bits only. For in-range shifts the higher bits are already
	// part of immh, so masking does not change the emitted word.
	const u32 immb = shift & 0x7;

	if (src_size == 8)
	{
		immh = 1;
	}
	else if (src_size == 16)
	{
		immh = 2 | ((shift >> 3) & 1);
	}
	else if (src_size == 32)
	{
		immh = 4 | ((shift >> 3) & 3);
	}
	EmitShiftImm(0, immh, immb, 0b10100, Rd, Rn);
}
|
||||
|
||||
// USHLL: forwards to EmitShiftImm with U=1 and opcode 0b10100.
//  src_size - source element width in bits (8, 16 or 32); any other value leaves immh at 0
//  Rd, Rn   - registers, passed through unchanged
//  shift    - shift amount; must be smaller than src_size
// immh carries the element-size marker bit plus the shift bits from bit 3 upward;
// immb carries the low three shift bits.
void ARM64FloatEmitter::USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
{
	// Assumes _assert_msg_ reports when its condition is false (conventional assert),
	// so the condition has to describe the VALID case: shift < src_size.
	_assert_msg_(DYNA_REC, shift < src_size, "%s shift amount must less than the element size!", __FUNCTION__);

	u32 immh = 0;
	// immb is shifted to bit 16 and immh to bit 19 by EmitShiftImm, so immb has
	// room for three bits only. For in-range shifts the higher bits are already
	// part of immh, so masking does not change the emitted word.
	const u32 immb = shift & 0x7;

	if (src_size == 8)
	{
		immh = 1;
	}
	else if (src_size == 16)
	{
		immh = 2 | ((shift >> 3) & 1);
	}
	else if (src_size == 32)
	{
		immh = 4 | ((shift >> 3) & 3);
	}
	EmitShiftImm(1, immh, immb, 0b10100, Rd, Rn);
}
|
||||
|
||||
// SHRN: forwards to EmitShiftImm with U=1 and opcode 0b10000.
//  dest_size - destination element width in bits (8, 16 or 32); any other value leaves immh at 0
//  Rd, Rn    - registers, passed through unchanged
//  shift     - shift amount; must be smaller than dest_size
// immh carries the element-size marker bit plus the shift bits from bit 3 upward;
// immb carries the low three shift bits.
// NOTE(review): the A64 reference describes SHRN with U=0 and immh:immb holding
// (2 * element size - shift); this function emits U=1 and (element size + shift).
// Left unchanged here — confirm against the ARM ARM before relying on the output.
void ARM64FloatEmitter::SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
{
	// Assumes _assert_msg_ reports when its condition is false (conventional assert),
	// so the condition has to describe the VALID case: shift < dest_size.
	_assert_msg_(DYNA_REC, shift < dest_size, "%s shift amount must less than the element size!", __FUNCTION__);

	u32 immh = 0;
	// immb is shifted to bit 16 and immh to bit 19 by EmitShiftImm, so immb has
	// room for three bits only. For in-range shifts the higher bits are already
	// part of immh, so masking does not change the emitted word.
	const u32 immb = shift & 0x7;

	if (dest_size == 8)
	{
		immh = 1;
	}
	else if (dest_size == 16)
	{
		immh = 2 | ((shift >> 3) & 1);
	}
	else if (dest_size == 32)
	{
		immh = 4 | ((shift >> 3) & 3);
	}
	EmitShiftImm(1, immh, immb, 0b10000, Rd, Rn);
}
|
||||
|
||||
// SXTL: emitted as SSHLL with a shift amount of zero.
void ARM64FloatEmitter::SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)
{
	const u32 no_shift = 0;
	SSHLL(src_size, Rd, Rn, no_shift);
}
|
||||
|
||||
// UXTL: emitted as USHLL with a shift amount of zero.
void ARM64FloatEmitter::UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)
{
	const u32 no_shift = 0;
	USHLL(src_size, Rd, Rn, no_shift);
}
|
||||
|
||||
void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers)
|
||||
{
|
||||
for (auto it : registers)
|
||||
|
|
|
@ -82,6 +82,8 @@ inline bool IsQuad(ARM64Reg reg) { return (reg & 0xC0) == 0xC0; }
|
|||
// True when either of the two bits covered by mask 0xC0 is set in the register id.
inline bool IsVector(ARM64Reg reg)
{
	const auto class_bits = reg & 0xC0;
	return class_bits != 0;
}
|
||||
// Keeps only the low five bits of the register id (mask 0x1F).
inline ARM64Reg DecodeReg(ARM64Reg reg)
{
	return static_cast<ARM64Reg>(reg & 0x1F);
}
|
||||
// Returns the register id with bit 0x20 set.
inline ARM64Reg EncodeRegTo64(ARM64Reg reg)
{
	return static_cast<ARM64Reg>(reg | 0x20);
}
|
||||
// Clears the two bits covered by mask 0xC0, then sets bit 0x80.
inline ARM64Reg EncodeRegToDouble(ARM64Reg reg)
{
	const auto without_class_bits = reg & ~0xC0;
	return static_cast<ARM64Reg>(without_class_bits | 0x80);
}
|
||||
// Returns the register id with both bits of mask 0xC0 set.
inline ARM64Reg EncodeRegToQuad(ARM64Reg reg)
{
	return static_cast<ARM64Reg>(reg | 0xC0);
}
|
||||
|
||||
enum OpType
|
||||
{
|
||||
|
@ -217,10 +219,24 @@ private:
|
|||
u32 m_shift;
|
||||
|
||||
public:
|
||||
ArithOption(ARM64Reg Rd)
|
||||
ArithOption(ARM64Reg Rd, bool index = false)
|
||||
{
|
||||
// Indexed registers are a certain feature of AArch64
|
||||
// On Loadstore instructions that use a register offset
|
||||
// We can have the register as an index
|
||||
// If we are indexing then the offset register will
|
||||
// be shifted to the left so we are indexing at intervals
|
||||
// of the size of what we are loading
|
||||
// 8-bit: Index does nothing
|
||||
// 16-bit: Index LSL 1
|
||||
// 32-bit: Index LSL 2
|
||||
// 64-bit: Index LSL 3
|
||||
if (index)
|
||||
m_shift = 4;
|
||||
else
|
||||
m_shift = 0;
|
||||
|
||||
m_destReg = Rd;
|
||||
m_shift = 0;
|
||||
m_type = TYPE_EXTENDEDREG;
|
||||
if (Is64Bit(Rd))
|
||||
{
|
||||
|
@ -256,18 +272,20 @@ public:
|
|||
{
|
||||
return m_type;
|
||||
}
|
||||
// Returns the register stored in m_destReg.
// Declared const, like GetData(), because it only reads a member; this also
// makes it callable on const ArithOption objects.
ARM64Reg GetReg() const
{
	return m_destReg;
}
|
||||
u32 GetData() const
|
||||
{
|
||||
switch (m_type)
|
||||
{
|
||||
case TYPE_EXTENDEDREG:
|
||||
return (m_width == WIDTH_64BIT ? (1 << 31) : 0) |
|
||||
(m_extend << 13) |
|
||||
return (m_extend << 13) |
|
||||
(m_shift << 10);
|
||||
break;
|
||||
case TYPE_SHIFTEDREG:
|
||||
return (m_width == WIDTH_64BIT ? (1 << 31) : 0) |
|
||||
(m_shifttype << 22) |
|
||||
return (m_shifttype << 22) |
|
||||
(m_shift << 10);
|
||||
break;
|
||||
default:
|
||||
|
@ -309,7 +327,7 @@ private:
|
|||
void EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm, u8 size);
|
||||
void EncodeMOVWideInst(u32 op, ARM64Reg Rd, u32 imm, ShiftAmount pos);
|
||||
void EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
|
||||
void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend);
|
||||
void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
|
||||
void EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd);
|
||||
void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
|
||||
void EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
|
||||
|
@ -505,6 +523,8 @@ public:
|
|||
void SXTB(ARM64Reg Rd, ARM64Reg Rn);
|
||||
void SXTH(ARM64Reg Rd, ARM64Reg Rn);
|
||||
void SXTW(ARM64Reg Rd, ARM64Reg Rn);
|
||||
void UXTB(ARM64Reg Rd, ARM64Reg Rn);
|
||||
void UXTH(ARM64Reg Rd, ARM64Reg Rn);
|
||||
|
||||
// Load Register (Literal)
|
||||
void LDR(ARM64Reg Rt, u32 imm);
|
||||
|
@ -551,16 +571,16 @@ public:
|
|||
void LDRSW(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
|
||||
|
||||
// Load/Store register (register offset)
|
||||
void STRB(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL);
|
||||
void LDRB(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL);
|
||||
void LDRSB(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL);
|
||||
void STRH(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL);
|
||||
void LDRH(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL);
|
||||
void LDRSH(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL);
|
||||
void STR(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL);
|
||||
void LDR(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL);
|
||||
void LDRSW(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL);
|
||||
void PRFM(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL);
|
||||
void STRB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
|
||||
void LDRB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
|
||||
void LDRSB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
|
||||
void STRH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
|
||||
void LDRH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
|
||||
void LDRSH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
|
||||
void STR(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
|
||||
void LDR(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
|
||||
void LDRSW(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
|
||||
void PRFM(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
|
||||
|
||||
// Load/Store pair
|
||||
void LDP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
|
||||
|
@ -588,11 +608,21 @@ public:
|
|||
void STR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
|
||||
|
||||
// Loadstore single structure
|
||||
void LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn);
|
||||
void LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm);
|
||||
void LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn);
|
||||
void ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn);
|
||||
void ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm);
|
||||
|
||||
// Loadstore multiple structure
|
||||
void LD1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn);
|
||||
|
||||
// Scalar - 2 Source
|
||||
void FMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||
|
||||
// Scalar floating point immediate
|
||||
void FMOV(ARM64Reg Rd, u32 imm);
|
||||
|
||||
// Vector
|
||||
void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||
void BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||
|
@ -600,6 +630,9 @@ public:
|
|||
void FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||
void FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void FDIV(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||
void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||
void FNEG(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
|
@ -610,17 +643,24 @@ public:
|
|||
void REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
|
||||
// Move
|
||||
void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void INS(u8 size, ARM64Reg Rd, u8 index, ARM64Reg Rn);
|
||||
void INS(u8 size, ARM64Reg Rd, u8 index1, ARM64Reg Rn, u8 index2);
|
||||
void UMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
|
||||
void SMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
|
||||
|
||||
// One source
|
||||
void FCVT(u8 size_to, u8 size_from, ARM64Reg Rd, ARM64Reg Rn);
|
||||
|
||||
// Conversion between float and integer
|
||||
void FMOV(u8 size, bool top, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void SCVTF(ARM64Reg Rd, ARM64Reg Rn);
|
||||
void UCVTF(ARM64Reg Rd, ARM64Reg Rn);
|
||||
|
||||
// Float comparison
|
||||
void FCMP(ARM64Reg Rn, ARM64Reg Rm);
|
||||
|
@ -647,6 +687,13 @@ public:
|
|||
void TRN2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||
void ZIP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||
|
||||
// Shift by immediate
|
||||
void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
|
||||
void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
|
||||
void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
|
||||
void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
|
||||
// ABI related
|
||||
void ABI_PushRegisters(BitSet32 registers);
|
||||
void ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask = BitSet32(0));
|
||||
|
@ -662,11 +709,15 @@ private:
|
|||
void EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void Emit2RegMisc(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn);
|
||||
void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
|
||||
void Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void EmitConversion(bool sf, bool S, u32 type, u32 rmode, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm);
|
||||
void EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||
void EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||
void EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm);
|
||||
void EmitShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn);
|
||||
};
|
||||
|
||||
class ARM64CodeBlock : public CodeBlock<ARM64XEmitter>
|
||||
|
|
|
@ -165,6 +165,7 @@ set(SRCS ActionReplay.cpp
|
|||
PowerPC/Interpreter/Interpreter_Paired.cpp
|
||||
PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
|
||||
PowerPC/Interpreter/Interpreter_Tables.cpp
|
||||
PowerPC/JitCommon/JitAsmCommon.cpp
|
||||
PowerPC/JitCommon/JitBase.cpp
|
||||
PowerPC/JitCommon/JitCache.cpp
|
||||
PowerPC/JitILCommon/IR.cpp
|
||||
|
@ -195,7 +196,6 @@ if(_M_X86)
|
|||
PowerPC/Jit64/Jit_Paired.cpp
|
||||
PowerPC/Jit64/JitRegCache.cpp
|
||||
PowerPC/Jit64/Jit_SystemRegisters.cpp
|
||||
PowerPC/JitCommon/JitAsmCommon.cpp
|
||||
PowerPC/JitCommon/JitBackpatch.cpp
|
||||
PowerPC/JitCommon/Jit_Util.cpp
|
||||
PowerPC/JitCommon/TrampolineCache.cpp)
|
||||
|
@ -230,6 +230,7 @@ elseif(_M_ARM_64)
|
|||
PowerPC/JitArm64/JitArm64_LoadStore.cpp
|
||||
PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
|
||||
PowerPC/JitArm64/JitArm64_Paired.cpp
|
||||
PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
|
||||
PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
|
||||
PowerPC/JitArm64/JitArm64_Tables.cpp)
|
||||
endif()
|
||||
|
|
|
@ -33,7 +33,7 @@ void JitArm::psq_l(UGeckoInstruction inst)
|
|||
UBFX(R12, R11, 16, 3); // Type
|
||||
LSL(R12, R12, 2);
|
||||
UBFX(R11, R11, 24, 6); // Scale
|
||||
LSL(R11, R11, 2);
|
||||
LSL(R11, R11, 3);
|
||||
|
||||
Operand2 off;
|
||||
if (TryMakeOperand2(offset, off))
|
||||
|
@ -84,7 +84,7 @@ void JitArm::psq_lx(UGeckoInstruction inst)
|
|||
UBFX(R12, R11, 16, 3); // Type
|
||||
LSL(R12, R12, 2);
|
||||
UBFX(R11, R11, 24, 6); // Scale
|
||||
LSL(R11, R11, 2);
|
||||
LSL(R11, R11, 3);
|
||||
|
||||
if (inst.RA || update) // Always uses the register on update
|
||||
{
|
||||
|
@ -136,7 +136,7 @@ void JitArm::psq_st(UGeckoInstruction inst)
|
|||
UBFX(R12, R11, 0, 3); // Type
|
||||
LSL(R12, R12, 2);
|
||||
UBFX(R11, R11, 8, 6); // Scale
|
||||
LSL(R11, R11, 2);
|
||||
LSL(R11, R11, 3);
|
||||
|
||||
Operand2 off;
|
||||
if (TryMakeOperand2(offset, off))
|
||||
|
@ -187,7 +187,7 @@ void JitArm::psq_stx(UGeckoInstruction inst)
|
|||
UBFX(R12, R11, 0, 3); // Type
|
||||
LSL(R12, R12, 2);
|
||||
UBFX(R11, R11, 8, 6); // Scale
|
||||
LSL(R11, R11, 2);
|
||||
LSL(R11, R11, 3);
|
||||
|
||||
if (inst.RA || update) // Always uses the register on update
|
||||
{
|
||||
|
|
|
@ -27,46 +27,6 @@ using namespace ArmGen;
|
|||
|
||||
JitArmAsmRoutineManager asm_routines;
|
||||
|
||||
// Quantization scale factors.
// Entries 0..31 hold 2^i; entries 32..63 hold 2^(i - 64) (i.e. 2^-32 .. 2^-1).
// NOTE(review): presumably indexed by the 6-bit GQR scale field - confirm against callers.
//
// All shifts are done on 1ULL: `1 << 31` overflows a 32-bit int and yields a
// negative value, which would flip the sign of the affected table entries.
static const float GC_ALIGNED16(m_quantizeTableS[]) =
{
	(1ULL <<  0), (1ULL <<  1), (1ULL <<  2), (1ULL <<  3),
	(1ULL <<  4), (1ULL <<  5), (1ULL <<  6), (1ULL <<  7),
	(1ULL <<  8), (1ULL <<  9), (1ULL << 10), (1ULL << 11),
	(1ULL << 12), (1ULL << 13), (1ULL << 14), (1ULL << 15),
	(1ULL << 16), (1ULL << 17), (1ULL << 18), (1ULL << 19),
	(1ULL << 20), (1ULL << 21), (1ULL << 22), (1ULL << 23),
	(1ULL << 24), (1ULL << 25), (1ULL << 26), (1ULL << 27),
	(1ULL << 28), (1ULL << 29), (1ULL << 30), (1ULL << 31),
	1.0 / (1ULL << 32), 1.0 / (1ULL << 31), 1.0 / (1ULL << 30), 1.0 / (1ULL << 29),
	1.0 / (1ULL << 28), 1.0 / (1ULL << 27), 1.0 / (1ULL << 26), 1.0 / (1ULL << 25),
	1.0 / (1ULL << 24), 1.0 / (1ULL << 23), 1.0 / (1ULL << 22), 1.0 / (1ULL << 21),
	1.0 / (1ULL << 20), 1.0 / (1ULL << 19), 1.0 / (1ULL << 18), 1.0 / (1ULL << 17),
	1.0 / (1ULL << 16), 1.0 / (1ULL << 15), 1.0 / (1ULL << 14), 1.0 / (1ULL << 13),
	1.0 / (1ULL << 12), 1.0 / (1ULL << 11), 1.0 / (1ULL << 10), 1.0 / (1ULL <<  9),
	1.0 / (1ULL <<  8), 1.0 / (1ULL <<  7), 1.0 / (1ULL <<  6), 1.0 / (1ULL <<  5),
	1.0 / (1ULL <<  4), 1.0 / (1ULL <<  3), 1.0 / (1ULL <<  2), 1.0 / (1ULL <<  1),
};
|
||||
|
||||
// Dequantization scale factors; each entry is the reciprocal of the
// quantization factor at the same index.
// Entries 0..31 hold 2^-i; entries 32..63 hold 2^(64 - i) (i.e. 2^32 .. 2^1).
// NOTE(review): presumably indexed by the 6-bit GQR scale field - confirm against callers.
//
// All shifts are done on 1ULL: `1 << 31` overflows a 32-bit int and yields a
// negative value, which would flip the sign of the affected table entries.
static const float GC_ALIGNED16(m_dequantizeTableS[]) =
{
	1.0 / (1ULL <<  0), 1.0 / (1ULL <<  1), 1.0 / (1ULL <<  2), 1.0 / (1ULL <<  3),
	1.0 / (1ULL <<  4), 1.0 / (1ULL <<  5), 1.0 / (1ULL <<  6), 1.0 / (1ULL <<  7),
	1.0 / (1ULL <<  8), 1.0 / (1ULL <<  9), 1.0 / (1ULL << 10), 1.0 / (1ULL << 11),
	1.0 / (1ULL << 12), 1.0 / (1ULL << 13), 1.0 / (1ULL << 14), 1.0 / (1ULL << 15),
	1.0 / (1ULL << 16), 1.0 / (1ULL << 17), 1.0 / (1ULL << 18), 1.0 / (1ULL << 19),
	1.0 / (1ULL << 20), 1.0 / (1ULL << 21), 1.0 / (1ULL << 22), 1.0 / (1ULL << 23),
	1.0 / (1ULL << 24), 1.0 / (1ULL << 25), 1.0 / (1ULL << 26), 1.0 / (1ULL << 27),
	1.0 / (1ULL << 28), 1.0 / (1ULL << 29), 1.0 / (1ULL << 30), 1.0 / (1ULL << 31),
	(1ULL << 32), (1ULL << 31), (1ULL << 30), (1ULL << 29),
	(1ULL << 28), (1ULL << 27), (1ULL << 26), (1ULL << 25),
	(1ULL << 24), (1ULL << 23), (1ULL << 22), (1ULL << 21),
	(1ULL << 20), (1ULL << 19), (1ULL << 18), (1ULL << 17),
	(1ULL << 16), (1ULL << 15), (1ULL << 14), (1ULL << 13),
	(1ULL << 12), (1ULL << 11), (1ULL << 10), (1ULL <<  9),
	(1ULL <<  8), (1ULL <<  7), (1ULL <<  6), (1ULL <<  5),
	(1ULL <<  4), (1ULL <<  3), (1ULL <<  2), (1ULL <<  1),
};
|
||||
|
||||
static void WriteDual32(u32 value1, u32 value2, u32 address)
|
||||
{
|
||||
Memory::Write_U32(value1, address);
|
||||
|
|
|
@ -168,6 +168,10 @@ public:
|
|||
void ps_sum0(UGeckoInstruction inst);
|
||||
void ps_sum1(UGeckoInstruction inst);
|
||||
|
||||
// Loadstore paired
|
||||
void psq_l(UGeckoInstruction inst);
|
||||
void psq_st(UGeckoInstruction inst);
|
||||
|
||||
private:
|
||||
Arm64GPRCache gpr;
|
||||
Arm64FPRCache fpr;
|
||||
|
|
|
@ -0,0 +1,130 @@
|
|||
// Copyright 2014 Dolphin Emulator Project
|
||||
// Licensed under GPLv2
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "Common/Arm64Emitter.h"
|
||||
#include "Common/Common.h"
|
||||
#include "Common/StringUtil.h"
|
||||
|
||||
#include "Core/Core.h"
|
||||
#include "Core/CoreTiming.h"
|
||||
#include "Core/PowerPC/PowerPC.h"
|
||||
#include "Core/PowerPC/PPCTables.h"
|
||||
#include "Core/PowerPC/JitArm64/Jit.h"
|
||||
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
|
||||
#include "Core/PowerPC/JitArm64/JitAsm.h"
|
||||
|
||||
using namespace Arm64Gen;
|
||||
|
||||
// JIT handler for the quantized paired load (primary opcodes 56 and 57; 57 is
// the form that also writes the effective address back to RA).
//
// Emits code that builds the effective address, extracts the load type and
// scale fields from the selected GQR, calls the matching routine through
// asm_routines.pairedLoadQuantized, and converts the routine's result from Q0
// into the destination FPR with FCVTL.
void JitArm64::psq_l(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStorePairedOff);
	FALLBACK_IF(js.memcheck || !SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem);

	// Register contract with the quantized load routines:
	//   X30 - link register
	//   X0  - scale
	//   X1  - address
	//   X2  - temporary
	//   Q0  - returned value
	//   Q1  - temporary
	bool writes_back = inst.OPCD == 57;
	s32 displacement = inst.SIMM_12;

	gpr.Lock(W0, W1, W2, W30);
	fpr.Lock(Q0, Q1);

	ARM64Reg base_reg = gpr.R(inst.RA);
	ARM64Reg gqr_reg = W0;
	ARM64Reg ea_reg = W1;
	ARM64Reg kind_reg = W2;

	// Fetch the GQR selected by the instruction from the PPC state block.
	LDR(INDEX_UNSIGNED, gqr_reg, X29, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I]));

	// Effective address: RA + displacement, or just the displacement when RA
	// is 0 on the non-update form (the update form always uses the register).
	if (!inst.RA && !writes_back)
	{
		MOVI2R(ea_reg, (u32)displacement);
	}
	else if (displacement < 0)
	{
		SUB(ea_reg, gpr.R(inst.RA), std::abs(displacement));
	}
	else
	{
		ADD(ea_reg, gpr.R(inst.RA), displacement);
	}

	UBFM(kind_reg, gqr_reg, 16, 18); // Type
	UBFM(gqr_reg, gqr_reg, 24, 29);  // Scale

	if (writes_back)
		MOV(base_reg, ea_reg);

	// Dispatch: W selects which half of the routine table is used, the type
	// field indexes within that half.
	MOVI2R(X30, (u64)&asm_routines.pairedLoadQuantized[inst.W * 8]);
	LDR(X30, X30, ArithOption(EncodeRegTo64(kind_reg), true));
	BLR(X30);

	fpr.BindToRegister(inst.RS, false);
	ARM64Reg dest_fpr = fpr.R(inst.RS);
	m_float_emit.FCVTL(64, EncodeRegToDouble(dest_fpr), D0);
	if (inst.W)
	{
		// Single-value form: the second element becomes 1.0.
		m_float_emit.FMOV(D0, 0x70); // 1.0 as a Double
		m_float_emit.INS(64, dest_fpr, 1, Q0, 0);
	}

	gpr.Unlock(W0, W1, W2, W30);
	fpr.Unlock(Q0, Q1);
}
|
||||
|
||||
// JIT handler for the quantized paired store (primary opcodes 60 and 61; 61 is
// the form that also writes the effective address back to RA).
//
// Emits code that builds the effective address, extracts the store type and
// scale fields from the selected GQR, narrows the source FPR into D0 with
// FCVTN, and calls the matching routine through
// asm_routines.pairedStoreQuantized.
void JitArm64::psq_st(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStorePairedOff);
	FALLBACK_IF(js.memcheck || !SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem);

	// Register contract with the quantized store routines:
	//   X30 - link register
	//   X0  - scale
	//   X1  - address
	//   Q0  - value to store
	bool writes_back = inst.OPCD == 61;
	s32 displacement = inst.SIMM_12;

	gpr.Lock(W0, W1, W2, W30);
	fpr.Lock(Q0, Q1);

	ARM64Reg base_reg = gpr.R(inst.RA);
	ARM64Reg gqr_reg = W0;
	ARM64Reg ea_reg = W1;
	// The type lives in a scratch register handed out by the cache; it is
	// released together with the fixed registers at the end.
	ARM64Reg kind_reg = gpr.GetReg();

	// Fetch the GQR selected by the instruction from the PPC state block.
	LDR(INDEX_UNSIGNED, gqr_reg, X29, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I]));

	// Effective address: RA + displacement, or just the displacement when RA
	// is 0 on the non-update form (the update form always uses the register).
	if (!inst.RA && !writes_back)
	{
		MOVI2R(ea_reg, (u32)displacement);
	}
	else if (displacement < 0)
	{
		SUB(ea_reg, gpr.R(inst.RA), std::abs(displacement));
	}
	else
	{
		ADD(ea_reg, gpr.R(inst.RA), displacement);
	}

	UBFM(kind_reg, gqr_reg, 0, 2);  // Type
	UBFM(gqr_reg, gqr_reg, 8, 13);  // Scale

	if (writes_back)
		MOV(base_reg, ea_reg);

	ARM64Reg src_fpr = fpr.R(inst.RS);
	m_float_emit.FCVTN(32, D0, src_fpr);

	// Dispatch: W selects which half of the routine table is used, the type
	// field indexes within that half.
	MOVI2R(X30, (u64)&asm_routines.pairedStoreQuantized[inst.W * 8]);
	LDR(X30, X30, ArithOption(EncodeRegTo64(kind_reg), true));
	BLR(X30);

	gpr.Unlock(W0, W1, W2, W30, kind_reg);
	fpr.Unlock(Q0, Q1);
}
|
||||
|
|
@ -94,10 +94,10 @@ static GekkoOPTemplate primarytable[] =
|
|||
{54, &JitArm64::stfXX}, //"stfd", OPTYPE_STOREFP, FL_IN_A}},
|
||||
{55, &JitArm64::stfXX}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
|
||||
|
||||
{56, &JitArm64::FallBackToInterpreter}, //"psq_l", OPTYPE_PS, FL_IN_A}},
|
||||
{57, &JitArm64::FallBackToInterpreter}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
|
||||
{60, &JitArm64::FallBackToInterpreter}, //"psq_st", OPTYPE_PS, FL_IN_A}},
|
||||
{61, &JitArm64::FallBackToInterpreter}, //"psq_stu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
|
||||
{56, &JitArm64::psq_l}, //"psq_l", OPTYPE_PS, FL_IN_A}},
|
||||
{57, &JitArm64::psq_l}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
|
||||
{60, &JitArm64::psq_st}, //"psq_st", OPTYPE_PS, FL_IN_A}},
|
||||
{61, &JitArm64::psq_st}, //"psq_stu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
|
||||
|
||||
//missing: 0, 5, 6, 9, 22, 30, 62, 58
|
||||
{0, &JitArm64::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}},
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include "Core/PowerPC/PowerPC.h"
|
||||
#include "Core/PowerPC/JitArm64/Jit.h"
|
||||
#include "Core/PowerPC/JitArm64/JitAsm.h"
|
||||
#include "Core/PowerPC/JitCommon/JitAsmCommon.h"
|
||||
#include "Core/PowerPC/JitCommon/JitCache.h"
|
||||
|
||||
using namespace Arm64Gen;
|
||||
|
@ -89,9 +90,428 @@ void JitArm64AsmRoutineManager::Generate()
|
|||
ABI_PopRegisters(regs_to_save);
|
||||
RET(X30);
|
||||
|
||||
GenerateCommon();
|
||||
|
||||
FlushIcache();
|
||||
}
|
||||
|
||||
void JitArm64AsmRoutineManager::GenerateCommon()
|
||||
{
|
||||
// X0 is the scale
|
||||
// X1 is address
|
||||
// X2 is a temporary on stores
|
||||
// X30 is LR
|
||||
// Q0 is the return for loads
|
||||
// is the register for stores
|
||||
// Q1 is a temporary
|
||||
ARM64Reg addr_reg = X1;
|
||||
ARM64Reg scale_reg = X0;
|
||||
ARM64FloatEmitter float_emit(this);
|
||||
const u32 GPR_CALLER_SAVE = 0x6007FFFF;
|
||||
|
||||
const u8* loadPairedIllegal = GetCodePtr();
|
||||
BRK(100);
|
||||
const u8* loadPairedFloatTwo = GetCodePtr();
|
||||
{
|
||||
MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32);
|
||||
float_emit.LD1(32, 1, D0, addr_reg);
|
||||
float_emit.REV32(8, D0, D0);
|
||||
RET(X30);
|
||||
}
|
||||
const u8* loadPairedU8Two = GetCodePtr();
|
||||
{
|
||||
MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32);
|
||||
float_emit.LDR(16, INDEX_UNSIGNED, D0, addr_reg, 0);
|
||||
float_emit.UXTL(8, D0, D0);
|
||||
float_emit.UXTL(16, D0, D0);
|
||||
float_emit.UCVTF(32, D0, D0);
|
||||
|
||||
MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
|
||||
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||
float_emit.LD1R(32, D1, scale_reg);
|
||||
float_emit.FMUL(32, D0, D0, D1);
|
||||
RET(X30);
|
||||
}
|
||||
const u8* loadPairedS8Two = GetCodePtr();
|
||||
{
|
||||
MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32);
|
||||
float_emit.LDR(16, INDEX_UNSIGNED, D0, addr_reg, 0);
|
||||
float_emit.SXTL(8, D0, D0);
|
||||
float_emit.SXTL(16, D0, D0);
|
||||
float_emit.SCVTF(32, D0, D0);
|
||||
|
||||
MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
|
||||
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||
float_emit.LD1R(32, D1, scale_reg);
|
||||
float_emit.FMUL(32, D0, D0, D1);
|
||||
RET(X30);
|
||||
}
|
||||
const u8* loadPairedU16Two = GetCodePtr();
|
||||
{
|
||||
MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32);
|
||||
float_emit.LD1(16, 1, D0, addr_reg);
|
||||
float_emit.REV16(8, D0, D0);
|
||||
float_emit.UXTL(16, D0, D0);
|
||||
float_emit.UCVTF(32, D0, D0);
|
||||
|
||||
MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
|
||||
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||
float_emit.LD1R(32, D1, scale_reg);
|
||||
float_emit.FMUL(32, D0, D0, D1);
|
||||
RET(X30);
|
||||
}
|
||||
const u8* loadPairedS16Two = GetCodePtr();
|
||||
{
|
||||
MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32);
|
||||
float_emit.LD1(16, 1, D0, addr_reg);
|
||||
float_emit.REV16(8, D0, D0);
|
||||
float_emit.SXTL(16, D0, D0);
|
||||
float_emit.SCVTF(32, D0, D0);
|
||||
|
||||
MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
|
||||
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||
float_emit.LD1R(32, D1, scale_reg);
|
||||
float_emit.FMUL(32, D0, D0, D1);
|
||||
RET(X30);
|
||||
}
|
||||
|
||||
const u8* loadPairedFloatOne = GetCodePtr();
|
||||
{
|
||||
MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32);
|
||||
float_emit.LDR(32, INDEX_UNSIGNED, D0, addr_reg, 0);
|
||||
float_emit.REV32(8, D0, D0);
|
||||
RET(X30);
|
||||
}
|
||||
const u8* loadPairedU8One = GetCodePtr();
|
||||
{
|
||||
MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32);
|
||||
float_emit.LDR(8, INDEX_UNSIGNED, D0, addr_reg, 0);
|
||||
float_emit.UXTL(8, D0, D0);
|
||||
float_emit.UXTL(16, D0, D0);
|
||||
float_emit.UCVTF(32, D0, D0);
|
||||
|
||||
MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
|
||||
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||
float_emit.LD1R(32, D1, scale_reg);
|
||||
float_emit.FMUL(32, D0, D0, D1);
|
||||
RET(X30);
|
||||
}
|
||||
const u8* loadPairedS8One = GetCodePtr();
|
||||
{
|
||||
MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32);
|
||||
float_emit.LDR(8, INDEX_UNSIGNED, D0, addr_reg, 0);
|
||||
float_emit.SXTL(8, D0, D0);
|
||||
float_emit.SXTL(16, D0, D0);
|
||||
float_emit.SCVTF(32, D0, D0);
|
||||
|
||||
MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
|
||||
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||
float_emit.LD1R(32, D1, scale_reg);
|
||||
float_emit.FMUL(32, D0, D0, D1);
|
||||
RET(X30);
|
||||
}
|
||||
const u8* loadPairedU16One = GetCodePtr();
|
||||
{
|
||||
MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32);
|
||||
float_emit.LDR(16, INDEX_UNSIGNED, D0, addr_reg, 0);
|
||||
float_emit.REV16(8, D0, D0);
|
||||
float_emit.UXTL(16, D0, D0);
|
||||
float_emit.UCVTF(32, D0, D0);
|
||||
|
||||
MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
|
||||
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||
float_emit.LD1R(32, D1, scale_reg);
|
||||
float_emit.FMUL(32, D0, D0, D1);
|
||||
RET(X30);
|
||||
}
|
||||
const u8* loadPairedS16One = GetCodePtr();
|
||||
{
|
||||
MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32);
|
||||
float_emit.LDR(16, INDEX_UNSIGNED, D0, addr_reg, 0);
|
||||
float_emit.REV16(8, D0, D0);
|
||||
float_emit.SXTL(16, D0, D0);
|
||||
float_emit.SCVTF(32, D0, D0);
|
||||
|
||||
MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
|
||||
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||
float_emit.LD1R(32, D1, scale_reg);
|
||||
float_emit.FMUL(32, D0, D0, D1);
|
||||
RET(X30);
|
||||
}
|
||||
|
||||
pairedLoadQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));
|
||||
ReserveCodeSpace(16 * sizeof(u8*));
|
||||
|
||||
pairedLoadQuantized[0] = loadPairedFloatTwo;
|
||||
pairedLoadQuantized[1] = loadPairedIllegal;
|
||||
pairedLoadQuantized[2] = loadPairedIllegal;
|
||||
pairedLoadQuantized[3] = loadPairedIllegal;
|
||||
pairedLoadQuantized[4] = loadPairedU8Two;
|
||||
pairedLoadQuantized[5] = loadPairedU16Two;
|
||||
pairedLoadQuantized[6] = loadPairedS8Two;
|
||||
pairedLoadQuantized[7] = loadPairedS16Two;
|
||||
|
||||
pairedLoadQuantized[8] = loadPairedFloatOne;
|
||||
pairedLoadQuantized[9] = loadPairedIllegal;
|
||||
pairedLoadQuantized[10] = loadPairedIllegal;
|
||||
pairedLoadQuantized[11] = loadPairedIllegal;
|
||||
pairedLoadQuantized[12] = loadPairedU8One;
|
||||
pairedLoadQuantized[13] = loadPairedU16One;
|
||||
pairedLoadQuantized[14] = loadPairedS8One;
|
||||
pairedLoadQuantized[15] = loadPairedS16One;
|
||||
|
||||
// Stores
|
||||
const u8* storePairedIllegal = GetCodePtr();
|
||||
BRK(0x101);
|
||||
const u8* storePairedFloat = GetCodePtr();
|
||||
{
|
||||
BitSet32 gprs(GPR_CALLER_SAVE & ~7); // All except X0/X1/X2
|
||||
BitSet32 fprs(~3); // All except Q0/Q1
|
||||
|
||||
TST(DecodeReg(addr_reg), 6, 1);
|
||||
FixupBranch argh = B(CC_NEQ);
|
||||
|
||||
float_emit.REV32(8, D0, D0);
|
||||
MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32);
|
||||
float_emit.ST1(32, Q0, 0, addr_reg, SP);
|
||||
float_emit.ST1(32, Q0, 1, addr_reg, SP);
|
||||
RET(X30);
|
||||
|
||||
SetJumpTarget(argh);
|
||||
|
||||
ABI_PushRegisters(gprs);
|
||||
float_emit.ABI_PushRegisters(fprs);
|
||||
float_emit.UMOV(64, X0, Q0, 0);
|
||||
ORR(X0, SP, X0, ArithOption(X0, ST_ROR, 32));
|
||||
MOVI2R(X30, (u64)Memory::Write_U64);
|
||||
BLR(X30);
|
||||
float_emit.ABI_PopRegisters(fprs);
|
||||
ABI_PopRegisters(gprs);
|
||||
RET(X30);
|
||||
}
|
||||
const u8* storePairedU8 = GetCodePtr();
|
||||
const u8* storePairedS8 = GetCodePtr();
|
||||
{
|
||||
BitSet32 gprs(GPR_CALLER_SAVE & ~7); // All except X0/X1/X2
|
||||
BitSet32 fprs(~3); // All except Q0/Q1
|
||||
|
||||
MOVI2R(X2, (u64)&m_quantizeTableS);
|
||||
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||
float_emit.LD1R(32, D1, scale_reg);
|
||||
float_emit.FMUL(32, D0, D0, D1);
|
||||
float_emit.FCVTZU(32, D0, D0);
|
||||
float_emit.XTN(16, D0, D0);
|
||||
float_emit.XTN(8, D0, D0);
|
||||
|
||||
TST(DecodeReg(addr_reg), 6, 1);
|
||||
FixupBranch argh = B(CC_NEQ);
|
||||
MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32);
|
||||
float_emit.ST1(8, Q0, 0, addr_reg, SP);
|
||||
float_emit.ST1(8, Q0, 1, addr_reg, SP);
|
||||
RET(X30);
|
||||
|
||||
SetJumpTarget(argh);
|
||||
ABI_PushRegisters(gprs);
|
||||
float_emit.ABI_PushRegisters(fprs);
|
||||
float_emit.UMOV(16, W0, Q0, 0);
|
||||
REV16(W0, W0);
|
||||
MOVI2R(X30, (u64)Memory::Write_U16);
|
||||
BLR(X30);
|
||||
float_emit.ABI_PopRegisters(fprs);
|
||||
ABI_PopRegisters(gprs);
|
||||
RET(X30);
|
||||
}
|
||||
|
||||
const u8* storePairedU16 = GetCodePtr();
|
||||
const u8* storePairedS16 = GetCodePtr();
|
||||
{
|
||||
BitSet32 gprs(GPR_CALLER_SAVE & ~7); // All except X0/X1/X2
|
||||
BitSet32 fprs(~3); // All except Q0/Q1
|
||||
|
||||
MOVI2R(X2, (u64)&m_quantizeTableS);
|
||||
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||
float_emit.LD1R(32, D1, scale_reg);
|
||||
float_emit.FMUL(32, D0, D0, D1);
|
||||
float_emit.FCVTZU(32, D0, D0);
|
||||
float_emit.XTN(16, D0, D0);
|
||||
|
||||
TST(DecodeReg(addr_reg), 6, 1);
|
||||
FixupBranch argh = B(CC_NEQ);
|
||||
MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32);
|
||||
float_emit.ST1(16, Q0, 0, addr_reg, SP);
|
||||
float_emit.ST1(16, Q0, 1, addr_reg, SP);
|
||||
RET(X30);
|
||||
|
||||
SetJumpTarget(argh);
|
||||
ABI_PushRegisters(gprs);
|
||||
float_emit.ABI_PushRegisters(fprs);
|
||||
float_emit.UMOV(32, W0, Q0, 0);
|
||||
REV32(W0, W0);
|
||||
MOVI2R(X30, (u64)Memory::Write_U32);
|
||||
BLR(X30);
|
||||
float_emit.ABI_PopRegisters(fprs);
|
||||
ABI_PopRegisters(gprs);
|
||||
RET(X30);
|
||||
}
|
||||
|
||||
const u8* storeSingleFloat = GetCodePtr();
|
||||
{
|
||||
BitSet32 gprs(GPR_CALLER_SAVE & ~7); // All except X0/X1/X2
|
||||
BitSet32 fprs(~3); // All except Q0/Q1
|
||||
|
||||
TST(DecodeReg(addr_reg), 6, 1);
|
||||
FixupBranch argh = B(CC_NEQ);
|
||||
|
||||
float_emit.REV32(8, D0, D0);
|
||||
MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32);
|
||||
float_emit.STR(32, INDEX_UNSIGNED, D0, addr_reg, 0);
|
||||
RET(X30);
|
||||
|
||||
SetJumpTarget(argh);
|
||||
|
||||
ABI_PushRegisters(gprs);
|
||||
float_emit.ABI_PushRegisters(fprs);
|
||||
float_emit.UMOV(32, W0, Q0, 0);
|
||||
MOVI2R(X30, (u64)&Memory::Write_U32);
|
||||
BLR(X30);
|
||||
float_emit.ABI_PopRegisters(fprs);
|
||||
ABI_PopRegisters(gprs);
|
||||
RET(X30);
|
||||
}
|
||||
const u8* storeSingleU8 = GetCodePtr(); // Used by MKWii
|
||||
{
|
||||
BitSet32 gprs(GPR_CALLER_SAVE & ~7); // All except X0/X1/X2
|
||||
BitSet32 fprs(~3); // All except Q0/Q1
|
||||
|
||||
MOVI2R(X2, (u64)&m_quantizeTableS);
|
||||
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
||||
float_emit.FMUL(32, D0, D0, D1);
|
||||
float_emit.FCVTZU(32, D0, D0);
|
||||
float_emit.XTN(16, D0, D0);
|
||||
float_emit.XTN(8, D0, D0);
|
||||
|
||||
TST(DecodeReg(addr_reg), 6, 1);
|
||||
FixupBranch argh = B(CC_NEQ);
|
||||
MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32);
|
||||
float_emit.ST1(8, Q0, 0, addr_reg);
|
||||
RET(X30);
|
||||
|
||||
SetJumpTarget(argh);
|
||||
ABI_PushRegisters(gprs);
|
||||
float_emit.ABI_PushRegisters(fprs);
|
||||
float_emit.UMOV(32, W0, Q0, 0);
|
||||
MOVI2R(X30, (u64)&Memory::Write_U8);
|
||||
BLR(X30);
|
||||
float_emit.ABI_PopRegisters(fprs);
|
||||
ABI_PopRegisters(gprs);
|
||||
RET(X30);
|
||||
}
|
||||
const u8* storeSingleS8 = GetCodePtr();
|
||||
{
|
||||
BitSet32 gprs(GPR_CALLER_SAVE & ~7); // All except X0/X1/X2
|
||||
BitSet32 fprs(~3); // All except Q0/Q1
|
||||
|
||||
MOVI2R(X2, (u64)&m_quantizeTableS);
|
||||
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
||||
float_emit.FMUL(32, D0, D0, D1);
|
||||
float_emit.FCVTZS(32, D0, D0);
|
||||
float_emit.XTN(16, D0, D0);
|
||||
float_emit.XTN(8, D0, D0);
|
||||
|
||||
TST(DecodeReg(addr_reg), 6, 1);
|
||||
FixupBranch argh = B(CC_NEQ);
|
||||
MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32);
|
||||
float_emit.ST1(8, Q0, 0, addr_reg);
|
||||
RET(X30);
|
||||
|
||||
SetJumpTarget(argh);
|
||||
ABI_PushRegisters(gprs);
|
||||
float_emit.ABI_PushRegisters(fprs);
|
||||
float_emit.SMOV(32, W0, Q0, 0);
|
||||
MOVI2R(X30, (u64)&Memory::Write_U8);
|
||||
BLR(X30);
|
||||
float_emit.ABI_PopRegisters(fprs);
|
||||
ABI_PopRegisters(gprs);
|
||||
RET(X30);
|
||||
}
|
||||
const u8* storeSingleU16 = GetCodePtr(); // Used by MKWii
|
||||
{
|
||||
BitSet32 gprs(GPR_CALLER_SAVE & ~7); // All except X0/X1/X2
|
||||
BitSet32 fprs(~3); // All except Q0/Q1
|
||||
|
||||
MOVI2R(X2, (u64)&m_quantizeTableS);
|
||||
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
||||
float_emit.FMUL(32, D0, D0, D1);
|
||||
float_emit.FCVTZU(32, D0, D0);
|
||||
float_emit.XTN(16, D0, D0);
|
||||
|
||||
TST(DecodeReg(addr_reg), 6, 1);
|
||||
FixupBranch argh = B(CC_NEQ);
|
||||
MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32);
|
||||
float_emit.ST1(16, Q0, 0, addr_reg);
|
||||
RET(X30);
|
||||
|
||||
SetJumpTarget(argh);
|
||||
ABI_PushRegisters(gprs);
|
||||
float_emit.ABI_PushRegisters(fprs);
|
||||
float_emit.UMOV(32, W0, Q0, 0);
|
||||
MOVI2R(X30, (u64)&Memory::Write_U16);
|
||||
BLR(X30);
|
||||
float_emit.ABI_PopRegisters(fprs);
|
||||
ABI_PopRegisters(gprs);
|
||||
RET(X30);
|
||||
}
|
||||
const u8* storeSingleS16 = GetCodePtr();
|
||||
{
|
||||
BitSet32 gprs(GPR_CALLER_SAVE & ~7); // All except X0/X1/X2
|
||||
BitSet32 fprs(~3); // All except Q0/Q1
|
||||
|
||||
MOVI2R(X2, (u64)&m_quantizeTableS);
|
||||
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
||||
float_emit.FMUL(32, D0, D0, D1);
|
||||
float_emit.FCVTZS(32, D0, D0);
|
||||
float_emit.XTN(16, D0, D0);
|
||||
|
||||
TST(DecodeReg(addr_reg), 6, 1);
|
||||
FixupBranch argh = B(CC_NEQ);
|
||||
MOVK(addr_reg, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32);
|
||||
float_emit.ST1(16, Q0, 0, addr_reg);
|
||||
RET(X30);
|
||||
|
||||
SetJumpTarget(argh);
|
||||
ABI_PushRegisters(gprs);
|
||||
float_emit.ABI_PushRegisters(fprs);
|
||||
float_emit.SMOV(32, W0, Q0, 0);
|
||||
|
||||
MOVI2R(X30, (u64)&Memory::Write_U16);
|
||||
BLR(X30);
|
||||
float_emit.ABI_PopRegisters(fprs);
|
||||
ABI_PopRegisters(gprs);
|
||||
RET(X30);
|
||||
}
|
||||
|
||||
pairedStoreQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));
|
||||
ReserveCodeSpace(16 * sizeof(u8*));
|
||||
|
||||
pairedStoreQuantized[0] = storePairedFloat;
|
||||
pairedStoreQuantized[1] = storePairedIllegal;
|
||||
pairedStoreQuantized[2] = storePairedIllegal;
|
||||
pairedStoreQuantized[3] = storePairedIllegal;
|
||||
pairedStoreQuantized[4] = storePairedU8;
|
||||
pairedStoreQuantized[5] = storePairedU16;
|
||||
pairedStoreQuantized[6] = storePairedS8;
|
||||
pairedStoreQuantized[7] = storePairedS16;
|
||||
|
||||
pairedStoreQuantized[8] = storeSingleFloat;
|
||||
pairedStoreQuantized[9] = storePairedIllegal;
|
||||
pairedStoreQuantized[10] = storePairedIllegal;
|
||||
pairedStoreQuantized[11] = storePairedIllegal;
|
||||
pairedStoreQuantized[12] = storeSingleU8;
|
||||
pairedStoreQuantized[13] = storeSingleU16;
|
||||
pairedStoreQuantized[14] = storeSingleS8;
|
||||
pairedStoreQuantized[15] = storeSingleS16;
|
||||
}
|
||||
|
|
|
@ -194,7 +194,7 @@ void CommonAsmRoutines::GenMfcr()
|
|||
// 16-entry byte-index table: the first four indices are reversed (3, 2, 1, 0),
// indices 4..15 map to themselves.
// NOTE(review): presumably a shuffle mask that byte-swaps one 32-bit value - confirm at use sites.
const u8 GC_ALIGNED16(pbswapShuffle1x4[16]) = { 3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
|
||||
// 16-entry byte-index table: indices 0..3 and 4..7 are each reversed
// (3, 2, 1, 0 and 7, 6, 5, 4), indices 8..15 map to themselves.
// NOTE(review): presumably a shuffle mask that byte-swaps two 32-bit values - confirm at use sites.
const u8 GC_ALIGNED16(pbswapShuffle2x4[16]) = { 3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15 };
|
||||
|
||||
static const float GC_ALIGNED16(m_quantizeTableS[]) =
|
||||
const float GC_ALIGNED16(m_quantizeTableS[]) =
|
||||
{
|
||||
(1ULL << 0), (1ULL << 0), (1ULL << 1), (1ULL << 1), (1ULL << 2), (1ULL << 2), (1ULL << 3), (1ULL << 3),
|
||||
(1ULL << 4), (1ULL << 4), (1ULL << 5), (1ULL << 5), (1ULL << 6), (1ULL << 6), (1ULL << 7), (1ULL << 7),
|
||||
|
@ -222,7 +222,7 @@ static const float GC_ALIGNED16(m_quantizeTableS[]) =
|
|||
1.0 / (1ULL << 2), 1.0 / (1ULL << 2), 1.0 / (1ULL << 1), 1.0 / (1ULL << 1),
|
||||
};
|
||||
|
||||
static const float GC_ALIGNED16(m_dequantizeTableS[]) =
|
||||
const float GC_ALIGNED16(m_dequantizeTableS[]) =
|
||||
{
|
||||
1.0 / (1ULL << 0), 1.0 / (1ULL << 0), 1.0 / (1ULL << 1), 1.0 / (1ULL << 1),
|
||||
1.0 / (1ULL << 2), 1.0 / (1ULL << 2), 1.0 / (1ULL << 3), 1.0 / (1ULL << 3),
|
||||
|
|
|
@ -9,6 +9,8 @@
|
|||
extern const u8 GC_ALIGNED16(pbswapShuffle1x4[16]);
|
||||
extern const u8 GC_ALIGNED16(pbswapShuffle2x4[16]);
|
||||
extern const float GC_ALIGNED16(m_one[]);
|
||||
extern const float GC_ALIGNED16(m_quantizeTableS[]);
|
||||
extern const float GC_ALIGNED16(m_dequantizeTableS[]);
|
||||
|
||||
class CommonAsmRoutinesBase
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue