Merge pull request #2146 from Sonicadvance1/aarch64_optimize_fpr_push_pop

[AArch64] Optimize FPR pushing and popping.
This commit is contained in:
Ryan Houdek 2015-03-08 08:51:45 -05:00
commit a9622c247b
6 changed files with 309 additions and 47 deletions

View File

@ -512,6 +512,9 @@ void ARM64XEmitter::EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64R
case INDEX_PRE: case INDEX_PRE:
type_encode = 0b011; type_encode = 0b011;
break; break;
case INDEX_SIGNED:
_assert_msg_(DYNA_REC, false, "%s doesn't support INDEX_SIGNED!", __FUNCTION__);
break;
} }
if (b64Bit) if (b64Bit)
@ -1891,6 +1894,27 @@ void ARM64FloatEmitter::EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opc
(encoded_size << 10) | (Rn << 5) | Rt); (encoded_size << 10) | (Rn << 5) | Rt);
} }
// Writes one post-indexed "load/store multiple structures" instruction word.
//   size   - element size in bits; 16, 32 and 64 select size-field values 1, 2 and 3,
//            every other value selects 0
//   L      - placed in bit 22 (the LD1 wrapper passes 1, the ST1 wrapper passes 0)
//   opcode - placed at bit 12 upwards
//   Rt     - first vector register; IsQuad(Rt) supplies bit 30
//   Rn     - base register, placed at bit 5 upwards
//   Rm     - post-index register field, placed at bit 16 upwards
void ARM64FloatEmitter::EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)
{
	// IsQuad is queried on the caller's register value, before DecodeReg rewrites Rt.
	const bool is_quad = IsQuad(Rt);

	u32 size_field;
	switch (size)
	{
	case 16:
		size_field = 1;
		break;
	case 32:
		size_field = 2;
		break;
	case 64:
		size_field = 3;
		break;
	default:
		size_field = 0;
		break;
	}

	Rt = DecodeReg(Rt);
	Rn = DecodeReg(Rn);
	Rm = DecodeReg(Rm);

	Write32((is_quad << 30) | (0b11001 << 23) | (L << 22) | (Rm << 16) | (opcode << 12) |
	        (size_field << 10) | (Rn << 5) | Rt);
}
void ARM64FloatEmitter::EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn) void ARM64FloatEmitter::EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
{ {
_assert_msg_(DYNA_REC, !IsQuad(Rd), "%s doesn't support vector!", __FUNCTION__); _assert_msg_(DYNA_REC, !IsQuad(Rd), "%s doesn't support vector!", __FUNCTION__);
@ -1923,6 +1947,55 @@ void ARM64FloatEmitter::EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM
Write32((size << 30) | (0b1111 << 26) | (op << 22) | ((imm & 0x1FF) << 12) | (Rn << 5) | Rt); Write32((size << 30) | (0b1111 << 26) | (op << 22) | ((imm & 0x1FF) << 12) | (Rn << 5) | Rt);
} }
// Writes one SIMD&FP load/store pair instruction word (the encoder behind LDP/STP).
//   size - register width in bits: 32, 64 or 128. Any other value trips an assertion.
//   load - true for a load pair, false for a store pair (placed in bit 22)
//   type - INDEX_SIGNED, INDEX_POST or INDEX_PRE; INDEX_UNSIGNED trips an assertion
//   Rt   - first register of the pair
//   Rt2  - second register of the pair
//   Rn   - base register
//   imm  - byte offset. It must be a multiple of size / 8, and after dividing by
//          size / 8 it must fit the signed 7-bit immediate field (-64..63).
void ARM64FloatEmitter::EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
{
	u32 type_encode = 0;
	switch (type)
	{
	case INDEX_SIGNED:
		type_encode = 0b010;
		break;
	case INDEX_POST:
		type_encode = 0b001;
		break;
	case INDEX_PRE:
		type_encode = 0b011;
		break;
	case INDEX_UNSIGNED:
		_assert_msg_(DYNA_REC, false, "%s doesn't support INDEX_UNSIGNED!", __FUNCTION__);
		break;
	}

	// opc selects the register width; scale_shift is log2 of the access size in bytes.
	u32 opc = 0;
	u32 scale_shift = 0;
	switch (size)
	{
	case 128:
		opc = 2;
		scale_shift = 4;
		break;
	case 64:
		opc = 1;
		scale_shift = 3;
		break;
	case 32:
		opc = 0;
		scale_shift = 2;
		break;
	default:
		// Previously an unknown size silently produced a 32-bit-form instruction
		// with an unscaled offset.
		_assert_msg_(DYNA_REC, false, "%s doesn't support size %u!", __FUNCTION__, size);
		break;
	}

	// The offset has to be a whole multiple of the access size.
	const s32 alignment_mask = (1 << scale_shift) - 1;
	_assert_msg_(DYNA_REC, !(imm & alignment_mask), "%s received invalid offset 0x%x!", __FUNCTION__, imm);

	// The immediate field is 7 bits wide and signed. Masking an out-of-range value
	// would silently encode a different address, so reject it instead.
	const s32 scaled_imm = imm >> scale_shift;
	_assert_msg_(DYNA_REC, scaled_imm >= -64 && scaled_imm <= 63, "%s received out of range offset 0x%x!", __FUNCTION__, imm);

	Rt = DecodeReg(Rt);
	Rt2 = DecodeReg(Rt2);
	Rn = DecodeReg(Rn);

	Write32((opc << 30) | (0b1011 << 26) | (type_encode << 23) | (load << 22) |
	        ((scaled_imm & 0x7F) << 15) | (Rt2 << 10) | (Rn << 5) | Rt);
}
void ARM64FloatEmitter::LDR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm) void ARM64FloatEmitter::LDR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{ {
EmitLoadStoreImmediate(size, 1, type, Rt, Rn, imm); EmitLoadStoreImmediate(size, 1, type, Rt, Rn, imm);
@ -2234,6 +2307,22 @@ void ARM64FloatEmitter::LD1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn)
opcode = 0b0010; opcode = 0b0010;
EmitLoadStoreMultipleStructure(size, 1, opcode, Rt, Rn); EmitLoadStoreMultipleStructure(size, 1, opcode, Rt, Rn);
} }
// Post-indexed LD1 of `count` consecutive vector registers starting at Rt.
//   size  - element size in bits, forwarded to the encoder
//   count - number of registers; 1 to 4 is asserted
//   type  - must be INDEX_POST (asserted)
//   Rn    - base register
//   Rm    - post-index register operand, forwarded to the encoder
void ARM64FloatEmitter::LD1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)
{
	_assert_msg_(DYNA_REC, !(count == 0 || count > 4), "%s must have a count of 1 to 4 registers!", __FUNCTION__);
	_assert_msg_(DYNA_REC, type == INDEX_POST, "%s only supports post indexing!", __FUNCTION__);

	// The register count is expressed through the opcode field.
	u32 opcode;
	switch (count)
	{
	case 1:
		opcode = 0b111;
		break;
	case 2:
		opcode = 0b1010;
		break;
	case 3:
		opcode = 0b0110;
		break;
	case 4:
		opcode = 0b0010;
		break;
	default:
		// Only reachable when the count assertion above did not stop execution.
		opcode = 0;
		break;
	}

	EmitLoadStoreMultipleStructurePost(size, 1, opcode, Rt, Rn, Rm);
}
void ARM64FloatEmitter::ST1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn) void ARM64FloatEmitter::ST1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn)
{ {
_assert_msg_(DYNA_REC, !(count == 0 || count > 4), "%s must have a count of 1 to 4 registers!", __FUNCTION__); _assert_msg_(DYNA_REC, !(count == 0 || count > 4), "%s must have a count of 1 to 4 registers!", __FUNCTION__);
@ -2248,6 +2337,32 @@ void ARM64FloatEmitter::ST1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn)
opcode = 0b0010; opcode = 0b0010;
EmitLoadStoreMultipleStructure(size, 0, opcode, Rt, Rn); EmitLoadStoreMultipleStructure(size, 0, opcode, Rt, Rn);
} }
// Post-indexed ST1 of `count` consecutive vector registers starting at Rt.
//   size  - element size in bits, forwarded to the encoder
//   count - number of registers; 1 to 4 is asserted
//   type  - must be INDEX_POST (asserted)
//   Rn    - base register
//   Rm    - post-index register operand, forwarded to the encoder
void ARM64FloatEmitter::ST1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)
{
	_assert_msg_(DYNA_REC, !(count == 0 || count > 4), "%s must have a count of 1 to 4 registers!", __FUNCTION__);
	_assert_msg_(DYNA_REC, type == INDEX_POST, "%s only supports post indexing!", __FUNCTION__);

	// Opcode field indexed by register count; slot 0 is the fallback for invalid counts.
	static const u32 opcode_for_count[5] = { 0, 0b111, 0b1010, 0b0110, 0b0010 };
	const u32 opcode = (count <= 4) ? opcode_for_count[count] : 0;

	EmitLoadStoreMultipleStructurePost(size, 0, opcode, Rt, Rn, Rm);
}
// Loadstore paired
// Load pair: forwards to EncodeLoadStorePair with the load flag set.
//   size - register width in bits (the encoder handles 32, 64 and 128)
//   type - indexing mode; the encoder asserts on INDEX_UNSIGNED
//   Rt   - first destination register
//   Rt2  - second destination register
//   Rn   - base register
//   imm  - byte offset; the encoder asserts it is aligned to the access size
void ARM64FloatEmitter::LDP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
{
	const bool is_load = true;
	EncodeLoadStorePair(size, is_load, type, Rt, Rt2, Rn, imm);
}
// Store pair: forwards to EncodeLoadStorePair with the load flag cleared.
//   size - register width in bits (the encoder handles 32, 64 and 128)
//   type - indexing mode; the encoder asserts on INDEX_UNSIGNED
//   Rt   - first source register
//   Rt2  - second source register
//   Rn   - base register
//   imm  - byte offset; the encoder asserts it is aligned to the access size
void ARM64FloatEmitter::STP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
{
	const bool is_load = false;
	EncodeLoadStorePair(size, is_load, type, Rt, Rt2, Rn, imm);
}
// Scalar - 1 Source // Scalar - 1 Source
void ARM64FloatEmitter::FABS(ARM64Reg Rd, ARM64Reg Rn) void ARM64FloatEmitter::FABS(ARM64Reg Rd, ARM64Reg Rn)
@ -2759,24 +2874,161 @@ void ARM64FloatEmitter::FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8
EmitVectorxElement(0, 2 | (size >> 6), L, 0b1001, H, Rd, Rn, Rm); EmitVectorxElement(0, 2 | (size >> 6), L, 0b1001, H, Rd, Rn, Rm);
} }
// NOTE(review): this span looks like a side-by-side diff rendering. Where a line holds
// two statements, the left one appears to be the removed implementation (one pre-indexed
// STR per register) and the right one the added implementation described here.
//
// Added ABI_PushRegisters(registers, tmp): saves the vector registers Q0 + i for every
// set bit i of `registers` onto the stack.
// - Bundled path, taken when at least two adjacent bits are set and tmp != INVALID_REG:
//   SP is lowered once by 16 bytes per register and copied into tmp. Runs of 2 to 4
//   adjacent registers are stored with a single post-indexed ST1 through tmp, and the
//   remaining single registers follow as post-indexed STP pairs plus one STR if a
//   register is left over. tmp is overwritten.
// - Fallback path: registers are pushed in ascending order with pre-indexed STP
//   (-32 bytes per pair) and a final pre-indexed STR (-16 bytes) if one is left over.
void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers) void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers, ARM64Reg tmp)
{ {
for (auto it : registers) bool bundled_loadstore = false;
STR(128, INDEX_PRE, (ARM64Reg)(Q0 + it), SP, -16);
// Detection pass: stop at the first run of two or more adjacent set bits.
for (int i = 0; i < 32; ++i)
{
if (!registers[i])
continue;
int count = 0;
// After this loop, count is the length of the run of set bits starting at i, capped at 4.
while (++count < 4 && (i + count) < 32 && registers[i + count]) {}
if (count > 1)
{
bundled_loadstore = true;
break;
}
}
// The bundled path needs a scratch register to walk the reserved stack area.
if (bundled_loadstore && tmp != INVALID_REG)
{
int num_regs = registers.Count();
// Reserve the whole save area in one step, then point tmp at its lowest address.
m_emit->SUB(SP, SP, num_regs * 16);
m_emit->ADD(tmp, SP, 0);
std::vector<ARM64Reg> island_regs;
for (int i = 0; i < 32; ++i)
{
if (!registers[i])
continue;
int count = 0;
// Trace of the run-length loop below when four adjacent bits are set:
// 0 = true
// 1 < 4 && registers[i + 1] true!
// 2 < 4 && registers[i + 2] true!
// 3 < 4 && registers[i + 3] true!
// 4 < 4 && registers[i + 4] false!
while (++count < 4 && (i + count) < 32 && registers[i + count]) {}
// A run of length 1 is deferred to the island list; longer runs go out as one ST1.
if (count == 1)
island_regs.push_back((ARM64Reg)(Q0 + i));
else
ST1(64, count, INDEX_POST, (ARM64Reg)(Q0 + i), tmp);
// Skip the registers just handled; the loop's own ++i accounts for the last one.
i += count - 1;
}
// Handle island registers
// They are stored two at a time with STP; an odd one out gets a single STR.
std::vector<ARM64Reg> pair_regs;
for (auto& it : island_regs)
{
pair_regs.push_back(it);
if (pair_regs.size() == 2)
{
STP(128, INDEX_POST, pair_regs[0], pair_regs[1], tmp, 32);
pair_regs.clear();
}
}
if (pair_regs.size())
STR(128, INDEX_POST, pair_regs[0], tmp, 16);
}
else
{
// Fallback path: no scratch register or no adjacent registers; push in ascending order.
std::vector<ARM64Reg> pair_regs;
for (auto it : registers)
{
pair_regs.push_back((ARM64Reg)(Q0 + it));
if (pair_regs.size() == 2)
{
STP(128, INDEX_PRE, pair_regs[0], pair_regs[1], SP, -32);
pair_regs.clear();
}
}
// With an odd register count, the last (highest-numbered) register is pushed alone.
if (pair_regs.size())
STR(128, INDEX_PRE, pair_regs[0], SP, -16);
}
} }
// NOTE(review): this span looks like a side-by-side diff rendering. Where a line holds
// two statements, the left one appears to be the removed implementation (which took an
// ignore_mask) and the right one the added implementation described here.
//
// Added ABI_PopRegisters(registers, tmp): reloads the vector registers Q0 + i for every
// set bit i of `registers`, using the same path selection as the added ABI_PushRegisters.
// - Bundled path, taken when at least two adjacent bits are set and tmp != INVALID_REG:
//   runs of 2 to 4 adjacent registers are loaded with post-indexed LD1 through SP in
//   ascending order, then the remaining single registers with post-indexed LDP/LDR.
//   tmp is not written on this path.
// - Fallback path: bits are walked from 31 down to 0. With an odd register count the
//   highest register is loaded alone first; the rest are loaded as LDP pairs with the
//   lower-numbered register as the first operand.
void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask) void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers, ARM64Reg tmp)
{ {
bool bundled_loadstore = false;
int num_regs = registers.Count();
// Detection pass: stop at the first run of two or more adjacent set bits.
for (int i = 0; i < 32; ++i)
{
if (!registers[i])
continue;
int count = 0;
// After this loop, count is the length of the run of set bits starting at i, capped at 4.
while (++count < 4 && (i + count) < 32 && registers[i + count]) {}
if (count > 1)
{
bundled_loadstore = true;
break;
}
}
if (bundled_loadstore && tmp != INVALID_REG)
{
// The temporary register is only used to indicate that we can use this code path
std::vector<ARM64Reg> island_regs;
for (int i = 0; i < 32; ++i)
{
if (!registers[i])
continue;
int count = 0;
while (++count < 4 && (i + count) < 32 && registers[i + count]) {}
// A run of length 1 is deferred to the island list; longer runs are loaded with one LD1.
if (count == 1)
island_regs.push_back((ARM64Reg)(Q0 + i));
else
LD1(64, count, INDEX_POST, (ARM64Reg)(Q0 + i), SP);
// Skip the registers just handled; the loop's own ++i accounts for the last one.
i += count - 1;
}
// Handle island registers
// They are loaded two at a time with LDP; an odd one out gets a single LDR.
std::vector<ARM64Reg> pair_regs;
for (auto& it : island_regs)
{
pair_regs.push_back(it);
if (pair_regs.size() == 2)
{
LDP(128, INDEX_POST, pair_regs[0], pair_regs[1], SP, 32);
pair_regs.clear();
}
}
if (pair_regs.size())
LDR(128, INDEX_POST, pair_regs[0], SP, 16);
}
else
{
// Fallback path: walk the bits from high to low, the reverse of the ascending push.
bool odd = num_regs % 2;
std::vector<ARM64Reg> pair_regs;
for (int i = 31; i >= 0; --i) for (int i = 31; i >= 0; --i)
{ {
if (!registers[i]) if (!registers[i])
continue; continue;
if (ignore_mask[i]) if (odd)
m_emit->ADD(SP, SP, 16); {
else // First load must be a regular LDR if odd
odd = false;
LDR(128, INDEX_POST, (ARM64Reg)(Q0 + i), SP, 16); LDR(128, INDEX_POST, (ARM64Reg)(Q0 + i), SP, 16);
} }
else
{
pair_regs.push_back((ARM64Reg)(Q0 + i));
if (pair_regs.size() == 2)
{
// pair_regs[1] is the lower-numbered register, so it is passed as the first LDP operand.
LDP(128, INDEX_POST, pair_regs[1], pair_regs[0], SP, 32);
pair_regs.clear();
}
}
}
}
} }
} }

View File

@ -109,6 +109,8 @@ enum IndexType
INDEX_UNSIGNED, INDEX_UNSIGNED,
INDEX_POST, INDEX_POST,
INDEX_PRE, INDEX_PRE,
// Only for VFP loadstore paired
INDEX_SIGNED,
}; };
enum ShiftAmount enum ShiftAmount
@ -658,7 +660,13 @@ public:
// Loadstore multiple structure // Loadstore multiple structure
void LD1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn); void LD1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn);
void LD1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm = SP);
void ST1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn); void ST1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn);
void ST1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm = SP);
// Loadstore paired
void LDP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
void STP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
// Scalar - 1 Source // Scalar - 1 Source
void FABS(ARM64Reg Rd, ARM64Reg Rn); void FABS(ARM64Reg Rd, ARM64Reg Rn);
@ -747,8 +755,8 @@ public:
void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index); void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
// ABI related // ABI related
void ABI_PushRegisters(BitSet32 registers); void ABI_PushRegisters(BitSet32 registers, ARM64Reg tmp = INVALID_REG);
void ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask = BitSet32(0)); void ABI_PopRegisters(BitSet32 registers, ARM64Reg tmp = INVALID_REG);
private: private:
ARM64XEmitter* m_emit; ARM64XEmitter* m_emit;
@ -770,9 +778,11 @@ private:
void EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm); void EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm);
void EmitShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn); void EmitShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn); void EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn);
void EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
void EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn); void EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
void EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); void EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm); void EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
}; };
class ARM64CodeBlock : public CodeBlock<ARM64XEmitter> class ARM64CodeBlock : public CodeBlock<ARM64XEmitter>

View File

@ -167,12 +167,12 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
// Has a chance of being backpatched which will destroy our state // Has a chance of being backpatched which will destroy our state
// push and pop everything in this instance // push and pop everything in this instance
ABI_PushRegisters(regs_in_use); ABI_PushRegisters(regs_in_use);
m_float_emit.ABI_PushRegisters(fprs_in_use); m_float_emit.ABI_PushRegisters(fprs_in_use, X30);
EmitBackpatchRoutine(this, flags, EmitBackpatchRoutine(this, flags,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
dest_reg, XA); dest_reg, XA);
m_float_emit.ABI_PopRegisters(fprs_in_use); m_float_emit.ABI_PopRegisters(fprs_in_use, X30);
ABI_PopRegisters(regs_in_use); ABI_PopRegisters(regs_in_use);
} }
@ -318,12 +318,12 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
// Has a chance of being backpatched which will destroy our state // Has a chance of being backpatched which will destroy our state
// push and pop everything in this instance // push and pop everything in this instance
ABI_PushRegisters(regs_in_use); ABI_PushRegisters(regs_in_use);
m_float_emit.ABI_PushRegisters(fprs_in_use); m_float_emit.ABI_PushRegisters(fprs_in_use, X30);
EmitBackpatchRoutine(this, flags, EmitBackpatchRoutine(this, flags,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
RS, XA); RS, XA);
m_float_emit.ABI_PopRegisters(fprs_in_use); m_float_emit.ABI_PopRegisters(fprs_in_use, X30);
ABI_PopRegisters(regs_in_use); ABI_PopRegisters(regs_in_use);
} }

View File

@ -190,12 +190,12 @@ void JitArm64::lfXX(UGeckoInstruction inst)
// Has a chance of being backpatched which will destroy our state // Has a chance of being backpatched which will destroy our state
// push and pop everything in this instance // push and pop everything in this instance
ABI_PushRegisters(regs_in_use); ABI_PushRegisters(regs_in_use);
m_float_emit.ABI_PushRegisters(fprs_in_use); m_float_emit.ABI_PushRegisters(fprs_in_use, X30);
EmitBackpatchRoutine(this, flags, EmitBackpatchRoutine(this, flags,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
VD, XA); VD, XA);
m_float_emit.ABI_PopRegisters(fprs_in_use); m_float_emit.ABI_PopRegisters(fprs_in_use, X30);
ABI_PopRegisters(regs_in_use); ABI_PopRegisters(regs_in_use);
} }
@ -406,9 +406,9 @@ void JitArm64::stfXX(UGeckoInstruction inst)
else else
{ {
ABI_PushRegisters(regs_in_use); ABI_PushRegisters(regs_in_use);
m_float_emit.ABI_PushRegisters(fprs_in_use); m_float_emit.ABI_PushRegisters(fprs_in_use, X30);
EmitBackpatchRoutine(this, flags, false, false, V0, XA); EmitBackpatchRoutine(this, flags, false, false, V0, XA);
m_float_emit.ABI_PopRegisters(fprs_in_use); m_float_emit.ABI_PopRegisters(fprs_in_use, X30);
ABI_PopRegisters(regs_in_use); ABI_PopRegisters(regs_in_use);
} }
} }
@ -417,12 +417,12 @@ void JitArm64::stfXX(UGeckoInstruction inst)
// Has a chance of being backpatched which will destroy our state // Has a chance of being backpatched which will destroy our state
// push and pop everything in this instance // push and pop everything in this instance
ABI_PushRegisters(regs_in_use); ABI_PushRegisters(regs_in_use);
m_float_emit.ABI_PushRegisters(fprs_in_use); m_float_emit.ABI_PushRegisters(fprs_in_use, X30);
EmitBackpatchRoutine(this, flags, EmitBackpatchRoutine(this, flags,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
V0, XA); V0, XA);
m_float_emit.ABI_PopRegisters(fprs_in_use); m_float_emit.ABI_PopRegisters(fprs_in_use, X30);
ABI_PopRegisters(regs_in_use); ABI_PopRegisters(regs_in_use);
} }
gpr.Unlock(W0, W1, W30); gpr.Unlock(W0, W1, W30);

View File

@ -279,12 +279,12 @@ void JitArm64AsmRoutineManager::GenerateCommon()
SetJumpTarget(argh); SetJumpTarget(argh);
ABI_PushRegisters(gprs); ABI_PushRegisters(gprs);
float_emit.ABI_PushRegisters(fprs); float_emit.ABI_PushRegisters(fprs, X3);
float_emit.UMOV(64, X0, Q0, 0); float_emit.UMOV(64, X0, Q0, 0);
ORR(X0, SP, X0, ArithOption(X0, ST_ROR, 32)); ORR(X0, SP, X0, ArithOption(X0, ST_ROR, 32));
MOVI2R(X30, (u64)PowerPC::Write_U64); MOVI2R(X30, (u64)PowerPC::Write_U64);
BLR(X30); BLR(X30);
float_emit.ABI_PopRegisters(fprs); float_emit.ABI_PopRegisters(fprs, X3);
ABI_PopRegisters(gprs); ABI_PopRegisters(gprs);
RET(X30); RET(X30);
} }
@ -310,12 +310,12 @@ void JitArm64AsmRoutineManager::GenerateCommon()
SetJumpTarget(argh); SetJumpTarget(argh);
ABI_PushRegisters(gprs); ABI_PushRegisters(gprs);
float_emit.ABI_PushRegisters(fprs); float_emit.ABI_PushRegisters(fprs, X3);
float_emit.UMOV(16, W0, Q0, 0); float_emit.UMOV(16, W0, Q0, 0);
REV16(W0, W0); REV16(W0, W0);
MOVI2R(X30, (u64)PowerPC::Write_U16); MOVI2R(X30, (u64)PowerPC::Write_U16);
BLR(X30); BLR(X30);
float_emit.ABI_PopRegisters(fprs); float_emit.ABI_PopRegisters(fprs, X3);
ABI_PopRegisters(gprs); ABI_PopRegisters(gprs);
RET(X30); RET(X30);
} }
@ -341,12 +341,12 @@ void JitArm64AsmRoutineManager::GenerateCommon()
SetJumpTarget(argh); SetJumpTarget(argh);
ABI_PushRegisters(gprs); ABI_PushRegisters(gprs);
float_emit.ABI_PushRegisters(fprs); float_emit.ABI_PushRegisters(fprs, X3);
float_emit.UMOV(16, W0, Q0, 0); float_emit.UMOV(16, W0, Q0, 0);
REV16(W0, W0); REV16(W0, W0);
MOVI2R(X30, (u64)PowerPC::Write_U16); MOVI2R(X30, (u64)PowerPC::Write_U16);
BLR(X30); BLR(X30);
float_emit.ABI_PopRegisters(fprs); float_emit.ABI_PopRegisters(fprs, X3);
ABI_PopRegisters(gprs); ABI_PopRegisters(gprs);
RET(X30); RET(X30);
} }
@ -372,12 +372,12 @@ void JitArm64AsmRoutineManager::GenerateCommon()
SetJumpTarget(argh); SetJumpTarget(argh);
ABI_PushRegisters(gprs); ABI_PushRegisters(gprs);
float_emit.ABI_PushRegisters(fprs); float_emit.ABI_PushRegisters(fprs, X3);
float_emit.REV32(8, D0, D0); float_emit.REV32(8, D0, D0);
float_emit.UMOV(32, W0, Q0, 0); float_emit.UMOV(32, W0, Q0, 0);
MOVI2R(X30, (u64)PowerPC::Write_U32); MOVI2R(X30, (u64)PowerPC::Write_U32);
BLR(X30); BLR(X30);
float_emit.ABI_PopRegisters(fprs); float_emit.ABI_PopRegisters(fprs, X3);
ABI_PopRegisters(gprs); ABI_PopRegisters(gprs);
RET(X30); RET(X30);
} }
@ -402,12 +402,12 @@ void JitArm64AsmRoutineManager::GenerateCommon()
SetJumpTarget(argh); SetJumpTarget(argh);
ABI_PushRegisters(gprs); ABI_PushRegisters(gprs);
float_emit.ABI_PushRegisters(fprs); float_emit.ABI_PushRegisters(fprs, X3);
float_emit.REV32(8, D0, D0); float_emit.REV32(8, D0, D0);
float_emit.UMOV(32, W0, Q0, 0); float_emit.UMOV(32, W0, Q0, 0);
MOVI2R(X30, (u64)PowerPC::Write_U32); MOVI2R(X30, (u64)PowerPC::Write_U32);
BLR(X30); BLR(X30);
float_emit.ABI_PopRegisters(fprs); float_emit.ABI_PopRegisters(fprs, X3);
ABI_PopRegisters(gprs); ABI_PopRegisters(gprs);
RET(X30); RET(X30);
} }
@ -428,11 +428,11 @@ void JitArm64AsmRoutineManager::GenerateCommon()
SetJumpTarget(argh); SetJumpTarget(argh);
ABI_PushRegisters(gprs); ABI_PushRegisters(gprs);
float_emit.ABI_PushRegisters(fprs); float_emit.ABI_PushRegisters(fprs, X3);
float_emit.UMOV(32, W0, Q0, 0); float_emit.UMOV(32, W0, Q0, 0);
MOVI2R(X30, (u64)&PowerPC::Write_U32); MOVI2R(X30, (u64)&PowerPC::Write_U32);
BLR(X30); BLR(X30);
float_emit.ABI_PopRegisters(fprs); float_emit.ABI_PopRegisters(fprs, X3);
ABI_PopRegisters(gprs); ABI_PopRegisters(gprs);
RET(X30); RET(X30);
} }
@ -457,11 +457,11 @@ void JitArm64AsmRoutineManager::GenerateCommon()
SetJumpTarget(argh); SetJumpTarget(argh);
ABI_PushRegisters(gprs); ABI_PushRegisters(gprs);
float_emit.ABI_PushRegisters(fprs); float_emit.ABI_PushRegisters(fprs, X3);
float_emit.UMOV(32, W0, Q0, 0); float_emit.UMOV(32, W0, Q0, 0);
MOVI2R(X30, (u64)&PowerPC::Write_U8); MOVI2R(X30, (u64)&PowerPC::Write_U8);
BLR(X30); BLR(X30);
float_emit.ABI_PopRegisters(fprs); float_emit.ABI_PopRegisters(fprs, X3);
ABI_PopRegisters(gprs); ABI_PopRegisters(gprs);
RET(X30); RET(X30);
} }
@ -486,11 +486,11 @@ void JitArm64AsmRoutineManager::GenerateCommon()
SetJumpTarget(argh); SetJumpTarget(argh);
ABI_PushRegisters(gprs); ABI_PushRegisters(gprs);
float_emit.ABI_PushRegisters(fprs); float_emit.ABI_PushRegisters(fprs, X3);
float_emit.SMOV(32, W0, Q0, 0); float_emit.SMOV(32, W0, Q0, 0);
MOVI2R(X30, (u64)&PowerPC::Write_U8); MOVI2R(X30, (u64)&PowerPC::Write_U8);
BLR(X30); BLR(X30);
float_emit.ABI_PopRegisters(fprs); float_emit.ABI_PopRegisters(fprs, X3);
ABI_PopRegisters(gprs); ABI_PopRegisters(gprs);
RET(X30); RET(X30);
} }
@ -515,11 +515,11 @@ void JitArm64AsmRoutineManager::GenerateCommon()
SetJumpTarget(argh); SetJumpTarget(argh);
ABI_PushRegisters(gprs); ABI_PushRegisters(gprs);
float_emit.ABI_PushRegisters(fprs); float_emit.ABI_PushRegisters(fprs, X3);
float_emit.UMOV(32, W0, Q0, 0); float_emit.UMOV(32, W0, Q0, 0);
MOVI2R(X30, (u64)&PowerPC::Write_U16); MOVI2R(X30, (u64)&PowerPC::Write_U16);
BLR(X30); BLR(X30);
float_emit.ABI_PopRegisters(fprs); float_emit.ABI_PopRegisters(fprs, X3);
ABI_PopRegisters(gprs); ABI_PopRegisters(gprs);
RET(X30); RET(X30);
} }
@ -544,11 +544,11 @@ void JitArm64AsmRoutineManager::GenerateCommon()
SetJumpTarget(argh); SetJumpTarget(argh);
ABI_PushRegisters(gprs); ABI_PushRegisters(gprs);
float_emit.ABI_PushRegisters(fprs); float_emit.ABI_PushRegisters(fprs, X3);
float_emit.SMOV(32, W0, Q0, 0); float_emit.SMOV(32, W0, Q0, 0);
MOVI2R(X30, (u64)&PowerPC::Write_U16); MOVI2R(X30, (u64)&PowerPC::Write_U16);
BLR(X30); BLR(X30);
float_emit.ABI_PopRegisters(fprs); float_emit.ABI_PopRegisters(fprs, X3);
ABI_PopRegisters(gprs); ABI_PopRegisters(gprs);
RET(X30); RET(X30);
} }

View File

@ -79,11 +79,11 @@ private:
ARM64FloatEmitter float_emit(m_emit); ARM64FloatEmitter float_emit(m_emit);
m_emit->ABI_PushRegisters(m_gprs_in_use); m_emit->ABI_PushRegisters(m_gprs_in_use);
float_emit.ABI_PushRegisters(m_fprs_in_use); float_emit.ABI_PushRegisters(m_fprs_in_use, X1);
m_emit->MOVI2R(W1, m_address); m_emit->MOVI2R(W1, m_address);
m_emit->MOV(W2, m_src_reg); m_emit->MOV(W2, m_src_reg);
m_emit->BLR(m_emit->ABI_SetupLambda(lambda)); m_emit->BLR(m_emit->ABI_SetupLambda(lambda));
float_emit.ABI_PopRegisters(m_fprs_in_use); float_emit.ABI_PopRegisters(m_fprs_in_use, X1);
m_emit->ABI_PopRegisters(m_gprs_in_use); m_emit->ABI_PopRegisters(m_gprs_in_use);
} }
@ -179,10 +179,10 @@ private:
ARM64FloatEmitter float_emit(m_emit); ARM64FloatEmitter float_emit(m_emit);
m_emit->ABI_PushRegisters(m_gprs_in_use); m_emit->ABI_PushRegisters(m_gprs_in_use);
float_emit.ABI_PushRegisters(m_fprs_in_use); float_emit.ABI_PushRegisters(m_fprs_in_use, X1);
m_emit->MOVI2R(W1, m_address); m_emit->MOVI2R(W1, m_address);
m_emit->BLR(m_emit->ABI_SetupLambda(lambda)); m_emit->BLR(m_emit->ABI_SetupLambda(lambda));
float_emit.ABI_PopRegisters(m_fprs_in_use); float_emit.ABI_PopRegisters(m_fprs_in_use, X1);
m_emit->ABI_PopRegisters(m_gprs_in_use); m_emit->ABI_PopRegisters(m_gprs_in_use);
if (m_sign_extend) if (m_sign_extend)