Merge pull request #4875 from degasus/ABI
JitArm64: Optimize GPR register push/pop and fix the FPR one.
This commit is contained in:
commit
032b6252a0
|
@ -2079,106 +2079,58 @@ bool ARM64XEmitter::MOVI2R2(ARM64Reg Rd, u64 imm1, u64 imm2)
|
||||||
|
|
||||||
void ARM64XEmitter::ABI_PushRegisters(BitSet32 registers)
|
void ARM64XEmitter::ABI_PushRegisters(BitSet32 registers)
|
||||||
{
|
{
|
||||||
unsigned int num_regs = registers.Count();
|
int num_regs = registers.Count();
|
||||||
|
int stack_size = (num_regs + (num_regs & 1)) * 8;
|
||||||
|
auto it = registers.begin();
|
||||||
|
|
||||||
if (num_regs % 2)
|
if (!num_regs)
|
||||||
{
|
return;
|
||||||
bool first = true;
|
|
||||||
|
|
||||||
// Stack is required to be quad-word aligned.
|
// 8 bytes per register, but 16-byte alignment, so we may have to pad by one register.
|
||||||
u32 stack_size = Common::AlignUp(num_regs * 8, 16);
|
// Only update the SP on the last write to avoid the dependency between those stores.
|
||||||
u32 current_offset = 0;
|
|
||||||
std::vector<ARM64Reg> reg_pair;
|
|
||||||
|
|
||||||
for (auto it : registers)
|
// The first push must adjust the SP, else a context switch may invalidate everything below SP.
|
||||||
{
|
if (num_regs & 1)
|
||||||
if (first)
|
STR(INDEX_PRE, (ARM64Reg)(X0 + *it++), SP, -stack_size);
|
||||||
{
|
|
||||||
STR(INDEX_PRE, (ARM64Reg)(X0 + it), SP, -(s32)stack_size);
|
|
||||||
first = false;
|
|
||||||
current_offset += 16;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
reg_pair.push_back((ARM64Reg)(X0 + it));
|
|
||||||
if (reg_pair.size() == 2)
|
|
||||||
{
|
|
||||||
STP(INDEX_SIGNED, reg_pair[0], reg_pair[1], SP, current_offset);
|
|
||||||
reg_pair.clear();
|
|
||||||
current_offset += 16;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
STP(INDEX_PRE, (ARM64Reg)(X0 + *it++), (ARM64Reg)(X0 + *it++), SP, -stack_size);
|
||||||
std::vector<ARM64Reg> reg_pair;
|
|
||||||
|
|
||||||
for (auto it : registers)
|
// Fast store for all other registers, this is always an even number.
|
||||||
{
|
for (int i = 0; i < (num_regs - 1) / 2; i++)
|
||||||
reg_pair.push_back((ARM64Reg)(X0 + it));
|
STP(INDEX_SIGNED, (ARM64Reg)(X0 + *it++), (ARM64Reg)(X0 + *it++), SP, 16 * (i + 1));
|
||||||
if (reg_pair.size() == 2)
|
|
||||||
{
|
_assert_msg_(DYNA_REC, it == registers.end(), "%s registers don't match.", __FUNCTION__);
|
||||||
STP(INDEX_PRE, reg_pair[0], reg_pair[1], SP, -16);
|
|
||||||
reg_pair.clear();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARM64XEmitter::ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask)
|
void ARM64XEmitter::ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask)
|
||||||
{
|
{
|
||||||
int num_regs = registers.Count();
|
int num_regs = registers.Count();
|
||||||
|
int stack_size = (num_regs + (num_regs & 1)) * 8;
|
||||||
|
auto it = registers.begin();
|
||||||
|
|
||||||
if (num_regs % 2)
|
if (!num_regs)
|
||||||
{
|
return;
|
||||||
bool first = true;
|
|
||||||
|
|
||||||
std::vector<ARM64Reg> reg_pair;
|
// We must adjust the SP in the end, so load the first (two) registers at least.
|
||||||
|
ARM64Reg first = (ARM64Reg)(X0 + *it++);
|
||||||
|
ARM64Reg second;
|
||||||
|
if (!(num_regs & 1))
|
||||||
|
second = (ARM64Reg)(X0 + *it++);
|
||||||
|
|
||||||
for (auto it : registers)
|
// 8 bytes per register, but 16-byte alignment, so we may have to pad by one register.
|
||||||
{
|
// Only update the SP on the last load to avoid the dependency between those loads.
|
||||||
if (ignore_mask[it])
|
|
||||||
it = WSP;
|
|
||||||
|
|
||||||
if (first)
|
// Fast load for all but the first (two) registers, this is always an even number.
|
||||||
{
|
for (int i = 0; i < (num_regs - 1) / 2; i++)
|
||||||
LDR(INDEX_POST, (ARM64Reg)(X0 + it), SP, 16);
|
LDP(INDEX_SIGNED, (ARM64Reg)(X0 + *it++), (ARM64Reg)(X0 + *it++), SP, 16 * (i + 1));
|
||||||
first = false;
|
|
||||||
}
|
// Post loading the first (two) registers.
|
||||||
else
|
if (num_regs & 1)
|
||||||
{
|
LDR(INDEX_POST, first, SP, stack_size);
|
||||||
reg_pair.push_back((ARM64Reg)(X0 + it));
|
|
||||||
if (reg_pair.size() == 2)
|
|
||||||
{
|
|
||||||
LDP(INDEX_POST, reg_pair[0], reg_pair[1], SP, 16);
|
|
||||||
reg_pair.clear();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
LDP(INDEX_POST, first, second, SP, stack_size);
|
||||||
std::vector<ARM64Reg> reg_pair;
|
|
||||||
|
|
||||||
for (int i = 31; i >= 0; --i)
|
_assert_msg_(DYNA_REC, it == registers.end(), "%s registers don't match.", __FUNCTION__);
|
||||||
{
|
|
||||||
if (!registers[i])
|
|
||||||
continue;
|
|
||||||
|
|
||||||
int reg = i;
|
|
||||||
|
|
||||||
if (ignore_mask[reg])
|
|
||||||
reg = WSP;
|
|
||||||
|
|
||||||
reg_pair.push_back((ARM64Reg)(X0 + reg));
|
|
||||||
if (reg_pair.size() == 2)
|
|
||||||
{
|
|
||||||
LDP(INDEX_POST, reg_pair[1], reg_pair[0], SP, 16);
|
|
||||||
reg_pair.clear();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Float Emitter
|
// Float Emitter
|
||||||
|
|
|
@ -753,6 +753,7 @@ void JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBlock*
|
||||||
|
|
||||||
gpr.Lock(W30);
|
gpr.Lock(W30);
|
||||||
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
|
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
|
||||||
|
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
|
||||||
regs_in_use[W30] = 0;
|
regs_in_use[W30] = 0;
|
||||||
|
|
||||||
FixupBranch Exception = B();
|
FixupBranch Exception = B();
|
||||||
|
@ -761,8 +762,10 @@ void JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBlock*
|
||||||
FixupBranch exit = B();
|
FixupBranch exit = B();
|
||||||
SetJumpTarget(Exception);
|
SetJumpTarget(Exception);
|
||||||
ABI_PushRegisters(regs_in_use);
|
ABI_PushRegisters(regs_in_use);
|
||||||
|
m_float_emit.ABI_PushRegisters(fprs_in_use, X30);
|
||||||
MOVP2R(X30, &GPFifo::FastCheckGatherPipe);
|
MOVP2R(X30, &GPFifo::FastCheckGatherPipe);
|
||||||
BLR(X30);
|
BLR(X30);
|
||||||
|
m_float_emit.ABI_PopRegisters(fprs_in_use, X30);
|
||||||
ABI_PopRegisters(regs_in_use);
|
ABI_PopRegisters(regs_in_use);
|
||||||
|
|
||||||
// Inline exception check
|
// Inline exception check
|
||||||
|
|
|
@ -19,12 +19,15 @@ using namespace Arm64Gen;
|
||||||
void JitArm64::GenerateAsm()
|
void JitArm64::GenerateAsm()
|
||||||
{
|
{
|
||||||
// This value is all of the callee saved registers that we are required to save.
|
// This value is all of the callee saved registers that we are required to save.
|
||||||
// According to the AAPCS64 we need to save R19 ~ R30.
|
// According to the AAPCS64 we need to save R19 ~ R30 and Q8 ~ Q15.
|
||||||
const u32 ALL_CALLEE_SAVED = 0x7FF80000;
|
const u32 ALL_CALLEE_SAVED = 0x7FF80000;
|
||||||
|
const u32 ALL_CALLEE_SAVED_FPR = 0x0000FF00;
|
||||||
BitSet32 regs_to_save(ALL_CALLEE_SAVED);
|
BitSet32 regs_to_save(ALL_CALLEE_SAVED);
|
||||||
|
BitSet32 regs_to_save_fpr(ALL_CALLEE_SAVED_FPR);
|
||||||
enterCode = GetCodePtr();
|
enterCode = GetCodePtr();
|
||||||
|
|
||||||
ABI_PushRegisters(regs_to_save);
|
ABI_PushRegisters(regs_to_save);
|
||||||
|
m_float_emit.ABI_PushRegisters(regs_to_save_fpr, X30);
|
||||||
|
|
||||||
MOVP2R(PPC_REG, &PowerPC::ppcState);
|
MOVP2R(PPC_REG, &PowerPC::ppcState);
|
||||||
|
|
||||||
|
@ -175,6 +178,7 @@ void JitArm64::GenerateAsm()
|
||||||
LDR(INDEX_UNSIGNED, X0, X1, 0);
|
LDR(INDEX_UNSIGNED, X0, X1, 0);
|
||||||
ADD(SP, X0, 0);
|
ADD(SP, X0, 0);
|
||||||
|
|
||||||
|
m_float_emit.ABI_PopRegisters(regs_to_save_fpr, X30);
|
||||||
ABI_PopRegisters(regs_to_save);
|
ABI_PopRegisters(regs_to_save);
|
||||||
RET(X30);
|
RET(X30);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue