[AArch64] Optimize GPR cache flushing.

When we flush multiple sequential guest GPRs, we can store two of them with a single STP instruction.
Ikaruga hits this case quite a bit: its blocks often end with an lmw, after which we have to flush them all.
This typically cuts 16 STR instructions down to 8 STP instructions there.
This commit is contained in:
Ryan Houdek 2015-08-30 23:07:12 -05:00
parent f2c17436ab
commit 8bf332cf08
3 changed files with 55 additions and 12 deletions

View File

@ -428,10 +428,8 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
JitArm64Tables::CompileInstruction(ops[i]);
// If we have a register that will never be used again, flush it.
for (int j : ~ops[i].gprInUse)
gpr.StoreRegister(j);
for (int j : ~ops[i].fprInUse)
fpr.StoreRegister(j);
gpr.StoreRegisters(~ops[i].gprInUse);
fpr.StoreRegisters(~ops[i].fprInUse);
if (jo.memcheck && (opinfo->flags & FL_LOADSTORE))
{

View File

@ -126,8 +126,44 @@ void Arm64GPRCache::FlushRegister(u32 preg, bool maintain_state)
}
}
// Flushes every guest GPR whose bit is set in `regs`.
// When two consecutive guest registers are both requested, both dirty and
// both of type REG_REG, they are written with one STP instead of being
// flushed one at a time. Every other requested register is handed to
// FlushRegister. With `maintain_state` set, the pair path leaves the host
// registers locked and the cache entries untouched.
void Arm64GPRCache::FlushRegisters(BitSet32 regs, bool maintain_state)
{
    for (int preg = 0; preg < 32; ++preg)
    {
        if (!regs[preg])
            continue;

        // Only look at preg + 1 while it is still a valid guest register index.
        if (preg < 31 && regs[preg + 1])
        {
            OpArg& first = m_guest_registers[preg];
            OpArg& second = m_guest_registers[preg + 1];

            const bool can_pair = first.IsDirty() && second.IsDirty() &&
                                  first.GetType() == REG_REG && second.GetType() == REG_REG;
            if (can_pair)
            {
                ARM64Reg host_first = R(preg);
                ARM64Reg host_second = R(preg + 1);

                // One paired store for gpr[preg] and gpr[preg + 1].
                m_emit->STP(INDEX_SIGNED, host_first, host_second, X29, PPCSTATE_OFF(gpr[0]) + preg * sizeof(u32));

                if (!maintain_state)
                {
                    UnlockRegister(host_first);
                    UnlockRegister(host_second);
                    first.Flush();
                    second.Flush();
                }

                // The second register of the pair is done; step over it.
                ++preg;
                continue;
            }
        }

        FlushRegister(preg, maintain_state);
    }
}
void Arm64GPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op)
{
BitSet32 to_flush;
for (int i = 0; i < 32; ++i)
{
bool flush = true;
@ -144,15 +180,12 @@ void Arm64GPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op)
{
// Has to be flushed if it isn't in a callee saved register
ARM64Reg host_reg = m_guest_registers[i].GetReg();
if (flush || !IsCalleeSaved(host_reg))
FlushRegister(i, mode == FLUSH_MAINTAIN_STATE);
}
else if (m_guest_registers[i].GetType() == REG_IMM)
{
if (flush)
FlushRegister(i, mode == FLUSH_MAINTAIN_STATE);
flush = IsCalleeSaved(host_reg) ? flush : true;
}
to_flush[i] = flush;
}
FlushRegisters(to_flush, mode == FLUSH_MAINTAIN_STATE);
}
ARM64Reg Arm64GPRCache::R(u32 preg)
@ -462,6 +495,12 @@ void Arm64FPRCache::FlushRegister(u32 preg, bool maintain_state)
}
}
// Flushes each guest FPR selected in `regs` by forwarding it to
// FlushRegister one at a time; no store pairing is attempted here.
void Arm64FPRCache::FlushRegisters(BitSet32 regs, bool maintain_state)
{
    for (int preg : regs)
    {
        FlushRegister(preg, maintain_state);
    }
}
BitSet32 Arm64FPRCache::GetCallerSavedUsed()
{
BitSet32 registers(0);

View File

@ -141,7 +141,7 @@ public:
// Requires unlocking after done
ARM64Reg GetReg();
// Flushes guest register `preg` via FlushRegister with maintain_state = false.
void StoreRegister(u32 preg)
{
    FlushRegister(preg, false);
}
// Flushes every guest register selected in `regs` via FlushRegisters with
// maintain_state = false.
void StoreRegisters(BitSet32 regs)
{
    FlushRegisters(regs, false);
}
// Locks a register so a cache cannot use it
// Useful for function calls
@ -185,6 +185,8 @@ protected:
virtual void FlushRegister(u32 preg, bool maintain_state) = 0;
virtual void FlushRegisters(BitSet32 regs, bool maintain_state) = 0;
// Get available host registers
u32 GetUnlockedRegisterCount();
@ -248,6 +250,8 @@ protected:
void FlushRegister(u32 preg, bool maintain_state) override;
void FlushRegisters(BitSet32 regs, bool maintain_state) override;
private:
bool IsCalleeSaved(ARM64Reg reg);
@ -280,6 +284,8 @@ protected:
void FlushRegister(u32 preg, bool maintain_state) override;
void FlushRegisters(BitSet32 regs, bool maintain_state) override;
private:
bool IsCalleeSaved(ARM64Reg reg);
};