From 8bf332cf089c04c3d427542ccbbf2df636e41396 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Sun, 30 Aug 2015 23:07:12 -0500 Subject: [PATCH] [AArch64] Optimize GPR cache flushing. If we are flushing multiple sequential guest GPRs then we can store two of them with a single STP instruction. Ikaruga hits this case quite a bit, in blocks that do an lmw at the very end, after which we have to flush all of those registers. In those blocks this typically cuts 16 STR instructions down to 8 STP instructions. --- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 6 +-- .../PowerPC/JitArm64/JitArm64_RegCache.cpp | 53 ++++++++++++++++--- .../Core/PowerPC/JitArm64/JitArm64_RegCache.h | 8 ++- 3 files changed, 55 insertions(+), 12 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 2c002f9dc7..2e2dfe5ffb 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -428,10 +428,8 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB JitArm64Tables::CompileInstruction(ops[i]); // If we have a register that will never be used again, flush it. 
- for (int j : ~ops[i].gprInUse) - gpr.StoreRegister(j); - for (int j : ~ops[i].fprInUse) - fpr.StoreRegister(j); + gpr.StoreRegisters(~ops[i].gprInUse); + fpr.StoreRegisters(~ops[i].fprInUse); if (jo.memcheck && (opinfo->flags & FL_LOADSTORE)) { diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index c58a3cee46..4f539371ab 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -126,8 +126,44 @@ void Arm64GPRCache::FlushRegister(u32 preg, bool maintain_state) } } +void Arm64GPRCache::FlushRegisters(BitSet32 regs, bool maintain_state) +{ + for (int i = 0; i < 32; ++i) + { + if (regs[i]) + { + if (i < 31 && regs[i + 1]) + { + // We've got two guest registers in a row to store + OpArg& reg1 = m_guest_registers[i]; + OpArg& reg2 = m_guest_registers[i + 1]; + if (reg1.IsDirty() && reg2.IsDirty() && + reg1.GetType() == REG_REG && reg2.GetType() == REG_REG) + { + ARM64Reg RX1 = R(i); + ARM64Reg RX2 = R(i + 1); + + m_emit->STP(INDEX_SIGNED, RX1, RX2, X29, PPCSTATE_OFF(gpr[0]) + i * sizeof(u32)); + if (!maintain_state) + { + UnlockRegister(RX1); + UnlockRegister(RX2); + reg1.Flush(); + reg2.Flush(); + } + ++i; + continue; + } + } + + FlushRegister(i, maintain_state); + } + } +} + void Arm64GPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op) { + BitSet32 to_flush; for (int i = 0; i < 32; ++i) { bool flush = true; @@ -144,15 +180,12 @@ void Arm64GPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op) { // Has to be flushed if it isn't in a callee saved register ARM64Reg host_reg = m_guest_registers[i].GetReg(); - if (flush || !IsCalleeSaved(host_reg)) - FlushRegister(i, mode == FLUSH_MAINTAIN_STATE); - } - else if (m_guest_registers[i].GetType() == REG_IMM) - { - if (flush) - FlushRegister(i, mode == FLUSH_MAINTAIN_STATE); + flush = IsCalleeSaved(host_reg) ? 
flush : true; } + + to_flush[i] = flush; } + FlushRegisters(to_flush, mode == FLUSH_MAINTAIN_STATE); } ARM64Reg Arm64GPRCache::R(u32 preg) @@ -462,6 +495,12 @@ void Arm64FPRCache::FlushRegister(u32 preg, bool maintain_state) } } +void Arm64FPRCache::FlushRegisters(BitSet32 regs, bool maintain_state) +{ + for (int j : regs) + FlushRegister(j, maintain_state); +} + BitSet32 Arm64FPRCache::GetCallerSavedUsed() { BitSet32 registers(0); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index 433f0dad8f..084ab8c94a 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -141,7 +141,7 @@ public: // Requires unlocking after done ARM64Reg GetReg(); - void StoreRegister(u32 preg) { FlushRegister(preg, false); } + void StoreRegisters(BitSet32 regs) { FlushRegisters(regs, false); } // Locks a register so a cache cannot use it // Useful for function calls @@ -185,6 +185,8 @@ protected: virtual void FlushRegister(u32 preg, bool maintain_state) = 0; + virtual void FlushRegisters(BitSet32 regs, bool maintain_state) = 0; + // Get available host registers u32 GetUnlockedRegisterCount(); @@ -248,6 +250,8 @@ protected: void FlushRegister(u32 preg, bool maintain_state) override; + void FlushRegisters(BitSet32 regs, bool maintain_state) override; + private: bool IsCalleeSaved(ARM64Reg reg); @@ -280,6 +284,8 @@ protected: void FlushRegister(u32 preg, bool maintain_state) override; + void FlushRegisters(BitSet32 regs, bool maintain_state) override; + private: bool IsCalleeSaved(ARM64Reg reg); };