From fad46729b097ecd808508e9f57cc5609ed335054 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Mon, 2 Mar 2015 03:41:45 -0600 Subject: [PATCH] [AArch64] Implemented paired pushing/popping for the VFP. A bit more efficient if we are only pushing two VFP registers. We can probably be a bit more efficient in the future by mixing paired load/stores into the other paths as well. --- Source/Core/Common/Arm64Emitter.cpp | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp index 4fc7c38ae0..4cb4086d7c 100644 --- a/Source/Core/Common/Arm64Emitter.cpp +++ b/Source/Core/Common/Arm64Emitter.cpp @@ -2877,6 +2877,19 @@ void ARM64FloatEmitter::FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers) { bool bundled_loadstore = false; + int num_regs = registers.Count(); + + if (num_regs == 2) + { + int i = 0; + ARM64Reg regs[2]; + for (auto it : registers) + regs[i++] = (ARM64Reg)(Q0 + it); + + STP(128, INDEX_PRE, regs[0], regs[1], SP, -32); + return; + } + for (int i = 0; i < 32; ++i) { if (!registers[i]) @@ -2898,7 +2911,6 @@ void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers) } else { - int num_regs = registers.Count(); // Violating the AAPCS64 never felt so right. m_emit->SUB(SP, SP, num_regs * 16); for (int i = 0; i < 32; ++i) @@ -2925,6 +2937,19 @@ void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers) void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers) { bool bundled_loadstore = false; + int num_regs = registers.Count(); + + if (num_regs == 2) + { + int i = 0; + ARM64Reg regs[2]; + for (auto it : registers) + regs[i++] = (ARM64Reg)(Q0 + it); + + LDP(128, INDEX_POST, regs[0], regs[1], SP, 32); + return; + } + for (int i = 0; i < 32; ++i) { if (!registers[i])