[AArch64] Implemented paired pushing/popping for the VFP.

This is slightly more efficient when only two VFP registers are being pushed or popped.
In the future we can probably be a bit more efficient still by mixing paired load/stores into the other paths as well.
This commit is contained in:
Ryan Houdek 2015-03-02 03:41:45 -06:00
parent 39e357d62d
commit fad46729b0
1 changed file with 26 additions and 1 deletion

View File

@ -2877,6 +2877,19 @@ void ARM64FloatEmitter::FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8
void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers)
{
bool bundled_loadstore = false;
int num_regs = registers.Count();
if (num_regs == 2)
{
int i = 0;
ARM64Reg regs[2];
for (auto it : registers)
regs[i++] = (ARM64Reg)(Q0 + it);
STP(128, INDEX_PRE, regs[0], regs[1], SP, -32);
return;
}
for (int i = 0; i < 32; ++i)
{
if (!registers[i])
@ -2898,7 +2911,6 @@ void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers)
}
else
{
int num_regs = registers.Count();
// Violating the AAPCS64 never felt so right.
m_emit->SUB(SP, SP, num_regs * 16);
for (int i = 0; i < 32; ++i)
@ -2925,6 +2937,19 @@ void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers)
void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers)
{
bool bundled_loadstore = false;
int num_regs = registers.Count();
if (num_regs == 2)
{
int i = 0;
ARM64Reg regs[2];
for (auto it : registers)
regs[i++] = (ARM64Reg)(Q0 + it);
LDP(128, INDEX_POST, regs[0], regs[1], SP, 32);
return;
}
for (int i = 0; i < 32; ++i)
{
if (!registers[i])