[AArch64] Implemented paired pushing/popping for the VFP.
This is a bit more efficient when we are only pushing two VFP registers. We can probably be a bit more efficient in the future by mixing paired loads/stores into the other paths as well.
This commit is contained in:
parent
39e357d62d
commit
fad46729b0
|
@ -2877,6 +2877,19 @@ void ARM64FloatEmitter::FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8
|
||||||
void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers)
|
void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers)
|
||||||
{
|
{
|
||||||
bool bundled_loadstore = false;
|
bool bundled_loadstore = false;
|
||||||
|
int num_regs = registers.Count();
|
||||||
|
|
||||||
|
if (num_regs == 2)
|
||||||
|
{
|
||||||
|
int i = 0;
|
||||||
|
ARM64Reg regs[2];
|
||||||
|
for (auto it : registers)
|
||||||
|
regs[i++] = (ARM64Reg)(Q0 + it);
|
||||||
|
|
||||||
|
STP(128, INDEX_PRE, regs[0], regs[1], SP, -32);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
for (int i = 0; i < 32; ++i)
|
for (int i = 0; i < 32; ++i)
|
||||||
{
|
{
|
||||||
if (!registers[i])
|
if (!registers[i])
|
||||||
|
@ -2898,7 +2911,6 @@ void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int num_regs = registers.Count();
|
|
||||||
// Violating the AAPCS64 never felt so right.
|
// Violating the AAPCS64 never felt so right.
|
||||||
m_emit->SUB(SP, SP, num_regs * 16);
|
m_emit->SUB(SP, SP, num_regs * 16);
|
||||||
for (int i = 0; i < 32; ++i)
|
for (int i = 0; i < 32; ++i)
|
||||||
|
@ -2925,6 +2937,19 @@ void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers)
|
||||||
void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers)
|
void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers)
|
||||||
{
|
{
|
||||||
bool bundled_loadstore = false;
|
bool bundled_loadstore = false;
|
||||||
|
int num_regs = registers.Count();
|
||||||
|
|
||||||
|
if (num_regs == 2)
|
||||||
|
{
|
||||||
|
int i = 0;
|
||||||
|
ARM64Reg regs[2];
|
||||||
|
for (auto it : registers)
|
||||||
|
regs[i++] = (ARM64Reg)(Q0 + it);
|
||||||
|
|
||||||
|
LDP(128, INDEX_POST, regs[0], regs[1], SP, 32);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
for (int i = 0; i < 32; ++i)
|
for (int i = 0; i < 32; ++i)
|
||||||
{
|
{
|
||||||
if (!registers[i])
|
if (!registers[i])
|
||||||
|
|
Loading…
Reference in New Issue