JitArm64: Move fresx/frsqrtex RW calls earlier

If W0 is locked when fpr.RW is called, the indirectly called
ConvertSingleToDoubleLower may need to emit a push+pop, so it's
better for fresx/frsqrtex to call RW before locking W0 than after.
This commit is contained in:
JosJuice 2021-09-13 19:27:16 +02:00
parent 1c4155f800
commit 74f2acd83b
1 changed files with 12 additions and 13 deletions

View File

@ -554,25 +554,24 @@ void JitArm64::fresx(UGeckoInstruction inst)
const u32 b = inst.FB;
const u32 d = inst.FD;
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W4, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0);
const ARM64Reg VB = fpr.R(b, RegType::LowerPair);
const ARM64Reg VD = fpr.RW(d, RegType::Duplicated);
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W4, ARM64Reg::W30);
m_float_emit.FMOV(ARM64Reg::X1, EncodeRegToDouble(VB));
m_float_emit.FRECPE(ARM64Reg::D0, EncodeRegToDouble(VB));
BL(GetAsmRoutines()->fres);
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W4, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
const ARM64Reg VD = fpr.RW(d, RegType::Duplicated);
m_float_emit.FMOV(EncodeRegToDouble(VD), ARM64Reg::X0);
SetFPRFIfNeeded(false, ARM64Reg::X0);
gpr.Unlock(ARM64Reg::W0);
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W4, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
}
void JitArm64::frsqrtex(UGeckoInstruction inst)
@ -584,24 +583,24 @@ void JitArm64::frsqrtex(UGeckoInstruction inst)
const u32 b = inst.FB;
const u32 d = inst.FD;
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W4, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0);
const ARM64Reg VB = fpr.R(b, RegType::LowerPair);
const ARM64Reg VD = fpr.RW(d, RegType::LowerPair);
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W4, ARM64Reg::W30);
m_float_emit.FMOV(ARM64Reg::X1, EncodeRegToDouble(VB));
m_float_emit.FRSQRTE(ARM64Reg::D0, EncodeRegToDouble(VB));
BL(GetAsmRoutines()->frsqrte);
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W4, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
const ARM64Reg VD = fpr.RW(d, RegType::LowerPair);
m_float_emit.FMOV(EncodeRegToDouble(VD), ARM64Reg::X0);
SetFPRFIfNeeded(false, ARM64Reg::X0);
gpr.Unlock(ARM64Reg::W0);
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W4, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
}
// Since the following float conversion functions are used in non-arithmetic PPC float