JitArm64: Move fresx/frsqrtex RW calls earlier
If W0 is locked when fpr.RW is called, the indirectly called ConvertSingleToDoubleLower may need to emit a push+pop. It is therefore better for fresx/frsqrtex to call RW before locking W0 rather than after.
This commit is contained in:
parent
1c4155f800
commit
74f2acd83b
|
@ -554,25 +554,24 @@ void JitArm64::fresx(UGeckoInstruction inst)
|
|||
|
||||
const u32 b = inst.FB;
|
||||
const u32 d = inst.FD;
|
||||
|
||||
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W4, ARM64Reg::W30);
|
||||
fpr.Lock(ARM64Reg::Q0);
|
||||
|
||||
const ARM64Reg VB = fpr.R(b, RegType::LowerPair);
|
||||
const ARM64Reg VD = fpr.RW(d, RegType::Duplicated);
|
||||
|
||||
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W4, ARM64Reg::W30);
|
||||
|
||||
m_float_emit.FMOV(ARM64Reg::X1, EncodeRegToDouble(VB));
|
||||
m_float_emit.FRECPE(ARM64Reg::D0, EncodeRegToDouble(VB));
|
||||
|
||||
BL(GetAsmRoutines()->fres);
|
||||
|
||||
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W4, ARM64Reg::W30);
|
||||
fpr.Unlock(ARM64Reg::Q0);
|
||||
|
||||
const ARM64Reg VD = fpr.RW(d, RegType::Duplicated);
|
||||
m_float_emit.FMOV(EncodeRegToDouble(VD), ARM64Reg::X0);
|
||||
|
||||
SetFPRFIfNeeded(false, ARM64Reg::X0);
|
||||
|
||||
gpr.Unlock(ARM64Reg::W0);
|
||||
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W4, ARM64Reg::W30);
|
||||
fpr.Unlock(ARM64Reg::Q0);
|
||||
}
|
||||
|
||||
void JitArm64::frsqrtex(UGeckoInstruction inst)
|
||||
|
@ -584,24 +583,24 @@ void JitArm64::frsqrtex(UGeckoInstruction inst)
|
|||
const u32 b = inst.FB;
|
||||
const u32 d = inst.FD;
|
||||
|
||||
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W4, ARM64Reg::W30);
|
||||
fpr.Lock(ARM64Reg::Q0);
|
||||
|
||||
const ARM64Reg VB = fpr.R(b, RegType::LowerPair);
|
||||
const ARM64Reg VD = fpr.RW(d, RegType::LowerPair);
|
||||
|
||||
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W4, ARM64Reg::W30);
|
||||
|
||||
m_float_emit.FMOV(ARM64Reg::X1, EncodeRegToDouble(VB));
|
||||
m_float_emit.FRSQRTE(ARM64Reg::D0, EncodeRegToDouble(VB));
|
||||
|
||||
BL(GetAsmRoutines()->frsqrte);
|
||||
|
||||
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W4, ARM64Reg::W30);
|
||||
fpr.Unlock(ARM64Reg::Q0);
|
||||
|
||||
const ARM64Reg VD = fpr.RW(d, RegType::LowerPair);
|
||||
m_float_emit.FMOV(EncodeRegToDouble(VD), ARM64Reg::X0);
|
||||
|
||||
SetFPRFIfNeeded(false, ARM64Reg::X0);
|
||||
|
||||
gpr.Unlock(ARM64Reg::W0);
|
||||
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W4, ARM64Reg::W30);
|
||||
fpr.Unlock(ARM64Reg::Q0);
|
||||
}
|
||||
|
||||
// Since the following float conversion functions are used in non-arithmetic PPC float
|
||||
|
|
Loading…
Reference in New Issue