JitArm64: Properly reserve scratch register for soft MMU
Cleans up a hack from the previous commit.
This commit is contained in:
parent
3dce1df00e
commit
3de49dee78
|
@ -248,10 +248,12 @@ protected:
|
|||
//
|
||||
// Additional scratch registers are used in the following situations:
|
||||
//
|
||||
// emitting_routine && (mode == Auto || (mode != AlwaysSafe && !jo.fastmem_arena)): X2
|
||||
// emitting_routine && mode == Auto && (flags & BackPatchInfo::FLAG_STORE): X0
|
||||
// emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3
|
||||
// !emitting_routine && mode != AlwaysSafe && !jo.fastmem_arena: X30
|
||||
// emitting_routine && mode == Auto: X2
|
||||
// emitting_routine && mode == Auto && (flags & BackPatchInfo::FLAG_STORE): X0
|
||||
// emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3
|
||||
// emitting_routine && mode != AlwaysSafe && !jo.fastmem_arena: X3
|
||||
// mode != AlwaysSafe && !jo.fastmem_arena: X2
|
||||
// !emitting_routine && mode != AlwaysSafe && !jo.fastmem_arena: X30
|
||||
//
|
||||
// mode != AlwaysUnsafe:
|
||||
// X30 (plus most other registers, unless marked in gprs_to_push and fprs_to_push)
|
||||
|
|
|
@ -74,10 +74,10 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
|
|||
|
||||
if (!jo.fastmem_arena)
|
||||
{
|
||||
const ARM64Reg temp = emitting_routine ? ARM64Reg::W2 : ARM64Reg::W30;
|
||||
const ARM64Reg temp = emitting_routine ? ARM64Reg::W3 : ARM64Reg::W30;
|
||||
|
||||
memory_base = EncodeRegTo64(temp);
|
||||
memory_offset = ARM64Reg::W8; // TODO
|
||||
memory_offset = ARM64Reg::W2;
|
||||
|
||||
LSR(temp, addr, PowerPC::BAT_INDEX_SHIFT);
|
||||
LDR(memory_base, MEM_REG, ArithOption(temp, true));
|
||||
|
|
|
@ -27,6 +27,8 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
|
|||
{
|
||||
// We want to make sure to not get LR as a temp register
|
||||
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
|
||||
if (!jo.fastmem_arena)
|
||||
gpr.Lock(ARM64Reg::W2);
|
||||
|
||||
gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg, false);
|
||||
ARM64Reg dest_reg = gpr.R(dest);
|
||||
|
@ -121,6 +123,8 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
|
|||
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
|
||||
if (!update || early_update)
|
||||
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
|
||||
if (!jo.fastmem_arena)
|
||||
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
|
||||
if (!jo.memcheck)
|
||||
regs_in_use[DecodeReg(dest_reg)] = 0;
|
||||
|
||||
|
@ -157,6 +161,8 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
|
|||
}
|
||||
|
||||
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
|
||||
if (!jo.fastmem_arena)
|
||||
gpr.Unlock(ARM64Reg::W2);
|
||||
}
|
||||
|
||||
void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset,
|
||||
|
@ -164,6 +170,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
|
|||
{
|
||||
// We want to make sure to not get LR as a temp register
|
||||
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
||||
if (!jo.fastmem_arena)
|
||||
gpr.Lock(ARM64Reg::W2);
|
||||
|
||||
ARM64Reg RS = gpr.R(value);
|
||||
|
||||
|
@ -259,6 +267,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
|
|||
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
|
||||
if (!update || early_update)
|
||||
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
|
||||
if (!jo.fastmem_arena)
|
||||
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
|
||||
|
||||
u32 access_size = BackPatchInfo::GetFlagSize(flags);
|
||||
u32 mmio_address = 0;
|
||||
|
@ -315,6 +325,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
|
|||
}
|
||||
|
||||
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
||||
if (!jo.fastmem_arena)
|
||||
gpr.Unlock(ARM64Reg::W2);
|
||||
}
|
||||
|
||||
FixupBranch JitArm64::BATAddressLookup(ARM64Reg addr_out, ARM64Reg addr_in, ARM64Reg tmp,
|
||||
|
@ -497,6 +509,8 @@ void JitArm64::lmw(UGeckoInstruction inst)
|
|||
s32 offset = inst.SIMM_16;
|
||||
|
||||
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
|
||||
if (!jo.fastmem_arena)
|
||||
gpr.Lock(ARM64Reg::W2);
|
||||
|
||||
// MMU games make use of a >= d despite this being invalid according to the PEM.
|
||||
// Because of this, make sure to not re-read rA after starting doing the loads.
|
||||
|
@ -522,6 +536,8 @@ void JitArm64::lmw(UGeckoInstruction inst)
|
|||
|
||||
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
|
||||
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
|
||||
if (!jo.fastmem_arena)
|
||||
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
|
||||
if (i == 31)
|
||||
regs_in_use[DecodeReg(addr_reg)] = 0;
|
||||
if (!jo.memcheck)
|
||||
|
@ -538,6 +554,8 @@ void JitArm64::lmw(UGeckoInstruction inst)
|
|||
}
|
||||
|
||||
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
|
||||
if (!jo.fastmem_arena)
|
||||
gpr.Unlock(ARM64Reg::W2);
|
||||
}
|
||||
|
||||
void JitArm64::stmw(UGeckoInstruction inst)
|
||||
|
@ -549,6 +567,8 @@ void JitArm64::stmw(UGeckoInstruction inst)
|
|||
s32 offset = inst.SIMM_16;
|
||||
|
||||
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
||||
if (!jo.fastmem_arena)
|
||||
gpr.Lock(ARM64Reg::W2);
|
||||
|
||||
ARM64Reg addr_reg = ARM64Reg::W1;
|
||||
if (a)
|
||||
|
@ -572,6 +592,8 @@ void JitArm64::stmw(UGeckoInstruction inst)
|
|||
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
|
||||
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
|
||||
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
|
||||
if (!jo.fastmem_arena)
|
||||
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
|
||||
if (i == 31)
|
||||
regs_in_use[DecodeReg(addr_reg)] = 0;
|
||||
|
||||
|
@ -583,6 +605,8 @@ void JitArm64::stmw(UGeckoInstruction inst)
|
|||
}
|
||||
|
||||
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
||||
if (!jo.fastmem_arena)
|
||||
gpr.Unlock(ARM64Reg::W2);
|
||||
}
|
||||
|
||||
void JitArm64::dcbx(UGeckoInstruction inst)
|
||||
|
@ -771,8 +795,14 @@ void JitArm64::dcbz(UGeckoInstruction inst)
|
|||
int a = inst.RA, b = inst.RB;
|
||||
|
||||
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
|
||||
if (!jo.fastmem_arena)
|
||||
gpr.Lock(ARM64Reg::W2);
|
||||
|
||||
Common::ScopeGuard register_guard([&] { gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); });
|
||||
Common::ScopeGuard register_guard([&] {
|
||||
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
|
||||
if (!jo.fastmem_arena)
|
||||
gpr.Unlock(ARM64Reg::W2);
|
||||
});
|
||||
|
||||
constexpr ARM64Reg addr_reg = ARM64Reg::W0;
|
||||
constexpr ARM64Reg temp_reg = ARM64Reg::W30;
|
||||
|
@ -839,6 +869,8 @@ void JitArm64::dcbz(UGeckoInstruction inst)
|
|||
BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
|
||||
BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
|
||||
gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0;
|
||||
if (!jo.fastmem_arena)
|
||||
gprs_to_push[DecodeReg(ARM64Reg::W2)] = 0;
|
||||
|
||||
EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W0,
|
||||
EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push);
|
||||
|
|
|
@ -79,6 +79,8 @@ void JitArm64::lfXX(UGeckoInstruction inst)
|
|||
|
||||
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
|
||||
fpr.Lock(ARM64Reg::Q0);
|
||||
if (!jo.fastmem_arena)
|
||||
gpr.Lock(ARM64Reg::W2);
|
||||
|
||||
const ARM64Reg VD = fpr.RW(inst.FD, type, false);
|
||||
ARM64Reg addr_reg = ARM64Reg::W0;
|
||||
|
@ -166,6 +168,8 @@ void JitArm64::lfXX(UGeckoInstruction inst)
|
|||
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
|
||||
if (!update || early_update)
|
||||
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
|
||||
if (!jo.fastmem_arena)
|
||||
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
|
||||
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
|
||||
if (!jo.memcheck)
|
||||
fprs_in_use[DecodeReg(VD)] = 0;
|
||||
|
@ -190,6 +194,8 @@ void JitArm64::lfXX(UGeckoInstruction inst)
|
|||
|
||||
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
|
||||
fpr.Unlock(ARM64Reg::Q0);
|
||||
if (!jo.fastmem_arena)
|
||||
gpr.Unlock(ARM64Reg::W2);
|
||||
}
|
||||
|
||||
void JitArm64::stfXX(UGeckoInstruction inst)
|
||||
|
@ -273,6 +279,8 @@ void JitArm64::stfXX(UGeckoInstruction inst)
|
|||
}
|
||||
|
||||
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
||||
if (!jo.fastmem_arena)
|
||||
gpr.Lock(ARM64Reg::W2);
|
||||
|
||||
ARM64Reg addr_reg = ARM64Reg::W1;
|
||||
|
||||
|
@ -364,6 +372,8 @@ void JitArm64::stfXX(UGeckoInstruction inst)
|
|||
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
|
||||
if (!update || early_update)
|
||||
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
|
||||
if (!jo.fastmem_arena)
|
||||
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
|
||||
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
|
||||
|
||||
if (is_immediate)
|
||||
|
@ -418,4 +428,6 @@ void JitArm64::stfXX(UGeckoInstruction inst)
|
|||
|
||||
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
||||
fpr.Unlock(ARM64Reg::Q0);
|
||||
if (!jo.fastmem_arena)
|
||||
gpr.Unlock(ARM64Reg::W2);
|
||||
}
|
||||
|
|
|
@ -44,6 +44,10 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
|
|||
gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
|
||||
fpr.Lock(ARM64Reg::Q1);
|
||||
}
|
||||
else if (!jo.fastmem_arena)
|
||||
{
|
||||
gpr.Lock(ARM64Reg::W2);
|
||||
}
|
||||
|
||||
constexpr ARM64Reg addr_reg = ARM64Reg::W0;
|
||||
constexpr ARM64Reg scale_reg = ARM64Reg::W1;
|
||||
|
@ -82,6 +86,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
|
|||
// Wipe the registers we are using as temporaries
|
||||
if (!update || early_update)
|
||||
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
|
||||
if (!jo.fastmem_arena)
|
||||
gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
|
||||
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
|
||||
if (!jo.memcheck)
|
||||
fprs_in_use[DecodeReg(VS)] = 0;
|
||||
|
@ -130,6 +136,10 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
|
|||
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
|
||||
fpr.Unlock(ARM64Reg::Q1);
|
||||
}
|
||||
else if (!jo.fastmem_arena)
|
||||
{
|
||||
gpr.Unlock(ARM64Reg::W2);
|
||||
}
|
||||
}
|
||||
|
||||
void JitArm64::psq_stXX(UGeckoInstruction inst)
|
||||
|
@ -189,8 +199,10 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
|
|||
}
|
||||
|
||||
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
||||
if (!js.assumeNoPairedQuantize)
|
||||
if (!js.assumeNoPairedQuantize || !jo.fastmem_arena)
|
||||
gpr.Lock(ARM64Reg::W2);
|
||||
if (!js.assumeNoPairedQuantize && !jo.fastmem_arena)
|
||||
gpr.Lock(ARM64Reg::W3);
|
||||
|
||||
constexpr ARM64Reg scale_reg = ARM64Reg::W0;
|
||||
constexpr ARM64Reg addr_reg = ARM64Reg::W1;
|
||||
|
@ -229,6 +241,8 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
|
|||
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
|
||||
if (!update || early_update)
|
||||
gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
|
||||
if (!jo.fastmem_arena)
|
||||
gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
|
||||
|
||||
u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
|
||||
if (!w)
|
||||
|
@ -261,9 +275,10 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
|
|||
|
||||
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
||||
fpr.Unlock(ARM64Reg::Q0);
|
||||
// Release the extra scratch registers under the same conditions they were locked with above.
|
||||
if (!js.assumeNoPairedQuantize || !jo.fastmem_arena)
|
||||
gpr.Unlock(ARM64Reg::W2);
|
||||
if (!js.assumeNoPairedQuantize && !jo.fastmem_arena)
|
||||
gpr.Unlock(ARM64Reg::W3);
|
||||
if (!js.assumeNoPairedQuantize)
|
||||
{
|
||||
gpr.Unlock(ARM64Reg::W2);
|
||||
fpr.Unlock(ARM64Reg::Q1);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -393,7 +393,7 @@ void Arm64GPRCache::GetAllocationOrder()
|
|||
ARM64Reg::W11,
|
||||
ARM64Reg::W10,
|
||||
ARM64Reg::W9,
|
||||
// ARM64Reg::W8,
|
||||
ARM64Reg::W8,
|
||||
ARM64Reg::W7,
|
||||
ARM64Reg::W6,
|
||||
ARM64Reg::W5,
|
||||
|
|
|
@ -699,6 +699,7 @@ void JitArm64::GenerateQuantizedStores()
|
|||
// X0 is the scale
|
||||
// X1 is the address
|
||||
// X2 is a temporary
|
||||
// X3 is a temporary if jo.fastmem_arena is false (used in EmitBackpatchRoutine)
|
||||
// X30 is LR
|
||||
// Q0 is the register
|
||||
// Q1 is a temporary
|
||||
|
@ -707,6 +708,8 @@ void JitArm64::GenerateQuantizedStores()
|
|||
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 2};
|
||||
if (!jo.memcheck)
|
||||
gprs_to_push &= ~BitSet32{1};
|
||||
if (!jo.fastmem_arena)
|
||||
gprs_to_push &= ~BitSet32{3};
|
||||
BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1};
|
||||
ARM64FloatEmitter float_emit(this);
|
||||
|
||||
|
|
Loading…
Reference in New Issue