JitArm64: Properly reserve scratch register for soft MMU

Cleans up a hack from the previous commit.
JosJuice 2022-06-12 10:38:48 +02:00
parent 3dce1df00e
commit 3de49dee78
7 changed files with 76 additions and 12 deletions

View File

@ -248,9 +248,11 @@ protected:
//
// Additional scratch registers are used in the following situations:
//
-// emitting_routine && (mode == Auto || (mode != AlwaysSafe && !jo.fastmem_arena)): X2
+// emitting_routine && mode == Auto: X2
// emitting_routine && mode == Auto && (flags & BackPatchInfo::FLAG_STORE): X0
// emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3
+// emitting_routine && mode != AlwaysSafe && !jo.fastmem_arena: X3
+// mode != AlwaysSafe && !jo.fastmem_arena: X2
// !emitting_routine && mode != AlwaysSafe && !jo.fastmem_arena: X30
//
// mode != AlwaysUnsafe:
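The scratch-register list above is the contract between EmitBackpatchRoutine and its callers: any register listed for the active combination of mode, emitting_routine, and jo.fastmem_arena gets clobbered, so the caller has to reserve it. For the soft MMU case that register is W2, which the hunks below lock at every call site and drop from the set of registers saved around the slow path. A condensed sketch of that pattern, using the helpers visible in the diff (flags, dest_reg and addr_reg stand in for whatever the individual emitter actually uses):

  // Reserve the soft MMU scratch register alongside the usual temporaries.
  gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
  if (!jo.fastmem_arena)
    gpr.Lock(ARM64Reg::W2);

  // The slow path does not need to preserve a register we reserved ourselves.
  BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
  BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
  if (!jo.fastmem_arena)
    regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;

  EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, EncodeRegTo64(addr_reg),
                       regs_in_use, fprs_in_use);

  // Release everything once the access has been emitted.
  gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
  if (!jo.fastmem_arena)
    gpr.Unlock(ARM64Reg::W2);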

View File

@ -74,10 +74,10 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
if (!jo.fastmem_arena)
{
-const ARM64Reg temp = emitting_routine ? ARM64Reg::W2 : ARM64Reg::W30;
+const ARM64Reg temp = emitting_routine ? ARM64Reg::W3 : ARM64Reg::W30;
memory_base = EncodeRegTo64(temp);
-memory_offset = ARM64Reg::W8; // TODO
+memory_offset = ARM64Reg::W2;
LSR(temp, addr, PowerPC::BAT_INDEX_SHIFT);
LDR(memory_base, MEM_REG, ArithOption(temp, true));
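With no fastmem arena, the two instructions above perform the soft MMU base lookup: the guest address shifted down by PowerPC::BAT_INDEX_SHIFT indexes a table of host pointers at MEM_REG, and the loaded pointer becomes memory_base for the actual access. The offset now lives in the properly reserved W2 instead of the previous commit's W8 hack, and the base-pointer temporary becomes W3 inside shared routines (W30 otherwise). The equivalent lookup in plain C++, as a rough sketch (bat_table and the parameter names are illustrative, not Dolphin's actual fields):

  #include <cstdint>

  // LSR temp, addr, BAT_INDEX_SHIFT    ->  index = guest_addr >> bat_index_shift
  // LDR base, [MEM_REG, temp, scaled]  ->  base  = bat_table[index]  (8-byte entries)
  uint64_t SoftMmuMemoryBase(const uint64_t* bat_table, uint32_t guest_addr, unsigned bat_index_shift)
  {
    return bat_table[guest_addr >> bat_index_shift];
  }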

View File

@ -27,6 +27,8 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
{
// We want to make sure to not get LR as a temp register
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Lock(ARM64Reg::W2);
gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg, false);
ARM64Reg dest_reg = gpr.R(dest);
@ -121,6 +123,8 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!jo.fastmem_arena)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
if (!jo.memcheck)
regs_in_use[DecodeReg(dest_reg)] = 0;
@ -157,6 +161,8 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
}
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Unlock(ARM64Reg::W2);
}
void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset,
@ -164,6 +170,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
{
// We want to make sure to not get LR as a temp register
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Lock(ARM64Reg::W2);
ARM64Reg RS = gpr.R(value);
@ -259,6 +267,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
if (!jo.fastmem_arena)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
u32 access_size = BackPatchInfo::GetFlagSize(flags);
u32 mmio_address = 0;
@ -315,6 +325,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
}
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Unlock(ARM64Reg::W2);
}
FixupBranch JitArm64::BATAddressLookup(ARM64Reg addr_out, ARM64Reg addr_in, ARM64Reg tmp,
@ -497,6 +509,8 @@ void JitArm64::lmw(UGeckoInstruction inst)
s32 offset = inst.SIMM_16;
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Lock(ARM64Reg::W2);
// MMU games make use of a >= d despite this being invalid according to the PEM.
// Because of this, make sure to not re-read rA after starting doing the loads.
@ -522,6 +536,8 @@ void JitArm64::lmw(UGeckoInstruction inst)
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
if (!jo.fastmem_arena)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
if (i == 31)
regs_in_use[DecodeReg(addr_reg)] = 0;
if (!jo.memcheck)
@ -538,6 +554,8 @@ void JitArm64::lmw(UGeckoInstruction inst)
}
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Unlock(ARM64Reg::W2);
}
void JitArm64::stmw(UGeckoInstruction inst)
@ -549,6 +567,8 @@ void JitArm64::stmw(UGeckoInstruction inst)
s32 offset = inst.SIMM_16;
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Lock(ARM64Reg::W2);
ARM64Reg addr_reg = ARM64Reg::W1;
if (a)
@ -572,6 +592,8 @@ void JitArm64::stmw(UGeckoInstruction inst)
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!jo.fastmem_arena)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
if (i == 31)
regs_in_use[DecodeReg(addr_reg)] = 0;
@ -583,6 +605,8 @@ void JitArm64::stmw(UGeckoInstruction inst)
}
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Unlock(ARM64Reg::W2);
}
void JitArm64::dcbx(UGeckoInstruction inst)
@ -771,8 +795,14 @@ void JitArm64::dcbz(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB;
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Lock(ARM64Reg::W2);
-Common::ScopeGuard register_guard([&] { gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); });
+Common::ScopeGuard register_guard([&] {
+gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
+if (!jo.fastmem_arena)
+gpr.Unlock(ARM64Reg::W2);
+});
constexpr ARM64Reg addr_reg = ARM64Reg::W0;
constexpr ARM64Reg temp_reg = ARM64Reg::W30;
@ -839,6 +869,8 @@ void JitArm64::dcbz(UGeckoInstruction inst)
BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0;
if (!jo.fastmem_arena)
gprs_to_push[DecodeReg(ARM64Reg::W2)] = 0;
EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W0,
EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push);
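dcbz releases its registers through a Common::ScopeGuard rather than through explicit unlocks at the end of the function, so the conditional W2 unlock moves into the guard's lambda and runs on every exit path. For readers unfamiliar with the helper, a minimal version of the idea looks roughly like this (an illustrative sketch, not Dolphin's actual implementation):

  #include <utility>

  // Runs the stored callable when the guard goes out of scope, so cleanup
  // happens no matter which return path the function takes.
  template <typename F>
  class ScopeGuard
  {
  public:
    explicit ScopeGuard(F func) : m_func(std::move(func)) {}
    ~ScopeGuard() { m_func(); }
    ScopeGuard(const ScopeGuard&) = delete;
    ScopeGuard& operator=(const ScopeGuard&) = delete;

  private:
    F m_func;
  };

  // Usage mirroring the diff: ScopeGuard guard([&] { gpr.Unlock(ARM64Reg::W0); });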

View File

@ -79,6 +79,8 @@ void JitArm64::lfXX(UGeckoInstruction inst)
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0);
if (!jo.fastmem_arena)
gpr.Lock(ARM64Reg::W2);
const ARM64Reg VD = fpr.RW(inst.FD, type, false);
ARM64Reg addr_reg = ARM64Reg::W0;
@ -166,6 +168,8 @@ void JitArm64::lfXX(UGeckoInstruction inst)
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!jo.fastmem_arena)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
if (!jo.memcheck)
fprs_in_use[DecodeReg(VD)] = 0;
@ -190,6 +194,8 @@ void JitArm64::lfXX(UGeckoInstruction inst)
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
if (!jo.fastmem_arena)
gpr.Unlock(ARM64Reg::W2);
}
void JitArm64::stfXX(UGeckoInstruction inst)
@ -273,6 +279,8 @@ void JitArm64::stfXX(UGeckoInstruction inst)
}
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Lock(ARM64Reg::W2);
ARM64Reg addr_reg = ARM64Reg::W1;
@ -364,6 +372,8 @@ void JitArm64::stfXX(UGeckoInstruction inst)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
if (!jo.fastmem_arena)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
if (is_immediate)
@ -418,4 +428,6 @@ void JitArm64::stfXX(UGeckoInstruction inst)
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
if (!jo.fastmem_arena)
gpr.Unlock(ARM64Reg::W2);
}

View File

@ -44,6 +44,10 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
fpr.Lock(ARM64Reg::Q1);
}
else if (!jo.fastmem_arena)
{
gpr.Lock(ARM64Reg::W2);
}
constexpr ARM64Reg addr_reg = ARM64Reg::W0;
constexpr ARM64Reg scale_reg = ARM64Reg::W1;
@ -82,6 +86,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
// Wipe the registers we are using as temporaries
if (!update || early_update)
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
if (!jo.fastmem_arena)
gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
if (!jo.memcheck)
fprs_in_use[DecodeReg(VS)] = 0;
@ -130,6 +136,10 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
fpr.Unlock(ARM64Reg::Q1);
}
else if (!jo.fastmem_arena)
{
gpr.Unlock(ARM64Reg::W2);
}
}
void JitArm64::psq_stXX(UGeckoInstruction inst)
@ -189,8 +199,10 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
}
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
-if (!js.assumeNoPairedQuantize)
+if (!js.assumeNoPairedQuantize || !jo.fastmem_arena)
gpr.Lock(ARM64Reg::W2);
+if (!js.assumeNoPairedQuantize && !jo.fastmem_arena)
+gpr.Lock(ARM64Reg::W3);
constexpr ARM64Reg scale_reg = ARM64Reg::W0;
constexpr ARM64Reg addr_reg = ARM64Reg::W1;
@ -229,6 +241,8 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
if (!update || early_update)
gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
if (!jo.fastmem_arena)
gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
if (!w)
@ -261,9 +275,10 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
+if (!js.assumeNoPairedQuantize || !jo.fastmem_arena)
+gpr.Unlock(ARM64Reg::W2);
+if (!js.assumeNoPairedQuantize && !jo.fastmem_arena)
+gpr.Unlock(ARM64Reg::W3);
if (!js.assumeNoPairedQuantize)
-{
-gpr.Unlock(ARM64Reg::W2);
fpr.Unlock(ARM64Reg::Q1);
-}
}
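The asymmetric conditions in psq_stXX above cover both users of the scratch registers: W2 is needed either by the common quantized-store routine (as its temporary) or by the soft MMU lookup, while W3 is only needed when both are involved, because inside the routine W2 is already taken and EmitBackpatchRoutine falls back to W3 (the emitting_routine row of the table in Jit.h). Summarized as a sketch (the helper functions are illustrative):

  // assumeNoPairedQuantize  fastmem_arena   extra GPRs locked by psq_stXX
  // true                    true            none
  // true                    false           W2          (soft MMU scratch)
  // false                   true            W2          (quantizer temporary)
  // false                   false           W2 and W3   (quantizer temporary + soft MMU scratch)
  bool NeedsW2(bool assume_no_paired_quantize, bool fastmem_arena)
  {
    return !assume_no_paired_quantize || !fastmem_arena;
  }

  bool NeedsW3(bool assume_no_paired_quantize, bool fastmem_arena)
  {
    return !assume_no_paired_quantize && !fastmem_arena;
  }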

View File

@ -393,7 +393,7 @@ void Arm64GPRCache::GetAllocationOrder()
ARM64Reg::W11,
ARM64Reg::W10,
ARM64Reg::W9,
-// ARM64Reg::W8,
+ARM64Reg::W8,
ARM64Reg::W7,
ARM64Reg::W6,
ARM64Reg::W5,

View File

@ -699,6 +699,7 @@ void JitArm64::GenerateQuantizedStores()
// X0 is the scale
// X1 is the address
// X2 is a temporary
// X3 is a temporary if jo.fastmem_arena is false (used in EmitBackpatchRoutine)
// X30 is LR
// Q0 is the register
// Q1 is a temporary
@ -707,6 +708,8 @@ void JitArm64::GenerateQuantizedStores()
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 2};
if (!jo.memcheck)
gprs_to_push &= ~BitSet32{1};
if (!jo.fastmem_arena)
gprs_to_push &= ~BitSet32{3};
BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1};
ARM64FloatEmitter float_emit(this);
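The push-set arithmetic above removes every register the store routine itself is allowed to clobber from the caller-saved set before the slow path saves them: X0 (the scale) and X2 (the temporary) never need to survive, X1 (the address) only has to survive when memchecks are enabled, and X3 joins the scratch set on the soft MMU path. The same logic with a plain bitmask, as an illustrative sketch mirroring the BitSet32 code (bit n stands for Wn):

  #include <cstdint>

  // A set bit means "save this register around the slow-path call".
  uint32_t QuantizedStorePushSet(uint32_t caller_saved_gprs, bool memcheck, bool fastmem_arena)
  {
    uint32_t gprs_to_push = caller_saved_gprs & ~((1u << 0) | (1u << 2));  // X0, X2 are scratch
    if (!memcheck)
      gprs_to_push &= ~(1u << 1);  // without memchecks, the address in X1 need not survive
    if (!fastmem_arena)
      gprs_to_push &= ~(1u << 3);  // X3 is clobbered by the soft MMU lookup
    return gprs_to_push;
  }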