From 3de49dee78be9d12b9caf7cd20a39613dcc35125 Mon Sep 17 00:00:00 2001
From: JosJuice
Date: Sun, 12 Jun 2022 10:38:48 +0200
Subject: [PATCH] JitArm64: Properly reserve scratch register for soft MMU

Cleans up a hack from the previous commit.
---
 Source/Core/Core/PowerPC/JitArm64/Jit.h       | 10 +++---
 .../PowerPC/JitArm64/JitArm64_BackPatch.cpp   |  4 +--
 .../PowerPC/JitArm64/JitArm64_LoadStore.cpp   | 34 ++++++++++++++++++-
 .../JitArm64/JitArm64_LoadStoreFloating.cpp   | 12 +++++++
 .../JitArm64/JitArm64_LoadStorePaired.cpp     | 23 ++++++++++---
 .../PowerPC/JitArm64/JitArm64_RegCache.cpp    |  2 +-
 Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp  |  3 ++
 7 files changed, 76 insertions(+), 12 deletions(-)

diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index 454b4f68a7..5a7d84b151 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -248,10 +248,12 @@ protected:
   //
   // Additional scratch registers are used in the following situations:
   //
-  // emitting_routine && (mode == Auto || (mode != AlwaysSafe && !jo.fastmem_arena)): X2
-  // emitting_routine && mode == Auto && (flags & BackPatchInfo::FLAG_STORE): X0
-  // emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3
-  // !emitting_routine && mode != AlwaysSafe && !jo.fastmem_arena: X30
+  // emitting_routine && mode == Auto: X2
+  // emitting_routine && mode == Auto && (flags & BackPatchInfo::FLAG_STORE): X0
+  // emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3
+  // emitting_routine && mode != AlwaysSafe && !jo.fastmem_arena: X3
+  // mode != AlwaysSafe && !jo.fastmem_arena: X2
+  // !emitting_routine && mode != AlwaysSafe && !jo.fastmem_arena: X30
   //
   // mode != AlwaysUnsafe:
   // X30 (plus most other registers, unless marked in gprs_to_push and fprs_to_push)
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
index 80bacaed6a..c3d2889c94 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
@@ -74,10 +74,10 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
 
   if (!jo.fastmem_arena)
   {
-    const ARM64Reg temp = emitting_routine ? ARM64Reg::W2 : ARM64Reg::W30;
+    const ARM64Reg temp = emitting_routine ? ARM64Reg::W3 : ARM64Reg::W30;
     memory_base = EncodeRegTo64(temp);
-    memory_offset = ARM64Reg::W8;  // TODO
+    memory_offset = ARM64Reg::W2;
 
     LSR(temp, addr, PowerPC::BAT_INDEX_SHIFT);
     LDR(memory_base, MEM_REG, ArithOption(temp, true));
 
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
index 0adfcae6f3..be39929f21 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
@@ -27,6 +27,8 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
 {
   // We want to make sure to not get LR as a temp register
   gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
+  if (!jo.fastmem_arena)
+    gpr.Lock(ARM64Reg::W2);
 
   gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg, false);
   ARM64Reg dest_reg = gpr.R(dest);
@@ -121,6 +123,8 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
   BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
   if (!update || early_update)
     regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
+  if (!jo.fastmem_arena)
+    regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
   if (!jo.memcheck)
     regs_in_use[DecodeReg(dest_reg)] = 0;
 
@@ -157,6 +161,8 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
   }
 
   gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
+  if (!jo.fastmem_arena)
+    gpr.Unlock(ARM64Reg::W2);
 }
 
 void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset,
@@ -164,6 +170,8 @@
 {
   // We want to make sure to not get LR as a temp register
   gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
+  if (!jo.fastmem_arena)
+    gpr.Lock(ARM64Reg::W2);
 
   ARM64Reg RS = gpr.R(value);
 
@@ -259,6 +267,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
   regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
   if (!update || early_update)
     regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
+  if (!jo.fastmem_arena)
+    regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
 
   u32 access_size = BackPatchInfo::GetFlagSize(flags);
   u32 mmio_address = 0;
@@ -315,6 +325,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
   }
 
   gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
+  if (!jo.fastmem_arena)
+    gpr.Unlock(ARM64Reg::W2);
 }
 
 FixupBranch JitArm64::BATAddressLookup(ARM64Reg addr_out, ARM64Reg addr_in, ARM64Reg tmp,
@@ -497,6 +509,8 @@ void JitArm64::lmw(UGeckoInstruction inst)
   s32 offset = inst.SIMM_16;
 
   gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
+  if (!jo.fastmem_arena)
+    gpr.Lock(ARM64Reg::W2);
 
   // MMU games make use of a >= d despite this being invalid according to the PEM.
   // Because of this, make sure to not re-read rA after starting doing the loads.
@@ -522,6 +536,8 @@ void JitArm64::lmw(UGeckoInstruction inst)
 
     BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
     BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
+    if (!jo.fastmem_arena)
+      regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
     if (i == 31)
       regs_in_use[DecodeReg(addr_reg)] = 0;
     if (!jo.memcheck)
@@ -538,6 +554,8 @@ void JitArm64::lmw(UGeckoInstruction inst)
   }
 
   gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
+  if (!jo.fastmem_arena)
+    gpr.Unlock(ARM64Reg::W2);
 }
 
 void JitArm64::stmw(UGeckoInstruction inst)
@@ -549,6 +567,8 @@
   s32 offset = inst.SIMM_16;
 
   gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
+  if (!jo.fastmem_arena)
+    gpr.Lock(ARM64Reg::W2);
 
   ARM64Reg addr_reg = ARM64Reg::W1;
   if (a)
@@ -572,6 +592,8 @@ void JitArm64::stmw(UGeckoInstruction inst)
     BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
     BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
     regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
+    if (!jo.fastmem_arena)
+      regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
     if (i == 31)
       regs_in_use[DecodeReg(addr_reg)] = 0;
 
@@ -583,6 +605,8 @@
   }
 
   gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
+  if (!jo.fastmem_arena)
+    gpr.Unlock(ARM64Reg::W2);
 }
 
 void JitArm64::dcbx(UGeckoInstruction inst)
@@ -771,8 +795,14 @@ void JitArm64::dcbz(UGeckoInstruction inst)
   int a = inst.RA, b = inst.RB;
 
   gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
+  if (!jo.fastmem_arena)
+    gpr.Lock(ARM64Reg::W2);
 
-  Common::ScopeGuard register_guard([&] { gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); });
+  Common::ScopeGuard register_guard([&] {
+    gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
+    if (!jo.fastmem_arena)
+      gpr.Unlock(ARM64Reg::W2);
+  });
 
   constexpr ARM64Reg addr_reg = ARM64Reg::W0;
   constexpr ARM64Reg temp_reg = ARM64Reg::W30;
@@ -839,6 +869,8 @@ void JitArm64::dcbz(UGeckoInstruction inst)
   BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
   BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
   gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0;
+  if (!jo.fastmem_arena)
+    gprs_to_push[DecodeReg(ARM64Reg::W2)] = 0;
 
   EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W0,
                        EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push);
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
index 8845b25fa4..281593d4a7 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
@@ -79,6 +79,8 @@ void JitArm64::lfXX(UGeckoInstruction inst)
 
   gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
   fpr.Lock(ARM64Reg::Q0);
+  if (!jo.fastmem_arena)
+    gpr.Lock(ARM64Reg::W2);
 
   const ARM64Reg VD = fpr.RW(inst.FD, type, false);
   ARM64Reg addr_reg = ARM64Reg::W0;
@@ -166,6 +168,8 @@ void JitArm64::lfXX(UGeckoInstruction inst)
   BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
   if (!update || early_update)
     regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
+  if (!jo.fastmem_arena)
+    regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
   fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
   if (!jo.memcheck)
     fprs_in_use[DecodeReg(VD)] = 0;
@@ -190,6 +194,8 @@ void JitArm64::lfXX(UGeckoInstruction inst)
 
   gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
   fpr.Unlock(ARM64Reg::Q0);
+  if (!jo.fastmem_arena)
+    gpr.Unlock(ARM64Reg::W2);
 }
 
 void JitArm64::stfXX(UGeckoInstruction inst)
@@ -273,6 +279,8 @@ void JitArm64::stfXX(UGeckoInstruction inst)
   }
 
   gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
+  if (!jo.fastmem_arena)
+    gpr.Lock(ARM64Reg::W2);
 
   ARM64Reg addr_reg = ARM64Reg::W1;
 
@@ -364,6 +372,8 @@ void JitArm64::stfXX(UGeckoInstruction inst)
   regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
   if (!update || early_update)
     regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
+  if (!jo.fastmem_arena)
+    regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
   fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
 
   if (is_immediate)
@@ -418,4 +428,6 @@ void JitArm64::stfXX(UGeckoInstruction inst)
 
   gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
   fpr.Unlock(ARM64Reg::Q0);
+  if (!jo.fastmem_arena)
+    gpr.Unlock(ARM64Reg::W2);
 }
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
index 0960bcf236..c7c5dcb0f9 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
@@ -44,6 +44,10 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
     gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
     fpr.Lock(ARM64Reg::Q1);
   }
+  else if (!jo.fastmem_arena)
+  {
+    gpr.Lock(ARM64Reg::W2);
+  }
 
   constexpr ARM64Reg addr_reg = ARM64Reg::W0;
   constexpr ARM64Reg scale_reg = ARM64Reg::W1;
@@ -82,6 +86,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
   // Wipe the registers we are using as temporaries
   if (!update || early_update)
     gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
+  if (!jo.fastmem_arena)
+    gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
   fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
   if (!jo.memcheck)
     fprs_in_use[DecodeReg(VS)] = 0;
@@ -130,6 +136,10 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
     gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
     fpr.Unlock(ARM64Reg::Q1);
   }
+  else if (!jo.fastmem_arena)
+  {
+    gpr.Unlock(ARM64Reg::W2);
+  }
 }
 
 void JitArm64::psq_stXX(UGeckoInstruction inst)
@@ -189,8 +199,10 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
   }
 
   gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
-  if (!js.assumeNoPairedQuantize)
+  if (!js.assumeNoPairedQuantize || !jo.fastmem_arena)
     gpr.Lock(ARM64Reg::W2);
+  if (!js.assumeNoPairedQuantize && !jo.fastmem_arena)
+    gpr.Lock(ARM64Reg::W3);
 
   constexpr ARM64Reg scale_reg = ARM64Reg::W0;
   constexpr ARM64Reg addr_reg = ARM64Reg::W1;
@@ -229,6 +241,8 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
   gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
   if (!update || early_update)
     gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
+  if (!jo.fastmem_arena)
+    gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
 
   u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
   if (!w)
@@ -261,9 +275,10 @@
 
   gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
   fpr.Unlock(ARM64Reg::Q0);
+  if (!js.assumeNoPairedQuantize || !jo.fastmem_arena)
+    gpr.Unlock(ARM64Reg::W2);
+  if (!js.assumeNoPairedQuantize && !jo.fastmem_arena)
+    gpr.Unlock(ARM64Reg::W3);
   if (!js.assumeNoPairedQuantize)
-  {
-    gpr.Unlock(ARM64Reg::W2);
     fpr.Unlock(ARM64Reg::Q1);
-  }
 }
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
index af0fc0386b..7a077f6ce2 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
@@ -393,7 +393,7 @@ void Arm64GPRCache::GetAllocationOrder()
       ARM64Reg::W11,
       ARM64Reg::W10,
       ARM64Reg::W9,
-      // ARM64Reg::W8,
+      ARM64Reg::W8,
       ARM64Reg::W7,
       ARM64Reg::W6,
       ARM64Reg::W5,
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
index e50ec27929..b48ffef90c 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -699,6 +699,7 @@ void JitArm64::GenerateQuantizedStores()
   // X0 is the scale
   // X1 is the address
   // X2 is a temporary
+  // X3 is a temporary if jo.fastmem_arena is false (used in EmitBackpatchRoutine)
   // X30 is LR
   // Q0 is the register
   // Q1 is a temporary
@@ -707,6 +708,8 @@ void JitArm64::GenerateQuantizedStores()
   BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 2};
   if (!jo.memcheck)
     gprs_to_push &= ~BitSet32{1};
+  if (!jo.fastmem_arena)
+    gprs_to_push &= ~BitSet32{3};
   BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1};
 
   ARM64FloatEmitter float_emit(this);
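
Editor's note on the soft-MMU path these scratch registers serve: the LSR/LDR pair visible in the JitArm64_BackPatch.cpp hunk uses the high bits of the guest address to index a table of host base pointers (reached through MEM_REG), while the rest of the address supplies the offset for the actual access. The standalone C++ sketch below illustrates that style of lookup under stated assumptions; the table name, page-size constant, and helper function are hypothetical stand-ins, not Dolphin's actual declarations.

#include <cstdint>

// Hypothetical stand-ins for illustration only; Dolphin's real table and
// shift constant (MEM_REG, PowerPC::BAT_INDEX_SHIFT) live elsewhere.
constexpr uint32_t kPageShift = 17;                       // assumed log2(page size)
constexpr uint32_t kPageMask = (1u << kPageShift) - 1;    // low bits = offset within page
uint8_t* g_page_base_table[1u << (32 - kPageShift)] = {}; // one host pointer per guest page

// Mirrors the emitted sequence: the shift (LSR) picks the table index, the
// load (LDR) fetches the host base pointer for that page, and the masked
// guest address is the offset added by the subsequent memory access. In this
// sketch, a null base simply means "take the slow fallback path instead".
inline uint8_t* SoftMmuTranslate(uint32_t guest_addr)
{
  uint8_t* const base = g_page_base_table[guest_addr >> kPageShift];
  if (base == nullptr)
    return nullptr;  // caller falls back to the safe access path
  return base + (guest_addr & kPageMask);
}

Keeping the base and the offset in separate registers is what makes two reserved scratch registers necessary per access: one holds the loaded base pointer (X3 or X30 above) and one holds the offset (X2), which is what the updated Jit.h comment lists for the !jo.fastmem_arena cases.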