From 27ed2a99a0699f0b6e4ebcfbc315caa5a7cd9096 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Mon, 6 Jun 2022 19:11:02 +0200 Subject: [PATCH 1/7] JitArm64: Refactor EmitBackpatchRoutine parameters, part 1 --- Source/Core/Core/PowerPC/JitArm64/Jit.h | 35 +++++++-- .../PowerPC/JitArm64/JitArm64_BackPatch.cpp | 8 +- .../PowerPC/JitArm64/JitArm64_LoadStore.cpp | 23 +++--- .../JitArm64/JitArm64_LoadStoreFloating.cpp | 14 ++-- .../JitArm64/JitArm64_LoadStorePaired.cpp | 4 +- Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 76 +++++++++---------- 6 files changed, 93 insertions(+), 67 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index e979e3a0cc..e89c2b1a9b 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -215,10 +215,28 @@ protected: // Dump a memory range of code void DumpCode(const u8* start, const u8* end); + // This enum is used for selecting an implementation of EmitBackpatchRoutine. + // + // The below descriptions of each enum entry apply when jo.fastmem_arena is true. + // If jo.fastmem_arena is false, the slow C++ code is always called instead. + enum class MemAccessMode + { + // Always calls the slow C++ code. For performance reasons, should generally only be used if + // the guest address is known in advance and IsOptimizableRAMAddress returns false for it. + AlwaysSafe, + // Only emits fast access code. Must only be used if the guest address is known in advance + // and IsOptimizableRAMAddress returns true for it, otherwise Dolphin will likely crash! + AlwaysUnsafe, + // Best in most cases. Tries to run fast access code, and if that fails, uses backpatching to + // replace the code with a call to the slow C++ code. + Auto, + }; + // This is the core routine for accessing emulated memory, with support for - // many different kinds of loads and stores as well as fastmem backpatching. + // many different kinds of loads and stores as well as fastmem/backpatching. // // Registers used: + // // addr scratch // Store: X1 X0 // Load: X0 @@ -226,15 +244,18 @@ protected: // Store float: X1 Q0 // Load float: X0 // - // If fastmem && !do_farcode, the addr argument can be any register. + // If using MemAccessMode::AlwaysUnsafe, the addr argument can be any register. // Otherwise it must be the register listed in the table above. 
   //
   // Additional scratch registers are used in the following situations:
-  // fastmem && do_farcode && emitting_routine: X2
-  // fastmem && do_farcode && emitting_routine && (flags & BackPatchInfo::FLAG_STORE): X0
-  // fastmem && do_farcode && emitting_routine && !(flags & BackPatchInfo::FLAG_STORE): X3
-  // !fastmem || do_farcode: X30 (plus lots more unless you set gprs_to_push and fprs_to_push)
-  void EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, Arm64Gen::ARM64Reg RS,
+  //
+  // mode == Auto && emitting_routine: X2
+  // mode == Auto && emitting_routine && (flags & BackPatchInfo::FLAG_STORE): X0
+  // mode == Auto && emitting_routine && !(flags & BackPatchInfo::FLAG_STORE): X3
+  //
+  // mode != AlwaysUnsafe || !jo.fastmem_arena:
+  // X30 (plus most other registers, unless marked in gprs_to_push and fprs_to_push)
+  void EmitBackpatchRoutine(u32 flags, MemAccessMode mode, Arm64Gen::ARM64Reg RS,
                             Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0),
                             BitSet32 fprs_to_push = BitSet32(0), bool emitting_routine = false);
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
index 5c68e12338..591ad2a897 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
@@ -54,12 +54,16 @@ void JitArm64::DoBacktrace(uintptr_t access_address, SContext* ctx)
   ERROR_LOG_FMT(DYNA_REC, "Full block: {}", pc_memory);
 }
 
-void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, ARM64Reg RS,
-                                    ARM64Reg addr, BitSet32 gprs_to_push, BitSet32 fprs_to_push,
+void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, ARM64Reg addr,
+                                    BitSet32 gprs_to_push, BitSet32 fprs_to_push,
                                     bool emitting_routine)
 {
   const u32 access_size = BackPatchInfo::GetFlagSize(flags);
 
+  const bool fastmem = jo.fastmem_arena && mode != MemAccessMode::AlwaysSafe;
+  const bool do_farcode = jo.fastmem_arena && (mode == MemAccessMode::Auto ||
+                                               mode == MemAccessMode::AlwaysSafe);
+
   bool in_far_code = false;
   const u8* fastmem_start = GetCodePtr();
   std::optional<FixupBranch> slowmem_fixup;
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
index 2781ba3f65..0adfcae6f3 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
@@ -129,10 +129,11 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
   if (is_immediate)
     mmio_address = PowerPC::IsOptimizableMMIOAccess(imm_addr, access_size);
 
-  if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
+  if (is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
   {
     set_addr_reg_if_needed();
-    EmitBackpatchRoutine(flags, true, false, dest_reg, XA, BitSet32(0), BitSet32(0));
+    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, dest_reg, XA, regs_in_use,
+                         fprs_in_use);
   }
   else if (mmio_address)
   {
@@ -142,7 +143,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
   else
   {
     set_addr_reg_if_needed();
-    EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, dest_reg, XA, regs_in_use, fprs_in_use);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, XA, regs_in_use, fprs_in_use);
   }
 
   gpr.BindToRegister(dest, false, true);
@@ -290,10 +291,10 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
     js.fifoBytesSinceCheck += accessSize >> 
3; } - else if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr)) + else if (is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr)) { set_addr_reg_if_needed(); - EmitBackpatchRoutine(flags, true, false, RS, XA, BitSet32(0), BitSet32(0)); + EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, RS, XA, regs_in_use, fprs_in_use); } else if (mmio_address) { @@ -303,7 +304,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s else { set_addr_reg_if_needed(); - EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, RS, XA, regs_in_use, fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, RS, XA, regs_in_use, fprs_in_use); } if (update && !early_update) @@ -526,8 +527,8 @@ void JitArm64::lmw(UGeckoInstruction inst) if (!jo.memcheck) regs_in_use[DecodeReg(dest_reg)] = 0; - EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, dest_reg, EncodeRegTo64(addr_reg), - regs_in_use, fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, EncodeRegTo64(addr_reg), regs_in_use, + fprs_in_use); gpr.BindToRegister(i, false, true); ASSERT(dest_reg == gpr.R(i)); @@ -574,8 +575,8 @@ void JitArm64::stmw(UGeckoInstruction inst) if (i == 31) regs_in_use[DecodeReg(addr_reg)] = 0; - EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, src_reg, EncodeRegTo64(addr_reg), - regs_in_use, fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), regs_in_use, + fprs_in_use); if (i != 31) ADD(addr_reg, addr_reg, 4); @@ -839,7 +840,7 @@ void JitArm64::dcbz(UGeckoInstruction inst) BitSet32 fprs_to_push = fpr.GetCallerSavedUsed(); gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0; - EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, jo.fastmem, jo.fastmem, ARM64Reg::W0, + EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W0, EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push); if (using_dcbz_hack) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp index 9a0326e27b..8845b25fa4 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp @@ -170,13 +170,13 @@ void JitArm64::lfXX(UGeckoInstruction inst) if (!jo.memcheck) fprs_in_use[DecodeReg(VD)] = 0; - if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr)) + if (is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr)) { - EmitBackpatchRoutine(flags, true, false, VD, XA, BitSet32(0), BitSet32(0)); + EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, VD, XA, regs_in_use, fprs_in_use); } else { - EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, VD, XA, regs_in_use, fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, VD, XA, regs_in_use, fprs_in_use); } const ARM64Reg VD_again = fpr.RW(inst.FD, type, true); @@ -389,21 +389,21 @@ void JitArm64::stfXX(UGeckoInstruction inst) STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); js.fifoBytesSinceCheck += accessSize >> 3; } - else if (jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(imm_addr)) + else if (PowerPC::IsOptimizableRAMAddress(imm_addr)) { set_addr_reg_if_needed(); - EmitBackpatchRoutine(flags, true, false, V0, XA, BitSet32(0), BitSet32(0)); + EmitBackpatchRoutine(flags, MemAccessMode::AlwaysUnsafe, V0, XA, regs_in_use, fprs_in_use); } else { set_addr_reg_if_needed(); - EmitBackpatchRoutine(flags, false, 
false, V0, XA, regs_in_use, fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::AlwaysSafe, V0, XA, regs_in_use, fprs_in_use); } } else { set_addr_reg_if_needed(); - EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, V0, XA, regs_in_use, fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, V0, XA, regs_in_use, fprs_in_use); } if (update && !early_update) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp index 4fa015dcd4..0960bcf236 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp @@ -90,7 +90,7 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) if (!w) flags |= BackPatchInfo::FLAG_PAIR; - EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, VS, EncodeRegTo64(addr_reg), gprs_in_use, + EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), gprs_in_use, fprs_in_use); } else @@ -234,7 +234,7 @@ void JitArm64::psq_stXX(UGeckoInstruction inst) if (!w) flags |= BackPatchInfo::FLAG_PAIR; - EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, VS, EncodeRegTo64(addr_reg), gprs_in_use, + EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), gprs_in_use, fprs_in_use); } else diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index 1b93a275c6..a506ce02e1 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -510,7 +510,7 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32; - EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push & ~BitSet32{1}, fprs_to_push, true); RET(ARM64Reg::X30); @@ -520,8 +520,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, - gprs_to_push, fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, + fprs_to_push, true); float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); @@ -538,8 +538,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, - gprs_to_push, fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, + fprs_to_push, true); float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); @@ -556,8 +556,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, - gprs_to_push, fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, + fprs_to_push, true); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); 
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0); @@ -573,8 +573,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, - gprs_to_push, fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, + fprs_to_push, true); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0); @@ -591,7 +591,7 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32; - EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push & ~BitSet32{1}, fprs_to_push, true); RET(ARM64Reg::X30); @@ -601,8 +601,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, - gprs_to_push, fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, + fprs_to_push, true); float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); @@ -619,8 +619,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, - gprs_to_push, fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, + fprs_to_push, true); float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); @@ -637,8 +637,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, - gprs_to_push, fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, + fprs_to_push, true); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0); @@ -654,8 +654,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, - gprs_to_push, fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, + fprs_to_push, true); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0); @@ -718,8 +718,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32; - EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, - gprs_to_push, fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, + fprs_to_push, true); RET(ARM64Reg::X30); } @@ -737,8 +737,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = 
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, - gprs_to_push, fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, + fprs_to_push, true); RET(ARM64Reg::X30); } @@ -756,8 +756,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, - gprs_to_push, fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, + fprs_to_push, true); RET(ARM64Reg::X30); } @@ -774,8 +774,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, - gprs_to_push, fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, + fprs_to_push, true); RET(ARM64Reg::X30); } @@ -792,8 +792,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, - gprs_to_push, fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, + fprs_to_push, true); RET(ARM64Reg::X30); } @@ -803,8 +803,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32; - EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, - gprs_to_push, fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, + fprs_to_push, true); RET(ARM64Reg::X30); } @@ -822,8 +822,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, - gprs_to_push, fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, + fprs_to_push, true); RET(ARM64Reg::X30); } @@ -841,8 +841,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, - gprs_to_push, fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, + fprs_to_push, true); RET(ARM64Reg::X30); } @@ -859,8 +859,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg, - gprs_to_push, fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, + fprs_to_push, true); RET(ARM64Reg::X30); } @@ -877,8 +877,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = 
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;
 
-    EmitBackpatchRoutine(flags, jo.fastmem_arena, jo.fastmem_arena, ARM64Reg::D0, addr_reg,
-                         gprs_to_push, fprs_to_push, true);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
+                         fprs_to_push, true);
 
     RET(ARM64Reg::X30);
   }

From 63283dcc72be41f69f042e28351dcf9c4efc4129 Mon Sep 17 00:00:00 2001
From: JosJuice
Date: Mon, 6 Jun 2022 19:17:36 +0200
Subject: [PATCH 2/7] JitArm64: Refactor EmitBackpatchRoutine parameters, part 2

---
 .../Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
index 591ad2a897..e5ce801175 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
@@ -60,17 +60,16 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
 {
   const u32 access_size = BackPatchInfo::GetFlagSize(flags);
 
-  const bool fastmem = jo.fastmem_arena && mode != MemAccessMode::AlwaysSafe;
-  const bool do_farcode = jo.fastmem_arena && (mode == MemAccessMode::Auto ||
-                                               mode == MemAccessMode::AlwaysSafe);
+  const bool emit_fastmem = jo.fastmem_arena && mode != MemAccessMode::AlwaysSafe;
+  const bool emit_slowmem = !jo.fastmem_arena || mode != MemAccessMode::AlwaysUnsafe;
 
   bool in_far_code = false;
   const u8* fastmem_start = GetCodePtr();
   std::optional<FixupBranch> slowmem_fixup;
 
-  if (fastmem)
+  if (emit_fastmem)
   {
-    if (do_farcode && emitting_routine)
+    if (emit_slowmem && emitting_routine)
     {
       const ARM64Reg temp1 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W0 : ARM64Reg::W3;
       const ARM64Reg temp2 = ARM64Reg::W2;
@@ -126,11 +125,11 @@
   }
   const u8* fastmem_end = GetCodePtr();
 
-  if (!fastmem || do_farcode)
+  if (emit_slowmem)
   {
     const bool memcheck = jo.memcheck && !emitting_routine;
 
-    if (fastmem && do_farcode)
+    if (emit_fastmem)
     {
       in_far_code = true;
       SwitchToFarCode();

From bcc64a05b36e085ae6afedbd97d0d61427c2a44e Mon Sep 17 00:00:00 2001
From: JosJuice
Date: Mon, 6 Jun 2022 15:39:41 +0200
Subject: [PATCH 3/7] Memmap: Remove outdated comments for 32-bit

---
 Source/Core/Core/HW/Memmap.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/Source/Core/Core/HW/Memmap.h b/Source/Core/Core/HW/Memmap.h
index 14ed19291f..5a15a1e343 100644
--- a/Source/Core/Core/HW/Memmap.h
+++ b/Source/Core/Core/HW/Memmap.h
@@ -21,16 +21,13 @@ class Mapping;
 namespace Memory
 {
 // Base is a pointer to the base of the memory map. Yes, some MMU tricks
-// are used to set up a full GC or Wii memory map in process memory. on
-// 32-bit, you have to mask your offsets with 0x3FFFFFFF. This means that
-// some things are mirrored too many times, but eh... it works.
-
+// are used to set up a full GC or Wii memory map in process memory.
 // In 64-bit, this might point to "high memory" (above the 32-bit limit),
 // so be sure to load it into a 64-bit register.
 extern u8* physical_base;
 extern u8* logical_base;
 
-// These are guaranteed to point to "low memory" addresses (sub-32-bit).
+// The actual memory used for backing the memory map.
 extern u8* m_pRAM;
 extern u8* m_pEXRAM;
 extern u8* m_pL1Cache;

From 3dce1df00e3bf28707270c7d0c58fd1b6cabe3be Mon Sep 17 00:00:00 2001
From: JosJuice
Date: Sat, 11 Jun 2022 15:49:08 +0200
Subject: [PATCH 4/7] JitArm64: Implement "soft MMU"

This is used when fastmem isn't available. Instead of always falling
back to the C++ code in MMU.cpp, the JIT translates addresses on its
own by looking them up in a table that Dolphin constructs. This is
slower than fastmem, but faster than the old non-fastmem code.

This is primarily useful for iOS, since that's the only major platform
nowadays where you can't reliably get fastmem. I think it would make
sense to merge this feature to master despite this, since there's
nothing actually iOS-specific about the feature. It would be of use
for me when I have to disable fastmem to stop Android Studio from
constantly breaking on segfaults, for instance.

Co-authored-by: OatmealDome
---
 Source/Core/Core/HW/Memmap.cpp                | 50 ++++++++++++++-----
 Source/Core/Core/HW/Memmap.h                  |  5 ++
 Source/Core/Core/PowerPC/JitArm64/Jit.h       | 18 +++----
 .../PowerPC/JitArm64/JitArm64_BackPatch.cpp   | 50 +++++++++++++------
 .../PowerPC/JitArm64/JitArm64_RegCache.cpp    |  2 +-
 Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp  |  8 +--
 Source/Core/Core/PowerPC/MMU.h                |  3 +-
 7 files changed, 94 insertions(+), 42 deletions(-)

diff --git a/Source/Core/Core/HW/Memmap.cpp b/Source/Core/Core/HW/Memmap.cpp
index 4f2aaa1a5d..f61e974bf3 100644
--- a/Source/Core/Core/HW/Memmap.cpp
+++ b/Source/Core/Core/HW/Memmap.cpp
@@ -47,6 +47,8 @@ namespace Memory
 // Store the MemArena here
 u8* physical_base = nullptr;
 u8* logical_base = nullptr;
+u8* physical_page_mappings_base = nullptr;
+u8* logical_page_mappings_base = nullptr;
 static bool is_fastmem_arena_initialized = false;
 
 // The MemArena class
@@ -223,6 +225,9 @@ static std::array<PhysicalMemoryRegion, 4> s_physical_regions;
 
 static std::vector<LogicalMemoryEntry> logical_mapped_entries;
 
+static std::array<u8*, PowerPC::BAT_PAGE_COUNT> s_physical_page_mappings;
+static std::array<u8*, PowerPC::BAT_PAGE_COUNT> s_logical_page_mappings;
+
 void Init()
 {
   const auto get_mem1_size = [] {
@@ -280,6 +285,8 @@ void Init()
   }
 
   g_arena.GrabSHMSegment(mem_size);
 
+  s_physical_page_mappings.fill(nullptr);
+
   // Create an anonymous view of the physical memory
   for (const PhysicalMemoryRegion& region : s_physical_regions)
   {
@@ -295,8 +302,17 @@ void Init()
                    region.physical_address, region.size);
       exit(0);
     }
+
+    for (u32 i = 0; i < region.size; i += PowerPC::BAT_PAGE_SIZE)
+    {
+      const size_t index = (i + region.physical_address) >> PowerPC::BAT_INDEX_SHIFT;
+      s_physical_page_mappings[index] = *region.out_pointer + i;
+    }
   }
 
+  physical_page_mappings_base = reinterpret_cast<u8*>(s_physical_page_mappings.data());
+  logical_page_mappings_base = reinterpret_cast<u8*>(s_logical_page_mappings.data());
+
   InitMMIO(wii);
 
   Clear();
@@ -347,14 +363,14 @@ bool InitFastmemArena()
 
 void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table)
 {
-  if (!is_fastmem_arena_initialized)
-    return;
-
   for (auto& entry : logical_mapped_entries)
   {
     g_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
   }
   logical_mapped_entries.clear();
+
+  s_logical_page_mappings.fill(nullptr);
+
   for (u32 i = 0; i < dbat_table.size(); ++i)
   {
     if (dbat_table[i] & PowerPC::BAT_PHYSICAL_BIT)
@@ -375,19 +391,27 @@ void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table)
         if (intersection_start < intersection_end)
         {
           // Found an overlapping region; map it.
- u32 position = physical_region.shm_position + intersection_start - mapping_address; - u8* base = logical_base + logical_address + intersection_start - translated_address; - u32 mapped_size = intersection_end - intersection_start; - void* mapped_pointer = g_arena.MapInMemoryRegion(position, mapped_size, base); - if (!mapped_pointer) + if (is_fastmem_arena_initialized) { - PanicAlertFmt("Memory::UpdateLogicalMemory(): Failed to map memory region at 0x{:08X} " - "(size 0x{:08X}) into logical fastmem region at 0x{:08X}.", - intersection_start, mapped_size, logical_address); - exit(0); + u32 position = physical_region.shm_position + intersection_start - mapping_address; + u8* base = logical_base + logical_address + intersection_start - translated_address; + u32 mapped_size = intersection_end - intersection_start; + + void* mapped_pointer = g_arena.MapInMemoryRegion(position, mapped_size, base); + if (!mapped_pointer) + { + PanicAlertFmt( + "Memory::UpdateLogicalMemory(): Failed to map memory region at 0x{:08X} " + "(size 0x{:08X}) into logical fastmem region at 0x{:08X}.", + intersection_start, mapped_size, logical_address); + exit(0); + } + logical_mapped_entries.push_back({mapped_pointer, mapped_size}); } - logical_mapped_entries.push_back({mapped_pointer, mapped_size}); + + s_logical_page_mappings[i] = + *physical_region.out_pointer + intersection_start - mapping_address; } } } diff --git a/Source/Core/Core/HW/Memmap.h b/Source/Core/Core/HW/Memmap.h index 5a15a1e343..32065f2053 100644 --- a/Source/Core/Core/HW/Memmap.h +++ b/Source/Core/Core/HW/Memmap.h @@ -27,6 +27,11 @@ namespace Memory extern u8* physical_base; extern u8* logical_base; +// This page table is used for a "soft MMU" implementation when +// setting up the full memory map in process memory isn't possible. +extern u8* physical_page_mappings_base; +extern u8* logical_page_mappings_base; + // The actual memory used for backing the memory map. extern u8* m_pRAM; extern u8* m_pEXRAM; diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index e89c2b1a9b..454b4f68a7 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -216,9 +216,6 @@ protected: void DumpCode(const u8* start, const u8* end); // This enum is used for selecting an implementation of EmitBackpatchRoutine. - // - // The below descriptions of each enum entry apply when jo.fastmem_arena is true. - // If jo.fastmem_arena is false, the slow C++ code is always called instead. enum class MemAccessMode { // Always calls the slow C++ code. For performance reasons, should generally only be used if @@ -227,8 +224,10 @@ protected: // Only emits fast access code. Must only be used if the guest address is known in advance // and IsOptimizableRAMAddress returns true for it, otherwise Dolphin will likely crash! AlwaysUnsafe, - // Best in most cases. Tries to run fast access code, and if that fails, uses backpatching to - // replace the code with a call to the slow C++ code. + // Best in most cases. If backpatching is possible (!emitting_routine && jo.fastmem_arena): + // Tries to run fast access code, and if that fails, uses backpatching to replace the code + // with a call to the slow C++ code. Otherwise: Checks whether the fast access code will work, + // then branches to either the fast access code or the slow C++ code. 
     Auto,
   };
 
@@ -249,11 +248,12 @@ protected:
   //
   // Additional scratch registers are used in the following situations:
   //
-  // mode == Auto && emitting_routine: X2
-  // mode == Auto && emitting_routine && (flags & BackPatchInfo::FLAG_STORE): X0
-  // mode == Auto && emitting_routine && !(flags & BackPatchInfo::FLAG_STORE): X3
+  // emitting_routine && (mode == Auto || (mode != AlwaysSafe && !jo.fastmem_arena)): X2
+  // emitting_routine && mode == Auto && (flags & BackPatchInfo::FLAG_STORE): X0
+  // emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3
+  // !emitting_routine && mode != AlwaysSafe && !jo.fastmem_arena: X30
   //
-  // mode != AlwaysUnsafe || !jo.fastmem_arena:
+  // mode != AlwaysUnsafe:
   // X30 (plus most other registers, unless marked in gprs_to_push and fprs_to_push)
   void EmitBackpatchRoutine(u32 flags, MemAccessMode mode, Arm64Gen::ARM64Reg RS,
                             Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0),
                             BitSet32 fprs_to_push = BitSet32(0), bool emitting_routine = false);
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
index e5ce801175..80bacaed6a 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
@@ -60,8 +60,8 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
 {
   const u32 access_size = BackPatchInfo::GetFlagSize(flags);
 
-  const bool emit_fastmem = jo.fastmem_arena && mode != MemAccessMode::AlwaysSafe;
-  const bool emit_slowmem = !jo.fastmem_arena || mode != MemAccessMode::AlwaysUnsafe;
+  const bool emit_fastmem = mode != MemAccessMode::AlwaysSafe;
+  const bool emit_slowmem = mode != MemAccessMode::AlwaysUnsafe;
 
   bool in_far_code = false;
   const u8* fastmem_start = GetCodePtr();
@@ -69,7 +69,29 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
 
   if (emit_fastmem)
   {
-    if (emit_slowmem && emitting_routine)
+    ARM64Reg memory_base = MEM_REG;
+    ARM64Reg memory_offset = addr;
+
+    if (!jo.fastmem_arena)
+    {
+      const ARM64Reg temp = emitting_routine ? ARM64Reg::W2 : ARM64Reg::W30;
+
+      memory_base = EncodeRegTo64(temp);
+      memory_offset = ARM64Reg::W8; // TODO
+
+      LSR(temp, addr, PowerPC::BAT_INDEX_SHIFT);
+      LDR(memory_base, MEM_REG, ArithOption(temp, true));
+
+      if (emit_slowmem)
+      {
+        FixupBranch pass = CBNZ(memory_base);
+        slowmem_fixup = B();
+        SetJumpTarget(pass);
+      }
+
+      AND(memory_offset, addr, LogicalImm(PowerPC::BAT_PAGE_SIZE - 1, 32));
+    }
+    else if (emit_slowmem && emitting_routine)
     {
       const ARM64Reg temp1 = flags & BackPatchInfo::FLAG_STORE ? 
ARM64Reg::W0 : ARM64Reg::W3; const ARM64Reg temp2 = ARM64Reg::W2; @@ -82,11 +104,11 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, ARM64Reg temp = ARM64Reg::D0; temp = ByteswapBeforeStore(this, &m_float_emit, temp, EncodeRegToDouble(RS), flags, true); - m_float_emit.STR(access_size, temp, MEM_REG, addr); + m_float_emit.STR(access_size, temp, memory_base, memory_offset); } else if ((flags & BackPatchInfo::FLAG_LOAD) && (flags & BackPatchInfo::FLAG_FLOAT)) { - m_float_emit.LDR(access_size, EncodeRegToDouble(RS), MEM_REG, addr); + m_float_emit.LDR(access_size, EncodeRegToDouble(RS), memory_base, memory_offset); ByteswapAfterLoad(this, &m_float_emit, EncodeRegToDouble(RS), EncodeRegToDouble(RS), flags, true, false); @@ -97,28 +119,28 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, temp = ByteswapBeforeStore(this, &m_float_emit, temp, RS, flags, true); if (flags & BackPatchInfo::FLAG_SIZE_32) - STR(temp, MEM_REG, addr); + STR(temp, memory_base, memory_offset); else if (flags & BackPatchInfo::FLAG_SIZE_16) - STRH(temp, MEM_REG, addr); + STRH(temp, memory_base, memory_offset); else - STRB(temp, MEM_REG, addr); + STRB(temp, memory_base, memory_offset); } else if (flags & BackPatchInfo::FLAG_ZERO_256) { // This literally only stores 32bytes of zeros to the target address ARM64Reg temp = ARM64Reg::X30; - ADD(temp, addr, MEM_REG); + ADD(temp, memory_base, memory_offset); STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 0); STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 16); } else { if (flags & BackPatchInfo::FLAG_SIZE_32) - LDR(RS, MEM_REG, addr); + LDR(RS, memory_base, memory_offset); else if (flags & BackPatchInfo::FLAG_SIZE_16) - LDRH(RS, MEM_REG, addr); + LDRH(RS, memory_base, memory_offset); else if (flags & BackPatchInfo::FLAG_SIZE_8) - LDRB(RS, MEM_REG, addr); + LDRB(RS, memory_base, memory_offset); ByteswapAfterLoad(this, &m_float_emit, RS, RS, flags, true, false); } @@ -134,7 +156,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, in_far_code = true; SwitchToFarCode(); - if (!emitting_routine) + if (jo.fastmem_arena && !emitting_routine) { FastmemArea* fastmem_area = &m_fault_to_handler[fastmem_end]; fastmem_area->fastmem_code = fastmem_start; @@ -264,7 +286,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, if (in_far_code) { - if (emitting_routine) + if (slowmem_fixup) { FixupBranch done = B(); SwitchToNearCode(); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index 7a077f6ce2..af0fc0386b 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -393,7 +393,7 @@ void Arm64GPRCache::GetAllocationOrder() ARM64Reg::W11, ARM64Reg::W10, ARM64Reg::W9, - ARM64Reg::W8, + // ARM64Reg::W8, ARM64Reg::W7, ARM64Reg::W6, ARM64Reg::W5, diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index a506ce02e1..e50ec27929 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -93,10 +93,10 @@ void JitArm64::GenerateAsm() // set the mem_base based on MSR flags LDR(IndexType::Unsigned, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr)); FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27); - MOVP2R(MEM_REG, Memory::physical_base); + MOVP2R(MEM_REG, jo.fastmem_arena ? 
Memory::physical_base : Memory::physical_page_mappings_base);
   FixupBranch membaseend = B();
   SetJumpTarget(physmem);
-  MOVP2R(MEM_REG, Memory::logical_base);
+  MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::logical_base : Memory::logical_page_mappings_base);
   SetJumpTarget(membaseend);
 
   // iCache[(address >> 2) & iCache_Mask];
@@ -141,10 +141,10 @@ void JitArm64::GenerateAsm()
   // set the mem_base based on MSR flags and jump to next block.
   LDR(IndexType::Unsigned, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr));
   FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27);
-  MOVP2R(MEM_REG, Memory::physical_base);
+  MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::physical_base : Memory::physical_page_mappings_base);
   BR(ARM64Reg::X0);
   SetJumpTarget(physmem);
-  MOVP2R(MEM_REG, Memory::logical_base);
+  MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::logical_base : Memory::logical_page_mappings_base);
   BR(ARM64Reg::X0);
 
   // Call JIT
diff --git a/Source/Core/Core/PowerPC/MMU.h b/Source/Core/Core/PowerPC/MMU.h
index 726e861f62..6eda9a22b7 100644
--- a/Source/Core/Core/PowerPC/MMU.h
+++ b/Source/Core/Core/PowerPC/MMU.h
@@ -197,11 +197,12 @@ TranslateResult JitCache_TranslateAddress(u32 address);
 
 constexpr int BAT_INDEX_SHIFT = 17;
 constexpr u32 BAT_PAGE_SIZE = 1 << BAT_INDEX_SHIFT;
+constexpr u32 BAT_PAGE_COUNT = 1 << (32 - BAT_INDEX_SHIFT);
 constexpr u32 BAT_MAPPED_BIT = 0x1;
 constexpr u32 BAT_PHYSICAL_BIT = 0x2;
 constexpr u32 BAT_WI_BIT = 0x4;
 constexpr u32 BAT_RESULT_MASK = UINT32_C(~0x7);
-using BatTable = std::array<u32, 1 << (32 - BAT_INDEX_SHIFT)>;  // 128 KB
+using BatTable = std::array<u32, BAT_PAGE_COUNT>;  // 128 KB
 extern BatTable ibat_table;
 extern BatTable dbat_table;
 inline bool TranslateBatAddess(const BatTable& bat_table, u32* address, bool* wi)

From 3de49dee78be9d12b9caf7cd20a39613dcc35125 Mon Sep 17 00:00:00 2001
From: JosJuice
Date: Sun, 12 Jun 2022 10:38:48 +0200
Subject: [PATCH 5/7] JitArm64: Properly reserve scratch register for soft MMU

Cleans up a hack from the previous commit.
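
For reference, the address translation these scratch registers serve
(added in the previous commit) is equivalent to the following C++. This
is an illustrative sketch only: the two constants mirror
PowerPC::BAT_INDEX_SHIFT and PowerPC::BAT_PAGE_SIZE from MMU.h, while
the function and parameter names are made up for the example.

    #include <cstdint>

    // Mirrors PowerPC::BAT_INDEX_SHIFT / BAT_PAGE_SIZE from MMU.h.
    constexpr int kBatIndexShift = 17;
    constexpr uint32_t kBatPageSize = 1u << kBatIndexShift;

    // page_mappings is the table MEM_REG points to when fastmem_arena is
    // disabled; addr is the guest effective address. Returns the host
    // pointer, or nullptr when the slow C++ fallback must be taken.
    uint8_t* SoftMmuTranslate(uint8_t* const* page_mappings, uint32_t addr)
    {
      uint8_t* page = page_mappings[addr >> kBatIndexShift];  // LSR + LDR
      if (page == nullptr)                                    // CBNZ
        return nullptr;
      return page + (addr & (kBatPageSize - 1));              // AND
    }

In the emitted code, the loaded page pointer needs one temporary (X30,
or X3 when emitting a routine) and the page offset needs another (W2),
which is why this commit reserves those registers at the call sites
instead of stealing W8 from the register cache.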
--- Source/Core/Core/PowerPC/JitArm64/Jit.h | 10 +++--- .../PowerPC/JitArm64/JitArm64_BackPatch.cpp | 4 +-- .../PowerPC/JitArm64/JitArm64_LoadStore.cpp | 34 ++++++++++++++++++- .../JitArm64/JitArm64_LoadStoreFloating.cpp | 12 +++++++ .../JitArm64/JitArm64_LoadStorePaired.cpp | 23 ++++++++++--- .../PowerPC/JitArm64/JitArm64_RegCache.cpp | 2 +- Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 3 ++ 7 files changed, 76 insertions(+), 12 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 454b4f68a7..5a7d84b151 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -248,10 +248,12 @@ protected: // // Additional scratch registers are used in the following situations: // - // emitting_routine && (mode == Auto || (mode != AlwaysSafe && !jo.fastmem_arena)): X2 - // emitting_routine && mode == Auto && (flags & BackPatchInfo::FLAG_STORE): X0 - // emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3 - // !emitting_routine && mode != AlwaysSafe && !jo.fastmem_arena: X30 + // emitting_routine && mode == Auto: X2 + // emitting_routine && mode == Auto && (flags & BackPatchInfo::FLAG_STORE): X0 + // emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3 + // emitting_routine && mode != AlwaysSafe && !jo.fastmem_arena: X3 + // mode != AlwaysSafe && !jo.fastmem_arena: X2 + // !emitting_routine && mode != AlwaysSafe && !jo.fastmem_arena: X30 // // mode != AlwaysUnsafe: // X30 (plus most other registers, unless marked in gprs_to_push and fprs_to_push) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp index 80bacaed6a..c3d2889c94 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp @@ -74,10 +74,10 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, if (!jo.fastmem_arena) { - const ARM64Reg temp = emitting_routine ? ARM64Reg::W2 : ARM64Reg::W30; + const ARM64Reg temp = emitting_routine ? 
ARM64Reg::W3 : ARM64Reg::W30; memory_base = EncodeRegTo64(temp); - memory_offset = ARM64Reg::W8; // TODO + memory_offset = ARM64Reg::W2; LSR(temp, addr, PowerPC::BAT_INDEX_SHIFT); LDR(memory_base, MEM_REG, ArithOption(temp, true)); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index 0adfcae6f3..be39929f21 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -27,6 +27,8 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o { // We want to make sure to not get LR as a temp register gpr.Lock(ARM64Reg::W0, ARM64Reg::W30); + if (!jo.fastmem_arena) + gpr.Lock(ARM64Reg::W2); gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg, false); ARM64Reg dest_reg = gpr.R(dest); @@ -121,6 +123,8 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); if (!update || early_update) regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; + if (!jo.fastmem_arena) + regs_in_use[DecodeReg(ARM64Reg::W2)] = 0; if (!jo.memcheck) regs_in_use[DecodeReg(dest_reg)] = 0; @@ -157,6 +161,8 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o } gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); + if (!jo.fastmem_arena) + gpr.Unlock(ARM64Reg::W2); } void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset, @@ -164,6 +170,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s { // We want to make sure to not get LR as a temp register gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); + if (!jo.fastmem_arena) + gpr.Lock(ARM64Reg::W2); ARM64Reg RS = gpr.R(value); @@ -259,6 +267,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; if (!update || early_update) regs_in_use[DecodeReg(ARM64Reg::W1)] = 0; + if (!jo.fastmem_arena) + regs_in_use[DecodeReg(ARM64Reg::W2)] = 0; u32 access_size = BackPatchInfo::GetFlagSize(flags); u32 mmio_address = 0; @@ -315,6 +325,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s } gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); + if (!jo.fastmem_arena) + gpr.Unlock(ARM64Reg::W2); } FixupBranch JitArm64::BATAddressLookup(ARM64Reg addr_out, ARM64Reg addr_in, ARM64Reg tmp, @@ -497,6 +509,8 @@ void JitArm64::lmw(UGeckoInstruction inst) s32 offset = inst.SIMM_16; gpr.Lock(ARM64Reg::W0, ARM64Reg::W30); + if (!jo.fastmem_arena) + gpr.Lock(ARM64Reg::W2); // MMU games make use of a >= d despite this being invalid according to the PEM. // Because of this, make sure to not re-read rA after starting doing the loads. 
@@ -522,6 +536,8 @@ void JitArm64::lmw(UGeckoInstruction inst) BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); + if (!jo.fastmem_arena) + regs_in_use[DecodeReg(ARM64Reg::W2)] = 0; if (i == 31) regs_in_use[DecodeReg(addr_reg)] = 0; if (!jo.memcheck) @@ -538,6 +554,8 @@ void JitArm64::lmw(UGeckoInstruction inst) } gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); + if (!jo.fastmem_arena) + gpr.Unlock(ARM64Reg::W2); } void JitArm64::stmw(UGeckoInstruction inst) @@ -549,6 +567,8 @@ void JitArm64::stmw(UGeckoInstruction inst) s32 offset = inst.SIMM_16; gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); + if (!jo.fastmem_arena) + gpr.Lock(ARM64Reg::W2); ARM64Reg addr_reg = ARM64Reg::W1; if (a) @@ -572,6 +592,8 @@ void JitArm64::stmw(UGeckoInstruction inst) BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; + if (!jo.fastmem_arena) + regs_in_use[DecodeReg(ARM64Reg::W2)] = 0; if (i == 31) regs_in_use[DecodeReg(addr_reg)] = 0; @@ -583,6 +605,8 @@ void JitArm64::stmw(UGeckoInstruction inst) } gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); + if (!jo.fastmem_arena) + gpr.Unlock(ARM64Reg::W2); } void JitArm64::dcbx(UGeckoInstruction inst) @@ -771,8 +795,14 @@ void JitArm64::dcbz(UGeckoInstruction inst) int a = inst.RA, b = inst.RB; gpr.Lock(ARM64Reg::W0, ARM64Reg::W30); + if (!jo.fastmem_arena) + gpr.Lock(ARM64Reg::W2); - Common::ScopeGuard register_guard([&] { gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); }); + Common::ScopeGuard register_guard([&] { + gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); + if (!jo.fastmem_arena) + gpr.Unlock(ARM64Reg::W2); + }); constexpr ARM64Reg addr_reg = ARM64Reg::W0; constexpr ARM64Reg temp_reg = ARM64Reg::W30; @@ -839,6 +869,8 @@ void JitArm64::dcbz(UGeckoInstruction inst) BitSet32 gprs_to_push = gpr.GetCallerSavedUsed(); BitSet32 fprs_to_push = fpr.GetCallerSavedUsed(); gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0; + if (!jo.fastmem_arena) + gprs_to_push[DecodeReg(ARM64Reg::W2)] = 0; EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W0, EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp index 8845b25fa4..281593d4a7 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp @@ -79,6 +79,8 @@ void JitArm64::lfXX(UGeckoInstruction inst) gpr.Lock(ARM64Reg::W0, ARM64Reg::W30); fpr.Lock(ARM64Reg::Q0); + if (!jo.fastmem_arena) + gpr.Lock(ARM64Reg::W2); const ARM64Reg VD = fpr.RW(inst.FD, type, false); ARM64Reg addr_reg = ARM64Reg::W0; @@ -166,6 +168,8 @@ void JitArm64::lfXX(UGeckoInstruction inst) BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); if (!update || early_update) regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; + if (!jo.fastmem_arena) + regs_in_use[DecodeReg(ARM64Reg::W2)] = 0; fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0; if (!jo.memcheck) fprs_in_use[DecodeReg(VD)] = 0; @@ -190,6 +194,8 @@ void JitArm64::lfXX(UGeckoInstruction inst) gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30); fpr.Unlock(ARM64Reg::Q0); + if (!jo.fastmem_arena) + gpr.Unlock(ARM64Reg::W2); } void JitArm64::stfXX(UGeckoInstruction inst) @@ -273,6 +279,8 @@ void JitArm64::stfXX(UGeckoInstruction inst) } gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30); + if (!jo.fastmem_arena) + gpr.Lock(ARM64Reg::W2); ARM64Reg addr_reg 
= ARM64Reg::W1;
@@ -364,6 +372,8 @@ void JitArm64::stfXX(UGeckoInstruction inst)
   regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
   if (!update || early_update)
     regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
+  if (!jo.fastmem_arena)
+    regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
   fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
 
   if (is_immediate)
@@ -418,4 +428,6 @@ void JitArm64::stfXX(UGeckoInstruction inst)
 
   gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
   fpr.Unlock(ARM64Reg::Q0);
+  if (!jo.fastmem_arena)
+    gpr.Unlock(ARM64Reg::W2);
 }
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
index 0960bcf236..c7c5dcb0f9 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
@@ -44,6 +44,10 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
     gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
     fpr.Lock(ARM64Reg::Q1);
   }
+  else if (!jo.fastmem_arena)
+  {
+    gpr.Lock(ARM64Reg::W2);
+  }
 
   constexpr ARM64Reg addr_reg = ARM64Reg::W0;
   constexpr ARM64Reg scale_reg = ARM64Reg::W1;
@@ -82,6 +86,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
     // Wipe the registers we are using as temporaries
     if (!update || early_update)
       gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
+    if (!jo.fastmem_arena)
+      gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
     fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
     if (!jo.memcheck)
       fprs_in_use[DecodeReg(VS)] = 0;
@@ -130,6 +136,10 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
     gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
     fpr.Unlock(ARM64Reg::Q1);
   }
+  else if (!jo.fastmem_arena)
+  {
+    gpr.Unlock(ARM64Reg::W2);
+  }
 }
 
 void JitArm64::psq_stXX(UGeckoInstruction inst)
@@ -189,8 +199,10 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
   }
 
   gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
-  if (!js.assumeNoPairedQuantize)
+  if (!js.assumeNoPairedQuantize || !jo.fastmem_arena)
     gpr.Lock(ARM64Reg::W2);
+  if (!js.assumeNoPairedQuantize && !jo.fastmem_arena)
+    gpr.Lock(ARM64Reg::W3);
 
   constexpr ARM64Reg scale_reg = ARM64Reg::W0;
   constexpr ARM64Reg addr_reg = ARM64Reg::W1;
@@ -229,6 +241,8 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
       gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
     if (!update || early_update)
       gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
+    if (!jo.fastmem_arena)
+      gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
 
     u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
     if (!w)
@@ -261,9 +275,10 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
 
   gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
   fpr.Unlock(ARM64Reg::Q0);
+  if (!js.assumeNoPairedQuantize || !jo.fastmem_arena)
+    gpr.Unlock(ARM64Reg::W2);
+  if (!js.assumeNoPairedQuantize && !jo.fastmem_arena)
+    gpr.Unlock(ARM64Reg::W3);
   if (!js.assumeNoPairedQuantize)
-  {
-    gpr.Unlock(ARM64Reg::W2);
     fpr.Unlock(ARM64Reg::Q1);
-  }
 }
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
index af0fc0386b..7a077f6ce2 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
@@ -393,7 +393,7 @@ void Arm64GPRCache::GetAllocationOrder()
       ARM64Reg::W11,
       ARM64Reg::W10,
       ARM64Reg::W9,
-      // ARM64Reg::W8,
+      ARM64Reg::W8,
       ARM64Reg::W7,
       ARM64Reg::W6,
       ARM64Reg::W5,
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
index e50ec27929..b48ffef90c 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -699,6 +699,7 @@ void JitArm64::GenerateQuantizedStores() // X0 is the scale // X1 is the address // X2 is a temporary + // X3 is a temporary if jo.fastmem_arena is false (used in EmitBackpatchRoutine) // X30 is LR // Q0 is the register // Q1 is a temporary @@ -707,6 +708,8 @@ void JitArm64::GenerateQuantizedStores() BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 2}; if (!jo.memcheck) gprs_to_push &= ~BitSet32{1}; + if (!jo.fastmem_arena) + gprs_to_push &= ~BitSet32{3}; BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1}; ARM64FloatEmitter float_emit(this); From 75e897c40edad489da1f378d79b0c7f2eb39e154 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 12 Jun 2022 10:51:21 +0200 Subject: [PATCH 6/7] JitArm64: Fix a minor mistake in the EmitBackpatchRoutine comment X30 is used in fewer situations than the comment was claiming. (I think that when I wrote the comment I was counting the use of X30 as a temp variable in the slowmem code as clobbering X30, but that happens after pushing X30, so it doesn't actually get clobbered.) --- Source/Core/Core/PowerPC/JitArm64/Jit.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 5a7d84b151..ad21fe1988 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -243,7 +243,7 @@ protected: // Store float: X1 Q0 // Load float: X0 // - // If using MemAccessMode::AlwaysUnsafe, the addr argument can be any register. + // If mode == AlwaysUnsafe, the addr argument can be any register. // Otherwise it must be the register listed in the table above. // // Additional scratch registers are used in the following situations: @@ -254,9 +254,10 @@ protected: // emitting_routine && mode != AlwaysSafe && !jo.fastmem_arena: X3 // mode != AlwaysSafe && !jo.fastmem_arena: X2 // !emitting_routine && mode != AlwaysSafe && !jo.fastmem_arena: X30 + // !emitting_routine && mode == Auto && jo.fastmem_arena: X30 // - // mode != AlwaysUnsafe: - // X30 (plus most other registers, unless marked in gprs_to_push and fprs_to_push) + // Furthermore, any callee-saved register which isn't marked in gprs_to_push/fprs_to_push + // may be clobbered if mode != AlwaysUnsafe. void EmitBackpatchRoutine(u32 flags, MemAccessMode mode, Arm64Gen::ARM64Reg RS, Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0), BitSet32 fprs_to_push = BitSet32(0), bool emitting_routine = false); From 62ec19c1ba005df4ad90610e60875a1bb1b1cc98 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 12 Jun 2022 10:52:33 +0200 Subject: [PATCH 7/7] JitArm64: Remove redundant line in the EmitBackpatchRoutine comment All stores clobber X0, so that stores clobber X0 when emitting_routine is true is nothing special. 
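
For context, the register bookkeeping this comment block describes
follows the same pattern at every call site. The following is condensed
from the SafeLoadToReg changes earlier in this series; it is an
illustrative excerpt, not new API:

    // Reserve the temporaries EmitBackpatchRoutine may use.
    gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
    if (!jo.fastmem_arena)
      gpr.Lock(ARM64Reg::W2);  // soft MMU scratch

    // Registers holding live values must be pushed around the slow call;
    // the temporaries above are excluded because they hold no guest state.
    BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
    BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
    regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
    if (!jo.fastmem_arena)
      regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;

    EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, XA, regs_in_use, fprs_in_use);

    if (!jo.fastmem_arena)
      gpr.Unlock(ARM64Reg::W2);
    gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);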
--- Source/Core/Core/PowerPC/JitArm64/Jit.h | 1 - 1 file changed, 1 deletion(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index ad21fe1988..75565b771d 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -249,7 +249,6 @@ protected: // Additional scratch registers are used in the following situations: // // emitting_routine && mode == Auto: X2 - // emitting_routine && mode == Auto && (flags & BackPatchInfo::FLAG_STORE): X0 // emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3 // emitting_routine && mode != AlwaysSafe && !jo.fastmem_arena: X3 // mode != AlwaysSafe && !jo.fastmem_arena: X2
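
Taken together with the soft MMU commit, the overall shape of
EmitBackpatchRoutine that this comment block documents is roughly the
following. This is schematic pseudo-C++ for orientation only; the real
logic lives in JitArm64_BackPatch.cpp.

    const bool emit_fastmem = mode != MemAccessMode::AlwaysSafe;
    const bool emit_slowmem = mode != MemAccessMode::AlwaysUnsafe;

    if (emit_fastmem)
    {
      if (!jo.fastmem_arena)
      {
        // Soft MMU: look up the page in the table MEM_REG points to and,
        // if emit_slowmem, branch to the fallback when the page is unmapped.
      }
      else if (emit_slowmem && emitting_routine)
      {
        // Routines can't be backpatched, so emit an explicit address check
        // that branches to the fallback instead.
      }
      // Emit the actual load/store against memory_base + memory_offset.
    }
    if (emit_slowmem)
    {
      // Emit the fallback (in far code when fast access code was also
      // emitted): push the registers marked in gprs_to_push/fprs_to_push,
      // call the C++ MMU code, then restore them.
    }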