From 8bfcd2deb76c542cf3b4a5bd2d6176243243e04b Mon Sep 17 00:00:00 2001 From: Franz-Josef Haider Date: Fri, 14 Jul 2023 17:10:51 +0300 Subject: [PATCH] JitArm64/Jit64: Load the memory register without jumps and only when necessary. --- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 21 +++++++++++++ Source/Core/Core/PowerPC/Jit64/Jit.h | 2 ++ Source/Core/Core/PowerPC/Jit64/JitAsm.cpp | 22 ++++---------- Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp | 4 +++ .../PowerPC/Jit64/Jit_SystemRegisters.cpp | 3 ++ Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 29 ++++++++++++++++-- Source/Core/Core/PowerPC/JitArm64/Jit.h | 3 ++ .../Core/PowerPC/JitArm64/JitArm64_Branch.cpp | 3 ++ .../JitArm64/JitArm64_SystemRegisters.cpp | 2 ++ Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 30 +++++-------------- Source/Core/Core/PowerPC/JitInterface.cpp | 19 ++++++++++++ Source/Core/Core/PowerPC/JitInterface.h | 1 + Source/Core/Core/PowerPC/PowerPC.cpp | 5 ++++ Source/Core/Core/PowerPC/PowerPC.h | 1 + 14 files changed, 104 insertions(+), 41 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 313b840132..5ef45f76d0 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -490,6 +490,21 @@ void Jit64::FakeBLCall(u32 after) SetJumpTarget(skip_exit); } +void Jit64::EmitUpdateMembase() +{ + MOV(64, R(RMEM), PPCSTATE(mem_ptr)); +} + +void Jit64::EmitStoreMembase(const OpArg& msr, X64Reg scratch_reg) +{ + auto& memory = m_system.GetMemory(); + MOV(64, R(RMEM), ImmPtr(memory.GetLogicalBase())); + MOV(64, R(scratch_reg), ImmPtr(memory.GetPhysicalBase())); + TEST(32, msr, Imm32(1 << (31 - 27))); + CMOVcc(64, RMEM, R(scratch_reg), CC_Z); + MOV(64, PPCSTATE(mem_ptr), R(RMEM)); +} + void Jit64::WriteExit(u32 destination, bool bl, u32 after) { if (!m_enable_blr_optimization) @@ -599,6 +614,7 @@ void Jit64::WriteRfiExitDestInRSCRATCH() ABI_PushRegistersAndAdjustStack({}, 0); 
ABI_CallFunctionP(PowerPC::CheckExceptionsFromJIT, &m_system.GetPowerPC()); ABI_PopRegistersAndAdjustStack({}, 0); + EmitUpdateMembase(); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, Jump::Near); } @@ -620,6 +636,7 @@ void Jit64::WriteExceptionExit() ABI_PushRegistersAndAdjustStack({}, 0); ABI_CallFunctionP(PowerPC::CheckExceptionsFromJIT, &m_system.GetPowerPC()); ABI_PopRegistersAndAdjustStack({}, 0); + EmitUpdateMembase(); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, Jump::Near); } @@ -632,6 +649,7 @@ void Jit64::WriteExternalExceptionExit() ABI_PushRegistersAndAdjustStack({}, 0); ABI_CallFunctionP(PowerPC::CheckExternalExceptionsFromJIT, &m_system.GetPowerPC()); ABI_PopRegistersAndAdjustStack({}, 0); + EmitUpdateMembase(); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, Jump::Near); } @@ -639,6 +657,7 @@ void Jit64::WriteExternalExceptionExit() void Jit64::Run() { ProtectStack(); + m_system.GetJitInterface().UpdateMembase(); CompiledCode pExecAddr = (CompiledCode)asm_routines.enter_code; pExecAddr(); @@ -649,6 +668,7 @@ void Jit64::Run() void Jit64::SingleStep() { ProtectStack(); + m_system.GetJitInterface().UpdateMembase(); CompiledCode pExecAddr = (CompiledCode)asm_routines.enter_code; pExecAddr(); @@ -745,6 +765,7 @@ void Jit64::Jit(u32 em_address, bool clear_cache_and_retry_on_failure) m_ppc_state.npc = nextPC; m_ppc_state.Exceptions |= EXCEPTION_ISI; m_system.GetPowerPC().CheckExceptions(); + m_system.GetJitInterface().UpdateMembase(); WARN_LOG_FMT(POWERPC, "ISI exception at {:#010x}", nextPC); return; } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index 964cf01aa2..7f1d107c62 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -87,6 +87,8 @@ public: // Utilities for use by opcodes + void EmitUpdateMembase(); + void EmitStoreMembase(const Gen::OpArg& 
msr, Gen::X64Reg scratch_reg); void FakeBLCall(u32 after); void WriteExit(u32 destination, bool bl = false, u32 after = 0); void JustWriteExit(u32 destination, bool bl, u32 after); diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp index 596e8cd724..6237b1b787 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp @@ -65,6 +65,11 @@ void Jit64AsmRoutineManager::Generate() ABI_CallFunction(CoreTiming::GlobalAdvance); ABI_PopRegistersAndAdjustStack({}, 0); + // When we've just entered the jit we need to update the membase + // GlobalAdvance also checks exceptions after which we need to + // update the membase so it makes sense to do this here. + m_jit.EmitUpdateMembase(); + // skip the sync and compare first time FixupBranch skipToRealDispatch = J(enable_debugging ? Jump::Near : Jump::Short); @@ -104,8 +109,6 @@ void Jit64AsmRoutineManager::Generate() dispatcher_no_check = GetCodePtr(); - auto& memory = system.GetMemory(); - // The following is a translation of JitBaseBlockCache::Dispatch into assembly. const bool assembly_dispatcher = true; if (assembly_dispatcher) @@ -165,13 +168,6 @@ void Jit64AsmRoutineManager::Generate() FixupBranch state_mismatch = J_CC(CC_NE); // Success; branch to the block we found. - // Switch to the correct memory base, in case MSR.DR has changed. 
- TEST(32, PPCSTATE(msr), Imm32(1 << (31 - 27))); - FixupBranch physmem = J_CC(CC_Z); - MOV(64, R(RMEM), ImmPtr(memory.GetLogicalBase())); - JMPptr(MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlockData, normalEntry)))); - SetJumpTarget(physmem); - MOV(64, R(RMEM), ImmPtr(memory.GetPhysicalBase())); JMPptr(MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlockData, normalEntry)))); SetJumpTarget(not_found); @@ -189,13 +185,7 @@ void Jit64AsmRoutineManager::Generate() TEST(64, R(ABI_RETURN), R(ABI_RETURN)); FixupBranch no_block_available = J_CC(CC_Z); - // Switch to the correct memory base, in case MSR.DR has changed. - TEST(32, PPCSTATE(msr), Imm32(1 << (31 - 27))); - FixupBranch physmem = J_CC(CC_Z); - MOV(64, R(RMEM), ImmPtr(memory.GetLogicalBase())); - JMPptr(R(ABI_RETURN)); - SetJumpTarget(physmem); - MOV(64, R(RMEM), ImmPtr(memory.GetPhysicalBase())); + // Jump to the block JMPptr(R(ABI_RETURN)); SetJumpTarget(no_block_available); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp index d48b9efc5c..5a24cc2108 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp @@ -12,6 +12,7 @@ #include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" #include "Core/PowerPC/PPCAnalyst.h" #include "Core/PowerPC/PowerPC.h" +#include "Core/System.h" // The branches are known good, or at least reasonably good. // No need for a disable-mechanism. 
@@ -54,6 +55,9 @@ void Jit64::rfi(UGeckoInstruction inst) MOV(32, R(RSCRATCH), PPCSTATE_SRR1); AND(32, R(RSCRATCH), Imm32(mask & clearMSR13)); OR(32, PPCSTATE(msr), R(RSCRATCH)); + + EmitStoreMembase(R(RSCRATCH), RSCRATCH2); + // NPC = SRR0; MOV(32, R(RSCRATCH), PPCSTATE_SRR0); WriteRfiExitDestInRSCRATCH(); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index e06c77b915..5992fe9f28 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -438,7 +438,10 @@ void Jit64::mtmsr(UGeckoInstruction inst) RCOpArg Rs = gpr.BindOrImm(inst.RS, RCMode::Read); RegCache::Realize(Rs); MOV(32, PPCSTATE(msr), Rs); + + EmitStoreMembase(PPCSTATE(msr), RSCRATCH2); } + gpr.Flush(); fpr.Flush(); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index ef7737d69c..caf40987e4 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -127,8 +127,11 @@ bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx) { ERROR_LOG_FMT(DYNA_REC, "JitArm64 address calculation overflowed. This should never happen! 
" - "PC {:#018x}, access address {:#018x}, memory base {:#018x}, MSR.DR {}", - ctx->CTX_PC, access_address, memory_base, m_ppc_state.msr.DR); + "PC {:#018x}, access address {:#018x}, memory base {:#018x}, MSR.DR {}, " + "mem_ptr {}, pbase {}, lbase {}", + ctx->CTX_PC, access_address, memory_base, m_ppc_state.msr.DR, + fmt::ptr(m_ppc_state.mem_ptr), fmt::ptr(memory.GetPhysicalBase()), + fmt::ptr(memory.GetLogicalBase())); } else { @@ -353,6 +356,24 @@ void JitArm64::IntializeSpeculativeConstants() } } +void JitArm64::EmitUpdateMembase() +{ + LDR(IndexType::Unsigned, MEM_REG, PPC_REG, PPCSTATE_OFF(mem_ptr)); +} + +void JitArm64::EmitStoreMembase(const ARM64Reg& msr) +{ + auto& memory = m_system.GetMemory(); + ARM64Reg WD = gpr.GetReg(); + ARM64Reg XD = EncodeRegTo64(WD); + MOVP2R(MEM_REG, jo.fastmem_arena ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase()); + MOVP2R(XD, jo.fastmem_arena ? memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase()); + TST(msr, LogicalImm(1 << (31 - 27), 32)); + CSEL(MEM_REG, MEM_REG, XD, CCFlags::CC_NEQ); + STR(IndexType::Unsigned, MEM_REG, PPC_REG, PPCSTATE_OFF(mem_ptr)); + gpr.Unlock(WD); +} + void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return) { Cleanup(); @@ -523,6 +544,7 @@ void JitArm64::WriteExceptionExit(ARM64Reg dest, bool only_external, bool always else MOVP2R(EncodeRegTo64(DISPATCHER_PC), &PowerPC::CheckExceptionsFromJIT); BLR(EncodeRegTo64(DISPATCHER_PC)); + EmitUpdateMembase(); LDR(IndexType::Unsigned, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(npc)); @@ -636,6 +658,7 @@ void JitArm64::EndTimeProfile(JitBlock* b) void JitArm64::Run() { ProtectStack(); + m_system.GetJitInterface().UpdateMembase(); CompiledCode pExecAddr = (CompiledCode)enter_code; pExecAddr(); @@ -646,6 +669,7 @@ void JitArm64::Run() void JitArm64::SingleStep() { ProtectStack(); + m_system.GetJitInterface().UpdateMembase(); CompiledCode pExecAddr = (CompiledCode)enter_code; pExecAddr(); @@ -747,6 +771,7 @@ void 
JitArm64::Jit(u32 em_address, bool clear_cache_and_retry_on_failure) m_ppc_state.npc = nextPC; m_ppc_state.Exceptions |= EXCEPTION_ISI; m_system.GetPowerPC().CheckExceptions(); + m_system.GetJitInterface().UpdateMembase(); WARN_LOG_FMT(POWERPC, "ISI exception at {:#010x}", nextPC); return; } diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index c323ee04d9..df71e70a93 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -311,6 +311,9 @@ protected: void BeginTimeProfile(JitBlock* b); void EndTimeProfile(JitBlock* b); + void EmitUpdateMembase(); + void EmitStoreMembase(const Arm64Gen::ARM64Reg& msr); + // Exits void WriteExit(u32 destination, bool LK = false, u32 exit_address_after_return = 0); void WriteExit(Arm64Gen::ARM64Reg dest, bool LK = false, u32 exit_address_after_return = 0); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp index b2a2d723bd..edb31f7d64 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp @@ -11,6 +11,7 @@ #include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" #include "Core/PowerPC/PPCTables.h" #include "Core/PowerPC/PowerPC.h" +#include "Core/System.h" using namespace Arm64Gen; @@ -64,6 +65,8 @@ void JitArm64::rfi(UGeckoInstruction inst) STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr)); // STR rB in to rA + EmitStoreMembase(WA); + LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_SRR0)); gpr.Unlock(WB, WC); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index 6093ae2481..3df2fea5fa 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -94,6 +94,8 @@ void JitArm64::mtmsr(UGeckoInstruction inst) 
gpr.BindToRegister(inst.RS, true); STR(IndexType::Unsigned, gpr.R(inst.RS), PPC_REG, PPCSTATE_OFF(msr)); + EmitStoreMembase(gpr.R(inst.RS)); + gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index 03b3c7487d..97be9d0634 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -95,26 +95,13 @@ void JitArm64::GenerateAsm() bool assembly_dispatcher = true; - auto& memory = m_system.GetMemory(); - if (assembly_dispatcher) { - // set the mem_base based on MSR flags - LDR(IndexType::Unsigned, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr)); - FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27); - MOVP2R(MEM_REG, - jo.fastmem_arena ? memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase()); - FixupBranch membaseend = B(); - SetJumpTarget(physmem); - MOVP2R(MEM_REG, - jo.fastmem_arena ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase()); - SetJumpTarget(membaseend); - if (GetBlockCache()->GetFastBlockMap()) { // Check if there is a block ARM64Reg pc_masked = ARM64Reg::X25; - ARM64Reg cache_base = ARM64Reg::X27; + ARM64Reg cache_base = ARM64Reg::X24; ARM64Reg block = ARM64Reg::X30; LSL(pc_masked, DISPATCHER_PC, 1); MOVP2R(cache_base, GetBlockCache()->GetFastBlockMap()); @@ -122,7 +109,7 @@ void JitArm64::GenerateAsm() FixupBranch not_found = CBZ(block); // b.msrBits != msr - ARM64Reg msr = ARM64Reg::W25; + ARM64Reg msr = ARM64Reg::W27; ARM64Reg msr2 = ARM64Reg::W24; LDR(IndexType::Unsigned, msr, PPC_REG, PPCSTATE_OFF(msr)); AND(msr, msr, LogicalImm(JitBaseBlockCache::JIT_CACHE_MSR_MASK, 32)); @@ -181,14 +168,6 @@ void JitArm64::GenerateAsm() FixupBranch no_block_available = CBZ(ARM64Reg::X0); - // set the mem_base based on MSR flags and jump to next block. 
- LDR(IndexType::Unsigned, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr)); - FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27); - MOVP2R(MEM_REG, - jo.fastmem_arena ? memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase()); - BR(ARM64Reg::X0); - SetJumpTarget(physmem); - MOVP2R(MEM_REG, jo.fastmem_arena ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase()); BR(ARM64Reg::X0); // Call JIT @@ -217,6 +196,11 @@ void JitArm64::GenerateAsm() MOVP2R(ARM64Reg::X8, &CoreTiming::GlobalAdvance); BLR(ARM64Reg::X8); + // When we've just entered the jit we need to update the membase + // GlobalAdvance also checks exceptions after which we need to + // update the membase so it makes sense to do this here. + EmitUpdateMembase(); + // Load the PC back into DISPATCHER_PC (the exception handler might have changed it) LDR(IndexType::Unsigned, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc)); diff --git a/Source/Core/Core/PowerPC/JitInterface.cpp b/Source/Core/Core/PowerPC/JitInterface.cpp index 5e369067f6..851de5487b 100644 --- a/Source/Core/Core/PowerPC/JitInterface.cpp +++ b/Source/Core/Core/PowerPC/JitInterface.cpp @@ -98,6 +98,25 @@ void JitInterface::SetProfilingState(ProfilingState state) m_jit->jo.profile_blocks = state == ProfilingState::Enabled; } +void JitInterface::UpdateMembase() +{ + if (!m_jit) + return; + + auto& ppc_state = m_system.GetPPCState(); + auto& memory = m_system.GetMemory(); + if (ppc_state.msr.DR) + { + ppc_state.mem_ptr = + m_jit->jo.fastmem_arena ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase(); + } + else + { + ppc_state.mem_ptr = + m_jit->jo.fastmem_arena ? 
memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase(); + } +} + void JitInterface::WriteProfileResults(const std::string& filename) const { Profiler::ProfileStats prof_stats; diff --git a/Source/Core/Core/PowerPC/JitInterface.h b/Source/Core/Core/PowerPC/JitInterface.h index 00c4680d47..f1d88b2ab0 100644 --- a/Source/Core/Core/PowerPC/JitInterface.h +++ b/Source/Core/Core/PowerPC/JitInterface.h @@ -61,6 +61,7 @@ public: u32 entry_address; }; + void UpdateMembase(); void SetProfilingState(ProfilingState state); void WriteProfileResults(const std::string& filename) const; void GetProfileResults(Profiler::ProfileStats* prof_stats) const; diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index 30c9cb2b14..f52dc7d517 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -569,7 +569,10 @@ void PowerPCManager::CheckExceptions() else { CheckExternalExceptions(); + return; } + + m_system.GetJitInterface().UpdateMembase(); } void PowerPCManager::CheckExternalExceptions() @@ -623,6 +626,8 @@ void PowerPCManager::CheckExternalExceptions() exceptions); } } + + m_system.GetJitInterface().UpdateMembase(); } void PowerPCManager::CheckBreakPoints() diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 42545f2c1f..0e30f5963a 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -169,6 +169,7 @@ struct PowerPCState // Storage for the stack pointer of the BLR optimization. u8* stored_stack_pointer = nullptr; + u8* mem_ptr = nullptr; std::array<std::array<TLBEntry, TLB_SIZE / TLB_WAYS>, NUM_TLBS> tlb;