diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
index b09279d458..d76854409f 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
@@ -1294,6 +1294,32 @@ void Jit64::IntializeSpeculativeConstants()
   }
 }

+void Jit64::FlushPCBeforeSlowAccess()
+{
+  // PC is used by memory watchpoints (if enabled), profiling where to insert gather pipe
+  // interrupt checks, and printing accurate PC locations in debug logs.
+  MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
+}
+
+void Jit64::FlushRegistersBeforeSlowAccess()
+{
+  // Register values can be used by memory watchpoint conditions.
+  MemChecks& mem_checks = m_system.GetPowerPC().GetMemChecks();
+  if (mem_checks.HasAny())
+  {
+    BitSet32 gprs = mem_checks.GetGPRsUsedInConditions();
+    BitSet32 fprs = mem_checks.GetFPRsUsedInConditions();
+    if (gprs || fprs)
+    {
+      RCForkGuard gpr_guard = gpr.Fork();
+      RCForkGuard fpr_guard = fpr.Fork();
+
+      gpr.Flush(gprs);
+      fpr.Flush(fprs);
+    }
+  }
+}
+
 bool Jit64::HandleFunctionHooking(u32 address)
 {
   const auto result = HLE::TryReplaceFunction(m_ppc_symbol_db, address, PowerPC::CoreMode::JIT);
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h
index 0e98981a46..c23d7e1491 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.h
@@ -81,6 +81,9 @@ public:

   void IntializeSpeculativeConstants();

+  void FlushPCBeforeSlowAccess();
+  void FlushRegistersBeforeSlowAccess();
+
   JitBlockCache* GetBlockCache() override { return &blocks; }

   void Trace();
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
index 05bd690694..a200689f35 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
@@ -110,6 +110,8 @@ void Jit64::lXXx(UGeckoInstruction inst)
     PanicAlertFmt("Invalid instruction");
   }

+  FlushRegistersBeforeSlowAccess();
+
   // PowerPC has no 8-bit sign extended load, but x86 does, so merge extsb with the load if we find
   // it.
   if (CanMergeNextInstructions(1) && accessSize == 8 && js.op[1].inst.OPCD == 31 &&
@@ -439,6 +441,8 @@ void Jit64::dcbz(UGeckoInstruction inst)
   int a = inst.RA;
   int b = inst.RB;

+  FlushRegistersBeforeSlowAccess();
+
   {
     RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0);
     RCOpArg Rb = gpr.Use(b, RCMode::Read);
@@ -477,7 +481,7 @@
     SwitchToFarCode();
     SetJumpTarget(slow);
   }
-  MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
+  FlushPCBeforeSlowAccess();
   BitSet32 registersInUse = CallerSavedRegistersInUse();
   ABI_PushRegistersAndAdjustStack(registersInUse, 0);
   ABI_CallFunctionPR(PowerPC::ClearDCacheLineFromJit, &m_mmu, RSCRATCH);
@@ -524,6 +528,8 @@ void Jit64::stX(UGeckoInstruction inst)
     return;
   }

+  FlushRegistersBeforeSlowAccess();
+
   // If we already know the address of the write
   if (!a || gpr.IsImm(a))
   {
@@ -602,6 +608,8 @@ void Jit64::stXx(UGeckoInstruction inst)
     break;
   }

+  FlushRegistersBeforeSlowAccess();
+
   const bool does_clobber = WriteClobbersRegValue(accessSize, /* swap */ !byte_reverse);

   RCOpArg Ra = update ? gpr.Bind(a, RCMode::ReadWrite) : gpr.Use(a, RCMode::Read);
@@ -634,6 +642,8 @@ void Jit64::lmw(UGeckoInstruction inst)

   int a = inst.RA, d = inst.RD;

+  FlushRegistersBeforeSlowAccess();
+
   // TODO: This doesn't handle rollback on DSI correctly
   {
     RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0);
@@ -657,6 +667,8 @@ void Jit64::stmw(UGeckoInstruction inst)

   int a = inst.RA, d = inst.RD;

+  FlushRegistersBeforeSlowAccess();
+
   // TODO: This doesn't handle rollback on DSI correctly
   for (int i = d; i < 32; i++)
   {
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
index 7ac34f5d6e..7b347a3742 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
@@ -30,6 +30,8 @@ void Jit64::lfXXX(UGeckoInstruction inst)

   FALLBACK_IF(!indexed && !a);

+  FlushRegistersBeforeSlowAccess();
+
   s32 offset = 0;
   RCOpArg addr = gpr.Bind(a, update ? RCMode::ReadWrite : RCMode::Read);
   RegCache::Realize(addr);
@@ -103,6 +105,8 @@ void Jit64::stfXXX(UGeckoInstruction inst)

   FALLBACK_IF(update && jo.memcheck && indexed && a == b);

+  FlushRegistersBeforeSlowAccess();
+
   if (single)
   {
     if (js.fpr_is_store_safe[s] && js.op->fprIsSingle[s])
@@ -196,6 +200,8 @@ void Jit64::stfiwx(UGeckoInstruction inst)
   int a = inst.RA;
   int b = inst.RB;

+  FlushRegistersBeforeSlowAccess();
+
   RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0);
   RCOpArg Rb = gpr.Use(b, RCMode::Read);
   RCOpArg Rs = fpr.Use(s, RCMode::Read);
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
index 91865b61a0..78b00b99f0 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
@@ -35,6 +35,8 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
   int w = indexed ? inst.Wx : inst.W;
   FALLBACK_IF(!a);

+  FlushRegistersBeforeSlowAccess();
+
   RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
   RCOpArg Ra = update ? gpr.Bind(a, RCMode::ReadWrite) : gpr.Use(a, RCMode::Read);
   RCOpArg Rb = indexed ? gpr.Use(b, RCMode::Read) : RCOpArg::Imm32((u32)offset);
@@ -69,8 +71,8 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
     }
     else
     {
-      // Stash PC in case asm routine needs to call into C++
-      MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
+      FlushPCBeforeSlowAccess();
+
       // We know what GQR is here, so we can load RSCRATCH2 and call into the store method directly
       // with just the scale bits.
       MOV(32, R(RSCRATCH2), Imm32(gqrValue & 0x3F00));
@@ -83,8 +85,8 @@
   }
   else
   {
-    // Stash PC in case asm routine needs to call into C++
-    MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
+    FlushPCBeforeSlowAccess();
+
     // Some games (e.g. Dirt 2) incorrectly set the unused bits which breaks the lookup table code.
     // Hence, we need to mask out the unused bits. The layout of the GQR register is
     // UU[SCALE]UUUUU[TYPE] where SCALE is 6 bits and TYPE is 3 bits, so we have to AND with
@@ -124,6 +126,8 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
   int w = indexed ? inst.Wx : inst.W;
   FALLBACK_IF(!a);

+  FlushRegistersBeforeSlowAccess();
+
   RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
   RCX64Reg Ra = gpr.Bind(a, update ? RCMode::ReadWrite : RCMode::Read);
   RCOpArg Rb = indexed ? gpr.Use(b, RCMode::Read) : RCOpArg::Imm32((u32)offset);
@@ -144,8 +148,8 @@
   }
   else
   {
-    // Stash PC in case asm routine needs to call into C++
-    MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
+    FlushPCBeforeSlowAccess();
+
     // Get the high part of the GQR register
     OpArg gqr = PPCSTATE_SPR(SPR_GQR0 + i);
     gqr.AddMemOffset(2);
diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp
index 02c8ba1893..040e983b08 100644
--- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp
+++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp
@@ -386,15 +386,10 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
     SetJumpTarget(slow);
   }

-  // PC is used by memory watchpoints (if enabled), profiling where to insert gather pipe
-  // interrupt checks, and printing accurate PC locations in debug logs.
-  //
-  // In the case of Jit64AsmCommon routines, we don't know the PC here,
-  // so the caller has to store the PC themselves.
+  // In the case of Jit64AsmCommon routines, the state we want to store here
+  // isn't known at JIT time, so the caller has to store it themselves.
   if (!(flags & SAFE_LOADSTORE_NO_UPDATE_PC))
-  {
-    MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
-  }
+    m_jit.FlushPCBeforeSlowAccess();

   size_t rsp_alignment = (flags & SAFE_LOADSTORE_NO_PROLOG) ? 8 : 0;
   ABI_PushRegistersAndAdjustStack(registersInUse, rsp_alignment);
@@ -457,8 +452,7 @@ void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int acc
     return;
   }

-  // Helps external systems know which instruction triggered the read.
-  MOV(32, PPCSTATE(pc), Imm32(m_jit.js.compilerPC));
+  m_jit.FlushPCBeforeSlowAccess();

   // Fall back to general-case code.
   ABI_PushRegistersAndAdjustStack(registersInUse, 0);
@@ -560,15 +554,10 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
     SetJumpTarget(slow);
   }

-  // PC is used by memory watchpoints (if enabled), profiling where to insert gather pipe
-  // interrupt checks, and printing accurate PC locations in debug logs.
-  //
-  // In the case of Jit64AsmCommon routines, we don't know the PC here,
-  // so the caller has to store the PC themselves.
+  // In the case of Jit64AsmCommon routines, the state we want to store here
+  // isn't known at JIT time, so the caller has to store it themselves.
   if (!(flags & SAFE_LOADSTORE_NO_UPDATE_PC))
-  {
-    MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
-  }
+    m_jit.FlushPCBeforeSlowAccess();

   size_t rsp_alignment = (flags & SAFE_LOADSTORE_NO_PROLOG) ? 8 : 0;
   ABI_PushRegistersAndAdjustStack(registersInUse, rsp_alignment);
@@ -663,8 +652,7 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address,
   }
   else
   {
-    // Helps external systems know which instruction triggered the write
-    MOV(32, PPCSTATE(pc), Imm32(m_jit.js.compilerPC));
+    m_jit.FlushPCBeforeSlowAccess();

     ABI_PushRegistersAndAdjustStack(registersInUse, 0);
     switch (accessSize)
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index 8ca685b1d3..e28b473235 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -282,6 +282,9 @@ protected:
                             Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0),
                             BitSet32 fprs_to_push = BitSet32(0), bool emitting_routine = false);

+  // temp_gpr must be a valid register, but temp_fpr can be INVALID_REG.
+  void FlushPPCStateBeforeSlowAccess(Arm64Gen::ARM64Reg temp_gpr, Arm64Gen::ARM64Reg temp_fpr);
+
   // Loadstore routines
   void SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update);
   void SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset, bool update);
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
index 1c54c00ebe..04fd1afdb2 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
@@ -21,6 +21,7 @@
 #include "Core/PowerPC/JitArmCommon/BackPatch.h"
 #include "Core/PowerPC/MMU.h"
 #include "Core/PowerPC/PowerPC.h"
+#include "Core/System.h"

 using namespace Arm64Gen;

@@ -171,6 +172,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,

   const ARM64Reg temp_gpr = ARM64Reg::W1;
   const int temp_gpr_index = DecodeReg(temp_gpr);
+  const ARM64Reg temp_fpr = fprs_to_push[0] ? ARM64Reg::INVALID_REG : ARM64Reg::Q0;

   BitSet32 gprs_to_push_early = {};
   if (memcheck)
@@ -189,16 +191,10 @@
     ABI_PushRegisters(gprs_to_push & ~gprs_to_push_early);
     m_float_emit.ABI_PushRegisters(fprs_to_push, ARM64Reg::X30);

-    // PC is used by memory watchpoints (if enabled), profiling where to insert gather pipe
-    // interrupt checks, and printing accurate PC locations in debug logs.
-    //
-    // In the case of JitAsm routines, we don't know the PC here,
-    // so the caller has to store the PC themselves.
+    // In the case of JitAsm routines, the state we want to store here
+    // isn't known at JIT time, so the caller has to store it themselves.
     if (!emitting_routine)
-    {
-      MOVI2R(ARM64Reg::W30, js.compilerPC);
-      STR(IndexType::Unsigned, ARM64Reg::W30, PPC_REG, PPCSTATE_OFF(pc));
-    }
+      FlushPPCStateBeforeSlowAccess(ARM64Reg::W30, temp_fpr);

     if (flags & BackPatchInfo::FLAG_STORE)
     {
@@ -265,7 +261,6 @@

     if (memcheck)
     {
-      const ARM64Reg temp_fpr = fprs_to_push[0] ? ARM64Reg::INVALID_REG : ARM64Reg::Q0;
       const u64 early_push_count = (gprs_to_push & gprs_to_push_early).Count();
       const u64 early_push_size = Common::AlignUp(early_push_count, 2) * 8;

@@ -316,6 +311,22 @@
   }
 }

+void JitArm64::FlushPPCStateBeforeSlowAccess(ARM64Reg temp_gpr, ARM64Reg temp_fpr)
+{
+  // PC is used by memory watchpoints (if enabled), profiling where to insert gather pipe
+  // interrupt checks, and printing accurate PC locations in debug logs.
+  MOVI2R(temp_gpr, js.compilerPC);
+  STR(IndexType::Unsigned, temp_gpr, PPC_REG, PPCSTATE_OFF(pc));
+
+  // Register values can be used by memory watchpoint conditions.
+  MemChecks& mem_checks = m_system.GetPowerPC().GetMemChecks();
+  if (mem_checks.HasAny())
+  {
+    gpr.StoreRegisters(mem_checks.GetGPRsUsedInConditions(), temp_gpr, FlushMode::MaintainState);
+    fpr.StoreRegisters(mem_checks.GetFPRsUsedInConditions(), temp_fpr, FlushMode::MaintainState);
+  }
+}
+
 bool JitArm64::HandleFastmemFault(SContext* ctx)
 {
   const u8* pc = reinterpret_cast<const u8*>(ctx->CTX_PC);
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
index eb8b4d015c..8bf8a8cbe2 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
@@ -102,9 +102,7 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
   {
     LDR(IndexType::Unsigned, scale_reg, PPC_REG, PPCSTATE_OFF_SPR(SPR_GQR0 + i));

-    // Stash PC in case asm routine needs to call into C++
-    MOVI2R(ARM64Reg::W30, js.compilerPC);
-    STR(IndexType::Unsigned, ARM64Reg::W30, PPC_REG, PPCSTATE_OFF(pc));
+    FlushPPCStateBeforeSlowAccess(ARM64Reg::W30, ARM64Reg::Q1);

     UBFM(type_reg, scale_reg, 16, 18);   // Type
     UBFM(scale_reg, scale_reg, 24, 29);  // Scale
@@ -260,9 +258,7 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
   {
     LDR(IndexType::Unsigned, scale_reg, PPC_REG, PPCSTATE_OFF_SPR(SPR_GQR0 + i));

-    // Stash PC in case asm routine needs to call into C++
-    MOVI2R(ARM64Reg::W30, js.compilerPC);
-    STR(IndexType::Unsigned, ARM64Reg::W30, PPC_REG, PPCSTATE_OFF(pc));
+    FlushPPCStateBeforeSlowAccess(ARM64Reg::W30, ARM64Reg::Q1);

     UBFM(type_reg, scale_reg, 0, 2);    // Type
     UBFM(scale_reg, scale_reg, 8, 13);  // Scale
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
index b98e170531..90b56115ac 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
@@ -388,14 +388,16 @@ public:

   BitSet32 GetDirtyGPRs() const;

-  void StoreRegisters(BitSet32 regs, Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG)
+  void StoreRegisters(BitSet32 regs, Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG,
+                      FlushMode flush_mode = FlushMode::All)
   {
-    FlushRegisters(regs, FlushMode::All, tmp_reg, IgnoreDiscardedRegisters::No);
+    FlushRegisters(regs, flush_mode, tmp_reg, IgnoreDiscardedRegisters::No);
   }

-  void StoreCRRegisters(BitSet8 regs, Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG)
+  void StoreCRRegisters(BitSet8 regs, Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG,
+                        FlushMode flush_mode = FlushMode::All)
   {
-    FlushCRRegisters(regs, FlushMode::All, tmp_reg, IgnoreDiscardedRegisters::No);
+    FlushCRRegisters(regs, flush_mode, tmp_reg, IgnoreDiscardedRegisters::No);
   }

   void DiscardCRRegisters(BitSet8 regs);
@@ -459,9 +461,10 @@ public:

   void FixSinglePrecision(size_t preg);

-  void StoreRegisters(BitSet32 regs, Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG)
+  void StoreRegisters(BitSet32 regs, Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG,
+                      FlushMode flush_mode = FlushMode::All)
   {
-    FlushRegisters(regs, FlushMode::All, tmp_reg);
+    FlushRegisters(regs, flush_mode, tmp_reg);
   }

 protected:
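
Why the register flush matters (a sketch, not part of the patch): when a slow memory access leaves JIT code and calls into C++, a memory watchpoint condition is evaluated against the in-memory ppcState rather than against host registers. Without FlushRegistersBeforeSlowAccess() / FlushPPCStateBeforeSlowAccess(), a guest register that currently lives only in a host register would be read stale. Below is a minimal illustration of the consumer side, assuming a deliberately simplified guest state; PowerPCState's fields here and WatchpointConditionHolds are invented for illustration, while Dolphin's real condition evaluation goes through MemChecks and its expression code:

#include <cstdint>

// Invented, simplified guest state for this sketch.
struct PowerPCState
{
  uint32_t gpr[32];  // guest GPRs, only current if the JIT stored them back
  double ps[32][2];  // guest paired singles
  uint32_t pc;       // guest PC, written by FlushPCBeforeSlowAccess()
};

// Invented condition check standing in for the real expression evaluation.
// It can only observe values that were flushed to ppc_state before the slow
// path was entered, which is exactly the set of registers reported by
// GetGPRsUsedInConditions() / GetFPRsUsedInConditions().
bool WatchpointConditionHolds(const PowerPCState& ppc_state, int watched_gpr, uint32_t expected)
{
  return ppc_state.gpr[watched_gpr] == expected;
}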
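Both backends share the same design constraint: make the condition's registers visible to C++ without disturbing the block's register allocation. On x64 the flush happens inside RCForkGuard forks, so the register cache bookkeeping is rolled back afterwards; on ARM64, StoreRegisters() is passed the new FlushMode::All / FlushMode::MaintainState parameter, where MaintainState stores dirty values back to ppcState while keeping them cached. A rough sketch of that contract, using an invented CachedReg/StoreOne model (the real caches track much more state):

// Deliberately simplified model of the store-vs-invalidate distinction.
enum class FlushMode
{
  All,            // store dirty values and drop the host-register mapping
  MaintainState,  // store dirty values but leave the cache state untouched
};

struct CachedReg
{
  bool in_host_reg = false;
  bool dirty = false;
};

void StoreOne(CachedReg& reg, FlushMode mode)
{
  if (reg.in_host_reg && reg.dirty)
  {
    // Store the host register's value back to ppcState here so C++ code
    // (e.g. a watchpoint condition) sees it. Under MaintainState the entry
    // deliberately stays dirty, so a later full flush still writes it back.
  }
  if (mode == FlushMode::All)
  {
    reg.in_host_reg = false;  // MaintainState keeps the mapping alive
    reg.dirty = false;
  }
}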
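The GQR masking comment in psq_stXX is easiest to check with concrete bits. Per the comment, the relevant half of a GQR is laid out UU[SCALE]UUUUU[TYPE], with SCALE in bits 8-13 and TYPE in bits 0-2 (the UBFM calls in the ARM64 psq_stXX hunk extract the same bit ranges), so the mask that layout implies is 0x3F07. A worked example with an invented corrupted value:

#include <cassert>
#include <cstdint>

int main()
{
  // SCALE in bits 8-13, TYPE in bits 0-2; everything else is unused.
  const uint32_t scale_and_type_mask = 0x3F07;

  // Invented GQR low half with the unused (U) bits incorrectly set,
  // in the way the comment attributes to Dirt 2.
  const uint32_t dirty_gqr = 0xC5F5;  // 0b1100'0101'1111'0101

  const uint32_t clean = dirty_gqr & scale_and_type_mask;
  assert(clean == 0x0505);  // SCALE = 0b000101 (5), TYPE = 0b101 (5)
  return 0;
}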