Jit: Flush registers used in memory breakpoint conditions
Aims to fix https://bugs.dolphin-emu.org/issues/13686. The approach used for Jit64 is less efficient than the one for JitArm64: in JitArm64, the flushing happens in the slow access code itself, whereas in Jit64, it happens before the split between the slow access code and the fast access code, because Jit64 doesn't keep register mappings around for when it's time to emit the slow access code. However, since the flushing code is only emitted when there are memory breakpoints with conditions, this is a performance loss we can live with.
This commit is contained in:
parent
4ef14d3619
commit
c1d4f63929
|
@ -1294,6 +1294,32 @@ void Jit64::IntializeSpeculativeConstants()
|
|||
}
|
||||
}
|
||||
|
||||
// Stores the current compile-time PC into ppcState before emitting a slow
// memory access, so C++ code reached from the slow path sees an accurate PC.
void Jit64::FlushPCBeforeSlowAccess()
{
  // PC is used by memory watchpoints (if enabled), profiling where to insert gather pipe
  // interrupt checks, and printing accurate PC locations in debug logs.
  MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
}
|
||||
|
||||
void Jit64::FlushRegistersBeforeSlowAccess()
|
||||
{
|
||||
// Register values can be used by memory watchpoint conditions.
|
||||
MemChecks& mem_checks = m_system.GetPowerPC().GetMemChecks();
|
||||
if (mem_checks.HasAny())
|
||||
{
|
||||
BitSet32 gprs = mem_checks.GetGPRsUsedInConditions();
|
||||
BitSet32 fprs = mem_checks.GetFPRsUsedInConditions();
|
||||
if (gprs || fprs)
|
||||
{
|
||||
RCForkGuard gpr_guard = gpr.Fork();
|
||||
RCForkGuard fpr_guard = fpr.Fork();
|
||||
|
||||
gpr.Flush(gprs);
|
||||
fpr.Flush(fprs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool Jit64::HandleFunctionHooking(u32 address)
|
||||
{
|
||||
const auto result = HLE::TryReplaceFunction(m_ppc_symbol_db, address, PowerPC::CoreMode::JIT);
|
||||
|
|
|
@ -81,6 +81,9 @@ public:
|
|||
|
||||
void IntializeSpeculativeConstants();
|
||||
|
||||
void FlushPCBeforeSlowAccess();
|
||||
void FlushRegistersBeforeSlowAccess();
|
||||
|
||||
JitBlockCache* GetBlockCache() override { return &blocks; }
|
||||
void Trace();
|
||||
|
||||
|
|
|
@ -110,6 +110,8 @@ void Jit64::lXXx(UGeckoInstruction inst)
|
|||
PanicAlertFmt("Invalid instruction");
|
||||
}
|
||||
|
||||
FlushRegistersBeforeSlowAccess();
|
||||
|
||||
// PowerPC has no 8-bit sign extended load, but x86 does, so merge extsb with the load if we find
|
||||
// it.
|
||||
if (CanMergeNextInstructions(1) && accessSize == 8 && js.op[1].inst.OPCD == 31 &&
|
||||
|
@ -439,6 +441,8 @@ void Jit64::dcbz(UGeckoInstruction inst)
|
|||
int a = inst.RA;
|
||||
int b = inst.RB;
|
||||
|
||||
FlushRegistersBeforeSlowAccess();
|
||||
|
||||
{
|
||||
RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0);
|
||||
RCOpArg Rb = gpr.Use(b, RCMode::Read);
|
||||
|
@ -477,7 +481,7 @@ void Jit64::dcbz(UGeckoInstruction inst)
|
|||
SwitchToFarCode();
|
||||
SetJumpTarget(slow);
|
||||
}
|
||||
MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
|
||||
FlushPCBeforeSlowAccess();
|
||||
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
||||
ABI_CallFunctionPR(PowerPC::ClearDCacheLineFromJit, &m_mmu, RSCRATCH);
|
||||
|
@ -524,6 +528,8 @@ void Jit64::stX(UGeckoInstruction inst)
|
|||
return;
|
||||
}
|
||||
|
||||
FlushRegistersBeforeSlowAccess();
|
||||
|
||||
// If we already know the address of the write
|
||||
if (!a || gpr.IsImm(a))
|
||||
{
|
||||
|
@ -602,6 +608,8 @@ void Jit64::stXx(UGeckoInstruction inst)
|
|||
break;
|
||||
}
|
||||
|
||||
FlushRegistersBeforeSlowAccess();
|
||||
|
||||
const bool does_clobber = WriteClobbersRegValue(accessSize, /* swap */ !byte_reverse);
|
||||
|
||||
RCOpArg Ra = update ? gpr.Bind(a, RCMode::ReadWrite) : gpr.Use(a, RCMode::Read);
|
||||
|
@ -634,6 +642,8 @@ void Jit64::lmw(UGeckoInstruction inst)
|
|||
|
||||
int a = inst.RA, d = inst.RD;
|
||||
|
||||
FlushRegistersBeforeSlowAccess();
|
||||
|
||||
// TODO: This doesn't handle rollback on DSI correctly
|
||||
{
|
||||
RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0);
|
||||
|
@ -657,6 +667,8 @@ void Jit64::stmw(UGeckoInstruction inst)
|
|||
|
||||
int a = inst.RA, d = inst.RD;
|
||||
|
||||
FlushRegistersBeforeSlowAccess();
|
||||
|
||||
// TODO: This doesn't handle rollback on DSI correctly
|
||||
for (int i = d; i < 32; i++)
|
||||
{
|
||||
|
|
|
@ -30,6 +30,8 @@ void Jit64::lfXXX(UGeckoInstruction inst)
|
|||
|
||||
FALLBACK_IF(!indexed && !a);
|
||||
|
||||
FlushRegistersBeforeSlowAccess();
|
||||
|
||||
s32 offset = 0;
|
||||
RCOpArg addr = gpr.Bind(a, update ? RCMode::ReadWrite : RCMode::Read);
|
||||
RegCache::Realize(addr);
|
||||
|
@ -103,6 +105,8 @@ void Jit64::stfXXX(UGeckoInstruction inst)
|
|||
|
||||
FALLBACK_IF(update && jo.memcheck && indexed && a == b);
|
||||
|
||||
FlushRegistersBeforeSlowAccess();
|
||||
|
||||
if (single)
|
||||
{
|
||||
if (js.fpr_is_store_safe[s] && js.op->fprIsSingle[s])
|
||||
|
@ -196,6 +200,8 @@ void Jit64::stfiwx(UGeckoInstruction inst)
|
|||
int a = inst.RA;
|
||||
int b = inst.RB;
|
||||
|
||||
FlushRegistersBeforeSlowAccess();
|
||||
|
||||
RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0);
|
||||
RCOpArg Rb = gpr.Use(b, RCMode::Read);
|
||||
RCOpArg Rs = fpr.Use(s, RCMode::Read);
|
||||
|
|
|
@ -35,6 +35,8 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
|
|||
int w = indexed ? inst.Wx : inst.W;
|
||||
FALLBACK_IF(!a);
|
||||
|
||||
FlushRegistersBeforeSlowAccess();
|
||||
|
||||
RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
|
||||
RCOpArg Ra = update ? gpr.Bind(a, RCMode::ReadWrite) : gpr.Use(a, RCMode::Read);
|
||||
RCOpArg Rb = indexed ? gpr.Use(b, RCMode::Read) : RCOpArg::Imm32((u32)offset);
|
||||
|
@ -69,8 +71,8 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
|
|||
}
|
||||
else
|
||||
{
|
||||
// Stash PC in case asm routine needs to call into C++
|
||||
MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
|
||||
FlushPCBeforeSlowAccess();
|
||||
|
||||
// We know what GQR is here, so we can load RSCRATCH2 and call into the store method directly
|
||||
// with just the scale bits.
|
||||
MOV(32, R(RSCRATCH2), Imm32(gqrValue & 0x3F00));
|
||||
|
@ -83,8 +85,8 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
|
|||
}
|
||||
else
|
||||
{
|
||||
// Stash PC in case asm routine needs to call into C++
|
||||
MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
|
||||
FlushPCBeforeSlowAccess();
|
||||
|
||||
// Some games (e.g. Dirt 2) incorrectly set the unused bits which breaks the lookup table code.
|
||||
// Hence, we need to mask out the unused bits. The layout of the GQR register is
|
||||
// UU[SCALE]UUUUU[TYPE] where SCALE is 6 bits and TYPE is 3 bits, so we have to AND with
|
||||
|
@ -124,6 +126,8 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
|
|||
int w = indexed ? inst.Wx : inst.W;
|
||||
FALLBACK_IF(!a);
|
||||
|
||||
FlushRegistersBeforeSlowAccess();
|
||||
|
||||
RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
|
||||
RCX64Reg Ra = gpr.Bind(a, update ? RCMode::ReadWrite : RCMode::Read);
|
||||
RCOpArg Rb = indexed ? gpr.Use(b, RCMode::Read) : RCOpArg::Imm32((u32)offset);
|
||||
|
@ -144,8 +148,8 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
|
|||
}
|
||||
else
|
||||
{
|
||||
// Stash PC in case asm routine needs to call into C++
|
||||
MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
|
||||
FlushPCBeforeSlowAccess();
|
||||
|
||||
// Get the high part of the GQR register
|
||||
OpArg gqr = PPCSTATE_SPR(SPR_GQR0 + i);
|
||||
gqr.AddMemOffset(2);
|
||||
|
|
|
@ -386,15 +386,10 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
|
|||
SetJumpTarget(slow);
|
||||
}
|
||||
|
||||
// PC is used by memory watchpoints (if enabled), profiling where to insert gather pipe
|
||||
// interrupt checks, and printing accurate PC locations in debug logs.
|
||||
//
|
||||
// In the case of Jit64AsmCommon routines, we don't know the PC here,
|
||||
// so the caller has to store the PC themselves.
|
||||
// In the case of Jit64AsmCommon routines, the state we want to store here
|
||||
// isn't known at JIT time, so the caller has to store it themselves.
|
||||
if (!(flags & SAFE_LOADSTORE_NO_UPDATE_PC))
|
||||
{
|
||||
MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
|
||||
}
|
||||
m_jit.FlushPCBeforeSlowAccess();
|
||||
|
||||
size_t rsp_alignment = (flags & SAFE_LOADSTORE_NO_PROLOG) ? 8 : 0;
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, rsp_alignment);
|
||||
|
@ -457,8 +452,7 @@ void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int acc
|
|||
return;
|
||||
}
|
||||
|
||||
// Helps external systems know which instruction triggered the read.
|
||||
MOV(32, PPCSTATE(pc), Imm32(m_jit.js.compilerPC));
|
||||
m_jit.FlushPCBeforeSlowAccess();
|
||||
|
||||
// Fall back to general-case code.
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
||||
|
@ -560,15 +554,10 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
|
|||
SetJumpTarget(slow);
|
||||
}
|
||||
|
||||
// PC is used by memory watchpoints (if enabled), profiling where to insert gather pipe
|
||||
// interrupt checks, and printing accurate PC locations in debug logs.
|
||||
//
|
||||
// In the case of Jit64AsmCommon routines, we don't know the PC here,
|
||||
// so the caller has to store the PC themselves.
|
||||
// In the case of Jit64AsmCommon routines, the state we want to store here
|
||||
// isn't known at JIT time, so the caller has to store it themselves.
|
||||
if (!(flags & SAFE_LOADSTORE_NO_UPDATE_PC))
|
||||
{
|
||||
MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
|
||||
}
|
||||
m_jit.FlushPCBeforeSlowAccess();
|
||||
|
||||
size_t rsp_alignment = (flags & SAFE_LOADSTORE_NO_PROLOG) ? 8 : 0;
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, rsp_alignment);
|
||||
|
@ -663,8 +652,7 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address,
|
|||
}
|
||||
else
|
||||
{
|
||||
// Helps external systems know which instruction triggered the write
|
||||
MOV(32, PPCSTATE(pc), Imm32(m_jit.js.compilerPC));
|
||||
m_jit.FlushPCBeforeSlowAccess();
|
||||
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
||||
switch (accessSize)
|
||||
|
|
|
@ -282,6 +282,9 @@ protected:
|
|||
Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0),
|
||||
BitSet32 fprs_to_push = BitSet32(0), bool emitting_routine = false);
|
||||
|
||||
// temp_gpr must be a valid register, but temp_fpr can be INVALID_REG.
|
||||
void FlushPPCStateBeforeSlowAccess(Arm64Gen::ARM64Reg temp_gpr, Arm64Gen::ARM64Reg temp_fpr);
|
||||
|
||||
// Loadstore routines
|
||||
void SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update);
|
||||
void SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset, bool update);
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "Core/PowerPC/JitArmCommon/BackPatch.h"
|
||||
#include "Core/PowerPC/MMU.h"
|
||||
#include "Core/PowerPC/PowerPC.h"
|
||||
#include "Core/System.h"
|
||||
|
||||
using namespace Arm64Gen;
|
||||
|
||||
|
@ -171,6 +172,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
|
|||
|
||||
const ARM64Reg temp_gpr = ARM64Reg::W1;
|
||||
const int temp_gpr_index = DecodeReg(temp_gpr);
|
||||
const ARM64Reg temp_fpr = fprs_to_push[0] ? ARM64Reg::INVALID_REG : ARM64Reg::Q0;
|
||||
|
||||
BitSet32 gprs_to_push_early = {};
|
||||
if (memcheck)
|
||||
|
@ -189,16 +191,10 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
|
|||
ABI_PushRegisters(gprs_to_push & ~gprs_to_push_early);
|
||||
m_float_emit.ABI_PushRegisters(fprs_to_push, ARM64Reg::X30);
|
||||
|
||||
// PC is used by memory watchpoints (if enabled), profiling where to insert gather pipe
|
||||
// interrupt checks, and printing accurate PC locations in debug logs.
|
||||
//
|
||||
// In the case of JitAsm routines, we don't know the PC here,
|
||||
// so the caller has to store the PC themselves.
|
||||
// In the case of JitAsm routines, the state we want to store here
|
||||
// isn't known at JIT time, so the caller has to store it themselves.
|
||||
if (!emitting_routine)
|
||||
{
|
||||
MOVI2R(ARM64Reg::W30, js.compilerPC);
|
||||
STR(IndexType::Unsigned, ARM64Reg::W30, PPC_REG, PPCSTATE_OFF(pc));
|
||||
}
|
||||
FlushPPCStateBeforeSlowAccess(ARM64Reg::W30, temp_fpr);
|
||||
|
||||
if (flags & BackPatchInfo::FLAG_STORE)
|
||||
{
|
||||
|
@ -265,7 +261,6 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
|
|||
|
||||
if (memcheck)
|
||||
{
|
||||
const ARM64Reg temp_fpr = fprs_to_push[0] ? ARM64Reg::INVALID_REG : ARM64Reg::Q0;
|
||||
const u64 early_push_count = (gprs_to_push & gprs_to_push_early).Count();
|
||||
const u64 early_push_size = Common::AlignUp(early_push_count, 2) * 8;
|
||||
|
||||
|
@ -316,6 +311,22 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
|
|||
}
|
||||
}
|
||||
|
||||
// Writes the compile-time PC into ppcState and spills any guest registers
// referenced by memory watchpoint conditions, so C++ code reached from the
// slow access path sees accurate state. temp_gpr must be a usable scratch
// GPR; temp_fpr may be INVALID_REG if no FPR needs to be stored.
void JitArm64::FlushPPCStateBeforeSlowAccess(ARM64Reg temp_gpr, ARM64Reg temp_fpr)
{
  // PC is used by memory watchpoints (if enabled), profiling where to insert gather pipe
  // interrupt checks, and printing accurate PC locations in debug logs.
  MOVI2R(temp_gpr, js.compilerPC);
  STR(IndexType::Unsigned, temp_gpr, PPC_REG, PPCSTATE_OFF(pc));

  // Register values can be used by memory watchpoint conditions.
  MemChecks& mem_checks = m_system.GetPowerPC().GetMemChecks();
  if (!mem_checks.HasAny())
    return;

  // MaintainState keeps the cache mappings intact; we only want the values
  // mirrored into ppcState, not the registers released.
  gpr.StoreRegisters(mem_checks.GetGPRsUsedInConditions(), temp_gpr, FlushMode::MaintainState);
  fpr.StoreRegisters(mem_checks.GetFPRsUsedInConditions(), temp_fpr, FlushMode::MaintainState);
}
|
||||
|
||||
bool JitArm64::HandleFastmemFault(SContext* ctx)
|
||||
{
|
||||
const u8* pc = reinterpret_cast<const u8*>(ctx->CTX_PC);
|
||||
|
|
|
@ -102,9 +102,7 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
|
|||
{
|
||||
LDR(IndexType::Unsigned, scale_reg, PPC_REG, PPCSTATE_OFF_SPR(SPR_GQR0 + i));
|
||||
|
||||
// Stash PC in case asm routine needs to call into C++
|
||||
MOVI2R(ARM64Reg::W30, js.compilerPC);
|
||||
STR(IndexType::Unsigned, ARM64Reg::W30, PPC_REG, PPCSTATE_OFF(pc));
|
||||
FlushPPCStateBeforeSlowAccess(ARM64Reg::W30, ARM64Reg::Q1);
|
||||
|
||||
UBFM(type_reg, scale_reg, 16, 18); // Type
|
||||
UBFM(scale_reg, scale_reg, 24, 29); // Scale
|
||||
|
@ -260,9 +258,7 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
|
|||
{
|
||||
LDR(IndexType::Unsigned, scale_reg, PPC_REG, PPCSTATE_OFF_SPR(SPR_GQR0 + i));
|
||||
|
||||
// Stash PC in case asm routine needs to call into C++
|
||||
MOVI2R(ARM64Reg::W30, js.compilerPC);
|
||||
STR(IndexType::Unsigned, ARM64Reg::W30, PPC_REG, PPCSTATE_OFF(pc));
|
||||
FlushPPCStateBeforeSlowAccess(ARM64Reg::W30, ARM64Reg::Q1);
|
||||
|
||||
UBFM(type_reg, scale_reg, 0, 2); // Type
|
||||
UBFM(scale_reg, scale_reg, 8, 13); // Scale
|
||||
|
|
|
@ -388,14 +388,16 @@ public:
|
|||
|
||||
BitSet32 GetDirtyGPRs() const;
|
||||
|
||||
void StoreRegisters(BitSet32 regs, Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG)
|
||||
void StoreRegisters(BitSet32 regs, Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG,
|
||||
FlushMode flush_mode = FlushMode::All)
|
||||
{
|
||||
FlushRegisters(regs, FlushMode::All, tmp_reg, IgnoreDiscardedRegisters::No);
|
||||
FlushRegisters(regs, flush_mode, tmp_reg, IgnoreDiscardedRegisters::No);
|
||||
}
|
||||
|
||||
void StoreCRRegisters(BitSet8 regs, Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG)
|
||||
void StoreCRRegisters(BitSet8 regs, Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG,
|
||||
FlushMode flush_mode = FlushMode::All)
|
||||
{
|
||||
FlushCRRegisters(regs, FlushMode::All, tmp_reg, IgnoreDiscardedRegisters::No);
|
||||
FlushCRRegisters(regs, flush_mode, tmp_reg, IgnoreDiscardedRegisters::No);
|
||||
}
|
||||
|
||||
void DiscardCRRegisters(BitSet8 regs);
|
||||
|
@ -459,9 +461,10 @@ public:
|
|||
|
||||
void FixSinglePrecision(size_t preg);
|
||||
|
||||
void StoreRegisters(BitSet32 regs, Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG)
|
||||
void StoreRegisters(BitSet32 regs, Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG,
|
||||
FlushMode flush_mode = FlushMode::All)
|
||||
{
|
||||
FlushRegisters(regs, FlushMode::All, tmp_reg);
|
||||
FlushRegisters(regs, flush_mode, tmp_reg);
|
||||
}
|
||||
|
||||
protected:
|
||||
|
|
Loading…
Reference in New Issue