JitArm64: Implement breakpoints

Plus two miscellaneous debugger features that I found along the way when
reading Jit64's code for comparison: bJITNoBlockLinking and tracing.

Fixes https://bugs.dolphin-emu.org/issues/13127.
This commit is contained in:
JosJuice 2022-12-24 13:00:45 +01:00
parent 0659827485
commit c744ff4934
3 changed files with 117 additions and 11 deletions

View File

@ -17,12 +17,14 @@
#include "Core/Core.h" #include "Core/Core.h"
#include "Core/CoreTiming.h" #include "Core/CoreTiming.h"
#include "Core/HLE/HLE.h" #include "Core/HLE/HLE.h"
#include "Core/HW/CPU.h"
#include "Core/HW/GPFifo.h" #include "Core/HW/GPFifo.h"
#include "Core/HW/Memmap.h" #include "Core/HW/Memmap.h"
#include "Core/HW/ProcessorInterface.h" #include "Core/HW/ProcessorInterface.h"
#include "Core/PatchEngine.h" #include "Core/PatchEngine.h"
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" #include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/JitInterface.h" #include "Core/PowerPC/JitInterface.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/Profiler.h" #include "Core/PowerPC/Profiler.h"
#include "Core/System.h" #include "Core/System.h"
@ -57,9 +59,10 @@ void JitArm64::Init()
auto& memory = system.GetMemory(); auto& memory = system.GetMemory();
jo.fastmem_arena = m_fastmem_enabled && memory.InitFastmemArena(); jo.fastmem_arena = m_fastmem_enabled && memory.InitFastmemArena();
jo.enableBlocklink = true;
jo.optimizeGatherPipe = true; jo.optimizeGatherPipe = true;
UpdateMemoryAndExceptionOptions(); UpdateMemoryAndExceptionOptions();
SetBlockLinkingEnabled(true);
SetOptimizationEnabled(true);
gpr.Init(this); gpr.Init(this);
fpr.Init(this); fpr.Init(this);
blocks.Init(); blocks.Init();
@ -67,9 +70,6 @@ void JitArm64::Init()
code_block.m_stats = &js.st; code_block.m_stats = &js.st;
code_block.m_gpa = &js.gpa; code_block.m_gpa = &js.gpa;
code_block.m_fpa = &js.fpa; code_block.m_fpa = &js.fpa;
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_FOLLOW);
m_enable_blr_optimization = jo.enableBlocklink && m_fastmem_enabled && !m_enable_debugging; m_enable_blr_optimization = jo.enableBlocklink && m_fastmem_enabled && !m_enable_debugging;
m_cleanup_after_stackfault = false; m_cleanup_after_stackfault = false;
@ -80,6 +80,27 @@ void JitArm64::Init()
ResetFreeMemoryRanges(); ResetFreeMemoryRanges();
} }
// Turns block linking on or off for subsequently compiled blocks.
//
// A request to enable linking is honoured only while the bJITNoBlockLinking
// setting is not set; a request to disable it always takes effect.
void JitArm64::SetBlockLinkingEnabled(bool enabled)
{
  if (!enabled)
  {
    jo.enableBlocklink = false;
    return;
  }

  // The bJITNoBlockLinking setting overrides the caller's request.
  jo.enableBlocklink = !SConfig::GetInstance().bJITNoBlockLinking;
}
void JitArm64::SetOptimizationEnabled(bool enabled)
{
if (enabled)
{
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_FOLLOW);
}
else
{
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE);
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_FOLLOW);
}
}
bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx) bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
{ {
// Ifdef this since the exception handler runs on a separate thread on macOS (ARM) // Ifdef this since the exception handler runs on a separate thread on macOS (ARM)
@ -661,6 +682,31 @@ void JitArm64::SingleStep()
pExecAddr(); pExecAddr();
} }
// Writes one debug-level DYNA_REC log line describing the current PowerPC
// state: PC, SRR0, SRR1, FPSCR, MSR and LR.
//
// The general-purpose and floating-point register dumps are only filled in
// when the build defines JIT_LOG_GPR / JIT_LOG_FPR respectively; otherwise the
// corresponding part of the log line is empty.
void JitArm64::Trace()
{
  std::string gpr_dump;
  std::string fpr_dump;

#ifdef JIT_LOG_GPR
  for (size_t reg = 0; reg < std::size(PowerPC::ppcState.gpr); ++reg)
    gpr_dump.append(fmt::format("r{:02d}: {:08x} ", reg, PowerPC::ppcState.gpr[reg]));
#endif

#ifdef JIT_LOG_FPR
  // Only the first paired-single slot (PS0) of each register is printed.
  for (size_t reg = 0; reg < std::size(PowerPC::ppcState.ps); ++reg)
    fpr_dump.append(fmt::format("f{:02d}: {:016x} ", reg, PowerPC::ppcState.ps[reg].PS0AsU64()));
#endif

  // NOTE(review): spr[8] is printed under the "LR" label, so it is presumably
  // the link register - confirm against the SPR index definitions.
  DEBUG_LOG_FMT(DYNA_REC,
                "JitArm64 PC: {:08x} SRR0: {:08x} SRR1: {:08x} FPSCR: {:08x} "
                "MSR: {:08x} LR: {:08x} {} {}",
                PC, SRR0, SRR1, FPSCR.Hex, MSR.Hex, PowerPC::ppcState.spr[8], gpr_dump,
                fpr_dump);
}
void JitArm64::Jit(u32 em_address) void JitArm64::Jit(u32 em_address)
{ {
Jit(em_address, true); Jit(em_address, true);
@ -706,8 +752,22 @@ void JitArm64::Jit(u32 em_address, bool clear_cache_and_retry_on_failure)
if (m_enable_debugging) if (m_enable_debugging)
{ {
// Comment out the following to disable breakpoints (speed-up) // We can link blocks as long as we are not single stepping
block_size = 1; SetBlockLinkingEnabled(true);
SetOptimizationEnabled(true);
if (!jo.profile_blocks)
{
if (CPU::IsStepping())
{
block_size = 1;
// Do not link this block to other blocks while single stepping
SetBlockLinkingEnabled(false);
SetOptimizationEnabled(false);
}
Trace();
}
} }
// Analyze the block, collect all instructions it is made of (including inlining, // Analyze the block, collect all instructions it is made of (including inlining,
@ -1006,11 +1066,38 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
js.firstFPInstructionFound = true; js.firstFPInstructionFound = true;
} }
if (bJITRegisterCacheOff) if (m_enable_debugging && PowerPC::breakpoints.IsAddressBreakPoint(op.address) &&
!CPU::IsStepping())
{ {
FlushCarry();
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
static_assert(PPCSTATE_OFF(pc) <= 252);
static_assert(PPCSTATE_OFF(pc) + 4 == PPCSTATE_OFF(npc));
MOVI2R(DISPATCHER_PC, op.address);
STP(IndexType::Signed, DISPATCHER_PC, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
MOVP2R(ARM64Reg::X0, &PowerPC::CheckBreakPoints);
BLR(ARM64Reg::X0);
LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0,
MOVPage2R(ARM64Reg::X0, CPU::GetStatePtr()));
FixupBranch no_breakpoint = CBZ(ARM64Reg::W0);
Cleanup();
EndTimeProfile(js.curBlock);
DoDownCount();
B(dispatcher_exit);
SetJumpTarget(no_breakpoint);
}
if (bJITRegisterCacheOff)
{
FlushCarry(); FlushCarry();
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
} }
CompileInstruction(op); CompileInstruction(op);

View File

@ -190,6 +190,9 @@ protected:
const u8* slowmem_code; const u8* slowmem_code;
}; };
void SetBlockLinkingEnabled(bool enabled);
void SetOptimizationEnabled(bool enabled);
void CompileInstruction(PPCAnalyst::CodeOp& op); void CompileInstruction(PPCAnalyst::CodeOp& op);
bool HandleFunctionHooking(u32 address); bool HandleFunctionHooking(u32 address);
@ -276,6 +279,8 @@ protected:
bool DoJit(u32 em_address, JitBlock* b, u32 nextPC); bool DoJit(u32 em_address, JitBlock* b, u32 nextPC);
void Trace();
// Finds a free memory region and sets the near and far code emitters to point at that region. // Finds a free memory region and sets the near and far code emitters to point at that region.
// Returns false if no free memory region can be found for either of the two. // Returns false if no free memory region can be found for either of the two.
bool SetEmitterStateToFreeCodeRegion(); bool SetEmitterStateToFreeCodeRegion();

View File

@ -8,10 +8,12 @@
#include "Common/Arm64Emitter.h" #include "Common/Arm64Emitter.h"
#include "Common/BitUtils.h" #include "Common/BitUtils.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/Config/Config.h"
#include "Common/FloatUtils.h" #include "Common/FloatUtils.h"
#include "Common/JitRegister.h" #include "Common/JitRegister.h"
#include "Common/MathUtil.h" #include "Common/MathUtil.h"
#include "Core/Config/MainSettings.h"
#include "Core/CoreTiming.h" #include "Core/CoreTiming.h"
#include "Core/HW/CPU.h" #include "Core/HW/CPU.h"
#include "Core/HW/Memmap.h" #include "Core/HW/Memmap.h"
@ -28,6 +30,8 @@ void JitArm64::GenerateAsm()
{ {
const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes; const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;
const bool enable_debugging = Config::Get(Config::MAIN_ENABLE_DEBUGGING);
// This value is all of the callee saved registers that we are required to save. // This value is all of the callee saved registers that we are required to save.
// According to the AAPCS64 we need to save R19 ~ R30 and Q8 ~ Q15. // According to the AAPCS64 we need to save R19 ~ R30 and Q8 ~ Q15.
const u32 ALL_CALLEE_SAVED = 0x7FF80000; const u32 ALL_CALLEE_SAVED = 0x7FF80000;
@ -85,6 +89,15 @@ void JitArm64::GenerateAsm()
FixupBranch bail = B(CC_LE); FixupBranch bail = B(CC_LE);
dispatcher_no_timing_check = GetCodePtr(); dispatcher_no_timing_check = GetCodePtr();
FixupBranch debug_exit;
if (enable_debugging)
{
LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0,
MOVPage2R(ARM64Reg::X0, CPU::GetStatePtr()));
debug_exit = CBNZ(ARM64Reg::W0);
}
dispatcher_no_check = GetCodePtr(); dispatcher_no_check = GetCodePtr();
bool assembly_dispatcher = true; bool assembly_dispatcher = true;
@ -174,9 +187,7 @@ void JitArm64::GenerateAsm()
// Check the state pointer to see if we are exiting // Check the state pointer to see if we are exiting
// Gets checked on at the end of every slice // Gets checked on at the end of every slice
LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0, MOVPage2R(ARM64Reg::X0, CPU::GetStatePtr())); LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0, MOVPage2R(ARM64Reg::X0, CPU::GetStatePtr()));
FixupBranch exit = CBNZ(ARM64Reg::W0);
CMP(ARM64Reg::W0, 0);
FixupBranch Exit = B(CC_NEQ);
SetJumpTarget(to_start_of_timing_slice); SetJumpTarget(to_start_of_timing_slice);
MOVP2R(ARM64Reg::X8, &CoreTiming::GlobalAdvance); MOVP2R(ARM64Reg::X8, &CoreTiming::GlobalAdvance);
@ -188,7 +199,10 @@ void JitArm64::GenerateAsm()
// We can safely assume that downcount >= 1 // We can safely assume that downcount >= 1
B(dispatcher_no_check); B(dispatcher_no_check);
SetJumpTarget(Exit); dispatcher_exit = GetCodePtr();
SetJumpTarget(exit);
if (enable_debugging)
SetJumpTarget(debug_exit);
// Reset the stack pointer, as the BLR optimization has touched it. // Reset the stack pointer, as the BLR optimization has touched it.
LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1,