Jit64: Install BranchWatch

mitaclaw 2023-12-07 09:35:14 -08:00
parent 2aa250a68a
commit 7cccedca1e
5 changed files with 234 additions and 11 deletions
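Every branch site instrumented below follows the same shape: load the address of m_branch_watch, test its recording-active byte, and branch to far code that saves live registers and calls a BranchWatch hit routine. A minimal C++ sketch of that runtime behavior, using a hypothetical stand-in type and a hypothetical Hit() (the real emission is the x86-64 in WriteBranchWatch below):

#include <cstdint>

struct BranchWatchSketch
{
  bool recording_active = false;
  // Hypothetical stand-in for Core::BranchWatch::HitVirtualTrue_fk and friends.
  void Hit(uint64_t origin_and_destination, uint32_t inst_hex) {}
};

inline void OnBranch(BranchWatchSketch& bw, uint32_t origin, uint32_t destination,
                     uint32_t inst_hex)
{
  // Fast path: a single byte load and test, mirroring the emitted MOVZX/TEST pair.
  if (!bw.recording_active)
    return;
  // Cold path: pack origin and destination into one 64-bit key and report the hit.
  bw.Hit(uint64_t{origin} | (uint64_t{destination} << 32), inst_hex);
}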

Source/Core/Core/PowerPC/Jit64/Jit.cpp

@@ -1041,7 +1041,18 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
if (HandleFunctionHooking(op.address))
break;
- if (!op.skip)
if (op.skip)
{
if (IsDebuggingEnabled())
{
// The only thing that currently sets op.skip is the BLR-following optimization.
// If any non-branch instruction starts setting that too, this will need to be changed.
ASSERT(op.inst.hex == 0x4e800020);
WriteBranchWatch<true>(op.address, op.branchTo, op.inst, RSCRATCH, RSCRATCH2,
CallerSavedRegistersInUse());
}
}
else
{
if ((opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound)
{

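The magic number in the ASSERT above is the canonical blr encoding. As a quick sanity check, assuming standard Gekko field placement (primary opcode in bits 0-5, extended opcode in bits 21-30, big-endian bit numbering):

// blr is bclr 20, 0, 0: primary opcode 19, BO = 20 (branch always), BI = 0,
// extended opcode 16, LK = 0.
constexpr unsigned blr = (19u << 26) | (20u << 21) | (16u << 1);
static_assert(blr == 0x4e800020, "op.skip should only ever be a skipped blr");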
Source/Core/Core/PowerPC/Jit64/Jit.h

@@ -98,6 +98,12 @@ public:
void WriteExternalExceptionExit();
void WriteRfiExitDestInRSCRATCH();
void WriteIdleExit(u32 destination);
template <bool condition>
void WriteBranchWatch(u32 origin, u32 destination, UGeckoInstruction inst, Gen::X64Reg reg_a,
Gen::X64Reg reg_b, BitSet32 caller_save);
void WriteBranchWatchDestInRSCRATCH(u32 origin, UGeckoInstruction inst, Gen::X64Reg reg_a,
Gen::X64Reg reg_b, BitSet32 caller_save);
bool Cleanup();
void GenerateConstantOverflow(bool overflow);
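The template parameter of WriteBranchWatch records the branch outcome: <true> for a taken branch, <false> for a fallthrough. reg_a and reg_b are scratch registers the call site can spare, and caller_save lists the registers that must survive the far-code call. A representative call site from Jit_Branch.cpp below:

// Record a taken branch; live registers are preserved around the far call.
WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, RSCRATCH, RSCRATCH2,
                       CallerSavedRegistersInUse());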

Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp

@@ -7,6 +7,7 @@
#include "Common/CommonTypes.h"
#include "Common/x64Emitter.h"
#include "Core/CoreTiming.h"
#include "Core/Debugger/BranchWatch.h"
#include "Core/PowerPC/Gekko.h"
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
@@ -66,6 +67,68 @@ void Jit64::rfi(UGeckoInstruction inst)
WriteRfiExitDestInRSCRATCH();
}
template <bool condition>
void Jit64::WriteBranchWatch(u32 origin, u32 destination, UGeckoInstruction inst, X64Reg reg_a,
X64Reg reg_b, BitSet32 caller_save)
{
MOV(64, R(reg_a), ImmPtr(&m_branch_watch));
MOVZX(32, 8, reg_b, MDisp(reg_a, Core::BranchWatch::GetOffsetOfRecordingActive()));
TEST(32, R(reg_b), R(reg_b));
FixupBranch branch_in = J_CC(CC_NZ, Jump::Near);
SwitchToFarCode();
SetJumpTarget(branch_in);
ABI_PushRegistersAndAdjustStack(caller_save, 0);
// Some call sites have an optimization to use ABI_PARAM1 as a scratch register.
if (reg_a != ABI_PARAM1)
MOV(64, R(ABI_PARAM1), R(reg_a));
MOV(64, R(ABI_PARAM2), Imm64(Core::FakeBranchWatchCollectionKey{origin, destination}));
MOV(32, R(ABI_PARAM3), Imm32(inst.hex));
ABI_CallFunction(m_ppc_state.msr.IR ? (condition ? &Core::BranchWatch::HitVirtualTrue_fk :
&Core::BranchWatch::HitVirtualFalse_fk) :
(condition ? &Core::BranchWatch::HitPhysicalTrue_fk :
&Core::BranchWatch::HitPhysicalFalse_fk));
ABI_PopRegistersAndAdjustStack(caller_save, 0);
FixupBranch branch_out = J(Jump::Near);
SwitchToNearCode();
SetJumpTarget(branch_out);
}
template void Jit64::WriteBranchWatch<true>(u32, u32, UGeckoInstruction, X64Reg, X64Reg, BitSet32);
template void Jit64::WriteBranchWatch<false>(u32, u32, UGeckoInstruction, X64Reg, X64Reg, BitSet32);
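The Imm64 above works because FakeBranchWatchCollectionKey packs both addresses into a single 64-bit value, letting origin and destination be materialized with one immediate load. A sketch of the presumed layout (the actual definition lives in Core/Debugger/BranchWatch.h):

// Presumed layout: origin in the low half, destination in the high half when
// bit-cast to u64 on little-endian x86-64.
struct FakeBranchWatchCollectionKeySketch
{
  uint32_t origin_addr;
  uint32_t destin_addr;
};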
void Jit64::WriteBranchWatchDestInRSCRATCH(u32 origin, UGeckoInstruction inst, X64Reg reg_a,
X64Reg reg_b, BitSet32 caller_save)
{
MOV(64, R(reg_a), ImmPtr(&m_branch_watch));
MOVZX(32, 8, reg_b, MDisp(reg_a, Core::BranchWatch::GetOffsetOfRecordingActive()));
TEST(32, R(reg_b), R(reg_b));
FixupBranch branch_in = J_CC(CC_NZ, Jump::Near);
SwitchToFarCode();
SetJumpTarget(branch_in);
// Assert RSCRATCH won't be clobbered before it is moved from.
static_assert(ABI_PARAM1 != RSCRATCH);
ABI_PushRegistersAndAdjustStack(caller_save, 0);
// Some call sites have an optimization to use ABI_PARAM1 as a scratch register.
if (reg_a != ABI_PARAM1)
MOV(64, R(ABI_PARAM1), R(reg_a));
MOV(32, R(ABI_PARAM3), R(RSCRATCH));
MOV(32, R(ABI_PARAM2), Imm32(origin));
MOV(32, R(ABI_PARAM4), Imm32(inst.hex));
ABI_CallFunction(m_ppc_state.msr.IR ? &Core::BranchWatch::HitVirtualTrue :
&Core::BranchWatch::HitPhysicalTrue);
ABI_PopRegistersAndAdjustStack(caller_save, 0);
FixupBranch branch_out = J(Jump::Near);
SwitchToNearCode();
SetJumpTarget(branch_out);
}
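Unlike WriteBranchWatch, this variant is for branches whose destination is only known at runtime (left in RSCRATCH by the caller), so it calls the non-_fk hit routines that take origin and destination as separate arguments. MSR.IR picks between the virtual and physical variants; the indirect branches recorded here are always taken, so only the True routines are needed. Presumed shape of the two families, for orientation only:

// Presumed: static hit routines callable from JIT code; the BranchWatch
// pointer arrives in ABI_PARAM1. The _fk variants take the origin/destination
// pair pre-packed into a single u64 key.
static void HitVirtualTrue(BranchWatch* bw, u32 origin, u32 destination, u32 inst_hex);
static void HitVirtualTrue_fk(BranchWatch* bw, u64 fake_key, u32 inst_hex);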
void Jit64::bx(UGeckoInstruction inst)
{
INSTRUCTION_START
@@ -81,6 +144,11 @@ void Jit64::bx(UGeckoInstruction inst)
// Because PPCAnalyst::Flatten() merged the blocks.
if (!js.isLastInstruction)
{
if (IsDebuggingEnabled())
{
WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, RSCRATCH, RSCRATCH2,
CallerSavedRegistersInUse());
}
if (inst.LK && !js.op->skipLRStack)
{
// We have to fake the stack as the RET instruction was not
@@ -94,6 +162,11 @@ void Jit64::bx(UGeckoInstruction inst)
gpr.Flush();
fpr.Flush();
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, ABI_PARAM1, RSCRATCH, {});
}
#ifdef ACID_TEST
if (inst.LK)
AND(32, PPCSTATE(cr), Imm32(~(0xFF000000)));
@@ -144,6 +217,11 @@ void Jit64::bcx(UGeckoInstruction inst)
if (!js.isLastInstruction && (inst.BO & BO_DONT_DECREMENT_FLAG) &&
(inst.BO & BO_DONT_CHECK_CONDITION))
{
if (IsDebuggingEnabled())
{
WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, RSCRATCH, RSCRATCH2,
CallerSavedRegistersInUse());
}
if (inst.LK && !js.op->skipLRStack)
{
// We have to fake the stack as the RET instruction was not
@@ -160,6 +238,11 @@ void Jit64::bcx(UGeckoInstruction inst)
gpr.Flush();
fpr.Flush();
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, ABI_PARAM1, RSCRATCH, {});
}
if (js.op->branchIsIdleLoop)
{
WriteIdleExit(js.op->branchTo);
@@ -179,8 +262,18 @@ void Jit64::bcx(UGeckoInstruction inst)
{
gpr.Flush();
fpr.Flush();
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, ABI_PARAM1, RSCRATCH, {});
}
WriteExit(js.compilerPC + 4);
}
else if (IsDebuggingEnabled())
{
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, RSCRATCH, RSCRATCH2,
CallerSavedRegistersInUse());
}
}
void Jit64::bcctrx(UGeckoInstruction inst)
@@ -204,6 +297,12 @@ void Jit64::bcctrx(UGeckoInstruction inst)
if (inst.LK_3)
MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // LR = PC + 4;
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatchDestInRSCRATCH(js.compilerPC, inst, ABI_PARAM1, RSCRATCH2,
BitSet32{RSCRATCH});
}
WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4);
}
else
@@ -226,6 +325,12 @@ void Jit64::bcctrx(UGeckoInstruction inst)
RCForkGuard fpr_guard = fpr.Fork();
gpr.Flush();
fpr.Flush();
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatchDestInRSCRATCH(js.compilerPC, inst, ABI_PARAM1, RSCRATCH2,
BitSet32{RSCRATCH});
}
WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4);
// Would really like to continue the block here, but it ends. TODO.
}
@@ -235,8 +340,18 @@ void Jit64::bcctrx(UGeckoInstruction inst)
{
gpr.Flush();
fpr.Flush();
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, ABI_PARAM1, RSCRATCH, {});
}
WriteExit(js.compilerPC + 4);
}
else if (IsDebuggingEnabled())
{
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, RSCRATCH, RSCRATCH2,
CallerSavedRegistersInUse());
}
}
}
@@ -270,10 +385,8 @@ void Jit64::bclrx(UGeckoInstruction inst)
MOV(32, R(RSCRATCH), PPCSTATE_LR);
// We don't have to do this because WriteBLRExit handles it for us. Specifically, since we only
- // ever push
- // divisible-by-four instruction addresses onto the stack, if the return address matches, we're
- // already
- // good. If it doesn't match, the mispredicted-BLR code handles the fixup.
// ever push divisible-by-four instruction addresses onto the stack, if the return address
// matches, we're already good. If it doesn't match, the mispredicted-BLR code handles the fixup.
if (!m_enable_blr_optimization)
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
if (inst.LK)
@@ -287,10 +400,21 @@ void Jit64::bclrx(UGeckoInstruction inst)
if (js.op->branchIsIdleLoop)
{
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, ABI_PARAM1, RSCRATCH, {});
}
WriteIdleExit(js.op->branchTo);
}
else
{
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatchDestInRSCRATCH(js.compilerPC, inst, ABI_PARAM1, RSCRATCH2,
BitSet32{RSCRATCH});
}
WriteBLRExit();
}
}
@@ -304,6 +428,16 @@ void Jit64::bclrx(UGeckoInstruction inst)
{
gpr.Flush();
fpr.Flush();
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, ABI_PARAM1, RSCRATCH, {});
}
WriteExit(js.compilerPC + 4);
}
else if (IsDebuggingEnabled())
{
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, RSCRATCH, RSCRATCH2,
CallerSavedRegistersInUse());
}
}
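Throughout this file the two outcomes of a conditional branch are recorded separately: taken paths call WriteBranchWatch<true> with js.op->branchTo, while fallthrough paths call WriteBranchWatch<false> with js.compilerPC + 4. The two recurring call shapes:

// After gpr.Flush()/fpr.Flush(): no registers are live, so ABI_PARAM1 is free
// to use as scratch and the caller-save set is empty.
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, ABI_PARAM1, RSCRATCH, {});
// Before a flush: use the designated scratch registers and preserve whatever
// the register cache still has live.
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, RSCRATCH, RSCRATCH2,
                        CallerSavedRegistersInUse());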

Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp

@@ -394,18 +394,25 @@ void Jit64::DoMergedBranch()
if (next.LK)
MOV(32, PPCSTATE_SPR(SPR_LR), Imm32(nextPC + 4));
- WriteIdleExit(js.op[1].branchTo);
const u32 destination = js.op[1].branchTo;
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatch<true>(nextPC, destination, next, ABI_PARAM1, RSCRATCH, {});
}
WriteIdleExit(destination);
}
else if (next.OPCD == 16) // bcx
{
if (next.LK)
MOV(32, PPCSTATE_SPR(SPR_LR), Imm32(nextPC + 4));
- u32 destination;
- if (next.AA)
- destination = SignExt16(next.BD << 2);
- else
- destination = nextPC + SignExt16(next.BD << 2);
const u32 destination = js.op[1].branchTo;
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatch<true>(nextPC, destination, next, ABI_PARAM1, RSCRATCH, {});
}
WriteExit(destination, next.LK, nextPC + 4);
}
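// Note: js.op[1].branchTo is precomputed by PPCAnalyst and replaces the
// open-coded computation removed above, which for bcx was:
//   next.AA ? SignExt16(next.BD << 2) : nextPC + SignExt16(next.BD << 2)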
else if ((next.OPCD == 19) && (next.SUBOP10 == 528)) // bcctrx
@@ -414,6 +421,11 @@ void Jit64::DoMergedBranch()
MOV(32, PPCSTATE_SPR(SPR_LR), Imm32(nextPC + 4));
MOV(32, R(RSCRATCH), PPCSTATE_SPR(SPR_CTR));
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatchDestInRSCRATCH(nextPC, next, ABI_PARAM1, RSCRATCH2, BitSet32{RSCRATCH});
}
WriteExitDestInRSCRATCH(next.LK, nextPC + 4);
}
else if ((next.OPCD == 19) && (next.SUBOP10 == 16)) // bclrx
@@ -423,6 +435,11 @@ void Jit64::DoMergedBranch()
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
if (next.LK)
MOV(32, PPCSTATE_SPR(SPR_LR), Imm32(nextPC + 4));
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatchDestInRSCRATCH(nextPC, next, ABI_PARAM1, RSCRATCH2, BitSet32{RSCRATCH});
}
WriteBLRExit();
}
else
@@ -480,8 +497,18 @@ void Jit64::DoMergedBranchCondition()
{
gpr.Flush();
fpr.Flush();
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatch<false>(nextPC, nextPC + 4, next, ABI_PARAM1, RSCRATCH, {});
}
WriteExit(nextPC + 4);
}
else if (IsDebuggingEnabled())
{
WriteBranchWatch<false>(nextPC, nextPC + 4, next, RSCRATCH, RSCRATCH2,
CallerSavedRegistersInUse());
}
}
void Jit64::DoMergedBranchImmediate(s64 val)
@@ -515,8 +542,18 @@ void Jit64::DoMergedBranchImmediate(s64 val)
{
gpr.Flush();
fpr.Flush();
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatch<false>(nextPC, nextPC + 4, next, ABI_PARAM1, RSCRATCH, {});
}
WriteExit(nextPC + 4);
}
else if (IsDebuggingEnabled())
{
WriteBranchWatch<false>(nextPC, nextPC + 4, next, RSCRATCH, RSCRATCH2,
CallerSavedRegistersInUse());
}
}
void Jit64::cmpXX(UGeckoInstruction inst)

Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp

@@ -15,6 +15,7 @@
#include "Core/ConfigManager.h"
#include "Core/CoreTiming.h"
#include "Core/Debugger/BranchWatch.h"
#include "Core/HW/CPU.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/Jit64/RegCache/JitRegCache.h"
@@ -300,6 +301,40 @@ void Jit64::dcbx(UGeckoInstruction inst)
// Load the loop_counter register with the amount of invalidations to execute.
LEA(32, loop_counter, MDisp(RSCRATCH2, 1));
if (IsDebuggingEnabled())
{
const X64Reg bw_reg_a = reg_cycle_count, bw_reg_b = reg_downcount;
const BitSet32 bw_caller_save = (CallerSavedRegistersInUse() | BitSet32{RSCRATCH2}) &
~BitSet32{int(bw_reg_a), int(bw_reg_b)};
MOV(64, R(bw_reg_a), ImmPtr(&m_branch_watch));
MOVZX(32, 8, bw_reg_b, MDisp(bw_reg_a, Core::BranchWatch::GetOffsetOfRecordingActive()));
TEST(32, R(bw_reg_b), R(bw_reg_b));
FixupBranch branch_in = J_CC(CC_NZ, Jump::Near);
SwitchToFarCode();
SetJumpTarget(branch_in);
// Assert RSCRATCH2 won't be clobbered before it is moved from.
static_assert(RSCRATCH2 != ABI_PARAM1);
ABI_PushRegistersAndAdjustStack(bw_caller_save, 0);
MOV(64, R(ABI_PARAM1), R(bw_reg_a));
// RSCRATCH2 holds the number of faked branch watch hits. Move RSCRATCH2 first, because
// ABI_PARAM2 clobbers RSCRATCH2 on Windows and ABI_PARAM3 clobbers RSCRATCH2 on Linux!
MOV(32, R(ABI_PARAM4), R(RSCRATCH2));
const PPCAnalyst::CodeOp& op = js.op[2];
MOV(64, R(ABI_PARAM2), Imm64(Core::FakeBranchWatchCollectionKey{op.address, op.branchTo}));
MOV(32, R(ABI_PARAM3), Imm32(op.inst.hex));
ABI_CallFunction(m_ppc_state.msr.IR ? &Core::BranchWatch::HitVirtualTrue_fk_n :
&Core::BranchWatch::HitPhysicalTrue_fk_n);
ABI_PopRegistersAndAdjustStack(bw_caller_save, 0);
FixupBranch branch_out = J(Jump::Near);
SwitchToNearCode();
SetJumpTarget(branch_out);
}
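// Expanding on the clobber note above: Jit64 maps RSCRATCH to RAX and RSCRATCH2
// to RDX, and RDX is the second integer argument register in the Windows x64 ABI
// (RCX, RDX, R8, R9) but the third in the System V ABI (RDI, RSI, RDX, RCX). Moving
// the hit count out of RSCRATCH2 into ABI_PARAM4 first keeps it from being
// overwritten by either immediate load.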
}
X64Reg addr = RSCRATCH;