CPU/Recompiler: Avoid backpatching already-compiled linked blocks

This commit is contained in:
Connor McLaughlin 2021-07-24 14:56:12 +10:00
parent f9c6cb9343
commit 2af1ef6cee
7 changed files with 109 additions and 46 deletions

View File

@ -195,7 +195,7 @@ void LogCurrentState();
static CodeBlockKey GetNextBlockKey(); static CodeBlockKey GetNextBlockKey();
/// Looks up the block in the cache if it's already been compiled. /// Looks up the block in the cache if it's already been compiled.
static CodeBlock* LookupBlock(CodeBlockKey key); static CodeBlock* LookupOrCompileBlock(CodeBlockKey key);
/// Can the current block execute? This will re-validate the block if necessary. /// Can the current block execute? This will re-validate the block if necessary.
/// The block can also be flushed if recompilation failed, so ignore the pointer if false is returned. /// The block can also be flushed if recompilation failed, so ignore the pointer if false is returned.
@ -206,9 +206,6 @@ static void RemoveReferencesToBlock(CodeBlock* block);
static void AddBlockToPageMap(CodeBlock* block); static void AddBlockToPageMap(CodeBlock* block);
static void RemoveBlockFromPageMap(CodeBlock* block); static void RemoveBlockFromPageMap(CodeBlock* block);
/// Link block from to to. Returns the successor index.
static void LinkBlock(CodeBlock* from, CodeBlock* to, void* host_pc, void* host_resolve_pc, u32 host_pc_size);
/// Unlink all blocks which point to this block, and any that this block links to. /// Unlink all blocks which point to this block, and any that this block links to.
static void UnlinkBlock(CodeBlock* block); static void UnlinkBlock(CodeBlock* block);
@ -310,7 +307,7 @@ static void ExecuteImpl()
next_block_key = GetNextBlockKey(); next_block_key = GetNextBlockKey();
while (g_state.pending_ticks < g_state.downcount) while (g_state.pending_ticks < g_state.downcount)
{ {
CodeBlock* block = LookupBlock(next_block_key); CodeBlock* block = LookupOrCompileBlock(next_block_key);
if (!block) if (!block)
{ {
InterpretUncachedBlock<pgxp_mode>(); InterpretUncachedBlock<pgxp_mode>();
@ -371,7 +368,7 @@ static void ExecuteImpl()
} }
// No acceptable blocks found in the successor list, try a new one. // No acceptable blocks found in the successor list, try a new one.
CodeBlock* next_block = LookupBlock(next_block_key); CodeBlock* next_block = LookupOrCompileBlock(next_block_key);
if (next_block) if (next_block)
{ {
// Link the previous block to this new block if we find a new block. // Link the previous block to this new block if we find a new block.
@ -537,6 +534,24 @@ static void FallbackExistingBlockToInterpreter(CodeBlock* block)
} }
CodeBlock* LookupBlock(CodeBlockKey key) CodeBlock* LookupBlock(CodeBlockKey key)
{
// Cache-only lookup keyed on key.bits: a key with no entry in s_blocks yields nullptr.
BlockMap::iterator iter = s_blocks.find(key.bits);
if (iter == s_blocks.end())
return nullptr;
// ensure it hasn't been invalidated
// (a null map entry, or a block whose invalidated flag is clear, is returned unchanged)
CodeBlock* existing_block = iter->second;
if (!existing_block || !existing_block->invalidated)
return existing_block;
// if compilation fails or we're forced back to the interpreter, bail out
// (the invalidated block is only handed back when RevalidateBlock() returns true)
if (RevalidateBlock(existing_block))
return existing_block;
else
return nullptr;
}
CodeBlock* LookupOrCompileBlock(CodeBlockKey key)
{ {
BlockMap::iterator iter = s_blocks.find(key.bits); BlockMap::iterator iter = s_blocks.find(key.bits);
if (iter != s_blocks.end()) if (iter != s_blocks.end())
@ -797,7 +812,7 @@ bool CompileBlock(CodeBlock* block)
void FastCompileBlockFunction() void FastCompileBlockFunction()
{ {
CodeBlock* block = LookupBlock(GetNextBlockKey()); CodeBlock* block = LookupOrCompileBlock(GetNextBlockKey());
if (block) if (block)
{ {
s_single_block_asm_dispatcher(block->host_code); s_single_block_asm_dispatcher(block->host_code);
@ -938,15 +953,6 @@ void LinkBlock(CodeBlock* from, CodeBlock* to, void* host_pc, void* host_resolve
li.block = from; li.block = from;
to->link_predecessors.push_back(li); to->link_predecessors.push_back(li);
// apply in code
if (host_pc)
{
Log_ProfilePrintf("Backpatching %p(%08x) to jump to block %p (%08x)", host_pc, from->GetPC(), to, to->GetPC());
s_code_buffer.WriteProtect(false);
Recompiler::CodeGenerator::BackpatchBranch(host_pc, host_pc_size, reinterpret_cast<void*>(to->host_code));
s_code_buffer.WriteProtect(true);
}
} }
void UnlinkBlock(CodeBlock* block) void UnlinkBlock(CodeBlock* block)
@ -1174,6 +1180,12 @@ Common::PageFaultHandler::HandlerResult LUTPageFaultHandler(void* exception_pc,
return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler; return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
} }
/// Returns true when both blocks have can_link set and the successor is either not flagged as
/// invalidated or is successfully revalidated.
/// Not a pure query: RevalidateBlock() is invoked on a successor whose invalidated flag is set.
bool CanLinkBlocks(CodeBlock* block, CodeBlock* successor_block)
{
// Short-circuit evaluation means RevalidateBlock() only runs when both can_link flags are set
// and the successor is currently marked invalidated.
return (block->can_link && successor_block->can_link &&
(!successor_block->invalidated || RevalidateBlock(successor_block)));
}
#endif // WITH_RECOMPILER #endif // WITH_RECOMPILER
} // namespace CPU::CodeCache } // namespace CPU::CodeCache
@ -1185,9 +1197,8 @@ void CPU::Recompiler::Thunks::ResolveBranch(CodeBlock* block, void* host_pc, voi
using namespace CPU::CodeCache; using namespace CPU::CodeCache;
CodeBlockKey key = GetNextBlockKey(); CodeBlockKey key = GetNextBlockKey();
CodeBlock* successor_block = LookupBlock(key); CodeBlock* successor_block = LookupOrCompileBlock(key);
if (!successor_block || (successor_block->invalidated && !RevalidateBlock(successor_block)) || !block->can_link || if (!successor_block || !successor_block->host_code || !CanLinkBlocks(block, successor_block))
!successor_block->can_link)
{ {
// just turn it into a return to the dispatcher instead. // just turn it into a return to the dispatcher instead.
s_code_buffer.WriteProtect(false); s_code_buffer.WriteProtect(false);
@ -1198,6 +1209,13 @@ void CPU::Recompiler::Thunks::ResolveBranch(CodeBlock* block, void* host_pc, voi
{ {
// link blocks! // link blocks!
LinkBlock(block, successor_block, host_pc, host_resolve_pc, host_pc_size); LinkBlock(block, successor_block, host_pc, host_resolve_pc, host_pc_size);
Log_ProfilePrintf("Backpatching %p(%08x) to jump to block %p (%08x)", host_pc, block->GetPC(), successor_block,
successor_block->GetPC());
s_code_buffer.WriteProtect(false);
Recompiler::CodeGenerator::BackpatchBranch(host_pc, host_pc_size,
reinterpret_cast<void*>(successor_block->host_code));
s_code_buffer.WriteProtect(true);
} }
} }

View File

@ -140,6 +140,9 @@ void Reinitialize();
/// Invalidates all blocks which are in the range of the specified code page. /// Invalidates all blocks which are in the range of the specified code page.
void InvalidateBlocksWithPageIndex(u32 page_index); void InvalidateBlocksWithPageIndex(u32 page_index);
/// Looks up the block in the cache if it's already been compiled.
CodeBlock* LookupBlock(CodeBlockKey key);
template<PGXPMode pgxp_mode> template<PGXPMode pgxp_mode>
void InterpretCachedBlock(const CodeBlock& block); void InterpretCachedBlock(const CodeBlock& block);
@ -158,6 +161,12 @@ ALWAYS_INLINE void InvalidateCodePages(PhysicalMemoryAddress address, u32 word_c
} }
} }
/// Returns true if the two specified blocks can be linked.
bool CanLinkBlocks(CodeBlock* block, CodeBlock* successor_block);
/// Links block `from` to block `to`.
void LinkBlock(CodeBlock* from, CodeBlock* to, void* host_pc, void* host_resolve_pc, u32 host_pc_size);
}; // namespace CodeCache }; // namespace CodeCache
} // namespace CPU } // namespace CPU

View File

@ -2185,6 +2185,14 @@ bool CodeGenerator::Compile_SetLess(const CodeBlockInstruction& cbi)
return true; return true;
} }
/// Builds the code cache key for the block that starts at a branch target.
/// The PC is taken from value.constant_value narrowed to 32 bits, and user_mode is sampled from
/// CPU::InUserMode() at the time of the call.
/// NOTE(review): value.constant_value is read without checking value.IsConstant() here -
/// presumably callers only pass constant targets; confirm at the call sites.
static CodeBlockKey GetBlockKeyForBranchTarget(const Value& value)
{
// Start from an empty-initialized key; only the PC and user_mode are assigned below.
CodeBlockKey key = {};
key.SetPC(static_cast<u32>(value.constant_value));
key.user_mode = CPU::InUserMode();
return key;
}
bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi) bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
{ {
InstructionPrologue(cbi, 1); InstructionPrologue(cbi, 1);
@ -2192,6 +2200,7 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
auto DoBranch = [this, &cbi](Condition condition, const Value& lhs, const Value& rhs, Reg lr_reg, auto DoBranch = [this, &cbi](Condition condition, const Value& lhs, const Value& rhs, Reg lr_reg,
Value&& branch_target) { Value&& branch_target) {
const bool can_link_block = cbi.is_direct_branch_instruction && g_settings.cpu_recompiler_block_linking; const bool can_link_block = cbi.is_direct_branch_instruction && g_settings.cpu_recompiler_block_linking;
Assert(!can_link_block || branch_target.IsConstant());
// ensure the lr register is flushed, since we want its correct value after the branch // ensure the lr register is flushed, since we want its correct value after the branch
// we don't want to invalidate it yet because of "jalr r0, r0", branch_target could be the lr_reg. // we don't want to invalidate it yet because of "jalr r0, r0", branch_target could be the lr_reg.
@ -2344,11 +2353,27 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
// we're committed at this point :D // we're committed at this point :D
EmitEndBlock(true, false); EmitEndBlock(true, false);
const void* jump_pointer = GetCurrentCodePointer(); void* jump_pointer = GetCurrentCodePointer();
const void* resolve_pointer = GetCurrentFarCodePointer(); void* resolve_pointer = GetCurrentFarCodePointer();
EmitBranch(resolve_pointer); u32 jump_size;
const u32 jump_size = static_cast<u32>(static_cast<const char*>(GetCurrentCodePointer()) -
// is the block we're going to already compiled? save backpatching later...
CodeBlock* successor_block = CPU::CodeCache::LookupBlock(GetBlockKeyForBranchTarget(branch_target));
if (successor_block && successor_block->host_code && CPU::CodeCache::CanLinkBlocks(m_block, successor_block))
{
EmitBranch(successor_block->host_code, false);
jump_size = static_cast<u32>(static_cast<const char*>(GetCurrentCodePointer()) -
static_cast<const char*>(jump_pointer)); static_cast<const char*>(jump_pointer));
CodeCache::LinkBlock(m_block, successor_block, jump_pointer, resolve_pointer, jump_size);
}
else
{
EmitBranch(resolve_pointer, false);
jump_size = static_cast<u32>(static_cast<const char*>(GetCurrentCodePointer()) -
static_cast<const char*>(jump_pointer));
}
SwitchToFarCode(); SwitchToFarCode();
EmitBeginBlock(true); EmitBeginBlock(true);
@ -2379,11 +2404,28 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
EmitEndBlock(true, false); EmitEndBlock(true, false);
const void* jump_pointer = GetCurrentCodePointer(); void* jump_pointer = GetCurrentCodePointer();
const void* resolve_pointer = GetCurrentFarCodePointer(); void* resolve_pointer = GetCurrentFarCodePointer();
EmitBranch(GetCurrentFarCodePointer()); u32 jump_size;
const u32 jump_size =
// is the block we're going to already compiled? save backpatching later...
CodeBlock* successor_block = CPU::CodeCache::LookupBlock(
GetBlockKeyForBranchTarget(condition != Condition::Always ? next_pc : branch_target));
if (successor_block && successor_block->host_code && CPU::CodeCache::CanLinkBlocks(m_block, successor_block))
{
EmitBranch(successor_block->host_code, false);
jump_size =
static_cast<u32>(static_cast<const char*>(GetCurrentCodePointer()) - static_cast<const char*>(jump_pointer)); static_cast<u32>(static_cast<const char*>(GetCurrentCodePointer()) - static_cast<const char*>(jump_pointer));
CodeCache::LinkBlock(m_block, successor_block, jump_pointer, resolve_pointer, jump_size);
}
else
{
EmitBranch(resolve_pointer, false);
jump_size =
static_cast<u32>(static_cast<const char*>(GetCurrentCodePointer()) - static_cast<const char*>(jump_pointer));
}
SwitchToFarCode(); SwitchToFarCode();
EmitBeginBlock(true); EmitBeginBlock(true);

View File

@ -99,8 +99,7 @@ public:
const Value& value, bool in_far_code); const Value& value, bool in_far_code);
void EmitUpdateFastmemBase(); void EmitUpdateFastmemBase();
// Unconditional branch to pointer. May allocate a scratch register. void EmitBranch(const void* address, bool allow_short = true);
void EmitBranch(const void* address, bool allow_scratch = true);
void EmitBranch(LabelType* label); void EmitBranch(LabelType* label);
// Branching, generates two paths. // Branching, generates two paths.

View File

@ -1312,7 +1312,7 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
fastmem_base = GetFastmemLoadBase(); fastmem_base = GetFastmemLoadBase();
// return to the block code // return to the block code
EmitBranch(GetCurrentNearCodePointer(), false); EmitBranch(GetCurrentNearCodePointer());
SwitchToNearCode(); SwitchToNearCode();
m_register_cache.UninhibitAllocation(); m_register_cache.UninhibitAllocation();
@ -1469,7 +1469,7 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
fastmem_base = GetFastmemStoreBase(); fastmem_base = GetFastmemStoreBase();
// return to the block code // return to the block code
EmitBranch(GetCurrentNearCodePointer(), false); EmitBranch(GetCurrentNearCodePointer());
SwitchToNearCode(); SwitchToNearCode();
m_register_cache.UninhibitAllocation(); m_register_cache.UninhibitAllocation();
@ -1809,7 +1809,7 @@ void CodeGenerator::EmitStallUntilGTEComplete()
m_emit->str(GetHostReg32(RARG1), a32::MemOperand(GetCPUPtrReg(), offsetof(State, pending_ticks))); m_emit->str(GetHostReg32(RARG1), a32::MemOperand(GetCPUPtrReg(), offsetof(State, pending_ticks)));
} }
void CodeGenerator::EmitBranch(const void* address, bool allow_scratch) void CodeGenerator::EmitBranch(const void* address, bool allow_short /* = true */)
{ {
const s32 displacement = GetPCDisplacement(GetCurrentCodePointer(), address); const s32 displacement = GetPCDisplacement(GetCurrentCodePointer(), address);
if (IsPCDisplacementInImmediateRange(displacement)) if (IsPCDisplacementInImmediateRange(displacement))

View File

@ -1491,7 +1491,7 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add))); Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
// return to the block code // return to the block code
EmitBranch(GetCurrentNearCodePointer(), false); EmitBranch(GetCurrentNearCodePointer());
SwitchToNearCode(); SwitchToNearCode();
m_register_cache.UninhibitAllocation(); m_register_cache.UninhibitAllocation();
@ -1664,7 +1664,7 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add))); Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
// return to the block code // return to the block code
EmitBranch(GetCurrentNearCodePointer(), false); EmitBranch(GetCurrentNearCodePointer());
SwitchToNearCode(); SwitchToNearCode();
m_register_cache.UninhibitAllocation(); m_register_cache.UninhibitAllocation();
@ -1996,7 +1996,7 @@ void CodeGenerator::EmitStallUntilGTEComplete()
m_emit->str(GetHostReg32(RARG1), a64::MemOperand(GetCPUPtrReg(), offsetof(State, pending_ticks))); m_emit->str(GetHostReg32(RARG1), a64::MemOperand(GetCPUPtrReg(), offsetof(State, pending_ticks)));
} }
void CodeGenerator::EmitBranch(const void* address, bool allow_scratch) void CodeGenerator::EmitBranch(const void* address, bool allow_short /* = true */)
{ {
const s64 jump_distance = const s64 jump_distance =
static_cast<s64>(reinterpret_cast<intptr_t>(address) - reinterpret_cast<intptr_t>(GetCurrentCodePointer())); static_cast<s64>(reinterpret_cast<intptr_t>(address) - reinterpret_cast<intptr_t>(GetCurrentCodePointer()));
@ -2007,8 +2007,6 @@ void CodeGenerator::EmitBranch(const void* address, bool allow_scratch)
return; return;
} }
Assert(allow_scratch);
m_emit->Mov(GetHostReg64(RSCRATCH), reinterpret_cast<uintptr_t>(address)); m_emit->Mov(GetHostReg64(RSCRATCH), reinterpret_cast<uintptr_t>(address));
m_emit->br(GetHostReg64(RSCRATCH)); m_emit->br(GetHostReg64(RSCRATCH));
} }

View File

@ -2696,21 +2696,18 @@ void CodeGenerator::EmitStallUntilGTEComplete()
m_emit->mov(m_emit->dword[GetCPUPtrReg() + offsetof(State, pending_ticks)], GetHostReg32(RRETURN)); m_emit->mov(m_emit->dword[GetCPUPtrReg() + offsetof(State, pending_ticks)], GetHostReg32(RRETURN));
} }
void CodeGenerator::EmitBranch(const void* address, bool allow_scratch) void CodeGenerator::EmitBranch(const void* address, bool allow_short /* = true */)
{ {
const s64 jump_distance = const s64 jump_distance =
static_cast<s64>(reinterpret_cast<intptr_t>(address) - reinterpret_cast<intptr_t>(GetCurrentCodePointer())); static_cast<s64>(reinterpret_cast<intptr_t>(address) - reinterpret_cast<intptr_t>(GetCurrentCodePointer()));
if (Xbyak::inner::IsInInt32(static_cast<u64>(jump_distance))) if (Xbyak::inner::IsInInt32(static_cast<u64>(jump_distance)))
{ {
m_emit->jmp(address); m_emit->jmp(address, allow_short ? Xbyak::CodeGenerator::T_AUTO : Xbyak::CodeGenerator::T_NEAR);
return; return;
} }
Assert(allow_scratch); m_emit->mov(GetHostReg64(RRETURN), reinterpret_cast<uintptr_t>(address));
m_emit->jmp(GetHostReg64(RRETURN));
Value temp = m_register_cache.AllocateScratch(RegSize_64);
m_emit->mov(GetHostReg64(temp), reinterpret_cast<uintptr_t>(address));
m_emit->jmp(GetHostReg64(temp));
} }
void CodeGenerator::EmitBranch(LabelType* label) void CodeGenerator::EmitBranch(LabelType* label)