CPU/Recompiler: Avoid backpatching already-compiled linked blocks

Connor McLaughlin 2021-07-24 14:56:12 +10:00
parent f9c6cb9343
commit 2af1ef6cee
7 changed files with 109 additions and 46 deletions
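
Summary of the change: previously every direct branch compiled by the recompiler jumped to a far-code resolve stub, and CPU::Recompiler::Thunks::ResolveBranch backpatched that jump the first time it ran. With this commit, Compile_Branch first probes the block cache; if the successor block is already compiled and linkable, the branch is emitted pointing straight at its host code and only the link metadata is recorded, so no runtime backpatch is needed. LinkBlock no longer writes code at all; the runtime backpatch moves into ResolveBranch, which still handles targets that were not compiled at emit time. A condensed sketch of the new compile-time decision, pieced together from the hunks below (names follow the diff, jump-size bookkeeping is omitted; this is not a verbatim excerpt):

// Condensed sketch of the compile-time linking decision (not verbatim from the diff).
CodeBlock* successor = CPU::CodeCache::LookupBlock(GetBlockKeyForBranchTarget(branch_target));
if (successor && successor->host_code && CPU::CodeCache::CanLinkBlocks(m_block, successor))
{
  // Target already compiled: branch straight to its host code and record the link.
  EmitBranch(successor->host_code, false);
  CPU::CodeCache::LinkBlock(m_block, successor, jump_pointer, resolve_pointer, jump_size);
}
else
{
  // Target unknown: branch to the far-code stub; ResolveBranch backpatches it on first execution.
  EmitBranch(resolve_pointer, false);
}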

View File

@@ -195,7 +195,7 @@ void LogCurrentState();
static CodeBlockKey GetNextBlockKey();
/// Looks up the block in the cache if it's already been compiled.
static CodeBlock* LookupBlock(CodeBlockKey key);
static CodeBlock* LookupOrCompileBlock(CodeBlockKey key);
/// Can the current block execute? This will re-validate the block if necessary.
/// The block can also be flushed if recompilation failed, so ignore the pointer if false is returned.
@@ -206,9 +206,6 @@ static void RemoveReferencesToBlock(CodeBlock* block);
static void AddBlockToPageMap(CodeBlock* block);
static void RemoveBlockFromPageMap(CodeBlock* block);
/// Link block 'from' to 'to'. Returns the successor index.
static void LinkBlock(CodeBlock* from, CodeBlock* to, void* host_pc, void* host_resolve_pc, u32 host_pc_size);
/// Unlink all blocks which point to this block, and any that this block links to.
static void UnlinkBlock(CodeBlock* block);
@@ -310,7 +307,7 @@ static void ExecuteImpl()
next_block_key = GetNextBlockKey();
while (g_state.pending_ticks < g_state.downcount)
{
CodeBlock* block = LookupBlock(next_block_key);
CodeBlock* block = LookupOrCompileBlock(next_block_key);
if (!block)
{
InterpretUncachedBlock<pgxp_mode>();
@@ -371,7 +368,7 @@ static void ExecuteImpl()
}
// No acceptable blocks found in the successor list, try a new one.
CodeBlock* next_block = LookupBlock(next_block_key);
CodeBlock* next_block = LookupOrCompileBlock(next_block_key);
if (next_block)
{
// Link the previous block to this new block if we find a new block.
@@ -537,6 +534,24 @@ static void FallbackExistingBlockToInterpreter(CodeBlock* block)
}
CodeBlock* LookupBlock(CodeBlockKey key)
{
BlockMap::iterator iter = s_blocks.find(key.bits);
if (iter == s_blocks.end())
return nullptr;
// ensure it hasn't been invalidated
CodeBlock* existing_block = iter->second;
if (!existing_block || !existing_block->invalidated)
return existing_block;
// if compilation fails or we're forced back to the interpreter, bail out
if (RevalidateBlock(existing_block))
return existing_block;
else
return nullptr;
}
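
The LookupBlock added above is lookup-only: it never compiles, at most revalidating an invalidated block. Compilation stays in LookupOrCompileBlock. A condensed usage sketch of the two entry points as they appear elsewhere in this commit (not verbatim), the point presumably being that the code generator can probe for a successor without recursively kicking off another compilation:

// Dispatcher / execution loop: compile the block on demand.
CodeBlock* block = LookupOrCompileBlock(GetNextBlockKey());

// Code generator, while emitting a direct branch: probe only, never compile from here.
CodeBlock* successor = LookupBlock(key);  // nullptr if the target has not been compiled yet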
CodeBlock* LookupOrCompileBlock(CodeBlockKey key)
{
BlockMap::iterator iter = s_blocks.find(key.bits);
if (iter != s_blocks.end())
@@ -797,7 +812,7 @@ bool CompileBlock(CodeBlock* block)
void FastCompileBlockFunction()
{
CodeBlock* block = LookupBlock(GetNextBlockKey());
CodeBlock* block = LookupOrCompileBlock(GetNextBlockKey());
if (block)
{
s_single_block_asm_dispatcher(block->host_code);
@@ -938,15 +953,6 @@ void LinkBlock(CodeBlock* from, CodeBlock* to, void* host_pc, void* host_resolve
li.block = from;
to->link_predecessors.push_back(li);
// apply in code
if (host_pc)
{
Log_ProfilePrintf("Backpatching %p(%08x) to jump to block %p (%08x)", host_pc, from->GetPC(), to, to->GetPC());
s_code_buffer.WriteProtect(false);
Recompiler::CodeGenerator::BackpatchBranch(host_pc, host_pc_size, reinterpret_cast<void*>(to->host_code));
s_code_buffer.WriteProtect(true);
}
}
void UnlinkBlock(CodeBlock* block)
@@ -1174,6 +1180,12 @@ Common::PageFaultHandler::HandlerResult LUTPageFaultHandler(void* exception_pc,
return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
}
bool CanLinkBlocks(CodeBlock* block, CodeBlock* successor_block)
{
return (block->can_link && successor_block->can_link &&
(!successor_block->invalidated || RevalidateBlock(successor_block)));
}
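
CanLinkBlocks folds the check that previously sat inline in ResolveBranch into a single predicate: both blocks must allow linking, and an invalidated successor must revalidate successfully first (RevalidateBlock may recompile it). The callers still check host_code separately, since a block that has fallen back to the interpreter exists in the cache but has nothing to jump to. A condensed usage sketch mirroring the two call sites in this commit (the fallback name is hypothetical):

if (successor_block && successor_block->host_code && CanLinkBlocks(block, successor_block))
  LinkBlock(block, successor_block, host_pc, host_resolve_pc, host_pc_size);
else
  EmitReturnToDispatcher();  // hypothetical stand-in for the "return to the dispatcher" path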
#endif // WITH_RECOMPILER
} // namespace CPU::CodeCache
@@ -1185,9 +1197,8 @@ void CPU::Recompiler::Thunks::ResolveBranch(CodeBlock* block, void* host_pc, voi
using namespace CPU::CodeCache;
CodeBlockKey key = GetNextBlockKey();
CodeBlock* successor_block = LookupBlock(key);
if (!successor_block || (successor_block->invalidated && !RevalidateBlock(successor_block)) || !block->can_link ||
!successor_block->can_link)
CodeBlock* successor_block = LookupOrCompileBlock(key);
if (!successor_block || !successor_block->host_code || !CanLinkBlocks(block, successor_block))
{
// just turn it into a return to the dispatcher instead.
s_code_buffer.WriteProtect(false);
@@ -1198,6 +1209,13 @@ void CPU::Recompiler::Thunks::ResolveBranch(CodeBlock* block, void* host_pc, voi
{
// link blocks!
LinkBlock(block, successor_block, host_pc, host_resolve_pc, host_pc_size);
Log_ProfilePrintf("Backpatching %p(%08x) to jump to block %p (%08x)", host_pc, block->GetPC(), successor_block,
successor_block->GetPC());
s_code_buffer.WriteProtect(false);
Recompiler::CodeGenerator::BackpatchBranch(host_pc, host_pc_size,
reinterpret_cast<void*>(successor_block->host_code));
s_code_buffer.WriteProtect(true);
}
}
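
Note that LinkBlock now only records the link metadata (predecessor/successor entries plus host_pc, host_resolve_pc and host_pc_size); the actual code patch is applied by the caller, as above, inside the usual WriteProtect(false)/WriteProtect(true) pair. Keeping host_resolve_pc around is what allows an unlink to later point the branch back at its resolve stub. A hedged sketch of that reverse patch (field names are assumed from LinkBlock's parameters, not quoted from UnlinkBlock):

// Hedged sketch: restore each predecessor's branch to its resolve stub when unlinking.
s_code_buffer.WriteProtect(false);
for (const auto& li : block->link_predecessors)
{
  if (li.host_pc)  // links recorded without an emitted jump carry a null host_pc
    Recompiler::CodeGenerator::BackpatchBranch(li.host_pc, li.host_pc_size, li.host_resolve_pc);
}
s_code_buffer.WriteProtect(true);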

View File

@@ -140,6 +140,9 @@ void Reinitialize();
/// Invalidates all blocks which are in the range of the specified code page.
void InvalidateBlocksWithPageIndex(u32 page_index);
/// Looks up the block in the cache if it's already been compiled.
CodeBlock* LookupBlock(CodeBlockKey key);
template<PGXPMode pgxp_mode>
void InterpretCachedBlock(const CodeBlock& block);
@@ -158,6 +161,12 @@ ALWAYS_INLINE void InvalidateCodePages(PhysicalMemoryAddress address, u32 word_c
}
}
/// Returns true if the two specified blocks can be linked.
bool CanLinkBlocks(CodeBlock* block, CodeBlock* successor_block);
/// Link block 'from' to 'to'.
void LinkBlock(CodeBlock* from, CodeBlock* to, void* host_pc, void* host_resolve_pc, u32 host_pc_size);
}; // namespace CodeCache
} // namespace CPU

View File

@@ -2185,6 +2185,14 @@ bool CodeGenerator::Compile_SetLess(const CodeBlockInstruction& cbi)
return true;
}
static CodeBlockKey GetBlockKeyForBranchTarget(const Value& value)
{
CodeBlockKey key = {};
key.SetPC(static_cast<u32>(value.constant_value));
key.user_mode = CPU::InUserMode();
return key;
}
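
The key is built the same way the dispatcher builds it: the constant branch target plus the current privilege mode, so a block compiled for user mode is presumably never linked to one compiled for kernel mode even when they share a PC. Illustrative usage matching the call sites later in this diff:

// Illustrative only; branch_target must hold a constant for the key to be meaningful.
const CodeBlockKey key = GetBlockKeyForBranchTarget(branch_target);
CodeBlock* successor = CPU::CodeCache::LookupBlock(key);  // nullptr if not compiled yet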
bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
{
InstructionPrologue(cbi, 1);
@@ -2192,6 +2200,7 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
auto DoBranch = [this, &cbi](Condition condition, const Value& lhs, const Value& rhs, Reg lr_reg,
Value&& branch_target) {
const bool can_link_block = cbi.is_direct_branch_instruction && g_settings.cpu_recompiler_block_linking;
Assert(!can_link_block || branch_target.IsConstant());
// ensure the lr register is flushed, since we want its correct value after the branch
// we don't want to invalidate it yet because of "jalr r0, r0", where branch_target could be the lr_reg.
@@ -2344,11 +2353,27 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
// we're committed at this point :D
EmitEndBlock(true, false);
const void* jump_pointer = GetCurrentCodePointer();
const void* resolve_pointer = GetCurrentFarCodePointer();
EmitBranch(resolve_pointer);
const u32 jump_size = static_cast<u32>(static_cast<const char*>(GetCurrentCodePointer()) -
static_cast<const char*>(jump_pointer));
void* jump_pointer = GetCurrentCodePointer();
void* resolve_pointer = GetCurrentFarCodePointer();
u32 jump_size;
// is the block we're going to already compiled? if so, we save a backpatch later...
CodeBlock* successor_block = CPU::CodeCache::LookupBlock(GetBlockKeyForBranchTarget(branch_target));
if (successor_block && successor_block->host_code && CPU::CodeCache::CanLinkBlocks(m_block, successor_block))
{
EmitBranch(successor_block->host_code, false);
jump_size = static_cast<u32>(static_cast<const char*>(GetCurrentCodePointer()) -
static_cast<const char*>(jump_pointer));
CodeCache::LinkBlock(m_block, successor_block, jump_pointer, resolve_pointer, jump_size);
}
else
{
EmitBranch(resolve_pointer, false);
jump_size = static_cast<u32>(static_cast<const char*>(GetCurrentCodePointer()) -
static_cast<const char*>(jump_pointer));
}
SwitchToFarCode();
EmitBeginBlock(true);
@@ -2379,11 +2404,28 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
EmitEndBlock(true, false);
const void* jump_pointer = GetCurrentCodePointer();
const void* resolve_pointer = GetCurrentFarCodePointer();
EmitBranch(GetCurrentFarCodePointer());
const u32 jump_size =
static_cast<u32>(static_cast<const char*>(GetCurrentCodePointer()) - static_cast<const char*>(jump_pointer));
void* jump_pointer = GetCurrentCodePointer();
void* resolve_pointer = GetCurrentFarCodePointer();
u32 jump_size;
// is the block we're going to already compiled? if so, we save a backpatch later...
CodeBlock* successor_block = CPU::CodeCache::LookupBlock(
GetBlockKeyForBranchTarget(condition != Condition::Always ? next_pc : branch_target));
if (successor_block && successor_block->host_code && CPU::CodeCache::CanLinkBlocks(m_block, successor_block))
{
EmitBranch(successor_block->host_code, false);
jump_size =
static_cast<u32>(static_cast<const char*>(GetCurrentCodePointer()) - static_cast<const char*>(jump_pointer));
CodeCache::LinkBlock(m_block, successor_block, jump_pointer, resolve_pointer, jump_size);
}
else
{
EmitBranch(resolve_pointer, false);
jump_size =
static_cast<u32>(static_cast<const char*>(GetCurrentCodePointer()) - static_cast<const char*>(jump_pointer));
}
SwitchToFarCode();
EmitBeginBlock(true);
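
One detail worth spelling out at this second site: when the branch is conditional, the block being probed is the fall-through successor at next_pc (this jump is the not-taken path); only an unconditional branch links the branch target itself here. A hedged sketch of that selection using plain PCs (the real code works on register-cache Values; the helper name is illustrative, and u32 is the project's 32-bit alias):

// Hedged sketch of the successor-PC selection above.
u32 SelectLinkTargetPC(bool is_conditional, u32 branch_target_pc, u32 next_pc)
{
  // Conditional branch: this exit is the not-taken path, so execution continues at next_pc.
  // Unconditional branch: there is no fall-through, so the taken target is linked instead.
  return is_conditional ? next_pc : branch_target_pc;
}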

View File

@@ -99,8 +99,7 @@ public:
const Value& value, bool in_far_code);
void EmitUpdateFastmemBase();
// Unconditional branch to pointer. May allocate a scratch register.
void EmitBranch(const void* address, bool allow_scratch = true);
void EmitBranch(const void* address, bool allow_short = true);
void EmitBranch(LabelType* label);
// Branching, generates two paths.
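
The rename from allow_scratch to allow_short reflects what appears to matter for block linking: a branch that may later be redirected by BackpatchBranch has to keep a fixed, full-size encoding, which is why the linked-branch sites in Compile_Branch pass false. A hedged illustration using standard x86-64 encodings (the byte counts are architectural facts, not values taken from this diff; u32 is the project's alias):

// A short jmp (rel8) is 2 bytes, a near jmp (rel32) is 5 bytes. A 2-byte branch cannot be
// repatched in place to reach an arbitrary new target, so patchable branches must not be short.
constexpr u32 kX64ShortJmpSize = 2;
constexpr u32 kX64NearJmpSize = 5;
static_assert(kX64NearJmpSize > kX64ShortJmpSize, "near form leaves room to repatch in place");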

View File

@@ -1312,7 +1312,7 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
fastmem_base = GetFastmemLoadBase();
// return to the block code
EmitBranch(GetCurrentNearCodePointer(), false);
EmitBranch(GetCurrentNearCodePointer());
SwitchToNearCode();
m_register_cache.UninhibitAllocation();
@@ -1469,7 +1469,7 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
fastmem_base = GetFastmemStoreBase();
// return to the block code
EmitBranch(GetCurrentNearCodePointer(), false);
EmitBranch(GetCurrentNearCodePointer());
SwitchToNearCode();
m_register_cache.UninhibitAllocation();
@@ -1809,7 +1809,7 @@ void CodeGenerator::EmitStallUntilGTEComplete()
m_emit->str(GetHostReg32(RARG1), a32::MemOperand(GetCPUPtrReg(), offsetof(State, pending_ticks)));
}
void CodeGenerator::EmitBranch(const void* address, bool allow_scratch)
void CodeGenerator::EmitBranch(const void* address, bool allow_short /* = true */)
{
const s32 displacement = GetPCDisplacement(GetCurrentCodePointer(), address);
if (IsPCDisplacementInImmediateRange(displacement))

View File

@@ -1491,7 +1491,7 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
// return to the block code
EmitBranch(GetCurrentNearCodePointer(), false);
EmitBranch(GetCurrentNearCodePointer());
SwitchToNearCode();
m_register_cache.UninhibitAllocation();
@@ -1664,7 +1664,7 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
// return to the block code
EmitBranch(GetCurrentNearCodePointer(), false);
EmitBranch(GetCurrentNearCodePointer());
SwitchToNearCode();
m_register_cache.UninhibitAllocation();
@@ -1996,7 +1996,7 @@ void CodeGenerator::EmitStallUntilGTEComplete()
m_emit->str(GetHostReg32(RARG1), a64::MemOperand(GetCPUPtrReg(), offsetof(State, pending_ticks)));
}
void CodeGenerator::EmitBranch(const void* address, bool allow_scratch)
void CodeGenerator::EmitBranch(const void* address, bool allow_short /* = true */)
{
const s64 jump_distance =
static_cast<s64>(reinterpret_cast<intptr_t>(address) - reinterpret_cast<intptr_t>(GetCurrentCodePointer()));
@@ -2007,8 +2007,6 @@ void CodeGenerator::EmitBranch(const void* address, bool allow_scratch)
return;
}
Assert(allow_scratch);
m_emit->Mov(GetHostReg64(RSCRATCH), reinterpret_cast<uintptr_t>(address));
m_emit->br(GetHostReg64(RSCRATCH));
}
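
On AArch64 the direct form above is only usable when the displacement fits the immediate branch range; otherwise the target address is moved into the scratch register and an indirect br is emitted, which is why the old Assert(allow_scratch) could be dropped. A hedged sketch of the reachability test (the +/-128 MiB figure is the architectural range of the A64 B imm26 encoding, not a value quoted from this diff):

// Hedged sketch: can 'to' be reached from 'from' with a single A64 B instruction?
bool IsDirectA64BranchReachable(const void* from, const void* to)
{
  const s64 displacement =
    static_cast<s64>(reinterpret_cast<intptr_t>(to) - reinterpret_cast<intptr_t>(from));
  constexpr s64 kRange = static_cast<s64>(128) * 1024 * 1024;  // +/-128 MiB
  return displacement >= -kRange && displacement < kRange;
}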

View File

@@ -2696,21 +2696,18 @@ void CodeGenerator::EmitStallUntilGTEComplete()
m_emit->mov(m_emit->dword[GetCPUPtrReg() + offsetof(State, pending_ticks)], GetHostReg32(RRETURN));
}
void CodeGenerator::EmitBranch(const void* address, bool allow_scratch)
void CodeGenerator::EmitBranch(const void* address, bool allow_short /* = true */)
{
const s64 jump_distance =
static_cast<s64>(reinterpret_cast<intptr_t>(address) - reinterpret_cast<intptr_t>(GetCurrentCodePointer()));
if (Xbyak::inner::IsInInt32(static_cast<u64>(jump_distance)))
{
m_emit->jmp(address);
m_emit->jmp(address, allow_short ? Xbyak::CodeGenerator::T_AUTO : Xbyak::CodeGenerator::T_NEAR);
return;
}
Assert(allow_scratch);
Value temp = m_register_cache.AllocateScratch(RegSize_64);
m_emit->mov(GetHostReg64(temp), reinterpret_cast<uintptr_t>(address));
m_emit->jmp(GetHostReg64(temp));
m_emit->mov(GetHostReg64(RRETURN), reinterpret_cast<uintptr_t>(address));
m_emit->jmp(GetHostReg64(RRETURN));
}
void CodeGenerator::EmitBranch(LabelType* label)
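
On x86-64 the allow_short flag maps onto Xbyak's label-type selector: T_AUTO lets Xbyak pick the shortest encoding it can, while T_NEAR forces the rel32 form so a branch that may later be backpatched keeps a fixed size. The out-of-range fallback also stops allocating a register-cache scratch and clobbers RRETURN directly, as shown above. A minimal usage sketch of the selector (assumes the Xbyak single-header library; the helper name and include path are illustrative):

#include <xbyak/xbyak.h>  // assumed include path for Xbyak

// Illustrative helper, not part of the commit: pick a compact vs. patchable encoding.
void EmitJumpSketch(Xbyak::CodeGenerator& emit, const void* target, bool allow_short)
{
  emit.jmp(target, allow_short ? Xbyak::CodeGenerator::T_AUTO : Xbyak::CodeGenerator::T_NEAR);
}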