From 2af1ef6cee283f6d537ed966db3316e435f591fc Mon Sep 17 00:00:00 2001
From: Connor McLaughlin
Date: Sat, 24 Jul 2021 14:56:12 +1000
Subject: [PATCH] CPU/Recompiler: Avoid backpatching already-compiled linked blocks

---
 src/core/cpu_code_cache.cpp                      | 56 +++++++++++------
 src/core/cpu_code_cache.h                        |  9 +++
 src/core/cpu_recompiler_code_generator.cpp       | 62 ++++++++++++++++---
 src/core/cpu_recompiler_code_generator.h         |  3 +-
 .../cpu_recompiler_code_generator_aarch32.cpp    |  6 +-
 .../cpu_recompiler_code_generator_aarch64.cpp    |  8 +--
 .../cpu_recompiler_code_generator_x64.cpp        | 11 ++--
 7 files changed, 109 insertions(+), 46 deletions(-)

diff --git a/src/core/cpu_code_cache.cpp b/src/core/cpu_code_cache.cpp
index 613308398..ef47cb5e4 100644
--- a/src/core/cpu_code_cache.cpp
+++ b/src/core/cpu_code_cache.cpp
@@ -195,7 +195,7 @@ void LogCurrentState();
 static CodeBlockKey GetNextBlockKey();
 
 /// Looks up the block in the cache if it's already been compiled.
-static CodeBlock* LookupBlock(CodeBlockKey key);
+static CodeBlock* LookupOrCompileBlock(CodeBlockKey key);
 
 /// Can the current block execute? This will re-validate the block if necessary.
 /// The block can also be flushed if recompilation failed, so ignore the pointer if false is returned.
@@ -206,9 +206,6 @@ static void RemoveReferencesToBlock(CodeBlock* block);
 static void AddBlockToPageMap(CodeBlock* block);
 static void RemoveBlockFromPageMap(CodeBlock* block);
 
-/// Link block from to to. Returns the successor index.
-static void LinkBlock(CodeBlock* from, CodeBlock* to, void* host_pc, void* host_resolve_pc, u32 host_pc_size);
-
 /// Unlink all blocks which point to this block, and any that this block links to.
 static void UnlinkBlock(CodeBlock* block);
 
@@ -310,7 +307,7 @@ static void ExecuteImpl()
     next_block_key = GetNextBlockKey();
     while (g_state.pending_ticks < g_state.downcount)
     {
-      CodeBlock* block = LookupBlock(next_block_key);
+      CodeBlock* block = LookupOrCompileBlock(next_block_key);
      if (!block)
       {
         InterpretUncachedBlock();
@@ -371,7 +368,7 @@ static void ExecuteImpl()
        }
 
        // No acceptable blocks found in the successor list, try a new one.
-        CodeBlock* next_block = LookupBlock(next_block_key);
+        CodeBlock* next_block = LookupOrCompileBlock(next_block_key);
        if (next_block)
        {
          // Link the previous block to this new block if we find a new block.
@@ -537,6 +534,24 @@ static void FallbackExistingBlockToInterpreter(CodeBlock* block)
 }
 
 CodeBlock* LookupBlock(CodeBlockKey key)
+{
+  BlockMap::iterator iter = s_blocks.find(key.bits);
+  if (iter == s_blocks.end())
+    return nullptr;
+
+  // ensure it hasn't been invalidated
+  CodeBlock* existing_block = iter->second;
+  if (!existing_block || !existing_block->invalidated)
+    return existing_block;
+
+  // the block was invalidated; bail out if it fails revalidation
+  if (RevalidateBlock(existing_block))
+    return existing_block;
+  else
+    return nullptr;
+}
+
+CodeBlock* LookupOrCompileBlock(CodeBlockKey key)
 {
   BlockMap::iterator iter = s_blocks.find(key.bits);
   if (iter != s_blocks.end())
@@ -797,7 +812,7 @@ bool CompileBlock(CodeBlock* block)
 
 void FastCompileBlockFunction()
 {
-  CodeBlock* block = LookupBlock(GetNextBlockKey());
+  CodeBlock* block = LookupOrCompileBlock(GetNextBlockKey());
   if (block)
   {
     s_single_block_asm_dispatcher(block->host_code);
@@ -938,15 +953,6 @@ void LinkBlock(CodeBlock* from, CodeBlock* to, void* host_pc, void* host_resolve
 
   li.block = from;
   to->link_predecessors.push_back(li);
-
-  // apply in code
-  if (host_pc)
-  {
-    Log_ProfilePrintf("Backpatching %p(%08x) to jump to block %p (%08x)", host_pc, from->GetPC(), to, to->GetPC());
-    s_code_buffer.WriteProtect(false);
-    Recompiler::CodeGenerator::BackpatchBranch(host_pc, host_pc_size, reinterpret_cast<void*>(to->host_code));
-    s_code_buffer.WriteProtect(true);
-  }
 }
 
 void UnlinkBlock(CodeBlock* block)
@@ -1174,6 +1180,12 @@ Common::PageFaultHandler::HandlerResult LUTPageFaultHandler(void* exception_pc,
   return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
 }
 
+bool CanLinkBlocks(CodeBlock* block, CodeBlock* successor_block)
+{
+  return (block->can_link && successor_block->can_link &&
+          (!successor_block->invalidated || RevalidateBlock(successor_block)));
+}
+
 #endif // WITH_RECOMPILER
 
 } // namespace CPU::CodeCache
@@ -1185,9 +1197,8 @@ void CPU::Recompiler::Thunks::ResolveBranch(CodeBlock* block, void* host_pc, voi
   using namespace CPU::CodeCache;
 
   CodeBlockKey key = GetNextBlockKey();
-  CodeBlock* successor_block = LookupBlock(key);
-  if (!successor_block || (successor_block->invalidated && !RevalidateBlock(successor_block)) || !block->can_link ||
-      !successor_block->can_link)
+  CodeBlock* successor_block = LookupOrCompileBlock(key);
+  if (!successor_block || !successor_block->host_code || !CanLinkBlocks(block, successor_block))
   {
     // just turn it into a return to the dispatcher instead.
     s_code_buffer.WriteProtect(false);
@@ -1198,6 +1209,13 @@ void CPU::Recompiler::Thunks::ResolveBranch(CodeBlock* block, void* host_pc, voi
   {
     // link blocks!
     LinkBlock(block, successor_block, host_pc, host_resolve_pc, host_pc_size);
+
+    Log_ProfilePrintf("Backpatching %p(%08x) to jump to block %p (%08x)", host_pc, block->GetPC(), successor_block,
+                      successor_block->GetPC());
+    s_code_buffer.WriteProtect(false);
+    Recompiler::CodeGenerator::BackpatchBranch(host_pc, host_pc_size,
+                                               reinterpret_cast<void*>(successor_block->host_code));
+    s_code_buffer.WriteProtect(true);
   }
 }
 
diff --git a/src/core/cpu_code_cache.h b/src/core/cpu_code_cache.h
index 1b4debe5d..d56db6144 100644
--- a/src/core/cpu_code_cache.h
+++ b/src/core/cpu_code_cache.h
@@ -140,6 +140,9 @@ void Reinitialize();
 /// Invalidates all blocks which are in the range of the specified code page.
 void InvalidateBlocksWithPageIndex(u32 page_index);
 
+/// Looks up the block in the cache if it's already been compiled.
+CodeBlock* LookupBlock(CodeBlockKey key);
+
 template<PGXPMode pgxp_mode>
 void InterpretCachedBlock(const CodeBlock& block);
 
@@ -158,6 +161,12 @@ ALWAYS_INLINE void InvalidateCodePages(PhysicalMemoryAddress address, u32 word_c
   }
 }
 
+/// Returns true if the two specified blocks can be linked.
+bool CanLinkBlocks(CodeBlock* block, CodeBlock* successor_block);
+
+/// Links the 'from' block to the 'to' block.
+void LinkBlock(CodeBlock* from, CodeBlock* to, void* host_pc, void* host_resolve_pc, u32 host_pc_size);
+
 }; // namespace CodeCache
 
 } // namespace CPU
diff --git a/src/core/cpu_recompiler_code_generator.cpp b/src/core/cpu_recompiler_code_generator.cpp
index 7379565e1..a2aa627c2 100644
--- a/src/core/cpu_recompiler_code_generator.cpp
+++ b/src/core/cpu_recompiler_code_generator.cpp
@@ -2185,6 +2185,14 @@ bool CodeGenerator::Compile_SetLess(const CodeBlockInstruction& cbi)
   return true;
 }
 
+static CodeBlockKey GetBlockKeyForBranchTarget(const Value& value)
+{
+  CodeBlockKey key = {};
+  key.SetPC(static_cast<u32>(value.constant_value));
+  key.user_mode = CPU::InUserMode();
+  return key;
+}
+
 bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
 {
   InstructionPrologue(cbi, 1);
@@ -2192,6 +2200,7 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
   auto DoBranch = [this, &cbi](Condition condition, const Value& lhs, const Value& rhs, Reg lr_reg,
                                Value&& branch_target) {
     const bool can_link_block = cbi.is_direct_branch_instruction && g_settings.cpu_recompiler_block_linking;
+    Assert(!can_link_block || branch_target.IsConstant());
 
     // ensure the lr register is flushed, since we want its correct value after the branch
     // we don't want to invalidate it yet because of "jalr r0, r0", branch_target could be the lr_reg.
@@ -2344,11 +2353,27 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
       // we're committed at this point :D
       EmitEndBlock(true, false);
 
-      const void* jump_pointer = GetCurrentCodePointer();
-      const void* resolve_pointer = GetCurrentFarCodePointer();
-      EmitBranch(resolve_pointer);
-      const u32 jump_size = static_cast<u32>(static_cast<const u8*>(GetCurrentCodePointer()) -
-                                             static_cast<const u8*>(jump_pointer));
+      void* jump_pointer = GetCurrentCodePointer();
+      void* resolve_pointer = GetCurrentFarCodePointer();
+      u32 jump_size;
+
+      // is the block we're jumping to already compiled? linking it now saves backpatching later...
+      CodeBlock* successor_block = CPU::CodeCache::LookupBlock(GetBlockKeyForBranchTarget(branch_target));
+      if (successor_block && successor_block->host_code && CPU::CodeCache::CanLinkBlocks(m_block, successor_block))
+      {
+        EmitBranch(successor_block->host_code, false);
+        jump_size = static_cast<u32>(static_cast<const u8*>(GetCurrentCodePointer()) -
+                                     static_cast<const u8*>(jump_pointer));
+
+        CodeCache::LinkBlock(m_block, successor_block, jump_pointer, resolve_pointer, jump_size);
+      }
+      else
+      {
+        EmitBranch(resolve_pointer, false);
+        jump_size = static_cast<u32>(static_cast<const u8*>(GetCurrentCodePointer()) -
+                                     static_cast<const u8*>(jump_pointer));
+      }
+
       SwitchToFarCode();
       EmitBeginBlock(true);
 
@@ -2379,11 +2404,28 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
 
     EmitEndBlock(true, false);
 
-    const void* jump_pointer = GetCurrentCodePointer();
-    const void* resolve_pointer = GetCurrentFarCodePointer();
-    EmitBranch(GetCurrentFarCodePointer());
-    const u32 jump_size =
-      static_cast<u32>(static_cast<const u8*>(GetCurrentCodePointer()) - static_cast<const u8*>(jump_pointer));
+    void* jump_pointer = GetCurrentCodePointer();
+    void* resolve_pointer = GetCurrentFarCodePointer();
+    u32 jump_size;
+
+    // is the block we're jumping to already compiled? linking it now saves backpatching later...
+    CodeBlock* successor_block = CPU::CodeCache::LookupBlock(
+      GetBlockKeyForBranchTarget(condition != Condition::Always ? next_pc : branch_target));
+    if (successor_block && successor_block->host_code && CPU::CodeCache::CanLinkBlocks(m_block, successor_block))
+    {
+      EmitBranch(successor_block->host_code, false);
+      jump_size =
+        static_cast<u32>(static_cast<const u8*>(GetCurrentCodePointer()) - static_cast<const u8*>(jump_pointer));
+
+      CodeCache::LinkBlock(m_block, successor_block, jump_pointer, resolve_pointer, jump_size);
+    }
+    else
+    {
+      EmitBranch(resolve_pointer, false);
+      jump_size =
+        static_cast<u32>(static_cast<const u8*>(GetCurrentCodePointer()) - static_cast<const u8*>(jump_pointer));
+    }
+
     SwitchToFarCode();
     EmitBeginBlock(true);
 
diff --git a/src/core/cpu_recompiler_code_generator.h b/src/core/cpu_recompiler_code_generator.h
index 49cc452c8..1ca237eec 100644
--- a/src/core/cpu_recompiler_code_generator.h
+++ b/src/core/cpu_recompiler_code_generator.h
@@ -99,8 +99,7 @@ public:
                             const Value& value, bool in_far_code);
   void EmitUpdateFastmemBase();
 
-  // Unconditional branch to pointer. May allocate a scratch register.
-  void EmitBranch(const void* address, bool allow_scratch = true);
+  void EmitBranch(const void* address, bool allow_short = true);
   void EmitBranch(LabelType* label);
 
   // Branching, generates two paths.
diff --git a/src/core/cpu_recompiler_code_generator_aarch32.cpp b/src/core/cpu_recompiler_code_generator_aarch32.cpp
index 13931cbda..c1b289365 100644
--- a/src/core/cpu_recompiler_code_generator_aarch32.cpp
+++ b/src/core/cpu_recompiler_code_generator_aarch32.cpp
@@ -1312,7 +1312,7 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
     fastmem_base = GetFastmemLoadBase();
 
   // return to the block code
-  EmitBranch(GetCurrentNearCodePointer(), false);
+  EmitBranch(GetCurrentNearCodePointer());
   SwitchToNearCode();
 
   m_register_cache.UninhibitAllocation();
@@ -1469,7 +1469,7 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
     fastmem_base = GetFastmemStoreBase();
 
   // return to the block code
-  EmitBranch(GetCurrentNearCodePointer(), false);
+  EmitBranch(GetCurrentNearCodePointer());
   SwitchToNearCode();
 
   m_register_cache.UninhibitAllocation();
@@ -1809,7 +1809,7 @@ void CodeGenerator::EmitStallUntilGTEComplete()
   m_emit->str(GetHostReg32(RARG1), a32::MemOperand(GetCPUPtrReg(), offsetof(State, pending_ticks)));
 }
 
-void CodeGenerator::EmitBranch(const void* address, bool allow_scratch)
+void CodeGenerator::EmitBranch(const void* address, bool allow_short /* = true */)
 {
   const s32 displacement = GetPCDisplacement(GetCurrentCodePointer(), address);
   if (IsPCDisplacementInImmediateRange(displacement))
diff --git a/src/core/cpu_recompiler_code_generator_aarch64.cpp b/src/core/cpu_recompiler_code_generator_aarch64.cpp
index e58a79800..192eec764 100644
--- a/src/core/cpu_recompiler_code_generator_aarch64.cpp
+++ b/src/core/cpu_recompiler_code_generator_aarch64.cpp
@@ -1491,7 +1491,7 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
                 Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
 
   // return to the block code
-  EmitBranch(GetCurrentNearCodePointer(), false);
+  EmitBranch(GetCurrentNearCodePointer());
   SwitchToNearCode();
 
   m_register_cache.UninhibitAllocation();
@@ -1664,7 +1664,7 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
                 Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
 
   // return to the block code
-  EmitBranch(GetCurrentNearCodePointer(), false);
+  EmitBranch(GetCurrentNearCodePointer());
   SwitchToNearCode();
 
   m_register_cache.UninhibitAllocation();
@@ -1996,7 +1996,7 @@ void CodeGenerator::EmitStallUntilGTEComplete()
   m_emit->str(GetHostReg32(RARG1), a64::MemOperand(GetCPUPtrReg(), offsetof(State, pending_ticks)));
 }
 
-void CodeGenerator::EmitBranch(const void* address, bool allow_scratch)
+void CodeGenerator::EmitBranch(const void* address, bool allow_short /* = true */)
 {
   const s64 jump_distance =
     static_cast<s64>(reinterpret_cast<intptr_t>(address) - reinterpret_cast<intptr_t>(GetCurrentCodePointer()));
@@ -2007,8 +2007,6 @@ void CodeGenerator::EmitBranch(const void* address, bool allow_scratch)
     return;
   }
 
-  Assert(allow_scratch);
-
   m_emit->Mov(GetHostReg64(RSCRATCH), reinterpret_cast<uintptr_t>(address));
   m_emit->br(GetHostReg64(RSCRATCH));
 }
diff --git a/src/core/cpu_recompiler_code_generator_x64.cpp b/src/core/cpu_recompiler_code_generator_x64.cpp
index b261158ad..ac7214f34 100644
--- a/src/core/cpu_recompiler_code_generator_x64.cpp
+++ b/src/core/cpu_recompiler_code_generator_x64.cpp
@@ -2696,21 +2696,18 @@ void CodeGenerator::EmitStallUntilGTEComplete()
   m_emit->mov(m_emit->dword[GetCPUPtrReg() + offsetof(State, pending_ticks)], GetHostReg32(RRETURN));
 }
 
-void CodeGenerator::EmitBranch(const void* address, bool allow_scratch)
+void CodeGenerator::EmitBranch(const void* address, bool allow_short /* = true */)
 {
   const s64 jump_distance =
     static_cast<s64>(reinterpret_cast<intptr_t>(address) - reinterpret_cast<intptr_t>(GetCurrentCodePointer()));
   if (Xbyak::inner::IsInInt32(static_cast<u64>(jump_distance)))
   {
-    m_emit->jmp(address);
+    m_emit->jmp(address, allow_short ? Xbyak::CodeGenerator::T_AUTO : Xbyak::CodeGenerator::T_NEAR);
     return;
   }
 
-  Assert(allow_scratch);
-
-  Value temp = m_register_cache.AllocateScratch(RegSize_64);
-  m_emit->mov(GetHostReg64(temp), reinterpret_cast<size_t>(address));
-  m_emit->jmp(GetHostReg64(temp));
+  m_emit->mov(GetHostReg64(RRETURN), reinterpret_cast<size_t>(address));
+  m_emit->jmp(GetHostReg64(RRETURN));
 }
 
 void CodeGenerator::EmitBranch(LabelType* label)
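
Note: the sketch below is not part of the patch. It is a minimal, self-contained C++ illustration of the decision the patch moves into Compile_Branch(): when emitting a block's terminating jump, prefer a direct branch to an already-compiled, linkable successor, and only fall back to the far-code resolve stub (which backpatches the jump on first execution) otherwise. All types and names here (CodeBlock, CanLink, ChooseBranchTarget, resolve_stub) are simplified stand-ins, not the real DuckStation API.

#include <cstdint>
#include <cstdio>
#include <unordered_map>
#include <vector>

// Simplified stand-in for the real CodeBlock/CodeBlockKey types.
struct CodeBlock
{
  uint32_t pc = 0;
  const void* host_code = nullptr; // null until the block has been compiled
  bool can_link = true;
  bool invalidated = false;
  std::vector<CodeBlock*> link_successors;
};

static std::unordered_map<uint32_t, CodeBlock> s_blocks;

// Rough equivalent of CanLinkBlocks(): both blocks must allow linking and the
// successor must still be valid (revalidation is omitted in this sketch).
static bool CanLink(const CodeBlock& from, const CodeBlock& to)
{
  return from.can_link && to.can_link && !to.invalidated;
}

// Rough equivalent of the new Compile_Branch() tail: if the successor block is
// already compiled and linkable, branch straight to its host code and record the
// link now; otherwise branch to the resolve stub, which backpatches later.
static const void* ChooseBranchTarget(CodeBlock& from, uint32_t target_pc, const void* resolve_stub)
{
  auto it = s_blocks.find(target_pc);
  if (it != s_blocks.end() && it->second.host_code && CanLink(from, it->second))
  {
    from.link_successors.push_back(&it->second); // remember the link so invalidation can undo it
    return it->second.host_code;                 // direct branch, no backpatch needed
  }
  return resolve_stub; // resolved (and backpatched) on first execution instead
}

int main()
{
  static const char compiled_code[] = "compiled block";
  static const char resolve_stub[] = "resolve stub";

  CodeBlock& target = s_blocks[0x80001000];
  target.pc = 0x80001000;
  target.host_code = compiled_code; // pretend this block was compiled earlier

  CodeBlock from;
  from.pc = 0x80000000;

  // Already-compiled target: the emitted jump goes straight to it.
  std::printf("0x80001000 -> %s\n", static_cast<const char*>(ChooseBranchTarget(from, 0x80001000, resolve_stub)));
  // Unknown target: fall back to the resolve stub.
  std::printf("0x80002000 -> %s\n", static_cast<const char*>(ChooseBranchTarget(from, 0x80002000, resolve_stub)));
  return 0;
}

The trade-off shown here matches the patch: linking at compile time avoids a later write to already-emitted code (the backpatch in ResolveBranch), at the cost of a block lookup while compiling the branch.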