diff --git a/rpcs3/Emu/CPU/Backends/AArch64JIT.cpp b/rpcs3/Emu/CPU/Backends/AArch64JIT.cpp index ab22a1fa32..559b6853ce 100644 --- a/rpcs3/Emu/CPU/Backends/AArch64JIT.cpp +++ b/rpcs3/Emu/CPU/Backends/AArch64JIT.cpp @@ -47,7 +47,7 @@ namespace aarch64 continue; } - if (llvm::dyn_cast(&*bit)) + if (llvm::isa(&*bit)) { if (auto ci = llvm::dyn_cast(&*prev)) { @@ -63,7 +63,7 @@ namespace aarch64 } } - function_info_t GHC_frame_preservation_pass::preprocess_function(llvm::Function& f) + function_info_t GHC_frame_preservation_pass::preprocess_function(const llvm::Function& f) { function_info_t result{}; result.instruction_count = f.getInstructionCount(); @@ -100,6 +100,7 @@ namespace aarch64 // This is another thing to be moved to a MachineFunction pass. Ideally we should check the instruction stream for writes to LR and reload it on exit. // For now, assume it is dirtied if the function is of any reasonable length. result.clobbers_x30 = result.instruction_count > 32; + result.is_leaf = true; for (auto& bb : f) { @@ -107,8 +108,23 @@ namespace aarch64 { if (auto ci = llvm::dyn_cast(&inst)) { + if (llvm::isa(ci->getCalledOperand())) + { + // Inline ASM blocks are ignored + continue; + } + result.num_external_calls++; - result.clobbers_x30 |= (!ci->isTailCall()); + if (ci->isTailCall()) + { + // This is not a leaf if it has at least one exit point / terminator that is not a return instruction. + result.is_leaf = false; + } + else + { + // Returning calls always clobber x30 + result.clobbers_x30 = true; + } } } } @@ -116,7 +132,7 @@ namespace aarch64 return result; } - instruction_info_t GHC_frame_preservation_pass::decode_instruction(llvm::Function& f, llvm::Instruction* i) + instruction_info_t GHC_frame_preservation_pass::decode_instruction(const llvm::Function& f, const llvm::Instruction* i) { instruction_info_t result{}; if (auto ci = llvm::dyn_cast(i)) @@ -202,7 +218,7 @@ namespace aarch64 return result; } - gpr GHC_frame_preservation_pass::get_base_register_for_call(const std::string& callee_name) + gpr GHC_frame_preservation_pass::get_base_register_for_call(const std::string& callee_name, gpr default_reg) { // We go over the base_register_lookup table and find the first matching pattern for (const auto& pattern : m_config.base_register_lookup) @@ -213,8 +229,7 @@ namespace aarch64 } } - // Default is x19 - return aarch64::x19; + return default_reg; } void GHC_frame_preservation_pass::run(llvm::IRBuilder<>* irb, llvm::Function& f) @@ -262,6 +277,16 @@ namespace aarch64 // Force tail calls on all terminators force_tail_call_terminators(f); + // Check for leaves + if (function_info.is_leaf && !m_config.use_stack_frames) + { + // Sanity check. If this function had no returning calls, it should have been omitted from processing. + ensure(function_info.clobbers_x30, "Function has no terminator and no non-tail calls but was allowed for frame processing!"); + DPRINT("Function %s is a leaf.", this_name.c_str()); + process_leaf_function(irb, f); + return; + } + // Asm snippets for patching stack frame std::string frame_prologue, frame_epilogue; @@ -413,6 +438,149 @@ namespace aarch64 } } - ensure(terminator_found, "Could not find terminator for function!"); + if (!terminator_found) + { + // If we got here, we must be using stack frames. + ensure(function_info.is_leaf && function_info.stack_frame_size > 0, "Leaf function was processed without using stack frames!"); + + // We want to insert a frame cleanup at the tail at every return instruction we find. + for (auto& bb : f) + { + for (auto& i : bb) + { + if (is_ret_instruction(&i)) + { + irb->SetInsertPoint(&i); + LLVM_ASM_VOID(frame_epilogue, irb, f.getContext()); + } + } + } + } + } + + bool GHC_frame_preservation_pass::is_ret_instruction(const llvm::Instruction* i) + { + if (llvm::isa(i)) + { + return true; + } + + // Check for inline asm invoking "ret". This really shouldn't be a thing, but it is present in SPULLVMRecompiler for some reason. + if (auto ci = llvm::dyn_cast(i)) + { + if (auto asm_ = llvm::dyn_cast(ci->getCalledOperand())) + { + if (asm_->getAsmString() == "ret") + { + return true; + } + } + } + + return false; + } + + bool GHC_frame_preservation_pass::is_inlined_call(const llvm::CallInst* ci) + { + const auto callee = ci->getCalledFunction(); + if (!callee) + { + // Indirect BLR + return false; + } + + const std::string callee_name = callee->getName().str(); + if (callee_name.starts_with("llvm.")) + { + // Intrinsic + return true; + } + + if (callee->hasFnAttribute(llvm::Attribute::AlwaysInline)) + { + // Assume LLVM always obeys this + return true; + } + + return false; + } + + void GHC_frame_preservation_pass::process_leaf_function(llvm::IRBuilder<>* irb, llvm::Function& f) + { + // Leaf functions have no true external calls. + // After every external call, restore LR from the thread context register. + bool restore_LR = false; + gpr thread_context_reg = aarch64::x19; + + const auto restore_x30 = [&](llvm::Instruction* where) + { + const auto x30_tail_restore = fmt::format( + "ldr x30, [x%u, #%u];\n", // Load x30 from thread context + static_cast(thread_context_reg), + m_config.hypervisor_context_offset); + + ensure(where); + irb->SetInsertPoint(where); + LLVM_ASM_VOID(x30_tail_restore, irb, f.getContext()); + }; + + for (auto &bb : f) + { + for (auto bit = bb.begin(); bit != bb.end();) + { + if (restore_LR) + { + restore_x30(llvm::dyn_cast(bit)); + restore_LR = false; + } + + if (auto ci = llvm::dyn_cast(bit)) + { + // Returning call? + if (is_inlined_call(ci)) + { + ++bit; + continue; + } + + const auto callee = ci->getCalledFunction(); + const std::string callee_name = callee ? callee->getName().str() : "__indirect"; + auto base_reg = get_base_register_for_call(callee_name, m_config.base_register_lookup.empty() ? gpr::x19 : gpr::xzr); + + // Check if the call is unexpected. We cannot guarantee that the reload is well-formed in such scenarios + if (base_reg == gpr::xzr) + { + if (callee) + { + // We don't know what we're dealing with here + DPRINT("Unexpected call to %s. We cannot guarantee sane codegen!", callee_name.c_str()); + } + + // Assume x19 + base_reg = gpr::x19; + } + + restore_LR = true; + thread_context_reg = base_reg; + } + + if (m_config.debug_info && is_ret_instruction(llvm::dyn_cast(bit))) + { + // We need to save the chain return point. + LLVM_ASM_VOID( + "mov x29, x28;\n" + "mov x28, x27;\n" + "adr x27, .;\n", + irb, f.getContext()); + } + + if (bit != bb.end()) + { + ++bit; + } + } + } + + ensure(!restore_LR, "Dangling function call found"); } } diff --git a/rpcs3/Emu/CPU/Backends/AArch64JIT.h b/rpcs3/Emu/CPU/Backends/AArch64JIT.h index f40d7b9345..225874c848 100644 --- a/rpcs3/Emu/CPU/Backends/AArch64JIT.h +++ b/rpcs3/Emu/CPU/Backends/AArch64JIT.h @@ -16,7 +16,8 @@ namespace aarch64 x0 = 0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, - x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30 + x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, + xzr, pc, sp }; // On non-x86 architectures GHC runs stackless. SP is treated as a pointer to scratchpad memory. @@ -30,6 +31,7 @@ namespace aarch64 u32 num_external_calls; u32 stack_frame_size; // Guessing this properly is critical for vector-heavy functions where spilling is a lot more common bool clobbers_x30; + bool is_leaf; }; struct instruction_info_t @@ -60,11 +62,17 @@ namespace aarch64 void force_tail_call_terminators(llvm::Function& f); - function_info_t preprocess_function(llvm::Function& f); + function_info_t preprocess_function(const llvm::Function& f); - instruction_info_t decode_instruction(llvm::Function& f, llvm::Instruction* i); + instruction_info_t decode_instruction(const llvm::Function& f, const llvm::Instruction* i); - gpr get_base_register_for_call(const std::string& callee_name); + bool is_ret_instruction(const llvm::Instruction* i); + + bool is_inlined_call(const llvm::CallInst* ci); + + gpr get_base_register_for_call(const std::string& callee_name, gpr default_reg = gpr::x19); + + void process_leaf_function(llvm::IRBuilder<>* irb, llvm::Function& f); public: GHC_frame_preservation_pass(const config_t& configuration);