diff --git a/rpcs3/Emu/Cell/PPUFunction.cpp b/rpcs3/Emu/Cell/PPUFunction.cpp index a0084d78dd..29a22a5eec 100644 --- a/rpcs3/Emu/Cell/PPUFunction.cpp +++ b/rpcs3/Emu/Cell/PPUFunction.cpp @@ -1,5 +1,6 @@ #include "stdafx.h" #include "PPUFunction.h" +#include "Utilities/JIT.h" #include "PPUModule.h" @@ -1888,7 +1889,7 @@ extern std::string ppu_get_variable_name(const std::string& _module, u32 vnid) return fmt::format("0x%08X", vnid); } -std::vector& ppu_function_manager::access() +std::vector& ppu_function_manager::access(bool ghc) { static std::vector list { @@ -1907,15 +1908,43 @@ std::vector& ppu_function_manager::access() }, }; - return list; + static std::vector list_ghc + { + build_function_asm([](asmjit::X86Assembler& c, auto& args) + { + using namespace asmjit; + + c.mov(args[0], x86::rbp); + c.jmp(imm_ptr(list[0])); + }), + build_function_asm([](asmjit::X86Assembler& c, auto& args) + { + using namespace asmjit; + + c.mov(args[0], x86::rbp); + c.jmp(imm_ptr(list[1])); + }), + }; + + return ghc ? list_ghc : list; } u32 ppu_function_manager::add_function(ppu_function_t function) { auto& list = access(); + auto& list2 = access(true); list.push_back(function); + // Generate trampoline + list2.push_back(build_function_asm([&](asmjit::X86Assembler& c, auto& args) + { + using namespace asmjit; + + c.mov(args[0], x86::rbp); + c.jmp(imm_ptr(function)); + })); + return ::size32(list) - 1; } diff --git a/rpcs3/Emu/Cell/PPUFunction.h b/rpcs3/Emu/Cell/PPUFunction.h index 665b78e74a..5466cedc3f 100644 --- a/rpcs3/Emu/Cell/PPUFunction.h +++ b/rpcs3/Emu/Cell/PPUFunction.h @@ -256,7 +256,7 @@ class ppu_function_manager }; // Access global function list - static std::vector& access(); + static std::vector& access(bool ghc = false); static u32 add_function(ppu_function_t function); @@ -276,9 +276,9 @@ public: } // Read all registered functions - static inline const auto& get() + static inline const auto& get(bool llvm = false) { - return access(); + return access(llvm); } static inline u32 func_addr(u32 index) diff --git a/rpcs3/Emu/Cell/PPUModule.cpp b/rpcs3/Emu/Cell/PPUModule.cpp index fb00c1110e..1f5764ca71 100644 --- a/rpcs3/Emu/Cell/PPUModule.cpp +++ b/rpcs3/Emu/Cell/PPUModule.cpp @@ -244,7 +244,7 @@ static void ppu_initialize_modules(ppu_linkage_info* link) }; // Initialize double-purpose fake OPD array for HLE functions - const auto& hle_funcs = ppu_function_manager::get(); + const auto& hle_funcs = ppu_function_manager::get(g_cfg.core.ppu_decoder == ppu_decoder_type::llvm); // Allocate memory for the array (must be called after fixed allocations) ppu_function_manager::addr = vm::alloc(::size32(hle_funcs) * 8, vm::main); diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 9b096853e5..ba5449ec9c 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -130,6 +130,128 @@ static bool ppu_break(ppu_thread& ppu, ppu_opcode_t op); extern void do_cell_atomic_128_store(u32 addr, const void* to_write); +const auto ppu_gateway = build_function_asm([](asmjit::X86Assembler& c, auto& args) +{ + // Gateway for PPU, converts from native to GHC calling convention, also saves RSP value for escape + using namespace asmjit; + +#ifdef _WIN32 + c.push(x86::r15); + c.push(x86::r14); + c.push(x86::r13); + c.push(x86::r12); + c.push(x86::rsi); + c.push(x86::rdi); + c.push(x86::rbp); + c.push(x86::rbx); + c.sub(x86::rsp, 0xa8); + c.movaps(x86::oword_ptr(x86::rsp, 0x90), x86::xmm15); + c.movaps(x86::oword_ptr(x86::rsp, 0x80), x86::xmm14); + c.movaps(x86::oword_ptr(x86::rsp, 0x70), x86::xmm13); + c.movaps(x86::oword_ptr(x86::rsp, 0x60), x86::xmm12); + c.movaps(x86::oword_ptr(x86::rsp, 0x50), x86::xmm11); + c.movaps(x86::oword_ptr(x86::rsp, 0x40), x86::xmm10); + c.movaps(x86::oword_ptr(x86::rsp, 0x30), x86::xmm9); + c.movaps(x86::oword_ptr(x86::rsp, 0x20), x86::xmm8); + c.movaps(x86::oword_ptr(x86::rsp, 0x10), x86::xmm7); + c.movaps(x86::oword_ptr(x86::rsp, 0), x86::xmm6); +#else + c.push(x86::rbp); + c.push(x86::r15); + c.push(x86::r14); + c.push(x86::r13); + c.push(x86::r12); + c.push(x86::rbx); + c.push(x86::rax); +#endif + + // Save native stack pointer for longjmp emulation + c.mov(x86::qword_ptr(args[0], ::offset32(&ppu_thread::saved_native_sp)), x86::rsp); + + // Initialize args + c.mov(x86::r13, x86::qword_ptr(reinterpret_cast(&vm::g_exec_addr))); + c.mov(x86::rbp, args[0]); + c.mov(x86::edx, x86::dword_ptr(x86::rbp, ::offset32(&ppu_thread::cia))); // Load PC + + c.mov(x86::rax, x86::qword_ptr(x86::r13, x86::edx, 1, 0)); // Load call target + c.mov(x86::rdx, x86::rax); + c.shl(x86::rax, 17); + c.shr(x86::rax, 17); + c.shr(x86::rdx, 47); + c.shl(x86::rdx, 12); + c.mov(x86::r12d, x86::edx); // Load relocation base + + c.mov(x86::rbx, x86::qword_ptr(reinterpret_cast(&vm::g_base_addr))); + c.mov(x86::r14, x86::qword_ptr(x86::rbp, ::offset32(&ppu_thread::gpr, 0))); // Load some registers + c.mov(x86::rsi, x86::qword_ptr(x86::rbp, ::offset32(&ppu_thread::gpr, 1))); + c.mov(x86::rdi, x86::qword_ptr(x86::rbp, ::offset32(&ppu_thread::gpr, 2))); + + if (utils::has_avx()) + { + c.vzeroupper(); + } + + c.call(x86::rax); + + if (utils::has_avx()) + { + c.vzeroupper(); + } + +#ifdef _WIN32 + c.movaps(x86::xmm6, x86::oword_ptr(x86::rsp, 0)); + c.movaps(x86::xmm7, x86::oword_ptr(x86::rsp, 0x10)); + c.movaps(x86::xmm8, x86::oword_ptr(x86::rsp, 0x20)); + c.movaps(x86::xmm9, x86::oword_ptr(x86::rsp, 0x30)); + c.movaps(x86::xmm10, x86::oword_ptr(x86::rsp, 0x40)); + c.movaps(x86::xmm11, x86::oword_ptr(x86::rsp, 0x50)); + c.movaps(x86::xmm12, x86::oword_ptr(x86::rsp, 0x60)); + c.movaps(x86::xmm13, x86::oword_ptr(x86::rsp, 0x70)); + c.movaps(x86::xmm14, x86::oword_ptr(x86::rsp, 0x80)); + c.movaps(x86::xmm15, x86::oword_ptr(x86::rsp, 0x90)); + c.add(x86::rsp, 0xa8); + c.pop(x86::rbx); + c.pop(x86::rbp); + c.pop(x86::rdi); + c.pop(x86::rsi); + c.pop(x86::r12); + c.pop(x86::r13); + c.pop(x86::r14); + c.pop(x86::r15); +#else + c.add(x86::rsp, +8); + c.pop(x86::rbx); + c.pop(x86::r12); + c.pop(x86::r13); + c.pop(x86::r14); + c.pop(x86::r15); + c.pop(x86::rbp); +#endif + + c.ret(); +}); + +const extern auto ppu_escape = build_function_asm([](asmjit::X86Assembler& c, auto& args) +{ + using namespace asmjit; + + // Restore native stack pointer (longjmp emulation) + c.mov(x86::rsp, x86::qword_ptr(args[0], ::offset32(&ppu_thread::saved_native_sp))); + + // Return to the return location + c.jmp(x86::qword_ptr(x86::rsp, -8)); +}); + +void ppu_recompiler_fallback(ppu_thread& ppu); + +const auto ppu_recompiler_fallback_ghc = build_function_asm([](asmjit::X86Assembler& c, auto& args) +{ + using namespace asmjit; + + c.mov(args[0], x86::rbp); + c.jmp(imm_ptr(ppu_recompiler_fallback)); +}); + // Get pointer to executable cache static u64& ppu_ref(u32 addr) { @@ -174,26 +296,33 @@ void ppu_recompiler_fallback(ppu_thread& ppu) const auto& table = g_ppu_interpreter_fast.get_table(); + u64 ctr = 0; + while (true) { // Run instructions in interpreter - if (const u32 op = vm::read32(ppu.cia); table[ppu_decode(op)](ppu, {op})) [[likely]] + if (const u32 op = vm::read32(ppu.cia); ctr++, table[ppu_decode(op)](ppu, {op})) [[likely]] { ppu.cia += 4; continue; } - if (uptr func = ppu_ref(ppu.cia); func != reinterpret_cast(ppu_recompiler_fallback)) + if (uptr func = ppu_ref(ppu.cia); (func << 17 >> 17) != reinterpret_cast(ppu_recompiler_fallback_ghc)) { // We found a recompiler function at cia, return - return; + break; } if (ppu.test_stopped()) { - return; + break; } } + + if (g_cfg.core.ppu_debug) + { + ppu_log.warning("Exiting interpreter at 0x%x (executed %u functions)", ppu.cia, ctr); + } } void ppu_reservation_fallback(ppu_thread& ppu) @@ -262,12 +391,23 @@ extern void ppu_register_range(u32 addr, u32 size) utils::memory_commit(&ppu_ref(addr), size * 2, utils::protection::rw); vm::page_protect(addr, utils::align(size, 0x10000), 0, vm::page_executable); - const u64 fallback = g_cfg.core.ppu_decoder == ppu_decoder_type::llvm ? reinterpret_cast(ppu_recompiler_fallback) : reinterpret_cast(ppu_fallback); + const u64 fallback = reinterpret_cast(ppu_fallback); + const u64 seg_base = addr; size &= ~3; // Loop assumes `size = n * 4`, enforce that by rounding down + while (size) { - ppu_ref(addr) = fallback; + if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm) + { + // Assume addr is the start of first segment of PRX + ppu_ref(addr) = reinterpret_cast(ppu_recompiler_fallback_ghc) | (seg_base << (32 + 3)); + } + else + { + ppu_ref(addr) = fallback; + } + addr += 4; size -= 4; } @@ -278,7 +418,7 @@ extern void ppu_register_function_at(u32 addr, u32 size, ppu_function_t ptr) // Initialize specific function if (ptr) { - ppu_ref(addr) = reinterpret_cast(ptr); + ppu_ref(addr) = (reinterpret_cast(ptr) & 0x7fff'ffff'ffffu) | (ppu_ref(addr) & ~0x7fff'ffff'ffffu); return; } @@ -833,9 +973,15 @@ void ppu_thread::exec_task() { if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm) { - while (!(state & (cpu_flag::ret + cpu_flag::exit + cpu_flag::stop))) + while (true) { - reinterpret_cast(ppu_ref(cia))(*this); + if (state) [[unlikely]] + { + if (check_state()) + break; + } + + ppu_gateway(this); } return; @@ -1927,9 +2073,9 @@ namespace // Compiled PPU module info struct jit_module { - std::vector vars; std::vector funcs; std::shared_ptr pjit; + bool init = false; }; struct jit_module_manager @@ -2554,9 +2700,6 @@ bool ppu_initialize(const ppu_module& info, bool check_only) // Compiler instance (deferred initialization) std::shared_ptr& jit = jit_mod.pjit; - // Global variables to initialize - std::vector> globals; - // Split module into fragments <= 1 MiB usz fpos = 0; @@ -2574,7 +2717,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only) bool compiled_new = false; - while (jit_mod.vars.empty() && fpos < info.funcs.size()) + while (!jit_mod.init && fpos < info.funcs.size()) { // Initialize compiler instance if (!jit && get_current_cpu_thread()) @@ -2582,17 +2725,11 @@ bool ppu_initialize(const ppu_module& info, bool check_only) jit = std::make_shared(s_link_table, g_cfg.core.llvm_cpu); } - // First function in current module part - const auto fstart = fpos; - // Copy module information (TODO: optimize) ppu_module part; part.copy_part(info); part.funcs.reserve(16000); - // Unique suffix for each module part - const u32 suffix = info.funcs.at(fstart).addr - reloc; - // Overall block size in bytes usz bsize = 0; usz bcount = 0; @@ -2761,7 +2898,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only) settings += ppu_settings::greedy_mode; // Write version, hash, CPU, settings - fmt::append(obj_name, "v3-kusa-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu)); + fmt::append(obj_name, "v4-kusa-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu)); } if (Emu.IsStopped()) @@ -2771,16 +2908,6 @@ bool ppu_initialize(const ppu_module& info, bool check_only) if (!check_only) { - globals.emplace_back(fmt::format("__mptr%x", suffix), reinterpret_cast(vm::g_base_addr)); - globals.emplace_back(fmt::format("__cptr%x", suffix), reinterpret_cast(vm::g_exec_addr)); - - // Initialize segments for relocations - for (u32 i = 0, num = 0; i < info.segs.size(); i++) - { - if (!info.segs[i].addr) continue; - globals.emplace_back(fmt::format("__seg%u_%x", num++, suffix), info.segs[i].addr); - } - link_workload.emplace_back(obj_name, false); } @@ -2894,7 +3021,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only) } // Jit can be null if the loop doesn't ever enter. - if (jit && jit_mod.vars.empty()) + if (jit && !jit_mod.init) { jit->fin(); @@ -2903,23 +3030,16 @@ bool ppu_initialize(const ppu_module& info, bool check_only) { if (!func.size) continue; - const u64 addr = ensure(jit->get(fmt::format("__0x%x", func.addr - reloc))); + const auto name = fmt::format("__0x%x", func.addr - reloc); + const u64 addr = ensure(jit->get(name)); jit_mod.funcs.emplace_back(reinterpret_cast(addr)); - ppu_ref(func.addr) = addr; + ppu_ref(func.addr) = (addr & 0x7fff'ffff'ffffu) | (ppu_ref(func.addr) & ~0x7fff'ffff'ffffu); + + if (g_cfg.core.ppu_debug) + ppu_log.notice("Installing function %s at 0x%x: %p (reloc = 0x%x)", name, func.addr, ppu_ref(func.addr), reloc); } - // Initialize global variables - for (auto& var : globals) - { - const u64 addr = ensure(jit->get(var.first)); - - jit_mod.vars.emplace_back(reinterpret_cast(addr)); - - if (addr) - { - *reinterpret_cast(addr) = var.second; - } - } + jit_mod.init = true; } else { @@ -2930,23 +3050,14 @@ bool ppu_initialize(const ppu_module& info, bool check_only) { if (!func.size) continue; - ppu_ref(func.addr) = ensure(reinterpret_cast(jit_mod.funcs[index++])); + const u64 addr = ensure(reinterpret_cast(jit_mod.funcs[index++])); + ppu_ref(func.addr) = (addr & 0x7fff'ffff'ffffu) | (ppu_ref(func.addr) & ~0x7fff'ffff'ffffu); + + if (g_cfg.core.ppu_debug) + ppu_log.notice("Reinstalling function at 0x%x: %p (reloc=0x%x)", func.addr, ppu_ref(func.addr), reloc); } index = 0; - - // Rewrite global variables - while (index < jit_mod.vars.size()) - { - *jit_mod.vars[index++] = reinterpret_cast(vm::g_base_addr); - *jit_mod.vars[index++] = reinterpret_cast(vm::g_exec_addr); - - for (const auto& seg : info.segs) - { - if (!seg.addr) continue; - *jit_mod.vars[index++] = seg.addr; - } - } } return compiled_new; @@ -2971,8 +3082,15 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co PPUTranslator translator(jit.get_context(), _module.get(), module_part, jit.get_engine()); // Define some types - const auto _void = Type::getVoidTy(jit.get_context()); - const auto _func = FunctionType::get(_void, {translator.GetContextType()->getPointerTo()}, false); + const auto _func = FunctionType::get(translator.get_type(), { + translator.get_type(), // Exec base + translator.GetContextType()->getPointerTo(), // PPU context + translator.get_type(), // Segment address (for PRX) + translator.get_type(), // Memory base + translator.get_type(), // r0 + translator.get_type(), // r1 + translator.get_type(), // r2 + }, false); // Initialize function list for (const auto& func : module_part.funcs) @@ -2980,7 +3098,8 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co if (func.size) { const auto f = cast(_module->getOrInsertFunction(func.name, _func).getCallee()); - f->addAttribute(1, Attribute::NoAlias); + f->setCallingConv(CallingConv::GHC); + f->addAttribute(2, Attribute::NoAlias); f->addFnAttr(Attribute::NoUnwind); } } diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index b5bbe5d545..9556ce6b43 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -267,6 +267,8 @@ public: // Thread name atomic_ptr ppu_tname; + u64 saved_native_sp = 0; // Host thread's stack pointer for emulated longjmp + u64 last_ftsc = 0; u64 last_ftime = 0; u32 last_faddr = 0; diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index 67638312cb..9faa4e0ffe 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -26,14 +26,6 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_mo // Bind context cpu_translator::initialize(context, engine); - // There is no weak linkage on JIT, so let's create variables with different names for each module part - const u32 gsuffix = m_info.relocs.empty() ? info.funcs[0].addr : info.funcs[0].addr - m_info.segs[0].addr; - - // Memory base - m_base = new GlobalVariable(*_module, ArrayType::get(GetType(), 0x100000000)->getPointerTo(), true, GlobalValue::ExternalLinkage, 0, fmt::format("__mptr%x", gsuffix)); - m_base->setInitializer(ConstantPointerNull::get(cast(m_base->getType()->getPointerElementType()))); - m_base->setExternallyInitialized(true); - // Thread context struct (TODO: safer member access) const u32 off0 = offset32(&ppu_thread::state); const u32 off1 = offset32(&ppu_thread::gpr); @@ -56,11 +48,6 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_mo m_thread_type = StructType::create(m_context, thread_struct, "context_t"); - // Callable - m_call = new GlobalVariable(*_module, ArrayType::get(GetType(), 0x40000000)->getPointerTo(), true, GlobalValue::ExternalLinkage, 0, fmt::format("__cptr%x", gsuffix)); - m_call->setInitializer(ConstantPointerNull::get(cast(m_call->getType()->getPointerElementType()))); - m_call->setExternallyInitialized(true); - const auto md_name = MDString::get(m_context, "branch_weights"); const auto md_low = ValueAsMetadata::get(ConstantInt::get(GetType(), 1)); const auto md_high = ValueAsMetadata::get(ConstantInt::get(GetType(), 666)); @@ -69,16 +56,6 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_mo m_md_likely = MDTuple::get(m_context, {md_name, md_high, md_low}); m_md_unlikely = MDTuple::get(m_context, {md_name, md_low, md_high}); - // Create segment variables - for (const auto& seg : m_info.segs) - { - if (!seg.addr) continue; - auto gv = new GlobalVariable(*_module, GetType(), true, GlobalValue::ExternalLinkage, 0, fmt::format("__seg%u_%x", m_segs.size(), gsuffix)); - gv->setInitializer(ConstantInt::get(GetType(), seg.addr)); - gv->setExternallyInitialized(true); - m_segs.emplace_back(gv); - } - // Sort relevant relocations (TODO) const auto caddr = m_info.segs[0].addr; const auto cend = caddr + m_info.segs[0].size; @@ -194,11 +171,18 @@ Function* PPUTranslator::Translate(const ppu_function& info) } } - m_thread = &*m_function->arg_begin(); - m_base_loaded = m_ir->CreateLoad(m_base); + m_thread = &*(m_function->arg_begin() + 1); + m_base = &*(m_function->arg_begin() + 3); + m_exec = &*(m_function->arg_begin() + 0); + m_seg0 = &*(m_function->arg_begin() + 2); + + m_gpr[0] = &*(m_function->arg_begin() + 4); + m_gpr[1] = &*(m_function->arg_begin() + 5); + m_gpr[2] = &*(m_function->arg_begin() + 6); const auto body = BasicBlock::Create(m_context, "__body", m_function); + //Call(GetType(), "__trace", GetAddr()); if (need_check) { // Check status register in the entry block @@ -302,7 +286,7 @@ Value* PPUTranslator::GetAddr(u64 _add) if (m_reloc) { // Load segment address from global variable, compute actual instruction address - return m_ir->CreateAdd(m_ir->getInt64(m_addr + _add), m_ir->CreateLoad(m_segs[m_reloc - m_info.segs.data()])); + return m_ir->CreateAdd(m_ir->getInt64(m_addr + _add), m_seg0); } return m_ir->getInt64(m_addr + _add); @@ -351,20 +335,23 @@ Value* PPUTranslator::RotateLeft(Value* arg, Value* n) void PPUTranslator::CallFunction(u64 target, Value* indirect) { - const auto type = FunctionType::get(GetType(), {m_thread_type->getPointerTo()}, false); + const auto type = m_function->getFunctionType(); const auto block = m_ir->GetInsertBlock(); FunctionCallee callee; + auto seg0 = m_seg0; + if (!indirect) { - if ((!m_reloc && target < 0x10000) || target >= u64{} - 0x10000) + if ((!m_reloc && target < 0x10000) || target >= 0x100000000u - 0x10000) { Trap(); return; } - callee = m_module->getOrInsertFunction(fmt::format("__0x%llx", target), type); + callee = m_module->getOrInsertFunction(fmt::format("__0x%x", target), type); + cast(callee.getCallee())->setCallingConv(CallingConv::GHC); } else { @@ -379,13 +366,19 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect) } } - const auto pos = m_ir->CreateLShr(indirect, 2, "", true); - const auto ptr = m_ir->CreateGEP(m_ir->CreateLoad(m_call), {m_ir->getInt64(0), pos}); - callee = FunctionCallee(type, m_ir->CreateIntToPtr(m_ir->CreateLoad(ptr), type->getPointerTo())); + const auto pos = m_ir->CreateShl(indirect, 1); + const auto ptr = m_ir->CreateGEP(m_exec, pos); + const auto val = m_ir->CreateLoad(m_ir->CreateBitCast(ptr, get_type())); + callee = FunctionCallee(type, m_ir->CreateIntToPtr(m_ir->CreateAnd(val, 0x7fff'ffff'ffff), type->getPointerTo())); + + // Load new segment address + seg0 = m_ir->CreateShl(m_ir->CreateLShr(val, 47), 12); } m_ir->SetInsertPoint(block); - m_ir->CreateCall(callee, {m_thread})->setTailCallKind(llvm::CallInst::TCK_Tail); + const auto c = m_ir->CreateCall(callee, {m_exec, m_thread, seg0, m_base, GetGpr(0), GetGpr(1), GetGpr(2)}); + c->setTailCallKind(llvm::CallInst::TCK_Tail); + c->setCallingConv(CallingConv::GHC); m_ir->CreateRetVoid(); } @@ -627,7 +620,7 @@ void PPUTranslator::UseCondition(MDNode* hint, Value* cond) llvm::Value* PPUTranslator::GetMemory(llvm::Value* addr, llvm::Type* type) { - return bitcast(m_ir->CreateGEP(m_base_loaded, {m_ir->getInt64(0), addr}), type->getPointerTo()); + return bitcast(m_ir->CreateGEP(m_base, addr), type->getPointerTo()); } Value* PPUTranslator::ReadMemory(Value* addr, Type* type, bool is_be, u32 align) diff --git a/rpcs3/Emu/Cell/PPUTranslator.h b/rpcs3/Emu/Cell/PPUTranslator.h index 0de6123592..f5a14115cb 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.h +++ b/rpcs3/Emu/Cell/PPUTranslator.h @@ -36,18 +36,17 @@ class PPUTranslator final : public cpu_translator /* Variables */ - // Segments - std::vector m_segs; - // Memory base - llvm::GlobalVariable* m_base; - llvm::Value* m_base_loaded; + llvm::Value* m_base; // Thread context llvm::Value* m_thread; // Callable functions - llvm::GlobalVariable* m_call; + llvm::Value* m_exec; + + // Segment 0 address + llvm::Value* m_seg0; // Thread context struct llvm::StructType* m_thread_type;