diff --git a/Utilities/JIT.cpp b/Utilities/JIT.cpp index 4ee330d928..1e876b5f68 100644 --- a/Utilities/JIT.cpp +++ b/Utilities/JIT.cpp @@ -64,6 +64,8 @@ static u8* add_jit_memory(std::size_t size, uint align) if (UNLIKELY(_new > 0x40000000)) { + // Sorry, we failed, and further attempts should fail too. + ctr = 0x40000000; return -1; } @@ -77,7 +79,7 @@ static u8* add_jit_memory(std::size_t size, uint align) if (UNLIKELY(pos == -1)) { - LOG_FATAL(GENERAL, "JIT: Out of memory (size=0x%x, align=0x%x, off=0x%x)", size, align, Off); + LOG_WARNING(GENERAL, "JIT: Out of memory (size=0x%x, align=0x%x, off=0x%x)", size, align, Off); return nullptr; } @@ -181,10 +183,10 @@ void jit_runtime::finalize() noexcept std::memcpy(alloc(s_data_init.size(), 1, false), s_data_init.data(), s_data_init.size()); } -::jit_runtime& asmjit::get_global_runtime() +asmjit::JitRuntime& asmjit::get_global_runtime() { // Magic static - static ::jit_runtime g_rt; + static asmjit::JitRuntime g_rt; return g_rt; } diff --git a/Utilities/JIT.h b/Utilities/JIT.h index 3fdbd9900c..eeb03c0ac5 100644 --- a/Utilities/JIT.h +++ b/Utilities/JIT.h @@ -40,7 +40,7 @@ struct jit_runtime final : asmjit::HostRuntime namespace asmjit { // Should only be used to build global functions - ::jit_runtime& get_global_runtime(); + asmjit::JitRuntime& get_global_runtime(); // Emit xbegin and adjacent loop, return label at xbegin Label build_transaction_enter(X86Assembler& c, Label fallback); diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp index e4f64f2074..d71278e324 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp @@ -46,35 +46,20 @@ void spu_recompiler::init() } } -spu_function_t spu_recompiler::compile(std::vector&& func_rv) +bool spu_recompiler::compile(u64 last_reset_count, const std::vector& func) { - init(); + const auto fn_location = m_spurt->find(last_reset_count, func); - std::unique_lock lock(m_spurt->m_mutex); - - // Try to find existing function, register new one if necessary - const auto fn_info = m_spurt->m_map.emplace(std::move(func_rv), nullptr); - - auto& fn_location = fn_info.first->second; - - if (!fn_location && !fn_info.second) + if (fn_location == spu_runtime::g_dispatcher) { - // Wait if already in progress - while (!fn_location) - { - m_spurt->m_cond.wait(lock); - } + return true; } - if (fn_location) + if (!fn_location) { - return fn_location; + return false; } - auto& func = fn_info.first->first; - - lock.unlock(); - using namespace asmjit; SPUDisAsm dis_asm(CPUDisAsm_InterpreterMode); @@ -833,12 +818,20 @@ spu_function_t spu_recompiler::compile(std::vector&& func_rv) // Compile and get function address spu_function_t fn; - if (m_asmrt.add(&fn, &code)) + if (auto err = m_asmrt.add(&fn, &code)) { + if (err == asmjit::ErrorCode::kErrorNoVirtualMemory) + { + return false; + } + LOG_FATAL(SPU, "Failed to build a function"); } - m_spurt->add(*fn_info.first, fn); + if (!m_spurt->add(last_reset_count, fn_location, fn)) + { + return false; + } if (g_cfg.core.spu_debug) { @@ -848,7 +841,7 @@ spu_function_t spu_recompiler::compile(std::vector&& func_rv) log += "\n\n\n"; // Append log file - fs::file(m_spurt->m_cache_path + "spu.log", fs::write + fs::append).write(log); + fs::file(m_spurt->get_cache_path() + "spu.log", fs::write + fs::append).write(log); } if (m_cache && g_cfg.core.spu_cache) @@ -856,7 +849,7 @@ spu_function_t spu_recompiler::compile(std::vector&& func_rv) m_cache->add(func); } - return fn; + return true; } spu_recompiler::XmmLink spu_recompiler::XmmAlloc() // get empty xmm register @@ -947,11 +940,21 @@ void spu_recompiler::branch_fixed(u32 target) return; } + const auto ppptr = m_spurt->make_branch_patchpoint(target); + c->mov(SPU_OFF_32(pc), target); c->xor_(qw0->r32(), qw0->r32()); c->cmp(SPU_OFF_32(state), 0); c->jnz(label_stop); - c->jmp(imm_ptr(m_spurt->make_branch_patchpoint(target))); + + if (ppptr) + { + c->jmp(imm_ptr(ppptr)); + } + else + { + c->ret(); + } } void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret) diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.h b/rpcs3/Emu/Cell/SPUASMJITRecompiler.h index b82ed75314..ffc04cf333 100644 --- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.h +++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.h @@ -13,7 +13,7 @@ public: virtual void init() override; - virtual spu_function_t compile(std::vector&&) override; + virtual bool compile(u64 last_reset_count, const std::vector&) override; private: // ASMJIT runtime diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 0caf05a9bf..bb1f8bd5fc 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -23,6 +23,10 @@ const spu_decoder s_spu_iname; extern u64 get_timebased_time(); +thread_local DECLARE(spu_runtime::workload){}; + +thread_local DECLARE(spu_runtime::addrv){u32{0}}; + DECLARE(spu_runtime::tr_dispatch) = [] { // Generate a special trampoline to spu_recompiler_base::dispatch with pause instruction @@ -149,6 +153,7 @@ void spu_cache::initialize() // Read cache auto func_list = cache->get(); atomic_t fnext{}; + atomic_t fail_flag{0}; // Initialize compiler instances for parallel compilation u32 max_threads = static_cast(g_cfg.core.llvm_threads); @@ -190,6 +195,9 @@ void spu_cache::initialize() for (std::size_t i = 0; i < compilers.size(); i++) thread_queue.emplace_back("Worker " + std::to_string(i), [&, compiler = compilers[i].get()]() { + // Register SPU runtime user + spu_runtime::passive_lock _passive_lock(compiler->get_runtime()); + // Fake LS std::vector> ls(0x10000); @@ -198,7 +206,7 @@ void spu_cache::initialize() { std::vector& func = func_list[func_i]; - if (Emu.IsStopped()) + if (Emu.IsStopped() || fail_flag) { g_progr_pdone++; continue; @@ -222,7 +230,11 @@ void spu_cache::initialize() LOG_ERROR(SPU, "[0x%05x] SPU Analyser failed, %u vs %u", func2[0], func2.size() - 1, size0 - 1); } - compiler->compile(std::move(func)); + if (!compiler->compile(0, func)) + { + // Likely, out of JIT memory. Signal to prevent further building. + fail_flag |= 1; + } // Clear fake LS for (u32 i = 1, pos = start; i < func2.size(); i++, pos += 4) @@ -256,6 +268,14 @@ void spu_cache::initialize() return; } + if (fail_flag) + { + LOG_ERROR(SPU, "SPU Runtime: Cache building failed (too much data). SPU Cache will be disabled."); + spu_runtime::passive_lock _passive_lock(compilers[0]->get_runtime()); + compilers[0]->get_runtime().reset(0); + return; + } + if (compilers.size() && !func_list.empty()) { LOG_SUCCESS(SPU, "SPU Runtime: Built %u functions.", func_list.size()); @@ -288,9 +308,18 @@ spu_runtime::spu_runtime() LOG_SUCCESS(SPU, "SPU Recompiler Runtime initialized..."); } -void spu_runtime::add(std::pair, spu_function_t>& where, spu_function_t compiled) +bool spu_runtime::add(u64 last_reset_count, void* _where, spu_function_t compiled) { - std::unique_lock lock(m_mutex); + writer_lock lock(*this); + + // Check reset count (makes where invalid) + if (!_where || last_reset_count != m_reset_count) + { + return false; + } + + // Use opaque pointer + auto& where = *static_cast(_where); // Function info const std::vector& func = where.first; @@ -315,7 +344,12 @@ void spu_runtime::add(std::pair, spu_function_t>& where, else { // Allocate some writable executable memory - u8* const wxptr = verify(HERE, jit_runtime::alloc(size0 * 20, 16)); + u8* const wxptr = jit_runtime::alloc(size0 * 20, 16); + + if (!wxptr) + { + return false; + } // Raw assembly pointer u8* raw = wxptr; @@ -547,13 +581,63 @@ void spu_runtime::add(std::pair, spu_function_t>& where, g_dispatcher[func[0] / 4] = reinterpret_cast(reinterpret_cast(wxptr)); } - lock.unlock(); - m_cond.notify_all(); + // Notify in lock destructor + lock.notify = true; + return true; } -spu_function_t spu_runtime::find(const se_t* ls, u32 addr) +void* spu_runtime::find(u64 last_reset_count, const std::vector& func) { - std::unique_lock lock(m_mutex); + writer_lock lock(*this); + + // Check reset count + if (last_reset_count != m_reset_count) + { + return nullptr; + } + + // Try to find existing function, register new one if necessary + const auto result = m_map.try_emplace(func, nullptr); + + // Pointer to the value in the map (pair) + const auto fn_location = &*result.first; + + if (fn_location->second) + { + // Already compiled + return g_dispatcher; + } + else if (!result.second) + { + // Wait if already in progress + while (!fn_location->second) + { + m_cond.wait(m_mutex); + + // If reset count changed, fn_location is invalidated; also requires return + if (last_reset_count != m_reset_count) + { + return nullptr; + } + } + + return g_dispatcher; + } + + // Return location to compile and use in add() + return fn_location; +} + +spu_function_t spu_runtime::find(const se_t* ls, u32 addr) const +{ + const u64 reset_count = m_reset_count; + + reader_lock lock(*this); + + if (reset_count != m_reset_count) + { + return nullptr; + } const u32 start = addr * (g_cfg.core.spu_block_size != spu_block_size_type::giga); @@ -591,6 +675,11 @@ spu_function_t spu_runtime::make_branch_patchpoint(u32 target) const { u8* const raw = jit_runtime::alloc(16, 16); + if (!raw) + { + return nullptr; + } + // Save address of the following jmp #ifdef _WIN32 raw[0] = 0x4c; // lea r8, [rip+1] @@ -621,13 +710,50 @@ spu_function_t spu_runtime::make_branch_patchpoint(u32 target) const return reinterpret_cast(raw); } -void spu_runtime::handle_return(cpu_thread* _thr) +u64 spu_runtime::reset(std::size_t last_reset_count) +{ + writer_lock lock(*this); + + if (last_reset_count != m_reset_count || !m_reset_count.compare_and_swap_test(last_reset_count, last_reset_count + 1)) + { + // Probably already reset + return m_reset_count; + } + + // Notify SPU threads + idm::select>([](u32, cpu_thread& cpu) + { + if (!cpu.state.test_and_set(cpu_flag::jit_return)) + { + cpu.notify(); + } + }); + + // Reset function map (may take some time) + m_map.clear(); + + // Wait for threads to catch on jit_return flag + while (m_passive_locks) + { + busy_wait(); + } + + // Reinitialize (TODO) + jit_runtime::finalize(); + jit_runtime::initialize(); + return ++m_reset_count; +} + +void spu_runtime::handle_return(spu_thread* _spu) { // Wait until the runtime becomes available - //writer_lock lock(*this); + writer_lock lock(*this); - // Simply reset the flag - _thr->state -= cpu_flag::jit_return; + // Reset stack mirror + std::memset(_spu->stack_mirror.data(), 0xff, sizeof(spu_thread::stack_mirror)); + + // Reset the flag + _spu->state -= cpu_flag::jit_return; } spu_recompiler_base::spu_recompiler_base() @@ -638,6 +764,19 @@ spu_recompiler_base::~spu_recompiler_base() { } +void spu_recompiler_base::make_function(const std::vector& data) +{ + for (u64 reset_count = m_spurt->get_reset_count();;) + { + if (LIKELY(compile(reset_count, data))) + { + break; + } + + reset_count = m_spurt->reset(reset_count); + } +} + void spu_recompiler_base::dispatch(spu_thread& spu, void*, u8* rip) { // If code verification failed from a patched patchpoint, clear it with a dispatcher jump @@ -669,7 +808,7 @@ void spu_recompiler_base::dispatch(spu_thread& spu, void*, u8* rip) } // Compile - verify(HERE), spu.jit->compile(spu.jit->block(spu._ptr(0), spu.pc)); + spu.jit->make_function(spu.jit->block(spu._ptr(0), spu.pc)); // Diagnostic if (g_cfg.core.spu_block_size == spu_block_size_type::giga) @@ -2097,11 +2236,12 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator // Generate a patchpoint for fixed location const auto cblock = m_ir->GetInsertBlock(); + const auto ppptr = m_spurt->make_branch_patchpoint(target); const auto result = llvm::BasicBlock::Create(m_context, "", m_function); m_ir->SetInsertPoint(result); m_ir->CreateStore(m_ir->getInt32(target), spu_ptr(&spu_thread::pc)); const auto type = llvm::FunctionType::get(get_type(), {get_type(), get_type(), get_type()}, false)->getPointerTo(); - tail(m_ir->CreateIntToPtr(m_ir->getInt64((u64)m_spurt->make_branch_patchpoint(target)), type)); + tail(m_ir->CreateIntToPtr(m_ir->getInt64(reinterpret_cast(ppptr ? ppptr : &spu_recompiler_base::dispatch)), type)); m_ir->SetInsertPoint(cblock); return result; } @@ -2652,36 +2792,20 @@ public: } } - virtual spu_function_t compile(std::vector&& func_rv) override + virtual bool compile(u64 last_reset_count, const std::vector& func) override { - init(); + const auto fn_location = m_spurt->find(last_reset_count, func); - // Don't lock without shared runtime - std::unique_lock lock(m_spurt->m_mutex); - - // Try to find existing function, register new one if necessary - const auto fn_info = m_spurt->m_map.emplace(std::move(func_rv), nullptr); - - auto& fn_location = fn_info.first->second; - - if (!fn_location && !fn_info.second) + if (fn_location == spu_runtime::g_dispatcher) { - // Wait if already in progress - while (!fn_location) - { - m_spurt->m_cond.wait(lock); - } + return true; } - if (fn_location) + if (!fn_location) { - return fn_location; + return false; } - auto& func = fn_info.first->first; - - lock.unlock(); - std::string hash; { sha1_context ctx; @@ -2744,12 +2868,7 @@ public: log += '\n'; this->dump(log); - fs::file(m_spurt->m_cache_path + "spu.log", fs::write + fs::append).write(log); - } - - if (m_cache && g_cfg.core.spu_cache) - { - m_cache->add(func); + fs::file(m_spurt->get_cache_path() + "spu.log", fs::write + fs::append).write(log); } using namespace llvm; @@ -3181,7 +3300,7 @@ public: if (g_cfg.core.spu_debug) { - fs::file(m_spurt->m_cache_path + "spu.log", fs::write + fs::append).write(log); + fs::file(m_spurt->get_cache_path() + "spu.log", fs::write + fs::append).write(log); } fmt::raw_error("Compilation failed"); @@ -3190,7 +3309,7 @@ public: if (g_cfg.core.spu_debug) { // Testing only - m_jit.add(std::move(module), m_spurt->m_cache_path + "llvm/"); + m_jit.add(std::move(module), m_spurt->get_cache_path() + "llvm/"); } else { @@ -3202,15 +3321,23 @@ public: // Register function pointer const spu_function_t fn = reinterpret_cast(m_jit.get_engine().getPointerToFunction(main_func)); - m_spurt->add(*fn_info.first, fn); + if (!m_spurt->add(last_reset_count, fn_location, fn)) + { + return false; + } if (g_cfg.core.spu_debug) { out.flush(); - fs::file(m_spurt->m_cache_path + "spu.log", fs::write + fs::append).write(log); + fs::file(m_spurt->get_cache_path() + "spu.log", fs::write + fs::append).write(log); } - return fn; + if (m_cache && g_cfg.core.spu_cache) + { + m_cache->add(func); + } + + return true; } static bool exec_check_state(spu_thread* _spu) diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index b66cd5aad3..503d0ecffd 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -36,10 +36,13 @@ public: // Helper class class spu_runtime { -public: - shared_mutex m_mutex; + mutable shared_mutex m_mutex; - cond_variable m_cond; + mutable cond_variable m_cond; + + mutable atomic_t m_passive_locks{0}; + + atomic_t m_reset_count{0}; // All functions std::map, spu_function_t> m_map; @@ -57,12 +60,12 @@ public: std::map, spu_function_t>::iterator beg; std::map, spu_function_t>::iterator end; }; -private: - // Scratch vector - std::vector workload; // Scratch vector - std::vector addrv{u32{0}}; + static thread_local std::vector workload; + + // Scratch vector + static thread_local std::vector addrv; // Trampoline to spu_recompiler_base::dispatch static const spu_function_t tr_dispatch; @@ -73,20 +76,104 @@ private: public: spu_runtime(); + const std::string& get_cache_path() const + { + return m_cache_path; + } + // Add compiled function and generate trampoline if necessary - void add(std::pair, spu_function_t>& where, spu_function_t compiled); + bool add(u64 last_reset_count, void* where, spu_function_t compiled); + + // Return opaque pointer for add() + void* find(u64 last_reset_count, const std::vector&); // Find existing function - spu_function_t find(const se_t* ls, u32 addr); + spu_function_t find(const se_t* ls, u32 addr) const; // Generate a patchable trampoline to spu_recompiler_base::branch spu_function_t make_branch_patchpoint(u32 target) const; + // reset() arg retriever, for race avoidance (can result in double reset) + u64 get_reset_count() const + { + return m_reset_count.load(); + } + + // Remove all compiled function and free JIT memory + u64 reset(std::size_t last_reset_count); + // Handle cpu_flag::jit_return - void handle_return(cpu_thread* _thr); + void handle_return(spu_thread* _spu); // All dispatchers (array allocated in jit memory) static atomic_t* const g_dispatcher; + + struct passive_lock + { + spu_runtime& _this; + + passive_lock(const passive_lock&) = delete; + + passive_lock(spu_runtime& _this) + : _this(_this) + { + std::lock_guard lock(_this.m_mutex); + _this.m_passive_locks++; + } + + ~passive_lock() + { + _this.m_passive_locks--; + } + }; + + // Exclusive lock within passive_lock scope + struct writer_lock + { + spu_runtime& _this; + bool notify = false; + + writer_lock(const writer_lock&) = delete; + + writer_lock(spu_runtime& _this) + : _this(_this) + { + // Temporarily release the passive lock + _this.m_passive_locks--; + _this.m_mutex.lock(); + } + + ~writer_lock() + { + _this.m_passive_locks++; + _this.m_mutex.unlock(); + + if (notify) + { + _this.m_cond.notify_all(); + } + } + }; + + struct reader_lock + { + const spu_runtime& _this; + + reader_lock(const reader_lock&) = delete; + + reader_lock(const spu_runtime& _this) + : _this(_this) + { + _this.m_passive_locks--; + _this.m_mutex.lock_shared(); + } + + ~reader_lock() + { + _this.m_passive_locks++; + _this.m_mutex.unlock_shared(); + } + }; }; // SPU Recompiler instance base class @@ -130,8 +217,11 @@ public: // Initialize virtual void init() = 0; - // Compile function - virtual spu_function_t compile(std::vector&&) = 0; + // Compile function (may fail) + virtual bool compile(u64 last_reset_count, const std::vector&) = 0; + + // Compile function, handle failure + void make_function(const std::vector&); // Default dispatch function fallback (second arg is unused) static void dispatch(spu_thread&, void*, u8* rip); diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index d366907109..872c7b5f5e 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -568,8 +568,26 @@ void spu_thread::cpu_task() if (jit) { - while (LIKELY(!state || !check_state())) + // Register SPU runtime user + spu_runtime::passive_lock _passive_lock(jit->get_runtime()); + + while (true) { + if (UNLIKELY(state)) + { + if (check_state()) + { + if (state & cpu_flag::jit_return) + { + // Handle jit_return as a special case + jit->get_runtime().handle_return(this); + continue; + } + + break; + } + } + spu_runtime::g_dispatcher[pc / 4](*this, vm::_ptr(offset), nullptr); }