diff --git a/Utilities/Thread.cpp b/Utilities/Thread.cpp
index 314715dc60..0354da63a8 100644
--- a/Utilities/Thread.cpp
+++ b/Utilities/Thread.cpp
@@ -2172,14 +2172,14 @@ u64 thread_base::finalize(thread_state result_state) noexcept
 	const u64 _self = m_thread;
 
 	// Set result state (errored or finalized)
-	m_sync.fetch_op([&](u64& v)
+	m_sync.fetch_op([&](u32& v)
 	{
 		v &= -4;
 		v |= static_cast<u32>(result_state);
 	});
 
 	// Signal waiting threads
-	m_sync.notify_all(2);
+	m_sync.notify_all();
 
 	return _self;
 }
@@ -2266,8 +2266,18 @@ thread_state thread_ctrl::state()
 
 void thread_ctrl::wait_for(u64 usec, [[maybe_unused]] bool alert /* true */)
 {
+	if (!usec)
+	{
+		return;
+	}
+
 	auto _this = g_tls_this_thread;
 
+	if (!alert && usec > 50000)
+	{
+		usec = 50000;
+	}
+
 #ifdef __linux__
 	static thread_local struct linux_timer_handle_t
 	{
@@ -2296,13 +2306,13 @@ void thread_ctrl::wait_for(u64 usec, [[maybe_unused]] bool alert /* true */)
 		}
 	} fd_timer;
 
-	if (!alert && usec > 0 && usec <= 1000 && fd_timer != -1)
+	if (!alert && fd_timer != -1)
 	{
 		struct itimerspec timeout;
 		u64 missed;
 
-		timeout.it_value.tv_nsec = usec * 1'000ull;
-		timeout.it_value.tv_sec = 0;
+		timeout.it_value.tv_nsec = usec % 1'000'000 * 1'000ull;
+		timeout.it_value.tv_sec = usec / 1'000'000;
 		timeout.it_interval.tv_sec = 0;
 		timeout.it_interval.tv_nsec = 0;
 		timerfd_settime(fd_timer, 0, &timeout, NULL);
@@ -2312,15 +2322,27 @@ void thread_ctrl::wait_for(u64 usec, [[maybe_unused]] bool alert /* true */)
 	}
 #endif
 
-	if (_this->m_sync.bit_test_reset(2) || _this->m_taskq)
+	if (alert)
 	{
-		return;
+		if (_this->m_sync.bit_test_reset(2) || _this->m_taskq)
+		{
+			return;
+		}
 	}
 
 	// Wait for signal and thread state abort
 	atomic_wait::list<2> list{};
-	list.set<0>(_this->m_sync, 0, 4 + 1);
-	list.set<1>(_this->m_taskq, nullptr);
+
+	if (alert)
+	{
+		list.set<0>(_this->m_sync, 0);
+		list.set<1>(utils::bless<atomic_t<u32>>(&_this->m_taskq)[1], 0);
+	}
+	else
+	{
+		list.set<0>(_this->m_dummy, 0);
+	}
+
 	list.wait(atomic_wait_timeout{usec <= 0xffff'ffff'ffff'ffff / 1000 ? usec * 1000 : 0xffff'ffff'ffff'ffff});
 }
 
@@ -2331,29 +2353,27 @@ void thread_ctrl::wait_for_accurate(u64 usec)
 		return;
 	}
 
+	if (usec > 50000)
+	{
+		fmt::throw_exception("thread_ctrl::wait_for_accurate: unsupported amount");
+	}
+
+#ifdef __linux__
+	return wait_for(usec, false);
+#else
 	using namespace std::chrono_literals;
 
 	const auto until = std::chrono::steady_clock::now() + 1us * usec;
 
 	while (true)
 	{
-#ifdef __linux__
-		// NOTE: Assumption that timer initialization has succeeded
-		u64 host_min_quantum = usec <= 1000 ? 10 : 50;
-#else
 		// Host scheduler quantum for windows (worst case)
-		// NOTE: On ps3 this function has very high accuracy
 		constexpr u64 host_min_quantum = 500;
-#endif
+
 		if (usec >= host_min_quantum)
 		{
-#ifdef __linux__
-			// Do not wait for the last quantum to avoid loss of accuracy
-			wait_for(usec - ((usec % host_min_quantum) + host_min_quantum), false);
-#else
 			// Wait on multiple of min quantum for large durations to avoid overloading low thread cpus
 			wait_for(usec - (usec % host_min_quantum), false);
-#endif
 		}
 		// TODO: Determine best value for yield delay
 		else if (usec >= host_min_quantum / 2)
@@ -2374,6 +2394,7 @@ void thread_ctrl::wait_for_accurate(u64 usec)
 
 		usec = (until - current).count();
 	}
+#endif
 }
 
 std::string thread_ctrl::get_name_cached()
@@ -2440,7 +2461,7 @@ bool thread_base::join(bool dtor) const
 
 	for (u64 i = 0; (m_sync & 3) <= 1; i++)
 	{
-		m_sync.wait(0, 2, timeout);
+		m_sync.wait(m_sync & ~2, timeout);
 
 		if (m_sync & 2)
 		{
@@ -2460,7 +2481,7 @@ void thread_base::notify()
 {
 	// Set notification
 	m_sync |= 4;
-	m_sync.notify_one(4);
+	m_sync.notify_all();
 }
 
 u64 thread_base::get_native_id() const
@@ -2497,7 +2518,7 @@ u64 thread_base::get_cycles()
 		cycles = static_cast<u64>(thread_time.tv_sec) * 1'000'000'000 + thread_time.tv_nsec;
 #endif
-		if (const u64 old_cycles = m_sync.fetch_op([&](u64& v){ v &= 7; v |= (cycles << 3); }) >> 3)
+		if (const u64 old_cycles = m_cycles.exchange(cycles))
 		{
 			return cycles - old_cycles;
 		}
@@ -2507,7 +2528,7 @@ u64 thread_base::get_cycles()
 	}
 	else
 	{
-		return m_sync >> 3;
+		return m_cycles;
 	}
 }
@@ -2560,8 +2581,8 @@ void thread_base::exec()
 			}
 
 			// Notify waiters
-			ptr->exec.release(nullptr);
-			ptr->exec.notify_all();
+			ptr->done.release(1);
+			ptr->done.notify_all();
 		}
 
 		if (ptr->next)
diff --git a/Utilities/Thread.h b/Utilities/Thread.h
index a64d64cb99..f85f94194e 100644
--- a/Utilities/Thread.h
+++ b/Utilities/Thread.h
@@ -100,17 +100,19 @@ class thread_future
 protected:
 	atomic_t<void(*)(thread_base*, thread_future*)> exec{};
 
+	atomic_t<u32> done{0};
+
 public:
 	// Get reference to the atomic variable for inspection and waiting for
 	const auto& get_wait() const
 	{
-		return exec;
+		return done;
 	}
 
 	// Wait (preset)
 	void wait() const
 	{
-		exec.wait<atomic_wait::op::ne>(nullptr);
+		done.wait(0);
 	}
 };
@@ -131,8 +133,13 @@ private:
 	// Thread handle (platform-specific)
 	atomic_t<u64> m_thread{0};
 
-	// Thread state and cycles
-	atomic_t<u64> m_sync{0};
+	// Thread cycles
+	atomic_t<u64> m_cycles{0};
+
+	atomic_t<u32> m_dummy{0};
+
+	// Thread state
+	atomic_t<u32> m_sync{0};
 
 	// Thread name
 	atomic_ptr<std::string> m_tname;
@@ -284,16 +291,22 @@ public:
 		}
 
 		atomic_wait::list<Max + 2> list{};
-		list.template set<Max>(_this->m_sync, 0, 4 + 1);
-		list.template set<Max + 1>(_this->m_taskq, nullptr);
+		list.template set<Max>(_this->m_sync, 0);
+		list.template set<Max + 1>(_this->m_taskq);
 		setter(list);
 		list.wait(atomic_wait_timeout{usec <= 0xffff'ffff'ffff'ffff / 1000 ? usec * 1000 : 0xffff'ffff'ffff'ffff});
 	}
 
-	template <atomic_wait::op Op = atomic_wait::op::eq, typename T, typename U>
+	template <typename T, typename U>
 	static inline void wait_on(T& wait, U old, u64 usec = -1)
 	{
-		wait_on_custom<1>([&](atomic_wait::list<3>& list){ list.set<0, Op>(wait, old); }, usec);
+		wait_on_custom<1>([&](atomic_wait::list<3>& list) { list.template set<0>(wait, old); }, usec);
+	}
+
+	template <typename T>
+	static inline void wait_on(T& wait)
+	{
+		wait_on_custom<1>([&](atomic_wait::list<3>& list) { list.template set<0>(wait); });
 	}
 
 	// Exit.
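The futex(2) interface only operates on aligned 32-bit words, which is why m_sync shrinks to atomic_t<u32> above and wider variables are waited on through a u32 view of one of their halves. A minimal sketch of that pattern, using the same utils::bless idiom as the hunks above (the names here are illustrative and assume a little-endian host, where index [1] selects the high 32 bits):

	#include "util/atomic.hpp"
	#include "util/asm.hpp" // utils::bless

	atomic_t<u64> word{0};

	// Waiter: sleep while the high 32 bits are still zero.
	void wait_high()
	{
		utils::bless<atomic_t<u32>>(&word)[1].wait(0);
	}

	// Notifier: publish a bit in the high half, then wake through the same
	// u32 view; the futex address and width must match the waiter's.
	void set_and_wake()
	{
		word |= 1ull << 32;
		utils::bless<atomic_t<u32>>(&word)[1].notify_one();
	}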
@@ -637,7 +650,7 @@ public:
 		{
 			bool notify_sync = false;
 
-			if (s >= thread_state::aborting && thread::m_sync.fetch_op([](u64& v){ return !(v & 3) && (v |= 1); }).second)
+			if (s >= thread_state::aborting && thread::m_sync.fetch_op([](u32& v) { return !(v & 3) && (v |= 1); }).second)
 			{
 				notify_sync = true;
 			}
@@ -650,7 +663,7 @@ public:
 			if (notify_sync)
 			{
 				// Notify after context abortion has been made so all conditions for wake-up be satisfied by the time of notification
-				thread::m_sync.notify_one(1);
+				thread::m_sync.notify_all();
 			}
 
 			if (s == thread_state::finished)
diff --git a/Utilities/cond.cpp b/Utilities/cond.cpp
index 69eb2d47c6..af572d191e 100644
--- a/Utilities/cond.cpp
+++ b/Utilities/cond.cpp
@@ -9,7 +9,7 @@ void cond_variable::imp_wait(u32 _old, u64 _timeout) noexcept
 	ensure(_old);
 
 	// Wait with timeout
-	m_value.wait(_old, c_signal_mask, atomic_wait_timeout{_timeout > max_timeout ? umax : _timeout * 1000});
+	m_value.wait(_old, atomic_wait_timeout{_timeout > max_timeout ? umax : _timeout * 1000});
 
 	// Cleanup
 	m_value.atomic_op([](u32& value)
@@ -47,10 +47,10 @@ void cond_variable::imp_wake(u32 _count) noexcept
 	if (_count > 1 || ((_old + (c_signal_mask & (0 - c_signal_mask))) & c_signal_mask) == c_signal_mask)
 	{
 		// Resort to notify_all if signal count reached max
-		m_value.notify_all(c_signal_mask);
+		m_value.notify_all();
 	}
 	else
 	{
-		m_value.notify_one(c_signal_mask);
+		m_value.notify_one();
 	}
 }
diff --git a/Utilities/lockless.h b/Utilities/lockless.h
index 41b24fe739..ef19fa3f7e 100644
--- a/Utilities/lockless.h
+++ b/Utilities/lockless.h
@@ -2,6 +2,7 @@
 
 #include "util/types.hpp"
 #include "util/atomic.hpp"
+#include "util/asm.hpp"
 
 //! Simple unshrinkable array base for concurrent access. Only growths automatically.
 //! There is no way to know the current size. The smaller index is, the faster it's accessed.
@@ -280,12 +281,17 @@ public:
 template <typename T>
 class lf_queue final
 {
-	atomic_t<lf_queue_item<T>*> m_head{nullptr};
+	atomic_t<u64> m_head{0};
+
+	lf_queue_item<T>* load(u64 value) const noexcept
+	{
+		return reinterpret_cast<lf_queue_item<T>*>(value >> 16);
+	}
 
 	// Extract all elements and reverse element order (FILO to FIFO)
 	lf_queue_item<T>* reverse() noexcept
 	{
-		if (auto* head = m_head.load() ? m_head.exchange(nullptr) : nullptr)
+		if (auto* head = load(m_head) ? load(m_head.exchange(0)) : nullptr)
 		{
 			if (auto* prev = head->m_link)
 			{
@@ -311,35 +317,35 @@ public:
 
 	~lf_queue()
 	{
-		delete m_head.load();
+		delete load(m_head);
 	}
 
-	template <atomic_wait::op Flags = atomic_wait::op::eq>
 	void wait(std::nullptr_t /*null*/ = nullptr) noexcept
 	{
-		if (m_head == nullptr)
+		if (m_head == 0)
 		{
-			m_head.template wait<Flags>(nullptr);
+			utils::bless<atomic_t<u32>>(&m_head)[1].wait(0);
 		}
 	}
 
 	const volatile void* observe() const noexcept
 	{
-		return m_head.load();
+		return load(m_head);
 	}
 
 	explicit operator bool() const noexcept
 	{
-		return m_head != nullptr;
+		return m_head != 0;
 	}
 
 	template <typename... Args>
	void push(Args&&... args)
	{
-		auto _old = m_head.load();
+		auto oldv = m_head.load();
+		auto _old = load(oldv);
 		auto item = new lf_queue_item<T>(_old, std::forward<Args>(args)...);
 
-		while (!m_head.compare_exchange(_old, item))
+		while (!m_head.compare_exchange(oldv, reinterpret_cast<u64>(item) << 16))
 		{
 			item->m_link = _old;
 		}
@@ -347,7 +353,7 @@ public:
 		if (!_old)
 		{
 			// Notify only if queue was empty
-			m_head.notify_one();
+			utils::bless<atomic_t<u32>>(&m_head)[1].notify_one();
 		}
 	}
 
@@ -363,7 +369,7 @@ public:
 	lf_queue_slice<T> pop_all_reversed()
 	{
 		lf_queue_slice<T> result;
-		result.m_head = m_head.exchange(nullptr);
+		result.m_head = load(m_head.exchange(0));
 		return result;
 	}
diff --git a/Utilities/mutex.cpp b/Utilities/mutex.cpp
index e84113d607..0cbd0df30e 100644
--- a/Utilities/mutex.cpp
+++ b/Utilities/mutex.cpp
@@ -74,14 +74,14 @@ void shared_mutex::imp_wait()
 			break;
 		}
 
-		m_value.wait(old, c_sig);
+		m_value.wait(old);
 	}
 }
 
 void shared_mutex::imp_signal()
 {
 	m_value += c_sig;
-	m_value.notify_one(c_sig);
+	m_value.notify_one();
 }
 
 void shared_mutex::imp_lock(u32 val)
diff --git a/Utilities/sync.h b/Utilities/sync.h
index df2481108e..4e60c0f2b4 100644
--- a/Utilities/sync.h
+++ b/Utilities/sync.h
@@ -38,7 +38,29 @@
 constexpr NTSTATUS NTSTATUS_ALERTED = 0x101;
 constexpr NTSTATUS NTSTATUS_TIMEOUT = 0x102;
 #endif
 
-#ifndef __linux__
+#ifdef __linux__
+#ifndef SYS_futex_waitv
+#if defined(ARCH_X64) || defined(ARCH_ARM64)
+#define SYS_futex_waitv 449
+#endif
+#endif
+
+#ifndef FUTEX_32
+#define FUTEX_32 2
+#endif
+
+#ifndef FUTEX_WAITV_MAX
+#define FUTEX_WAITV_MAX 128
+#endif
+
+struct futex_waitv
+{
+	__u64 val;
+	__u64 uaddr;
+	__u32 flags;
+	__u32 __reserved;
+};
+#else
 enum
 {
 	FUTEX_PRIVATE_FLAG = 0,
@@ -113,7 +135,7 @@ inline int futex(volatile void* uaddr, int futex_op, uint val, const timespec* t
 			}
 			else
 			{
-				// TODO
+				// TODO: absolute timeout
 			}
 
 			map.erase(std::find(map.find(uaddr), map.end(), ref));
diff --git a/rpcs3/Emu/CPU/CPUThread.cpp b/rpcs3/Emu/CPU/CPUThread.cpp
index 031176a948..aecf9f2a24 100644
--- a/rpcs3/Emu/CPU/CPUThread.cpp
+++ b/rpcs3/Emu/CPU/CPUThread.cpp
@@ -261,7 +261,7 @@ struct cpu_prof
 			if (threads.empty())
 			{
 				// Wait for messages if no work (don't waste CPU)
-				thread_ctrl::wait_on(registered, nullptr);
+				thread_ctrl::wait_on(registered);
 				continue;
 			}
@@ -939,7 +939,7 @@ bool cpu_thread::check_state() noexcept
 				else
 				{
 					// TODO: fix the workaround
-					g_suspend_counter.wait(ctr, -4, atomic_wait_timeout{100});
+					g_suspend_counter.wait(ctr, atomic_wait_timeout{10'000});
 				}
 			}
 			else
@@ -972,8 +972,7 @@ bool cpu_thread::check_state() noexcept
 				}
 
 				// Short sleep when yield flag is present alone (makes no sense when other methods which can stop thread execution have been done)
-				// Pass a mask of a single bit which is often unused to avoid notifications
-				s_dummy_atomic.wait(0, 1u << 30, atomic_wait_timeout{80'000});
+				s_dummy_atomic.wait(0, atomic_wait_timeout{80'000});
 			}
 		}
 	}
@@ -1010,13 +1009,13 @@ cpu_thread& cpu_thread::operator=(thread_state)
 
 	if (old & cpu_flag::wait && old.none_of(cpu_flag::again + cpu_flag::exit))
 	{
-		state.notify_one(cpu_flag::exit);
+		state.notify_one();
 
 		if (auto thread = try_get<spu_thread>())
 		{
 			if (u32 resv = atomic_storage<u32>::load(thread->raddr))
 			{
-				vm::reservation_notifier(resv).notify_all(-128);
+				vm::reservation_notifier(resv).notify_all();
 			}
 		}
 	}
diff --git a/rpcs3/Emu/Cell/Modules/cellMic.cpp b/rpcs3/Emu/Cell/Modules/cellMic.cpp
index 60fd57734f..cfd6e427ec 100644
--- a/rpcs3/Emu/Cell/Modules/cellMic.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellMic.cpp
@@ -73,7 +73,7 @@ void mic_context::operator()()
 	// Timestep in microseconds
constexpr u64 TIMESTEP = 256ull * 1'000'000ull / 48000ull; u64 timeout = 0; - u64 oldvalue = 0; + u32 oldvalue = 0; while (thread_ctrl::state() != thread_state::aborting) { diff --git a/rpcs3/Emu/Cell/Modules/cellMic.h b/rpcs3/Emu/Cell/Modules/cellMic.h index aba0d8bbfa..0bd637fa90 100644 --- a/rpcs3/Emu/Cell/Modules/cellMic.h +++ b/rpcs3/Emu/Cell/Modules/cellMic.h @@ -374,7 +374,7 @@ public: static constexpr auto thread_name = "Microphone Thread"sv; protected: - atomic_t wakey = 0; + atomic_t wakey = 0; // u32 signalStateLocalTalk = 9; // value is in range 0-10. 10 indicates talking, 0 indicating none. // u32 signalStateFarTalk = 0; // value is in range 0-10. 10 indicates talking from far away, 0 indicating none. diff --git a/rpcs3/Emu/Cell/Modules/cellMsgDialog.cpp b/rpcs3/Emu/Cell/Modules/cellMsgDialog.cpp index 38e8179e4b..1204ff1773 100644 --- a/rpcs3/Emu/Cell/Modules/cellMsgDialog.cpp +++ b/rpcs3/Emu/Cell/Modules/cellMsgDialog.cpp @@ -164,7 +164,7 @@ error_code open_msg_dialog(bool is_blocking, u32 type, vm::cptr msgString, return CellSysutilError{ret + 0u}; } - const auto notify = std::make_shared>(false); + const auto notify = std::make_shared>(0); const auto res = manager->create()->show(is_blocking, msgString.get_ptr(), _type, [callback, userData, &return_code, is_blocking, notify](s32 status) { @@ -186,7 +186,7 @@ error_code open_msg_dialog(bool is_blocking, u32 type, vm::cptr msgString, if (is_blocking && notify) { - *notify = true; + *notify = 1; notify->notify_one(); } }); diff --git a/rpcs3/Emu/Cell/Modules/cellMusicDecode.cpp b/rpcs3/Emu/Cell/Modules/cellMusicDecode.cpp index 1ad732df55..0ae0cd80b4 100644 --- a/rpcs3/Emu/Cell/Modules/cellMusicDecode.cpp +++ b/rpcs3/Emu/Cell/Modules/cellMusicDecode.cpp @@ -256,7 +256,7 @@ error_code cell_music_decode_read(vm::ptr buf, vm::ptr startTime, u64 { dec.read_pos = 0; dec.decoder.clear(); - dec.decoder.track_fully_consumed = true; + dec.decoder.track_fully_consumed = 1; dec.decoder.track_fully_consumed.notify_one(); break; } diff --git a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp index 721a1c82a3..3d5c535058 100644 --- a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp @@ -2477,8 +2477,8 @@ s32 _spurs::add_workload(ppu_thread& ppu, vm::ptr spurs, vm::ptr spurs_res += 127; spurs_res2 += 127; - spurs_res.notify_all(-128); - spurs_res2.notify_all(-128); + spurs_res.notify_all(); + spurs_res2.notify_all(); u32 res_wkl; const auto wkl = &spurs->wklInfo(wnum); diff --git a/rpcs3/Emu/Cell/Modules/cellVdec.cpp b/rpcs3/Emu/Cell/Modules/cellVdec.cpp index d7f4fd2f0e..cb73b918c0 100644 --- a/rpcs3/Emu/Cell/Modules/cellVdec.cpp +++ b/rpcs3/Emu/Cell/Modules/cellVdec.cpp @@ -303,7 +303,7 @@ struct vdec_context final return; } - thread_ctrl::wait_on(in_cmd, nullptr); + thread_ctrl::wait_on(in_cmd); slice = in_cmd.pop_all(); // Pop new command list }()) { @@ -921,7 +921,7 @@ static error_code vdecOpen(ppu_thread& ppu, T type, U res, vm::cptr }); thrd->state -= cpu_flag::stop; - thrd->state.notify_one(cpu_flag::stop); + thrd->state.notify_one(); return CELL_OK; } diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 4b09778a00..8619dcd81e 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -1821,7 +1821,11 @@ void ppu_thread::cpu_task() // Wait until the progress dialog is closed. // We don't want to open a cell dialog while a native progress dialog is still open. 
- thread_ctrl::wait_on(g_progr_ptotal, 0); + while (u32 v = g_progr_ptotal) + { + g_progr_ptotal.wait(v); + } + g_fxo->get().show_overlay_message_only = true; // Sadly we can't postpone initializing guest time because we need to run PPU threads @@ -1839,7 +1843,7 @@ void ppu_thread::cpu_task() } ensure(spu.state.test_and_reset(cpu_flag::stop)); - spu.state.notify_one(cpu_flag::stop); + spu.state.notify_one(); } }); @@ -2051,7 +2055,7 @@ ppu_thread::ppu_thread(utils::serial& ar) struct init_pushed { bool pushed = false; - atomic_t inited = false; + atomic_t inited = false; }; call_history.data.resize(g_cfg.core.ppu_call_history ? call_history_max_size : 1); @@ -2100,7 +2104,7 @@ ppu_thread::ppu_thread(utils::serial& ar) { while (!Emu.IsStopped() && !g_fxo->get().inited) { - thread_ctrl::wait_on(g_fxo->get().inited, false); + thread_ctrl::wait_on(g_fxo->get().inited, 0); } return false; } @@ -2117,7 +2121,7 @@ ppu_thread::ppu_thread(utils::serial& ar) {ppu_cmd::ptr_call, 0}, +[](ppu_thread&) -> bool { auto& inited = g_fxo->get().inited; - inited = true; + inited = 1; inited.notify_all(); return true; } @@ -3046,7 +3050,7 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value) if (ppu.cia < liblv2_begin || ppu.cia >= liblv2_end) { - res.notify_all(-128); + res.notify_all(); } if (addr == ppu.last_faddr) diff --git a/rpcs3/Emu/Cell/RawSPUThread.cpp b/rpcs3/Emu/Cell/RawSPUThread.cpp index 580ee319a9..eaf5e2dbc1 100644 --- a/rpcs3/Emu/Cell/RawSPUThread.cpp +++ b/rpcs3/Emu/Cell/RawSPUThread.cpp @@ -21,7 +21,7 @@ inline void try_start(spu_thread& spu) }).second) { spu.state -= cpu_flag::stop; - spu.state.notify_one(cpu_flag::stop); + spu.state.notify_one(); } }; @@ -273,7 +273,7 @@ bool spu_thread::write_reg(const u32 addr, const u32 value) for (status_npc_sync_var old; (old = status_npc).status & SPU_STATUS_RUNNING;) { - status_npc.wait(old); + utils::bless>(&status_npc)[0].wait(old.status); } } } diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 5c88bba127..be20bcb8ef 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -647,7 +647,10 @@ void spu_cache::initialize() if (g_cfg.core.spu_decoder == spu_decoder_type::asmjit || g_cfg.core.spu_decoder == spu_decoder_type::llvm) { // Initialize progress dialog (wait for previous progress done) - thread_ctrl::wait_on(g_progr_ptotal, 0); + while (u32 v = g_progr_ptotal) + { + g_progr_ptotal.wait(v); + } g_progr_ptotal += ::size32(func_list); progr.emplace("Building SPU cache..."); @@ -7795,7 +7798,7 @@ public: { minusb = eval(x); } - + const auto minusbx = bitcast(minusb); // Data with swapped endian from a load instruction @@ -11011,7 +11014,7 @@ struct spu_llvm_worker return; } - thread_ctrl::wait_on(registered, nullptr); + thread_ctrl::wait_on(utils::bless>(®istered)[1], 0); slice = registered.pop_all(); }()) { @@ -11178,7 +11181,7 @@ struct spu_llvm { // Interrupt profiler thread and put it to sleep static_cast(prof_mutex.reset()); - thread_ctrl::wait_on(registered, nullptr); + thread_ctrl::wait_on(utils::bless>(®istered)[1], 0); continue; } diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index f24f41ef3a..4fdd72f143 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -2418,7 +2418,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8* } } - if (++i < 10) + if (true || ++i < 10) { busy_wait(500); } @@ -2426,7 +2426,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, 
const spu_mfc_cmd& args, u8* { // Wait _cpu->state += cpu_flag::wait + cpu_flag::temp; - bits->wait(old, wmask); + // bits->wait(old, wmask); _cpu->check_state(); } }()) @@ -2542,7 +2542,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8* v &= ~wmask; }); - bits->notify_all(wmask); + // bits->notify_all(wmask); if (size == size0) { @@ -3588,7 +3588,7 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args) { if (raddr) { - vm::reservation_notifier(addr).notify_all(-128); + vm::reservation_notifier(addr).notify_all(); raddr = 0; } @@ -3775,7 +3775,7 @@ void spu_thread::do_putlluc(const spu_mfc_cmd& args) } do_cell_atomic_128_store(addr, _ptr(args.lsa & 0x3ff80)); - vm::reservation_notifier(addr).notify_all(-128); + vm::reservation_notifier(addr).notify_all(); } bool spu_thread::do_mfc(bool can_escape, bool must_finish) @@ -4908,7 +4908,11 @@ s64 spu_thread::get_ch_value(u32 ch) } } +#ifdef __linux__ + const bool reservation_busy_waiting = false; +#else const bool reservation_busy_waiting = ((utils::get_tsc() >> 8) % 100 + ((raddr == spurs_addr) ? 50 : 0)) < g_cfg.core.spu_reservation_busy_waiting_percentage; +#endif for (; !events.count; events = get_events(mask1 & ~SPU_EVENT_LR, true, true)) { @@ -4930,8 +4934,11 @@ s64 spu_thread::get_ch_value(u32 ch) if (raddr && (mask1 & ~SPU_EVENT_TM) == SPU_EVENT_LR) { // Don't busy-wait with TSX - memory is sensitive - if (!reservation_busy_waiting) + if (g_use_rtm || !reservation_busy_waiting) { +#ifdef __linux__ + vm::reservation_notifier(raddr).wait(rtime, atomic_wait_timeout{50'000}); +#else if (raddr - spurs_addr <= 0x80 && !g_cfg.core.spu_accurate_reservations && mask1 == SPU_EVENT_LR) { atomic_wait_engine::set_one_time_use_wait_callback(+[](u64) -> bool @@ -4944,7 +4951,7 @@ s64 spu_thread::get_ch_value(u32 ch) // Wait without timeout, in this situation we have notifications for all writes making it possible // Abort notifications are handled specially for performance reasons - vm::reservation_notifier(raddr).wait(rtime, -128); + vm::reservation_notifier(raddr).wait(rtime); continue; } @@ -4976,7 +4983,8 @@ s64 spu_thread::get_ch_value(u32 ch) return true; }); - vm::reservation_notifier(raddr).wait(rtime, -128, atomic_wait_timeout{80'000}); + vm::reservation_notifier(raddr).wait(rtime, atomic_wait_timeout{80'000}); +#endif } else { @@ -5464,7 +5472,7 @@ extern void resume_spu_thread_group_from_waiting(spu_thread& spu) { group->run_state = SPU_THREAD_GROUP_STATUS_SUSPENDED; spu.state += cpu_flag::signal; - spu.state.notify_one(cpu_flag::signal); + spu.state.notify_one(); return; } @@ -5482,7 +5490,7 @@ extern void resume_spu_thread_group_from_waiting(spu_thread& spu) thread->state -= cpu_flag::suspend; } - thread->state.notify_one(cpu_flag::suspend + cpu_flag::signal); + thread->state.notify_one(); } } } @@ -6244,7 +6252,7 @@ s64 spu_channel::pop_wait(cpu_thread& spu, bool pop) while (true) { - thread_ctrl::wait_on(data, bit_wait); + thread_ctrl::wait_on(utils::bless>(&data)[1], u32{bit_wait >> 32}); old = data; if (!(old & bit_wait)) @@ -6325,7 +6333,7 @@ bool spu_channel::push_wait(cpu_thread& spu, u32 value, bool push) return false; } - thread_ctrl::wait_on(data, state); + thread_ctrl::wait_on(utils::bless>(&data)[1], u32(state >> 32)); state = data; } } @@ -6369,7 +6377,7 @@ std::pair spu_channel_4_t::pop_wait(cpu_thread& spu) while (true) { - thread_ctrl::wait_on(values, old); + thread_ctrl::wait_on(utils::bless>(&values)[0], u32(u64(std::bit_cast(old)))); old = values; if (!old.waiting) diff --git 
a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index 4801c7671c..92ed38c66e 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -235,7 +235,7 @@ public: // Turn off waiting bit manually (must succeed because waiting bit can only be resetted by the thread pushed to jostling_value) ensure(this->data.bit_test_reset(off_wait)); - data.notify_one(); + utils::bless>(&data)[1].notify_one(); } // Return true if count has changed from 0 to 1, this condition is considered satisfied even if we pushed a value directly to the special storage for waiting SPUs @@ -294,7 +294,7 @@ public: if ((old & mask) == mask) { - data.notify_one(); + utils::bless>(&data)[1].notify_one(); } return static_cast(old); @@ -386,7 +386,7 @@ struct spu_channel_4_t // Turn off waiting bit manually (must succeed because waiting bit can only be resetted by the thread pushing to jostling_value) ensure(atomic_storage::exchange(values.raw().waiting, 0)); - values.notify_one(); + utils::bless>(&values)[0].notify_one(); } return; diff --git a/rpcs3/Emu/Cell/lv2/lv2.cpp b/rpcs3/Emu/Cell/lv2/lv2.cpp index 4b775867d3..9b2473ec2b 100644 --- a/rpcs3/Emu/Cell/lv2/lv2.cpp +++ b/rpcs3/Emu/Cell/lv2/lv2.cpp @@ -1631,7 +1631,7 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, s32 prio) if (is_paused(target->state - cpu_flag::suspend)) { - target->state.notify_one(cpu_flag::suspend); + target->state.notify_one(); } } } @@ -1684,7 +1684,7 @@ void lv2_obj::schedule_all(u64 current_time) if (notify_later_idx == std::size(g_to_notify)) { // Out of notification slots, notify locally (resizable container is not worth it) - target->state.notify_one(cpu_flag::signal + cpu_flag::suspend); + target->state.notify_one(); } else { @@ -1718,7 +1718,7 @@ void lv2_obj::schedule_all(u64 current_time) if (notify_later_idx == std::size(g_to_notify)) { // Out of notification slots, notify locally (resizable container is not worth it) - target->state.notify_one(cpu_flag::notify); + target->state.notify_one(); } else { @@ -1948,7 +1948,7 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep u64 remaining = usec - passed; #ifdef __linux__ // NOTE: Assumption that timer initialization has succeeded - u64 host_min_quantum = is_usleep && remaining <= 1000 ? 
10 : 50; + constexpr u64 host_min_quantum = 10; #else // Host scheduler quantum for windows (worst case) // NOTE: On ps3 this function has very high accuracy @@ -1965,8 +1965,7 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep if (remaining > host_min_quantum) { #ifdef __linux__ - // Do not wait for the last quantum to avoid loss of accuracy - wait_for(remaining - ((remaining % host_min_quantum) + host_min_quantum)); + wait_for(remaining); #else // Wait on multiple of min quantum for large durations to avoid overloading low thread cpus wait_for(remaining - (remaining % host_min_quantum)); diff --git a/rpcs3/Emu/Cell/lv2/sys_interrupt.cpp b/rpcs3/Emu/Cell/lv2/sys_interrupt.cpp index f18fb96502..c2e4ebcd9a 100644 --- a/rpcs3/Emu/Cell/lv2/sys_interrupt.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_interrupt.cpp @@ -183,7 +183,7 @@ error_code _sys_interrupt_thread_establish(ppu_thread& ppu, vm::ptr ih, u32 }); it->state -= cpu_flag::stop; - it->state.notify_one(cpu_flag::stop); + it->state.notify_one(); return result; }); diff --git a/rpcs3/Emu/Cell/lv2/sys_mmapper.cpp b/rpcs3/Emu/Cell/lv2/sys_mmapper.cpp index 5ff9b62ccd..87a5d89d86 100644 --- a/rpcs3/Emu/Cell/lv2/sys_mmapper.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_mmapper.cpp @@ -862,7 +862,7 @@ error_code mmapper_thread_recover_page_fault(cpu_thread* cpu) if (cpu->state & cpu_flag::signal) { - cpu->state.notify_one(cpu_flag::signal); + cpu->state.notify_one(); } return CELL_OK; diff --git a/rpcs3/Emu/Cell/lv2/sys_spu.cpp b/rpcs3/Emu/Cell/lv2/sys_spu.cpp index 7ccbc6acb1..d69d20c2b7 100644 --- a/rpcs3/Emu/Cell/lv2/sys_spu.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_spu.cpp @@ -1042,7 +1042,7 @@ error_code sys_spu_thread_group_start(ppu_thread& ppu, u32 id) { for (; index != umax; index--) { - threads[index]->state.notify_one(cpu_flag::stop); + threads[index]->state.notify_one(); } } } notify_threads; @@ -1216,7 +1216,7 @@ error_code sys_spu_thread_group_resume(ppu_thread& ppu, u32 id) { for (; index != umax; index--) { - threads[index]->state.notify_one(cpu_flag::suspend); + threads[index]->state.notify_one(); } } } notify_threads; @@ -1397,7 +1397,7 @@ error_code sys_spu_thread_group_terminate(ppu_thread& ppu, u32 id, s32 value) if (prev_resv && prev_resv != resv) { // Batch reservation notifications if possible - vm::reservation_notifier(prev_resv).notify_all(-128); + vm::reservation_notifier(prev_resv).notify_all(); } prev_resv = resv; @@ -1407,7 +1407,7 @@ error_code sys_spu_thread_group_terminate(ppu_thread& ppu, u32 id, s32 value) if (prev_resv) { - vm::reservation_notifier(prev_resv).notify_all(-128); + vm::reservation_notifier(prev_resv).notify_all(); } group->exit_status = value; diff --git a/rpcs3/Emu/Cell/lv2/sys_sync.h b/rpcs3/Emu/Cell/lv2/sys_sync.h index 98df7cd729..28a04d281a 100644 --- a/rpcs3/Emu/Cell/lv2/sys_sync.h +++ b/rpcs3/Emu/Cell/lv2/sys_sync.h @@ -434,7 +434,7 @@ public: { // Note: by the time of notification the thread could have been deallocated which is why the direct function is used // TODO: Pass a narrower mask - atomic_wait_engine::notify_one(cpu, 4, atomic_wait::default_mask>); + atomic_wait_engine::notify_one(cpu); } } diff --git a/rpcs3/Emu/Memory/vm_reservation.h b/rpcs3/Emu/Memory/vm_reservation.h index ec6af4f7ae..6330a08a4d 100644 --- a/rpcs3/Emu/Memory/vm_reservation.h +++ b/rpcs3/Emu/Memory/vm_reservation.h @@ -135,7 +135,7 @@ namespace vm _xend(); #endif if constexpr (Ack) - res.notify_all(-128); + res.notify_all(); return; } else @@ -149,7 +149,7 @@ namespace vm _xend(); #endif if constexpr (Ack) 
- res.notify_all(-128); + res.notify_all(); return result; } else @@ -204,7 +204,7 @@ namespace vm #endif res += 127; if (Ack) - res.notify_all(-128); + res.notify_all(); return; } else @@ -218,7 +218,7 @@ namespace vm #endif res += 127; if (Ack) - res.notify_all(-128); + res.notify_all(); return result; } else @@ -253,7 +253,7 @@ namespace vm }); if constexpr (Ack) - res.notify_all(-128); + res.notify_all(); return; } else @@ -273,7 +273,7 @@ namespace vm }); if (Ack && result) - res.notify_all(-128); + res.notify_all(); return result; } } @@ -293,7 +293,7 @@ namespace vm } if constexpr (Ack) - res.notify_all(-128); + res.notify_all(); return; } else @@ -313,7 +313,7 @@ namespace vm } if (Ack && result) - res.notify_all(-128); + res.notify_all(); return result; } } @@ -405,7 +405,7 @@ namespace vm if constexpr (Ack) { - res.notify_all(-128); + res.notify_all(); } } else @@ -415,7 +415,7 @@ namespace vm if constexpr (Ack) { - res.notify_all(-128); + res.notify_all(); } return result; diff --git a/rpcs3/Emu/RSX/GL/GLPipelineCompiler.cpp b/rpcs3/Emu/RSX/GL/GLPipelineCompiler.cpp index ff20beb17f..445841b44b 100644 --- a/rpcs3/Emu/RSX/GL/GLPipelineCompiler.cpp +++ b/rpcs3/Emu/RSX/GL/GLPipelineCompiler.cpp @@ -55,7 +55,7 @@ namespace gl job.completion_callback(result); } - thread_ctrl::wait_on(m_work_queue, nullptr); + thread_ctrl::wait_on(m_work_queue); } } diff --git a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu.cpp b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu.cpp index ebbcb88e9a..b7b255e780 100644 --- a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu.cpp +++ b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu.cpp @@ -161,7 +161,7 @@ namespace rsx this->on_close = std::move(on_close); visible = true; - const auto notify = std::make_shared>(false); + const auto notify = std::make_shared>(0); auto& overlayman = g_fxo->get(); overlayman.attach_thread_input( diff --git a/rpcs3/Emu/RSX/Overlays/overlay_manager.cpp b/rpcs3/Emu/RSX/Overlays/overlay_manager.cpp index 50517af3e2..1d8a6292f6 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_manager.cpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_manager.cpp @@ -295,7 +295,7 @@ namespace rsx } else if (!m_input_thread_abort) { - thread_ctrl::wait_on(m_input_token_stack, nullptr); + thread_ctrl::wait_on(m_input_token_stack); } } } diff --git a/rpcs3/Emu/RSX/Overlays/overlay_message_dialog.cpp b/rpcs3/Emu/RSX/Overlays/overlay_message_dialog.cpp index 821bf4c74a..078ccb9c85 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_message_dialog.cpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_message_dialog.cpp @@ -295,7 +295,7 @@ namespace rsx { if (!m_stop_input_loop) { - const auto notify = std::make_shared>(false); + const auto notify = std::make_shared>(0); auto& overlayman = g_fxo->get(); if (interactive) diff --git a/rpcs3/Emu/RSX/Overlays/overlay_osk.cpp b/rpcs3/Emu/RSX/Overlays/overlay_osk.cpp index 44c8ba6904..5bebaa226e 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_osk.cpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_osk.cpp @@ -1621,7 +1621,7 @@ namespace rsx update_panel(); - const auto notify = std::make_shared>(false); + const auto notify = std::make_shared>(0); auto& overlayman = g_fxo->get(); overlayman.attach_thread_input( @@ -1631,7 +1631,7 @@ namespace rsx while (!Emu.IsStopped() && !*notify) { - notify->wait(false, atomic_wait_timeout{1'000'000}); + notify->wait(0, atomic_wait_timeout{1'000'000}); } } } diff --git a/rpcs3/Emu/RSX/Overlays/overlay_user_list_dialog.cpp b/rpcs3/Emu/RSX/Overlays/overlay_user_list_dialog.cpp index c7abe5fba2..3f8d296436 100644 --- 
a/rpcs3/Emu/RSX/Overlays/overlay_user_list_dialog.cpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_user_list_dialog.cpp @@ -240,7 +240,7 @@ namespace rsx this->on_close = std::move(on_close); visible = true; - const auto notify = std::make_shared>(false); + const auto notify = std::make_shared>(0); auto& overlayman = g_fxo->get(); overlayman.attach_thread_input( @@ -250,7 +250,7 @@ namespace rsx while (!Emu.IsStopped() && !*notify) { - notify->wait(false, atomic_wait_timeout{1'000'000}); + notify->wait(0, atomic_wait_timeout{1'000'000}); } return CELL_OK; diff --git a/rpcs3/Emu/RSX/Overlays/overlays.cpp b/rpcs3/Emu/RSX/Overlays/overlays.cpp index d4d3fb3511..e003b7fd5a 100644 --- a/rpcs3/Emu/RSX/Overlays/overlays.cpp +++ b/rpcs3/Emu/RSX/Overlays/overlays.cpp @@ -20,7 +20,7 @@ namespace rsx u64 user_interface::alloc_thread_bit() { - auto [_old, ok] = this->thread_bits.fetch_op([](u64& bits) + auto [_old, ok] = this->thread_bits.fetch_op([](u32& bits) { if (~bits) { @@ -385,7 +385,7 @@ namespace rsx m_stop_pad_interception.release(stop_pad_interception); m_stop_input_loop.release(true); - while (u64 b = thread_bits) + while (u32 b = thread_bits) { if (b == g_thread_bit) { diff --git a/rpcs3/Emu/RSX/Overlays/overlays.h b/rpcs3/Emu/RSX/Overlays/overlays.h index 05a2074e8d..ac23e00406 100644 --- a/rpcs3/Emu/RSX/Overlays/overlays.h +++ b/rpcs3/Emu/RSX/Overlays/overlays.h @@ -85,7 +85,7 @@ namespace rsx bool m_start_pad_interception = true; atomic_t m_stop_pad_interception = false; atomic_t m_input_thread_detached = false; - atomic_t thread_bits = 0; + atomic_t thread_bits = 0; bool m_keyboard_input_enabled = false; // Allow keyboard input bool m_keyboard_pad_handler_active = true; // Initialized as true to prevent keyboard input until proven otherwise. bool m_allow_input_on_pause = false; diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 1e9c907f40..c5f2e90161 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -853,9 +853,8 @@ namespace rsx g_fxo->get().set_thread(std::shared_ptr>>(new named_thread>("VBlank Thread"sv, [this]() -> void { - // See sys_timer_usleep for details #ifdef __linux__ - constexpr u32 host_min_quantum = 50; + constexpr u32 host_min_quantum = 10; #else constexpr u32 host_min_quantum = 500; #endif @@ -878,8 +877,12 @@ namespace rsx // Calculate time remaining to that time (0 if we passed it) const u64 wait_for = current >= post_event_time ? 0 : post_event_time - current; +#ifdef __linux__ + const u64 wait_sleep = wait_for; +#else // Substract host operating system min sleep quantom to get sleep time const u64 wait_sleep = wait_for - u64{wait_for >= host_min_quantum} * host_min_quantum; +#endif if (!wait_for) { @@ -3116,7 +3119,7 @@ namespace rsx { #ifdef __linux__ // NOTE: Assumption that timer initialization has succeeded - u64 host_min_quantum = remaining <= 1000 ? 
10 : 50; + constexpr u64 host_min_quantum = 10; #else // Host scheduler quantum for windows (worst case) // NOTE: On ps3 this function has very high accuracy @@ -3125,8 +3128,7 @@ namespace rsx if (remaining >= host_min_quantum) { #ifdef __linux__ - // Do not wait for the last quantum to avoid loss of accuracy - thread_ctrl::wait_for(remaining - ((remaining % host_min_quantum) + host_min_quantum), false); + thread_ctrl::wait_for(remaining, false); #else // Wait on multiple of min quantum for large durations to avoid overloading low thread cpus thread_ctrl::wait_for(remaining - (remaining % host_min_quantum), false); diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index dde75f3e53..ae91c502a0 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -213,7 +213,7 @@ namespace rsx u32 last_known_code_start = 0; atomic_t external_interrupt_lock{ 0 }; atomic_t external_interrupt_ack{ false }; - atomic_t is_initialized{ false }; + atomic_t is_initialized{0}; bool is_fifo_idle() const; void flush_fifo(); diff --git a/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp b/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp index f83d564393..6b6882f616 100644 --- a/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp +++ b/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp @@ -48,7 +48,7 @@ namespace vk } } - thread_ctrl::wait_on(m_work_queue, nullptr); + thread_ctrl::wait_on(m_work_queue); } } diff --git a/rpcs3/Emu/RSX/rsx_decode.h b/rpcs3/Emu/RSX/rsx_decode.h index 97905110bf..42a5fb9f11 100644 --- a/rpcs3/Emu/RSX/rsx_decode.h +++ b/rpcs3/Emu/RSX/rsx_decode.h @@ -1699,7 +1699,7 @@ struct registers_decoder static void dump(std::string& out, const decoded_type& decoded) { fmt::append(out, "Shader control: raw_value: 0x%x reg_count: %u%s%s", - decoded.shader_ctrl(), ((decoded.shader_ctrl() >> 24) & 0xFF), ((decoded.shader_ctrl() & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) ? " depth_replace" : ""), + decoded.shader_ctrl(), ((decoded.shader_ctrl() >> 24) & 0xFF), ((decoded.shader_ctrl() & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) ? " depth_replace" : ""), ((decoded.shader_ctrl() & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) ? 
" 32b_exports" : "")); } }; diff --git a/rpcs3/Emu/System.cpp b/rpcs3/Emu/System.cpp index f4211e2c62..22bf01dd26 100644 --- a/rpcs3/Emu/System.cpp +++ b/rpcs3/Emu/System.cpp @@ -153,7 +153,7 @@ void fmt_class_string::format(std::string& out, u64 arg) }); } -void Emulator::CallFromMainThread(std::function&& func, atomic_t* wake_up, bool track_emu_state, u64 stop_ctr) const +void Emulator::CallFromMainThread(std::function&& func, atomic_t* wake_up, bool track_emu_state, u64 stop_ctr) const { if (!track_emu_state) { @@ -174,14 +174,14 @@ void Emulator::CallFromMainThread(std::function&& func, atomic_t* void Emulator::BlockingCallFromMainThread(std::function&& func) const { - atomic_t wake_up = false; + atomic_t wake_up = 0; CallFromMainThread(std::move(func), &wake_up); while (!wake_up) { ensure(thread_ctrl::get_current()); - wake_up.wait(false); + wake_up.wait(0); } } @@ -424,7 +424,7 @@ void Emulator::Init() make_path_verbose(dev_flash, true); make_path_verbose(dev_flash2, true); make_path_verbose(dev_flash3, true); - + if (make_path_verbose(dev_usb, true)) { make_path_verbose(dev_usb + "MUSIC/", false); @@ -2152,7 +2152,7 @@ void Emulator::RunPPU() } ensure(cpu.state.test_and_reset(cpu_flag::stop)); - cpu.state.notify_one(cpu_flag::stop); + cpu.state.notify_one(); signalled_thread = true; }); @@ -2165,7 +2165,7 @@ void Emulator::RunPPU() if (auto thr = g_fxo->try_get>()) { thr->state -= cpu_flag::stop; - thr->state.notify_one(cpu_flag::stop); + thr->state.notify_one(); } } @@ -2234,7 +2234,7 @@ void Emulator::FinalizeRunRequest() } ensure(spu.state.test_and_reset(cpu_flag::stop)); - spu.state.notify_one(cpu_flag::stop); + spu.state.notify_one(); }; if (m_savestate_extension_flags1 & SaveStateExtentionFlags1::ShouldCloseMenu) @@ -2437,7 +2437,7 @@ void Emulator::Resume() auto on_select = [](u32, cpu_thread& cpu) { cpu.state -= cpu_flag::dbg_global_pause; - cpu.state.notify_one(cpu_flag::dbg_global_pause); + cpu.state.notify_one(); }; idm::select>(on_select); diff --git a/rpcs3/Emu/System.h b/rpcs3/Emu/System.h index 15753b3631..0148907b8a 100644 --- a/rpcs3/Emu/System.h +++ b/rpcs3/Emu/System.h @@ -54,7 +54,7 @@ constexpr bool is_error(game_boot_result res) struct EmuCallbacks { - std::function, atomic_t*)> call_from_main_thread; + std::function, atomic_t*)> call_from_main_thread; std::function on_run; // (start_playtime) continuing or going ingame, so start the clock std::function on_pause; std::function on_resume; @@ -180,7 +180,7 @@ public: } // Call from the GUI thread - void CallFromMainThread(std::function&& func, atomic_t* wake_up = nullptr, bool track_emu_state = true, u64 stop_ctr = umax) const; + void CallFromMainThread(std::function&& func, atomic_t* wake_up = nullptr, bool track_emu_state = true, u64 stop_ctr = umax) const; // Blocking call from the GUI thread void BlockingCallFromMainThread(std::function&& func) const; diff --git a/rpcs3/Emu/system_progress.cpp b/rpcs3/Emu/system_progress.cpp index ea8c0086e9..6ce68a668e 100644 --- a/rpcs3/Emu/system_progress.cpp +++ b/rpcs3/Emu/system_progress.cpp @@ -68,7 +68,7 @@ void progress_dialog_server::operator()() { // Some backends like OpenGL actually initialize a lot of driver objects in the "on_init" method. // Wait for init to complete within reasonable time. Abort just in case we have hardware/driver issues. 
-			renderer->is_initialized.wait(false, atomic_wait_timeout(5 * 1000000000ull));
+			renderer->is_initialized.wait(0, atomic_wait_timeout(5 * 1000000000ull));
 
 			auto manager = g_fxo->try_get<rsx::overlays::display_manager>();
 			show_overlay_message = g_fxo->get<progress_dialog_workaround>().show_overlay_message_only;
diff --git a/rpcs3/headless_application.cpp b/rpcs3/headless_application.cpp
index a7bbdee44b..763987be3a 100644
--- a/rpcs3/headless_application.cpp
+++ b/rpcs3/headless_application.cpp
@@ -60,7 +60,7 @@ void headless_application::InitializeCallbacks()
 		return false;
 	};
 
-	callbacks.call_from_main_thread = [this](std::function<void()> func, atomic_t<bool>* wake_up)
+	callbacks.call_from_main_thread = [this](std::function<void()> func, atomic_t<u32>* wake_up)
 	{
 		RequestCallFromMainThread(std::move(func), wake_up);
 	};
@@ -166,7 +166,7 @@ void headless_application::InitializeCallbacks()
 /**
  * Using connects avoids timers being unable to be used in a non-qt thread. So, even if this looks stupid to just call func, it's succinct.
  */
-void headless_application::CallFromMainThread(const std::function<void()>& func, atomic_t<bool>* wake_up)
+void headless_application::CallFromMainThread(const std::function<void()>& func, atomic_t<u32>* wake_up)
 {
 	func();
diff --git a/rpcs3/headless_application.h b/rpcs3/headless_application.h
index aa283b01fd..5208861236 100644
--- a/rpcs3/headless_application.h
+++ b/rpcs3/headless_application.h
@@ -30,8 +30,8 @@ private:
 	}
 
 Q_SIGNALS:
-	void RequestCallFromMainThread(std::function<void()> func, atomic_t<bool>* wake_up);
+	void RequestCallFromMainThread(std::function<void()> func, atomic_t<u32>* wake_up);
 
 private Q_SLOTS:
-	static void CallFromMainThread(const std::function<void()>& func, atomic_t<bool>* wake_up);
+	static void CallFromMainThread(const std::function<void()>& func, atomic_t<u32>* wake_up);
 };
diff --git a/rpcs3/main.cpp b/rpcs3/main.cpp
index a02fcf4bd7..7609c7898d 100644
--- a/rpcs3/main.cpp
+++ b/rpcs3/main.cpp
@@ -55,6 +55,7 @@ DYNAMIC_IMPORT("ntdll.dll", NtSetTimerResolution, NTSTATUS(ULONG DesiredResoluti
 #ifdef __linux__
 #include <sys/time.h>
 #include <sys/resource.h>
+#include <sys/prctl.h>
 #endif
 
 #if defined(__APPLE__)
@@ -443,6 +444,11 @@ int main(int argc, char** argv)
 	const u64 intro_time = (intro_stats.ru_utime.tv_sec + intro_stats.ru_stime.tv_sec) * 1000000000ull + (intro_stats.ru_utime.tv_usec + intro_stats.ru_stime.tv_usec) * 1000ull;
 #endif
 
+#ifdef __linux__
+	// Set timerslack value for Linux. The default value is 50,000ns. Change this to just 1 since we value precise timers.
+	prctl(PR_SET_TIMERSLACK, 1, 0, 0, 0);
+#endif
+
 	s_argv0 = argv[0]; // Save for report_fatal_error
 
 	// Only run RPCS3 to display an error
diff --git a/rpcs3/rpcs3qt/debugger_frame.cpp b/rpcs3/rpcs3qt/debugger_frame.cpp
index 9a34a09959..3d7e7a35bf 100644
--- a/rpcs3/rpcs3qt/debugger_frame.cpp
+++ b/rpcs3/rpcs3qt/debugger_frame.cpp
@@ -1366,7 +1366,7 @@ void debugger_frame::DoStep(bool step_over)
 			}
 		});
 
-		cpu->state.notify_one(s_pause_flags);
+		cpu->state.notify_one();
 	}
 }
 
@@ -1412,7 +1412,7 @@ void debugger_frame::RunBtnPress()
 			Emu.Resume();
 		}
 
-		cpu->state.notify_one(s_pause_flags);
+		cpu->state.notify_one();
 		m_debugger_list->EnableThreadFollowing();
 	}
 }
diff --git a/rpcs3/rpcs3qt/gui_application.cpp b/rpcs3/rpcs3qt/gui_application.cpp
index 244a9fa6aa..4ea300cc29 100644
--- a/rpcs3/rpcs3qt/gui_application.cpp
+++ b/rpcs3/rpcs3qt/gui_application.cpp
@@ -421,7 +421,7 @@ void gui_application::InitializeCallbacks()
 		return false;
 	};
 
-	callbacks.call_from_main_thread = [this](std::function<void()> func, atomic_t<bool>* wake_up)
+	callbacks.call_from_main_thread = [this](std::function<void()> func, atomic_t<u32>* wake_up)
 	{
 		RequestCallFromMainThread(std::move(func), wake_up);
 	};
@@ -792,7 +792,7 @@ void gui_application::OnChangeStyleSheetRequest()
 /**
  * Using connects avoids timers being unable to be used in a non-qt thread. So, even if this looks stupid to just call func, it's succinct.
  */
-void gui_application::CallFromMainThread(const std::function<void()>& func, atomic_t<bool>* wake_up)
+void gui_application::CallFromMainThread(const std::function<void()>& func, atomic_t<u32>* wake_up)
 {
 	func();
diff --git a/rpcs3/rpcs3qt/gui_application.h b/rpcs3/rpcs3qt/gui_application.h
index d6fdb6bc9c..eac5e19d95 100644
--- a/rpcs3/rpcs3qt/gui_application.h
+++ b/rpcs3/rpcs3qt/gui_application.h
@@ -118,8 +118,8 @@ Q_SIGNALS:
 	void OnEnableDiscEject(bool enabled);
 	void OnEnableDiscInsert(bool enabled);
 
-	void RequestCallFromMainThread(std::function<void()> func, atomic_t<bool>* wake_up);
+	void RequestCallFromMainThread(std::function<void()> func, atomic_t<u32>* wake_up);
 
 private Q_SLOTS:
-	static void CallFromMainThread(const std::function<void()>& func, atomic_t<bool>* wake_up);
+	static void CallFromMainThread(const std::function<void()>& func, atomic_t<u32>* wake_up);
 };
diff --git a/rpcs3/util/atomic.cpp b/rpcs3/util/atomic.cpp
index a1612b5626..f9541320e9 100644
--- a/rpcs3/util/atomic.cpp
+++ b/rpcs3/util/atomic.cpp
@@ -1,6 +1,7 @@
 #include "atomic.hpp"
 
 #if defined(__linux__)
+// This definition is unused on Linux
 #define USE_FUTEX
 #elif !defined(_WIN32)
 #define USE_STD
@@ -40,8 +41,8 @@ namespace utils
 
 // Total number of entries.
static constexpr usz s_hashtable_size = 1u << 17; -// Reference counter combined with shifted pointer (which is assumed to be 47 bit) -static constexpr uptr s_ref_mask = (1u << 17) - 1; +// Reference counter combined with shifted pointer (which is assumed to be 48 bit) +static constexpr uptr s_ref_mask = 0xffff; // Fix for silly on-first-use initializer static bool s_null_wait_cb(const void*, u64, u64){ return true; }; @@ -55,163 +56,17 @@ static thread_local bool(*s_tls_one_time_wait_cb)(u64 attempts) = nullptr; // Callback for notification functions for optimizations static thread_local void(*s_tls_notify_cb)(const void* data, u64 progress) = nullptr; -static inline bool operator &(atomic_wait::op lhs, atomic_wait::op_flag rhs) -{ - return !!(static_cast(lhs) & static_cast(rhs)); -} - // Compare data in memory with old value, and return true if they are equal -static NEVER_INLINE bool ptr_cmp(const void* data, u32 _size, u128 old128, u128 mask128, atomic_wait::info* ext = nullptr) +static NEVER_INLINE bool ptr_cmp(const void* data, u32 old, atomic_wait::info* ext = nullptr) { - using atomic_wait::op; - using atomic_wait::op_flag; - - const u8 size = static_cast(_size); - const op flag{static_cast(_size >> 8)}; - - bool result = false; - - if (size <= 8) - { - u64 new_value = 0; - u64 old_value = static_cast(old128); - u64 mask = static_cast(mask128) & (u64{umax} >> ((64 - size * 8) & 63)); - - // Don't load memory on empty mask - switch (mask ? size : 0) - { - case 0: break; - case 1: new_value = reinterpret_cast*>(data)->load(); break; - case 2: new_value = reinterpret_cast*>(data)->load(); break; - case 4: new_value = reinterpret_cast*>(data)->load(); break; - case 8: new_value = reinterpret_cast*>(data)->load(); break; - default: - { - fmt::throw_exception("Bad size (arg=0x%x)", _size); - } - } - - if (flag & op_flag::bit_not) - { - new_value = ~new_value; - } - - if (!mask) [[unlikely]] - { - new_value = 0; - old_value = 0; - } - else - { - if (flag & op_flag::byteswap) - { - switch (size) - { - case 2: - { - new_value = stx::se_storage::swap(static_cast(new_value)); - old_value = stx::se_storage::swap(static_cast(old_value)); - mask = stx::se_storage::swap(static_cast(mask)); - break; - } - case 4: - { - new_value = stx::se_storage::swap(static_cast(new_value)); - old_value = stx::se_storage::swap(static_cast(old_value)); - mask = stx::se_storage::swap(static_cast(mask)); - break; - } - case 8: - { - new_value = stx::se_storage::swap(new_value); - old_value = stx::se_storage::swap(old_value); - mask = stx::se_storage::swap(mask); - break; - } - default: - { - break; - } - } - } - - // Make most significant bit sign bit - const auto shv = std::countl_zero(mask); - new_value &= mask; - old_value &= mask; - new_value <<= shv; - old_value <<= shv; - } - - s64 news = new_value; - s64 olds = old_value; - - u64 newa = news < 0 ? (0ull - new_value) : new_value; - u64 olda = olds < 0 ? 
(0ull - old_value) : old_value; - - switch (op{static_cast(static_cast(flag) & 0xf)}) - { - case op::eq: result = old_value == new_value; break; - case op::slt: result = olds < news; break; - case op::sgt: result = olds > news; break; - case op::ult: result = old_value < new_value; break; - case op::ugt: result = old_value > new_value; break; - case op::alt: result = olda < newa; break; - case op::agt: result = olda > newa; break; - case op::pop: - { - // Count is taken from least significant byte and ignores some flags - const u64 count = static_cast(old128) & 0xff; - - result = count < utils::popcnt64(new_value); - break; - } - default: - { - fmt::throw_exception("ptr_cmp(): unrecognized atomic wait operation."); - } - } - } - else if (size == 16 && (flag == op::eq || flag == (op::eq | op_flag::inverse))) - { - u128 new_value = 0; - u128 old_value = old128; - u128 mask = mask128; - - // Don't load memory on empty mask - if (mask) [[likely]] - { - new_value = atomic_storage::load(*reinterpret_cast(data)); - } - - // TODO - result = !((old_value ^ new_value) & mask); - } - else if (size > 16 && !~mask128 && (flag == op::eq || flag == (op::eq | op_flag::inverse))) - { - // Interpret old128 as a pointer to the old value - ensure(!(old128 >> (64 + 17))); - - result = std::memcmp(data, reinterpret_cast(static_cast(old128)), size) == 0; - } - else - { - fmt::throw_exception("ptr_cmp(): no alternative operations are supported for non-standard atomic wait yet."); - } - - if (flag & op_flag::inverse) - { - result = !result; - } - // Check other wait variables if provided - if (result) + if (reinterpret_cast*>(data)->load() == old) { if (ext) [[unlikely]] { for (auto e = ext; e->data; e++) { - if (!ptr_cmp(e->data, e->size, e->old, e->mask)) + if (!ptr_cmp(e->data, e->old)) { return false; } @@ -283,18 +138,15 @@ namespace #endif // Essentially a fat semaphore - struct cond_handle + struct alignas(64) cond_handle { - // Combined pointer (most significant 47 bits) and ref counter (17 least significant bits) + // Combined pointer (most significant 48 bits) and ref counter (16 least significant bits) atomic_t ptr_ref; u64 tid; - u128 mask; - u128 oldv; + u32 oldv; u64 tsc0; u16 link; - u8 size; - u8 flag; atomic_t sync; #ifdef USE_STD @@ -316,7 +168,7 @@ namespace mtx.init(mtx); #endif - ensure(!ptr_ref.exchange((iptr << 17) | 1)); + ensure(!ptr_ref.exchange((iptr << 16) | 1)); } void destroy() @@ -324,10 +176,7 @@ namespace tid = 0; tsc0 = 0; link = 0; - size = 0; - flag = 0; sync.release(0); - mask = 0; oldv = 0; #ifdef USE_STD @@ -517,7 +366,7 @@ namespace // TLS storage for few allocaded "semaphores" to allow skipping initialization static thread_local tls_cond_handler s_tls_conds{}; -static u32 cond_alloc(uptr iptr, u128 mask, u32 tls_slot = -1) +static u32 cond_alloc(uptr iptr, u32 tls_slot = -1) { // Try to get cond from tls slot instead u16* ptls = tls_slot >= std::size(s_tls_conds.cond) ? 
nullptr : s_tls_conds.cond + tls_slot; @@ -526,8 +375,7 @@ static u32 cond_alloc(uptr iptr, u128 mask, u32 tls_slot = -1) { // Fast reinitialize const u32 id = std::exchange(*ptls, 0); - s_cond_list[id].mask = mask; - s_cond_list[id].ptr_ref.release((iptr << 17) | 1); + s_cond_list[id].ptr_ref.release((iptr << 16) | 1); return id; } @@ -581,7 +429,6 @@ static u32 cond_alloc(uptr iptr, u128 mask, u32 tls_slot = -1) const u32 id = level3 * 64 + std::countr_one(bits); // Initialize new "semaphore" - s_cond_list[id].mask = mask; s_cond_list[id].init(iptr); return id; } @@ -625,8 +472,6 @@ static void cond_free(u32 cond_id, u32 tls_slot = -1) { // Fast finalization cond->sync.release(0); - cond->size = 0; - cond->mask = 0; *ptls = static_cast(cond_id); return; } @@ -652,7 +497,7 @@ static void cond_free(u32 cond_id, u32 tls_slot = -1) s_cond_sem1.atomic_op(FN(x -= u128{1} << (level1 * 14))); } -static cond_handle* cond_id_lock(u32 cond_id, u128 mask, uptr iptr = 0) +static cond_handle* cond_id_lock(u32 cond_id, uptr iptr = 0) { bool did_ref = false; @@ -673,7 +518,7 @@ static cond_handle* cond_id_lock(u32 cond_id, u128 mask, uptr iptr = 0) return false; } - if (iptr && (val >> 17) != iptr) + if (iptr && (val >> 16) != iptr) { // Pointer mismatch return false; @@ -686,11 +531,6 @@ static cond_handle* cond_id_lock(u32 cond_id, u128 mask, uptr iptr = 0) return false; } - if (!(mask & cond->mask) && cond->size) - { - return false; - } - if (!did_ref) { val++; @@ -702,7 +542,7 @@ static cond_handle* cond_id_lock(u32 cond_id, u128 mask, uptr iptr = 0) if (ok) { // Check other fields again - if (const u32 sync_val = cond->sync; sync_val == 0 || sync_val == 3 || (cond->size && !(mask & cond->mask))) + if (const u32 sync_val = cond->sync; sync_val == 0 || sync_val == 3) { did_ref = true; continue; @@ -713,7 +553,7 @@ static cond_handle* cond_id_lock(u32 cond_id, u128 mask, uptr iptr = 0) if ((old & s_ref_mask) == s_ref_mask) { - fmt::throw_exception("Reference count limit (131071) reached in an atomic notifier."); + fmt::throw_exception("Reference count limit (%u) reached in an atomic notifier.", s_ref_mask); } break; @@ -736,8 +576,8 @@ namespace u64 bits: 24; // Allocated bits u64 prio: 24; // Reserved - u64 ref : 17; // Ref counter - u64 iptr: 47; // First pointer to use slot (to count used slots) + u64 ref : 16; // Ref counter + u64 iptr: 48; // First pointer to use slot (to count used slots) }; // Need to spare 16 bits for ref counter @@ -760,7 +600,7 @@ namespace static void slot_free(uptr ptr, atomic_t* slot, u32 tls_slot) noexcept; template - static auto slot_search(uptr iptr, u128 mask, F func) noexcept; + static auto slot_search(uptr iptr, F func) noexcept; }; static_assert(sizeof(root_info) == 64); @@ -944,7 +784,7 @@ void root_info::slot_free(uptr iptr, atomic_t* slot, u32 tls_slot) noexcept } template -FORCE_INLINE auto root_info::slot_search(uptr iptr, u128 mask, F func) noexcept +FORCE_INLINE auto root_info::slot_search(uptr iptr, F func) noexcept { u32 index = 0; [[maybe_unused]] u32 total = 0; @@ -974,7 +814,7 @@ FORCE_INLINE auto root_info::slot_search(uptr iptr, u128 mask, F func) noexcept for (u32 i = 0; i < cond_count; i++) { - if (cond_id_lock(cond_ids[i], mask, iptr)) + if (cond_id_lock(cond_ids[i], iptr)) { if (func(cond_ids[i])) { @@ -994,18 +834,82 @@ FORCE_INLINE auto root_info::slot_search(uptr iptr, u128 mask, F func) noexcept } } -SAFE_BUFFERS(void) atomic_wait_engine::wait(const void* data, u32 size, u128 old_value, u64 timeout, u128 mask, atomic_wait::info* ext) 
+SAFE_BUFFERS(void) +atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wait::info* ext) { - const auto stamp0 = utils::get_unique_tsc(); + uint ext_size = 0; - if (!s_tls_wait_cb(data, 0, stamp0)) +#ifdef __linux__ + ::timespec ts{}; + if (timeout + 1) + { + if (ext) [[unlikely]] + { + // futex_waitv uses absolute timeout + ::clock_gettime(CLOCK_MONOTONIC, &ts); + } + + ts.tv_sec += timeout / 1'000'000'000; + ts.tv_nsec += timeout % 1'000'000'000; + if (ts.tv_nsec > 1'000'000'000) + { + ts.tv_sec++; + ts.tv_nsec -= 1'000'000'000; + } + } + + futex_waitv vec[atomic_wait::max_list]{}; + vec[0].flags = FUTEX_32 | FUTEX_PRIVATE_FLAG; + vec[0].uaddr = reinterpret_cast<__u64>(data); + vec[0].val = old_value; + + if (ext) [[unlikely]] + { + for (auto e = ext; e->data; e++) + { + ext_size++; + vec[ext_size].flags = FUTEX_32 | FUTEX_PRIVATE_FLAG; + vec[ext_size].uaddr = reinterpret_cast<__u64>(e->data); + vec[ext_size].val = e->old; + } + } + + if (ext_size) [[unlikely]] + { + if (syscall(SYS_futex_waitv, +vec, ext_size + 1, 0, timeout + 1 ? &ts : nullptr, CLOCK_MONOTONIC) == -1) + { + if (errno == ENOSYS) + { + fmt::throw_exception("futex_waitv is not supported (Linux kernel is too old)"); + } + if (errno == EINVAL) + { + fmt::throw_exception("futex_waitv: bad param"); + } + } + } + else + { + if (futex(const_cast(data), FUTEX_WAIT_PRIVATE, old_value, timeout + 1 ? &ts : nullptr) == -1) + { + if (errno == EINVAL) + { + fmt::throw_exception("futex: bad param"); + } + } + } + + return; +#endif + + if (!s_tls_wait_cb(data, 0, 0)) { return; } - const uptr iptr = reinterpret_cast(data) & (~s_ref_mask >> 17); + const auto stamp0 = utils::get_unique_tsc(); - uint ext_size = 0; + const uptr iptr = reinterpret_cast(data) & (~s_ref_mask >> 16); uptr iptr_ext[atomic_wait::max_list - 1]{}; @@ -1026,18 +930,18 @@ SAFE_BUFFERS(void) atomic_wait_engine::wait(const void* data, u32 size, u128 old } } - iptr_ext[ext_size] = reinterpret_cast(e->data) & (~s_ref_mask >> 17); + iptr_ext[ext_size] = reinterpret_cast(e->data) & (~s_ref_mask >> 16); ext_size++; } } - const u32 cond_id = cond_alloc(iptr, mask, 0); + const u32 cond_id = cond_alloc(iptr, 0); u32 cond_id_ext[atomic_wait::max_list - 1]{}; for (u32 i = 0; i < ext_size; i++) { - cond_id_ext[i] = cond_alloc(iptr_ext[i], ext[i].mask, i + 1); + cond_id_ext[i] = cond_alloc(iptr_ext[i], i + 1); } const auto slot = root_info::slot_alloc(iptr); @@ -1060,8 +964,6 @@ SAFE_BUFFERS(void) atomic_wait_engine::wait(const void* data, u32 size, u128 old // Store some info for notifiers (some may be unused) cond->link = 0; - cond->size = static_cast(size); - cond->flag = static_cast(size >> 8); cond->oldv = old_value; cond->tsc0 = stamp0; @@ -1071,8 +973,6 @@ SAFE_BUFFERS(void) atomic_wait_engine::wait(const void* data, u32 size, u128 old { // Extensions point to original cond_id, copy remaining info cond_ext[i]->link = cond_id; - cond_ext[i]->size = static_cast(ext[i].size); - cond_ext[i]->flag = static_cast(ext[i].size >> 8); cond_ext[i]->oldv = ext[i].old; cond_ext[i]->tsc0 = stamp0; @@ -1105,7 +1005,7 @@ SAFE_BUFFERS(void) atomic_wait_engine::wait(const void* data, u32 size, u128 old u64 attempts = 0; - while (ptr_cmp(data, size, old_value, mask, ext)) + while (ptr_cmp(data, old_value, ext)) { if (s_tls_one_time_wait_cb) { @@ -1263,7 +1163,7 @@ SAFE_BUFFERS(void) atomic_wait_engine::wait(const void* data, u32 size, u128 old } template -static u32 alert_sema(u32 cond_id, u32 size, u128 mask) +static u32 alert_sema(u32 cond_id, u32 size) { 
@@ -1263,7 +1163,7 @@ SAFE_BUFFERS(void) atomic_wait_engine::wait(const void* data, u32 size, u128 old
 }

 template <bool TryAlert = false>
-static u32 alert_sema(u32 cond_id, u32 size, u128 mask)
+static u32 alert_sema(u32 cond_id, u32 size)
 {
 	ensure(cond_id);
@@ -1271,11 +1171,11 @@ static u32 alert_sema(u32 cond_id, u32 size, u128 mask)

 	u32 ok = 0;

-	if (!cond->size || mask & cond->mask)
+	if (true)
 	{
 		// Redirect if necessary
 		const auto _old = cond;
-		const auto _new = _old->link ? cond_id_lock(_old->link, u128(-1)) : _old;
+		const auto _new = _old->link ? cond_id_lock(_old->link) : _old;

 		if (_new && _new->tsc0 == _old->tsc0)
 		{
@@ -1336,50 +1236,58 @@ void atomic_wait_engine::set_notify_callback(void(*cb)(const void*, u64))
 	s_tls_notify_cb = cb;
 }

-void atomic_wait_engine::notify_one(const void* data, u32 size, u128 mask)
+void atomic_wait_engine::notify_one(const void* data)
 {
-	const uptr iptr = reinterpret_cast<uptr>(data) & (~s_ref_mask >> 17);
-
 	if (s_tls_notify_cb)
 		s_tls_notify_cb(data, 0);

+#ifdef __linux__
+	futex(const_cast<void*>(data), FUTEX_WAKE_PRIVATE, 1);
+#else
+	const uptr iptr = reinterpret_cast<uptr>(data) & (~s_ref_mask >> 16);
+
-	root_info::slot_search(iptr, mask, [&](u32 cond_id)
+	root_info::slot_search(iptr, [&](u32 cond_id)
 	{
-		if (alert_sema(cond_id, size, mask))
+		if (alert_sema(cond_id, 4))
 		{
 			return true;
 		}

 		return false;
 	});
+#endif

 	if (s_tls_notify_cb)
 		s_tls_notify_cb(data, -1);
 }

-SAFE_BUFFERS(void) atomic_wait_engine::notify_all(const void* data, u32 size, u128 mask)
+SAFE_BUFFERS(void)
+atomic_wait_engine::notify_all(const void* data)
 {
-	const uptr iptr = reinterpret_cast<uptr>(data) & (~s_ref_mask >> 17);
-
 	if (s_tls_notify_cb)
 		s_tls_notify_cb(data, 0);

+#ifdef __linux__
+	futex(const_cast<void*>(data), FUTEX_WAKE_PRIVATE, 0x7fff'ffff);
+#else
+	const uptr iptr = reinterpret_cast<uptr>(data) & (~s_ref_mask >> 16);
+
 	// Array count for batch notification
 	u32 count = 0;

 	// Array itself.
 	u32 cond_ids[128];

-	root_info::slot_search(iptr, mask, [&](u32 cond_id)
+	root_info::slot_search(iptr, [&](u32 cond_id)
 	{
 		if (count >= 128)
 		{
 			// Unusual big amount of sema: fallback to notify_one alg
-			alert_sema(cond_id, size, mask);
+			alert_sema(cond_id, 4);
 			return false;
 		}

-		u32 res = alert_sema<true>(cond_id, size, mask);
+		u32 res = alert_sema<true>(cond_id, 4);

 		if (~res <= u16{umax})
 		{
@@ -1395,7 +1303,7 @@ SAFE_BUFFERS(void) atomic_wait_engine::notify_all(const void* data, u32 size, u1
 	{
 		const u32 cond_id = *(std::end(cond_ids) - i - 1);

-		if (!s_cond_list[cond_id].wakeup(size ? 1 : 2))
+		if (!s_cond_list[cond_id].wakeup(1))
 		{
 			*(std::end(cond_ids) - i - 1) = ~cond_id;
 		}
 	}
@@ -1434,6 +1342,7 @@ SAFE_BUFFERS(void) atomic_wait_engine::notify_all(const void* data, u32 size, u1
 	{
 		cond_free(~*(std::end(cond_ids) - i - 1));
 	}
+#endif

 	if (s_tls_notify_cb)
 		s_tls_notify_cb(data, -1);
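
With the engine reduced to plain 32-bit futexes, the public surface (reworked in the header changes below) becomes deliberately small: wait() compares one 32-bit value, and notification no longer carries a size/mask pair. Typical usage, assuming RPCS3's util headers:

    #include "util/types.hpp"  // u32 and friends (RPCS3)
    #include "util/atomic.hpp" // atomic_t (RPCS3)

    atomic_t<u32> g_flag{0};

    void consumer()
    {
        // wait(0) returns when the value may no longer be 0; always recheck
        while (g_flag.load() == 0)
        {
            g_flag.wait(0);
        }
    }

    void producer()
    {
        g_flag.store(1);
        g_flag.notify_all(); // wakes every thread waiting on this address
    }
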

diff --git a/rpcs3/util/atomic.hpp b/rpcs3/util/atomic.hpp
index c3132584cd..d546a9c434 100644
--- a/rpcs3/util/atomic.hpp
+++ b/rpcs3/util/atomic.hpp
@@ -129,54 +129,21 @@ enum class atomic_wait_timeout : u64
 	inf = 0xffffffffffffffff,
 };

+template <typename T>
+class lf_queue;
+
+namespace stx
+{
+	template <typename T>
+	class atomic_ptr;
+}
+
 // Various extensions for atomic_t::wait
 namespace atomic_wait
 {
 	// Max number of simultaneous atomic variables to wait on (can be extended if really necessary)
 	constexpr uint max_list = 8;

-	enum class op : u8
-	{
-		eq, // Wait while value is bitwise equal to
-		slt, // Wait while signed value is less than
-		sgt, // Wait while signed value is greater than
-		ult, // Wait while unsigned value is less than
-		ugt, // Wait while unsigned value is greater than
-		alt, // Wait while absolute value is less than
-		agt, // Wait while absolute value is greater than
-		pop, // Wait while set bit count of the value is less than
-		__max
-	};
-
-	static_assert(static_cast<u8>(op::__max) == 8);
-
-	enum class op_flag : u8
-	{
-		inverse = 1 << 4, // Perform inverse operation (negate the result)
-		bit_not = 1 << 5, // Perform bitwise NOT on loaded value before operation
-		byteswap = 1 << 6, // Perform byteswap on both arguments and masks when applicable
-	};
-
-	constexpr op_flag op_be = std::endian::native == std::endian::little ? op_flag::byteswap : op_flag{0};
-	constexpr op_flag op_le = std::endian::native == std::endian::little ? op_flag{0} : op_flag::byteswap;
-
-	constexpr op operator |(op_flag lhs, op_flag rhs)
-	{
-		return op{static_cast<u8>(static_cast<u8>(lhs) | static_cast<u8>(rhs))};
-	}
-
-	constexpr op operator |(op_flag lhs, op rhs)
-	{
-		return op{static_cast<u8>(static_cast<u8>(lhs) | static_cast<u8>(rhs))};
-	}
-
-	constexpr op operator |(op lhs, op_flag rhs)
-	{
-		return op{static_cast<u8>(static_cast<u8>(lhs) | static_cast<u8>(rhs))};
-	}
-
-	constexpr op op_ne = op::eq | op_flag::inverse;
-
 	constexpr struct any_value_t
 	{
 		template <typename T>
@@ -186,46 +153,10 @@ namespace atomic_wait
 		}
 	} any_value;

-	template <typename X>
-	using payload_type = decltype(std::declval<X>().observe());
-
-	template <typename X, typename T = payload_type<X>>
-	constexpr u128 default_mask = sizeof(T) <= 8 ? u128{u64{umax} >> ((64 - sizeof(T) * 8) & 63)} : u128(-1);
-
-	template <typename X, typename T = payload_type<X>>
-	constexpr u128 get_value(X&, T value = T{}, ...)
-	{
-		static_assert((sizeof(T) & (sizeof(T) - 1)) == 0);
-		static_assert(sizeof(T) <= 16);
-		return std::bit_cast<get_uint_t<sizeof(T)>, T>(value);
-	}
-
 	struct info
 	{
 		const void* data;
-		u32 size;
-		u128 old;
-		u128 mask;
-
-		template <typename X, typename T = payload_type<X>>
-		constexpr void set_value(X& a, T value = T{})
-		{
-			old = get_value(a, value);
-		}
-
-		template <typename X, typename T = payload_type<X>>
-		constexpr void set_mask(T value)
-		{
-			static_assert((sizeof(T) & (sizeof(T) - 1)) == 0);
-			static_assert(sizeof(T) <= 16);
-			mask = std::bit_cast<get_uint_t<sizeof(T)>, T>(value);
-		}
-
-		template <typename X, typename T = payload_type<X>>
-		constexpr void set_mask()
-		{
-			mask = default_mask<X>;
-		}
+		u32 old;
 	};

 	template <uint Max, typename... T>
@@ -243,9 +174,9 @@ namespace atomic_wait

 		constexpr list& operator=(const list&) noexcept = default;

-		template <typename... U, typename = std::void_t<decltype(std::declval<U>().template wait<op::eq>(any_value))...>>
+		template <typename... U, typename = std::void_t<decltype(std::declval<U>().wait(any_value))...>>
 		constexpr list(U&... vars)
-			: m_info{{&vars, sizeof(vars.observe()), get_value(vars), default_mask<U>}...}
+			: m_info{{&vars, 0}...}
 		{
 			static_assert(sizeof...(U) == Max, "Inconsistent amount of atomics.");
 		}
@@ -256,40 +187,37 @@ namespace atomic_wait
 			static_assert(sizeof...(U) == Max, "Inconsistent amount of values.");

 			auto* ptr = m_info;
-			((ptr->template set_value<T>(*static_cast<T*>(ptr->data), values), ptr++), ...);
+			(((ptr->old = std::bit_cast<u32>(values)), ptr++), ...);
 			return *this;
 		}

-		template <typename... U>
-		constexpr list& masks(U... masks)
-		{
-			static_assert(sizeof...(U) <= Max, "Too many masks.");
-
-			auto* ptr = m_info;
-			((ptr++)->template set_mask<T>(masks), ...);
-			return *this;
-		}
-
-		template <uint Index, op Flags = op::eq, typename T2, typename U, typename = std::void_t<decltype(std::declval<T2>().template wait<op::eq>(any_value))>>
+		template <uint Index, typename T2, typename U, typename = std::void_t<decltype(std::declval<T2>().wait(any_value))>>
 		constexpr void set(T2& var, U value)
 		{
 			static_assert(Index < Max);

 			m_info[Index].data = &var;
-			m_info[Index].size = sizeof(var.observe()) | (static_cast<u8>(Flags) << 8);
-			m_info[Index].template set_value<T2>(var, value);
-			m_info[Index].template set_mask<T2>();
+			m_info[Index].old = std::bit_cast<u32>(value);
 		}

-		template <uint Index, op Flags = op::eq, typename T2, typename U, typename V, typename = std::void_t<decltype(std::declval<T2>().template wait<op::eq>(any_value))>>
-		constexpr void set(T2& var, U value, V mask)
+		template <uint Index, typename T>
+		constexpr void set(lf_queue<T>& var, std::nullptr_t = nullptr)
 		{
 			static_assert(Index < Max);
+			static_assert(sizeof(var) == sizeof(uptr));

-			m_info[Index].data = &var;
-			m_info[Index].size = sizeof(var.observe()) | (static_cast<u8>(Flags) << 8);
-			m_info[Index].template set_value<T2>(var, value);
-			m_info[Index].template set_mask<T2>(mask);
+			m_info[Index].data = reinterpret_cast<char*>(&var) + sizeof(u32);
+			m_info[Index].old = 0;
+		}
+
+		template <uint Index, typename T>
+		constexpr void set(stx::atomic_ptr<T>& var, std::nullptr_t = nullptr)
+		{
+			static_assert(Index < Max);
+			static_assert(sizeof(var) == sizeof(uptr));
+
+			m_info[Index].data = reinterpret_cast<char*>(&var) + sizeof(u32);
+			m_info[Index].old = 0;
 		}

 		// Timeout is discouraged
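
The two new set() overloads watch only the upper 32 bits of the 64-bit control word (the address of the container plus sizeof(u32)), which on a little-endian host is where atomic_ptr and lf_queue keep their pointer bits; that half-word is zero exactly while the container is empty. A standalone illustration of the aliasing (little-endian assumed; illustrative code, not RPCS3's):

    #include <cstdint>
    #include <cstring>

    // Control word laid out like the diff describes: 16 low bits of
    // borrowed-ref counter, pointer bits shifted left by 16.
    static bool pointer_half_nonzero(std::uint64_t ctrl)
    {
        std::uint32_t halves[2];
        std::memcpy(halves, &ctrl, sizeof halves);
        // halves[1] overlays bytes 4..7: the u32 futex word that the waiter
        // targets via "address + sizeof(u32)" on a little-endian host
        return halves[1] != 0;
    }

Any stored pointer above 64 KiB sets bits 32..63 of the control word, so watching the upper half for a transition away from 0 is equivalent to waiting for a non-null pointer in practice.
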
@@ -302,7 +230,7 @@ namespace atomic_wait
 		}
 	};

-	template <typename... T, typename = std::void_t<decltype(std::declval<T>().template wait<op::eq>(any_value))...>>
+	template <typename... T, typename = std::void_t<decltype(std::declval<T>().wait(any_value))...>>
 	list(T&... vars) -> list<sizeof...(T), T...>;
 }
@@ -322,20 +250,15 @@ private:
 	template <uint Max, typename... T>
 	friend class atomic_wait::list;

-	static void wait(const void* data, u32 size, u128 old_value, u64 timeout, u128 mask, atomic_wait::info* ext = nullptr);
+	static void wait(const void* data, u32 old_value, u64 timeout, atomic_wait::info* ext = nullptr);

 public:
-	static void notify_one(const void* data, u32 size, u128 mask128);
-	static void notify_all(const void* data, u32 size, u128 mask128);
+	static void notify_one(const void* data);
+	static void notify_all(const void* data);

 	static void set_wait_callback(bool(*cb)(const void* data, u64 attempts, u64 stamp0));
 	static void set_notify_callback(void(*cb)(const void* data, u64 progress));
-	static void set_one_time_use_wait_callback(bool(*cb)(u64 progress));
-
-	static void notify_all(const void* data)
-	{
-		notify_all(data, 0, u128(-1));
-	}
+	static void set_one_time_use_wait_callback(bool (*cb)(u64 progress));
 };

 template <uint Max, typename... T>
@@ -343,7 +266,7 @@ void atomic_wait::list<Max, T...>::wait(atomic_wait_timeout timeout)
 {
 	static_assert(!!Max, "Cannot initiate atomic wait with empty list.");

-	atomic_wait_engine::wait(m_info[0].data, m_info[0].size, m_info[0].old, static_cast<u64>(timeout), m_info[0].mask, m_info + 1);
+	atomic_wait_engine::wait(m_info[0].data, m_info[0].old, static_cast<u64>(timeout), m_info + 1);
 }

 // Helper class, provides access to compiler-specific atomic intrinsics
@@ -1759,46 +1682,31 @@ public:
 		});
 	}

-	// Timeout is discouraged
-	template <atomic_wait::op Flags = atomic_wait::op::eq>
-	void wait(type old_value, atomic_wait_timeout timeout = atomic_wait_timeout::inf) const noexcept
+	void wait(type old_value, atomic_wait_timeout timeout = atomic_wait_timeout::inf) const
+		requires(sizeof(type) == 4)
 	{
-		const u128 old = std::bit_cast<get_uint_t<sizeof(T)>>(old_value);
-		const u128 mask = atomic_wait::default_mask<atomic_t>;
-		atomic_wait_engine::wait(&m_data, sizeof(T) | (static_cast<u8>(Flags) << 8), old, static_cast<u64>(timeout), mask);
+		atomic_wait_engine::wait(&m_data, std::bit_cast<u32>(old_value), static_cast<u64>(timeout));
 	}

-	// Overload with mask (only selected bits are checked), timeout is discouraged
-	template <atomic_wait::op Flags = atomic_wait::op::eq>
-	void wait(type old_value, type mask_value, atomic_wait_timeout timeout = atomic_wait_timeout::inf) const noexcept
+	[[deprecated]] void wait(type old_value, atomic_wait_timeout timeout = atomic_wait_timeout::inf) const
+		requires(sizeof(type) == 8)
 	{
-		const u128 old = std::bit_cast<get_uint_t<sizeof(T)>>(old_value);
-		const u128 mask = std::bit_cast<get_uint_t<sizeof(T)>>(mask_value);
-		atomic_wait_engine::wait(&m_data, sizeof(T) | (static_cast<u8>(Flags) << 8), old, static_cast<u64>(timeout), mask);
+		atomic_wait::info ext[2]{};
+		ext[0].data = reinterpret_cast<const char*>(&m_data) + 4;
+		ext[0].old = static_cast<u32>(std::bit_cast<u64>(old_value) >> 32);
+		atomic_wait_engine::wait(&m_data, static_cast<u32>(std::bit_cast<u64>(old_value)), static_cast<u64>(timeout), ext);
 	}

-	void notify_one() noexcept
+	void notify_one()
+		requires(sizeof(type) == 4 || sizeof(type) == 8)
 	{
-		atomic_wait_engine::notify_one(&m_data, sizeof(T), atomic_wait::default_mask<atomic_t>);
+		atomic_wait_engine::notify_one(&m_data);
 	}

-	// Notify with mask, allowing to not wake up thread which doesn't wait on this mask
-	void notify_one(type mask_value) noexcept
+	void notify_all()
+		requires(sizeof(type) == 4 || sizeof(type) == 8)
 	{
-		const u128 mask = std::bit_cast<get_uint_t<sizeof(T)>>(mask_value);
-		atomic_wait_engine::notify_one(&m_data, sizeof(T), mask);
-	}
-
-	void notify_all() noexcept
-	{
-		atomic_wait_engine::notify_all(&m_data, sizeof(T), atomic_wait::default_mask<atomic_t>);
-	}
-
-	// Notify all threads with mask, allowing to not wake up threads which don't wait on them
-	void notify_all(type mask_value) noexcept
-	{
-		const u128 mask = std::bit_cast<get_uint_t<sizeof(T)>>(mask_value);
-		atomic_wait_engine::notify_all(&m_data, sizeof(T), mask);
+		atomic_wait_engine::notify_all(&m_data);
 	}
 };
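
The deprecated 8-byte overload above shows the emulation scheme: the low u32 half goes through the primary wait slot and the high half rides in the ext list, so the waiter wakes if either 32-bit half of the old value changes. The splitting in isolation (illustrative only):

    #include <cstdint>

    struct half_split
    {
        std::uint32_t low;
        std::uint32_t high;
    };

    // Split a 64-bit expected value the way the deprecated 8-byte wait() does:
    // the low u32 is compared at the base address, the high u32 at base + 4.
    constexpr half_split split_old(std::uint64_t old_value)
    {
        return { static_cast<std::uint32_t>(old_value),
                 static_cast<std::uint32_t>(old_value >> 32) };
    }

    static_assert(split_old(0xdeadbeef'0badc0deULL).low == 0x0badc0de);
    static_assert(split_old(0xdeadbeef'0badc0deULL).high == 0xdeadbeef);
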
@@ -1874,23 +1782,6 @@ public:
 	{
 		return base::fetch_xor(1) != 0;
 	}
-
-	// Timeout is discouraged
-	template <atomic_wait::op Flags = atomic_wait::op::eq>
-	void wait(bool old_value, atomic_wait_timeout timeout = atomic_wait_timeout::inf) const noexcept
-	{
-		base::template wait<Flags>(old_value, 1, timeout);
-	}
-
-	void notify_one() noexcept
-	{
-		base::notify_one(1);
-	}
-
-	void notify_all() noexcept
-	{
-		base::notify_all(1);
-	}
 };

 // Specializations
@@ -1904,12 +1795,6 @@ struct std::common_type<atomic_t<T, Align>, T2> : std::common_type<T, std::comm
 struct std::common_type<T, atomic_t<T2, Align2>> : std::common_type<std::common_type_t<T>, T2> {};

-namespace atomic_wait
-{
-	template <usz Align>
-	constexpr u128 default_mask<atomic_t<bool, Align>> = 1;
-}
-
 #ifndef _MSC_VER
 #pragma GCC diagnostic pop
 #pragma GCC diagnostic pop
diff --git a/rpcs3/util/fifo_mutex.hpp b/rpcs3/util/fifo_mutex.hpp
index cf2d9b0d0e..cddb7b11cf 100644
--- a/rpcs3/util/fifo_mutex.hpp
+++ b/rpcs3/util/fifo_mutex.hpp
@@ -6,33 +6,35 @@
 // Mutex that tries to maintain the order of acquisition
 class fifo_mutex
 {
-	// Low 8 bits are incremented on acquisition, high 8 bits are incremented on release
-	atomic_t<u16> m_value{0};
+	// Low 16 bits are incremented on acquisition, high 16 bits are incremented on release
+	atomic_t<u32> m_value{0};

 public:
 	constexpr fifo_mutex() noexcept = default;

 	void lock() noexcept
 	{
-		const u16 val = m_value.fetch_op([](u16& val)
+		// clang-format off
+		const u32 val = m_value.fetch_op([](u32& val)
 		{
-			val = (val & 0xff00) | ((val + 1) & 0xff);
+			val = (val & 0xffff0000) | ((val + 1) & 0xffff);
 		});
+		// clang-format on

-		if (val >> 8 != (val & 0xff)) [[unlikely]]
+		if (val >> 16 != (val & 0xffff)) [[unlikely]]
 		{
 			// TODO: implement busy waiting along with moving to cpp file
-			m_value.wait<atomic_wait::op_ne>(((val + 1) & 0xff) << 8, 0xff00);
+			m_value.wait((val & 0xffff0000) | ((val + 1) & 0xffff));
 		}
 	}

 	bool try_lock() noexcept
 	{
-		const u16 val = m_value.load();
+		const u32 val = m_value.load();

-		if (val >> 8 == (val & 0xff))
+		if (val >> 16 == (val & 0xffff))
 		{
-			if (m_value.compare_and_swap_test(val, ((val + 1) & 0xff) | (val & 0xff00)))
+			if (m_value.compare_and_swap_test(val, ((val + 1) & 0xffff) | (val & 0xffff0000)))
 			{
 				return true;
 			}
@@ -43,9 +45,9 @@ public:

 	void unlock() noexcept
 	{
-		const u16 val = m_value.add_fetch(0x100);
+		const u32 val = m_value.add_fetch(0x10000);

-		if (val >> 8 != (val & 0xff))
+		if (val >> 16 != (val & 0xffff))
 		{
 			m_value.notify_one();
 		}
@@ -53,9 +55,9 @@ public:

 	bool is_free() const noexcept
 	{
-		const u16 val = m_value.load();
+		const u32 val = m_value.load();

-		return (val >> 8) == (val & 0xff);
+		return (val >> 16) == (val & 0xffff);
 	}

 	void lock_unlock() noexcept
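
fifo_mutex is a ticket lock: the widened 32-bit word gives each counter 16 bits, so up to 65535 lock() calls can queue before the ticket half would wrap into the release half. The arithmetic in isolation (illustrative, not RPCS3 code):

    #include <cstdint>

    // One step of the fifo_mutex ticket arithmetic on the 32-bit word:
    // low 16 bits = tickets taken, high 16 bits = tickets served.
    constexpr std::uint32_t take_ticket(std::uint32_t v)
    {
        return (v & 0xffff0000) | ((v + 1) & 0xffff);
    }

    constexpr bool is_free(std::uint32_t v)
    {
        return (v >> 16) == (v & 0xffff);
    }

    // 0 taken / 0 served: free; after one lock() it stays busy until unlock()
    // adds 0x10000 to the served half.
    static_assert(is_free(0));
    static_assert(!is_free(take_ticket(0)));
    static_assert(is_free(take_ticket(0) + 0x10000));
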
diff --git a/rpcs3/util/media_utils.cpp b/rpcs3/util/media_utils.cpp
index f99617f98f..83eacc0b51 100644
--- a/rpcs3/util/media_utils.cpp
+++ b/rpcs3/util/media_utils.cpp
@@ -259,8 +259,8 @@ namespace utils
 	void audio_decoder::clear()
 	{
-		track_fully_decoded = false;
-		track_fully_consumed = false;
+		track_fully_decoded = 0;
+		track_fully_consumed = 0;
 		has_error = false;
 		m_size = 0;
 		duration_ms = 0;
@@ -274,7 +274,7 @@ namespace utils
 	{
 		auto& thread = *m_thread;
 		thread = thread_state::aborting;
-		track_fully_consumed = true;
+		track_fully_consumed = 1;
 		track_fully_consumed.notify_one();
 		thread();
 		m_thread.reset();
@@ -511,7 +511,7 @@ namespace utils
 				media_log.notice("audio_decoder: about to decode: %s (index=%d)", ::at32(m_context.playlist, m_context.current_track), m_context.current_track);

 				decode_track(::at32(m_context.playlist, m_context.current_track));
-				track_fully_decoded = true;
+				track_fully_decoded = 1;

 				if (has_error)
 				{
@@ -521,7 +521,7 @@ namespace utils
 				// Let's only decode one track at a time. Wait for the consumer to finish reading the track.
 				media_log.notice("audio_decoder: waiting until track is consumed...");
-				thread_ctrl::wait_on(track_fully_consumed, false);
+				thread_ctrl::wait_on(track_fully_consumed, 0);
 				track_fully_consumed = false;
 			}
diff --git a/rpcs3/util/media_utils.h b/rpcs3/util/media_utils.h
index 6c450a89d3..5c25d14be9 100644
--- a/rpcs3/util/media_utils.h
+++ b/rpcs3/util/media_utils.h
@@ -77,8 +77,8 @@ namespace utils
 		std::vector<u8> data;
 		atomic_t<u64> m_size = 0;
 		atomic_t<u64> duration_ms = 0;
-		atomic_t<bool> track_fully_decoded{false};
-		atomic_t<bool> track_fully_consumed{false};
+		atomic_t<u32> track_fully_decoded{0};
+		atomic_t<u32> track_fully_consumed{0};
 		atomic_t<bool> has_error{false};
 		std::deque<std::pair<u64, u64>> timestamps_ms;
diff --git a/rpcs3/util/shared_ptr.hpp b/rpcs3/util/shared_ptr.hpp
index 6e3e9dbfb9..23d4bfd974 100644
--- a/rpcs3/util/shared_ptr.hpp
+++ b/rpcs3/util/shared_ptr.hpp
@@ -3,6 +3,7 @@
 #include
 #include
 #include "atomic.hpp"
+#include "asm.hpp"

 namespace stx
 {
@@ -21,7 +22,7 @@ namespace stx
 	// Basic assumption of userspace pointer size
 	constexpr uint c_ptr_size = 48;

-	// Use lower 17 bits as atomic_ptr internal counter of borrowed refs (pointer itself is shifted)
+	// Use lower 16 bits as atomic_ptr internal counter of borrowed refs (pointer itself is shifted)
 	constexpr uint c_ref_mask = 0xffff, c_ref_size = 16;

 	// Remaining pointer bits
@@ -1054,20 +1055,19 @@ namespace stx
 		return observe() == r.get();
 	}

-	template <atomic_wait::op Flags = atomic_wait::op::eq>
-	void wait(const volatile void* value, atomic_wait_timeout timeout = atomic_wait_timeout::inf)
+	void wait(std::nullptr_t, atomic_wait_timeout timeout = atomic_wait_timeout::inf)
 	{
-		m_val.wait(reinterpret_cast<uptr>(value) << c_ref_size, c_ptr_mask, timeout);
+		utils::bless<atomic_t<u32>>(&m_val)[1].wait(0, timeout);
 	}

 	void notify_one()
 	{
-		m_val.notify_one(c_ptr_mask);
+		utils::bless<atomic_t<u32>>(&m_val)[1].notify_one();
 	}

 	void notify_all()
 	{
-		m_val.notify_all(c_ptr_mask);
+		utils::bless<atomic_t<u32>>(&m_val)[1].notify_all();
 	}
 };
@@ -1110,18 +1110,6 @@ namespace stx
 	} null_ptr;
 }

-namespace atomic_wait
-{
-	template <typename T>
-	constexpr u128 default_mask<stx::atomic_ptr<T>> = stx::c_ptr_mask;
-
-	template <typename T>
-	constexpr u128 get_value(stx::atomic_ptr<T>&, const volatile void* value = nullptr)
-	{
-		return reinterpret_cast<uptr>(value) << stx::c_ref_size;
-	}
-}
-
 using stx::null_ptr;
 using stx::single_ptr;
 using stx::shared_ptr;
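
With the masked get_value/default_mask machinery gone, waiting for an atomic_ptr to become non-null is expressed directly against the upper half of its control word, as the wait(nullptr) overload above shows. A usage sketch, assuming RPCS3's stx::atomic_ptr and stx::make_single from this same header:

    #include "util/types.hpp"      // u32 and friends (RPCS3)
    #include "util/shared_ptr.hpp" // stx::atomic_ptr, stx::make_single (RPCS3)

    stx::atomic_ptr<int> g_ptr;

    void waiter()
    {
        // Blocks while the stored pointer is null (upper futex half is 0)
        g_ptr.wait(nullptr);
    }

    void publisher()
    {
        g_ptr = stx::make_single<int>(42);
        g_ptr.notify_all();
    }
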