From 68cf47e2458ea677d1dae7d16f73242c5deb4ed0 Mon Sep 17 00:00:00 2001 From: Joel Linn Date: Fri, 6 Nov 2020 18:45:32 +0100 Subject: [PATCH] [threading] Fix Fence for multiple waiting threads --- src/xenia/base/testing/threading_test.cc | 16 +++++----- src/xenia/base/threading.h | 39 +++++++++++++++++++++--- 2 files changed, 43 insertions(+), 12 deletions(-) diff --git a/src/xenia/base/testing/threading_test.cc b/src/xenia/base/testing/threading_test.cc index ad663812f..8d5f74449 100644 --- a/src/xenia/base/testing/threading_test.cc +++ b/src/xenia/base/testing/threading_test.cc @@ -38,6 +38,13 @@ TEST_CASE("Fence") { pFence->Signal(); pFence->Wait(); + // Signal and wait two times + pFence = std::make_unique(); + pFence->Signal(); + pFence->Wait(); + pFence->Signal(); + pFence->Wait(); + // Test to synchronize multiple threads std::atomic started(0); std::atomic finished(0); @@ -57,15 +64,10 @@ TEST_CASE("Fence") { }); Sleep(100ms); + REQUIRE(started.load() == threads.size()); REQUIRE(finished.load() == 0); - // TODO(bwrsandman): Check if this is correct behaviour: looping with Sleep - // is the only way to get fence to signal all threads on windows - for (int i = 0; i < threads.size(); ++i) { - Sleep(10ms); - pFence->Signal(); - } - REQUIRE(started.load() == threads.size()); + pFence->Signal(); for (auto& t : threads) t.join(); REQUIRE(finished.load() == threads.size()); diff --git a/src/xenia/base/threading.h b/src/xenia/base/threading.h index 1e10be22b..776a158e0 100644 --- a/src/xenia/base/threading.h +++ b/src/xenia/base/threading.h @@ -24,27 +24,56 @@ #include #include +#include "xenia/base/assert.h" + namespace xe { namespace threading { +// This is more like an Event with self-reset when returning from Wait() class Fence { public: - Fence() : signaled_(false) {} + Fence() : signal_state_(0) {} + void Signal() { std::unique_lock lock(mutex_); - signaled_ = true; + signal_state_ |= SIGMASK_; cond_.notify_all(); } + + // Wait for the Fence to be signaled. Clears the signal on return. void Wait() { std::unique_lock lock(mutex_); - cond_.wait(lock, [this] { return signaled_; }); - signaled_ = false; + assert_true((signal_state_ & ~SIGMASK_) < (SIGMASK_ - 1) && + "Too many threads?"); + + // keep local copy to minimize loads + auto signal_state = ++signal_state_; + for (; !(signal_state & SIGMASK_); signal_state = signal_state_) { + cond_.wait(lock); + } + + // We can't just clear the signal as other threads may not have read it yet + assert_true((signal_state & ~SIGMASK_) > 0); // wait_count > 0 + if (signal_state == (1 | SIGMASK_)) { // wait_count == 1 + // Last one out turn off the lights + signal_state_ = 0; + } else { + // Oops, another thread is still waiting, set the new count and keep the + // signal. + signal_state_ = --signal_state; + } } private: + using state_t_ = uint_fast32_t; + static constexpr state_t_ SIGMASK_ = state_t_(1) + << (sizeof(state_t_) * 8 - 1); + std::mutex mutex_; std::condition_variable cond_; - bool signaled_; + // Use the highest bit (sign bit) as the signal flag and the rest to count + // waiting threads. + volatile state_t_ signal_state_; }; // Returns the total number of logical processors in the host system.