diff --git a/Utilities/Atomic.h b/Utilities/Atomic.h index e172793672..9bd54053f2 100644 --- a/Utilities/Atomic.h +++ b/Utilities/Atomic.h @@ -12,11 +12,26 @@ struct atomic_storage /* First part: Non-MSVC intrinsics */ #ifndef _MSC_VER + +#if defined(__ATOMIC_HLE_ACQUIRE) && defined(__ATOMIC_HLE_RELEASE) + static constexpr int s_hle_ack = __ATOMIC_SEQ_CST | __ATOMIC_HLE_ACQUIRE; + static constexpr int s_hle_rel = __ATOMIC_SEQ_CST | __ATOMIC_HLE_RELEASE; +#else + static constexpr int s_hle_ack = __ATOMIC_SEQ_CST; + static constexpr int s_hle_rel = __ATOMIC_SEQ_CST; +#endif + static inline bool compare_exchange(T& dest, T& comp, T exch) { return __atomic_compare_exchange(&dest, &comp, &exch, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); } + static inline bool compare_exchange_hle_acq(T& dest, T& comp, T exch) + { + static_assert(sizeof(T) == 4 || sizeof(T) == 8); + return __atomic_compare_exchange(&dest, &comp, &exch, false, s_hle_ack, s_hle_ack); + } + static inline T load(const T& dest) { T result; @@ -46,6 +61,12 @@ struct atomic_storage return __atomic_fetch_add(&dest, value, __ATOMIC_SEQ_CST); } + static inline T fetch_add_hle_rel(T& dest, T value) + { + static_assert(sizeof(T) == 4 || sizeof(T) == 8); + return __atomic_fetch_add(&dest, value, s_hle_rel); + } + static inline T add_fetch(T& dest, T value) { return __atomic_add_fetch(&dest, value, __ATOMIC_SEQ_CST); @@ -353,6 +374,14 @@ struct atomic_storage : atomic_storage return r == v; } + static inline bool compare_exchange_hle_acq(T& dest, T& comp, T exch) + { + long v = *(long*)&comp; + long r = _InterlockedCompareExchange_HLEAcquire((volatile long*)&dest, (long&)exch, v); + comp = (T&)r; + return r == v; + } + static inline T load(const T& dest) { long value = *(const volatile long*)&dest; @@ -383,6 +412,12 @@ struct atomic_storage : atomic_storage return (T&)r; } + static inline T fetch_add_hle_rel(T& dest, T value) + { + long r = _InterlockedExchangeAdd_HLERelease((volatile long*)&dest, (long&)value); + return 
(T&)r; + } + static inline T fetch_and(T& dest, T value) { long r = _InterlockedAnd((volatile long*)&dest, (long&)value); @@ -458,6 +493,14 @@ struct atomic_storage : atomic_storage return r == v; } + static inline bool compare_exchange_hle_acq(T& dest, T& comp, T exch) + { + llong v = *(llong*)&comp; + llong r = _InterlockedCompareExchange64_HLEAcquire((volatile llong*)&dest, (llong&)exch, v); + comp = (T&)r; + return r == v; + } + static inline T load(const T& dest) { llong value = *(const volatile llong*)&dest; @@ -488,6 +531,12 @@ struct atomic_storage : atomic_storage return (T&)r; } + static inline T fetch_add_hle_rel(T& dest, T value) + { + llong r = _InterlockedExchangeAdd64_HLERelease((volatile llong*)&dest, (llong&)value); + return (T&)r; + } + static inline T fetch_and(T& dest, T value) { llong r = _InterlockedAnd64((volatile llong*)&dest, (llong&)value); diff --git a/Utilities/mutex.h b/Utilities/mutex.h index 6a75b026a6..cbe7599ef3 100644 --- a/Utilities/mutex.h +++ b/Utilities/mutex.h @@ -46,6 +46,22 @@ public: } } + void lock_shared_hle() + { + const u32 value = m_value.load(); + + if (LIKELY(value < c_one - 1)) + { + u32 old = value; + if (LIKELY(atomic_storage<u32>::compare_exchange_hle_acq(m_value.raw(), old, value + 1))) + { + return; + } + } + + imp_lock_shared(value); + } + void unlock_shared() { // Unconditional decrement (can result in broken state) @@ -57,6 +73,16 @@ public: } } + void unlock_shared_hle() + { + const u32 value = atomic_storage<u32>::fetch_add_hle_rel(m_value.raw(), -1); + + if (UNLIKELY(value >= c_one)) + { + imp_unlock_shared(value); + } + } + bool try_lock() { return m_value.compare_and_swap_test(0, c_one); @@ -72,6 +98,16 @@ public: } } + void lock_hle() + { + u32 value = 0; + + if (UNLIKELY(!atomic_storage<u32>::compare_exchange_hle_acq(m_value.raw(), value, c_one))) + { + imp_lock(value); + } + } + void unlock() { // Unconditional decrement (can result in broken state) @@ -83,6 +119,16 @@ public: } } + void unlock_hle() + { + const u32 
value = atomic_storage<u32>::fetch_add_hle_rel(m_value.raw(), -c_one); + + if (UNLIKELY(value != c_one)) + { + imp_unlock(value); + } + } + bool try_lock_upgrade() { const u32 value = m_value.load();