
697 lines
18 KiB
Raw Normal View History

#pragma once
#if defined(__GNUG__)
template<typename T, typename T2> inline std::enable_if_t<IS_INTEGRAL(T), T> sync_val_compare_and_swap(volatile T* dest, T2 comp, T2 exch)
return __sync_val_compare_and_swap(dest, comp, exch);
template<typename T, typename T2> inline std::enable_if_t<IS_INTEGRAL(T), bool> sync_bool_compare_and_swap(volatile T* dest, T2 comp, T2 exch)
return __sync_bool_compare_and_swap(dest, comp, exch);
template<typename T, typename T2> inline std::enable_if_t<IS_INTEGRAL(T), T> sync_lock_test_and_set(volatile T* dest, T2 value)
return __sync_lock_test_and_set(dest, value);
template<typename T, typename T2> inline std::enable_if_t<IS_INTEGRAL(T), T> sync_fetch_and_add(volatile T* dest, T2 value)
return __sync_fetch_and_add(dest, value);
template<typename T, typename T2> inline std::enable_if_t<IS_INTEGRAL(T), T> sync_fetch_and_sub(volatile T* dest, T2 value)
return __sync_fetch_and_sub(dest, value);
template<typename T, typename T2> inline std::enable_if_t<IS_INTEGRAL(T), T> sync_fetch_and_or(volatile T* dest, T2 value)
return __sync_fetch_and_or(dest, value);
template<typename T, typename T2> inline std::enable_if_t<IS_INTEGRAL(T), T> sync_fetch_and_and(volatile T* dest, T2 value)
return __sync_fetch_and_and(dest, value);
template<typename T, typename T2> inline std::enable_if_t<IS_INTEGRAL(T), T> sync_fetch_and_xor(volatile T* dest, T2 value)
return __sync_fetch_and_xor(dest, value);
#elif defined(_MSC_VER)
// Atomic compare-and-swap (MSVC intrinsics), one overload per storage width.
// Each overload returns the value the intrinsic reports for *dest.
// The intrinsics take (destination, exchange, comparand), hence the swapped argument order below.
inline u8 sync_val_compare_and_swap(volatile u8* dest, u8 comp, u8 exch)
{
	return _InterlockedCompareExchange8(reinterpret_cast<volatile char*>(dest), exch, comp);
}

inline u16 sync_val_compare_and_swap(volatile u16* dest, u16 comp, u16 exch)
{
	return _InterlockedCompareExchange16(reinterpret_cast<volatile short*>(dest), exch, comp);
}

inline u32 sync_val_compare_and_swap(volatile u32* dest, u32 comp, u32 exch)
{
	return _InterlockedCompareExchange(reinterpret_cast<volatile long*>(dest), exch, comp);
}

inline u64 sync_val_compare_and_swap(volatile u64* dest, u64 comp, u64 exch)
{
	return _InterlockedCompareExchange64(reinterpret_cast<volatile long long*>(dest), exch, comp);
}

inline u128 sync_val_compare_and_swap(volatile u128* dest, u128 comp, u128 exch)
{
	// The 128-bit intrinsic reports the observed value through its last pointer argument,
	// so the by-value `comp` copy doubles as the result buffer.
	_InterlockedCompareExchange128(
		reinterpret_cast<volatile long long*>(dest),
		exch.hi,
		exch.lo,
		reinterpret_cast<long long*>(&comp));

	return comp;
}
inline bool sync_bool_compare_and_swap(volatile u8* dest, u8 comp, u8 exch)
return (u8)_InterlockedCompareExchange8((volatile char*)dest, exch, comp) == comp;
inline bool sync_bool_compare_and_swap(volatile u16* dest, u16 comp, u16 exch)
return (u16)_InterlockedCompareExchange16((volatile short*)dest, exch, comp) == comp;
inline bool sync_bool_compare_and_swap(volatile u32* dest, u32 comp, u32 exch)
return (u32)_InterlockedCompareExchange((volatile long*)dest, exch, comp) == comp;
inline bool sync_bool_compare_and_swap(volatile u64* dest, u64 comp, u64 exch)
return (u64)_InterlockedCompareExchange64((volatile long long*)dest, exch, comp) == comp;
inline bool sync_bool_compare_and_swap(volatile u128* dest, u128 comp, u128 exch)
return _InterlockedCompareExchange128((volatile long long*)dest, exch.hi, exch.lo, (long long*)&comp) != 0;
// Atomic exchange (MSVC intrinsics): writes `value` to *dest and returns what the intrinsic reports as the previous contents.
inline u8 sync_lock_test_and_set(volatile u8* dest, u8 value)
{
	return _InterlockedExchange8(reinterpret_cast<volatile char*>(dest), value);
}

inline u16 sync_lock_test_and_set(volatile u16* dest, u16 value)
{
	return _InterlockedExchange16(reinterpret_cast<volatile short*>(dest), value);
}

inline u32 sync_lock_test_and_set(volatile u32* dest, u32 value)
{
	return _InterlockedExchange(reinterpret_cast<volatile long*>(dest), value);
}

inline u64 sync_lock_test_and_set(volatile u64* dest, u64 value)
{
	return _InterlockedExchange64(reinterpret_cast<volatile long long*>(dest), value);
}

// 128-bit exchange, emulated with a compare-and-swap retry loop.
inline u128 sync_lock_test_and_set(volatile u128* dest, u128 value)
{
	for (;;)
	{
		// The two halves are read separately; if the snapshot does not match *dest
		// the compare-and-swap fails and the loop starts over.
		u128 snapshot;
		snapshot.lo = dest->lo;
		snapshot.hi = dest->hi;

		if (sync_bool_compare_and_swap(dest, snapshot, value))
		{
			return snapshot;
		}
	}
}
// Atomic fetch-and-add (MSVC intrinsics): adds `value` to *dest and returns what the intrinsic reports as the previous contents.
inline u8 sync_fetch_and_add(volatile u8* dest, u8 value)
{
	return _InterlockedExchangeAdd8(reinterpret_cast<volatile char*>(dest), value);
}

inline u16 sync_fetch_and_add(volatile u16* dest, u16 value)
{
	return _InterlockedExchangeAdd16(reinterpret_cast<volatile short*>(dest), value);
}

inline u32 sync_fetch_and_add(volatile u32* dest, u32 value)
{
	return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(dest), value);
}

inline u64 sync_fetch_and_add(volatile u64* dest, u64 value)
{
	return _InterlockedExchangeAdd64(reinterpret_cast<volatile long long*>(dest), value);
}

// 128-bit fetch-and-add, emulated with a compare-and-swap retry loop.
inline u128 sync_fetch_and_add(volatile u128* dest, u128 value)
{
	for (;;)
	{
		// Snapshot both halves; a mismatching snapshot makes the compare-and-swap fail and the loop retry.
		u128 snapshot;
		snapshot.lo = dest->lo;
		snapshot.hi = dest->hi;

		if (sync_bool_compare_and_swap(dest, snapshot, snapshot + value))
		{
			return snapshot;
		}
	}
}
// Atomic fetch-and-sub (MSVC intrinsics): implemented as an exchange-add of the negated operand;
// returns what the intrinsic reports as the previous contents.
inline u8 sync_fetch_and_sub(volatile u8* dest, u8 value)
{
	return _InterlockedExchangeAdd8(reinterpret_cast<volatile char*>(dest), -static_cast<char>(value));
}

inline u16 sync_fetch_and_sub(volatile u16* dest, u16 value)
{
	return _InterlockedExchangeAdd16(reinterpret_cast<volatile short*>(dest), -static_cast<short>(value));
}

inline u32 sync_fetch_and_sub(volatile u32* dest, u32 value)
{
	return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(dest), -static_cast<long>(value));
}

inline u64 sync_fetch_and_sub(volatile u64* dest, u64 value)
{
	return _InterlockedExchangeAdd64(reinterpret_cast<volatile long long*>(dest), -static_cast<long long>(value));
}

// 128-bit fetch-and-sub, emulated with a compare-and-swap retry loop.
inline u128 sync_fetch_and_sub(volatile u128* dest, u128 value)
{
	for (;;)
	{
		// Snapshot both halves; a mismatching snapshot makes the compare-and-swap fail and the loop retry.
		u128 snapshot;
		snapshot.lo = dest->lo;
		snapshot.hi = dest->hi;

		if (sync_bool_compare_and_swap(dest, snapshot, snapshot - value))
		{
			return snapshot;
		}
	}
}
// Atomic fetch-and-or (MSVC intrinsics): ORs `value` into *dest and returns what the intrinsic reports as the previous contents.
inline u8 sync_fetch_and_or(volatile u8* dest, u8 value)
{
	return _InterlockedOr8(reinterpret_cast<volatile char*>(dest), value);
}

inline u16 sync_fetch_and_or(volatile u16* dest, u16 value)
{
	return _InterlockedOr16(reinterpret_cast<volatile short*>(dest), value);
}

inline u32 sync_fetch_and_or(volatile u32* dest, u32 value)
{
	return _InterlockedOr(reinterpret_cast<volatile long*>(dest), value);
}

inline u64 sync_fetch_and_or(volatile u64* dest, u64 value)
{
	return _InterlockedOr64(reinterpret_cast<volatile long long*>(dest), value);
}

// 128-bit fetch-and-or, emulated with a compare-and-swap retry loop.
inline u128 sync_fetch_and_or(volatile u128* dest, u128 value)
{
	for (;;)
	{
		// Snapshot both halves; a mismatching snapshot makes the compare-and-swap fail and the loop retry.
		u128 snapshot;
		snapshot.lo = dest->lo;
		snapshot.hi = dest->hi;

		if (sync_bool_compare_and_swap(dest, snapshot, snapshot | value))
		{
			return snapshot;
		}
	}
}
// Atomic fetch-and-and (MSVC intrinsics): ANDs `value` into *dest and returns what the intrinsic reports as the previous contents.
inline u8 sync_fetch_and_and(volatile u8* dest, u8 value)
{
	return _InterlockedAnd8(reinterpret_cast<volatile char*>(dest), value);
}

inline u16 sync_fetch_and_and(volatile u16* dest, u16 value)
{
	return _InterlockedAnd16(reinterpret_cast<volatile short*>(dest), value);
}

inline u32 sync_fetch_and_and(volatile u32* dest, u32 value)
{
	return _InterlockedAnd(reinterpret_cast<volatile long*>(dest), value);
}

inline u64 sync_fetch_and_and(volatile u64* dest, u64 value)
{
	return _InterlockedAnd64(reinterpret_cast<volatile long long*>(dest), value);
}

// 128-bit fetch-and-and, emulated with a compare-and-swap retry loop.
inline u128 sync_fetch_and_and(volatile u128* dest, u128 value)
{
	for (;;)
	{
		// Snapshot both halves; a mismatching snapshot makes the compare-and-swap fail and the loop retry.
		u128 snapshot;
		snapshot.lo = dest->lo;
		snapshot.hi = dest->hi;

		if (sync_bool_compare_and_swap(dest, snapshot, snapshot & value))
		{
			return snapshot;
		}
	}
}
// Atomic fetch-and-xor (MSVC intrinsics): XORs `value` into *dest and returns what the intrinsic reports as the previous contents.
inline u8 sync_fetch_and_xor(volatile u8* dest, u8 value)
{
	return _InterlockedXor8(reinterpret_cast<volatile char*>(dest), value);
}

inline u16 sync_fetch_and_xor(volatile u16* dest, u16 value)
{
	return _InterlockedXor16(reinterpret_cast<volatile short*>(dest), value);
}

inline u32 sync_fetch_and_xor(volatile u32* dest, u32 value)
{
	return _InterlockedXor(reinterpret_cast<volatile long*>(dest), value);
}

inline u64 sync_fetch_and_xor(volatile u64* dest, u64 value)
{
	return _InterlockedXor64(reinterpret_cast<volatile long long*>(dest), value);
}

// 128-bit fetch-and-xor, emulated with a compare-and-swap retry loop.
inline u128 sync_fetch_and_xor(volatile u128* dest, u128 value)
{
	for (;;)
	{
		// Snapshot both halves; a mismatching snapshot makes the compare-and-swap fail and the loop retry.
		u128 snapshot;
		snapshot.lo = dest->lo;
		snapshot.hi = dest->hi;

		if (sync_bool_compare_and_swap(dest, snapshot, snapshot ^ value))
		{
			return snapshot;
		}
	}
}
#endif /* _MSC_VER */
template<typename T, std::size_t Size = sizeof(T)> struct atomic_storage
static_assert(!Size, "Invalid atomic type");
template<typename T> struct atomic_storage<T, 1>
using type = u8;
template<typename T> struct atomic_storage<T, 2>
using type = u16;
template<typename T> struct atomic_storage<T, 4>
using type = u32;
template<typename T> struct atomic_storage<T, 8>
using type = u64;
template<typename T> struct atomic_storage<T, 16>
using type = u128;
template<typename T> using atomic_storage_t = typename atomic_storage<T>::type;
// Atomic result wrapper: runs the user operation on `var` and captures the value the atomic op hands back.
//   T  - type of the operation (callable object or pointer to member function)
//   RT - return type of the operation; `void` selects the specializations that capture the previous value
//   VT - type of the variable the operation is applied to
template<typename T, typename RT, typename VT>
struct atomic_op_result_t
{
	RT result;

	// Calls func(var, args...) and keeps its return value
	template<typename... Args>
	atomic_op_result_t(T func, VT& var, Args&&... args)
		: result(func(var, std::forward<Args>(args)...))
	{
	}

	// Hands the captured value over to the caller
	RT move()
	{
		return std::move(result);
	}
};

// void specialization: result is the initial value of the first arg
template<typename T, typename VT>
struct atomic_op_result_t<T, void, VT>
{
	VT result;

	// Copies `var` before the call, then calls func(var, args...)
	template<typename... Args>
	atomic_op_result_t(T func, VT& var, Args&&... args)
		: result(var)
	{
		func(var, std::forward<Args>(args)...);
	}

	// Hands the captured initial value over to the caller
	VT move()
	{
		return std::move(result);
	}
};

// member function specialization
template<typename CT, typename... FArgs, typename RT, typename VT>
struct atomic_op_result_t<RT(CT::*)(FArgs...), RT, VT>
{
	RT result;

	// Calls (var.*func)(args...) and keeps its return value
	template<typename... Args>
	atomic_op_result_t(RT(CT::*func)(FArgs...), VT& var, Args&&... args)
		: result((var.*func)(std::forward<Args>(args)...))
	{
	}

	// Hands the captured value over to the caller
	RT move()
	{
		return std::move(result);
	}
};

// member function void specialization: result is the initial value of `var`
template<typename CT, typename... FArgs, typename VT>
struct atomic_op_result_t<void(CT::*)(FArgs...), void, VT>
{
	VT result;

	// Copies `var` before the call, then calls (var.*func)(args...).
	// The call must happen here: without it the operation would never be applied to `var`.
	template<typename... Args>
	atomic_op_result_t(void(CT::*func)(FArgs...), VT& var, Args&&... args)
		: result(var)
	{
		(var.*func)(std::forward<Args>(args)...);
	}

	// Hands the captured initial value over to the caller
	VT move()
	{
		return std::move(result);
	}
};
// Atomic type with lock-free and standard layout guarantees (and appropriate limitations)
template<typename T> class atomic_t
using type = std::remove_cv_t<T>;
using stype = atomic_storage_t<type>;
using storage = atomic_storage<type>;
static_assert(alignof(type) <= alignof(stype), "atomic_t<> error: unexpected alignment");
stype m_data;
template<typename T2> static inline void write_relaxed(volatile T2& data, const T2& value)
data = value;
static inline void write_relaxed(volatile u128& data, const u128& value)
sync_lock_test_and_set(&data, value);
template<typename T2> static inline T2 read_relaxed(const volatile T2& data)
return data;
static inline u128 read_relaxed(const volatile u128& value)
return sync_val_compare_and_swap(const_cast<volatile u128*>(&value), u128{0}, u128{0});
static inline const stype to_subtype(const type& value)
return reinterpret_cast<const stype&>(value);
static inline const type from_subtype(const stype value)
return reinterpret_cast<const type&>(value);
atomic_t() = default;
atomic_t(const atomic_t&) = delete;
atomic_t(type value)
: m_data(to_subtype(value))
atomic_t& operator =(const atomic_t&) = delete;
atomic_t& operator =(type value)
return write_relaxed(m_data, to_subtype(value)), *this;
operator type() const volatile
return from_subtype(read_relaxed(m_data));
// Unsafe direct access
stype* raw_data()
return reinterpret_cast<stype*>(&m_data);
// Unsafe direct access
type& raw()
return reinterpret_cast<type&>(m_data);
// Atomically compare data with cmp, replace with exch if equal, return previous data value anyway
type compare_and_swap(const type& cmp, const type& exch) volatile
return from_subtype(sync_val_compare_and_swap(&m_data, to_subtype(cmp), to_subtype(exch)));
// Atomically compare data with cmp, replace with exch if equal, return true if data was replaced
bool compare_and_swap_test(const type& cmp, const type& exch) volatile
return sync_bool_compare_and_swap(&m_data, to_subtype(cmp), to_subtype(exch));
// Atomically replace data with exch, return previous data value
type exchange(const type& exch) volatile
return from_subtype(sync_lock_test_and_set(&m_data, to_subtype(exch)));
// Atomically read data, possibly without memory barrier (not for 128 bit)
type load() const volatile
return from_subtype(read_relaxed(m_data));
// Atomically write data, possibly without memory barrier (not for 128 bit)
void store(const type& value) volatile
write_relaxed(m_data, to_subtype(value));
// Perform an atomic operation on data (func is either pointer to member function or callable object with a T& first arg);
// Returns the result of the callable object call or previous (old) value of the atomic variable if the return type is void
template<typename F, typename... Args, typename RT = std::result_of_t<F(T&, Args...)>> auto atomic_op(F func, Args&&... args) volatile -> decltype(atomic_op_result_t<F, RT, T>::result)
while (true)
// Read the old value from memory
const stype old = read_relaxed(m_data);
// Copy the old value
stype _new = old;
// Call atomic op for the local copy of the old value and save the return value of the function
atomic_op_result_t<F, RT, T> result(func, reinterpret_cast<type&>(_new), args...);
// Atomically compare value with `old`, replace with `_new` and return on success
if (sync_bool_compare_and_swap(&m_data, old, _new)) return result.move();
// Atomic bitwise OR, returns previous data
type _or(const type& right) volatile
return from_subtype(sync_fetch_and_or(&m_data, to_subtype(right)));
// Atomic bitwise AND, returns previous data
type _and(const type& right) volatile
return from_subtype(sync_fetch_and_and(&m_data, to_subtype(right)));
// Atomic bitwise AND NOT (inverts right argument), returns previous data
type _and_not(const type& right) volatile
return from_subtype(sync_fetch_and_and(&m_data, ~to_subtype(right)));
// Atomic bitwise XOR, returns previous data
type _xor(const type& right) volatile
return from_subtype(sync_fetch_and_xor(&m_data, to_subtype(right)));
type operator |=(const type& right) volatile
return from_subtype(sync_fetch_and_or(&m_data, to_subtype(right)) | to_subtype(right));
type operator &=(const type& right) volatile
return from_subtype(sync_fetch_and_and(&m_data, to_subtype(right)) & to_subtype(right));
type operator ^=(const type& right) volatile
return from_subtype(sync_fetch_and_xor(&m_data, to_subtype(right)) ^ to_subtype(right));
template<typename T> inline std::enable_if_t<IS_INTEGRAL(T), T> operator ++(atomic_t<T>& left)
return left.from_subtype(sync_fetch_and_add(left.raw_data(), 1) + 1);
template<typename T> inline std::enable_if_t<IS_INTEGRAL(T), T> operator --(atomic_t<T>& left)
return left.from_subtype(sync_fetch_and_sub(left.raw_data(), 1) - 1);
template<typename T> inline std::enable_if_t<IS_INTEGRAL(T), T> operator ++(atomic_t<T>& left, int)
return left.from_subtype(sync_fetch_and_add(left.raw_data(), 1));
template<typename T> inline std::enable_if_t<IS_INTEGRAL(T), T> operator --(atomic_t<T>& left, int)
return left.from_subtype(sync_fetch_and_sub(left.raw_data(), 1));
template<typename T, typename T2> inline std::enable_if_t<IS_INTEGRAL(T) && std::is_convertible<T2, T>::value, T> operator +=(atomic_t<T>& left, const T2& right)
return left.from_subtype(sync_fetch_and_add(left.raw_data(), right) + right);
template<typename T, typename T2> inline std::enable_if_t<IS_INTEGRAL(T) && std::is_convertible<T2, T>::value, T> operator -=(atomic_t<T>& left, const T2& right)
return left.from_subtype(sync_fetch_and_sub(left.raw_data(), right) - right);
// Arithmetic operators for atomic_t<nse_t<T>> with integral T, implemented directly on the raw storage word.

// Pre-increment: atomically adds 1, returns the incremented value
template<typename T>
inline std::enable_if_t<IS_INTEGRAL(T), nse_t<T>> operator ++(atomic_t<nse_t<T>>& left)
{
	const auto previous = sync_fetch_and_add(left.raw_data(), 1);
	return left.from_subtype(previous + 1);
}

// Pre-decrement: atomically subtracts 1, returns the decremented value
template<typename T>
inline std::enable_if_t<IS_INTEGRAL(T), nse_t<T>> operator --(atomic_t<nse_t<T>>& left)
{
	const auto previous = sync_fetch_and_sub(left.raw_data(), 1);
	return left.from_subtype(previous - 1);
}

// Post-increment: atomically adds 1, returns the value held before the addition
template<typename T>
inline std::enable_if_t<IS_INTEGRAL(T), nse_t<T>> operator ++(atomic_t<nse_t<T>>& left, int)
{
	const auto previous = sync_fetch_and_add(left.raw_data(), 1);
	return left.from_subtype(previous);
}

// Post-decrement: atomically subtracts 1, returns the value held before the subtraction
template<typename T>
inline std::enable_if_t<IS_INTEGRAL(T), nse_t<T>> operator --(atomic_t<nse_t<T>>& left, int)
{
	const auto previous = sync_fetch_and_sub(left.raw_data(), 1);
	return left.from_subtype(previous);
}

// Atomically adds `right`, returns the resulting value
template<typename T, typename T2>
inline std::enable_if_t<IS_INTEGRAL(T) && std::is_convertible<T2, T>::value, nse_t<T>> operator +=(atomic_t<nse_t<T>>& left, const T2& right)
{
	const auto previous = sync_fetch_and_add(left.raw_data(), right);
	return left.from_subtype(previous + right);
}

// Atomically subtracts `right`, returns the resulting value
template<typename T, typename T2>
inline std::enable_if_t<IS_INTEGRAL(T) && std::is_convertible<T2, T>::value, nse_t<T>> operator -=(atomic_t<nse_t<T>>& left, const T2& right)
{
	const auto previous = sync_fetch_and_sub(left.raw_data(), right);
	return left.from_subtype(previous - right);
}
// Arithmetic operators for atomic_t<se_t<T>> with integral T.
// These go through atomic_t::atomic_op(), so the arithmetic itself is done by se_t's own operators
// on a local copy that atomic_op() then publishes.

// Pre-increment: returns the incremented value
template<typename T>
inline std::enable_if_t<IS_INTEGRAL(T), se_t<T>> operator ++(atomic_t<se_t<T>>& left)
{
	const auto step = [](se_t<T>& value) -> se_t<T>
	{
		return ++value;
	};

	return left.atomic_op(step);
}

// Pre-decrement: returns the decremented value
template<typename T>
inline std::enable_if_t<IS_INTEGRAL(T), se_t<T>> operator --(atomic_t<se_t<T>>& left)
{
	const auto step = [](se_t<T>& value) -> se_t<T>
	{
		return --value;
	};

	return left.atomic_op(step);
}

// Post-increment: returns the value held before the increment
template<typename T>
inline std::enable_if_t<IS_INTEGRAL(T), se_t<T>> operator ++(atomic_t<se_t<T>>& left, int)
{
	const auto step = [](se_t<T>& value) -> se_t<T>
	{
		return value++;
	};

	return left.atomic_op(step);
}

// Post-decrement: returns the value held before the decrement
template<typename T>
inline std::enable_if_t<IS_INTEGRAL(T), se_t<T>> operator --(atomic_t<se_t<T>>& left, int)
{
	const auto step = [](se_t<T>& value) -> se_t<T>
	{
		return value--;
	};

	return left.atomic_op(step);
}

// Adds `right`, returns the resulting value
template<typename T, typename T2>
inline std::enable_if_t<IS_INTEGRAL(T) && std::is_convertible<T2, T>::value, se_t<T>> operator +=(atomic_t<se_t<T>>& left, const T2& right)
{
	const auto step = [&](se_t<T>& value) -> se_t<T>
	{
		return value += right;
	};

	return left.atomic_op(step);
}

// Subtracts `right`, returns the resulting value
template<typename T, typename T2>
inline std::enable_if_t<IS_INTEGRAL(T) && std::is_convertible<T2, T>::value, se_t<T>> operator -=(atomic_t<se_t<T>>& left, const T2& right)
{
	const auto step = [&](se_t<T>& value) -> se_t<T>
	{
		return value -= right;
	};

	return left.atomic_op(step);
}
// Atomic BE Type (for PS3 virtual memory): shorthand for atomic_t instantiated with be_t<T>
template<typename T> using atomic_be_t = atomic_t<be_t<T>>;
// Atomic LE Type (for PSV virtual memory): shorthand for atomic_t instantiated with le_t<T>
template<typename T> using atomic_le_t = atomic_t<le_t<T>>;
// Algorithm for std::atomic; similar to atomic_t::atomic_op().
// Applies `func` to a local copy of the value and publishes the copy with compare_exchange_strong,
// repeating until the exchange succeeds. Returns what atomic_op_result_t captured for the successful attempt.
template<typename T, typename F, typename... Args, typename RT = std::result_of_t<F(T&, Args...)>>
auto atomic_op(std::atomic<T>& var, F func, Args&&... args) -> decltype(atomic_op_result_t<F, RT, T>::result)
{
	// Initial snapshot; compare_exchange_strong refreshes it whenever an attempt fails
	auto expected = var.load();

	for (;;)
	{
		// Work on a private copy of the snapshot
		auto desired = expected;

		atomic_op_result_t<F, RT, T> outcome(func, desired, args...);

		if (var.compare_exchange_strong(expected, desired))
		{
			return outcome.move();
		}
	}
}