mirror of https://github.com/RPCS3/rpcs3.git
atomic.hpp: improve internal data layout
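Put the semaphore control variable together with the address/reference word in each hashtable entry (new `sync_var` struct). Avoid unnecessary allocation of extents: a slot branch is now allocated only when an entry is already occupied by a different address (collision), not for the first waiter.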
This commit is contained in:
parent
56b310ebac
commit
4d46e66833
|
@ -19,7 +19,7 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
// Hashtable size factor (can be set to 0 to stress-test collisions)
|
// Hashtable size factor (can be set to 0 to stress-test collisions)
|
||||||
static constexpr uint s_hashtable_power = 17;
|
static constexpr uint s_hashtable_power = 16;
|
||||||
|
|
||||||
// Total number of entries, should be a power of 2.
|
// Total number of entries, should be a power of 2.
|
||||||
static constexpr std::uintptr_t s_hashtable_size = 1u << s_hashtable_power;
|
static constexpr std::uintptr_t s_hashtable_size = 1u << s_hashtable_power;
|
||||||
|
@ -36,29 +36,40 @@ static constexpr u64 s_collision_bit = 0x8000'0000'0000'0000;
|
||||||
// Allocated slot with secondary table.
|
// Allocated slot with secondary table.
|
||||||
static constexpr u64 s_slot_mask = ~(s_waiter_mask | s_pointer_mask | s_collision_bit);
|
static constexpr u64 s_slot_mask = ~(s_waiter_mask | s_pointer_mask | s_collision_bit);
|
||||||
|
|
||||||
// Main hashtable for atomic wait, uses lowest pointer bits.
|
|
||||||
static atomic_t<u64> s_hashtable[s_hashtable_size]{};
|
|
||||||
|
|
||||||
// Helper to get least significant set bit from 64-bit masks
|
// Helper to get least significant set bit from 64-bit masks
|
||||||
template <u64 Mask>
|
template <u64 Mask>
|
||||||
static constexpr u64 one_v = Mask & (0 - Mask);
|
static constexpr u64 one_v = Mask & (0 - Mask);
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
struct sync_var
|
||||||
|
{
|
||||||
|
constexpr sync_var() noexcept = default;
|
||||||
|
|
||||||
|
// Reference counter, owning pointer, collision bit and optionally selected slot
|
||||||
|
atomic_t<u64> addr_ref{};
|
||||||
|
|
||||||
|
// Counter for waiting threads for the semaphore and allocated semaphore id
|
||||||
|
atomic_t<u64> sema_var{};
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Main hashtable for atomic wait.
|
||||||
|
static sync_var s_hashtable[s_hashtable_size]{};
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
struct slot_info
|
struct slot_info
|
||||||
{
|
{
|
||||||
constexpr slot_info() noexcept = default;
|
constexpr slot_info() noexcept = default;
|
||||||
|
|
||||||
// Combined allocated semaphore id and number of waiters
|
// Branch extension
|
||||||
atomic_t<u64> sema_var{};
|
sync_var branch[48 - s_hashtable_power]{};
|
||||||
|
|
||||||
// Sub slots
|
|
||||||
atomic_t<u64> branch[48 - s_hashtable_power]{};
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Number of search groups (defines max slot branch count as gcount * 64)
|
// Number of search groups (defines max slot branch count as gcount * 64)
|
||||||
static constexpr u32 s_slot_gcount = (s_hashtable_power ? 16384 : 256) / 64;
|
static constexpr u32 s_slot_gcount = (s_hashtable_power ? 4096 : 256) / 64;
|
||||||
|
|
||||||
// Array of slot branch objects
|
// Array of slot branch objects
|
||||||
static slot_info s_slot_list[s_slot_gcount * 64]{};
|
static slot_info s_slot_list[s_slot_gcount * 64]{};
|
||||||
|
@ -106,14 +117,14 @@ static u64 slot_alloc()
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static slot_info* slot_get(std::uintptr_t iptr, atomic_t<u64>* loc, u64 lv = 0)
|
static sync_var* slot_get(std::uintptr_t iptr, sync_var* loc, u64 lv = 0)
|
||||||
{
|
{
|
||||||
if (!loc)
|
if (!loc)
|
||||||
{
|
{
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
const u64 value = loc->load();
|
const u64 value = loc->addr_ref.load();
|
||||||
|
|
||||||
if (!value)
|
if (!value)
|
||||||
{
|
{
|
||||||
|
@ -122,7 +133,7 @@ static slot_info* slot_get(std::uintptr_t iptr, atomic_t<u64>* loc, u64 lv = 0)
|
||||||
|
|
||||||
if ((value & s_pointer_mask) == (iptr & s_pointer_mask))
|
if ((value & s_pointer_mask) == (iptr & s_pointer_mask))
|
||||||
{
|
{
|
||||||
return &s_slot_list[(value & s_slot_mask) / one_v<s_slot_mask>];
|
return loc;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((value & s_collision_bit) == 0)
|
if ((value & s_collision_bit) == 0)
|
||||||
|
@ -316,7 +327,7 @@ void atomic_storage_futex::wait(const void* data, std::size_t size, u64 old_valu
|
||||||
u64 slot_a = -1;
|
u64 slot_a = -1;
|
||||||
|
|
||||||
// Found slot object
|
// Found slot object
|
||||||
slot_info* slot = nullptr;
|
sync_var* slot = nullptr;
|
||||||
|
|
||||||
auto install_op = [&](u64& value) -> u64
|
auto install_op = [&](u64& value) -> u64
|
||||||
{
|
{
|
||||||
|
@ -328,29 +339,26 @@ void atomic_storage_futex::wait(const void* data, std::size_t size, u64 old_valu
|
||||||
|
|
||||||
if (!value || (value & s_pointer_mask) == (iptr & s_pointer_mask))
|
if (!value || (value & s_pointer_mask) == (iptr & s_pointer_mask))
|
||||||
{
|
{
|
||||||
if (!value)
|
// Store pointer bits
|
||||||
|
value |= (iptr & s_pointer_mask);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if ((value & s_collision_bit) == 0)
|
||||||
{
|
{
|
||||||
if (slot_a + 1 == 0)
|
if (slot_a + 1 == 0)
|
||||||
{
|
{
|
||||||
// First waiter: allocate slot and install it
|
// Second waiter: allocate slot and install it
|
||||||
slot_a = slot_alloc() * one_v<s_slot_mask>;
|
slot_a = slot_alloc() * one_v<s_slot_mask>;
|
||||||
}
|
}
|
||||||
|
|
||||||
value |= slot_a;
|
value |= slot_a;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store pointer bits
|
|
||||||
value |= (iptr & s_pointer_mask);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Set collision bit
|
// Set collision bit
|
||||||
value |= s_collision_bit;
|
value |= s_collision_bit;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return slot ptr
|
|
||||||
slot = &s_slot_list[(value & s_slot_mask) / one_v<s_slot_mask>];
|
|
||||||
|
|
||||||
// Add waiter
|
// Add waiter
|
||||||
value += one_v<s_waiter_mask>;
|
value += one_v<s_waiter_mask>;
|
||||||
return value;
|
return value;
|
||||||
|
@ -360,15 +368,15 @@ void atomic_storage_futex::wait(const void* data, std::size_t size, u64 old_valu
|
||||||
u64 lv = 0;
|
u64 lv = 0;
|
||||||
|
|
||||||
// For cleanup
|
// For cleanup
|
||||||
std::basic_string<atomic_t<u64>*> install_list;
|
std::basic_string<sync_var*> install_list;
|
||||||
|
|
||||||
for (atomic_t<u64>* ptr = &s_hashtable[iptr % s_hashtable_size];;)
|
for (sync_var* ptr = &s_hashtable[iptr % s_hashtable_size];;)
|
||||||
{
|
{
|
||||||
auto [_old, ok] = ptr->fetch_op(install_op);
|
auto [_old, ok] = ptr->addr_ref.fetch_op(install_op);
|
||||||
|
|
||||||
if (slot_a + 1)
|
if (slot_a + 1)
|
||||||
{
|
{
|
||||||
if ((ok & s_slot_mask) == slot_a)
|
if ((_old & s_collision_bit) == 0 && (ok & s_collision_bit) && (ok & s_slot_mask) == slot_a)
|
||||||
{
|
{
|
||||||
// Slot set successfully
|
// Slot set successfully
|
||||||
slot_a = -1;
|
slot_a = -1;
|
||||||
|
@ -391,6 +399,7 @@ void atomic_storage_futex::wait(const void* data, std::size_t size, u64 old_valu
|
||||||
slot_a = -1;
|
slot_a = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
slot = ptr;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -398,7 +407,7 @@ void atomic_storage_futex::wait(const void* data, std::size_t size, u64 old_valu
|
||||||
const u64 eq_bits = utils::cntlz64((((iptr ^ ok) & (s_pointer_mask >> lv)) | ~s_pointer_mask) << 16, true);
|
const u64 eq_bits = utils::cntlz64((((iptr ^ ok) & (s_pointer_mask >> lv)) | ~s_pointer_mask) << 16, true);
|
||||||
|
|
||||||
// Collision; need to go deeper
|
// Collision; need to go deeper
|
||||||
ptr = slot->branch + eq_bits;
|
ptr = s_slot_list[(ok & s_slot_mask) / one_v<s_slot_mask>].branch + eq_bits;
|
||||||
install_list.push_back(ptr);
|
install_list.push_back(ptr);
|
||||||
|
|
||||||
lv = eq_bits + 1;
|
lv = eq_bits + 1;
|
||||||
|
@ -651,7 +660,7 @@ void atomic_storage_futex::wait(const void* data, std::size_t size, u64 old_valu
|
||||||
|
|
||||||
for (auto ptr = (install_list.empty() ? &s_hashtable[iptr % s_hashtable_size] : install_list.back());;)
|
for (auto ptr = (install_list.empty() ? &s_hashtable[iptr % s_hashtable_size] : install_list.back());;)
|
||||||
{
|
{
|
||||||
auto [_old, ok] = ptr->fetch_op([&](u64& value)
|
auto [_old, ok] = ptr->addr_ref.fetch_op([&](u64& value)
|
||||||
{
|
{
|
||||||
if (value & s_waiter_mask)
|
if (value & s_waiter_mask)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue