mirror of https://github.com/RPCS3/rpcs3.git
SPU/vm: Improve vm::range_lock a bit
Use some prefetching. Use optimistic locking.
This commit is contained in:
parent
3507cd0a37
commit
8bc9868c1f
|
@ -1736,6 +1736,8 @@ void spu_thread::push_snr(u32 number, u32 value)
|
||||||
|
|
||||||
void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8* ls)
|
void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8* ls)
|
||||||
{
|
{
|
||||||
|
perf_meter<"DMA"_u32> perf_;
|
||||||
|
|
||||||
const bool is_get = (args.cmd & ~(MFC_BARRIER_MASK | MFC_FENCE_MASK | MFC_START_MASK)) == MFC_GET_CMD;
|
const bool is_get = (args.cmd & ~(MFC_BARRIER_MASK | MFC_FENCE_MASK | MFC_START_MASK)) == MFC_GET_CMD;
|
||||||
|
|
||||||
u32 eal = args.eal;
|
u32 eal = args.eal;
|
||||||
|
@ -1834,14 +1836,8 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
|
||||||
|
|
||||||
if ((!g_use_rtm && !is_get) || g_cfg.core.spu_accurate_dma) [[unlikely]]
|
if ((!g_use_rtm && !is_get) || g_cfg.core.spu_accurate_dma) [[unlikely]]
|
||||||
{
|
{
|
||||||
perf_meter<"ADMA_GET"_u64> perf_get;
|
perf_meter<"ADMA_GET"_u64> perf_get = perf_;
|
||||||
perf_meter<"ADMA_PUT"_u64> perf_put = perf_get;
|
perf_meter<"ADMA_PUT"_u64> perf_put = perf_;
|
||||||
|
|
||||||
if (!g_cfg.core.spu_accurate_dma) [[likely]]
|
|
||||||
{
|
|
||||||
perf_put.reset();
|
|
||||||
perf_get.reset();
|
|
||||||
}
|
|
||||||
|
|
||||||
cpu_thread* _cpu = _this ? _this : get_current_cpu_thread();
|
cpu_thread* _cpu = _this ? _this : get_current_cpu_thread();
|
||||||
|
|
||||||
|
@ -1864,6 +1860,8 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
|
||||||
range_lock = _this->range_lock;
|
range_lock = _this->range_lock;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_m_prefetchw(range_lock);
|
||||||
|
|
||||||
for (u32 size = args.size, size0; is_get; size -= size0, dst += size0, src += size0, eal += size0)
|
for (u32 size = args.size, size0; is_get; size -= size0, dst += size0, src += size0, eal += size0)
|
||||||
{
|
{
|
||||||
size0 = std::min<u32>(128 - (eal & 127), std::min<u32>(size, 128));
|
size0 = std::min<u32>(128 - (eal & 127), std::min<u32>(size, 128));
|
||||||
|
@ -2161,8 +2159,13 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
|
||||||
//std::atomic_thread_fence(std::memory_order_seq_cst);
|
//std::atomic_thread_fence(std::memory_order_seq_cst);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
perf_put.reset();
|
||||||
|
perf_get.reset();
|
||||||
|
}
|
||||||
|
|
||||||
perf_meter<"DMA_PUT"_u64> perf2;
|
perf_meter<"DMA_PUT"_u64> perf2 = perf_;
|
||||||
|
|
||||||
switch (u32 size = args.size)
|
switch (u32 size = args.size)
|
||||||
{
|
{
|
||||||
|
|
|
@ -167,6 +167,8 @@ namespace vm
|
||||||
|
|
||||||
for (u64 i = 0;; i++)
|
for (u64 i = 0;; i++)
|
||||||
{
|
{
|
||||||
|
range_lock->store(begin | (u64{size} << 32));
|
||||||
|
|
||||||
const u64 lock_val = g_range_lock.load();
|
const u64 lock_val = g_range_lock.load();
|
||||||
const u64 is_share = g_shmem[begin >> 16].load();
|
const u64 is_share = g_shmem[begin >> 16].load();
|
||||||
|
|
||||||
|
@ -188,18 +190,18 @@ namespace vm
|
||||||
|
|
||||||
if (addr + size <= lock_addr || addr >= lock_addr + lock_size) [[likely]]
|
if (addr + size <= lock_addr || addr >= lock_addr + lock_size) [[likely]]
|
||||||
{
|
{
|
||||||
range_lock->store(begin | (u64{size} << 32));
|
|
||||||
|
|
||||||
const u64 new_lock_val = g_range_lock.load();
|
const u64 new_lock_val = g_range_lock.load();
|
||||||
|
|
||||||
if (!new_lock_val || new_lock_val == lock_val) [[likely]]
|
if (!new_lock_val || new_lock_val == lock_val) [[likely]]
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
range_lock->release(0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Wait a bit before accessing g_mutex
|
||||||
|
range_lock->store(0);
|
||||||
|
busy_wait(200);
|
||||||
|
|
||||||
std::shared_lock lock(g_mutex, std::try_to_lock);
|
std::shared_lock lock(g_mutex, std::try_to_lock);
|
||||||
|
|
||||||
if (!lock && i < 15)
|
if (!lock && i < 15)
|
||||||
|
|
|
@ -41,13 +41,20 @@ namespace vm
|
||||||
|
|
||||||
void range_lock_internal(atomic_t<u64, 64>* range_lock, u32 begin, u32 size);
|
void range_lock_internal(atomic_t<u64, 64>* range_lock, u32 begin, u32 size);
|
||||||
|
|
||||||
// Lock memory range
|
// Lock memory range ignoring memory protection (Size!=0 also implies aligned begin)
|
||||||
template <uint Size = 0>
|
template <uint Size = 0>
|
||||||
FORCE_INLINE void range_lock(atomic_t<u64, 64>* range_lock, u32 begin, u32 _size)
|
FORCE_INLINE void range_lock(atomic_t<u64, 64>* range_lock, u32 begin, u32 _size)
|
||||||
{
|
{
|
||||||
|
// Optimistic locking.
|
||||||
|
// Note that we store the range we will be accessing, without any clamping.
|
||||||
|
range_lock->store(begin | (u64{_size} << 32));
|
||||||
|
|
||||||
|
// Old-style conditional constexpr
|
||||||
const u32 size = Size ? Size : _size;
|
const u32 size = Size ? Size : _size;
|
||||||
|
|
||||||
const u64 lock_val = g_range_lock.load();
|
const u64 lock_val = g_range_lock.load();
|
||||||
const u64 is_share = g_shmem[begin >> 16].load();
|
const u64 is_share = g_shmem[begin >> 16].load();
|
||||||
|
|
||||||
#ifndef _MSC_VER
|
#ifndef _MSC_VER
|
||||||
__asm__(""); // Tiny barrier
|
__asm__(""); // Tiny barrier
|
||||||
#endif
|
#endif
|
||||||
|
@ -59,7 +66,7 @@ namespace vm
|
||||||
|
|
||||||
// Optimization: if range_locked is not used, the addr check will always pass
|
// Optimization: if range_locked is not used, the addr check will always pass
|
||||||
// Otherwise, g_shmem is unchanged and its value is reliable to read
|
// Otherwise, g_shmem is unchanged and its value is reliable to read
|
||||||
if ((lock_val >> range_pos) == (range_locked >> range_pos)) [[likely]]
|
if ((lock_val >> range_pos) == (range_locked >> range_pos))
|
||||||
{
|
{
|
||||||
lock_size = 128;
|
lock_size = 128;
|
||||||
|
|
||||||
|
@ -72,20 +79,16 @@ namespace vm
|
||||||
|
|
||||||
if (addr + size <= lock_addr || addr >= lock_addr + lock_size) [[likely]]
|
if (addr + size <= lock_addr || addr >= lock_addr + lock_size) [[likely]]
|
||||||
{
|
{
|
||||||
// Optimistic locking.
|
|
||||||
// Note that we store the range we will be accessing, without any clamping.
|
|
||||||
range_lock->store(begin | (u64{size} << 32));
|
|
||||||
|
|
||||||
const u64 new_lock_val = g_range_lock.load();
|
const u64 new_lock_val = g_range_lock.load();
|
||||||
|
|
||||||
if (!new_lock_val || new_lock_val == lock_val) [[likely]]
|
if (!new_lock_val || new_lock_val == lock_val) [[likely]]
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
range_lock->release(0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
range_lock->release(0);
|
||||||
|
|
||||||
// Fallback to slow path
|
// Fallback to slow path
|
||||||
range_lock_internal(range_lock, begin, size);
|
range_lock_internal(range_lock, begin, size);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue