diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index 47adddd5f9..1405c20617 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -1736,6 +1736,8 @@ void spu_thread::push_snr(u32 number, u32 value)
 
 void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8* ls)
 {
+	perf_meter<"DMA"_u32> perf_;
+
 	const bool is_get = (args.cmd & ~(MFC_BARRIER_MASK | MFC_FENCE_MASK | MFC_START_MASK)) == MFC_GET_CMD;
 
 	u32 eal = args.eal;
@@ -1834,14 +1836,8 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 
 	if ((!g_use_rtm && !is_get) || g_cfg.core.spu_accurate_dma) [[unlikely]]
 	{
-		perf_meter<"ADMA_GET"_u64> perf_get;
-		perf_meter<"ADMA_PUT"_u64> perf_put = perf_get;
-
-		if (!g_cfg.core.spu_accurate_dma) [[likely]]
-		{
-			perf_put.reset();
-			perf_get.reset();
-		}
+		perf_meter<"ADMA_GET"_u64> perf_get = perf_;
+		perf_meter<"ADMA_PUT"_u64> perf_put = perf_;
 
 		cpu_thread* _cpu = _this ? _this : get_current_cpu_thread();
 
@@ -1864,6 +1860,8 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 			range_lock = _this->range_lock;
 		}
 
+		_m_prefetchw(range_lock);
+
 		for (u32 size = args.size, size0; is_get; size -= size0, dst += size0, src += size0, eal += size0)
 		{
 			size0 = std::min<u32>(128 - (eal & 127), std::min<u32>(size, 128));
@@ -2161,8 +2159,13 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 			//std::atomic_thread_fence(std::memory_order_seq_cst);
 			return;
 		}
+		else
+		{
+			perf_put.reset();
+			perf_get.reset();
+		}
 
-		perf_meter<"DMA_PUT"_u64> perf2;
+		perf_meter<"DMA_PUT"_u64> perf2 = perf_;
 
 		switch (u32 size = args.size)
 		{
diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp
index 2b964c07d8..de34a95686 100644
--- a/rpcs3/Emu/Memory/vm.cpp
+++ b/rpcs3/Emu/Memory/vm.cpp
@@ -167,6 +167,8 @@ namespace vm
 
 		for (u64 i = 0;; i++)
 		{
+			range_lock->store(begin | (u64{size} << 32));
+
 			const u64 lock_val = g_range_lock.load();
 			const u64 is_share = g_shmem[begin >> 16].load();
 
@@ -188,18 +190,18 @@ namespace vm
 
 			if (addr + size <= lock_addr || addr >= lock_addr + lock_size) [[likely]]
 			{
-				range_lock->store(begin | (u64{size} << 32));
-
 				const u64 new_lock_val = g_range_lock.load();
 
 				if (!new_lock_val || new_lock_val == lock_val) [[likely]]
 				{
 					break;
 				}
-
-				range_lock->release(0);
 			}
 
+			// Wait a bit before accessing g_mutex
+			range_lock->store(0);
+			busy_wait(200);
+
 			std::shared_lock lock(g_mutex, std::try_to_lock);
 
 			if (!lock && i < 15)
diff --git a/rpcs3/Emu/Memory/vm_locking.h b/rpcs3/Emu/Memory/vm_locking.h
index fce204fcc4..6594ca35f1 100644
--- a/rpcs3/Emu/Memory/vm_locking.h
+++ b/rpcs3/Emu/Memory/vm_locking.h
@@ -41,13 +41,20 @@ namespace vm
 
 	void range_lock_internal(atomic_t<u64, 64>* range_lock, u32 begin, u32 size);
 
-	// Lock memory range
+	// Lock memory range ignoring memory protection (Size!=0 also implies aligned begin)
 	template <u32 Size = 0>
 	FORCE_INLINE void range_lock(atomic_t<u64, 64>* range_lock, u32 begin, u32 _size)
 	{
+		// Optimistic locking.
+		// Note that we store the range we will be accessing, without any clamping.
+		range_lock->store(begin | (u64{_size} << 32));
+
+		// Old-style conditional constexpr
 		const u32 size = Size ? Size : _size;
+
 		const u64 lock_val = g_range_lock.load();
 		const u64 is_share = g_shmem[begin >> 16].load();
+
 #ifndef _MSC_VER
 		__asm__(""); // Tiny barrier
 #endif
@@ -59,7 +66,7 @@ namespace vm
 
 		// Optimization: if range_locked is not used, the addr check will always pass
 		// Otherwise, g_shmem is unchanged and its value is reliable to read
-		if ((lock_val >> range_pos) == (range_locked >> range_pos)) [[likely]]
+		if ((lock_val >> range_pos) == (range_locked >> range_pos))
 		{
 			lock_size = 128;
 
@@ -72,20 +79,16 @@ namespace vm
 
 		if (addr + size <= lock_addr || addr >= lock_addr + lock_size) [[likely]]
 		{
-			// Optimistic locking.
-			// Note that we store the range we will be accessing, without any clamping.
-			range_lock->store(begin | (u64{size} << 32));
-
			const u64 new_lock_val = g_range_lock.load();
 
 			if (!new_lock_val || new_lock_val == lock_val) [[likely]]
 			{
 				return;
 			}
-
-			range_lock->release(0);
 		}
 
+		range_lock->release(0);
+
 		// Fallback to slow path
 		range_lock_internal(range_lock, begin, size);
 	}
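
Note on the locking pattern: the vm_locking.h change moves the range_lock store ahead of the g_range_lock check, so a writer that raises g_range_lock afterwards can observe the reader's already-published range. The sketch below is a minimal, self-contained illustration of that publish-then-validate protocol; the names (g_reader_slot, g_exclusive, reader_lock_range, writer_lock_range) are hypothetical and this is not the RPCS3 API. It omits the real code's flag bits, address clamping, shared-memory handling and busy_wait backoff, and relies on the default seq_cst ordering of std::atomic where the real code uses a compiler barrier plus x86 ordering.

// Minimal publish-then-validate sketch (hypothetical names, not the RPCS3 API).
#include <atomic>
#include <cstdint>
#include <thread>

using u32 = std::uint32_t;
using u64 = std::uint64_t;

constexpr unsigned max_readers = 8;

// Per-reader published range, encoded as begin | (size << 32); 0 = inactive.
std::atomic<u64> g_reader_slot[max_readers]{};

// Global writer word, same encoding; 0 = no writer active.
std::atomic<u64> g_exclusive{0};

bool ranges_overlap(u64 a, u64 b)
{
	const u64 a_begin = static_cast<u32>(a), a_end = a_begin + (a >> 32);
	const u64 b_begin = static_cast<u32>(b), b_end = b_begin + (b >> 32);
	return a_begin < b_end && b_begin < a_end;
}

// Reader fast path: lock-free when no conflicting writer is active.
void reader_lock_range(std::atomic<u64>& slot, u32 begin, u32 size)
{
	const u64 range = begin | (u64{size} << 32);

	while (true)
	{
		// 1) Publish the range first (this is what the diff moves earlier).
		slot.store(range);

		// 2) Validate the writer word only after publishing.
		const u64 lock_val = g_exclusive.load();

		if (!lock_val || !ranges_overlap(lock_val, range))
		{
			return; // No conflicting writer observed: the published range holds.
		}

		// 3) Conflict: retract, back off, retry (slow path).
		slot.store(0);
		std::this_thread::yield();
	}
}

void reader_unlock(std::atomic<u64>& slot)
{
	slot.store(0);
}

// Writer: announce the range, then wait for conflicting readers to drain.
void writer_lock_range(u32 begin, u32 size)
{
	const u64 range = begin | (u64{size} << 32);

	for (u64 expected = 0; !g_exclusive.compare_exchange_weak(expected, range); expected = 0)
	{
		std::this_thread::yield();
	}

	for (auto& slot : g_reader_slot)
	{
		// Any reader that publishes after this point will see g_exclusive set.
		while (ranges_overlap(slot.load(), range))
		{
			std::this_thread::yield();
		}
	}
}

void writer_unlock()
{
	g_exclusive.store(0);
}

The pairing is Dekker-style: the reader stores its slot before loading the writer word, and the writer stores the writer word before scanning the slots, so under sequential consistency at least one side observes the other; the reader retracts and retries, or the writer waits for the slot to clear or stop overlapping. The vm.cpp hunk applies the same idea to the slow path, re-publishing the range at the top of every retry iteration and clearing it (plus a short busy_wait) before falling back to g_mutex.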