vm: remove g_mutex, use g_range_lock instead

Simplification and performance improvements.
This commit is contained in:
Nekotekina 2022-04-30 16:51:52 +03:00 committed by Ivan
parent 799c4837d3
commit 5c1f79ab26
9 changed files with 78 additions and 148 deletions

View File

@ -1430,7 +1430,7 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe
return false;
}
if (vm::reader_lock rlock; vm::check_addr(addr, 0))
if (vm::writer_lock mlock; vm::check_addr(addr, 0))
{
// For allocated memory with protection lower than required (such as protection::no or read-only while writing to it)
utils::memory_protect(vm::base(addr & -0x1000), 0x1000, utils::protection::rw);
@ -1485,7 +1485,7 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe
u64 data3;
{
vm::reader_lock rlock;
vm::writer_lock rlock;
if (vm::check_addr(addr, is_writing ? vm::page_writable : vm::page_readable))
{
// Memory was allocated in between, retry

View File

@ -772,7 +772,7 @@ extern bool ppu_patch(u32 addr, u32 value)
return false;
}
vm::reader_lock rlock;
vm::writer_lock rlock;
if (!vm::check_addr(addr))
{

View File

@ -182,7 +182,7 @@ error_code sys_memory_get_page_attribute(cpu_thread& cpu, u32 addr, vm::ptr<sys_
sys_memory.trace("sys_memory_get_page_attribute(addr=0x%x, attr=*0x%x)", addr, attr);
vm::reader_lock rlock;
vm::writer_lock rlock;
if (!vm::check_addr(addr) || addr >= SPU_FAKE_BASE_ADDR)
{

View File

@ -62,7 +62,13 @@ void _sys_ppu_thread_exit(ppu_thread& ppu, u64 errorcode)
ppu.state += cpu_flag::wait;
// Need to wait until the current writer finishes
if (ppu.state & cpu_flag::memory) vm::g_mutex.lock_unlock();
if (ppu.state & cpu_flag::memory)
{
while (vm::g_range_lock)
{
busy_wait(200);
}
}
sys_ppu_thread.trace("_sys_ppu_thread_exit(errorcode=0x%llx)", errorcode);

View File

@ -318,7 +318,7 @@ error_code sys_rsx_context_iomap(cpu_thread& cpu, u32 context_id, u32 io, u32 ea
// Wait until we have no active RSX locks and reserve iomap for use. Must do so before acquiring vm lock to avoid deadlocks
rsx::reservation_lock<true> rsx_lock(ea, size);
vm::reader_lock rlock;
vm::writer_lock rlock;
for (u32 addr = ea, end = ea + size; addr < end; addr += 0x100000)
{
@ -379,7 +379,7 @@ error_code sys_rsx_context_iounmap(cpu_thread& cpu, u32 context_id, u32 io, u32
sys_rsx.warning("sys_rsx_context_iounmap(): RSX is not idle while unmapping io");
}
vm::reader_lock rlock;
vm::writer_lock rlock;
std::scoped_lock lock(render->sys_rsx_mtx);
@ -627,7 +627,7 @@ error_code sys_rsx_context_attribute(u32 context_id, u32 package_id, u64 a3, u64
if (location == CELL_GCM_LOCATION_MAIN && bound)
{
vm::reader_lock rlock;
vm::writer_lock rlock;
for (u32 io = (offset >> 20), end = (range.end >> 20); io <= end; io++)
{

View File

@ -13,7 +13,6 @@
#include "Emu/Cell/SPURecompiler.h"
#include "Emu/perf_meter.hpp"
#include <deque>
#include <shared_mutex>
#include "util/vm.hpp"
#include "util/asm.hpp"
@ -64,9 +63,6 @@ namespace vm
// Memory locations
alignas(64) std::vector<std::shared_ptr<block_t>> g_locations;
// Memory mutex core
shared_mutex g_mutex;
// Memory mutex acknowledgement
thread_local atomic_t<cpu_thread*>* g_tls_locked = nullptr;
@ -199,47 +195,32 @@ namespace vm
{
break;
}
}
// Wait a bit before accessing g_mutex
range_lock->store(0);
busy_wait(200);
u32 test = 0;
std::shared_lock lock(g_mutex, std::try_to_lock);
if (!lock && i < 15)
{
busy_wait(200);
continue;
}
else if (!lock)
{
lock.lock();
}
u32 test = 0;
for (u32 i = begin / 4096, max = (begin + size - 1) / 4096; i <= max; i++)
{
if (!(g_pages[i] & (vm::page_readable)))
for (u32 i = begin / 4096, max = (begin + size - 1) / 4096; i <= max; i++)
{
test = i * 4096;
break;
if (!(g_pages[i] & (vm::page_readable)))
{
test = i * 4096;
break;
}
}
if (test)
{
range_lock->release(0);
// Try triggering a page fault (write)
// TODO: Read memory if needed
vm::_ref<atomic_t<u8>>(test) += 0;
continue;
}
}
if (test)
{
lock.unlock();
// Try tiggering a page fault (write)
// TODO: Read memory if needed
vm::_ref<atomic_t<u8>>(test) += 0;
continue;
}
range_lock->release(begin | (u64{size} << 32));
break;
// Wait a bit before accessing global lock
range_lock->store(0);
busy_wait(200);
}
if (_cpu)
@ -295,7 +276,6 @@ namespace vm
if (size == 0)
{
vm_log.warning("Tried to lock empty range (flags=0x%x, addr=0x%x)", flags >> 32, addr);
g_range_lock.release(0);
return;
}
@ -350,7 +330,7 @@ namespace vm
cpu.state -= cpu_flag::memory;
}
if (g_mutex.is_lockable())
if (!g_range_lock)
{
return;
}
@ -360,12 +340,21 @@ namespace vm
if (!ok || cpu.state & cpu_flag::memory)
{
while (true)
for (u64 i = 0;; i++)
{
g_mutex.lock_unlock();
if (i < 100)
busy_wait(200);
else
std::this_thread::yield();
if (g_range_lock)
{
continue;
}
cpu.state -= cpu_flag::memory;
if (g_mutex.is_lockable()) [[likely]]
if (!g_range_lock) [[likely]]
{
return;
}
@ -405,7 +394,12 @@ namespace vm
}
}
reader_lock::reader_lock()
// Default constructor: delegates to the (addr, size, flags) constructor
// with addr = 0 and size = 1, leaving flags at its declared default.
writer_lock::writer_lock()
: writer_lock(0, 1)
{
}
writer_lock::writer_lock(u32 const addr, u32 const size, u64 const flags)
{
auto cpu = get_current_cpu_thread();
@ -421,55 +415,21 @@ namespace vm
}
}
g_mutex.lock_shared();
if (cpu)
for (u64 i = 0;; i++)
{
cpu->state -= cpu_flag::memory + cpu_flag::wait;
}
}
reader_lock::~reader_lock()
{
if (m_upgraded)
{
g_mutex.unlock();
}
else
{
g_mutex.unlock_shared();
}
}
void reader_lock::upgrade()
{
if (m_upgraded)
{
return;
}
g_mutex.lock_upgrade();
m_upgraded = true;
}
writer_lock::writer_lock(u32 addr /*mutable*/)
{
auto cpu = get_current_cpu_thread();
if (cpu)
{
if (!g_tls_locked || *g_tls_locked != cpu || cpu->state & cpu_flag::wait)
if (g_range_lock || !g_range_lock.compare_and_swap_test(0, addr | u64{size} << 32 | flags))
{
cpu = nullptr;
if (i < 100)
busy_wait(200);
else
std::this_thread::yield();
}
else
{
cpu->state += cpu_flag::wait;
break;
}
}
g_mutex.lock();
if (addr >= 0x10000)
{
perf_meter<"SUSPEND"_u64> perf0;
@ -490,8 +450,6 @@ namespace vm
addr1 = static_cast<u16>(addr) | is_shared;
}
g_range_lock = addr | range_locked;
utils::prefetch_read(g_range_lock_set + 0);
utils::prefetch_read(g_range_lock_set + 2);
utils::prefetch_read(g_range_lock_set + 4);
@ -546,8 +504,7 @@ namespace vm
writer_lock::~writer_lock()
{
g_range_lock.release(0);
g_mutex.unlock();
g_range_lock = 0;
}
u64 reservation_lock_internal(u32 addr, atomic_t<u64>& res)
@ -764,16 +721,13 @@ namespace vm
fmt::throw_exception("Concurrent access (addr=0x%x, size=0x%x, flags=0x%x, current_addr=0x%x)", addr, size, flags, i * 4096);
}
}
// Unlock
g_range_lock.release(0);
}
bool page_protect(u32 addr, u32 size, u8 flags_test, u8 flags_set, u8 flags_clear)
{
perf_meter<"PAGE_PRO"_u64> perf0;
vm::writer_lock lock(0);
vm::writer_lock lock;
if (!size || (size | addr) % 4096)
{
@ -837,18 +791,12 @@ namespace vm
utils::memory_protect(g_base_addr + start * 4096, page_size, protection);
}
}
else
{
g_range_lock.release(0);
}
start_value = new_val;
start = i;
}
}
g_range_lock.release(0);
return true;
}
@ -943,9 +891,6 @@ namespace vm
}
}
// Unlock
g_range_lock.release(0);
return size;
}
@ -1268,7 +1213,7 @@ namespace vm
return 0;
}
vm::writer_lock lock(0);
vm::writer_lock lock;
if (!is_valid())
{
@ -1338,7 +1283,7 @@ namespace vm
shm = std::make_shared<utils::shm>(size);
}
vm::writer_lock lock(0);
vm::writer_lock lock;
if (!is_valid())
{
@ -1358,7 +1303,7 @@ namespace vm
{
auto& m_map = (m.*block_map)();
{
vm::writer_lock lock(0);
vm::writer_lock lock;
const auto found = m_map.find(addr - (flags & stack_guarded ? 0x1000 : 0));
@ -1408,7 +1353,7 @@ namespace vm
auto& m_map = (m.*block_map)();
vm::reader_lock lock;
vm::writer_lock lock;
const auto upper = m_map.upper_bound(addr);
@ -1454,7 +1399,7 @@ namespace vm
u32 block_t::used()
{
vm::writer_lock lock(0);
vm::writer_lock lock;
return imp_used(lock);
}
@ -1563,14 +1508,14 @@ namespace vm
std::shared_ptr<block_t> map(u32 addr, u32 size, u64 flags)
{
vm::writer_lock lock(0);
vm::writer_lock lock;
return _map(addr, size, flags);
}
std::shared_ptr<block_t> find_map(u32 orig_size, u32 align, u64 flags)
{
vm::writer_lock lock(0);
vm::writer_lock lock;
// Align to minimal page size
const u32 size = utils::align(orig_size, 0x10000);
@ -1603,7 +1548,7 @@ namespace vm
std::pair<std::shared_ptr<block_t>, bool> result{};
vm::writer_lock lock(0);
vm::writer_lock lock;
for (auto it = g_locations.begin() + memory_location_max; it != g_locations.end(); it++)
{
@ -1643,14 +1588,14 @@ namespace vm
std::shared_ptr<block_t> get(memory_location_t location, u32 addr)
{
vm::reader_lock lock;
vm::writer_lock lock;
return _get_map(location, addr);
}
std::shared_ptr<block_t> reserve_map(memory_location_t location, u32 addr, u32 area_size, u64 flags)
{
vm::reader_lock lock;
vm::writer_lock lock;
auto area = _get_map(location, addr);
@ -1659,8 +1604,6 @@ namespace vm
return area;
}
lock.upgrade();
// Allocation on arbitrary address
if (location != any && location < g_locations.size())
{
@ -1689,7 +1632,7 @@ namespace vm
bool try_access(u32 addr, void* ptr, u32 size, bool is_write)
{
vm::reader_lock lock;
vm::writer_lock lock;
if (vm::check_addr(addr, is_write ? page_writable : page_readable, size))
{
@ -1771,7 +1714,7 @@ namespace vm
void close()
{
{
vm::writer_lock lock(0);
vm::writer_lock lock;
for (auto& block : g_locations)
{

View File

@ -7,8 +7,6 @@ class shared_mutex;
namespace vm
{
extern shared_mutex g_mutex;
extern thread_local atomic_t<cpu_thread*>* g_tls_locked;
enum range_lock_flags : u64
@ -110,24 +108,12 @@ namespace vm
void temporary_unlock(cpu_thread& cpu) noexcept;
void temporary_unlock() noexcept;
class reader_lock final
{
bool m_upgraded = false;
public:
reader_lock(const reader_lock&) = delete;
reader_lock& operator=(const reader_lock&) = delete;
reader_lock();
~reader_lock();
void upgrade();
};
struct writer_lock final
{
writer_lock(const writer_lock&) = delete;
writer_lock& operator=(const writer_lock&) = delete;
writer_lock(u32 addr = 0);
writer_lock();
writer_lock(u32 addr, u32 size = 0, u64 flags = range_locked);
~writer_lock();
};
} // namespace vm

View File

@ -314,7 +314,7 @@ u64 memory_string_searcher::OnSearch(std::string wstr, int mode)
static constexpr u32 block_size = 0x2000000;
vm::reader_lock rlock;
vm::writer_lock rlock;
const named_thread_group workers("Memory Searcher "sv, max_threads, [&]()
{
@ -497,7 +497,7 @@ u64 memory_string_searcher::OnSearch(std::string wstr, int mode)
auto log_occurance = [&](std::string_view& test_sv, bool always_log_str)
{
// Cut out a view which may or may not be suffixed by a single null character
// This view is a peek at the full string which resides in PS3 memory
// This view is a peek at the full string which resides in PS3 memory
test_sv = test_sv.substr(0, std::max<usz>(wstr.size(), 100));
const usz null_pos = test_sv.find_first_of("\n\0"sv, wstr.size());
test_sv = test_sv.substr(0, null_pos);

View File

@ -15,7 +15,6 @@
#include <QTextEdit>
#include <QComboBox>
#include <QWheelEvent>
#include <shared_mutex>
#include "util/asm.hpp"
#include "util/vm.hpp"
@ -591,8 +590,6 @@ void memory_viewer_panel::ShowImage(QWidget* parent, u32 addr, color_format form
return;
}
std::shared_lock rlock(vm::g_mutex);
const auto originalBuffer = static_cast<u8*>(this->to_ptr(addr, memsize));
const auto convertedBuffer = new (std::nothrow) u8[memsize];
@ -672,8 +669,6 @@ void memory_viewer_panel::ShowImage(QWidget* parent, u32 addr, color_format form
}
}
rlock.unlock();
// Flip vertically
if (flipv && height > 1 && memsize > 1)
{