Alternative mutex

This commit is contained in:
chss95cs@gmail.com 2022-08-14 08:59:11 -07:00
parent 6bc3191b97
commit 08f7a28920
13 changed files with 152 additions and 54 deletions

View File

@ -91,7 +91,7 @@ int XmaContext::Setup(uint32_t id, Memory* memory, uint32_t guest_ptr) {
}
bool XmaContext::Work() {
std::lock_guard<std::mutex> lock(lock_);
std::lock_guard<xe_mutex> lock(lock_);
if (!is_allocated() || !is_enabled()) {
return false;
}
@ -106,7 +106,7 @@ bool XmaContext::Work() {
}
void XmaContext::Enable() {
std::lock_guard<std::mutex> lock(lock_);
std::lock_guard<xe_mutex> lock(lock_);
auto context_ptr = memory()->TranslateVirtual(guest_ptr());
XMA_CONTEXT_DATA data(context_ptr);
@ -134,7 +134,7 @@ bool XmaContext::Block(bool poll) {
}
void XmaContext::Clear() {
std::lock_guard<std::mutex> lock(lock_);
std::lock_guard<xe_mutex> lock(lock_);
XELOGAPU("XmaContext: reset context {}", id());
auto context_ptr = memory()->TranslateVirtual(guest_ptr());
@ -151,14 +151,14 @@ void XmaContext::Clear() {
}
void XmaContext::Disable() {
std::lock_guard<std::mutex> lock(lock_);
std::lock_guard<xe_mutex> lock(lock_);
XELOGAPU("XmaContext: disabling context {}", id());
set_is_enabled(false);
}
void XmaContext::Release() {
// Lock it in case the decoder thread is working on it now.
std::lock_guard<std::mutex> lock(lock_);
std::lock_guard<xe_mutex> lock(lock_);
assert_true(is_allocated_ == true);
set_is_allocated(false);

View File

@ -200,7 +200,7 @@ class XmaContext {
uint32_t id_ = 0;
uint32_t guest_ptr_ = 0;
std::mutex lock_;
xe_mutex lock_;
bool is_allocated_ = false;
bool is_enabled_ = false;
// bool is_dirty_ = true;
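
A minimal sketch (editor's addition, not part of the commit) of why the XmaContext swap is a drop-in change: std::lock_guard only requires the BasicLockable interface (lock()/unlock()), which both std::mutex and xe::xe_mutex provide, so the guarded bodies stay untouched. The ExampleContext type below is hypothetical.

#include <mutex>

#include "xenia/base/mutex.h"

// Hypothetical holder mirroring the XmaContext pattern above.
struct ExampleContext {
  xe::xe_mutex lock_;
  bool enabled_ = false;

  void Enable() {
    // Identical call site to the std::mutex version; only the template
    // argument changed.
    std::lock_guard<xe::xe_mutex> lock(lock_);
    enabled_ = true;
  }
};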

View File

@ -8,11 +8,76 @@
*/
#include "xenia/base/mutex.h"
#if XE_PLATFORM_WIN32 == 1
#include "xenia/base/platform_win.h"
#endif
namespace xe {
#if XE_PLATFORM_WIN32 == 1 && XE_ENABLE_FAST_WIN32_MUTEX == 1
// The default spin count for EnterCriticalSection is insane on Windows: 0x20007D0i64 (33556432 times!!)
// When a lock is highly contended, performance degrades sharply on some processors.
#define XE_CRIT_SPINCOUNT 128
/*
chrispy: todo: if a thread exits before releasing the global mutex, we need to
detect that and release the mutex. One way to do this is FlsAlloc with a
PFLS_CALLBACK_FUNCTION, which gets called with the fiber-local data when a
thread exits (sketched below, after this file's diff).
*/
thread_local unsigned global_mutex_depth = 0;
static CRITICAL_SECTION* global_critical_section(xe_global_mutex* mutex) {
return reinterpret_cast<CRITICAL_SECTION*>(mutex);
}
xe_global_mutex::xe_global_mutex() {
InitializeCriticalSectionAndSpinCount(global_critical_section(this),
XE_CRIT_SPINCOUNT);
}
xe_global_mutex::~xe_global_mutex() {
DeleteCriticalSection(global_critical_section(this));
}
void xe_global_mutex::lock() {
if (global_mutex_depth) {
} else {
EnterCriticalSection(global_critical_section(this));
}
global_mutex_depth++;
}
void xe_global_mutex::unlock() {
if (--global_mutex_depth == 0) {
LeaveCriticalSection(global_critical_section(this));
}
}
bool xe_global_mutex::try_lock() {
if (global_mutex_depth) {
++global_mutex_depth;
return true;
} else {
BOOL success = TryEnterCriticalSection(global_critical_section(this));
if (success) {
++global_mutex_depth;
}
return success;
}
}
CRITICAL_SECTION* fast_crit(xe_fast_mutex* mutex) {
return reinterpret_cast<CRITICAL_SECTION*>(mutex);
}
xe_fast_mutex::xe_fast_mutex() {
InitializeCriticalSectionAndSpinCount(fast_crit(this), XE_CRIT_SPINCOUNT);
}
xe_fast_mutex::~xe_fast_mutex() { DeleteCriticalSection(fast_crit(this)); }
void xe_fast_mutex::lock() { EnterCriticalSection(fast_crit(this)); }
void xe_fast_mutex::unlock() { LeaveCriticalSection(fast_crit(this)); }
bool xe_fast_mutex::try_lock() {
return TryEnterCriticalSection(fast_crit(this));
}
#endif
// chrispy: moved this out of the function body to eliminate the
// initialization guards
static std::recursive_mutex global_mutex;
std::recursive_mutex& global_critical_region::mutex() { return global_mutex; }
static global_mutex_type global_mutex;
global_mutex_type& global_critical_region::mutex() { return global_mutex; }
} // namespace xe
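
The todo comment near the top of this file mentions FlsAlloc and PFLS_CALLBACK_FUNCTION as a way to release the global mutex if a thread exits while still holding it. The sketch below is an editor's illustration of that idea, not part of the commit: it ignores recursion depth, and the names g_global_mutex_fls_slot, InstallGlobalMutexCleanup and NoteGlobalMutexHeld are hypothetical.

#include <windows.h>

#include "xenia/base/mutex.h"

static DWORD g_global_mutex_fls_slot = FLS_OUT_OF_INDEXES;

// Called by the OS with the slot's stored value when the owning thread exits
// (or its fiber is deleted) while the value is non-null.
static VOID NTAPI ReleaseGlobalMutexOnThreadExit(PVOID fls_data) {
  auto* mutex = static_cast<xe::xe_global_mutex*>(fls_data);
  if (mutex) {
    mutex->unlock();
  }
}

void InstallGlobalMutexCleanup() {
  g_global_mutex_fls_slot = FlsAlloc(ReleaseGlobalMutexOnThreadExit);
}

// Would be called right after xe_global_mutex::lock(); clearing the slot in
// unlock() via FlsSetValue(g_global_mutex_fls_slot, nullptr) keeps the
// callback from firing for threads that released the mutex normally.
void NoteGlobalMutexHeld(xe::xe_global_mutex* mutex) {
  FlsSetValue(g_global_mutex_fls_slot, mutex);
}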

View File

@ -9,11 +9,50 @@
#ifndef XENIA_BASE_MUTEX_H_
#define XENIA_BASE_MUTEX_H_
#include <mutex>
#include "platform.h"
//#define XE_ENABLE_FAST_WIN32_MUTEX 1
namespace xe {
#if XE_PLATFORM_WIN32 == 1 && XE_ENABLE_FAST_WIN32_MUTEX == 1
/*
Must conform to BasicLockable
(https://en.cppreference.com/w/cpp/named_req/BasicLockable) as well as
Lockable (https://en.cppreference.com/w/cpp/named_req/Lockable).
This emulates a recursive mutex, but with far less overhead.
*/
class alignas(64) xe_global_mutex {
char detail[64];
public:
xe_global_mutex();
~xe_global_mutex();
void lock();
void unlock();
bool try_lock();
};
using global_mutex_type = xe_global_mutex;
class alignas(64) xe_fast_mutex {
char detail[64];
public:
xe_fast_mutex();
~xe_fast_mutex();
void lock();
void unlock();
bool try_lock();
};
using xe_mutex = xe_fast_mutex;
#else
using global_mutex_type = std::recursive_mutex;
using xe_mutex = std::mutex;
#endif
using global_unique_lock_type = std::unique_lock<global_mutex_type>;
// The global critical region mutex singleton.
// This must guard any operation that may suspend threads or be sensitive to
// being suspended such as global table locks and such.
@ -54,30 +93,30 @@ namespace xe {
// };
class global_critical_region {
public:
static std::recursive_mutex& mutex();
static global_mutex_type& mutex();
// Acquires a lock on the global critical section.
// Use this when keeping an instance is not possible. Otherwise, prefer
// to keep an instance of global_critical_region near the members requiring
// it to keep things readable.
static std::unique_lock<std::recursive_mutex> AcquireDirect() {
return std::unique_lock<std::recursive_mutex>(mutex());
static global_unique_lock_type AcquireDirect() {
return global_unique_lock_type(mutex());
}
// Acquires a lock on the global critical section.
inline std::unique_lock<std::recursive_mutex> Acquire() {
return std::unique_lock<std::recursive_mutex>(mutex());
inline global_unique_lock_type Acquire() {
return global_unique_lock_type(mutex());
}
// Acquires a deferred lock on the global critical section.
inline std::unique_lock<std::recursive_mutex> AcquireDeferred() {
return std::unique_lock<std::recursive_mutex>(mutex(), std::defer_lock);
inline global_unique_lock_type AcquireDeferred() {
return global_unique_lock_type(mutex(), std::defer_lock);
}
// Tries to acquire a lock on the global critical section.
// Check owns_lock() to see if the lock was successfully acquired.
inline std::unique_lock<std::recursive_mutex> TryAcquire() {
return std::unique_lock<std::recursive_mutex>(mutex(), std::try_to_lock);
inline global_unique_lock_type TryAcquire() {
return global_unique_lock_type(mutex(), std::try_to_lock);
}
};
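
For reference, a short usage sketch (editor's addition, not from the diff): call sites that go through global_critical_region::Acquire() and the global_unique_lock_type alias are unaffected by the switch, because they never name std::recursive_mutex directly. ExampleTable below is hypothetical.

#include <cstdint>
#include <vector>

#include "xenia/base/mutex.h"

class ExampleTable {
 public:
  void Insert(uint32_t key) {
    // Resolves to std::unique_lock<xe_global_mutex> on Win32 builds with the
    // fast mutex enabled, and std::unique_lock<std::recursive_mutex> otherwise.
    auto global_lock = global_critical_region_.Acquire();
    keys_.push_back(key);
  }

 private:
  xe::global_critical_region global_critical_region_;
  std::vector<uint32_t> keys_;
};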

View File

@ -36,7 +36,7 @@ using WaitItem = TimerQueueWaitItem;
edit: actually had to change it back; when I was testing, it only worked because I had fixed disruptorplus' code to compile (it passes the wrong arguments to condition_variable::wait_until), but it builds now
*/
using WaitStrat = dp::spin_wait_strategy; //dp::blocking_wait_strategy;
using WaitStrat = dp::blocking_wait_strategy;
class TimerQueue {
public:

View File

@ -48,7 +48,7 @@ class MMIOHandler {
typedef uint32_t (*HostToGuestVirtual)(const void* context,
const void* host_address);
typedef bool (*AccessViolationCallback)(
std::unique_lock<std::recursive_mutex> global_lock_locked_once,
global_unique_lock_type global_lock_locked_once,  // not passed by reference with const like the others?
void* context, void* host_address, bool is_write);
// access_violation_callback is called with global_critical_region locked once
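
The question in the added comment above points at a real distinction: this callback receives the lock by value, so ownership of the locked state transfers into the callee, which may unlock it early; the watch callbacks only observe the lock through a const reference. A generic sketch of that by-value transfer (editor's addition, using plain std::mutex rather than the project's global_unique_lock_type):

#include <mutex>
#include <utility>

static std::mutex example_mutex;

// Takes ownership of the locked state; free to unlock before heavy work.
static void HandleFault(std::unique_lock<std::mutex> lock_once) {
  lock_once.unlock();
  // ... do work that must not run under the lock ...
}

static void OnFault() {
  std::unique_lock<std::mutex> lock(example_mutex);
  HandleFault(std::move(lock));  // same shape as AccessViolationCallbackThunk
}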

View File

@ -15,7 +15,7 @@
#include <string>
#include "xenia/base/vec128.h"
#include "xenia/base/mutex.h"
namespace xe {
namespace cpu {
class Processor;
@ -405,7 +405,7 @@ typedef struct alignas(64) PPCContext_s {
// Global interrupt lock, held while interrupts are disabled or interrupts are
// executing. This is shared among all threads and comes from the processor.
std::recursive_mutex* global_mutex;
global_mutex_type* global_mutex;
// Used to shuttle data into externs. Contents volatile.
uint64_t scratch;

View File

@ -883,7 +883,7 @@ class PrimitiveProcessor {
// Must be called in a global critical region.
void UpdateCacheBucketsNonEmptyL2(
uint32_t bucket_index_div_64,
[[maybe_unused]] const std::unique_lock<std::recursive_mutex>&
[[maybe_unused]] const global_unique_lock_type&
global_lock) {
uint64_t& cache_buckets_non_empty_l2_ref =
cache_buckets_non_empty_l2_[bucket_index_div_64 >> 6];

View File

@ -35,7 +35,7 @@ class SharedMemory {
virtual void SetSystemPageBlocksValidWithGpuDataWritten();
typedef void (*GlobalWatchCallback)(
const std::unique_lock<std::recursive_mutex>& global_lock, void* context,
const global_unique_lock_type& global_lock, void* context,
uint32_t address_first, uint32_t address_last, bool invalidated_by_gpu);
typedef void* GlobalWatchHandle;
// Registers a callback invoked when something is invalidated in the GPU
@ -49,8 +49,8 @@ class SharedMemory {
GlobalWatchHandle RegisterGlobalWatch(GlobalWatchCallback callback,
void* callback_context);
void UnregisterGlobalWatch(GlobalWatchHandle handle);
typedef void (*WatchCallback)(
const std::unique_lock<std::recursive_mutex>& global_lock, void* context,
typedef void (*WatchCallback)(const global_unique_lock_type& global_lock,
void* context,
void* data, uint64_t argument, bool invalidated_by_gpu);
typedef void* WatchHandle;
// Registers a callback invoked when the specified memory range is invalidated

View File

@ -507,7 +507,7 @@ TextureCache::Texture::~Texture() {
}
void TextureCache::Texture::MakeUpToDateAndWatch(
const std::unique_lock<std::recursive_mutex>& global_lock) {
const global_unique_lock_type& global_lock) {
SharedMemory& shared_memory = texture_cache().shared_memory();
if (base_outdated_) {
assert_not_zero(GetGuestBaseSize());
@ -552,7 +552,7 @@ void TextureCache::Texture::MarkAsUsed() {
}
void TextureCache::Texture::WatchCallback(
[[maybe_unused]] const std::unique_lock<std::recursive_mutex>& global_lock,
[[maybe_unused]] const global_unique_lock_type& global_lock,
bool is_mip) {
if (is_mip) {
assert_not_zero(GetGuestMipsSize());
@ -565,8 +565,8 @@ void TextureCache::Texture::WatchCallback(
}
}
void TextureCache::WatchCallback(
const std::unique_lock<std::recursive_mutex>& global_lock, void* context,
void TextureCache::WatchCallback(const global_unique_lock_type& global_lock,
void* context,
void* data, uint64_t argument, bool invalidated_by_gpu) {
Texture& texture = *static_cast<Texture*>(context);
texture.WatchCallback(global_lock, argument != 0);
@ -902,7 +902,7 @@ bool TextureCache::IsRangeScaledResolved(uint32_t start_unscaled,
}
void TextureCache::ScaledResolveGlobalWatchCallbackThunk(
const std::unique_lock<std::recursive_mutex>& global_lock, void* context,
const global_unique_lock_type& global_lock, void* context,
uint32_t address_first, uint32_t address_last, bool invalidated_by_gpu) {
TextureCache* texture_cache = reinterpret_cast<TextureCache*>(context);
texture_cache->ScaledResolveGlobalWatchCallback(
@ -910,7 +910,7 @@ void TextureCache::ScaledResolveGlobalWatchCallbackThunk(
}
void TextureCache::ScaledResolveGlobalWatchCallback(
const std::unique_lock<std::recursive_mutex>& global_lock,
const global_unique_lock_type& global_lock,
uint32_t address_first, uint32_t address_last, bool invalidated_by_gpu) {
assert_true(IsDrawResolutionScaled());
if (invalidated_by_gpu) {

View File

@ -230,19 +230,15 @@ class TextureCache {
}
bool IsResolved() const { return base_resolved_ || mips_resolved_; }
bool base_outdated(
const std::unique_lock<std::recursive_mutex>& global_lock) const {
bool base_outdated(const global_unique_lock_type& global_lock) const {
return base_outdated_;
}
bool mips_outdated(
const std::unique_lock<std::recursive_mutex>& global_lock) const {
bool mips_outdated(const global_unique_lock_type& global_lock) const {
return mips_outdated_;
}
void MakeUpToDateAndWatch(
const std::unique_lock<std::recursive_mutex>& global_lock);
void MakeUpToDateAndWatch(const global_unique_lock_type& global_lock);
void WatchCallback(
const std::unique_lock<std::recursive_mutex>& global_lock, bool is_mip);
void WatchCallback(const global_unique_lock_type& global_lock, bool is_mip);
// For LRU caching - updates the last usage frame and moves the texture to
// the end of the usage queue. Must be called any time the texture is
@ -579,8 +575,8 @@ class TextureCache {
void UpdateTexturesTotalHostMemoryUsage(uint64_t add, uint64_t subtract);
// Shared memory callback for texture data invalidation.
static void WatchCallback(
const std::unique_lock<std::recursive_mutex>& global_lock, void* context,
static void WatchCallback(const global_unique_lock_type& global_lock,
void* context,
void* data, uint64_t argument, bool invalidated_by_gpu);
// Checks if there are any pages that contain scaled resolve data within the
@ -589,10 +585,10 @@ class TextureCache {
// Global shared memory invalidation callback for invalidating scaled resolved
// texture data.
static void ScaledResolveGlobalWatchCallbackThunk(
const std::unique_lock<std::recursive_mutex>& global_lock, void* context,
const global_unique_lock_type& global_lock, void* context,
uint32_t address_first, uint32_t address_last, bool invalidated_by_gpu);
void ScaledResolveGlobalWatchCallback(
const std::unique_lock<std::recursive_mutex>& global_lock,
const global_unique_lock_type& global_lock,
uint32_t address_first, uint32_t address_last, bool invalidated_by_gpu);
const RegisterFile& register_file_;

View File

@ -465,7 +465,7 @@ cpu::MMIORange* Memory::LookupVirtualMappedRange(uint32_t virtual_address) {
}
bool Memory::AccessViolationCallback(
std::unique_lock<std::recursive_mutex> global_lock_locked_once,
global_unique_lock_type global_lock_locked_once,
void* host_address, bool is_write) {
// Access via physical_membase_ is special, used when we need to bypass everything
// (for instance, for a data provider to actually write the data) so only
@ -493,14 +493,14 @@ bool Memory::AccessViolationCallback(
}
bool Memory::AccessViolationCallbackThunk(
std::unique_lock<std::recursive_mutex> global_lock_locked_once,
global_unique_lock_type global_lock_locked_once,
void* context, void* host_address, bool is_write) {
return reinterpret_cast<Memory*>(context)->AccessViolationCallback(
std::move(global_lock_locked_once), host_address, is_write);
}
bool Memory::TriggerPhysicalMemoryCallbacks(
std::unique_lock<std::recursive_mutex> global_lock_locked_once,
global_unique_lock_type global_lock_locked_once,
uint32_t virtual_address, uint32_t length, bool is_write,
bool unwatch_exact_range, bool unprotect) {
BaseHeap* heap = LookupHeap(virtual_address);
@ -1711,7 +1711,7 @@ void PhysicalHeap::EnableAccessCallbacks(uint32_t physical_address,
}
bool PhysicalHeap::TriggerCallbacks(
std::unique_lock<std::recursive_mutex> global_lock_locked_once,
global_unique_lock_type global_lock_locked_once,
uint32_t virtual_address, uint32_t length, bool is_write,
bool unwatch_exact_range, bool unprotect) {
// TODO(Triang3l): Support read watches.

View File

@ -271,8 +271,7 @@ class PhysicalHeap : public BaseHeap {
bool enable_invalidation_notifications,
bool enable_data_providers);
// Returns true if any page in the range was watched.
bool TriggerCallbacks(
std::unique_lock<std::recursive_mutex> global_lock_locked_once,
bool TriggerCallbacks(global_unique_lock_type global_lock_locked_once,
uint32_t virtual_address, uint32_t length, bool is_write,
bool unwatch_exact_range, bool unprotect = true);
@ -459,7 +458,7 @@ class Memory {
// TODO(Triang3l): Implement data providers - this is why locking depth of 1
// will be required in the future.
bool TriggerPhysicalMemoryCallbacks(
std::unique_lock<std::recursive_mutex> global_lock_locked_once,
global_unique_lock_type global_lock_locked_once,
uint32_t virtual_address, uint32_t length, bool is_write,
bool unwatch_exact_range, bool unprotect = true);
@ -508,11 +507,10 @@ class Memory {
static uint32_t HostToGuestVirtualThunk(const void* context,
const void* host_address);
bool AccessViolationCallback(
std::unique_lock<std::recursive_mutex> global_lock_locked_once,
bool AccessViolationCallback(global_unique_lock_type global_lock_locked_once,
void* host_address, bool is_write);
static bool AccessViolationCallbackThunk(
std::unique_lock<std::recursive_mutex> global_lock_locked_once,
global_unique_lock_type global_lock_locked_once,
void* context, void* host_address, bool is_write);
std::filesystem::path file_name_;