Alternative mutex
This commit is contained in:
  parent 6bc3191b97
  commit 08f7a28920
@@ -91,7 +91,7 @@ int XmaContext::Setup(uint32_t id, Memory* memory, uint32_t guest_ptr) {
 }
 
 bool XmaContext::Work() {
-  std::lock_guard<std::mutex> lock(lock_);
+  std::lock_guard<xe_mutex> lock(lock_);
   if (!is_allocated() || !is_enabled()) {
     return false;
   }
@@ -106,7 +106,7 @@ bool XmaContext::Work() {
 }
 
 void XmaContext::Enable() {
-  std::lock_guard<std::mutex> lock(lock_);
+  std::lock_guard<xe_mutex> lock(lock_);
 
   auto context_ptr = memory()->TranslateVirtual(guest_ptr());
   XMA_CONTEXT_DATA data(context_ptr);
@@ -134,7 +134,7 @@ bool XmaContext::Block(bool poll) {
 }
 
 void XmaContext::Clear() {
-  std::lock_guard<std::mutex> lock(lock_);
+  std::lock_guard<xe_mutex> lock(lock_);
   XELOGAPU("XmaContext: reset context {}", id());
 
   auto context_ptr = memory()->TranslateVirtual(guest_ptr());
@@ -151,14 +151,14 @@ void XmaContext::Clear() {
 }
 
 void XmaContext::Disable() {
-  std::lock_guard<std::mutex> lock(lock_);
+  std::lock_guard<xe_mutex> lock(lock_);
   XELOGAPU("XmaContext: disabling context {}", id());
   set_is_enabled(false);
 }
 
 void XmaContext::Release() {
   // Lock it in case the decoder thread is working on it now.
-  std::lock_guard<std::mutex> lock(lock_);
+  std::lock_guard<xe_mutex> lock(lock_);
   assert_true(is_allocated_ == true);
 
   set_is_allocated(false);
@@ -200,7 +200,7 @@ class XmaContext {
 
   uint32_t id_ = 0;
   uint32_t guest_ptr_ = 0;
-  std::mutex lock_;
+  xe_mutex lock_;
  bool is_allocated_ = false;
  bool is_enabled_ = false;
  // bool is_dirty_ = true;
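The XmaContext changes above are a pure type swap: std::lock_guard only requires its argument to satisfy BasicLockable (lock()/unlock()), so xe_mutex can replace std::mutex without touching any call sites. A minimal sketch of that contract, using a hypothetical TinyLockable and GuardedCounter rather than the real types:

    #include <mutex>

    // Hypothetical stand-in: any type providing lock()/unlock() (BasicLockable)
    // works with std::lock_guard, which is why swapping the member type is enough.
    struct TinyLockable {
      std::mutex inner;  // in the commit this role is played by a CRITICAL_SECTION
      void lock() { inner.lock(); }
      void unlock() { inner.unlock(); }
    };

    struct GuardedCounter {
      TinyLockable lock_;
      int value = 0;
      void Increment() {
        std::lock_guard<TinyLockable> lock(lock_);  // same pattern as XmaContext::Work()
        ++value;
      }
    };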
@@ -8,11 +8,76 @@
 */
 
 #include "xenia/base/mutex.h"
+#if XE_PLATFORM_WIN32 == 1
+#include "xenia/base/platform_win.h"
+#endif
 
 namespace xe {
+#if XE_PLATFORM_WIN32 == 1 && XE_ENABLE_FAST_WIN32_MUTEX == 1
+// The default spin count for EnterCriticalSection on Windows is insane,
+// 0x20007D0i64 (33556432 times!); when a lock is highly contended, performance
+// degrades sharply on some processors.
+#define XE_CRIT_SPINCOUNT 128
+/*
+  chrispy: todo - if a thread exits before releasing the global mutex, we need
+  to detect this and release the mutex. One way to do this is by using FlsAlloc
+  and a PFLS_CALLBACK_FUNCTION, which gets called with the fiber-local data
+  when a thread exits.
+*/
+thread_local unsigned global_mutex_depth = 0;
+static CRITICAL_SECTION* global_critical_section(xe_global_mutex* mutex) {
+  return reinterpret_cast<CRITICAL_SECTION*>(mutex);
+}
+
+xe_global_mutex::xe_global_mutex() {
+  InitializeCriticalSectionAndSpinCount(global_critical_section(this),
+                                        XE_CRIT_SPINCOUNT);
+}
+xe_global_mutex::~xe_global_mutex() {
+  DeleteCriticalSection(global_critical_section(this));
+}
+void xe_global_mutex::lock() {
+  if (global_mutex_depth) {
+  } else {
+    EnterCriticalSection(global_critical_section(this));
+  }
+  global_mutex_depth++;
+}
+void xe_global_mutex::unlock() {
+  if (--global_mutex_depth == 0) {
+    LeaveCriticalSection(global_critical_section(this));
+  }
+}
+bool xe_global_mutex::try_lock() {
+  if (global_mutex_depth) {
+    ++global_mutex_depth;
+    return true;
+  } else {
+    BOOL success = TryEnterCriticalSection(global_critical_section(this));
+    if (success) {
+      ++global_mutex_depth;
+    }
+    return success;
+  }
+}
+
+CRITICAL_SECTION* fast_crit(xe_fast_mutex* mutex) {
+  return reinterpret_cast<CRITICAL_SECTION*>(mutex);
+}
+xe_fast_mutex::xe_fast_mutex() {
+  InitializeCriticalSectionAndSpinCount(fast_crit(this), XE_CRIT_SPINCOUNT);
+}
+xe_fast_mutex::~xe_fast_mutex() { DeleteCriticalSection(fast_crit(this)); }
+
+void xe_fast_mutex::lock() { EnterCriticalSection(fast_crit(this)); }
+void xe_fast_mutex::unlock() { LeaveCriticalSection(fast_crit(this)); }
+bool xe_fast_mutex::try_lock() {
+  return TryEnterCriticalSection(fast_crit(this));
+}
+#endif
 // chrispy: moved this out of body of function to eliminate the initialization
 // guards
-static std::recursive_mutex global_mutex;
-std::recursive_mutex& global_critical_region::mutex() { return global_mutex; }
+static global_mutex_type global_mutex;
+global_mutex_type& global_critical_region::mutex() { return global_mutex; }
 
 }  // namespace xe
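The recursion handling in xe_global_mutex sits entirely in the thread_local depth counter: only the outermost lock()/unlock() pair on a given thread touches the underlying CRITICAL_SECTION, and try_lock() always succeeds on a thread that already holds the lock. A portable sketch of the same idea (hypothetical DepthCountedMutex name, built on std::mutex instead of a CRITICAL_SECTION); note that, as in the commit, the per-thread counter is shared by all instances, which is only safe because a single global instance exists:

    #include <mutex>

    // Hypothetical illustration of the depth-counting scheme used by
    // xe_global_mutex, built on std::mutex instead of a CRITICAL_SECTION.
    class DepthCountedMutex {
     public:
      void lock() {
        if (depth_ == 0) {  // only the outermost lock takes the real mutex
          inner_.lock();
        }
        ++depth_;
      }
      void unlock() {
        if (--depth_ == 0) {  // only the outermost unlock releases it
          inner_.unlock();
        }
      }
      bool try_lock() {
        if (depth_ > 0 || inner_.try_lock()) {
          ++depth_;
          return true;
        }
        return false;
      }

     private:
      std::mutex inner_;
      // One counter per thread, shared by all instances - safe only when a
      // single global instance exists, exactly as with xe_global_mutex.
      static thread_local unsigned depth_;
    };

    thread_local unsigned DepthCountedMutex::depth_ = 0;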
@@ -9,11 +9,50 @@
 
 #ifndef XENIA_BASE_MUTEX_H_
 #define XENIA_BASE_MUTEX_H_
 
 #include <mutex>
+#include "platform.h"
 
+// #define XE_ENABLE_FAST_WIN32_MUTEX 1
 namespace xe {
 
+#if XE_PLATFORM_WIN32 == 1 && XE_ENABLE_FAST_WIN32_MUTEX == 1
+/*
+  Must conform to BasicLockable
+  (https://en.cppreference.com/w/cpp/named_req/BasicLockable) as well as
+  Lockable (https://en.cppreference.com/w/cpp/named_req/Lockable).
+
+  This emulates a recursive mutex, except with far less overhead.
+*/
+class alignas(64) xe_global_mutex {
+  char detail[64];
+
+ public:
+  xe_global_mutex();
+  ~xe_global_mutex();
+
+  void lock();
+  void unlock();
+  bool try_lock();
+};
+using global_mutex_type = xe_global_mutex;
+
+class alignas(64) xe_fast_mutex {
+  char detail[64];
+
+ public:
+  xe_fast_mutex();
+  ~xe_fast_mutex();
+
+  void lock();
+  void unlock();
+  bool try_lock();
+};
+using xe_mutex = xe_fast_mutex;
+#else
+using global_mutex_type = std::recursive_mutex;
+using xe_mutex = std::mutex;
+#endif
+using global_unique_lock_type = std::unique_lock<global_mutex_type>;
 // The global critical region mutex singleton.
 // This must guard any operation that may suspend threads or be sensitive to
 // being suspended such as global table locks and such.
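Both mutex classes expose nothing but 64 aligned bytes of opaque storage (char detail[64] with alignas(64)); mutex.cc then reinterprets that storage as a CRITICAL_SECTION, which keeps windows.h out of this widely included header and pads each mutex to a cache line. A single-file sketch of the same opaque-storage pattern, with a hypothetical OpaqueLock name; the static_asserts are an extra guard that the commit itself does not include:

    // Header side: no OS headers needed, just cache-line-sized raw storage.
    class alignas(64) OpaqueLock {
      char detail[64];  // reinterpreted by the implementation as the OS primitive

     public:
      OpaqueLock();
      ~OpaqueLock();
      void lock();
      void unlock();
    };

    #ifdef _WIN32
    // Implementation side: reinterpret the reserved storage as a CRITICAL_SECTION.
    #include <windows.h>

    static_assert(sizeof(CRITICAL_SECTION) <= sizeof(OpaqueLock),
                  "reserved storage must fit CRITICAL_SECTION");
    static_assert(alignof(OpaqueLock) >= alignof(CRITICAL_SECTION),
                  "reserved storage must be aligned for CRITICAL_SECTION");

    static CRITICAL_SECTION* crit(OpaqueLock* lock) {
      return reinterpret_cast<CRITICAL_SECTION*>(lock);
    }
    OpaqueLock::OpaqueLock() { InitializeCriticalSection(crit(this)); }
    OpaqueLock::~OpaqueLock() { DeleteCriticalSection(crit(this)); }
    void OpaqueLock::lock() { EnterCriticalSection(crit(this)); }
    void OpaqueLock::unlock() { LeaveCriticalSection(crit(this)); }
    #endif  // _WIN32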
@@ -54,30 +93,30 @@ namespace xe {
 // };
 class global_critical_region {
  public:
-  static std::recursive_mutex& mutex();
+  static global_mutex_type& mutex();
 
   // Acquires a lock on the global critical section.
   // Use this when keeping an instance is not possible. Otherwise, prefer
   // to keep an instance of global_critical_region near the members requiring
   // it to keep things readable.
-  static std::unique_lock<std::recursive_mutex> AcquireDirect() {
-    return std::unique_lock<std::recursive_mutex>(mutex());
+  static global_unique_lock_type AcquireDirect() {
+    return global_unique_lock_type(mutex());
   }
 
   // Acquires a lock on the global critical section.
-  inline std::unique_lock<std::recursive_mutex> Acquire() {
-    return std::unique_lock<std::recursive_mutex>(mutex());
+  inline global_unique_lock_type Acquire() {
+    return global_unique_lock_type(mutex());
   }
 
   // Acquires a deferred lock on the global critical section.
-  inline std::unique_lock<std::recursive_mutex> AcquireDeferred() {
-    return std::unique_lock<std::recursive_mutex>(mutex(), std::defer_lock);
+  inline global_unique_lock_type AcquireDeferred() {
+    return global_unique_lock_type(mutex(), std::defer_lock);
   }
 
   // Tries to acquire a lock on the global critical section.
   // Check owns_lock() to see if the lock was successfully acquired.
-  inline std::unique_lock<std::recursive_mutex> TryAcquire() {
-    return std::unique_lock<std::recursive_mutex>(mutex(), std::try_to_lock);
+  inline global_unique_lock_type TryAcquire() {
+    return global_unique_lock_type(mutex(), std::try_to_lock);
   }
 };
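The global_mutex_type and global_unique_lock_type aliases keep global_critical_region's interface unchanged for callers, which normally hold an instance as a member (as the comments above recommend) and lock around short critical sections. A hedged usage sketch with a hypothetical GuestTable class; the API shape follows the declarations above:

    #include <cstdint>
    #include <map>

    #include "xenia/base/mutex.h"

    // Hypothetical caller: guards a table shared with other emulator threads.
    class GuestTable {
     public:
      void Insert(uint32_t key, uint32_t value) {
        auto global_lock = global_critical_region_.Acquire();  // held until scope exit
        entries_[key] = value;
      }

      bool TryRead(uint32_t key, uint32_t* out_value) {
        auto global_lock = global_critical_region_.TryAcquire();
        if (!global_lock.owns_lock()) {
          return false;  // contended - the caller may retry later
        }
        auto it = entries_.find(key);
        if (it == entries_.end()) {
          return false;
        }
        *out_value = it->second;
        return true;
      }

     private:
      xe::global_critical_region global_critical_region_;
      std::map<uint32_t, uint32_t> entries_;
    };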
@@ -36,7 +36,7 @@ using WaitItem = TimerQueueWaitItem;
   edit: actually had to change it back; when I was testing, it only worked
   because I fixed disruptorplus' code to compile (it gives wrong args to
   condition_variable::wait_until), but it builds now
 */
-using WaitStrat = dp::spin_wait_strategy;  // dp::blocking_wait_strategy;
+using WaitStrat = dp::blocking_wait_strategy;
 
 class TimerQueue {
  public:
@@ -48,7 +48,7 @@ class MMIOHandler {
   typedef uint32_t (*HostToGuestVirtual)(const void* context,
                                          const void* host_address);
   typedef bool (*AccessViolationCallback)(
-      std::unique_lock<std::recursive_mutex> global_lock_locked_once,
+      global_unique_lock_type global_lock_locked_once,  // not passed by const reference like the others?
       void* context, void* host_address, bool is_write);
 
   // access_violation_callback is called with global_critical_region locked once
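Unlike the watch callbacks, the access-violation callback receives the lock by value: the caller moves its global_unique_lock_type into the callback, which then owns the lock for the rest of its scope (Memory::AccessViolationCallbackThunk further below does exactly that with std::move). A small hedged sketch of the ownership transfer, with a hypothetical HandleFault callback and the non-Win32 aliases from mutex.h:

    #include <mutex>
    #include <utility>

    using global_mutex_type = std::recursive_mutex;  // the #else branch of mutex.h
    using global_unique_lock_type = std::unique_lock<global_mutex_type>;

    // Hypothetical callback in the by-value style of AccessViolationCallback:
    // taking the lock by value means the callee now owns it.
    bool HandleFault(global_unique_lock_type global_lock_locked_once,
                     void* /*host_address*/, bool /*is_write*/) {
      // ... work done while the global critical region is held ...
      return true;  // the lock is released when global_lock_locked_once is destroyed
    }

    int main() {
      global_mutex_type mutex;
      global_unique_lock_type lock(mutex);         // caller acquires once
      bool handled = HandleFault(std::move(lock),  // ownership moves into the callee
                                 nullptr, false);
      // 'lock' no longer owns the mutex here; lock.owns_lock() is false.
      return handled ? 0 : 1;
    }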
@@ -15,7 +15,7 @@
 #include <string>
 
 #include "xenia/base/vec128.h"
-
+#include "xenia/base/mutex.h"
 namespace xe {
 namespace cpu {
 class Processor;
@@ -405,7 +405,7 @@ typedef struct alignas(64) PPCContext_s {
 
   // Global interrupt lock, held while interrupts are disabled or interrupts are
   // executing. This is shared among all threads and comes from the processor.
-  std::recursive_mutex* global_mutex;
+  global_mutex_type* global_mutex;
 
   // Used to shuttle data into externs. Contents volatile.
   uint64_t scratch;
@@ -883,7 +883,7 @@ class PrimitiveProcessor {
   // Must be called in a global critical region.
   void UpdateCacheBucketsNonEmptyL2(
       uint32_t bucket_index_div_64,
-      [[maybe_unused]] const std::unique_lock<std::recursive_mutex>&
+      [[maybe_unused]] const global_unique_lock_type&
           global_lock) {
     uint64_t& cache_buckets_non_empty_l2_ref =
         cache_buckets_non_empty_l2_[bucket_index_div_64 >> 6];
@@ -35,7 +35,7 @@ class SharedMemory {
   virtual void SetSystemPageBlocksValidWithGpuDataWritten();
 
   typedef void (*GlobalWatchCallback)(
-      const std::unique_lock<std::recursive_mutex>& global_lock, void* context,
+      const global_unique_lock_type& global_lock, void* context,
       uint32_t address_first, uint32_t address_last, bool invalidated_by_gpu);
   typedef void* GlobalWatchHandle;
   // Registers a callback invoked when something is invalidated in the GPU
@@ -49,8 +49,8 @@ class SharedMemory {
   GlobalWatchHandle RegisterGlobalWatch(GlobalWatchCallback callback,
                                         void* callback_context);
   void UnregisterGlobalWatch(GlobalWatchHandle handle);
-  typedef void (*WatchCallback)(
-      const std::unique_lock<std::recursive_mutex>& global_lock, void* context,
+  typedef void (*WatchCallback)(const global_unique_lock_type& global_lock,
+                                void* context,
       void* data, uint64_t argument, bool invalidated_by_gpu);
   typedef void* WatchHandle;
   // Registers a callback invoked when the specified memory range is invalidated
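In contrast, the shared-memory watch callbacks take the lock by const reference purely as evidence that the caller already holds the global critical region; the callback never manages the lock itself. A hedged sketch of a global watch matching the GlobalWatchCallback typedef above (OnGpuInvalidation and the registration comment are hypothetical; RegisterGlobalWatch/UnregisterGlobalWatch are the declarations shown in the hunk):

    #include <cstdint>

    #include "xenia/base/mutex.h"

    // Hypothetical callback matching the GlobalWatchCallback typedef: the const
    // reference only proves that the caller holds the global critical region.
    static void OnGpuInvalidation(const xe::global_unique_lock_type& global_lock,
                                  void* context, uint32_t address_first,
                                  uint32_t address_last, bool invalidated_by_gpu) {
      // Invalidate any cached state overlapping [address_first, address_last].
      // No locking here - the caller already owns the global mutex.
    }

    // Registration would then look roughly like:
    //   GlobalWatchHandle handle =
    //       shared_memory.RegisterGlobalWatch(&OnGpuInvalidation, this);
    //   ...
    //   shared_memory.UnregisterGlobalWatch(handle);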
@@ -507,7 +507,7 @@ TextureCache::Texture::~Texture() {
 }
 
 void TextureCache::Texture::MakeUpToDateAndWatch(
-    const std::unique_lock<std::recursive_mutex>& global_lock) {
+    const global_unique_lock_type& global_lock) {
   SharedMemory& shared_memory = texture_cache().shared_memory();
   if (base_outdated_) {
     assert_not_zero(GetGuestBaseSize());
@@ -552,7 +552,7 @@ void TextureCache::Texture::MarkAsUsed() {
 }
 
 void TextureCache::Texture::WatchCallback(
-    [[maybe_unused]] const std::unique_lock<std::recursive_mutex>& global_lock,
+    [[maybe_unused]] const global_unique_lock_type& global_lock,
     bool is_mip) {
   if (is_mip) {
     assert_not_zero(GetGuestMipsSize());
@@ -565,8 +565,8 @@ void TextureCache::Texture::WatchCallback(
   }
 }
 
-void TextureCache::WatchCallback(
-    const std::unique_lock<std::recursive_mutex>& global_lock, void* context,
+void TextureCache::WatchCallback(const global_unique_lock_type& global_lock,
+                                 void* context,
     void* data, uint64_t argument, bool invalidated_by_gpu) {
   Texture& texture = *static_cast<Texture*>(context);
   texture.WatchCallback(global_lock, argument != 0);
@@ -902,7 +902,7 @@ bool TextureCache::IsRangeScaledResolved(uint32_t start_unscaled,
 }
 
 void TextureCache::ScaledResolveGlobalWatchCallbackThunk(
-    const std::unique_lock<std::recursive_mutex>& global_lock, void* context,
+    const global_unique_lock_type& global_lock, void* context,
     uint32_t address_first, uint32_t address_last, bool invalidated_by_gpu) {
   TextureCache* texture_cache = reinterpret_cast<TextureCache*>(context);
   texture_cache->ScaledResolveGlobalWatchCallback(
@@ -910,7 +910,7 @@ void TextureCache::ScaledResolveGlobalWatchCallbackThunk(
 }
 
 void TextureCache::ScaledResolveGlobalWatchCallback(
-    const std::unique_lock<std::recursive_mutex>& global_lock,
+    const global_unique_lock_type& global_lock,
     uint32_t address_first, uint32_t address_last, bool invalidated_by_gpu) {
   assert_true(IsDrawResolutionScaled());
   if (invalidated_by_gpu) {
@@ -230,19 +230,15 @@ class TextureCache {
     }
     bool IsResolved() const { return base_resolved_ || mips_resolved_; }
 
-    bool base_outdated(
-        const std::unique_lock<std::recursive_mutex>& global_lock) const {
+    bool base_outdated(const global_unique_lock_type& global_lock) const {
       return base_outdated_;
     }
-    bool mips_outdated(
-        const std::unique_lock<std::recursive_mutex>& global_lock) const {
+    bool mips_outdated(const global_unique_lock_type& global_lock) const {
      return mips_outdated_;
    }
-    void MakeUpToDateAndWatch(
-        const std::unique_lock<std::recursive_mutex>& global_lock);
+    void MakeUpToDateAndWatch(const global_unique_lock_type& global_lock);
 
-    void WatchCallback(
-        const std::unique_lock<std::recursive_mutex>& global_lock, bool is_mip);
+    void WatchCallback(const global_unique_lock_type& global_lock, bool is_mip);
 
     // For LRU caching - updates the last usage frame and moves the texture to
     // the end of the usage queue. Must be called any time the texture is
@@ -579,8 +575,8 @@ class TextureCache {
   void UpdateTexturesTotalHostMemoryUsage(uint64_t add, uint64_t subtract);
 
   // Shared memory callback for texture data invalidation.
-  static void WatchCallback(
-      const std::unique_lock<std::recursive_mutex>& global_lock, void* context,
+  static void WatchCallback(const global_unique_lock_type& global_lock,
+                            void* context,
       void* data, uint64_t argument, bool invalidated_by_gpu);
 
   // Checks if there are any pages that contain scaled resolve data within the
@@ -589,10 +585,10 @@ class TextureCache {
   // Global shared memory invalidation callback for invalidating scaled resolved
   // texture data.
   static void ScaledResolveGlobalWatchCallbackThunk(
-      const std::unique_lock<std::recursive_mutex>& global_lock, void* context,
+      const global_unique_lock_type& global_lock, void* context,
       uint32_t address_first, uint32_t address_last, bool invalidated_by_gpu);
   void ScaledResolveGlobalWatchCallback(
-      const std::unique_lock<std::recursive_mutex>& global_lock,
+      const global_unique_lock_type& global_lock,
       uint32_t address_first, uint32_t address_last, bool invalidated_by_gpu);
 
   const RegisterFile& register_file_;
@@ -465,7 +465,7 @@ cpu::MMIORange* Memory::LookupVirtualMappedRange(uint32_t virtual_address) {
 }
 
 bool Memory::AccessViolationCallback(
-    std::unique_lock<std::recursive_mutex> global_lock_locked_once,
+    global_unique_lock_type global_lock_locked_once,
     void* host_address, bool is_write) {
   // Access via physical_membase_ is special, when we need to bypass everything
   // (for instance, for a data provider to actually write the data), so only
@@ -493,14 +493,14 @@ bool Memory::AccessViolationCallback(
 }
 
 bool Memory::AccessViolationCallbackThunk(
-    std::unique_lock<std::recursive_mutex> global_lock_locked_once,
+    global_unique_lock_type global_lock_locked_once,
     void* context, void* host_address, bool is_write) {
   return reinterpret_cast<Memory*>(context)->AccessViolationCallback(
       std::move(global_lock_locked_once), host_address, is_write);
 }
 
 bool Memory::TriggerPhysicalMemoryCallbacks(
-    std::unique_lock<std::recursive_mutex> global_lock_locked_once,
+    global_unique_lock_type global_lock_locked_once,
     uint32_t virtual_address, uint32_t length, bool is_write,
     bool unwatch_exact_range, bool unprotect) {
   BaseHeap* heap = LookupHeap(virtual_address);
@@ -1711,7 +1711,7 @@ void PhysicalHeap::EnableAccessCallbacks(uint32_t physical_address,
 }
 
 bool PhysicalHeap::TriggerCallbacks(
-    std::unique_lock<std::recursive_mutex> global_lock_locked_once,
+    global_unique_lock_type global_lock_locked_once,
     uint32_t virtual_address, uint32_t length, bool is_write,
     bool unwatch_exact_range, bool unprotect) {
   // TODO(Triang3l): Support read watches.
@@ -271,8 +271,7 @@ class PhysicalHeap : public BaseHeap {
                                    bool enable_invalidation_notifications,
                                    bool enable_data_providers);
   // Returns true if any page in the range was watched.
-  bool TriggerCallbacks(
-      std::unique_lock<std::recursive_mutex> global_lock_locked_once,
+  bool TriggerCallbacks(global_unique_lock_type global_lock_locked_once,
                         uint32_t virtual_address, uint32_t length, bool is_write,
                         bool unwatch_exact_range, bool unprotect = true);
 
@@ -459,7 +458,7 @@ class Memory {
   // TODO(Triang3l): Implement data providers - this is why locking depth of 1
   // will be required in the future.
   bool TriggerPhysicalMemoryCallbacks(
-      std::unique_lock<std::recursive_mutex> global_lock_locked_once,
+      global_unique_lock_type global_lock_locked_once,
       uint32_t virtual_address, uint32_t length, bool is_write,
       bool unwatch_exact_range, bool unprotect = true);
 
@@ -508,11 +507,10 @@ class Memory {
   static uint32_t HostToGuestVirtualThunk(const void* context,
                                           const void* host_address);
 
-  bool AccessViolationCallback(
-      std::unique_lock<std::recursive_mutex> global_lock_locked_once,
+  bool AccessViolationCallback(global_unique_lock_type global_lock_locked_once,
                                void* host_address, bool is_write);
   static bool AccessViolationCallbackThunk(
-      std::unique_lock<std::recursive_mutex> global_lock_locked_once,
+      global_unique_lock_type global_lock_locked_once,
       void* context, void* host_address, bool is_write);
 
   std::filesystem::path file_name_;