From f5e374f9b51ae2a9cf1ac8dd3ef6aa130a716037 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Tue, 1 Sep 2015 09:45:14 -0700 Subject: [PATCH] Changing the way the global lock works. Some things are better, I think. Regressions are likely. --- src/xenia/cpu/frontend/ppc_context.h | 6 +- src/xenia/cpu/frontend/ppc_emit_control.cc | 29 ++++++- src/xenia/cpu/frontend/ppc_emit_memory.cc | 51 ++++++++++-- src/xenia/cpu/frontend/ppc_frontend.cc | 47 ++++++----- src/xenia/cpu/frontend/ppc_frontend.h | 6 +- src/xenia/cpu/frontend/ppc_hir_builder.cc | 40 +--------- src/xenia/cpu/frontend/ppc_hir_builder.h | 9 +-- src/xenia/cpu/processor.cc | 33 ++++++++ src/xenia/cpu/processor.h | 5 +- src/xenia/cpu/thread_state.cc | 2 +- src/xenia/gpu/graphics_system.cc | 4 +- src/xenia/kernel/xam_ui.cc | 1 - src/xenia/kernel/xboxkrnl_threading.cc | 91 +++++++++++----------- src/xenia/memory.h | 3 - 14 files changed, 199 insertions(+), 128 deletions(-) diff --git a/src/xenia/cpu/frontend/ppc_context.h b/src/xenia/cpu/frontend/ppc_context.h index 2165487a3..4974e59fb 100644 --- a/src/xenia/cpu/frontend/ppc_context.h +++ b/src/xenia/cpu/frontend/ppc_context.h @@ -13,6 +13,7 @@ #include #include +#include "xenia/base/mutex.h" #include "xenia/base/vec128.h" namespace xe { @@ -404,8 +405,9 @@ typedef struct alignas(64) PPCContext_s { // Thread ID assigned to this context. uint32_t thread_id; - // Reserve address for load acquire/store release. Shared. - uint64_t* reserve_address; + // Global interrupt lock, held while interrupts are disabled or interrupts are + // executing. This is shared among all threads and comes from the processor. + xe::recursive_mutex* global_mutex; // Used to shuttle data into externs. Contents volatile. 
uint64_t scratch; diff --git a/src/xenia/cpu/frontend/ppc_emit_control.cc b/src/xenia/cpu/frontend/ppc_emit_control.cc index a8013d9c6..45495a3ea 100644 --- a/src/xenia/cpu/frontend/ppc_emit_control.cc +++ b/src/xenia/cpu/frontend/ppc_emit_control.cc @@ -11,6 +11,7 @@ #include "xenia/base/assert.h" #include "xenia/cpu/frontend/ppc_context.h" +#include "xenia/cpu/frontend/ppc_frontend.h" #include "xenia/cpu/frontend/ppc_hir_builder.h" namespace xe { @@ -699,14 +700,28 @@ XEEMITTER(mtspr, 0x7C0003A6, XFX)(PPCHIRBuilder& f, InstrData& i) { // without the lock here threads can livelock. XEEMITTER(mfmsr, 0x7C0000A6, X)(PPCHIRBuilder& f, InstrData& i) { - f.StoreGPR(i.X.RT, f.LoadMSR()); + // bit 48 = EE; interrupt enabled + // bit 62 = RI; recoverable interrupt + // return 8000h if unlocked (interrupts enabled), else 0 + f.CallExtern(f.builtins()->check_global_lock); + f.StoreGPR(i.X.RT, f.LoadContext(offsetof(PPCContext, scratch), INT64_TYPE)); return 0; } XEEMITTER(mtmsr, 0x7C000124, X)(PPCHIRBuilder& f, InstrData& i) { if (i.X.RA & 0x01) { // L = 1 - f.StoreMSR(f.ZeroExtend(f.LoadGPR(i.X.RT), INT64_TYPE)); + // Stash the new MSR value in scratch for the lock helpers below. + f.StoreContext( + offsetof(PPCContext, scratch), + f.ZeroExtend(f.ZeroExtend(f.LoadGPR(i.X.RT), INT64_TYPE), INT64_TYPE)); + if (i.X.RT == 13) { + // iff storing from r13 we are taking a lock (disable interrupts). + f.CallExtern(f.builtins()->enter_global_lock); + } else { + // Otherwise we are restoring interrupts (probably). + f.CallExtern(f.builtins()->leave_global_lock); + } return 0; } else { // L = 0 @@ -718,7 +733,15 @@ XEEMITTER(mtmsr, 0x7C000124, X)(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(mtmsrd, 0x7C000164, X)(PPCHIRBuilder& f, InstrData& i) { if (i.X.RA & 0x01) { // L = 1 - f.StoreMSR(f.LoadGPR(i.X.RT)); + f.StoreContext(offsetof(PPCContext, scratch), + f.ZeroExtend(f.LoadGPR(i.X.RT), INT64_TYPE)); + if (i.X.RT == 13) { + // iff storing from r13 we are taking a lock (disable interrupts). 
+ f.CallExtern(f.builtins()->enter_global_lock); + } else { + // Otherwise we are restoring interrupts (probably). + f.CallExtern(f.builtins()->leave_global_lock); + } return 0; } else { // L = 0 diff --git a/src/xenia/cpu/frontend/ppc_emit_memory.cc b/src/xenia/cpu/frontend/ppc_emit_memory.cc index 64bfdf908..bbf4e099a 100644 --- a/src/xenia/cpu/frontend/ppc_emit_memory.cc +++ b/src/xenia/cpu/frontend/ppc_emit_memory.cc @@ -683,8 +683,16 @@ XEEMITTER(ldarx, 0x7C0000A8, X)(PPCHIRBuilder& f, InstrData& i) { // RESERVE_LENGTH <- 8 // RESERVE_ADDR <- real_addr(EA) // RT <- MEM(EA, 8) + + // NOTE: we assume we are within a global lock. + // We could assert here that the block (or its parent) has taken a global lock + // already, but I haven't seen anything but interrupt callbacks (which are + // always under a global lock) do that yet. + // We issue a memory barrier here to make sure that we get good values. + f.MemoryBarrier(); + Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB); - Value* rt = f.ByteSwap(f.LoadAcquire(ea, INT64_TYPE)); + Value* rt = f.ByteSwap(f.Load(ea, INT64_TYPE)); f.StoreGPR(i.X.RT, rt); return 0; } @@ -699,9 +707,16 @@ XEEMITTER(lwarx, 0x7C000028, X)(PPCHIRBuilder& f, InstrData& i) { // RESERVE_LENGTH <- 4 // RESERVE_ADDR <- real_addr(EA) // RT <- i32.0 || MEM(EA, 4) + + // NOTE: we assume we are within a global lock. + // We could assert here that the block (or its parent) has taken a global lock + // already, but I haven't seen anything but interrupt callbacks (which are + // always under a global lock) do that yet. + // We issue a memory barrier here to make sure that we get good values. 
+ f.MemoryBarrier(); + Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB); - Value* rt = - f.ZeroExtend(f.ByteSwap(f.LoadAcquire(ea, INT32_TYPE)), INT64_TYPE); + Value* rt = f.ZeroExtend(f.ByteSwap(f.Load(ea, INT32_TYPE)), INT64_TYPE); f.StoreGPR(i.X.RT, rt); return 0; } @@ -716,9 +731,22 @@ XEEMITTER(stdcx, 0x7C0001AD, X)(PPCHIRBuilder& f, InstrData& i) { // MEM(EA, 8) <- (RS) // n <- 1 if store performed // CR0[LT GT EQ SO] = 0b00 || n || XER[SO] + + // NOTE: we assume we are within a global lock. + // As we have been exclusively executing this entire time, we assume that no + // one else could have possibly touched the memory and must always succeed. + Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB); Value* rt = f.ByteSwap(f.LoadGPR(i.X.RT)); - f.StoreRelease(ea, rt); // also updates cr0 + f.Store(ea, rt); + f.StoreContext(offsetof(PPCContext, cr0.cr0_eq), f.LoadConstantInt8(1)); + f.StoreContext(offsetof(PPCContext, cr0.cr0_lt), f.LoadZeroInt8()); + f.StoreContext(offsetof(PPCContext, cr0.cr0_gt), f.LoadZeroInt8()); + + // Issue memory barrier for when we go out of lock and want others to see our + // updates. + f.MemoryBarrier(); + return 0; } @@ -732,9 +760,22 @@ XEEMITTER(stwcx, 0x7C00012D, X)(PPCHIRBuilder& f, InstrData& i) { // MEM(EA, 4) <- (RS)[32:63] // n <- 1 if store performed // CR0[LT GT EQ SO] = 0b00 || n || XER[SO] + + // NOTE: we assume we are within a global lock. + // As we have been exclusively executing this entire time, we assume that no + // one else could have possibly touched the memory and must always succeed. 
+ Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB); Value* rt = f.ByteSwap(f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE)); - f.StoreRelease(ea, rt); // also updates cr0 + f.Store(ea, rt); + f.StoreContext(offsetof(PPCContext, cr0.cr0_eq), f.LoadConstantInt8(1)); + f.StoreContext(offsetof(PPCContext, cr0.cr0_lt), f.LoadZeroInt8()); + f.StoreContext(offsetof(PPCContext, cr0.cr0_gt), f.LoadZeroInt8()); + + // Issue memory barrier for when we go out of lock and want others to see our + // updates. + f.MemoryBarrier(); + return 0; } diff --git a/src/xenia/cpu/frontend/ppc_frontend.cc b/src/xenia/cpu/frontend/ppc_frontend.cc index f0e7f1034..e4522a58b 100644 --- a/src/xenia/cpu/frontend/ppc_frontend.cc +++ b/src/xenia/cpu/frontend/ppc_frontend.cc @@ -57,32 +57,41 @@ PPCFrontend::~PPCFrontend() { Memory* PPCFrontend::memory() const { return processor_->memory(); } +// Checks the state of the global lock and sets scratch to the current MSR +// value. void CheckGlobalLock(PPCContext* ppc_context, void* arg0, void* arg1) { - ppc_context->scratch = 0x8000; + auto global_mutex = reinterpret_cast(arg0); + auto global_lock_count = reinterpret_cast(arg1); + std::lock_guard lock(*global_mutex); + ppc_context->scratch = *global_lock_count ? 0 : 0x8000; } -void HandleGlobalLock(PPCContext* ppc_context, void* arg0, void* arg1) { - auto global_lock = reinterpret_cast(arg0); - volatile bool* global_lock_taken = reinterpret_cast(arg1); - uint64_t value = ppc_context->scratch; - if (value == 0x8000) { - global_lock->lock(); - *global_lock_taken = false; - global_lock->unlock(); - } else if (value == ppc_context->r[13]) { - global_lock->lock(); - *global_lock_taken = true; - global_lock->unlock(); - } + +// Enters the global lock. Safe to recursion. 
+void EnterGlobalLock(PPCContext* ppc_context, void* arg0, void* arg1) { + auto global_mutex = reinterpret_cast(arg0); + auto global_lock_count = reinterpret_cast(arg1); + global_mutex->lock(); + *global_lock_count = *global_lock_count + 1; +} + +// Leaves the global lock. Safe to call recursively. +void LeaveGlobalLock(PPCContext* ppc_context, void* arg0, void* arg1) { + auto global_mutex = reinterpret_cast(arg0); + auto global_lock_count = reinterpret_cast(arg1); + *global_lock_count = *global_lock_count - 1; + assert_true(*global_lock_count >= 0); + global_mutex->unlock(); } bool PPCFrontend::Initialize() { - void* arg0 = reinterpret_cast(&builtins_.global_lock); - void* arg1 = reinterpret_cast(&builtins_.global_lock_taken); + void* arg0 = reinterpret_cast(processor_->global_mutex()); + void* arg1 = reinterpret_cast(&builtins_.global_lock_count); builtins_.check_global_lock = processor_->DefineBuiltin("CheckGlobalLock", CheckGlobalLock, arg0, arg1); - builtins_.handle_global_lock = processor_->DefineBuiltin( - "HandleGlobalLock", HandleGlobalLock, arg0, arg1); - + builtins_.enter_global_lock = + processor_->DefineBuiltin("EnterGlobalLock", EnterGlobalLock, arg0, arg1); + builtins_.leave_global_lock = + processor_->DefineBuiltin("LeaveGlobalLock", LeaveGlobalLock, arg0, arg1); return true; } diff --git a/src/xenia/cpu/frontend/ppc_frontend.h b/src/xenia/cpu/frontend/ppc_frontend.h index 262c2b374..1e1663607 100644 --- a/src/xenia/cpu/frontend/ppc_frontend.h +++ b/src/xenia/cpu/frontend/ppc_frontend.h @@ -32,10 +32,10 @@ namespace frontend { class PPCTranslator; struct PPCBuiltins { - xe::mutex global_lock; - bool global_lock_taken; + int32_t global_lock_count; Function* check_global_lock; - Function* handle_global_lock; + Function* enter_global_lock; + Function* leave_global_lock; }; class PPCFrontend { diff --git a/src/xenia/cpu/frontend/ppc_hir_builder.cc b/src/xenia/cpu/frontend/ppc_hir_builder.cc index e691669ab..cf7bb6d8b 100644 --- 
a/src/xenia/cpu/frontend/ppc_hir_builder.cc +++ b/src/xenia/cpu/frontend/ppc_hir_builder.cc @@ -39,6 +39,8 @@ PPCHIRBuilder::PPCHIRBuilder(PPCFrontend* frontend) PPCHIRBuilder::~PPCHIRBuilder() = default; +PPCBuiltins* PPCHIRBuilder::builtins() const { return frontend_->builtins(); } + void PPCHIRBuilder::Reset() { function_ = nullptr; start_address_ = 0; @@ -345,20 +347,6 @@ void PPCHIRBuilder::UpdateCR6(Value* src_value) { // TOOD(benvanik): trace CR. } -Value* PPCHIRBuilder::LoadMSR() { - // bit 48 = EE; interrupt enabled - // bit 62 = RI; recoverable interrupt - // return 8000h if unlocked, else 0 - CallExtern(frontend_->builtins()->check_global_lock); - return LoadContext(offsetof(PPCContext, scratch), INT64_TYPE); -} - -void PPCHIRBuilder::StoreMSR(Value* value) { - // if & 0x8000 == 0, lock, else unlock - StoreContext(offsetof(PPCContext, scratch), ZeroExtend(value, INT64_TYPE)); - CallExtern(frontend_->builtins()->handle_global_lock); -} - Value* PPCHIRBuilder::LoadFPSCR() { return LoadContext(offsetof(PPCContext, fpscr), INT64_TYPE); } @@ -444,30 +432,6 @@ void PPCHIRBuilder::StoreVR(uint32_t reg, Value* value) { trace_reg.value = value; } -Value* PPCHIRBuilder::LoadAcquire(Value* address, TypeName type, - uint32_t load_flags) { - AtomicExchange(LoadContext(offsetof(PPCContext, reserve_address), INT64_TYPE), - Truncate(address, INT32_TYPE)); - return Load(address, type, load_flags); -} - -Value* PPCHIRBuilder::StoreRelease(Value* address, Value* value, - uint32_t store_flags) { - Value* old_address = AtomicExchange( - LoadContext(offsetof(PPCContext, reserve_address), INT64_TYPE), - LoadZeroInt32()); - // Ensure the reservation addresses match. 
- Value* eq = CompareEQ(Truncate(address, INT32_TYPE), old_address); - StoreContext(offsetof(PPCContext, cr0.cr0_eq), eq); - StoreContext(offsetof(PPCContext, cr0.cr0_lt), LoadZeroInt8()); - StoreContext(offsetof(PPCContext, cr0.cr0_gt), LoadZeroInt8()); - auto skip_label = NewLabel(); - BranchFalse(eq, skip_label, BRANCH_UNLIKELY); - Store(address, value, store_flags); - MarkLabel(skip_label); - return eq; -} - } // namespace frontend } // namespace cpu } // namespace xe diff --git a/src/xenia/cpu/frontend/ppc_hir_builder.h b/src/xenia/cpu/frontend/ppc_hir_builder.h index d4c0b9ee3..630d3c4be 100644 --- a/src/xenia/cpu/frontend/ppc_hir_builder.h +++ b/src/xenia/cpu/frontend/ppc_hir_builder.h @@ -18,6 +18,7 @@ namespace xe { namespace cpu { namespace frontend { +struct PPCBuiltins; class PPCFrontend; class PPCHIRBuilder : public hir::HIRBuilder { @@ -29,6 +30,8 @@ class PPCHIRBuilder : public hir::HIRBuilder { explicit PPCHIRBuilder(PPCFrontend* frontend); ~PPCHIRBuilder() override; + PPCBuiltins* builtins() const; + void Reset() override; enum EmitFlags { @@ -54,8 +57,6 @@ class PPCHIRBuilder : public hir::HIRBuilder { void UpdateCR(uint32_t n, Value* lhs, bool is_signed = true); void UpdateCR(uint32_t n, Value* lhs, Value* rhs, bool is_signed = true); void UpdateCR6(Value* src_value); - Value* LoadMSR(); - void StoreMSR(Value* value); Value* LoadFPSCR(); void StoreFPSCR(Value* value); Value* LoadXER(); @@ -75,10 +76,6 @@ class PPCHIRBuilder : public hir::HIRBuilder { Value* LoadVR(uint32_t reg); void StoreVR(uint32_t reg, Value* value); - Value* LoadAcquire(Value* address, hir::TypeName type, - uint32_t load_flags = 0); - Value* StoreRelease(Value* address, Value* value, uint32_t store_flags = 0); - private: void AnnotateLabel(uint32_t address, Label* label); diff --git a/src/xenia/cpu/processor.cc b/src/xenia/cpu/processor.cc index 3c8ac6f55..2cd7db16d 100644 --- a/src/xenia/cpu/processor.cc +++ b/src/xenia/cpu/processor.cc @@ -327,6 +327,39 @@ uint64_t 
Processor::Execute(ThreadState* thread_state, uint32_t address, return context->r[3]; } +uint64_t Processor::ExecuteInterrupt(ThreadState* thread_state, + uint32_t address, uint64_t args[], + size_t arg_count) { + SCOPE_profile_cpu_f("cpu"); + + // Hold the global lock during interrupt dispatch. + // This will block if any code is in a critical region (has interrupts + // disabled) or if any other interrupt is executing. + std::lock_guard lock(global_mutex_); + + PPCContext* context = thread_state->context(); + assert_true(arg_count <= 5); + for (size_t i = 0; i < arg_count; ++i) { + context->r[3 + i] = args[i]; + } + + // TLS ptr must be zero during interrupts. Some games check this and + // early-exit routines when under interrupts. + auto pcr_address = + memory_->TranslateVirtual(static_cast(context->r[13])); + uint32_t old_tls_ptr = xe::load_and_swap(pcr_address); + xe::store_and_swap(pcr_address, 0); + + if (!Execute(thread_state, address)) { + return 0xDEADBABE; + } + + // Restores TLS ptr. 
+ xe::store_and_swap(pcr_address, old_tls_ptr); + + return context->r[3]; +} + Irql Processor::RaiseIrql(Irql new_value) { return static_cast( xe::atomic_exchange(static_cast(new_value), diff --git a/src/xenia/cpu/processor.h b/src/xenia/cpu/processor.h index 179bcb1b0..73601b20e 100644 --- a/src/xenia/cpu/processor.h +++ b/src/xenia/cpu/processor.h @@ -11,7 +11,6 @@ #define XENIA_CPU_PROCESSOR_H_ #include -#include #include #include @@ -84,7 +83,10 @@ class Processor { bool Execute(ThreadState* thread_state, uint32_t address); uint64_t Execute(ThreadState* thread_state, uint32_t address, uint64_t args[], size_t arg_count); + uint64_t ExecuteInterrupt(ThreadState* thread_state, uint32_t address, + uint64_t args[], size_t arg_count); + xe::recursive_mutex* global_mutex() { return &global_mutex_; } Irql RaiseIrql(Irql new_value); void LowerIrql(Irql old_value); @@ -108,6 +110,7 @@ class Processor { uint32_t next_builtin_address_ = 0xFFFF0000u; Irql irql_; + xe::recursive_mutex global_mutex_; }; } // namespace cpu diff --git a/src/xenia/cpu/thread_state.cc b/src/xenia/cpu/thread_state.cc index 582f7cae3..dd24c24f3 100644 --- a/src/xenia/cpu/thread_state.cc +++ b/src/xenia/cpu/thread_state.cc @@ -90,7 +90,7 @@ ThreadState::ThreadState(Processor* processor, uint32_t thread_id, std::memset(context_, 0, sizeof(PPCContext)); // Stash pointers to common structures that callbacks may need. 
- context_->reserve_address = memory_->reserve_address(); + context_->global_mutex = processor_->global_mutex(); context_->virtual_membase = memory_->virtual_membase(); context_->physical_membase = memory_->physical_membase(); context_->processor = processor_; diff --git a/src/xenia/gpu/graphics_system.cc b/src/xenia/gpu/graphics_system.cc index 11691fbd5..b41487c9c 100644 --- a/src/xenia/gpu/graphics_system.cc +++ b/src/xenia/gpu/graphics_system.cc @@ -80,8 +80,8 @@ void GraphicsSystem::DispatchInterruptCallback(uint32_t source, uint32_t cpu) { interrupt_callback_, source, cpu); uint64_t args[] = {source, interrupt_callback_data_}; - processor_->Execute(thread->thread_state(), interrupt_callback_, args, - xe::countof(args)); + processor_->ExecuteInterrupt(thread->thread_state(), interrupt_callback_, + args, xe::countof(args)); } } // namespace gpu diff --git a/src/xenia/kernel/xam_ui.cc b/src/xenia/kernel/xam_ui.cc index f399a722d..796a75e4b 100644 --- a/src/xenia/kernel/xam_ui.cc +++ b/src/xenia/kernel/xam_ui.cc @@ -403,7 +403,6 @@ SHIM_CALL XamShowDirtyDiscErrorUI_shim(PPCContext* ppc_context, // This is death, and should never return. // TODO(benvanik): cleaner exit. - assert_always(); exit(1); } diff --git a/src/xenia/kernel/xboxkrnl_threading.cc b/src/xenia/kernel/xboxkrnl_threading.cc index 1bfb55660..ee409f49f 100644 --- a/src/xenia/kernel/xboxkrnl_threading.cc +++ b/src/xenia/kernel/xboxkrnl_threading.cc @@ -1279,23 +1279,23 @@ pointer_result_t InterlockedPushEntrySList( assert_not_null(plist_ptr); assert_not_null(entry); - alignas(8) X_SLIST_HEADER old_hdr = {0}; + // Hold a global lock during this method. Once in the lock we assume we have + // exclusive access to the structure. 
+ std::lock_guard lock( + *kernel_state()->processor()->global_mutex()); + + alignas(8) X_SLIST_HEADER old_hdr = *plist_ptr; alignas(8) X_SLIST_HEADER new_hdr = {0}; - uint32_t old_head = 0; + new_hdr.depth = old_hdr.depth + 1; + new_hdr.sequence = old_hdr.sequence + 1; - do { - // Kill the guest reservation - xe::atomic_exchange(0, kernel_memory()->reserve_address()); + uint32_t old_head = old_hdr.next.next; + entry->next = old_hdr.next.next; + new_hdr.next.next = entry.guest_address(); - old_hdr = *plist_ptr; - new_hdr.depth = old_hdr.depth + 1; - new_hdr.sequence = old_hdr.sequence + 1; - - old_head = old_hdr.next.next; - entry->next = old_hdr.next.next; - new_hdr.next.next = entry.guest_address(); - } while (!xe::atomic_cas(*(uint64_t*)&old_hdr, *(uint64_t*)&new_hdr, - (uint64_t*)plist_ptr.host_address())); + xe::atomic_cas(*reinterpret_cast(&old_hdr), + *reinterpret_cast(&new_hdr), + reinterpret_cast(plist_ptr.host_address())); return old_head; } @@ -1305,27 +1305,29 @@ DECLARE_XBOXKRNL_EXPORT(InterlockedPushEntrySList, pointer_result_t InterlockedPopEntrySList(pointer_t plist_ptr) { assert_not_null(plist_ptr); + // Hold a global lock during this method. Once in the lock we assume we have + // exclusive access to the structure. 
+ std::lock_guard lock( + *kernel_state()->processor()->global_mutex()); + uint32_t popped = 0; - alignas(8) X_SLIST_HEADER old_hdr = {0}; + alignas(8) X_SLIST_HEADER old_hdr = *plist_ptr; alignas(8) X_SLIST_HEADER new_hdr = {0}; - do { - // Kill the guest reservation (guest InterlockedPushEntrySList uses this) - xe::atomic_exchange(0, kernel_memory()->reserve_address()); + auto next = kernel_memory()->TranslateVirtual( + old_hdr.next.next); + if (!old_hdr.next.next) { + return 0; + } + popped = old_hdr.next.next; - old_hdr = *plist_ptr; - auto next = kernel_memory()->TranslateVirtual( - old_hdr.next.next); - if (!old_hdr.next.next) { - return 0; - } - popped = old_hdr.next.next; + new_hdr.depth = old_hdr.depth - 1; + new_hdr.next.next = next->next; + new_hdr.sequence = old_hdr.sequence; - new_hdr.depth = old_hdr.depth - 1; - new_hdr.next.next = next->next; - new_hdr.sequence = old_hdr.sequence; - } while (!xe::atomic_cas(*(uint64_t*)&old_hdr, *(uint64_t*)&new_hdr, - (uint64_t*)plist_ptr.host_address())); + xe::atomic_cas(*reinterpret_cast(&old_hdr), + *reinterpret_cast(&new_hdr), + reinterpret_cast(plist_ptr.host_address())); return popped; } @@ -1333,22 +1335,23 @@ DECLARE_XBOXKRNL_EXPORT(InterlockedPopEntrySList, ExportTag::kImplemented | ExportTag::kHighFrequency); pointer_result_t InterlockedFlushSList(pointer_t plist_ptr) { - alignas(8) X_SLIST_HEADER old_hdr = {0}; + assert_not_null(plist_ptr); + + // Hold a global lock during this method. Once in the lock we assume we have + // exclusive access to the structure. 
+ std::lock_guard lock( + *kernel_state()->processor()->global_mutex()); + + alignas(8) X_SLIST_HEADER old_hdr = *plist_ptr; alignas(8) X_SLIST_HEADER new_hdr = {0}; - uint32_t first = 0; + uint32_t first = old_hdr.next.next; + new_hdr.next.next = 0; + new_hdr.depth = 0; + new_hdr.sequence = 0; - do { - // Kill the guest reservation - xe::atomic_exchange(0, kernel_memory()->reserve_address()); - - old_hdr = *plist_ptr; - - first = old_hdr.next.next; - new_hdr.next.next = 0; - new_hdr.depth = 0; - new_hdr.sequence = 0; - } while (!xe::atomic_cas(*(uint64_t*)&old_hdr, *(uint64_t*)&new_hdr, - (uint64_t*)plist_ptr.host_address())); + xe::atomic_cas(*reinterpret_cast(&old_hdr), + *reinterpret_cast(&new_hdr), + reinterpret_cast(plist_ptr.host_address())); return first; } diff --git a/src/xenia/memory.h b/src/xenia/memory.h index 35fa8ad10..e68c3e49e 100644 --- a/src/xenia/memory.h +++ b/src/xenia/memory.h @@ -181,8 +181,6 @@ class Memory { (guest_address & 0x1FFFFFFF)); } - inline uint64_t* reserve_address() { return &reserve_address_; } - // TODO(benvanik): make poly memory utils for these. void Zero(uint32_t address, uint32_t size); void Fill(uint32_t address, uint32_t size, uint8_t value); @@ -219,7 +217,6 @@ class Memory { uint32_t system_page_size_ = 0; uint8_t* virtual_membase_ = nullptr; uint8_t* physical_membase_ = nullptr; - uint64_t reserve_address_ = 0; xe::memory::FileMappingHandle mapping_ = nullptr; uint8_t* mapping_base_ = nullptr;