Add a `disable_global_lock` CPU flag and make the guest SList exports lock-free.

  * cpu_flags.{cc,h}: define and declare the bool flag `disable_global_lock`
    (default false).
  * ppc_emit_control.cc: InstrEmit_mtmsr / InstrEmit_mtmsrd only emit the
    enter_global_lock / leave_global_lock extern calls when the flag is off.
  * ppc_emit_memory.cc: comment-only change in InstrEmit_stdcx /
    InstrEmit_stwcx.
  * xboxkrnl_threading.cc: InterlockedPushEntrySList, InterlockedPopEntrySList
    and InterlockedFlushSList drop the global critical region and instead
    retry a 64-bit xe::atomic_cas on the list header until it succeeds.
    InterlockedPushEntrySList assigns to the function-scope `old_head` inside
    the retry loop (rather than declaring a second, shadowing local) so the
    previous list head is what gets returned.

NOTE(review): this patch text was recovered from a whitespace-collapsed copy.
Indentation, blank context lines and the template arguments on pointer_t,
TranslateVirtual and reinterpret_cast were reconstructed; the hunk line counts
are self-consistent, but confirm the context against the tree before applying
(or apply with whitespace-insensitive matching). The post-image blob id on the
xboxkrnl_threading.cc index line predates the `old_head` fix.

diff --git a/src/xenia/cpu/cpu_flags.cc b/src/xenia/cpu/cpu_flags.cc
index 0d1748c1a..6c37e7edc 100644
--- a/src/xenia/cpu/cpu_flags.cc
+++ b/src/xenia/cpu/cpu_flags.cc
@@ -28,6 +28,10 @@ DEFINE_bool(trace_function_references, false,
 DEFINE_bool(trace_function_data, false,
             "Generate tracing for function result data.");
 
+DEFINE_bool(
+    disable_global_lock, false,
+    "Disables global lock usage in guest code. Does not affect host code.");
+
 DEFINE_bool(validate_hir, false,
             "Perform validation checks on the HIR during compilation.");
 
diff --git a/src/xenia/cpu/cpu_flags.h b/src/xenia/cpu/cpu_flags.h
index 578429b74..17b88ff08 100644
--- a/src/xenia/cpu/cpu_flags.h
+++ b/src/xenia/cpu/cpu_flags.h
@@ -23,6 +23,8 @@ DECLARE_bool(trace_function_coverage);
 DECLARE_bool(trace_function_references);
 DECLARE_bool(trace_function_data);
 
+DECLARE_bool(disable_global_lock);
+
 DECLARE_bool(validate_hir);
 
 DECLARE_uint64(break_on_instruction);
diff --git a/src/xenia/cpu/ppc/ppc_emit_control.cc b/src/xenia/cpu/ppc/ppc_emit_control.cc
index a44644193..0bd9cbd2e 100644
--- a/src/xenia/cpu/ppc/ppc_emit_control.cc
+++ b/src/xenia/cpu/ppc/ppc_emit_control.cc
@@ -10,6 +10,7 @@
 #include "xenia/cpu/ppc/ppc_emit-private.h"
 
 #include "xenia/base/assert.h"
+#include "xenia/cpu/cpu_flags.h"
 #include "xenia/cpu/ppc/ppc_context.h"
 #include "xenia/cpu/ppc/ppc_frontend.h"
 #include "xenia/cpu/ppc/ppc_hir_builder.h"
@@ -725,10 +726,14 @@ int InstrEmit_mtmsr(PPCHIRBuilder& f, const InstrData& i) {
         f.ZeroExtend(f.ZeroExtend(f.LoadGPR(i.X.RT), INT64_TYPE), INT64_TYPE));
     if (i.X.RT == 13) {
       // iff storing from r13 we are taking a lock (disable interrupts).
-      f.CallExtern(f.builtins()->enter_global_lock);
+      if (!FLAGS_disable_global_lock) {
+        f.CallExtern(f.builtins()->enter_global_lock);
+      }
     } else {
       // Otherwise we are restoring interrupts (probably).
-      f.CallExtern(f.builtins()->leave_global_lock);
+      if (!FLAGS_disable_global_lock) {
+        f.CallExtern(f.builtins()->leave_global_lock);
+      }
     }
     return 0;
   } else {
@@ -746,10 +751,14 @@ int InstrEmit_mtmsrd(PPCHIRBuilder& f, const InstrData& i) {
                    f.ZeroExtend(f.LoadGPR(i.X.RT), INT64_TYPE));
     if (i.X.RT == 13) {
       // iff storing from r13 we are taking a lock (disable interrupts).
-      f.CallExtern(f.builtins()->enter_global_lock);
+      if (!FLAGS_disable_global_lock) {
+        f.CallExtern(f.builtins()->enter_global_lock);
+      }
     } else {
       // Otherwise we are restoring interrupts (probably).
-      f.CallExtern(f.builtins()->leave_global_lock);
+      if (!FLAGS_disable_global_lock) {
+        f.CallExtern(f.builtins()->leave_global_lock);
+      }
     }
     return 0;
   } else {
diff --git a/src/xenia/cpu/ppc/ppc_emit_memory.cc b/src/xenia/cpu/ppc/ppc_emit_memory.cc
index 517081c68..e9294048e 100644
--- a/src/xenia/cpu/ppc/ppc_emit_memory.cc
+++ b/src/xenia/cpu/ppc/ppc_emit_memory.cc
@@ -702,6 +702,9 @@ int InstrEmit_stdcx(PPCHIRBuilder& f, const InstrData& i) {
   // NOTE: we assume we are within a global lock.
   // As we have been exclusively executing this entire time, we assume that no
   // one else could have possibly touched the memory and must always succeed.
+  // We use atomic compare exchange here to support reserved load/store without
+  // being under the global lock (flag disable_global_lock - see mtmsr/mtmsrd).
+  // This will always succeed if under the global lock, however.
 
   Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
   Value* rt = f.ByteSwap(f.LoadGPR(i.X.RT));
@@ -732,6 +735,9 @@ int InstrEmit_stwcx(PPCHIRBuilder& f, const InstrData& i) {
   // NOTE: we assume we are within a global lock.
   // As we have been exclusively executing this entire time, we assume that no
   // one else could have possibly touched the memory and must always succeed.
+  // We use atomic compare exchange here to support reserved load/store without
+  // being under the global lock (flag disable_global_lock - see mtmsr/mtmsrd).
+  // This will always succeed if under the global lock, however.
 
   Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
   Value* rt = f.ByteSwap(f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE));
diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc
index e9db2fc31..2d7b935bb 100644
--- a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc
+++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc
@@ -1251,22 +1251,20 @@ pointer_result_t InterlockedPushEntrySList(
   assert_not_null(plist_ptr);
   assert_not_null(entry);
 
-  // Hold a global lock during this method. Once in the lock we assume we have
-  // exclusive access to the structure.
-  auto global_lock = xe::global_critical_region::AcquireDirect();
-
   alignas(8) X_SLIST_HEADER old_hdr = *plist_ptr;
   alignas(8) X_SLIST_HEADER new_hdr = {0};
-  new_hdr.depth = old_hdr.depth + 1;
-  new_hdr.sequence = old_hdr.sequence + 1;
+  uint32_t old_head = 0;
+  do {
+    old_hdr = *plist_ptr;
+    new_hdr.depth = old_hdr.depth + 1;
+    new_hdr.sequence = old_hdr.sequence + 1;
 
-  uint32_t old_head = old_hdr.next.next;
-  entry->next = old_hdr.next.next;
-  new_hdr.next.next = entry.guest_address();
-
-  *reinterpret_cast<uint64_t*>(plist_ptr.host_address()) =
-      *reinterpret_cast<uint64_t*>(&new_hdr);
-  xe::threading::SyncMemory();
+    old_head = old_hdr.next.next;
+    entry->next = old_hdr.next.next;
+    new_hdr.next.next = entry.guest_address();
+  } while (
+      !xe::atomic_cas(*(uint64_t*)(&old_hdr), *(uint64_t*)(&new_hdr),
+                      reinterpret_cast<uint64_t*>(plist_ptr.host_address())));
 
   return old_head;
 }
@@ -1276,28 +1274,24 @@ DECLARE_XBOXKRNL_EXPORT(InterlockedPushEntrySList,
 pointer_result_t InterlockedPopEntrySList(pointer_t<X_SLIST_HEADER> plist_ptr) {
   assert_not_null(plist_ptr);
 
-  // Hold a global lock during this method. Once in the lock we assume we have
-  // exclusive access to the structure.
-  auto global_lock = xe::global_critical_region::AcquireDirect();
-
   uint32_t popped = 0;
-
-  alignas(8) X_SLIST_HEADER old_hdr = *plist_ptr;
+  alignas(8) X_SLIST_HEADER old_hdr = {0};
   alignas(8) X_SLIST_HEADER new_hdr = {0};
-  auto next = kernel_memory()->TranslateVirtual<X_SINGLE_LIST_ENTRY*>(
-      old_hdr.next.next);
-  if (!old_hdr.next.next) {
-    return 0;
-  }
-  popped = old_hdr.next.next;
+  do {
+    old_hdr = *plist_ptr;
+    auto next = kernel_memory()->TranslateVirtual<X_SINGLE_LIST_ENTRY*>(
+        old_hdr.next.next);
+    if (!old_hdr.next.next) {
+      return 0;
+    }
+    popped = old_hdr.next.next;
 
-  new_hdr.depth = old_hdr.depth - 1;
-  new_hdr.next.next = next->next;
-  new_hdr.sequence = old_hdr.sequence;
-
-  *reinterpret_cast<uint64_t*>(plist_ptr.host_address()) =
-      *reinterpret_cast<uint64_t*>(&new_hdr);
-  xe::threading::SyncMemory();
+    new_hdr.depth = old_hdr.depth - 1;
+    new_hdr.next.next = next->next;
+    new_hdr.sequence = old_hdr.sequence;
+  } while (
+      !xe::atomic_cas(*(uint64_t*)(&old_hdr), *(uint64_t*)(&new_hdr),
+                      reinterpret_cast<uint64_t*>(plist_ptr.host_address())));
 
   return popped;
 }
@@ -1307,20 +1301,18 @@ DECLARE_XBOXKRNL_EXPORT(InterlockedPopEntrySList,
 pointer_result_t InterlockedFlushSList(pointer_t<X_SLIST_HEADER> plist_ptr) {
   assert_not_null(plist_ptr);
 
-  // Hold a global lock during this method. Once in the lock we assume we have
-  // exclusive access to the structure.
-  auto global_lock = xe::global_critical_region::AcquireDirect();
-
   alignas(8) X_SLIST_HEADER old_hdr = *plist_ptr;
   alignas(8) X_SLIST_HEADER new_hdr = {0};
-  uint32_t first = old_hdr.next.next;
-  new_hdr.next.next = 0;
-  new_hdr.depth = 0;
-  new_hdr.sequence = 0;
-
-  *reinterpret_cast<uint64_t*>(plist_ptr.host_address()) =
-      *reinterpret_cast<uint64_t*>(&new_hdr);
-  xe::threading::SyncMemory();
+  uint32_t first = 0;
+  do {
+    old_hdr = *plist_ptr;
+    first = old_hdr.next.next;
+    new_hdr.next.next = 0;
+    new_hdr.depth = 0;
+    new_hdr.sequence = 0;
+  } while (
+      !xe::atomic_cas(*(uint64_t*)(&old_hdr), *(uint64_t*)(&new_hdr),
+                      reinterpret_cast<uint64_t*>(plist_ptr.host_address())));
 
   return first;
 }