Rewrite Rtl critical section implementation (and implement full waiters)

This commit is contained in:
Dr. Chat 2015-12-08 15:15:23 -06:00 committed by Ben Vanik
parent 606aa9af6f
commit b97a39150b
2 changed files with 69 additions and 93 deletions

View File

@@ -21,6 +21,8 @@
#include "xenia/kernel/util/shim_utils.h" #include "xenia/kernel/util/shim_utils.h"
#include "xenia/kernel/util/xex2.h" #include "xenia/kernel/util/xex2.h"
#include "xenia/kernel/xboxkrnl/xboxkrnl_private.h" #include "xenia/kernel/xboxkrnl/xboxkrnl_private.h"
#include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h"
#include "xenia/kernel/xevent.h"
#include "xenia/kernel/xthread.h" #include "xenia/kernel/xthread.h"
// For FileTimeToSystemTime and SystemTimeToFileTime: // For FileTimeToSystemTime and SystemTimeToFileTime:
@@ -296,28 +298,22 @@ DECLARE_XBOXKRNL_EXPORT(RtlImageXexHeaderField, ExportTag::kImplemented);
// their embedded data and InitializeCriticalSection will never be called. // their embedded data and InitializeCriticalSection will never be called.
#pragma pack(push, 1) #pragma pack(push, 1)
struct X_RTL_CRITICAL_SECTION { struct X_RTL_CRITICAL_SECTION {
xe::be<uint8_t> unk_00; // 0x0 X_DISPATCH_HEADER header;
xe::be<uint8_t> spin_count_div_256; // 0x1 int32_t lock_count; // 0x10 -1 -> 0 on first lock
xe::be<uint16_t> __padding0; // 0x2 xe::be<int32_t> recursion_count; // 0x14 0 -> 1 on first lock
xe::be<uint32_t> unk_04; // 0x4 maybe the handle to the event? xe::be<uint32_t> owning_thread; // 0x18 PKTHREAD 0 unless locked
xe::be<uint32_t> queue_head; // 0x8 head of queue, pointing to this offset
xe::be<uint32_t> queue_tail; // 0xC tail of queue?
int32_t lock_count; // 0x10 -1 -> 0 on first lock 0x10
xe::be<int32_t> recursion_count; // 0x14 0 -> 1 on first lock 0x14
xe::be<uint32_t> owning_thread_id; // 0x18 0 unless locked 0x18
}; };
#pragma pack(pop) #pragma pack(pop)
static_assert_size(X_RTL_CRITICAL_SECTION, 28); static_assert_size(X_RTL_CRITICAL_SECTION, 28);
void xeRtlInitializeCriticalSection(X_RTL_CRITICAL_SECTION* cs, void xeRtlInitializeCriticalSection(X_RTL_CRITICAL_SECTION* cs,
uint32_t cs_ptr) { uint32_t cs_ptr) {
cs->unk_00 = 1; cs->header.type = 1; // EventSynchronizationObject (auto reset)
cs->spin_count_div_256 = 0; cs->header.absolute = 0; // spin count div 256
cs->queue_head = cs_ptr + 8; cs->header.signal_state = 0;
cs->queue_tail = cs_ptr + 8;
cs->lock_count = -1; cs->lock_count = -1;
cs->recursion_count = 0; cs->recursion_count = 0;
cs->owning_thread_id = 0; cs->owning_thread = 0;
} }
void RtlInitializeCriticalSection(pointer_t<X_RTL_CRITICAL_SECTION> cs) { void RtlInitializeCriticalSection(pointer_t<X_RTL_CRITICAL_SECTION> cs) {
@@ -335,13 +331,12 @@ X_STATUS xeRtlInitializeCriticalSectionAndSpinCount(X_RTL_CRITICAL_SECTION* cs,
spin_count_div_256 = 255; spin_count_div_256 = 255;
} }
cs->unk_00 = 1; cs->header.type = 1; // EventSynchronizationObject (auto reset)
cs->spin_count_div_256 = spin_count_div_256; cs->header.absolute = spin_count_div_256;
cs->queue_head = cs_ptr + 8; cs->header.signal_state = 0;
cs->queue_tail = cs_ptr + 8;
cs->lock_count = -1; cs->lock_count = -1;
cs->recursion_count = 0; cs->recursion_count = 0;
cs->owning_thread_id = 0; cs->owning_thread = 0;
return X_STATUS_SUCCESS; return X_STATUS_SUCCESS;
} }
@@ -354,90 +349,68 @@ dword_result_t RtlInitializeCriticalSectionAndSpinCount(
DECLARE_XBOXKRNL_EXPORT(RtlInitializeCriticalSectionAndSpinCount, DECLARE_XBOXKRNL_EXPORT(RtlInitializeCriticalSectionAndSpinCount,
ExportTag::kImplemented); ExportTag::kImplemented);
SHIM_CALL RtlEnterCriticalSection_shim(PPCContext* ppc_context, void RtlEnterCriticalSection(pointer_t<X_RTL_CRITICAL_SECTION> cs) {
KernelState* kernel_state) { uint32_t cur_thread = XThread::GetCurrentThread()->guest_object();
// VOID uint32_t spin_count = cs->header.absolute * 256;
// _Inout_ LPCRITICAL_SECTION lpCriticalSection
uint32_t cs_ptr = SHIM_GET_ARG_32(0);
// XELOGD("RtlEnterCriticalSection(%.8X)", cs_ptr); if (cs->owning_thread == cur_thread) {
// We already own the lock.
uint32_t thread_id = XThread::GetCurrentThreadId(); xe::atomic_inc(&cs->lock_count);
auto cs = reinterpret_cast<X_RTL_CRITICAL_SECTION*>(SHIM_MEM_ADDR(cs_ptr));
uint32_t spin_wait_remaining = cs->spin_count_div_256 * 256;
spin:
if (xe::atomic_inc(&cs->lock_count) != 0) {
// If this thread already owns the CS increment the recursion count.
if (cs->owning_thread_id == thread_id) {
cs->recursion_count++; cs->recursion_count++;
return; return;
} }
xe::atomic_dec(&cs->lock_count);
// Thread was locked - spin wait. // Spin loop
if (spin_wait_remaining) { while (spin_count--) {
spin_wait_remaining--; if (xe::atomic_cas(-1, 0, &cs->lock_count)) {
goto spin; // Acquired.
cs->owning_thread = cur_thread;
cs->recursion_count = 1;
return;
}
} }
// All out of spin waits, create a full waiter. if (xe::atomic_inc(&cs->lock_count) != 0) {
// TODO(benvanik): contention - do a real wait! // Create a full waiter.
// XELOGE("RtlEnterCriticalSection tried to really lock!"); KeWaitForSingleObject(reinterpret_cast<void*>(cs.host_address()), 8, 0, 0,
spin_wait_remaining = 0; // HACK: spin forever nullptr);
xe::threading::MaybeYield();
goto spin;
} }
// Now own the lock. // We've now acquired the lock.
cs->owning_thread_id = thread_id; assert_true(cs->owning_thread == 0);
cs->owning_thread = cur_thread;
cs->recursion_count = 1; cs->recursion_count = 1;
} }
DECLARE_XBOXKRNL_EXPORT(RtlEnterCriticalSection,
ExportTag::kImplemented | ExportTag::kHighFrequency);
SHIM_CALL RtlTryEnterCriticalSection_shim(PPCContext* ppc_context, dword_result_t RtlTryEnterCriticalSection(
KernelState* kernel_state) { pointer_t<X_RTL_CRITICAL_SECTION> cs) {
// DWORD uint32_t thread = XThread::GetCurrentThread()->guest_object();
// _Inout_ LPCRITICAL_SECTION lpCriticalSection
uint32_t cs_ptr = SHIM_GET_ARG_32(0);
// XELOGD("RtlTryEnterCriticalSection(%.8X)", cs_ptr);
uint32_t thread_id = XThread::GetCurrentThreadId();
auto cs = reinterpret_cast<X_RTL_CRITICAL_SECTION*>(SHIM_MEM_ADDR(cs_ptr));
uint32_t result = 0;
if (xe::atomic_cas(-1, 0, &cs->lock_count)) { if (xe::atomic_cas(-1, 0, &cs->lock_count)) {
// Able to steal the lock right away. // Able to steal the lock right away.
cs->owning_thread_id = thread_id; cs->owning_thread = thread;
cs->recursion_count = 1; cs->recursion_count = 1;
result = 1; return 1;
} else if (cs->owning_thread_id == thread_id) { } else if (cs->owning_thread == thread) {
// Already own the lock.
xe::atomic_inc(&cs->lock_count); xe::atomic_inc(&cs->lock_count);
++cs->recursion_count; ++cs->recursion_count;
result = 1; return 1;
}
SHIM_SET_RETURN_32(result);
} }
SHIM_CALL RtlLeaveCriticalSection_shim(PPCContext* ppc_context, // Failed to acquire lock.
KernelState* kernel_state) { return 0;
// VOID }
// _Inout_ LPCRITICAL_SECTION lpCriticalSection DECLARE_XBOXKRNL_EXPORT(RtlTryEnterCriticalSection,
uint32_t cs_ptr = SHIM_GET_ARG_32(0); ExportTag::kImplemented | ExportTag::kHighFrequency);
// XELOGD("RtlLeaveCriticalSection(%.8X)", cs_ptr); void RtlLeaveCriticalSection(pointer_t<X_RTL_CRITICAL_SECTION> cs) {
assert_true(cs->owning_thread == XThread::GetCurrentThread()->guest_object());
// FYI: No need to check if the owning thread is calling this, as that should // Drop recursion count - if it isn't zero we still have the lock.
// be the only case. if (--cs->recursion_count != 0) {
auto cs = reinterpret_cast<X_RTL_CRITICAL_SECTION*>(SHIM_MEM_ADDR(cs_ptr));
// Drop recursion count - if we are still not zero'ed return.
int32_t recursion_count = --cs->recursion_count;
assert_true(recursion_count > -1);
if (recursion_count) {
assert_true(cs->recursion_count > 0); assert_true(cs->recursion_count > 0);
xe::atomic_dec(&cs->lock_count); xe::atomic_dec(&cs->lock_count);
@@ -445,15 +418,14 @@ SHIM_CALL RtlLeaveCriticalSection_shim(PPCContext* ppc_context,
} }
// Not owned - unlock! // Not owned - unlock!
cs->owning_thread_id = 0; cs->owning_thread = 0;
if (xe::atomic_dec(&cs->lock_count) != -1) { if (xe::atomic_dec(&cs->lock_count) != -1) {
// There were waiters - wake one of them. // There were waiters - wake one of them.
// TODO(benvanik): wake a waiter. KeSetEvent(reinterpret_cast<X_KEVENT*>(cs.host_address()), 1, 0);
XELOGE("RtlLeaveCriticalSection would have woken a waiter");
} }
XThread::GetCurrentThread()->CheckApcs();
} }
DECLARE_XBOXKRNL_EXPORT(RtlLeaveCriticalSection,
ExportTag::kImplemented | ExportTag::kHighFrequency);
struct X_TIME_FIELDS { struct X_TIME_FIELDS {
xe::be<uint16_t> year; xe::be<uint16_t> year;
@@ -515,10 +487,6 @@ void RegisterRtlExports(xe::cpu::ExportResolver* export_resolver,
SHIM_SET_MAPPING("xboxkrnl.exe", RtlUnicodeStringToAnsiString, state); SHIM_SET_MAPPING("xboxkrnl.exe", RtlUnicodeStringToAnsiString, state);
SHIM_SET_MAPPING("xboxkrnl.exe", RtlMultiByteToUnicodeN, state); SHIM_SET_MAPPING("xboxkrnl.exe", RtlMultiByteToUnicodeN, state);
SHIM_SET_MAPPING("xboxkrnl.exe", RtlUnicodeToMultiByteN, state); SHIM_SET_MAPPING("xboxkrnl.exe", RtlUnicodeToMultiByteN, state);
SHIM_SET_MAPPING("xboxkrnl.exe", RtlEnterCriticalSection, state);
SHIM_SET_MAPPING("xboxkrnl.exe", RtlTryEnterCriticalSection, state);
SHIM_SET_MAPPING("xboxkrnl.exe", RtlLeaveCriticalSection, state);
} }
} // namespace xboxkrnl } // namespace xboxkrnl

View File

@@ -15,6 +15,8 @@
namespace xe { namespace xe {
namespace kernel { namespace kernel {
struct X_KEVENT;
namespace xboxkrnl { namespace xboxkrnl {
dword_result_t NtSetEvent(dword_t handle, lpdword_t previous_state_ptr); dword_result_t NtSetEvent(dword_t handle, lpdword_t previous_state_ptr);
@@ -25,6 +27,12 @@ dword_result_t NtWaitForMultipleObjectsEx(dword_t count, lpdword_t handles,
dword_t alertable, dword_t alertable,
lpqword_t timeout_ptr); lpqword_t timeout_ptr);
dword_result_t KeWaitForSingleObject(lpvoid_t object_ptr, dword_t wait_reason,
dword_t processor_mode, dword_t alertable,
lpqword_t timeout_ptr);
dword_result_t KeSetEvent(pointer_t<X_KEVENT> event_ptr, dword_t increment,
dword_t wait);
} // namespace xboxkrnl } // namespace xboxkrnl
} // namespace kernel } // namespace kernel
} // namespace xe } // namespace xe