diff --git a/src/xenia/kernel/util/native_list.h b/src/xenia/kernel/util/native_list.h index 70b5f00e8..96e950ed2 100644 --- a/src/xenia/kernel/util/native_list.h +++ b/src/xenia/kernel/util/native_list.h @@ -158,24 +158,92 @@ static void XeInsertHeadList(X_LIST_ENTRY* list_head, uint32_t entry, XeInsertHeadList(list_head, XeGuestList(list_head, context), XeHostList(entry, context), entry, context); } + +template +static void XeInsertHeadList(uint32_t list_head, X_LIST_ENTRY* entry, + VirtualTranslator context) { + XeInsertHeadList(XeHostList(list_head, context), list_head, entry, + XeGuestList(entry, context), context); +} +template +static void XeInsertHeadList(X_LIST_ENTRY* list_head, X_LIST_ENTRY* entry, + VirtualTranslator context) { + XeInsertHeadList(list_head, XeGuestList(list_head, context), + entry, XeGuestList(entry, context), context); +} + template struct X_TYPED_LIST : public X_LIST_ENTRY { public: - X_LIST_ENTRY* ObjectListEntry(TObject* obj) { + using this_type = X_TYPED_LIST; + + static X_LIST_ENTRY* ObjectListEntry(TObject* obj) { return reinterpret_cast( &reinterpret_cast(obj)[static_cast(EntryListOffset)]); } - TObject* ListEntryObject(X_LIST_ENTRY* entry) { + static TObject* ListEntryObject(X_LIST_ENTRY* entry) { return reinterpret_cast(&reinterpret_cast( entry)[-static_cast(EntryListOffset)]); } + template - void Initialize(VirtualTranslator* translator) { + struct ForwardIterator { + VirtualTranslator vt; + uint32_t current_entry; + + inline ForwardIterator& operator++() { + current_entry = + vt->TranslateVirtual(current_entry)->flink_ptr; + return *this; + } + inline bool operator!=(uint32_t other_ptr) const { + return current_entry != other_ptr; + } + + inline TObject& operator*() { + return *ListEntryObject( + vt->TranslateVirtual(current_entry)); + } + }; + template + struct ForwardIteratorBegin { + VirtualTranslator vt; + this_type* const thiz; + + ForwardIterator begin() { + return ForwardIterator{vt, thiz->flink_ptr}; + } + + uint32_t 
end() { return vt->HostToGuestVirtual(thiz); } + }; + template + ForwardIteratorBegin IterateForward(VirtualTranslator vt) { + return ForwardIteratorBegin{vt, this}; + } + + template + void Initialize(VirtualTranslator translator) { XeInitializeListHead(this, translator); } template - void InsertHead(TObject* entry, VirtualTranslator* translator) { - XeInsertHeadList(this, ObjectListEntry(entry), translator); + void InsertHead(TObject* entry, VirtualTranslator translator) { + XeInsertHeadList(static_cast(this), ObjectListEntry(entry), translator); + } + template + void InsertTail(TObject* entry, VirtualTranslator translator) { + XeInsertTailList(this, ObjectListEntry(entry), translator); + } + template + bool empty(VirtualTranslator vt) const { + return vt->TranslateVirtual(flink_ptr) == this; + } + template + TObject* HeadObject(VirtualTranslator vt) { + return ListEntryObject(vt->TranslateVirtual(flink_ptr)); + } + template + TObject* TailObject(VirtualTranslator vt) { + return ListEntryObject(vt->TranslateVirtual(blink_ptr)); } }; } // namespace util diff --git a/src/xenia/kernel/xam/xam_info.cc b/src/xenia/kernel/xam/xam_info.cc index d1a240312..aad29c5c3 100644 --- a/src/xenia/kernel/xam/xam_info.cc +++ b/src/xenia/kernel/xam/xam_info.cc @@ -435,12 +435,12 @@ dword_result_t RtlSleep_entry(dword_t dwMilliseconds, dword_t bAlertable) { : static_cast(-10000) * dwMilliseconds; X_STATUS result = xboxkrnl::KeDelayExecutionThread(MODE::UserMode, bAlertable, - (uint64_t*)&delay); + (uint64_t*)&delay, nullptr); // If the delay was interrupted by an APC, keep delaying the thread while (bAlertable && result == X_STATUS_ALERTED) { result = xboxkrnl::KeDelayExecutionThread(MODE::UserMode, bAlertable, - (uint64_t*)&delay); + (uint64_t*)&delay, nullptr); } return result == X_STATUS_SUCCESS ? 
X_STATUS_SUCCESS : X_STATUS_USER_APC; diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc index 662b77ca7..22609c8a5 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc @@ -379,19 +379,35 @@ DECLARE_XBOXKRNL_EXPORT2(KeQueryPerformanceFrequency, kThreading, kImplemented, kHighFrequency); uint32_t KeDelayExecutionThread(uint32_t processor_mode, uint32_t alertable, - uint64_t* interval_ptr) { + uint64_t* interval_ptr, + cpu::ppc::PPCContext* ctx) { XThread* thread = XThread::GetCurrentThread(); + + if (alertable) { + X_STATUS stat = xeProcessUserApcs(ctx); + if (stat == X_STATUS_USER_APC) { + return stat; + } + } X_STATUS result = thread->Delay(processor_mode, alertable, *interval_ptr); + if (result == X_STATUS_USER_APC) { + result = xeProcessUserApcs(ctx); + if (result == X_STATUS_USER_APC) { + return result; + } + } + return result; } dword_result_t KeDelayExecutionThread_entry(dword_t processor_mode, dword_t alertable, - lpqword_t interval_ptr) { + lpqword_t interval_ptr, + const ppc_context_t& context) { uint64_t interval = interval_ptr ? static_cast(*interval_ptr) : 0u; return KeDelayExecutionThread(processor_mode, alertable, - interval_ptr ? &interval : nullptr); + interval_ptr ? &interval : nullptr, context); } DECLARE_XBOXKRNL_EXPORT3(KeDelayExecutionThread, kThreading, kImplemented, kBlocking, kHighFrequency); @@ -889,7 +905,11 @@ uint32_t xeKeWaitForSingleObject(void* object_ptr, uint32_t wait_reason, X_STATUS result = object->Wait(wait_reason, processor_mode, alertable, timeout_ptr); - + if (alertable) { + if (result == X_STATUS_USER_APC) { + result = xeProcessUserApcs(nullptr); + } + } return result; } @@ -915,6 +935,11 @@ uint32_t NtWaitForSingleObjectEx(uint32_t object_handle, uint32_t wait_mode, uint64_t timeout = timeout_ptr ? static_cast(*timeout_ptr) : 0u; result = object->Wait(3, wait_mode, alertable, timeout_ptr ? 
&timeout : nullptr); + if (alertable) { + if (result == X_STATUS_USER_APC) { + result = xeProcessUserApcs(nullptr); + } + } } else { result = X_STATUS_INVALID_HANDLE; } @@ -955,9 +980,15 @@ dword_result_t KeWaitForMultipleObjects_entry( } } uint64_t timeout = timeout_ptr ? static_cast(*timeout_ptr) : 0u; - return XObject::WaitMultiple( + X_STATUS result = XObject::WaitMultiple( uint32_t(count), reinterpret_cast(&objects[0]), wait_type, wait_reason, processor_mode, alertable, timeout_ptr ? &timeout : nullptr); + if (alertable) { + if (result == X_STATUS_USER_APC) { + result = xeProcessUserApcs(nullptr); + } + } + return result; } DECLARE_XBOXKRNL_EXPORT3(KeWaitForMultipleObjects, kThreading, kImplemented, kBlocking, kHighFrequency); @@ -993,8 +1024,15 @@ uint32_t xeNtWaitForMultipleObjectsEx(uint32_t count, xe::be* handles, } } - return XObject::WaitMultiple(count, reinterpret_cast(&objects[0]), - wait_type, 6, wait_mode, alertable, timeout_ptr); + auto result = + XObject::WaitMultiple(count, reinterpret_cast(&objects[0]), + wait_type, 6, wait_mode, alertable, timeout_ptr); + if (alertable) { + if (result == X_STATUS_USER_APC) { + result = xeProcessUserApcs(nullptr); + } + } + return result; } dword_result_t NtWaitForMultipleObjectsEx_entry( @@ -1034,6 +1072,11 @@ dword_result_t NtSignalAndWaitForSingleObjectEx_entry(dword_t signal_handle, result = X_STATUS_INVALID_HANDLE; } + if (alertable) { + if (result == X_STATUS_USER_APC) { + result = xeProcessUserApcs(nullptr); + } + } return result; } DECLARE_XBOXKRNL_EXPORT3(NtSignalAndWaitForSingleObjectEx, kThreading, @@ -1042,8 +1085,7 @@ DECLARE_XBOXKRNL_EXPORT3(NtSignalAndWaitForSingleObjectEx, kThreading, static void PrefetchForCAS(const void* value) { swcache::PrefetchW(value); } uint32_t xeKeKfAcquireSpinLock(PPCContext* ctx, X_KSPINLOCK* lock, - bool change_irql) -{ + bool change_irql) { auto old_irql = change_irql ? 
xeKfRaiseIrql(ctx, 2) : 0; PrefetchForCAS(lock); @@ -1071,7 +1113,7 @@ void xeKeKfReleaseSpinLock(PPCContext* ctx, X_KSPINLOCK* lock, dword_t old_irql, assert_true(lock->prcb_of_owner == static_cast(ctx->r[13])); // Unlock. lock->prcb_of_owner.value = 0; - + if (change_irql) { // Unlock. if (old_irql >= 2) { @@ -1186,23 +1228,169 @@ dword_result_t KfRaiseIrql_entry(dword_t new_irql, const ppc_context_t& ctx) { DECLARE_XBOXKRNL_EXPORT2(KfRaiseIrql, kThreading, kImplemented, kHighFrequency); -void NtQueueApcThread_entry(dword_t thread_handle, lpvoid_t apc_routine, - lpvoid_t apc_routine_context, lpvoid_t arg1, - lpvoid_t arg2) { +uint32_t xeNtQueueApcThread(uint32_t thread_handle, uint32_t apc_routine, + uint32_t apc_routine_context, uint32_t arg1, + uint32_t arg2, cpu::ppc::PPCContext* context) { + auto kernelstate = context->kernel_state; + auto memory = kernelstate->memory(); auto thread = - kernel_state()->object_table()->LookupObject(thread_handle); + kernelstate->object_table()->LookupObject(thread_handle); if (!thread) { XELOGE("NtQueueApcThread: Incorrect thread handle! Might cause crash"); - return; + return X_STATUS_INVALID_HANDLE; } - if (!apc_routine) { - XELOGE("NtQueueApcThread: Incorrect apc routine! 
Might cause crash"); - return; + uint32_t apc_ptr = memory->SystemHeapAlloc(XAPC::kSize); + if (!apc_ptr) { + return X_STATUS_NO_MEMORY; + } + XAPC* apc = context->TranslateVirtual(apc_ptr); + xeKeInitializeApc(apc, thread->guest_object(), XAPC::kDummyKernelRoutine, 0, + apc_routine, 1 /*user apc mode*/, apc_routine_context); + + if (!xeKeInsertQueueApc(apc, arg1, arg2, 0, context)) { + memory->SystemHeapFree(apc_ptr); + return X_STATUS_UNSUCCESSFUL; + } + // no-op, just meant to awaken a sleeping alertable thread to process real + // apcs + thread->thread()->QueueUserCallback([]() {}); + return X_STATUS_SUCCESS; +} +dword_result_t NtQueueApcThread_entry(dword_t thread_handle, + lpvoid_t apc_routine, + lpvoid_t apc_routine_context, + lpvoid_t arg1, lpvoid_t arg2, + const ppc_context_t& context) { + return xeNtQueueApcThread(thread_handle, apc_routine, apc_routine_context, + arg1, arg2, context); +} + +X_STATUS xeProcessUserApcs(PPCContext* ctx) { + if (!ctx) { + ctx = cpu::ThreadState::Get()->context(); + } + X_STATUS alert_status = X_STATUS_SUCCESS; + auto kpcr = ctx->TranslateVirtualGPR(ctx->r[13]); + + auto current_thread = ctx->TranslateVirtual(kpcr->prcb_data.current_thread); + + uint32_t unlocked_irql = + xeKeKfAcquireSpinLock(ctx, &current_thread->apc_lock); + + auto& user_apc_queue = current_thread->apc_lists[1]; + + // use guest stack for temporaries + uint32_t old_stack_pointer = static_cast(ctx->r[1]); + + uint32_t scratch_address = old_stack_pointer - 16; + ctx->r[1] = old_stack_pointer - 32; + + while (!user_apc_queue.empty(ctx)) { + uint32_t apc_ptr = user_apc_queue.flink_ptr; + + XAPC* apc = user_apc_queue.ListEntryObject( + ctx->TranslateVirtual(apc_ptr)); + + uint8_t* scratch_ptr = ctx->TranslateVirtual(scratch_address); + xe::store_and_swap(scratch_ptr + 0, apc->normal_routine); + xe::store_and_swap(scratch_ptr + 4, apc->normal_context); + xe::store_and_swap(scratch_ptr + 8, apc->arg1); + xe::store_and_swap(scratch_ptr + 12, apc->arg2); + 
util::XeRemoveEntryList(&apc->list_entry, ctx); + apc->enqueued = 0; + + xeKeKfReleaseSpinLock(ctx, &current_thread->apc_lock, unlocked_irql); + alert_status = X_STATUS_USER_APC; + if (apc->kernel_routine != XAPC::kDummyKernelRoutine) { + uint64_t kernel_args[] = { + apc_ptr, + scratch_address + 0, + scratch_address + 4, + scratch_address + 8, + scratch_address + 12, + }; + ctx->processor->Execute(ctx->thread_state, apc->kernel_routine, + kernel_args, xe::countof(kernel_args)); + } else { + ctx->kernel_state->memory()->SystemHeapFree(apc_ptr); + } + + uint32_t normal_routine = xe::load_and_swap(scratch_ptr + 0); + uint32_t normal_context = xe::load_and_swap(scratch_ptr + 4); + uint32_t arg1 = xe::load_and_swap(scratch_ptr + 8); + uint32_t arg2 = xe::load_and_swap(scratch_ptr + 12); + + if (normal_routine) { + uint64_t normal_args[] = {normal_context, arg1, arg2}; + ctx->processor->Execute(ctx->thread_state, normal_routine, normal_args, + xe::countof(normal_args)); + } + + unlocked_irql = xeKeKfAcquireSpinLock(ctx, &current_thread->apc_lock); } - thread->EnqueueApc(apc_routine, apc_routine_context, arg1, arg2); + ctx->r[1] = old_stack_pointer; + + xeKeKfReleaseSpinLock(ctx, &current_thread->apc_lock, unlocked_irql); + return alert_status; +} + +static void YankApcList(PPCContext* ctx, X_KTHREAD* current_thread, unsigned apc_mode, + bool rundown) { + uint32_t unlocked_irql = + xeKeKfAcquireSpinLock(ctx, &current_thread->apc_lock); + + XAPC* result = nullptr; + auto& user_apc_queue = current_thread->apc_lists[apc_mode]; + + if (user_apc_queue.empty(ctx)) { + result = nullptr; + } else { + result = user_apc_queue.HeadObject(ctx); + for (auto&& entry : user_apc_queue.IterateForward(ctx)) { + entry.enqueued = 0; + } + util::XeRemoveEntryList(&user_apc_queue, ctx); + } + + xeKeKfReleaseSpinLock(ctx, &current_thread->apc_lock, unlocked_irql); + + if (rundown && result) { + XAPC* current_entry = result; + while (true) { + XAPC* this_entry = current_entry; + uint32_t next_entry = 
this_entry->list_entry.flink_ptr; + + if (this_entry->rundown_routine) { + uint64_t args[] = {ctx->HostToGuestVirtual(this_entry)}; + kernel_state()->processor()->Execute(ctx->thread_state, + this_entry->rundown_routine, args, + xe::countof(args)); + } else { + ctx->kernel_state->memory()->SystemHeapFree( + ctx->HostToGuestVirtual(this_entry)); + } + + if (next_entry == 0) { + break; + } + current_entry = user_apc_queue.ListEntryObject( + ctx->TranslateVirtual(next_entry)); + if (current_entry == result) { + break; + } + } + } +} + +void xeRundownApcs(PPCContext* ctx) { + auto kpcr = ctx->TranslateVirtualGPR(ctx->r[13]); + + auto current_thread = ctx->TranslateVirtual(kpcr->prcb_data.current_thread); + YankApcList(ctx, current_thread, 1, true); + YankApcList(ctx, current_thread, 0, false); } DECLARE_XBOXKRNL_EXPORT1(NtQueueApcThread, kThreading, kImplemented); void xeKeInitializeApc(XAPC* apc, uint32_t thread_ptr, uint32_t kernel_routine, @@ -1231,67 +1419,82 @@ void KeInitializeApc_entry(pointer_t apc, lpvoid_t thread_ptr, } DECLARE_XBOXKRNL_EXPORT1(KeInitializeApc, kThreading, kImplemented); +uint32_t xeKeInsertQueueApc(XAPC* apc, uint32_t arg1, uint32_t arg2, + uint32_t priority_increment, + cpu::ppc::PPCContext* context) { + uint32_t thread_guest_pointer = apc->thread_ptr; + if (!thread_guest_pointer) { + return 0; + } + auto target_thread = context->TranslateVirtual(apc->thread_ptr); + auto old_irql = xeKeKfAcquireSpinLock(context, &target_thread->apc_lock); + uint32_t result; + if (!target_thread->apc_related || apc->enqueued) { + result = 0; + } else { + apc->arg1 = arg1; + apc->arg2 = arg2; + + auto& which_list = target_thread->apc_lists[apc->apc_mode]; + + if (apc->normal_routine) { + which_list.InsertTail(apc, context); + } else { + XAPC* insertion_pos = nullptr; + for (auto&& sub_apc : which_list.IterateForward(context)) { + insertion_pos = &sub_apc; + if (sub_apc.normal_routine) { + break; + } + } + if (!insertion_pos) { + which_list.InsertHead(apc, 
context); + } else { + util::XeInsertHeadList(insertion_pos->list_entry.blink_ptr, + &apc->list_entry, context); + } + } + + apc->enqueued = 1; + + /* + todo: this is incomplete, a ton of other logic happens here, i believe + for waking the target thread if its alertable + */ + result = 1; + } + xeKeKfReleaseSpinLock(context, &target_thread->apc_lock, old_irql); + return result; +} + dword_result_t KeInsertQueueApc_entry(pointer_t apc, lpvoid_t arg1, - lpvoid_t arg2, - dword_t priority_increment) { - auto thread = XObject::GetNativeObject( - kernel_state(), - kernel_state()->memory()->TranslateVirtual(apc->thread_ptr)); - if (!thread) { - return 0; - } + lpvoid_t arg2, dword_t priority_increment, + const ppc_context_t& context) { - // Lock thread. - thread->LockApc(); + return xeKeInsertQueueApc(apc, arg1, arg2, priority_increment, context); - // Fail if already inserted. - if (apc->enqueued) { - thread->UnlockApc(false); - return 0; - } - - // Prep APC. - apc->arg1 = arg1.guest_address(); - apc->arg2 = arg2.guest_address(); - apc->enqueued = 1; - - auto apc_list = thread->apc_list(); - - uint32_t list_entry_ptr = apc.guest_address() + 8; - apc_list->Insert(list_entry_ptr); - - // Unlock thread. 
- thread->UnlockApc(true); - - return 1; } DECLARE_XBOXKRNL_EXPORT1(KeInsertQueueApc, kThreading, kImplemented); -dword_result_t KeRemoveQueueApc_entry(pointer_t apc) { +dword_result_t KeRemoveQueueApc_entry(pointer_t apc, + const ppc_context_t& context) { bool result = false; - auto thread = XObject::GetNativeObject( - kernel_state(), - kernel_state()->memory()->TranslateVirtual(apc->thread_ptr)); - if (!thread) { + + uint32_t thread_guest_pointer = apc->thread_ptr; + if (!thread_guest_pointer) { return 0; } + auto target_thread = context->TranslateVirtual(apc->thread_ptr); + auto old_irql = xeKeKfAcquireSpinLock(context, &target_thread->apc_lock); - thread->LockApc(); - - if (!apc->enqueued) { - thread->UnlockApc(false); - return 0; - } - - auto apc_list = thread->apc_list(); - uint32_t list_entry_ptr = apc.guest_address() + 8; - if (apc_list->IsQueued(list_entry_ptr)) { - apc_list->Remove(list_entry_ptr); + if (apc->enqueued) { result = true; + apc->enqueued = 0; + util::XeRemoveEntryList(&apc->list_entry, context); + // todo: this is incomplete, there is more logic here in actual kernel } - - thread->UnlockApc(true); + xeKeKfReleaseSpinLock(context, &target_thread->apc_lock, old_irql); return result ? 
1 : 0; } @@ -1358,7 +1561,7 @@ struct X_ERWLOCK { be readers_entry_count; // 0xC X_KEVENT writer_event; // 0x10 X_KSEMAPHORE reader_semaphore; // 0x20 - X_KSPINLOCK spin_lock; // 0x34 + X_KSPINLOCK spin_lock; // 0x34 }; static_assert_size(X_ERWLOCK, 0x38); @@ -1393,8 +1596,7 @@ DECLARE_XBOXKRNL_EXPORT2(ExAcquireReadWriteLockExclusive, kThreading, dword_result_t ExTryToAcquireReadWriteLockExclusive_entry( pointer_t lock_ptr, const ppc_context_t& ppc_context) { - auto old_irql = - xeKeKfAcquireSpinLock(ppc_context, &lock_ptr->spin_lock); + auto old_irql = xeKeKfAcquireSpinLock(ppc_context, &lock_ptr->spin_lock); uint32_t result; if (lock_ptr->lock_count < 0) { @@ -1412,13 +1614,13 @@ DECLARE_XBOXKRNL_EXPORT1(ExTryToAcquireReadWriteLockExclusive, kThreading, void ExAcquireReadWriteLockShared_entry(pointer_t lock_ptr, const ppc_context_t& ppc_context) { - auto old_irql = xeKeKfAcquireSpinLock(ppc_context, & lock_ptr->spin_lock); + auto old_irql = xeKeKfAcquireSpinLock(ppc_context, &lock_ptr->spin_lock); int32_t lock_count = ++lock_ptr->lock_count; if (!lock_count || (lock_ptr->readers_entry_count && !lock_ptr->writers_waiting_count)) { lock_ptr->readers_entry_count++; - xeKeKfReleaseSpinLock(ppc_context, & lock_ptr->spin_lock, old_irql); + xeKeKfReleaseSpinLock(ppc_context, &lock_ptr->spin_lock, old_irql); return; } @@ -1432,7 +1634,7 @@ DECLARE_XBOXKRNL_EXPORT2(ExAcquireReadWriteLockShared, kThreading, kImplemented, dword_result_t ExTryToAcquireReadWriteLockShared_entry( pointer_t lock_ptr, const ppc_context_t& ppc_context) { - auto old_irql = xeKeKfAcquireSpinLock(ppc_context, & lock_ptr->spin_lock); + auto old_irql = xeKeKfAcquireSpinLock(ppc_context, &lock_ptr->spin_lock); uint32_t result; if (lock_ptr->lock_count < 0 || @@ -1452,7 +1654,7 @@ DECLARE_XBOXKRNL_EXPORT1(ExTryToAcquireReadWriteLockShared, kThreading, void ExReleaseReadWriteLock_entry(pointer_t lock_ptr, const ppc_context_t& ppc_context) { - auto old_irql = xeKeKfAcquireSpinLock(ppc_context, & 
lock_ptr->spin_lock); + auto old_irql = xeKeKfAcquireSpinLock(ppc_context, &lock_ptr->spin_lock); int32_t lock_count = --lock_ptr->lock_count; diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.h b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.h index d8bce91df..db4163663 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.h +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.h @@ -38,7 +38,8 @@ uint32_t NtWaitForSingleObjectEx(uint32_t object_handle, uint32_t wait_mode, uint32_t xeKeSetEvent(X_KEVENT* event_ptr, uint32_t increment, uint32_t wait); uint32_t KeDelayExecutionThread(uint32_t processor_mode, uint32_t alertable, - uint64_t* interval_ptr); + uint64_t* interval_ptr, + cpu::ppc::PPCContext* ctx); uint32_t ExCreateThread(xe::be* handle_ptr, uint32_t stack_size, xe::be* thread_id_ptr, @@ -53,13 +54,21 @@ uint32_t NtClose(uint32_t handle); void xeKeInitializeApc(XAPC* apc, uint32_t thread_ptr, uint32_t kernel_routine, uint32_t rundown_routine, uint32_t normal_routine, uint32_t apc_mode, uint32_t normal_context); - +uint32_t xeKeInsertQueueApc(XAPC* apc, uint32_t arg1, uint32_t arg2, + uint32_t priority_increment, + cpu::ppc::PPCContext* context); +uint32_t xeNtQueueApcThread(uint32_t thread_handle, uint32_t apc_routine, + uint32_t apc_routine_context, uint32_t arg1, + uint32_t arg2, cpu::ppc::PPCContext* context); void xeKfLowerIrql(PPCContext* ctx, unsigned char new_irql); unsigned char xeKfRaiseIrql(PPCContext* ctx, unsigned char new_irql); void xeKeKfReleaseSpinLock(PPCContext* ctx, X_KSPINLOCK* lock, dword_t old_irql, bool change_irql=true); uint32_t xeKeKfAcquireSpinLock(PPCContext* ctx, X_KSPINLOCK* lock, bool change_irql=true); +X_STATUS xeProcessUserApcs(PPCContext* ctx); + +void xeRundownApcs(PPCContext* ctx); } // namespace xboxkrnl } // namespace kernel diff --git a/src/xenia/kernel/xthread.cc b/src/xenia/kernel/xthread.cc index 67ad38f4d..9dd9a9690 100644 --- a/src/xenia/kernel/xthread.cc +++ b/src/xenia/kernel/xthread.cc @@ -25,9 +25,9 
@@ #include "xenia/emulator.h" #include "xenia/kernel/kernel_state.h" #include "xenia/kernel/user_module.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h" #include "xenia/kernel/xevent.h" #include "xenia/kernel/xmutant.h" - DEFINE_bool(ignore_thread_priorities, true, "Ignores game-specified thread priorities.", "Kernel"); DEFINE_bool(ignore_thread_affinities, true, @@ -62,8 +62,7 @@ XThread::XThread(KernelState* kernel_state, uint32_t stack_size, : XObject(kernel_state, kObjectType, !guest_thread), thread_id_(++next_xthread_id_), guest_thread_(guest_thread), - main_thread_(main_thread), - apc_list_(kernel_state->memory()) { + main_thread_(main_thread) { creation_params_.stack_size = stack_size; creation_params_.xapi_thread_startup = xapi_thread_startup; creation_params_.start_address = start_address; @@ -179,52 +178,55 @@ static uint8_t GetFakeCpuNumber(uint8_t proc_mask) { void XThread::InitializeGuestObject() { auto guest_thread = guest_object(); - - // Setup the thread state block (last error/etc). - uint8_t* p = memory()->TranslateVirtual(guest_object()); + auto thread_guest_ptr = guest_object(); guest_thread->header.type = 6; guest_thread->suspend_count = (creation_params_.creation_flags & X_CREATE_SUSPENDED) ? 
1 : 0; - xe::store_and_swap(p + 0x010, guest_object() + 0x010); - xe::store_and_swap(p + 0x014, guest_object() + 0x010); + guest_thread->unk_10 = (thread_guest_ptr + 0x10); + guest_thread->unk_14 = (thread_guest_ptr + 0x10); + guest_thread->unk_40 = (thread_guest_ptr + 0x20); + guest_thread->unk_44 = (thread_guest_ptr + 0x20); + guest_thread->unk_48 = (thread_guest_ptr); + uint32_t v6 = thread_guest_ptr + 0x18; + *(uint32_t*)&guest_thread->unk_54 = 16777729; + guest_thread->unk_4C = (v6); + guest_thread->stack_base = (this->stack_base_); + guest_thread->stack_limit = (this->stack_limit_); + guest_thread->stack_kernel = (this->stack_base_ - 240); + guest_thread->tls_address = (this->tls_static_address_); + guest_thread->thread_state = 0; + uint32_t process_info_block_address = + kernel_state_->process_info_block_address(); - xe::store_and_swap(p + 0x040, guest_object() + 0x018 + 8); - xe::store_and_swap(p + 0x044, guest_object() + 0x018 + 8); - xe::store_and_swap(p + 0x048, guest_object()); - xe::store_and_swap(p + 0x04C, guest_object() + 0x018); + X_KPROCESS* process = + memory()->TranslateVirtual(process_info_block_address); + uint32_t kpcrb = pcr_address_ + offsetof(X_KPCR, prcb_data); - xe::store_and_swap(p + 0x054, 0x102); - xe::store_and_swap(p + 0x056, 1); - xe::store_and_swap(p + 0x05C, stack_base_); - xe::store_and_swap(p + 0x060, stack_limit_); - xe::store_and_swap(p + 0x064, stack_base_ - kThreadKernelStackSize); - xe::store_and_swap(p + 0x068, tls_static_address_); - xe::store_and_swap(p + 0x06C, 0); - xe::store_and_swap(p + 0x074, guest_object() + 0x074); - xe::store_and_swap(p + 0x078, guest_object() + 0x074); - xe::store_and_swap(p + 0x07C, guest_object() + 0x07C); - xe::store_and_swap(p + 0x080, guest_object() + 0x07C); - xe::store_and_swap(p + 0x084, - kernel_state_->process_info_block_address()); - xe::store_and_swap(p + 0x08B, 1); - // 0xD4 = APC - // 0xFC = semaphore (ptr, 0, 2) - // 0xA88 = APC - // 0x18 = timer - xe::store_and_swap(p + 0x09C, 
0xFDFFD7FF); - // current_cpu is expected to be initialized externally via SetActiveCpu. - xe::store_and_swap(p + 0x0D0, stack_base_); - xe::store_and_swap(p + 0x130, Clock::QueryGuestSystemTime()); - xe::store_and_swap(p + 0x144, guest_object() + 0x144); - xe::store_and_swap(p + 0x148, guest_object() + 0x144); - xe::store_and_swap(p + 0x14C, thread_id_); - xe::store_and_swap(p + 0x150, creation_params_.start_address); - xe::store_and_swap(p + 0x154, guest_object() + 0x154); - xe::store_and_swap(p + 0x158, guest_object() + 0x154); - xe::store_and_swap(p + 0x160, 0); // last error - xe::store_and_swap(p + 0x16C, creation_params_.creation_flags); - xe::store_and_swap(p + 0x17C, 1); + auto process_type = X_PROCTYPE_USER; // process->process_type; + guest_thread->process_type_dup = process_type; + guest_thread->process_type = process_type; + guest_thread->apc_lists[0].Initialize(memory()); + guest_thread->apc_lists[1].Initialize(memory()); + + guest_thread->a_prcb_ptr = kpcrb; + guest_thread->another_prcb_ptr = kpcrb; + + guest_thread->apc_related = 1; + guest_thread->msr_mask = 0xFDFFD7FF; + guest_thread->process = process_info_block_address; + guest_thread->stack_alloc_base = this->stack_base_; + guest_thread->create_time = Clock::QueryGuestSystemTime(); + guest_thread->unk_144 = thread_guest_ptr + 324; + guest_thread->unk_148 = thread_guest_ptr + 324; + guest_thread->thread_id = this->thread_id_; + guest_thread->start_address = this->creation_params_.start_address; + guest_thread->unk_154 = thread_guest_ptr + 340; + uint32_t v9 = thread_guest_ptr; + guest_thread->last_error = 0; + guest_thread->unk_158 = v9 + 340; + guest_thread->creation_flags = this->creation_params_.creation_flags; + guest_thread->unk_17C = 1; } bool XThread::AllocateStack(uint32_t size) { @@ -449,7 +451,8 @@ X_STATUS XThread::Create() { X_STATUS XThread::Exit(int exit_code) { // This may only be called on the thread itself. 
assert_true(XThread::GetCurrentThread() == this); - //TODO(chrispy): not sure if this order is correct, should it come after apcs? + // TODO(chrispy): not sure if this order is correct, should it come after + // apcs? guest_object()->terminated = 1; // TODO(benvanik): dispatch events? waiters? etc? @@ -590,53 +593,15 @@ void XThread::LeaveCriticalRegion() { auto apc_disable_count = ++kthread->apc_disable_count; } -uint32_t XThread::RaiseIrql(uint32_t new_irql) { - return irql_.exchange(new_irql); -} - -void XThread::LowerIrql(uint32_t new_irql) { irql_ = new_irql; } - -void XThread::CheckApcs() { DeliverAPCs(); } - -void XThread::LockApc() { global_critical_region_.mutex().lock(); } - -void XThread::UnlockApc(bool queue_delivery) { - bool needs_apc = apc_list_.HasPending(); - global_critical_region_.mutex().unlock(); - if (needs_apc && queue_delivery) { - thread_->QueueUserCallback([this]() { - cpu::ThreadState::Bind(this->thread_state()); - this->SetCurrentThread(); // we store current thread in TLS, but tls - // slots are different in windows user - // callback! - DeliverAPCs(); - }); - } -} - void XThread::EnqueueApc(uint32_t normal_routine, uint32_t normal_context, uint32_t arg1, uint32_t arg2) { - LockApc(); + // don't use thread_state_ -> context() ! we're not running on the thread + // we're enqueuing to + uint32_t success = xboxkrnl::xeNtQueueApcThread( + this->handle(), normal_routine, normal_context, arg1, arg2, + cpu::ThreadState::Get()->context()); - // Allocate APC. - // We'll tag it as special and free it when dispatched. 
- uint32_t apc_ptr = memory()->SystemHeapAlloc(XAPC::kSize); - auto apc = reinterpret_cast(memory()->TranslateVirtual(apc_ptr)); - - apc->type = 18; - apc->apc_mode = 1; - apc->kernel_routine = XAPC::kDummyKernelRoutine; - apc->rundown_routine = XAPC::kDummyRundownRoutine; - apc->normal_routine = normal_routine; - apc->normal_context = normal_context; - apc->arg1 = arg1; - apc->arg2 = arg2; - apc->enqueued = 1; - - uint32_t list_entry_ptr = apc_ptr + 8; - apc_list_.Insert(list_entry_ptr); - - UnlockApc(true); + xenia_assert(success == X_STATUS_SUCCESS); } void XThread::SetCurrentThread() { current_xthread_tls_ = this; } @@ -644,99 +609,11 @@ void XThread::SetCurrentThread() { current_xthread_tls_ = this; } void XThread::DeliverAPCs() { // https://www.drdobbs.com/inside-nts-asynchronous-procedure-call/184416590?pgno=1 // https://www.drdobbs.com/inside-nts-asynchronous-procedure-call/184416590?pgno=7 - auto processor = kernel_state()->processor(); - LockApc(); - auto kthread = guest_object(); - while (apc_list_.HasPending() && kthread->apc_disable_count == 0) { - // Get APC entry (offset for LIST_ENTRY offset) and cache what we need. - // Calling the routine may delete the memory/overwrite it. - uint32_t apc_ptr = apc_list_.Shift() - 8; - auto apc = reinterpret_cast(memory()->TranslateVirtual(apc_ptr)); - bool needs_freeing = apc->kernel_routine == XAPC::kDummyKernelRoutine; - - XELOGD("Delivering APC to {:08X}", uint32_t(apc->normal_routine)); - - // Mark as uninserted so that it can be reinserted again by the routine. - apc->enqueued = 0; - - // Call kernel routine. - // The routine can modify all of its arguments before passing it on. - // Since we need to give guest accessible pointers over, we copy things - // into and out of scratch. 
- uint8_t* scratch_ptr = memory()->TranslateVirtual(scratch_address_); - xe::store_and_swap(scratch_ptr + 0, apc->normal_routine); - xe::store_and_swap(scratch_ptr + 4, apc->normal_context); - xe::store_and_swap(scratch_ptr + 8, apc->arg1); - xe::store_and_swap(scratch_ptr + 12, apc->arg2); - if (apc->kernel_routine != XAPC::kDummyKernelRoutine) { - // kernel_routine(apc_address, &normal_routine, &normal_context, - // &system_arg1, &system_arg2) - uint64_t kernel_args[] = { - apc_ptr, - scratch_address_ + 0, - scratch_address_ + 4, - scratch_address_ + 8, - scratch_address_ + 12, - }; - processor->Execute(thread_state_, apc->kernel_routine, kernel_args, - xe::countof(kernel_args)); - } - uint32_t normal_routine = xe::load_and_swap(scratch_ptr + 0); - uint32_t normal_context = xe::load_and_swap(scratch_ptr + 4); - uint32_t arg1 = xe::load_and_swap(scratch_ptr + 8); - uint32_t arg2 = xe::load_and_swap(scratch_ptr + 12); - - // Call the normal routine. Note that it may have been killed by the kernel - // routine. - if (normal_routine) { - UnlockApc(false); - // normal_routine(normal_context, system_arg1, system_arg2) - uint64_t normal_args[] = {normal_context, arg1, arg2}; - processor->Execute(thread_state_, normal_routine, normal_args, - xe::countof(normal_args)); - LockApc(); - } - - XELOGD("Completed delivery of APC to {:08X} ({:08X}, {:08X}, {:08X})", - normal_routine, normal_context, arg1, arg2); - - // If special, free it. - if (needs_freeing) { - memory()->SystemHeapFree(apc_ptr); - } - } - UnlockApc(true); + xboxkrnl::xeProcessUserApcs(thread_state_->context()); } void XThread::RundownAPCs() { - assert_true(XThread::GetCurrentThread() == this); - LockApc(); - while (apc_list_.HasPending()) { - // Get APC entry (offset for LIST_ENTRY offset) and cache what we need. - // Calling the routine may delete the memory/overwrite it. 
- uint32_t apc_ptr = apc_list_.Shift() - 8; - auto apc = reinterpret_cast(memory()->TranslateVirtual(apc_ptr)); - bool needs_freeing = apc->kernel_routine == XAPC::kDummyKernelRoutine; - - // Mark as uninserted so that it can be reinserted again by the routine. - apc->enqueued = 0; - - // Call the rundown routine. - if (apc->rundown_routine == XAPC::kDummyRundownRoutine) { - // No-op. - } else if (apc->rundown_routine) { - // rundown_routine(apc) - uint64_t args[] = {apc_ptr}; - kernel_state()->processor()->Execute(thread_state(), apc->rundown_routine, - args, xe::countof(args)); - } - - // If special, free it. - if (needs_freeing) { - memory()->SystemHeapFree(apc_ptr); - } - } - UnlockApc(true); + xboxkrnl::xeRundownApcs(thread_state_->context()); } int32_t XThread::QueryPriority() { return thread_->priority(); } @@ -791,7 +668,8 @@ void XThread::SetActiveCpu(uint8_t cpu_index) { thread_->set_affinity_mask(uint64_t(1) << cpu_index); } } else { - //there no good reason why we need to log this... we don't perfectly emulate the 360's scheduler in any way + // there no good reason why we need to log this... we don't perfectly + // emulate the 360's scheduler in any way // XELOGW("Too few processor cores - scheduling will be wonky"); } } @@ -831,14 +709,18 @@ X_STATUS XThread::Resume(uint32_t* out_suspend_count) { } X_STATUS XThread::Suspend(uint32_t* out_suspend_count) { - auto global_lock = global_critical_region_.Acquire(); + + //this normally holds the apc lock for the thread, because it queues a kernel mode apc that does the actual suspension - ++guest_object()->suspend_count; + X_KTHREAD* guest_thread = guest_object(); + + uint8_t previous_suspend_count = + reinterpret_cast(&guest_thread->suspend_count) + ->fetch_add(1); + + *out_suspend_count = previous_suspend_count; // If we are suspending ourselves, we can't hold the lock. 
- if (XThread::IsInThread() && XThread::GetCurrentThread() == this) { - global_lock.unlock(); - } if (thread_->Suspend(out_suspend_count)) { return X_STATUS_SUCCESS; @@ -941,7 +823,6 @@ bool XThread::Save(ByteStream* stream) { state.thread_id = thread_id_; state.is_main_thread = main_thread_; state.is_running = running_; - state.apc_head = apc_list_.head(); state.tls_static_address = tls_static_address_; state.tls_dynamic_address = tls_dynamic_address_; state.tls_total_size = tls_total_size_; @@ -1004,7 +885,6 @@ object_ref XThread::Restore(KernelState* kernel_state, thread->thread_id_ = state.thread_id; thread->main_thread_ = state.is_main_thread; thread->running_ = state.is_running; - thread->apc_list_.set_head(state.apc_head); thread->tls_static_address_ = state.tls_static_address; thread->tls_dynamic_address_ = state.tls_dynamic_address; thread->tls_total_size_ = state.tls_total_size; @@ -1014,8 +894,6 @@ object_ref XThread::Restore(KernelState* kernel_state, thread->stack_alloc_base_ = state.stack_alloc_base; thread->stack_alloc_size_ = state.stack_alloc_size; - thread->apc_list_.set_memory(kernel_state->memory()); - // Register now that we know our thread ID. 
kernel_state->RegisterThread(thread); diff --git a/src/xenia/kernel/xthread.h b/src/xenia/kernel/xthread.h index 10dcccd8a..4890ac11a 100644 --- a/src/xenia/kernel/xthread.h +++ b/src/xenia/kernel/xthread.h @@ -207,7 +207,8 @@ struct X_KTHREAD { TypedGuestPointer process; // 0x84 uint8_t unk_88[0x3]; // 0x88 uint8_t apc_related; // 0x8B - uint8_t unk_8C[0x10]; // 0x8C + X_KSPINLOCK apc_lock; // 0x8C + uint8_t unk_90[0xC]; // 0x90 xe::be msr_mask; // 0x9C uint8_t unk_A0[4]; // 0xA0 uint8_t unk_A4; // 0xA4 @@ -313,13 +314,7 @@ class XThread : public XObject, public cpu::Thread { void EnterCriticalRegion(); void LeaveCriticalRegion(); - uint32_t RaiseIrql(uint32_t new_irql); - void LowerIrql(uint32_t new_irql); - void CheckApcs(); - void LockApc(); - void UnlockApc(bool queue_delivery); - util::NativeList* apc_list() { return &apc_list_; } void EnqueueApc(uint32_t normal_routine, uint32_t normal_context, uint32_t arg1, uint32_t arg2); @@ -388,10 +383,6 @@ class XThread : public XObject, public cpu::Thread { bool running_ = false; int32_t priority_ = 0; - - xe::global_critical_region global_critical_region_; - std::atomic irql_ = {0}; - util::NativeList apc_list_; }; class XHostThread : public XThread {