total apc rework

This commit is contained in:
disjtqz 2023-10-11 09:47:19 -04:00 committed by Radosław Gliński
parent b5ddd30572
commit d0a6cec024
6 changed files with 430 additions and 282 deletions

View File

@ -158,24 +158,92 @@ static void XeInsertHeadList(X_LIST_ENTRY* list_head, uint32_t entry,
XeInsertHeadList(list_head, XeGuestList(list_head, context),
XeHostList(entry, context), entry, context);
}
// Overload for a guest-address list head and a host-pointer entry: resolves
// the missing host/guest representations via XeHostList/XeGuestList and
// forwards to the canonical XeInsertHeadList that takes both forms of each.
template <typename VirtualTranslator>
static void XeInsertHeadList(uint32_t list_head, X_LIST_ENTRY* entry,
VirtualTranslator context) {
XeInsertHeadList(XeHostList(list_head, context), list_head, entry,
XeGuestList(entry, context), context);
}
// Overload for host pointers on both sides: derives the guest addresses with
// XeGuestList and forwards to the canonical XeInsertHeadList overload.
template <typename VirtualTranslator>
static void XeInsertHeadList(X_LIST_ENTRY* list_head, X_LIST_ENTRY* entry,
VirtualTranslator context) {
XeInsertHeadList(list_head, XeGuestList(list_head, context),
entry, XeGuestList(entry, context), context);
}
template <typename TObject, size_t EntryListOffset>
struct X_TYPED_LIST : public X_LIST_ENTRY {
public:
X_LIST_ENTRY* ObjectListEntry(TObject* obj) {
using this_type = X_TYPED_LIST<TObject, EntryListOffset>;
static X_LIST_ENTRY* ObjectListEntry(TObject* obj) {
return reinterpret_cast<X_LIST_ENTRY*>(
&reinterpret_cast<char*>(obj)[static_cast<ptrdiff_t>(EntryListOffset)]);
}
TObject* ListEntryObject(X_LIST_ENTRY* entry) {
static TObject* ListEntryObject(X_LIST_ENTRY* entry) {
return reinterpret_cast<TObject*>(&reinterpret_cast<char*>(
entry)[-static_cast<ptrdiff_t>(EntryListOffset)]);
}
template <typename VirtualTranslator>
void Initialize(VirtualTranslator* translator) {
struct ForwardIterator {
VirtualTranslator vt;
uint32_t current_entry;
inline ForwardIterator& operator++() {
current_entry =
vt->TranslateVirtual<X_LIST_ENTRY*>(current_entry)->flink_ptr;
return *this;
}
inline bool operator!=(uint32_t other_ptr) const {
return current_entry != other_ptr;
}
inline TObject& operator*() {
return *ListEntryObject(
vt->TranslateVirtual<X_LIST_ENTRY*>(current_entry));
}
};
template <typename VirtualTranslator>
struct ForwardIteratorBegin {
VirtualTranslator vt;
this_type* const thiz;
ForwardIterator<VirtualTranslator> begin() {
return ForwardIterator<VirtualTranslator>{vt, thiz->flink_ptr};
}
uint32_t end() { return vt->HostToGuestVirtual(thiz); }
};
template<typename VirtualTranslator>
ForwardIteratorBegin<VirtualTranslator> IterateForward(VirtualTranslator vt) {
return ForwardIteratorBegin<VirtualTranslator>{vt, this};
}
template <typename VirtualTranslator>
void Initialize(VirtualTranslator translator) {
XeInitializeListHead(this, translator);
}
template <typename VirtualTranslator>
void InsertHead(TObject* entry, VirtualTranslator* translator) {
XeInsertHeadList(this, ObjectListEntry(entry), translator);
void InsertHead(TObject* entry, VirtualTranslator translator) {
XeInsertHeadList(static_cast<X_LIST_ENTRY*>(this), ObjectListEntry(entry), translator);
}
template <typename VirtualTranslator>
void InsertTail(TObject* entry, VirtualTranslator translator) {
XeInsertTailList(this, ObjectListEntry(entry), translator);
}
template<typename VirtualTranslator>
bool empty(VirtualTranslator vt) const {
return vt->TranslateVirtual<X_LIST_ENTRY*>(flink_ptr) == this;
}
template <typename VirtualTranslator>
TObject* HeadObject(VirtualTranslator vt) {
return ListEntryObject(vt->TranslateVirtual<X_LIST_ENTRY*>(flink_ptr));
}
template <typename VirtualTranslator>
TObject* TailObject(VirtualTranslator vt) {
return ListEntryObject(vt->TranslateVirtual<X_LIST_ENTRY*>(blink_ptr));
}
};
} // namespace util

View File

@ -435,12 +435,12 @@ dword_result_t RtlSleep_entry(dword_t dwMilliseconds, dword_t bAlertable) {
: static_cast<LONGLONG>(-10000) * dwMilliseconds;
X_STATUS result = xboxkrnl::KeDelayExecutionThread(MODE::UserMode, bAlertable,
(uint64_t*)&delay);
(uint64_t*)&delay, nullptr);
// If the delay was interrupted by an APC, keep delaying the thread
while (bAlertable && result == X_STATUS_ALERTED) {
result = xboxkrnl::KeDelayExecutionThread(MODE::UserMode, bAlertable,
(uint64_t*)&delay);
(uint64_t*)&delay, nullptr);
}
return result == X_STATUS_SUCCESS ? X_STATUS_SUCCESS : X_STATUS_USER_APC;

View File

@ -379,19 +379,35 @@ DECLARE_XBOXKRNL_EXPORT2(KeQueryPerformanceFrequency, kThreading, kImplemented,
kHighFrequency);
uint32_t KeDelayExecutionThread(uint32_t processor_mode, uint32_t alertable,
uint64_t* interval_ptr) {
uint64_t* interval_ptr,
cpu::ppc::PPCContext* ctx) {
XThread* thread = XThread::GetCurrentThread();
if (alertable) {
X_STATUS stat = xeProcessUserApcs(ctx);
if (stat == X_STATUS_USER_APC) {
return stat;
}
}
X_STATUS result = thread->Delay(processor_mode, alertable, *interval_ptr);
if (result == X_STATUS_USER_APC) {
result = xeProcessUserApcs(ctx);
if (result == X_STATUS_USER_APC) {
return result;
}
}
return result;
}
// Guest export shim for KeDelayExecutionThread: copies the (byte-swapped)
// guest interval into a host temporary so the implementation can take a plain
// uint64_t*, and forwards the PPC context for APC delivery.
dword_result_t KeDelayExecutionThread_entry(dword_t processor_mode,
                                            dword_t alertable,
                                            lpqword_t interval_ptr,
                                            const ppc_context_t& context) {
  // A null guest pointer is forwarded as nullptr rather than a pointer to 0.
  uint64_t interval = interval_ptr ? static_cast<uint64_t>(*interval_ptr) : 0u;
  return KeDelayExecutionThread(processor_mode, alertable,
                                interval_ptr ? &interval : nullptr, context);
}
DECLARE_XBOXKRNL_EXPORT3(KeDelayExecutionThread, kThreading, kImplemented,
kBlocking, kHighFrequency);
@ -889,7 +905,11 @@ uint32_t xeKeWaitForSingleObject(void* object_ptr, uint32_t wait_reason,
X_STATUS result =
object->Wait(wait_reason, processor_mode, alertable, timeout_ptr);
if (alertable) {
if (result == X_STATUS_USER_APC) {
result = xeProcessUserApcs(nullptr);
}
}
return result;
}
@ -915,6 +935,11 @@ uint32_t NtWaitForSingleObjectEx(uint32_t object_handle, uint32_t wait_mode,
uint64_t timeout = timeout_ptr ? static_cast<uint64_t>(*timeout_ptr) : 0u;
result =
object->Wait(3, wait_mode, alertable, timeout_ptr ? &timeout : nullptr);
if (alertable) {
if (result == X_STATUS_USER_APC) {
result = xeProcessUserApcs(nullptr);
}
}
} else {
result = X_STATUS_INVALID_HANDLE;
}
@ -955,9 +980,15 @@ dword_result_t KeWaitForMultipleObjects_entry(
}
}
uint64_t timeout = timeout_ptr ? static_cast<uint64_t>(*timeout_ptr) : 0u;
return XObject::WaitMultiple(
X_STATUS result = XObject::WaitMultiple(
uint32_t(count), reinterpret_cast<XObject**>(&objects[0]), wait_type,
wait_reason, processor_mode, alertable, timeout_ptr ? &timeout : nullptr);
if (alertable) {
if (result == X_STATUS_USER_APC) {
result = xeProcessUserApcs(nullptr);
}
}
return result;
}
DECLARE_XBOXKRNL_EXPORT3(KeWaitForMultipleObjects, kThreading, kImplemented,
kBlocking, kHighFrequency);
@ -993,8 +1024,15 @@ uint32_t xeNtWaitForMultipleObjectsEx(uint32_t count, xe::be<uint32_t>* handles,
}
}
return XObject::WaitMultiple(count, reinterpret_cast<XObject**>(&objects[0]),
wait_type, 6, wait_mode, alertable, timeout_ptr);
auto result =
XObject::WaitMultiple(count, reinterpret_cast<XObject**>(&objects[0]),
wait_type, 6, wait_mode, alertable, timeout_ptr);
if (alertable) {
if (result == X_STATUS_USER_APC) {
result = xeProcessUserApcs(nullptr);
}
}
return result;
}
dword_result_t NtWaitForMultipleObjectsEx_entry(
@ -1034,6 +1072,11 @@ dword_result_t NtSignalAndWaitForSingleObjectEx_entry(dword_t signal_handle,
result = X_STATUS_INVALID_HANDLE;
}
if (alertable) {
if (result == X_STATUS_USER_APC) {
result = xeProcessUserApcs(nullptr);
}
}
return result;
}
DECLARE_XBOXKRNL_EXPORT3(NtSignalAndWaitForSingleObjectEx, kThreading,
@ -1042,8 +1085,7 @@ DECLARE_XBOXKRNL_EXPORT3(NtSignalAndWaitForSingleObjectEx, kThreading,
static void PrefetchForCAS(const void* value) { swcache::PrefetchW(value); }
uint32_t xeKeKfAcquireSpinLock(PPCContext* ctx, X_KSPINLOCK* lock,
bool change_irql)
{
bool change_irql) {
auto old_irql = change_irql ? xeKfRaiseIrql(ctx, 2) : 0;
PrefetchForCAS(lock);
@ -1071,7 +1113,7 @@ void xeKeKfReleaseSpinLock(PPCContext* ctx, X_KSPINLOCK* lock, dword_t old_irql,
assert_true(lock->prcb_of_owner == static_cast<uint32_t>(ctx->r[13]));
// Unlock.
lock->prcb_of_owner.value = 0;
if (change_irql) {
// Unlock.
if (old_irql >= 2) {
@ -1186,23 +1228,169 @@ dword_result_t KfRaiseIrql_entry(dword_t new_irql, const ppc_context_t& ctx) {
DECLARE_XBOXKRNL_EXPORT2(KfRaiseIrql, kThreading, kImplemented, kHighFrequency);
void NtQueueApcThread_entry(dword_t thread_handle, lpvoid_t apc_routine,
lpvoid_t apc_routine_context, lpvoid_t arg1,
lpvoid_t arg2) {
uint32_t xeNtQueueApcThread(uint32_t thread_handle, uint32_t apc_routine,
uint32_t apc_routine_context, uint32_t arg1,
uint32_t arg2, cpu::ppc::PPCContext* context) {
auto kernelstate = context->kernel_state;
auto memory = kernelstate->memory();
auto thread =
kernel_state()->object_table()->LookupObject<XThread>(thread_handle);
kernelstate->object_table()->LookupObject<XThread>(thread_handle);
if (!thread) {
XELOGE("NtQueueApcThread: Incorrect thread handle! Might cause crash");
return;
return X_STATUS_INVALID_HANDLE;
}
if (!apc_routine) {
XELOGE("NtQueueApcThread: Incorrect apc routine! Might cause crash");
return;
uint32_t apc_ptr = memory->SystemHeapAlloc(XAPC::kSize);
if (!apc_ptr) {
return X_STATUS_NO_MEMORY;
}
XAPC* apc = context->TranslateVirtual<XAPC*>(apc_ptr);
xeKeInitializeApc(apc, thread->guest_object(), XAPC::kDummyKernelRoutine, 0,
apc_routine, 1 /*user apc mode*/, apc_routine_context);
if (!xeKeInsertQueueApc(apc, arg1, arg2, 0, context)) {
memory->SystemHeapFree(apc_ptr);
return X_STATUS_UNSUCCESSFUL;
}
// no-op, just meant to awaken a sleeping alertable thread to process real
// apcs
thread->thread()->QueueUserCallback([]() {});
return X_STATUS_SUCCESS;
}
// Guest export shim for NtQueueApcThread: converts the guest-ABI argument
// wrappers to plain guest addresses and forwards to xeNtQueueApcThread.
dword_result_t NtQueueApcThread_entry(dword_t thread_handle,
lpvoid_t apc_routine,
lpvoid_t apc_routine_context,
lpvoid_t arg1, lpvoid_t arg2,
const ppc_context_t& context) {
return xeNtQueueApcThread(thread_handle, apc_routine, apc_routine_context,
arg1, arg2, context);
}
// Drains the current thread's user-mode APC list (apc_lists[1]), invoking each
// APC's kernel routine (or freeing dummy-kernel APCs) and then its normal
// routine. Returns X_STATUS_USER_APC if at least one APC was processed,
// X_STATUS_SUCCESS otherwise. The APC spinlock is dropped around guest-code
// execution and reacquired before re-checking the list.
X_STATUS xeProcessUserApcs(PPCContext* ctx) {
  if (!ctx) {
    // Callers without a context (host-side paths) use the bound thread state.
    ctx = cpu::ThreadState::Get()->context();
  }
  X_STATUS alert_status = X_STATUS_SUCCESS;
  auto kpcr = ctx->TranslateVirtualGPR<X_KPCR*>(ctx->r[13]);

  auto current_thread = ctx->TranslateVirtual(kpcr->prcb_data.current_thread);

  uint32_t unlocked_irql =
      xeKeKfAcquireSpinLock(ctx, &current_thread->apc_lock);

  auto& user_apc_queue = current_thread->apc_lists[1];

  // use guest stack for temporaries: the kernel routine receives guest
  // pointers it may rewrite (normal routine/context/args), so they must live
  // in guest-visible memory.
  uint32_t old_stack_pointer = static_cast<uint32_t>(ctx->r[1]);

  uint32_t scratch_address = old_stack_pointer - 16;
  ctx->r[1] = old_stack_pointer - 32;

  while (!user_apc_queue.empty(ctx)) {
    uint32_t apc_ptr = user_apc_queue.flink_ptr;

    XAPC* apc = user_apc_queue.ListEntryObject(
        ctx->TranslateVirtual<X_LIST_ENTRY*>(apc_ptr));

    uint8_t* scratch_ptr = ctx->TranslateVirtual(scratch_address);
    xe::store_and_swap<uint32_t>(scratch_ptr + 0, apc->normal_routine);
    xe::store_and_swap<uint32_t>(scratch_ptr + 4, apc->normal_context);
    xe::store_and_swap<uint32_t>(scratch_ptr + 8, apc->arg1);
    xe::store_and_swap<uint32_t>(scratch_ptr + 12, apc->arg2);

    // Dequeue before releasing the lock so other threads can't see a
    // half-delivered APC; mark it re-insertable for the routines below.
    util::XeRemoveEntryList(&apc->list_entry, ctx);
    apc->enqueued = 0;

    xeKeKfReleaseSpinLock(ctx, &current_thread->apc_lock, unlocked_irql);

    alert_status = X_STATUS_USER_APC;

    if (apc->kernel_routine != XAPC::kDummyKernelRoutine) {
      // kernel_routine(apc, &normal_routine, &normal_context, &arg1, &arg2) —
      // it may rewrite any of the by-pointer values before normal delivery.
      uint64_t kernel_args[] = {
          apc_ptr,
          scratch_address + 0,
          scratch_address + 4,
          scratch_address + 8,
          scratch_address + 12,
      };
      ctx->processor->Execute(ctx->thread_state, apc->kernel_routine,
                              kernel_args, xe::countof(kernel_args));
    } else {
      // Dummy kernel routine marks a heap-allocated APC owned by us: free it.
      ctx->kernel_state->memory()->SystemHeapFree(apc_ptr);
    }

    // Reload the (possibly rewritten) values from guest scratch.
    uint32_t normal_routine = xe::load_and_swap<uint32_t>(scratch_ptr + 0);
    uint32_t normal_context = xe::load_and_swap<uint32_t>(scratch_ptr + 4);
    uint32_t arg1 = xe::load_and_swap<uint32_t>(scratch_ptr + 8);
    uint32_t arg2 = xe::load_and_swap<uint32_t>(scratch_ptr + 12);

    if (normal_routine) {
      // normal_routine(normal_context, arg1, arg2)
      uint64_t normal_args[] = {normal_context, arg1, arg2};
      ctx->processor->Execute(ctx->thread_state, normal_routine, normal_args,
                              xe::countof(normal_args));
    }

    unlocked_irql = xeKeKfAcquireSpinLock(ctx, &current_thread->apc_lock);
  }

  // Restore the guest stack pointer we borrowed for scratch space.
  ctx->r[1] = old_stack_pointer;

  xeKeKfReleaseSpinLock(ctx, &current_thread->apc_lock, unlocked_irql);
  return alert_status;
}
// Detaches the entire APC list for the given mode (0 = kernel, 1 = user) from
// the thread under the APC spinlock, then — outside the lock — optionally runs
// each detached APC's rundown routine, or frees the APC's system-heap
// allocation when it has no rundown routine.
// NOTE(review): entries are walked via raw flink pointers after the list head
// has been unlinked; termination relies on hitting a zero flink or wrapping
// back to the first entry — confirm against list invariants.
static void YankApcList(PPCContext* ctx, X_KTHREAD* current_thread, unsigned apc_mode,
bool rundown) {
uint32_t unlocked_irql =
xeKeKfAcquireSpinLock(ctx, &current_thread->apc_lock);
XAPC* result = nullptr;
auto& user_apc_queue = current_thread->apc_lists[apc_mode];
if (user_apc_queue.empty(ctx)) {
result = nullptr;
} else {
// Remember the first APC so the post-unlock walk knows where to start
// (and where to stop if the links wrap around).
result = user_apc_queue.HeadObject(ctx);
// Mark every APC as dequeued before detaching the list.
for (auto&& entry : user_apc_queue.IterateForward(ctx)) {
entry.enqueued = 0;
}
// Unlink the list head itself; the entries now dangle off `result`.
util::XeRemoveEntryList(&user_apc_queue, ctx);
}
xeKeKfReleaseSpinLock(ctx, &current_thread->apc_lock, unlocked_irql);

if (rundown && result) {
XAPC* current_entry = result;
while (true) {
XAPC* this_entry = current_entry;
// Capture flink first: the rundown routine / free may clobber the node.
uint32_t next_entry = this_entry->list_entry.flink_ptr;
if (this_entry->rundown_routine) {
// rundown_routine(apc) — guest cleanup for the cancelled APC.
uint64_t args[] = {ctx->HostToGuestVirtual(this_entry)};
kernel_state()->processor()->Execute(ctx->thread_state,
this_entry->rundown_routine, args,
xe::countof(args));
} else {
// No rundown routine: this APC was system-heap allocated by us; free.
ctx->kernel_state->memory()->SystemHeapFree(
ctx->HostToGuestVirtual(this_entry));
}
if (next_entry == 0) {
break;
}
current_entry = user_apc_queue.ListEntryObject(
ctx->TranslateVirtual<X_LIST_ENTRY*>(next_entry));
if (current_entry == result) {
break;
}
}
}
}
// Strips every pending APC off the current thread (used at thread teardown):
// user-mode APCs (list 1) have their rundown routines executed, kernel-mode
// APCs (list 0) are discarded without rundown.
void xeRundownApcs(PPCContext* ctx) {
  auto pcr = ctx->TranslateVirtualGPR<X_KPCR*>(ctx->r[13]);
  auto kthread = ctx->TranslateVirtual(pcr->prcb_data.current_thread);
  // User APCs first, with rundown; then kernel APCs, without.
  YankApcList(ctx, kthread, 1, true);
  YankApcList(ctx, kthread, 0, false);
}
DECLARE_XBOXKRNL_EXPORT1(NtQueueApcThread, kThreading, kImplemented);
void xeKeInitializeApc(XAPC* apc, uint32_t thread_ptr, uint32_t kernel_routine,
@ -1231,67 +1419,82 @@ void KeInitializeApc_entry(pointer_t<XAPC> apc, lpvoid_t thread_ptr,
}
DECLARE_XBOXKRNL_EXPORT1(KeInitializeApc, kThreading, kImplemented);
// Inserts an initialized APC into its target thread's per-mode APC list under
// the thread's APC spinlock. Returns 1 on success, 0 if the APC has no target
// thread, the thread is not accepting APCs, or the APC is already enqueued.
// NOTE(review): priority_increment is currently unused — confirm intended.
uint32_t xeKeInsertQueueApc(XAPC* apc, uint32_t arg1, uint32_t arg2,
uint32_t priority_increment,
cpu::ppc::PPCContext* context) {
uint32_t thread_guest_pointer = apc->thread_ptr;
if (!thread_guest_pointer) {
return 0;
}
auto target_thread = context->TranslateVirtual<X_KTHREAD*>(apc->thread_ptr);
auto old_irql = xeKeKfAcquireSpinLock(context, &target_thread->apc_lock);
uint32_t result;
if (!target_thread->apc_related || apc->enqueued) {
// Thread refuses APCs, or this APC is already queued: fail.
result = 0;
} else {
apc->arg1 = arg1;
apc->arg2 = arg2;
auto& which_list = target_thread->apc_lists[apc->apc_mode];

if (apc->normal_routine) {
// Normal APCs go to the back of the queue.
which_list.InsertTail(apc, context);
} else {
// Special APCs (no normal routine) are queued ahead of all normal
// APCs: find the first entry that has a normal routine and insert
// before it.
XAPC* insertion_pos = nullptr;
for (auto&& sub_apc : which_list.IterateForward(context)) {
insertion_pos = &sub_apc;
if (sub_apc.normal_routine) {
break;
}
}
if (!insertion_pos) {
which_list.InsertHead(apc, context);
} else {
util::XeInsertHeadList(insertion_pos->list_entry.blink_ptr,
&apc->list_entry, context);
}
}

apc->enqueued = 1;
/*
todo: this is incomplete, a ton of other logic happens here, i believe
for waking the target thread if its alertable
*/
result = 1;
}
xeKeKfReleaseSpinLock(context, &target_thread->apc_lock, old_irql);
return result;
}
// Guest export shim for KeInsertQueueApc: all queueing logic (locking,
// duplicate-enqueue rejection, special-APC ordering) lives in
// xeKeInsertQueueApc; this just adapts the guest-ABI wrappers.
dword_result_t KeInsertQueueApc_entry(pointer_t<XAPC> apc, lpvoid_t arg1,
                                      lpvoid_t arg2, dword_t priority_increment,
                                      const ppc_context_t& context) {
  return xeKeInsertQueueApc(apc, arg1, arg2, priority_increment, context);
}
DECLARE_XBOXKRNL_EXPORT1(KeInsertQueueApc, kThreading, kImplemented);
// Guest export: removes a queued APC from its target thread's APC list.
// Returns 1 if the APC was found enqueued and removed, 0 otherwise.
dword_result_t KeRemoveQueueApc_entry(pointer_t<XAPC> apc,
                                      const ppc_context_t& context) {
  bool result = false;

  uint32_t thread_guest_pointer = apc->thread_ptr;
  if (!thread_guest_pointer) {
    // APC was never initialized with a target thread; nothing to remove.
    return 0;
  }
  auto target_thread = context->TranslateVirtual<X_KTHREAD*>(apc->thread_ptr);
  auto old_irql = xeKeKfAcquireSpinLock(context, &target_thread->apc_lock);

  if (apc->enqueued) {
    result = true;
    apc->enqueued = 0;
    util::XeRemoveEntryList(&apc->list_entry, context);
    // todo: this is incomplete, there is more logic here in actual kernel
  }

  xeKeKfReleaseSpinLock(context, &target_thread->apc_lock, old_irql);

  return result ? 1 : 0;
}
@ -1358,7 +1561,7 @@ struct X_ERWLOCK {
be<uint32_t> readers_entry_count; // 0xC
X_KEVENT writer_event; // 0x10
X_KSEMAPHORE reader_semaphore; // 0x20
X_KSPINLOCK spin_lock; // 0x34
X_KSPINLOCK spin_lock; // 0x34
};
static_assert_size(X_ERWLOCK, 0x38);
@ -1393,8 +1596,7 @@ DECLARE_XBOXKRNL_EXPORT2(ExAcquireReadWriteLockExclusive, kThreading,
dword_result_t ExTryToAcquireReadWriteLockExclusive_entry(
pointer_t<X_ERWLOCK> lock_ptr, const ppc_context_t& ppc_context) {
auto old_irql =
xeKeKfAcquireSpinLock(ppc_context, &lock_ptr->spin_lock);
auto old_irql = xeKeKfAcquireSpinLock(ppc_context, &lock_ptr->spin_lock);
uint32_t result;
if (lock_ptr->lock_count < 0) {
@ -1412,13 +1614,13 @@ DECLARE_XBOXKRNL_EXPORT1(ExTryToAcquireReadWriteLockExclusive, kThreading,
void ExAcquireReadWriteLockShared_entry(pointer_t<X_ERWLOCK> lock_ptr,
const ppc_context_t& ppc_context) {
auto old_irql = xeKeKfAcquireSpinLock(ppc_context, & lock_ptr->spin_lock);
auto old_irql = xeKeKfAcquireSpinLock(ppc_context, &lock_ptr->spin_lock);
int32_t lock_count = ++lock_ptr->lock_count;
if (!lock_count ||
(lock_ptr->readers_entry_count && !lock_ptr->writers_waiting_count)) {
lock_ptr->readers_entry_count++;
xeKeKfReleaseSpinLock(ppc_context, & lock_ptr->spin_lock, old_irql);
xeKeKfReleaseSpinLock(ppc_context, &lock_ptr->spin_lock, old_irql);
return;
}
@ -1432,7 +1634,7 @@ DECLARE_XBOXKRNL_EXPORT2(ExAcquireReadWriteLockShared, kThreading, kImplemented,
dword_result_t ExTryToAcquireReadWriteLockShared_entry(
pointer_t<X_ERWLOCK> lock_ptr, const ppc_context_t& ppc_context) {
auto old_irql = xeKeKfAcquireSpinLock(ppc_context, & lock_ptr->spin_lock);
auto old_irql = xeKeKfAcquireSpinLock(ppc_context, &lock_ptr->spin_lock);
uint32_t result;
if (lock_ptr->lock_count < 0 ||
@ -1452,7 +1654,7 @@ DECLARE_XBOXKRNL_EXPORT1(ExTryToAcquireReadWriteLockShared, kThreading,
void ExReleaseReadWriteLock_entry(pointer_t<X_ERWLOCK> lock_ptr,
const ppc_context_t& ppc_context) {
auto old_irql = xeKeKfAcquireSpinLock(ppc_context, & lock_ptr->spin_lock);
auto old_irql = xeKeKfAcquireSpinLock(ppc_context, &lock_ptr->spin_lock);
int32_t lock_count = --lock_ptr->lock_count;

View File

@ -38,7 +38,8 @@ uint32_t NtWaitForSingleObjectEx(uint32_t object_handle, uint32_t wait_mode,
uint32_t xeKeSetEvent(X_KEVENT* event_ptr, uint32_t increment, uint32_t wait);
uint32_t KeDelayExecutionThread(uint32_t processor_mode, uint32_t alertable,
uint64_t* interval_ptr);
uint64_t* interval_ptr,
cpu::ppc::PPCContext* ctx);
uint32_t ExCreateThread(xe::be<uint32_t>* handle_ptr, uint32_t stack_size,
xe::be<uint32_t>* thread_id_ptr,
@ -53,13 +54,21 @@ uint32_t NtClose(uint32_t handle);
void xeKeInitializeApc(XAPC* apc, uint32_t thread_ptr, uint32_t kernel_routine,
uint32_t rundown_routine, uint32_t normal_routine,
uint32_t apc_mode, uint32_t normal_context);
uint32_t xeKeInsertQueueApc(XAPC* apc, uint32_t arg1, uint32_t arg2,
uint32_t priority_increment,
cpu::ppc::PPCContext* context);
uint32_t xeNtQueueApcThread(uint32_t thread_handle, uint32_t apc_routine,
uint32_t apc_routine_context, uint32_t arg1,
uint32_t arg2, cpu::ppc::PPCContext* context);
void xeKfLowerIrql(PPCContext* ctx, unsigned char new_irql);
unsigned char xeKfRaiseIrql(PPCContext* ctx, unsigned char new_irql);
void xeKeKfReleaseSpinLock(PPCContext* ctx, X_KSPINLOCK* lock, dword_t old_irql, bool change_irql=true);
uint32_t xeKeKfAcquireSpinLock(PPCContext* ctx, X_KSPINLOCK* lock, bool change_irql=true);
X_STATUS xeProcessUserApcs(PPCContext* ctx);
void xeRundownApcs(PPCContext* ctx);
} // namespace xboxkrnl
} // namespace kernel

View File

@ -25,9 +25,9 @@
#include "xenia/emulator.h"
#include "xenia/kernel/kernel_state.h"
#include "xenia/kernel/user_module.h"
#include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h"
#include "xenia/kernel/xevent.h"
#include "xenia/kernel/xmutant.h"
DEFINE_bool(ignore_thread_priorities, true,
"Ignores game-specified thread priorities.", "Kernel");
DEFINE_bool(ignore_thread_affinities, true,
@ -62,8 +62,7 @@ XThread::XThread(KernelState* kernel_state, uint32_t stack_size,
: XObject(kernel_state, kObjectType, !guest_thread),
thread_id_(++next_xthread_id_),
guest_thread_(guest_thread),
main_thread_(main_thread),
apc_list_(kernel_state->memory()) {
main_thread_(main_thread) {
creation_params_.stack_size = stack_size;
creation_params_.xapi_thread_startup = xapi_thread_startup;
creation_params_.start_address = start_address;
@ -179,52 +178,55 @@ static uint8_t GetFakeCpuNumber(uint8_t proc_mask) {
void XThread::InitializeGuestObject() {
auto guest_thread = guest_object<X_KTHREAD>();
// Setup the thread state block (last error/etc).
uint8_t* p = memory()->TranslateVirtual(guest_object());
auto thread_guest_ptr = guest_object();
guest_thread->header.type = 6;
guest_thread->suspend_count =
(creation_params_.creation_flags & X_CREATE_SUSPENDED) ? 1 : 0;
xe::store_and_swap<uint32_t>(p + 0x010, guest_object() + 0x010);
xe::store_and_swap<uint32_t>(p + 0x014, guest_object() + 0x010);
guest_thread->unk_10 = (thread_guest_ptr + 0x10);
guest_thread->unk_14 = (thread_guest_ptr + 0x10);
guest_thread->unk_40 = (thread_guest_ptr + 0x20);
guest_thread->unk_44 = (thread_guest_ptr + 0x20);
guest_thread->unk_48 = (thread_guest_ptr);
uint32_t v6 = thread_guest_ptr + 0x18;
*(uint32_t*)&guest_thread->unk_54 = 16777729;
guest_thread->unk_4C = (v6);
guest_thread->stack_base = (this->stack_base_);
guest_thread->stack_limit = (this->stack_limit_);
guest_thread->stack_kernel = (this->stack_base_ - 240);
guest_thread->tls_address = (this->tls_static_address_);
guest_thread->thread_state = 0;
uint32_t process_info_block_address =
kernel_state_->process_info_block_address();
xe::store_and_swap<uint32_t>(p + 0x040, guest_object() + 0x018 + 8);
xe::store_and_swap<uint32_t>(p + 0x044, guest_object() + 0x018 + 8);
xe::store_and_swap<uint32_t>(p + 0x048, guest_object());
xe::store_and_swap<uint32_t>(p + 0x04C, guest_object() + 0x018);
X_KPROCESS* process =
memory()->TranslateVirtual<X_KPROCESS*>(process_info_block_address);
uint32_t kpcrb = pcr_address_ + offsetof(X_KPCR, prcb_data);
xe::store_and_swap<uint16_t>(p + 0x054, 0x102);
xe::store_and_swap<uint16_t>(p + 0x056, 1);
xe::store_and_swap<uint32_t>(p + 0x05C, stack_base_);
xe::store_and_swap<uint32_t>(p + 0x060, stack_limit_);
xe::store_and_swap<uint32_t>(p + 0x064, stack_base_ - kThreadKernelStackSize);
xe::store_and_swap<uint32_t>(p + 0x068, tls_static_address_);
xe::store_and_swap<uint8_t>(p + 0x06C, 0);
xe::store_and_swap<uint32_t>(p + 0x074, guest_object() + 0x074);
xe::store_and_swap<uint32_t>(p + 0x078, guest_object() + 0x074);
xe::store_and_swap<uint32_t>(p + 0x07C, guest_object() + 0x07C);
xe::store_and_swap<uint32_t>(p + 0x080, guest_object() + 0x07C);
xe::store_and_swap<uint32_t>(p + 0x084,
kernel_state_->process_info_block_address());
xe::store_and_swap<uint8_t>(p + 0x08B, 1);
// 0xD4 = APC
// 0xFC = semaphore (ptr, 0, 2)
// 0xA88 = APC
// 0x18 = timer
xe::store_and_swap<uint32_t>(p + 0x09C, 0xFDFFD7FF);
// current_cpu is expected to be initialized externally via SetActiveCpu.
xe::store_and_swap<uint32_t>(p + 0x0D0, stack_base_);
xe::store_and_swap<uint64_t>(p + 0x130, Clock::QueryGuestSystemTime());
xe::store_and_swap<uint32_t>(p + 0x144, guest_object() + 0x144);
xe::store_and_swap<uint32_t>(p + 0x148, guest_object() + 0x144);
xe::store_and_swap<uint32_t>(p + 0x14C, thread_id_);
xe::store_and_swap<uint32_t>(p + 0x150, creation_params_.start_address);
xe::store_and_swap<uint32_t>(p + 0x154, guest_object() + 0x154);
xe::store_and_swap<uint32_t>(p + 0x158, guest_object() + 0x154);
xe::store_and_swap<uint32_t>(p + 0x160, 0); // last error
xe::store_and_swap<uint32_t>(p + 0x16C, creation_params_.creation_flags);
xe::store_and_swap<uint32_t>(p + 0x17C, 1);
auto process_type = X_PROCTYPE_USER; // process->process_type;
guest_thread->process_type_dup = process_type;
guest_thread->process_type = process_type;
guest_thread->apc_lists[0].Initialize(memory());
guest_thread->apc_lists[1].Initialize(memory());
guest_thread->a_prcb_ptr = kpcrb;
guest_thread->another_prcb_ptr = kpcrb;
guest_thread->apc_related = 1;
guest_thread->msr_mask = 0xFDFFD7FF;
guest_thread->process = process_info_block_address;
guest_thread->stack_alloc_base = this->stack_base_;
guest_thread->create_time = Clock::QueryGuestSystemTime();
guest_thread->unk_144 = thread_guest_ptr + 324;
guest_thread->unk_148 = thread_guest_ptr + 324;
guest_thread->thread_id = this->thread_id_;
guest_thread->start_address = this->creation_params_.start_address;
guest_thread->unk_154 = thread_guest_ptr + 340;
uint32_t v9 = thread_guest_ptr;
guest_thread->last_error = 0;
guest_thread->unk_158 = v9 + 340;
guest_thread->creation_flags = this->creation_params_.creation_flags;
guest_thread->unk_17C = 1;
}
bool XThread::AllocateStack(uint32_t size) {
@ -449,7 +451,8 @@ X_STATUS XThread::Create() {
X_STATUS XThread::Exit(int exit_code) {
// This may only be called on the thread itself.
assert_true(XThread::GetCurrentThread() == this);
//TODO(chrispy): not sure if this order is correct, should it come after apcs?
// TODO(chrispy): not sure if this order is correct, should it come after
// apcs?
guest_object<X_KTHREAD>()->terminated = 1;
// TODO(benvanik): dispatch events? waiters? etc?
@ -590,53 +593,15 @@ void XThread::LeaveCriticalRegion() {
auto apc_disable_count = ++kthread->apc_disable_count;
}
uint32_t XThread::RaiseIrql(uint32_t new_irql) {
return irql_.exchange(new_irql);
}
void XThread::LowerIrql(uint32_t new_irql) { irql_ = new_irql; }
void XThread::CheckApcs() { DeliverAPCs(); }
void XThread::LockApc() { global_critical_region_.mutex().lock(); }
void XThread::UnlockApc(bool queue_delivery) {
bool needs_apc = apc_list_.HasPending();
global_critical_region_.mutex().unlock();
if (needs_apc && queue_delivery) {
thread_->QueueUserCallback([this]() {
cpu::ThreadState::Bind(this->thread_state());
this->SetCurrentThread(); // we store current thread in TLS, but tls
// slots are different in windows user
// callback!
DeliverAPCs();
});
}
}
// Queues a user-mode APC on this thread by delegating to the kernel-side
// implementation, which allocates and links the guest XAPC structure.
void XThread::EnqueueApc(uint32_t normal_routine, uint32_t normal_context,
                         uint32_t arg1, uint32_t arg2) {
  // don't use thread_state_ -> context() ! we're not running on the thread
  // we're enqueuing to
  uint32_t success = xboxkrnl::xeNtQueueApcThread(
      this->handle(), normal_routine, normal_context, arg1, arg2,
      cpu::ThreadState::Get()->context());

  xenia_assert(success == X_STATUS_SUCCESS);
}
void XThread::SetCurrentThread() { current_xthread_tls_ = this; }
@ -644,99 +609,11 @@ void XThread::SetCurrentThread() { current_xthread_tls_ = this; }
// Delivers all pending user-mode APCs for this thread.
// https://www.drdobbs.com/inside-nts-asynchronous-procedure-call/184416590?pgno=1
// https://www.drdobbs.com/inside-nts-asynchronous-procedure-call/184416590?pgno=7
// APC state now lives entirely in the guest X_KTHREAD, so delivery is
// delegated to the kernel implementation operating on this thread's context.
void XThread::DeliverAPCs() {
  xboxkrnl::xeProcessUserApcs(thread_state_->context());
}
// Runs down (cancels) all pending APCs as the thread exits; user APCs get
// their rundown routines, kernel APCs are discarded. Delegated to the
// kernel-side implementation operating on the guest X_KTHREAD state.
void XThread::RundownAPCs() {
  // May only be called on the exiting thread itself.
  assert_true(XThread::GetCurrentThread() == this);
  xboxkrnl::xeRundownApcs(thread_state_->context());
}
int32_t XThread::QueryPriority() { return thread_->priority(); }
@ -791,7 +668,8 @@ void XThread::SetActiveCpu(uint8_t cpu_index) {
thread_->set_affinity_mask(uint64_t(1) << cpu_index);
}
} else {
//there no good reason why we need to log this... we don't perfectly emulate the 360's scheduler in any way
// there no good reason why we need to log this... we don't perfectly
// emulate the 360's scheduler in any way
// XELOGW("Too few processor cores - scheduling will be wonky");
}
}
@ -831,14 +709,18 @@ X_STATUS XThread::Resume(uint32_t* out_suspend_count) {
}
X_STATUS XThread::Suspend(uint32_t* out_suspend_count) {
auto global_lock = global_critical_region_.Acquire();
//this normally holds the apc lock for the thread, because it queues a kernel mode apc that does the actual suspension
++guest_object<X_KTHREAD>()->suspend_count;
X_KTHREAD* guest_thread = guest_object<X_KTHREAD>();
uint8_t previous_suspend_count =
reinterpret_cast<std::atomic_uint8_t*>(&guest_thread->suspend_count)
->fetch_add(1);
*out_suspend_count = previous_suspend_count;
// If we are suspending ourselves, we can't hold the lock.
if (XThread::IsInThread() && XThread::GetCurrentThread() == this) {
global_lock.unlock();
}
if (thread_->Suspend(out_suspend_count)) {
return X_STATUS_SUCCESS;
@ -941,7 +823,6 @@ bool XThread::Save(ByteStream* stream) {
state.thread_id = thread_id_;
state.is_main_thread = main_thread_;
state.is_running = running_;
state.apc_head = apc_list_.head();
state.tls_static_address = tls_static_address_;
state.tls_dynamic_address = tls_dynamic_address_;
state.tls_total_size = tls_total_size_;
@ -1004,7 +885,6 @@ object_ref<XThread> XThread::Restore(KernelState* kernel_state,
thread->thread_id_ = state.thread_id;
thread->main_thread_ = state.is_main_thread;
thread->running_ = state.is_running;
thread->apc_list_.set_head(state.apc_head);
thread->tls_static_address_ = state.tls_static_address;
thread->tls_dynamic_address_ = state.tls_dynamic_address;
thread->tls_total_size_ = state.tls_total_size;
@ -1014,8 +894,6 @@ object_ref<XThread> XThread::Restore(KernelState* kernel_state,
thread->stack_alloc_base_ = state.stack_alloc_base;
thread->stack_alloc_size_ = state.stack_alloc_size;
thread->apc_list_.set_memory(kernel_state->memory());
// Register now that we know our thread ID.
kernel_state->RegisterThread(thread);

View File

@ -207,7 +207,8 @@ struct X_KTHREAD {
TypedGuestPointer<X_KPROCESS> process; // 0x84
uint8_t unk_88[0x3]; // 0x88
uint8_t apc_related; // 0x8B
uint8_t unk_8C[0x10]; // 0x8C
X_KSPINLOCK apc_lock; // 0x8C
uint8_t unk_90[0xC]; // 0x90
xe::be<uint32_t> msr_mask; // 0x9C
uint8_t unk_A0[4]; // 0xA0
uint8_t unk_A4; // 0xA4
@ -313,13 +314,7 @@ class XThread : public XObject, public cpu::Thread {
void EnterCriticalRegion();
void LeaveCriticalRegion();
uint32_t RaiseIrql(uint32_t new_irql);
void LowerIrql(uint32_t new_irql);
void CheckApcs();
void LockApc();
void UnlockApc(bool queue_delivery);
util::NativeList* apc_list() { return &apc_list_; }
void EnqueueApc(uint32_t normal_routine, uint32_t normal_context,
uint32_t arg1, uint32_t arg2);
@ -388,10 +383,6 @@ class XThread : public XObject, public cpu::Thread {
bool running_ = false;
int32_t priority_ = 0;
xe::global_critical_region global_critical_region_;
std::atomic<uint32_t> irql_ = {0};
util::NativeList apc_list_;
};
class XHostThread : public XThread {