diff --git a/src/xenia/cpu/backend/x64/x64_tracers.cc b/src/xenia/cpu/backend/x64/x64_tracers.cc index 2f90fe09c..010ce6db6 100644 --- a/src/xenia/cpu/backend/x64/x64_tracers.cc +++ b/src/xenia/cpu/backend/x64/x64_tracers.cc @@ -26,25 +26,25 @@ namespace x64 { #define ITRACE 0 #define DTRACE 0 -#define TARGET_THREAD 4 +#define TARGET_THREAD 0 bool trace_enabled = true; +#define THREAD_MATCH \ + (!TARGET_THREAD || thread_state->thread_id() == TARGET_THREAD) #if !DTRACE -#define IFLUSH() \ - if (trace_enabled && thread_state->thread_id() == TARGET_THREAD) \ - fflush(stdout) +#define IFLUSH() \ + if (trace_enabled && THREAD_MATCH) fflush(stdout) #else #define IFLUSH() #endif #define IPRINT \ - if (trace_enabled && thread_state->thread_id() == TARGET_THREAD) printf -#define DFLUSH() \ - if (trace_enabled && thread_state->thread_id() == TARGET_THREAD) \ - fflush(stdout) + if (trace_enabled && THREAD_MATCH) printf +#define DFLUSH() \ + if (trace_enabled && THREAD_MATCH) fflush(stdout) #define DPRINT \ DFLUSH(); \ - if (trace_enabled && thread_state->thread_id() == TARGET_THREAD) printf + if (trace_enabled && THREAD_MATCH) printf uint32_t GetTracingMode() { uint32_t mode = 0; diff --git a/src/xenia/kernel/objects/xfile.cc b/src/xenia/kernel/objects/xfile.cc index fb5c87655..8a0a94a72 100644 --- a/src/xenia/kernel/objects/xfile.cc +++ b/src/xenia/kernel/objects/xfile.cc @@ -40,6 +40,7 @@ X_STATUS XFile::Read(void* buffer, size_t buffer_length, size_t byte_offset, if (XSUCCEEDED(result)) { position_ += *out_bytes_read; } + async_event_->Set(0, false); return result; } @@ -64,6 +65,7 @@ X_STATUS XFile::Write(const void* buffer, size_t buffer_length, if (XSUCCEEDED(result)) { position_ += *out_bytes_written; } + async_event_->Set(0, false); return result; } diff --git a/src/xenia/kernel/objects/xthread.cc b/src/xenia/kernel/objects/xthread.cc index 8f5d28076..3aa0a0548 100644 --- a/src/xenia/kernel/objects/xthread.cc +++ b/src/xenia/kernel/objects/xthread.cc @@ -97,9 +97,7 @@ 
XThread::~XThread() { } } -bool XThread::IsInThread(XThread* other) { - return current_thread_tls == other; -} +bool XThread::IsInThread(XThread* other) { return current_thread_tls == other; } XThread* XThread::GetCurrentThread() { XThread* thread = current_thread_tls; @@ -135,7 +133,7 @@ void XThread::set_name(const std::string& name) { uint8_t fake_CPU_number(uint8_t proc_mask) { if (!proc_mask) { - return 0; // is this reasonable? + return 0; // is this reasonable? } assert_false(proc_mask & 0xC0); @@ -207,7 +205,8 @@ X_STATUS XThread::Create() { thread_state_->thread_id(), thread_state_->stack_limit(), thread_state_->stack_base()); - uint8_t proc_mask = static_cast(creation_params_.creation_flags >> 24); + uint8_t proc_mask = + static_cast(creation_params_.creation_flags >> 24); uint8_t* pcr = memory()->TranslateVirtual(pcr_address_); std::memset(pcr, 0x0, 0x2D8 + 0xAB0); // Zero the PCR @@ -217,8 +216,9 @@ X_STATUS XThread::Create() { thread_state_->stack_size()); xe::store_and_swap(pcr + 0x074, thread_state_->stack_address()); xe::store_and_swap(pcr + 0x100, thread_state_address_); - xe::store_and_swap(pcr + 0x10C, fake_CPU_number(proc_mask)); // Current CPU(?) - xe::store_and_swap(pcr + 0x150, 0); // DPC active bool? + xe::store_and_swap(pcr + 0x10C, + fake_CPU_number(proc_mask)); // Current CPU(?) + xe::store_and_swap(pcr + 0x150, 0); // DPC active bool? // Setup the thread state block (last error/etc). 
uint8_t* p = memory()->TranslateVirtual(thread_state_address_); @@ -269,7 +269,7 @@ X_STATUS XThread::Create() { return return_code; } - //uint32_t proc_mask = creation_params_.creation_flags >> 24; + // uint32_t proc_mask = creation_params_.creation_flags >> 24; if (proc_mask) { SetAffinity(proc_mask); } @@ -453,21 +453,49 @@ uint32_t XThread::RaiseIrql(uint32_t new_irql) { void XThread::LowerIrql(uint32_t new_irql) { irql_ = new_irql; } +void XThread::CheckApcs() { DeliverAPCs(this); } + void XThread::LockApc() { apc_lock_.lock(); } -void XThread::UnlockApc() { +void XThread::UnlockApc(bool queue_delivery) { bool needs_apc = apc_list_->HasPending(); apc_lock_.unlock(); - if (needs_apc) { + if (needs_apc && queue_delivery) { QueueUserAPC(reinterpret_cast(DeliverAPCs), thread_handle_, reinterpret_cast(this)); } } +void XThread::EnqueueApc(uint32_t normal_routine, uint32_t normal_context, + uint32_t arg1, uint32_t arg2) { + LockApc(); + + // Allocate APC. + // We'll tag it as special and free it when dispatched. 
+ uint32_t apc_ptr = memory()->SystemHeapAlloc(XAPC::kSize); + auto apc = reinterpret_cast(memory()->TranslateVirtual(apc_ptr)); + + apc->Initialize(); + apc->kernel_routine = XAPC::kDummyKernelRoutine; + apc->rundown_routine = XAPC::kDummyRundownRoutine; + apc->normal_routine = normal_routine; + apc->normal_context = normal_context; + apc->arg1 = arg1; + apc->arg2 = arg2; + apc->enqueued = 1; + + uint32_t list_entry_ptr = apc_ptr + 8; + apc_list_->Insert(list_entry_ptr); + + UnlockApc(true); +} + void XThread::DeliverAPCs(void* data) { + XThread* thread = reinterpret_cast(data); + assert_true(XThread::GetCurrentThread() == thread); + // http://www.drdobbs.com/inside-nts-asynchronous-procedure-call/184416590?pgno=1 // http://www.drdobbs.com/inside-nts-asynchronous-procedure-call/184416590?pgno=7 - XThread* thread = reinterpret_cast(data); auto memory = thread->memory(); auto processor = thread->kernel_state()->processor(); auto apc_list = thread->apc_list(); @@ -475,76 +503,86 @@ void XThread::DeliverAPCs(void* data) { while (apc_list->HasPending()) { // Get APC entry (offset for LIST_ENTRY offset) and cache what we need. // Calling the routine may delete the memory/overwrite it. - uint32_t apc_address = apc_list->Shift() - 8; - uint8_t* apc_ptr = memory->TranslateVirtual(apc_address); - uint32_t kernel_routine = xe::load_and_swap(apc_ptr + 16); - uint32_t normal_routine = xe::load_and_swap(apc_ptr + 24); - uint32_t normal_context = xe::load_and_swap(apc_ptr + 28); - uint32_t system_arg1 = xe::load_and_swap(apc_ptr + 32); - uint32_t system_arg2 = xe::load_and_swap(apc_ptr + 36); + uint32_t apc_ptr = apc_list->Shift() - 8; + auto apc = reinterpret_cast(memory->TranslateVirtual(apc_ptr)); + bool needs_freeing = apc->kernel_routine == XAPC::kDummyKernelRoutine; // Mark as uninserted so that it can be reinserted again by the routine. 
- uint32_t old_flags = xe::load_and_swap(apc_ptr + 40); - xe::store_and_swap(apc_ptr + 40, old_flags & ~0xFF00); + apc->enqueued = 0; // Call kernel routine. // The routine can modify all of its arguments before passing it on. // Since we need to give guest accessible pointers over, we copy things // into and out of scratch. uint8_t* scratch_ptr = memory->TranslateVirtual(thread->scratch_address_); - xe::store_and_swap(scratch_ptr + 0, normal_routine); - xe::store_and_swap(scratch_ptr + 4, normal_context); - xe::store_and_swap(scratch_ptr + 8, system_arg1); - xe::store_and_swap(scratch_ptr + 12, system_arg2); - // kernel_routine(apc_address, &normal_routine, &normal_context, - // &system_arg1, &system_arg2) - uint64_t kernel_args[] = { - apc_address, thread->scratch_address_ + 0, thread->scratch_address_ + 4, - thread->scratch_address_ + 8, thread->scratch_address_ + 12, - }; - processor->ExecuteInterrupt(kernel_routine, kernel_args, - xe::countof(kernel_args)); - normal_routine = xe::load_and_swap(scratch_ptr + 0); - normal_context = xe::load_and_swap(scratch_ptr + 4); - system_arg1 = xe::load_and_swap(scratch_ptr + 8); - system_arg2 = xe::load_and_swap(scratch_ptr + 12); + xe::store_and_swap(scratch_ptr + 0, apc->normal_routine); + xe::store_and_swap(scratch_ptr + 4, apc->normal_context); + xe::store_and_swap(scratch_ptr + 8, apc->arg1); + xe::store_and_swap(scratch_ptr + 12, apc->arg2); + if (apc->kernel_routine != XAPC::kDummyKernelRoutine) { + // kernel_routine(apc_address, &normal_routine, &normal_context, + // &system_arg1, &system_arg2) + uint64_t kernel_args[] = { + apc_ptr, thread->scratch_address_ + 0, + thread->scratch_address_ + 4, thread->scratch_address_ + 8, + thread->scratch_address_ + 12, + }; + processor->Execute(thread->thread_state(), apc->kernel_routine, + kernel_args, xe::countof(kernel_args)); + } + uint32_t normal_routine = xe::load_and_swap(scratch_ptr + 0); + uint32_t normal_context = xe::load_and_swap(scratch_ptr + 4); + uint32_t arg1 = 
xe::load_and_swap(scratch_ptr + 8); + uint32_t arg2 = xe::load_and_swap(scratch_ptr + 12); // Call the normal routine. Note that it may have been killed by the kernel // routine. if (normal_routine) { - thread->UnlockApc(); + thread->UnlockApc(false); // normal_routine(normal_context, system_arg1, system_arg2) - uint64_t normal_args[] = {normal_context, system_arg1, system_arg2}; - processor->ExecuteInterrupt(normal_routine, normal_args, - xe::countof(normal_args)); + uint64_t normal_args[] = {normal_context, apc->arg1, apc->arg2}; + processor->Execute(thread->thread_state(), normal_routine, normal_args, + xe::countof(normal_args)); thread->LockApc(); } + + // If special, free it. + if (needs_freeing) { + memory->SystemHeapFree(apc_ptr); + } } - thread->UnlockApc(); + thread->UnlockApc(true); } void XThread::RundownAPCs() { + assert_true(XThread::GetCurrentThread() == this); LockApc(); while (apc_list_->HasPending()) { // Get APC entry (offset for LIST_ENTRY offset) and cache what we need. // Calling the routine may delete the memory/overwrite it. - uint32_t apc_address = apc_list_->Shift() - 8; - uint8_t* apc_ptr = memory()->TranslateVirtual(apc_address); - uint32_t rundown_routine = xe::load_and_swap(apc_ptr + 20); + uint32_t apc_ptr = apc_list_->Shift() - 8; + auto apc = reinterpret_cast(memory()->TranslateVirtual(apc_ptr)); + bool needs_freeing = apc->kernel_routine == XAPC::kDummyKernelRoutine; // Mark as uninserted so that it can be reinserted again by the routine. - uint32_t old_flags = xe::load_and_swap(apc_ptr + 40); - xe::store_and_swap(apc_ptr + 40, old_flags & ~0xFF00); + apc->enqueued = 0; // Call the rundown routine. - if (rundown_routine) { + if (apc->rundown_routine == XAPC::kDummyRundownRoutine) { + // No-op. 
+ } else if (apc->rundown_routine) { // rundown_routine(apc) - uint64_t args[] = {apc_address}; - kernel_state()->processor()->ExecuteInterrupt(rundown_routine, args, - xe::countof(args)); + uint64_t args[] = {apc_ptr}; + kernel_state()->processor()->Execute(thread_state(), apc->rundown_routine, + args, xe::countof(args)); + } + + // If special, free it. + if (needs_freeing) { + memory()->SystemHeapFree(apc_ptr); } } - UnlockApc(); + UnlockApc(true); } int32_t XThread::QueryPriority() { return GetThreadPriority(thread_handle_); } diff --git a/src/xenia/kernel/objects/xthread.h b/src/xenia/kernel/objects/xthread.h index b0fd97c2f..ee2f441d0 100644 --- a/src/xenia/kernel/objects/xthread.h +++ b/src/xenia/kernel/objects/xthread.h @@ -24,6 +24,42 @@ namespace kernel { class NativeList; class XEvent; +struct XAPC { + static const uint32_t kSize = 40; + static const uint32_t kDummyKernelRoutine = 0xF00DFF00; + static const uint32_t kDummyRundownRoutine = 0xF00DFF01; + + // KAPC is 0x28(40) bytes? (what's passed to ExAllocatePoolWithTag) + // This is 4b shorter than NT - looks like the reserved dword at +4 is gone. + // NOTE: stored in guest memory. 
+ xe::be type; // +0 + xe::be unk1; // +1 + xe::be processor_mode; // +2 + xe::be enqueued; // +3 + xe::be thread_ptr; // +4 + xe::be flink; // +8 + xe::be blink; // +12 + xe::be kernel_routine; // +16 + xe::be rundown_routine; // +20 + xe::be normal_routine; // +24 + xe::be normal_context; // +28 + xe::be arg1; // +32 + xe::be arg2; // +36 + + void Initialize() { + type = 18; // ApcObject + unk1 = 0; + processor_mode = 0; + enqueued = 0; + thread_ptr = 0; + flink = blink = 0; + kernel_routine = 0; + normal_routine = 0; + normal_context = 0; + arg1 = arg2 = 0; + } +}; + class XThread : public XObject { public: XThread(KernelState* kernel_state, uint32_t stack_size, @@ -55,9 +91,12 @@ class XThread : public XObject { uint32_t RaiseIrql(uint32_t new_irql); void LowerIrql(uint32_t new_irql); + void CheckApcs(); void LockApc(); - void UnlockApc(); + void UnlockApc(bool queue_delivery); NativeList* apc_list() const { return apc_list_; } + void EnqueueApc(uint32_t normal_routine, uint32_t normal_context, + uint32_t arg1, uint32_t arg2); int32_t QueryPriority(); void SetPriority(int32_t increment); diff --git a/src/xenia/kernel/xboxkrnl_io.cc b/src/xenia/kernel/xboxkrnl_io.cc index ff263aaf2..16ad650f8 100644 --- a/src/xenia/kernel/xboxkrnl_io.cc +++ b/src/xenia/kernel/xboxkrnl_io.cc @@ -9,11 +9,13 @@ #include "xenia/base/logging.h" #include "xenia/base/memory.h" +#include "xenia/cpu/processor.h" #include "xenia/kernel/async_request.h" #include "xenia/kernel/kernel_state.h" #include "xenia/kernel/fs/device.h" #include "xenia/kernel/objects/xevent.h" #include "xenia/kernel/objects/xfile.h" +#include "xenia/kernel/objects/xthread.h" #include "xenia/kernel/util/shim_utils.h" #include "xenia/kernel/xboxkrnl_private.h" #include "xenia/xbox.h" @@ -235,9 +237,6 @@ SHIM_CALL NtReadFile_shim(PPCContext* ppc_state, KernelState* state) { io_status_block_ptr, buffer, buffer_length, byte_offset_ptr, byte_offset); - // Async not supported yet. 
- assert_zero(apc_routine_ptr); - X_STATUS result = X_STATUS_SUCCESS; uint32_t info = 0; @@ -279,6 +278,14 @@ SHIM_CALL NtReadFile_shim(PPCContext* ppc_state, KernelState* state) { info = (int32_t)bytes_read; } + // Queue the APC callback. It must be delivered via the APC mechanism even + // though we are completing immediately. + if (apc_routine_ptr & ~1) { + auto thread = XThread::GetCurrentThread(); + thread->EnqueueApc(apc_routine_ptr & ~1, apc_context, + io_status_block_ptr, 0); + } + // Mark that we should signal the event now. We do this after // we have written the info out. signal_event = true; diff --git a/src/xenia/kernel/xboxkrnl_rtl.cc b/src/xenia/kernel/xboxkrnl_rtl.cc index 1da742c89..d8c908810 100644 --- a/src/xenia/kernel/xboxkrnl_rtl.cc +++ b/src/xenia/kernel/xboxkrnl_rtl.cc @@ -603,6 +603,8 @@ SHIM_CALL RtlLeaveCriticalSection_shim(PPCContext* ppc_state, // TODO(benvanik): wake a waiter. XELOGE("RtlLeaveCriticalSection would have woken a waiter"); } + + XThread::GetCurrentThread()->CheckApcs(); } SHIM_CALL RtlTimeToTimeFields_shim(PPCContext* ppc_state, KernelState* state) { diff --git a/src/xenia/kernel/xboxkrnl_threading.cc b/src/xenia/kernel/xboxkrnl_threading.cc index 69d39de2e..ad79cbfe1 100644 --- a/src/xenia/kernel/xboxkrnl_threading.cc +++ b/src/xenia/kernel/xboxkrnl_threading.cc @@ -1054,6 +1054,8 @@ SHIM_CALL KeLeaveCriticalRegion_shim(PPCContext* ppc_state, // XELOGD( // "KeLeaveCriticalRegion()"); XThread::LeaveCriticalRegion(); + + XThread::GetCurrentThread()->CheckApcs(); } SHIM_CALL KeRaiseIrqlToDpcLevel_shim(PPCContext* ppc_state, @@ -1070,6 +1072,8 @@ SHIM_CALL KfLowerIrql_shim(PPCContext* ppc_state, KernelState* state) { // "KfLowerIrql(%d)", // old_value); state->processor()->LowerIrql(static_cast(old_value)); + + XThread::GetCurrentThread()->CheckApcs(); } SHIM_CALL NtQueueApcThread_shim(PPCContext* ppc_state, KernelState* state) { @@ -1082,6 +1086,8 @@ SHIM_CALL NtQueueApcThread_shim(PPCContext* ppc_state, KernelState* 
state) { apc_routine, arg1, arg2, arg3); // Alloc APC object (from somewhere) and insert. + + assert_always("not implemented"); } SHIM_CALL KeInitializeApc_shim(PPCContext* ppc_state, KernelState* state) { @@ -1097,27 +1103,14 @@ SHIM_CALL KeInitializeApc_shim(PPCContext* ppc_state, KernelState* state) { thread, kernel_routine, rundown_routine, normal_routine, processor_mode, normal_context); - // KAPC is 0x28(40) bytes? (what's passed to ExAllocatePoolWithTag) - // This is 4b shorter than NT - looks like the reserved dword at +4 is gone - uint32_t type = 18; // ApcObject - uint32_t unk0 = 0; - uint32_t size = 0x28; - uint32_t unk1 = 0; - SHIM_SET_MEM_32(apc_ptr + 0, - (type << 24) | (unk0 << 16) | (size << 8) | (unk1)); - SHIM_SET_MEM_32(apc_ptr + 4, thread); // known offset - derefed by games - SHIM_SET_MEM_32(apc_ptr + 8, 0); // flink - SHIM_SET_MEM_32(apc_ptr + 12, 0); // blink - SHIM_SET_MEM_32(apc_ptr + 16, kernel_routine); - SHIM_SET_MEM_32(apc_ptr + 20, rundown_routine); - SHIM_SET_MEM_32(apc_ptr + 24, normal_routine); - SHIM_SET_MEM_32(apc_ptr + 28, normal_routine ? normal_context : 0); - SHIM_SET_MEM_32(apc_ptr + 32, 0); // arg1 - SHIM_SET_MEM_32(apc_ptr + 36, 0); // arg2 - uint32_t state_index = 0; - uint32_t inserted = 0; - SHIM_SET_MEM_32(apc_ptr + 40, (state_index << 24) | (processor_mode << 16) | - (inserted << 8)); + auto apc = SHIM_STRUCT(XAPC, apc_ptr); + apc->Initialize(); + apc->processor_mode = processor_mode; + apc->thread_ptr = thread; + apc->kernel_routine = kernel_routine; + apc->rundown_routine = rundown_routine; + apc->normal_routine = normal_routine; + apc->normal_context = normal_routine ? 
normal_context : 0; } SHIM_CALL KeInsertQueueApc_shim(PPCContext* ppc_state, KernelState* state) { @@ -1129,9 +1122,10 @@ SHIM_CALL KeInsertQueueApc_shim(PPCContext* ppc_state, KernelState* state) { XELOGD("KeInsertQueueApc(%.8X, %.8X, %.8X, %.8X)", apc_ptr, arg1, arg2, priority_increment); - uint32_t thread_ptr = SHIM_MEM_32(apc_ptr + 4); + auto apc = SHIM_STRUCT(XAPC, apc_ptr); + auto thread = - XObject::GetNativeObject(state, SHIM_MEM_ADDR(thread_ptr)); + XObject::GetNativeObject(state, SHIM_MEM_ADDR(apc->thread_ptr)); if (!thread) { SHIM_SET_RETURN_32(0); return; @@ -1141,17 +1135,16 @@ SHIM_CALL KeInsertQueueApc_shim(PPCContext* ppc_state, KernelState* state) { thread->LockApc(); // Fail if already inserted. - if (SHIM_MEM_32(apc_ptr + 40) & 0xFF00) { - thread->UnlockApc(); + if (apc->enqueued) { + thread->UnlockApc(false); SHIM_SET_RETURN_32(0); return; } // Prep APC. - SHIM_SET_MEM_32(apc_ptr + 32, arg1); - SHIM_SET_MEM_32(apc_ptr + 36, arg2); - SHIM_SET_MEM_32(apc_ptr + 40, - (SHIM_MEM_32(apc_ptr + 40) & ~0xFF00) | (1 << 8)); + apc->arg1 = arg1; + apc->arg2 = arg2; + apc->enqueued = 1; auto apc_list = thread->apc_list(); @@ -1159,7 +1152,7 @@ SHIM_CALL KeInsertQueueApc_shim(PPCContext* ppc_state, KernelState* state) { apc_list->Insert(list_entry_ptr); // Unlock thread. 
- thread->UnlockApc(); + thread->UnlockApc(true); SHIM_SET_RETURN_32(1); } @@ -1171,9 +1164,10 @@ SHIM_CALL KeRemoveQueueApc_shim(PPCContext* ppc_state, KernelState* state) { bool result = false; - uint32_t thread_ptr = SHIM_MEM_32(apc_ptr + 4); + auto apc = SHIM_STRUCT(XAPC, apc_ptr); + auto thread = - XObject::GetNativeObject(state, SHIM_MEM_ADDR(thread_ptr)); + XObject::GetNativeObject(state, SHIM_MEM_ADDR(apc->thread_ptr)); if (!thread) { SHIM_SET_RETURN_32(0); return; @@ -1181,8 +1175,8 @@ SHIM_CALL KeRemoveQueueApc_shim(PPCContext* ppc_state, KernelState* state) { thread->LockApc(); - if (!(SHIM_MEM_32(apc_ptr + 40) & 0xFF00)) { - thread->UnlockApc(); + if (!apc->enqueued) { + thread->UnlockApc(false); SHIM_SET_RETURN_32(0); return; } @@ -1194,7 +1188,7 @@ SHIM_CALL KeRemoveQueueApc_shim(PPCContext* ppc_state, KernelState* state) { result = true; } - thread->UnlockApc(); + thread->UnlockApc(true); SHIM_SET_RETURN_32(result ? 1 : 0); }