From a7b047b2a24216f3a85f36a974b08ac0e72a14bb Mon Sep 17 00:00:00 2001 From: disjtqz Date: Thu, 12 Oct 2023 15:34:27 -0400 Subject: [PATCH] Implement kernel processes --- src/xenia/apu/audio_system.cc | 3 +- src/xenia/apu/xma_decoder.cc | 4 +- src/xenia/cpu/processor.cc | 44 ---- src/xenia/cpu/processor.h | 5 - src/xenia/emulator.cc | 15 +- src/xenia/gpu/command_processor.cc | 2 +- src/xenia/gpu/graphics_system.cc | 24 +-- src/xenia/kernel/kernel_state.cc | 197 +++++++++++++----- src/xenia/kernel/kernel_state.h | 69 ++++-- src/xenia/kernel/xam/xam_task.cc | 2 +- .../kernel/xboxkrnl/xboxkrnl_threading.cc | 69 +++--- .../kernel/xboxkrnl/xboxkrnl_threading.h | 8 +- src/xenia/kernel/xthread.cc | 107 ++++++---- src/xenia/kernel/xthread.h | 15 +- src/xenia/xbox.h | 1 + 15 files changed, 338 insertions(+), 227 deletions(-) diff --git a/src/xenia/apu/audio_system.cc b/src/xenia/apu/audio_system.cc index ef4feb29b..1b0093424 100644 --- a/src/xenia/apu/audio_system.cc +++ b/src/xenia/apu/audio_system.cc @@ -21,6 +21,7 @@ #include "xenia/base/string_buffer.h" #include "xenia/base/threading.h" #include "xenia/cpu/thread_state.h" +#include "xenia/kernel/kernel_state.h" // As with normal Microsoft, there are like twelve different ways to access // the audio APIs. Early games use XMA*() methods almost exclusively to touch @@ -79,7 +80,7 @@ X_STATUS AudioSystem::Setup(kernel::KernelState* kernel_state) { new kernel::XHostThread(kernel_state, 128 * 1024, 0, [this]() { WorkerThreadMain(); return 0; - })); + }, kernel_state->GetSystemProcess())); // As we run audio callbacks the debugger must be able to suspend us. worker_thread_->set_can_debugger_suspend(true); worker_thread_->set_name("Audio Worker"); diff --git a/src/xenia/apu/xma_decoder.cc b/src/xenia/apu/xma_decoder.cc index 6562266fa..fd07f84bc 100644 --- a/src/xenia/apu/xma_decoder.cc +++ b/src/xenia/apu/xma_decoder.cc @@ -19,7 +19,7 @@ #include "xenia/cpu/processor.h" #include "xenia/cpu/thread_state.h" #include "xenia/kernel/xthread.h" - +#include "xenia/kernel/kernel_state.h" extern "C" { #include "third_party/FFmpeg/libavutil/log.h" } // extern "C" @@ -145,7 +145,7 @@ X_STATUS XmaDecoder::Setup(kernel::KernelState* kernel_state) { new kernel::XHostThread(kernel_state, 128 * 1024, 0, [this]() { WorkerThreadMain(); return 0; - })); + }, kernel_state->GetIdleProcess()));//this one doesnt need any process actually. never calls any guest code worker_thread_->set_name("XMA Decoder"); worker_thread_->set_can_debugger_suspend(true); worker_thread_->Create(); diff --git a/src/xenia/cpu/processor.cc b/src/xenia/cpu/processor.cc index cb6105464..c3e46af2a 100644 --- a/src/xenia/cpu/processor.cc +++ b/src/xenia/cpu/processor.cc @@ -432,50 +432,6 @@ uint64_t Processor::Execute(ThreadState* thread_state, uint32_t address, return context->r[3]; } -uint64_t Processor::ExecuteInterrupt(ThreadState* thread_state, - uint32_t address, uint64_t args[], - size_t arg_count) { - SCOPE_profile_cpu_f("cpu"); - - // Hold the global lock during interrupt dispatch. - // This will block if any code is in a critical region (has interrupts - // disabled) or if any other interrupt is executing. - auto global_lock = global_critical_region_.Acquire(); - - auto context = thread_state->context(); - assert_true(arg_count <= 5); - for (size_t i = 0; i < arg_count; ++i) { - context->r[3 + i] = args[i]; - } - - // TLS ptr must be zero during interrupts. Some games check this and - // early-exit routines when under interrupts. - auto pcr_address = - memory_->TranslateVirtual(static_cast(context->r[13])); - uint32_t old_tls_ptr = xe::load_and_swap(pcr_address); - xe::store_and_swap(pcr_address, 0); - - if (!Execute(thread_state, address)) { - return 0xDEADBABE; - } - - // Restores TLS ptr. - xe::store_and_swap(pcr_address, old_tls_ptr); - - return context->r[3]; -} - -Irql Processor::RaiseIrql(Irql new_value) { - return static_cast( - xe::atomic_exchange(static_cast(new_value), - reinterpret_cast(&irql_))); -} - -void Processor::LowerIrql(Irql old_value) { - xe::atomic_exchange(static_cast(old_value), - reinterpret_cast(&irql_)); -} - bool Processor::Save(ByteStream* stream) { stream->Write(kProcessorSaveSignature); return true; diff --git a/src/xenia/cpu/processor.h b/src/xenia/cpu/processor.h index 630cf4633..c0985672b 100644 --- a/src/xenia/cpu/processor.h +++ b/src/xenia/cpu/processor.h @@ -123,11 +123,6 @@ class Processor { bool ExecuteRaw(ThreadState* thread_state, uint32_t address); uint64_t Execute(ThreadState* thread_state, uint32_t address, uint64_t args[], size_t arg_count); - uint64_t ExecuteInterrupt(ThreadState* thread_state, uint32_t address, - uint64_t args[], size_t arg_count); - - Irql RaiseIrql(Irql new_value); - void LowerIrql(Irql old_value); bool Save(ByteStream* stream); bool Restore(ByteStream* stream); diff --git a/src/xenia/emulator.cc b/src/xenia/emulator.cc index bacdefeb5..dc2990ade 100644 --- a/src/xenia/emulator.cc +++ b/src/xenia/emulator.cc @@ -268,7 +268,13 @@ X_STATUS Emulator::Setup( // Shared kernel state. kernel_state_ = std::make_unique(this); - +#define LOAD_KERNEL_MODULE(t) \ + static_cast(kernel_state_->LoadKernelModule()) + // HLE kernel modules. + LOAD_KERNEL_MODULE(xboxkrnl::XboxkrnlModule); + LOAD_KERNEL_MODULE(xam::XamModule); + LOAD_KERNEL_MODULE(xbdm::XbdmModule); +#undef LOAD_KERNEL_MODULE plugin_loader_ = std::make_unique( kernel_state_.get(), storage_root() / "plugins"); @@ -288,13 +294,6 @@ X_STATUS Emulator::Setup( } } -#define LOAD_KERNEL_MODULE(t) \ - static_cast(kernel_state_->LoadKernelModule()) - // HLE kernel modules. - LOAD_KERNEL_MODULE(xboxkrnl::XboxkrnlModule); - LOAD_KERNEL_MODULE(xam::XamModule); - LOAD_KERNEL_MODULE(xbdm::XbdmModule); -#undef LOAD_KERNEL_MODULE // Initialize emulator fallback exception handling last. ExceptionHandler::Install(Emulator::ExceptionCallbackThunk, this); diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc index 7e20fe991..0614bc4c8 100644 --- a/src/xenia/gpu/command_processor.cc +++ b/src/xenia/gpu/command_processor.cc @@ -104,7 +104,7 @@ bool CommandProcessor::Initialize() { new kernel::XHostThread(kernel_state_, 128 * 1024, 0, [this]() { WorkerThreadMain(); return 0; - })); + }, kernel_state_->GetIdleProcess())); worker_thread_->set_name("GPU Commands"); worker_thread_->Create(); diff --git a/src/xenia/gpu/graphics_system.cc b/src/xenia/gpu/graphics_system.cc index 5879ac9ef..cc06c1390 100644 --- a/src/xenia/gpu/graphics_system.cc +++ b/src/xenia/gpu/graphics_system.cc @@ -28,7 +28,7 @@ #include "xenia/ui/graphics_provider.h" #include "xenia/ui/window.h" #include "xenia/ui/windowed_app_context.h" - +#include "xenia/kernel/kernel_state.h" DEFINE_bool( store_shaders, true, "Store shaders persistently and load them when loading games to avoid " @@ -138,7 +138,7 @@ X_STATUS GraphicsSystem::Setup(cpu::Processor* processor, } } return 0; - })); + }, kernel_state->GetIdleProcess())); // As we run vblank interrupts the debugger must be able to suspend us. vsync_worker_thread_->set_can_debugger_suspend(true); vsync_worker_thread_->set_name("GPU VSync"); @@ -267,25 +267,7 @@ void GraphicsSystem::SetInterruptCallback(uint32_t callback, } void GraphicsSystem::DispatchInterruptCallback(uint32_t source, uint32_t cpu) { - if (!interrupt_callback_) { - return; - } - - auto thread = kernel::XThread::GetCurrentThread(); - assert_not_null(thread); - - // Pick a CPU, if needed. We're going to guess 2. Because. - if (cpu == 0xFFFFFFFF) { - cpu = 2; - } - thread->SetActiveCpu(cpu); - - // XELOGGPU("Dispatching GPU interrupt at {:08X} w/ mode {} on cpu {}", - // interrupt_callback_, source, cpu); - - uint64_t args[] = {source, interrupt_callback_data_}; - processor_->ExecuteInterrupt(thread->thread_state(), interrupt_callback_, - args, xe::countof(args)); + kernel_state()->EmulateCPInterruptDPC(interrupt_callback_,interrupt_callback_data_, source, cpu); } void GraphicsSystem::MarkVblank() { diff --git a/src/xenia/kernel/kernel_state.cc b/src/xenia/kernel/kernel_state.cc index a0c9f6a2c..f22c9970e 100644 --- a/src/xenia/kernel/kernel_state.cc +++ b/src/xenia/kernel/kernel_state.cc @@ -23,6 +23,7 @@ #include "xenia/kernel/util/shim_utils.h" #include "xenia/kernel/xam/xam_module.h" #include "xenia/kernel/xboxkrnl/xboxkrnl_module.h" +#include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h" #include "xenia/kernel/xevent.h" #include "xenia/kernel/xmodule.h" #include "xenia/kernel/xnotifylistener.h" @@ -55,6 +56,8 @@ KernelState::KernelState(Emulator* emulator) achievement_manager_ = std::make_unique(); user_profiles_.emplace(0, std::make_unique(0)); + InitializeKernelGuestGlobals(); + auto content_root = emulator_->content_root(); if (!content_root.empty()) { content_root = std::filesystem::absolute(content_root); @@ -136,18 +139,6 @@ util::XdbfGameData KernelState::module_xdbf( return util::XdbfGameData(nullptr, resource_size); } -uint32_t KernelState::process_type() const { - auto pib = - memory_->TranslateVirtual(process_info_block_address_); - return pib->process_type; -} - -void KernelState::set_process_type(uint32_t value) { - auto pib = - memory_->TranslateVirtual(process_info_block_address_); - pib->process_type = uint8_t(value); -} - uint32_t KernelState::AllocateTLS() { return uint32_t(tls_bitmap_.Acquire()); } void KernelState::FreeTLS(uint32_t slot) { @@ -324,30 +315,32 @@ void KernelState::SetExecutableModule(object_ref module) { return; } - assert_zero(process_info_block_address_); - process_info_block_address_ = memory_->SystemHeapAlloc(0x60); + auto title_process = + memory_->TranslateVirtual(GetTitleProcess()); - auto pib = - memory_->TranslateVirtual(process_info_block_address_); - // TODO(benvanik): figure out what this list is. - pib->unk_04 = pib->unk_08 = 0; - pib->unk_0C = 0x0000007F; - pib->unk_10 = 0x001F0000; - pib->thread_count = 0; - pib->unk_1B = 0x06; - pib->kernel_stack_size = 16 * 1024; - pib->process_type = process_type_; - // TODO(benvanik): figure out what this list is. - pib->unk_54 = pib->unk_58 = 0; + InitializeProcess(title_process, X_PROCTYPE_TITLE, 10, 13, 17); xex2_opt_tls_info* tls_header = nullptr; executable_module_->GetOptHeader(XEX_HEADER_TLS_INFO, &tls_header); if (tls_header) { - auto pib = memory_->TranslateVirtual( - process_info_block_address_); - pib->tls_data_size = tls_header->data_size; - pib->tls_raw_data_size = tls_header->raw_data_size; - pib->tls_slot_size = tls_header->slot_count * 4; + title_process->tls_static_data_address = tls_header->raw_data_address; + title_process->tls_data_size = tls_header->data_size; + title_process->tls_raw_data_size = tls_header->raw_data_size; + title_process->tls_slot_size = tls_header->slot_count * 4; + SetProcessTLSVars(title_process, tls_header->slot_count, + tls_header->data_size, tls_header->raw_data_address); + } + + uint32_t kernel_stacksize = 0; + + executable_module_->GetOptHeader(XEX_HEADER_DEFAULT_STACK_SIZE, + &kernel_stacksize); + if (kernel_stacksize) { + kernel_stacksize = (kernel_stacksize + 4095) & 0xFFFFF000; + if (kernel_stacksize < 0x4000) { + kernel_stacksize = 0x4000; + } + title_process->kernel_stack_size = kernel_stacksize; } // Setup the kernel's XexExecutableModuleHandle field. @@ -376,8 +369,9 @@ void KernelState::SetExecutableModule(object_ref module) { // here). if (!dispatch_thread_running_) { dispatch_thread_running_ = true; - dispatch_thread_ = - object_ref(new XHostThread(this, 128 * 1024, 0, [this]() { + dispatch_thread_ = object_ref(new XHostThread( + this, 128 * 1024, 0, + [this]() { // As we run guest callbacks the debugger must be able to suspend us. dispatch_thread_->set_can_debugger_suspend(true); @@ -398,7 +392,8 @@ void KernelState::SetExecutableModule(object_ref module) { fn(); } return 0; - })); + }, + GetSystemProcess())); // don't think an equivalent exists on real hw dispatch_thread_->set_name("Kernel Dispatch"); dispatch_thread_->Create(); } @@ -628,11 +623,6 @@ void KernelState::TerminateTitle() { // Unset the executable module. executable_module_ = nullptr; - if (process_info_block_address_) { - memory_->SystemHeapFree(process_info_block_address_); - process_info_block_address_ = 0; - } - if (XThread::IsInThread()) { threads_by_id_.erase(XThread::GetCurrentThread()->thread_id()); @@ -646,12 +636,6 @@ void KernelState::TerminateTitle() { void KernelState::RegisterThread(XThread* thread) { auto global_lock = global_critical_region_.Acquire(); threads_by_id_[thread->thread_id()] = thread; - - /* - auto pib = - memory_->TranslateVirtual(process_info_block_address_); - pib->thread_count = pib->thread_count + 1; - */ } void KernelState::UnregisterThread(XThread* thread) { @@ -660,12 +644,6 @@ void KernelState::UnregisterThread(XThread* thread) { if (it != threads_by_id_.end()) { threads_by_id_.erase(it); } - - /* - auto pib = - memory_->TranslateVirtual(process_info_block_address_); - pib->thread_count = pib->thread_count - 1; - */ } void KernelState::OnThreadExecute(XThread* thread) { @@ -1049,6 +1027,64 @@ uint8_t KernelState::GetConnectedUsers() const { return input_sys->GetConnectedSlots(); } +// todo: definitely need to do more to pretend to be in a dpc +void KernelState::BeginDPCImpersonation(cpu::ppc::PPCContext* context, + DPCImpersonationScope& scope) { + auto kpcr = context->TranslateVirtualGPR(context->r[13]); + xenia_assert(kpcr->prcb_data.dpc_active == 0); + scope.previous_irql_ = kpcr->current_irql; + + kpcr->current_irql = 2; + kpcr->prcb_data.dpc_active = 1; +} +void KernelState::EndDPCImpersonation(cpu::ppc::PPCContext* context, + DPCImpersonationScope& end_scope) { + auto kpcr = context->TranslateVirtualGPR(context->r[13]); + xenia_assert(kpcr->prcb_data.dpc_active == 1); + kpcr->current_irql = end_scope.previous_irql_; + kpcr->prcb_data.dpc_active = 0; +} +void KernelState::EmulateCPInterruptDPC(uint32_t interrupt_callback, + uint32_t interrupt_callback_data, + uint32_t source, uint32_t cpu) { + if (!interrupt_callback) { + return; + } + + auto thread = kernel::XThread::GetCurrentThread(); + assert_not_null(thread); + + // Pick a CPU, if needed. We're going to guess 2. Because. + if (cpu == 0xFFFFFFFF) { + cpu = 2; + } + thread->SetActiveCpu(cpu); + + /* + in reality, our interrupt is a callback that is called in a dpc which is + scheduled by the actual interrupt + + we need to impersonate a dpc + */ + auto current_context = thread->thread_state()->context(); + auto kthread = memory()->TranslateVirtual(thread->guest_object()); + + auto pcr = memory()->TranslateVirtual(thread->pcr_ptr()); + + DPCImpersonationScope dpc_scope{}; + BeginDPCImpersonation(current_context, dpc_scope); + + // todo: check VdGlobalXamDevice here. if VdGlobalXamDevice is nonzero, should + // set X_PROCTYPE_SYSTEM + xboxkrnl::xeKeSetCurrentProcessType(X_PROCTYPE_TITLE, current_context); + + uint64_t args[] = {source, interrupt_callback_data}; + processor_->Execute(thread->thread_state(), interrupt_callback, args, + xe::countof(args)); + xboxkrnl::xeKeSetCurrentProcessType(X_PROCTYPE_IDLE, current_context); + + EndDPCImpersonation(current_context, dpc_scope); +} void KernelState::UpdateUsedUserProfiles() { const uint8_t used_slots_bitmask = GetConnectedUsers(); @@ -1068,5 +1104,64 @@ void KernelState::UpdateUsedUserProfiles() { } } +void KernelState::InitializeProcess(X_KPROCESS* process, uint32_t type, + char unk_18, char unk_19, char unk_1A) { + uint32_t guest_kprocess = memory()->HostToGuestVirtual(process); + + uint32_t thread_list_guest_ptr = + guest_kprocess + offsetof(X_KPROCESS, thread_list); + + process->unk_18 = unk_18; + process->unk_19 = unk_19; + process->unk_1A = unk_1A; + util::XeInitializeListHead(&process->thread_list, thread_list_guest_ptr); + process->unk_0C = 60; + // doubt any guest code uses this ptr, which i think probably has something to + // do with the page table + process->clrdataa_masked_ptr = 0; + // clrdataa_ & ~(1U << 31); + process->thread_count = 0; + process->unk_1B = 0x06; + process->kernel_stack_size = 16 * 1024; + process->tls_slot_size = 0x80; + + process->process_type = type; + uint32_t unk_list_guest_ptr = guest_kprocess + offsetof(X_KPROCESS, unk_54); + // TODO(benvanik): figure out what this list is. + util::XeInitializeListHead(&process->unk_54, unk_list_guest_ptr); +} + +void KernelState::SetProcessTLSVars(X_KPROCESS* process, int num_slots, + int tls_data_size, + int tls_static_data_address) { + uint32_t slots_padded = (num_slots + 3) & 0xFFFFFFFC; + process->tls_data_size = tls_data_size; + process->tls_raw_data_size = tls_data_size; + process->tls_static_data_address = tls_static_data_address; + process->tls_slot_size = 4 * slots_padded; + uint32_t count_div32 = slots_padded / 32; + for (unsigned word_index = 0; word_index < count_div32; ++word_index) { + process->bitmap[word_index] = -1; + } + + // set remainder of bitset + if (((num_slots + 3) & 0x1C) != 0) + process->bitmap[count_div32] = -1 << (32 - ((num_slots + 3) & 0x1C)); +} +void KernelState::InitializeKernelGuestGlobals() { + kernel_guest_globals_ = memory_->SystemHeapAlloc(sizeof(KernelGuestGlobals)); + + KernelGuestGlobals* block = + memory_->TranslateVirtual(kernel_guest_globals_); + memset(block, 0, sizeof(block)); + + auto idle_process = memory()->TranslateVirtual(GetIdleProcess()); + InitializeProcess(idle_process, X_PROCTYPE_IDLE, 0, 0, 0); + idle_process->unk_0C = 0x7F; + auto system_process = + memory()->TranslateVirtual(GetSystemProcess()); + InitializeProcess(system_process, X_PROCTYPE_SYSTEM, 2, 5, 9); + SetProcessTLSVars(system_process, 32, 0, 0); +} } // namespace kernel } // namespace xe diff --git a/src/xenia/kernel/kernel_state.h b/src/xenia/kernel/kernel_state.h index 200ed75a1..61ab21dc0 100644 --- a/src/xenia/kernel/kernel_state.h +++ b/src/xenia/kernel/kernel_state.h @@ -48,33 +48,41 @@ constexpr fourcc_t kKernelSaveSignature = make_fourcc("KRNL"); // (?), used by KeGetCurrentProcessType constexpr uint32_t X_PROCTYPE_IDLE = 0; -constexpr uint32_t X_PROCTYPE_USER = 1; +constexpr uint32_t X_PROCTYPE_TITLE = 1; constexpr uint32_t X_PROCTYPE_SYSTEM = 2; struct X_KPROCESS { - xe::be unk_00; - xe::be unk_04; // blink - xe::be unk_08; // flink + X_KSPINLOCK thread_list_spinlock; + // list of threads in this process, guarded by the spinlock above + X_LIST_ENTRY thread_list; + xe::be unk_0C; - xe::be unk_10; + // kernel sets this to point to a section of size 0x2F700 called CLRDATAA, + // except it clears bit 31 of the pointer. in 17559 the address is 0x801C0000, + // so it sets this ptr to 0x1C0000 + xe::be clrdataa_masked_ptr; xe::be thread_count; uint8_t unk_18; uint8_t unk_19; uint8_t unk_1A; uint8_t unk_1B; xe::be kernel_stack_size; - xe::be unk_20; + xe::be tls_static_data_address; xe::be tls_data_size; xe::be tls_raw_data_size; xe::be tls_slot_size; - uint8_t unk_2E; + // ExCreateThread calls a subfunc references this field, returns + // X_STATUS_PROCESS_IS_TERMINATING if true + uint8_t is_terminating; + // one of X_PROCTYPE_ uint8_t process_type; - xe::be bitmap[0x20 / 4]; + xe::be bitmap[8]; xe::be unk_50; - xe::be unk_54; // blink - xe::be unk_58; // flink + X_LIST_ENTRY unk_54; xe::be unk_5C; }; +static_assert_size(X_KPROCESS, 0x60); + struct TerminateNotification { uint32_t guest_routine; @@ -92,6 +100,16 @@ struct X_TIME_STAMP_BUNDLE { uint32_t padding; }; +struct KernelGuestGlobals { + X_KPROCESS idle_process; // X_PROCTYPE_IDLE. runs in interrupt contexts. is also the context the kernel starts in? + X_KPROCESS title_process; // X_PROCTYPE_TITLE + X_KPROCESS system_process; // X_PROCTYPE_SYSTEM. no idea when this runs. can + // create threads in this process with + // ExCreateThread and the thread flag 0x2 +}; +struct DPCImpersonationScope { + uint8_t previous_irql_; +}; class KernelState { public: explicit KernelState(Emulator* emulator); @@ -147,10 +165,16 @@ class KernelState { // Access must be guarded by the global critical region. util::ObjectTable* object_table() { return &object_table_; } - uint32_t process_type() const; - void set_process_type(uint32_t value); - uint32_t process_info_block_address() const { - return process_info_block_address_; + uint32_t GetSystemProcess() const { + return kernel_guest_globals_ + offsetof(KernelGuestGlobals, system_process); + } + + uint32_t GetTitleProcess() const { + return kernel_guest_globals_ + offsetof(KernelGuestGlobals, title_process); + } + // also the "interrupt" process + uint32_t GetIdleProcess() const { + return kernel_guest_globals_ + offsetof(KernelGuestGlobals, idle_process); } uint32_t AllocateTLS(); @@ -246,9 +270,20 @@ class KernelState { uint32_t CreateKeTimestampBundle(); void UpdateKeTimestampBundle(); + void BeginDPCImpersonation(cpu::ppc::PPCContext* context, DPCImpersonationScope& scope); + void EndDPCImpersonation(cpu::ppc::PPCContext* context, + DPCImpersonationScope& end_scope); + + void EmulateCPInterruptDPC(uint32_t interrupt_callback,uint32_t interrupt_callback_data, uint32_t source, + uint32_t cpu); + private: void LoadKernelModule(object_ref kernel_module); - + void InitializeProcess(X_KPROCESS* process, uint32_t type, char unk_18, + char unk_19, char unk_1A); + void SetProcessTLSVars(X_KPROCESS* process, int num_slots, int tls_data_size, + int tls_static_data_address); + void InitializeKernelGuestGlobals(); Emulator* emulator_; Memory* memory_; cpu::Processor* processor_; @@ -267,13 +302,11 @@ class KernelState { std::vector> notify_listeners_; bool has_notified_startup_ = false; - uint32_t process_type_ = X_PROCTYPE_USER; object_ref executable_module_; std::vector> kernel_modules_; std::vector> user_modules_; std::vector terminate_notifications_; - - uint32_t process_info_block_address_ = 0; + uint32_t kernel_guest_globals_ = 0; std::atomic dispatch_thread_running_; object_ref dispatch_thread_; diff --git a/src/xenia/kernel/xam/xam_task.cc b/src/xenia/kernel/xam/xam_task.cc index 23358dae5..0a809b3fe 100644 --- a/src/xenia/kernel/xam/xam_task.cc +++ b/src/xenia/kernel/xam/xam_task.cc @@ -68,7 +68,7 @@ dword_result_t XamTaskSchedule_entry(lpvoid_t callback, auto thread = object_ref(new XThread(kernel_state(), stack_size, 0, callback, - message.guest_address(), 0, true)); + message.guest_address(), 0, true, false, kernel_state()->GetSystemProcess())); X_STATUS result = thread->Create(); diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc index 22609c8a5..49512dc23 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc @@ -111,20 +111,31 @@ uint32_t ExCreateThread(xe::be* handle_ptr, uint32_t stack_size, // LPVOID StartContext, // DWORD CreationFlags // 0x80? + auto kernel_state_var = kernel_state(); + // xenia_assert((creation_flags & 2) == 0); // creating system thread? + if (creation_flags & 2) { + XELOGE("Guest is creating a system thread!"); + } + + uint32_t thread_process = (creation_flags & 2) + ? kernel_state_var->GetSystemProcess() + : kernel_state_var->GetTitleProcess(); + X_KPROCESS* target_process = + kernel_state_var->memory()->TranslateVirtual(thread_process); // Inherit default stack size uint32_t actual_stack_size = stack_size; if (actual_stack_size == 0) { - actual_stack_size = kernel_state()->GetExecutableModule()->stack_size(); + actual_stack_size = target_process->kernel_stack_size; } // Stack must be aligned to 16kb pages actual_stack_size = std::max((uint32_t)0x4000, ((actual_stack_size + 0xFFF) & 0xFFFFF000)); - auto thread = object_ref( - new XThread(kernel_state(), actual_stack_size, xapi_thread_startup, - start_address, start_context, creation_flags, true)); + auto thread = object_ref(new XThread( + kernel_state(), actual_stack_size, xapi_thread_startup, start_address, + start_context, creation_flags, true, false, thread_process)); X_STATUS result = thread->Create(); if (XFAILED(result)) { @@ -345,29 +356,42 @@ dword_result_t KeSetBasePriorityThread_entry(lpvoid_t thread_ptr, } DECLARE_XBOXKRNL_EXPORT1(KeSetBasePriorityThread, kThreading, kImplemented); -dword_result_t KeSetDisableBoostThread_entry(lpvoid_t thread_ptr, +dword_result_t KeSetDisableBoostThread_entry(pointer_t thread_ptr, dword_t disabled) { - auto thread = XObject::GetNativeObject(kernel_state(), thread_ptr); - if (thread) { - // Uhm? - } + // supposed to acquire dispatcher lock + a prcb lock, all just to exchange + // this char there is no other special behavior going on in this function, + // just acquiring locks to do this exchange + auto old_boost_disabled = + reinterpret_cast(&thread_ptr->boost_disabled) + ->exchange(static_cast(disabled)); - return 0; + return old_boost_disabled; } DECLARE_XBOXKRNL_EXPORT1(KeSetDisableBoostThread, kThreading, kImplemented); -dword_result_t KeGetCurrentProcessType_entry() { - return kernel_state()->process_type(); +uint32_t xeKeGetCurrentProcessType(cpu::ppc::PPCContext* context) { + auto pcr = context->TranslateVirtualGPR(context->r[13]); + + if (!pcr->prcb_data.dpc_active) + return context->TranslateVirtual(pcr->prcb_data.current_thread) + ->process_type; + return pcr->processtype_value_in_dpc; +} +void xeKeSetCurrentProcessType(uint32_t type, cpu::ppc::PPCContext* context) { + auto pcr = context->TranslateVirtualGPR(context->r[13]); + if (pcr->prcb_data.dpc_active) { + pcr->processtype_value_in_dpc = type; + } +} + +dword_result_t KeGetCurrentProcessType_entry(const ppc_context_t& context) { + return xeKeGetCurrentProcessType(context); } DECLARE_XBOXKRNL_EXPORT2(KeGetCurrentProcessType, kThreading, kImplemented, kHighFrequency); -void KeSetCurrentProcessType_entry(dword_t type) { - // One of X_PROCTYPE_? - - assert_true(type <= 2); - - kernel_state()->set_process_type(type); +void KeSetCurrentProcessType_entry(dword_t type, const ppc_context_t& context) { + xeKeSetCurrentProcessType(type, context); } DECLARE_XBOXKRNL_EXPORT1(KeSetCurrentProcessType, kThreading, kImplemented); @@ -1108,7 +1132,7 @@ dword_result_t KfAcquireSpinLock_entry(pointer_t lock_ptr, DECLARE_XBOXKRNL_EXPORT3(KfAcquireSpinLock, kThreading, kImplemented, kBlocking, kHighFrequency); -void xeKeKfReleaseSpinLock(PPCContext* ctx, X_KSPINLOCK* lock, dword_t old_irql, +void xeKeKfReleaseSpinLock(PPCContext* ctx, X_KSPINLOCK* lock, uint32_t old_irql, bool change_irql) { assert_true(lock->prcb_of_owner == static_cast(ctx->r[13])); // Unlock. @@ -1385,7 +1409,7 @@ static void YankApcList(PPCContext* ctx, X_KTHREAD* current_thread, unsigned apc } } -void xeRundownApcs(PPCContext* ctx) { +void xeRundownApcs(cpu::ppc::PPCContext* ctx) { auto kpcr = ctx->TranslateVirtualGPR(ctx->r[13]); auto current_thread = ctx->TranslateVirtual(kpcr->prcb_data.current_thread); @@ -1429,7 +1453,7 @@ uint32_t xeKeInsertQueueApc(XAPC* apc, uint32_t arg1, uint32_t arg2, auto target_thread = context->TranslateVirtual(apc->thread_ptr); auto old_irql = xeKeKfAcquireSpinLock(context, &target_thread->apc_lock); uint32_t result; - if (!target_thread->apc_related || apc->enqueued) { + if (!target_thread->may_queue_apcs || apc->enqueued) { result = 0; } else { apc->arg1 = arg1; @@ -1470,9 +1494,7 @@ uint32_t xeKeInsertQueueApc(XAPC* apc, uint32_t arg1, uint32_t arg2, dword_result_t KeInsertQueueApc_entry(pointer_t apc, lpvoid_t arg1, lpvoid_t arg2, dword_t priority_increment, const ppc_context_t& context) { - return xeKeInsertQueueApc(apc, arg1, arg2, priority_increment, context); - } DECLARE_XBOXKRNL_EXPORT1(KeInsertQueueApc, kThreading, kImplemented); @@ -1480,7 +1502,6 @@ dword_result_t KeRemoveQueueApc_entry(pointer_t apc, const ppc_context_t& context) { bool result = false; - uint32_t thread_guest_pointer = apc->thread_ptr; if (!thread_guest_pointer) { return 0; diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.h b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.h index db4163663..6125f7e89 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.h +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.h @@ -63,13 +63,15 @@ uint32_t xeNtQueueApcThread(uint32_t thread_handle, uint32_t apc_routine, void xeKfLowerIrql(PPCContext* ctx, unsigned char new_irql); unsigned char xeKfRaiseIrql(PPCContext* ctx, unsigned char new_irql); -void xeKeKfReleaseSpinLock(PPCContext* ctx, X_KSPINLOCK* lock, dword_t old_irql, bool change_irql=true); -uint32_t xeKeKfAcquireSpinLock(PPCContext* ctx, X_KSPINLOCK* lock, bool change_irql=true); +void xeKeKfReleaseSpinLock(PPCContext* ctx, X_KSPINLOCK* lock, uint32_t old_irql, bool change_irql=true); +uint32_t xeKeKfAcquireSpinLock(PPCContext* ctx, X_KSPINLOCK* lock, + bool change_irql = true); X_STATUS xeProcessUserApcs(PPCContext* ctx); void xeRundownApcs(PPCContext* ctx); - +uint32_t xeKeGetCurrentProcessType(PPCContext* context); +void xeKeSetCurrentProcessType(uint32_t type, PPCContext* context); } // namespace xboxkrnl } // namespace kernel } // namespace xe diff --git a/src/xenia/kernel/xthread.cc b/src/xenia/kernel/xthread.cc index 9dd9a9690..130f4f06a 100644 --- a/src/xenia/kernel/xthread.cc +++ b/src/xenia/kernel/xthread.cc @@ -58,7 +58,7 @@ XThread::XThread(KernelState* kernel_state) XThread::XThread(KernelState* kernel_state, uint32_t stack_size, uint32_t xapi_thread_startup, uint32_t start_address, uint32_t start_context, uint32_t creation_flags, - bool guest_thread, bool main_thread) + bool guest_thread, bool main_thread, uint32_t guest_process) : XObject(kernel_state, kObjectType, !guest_thread), thread_id_(++next_xthread_id_), guest_thread_(guest_thread), @@ -76,7 +76,7 @@ XThread::XThread(KernelState* kernel_state, uint32_t stack_size, if (creation_params_.stack_size < 16 * 1024) { creation_params_.stack_size = 16 * 1024; } - + creation_params_.guest_process = guest_process; // The kernel does not take a reference. We must unregister in the dtor. kernel_state_->RegisterThread(this); } @@ -93,7 +93,6 @@ XThread::~XThread() { if (thread_state_) { delete thread_state_; } - kernel_state()->memory()->SystemHeapFree(scratch_address_); kernel_state()->memory()->SystemHeapFree(tls_static_address_); kernel_state()->memory()->SystemHeapFree(pcr_address_); FreeStack(); @@ -197,13 +196,14 @@ void XThread::InitializeGuestObject() { guest_thread->tls_address = (this->tls_static_address_); guest_thread->thread_state = 0; uint32_t process_info_block_address = - kernel_state_->process_info_block_address(); + creation_params_.guest_process ? creation_params_.guest_process + : this->kernel_state_->GetTitleProcess(); X_KPROCESS* process = memory()->TranslateVirtual(process_info_block_address); uint32_t kpcrb = pcr_address_ + offsetof(X_KPCR, prcb_data); - auto process_type = X_PROCTYPE_USER; // process->process_type; + auto process_type = process->process_type; guest_thread->process_type_dup = process_type; guest_thread->process_type = process_type; guest_thread->apc_lists[0].Initialize(memory()); @@ -212,7 +212,7 @@ void XThread::InitializeGuestObject() { guest_thread->a_prcb_ptr = kpcrb; guest_thread->another_prcb_ptr = kpcrb; - guest_thread->apc_related = 1; + guest_thread->may_queue_apcs = 1; guest_thread->msr_mask = 0xFDFFD7FF; guest_thread->process = process_info_block_address; guest_thread->stack_alloc_base = this->stack_base_; @@ -227,6 +227,24 @@ void XThread::InitializeGuestObject() { guest_thread->unk_158 = v9 + 340; guest_thread->creation_flags = this->creation_params_.creation_flags; guest_thread->unk_17C = 1; + + /* + * not doing this right at all! we're not using our threads context, because + * we may be on the host and have no underlying context. in reality we should + * have a context and acquire any locks using that context! + */ + auto context_here = thread_state_->context(); + auto old_irql = xboxkrnl::xeKeKfAcquireSpinLock( + context_here, &process->thread_list_spinlock); + + // todo: acquire dispatcher lock here? + + util::XeInsertTailList(&process->thread_list, &guest_thread->process_threads, + context_here); + process->thread_count += 1; + // todo: release dispatcher lock here? + xboxkrnl::xeKeKfReleaseSpinLock(context_here, &process->thread_list_spinlock, + old_irql); } bool XThread::AllocateStack(uint32_t size) { @@ -284,11 +302,6 @@ X_STATUS XThread::Create() { return X_STATUS_NO_MEMORY; } - // Allocate thread scratch. - // This is used by interrupts/APCs/etc so we can round-trip pointers through. - scratch_size_ = 4 * 16; - scratch_address_ = memory()->SystemHeapAlloc(scratch_size_); - // Allocate TLS block. // Games will specify a certain number of 4b slots that each thread will get. xex2_opt_tls_info* tls_header = nullptr; @@ -368,7 +381,8 @@ X_STATUS XThread::Create() { pcr->tls_ptr = tls_static_address_; pcr->pcr_ptr = pcr_address_; pcr->prcb_data.current_thread = guest_object(); - + pcr->prcb = pcr_address_ + offsetof(X_KPCR, prcb_data); + pcr->host_stash = reinterpret_cast(thread_state_->context()); pcr->stack_base_ptr = stack_base_; pcr->stack_end_ptr = stack_limit_; @@ -381,18 +395,7 @@ X_STATUS XThread::Create() { params.create_suspended = true; -#if 0 - uint64_t stack_size_mult = cvars::stack_size_multiplier_hack; - - if (main_thread_) { - stack_size_mult = - static_cast(cvars::main_xthread_stack_size_multiplier_hack); - - } -#else - uint64_t stack_size_mult = 1; -#endif - params.stack_size = 16_MiB * stack_size_mult; // Allocate a big host stack. + params.stack_size = 16_MiB; // Allocate a big host stack. thread_ = xe::threading::Thread::Create(params, [this]() { // Set thread ID override. This is used by logging. xe::threading::set_current_thread_id(handle()); @@ -406,6 +409,7 @@ X_STATUS XThread::Create() { // Execute user code. current_xthread_tls_ = this; current_thread_ = this; + cpu::ThreadState::Bind(this->thread_state()); running_ = true; Execute(); running_ = false; @@ -453,15 +457,28 @@ X_STATUS XThread::Exit(int exit_code) { assert_true(XThread::GetCurrentThread() == this); // TODO(chrispy): not sure if this order is correct, should it come after // apcs? - guest_object()->terminated = 1; + auto kthread = guest_object(); + auto cpu_context = thread_state_->context(); + kthread->terminated = 1; // TODO(benvanik): dispatch events? waiters? etc? RundownAPCs(); // Set exit code. - X_KTHREAD* thread = guest_object(); - thread->header.signal_state = 1; - thread->exit_status = exit_code; + kthread->header.signal_state = 1; + kthread->exit_status = exit_code; + + auto kprocess = cpu_context->TranslateVirtual(kthread->process); + + uint32_t old_irql = xboxkrnl::xeKeKfAcquireSpinLock( + cpu_context, &kprocess->thread_list_spinlock); + + util::XeRemoveEntryList(&kthread->process_threads, cpu_context); + + kprocess->thread_count = kprocess->thread_count - 1; + + xboxkrnl::xeKeKfReleaseSpinLock(cpu_context, &kprocess->thread_list_spinlock, + old_irql); kernel_state()->OnThreadExit(this); @@ -517,7 +534,6 @@ class reenter_exception { void XThread::Execute() { XELOGKERNEL("XThread::Execute thid {} (handle={:08X}, '{}', native={:08X})", thread_id_, handle(), thread_name_, thread_->system_id()); - // Let the kernel know we are starting. kernel_state()->OnThreadExecute(this); @@ -699,9 +715,16 @@ uint32_t XThread::suspend_count() { } X_STATUS XThread::Resume(uint32_t* out_suspend_count) { - --guest_object()->suspend_count; + auto guest_thread = guest_object(); - if (thread_->Resume(out_suspend_count)) { + uint8_t previous_suspend_count = + reinterpret_cast(&guest_thread->suspend_count) + ->fetch_sub(1); + if (out_suspend_count) { + *out_suspend_count = previous_suspend_count; + } + uint32_t unused_host_suspend_count = 0; + if (thread_->Resume(&unused_host_suspend_count)) { return X_STATUS_SUCCESS; } else { return X_STATUS_UNSUCCESSFUL; @@ -709,20 +732,20 @@ X_STATUS XThread::Resume(uint32_t* out_suspend_count) { } X_STATUS XThread::Suspend(uint32_t* out_suspend_count) { - - //this normally holds the apc lock for the thread, because it queues a kernel mode apc that does the actual suspension + // this normally holds the apc lock for the thread, because it queues a kernel + // mode apc that does the actual suspension X_KTHREAD* guest_thread = guest_object(); uint8_t previous_suspend_count = reinterpret_cast(&guest_thread->suspend_count) ->fetch_add(1); - - *out_suspend_count = previous_suspend_count; - + if (out_suspend_count) { + *out_suspend_count = previous_suspend_count; + } // If we are suspending ourselves, we can't hold the lock. - - if (thread_->Suspend(out_suspend_count)) { + uint32_t unused_host_suspend_count = 0; + if (thread_->Suspend(&unused_host_suspend_count)) { return X_STATUS_SUCCESS; } else { return X_STATUS_UNSUCCESSFUL; @@ -975,8 +998,10 @@ object_ref XThread::Restore(KernelState* kernel_state, } XHostThread::XHostThread(KernelState* kernel_state, uint32_t stack_size, - uint32_t creation_flags, std::function host_fn) - : XThread(kernel_state, stack_size, 0, 0, 0, creation_flags, false), + uint32_t creation_flags, std::function host_fn, + uint32_t guest_process) + : XThread(kernel_state, stack_size, 0, 0, 0, creation_flags, false, false, + guest_process), host_fn_(host_fn) { // By default host threads are not debugger suspendable. If the thread runs // any guest code this must be overridden. @@ -987,10 +1012,8 @@ void XHostThread::Execute() { XELOGKERNEL( "XThread::Execute thid {} (handle={:08X}, '{}', native={:08X}, )", thread_id_, handle(), thread_name_, thread_->system_id()); - // Let the kernel know we are starting. kernel_state()->OnThreadExecute(this); - int ret = host_fn_(); // Exit. diff --git a/src/xenia/kernel/xthread.h b/src/xenia/kernel/xthread.h index 4890ac11a..5dd562afe 100644 --- a/src/xenia/kernel/xthread.h +++ b/src/xenia/kernel/xthread.h @@ -206,7 +206,7 @@ struct X_KTHREAD { util::X_TYPED_LIST apc_lists[2]; TypedGuestPointer process; // 0x84 uint8_t unk_88[0x3]; // 0x88 - uint8_t apc_related; // 0x8B + uint8_t may_queue_apcs; // 0x8B X_KSPINLOCK apc_lock; // 0x8C uint8_t unk_90[0xC]; // 0x90 xe::be msr_mask; // 0x9C @@ -214,7 +214,11 @@ struct X_KTHREAD { uint8_t unk_A4; // 0xA4 uint8_t unk_A5[0xB]; // 0xA5 int32_t apc_disable_count; // 0xB0 - uint8_t unk_B4[0x8]; // 0xB4 + uint8_t unk_B4[4]; // 0xB4 + uint8_t unk_B8; // 0xB8 + uint8_t unk_B9; // 0xB9 + uint8_t unk_BA; // 0xBA + uint8_t boost_disabled; // 0xBB uint8_t suspend_count; // 0xBC uint8_t unk_BD; // 0xBD uint8_t terminated; // 0xBE @@ -273,13 +277,14 @@ class XThread : public XObject, public cpu::Thread { uint32_t start_address; uint32_t start_context; uint32_t creation_flags; + uint32_t guest_process; }; XThread(KernelState* kernel_state); XThread(KernelState* kernel_state, uint32_t stack_size, uint32_t xapi_thread_startup, uint32_t start_address, uint32_t start_context, uint32_t creation_flags, bool guest_thread, - bool main_thread = false); + bool main_thread = false, uint32_t guest_process = 0); ~XThread() override; static bool IsInThread(XThread* other); @@ -368,8 +373,6 @@ class XThread : public XObject, public cpu::Thread { std::vector> pending_mutant_acquires_; uint32_t thread_id_ = 0; - uint32_t scratch_address_ = 0; - uint32_t scratch_size_ = 0; uint32_t tls_static_address_ = 0; uint32_t tls_dynamic_address_ = 0; uint32_t tls_total_size_ = 0; @@ -388,7 +391,7 @@ class XThread : public XObject, public cpu::Thread { class XHostThread : public XThread { public: XHostThread(KernelState* kernel_state, uint32_t stack_size, - uint32_t creation_flags, std::function host_fn); + uint32_t creation_flags, std::function host_fn, uint32_t guest_process=0); virtual void Execute(); diff --git a/src/xenia/xbox.h b/src/xenia/xbox.h index 8ee0462d6..893436fa1 100644 --- a/src/xenia/xbox.h +++ b/src/xenia/xbox.h @@ -71,6 +71,7 @@ typedef uint32_t X_STATUS; #define X_STATUS_INVALID_PARAMETER_1 ((X_STATUS)0xC00000EFL) #define X_STATUS_INVALID_PARAMETER_2 ((X_STATUS)0xC00000F0L) #define X_STATUS_INVALID_PARAMETER_3 ((X_STATUS)0xC00000F1L) +#define X_STATUS_PROCESS_IS_TERMINATING ((X_STATUS)0xC000010AL) #define X_STATUS_DLL_NOT_FOUND ((X_STATUS)0xC0000135L) #define X_STATUS_ENTRYPOINT_NOT_FOUND ((X_STATUS)0xC0000139L) #define X_STATUS_MAPPED_ALIGNMENT ((X_STATUS)0xC0000220L)