From 666f5543a801cc3a76930c2b5ab0e9d1fe210818 Mon Sep 17 00:00:00 2001
From: "Dr. Chat"
Date: Fri, 27 Nov 2015 23:07:06 -0600
Subject: [PATCH] Cleanup ThreadState and XThread

---
 src/xenia/cpu/ppc/testing/ppc_testing_main.cc |   5 +-
 src/xenia/cpu/testing/util.h                  |   5 +-
 src/xenia/cpu/thread_state.cc                 |  55 +-----
 src/xenia/cpu/thread_state.h                  |  32 +---
 src/xenia/kernel/xthread.cc                   | 177 +++++++++++-------
 src/xenia/kernel/xthread.h                    |   9 +-
 6 files changed, 133 insertions(+), 150 deletions(-)

diff --git a/src/xenia/cpu/ppc/testing/ppc_testing_main.cc b/src/xenia/cpu/ppc/testing/ppc_testing_main.cc
index b5e9c700e..3618e0994 100644
--- a/src/xenia/cpu/ppc/testing/ppc_testing_main.cc
+++ b/src/xenia/cpu/ppc/testing/ppc_testing_main.cc
@@ -214,9 +214,8 @@ class TestRunner {
     uint32_t stack_size = 64 * 1024;
     uint32_t stack_address = START_ADDRESS - stack_size;
     uint32_t pcr_address = stack_address - 0x1000;
-    thread_state.reset(new ThreadState(processor.get(), 0x100,
-                                       ThreadStackType::kUserStack,
-                                       stack_address, stack_size, pcr_address));
+    thread_state.reset(
+        new ThreadState(processor.get(), 0x100, stack_address, pcr_address));
     return true;
   }

diff --git a/src/xenia/cpu/testing/util.h b/src/xenia/cpu/testing/util.h
index a9b908921..8557b1c0f 100644
--- a/src/xenia/cpu/testing/util.h
+++ b/src/xenia/cpu/testing/util.h
@@ -72,9 +72,8 @@ class TestFunction {
     uint32_t stack_size = 64 * 1024;
     uint32_t stack_address = memory_size - stack_size;
     uint32_t thread_state_address = stack_address - 0x1000;
-    auto thread_state = std::make_unique<ThreadState>(
-        processor.get(), 0x100, ThreadStackType::kUserStack, stack_address,
-        stack_size, thread_state_address);
+    auto thread_state = std::make_unique<ThreadState>(processor.get(), 0x100);
+    assert_always();  // TODO: Allocate a thread stack!!!
     auto ctx = thread_state->context();
     ctx->lr = 0xBCBCBCBC;

diff --git a/src/xenia/cpu/thread_state.cc b/src/xenia/cpu/thread_state.cc
index 884ee08e4..fabd176e6 100644
--- a/src/xenia/cpu/thread_state.cc
+++ b/src/xenia/cpu/thread_state.cc
@@ -26,16 +26,10 @@ namespace cpu {
 thread_local ThreadState* thread_state_ = nullptr;
 
 ThreadState::ThreadState(Processor* processor, uint32_t thread_id,
-                         ThreadStackType stack_type, uint32_t stack_address,
-                         uint32_t stack_size, uint32_t pcr_address)
+                         uint32_t stack_base, uint32_t pcr_address)
     : processor_(processor),
       memory_(processor->memory()),
-      thread_id_(thread_id),
-      stack_type_(stack_type),
-      name_(""),
-      backend_data_(0),
-      stack_size_(stack_size),
-      pcr_address_(pcr_address) {
+      thread_id_(thread_id) {
   if (thread_id_ == UINT_MAX) {
     // System thread. Assign the system thread ID with a high bit
     // set so people know what's up.
@@ -44,44 +38,6 @@ ThreadState::ThreadState(Processor* processor, uint32_t thread_id,
   }
 
   backend_data_ = processor->backend()->AllocThreadData();
-  if (!stack_address) {
-    // We must always allocate 64K as a guard region before stacks, as we can
-    // only Protect() on system page granularity.
-    auto heap = memory()->LookupHeap(0x40000000);
-    stack_size = (stack_size + 0xFFF) & 0xFFFFF000;
-    uint32_t stack_alignment = (stack_size & 0xF000) ? 0x1000 : 0x10000;
-    uint32_t stack_padding = heap->page_size();
-    uint32_t actual_stack_size = stack_padding + stack_size;
-    bool top_down = false;
-    switch (stack_type) {
-      case ThreadStackType::kKernelStack:
-        top_down = true;
-        break;
-      case ThreadStackType::kUserStack:
-        top_down = false;
-        break;
-      default:
-        assert_unhandled_case(stack_type);
-        break;
-    }
-    heap->AllocRange(0x40000000, 0x7FFFFFFF, actual_stack_size, stack_alignment,
-                     kMemoryAllocationReserve | kMemoryAllocationCommit,
-                     kMemoryProtectRead | kMemoryProtectWrite, top_down,
-                     &stack_address_);
-    assert_true(!(stack_address_ & 0xFFF));  // just to be safe
-    stack_allocated_ = true;
-    stack_base_ = stack_address_ + actual_stack_size;
-    stack_limit_ = stack_address_ + stack_padding;
-    memory()->Fill(stack_address_, actual_stack_size, 0xBE);
-    heap->Protect(stack_address_, stack_padding, kMemoryProtectNoAccess);
-  } else {
-    stack_address_ = stack_address;
-    stack_allocated_ = false;
-    stack_base_ = stack_address_ + stack_size;
-    stack_limit_ = stack_address_;
-  }
-  assert_not_zero(stack_address_);
-
   // Allocate with 64b alignment.
   context_ = memory::AlignedAlloc<ppc::PPCContext>(64);
   assert_true(((uint64_t)context_ & 0x3F) == 0);
@@ -96,8 +52,8 @@ ThreadState::ThreadState(Processor* processor, uint32_t thread_id,
   context_->thread_id = thread_id_;
 
   // Set initial registers.
-  context_->r[1] = stack_base_;
-  context_->r[13] = pcr_address_;
+  context_->r[1] = stack_base;
+  context_->r[13] = pcr_address;
 }
 
 ThreadState::~ThreadState() {
@@ -109,9 +65,6 @@ ThreadState::~ThreadState() {
   }
 
   memory::AlignedFree(context_);
-  if (stack_allocated_) {
-    memory()->LookupHeap(stack_address_)->Decommit(stack_address_, stack_size_);
-  }
 }
 
 void ThreadState::Bind(ThreadState* thread_state) {
diff --git a/src/xenia/cpu/thread_state.h b/src/xenia/cpu/thread_state.h
index 9e094b816..3b7a7c01d 100644
--- a/src/xenia/cpu/thread_state.h
+++ b/src/xenia/cpu/thread_state.h
@@ -21,31 +21,17 @@ namespace cpu {
 
 class Processor;
 
-enum class ThreadStackType {
-  kKernelStack,
-  kUserStack,
-};
-
 class ThreadState {
  public:
-  ThreadState(Processor* processor, uint32_t thread_id,
-              ThreadStackType stack_type, uint32_t stack_address,
-              uint32_t stack_size, uint32_t pcr_address);
+  ThreadState(Processor* processor, uint32_t thread_id, uint32_t stack_base = 0,
+              uint32_t pcr_address = 0);
   ~ThreadState();
 
   Processor* processor() const { return processor_; }
   Memory* memory() const { return memory_; }
-  uint32_t thread_id() const { return thread_id_; }
-  ThreadStackType stack_type() const { return stack_type_; }
-  const std::string& name() const { return name_; }
-  void set_name(const std::string& value) { name_ = value; }
   void* backend_data() const { return backend_data_; }
-  uint32_t stack_address() const { return stack_address_; }
-  uint32_t stack_size() const { return stack_size_; }
-  uint32_t stack_base() const { return stack_base_; }
-  uint32_t stack_limit() const { return stack_limit_; }
-  uint32_t pcr_address() const { return pcr_address_; }
   ppc::PPCContext* context() const { return context_; }
+  uint32_t thread_id() const { return thread_id_; }
 
   static void Bind(ThreadState* thread_state);
   static ThreadState* Get();
@@ -54,16 +40,10 @@ class ThreadState {
  private:
   Processor* processor_;
   Memory* memory_;
-  uint32_t thread_id_;
-  ThreadStackType stack_type_;
-  std::string name_;
   void* backend_data_;
-  uint32_t stack_address_;
-  bool stack_allocated_;
-  uint32_t stack_size_;
-  uint32_t stack_base_;
-  uint32_t stack_limit_;
-  uint32_t pcr_address_;
+
+  uint32_t pcr_address_ = 0;
+  uint32_t thread_id_ = 0;
 
   // NOTE: must be 64b aligned for SSE ops.
   ppc::PPCContext* context_;
diff --git a/src/xenia/kernel/xthread.cc b/src/xenia/kernel/xthread.cc
index 64b3e15cb..48a985ac5 100644
--- a/src/xenia/kernel/xthread.cc
+++ b/src/xenia/kernel/xthread.cc
@@ -35,6 +35,12 @@ namespace kernel {
 uint32_t next_xthread_id_ = 0;
 thread_local XThread* current_thread_tls_ = nullptr;
 
+XThread::XThread(KernelState* kernel_state)
+    : XObject(kernel_state, kTypeThread), apc_list_(kernel_state->memory()) {
+  // The kernel does not take a reference. We must unregister in the dtor.
+  kernel_state_->RegisterThread(this);
+}
+
 XThread::XThread(KernelState* kernel_state, uint32_t stack_size,
                  uint32_t xapi_thread_startup, uint32_t start_address,
                  uint32_t start_context, uint32_t creation_flags,
@@ -124,11 +130,15 @@ void XThread::set_last_error(uint32_t error_code) {
 }
 
 void XThread::set_name(const std::string& name) {
-  name_ = name;
+  StringBuffer buff;
+  buff.Append(name);
+  buff.AppendFormat(" (%.8X)", handle());
+
+  name_ = buff.ToString();
   if (thread_) {
     // May be getting set before the thread is created.
     // One the thread is ready it will handle it.
-    thread_->set_name(name);
+    thread_->set_name(buff.ToString());
   }
 }
 
@@ -149,16 +159,96 @@ uint8_t GetFakeCpuNumber(uint8_t proc_mask) {
   return cpu_number;
 }
 
+void XThread::InitializeGuestObject() {
+  auto guest_thread = guest_object<X_KTHREAD>();
+
+  // Setup the thread state block (last error/etc).
+  uint8_t* p = memory()->TranslateVirtual(guest_object());
+  guest_thread->header.type = 6;
+  guest_thread->suspend_count =
+      (creation_params_.creation_flags & X_CREATE_SUSPENDED) ? 1 : 0;
+
+  xe::store_and_swap(p + 0x010, guest_object() + 0x010);
+  xe::store_and_swap(p + 0x014, guest_object() + 0x010);
+
+  xe::store_and_swap(p + 0x040, guest_object() + 0x018 + 8);
+  xe::store_and_swap(p + 0x044, guest_object() + 0x018 + 8);
+  xe::store_and_swap(p + 0x048, guest_object());
+  xe::store_and_swap(p + 0x04C, guest_object() + 0x018);
+
+  xe::store_and_swap(p + 0x054, 0x102);
+  xe::store_and_swap(p + 0x056, 1);
+  xe::store_and_swap(p + 0x05C, stack_base_);
+  xe::store_and_swap(p + 0x060, stack_limit_);
+  xe::store_and_swap(p + 0x068, tls_address_);
+  xe::store_and_swap(p + 0x06C, 0);
+  xe::store_and_swap(p + 0x074, guest_object() + 0x074);
+  xe::store_and_swap(p + 0x078, guest_object() + 0x074);
+  xe::store_and_swap(p + 0x07C, guest_object() + 0x07C);
+  xe::store_and_swap(p + 0x080, guest_object() + 0x07C);
+  xe::store_and_swap(p + 0x084,
+                     kernel_state_->process_info_block_address());
+  xe::store_and_swap(p + 0x08B, 1);
+  // 0xD4 = APC
+  // 0xFC = semaphore (ptr, 0, 2)
+  // 0xA88 = APC
+  // 0x18 = timer
+  xe::store_and_swap(p + 0x09C, 0xFDFFD7FF);
+  xe::store_and_swap(p + 0x0D0, stack_base_);
+  xe::store_and_swap(p + 0x130, Clock::QueryGuestSystemTime());
+  xe::store_and_swap(p + 0x144, guest_object() + 0x144);
+  xe::store_and_swap(p + 0x148, guest_object() + 0x144);
+  xe::store_and_swap(p + 0x14C, thread_id_);
+  xe::store_and_swap(p + 0x150, creation_params_.start_address);
+  xe::store_and_swap(p + 0x154, guest_object() + 0x154);
+  xe::store_and_swap(p + 0x158, guest_object() + 0x154);
+  xe::store_and_swap(p + 0x160, 0);  // last error
+  xe::store_and_swap(p + 0x16C, creation_params_.creation_flags);
+  xe::store_and_swap(p + 0x17C, 1);
+}
+
+bool XThread::AllocateStack(uint32_t size) {
+  auto heap = memory()->LookupHeap(0x40000000);
+
+  auto alignment = heap->page_size();
+  auto padding = heap->page_size() * 2;  // Guard pages
+  size = xe::round_up(size, alignment);
+  auto actual_size = size + padding;
+
+  uint32_t address = 0;
+  if (!heap->AllocRange(0x40000000, 0x7F000000, actual_size, alignment,
+                        kMemoryAllocationReserve | kMemoryAllocationCommit,
+                        kMemoryProtectRead | kMemoryProtectWrite, false,
+                        &address)) {
+    return false;
+  }
+
+  stack_alloc_base_ = address;
+  stack_alloc_size_ = actual_size;
+  stack_limit_ = address + (padding / 2);
+  stack_base_ = stack_limit_ + size;
+
+  // Initialize the stack with junk
+  memory()->Fill(stack_alloc_base_, actual_size, 0xBE);
+
+  // Setup the guard pages
+  heap->Protect(stack_alloc_base_, padding / 2, kMemoryProtectNoAccess);
+  heap->Protect(stack_base_, padding / 2, kMemoryProtectNoAccess);
+
+  return true;
+}
+
 X_STATUS XThread::Create() {
   // Thread kernel object.
-  // This call will also setup the native pointer for us.
-  auto guest_thread = CreateNative<X_KTHREAD>();
-  if (!guest_thread) {
+  if (!CreateNative<X_KTHREAD>()) {
     XELOGW("Unable to allocate thread object");
     return X_STATUS_NO_MEMORY;
   }
 
-  auto module = kernel_state()->GetExecutableModule();
+  // Allocate a stack.
+  if (!AllocateStack(creation_params_.stack_size)) {
+    return X_STATUS_NO_MEMORY;
+  }
 
   // Allocate thread scratch.
   // This is used by interrupts/APCs/etc so we can round-trip pointers through.
@@ -168,6 +258,7 @@ X_STATUS XThread::Create() {
   // Allocate TLS block.
   // Games will specify a certain number of 4b slots that each thread will get.
   xex2_opt_tls_info* tls_header = nullptr;
+  auto module = kernel_state()->GetExecutableModule();
   if (module) {
     module->GetOptHeader(XEX_HEADER_TLS_INFO, &tls_header);
   }
@@ -224,84 +315,38 @@ X_STATUS XThread::Create() {
 
   // Allocate processor thread state.
   // This is thread safe.
-  thread_state_ = new cpu::ThreadState(
-      kernel_state()->processor(), thread_id_, cpu::ThreadStackType::kUserStack,
-      0, creation_params_.stack_size, pcr_address_);
-  XELOGI("XThread%08X (%X) Stack: %.8X-%.8X", handle(),
-         thread_state_->thread_id(), thread_state_->stack_limit(),
-         thread_state_->stack_base());
+  thread_state_ = new cpu::ThreadState(kernel_state()->processor(), thread_id_,
+                                       stack_base_, pcr_address_);
+  XELOGI("XThread%08X (%X) Stack: %.8X-%.8X", handle(), thread_id_,
+         stack_limit_, stack_base_);
 
   // Exports use this to get the kernel.
   thread_state_->context()->kernel_state = kernel_state_;
 
-  uint8_t proc_mask =
-      static_cast<uint8_t>(creation_params_.creation_flags >> 24);
-
   X_KPCR* pcr = memory()->TranslateVirtual<X_KPCR*>(pcr_address_);
   pcr->tls_ptr = tls_address_;
   pcr->pcr_ptr = pcr_address_;
   pcr->current_thread = guest_object();
-  pcr->stack_base_ptr =
-      thread_state_->stack_address() + thread_state_->stack_size();
-  pcr->stack_end_ptr = thread_state_->stack_address();
+  pcr->stack_base_ptr = stack_base_;
+  pcr->stack_end_ptr = stack_limit_;
+
+  uint8_t proc_mask =
+      static_cast<uint8_t>(creation_params_.creation_flags >> 24);
 
   pcr->current_cpu = GetFakeCpuNumber(proc_mask);  // Current CPU(?)
   pcr->dpc_active = 0;                             // DPC active bool?
 
-  // Setup the thread state block (last error/etc).
-  uint8_t* p = memory()->TranslateVirtual(guest_object());
-  guest_thread->header.type = 6;
-  guest_thread->suspend_count =
-      (creation_params_.creation_flags & X_CREATE_SUSPENDED) ? 1 : 0;
-
-  xe::store_and_swap(p + 0x010, guest_object() + 0x010);
-  xe::store_and_swap(p + 0x014, guest_object() + 0x010);
-
-  xe::store_and_swap(p + 0x040, guest_object() + 0x018 + 8);
-  xe::store_and_swap(p + 0x044, guest_object() + 0x018 + 8);
-  xe::store_and_swap(p + 0x048, guest_object());
-  xe::store_and_swap(p + 0x04C, guest_object() + 0x018);
-
-  xe::store_and_swap(p + 0x054, 0x102);
-  xe::store_and_swap(p + 0x056, 1);
-  xe::store_and_swap(
-      p + 0x05C, thread_state_->stack_address() + thread_state_->stack_size());
-  xe::store_and_swap(p + 0x060, thread_state_->stack_address());
-  xe::store_and_swap(p + 0x068, tls_address_);
-  xe::store_and_swap(p + 0x06C, 0);
-  xe::store_and_swap(p + 0x074, guest_object() + 0x074);
-  xe::store_and_swap(p + 0x078, guest_object() + 0x074);
-  xe::store_and_swap(p + 0x07C, guest_object() + 0x07C);
-  xe::store_and_swap(p + 0x080, guest_object() + 0x07C);
-  xe::store_and_swap(p + 0x084,
-                     kernel_state_->process_info_block_address());
-  xe::store_and_swap(p + 0x08B, 1);
-  // 0xD4 = APC
-  // 0xFC = semaphore (ptr, 0, 2)
-  // 0xA88 = APC
-  // 0x18 = timer
-  xe::store_and_swap(p + 0x09C, 0xFDFFD7FF);
-  xe::store_and_swap(
-      p + 0x0D0, thread_state_->stack_address() + thread_state_->stack_size());
-  xe::store_and_swap(p + 0x130, Clock::QueryGuestSystemTime());
-  xe::store_and_swap(p + 0x144, guest_object() + 0x144);
-  xe::store_and_swap(p + 0x148, guest_object() + 0x144);
-  xe::store_and_swap(p + 0x14C, thread_id_);
-  xe::store_and_swap(p + 0x150, creation_params_.start_address);
-  xe::store_and_swap(p + 0x154, guest_object() + 0x154);
-  xe::store_and_swap(p + 0x158, guest_object() + 0x154);
-  xe::store_and_swap(p + 0x160, 0);  // last error
-  xe::store_and_swap(p + 0x16C, creation_params_.creation_flags);
-  xe::store_and_swap(p + 0x17C, 1);
+  // Initialize the KTHREAD object.
+  InitializeGuestObject();
 
   // Always retain when starting - the thread owns itself until exited.
   Retain();
   RetainHandle();
 
   xe::threading::Thread::CreationParameters params;
-  params.stack_size = 16 * 1024 * 1024;  // Ignore game, always big!
+  params.stack_size = 16 * 1024 * 1024;  // Allocate a big host stack.
   params.create_suspended = true;
   thread_ = xe::threading::Thread::Create(params, [this]() {
     // Set thread ID override. This is used by logging.
@@ -334,8 +379,8 @@ X_STATUS XThread::Create() {
     // Set the thread name based on host ID (for easier debugging).
     if (name_.empty()) {
       char thread_name[32];
-      snprintf(thread_name, xe::countof(thread_name), "XThread%04X (%04X)",
-               handle(), thread_->system_id());
+      snprintf(thread_name, xe::countof(thread_name), "XThread%.04X",
+               thread_->system_id());
      set_name(thread_name);
     }
 
diff --git a/src/xenia/kernel/xthread.h b/src/xenia/kernel/xthread.h
index 8dc9e8058..fbb881ee0 100644
--- a/src/xenia/kernel/xthread.h
+++ b/src/xenia/kernel/xthread.h
@@ -113,6 +113,7 @@ class XThread : public XObject {
     uint32_t creation_flags;
   };
 
+  XThread(KernelState* kernel_state);
   XThread(KernelState* kernel_state, uint32_t stack_size,
           uint32_t xapi_thread_startup, uint32_t start_address,
           uint32_t start_context, uint32_t creation_flags, bool guest_thread);
@@ -174,13 +175,15 @@ class XThread : public XObject {
 
   uint32_t suspend_count();
   X_STATUS Resume(uint32_t* out_suspend_count = nullptr);
-  X_STATUS Suspend(uint32_t* out_suspend_count);
+  X_STATUS Suspend(uint32_t* out_suspend_count = nullptr);
   X_STATUS Delay(uint32_t processor_mode, uint32_t alertable,
                  uint64_t interval);
 
   xe::threading::WaitHandle* GetWaitHandle() override { return thread_.get(); }
 
 protected:
+  bool AllocateStack(uint32_t size);
+  void InitializeGuestObject();
   void DeliverAPCs();
   void RundownAPCs();
 
@@ -192,6 +195,10 @@ class XThread : public XObject {
   uint32_t scratch_size_ = 0;
   uint32_t tls_address_ = 0;
   uint32_t pcr_address_ = 0;
+  uint32_t stack_alloc_base_ = 0;  // Stack alloc base
+  uint32_t stack_alloc_size_ = 0;  // Stack alloc size
+  uint32_t stack_base_ = 0;        // High address
+  uint32_t stack_limit_ = 0;       // Low address
   cpu::ThreadState* thread_state_ = nullptr;
   bool guest_thread_ = false;
   bool can_debugger_suspend_ = true;
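
After this change, cpu::ThreadState no longer allocates or owns the guest stack: XThread::AllocateStack reserves it in the 0x40000000 heap with a no-access guard page on each side, fills it with 0xBE, and the resulting stack_base_/pcr_address_ are handed to the slimmed-down constructor, which records them and seeds r1/r13 in the PPC context. Below is a minimal sketch of that flow; it uses only calls that appear in the diff above, and the free-function framing, the name CreateGuestThreadState, and its parameter list are illustrative, not part of this patch. It is assumed to live in namespace xe::kernel (like xthread.cc) so Memory, cpu::ThreadState, and the kMemory* flags resolve as they do in the diff.

    // Sketch only: mirrors the guard-page stack scheme XThread::AllocateStack
    // introduces above, then constructs the trimmed-down cpu::ThreadState.
    cpu::ThreadState* CreateGuestThreadState(Memory* memory,
                                             cpu::Processor* processor,
                                             uint32_t thread_id,
                                             uint32_t pcr_address,
                                             uint32_t stack_size) {
      auto heap = memory->LookupHeap(0x40000000);
      uint32_t alignment = heap->page_size();
      uint32_t padding = heap->page_size() * 2;  // One guard page per side.
      stack_size = xe::round_up(stack_size, alignment);
      uint32_t actual_size = stack_size + padding;

      // Reserve and commit the stack plus guard pages, as AllocateStack does.
      uint32_t address = 0;
      if (!heap->AllocRange(0x40000000, 0x7F000000, actual_size, alignment,
                            kMemoryAllocationReserve | kMemoryAllocationCommit,
                            kMemoryProtectRead | kMemoryProtectWrite, false,
                            &address)) {
        return nullptr;
      }

      uint32_t stack_limit = address + (padding / 2);  // Low address.
      uint32_t stack_base = stack_limit + stack_size;  // High address; seeds r1.

      // Poison the range for debugging, then make both guard pages no-access
      // so overruns in either direction fault immediately.
      memory->Fill(address, actual_size, 0xBE);
      heap->Protect(address, padding / 2, kMemoryProtectNoAccess);
      heap->Protect(stack_base, padding / 2, kMemoryProtectNoAccess);

      // The constructor now just stores these values and sets r1/r13.
      return new cpu::ThreadState(processor, thread_id, stack_base, pcr_address);
    }

Splitting allocation out of ThreadState this way is also what lets the PPC testing harnesses above pass in a stack address they picked themselves (or, in cpu/testing/util.h, defer the allocation behind assert_always() for now).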