diff --git a/src/xenia/kernel/xthread.cc b/src/xenia/kernel/xthread.cc index 48a985ac5..685326540 100644 --- a/src/xenia/kernel/xthread.cc +++ b/src/xenia/kernel/xthread.cc @@ -13,12 +13,16 @@ #include +#include "xenia/base/byte_stream.h" #include "xenia/base/clock.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" #include "xenia/base/profiling.h" #include "xenia/base/threading.h" +#include "xenia/cpu/breakpoint.h" +#include "xenia/cpu/frontend/ppc_instr.h" #include "xenia/cpu/processor.h" +#include "xenia/cpu/stack_walker.h" #include "xenia/emulator.h" #include "xenia/kernel/kernel_state.h" #include "xenia/kernel/user_module.h" @@ -36,18 +40,17 @@ uint32_t next_xthread_id_ = 0; thread_local XThread* current_thread_tls_ = nullptr; XThread::XThread(KernelState* kernel_state) - : XObject(kernel_state, kTypeThread), apc_list_(kernel_state->memory()) { - // The kernel does not take a reference. We must unregister in the dtor. - kernel_state_->RegisterThread(this); -} + : XObject(kernel_state, kTypeThread), + guest_thread_(true) {} XThread::XThread(KernelState* kernel_state, uint32_t stack_size, uint32_t xapi_thread_startup, uint32_t start_address, uint32_t start_context, uint32_t creation_flags, - bool guest_thread) + bool guest_thread, bool main_thread) : XObject(kernel_state, kTypeThread), thread_id_(++next_xthread_id_), guest_thread_(guest_thread), + main_thread_(main_thread), apc_list_(kernel_state->memory()) { creation_params_.stack_size = stack_size; creation_params_.xapi_thread_startup = xapi_thread_startup; @@ -63,6 +66,10 @@ XThread::XThread(KernelState* kernel_state, uint32_t stack_size, creation_params_.stack_size = 16 * 1024; } + if (!guest_thread_) { + host_object_ = true; + } + // The kernel does not take a reference. We must unregister in the dtor. kernel_state_->RegisterThread(this); } @@ -754,6 +761,376 @@ X_STATUS XThread::Delay(uint32_t processor_mode, uint32_t alertable, } } +bool XThread::StepToAddress(uint32_t pc) { + auto functions = emulator()->processor()->FindFunctionsWithAddress(pc); + if (functions.empty()) { + // Function hasn't been generated yet. Generate it. + if (!emulator()->processor()->ResolveFunction(pc)) { + XELOGE("XThread::StepToAddress(%.8X) - Function could not be resolved", + pc); + return false; + } + } + + // Instruct the thread to step forwards. + threading::Fence fence; + cpu::Breakpoint bp(kernel_state()->processor(), pc, + [&fence](uint32_t guest_address, uint64_t host_address) { + fence.Signal(); + }); + if (bp.Install()) { + thread_->Resume(); + fence.Wait(); + bp.Uninstall(); + } else { + assert_always(); + XELOGE("XThread: Could not install breakpoint to step forward!"); + return false; + } + + return true; +} + +uint32_t XThread::StepIntoBranch(uint32_t pc) { + cpu::frontend::InstrData i; + i.address = pc; + i.code = xe::load_and_swap(memory()->TranslateVirtual(i.address)); + i.type = cpu::frontend::GetInstrType(i.code); + + auto context = thread_state_->context(); + + if (i.type->type & (1 << 4) /* BranchAlways */) { + if (i.type->opcode == 0x48000000) { + // bx + uint32_t nia = 0; + if (i.I.AA) { + nia = (uint32_t)cpu::frontend::XEEXTS26(i.I.LI << 2); + } else { + nia = + i.address + (uint32_t)cpu::frontend::XEEXTS26(i.I.LI << 2); + } + + StepToAddress(nia); + } else { + // Unknown opcode. + assert_always(); + } + } else if (i.type->type & (1 << 3) /* BranchCond */) { + threading::Fence fence; + auto callback = [&fence, &pc](uint32_t guest_pc, uint64_t) { + pc = guest_pc; + fence.Signal(); + }; + + cpu::Breakpoint bpt(kernel_state()->processor(), callback); + cpu::Breakpoint bpf(kernel_state()->processor(), pc + 4, callback); + if (!bpf.Install()) { + // FIXME: This won't work on non-conditional conditional branches. + XELOGE("XThread: Could not install breakpoint to step forward!"); + assert_always(); + return 0; + } + + uint32_t nia = 0; + if (i.type->opcode == 0x40000000) { + // bcx + if (i.B.AA) { + nia = (uint32_t)cpu::frontend::XEEXTS16(i.B.BD << 2); + } else { + nia = (uint32_t)(i.address + cpu::frontend::XEEXTS16(i.B.BD << 2)); + } + } else if (i.type->opcode == 0x4C000420) { + // bcctrx + nia = uint32_t(context->ctr); + } else if (i.type->opcode == 0x4C000020) { + // bclrx + nia = uint32_t(context->lr); + } else { + // Unknown opcode. + assert_always(); + } + + bpt.set_address(nia); + if (!bpt.Install()) { + assert_always(); + return 0; + } + + thread_->Resume(); + fence.Wait(); + bpf.Uninstall(); + bpt.Uninstall(); + } + + return pc; +} + +uint32_t XThread::StepToSafePoint() { + // This cannot be done if we're the calling thread! + if (IsInThread() && GetCurrentThread() == this) { + assert_always( + "XThread::StepToSafePoint(): target thread is the calling thread!"); + return 0; + } + + // Now the fun part begins: Registers are only guaranteed to be synchronized + // with the PPC context at a basic block boundary. Unfortunately, we most + // likely stopped the thread at some point other than a boundary. We need to + // step forward until we reach a boundary, and then perform the save. + auto stack_walker = kernel_state()->processor()->stack_walker(); + + uint64_t frame_host_pcs[64]; + cpu::StackFrame cpu_frames[64]; + size_t count = stack_walker->CaptureStackTrace( + thread_->native_handle(), frame_host_pcs, 0, xe::countof(frame_host_pcs), + nullptr, nullptr); + stack_walker->ResolveStack(frame_host_pcs, cpu_frames, count); + if (count == 0) { + return 0; + } + + // Check if we're in guest code or host code. + uint32_t pc = 0; + if (cpu_frames[0].type == cpu::StackFrame::Type::kGuest) { + pc = cpu_frames[0].guest_pc; + + // We're in guest code. + // First: Find a synchronizing instruction and go to it. + cpu::frontend::InstrData i; + i.address = cpu_frames[0].guest_pc - 4; + do { + i.address += 4; + i.code = + xe::load_and_swap(memory()->TranslateVirtual(i.address)); + i.type = cpu::frontend::GetInstrType(i.code); + } while ((i.type->type & (cpu::frontend::kXEPPCInstrTypeSynchronizeContext | + cpu::frontend::kXEPPCInstrTypeBranch)) == 0); + + if (i.address != pc) { + StepToAddress(i.address); + pc = i.address; + } + + // Okay. Now we're on a synchronizing instruction but we need to step + // past it in order to get a synchronized context. + // If we're on a branching instruction, it's guaranteed only going to have + // two possible targets. For non-branching instructions, we can just step + // over them. + if (i.type->type & cpu::frontend::kXEPPCInstrTypeBranch) { + pc = StepIntoBranch(i.address); + } + } else { + // We're in host code. Search backwards til we can get an idea of where + // we are. + cpu::GuestFunction* thunk_func = nullptr; + cpu::Export* export_data = nullptr; + for (int i = 0; i < count; i++) { + auto& frame = cpu_frames[i]; + if (frame.type == cpu::StackFrame::Type::kGuest && frame.guest_pc) { + auto func = frame.guest_symbol.function; + assert_true(func->is_guest()); + + thunk_func = reinterpret_cast(func); + export_data = thunk_func->export_data(); + if (export_data) { + break; + } + } + } + + // If the export is blocking, we wrap up and save inside the export thunk. + // When we're restored, we'll call the blocking export again. + // Otherwise, we return from the thunk and save. + if (export_data && export_data->tags & cpu::ExportTag::kBlocking) { + pc = thunk_func->address(); + } else if (export_data) { + // Non-blocking. Run until we return from the thunk. + StepToAddress(uint32_t(thread_state_->context()->lr)); + } else { + // We're in the MMIO handler or something. TODO. + assert_always(); + } + } + + return pc; +} + +struct ThreadSavedState { + uint32_t thread_id; + bool main_thread; + uint32_t tls_address; + uint32_t pcr_address; + uint32_t stack_base; // High address + uint32_t stack_limit; // Low address + uint32_t stack_alloc_base; // Allocation address + uint32_t stack_alloc_size; // Allocation size + struct { + uint64_t lr; + uint64_t ctr; + uint64_t r[32]; + double f[32]; + vec128_t v[128]; + uint32_t cr[8]; + uint32_t fpscr; + uint8_t xer_ca; + uint8_t xer_ov; + uint8_t xer_so; + uint8_t vscr_sat; + uint32_t pc; + } context; +}; + +bool XThread::Save(ByteStream* stream) { + if (!guest_thread_) { + // Host XThreads are expected to be recreated on their own. + return false; + } + + XELOGD("XThread %.8X serializing...", handle()); + + uint32_t pc = StepToSafePoint(); + if (!pc) { + XELOGE("XThread %.8X failed to save: could not step to a safe point!", + handle()); + return false; + } + + if (!SaveObject(stream)) { + return false; + } + + stream->Write('THRD'); + stream->Write(name_); + + ThreadSavedState state; + state.thread_id = thread_id_; + state.main_thread = main_thread_; + state.tls_address = tls_address_; + state.pcr_address = pcr_address_; + state.stack_base = stack_base_; + state.stack_limit = stack_limit_; + state.stack_alloc_base = stack_alloc_base_; + state.stack_alloc_size = stack_alloc_size_; + + // Context information + auto context = thread_state_->context(); + state.context.lr = context->lr; + state.context.ctr = context->ctr; + std::memcpy(state.context.r, context->r, 32 * 8); + std::memcpy(state.context.f, context->f, 32 * 8); + std::memcpy(state.context.v, context->v, 128 * 16); + state.context.cr[0] = context->cr0.value; + state.context.cr[1] = context->cr1.value; + state.context.cr[2] = context->cr2.value; + state.context.cr[3] = context->cr3.value; + state.context.cr[4] = context->cr4.value; + state.context.cr[5] = context->cr5.value; + state.context.cr[6] = context->cr6.value; + state.context.cr[7] = context->cr7.value; + state.context.fpscr = context->fpscr.value; + state.context.xer_ca = context->xer_ca; + state.context.xer_ov = context->xer_ov; + state.context.xer_so = context->xer_so; + state.context.vscr_sat = context->vscr_sat; + state.context.pc = pc; + + stream->Write(&state, sizeof(ThreadSavedState)); + return true; +} + +object_ref XThread::Restore(KernelState* kernel_state, + ByteStream* stream) { + // Kind-of a hack, but we need to set the kernel state outside of the object + // constructor so it doesn't register a handle with the object table. + auto thread = new XThread(nullptr); + thread->kernel_state_ = kernel_state; + + if (!thread->RestoreObject(stream)) { + return false; + } + + if (stream->Read() != 'THRD') { + XELOGE("Could not restore XThread - invalid magic!"); + return false; + } + + thread->name_ = stream->Read(); + + ThreadSavedState state; + stream->Read(&state, sizeof(ThreadSavedState)); + thread->thread_id_ = state.thread_id; + thread->main_thread_ = state.main_thread; + thread->tls_address_ = state.tls_address; + thread->pcr_address_ = state.pcr_address; + thread->stack_base_ = state.stack_base; + thread->stack_limit_ = state.stack_limit; + thread->stack_alloc_base_ = state.stack_alloc_base; + thread->stack_alloc_size_ = state.stack_alloc_size; + + // Register now that we know our thread ID. + kernel_state->RegisterThread(thread); + + thread->thread_state_ = + new cpu::ThreadState(kernel_state->processor(), thread->thread_id_, + thread->stack_base_, thread->pcr_address_); + auto context = thread->thread_state_->context(); + context->kernel_state = kernel_state; + context->lr = state.context.lr; + context->ctr = state.context.ctr; + std::memcpy(context->r, state.context.r, 32 * 8); + std::memcpy(context->f, state.context.f, 32 * 8); + std::memcpy(context->v, state.context.v, 128 * 16); + context->cr0.value = state.context.cr[0]; + context->cr1.value = state.context.cr[1]; + context->cr2.value = state.context.cr[2]; + context->cr3.value = state.context.cr[3]; + context->cr4.value = state.context.cr[4]; + context->cr5.value = state.context.cr[5]; + context->cr6.value = state.context.cr[6]; + context->cr7.value = state.context.cr[7]; + context->fpscr.value = state.context.fpscr; + context->xer_ca = state.context.xer_ca; + context->xer_ov = state.context.xer_ov; + context->xer_so = state.context.xer_so; + context->vscr_sat = state.context.vscr_sat; + auto pc = state.context.pc; + + // Always retain when starting - the thread owns itself until exited. + thread->Retain(); + thread->RetainHandle(); + + xe::threading::Thread::CreationParameters params; + params.create_suspended = true; // Not done restoring yet. + params.stack_size = 16 * 1024 * 1024; + thread->thread_ = xe::threading::Thread::Create(params, [thread, pc]() { + // Set thread ID override. This is used by logging. + xe::threading::set_current_thread_id(thread->handle()); + + // Set name immediately, if we have one. + thread->thread_->set_name(thread->name()); + + // Profiler needs to know about the thread. + xe::Profiler::ThreadEnter(thread->name().c_str()); + + // Execute user code. + thread->running_ = true; + current_thread_tls_ = thread; + thread->kernel_state()->processor()->ExecuteRaw(thread->thread_state_, pc); + current_thread_tls_ = nullptr; + + xe::Profiler::ThreadExit(); + + // Release the self-reference to the thread. + thread->Release(); + }); + + if (thread->emulator()->debugger()) { + thread->emulator()->debugger()->OnThreadCreated(thread); + } + + return retain_object(thread); +} + XHostThread::XHostThread(KernelState* kernel_state, uint32_t stack_size, uint32_t creation_flags, std::function host_fn) : XThread(kernel_state, stack_size, 0, 0, 0, creation_flags, false), diff --git a/src/xenia/kernel/xthread.h b/src/xenia/kernel/xthread.h index fbb881ee0..c94fd1fea 100644 --- a/src/xenia/kernel/xthread.h +++ b/src/xenia/kernel/xthread.h @@ -116,7 +116,8 @@ class XThread : public XObject { XThread(KernelState* kernel_state); XThread(KernelState* kernel_state, uint32_t stack_size, uint32_t xapi_thread_startup, uint32_t start_address, - uint32_t start_context, uint32_t creation_flags, bool guest_thread); + uint32_t start_context, uint32_t creation_flags, bool guest_thread, + bool main_thread = false); ~XThread() override; static bool IsInThread(XThread* other); @@ -133,6 +134,7 @@ class XThread : public XObject { uint32_t pcr_ptr() const { return pcr_address_; } // True if the thread is created by the guest app. bool is_guest_thread() const { return guest_thread_; } + bool main_thread() const { return main_thread_; } // True if the thread should be paused by the debugger. // All threads that can run guest code must be stopped for the debugger to // work properly. @@ -180,10 +182,22 @@ class XThread : public XObject { uint64_t interval); xe::threading::WaitHandle* GetWaitHandle() override { return thread_.get(); } + xe::threading::Thread* thread() { return thread_.get(); } + + virtual bool Save(ByteStream* stream) override; + static object_ref Restore(KernelState* kernel_state, + ByteStream* stream); + + // Steps the thread to a point where it's safe to terminate or read its + // context. Returns the PC after we've finished stepping. + uint32_t StepToSafePoint(); protected: bool AllocateStack(uint32_t size); void InitializeGuestObject(); + + bool StepToAddress(uint32_t pc); + uint32_t StepIntoBranch(uint32_t pc); void DeliverAPCs(); void RundownAPCs(); @@ -201,6 +215,7 @@ class XThread : public XObject { uint32_t stack_limit_ = 0; // Low address cpu::ThreadState* thread_state_ = nullptr; bool guest_thread_ = false; + bool main_thread_ = false; // Entry-point thread bool can_debugger_suspend_ = true; bool running_ = false;