From e657276996e4f82bbeaa0532b960f01102780fd3 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Wed, 29 Jul 2015 00:15:52 -0700 Subject: [PATCH] Mostly working stack walking (besides issue #372). --- src/xenia/cpu/backend/code_cache.h | 9 + src/xenia/cpu/backend/x64/x64_assembler.cc | 4 +- src/xenia/cpu/backend/x64/x64_backend.cc | 6 +- src/xenia/cpu/backend/x64/x64_code_cache.cc | 49 ++- src/xenia/cpu/backend/x64/x64_code_cache.h | 24 +- .../cpu/backend/x64/x64_code_cache_win.cc | 12 +- src/xenia/cpu/backend/x64/x64_emitter.cc | 15 +- src/xenia/cpu/backend/x64/x64_emitter.h | 5 +- src/xenia/cpu/processor.cc | 8 + src/xenia/cpu/processor.h | 4 + src/xenia/cpu/stack_walker.h | 95 ++++++ src/xenia/cpu/stack_walker_win.cc | 286 ++++++++++++++++++ src/xenia/debug/debugger.cc | 35 +++ src/xenia/debug/debugger.h | 2 + 14 files changed, 531 insertions(+), 23 deletions(-) create mode 100644 src/xenia/cpu/stack_walker.h create mode 100644 src/xenia/cpu/stack_walker_win.cc diff --git a/src/xenia/cpu/backend/code_cache.h b/src/xenia/cpu/backend/code_cache.h index c00e3302a..cc7949ac3 100644 --- a/src/xenia/cpu/backend/code_cache.h +++ b/src/xenia/cpu/backend/code_cache.h @@ -12,6 +12,8 @@ #include +#include "xenia/cpu/symbol_info.h" + namespace xe { namespace cpu { namespace backend { @@ -24,6 +26,13 @@ class CodeCache { virtual std::wstring file_name() const = 0; virtual uint32_t base_address() const = 0; virtual uint32_t total_size() const = 0; + + // Finds a function based on the given host PC (that may be within a + // function). + virtual FunctionInfo* LookupFunction(uint64_t host_pc) = 0; + + // Finds platform-specific function unwind info for the given host PC. 
+ virtual void* LookupUnwindInfo(uint64_t host_pc) = 0; }; } // namespace backend diff --git a/src/xenia/cpu/backend/x64/x64_assembler.cc b/src/xenia/cpu/backend/x64/x64_assembler.cc index 3e30356c8..b585bf20b 100644 --- a/src/xenia/cpu/backend/x64/x64_assembler.cc +++ b/src/xenia/cpu/backend/x64/x64_assembler.cc @@ -93,8 +93,8 @@ bool X64Assembler::Assemble(FunctionInfo* symbol_info, HIRBuilder* builder, // Lower HIR -> x64. void* machine_code = nullptr; size_t code_size = 0; - if (!emitter_->Emit(symbol_info->address(), builder, debug_info_flags, - debug_info.get(), machine_code, code_size)) { + if (!emitter_->Emit(symbol_info, builder, debug_info_flags, debug_info.get(), + machine_code, code_size)) { return false; } diff --git a/src/xenia/cpu/backend/x64/x64_backend.cc b/src/xenia/cpu/backend/x64/x64_backend.cc index 126102347..99c1bfa19 100644 --- a/src/xenia/cpu/backend/x64/x64_backend.cc +++ b/src/xenia/cpu/backend/x64/x64_backend.cc @@ -178,7 +178,7 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() { mov(r8, qword[rsp + 8 * 3]); ret(); - void* fn = Emplace(0, stack_size); + void* fn = Emplace(stack_size); return (HostToGuestThunk)fn; } @@ -228,7 +228,7 @@ GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() { mov(rdx, qword[rsp + 8 * 2]); ret(); - void* fn = Emplace(0, stack_size); + void* fn = Emplace(stack_size); return (HostToGuestThunk)fn; } @@ -274,7 +274,7 @@ ResolveFunctionThunk X64ThunkEmitter::EmitResolveFunctionThunk() { mov(rdx, qword[rsp + 8 * 2]); jmp(rax); - void* fn = Emplace(0, stack_size); + void* fn = Emplace(stack_size); return (ResolveFunctionThunk)fn; } diff --git a/src/xenia/cpu/backend/x64/x64_code_cache.cc b/src/xenia/cpu/backend/x64/x64_code_cache.cc index 81e2f5975..8fd5eda51 100644 --- a/src/xenia/cpu/backend/x64/x64_code_cache.cc +++ b/src/xenia/cpu/backend/x64/x64_code_cache.cc @@ -78,6 +78,9 @@ bool X64CodeCache::Initialize() { return false; } + // Preallocate the function map to a large, reasonable size. 
+ generated_code_map_.reserve(kMaximumFunctionCount); + return true; } @@ -107,8 +110,17 @@ void X64CodeCache::CommitExecutableRange(uint32_t guest_low, } } -void* X64CodeCache::PlaceCode(uint32_t guest_address, void* machine_code, - size_t code_size, size_t stack_size) { +void* X64CodeCache::PlaceHostCode(uint32_t guest_address, void* machine_code, + size_t code_size, size_t stack_size) { + // Same for now. We may use different pools or whatnot later on, like when + // we only want to place guest code in a serialized cache on disk. + return PlaceGuestCode(guest_address, machine_code, code_size, stack_size, + nullptr); +} + +void* X64CodeCache::PlaceGuestCode(uint32_t guest_address, void* machine_code, + size_t code_size, size_t stack_size, + FunctionInfo* function_info) { // Hold a lock while we bump the pointers up. This is important as the // unwind table requires entries AND code to be sorted in order. size_t low_mark; @@ -133,6 +145,13 @@ void* X64CodeCache::PlaceCode(uint32_t guest_address, void* machine_code, generated_code_offset_ += unwind_reservation.data_size; high_mark = generated_code_offset_; + + // Store in map. It is maintained in sorted order of host PC dependent on + // us also being append-only. 
+ generated_code_map_.emplace_back( + (uint64_t(code_address - generated_code_base_) << 32) | + generated_code_offset_, + function_info); } // If we are going above the high water mark of committed memory, commit some @@ -201,6 +220,32 @@ uint32_t X64CodeCache::PlaceData(const void* data, size_t length) { return uint32_t(uintptr_t(data_address)); } +FunctionInfo* X64CodeCache::LookupFunction(uint64_t host_pc) { + uint32_t key = uint32_t(host_pc - kGeneratedCodeBase); + void* fn_entry = std::bsearch( + &key, generated_code_map_.data(), generated_code_map_.size(), + sizeof(std::pair), + [](const void* key_ptr, const void* element_ptr) { + auto key = *reinterpret_cast(key_ptr); + auto element = + reinterpret_cast*>( + element_ptr); + if (key < (element->first >> 32)) { + return -1; + } else if (key > uint32_t(element->first)) { + return 1; + } else { + return 0; + } + }); + if (fn_entry) { + return reinterpret_cast*>(fn_entry) + ->second; + } else { + return nullptr; + } +} + } // namespace x64 } // namespace backend } // namespace cpu diff --git a/src/xenia/cpu/backend/x64/x64_code_cache.h b/src/xenia/cpu/backend/x64/x64_code_cache.h index a60993fd1..0c2f83659 100644 --- a/src/xenia/cpu/backend/x64/x64_code_cache.h +++ b/src/xenia/cpu/backend/x64/x64_code_cache.h @@ -46,17 +46,31 @@ class X64CodeCache : public CodeCache { void CommitExecutableRange(uint32_t guest_low, uint32_t guest_high); - void* PlaceCode(uint32_t guest_address, void* machine_code, size_t code_size, - size_t stack_size); - + void* PlaceHostCode(uint32_t guest_address, void* machine_code, + size_t code_size, size_t stack_size); + void* PlaceGuestCode(uint32_t guest_address, void* machine_code, + size_t code_size, size_t stack_size, + FunctionInfo* function_info); uint32_t PlaceData(const void* data, size_t length); + FunctionInfo* LookupFunction(uint64_t host_pc) override; + protected: + // All executable code falls within 0x80000000 to 0x9FFFFFFF, so we can + // only map enough for lookups 
within that range. const static uint64_t kIndirectionTableBase = 0x80000000; const static uint64_t kIndirectionTableSize = 0x1FFFFFFF; + // The code range is 512MB, but we know the total code games will have is + // pretty small (dozens of mb at most) and our expansion is reasonablish + // so 256MB should be more than enough. const static uint64_t kGeneratedCodeBase = 0xA0000000; const static uint64_t kGeneratedCodeSize = 0x0FFFFFFF; + // This is picked to be high enough to cover whatever we can reasonably + // expect. If we hit issues with this it probably means some corner case + // in analysis triggering. + const static size_t kMaximumFunctionCount = 30000; + struct UnwindReservation { size_t data_size = 0; size_t table_slot = 0; @@ -94,6 +108,10 @@ class X64CodeCache : public CodeCache { size_t generated_code_offset_ = 0; // Current high water mark of COMMITTED code. std::atomic generated_code_commit_mark_ = {0}; + // Sorted map by host PC base offsets to source function info. + // This can be used to bsearch on host PC to find the guest function. + // The key is [start address | end address]. 
+ std::vector> generated_code_map_; }; } // namespace x64 diff --git a/src/xenia/cpu/backend/x64/x64_code_cache_win.cc b/src/xenia/cpu/backend/x64/x64_code_cache_win.cc index 7ae55cab7..a8cb5333d 100644 --- a/src/xenia/cpu/backend/x64/x64_code_cache_win.cc +++ b/src/xenia/cpu/backend/x64/x64_code_cache_win.cc @@ -39,6 +39,8 @@ class Win32X64CodeCache : public X64CodeCache { bool Initialize() override; + void* LookupUnwindInfo(uint64_t host_pc) override; + private: UnwindReservation RequestUnwindReservation(uint8_t* entry_address) override; void PlaceCode(uint32_t guest_address, void* machine_code, size_t code_size, @@ -48,7 +50,6 @@ class Win32X64CodeCache : public X64CodeCache { void InitializeUnwindEntry(uint8_t* unwind_entry_address, size_t unwind_table_slot, void* code_address, size_t code_size, size_t stack_size); - void* LookupUnwindEntry(uintptr_t host_address); // Growable function table system handle. void* unwind_table_handle_ = nullptr; @@ -84,7 +85,7 @@ bool Win32X64CodeCache::Initialize() { // Compute total number of unwind entries we should allocate. // We don't support reallocing right now, so this should be high. - unwind_table_.resize(30000); + unwind_table_.resize(kMaximumFunctionCount); #ifdef USE_GROWABLE_FUNCTION_TABLE // Create table and register with the system. 
It's empty now, but we'll grow @@ -268,9 +269,9 @@ void Win32X64CodeCache::InitializeUnwindEntry(uint8_t* unwind_entry_address, fn_entry.UnwindData = (DWORD)(unwind_entry_address - generated_code_base_); } -void* Win32X64CodeCache::LookupUnwindEntry(uintptr_t host_address) { - void* fn_entry = std::bsearch( - &host_address, unwind_table_.data(), unwind_table_count_ + 1, +void* Win32X64CodeCache::LookupUnwindInfo(uint64_t host_pc) { + return std::bsearch( + &host_pc, unwind_table_.data(), unwind_table_count_ + 1, sizeof(RUNTIME_FUNCTION), [](const void* key_ptr, const void* element_ptr) { auto key = @@ -284,7 +285,6 @@ void* Win32X64CodeCache::LookupUnwindEntry(uintptr_t host_address) { return 0; } }); - return reinterpret_cast(fn_entry); } } // namespace x64 diff --git a/src/xenia/cpu/backend/x64/x64_emitter.cc b/src/xenia/cpu/backend/x64/x64_emitter.cc index d43b9bcff..0b1d9d32c 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.cc +++ b/src/xenia/cpu/backend/x64/x64_emitter.cc @@ -93,7 +93,7 @@ X64Emitter::X64Emitter(X64Backend* backend, XbyakAllocator* allocator) X64Emitter::~X64Emitter() = default; -bool X64Emitter::Emit(uint32_t guest_address, HIRBuilder* builder, +bool X64Emitter::Emit(FunctionInfo* function_info, HIRBuilder* builder, uint32_t debug_info_flags, DebugInfo* debug_info, void*& out_code_address, size_t& out_code_size) { SCOPE_profile_cpu_f("cpu"); @@ -114,7 +114,7 @@ bool X64Emitter::Emit(uint32_t guest_address, HIRBuilder* builder, // Copy the final code to the cache and relocate it. out_code_size = getSize(); - out_code_address = Emplace(guest_address, stack_size); + out_code_address = Emplace(stack_size, function_info); // Stash source map. 
if (debug_info_flags_ & DebugInfoFlags::kDebugInfoSourceMap) { @@ -125,14 +125,19 @@ bool X64Emitter::Emit(uint32_t guest_address, HIRBuilder* builder, return true; } -void* X64Emitter::Emplace(uint32_t guest_address, size_t stack_size) { +void* X64Emitter::Emplace(size_t stack_size, FunctionInfo* function_info) { // To avoid changing xbyak, we do a switcharoo here. // top_ points to the Xbyak buffer, and since we are in AutoGrow mode // it has pending relocations. We copy the top_ to our buffer, swap the // pointer, relocate, then return the original scratch pointer for use. uint8_t* old_address = top_; - void* new_address = - code_cache_->PlaceCode(guest_address, top_, size_, stack_size); + void* new_address; + if (function_info) { + new_address = code_cache_->PlaceGuestCode(function_info->address(), top_, + size_, stack_size, function_info); + } else { + new_address = code_cache_->PlaceHostCode(0, top_, size_, stack_size); + } top_ = (uint8_t*)new_address; ready(); top_ = old_address; diff --git a/src/xenia/cpu/backend/x64/x64_emitter.h b/src/xenia/cpu/backend/x64/x64_emitter.h index abda0ee4d..dd45c8a5a 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.h +++ b/src/xenia/cpu/backend/x64/x64_emitter.h @@ -14,6 +14,7 @@ #include "xenia/cpu/hir/hir_builder.h" #include "xenia/cpu/hir/instr.h" #include "xenia/cpu/hir/value.h" +#include "xenia/cpu/symbol_info.h" #include "xenia/debug/function_trace_data.h" #include "xenia/memory.h" @@ -114,7 +115,7 @@ class X64Emitter : public Xbyak::CodeGenerator { Processor* processor() const { return processor_; } X64Backend* backend() const { return backend_; } - bool Emit(uint32_t guest_address, hir::HIRBuilder* builder, + bool Emit(FunctionInfo* function_info, hir::HIRBuilder* builder, uint32_t debug_info_flags, DebugInfo* debug_info, void*& out_code_address, size_t& out_code_size); @@ -192,7 +193,7 @@ class X64Emitter : public Xbyak::CodeGenerator { size_t stack_size() const { return stack_size_; } protected: - void* 
Emplace(uint32_t guest_address, size_t stack_size); + void* Emplace(size_t stack_size, FunctionInfo* function_info = nullptr); bool Emit(hir::HIRBuilder* builder, size_t& out_stack_size); void EmitGetCurrentThreadId(); void EmitTraceUserCallReturn(); diff --git a/src/xenia/cpu/processor.cc b/src/xenia/cpu/processor.cc index 7f11e0dd0..d914c2c29 100644 --- a/src/xenia/cpu/processor.cc +++ b/src/xenia/cpu/processor.cc @@ -20,6 +20,7 @@ #include "xenia/cpu/export_resolver.h" #include "xenia/cpu/frontend/ppc_frontend.h" #include "xenia/cpu/module.h" +#include "xenia/cpu/stack_walker.h" #include "xenia/cpu/thread_state.h" #include "xenia/cpu/xex_module.h" #include "xenia/debug/debugger.h" @@ -106,6 +107,13 @@ bool Processor::Setup() { backend_ = std::move(backend); frontend_ = std::move(frontend); + // Stack walker is used when profiling, debugging, and dumping. + stack_walker_ = StackWalker::Create(backend_->code_cache()); + if (!stack_walker_) { + XELOGE("Unable to create stack walker"); + return false; + } + return true; } diff --git a/src/xenia/cpu/processor.h b/src/xenia/cpu/processor.h index 0300d17fd..a5ef6da5c 100644 --- a/src/xenia/cpu/processor.h +++ b/src/xenia/cpu/processor.h @@ -10,6 +10,7 @@ #ifndef XENIA_CPU_PROCESSOR_H_ #define XENIA_CPU_PROCESSOR_H_ +#include #include #include @@ -32,6 +33,7 @@ class Debugger; namespace xe { namespace cpu { +class StackWalker; class ThreadState; class XexModule; @@ -50,6 +52,7 @@ class Processor { Memory* memory() const { return memory_; } debug::Debugger* debugger() const { return debugger_; } + StackWalker* stack_walker() const { return stack_walker_.get(); } frontend::PPCFrontend* frontend() const { return frontend_.get(); } backend::Backend* backend() const { return backend_.get(); } ExportResolver* export_resolver() const { return export_resolver_; } @@ -90,6 +93,7 @@ class Processor { Memory* memory_ = nullptr; debug::Debugger* debugger_ = nullptr; + std::unique_ptr stack_walker_; uint32_t debug_info_flags_ = 0; 
diff --git a/src/xenia/cpu/stack_walker.h b/src/xenia/cpu/stack_walker.h new file mode 100644 index 000000000..eafdb8825 --- /dev/null +++ b/src/xenia/cpu/stack_walker.h @@ -0,0 +1,95 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_CPU_STACK_WALKER_H_ +#define XENIA_CPU_STACK_WALKER_H_ + +#include +#include + +#include "xenia/cpu/symbol_info.h" + +namespace xe { +namespace cpu { +namespace backend { +class CodeCache; +} // namespace backend +} // namespace cpu +} // namespace xe + +namespace xe { +namespace cpu { + +struct StackFrame { + enum class Type { + // Host frame, likely in kernel or emulator code. + kHost, + // Guest frame, somewhere in PPC code. + kGuest, + }; + Type type; + + // Always valid, indicating the address in a backend-defined range. + uint64_t host_pc; + // Only valid for kGuest frames, indicating the PPC address. + uint32_t guest_pc; + + union { + // Contains symbol information for kHost frames. + struct { + // TODO(benvanik): better name, displacement, etc. + char name[256]; + } host_symbol; + // Contains symbol information for kGuest frames. + struct { + FunctionInfo* function_info; + } guest_symbol; + }; +}; + +class StackWalker { + public: + // Creates a stack walker. Only one should exist within a process. + static std::unique_ptr Create(backend::CodeCache* code_cache); + + // Dumps all thread stacks to the log. + void Dump(); + + // Captures up to the given number of stack frames from the current thread. + // Use ResolveStackTrace to populate additional information. + // Returns the number of frames captured, or 0 if an error occurred. 
+ // Optionally provides a hash value for the stack that can be used for + // deduping. + virtual size_t CaptureStackTrace(uint64_t* frame_host_pcs, + size_t frame_offset, size_t frame_count, + uint64_t* out_stack_hash = nullptr) = 0; + + // Captures up to the given number of stack frames from the given thread, + // referenced by native thread handle. The thread must be suspended. + // This does not populate any information other than host_pc. + // Use ResolveStackTrace to populate additional information. + // Returns the number of frames captured, or 0 if an error occurred. + // Optionally provides a hash value for the stack that can be used for + // deduping. + virtual size_t CaptureStackTrace(void* thread_handle, + uint64_t* frame_host_pcs, + size_t frame_offset, size_t frame_count, + uint64_t* out_stack_hash = nullptr) = 0; + + // Resolves symbol information for the given stack frames. + // Each frame provided must have host_pc set, and all other fields will be + // populated. + virtual bool ResolveStack(uint64_t* frame_host_pcs, StackFrame* frames, + size_t frame_count) = 0; +}; + +} // namespace cpu +} // namespace xe + +#endif // XENIA_CPU_STACK_WALKER_H_ diff --git a/src/xenia/cpu/stack_walker_win.cc b/src/xenia/cpu/stack_walker_win.cc new file mode 100644 index 000000000..d831bbfb8 --- /dev/null +++ b/src/xenia/cpu/stack_walker_win.cc @@ -0,0 +1,286 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/cpu/stack_walker.h" + +#include + +#include + +#include "xenia/base/logging.h" +#include "xenia/base/platform_win.h" +#include "xenia/cpu/backend/backend.h" +#include "xenia/cpu/backend/code_cache.h" +#include "xenia/cpu/processor.h" + +DEFINE_bool(debug_symbol_loader, false, + "Enable dbghelp debug logging and validation."); + +// Must be included after platform_win.h: +#pragma warning(push) +#pragma warning(disable : 4091) +#include +#pragma warning(pop) + +typedef DWORD(__stdcall* LPSYMGETOPTIONS)(VOID); +typedef DWORD(__stdcall* LPSYMSETOPTIONS)(IN DWORD SymOptions); +typedef BOOL(__stdcall* LPSYMINITIALIZE)(IN HANDLE hProcess, + IN PSTR UserSearchPath, + IN BOOL fInvadeProcess); +typedef BOOL(__stdcall* LPSTACKWALK64)( + DWORD MachineType, HANDLE hProcess, HANDLE hThread, + LPSTACKFRAME64 StackFrame, PVOID ContextRecord, + PREAD_PROCESS_MEMORY_ROUTINE64 ReadMemoryRoutine, + PFUNCTION_TABLE_ACCESS_ROUTINE64 FunctionTableAccessRoutine, + PGET_MODULE_BASE_ROUTINE64 GetModuleBaseRoutine, + PTRANSLATE_ADDRESS_ROUTINE64 TranslateAddress); +typedef PVOID(__stdcall* LPSYMFUNCTIONTABLEACCESS64)( + HANDLE hProcess, + DWORD64 AddrBase); // DbgHelp.h typedef PFUNCTION_TABLE_ACCESS_ROUTINE64 +typedef DWORD64(__stdcall* LPSYMGETMODULEBASE64)( + HANDLE hProcess, + DWORD64 AddrBase); // DbgHelp.h typedef PGET_MODULE_BASE_ROUTINE64 +typedef BOOL(__stdcall* LPSYMGETSYMFROMADDR64)(IN HANDLE hProcess, + IN DWORD64 qwAddr, + OUT PDWORD64 pdwDisplacement, + OUT PIMAGEHLP_SYMBOL64 Symbol); + +LPSYMGETOPTIONS sym_get_options_ = nullptr; +LPSYMSETOPTIONS sym_set_options_ = nullptr; +LPSYMINITIALIZE sym_initialize_ = nullptr; +LPSTACKWALK64 stack_walk_64_ = nullptr; +LPSYMFUNCTIONTABLEACCESS64 sym_function_table_access_64_ = nullptr; +LPSYMGETMODULEBASE64 sym_get_module_base_64_ = nullptr; +LPSYMGETSYMFROMADDR64 sym_get_sym_from_addr_64_ = nullptr; + +namespace xe { +namespace cpu 
{ + +class Win32StackWalker : public StackWalker { + public: + Win32StackWalker(backend::CodeCache* code_cache) { + // Get the boundaries of the code cache so we can quickly tell if a symbol + // is ours or not. + // We store these globally so that the Sym* callbacks can access them. + // They never change, so it's fine even if they are touched from multiple + // threads. + code_cache_ = code_cache; + code_cache_min_ = code_cache_->base_address(); + code_cache_max_ = code_cache_->base_address() + code_cache_->total_size(); + } + + bool Initialize() { + std::lock_guard lock(dbghelp_mutex_); + + // Attempt to load dbghelp. + // NOTE: we never free it. That's fine. + HMODULE module = LoadLibrary(TEXT("dbghelp.dll")); + if (!module) { + XELOGE("Unable to load dbghelp.dll - not found on path or invalid"); + return false; + } + sym_get_options_ = reinterpret_cast( + GetProcAddress(module, "SymGetOptions")); + sym_set_options_ = reinterpret_cast( + GetProcAddress(module, "SymSetOptions")); + sym_initialize_ = reinterpret_cast( + GetProcAddress(module, "SymInitialize")); + stack_walk_64_ = + reinterpret_cast(GetProcAddress(module, "StackWalk64")); + sym_function_table_access_64_ = + reinterpret_cast( + GetProcAddress(module, "SymFunctionTableAccess64")); + sym_get_module_base_64_ = reinterpret_cast( + GetProcAddress(module, "SymGetModuleBase64")); + sym_get_sym_from_addr_64_ = reinterpret_cast( + GetProcAddress(module, "SymGetSymFromAddr64")); + if (!sym_get_options_ || !sym_set_options_ || !sym_initialize_ || + !stack_walk_64_ || !sym_function_table_access_64_ || + !sym_get_module_base_64_ || !sym_get_sym_from_addr_64_) { + XELOGE("Unable to get one or more symbols from dbghelp.dll"); + return false; + } + + // Initialize the symbol lookup services. 
+ DWORD options = sym_get_options_(); + if (FLAGS_debug_symbol_loader) { + options |= SYMOPT_DEBUG; + } + options |= SYMOPT_DEFERRED_LOADS; + options |= SYMOPT_LOAD_LINES; + options |= SYMOPT_FAIL_CRITICAL_ERRORS; + sym_set_options_(options); + if (!sym_initialize_(GetCurrentProcess(), nullptr, TRUE)) { + XELOGE("Unable to initialize symbol services"); + return false; + } + + return true; + } + + size_t CaptureStackTrace(uint64_t* frame_host_pcs, size_t frame_offset, + size_t frame_count, + uint64_t* out_stack_hash) override { + // out_stack_hash is optional (may be nullptr); only store through it below. + // Simple method: captures just stack frame PC addresses, optionally + // computing a whole-stack hash. + ULONG back_trace_hash = 0; + DWORD frames_to_skip = DWORD(frame_offset) + 1; + DWORD frames_to_capture = + std::min(DWORD(frame_count), UINT16_MAX - frames_to_skip); + USHORT captured_count = CaptureStackBackTrace( + frames_to_skip, frames_to_capture, + reinterpret_cast(frame_host_pcs), &back_trace_hash); + if (out_stack_hash) { + *out_stack_hash = back_trace_hash; + } + return captured_count; + } + + size_t CaptureStackTrace(void* thread_handle, uint64_t* frame_host_pcs, + size_t frame_offset, size_t frame_count, + uint64_t* out_stack_hash) override { + // Query context. Thread must be suspended. + // Need at least CONTEXT_CONTROL (for rip and rsp) and CONTEXT_INTEGER (for + // rbp). + CONTEXT thread_context; + thread_context.ContextFlags = CONTEXT_FULL; + if (!GetThreadContext(thread_handle, &thread_context)) { + XELOGE("Unable to read thread context for stack walk"); + return 0; + } + + // Setup the frame for walking. + STACKFRAME64 stack_frame = {0}; + stack_frame.AddrPC.Mode = AddrModeFlat; + stack_frame.AddrPC.Offset = thread_context.Rip; + stack_frame.AddrFrame.Mode = AddrModeFlat; + stack_frame.AddrFrame.Offset = thread_context.Rbp; + stack_frame.AddrStack.Mode = AddrModeFlat; + stack_frame.AddrStack.Offset = thread_context.Rsp; + + // Walk the stack. 
+ // Note that StackWalk64 is thread safe, though other dbghelp functions are + // not. + size_t frame_index = 0; + while (frame_index < frame_count && + stack_walk_64_(IMAGE_FILE_MACHINE_AMD64, GetCurrentProcess(), + thread_handle, &stack_frame, &thread_context, nullptr, + XSymFunctionTableAccess64, XSymGetModuleBase64, + nullptr) == TRUE) { + if (frame_index >= frame_offset) { + frame_host_pcs[frame_index - frame_offset] = stack_frame.AddrPC.Offset; + } + ++frame_index; + } + + return frame_index > frame_offset ? frame_index - frame_offset : 0; + } + + bool ResolveStack(uint64_t* frame_host_pcs, StackFrame* frames, + size_t frame_count) override { + // TODO(benvanik): collect symbols to resolve with dbghelp and resolve + // afterward in a smaller lock. + std::lock_guard lock(dbghelp_mutex_); + + for (size_t i = 0; i < frame_count; ++i) { + auto& frame = frames[i]; + frame.host_pc = frame_host_pcs[i]; + frame.host_symbol.name[0] = 0; + frame.guest_pc = 0; + frame.guest_symbol.function_info = nullptr; + + // If in the generated range, we know it's ours. + if (frame.host_pc >= code_cache_min_ && frame.host_pc < code_cache_max_) { + // Guest symbol, so we can look it up quickly in the code cache. + frame.type = StackFrame::Type::kGuest; + auto function_info = code_cache_->LookupFunction(frame.host_pc); + if (function_info) { + frame.guest_symbol.function_info = function_info; + // Figure out where in guest code we are by looking up the + // displacement in x64 from the JIT'ed code start to the PC. + uint32_t host_displacement = + uint32_t(frame.host_pc) - + uint32_t(uint64_t(function_info->function()->machine_code())); + auto entry = + function_info->function()->debug_info()->LookupCodeOffset( + host_displacement); + frame.guest_pc = entry->source_offset; + } else { + frame.guest_symbol.function_info = nullptr; + } + } else { + // Host symbol, which means either emulator or system. 
+ frame.type = StackFrame::Type::kHost; + // TODO(benvanik): cache so that we can avoid calling into dbghelp (and + // taking the lock). + union { + IMAGEHLP_SYMBOL64 info; + uint8_t buffer[sizeof(IMAGEHLP_SYMBOL64) + + MAX_SYM_NAME * sizeof(CHAR) + sizeof(ULONG64) - 1]; + } symbol; + symbol.info.SizeOfStruct = sizeof(IMAGEHLP_SYMBOL64); + symbol.info.MaxNameLength = MAX_SYM_NAME; + uint64_t displacement = 0; + if (sym_get_sym_from_addr_64_(GetCurrentProcess(), frame.host_pc, + &displacement, &symbol.info)) { + // Resolved successfully. + // TODO(benvanik): stash: module, base, displacement, name? + strncpy_s(frame.host_symbol.name, symbol.info.Name, _TRUNCATE); + } + } + } + return true; + } + + private: + static PVOID WINAPI XSymFunctionTableAccess64(__in HANDLE hProcess, + __in DWORD64 AddrBase) { + if (AddrBase >= code_cache_min_ && AddrBase < code_cache_max_) { + // Within our generated range so ask code cache. + return code_cache_->LookupUnwindInfo(AddrBase); + } + // Normal symbol lookup. + return sym_function_table_access_64_(hProcess, AddrBase); + } + + static DWORD64 WINAPI XSymGetModuleBase64(_In_ HANDLE hProcess, + _In_ DWORD64 dwAddr) { + if (dwAddr >= code_cache_min_ && dwAddr < code_cache_max_) { + // In our generated range all addresses are relative to the code cache + // base. + return code_cache_min_; + } + // Normal module base lookup. 
+ return sym_get_module_base_64_(hProcess, dwAddr); + } + + std::mutex dbghelp_mutex_; + + static xe::cpu::backend::CodeCache* code_cache_; + static uint32_t code_cache_min_; + static uint32_t code_cache_max_; +}; + +xe::cpu::backend::CodeCache* Win32StackWalker::code_cache_ = nullptr; +uint32_t Win32StackWalker::code_cache_min_ = 0; +uint32_t Win32StackWalker::code_cache_max_ = 0; + +std::unique_ptr StackWalker::Create( + backend::CodeCache* code_cache) { + auto stack_walker = std::make_unique(code_cache); + if (!stack_walker->Initialize()) { + XELOGE("Unable to initialize stack walker"); + return nullptr; + } + return std::unique_ptr(stack_walker.release()); +} + +} // namespace cpu +} // namespace xe diff --git a/src/xenia/debug/debugger.cc b/src/xenia/debug/debugger.cc index 6a2bae882..fe097b9bb 100644 --- a/src/xenia/debug/debugger.cc +++ b/src/xenia/debug/debugger.cc @@ -29,6 +29,8 @@ #include "xenia/kernel/objects/xthread.h" #include "xenia/kernel/objects/xuser_module.h" +#include "xenia/cpu/stack_walker.h" + #if 0 && DEBUG #define DEFAULT_DEBUG_FLAG true #else @@ -164,6 +166,35 @@ uint8_t* Debugger::AllocateFunctionTraceData(size_t size) { return functions_trace_file_->Allocate(size); } +void Debugger::DumpThreadStacks() { + auto stack_walker = emulator()->processor()->stack_walker(); + auto threads = + emulator_->kernel_state()->object_table()->GetObjectsByType( + XObject::kTypeThread); + for (auto& thread : threads) { + XELOGI("Thread %s (%s)", thread->name().c_str(), + thread->is_guest_thread() ? 
"guest" : "host"); + uint64_t frame_host_pcs[64]; + uint64_t hash; + size_t count = stack_walker->CaptureStackTrace( + thread->GetWaitHandle()->native_handle(), frame_host_pcs, 0, 64, &hash); + cpu::StackFrame frames[64]; + stack_walker->ResolveStack(frame_host_pcs, frames, count); + for (size_t i = 0; i < count; ++i) { + auto& frame = frames[i]; + if (frame.type == cpu::StackFrame::Type::kHost) { + XELOGI(" %.2lld %.16llX %s", count - i - 1, frame.host_pc, + frame.host_symbol.name); + } else { + auto function_info = frame.guest_symbol.function_info; + XELOGI(" %.2lld %.16llX %.8X %s", count - i - 1, frame.host_pc, + frame.guest_pc, + function_info ? function_info->name().c_str() : "?"); + } + } + } +} + int Debugger::AddBreakpoint(Breakpoint* breakpoint) { // Add to breakpoints map. { @@ -274,6 +305,10 @@ void Debugger::Interrupt() { SuspendAllThreads(); execution_state_ = ExecutionState::kStopped; server_->OnExecutionInterrupted(); + + // TEST CODE. + // TODO(benvanik): remove when UI shows threads. + DumpThreadStacks(); } void Debugger::Continue() { diff --git a/src/xenia/debug/debugger.h b/src/xenia/debug/debugger.h index 82f044e7d..68ac64bad 100644 --- a/src/xenia/debug/debugger.h +++ b/src/xenia/debug/debugger.h @@ -64,6 +64,8 @@ class Debugger { ExecutionState execution_state() const { return execution_state_; } + void DumpThreadStacks(); + int AddBreakpoint(Breakpoint* breakpoint); int RemoveBreakpoint(Breakpoint* breakpoint); void FindBreakpoints(uint32_t address,