Mostly working stack walking (besides issue #372).
This commit is contained in:
parent
e01c2ac98d
commit
e657276996
|
@ -12,6 +12,8 @@
|
|||
|
||||
#include <string>
|
||||
|
||||
#include "xenia/cpu/symbol_info.h"
|
||||
|
||||
namespace xe {
|
||||
namespace cpu {
|
||||
namespace backend {
|
||||
|
@ -24,6 +26,13 @@ class CodeCache {
|
|||
virtual std::wstring file_name() const = 0;
|
||||
virtual uint32_t base_address() const = 0;
|
||||
virtual uint32_t total_size() const = 0;
|
||||
|
||||
// Finds a function based on the given host PC (that may be within a
|
||||
// function).
|
||||
virtual FunctionInfo* LookupFunction(uint64_t host_pc) = 0;
|
||||
|
||||
// Finds platform-specific function unwind info for the given host PC.
|
||||
virtual void* LookupUnwindInfo(uint64_t host_pc) = 0;
|
||||
};
|
||||
|
||||
} // namespace backend
|
||||
|
|
|
@ -93,8 +93,8 @@ bool X64Assembler::Assemble(FunctionInfo* symbol_info, HIRBuilder* builder,
|
|||
// Lower HIR -> x64.
|
||||
void* machine_code = nullptr;
|
||||
size_t code_size = 0;
|
||||
if (!emitter_->Emit(symbol_info->address(), builder, debug_info_flags,
|
||||
debug_info.get(), machine_code, code_size)) {
|
||||
if (!emitter_->Emit(symbol_info, builder, debug_info_flags, debug_info.get(),
|
||||
machine_code, code_size)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -178,7 +178,7 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
|
|||
mov(r8, qword[rsp + 8 * 3]);
|
||||
ret();
|
||||
|
||||
void* fn = Emplace(0, stack_size);
|
||||
void* fn = Emplace(stack_size);
|
||||
return (HostToGuestThunk)fn;
|
||||
}
|
||||
|
||||
|
@ -228,7 +228,7 @@ GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() {
|
|||
mov(rdx, qword[rsp + 8 * 2]);
|
||||
ret();
|
||||
|
||||
void* fn = Emplace(0, stack_size);
|
||||
void* fn = Emplace(stack_size);
|
||||
return (HostToGuestThunk)fn;
|
||||
}
|
||||
|
||||
|
@ -274,7 +274,7 @@ ResolveFunctionThunk X64ThunkEmitter::EmitResolveFunctionThunk() {
|
|||
mov(rdx, qword[rsp + 8 * 2]);
|
||||
jmp(rax);
|
||||
|
||||
void* fn = Emplace(0, stack_size);
|
||||
void* fn = Emplace(stack_size);
|
||||
return (ResolveFunctionThunk)fn;
|
||||
}
|
||||
|
||||
|
|
|
@ -78,6 +78,9 @@ bool X64CodeCache::Initialize() {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Preallocate the function map to a large, reasonable size.
|
||||
generated_code_map_.reserve(kMaximumFunctionCount);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -107,8 +110,17 @@ void X64CodeCache::CommitExecutableRange(uint32_t guest_low,
|
|||
}
|
||||
}
|
||||
|
||||
void* X64CodeCache::PlaceCode(uint32_t guest_address, void* machine_code,
|
||||
void* X64CodeCache::PlaceHostCode(uint32_t guest_address, void* machine_code,
|
||||
size_t code_size, size_t stack_size) {
|
||||
// Same for now. We may use different pools or whatnot later on, like when
|
||||
// we only want to place guest code in a serialized cache on disk.
|
||||
return PlaceGuestCode(guest_address, machine_code, code_size, stack_size,
|
||||
nullptr);
|
||||
}
|
||||
|
||||
void* X64CodeCache::PlaceGuestCode(uint32_t guest_address, void* machine_code,
|
||||
size_t code_size, size_t stack_size,
|
||||
FunctionInfo* function_info) {
|
||||
// Hold a lock while we bump the pointers up. This is important as the
|
||||
// unwind table requires entries AND code to be sorted in order.
|
||||
size_t low_mark;
|
||||
|
@ -133,6 +145,13 @@ void* X64CodeCache::PlaceCode(uint32_t guest_address, void* machine_code,
|
|||
generated_code_offset_ += unwind_reservation.data_size;
|
||||
|
||||
high_mark = generated_code_offset_;
|
||||
|
||||
// Store in map. It is maintained in sorted order of host PC dependent on
|
||||
// us also being append-only.
|
||||
generated_code_map_.emplace_back(
|
||||
(uint64_t(code_address - generated_code_base_) << 32) |
|
||||
generated_code_offset_,
|
||||
function_info);
|
||||
}
|
||||
|
||||
// If we are going above the high water mark of committed memory, commit some
|
||||
|
@ -201,6 +220,32 @@ uint32_t X64CodeCache::PlaceData(const void* data, size_t length) {
|
|||
return uint32_t(uintptr_t(data_address));
|
||||
}
|
||||
|
||||
// Finds the function whose generated code range contains the given host PC.
//
// generated_code_map_ entries are keyed as [start offset << 32 | end offset],
// and the map is append-only so it stays sorted by host PC. The end offset is
// the code bump pointer taken after the allocation, so it is treated as
// exclusive here.
//
// Returns nullptr if the PC is outside of the generated code range, is not
// covered by any entry, or is covered by an entry that was placed without
// function info (host code).
FunctionInfo* X64CodeCache::LookupFunction(uint64_t host_pc) {
  using CodeMapEntry = std::pair<uint64_t, FunctionInfo*>;

  // Reject PCs outside of the generated code range up front; truncating their
  // offset to 32 bits below could otherwise alias them onto a real entry.
  if (host_pc < kGeneratedCodeBase) {
    return nullptr;
  }
  const uint64_t code_offset = host_pc - kGeneratedCodeBase;
  if (code_offset > kGeneratedCodeSize || generated_code_map_.empty()) {
    return nullptr;
  }
  const uint32_t key = uint32_t(code_offset);

  // Search exactly size() elements (searching size() + 1 reads one element
  // past the end of the vector) using the real element stride.
  void* fn_entry = std::bsearch(
      &key, generated_code_map_.data(), generated_code_map_.size(),
      sizeof(CodeMapEntry),
      [](const void* key_ptr, const void* element_ptr) -> int {
        const uint32_t lookup_key = *reinterpret_cast<const uint32_t*>(key_ptr);
        const auto* element =
            reinterpret_cast<const CodeMapEntry*>(element_ptr);
        const uint32_t range_start = uint32_t(element->first >> 32);
        const uint32_t range_end = uint32_t(element->first);
        if (lookup_key < range_start) {
          return -1;
        } else if (lookup_key >= range_end) {
          // End is exclusive: it is the start of the next placed range.
          return 1;
        } else {
          return 0;
        }
      });
  if (!fn_entry) {
    return nullptr;
  }
  return reinterpret_cast<const CodeMapEntry*>(fn_entry)->second;
}
|
||||
|
||||
} // namespace x64
|
||||
} // namespace backend
|
||||
} // namespace cpu
|
||||
|
|
|
@ -46,17 +46,31 @@ class X64CodeCache : public CodeCache {
|
|||
|
||||
void CommitExecutableRange(uint32_t guest_low, uint32_t guest_high);
|
||||
|
||||
void* PlaceCode(uint32_t guest_address, void* machine_code, size_t code_size,
|
||||
size_t stack_size);
|
||||
|
||||
void* PlaceHostCode(uint32_t guest_address, void* machine_code,
|
||||
size_t code_size, size_t stack_size);
|
||||
void* PlaceGuestCode(uint32_t guest_address, void* machine_code,
|
||||
size_t code_size, size_t stack_size,
|
||||
FunctionInfo* function_info);
|
||||
uint32_t PlaceData(const void* data, size_t length);
|
||||
|
||||
FunctionInfo* LookupFunction(uint64_t host_pc) override;
|
||||
|
||||
protected:
|
||||
// All executable code falls within 0x80000000 to 0x9FFFFFFF, so we can
|
||||
// only map enough for lookups within that range.
|
||||
const static uint64_t kIndirectionTableBase = 0x80000000;
|
||||
const static uint64_t kIndirectionTableSize = 0x1FFFFFFF;
|
||||
// The code range is 512MB, but we know the total code games will have is
|
||||
// pretty small (dozens of mb at most) and our expansion is reasonablish
|
||||
// so 256MB should be more than enough.
|
||||
const static uint64_t kGeneratedCodeBase = 0xA0000000;
|
||||
const static uint64_t kGeneratedCodeSize = 0x0FFFFFFF;
|
||||
|
||||
// This is picked to be high enough to cover whatever we can reasonably
|
||||
// expect. If we hit issues with this it probably means some corner case
|
||||
// in analysis triggering.
|
||||
const static size_t kMaximumFunctionCount = 30000;
|
||||
|
||||
struct UnwindReservation {
|
||||
size_t data_size = 0;
|
||||
size_t table_slot = 0;
|
||||
|
@ -94,6 +108,10 @@ class X64CodeCache : public CodeCache {
|
|||
size_t generated_code_offset_ = 0;
|
||||
// Current high water mark of COMMITTED code.
|
||||
std::atomic<size_t> generated_code_commit_mark_ = {0};
|
||||
// Sorted map by host PC base offsets to source function info.
|
||||
// This can be used to bsearch on host PC to find the guest function.
|
||||
// The key is [start address | end address].
|
||||
std::vector<std::pair<uint64_t, FunctionInfo*>> generated_code_map_;
|
||||
};
|
||||
|
||||
} // namespace x64
|
||||
|
|
|
@ -39,6 +39,8 @@ class Win32X64CodeCache : public X64CodeCache {
|
|||
|
||||
bool Initialize() override;
|
||||
|
||||
void* LookupUnwindInfo(uint64_t host_pc) override;
|
||||
|
||||
private:
|
||||
UnwindReservation RequestUnwindReservation(uint8_t* entry_address) override;
|
||||
void PlaceCode(uint32_t guest_address, void* machine_code, size_t code_size,
|
||||
|
@ -48,7 +50,6 @@ class Win32X64CodeCache : public X64CodeCache {
|
|||
void InitializeUnwindEntry(uint8_t* unwind_entry_address,
|
||||
size_t unwind_table_slot, void* code_address,
|
||||
size_t code_size, size_t stack_size);
|
||||
void* LookupUnwindEntry(uintptr_t host_address);
|
||||
|
||||
// Growable function table system handle.
|
||||
void* unwind_table_handle_ = nullptr;
|
||||
|
@ -84,7 +85,7 @@ bool Win32X64CodeCache::Initialize() {
|
|||
|
||||
// Compute total number of unwind entries we should allocate.
|
||||
// We don't support reallocing right now, so this should be high.
|
||||
unwind_table_.resize(30000);
|
||||
unwind_table_.resize(kMaximumFunctionCount);
|
||||
|
||||
#ifdef USE_GROWABLE_FUNCTION_TABLE
|
||||
// Create table and register with the system. It's empty now, but we'll grow
|
||||
|
@ -268,9 +269,9 @@ void Win32X64CodeCache::InitializeUnwindEntry(uint8_t* unwind_entry_address,
|
|||
fn_entry.UnwindData = (DWORD)(unwind_entry_address - generated_code_base_);
|
||||
}
|
||||
|
||||
void* Win32X64CodeCache::LookupUnwindEntry(uintptr_t host_address) {
|
||||
void* fn_entry = std::bsearch(
|
||||
&host_address, unwind_table_.data(), unwind_table_count_ + 1,
|
||||
void* Win32X64CodeCache::LookupUnwindInfo(uint64_t host_pc) {
|
||||
return std::bsearch(
|
||||
&host_pc, unwind_table_.data(), unwind_table_count_ + 1,
|
||||
sizeof(RUNTIME_FUNCTION),
|
||||
[](const void* key_ptr, const void* element_ptr) {
|
||||
auto key =
|
||||
|
@ -284,7 +285,6 @@ void* Win32X64CodeCache::LookupUnwindEntry(uintptr_t host_address) {
|
|||
return 0;
|
||||
}
|
||||
});
|
||||
return reinterpret_cast<RUNTIME_FUNCTION*>(fn_entry);
|
||||
}
|
||||
|
||||
} // namespace x64
|
||||
|
|
|
@ -93,7 +93,7 @@ X64Emitter::X64Emitter(X64Backend* backend, XbyakAllocator* allocator)
|
|||
|
||||
X64Emitter::~X64Emitter() = default;
|
||||
|
||||
bool X64Emitter::Emit(uint32_t guest_address, HIRBuilder* builder,
|
||||
bool X64Emitter::Emit(FunctionInfo* function_info, HIRBuilder* builder,
|
||||
uint32_t debug_info_flags, DebugInfo* debug_info,
|
||||
void*& out_code_address, size_t& out_code_size) {
|
||||
SCOPE_profile_cpu_f("cpu");
|
||||
|
@ -114,7 +114,7 @@ bool X64Emitter::Emit(uint32_t guest_address, HIRBuilder* builder,
|
|||
|
||||
// Copy the final code to the cache and relocate it.
|
||||
out_code_size = getSize();
|
||||
out_code_address = Emplace(guest_address, stack_size);
|
||||
out_code_address = Emplace(stack_size, function_info);
|
||||
|
||||
// Stash source map.
|
||||
if (debug_info_flags_ & DebugInfoFlags::kDebugInfoSourceMap) {
|
||||
|
@ -125,14 +125,19 @@ bool X64Emitter::Emit(uint32_t guest_address, HIRBuilder* builder,
|
|||
return true;
|
||||
}
|
||||
|
||||
void* X64Emitter::Emplace(uint32_t guest_address, size_t stack_size) {
|
||||
void* X64Emitter::Emplace(size_t stack_size, FunctionInfo* function_info) {
|
||||
// To avoid changing xbyak, we do a switcharoo here.
|
||||
// top_ points to the Xbyak buffer, and since we are in AutoGrow mode
|
||||
// it has pending relocations. We copy the top_ to our buffer, swap the
|
||||
// pointer, relocate, then return the original scratch pointer for use.
|
||||
uint8_t* old_address = top_;
|
||||
void* new_address =
|
||||
code_cache_->PlaceCode(guest_address, top_, size_, stack_size);
|
||||
void* new_address;
|
||||
if (function_info) {
|
||||
new_address = code_cache_->PlaceGuestCode(function_info->address(), top_,
|
||||
size_, stack_size, function_info);
|
||||
} else {
|
||||
new_address = code_cache_->PlaceHostCode(0, top_, size_, stack_size);
|
||||
}
|
||||
top_ = (uint8_t*)new_address;
|
||||
ready();
|
||||
top_ = old_address;
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include "xenia/cpu/hir/hir_builder.h"
|
||||
#include "xenia/cpu/hir/instr.h"
|
||||
#include "xenia/cpu/hir/value.h"
|
||||
#include "xenia/cpu/symbol_info.h"
|
||||
#include "xenia/debug/function_trace_data.h"
|
||||
#include "xenia/memory.h"
|
||||
|
||||
|
@ -114,7 +115,7 @@ class X64Emitter : public Xbyak::CodeGenerator {
|
|||
Processor* processor() const { return processor_; }
|
||||
X64Backend* backend() const { return backend_; }
|
||||
|
||||
bool Emit(uint32_t guest_address, hir::HIRBuilder* builder,
|
||||
bool Emit(FunctionInfo* function_info, hir::HIRBuilder* builder,
|
||||
uint32_t debug_info_flags, DebugInfo* debug_info,
|
||||
void*& out_code_address, size_t& out_code_size);
|
||||
|
||||
|
@ -192,7 +193,7 @@ class X64Emitter : public Xbyak::CodeGenerator {
|
|||
size_t stack_size() const { return stack_size_; }
|
||||
|
||||
protected:
|
||||
void* Emplace(uint32_t guest_address, size_t stack_size);
|
||||
void* Emplace(size_t stack_size, FunctionInfo* function_info = nullptr);
|
||||
bool Emit(hir::HIRBuilder* builder, size_t& out_stack_size);
|
||||
void EmitGetCurrentThreadId();
|
||||
void EmitTraceUserCallReturn();
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "xenia/cpu/export_resolver.h"
|
||||
#include "xenia/cpu/frontend/ppc_frontend.h"
|
||||
#include "xenia/cpu/module.h"
|
||||
#include "xenia/cpu/stack_walker.h"
|
||||
#include "xenia/cpu/thread_state.h"
|
||||
#include "xenia/cpu/xex_module.h"
|
||||
#include "xenia/debug/debugger.h"
|
||||
|
@ -106,6 +107,13 @@ bool Processor::Setup() {
|
|||
backend_ = std::move(backend);
|
||||
frontend_ = std::move(frontend);
|
||||
|
||||
// Stack walker is used when profiling, debugging, and dumping.
|
||||
stack_walker_ = StackWalker::Create(backend_->code_cache());
|
||||
if (!stack_walker_) {
|
||||
XELOGE("Unable to create stack walker");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#ifndef XENIA_CPU_PROCESSOR_H_
|
||||
#define XENIA_CPU_PROCESSOR_H_
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
|
@ -32,6 +33,7 @@ class Debugger;
|
|||
namespace xe {
|
||||
namespace cpu {
|
||||
|
||||
class StackWalker;
|
||||
class ThreadState;
|
||||
class XexModule;
|
||||
|
||||
|
@ -50,6 +52,7 @@ class Processor {
|
|||
|
||||
Memory* memory() const { return memory_; }
|
||||
debug::Debugger* debugger() const { return debugger_; }
|
||||
StackWalker* stack_walker() const { return stack_walker_.get(); }
|
||||
frontend::PPCFrontend* frontend() const { return frontend_.get(); }
|
||||
backend::Backend* backend() const { return backend_.get(); }
|
||||
ExportResolver* export_resolver() const { return export_resolver_; }
|
||||
|
@ -90,6 +93,7 @@ class Processor {
|
|||
|
||||
Memory* memory_ = nullptr;
|
||||
debug::Debugger* debugger_ = nullptr;
|
||||
std::unique_ptr<StackWalker> stack_walker_;
|
||||
|
||||
uint32_t debug_info_flags_ = 0;
|
||||
|
||||
|
|
|
@ -0,0 +1,95 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2015 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_CPU_STACK_WALKER_H_
|
||||
#define XENIA_CPU_STACK_WALKER_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "xenia/cpu/symbol_info.h"
|
||||
|
||||
namespace xe {
|
||||
namespace cpu {
|
||||
namespace backend {
|
||||
class CodeCache;
|
||||
} // namespace backend
|
||||
} // namespace cpu
|
||||
} // namespace xe
|
||||
|
||||
namespace xe {
|
||||
namespace cpu {
|
||||
|
||||
struct StackFrame {
|
||||
enum class Type {
|
||||
// Host frame, likely in kernel or emulator code.
|
||||
kHost,
|
||||
// Guest frame, somewhere in PPC code.
|
||||
kGuest,
|
||||
};
|
||||
Type type;
|
||||
|
||||
// Always valid, indicating the address in a backend-defined range.
|
||||
uint64_t host_pc;
|
||||
// Only valid for kGuest frames, indicating the PPC address.
|
||||
uint32_t guest_pc;
|
||||
|
||||
union {
|
||||
// Contains symbol information for kHost frames.
|
||||
struct {
|
||||
// TODO(benvanik): better name, displacement, etc.
|
||||
char name[256];
|
||||
} host_symbol;
|
||||
// Contains symbol information for kGuest frames.
|
||||
struct {
|
||||
FunctionInfo* function_info;
|
||||
} guest_symbol;
|
||||
};
|
||||
};
|
||||
|
||||
class StackWalker {
|
||||
public:
|
||||
// Creates a stack walker. Only one should exist within a process.
|
||||
static std::unique_ptr<StackWalker> Create(backend::CodeCache* code_cache);
|
||||
|
||||
// Dumps all thread stacks to the log.
|
||||
void Dump();
|
||||
|
||||
// Captures up to the given number of stack frames from the current thread.
|
||||
// Use ResolveStackTrace to populate additional information.
|
||||
// Returns the number of frames captured, or 0 if an error occurred.
|
||||
// Optionally provides a hash value for the stack that can be used for
|
||||
// deduping.
|
||||
virtual size_t CaptureStackTrace(uint64_t* frame_host_pcs,
|
||||
size_t frame_offset, size_t frame_count,
|
||||
uint64_t* out_stack_hash = nullptr) = 0;
|
||||
|
||||
// Captures up to the given number of stack frames from the given thread,
|
||||
// referenced by native thread handle. The thread must be suspended.
|
||||
// This does not populate any information other than host_pc.
|
||||
// Use ResolveStackTrace to populate additional information.
|
||||
// Returns the number of frames captured, or 0 if an error occurred.
|
||||
// Optionally provides a hash value for the stack that can be used for
|
||||
// deduping.
|
||||
virtual size_t CaptureStackTrace(void* thread_handle,
|
||||
uint64_t* frame_host_pcs,
|
||||
size_t frame_offset, size_t frame_count,
|
||||
uint64_t* out_stack_hash = nullptr) = 0;
|
||||
|
||||
// Resolves symbol information for the given stack frames.
|
||||
// Each frame provided must have host_pc set, and all other fields will be
|
||||
// populated.
|
||||
virtual bool ResolveStack(uint64_t* frame_host_pcs, StackFrame* frames,
|
||||
size_t frame_count) = 0;
|
||||
};
|
||||
|
||||
} // namespace cpu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_CPU_STACK_WALKER_H_
|
|
@ -0,0 +1,286 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2015 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/cpu/stack_walker.h"
|
||||
|
||||
#include <gflags/gflags.h>
|
||||
|
||||
#include <mutex>
|
||||
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/platform_win.h"
|
||||
#include "xenia/cpu/backend/backend.h"
|
||||
#include "xenia/cpu/backend/code_cache.h"
|
||||
#include "xenia/cpu/processor.h"
|
||||
|
||||
DEFINE_bool(debug_symbol_loader, false,
|
||||
"Enable dbghelp debug logging and validation.");
|
||||
|
||||
// Must be included after platform_win.h:
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4091)
|
||||
#include <dbghelp.h>
|
||||
#pragma warning(pop)
|
||||
|
||||
typedef DWORD(__stdcall* LPSYMGETOPTIONS)(VOID);
|
||||
typedef DWORD(__stdcall* LPSYMSETOPTIONS)(IN DWORD SymOptions);
|
||||
typedef BOOL(__stdcall* LPSYMINITIALIZE)(IN HANDLE hProcess,
|
||||
IN PSTR UserSearchPath,
|
||||
IN BOOL fInvadeProcess);
|
||||
typedef BOOL(__stdcall* LPSTACKWALK64)(
|
||||
DWORD MachineType, HANDLE hProcess, HANDLE hThread,
|
||||
LPSTACKFRAME64 StackFrame, PVOID ContextRecord,
|
||||
PREAD_PROCESS_MEMORY_ROUTINE64 ReadMemoryRoutine,
|
||||
PFUNCTION_TABLE_ACCESS_ROUTINE64 FunctionTableAccessRoutine,
|
||||
PGET_MODULE_BASE_ROUTINE64 GetModuleBaseRoutine,
|
||||
PTRANSLATE_ADDRESS_ROUTINE64 TranslateAddress);
|
||||
typedef PVOID(__stdcall* LPSYMFUNCTIONTABLEACCESS64)(
|
||||
HANDLE hProcess,
|
||||
DWORD64 AddrBase); // DbgHelp.h typedef PFUNCTION_TABLE_ACCESS_ROUTINE64
|
||||
typedef DWORD64(__stdcall* LPSYMGETMODULEBASE64)(
|
||||
HANDLE hProcess,
|
||||
DWORD64 AddrBase); // DbgHelp.h typedef PGET_MODULE_BASE_ROUTINE64
|
||||
typedef BOOL(__stdcall* LPSYMGETSYMFROMADDR64)(IN HANDLE hProcess,
|
||||
IN DWORD64 qwAddr,
|
||||
OUT PDWORD64 pdwDisplacement,
|
||||
OUT PIMAGEHLP_SYMBOL64 Symbol);
|
||||
|
||||
LPSYMGETOPTIONS sym_get_options_ = nullptr;
|
||||
LPSYMSETOPTIONS sym_set_options_ = nullptr;
|
||||
LPSYMINITIALIZE sym_initialize_ = nullptr;
|
||||
LPSTACKWALK64 stack_walk_64_ = nullptr;
|
||||
LPSYMFUNCTIONTABLEACCESS64 sym_function_table_access_64_ = nullptr;
|
||||
LPSYMGETMODULEBASE64 sym_get_module_base_64_ = nullptr;
|
||||
LPSYMGETSYMFROMADDR64 sym_get_sym_from_addr_64_ = nullptr;
|
||||
|
||||
namespace xe {
|
||||
namespace cpu {
|
||||
|
||||
class Win32StackWalker : public StackWalker {
|
||||
public:
|
||||
Win32StackWalker(backend::CodeCache* code_cache) {
  // Record the code cache and its address bounds so that a PC can quickly be
  // classified as generated code or not.
  // These live in statics so that the static Sym* callbacks can reach them.
  // They are written here and never changed afterwards, so reading them from
  // multiple threads is fine.
  const uint32_t cache_base = code_cache->base_address();
  const uint32_t cache_size = code_cache->total_size();
  code_cache_ = code_cache;
  code_cache_min_ = cache_base;
  code_cache_max_ = cache_base + cache_size;
}
|
||||
|
||||
bool Initialize() {
|
||||
std::lock_guard<std::mutex> lock(dbghelp_mutex_);
|
||||
|
||||
// Attempt to load dbghelp.
|
||||
// NOTE: we never free it. That's fine.
|
||||
HMODULE module = LoadLibrary(TEXT("dbghelp.dll"));
|
||||
if (!module) {
|
||||
XELOGE("Unable to load dbghelp.dll - not found on path or invalid");
|
||||
return false;
|
||||
}
|
||||
sym_get_options_ = reinterpret_cast<LPSYMGETOPTIONS>(
|
||||
GetProcAddress(module, "SymGetOptions"));
|
||||
sym_set_options_ = reinterpret_cast<LPSYMSETOPTIONS>(
|
||||
GetProcAddress(module, "SymSetOptions"));
|
||||
sym_initialize_ = reinterpret_cast<LPSYMINITIALIZE>(
|
||||
GetProcAddress(module, "SymInitialize"));
|
||||
stack_walk_64_ =
|
||||
reinterpret_cast<LPSTACKWALK64>(GetProcAddress(module, "StackWalk64"));
|
||||
sym_function_table_access_64_ =
|
||||
reinterpret_cast<LPSYMFUNCTIONTABLEACCESS64>(
|
||||
GetProcAddress(module, "SymFunctionTableAccess64"));
|
||||
sym_get_module_base_64_ = reinterpret_cast<LPSYMGETMODULEBASE64>(
|
||||
GetProcAddress(module, "SymGetModuleBase64"));
|
||||
sym_get_sym_from_addr_64_ = reinterpret_cast<LPSYMGETSYMFROMADDR64>(
|
||||
GetProcAddress(module, "SymGetSymFromAddr64"));
|
||||
if (!sym_get_options_ || !sym_set_options_ || !sym_initialize_ ||
|
||||
!stack_walk_64_ || !sym_function_table_access_64_ ||
|
||||
!sym_get_module_base_64_ || !sym_get_sym_from_addr_64_) {
|
||||
XELOGE("Unable to get one or more symbols from dbghelp.dll");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Initialize the symbol lookup services.
|
||||
DWORD options = sym_get_options_();
|
||||
if (FLAGS_debug_symbol_loader) {
|
||||
options |= SYMOPT_DEBUG;
|
||||
}
|
||||
options |= SYMOPT_DEFERRED_LOADS;
|
||||
options |= SYMOPT_LOAD_LINES;
|
||||
options |= SYMOPT_FAIL_CRITICAL_ERRORS;
|
||||
sym_set_options_(options);
|
||||
if (!sym_initialize_(GetCurrentProcess(), nullptr, TRUE)) {
|
||||
XELOGE("Unable to initialize symbol services");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t CaptureStackTrace(uint64_t* frame_host_pcs, size_t frame_offset,
|
||||
size_t frame_count,
|
||||
uint64_t* out_stack_hash) override {
|
||||
*out_stack_hash = 0;
|
||||
// Simple method: captures just stack frame PC addresses, optionally
|
||||
// computing a whole-stack hash.
|
||||
ULONG back_trace_hash = 0;
|
||||
DWORD frames_to_skip = DWORD(frame_offset) + 1;
|
||||
DWORD frames_to_capture =
|
||||
std::min(DWORD(frame_count), UINT16_MAX - frames_to_skip);
|
||||
USHORT captured_count = CaptureStackBackTrace(
|
||||
frames_to_skip, frames_to_capture,
|
||||
reinterpret_cast<PVOID*>(frame_host_pcs), &back_trace_hash);
|
||||
if (out_stack_hash) {
|
||||
*out_stack_hash = back_trace_hash;
|
||||
}
|
||||
return captured_count;
|
||||
}
|
||||
|
||||
size_t CaptureStackTrace(void* thread_handle, uint64_t* frame_host_pcs,
|
||||
size_t frame_offset, size_t frame_count,
|
||||
uint64_t* out_stack_hash) override {
|
||||
// Query context. Thread must be suspended.
|
||||
// Need at least CONTEXT_CONTROL (for rip and rsp) and CONTEXT_INTEGER (for
|
||||
// rbp).
|
||||
CONTEXT thread_context;
|
||||
thread_context.ContextFlags = CONTEXT_FULL;
|
||||
if (!GetThreadContext(thread_handle, &thread_context)) {
|
||||
XELOGE("Unable to read thread context for stack walk");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Setup the frame for walking.
|
||||
STACKFRAME64 stack_frame = {0};
|
||||
stack_frame.AddrPC.Mode = AddrModeFlat;
|
||||
stack_frame.AddrPC.Offset = thread_context.Rip;
|
||||
stack_frame.AddrFrame.Mode = AddrModeFlat;
|
||||
stack_frame.AddrFrame.Offset = thread_context.Rbp;
|
||||
stack_frame.AddrStack.Mode = AddrModeFlat;
|
||||
stack_frame.AddrStack.Offset = thread_context.Rsp;
|
||||
|
||||
// Walk the stack.
|
||||
// Note that StackWalk64 is thread safe, though other dbghelp functions are
|
||||
// not.
|
||||
size_t frame_index = 0;
|
||||
while (frame_index < frame_count &&
|
||||
stack_walk_64_(IMAGE_FILE_MACHINE_AMD64, GetCurrentProcess(),
|
||||
thread_handle, &stack_frame, &thread_context, nullptr,
|
||||
XSymFunctionTableAccess64, XSymGetModuleBase64,
|
||||
nullptr) == TRUE) {
|
||||
if (frame_index >= frame_offset) {
|
||||
frame_host_pcs[frame_index - frame_offset] = stack_frame.AddrPC.Offset;
|
||||
}
|
||||
++frame_index;
|
||||
}
|
||||
|
||||
return frame_index - frame_offset;
|
||||
}
|
||||
|
||||
bool ResolveStack(uint64_t* frame_host_pcs, StackFrame* frames,
|
||||
size_t frame_count) override {
|
||||
// TODO(benvanik): collect symbols to resolve with dbghelp and resolve
|
||||
// afterward in a smaller lock.
|
||||
std::lock_guard<std::mutex> lock(dbghelp_mutex_);
|
||||
|
||||
for (size_t i = 0; i < frame_count; ++i) {
|
||||
auto& frame = frames[i];
|
||||
frame.host_pc = frame_host_pcs[i];
|
||||
frame.host_symbol.name[0] = 0;
|
||||
frame.guest_pc = 0;
|
||||
frame.guest_symbol.function_info = nullptr;
|
||||
|
||||
// If in the generated range, we know it's ours.
|
||||
if (frame.host_pc >= code_cache_min_ && frame.host_pc < code_cache_max_) {
|
||||
// Guest symbol, so we can look it up quickly in the code cache.
|
||||
frame.type = StackFrame::Type::kGuest;
|
||||
auto function_info = code_cache_->LookupFunction(frame.host_pc);
|
||||
if (function_info) {
|
||||
frame.guest_symbol.function_info = function_info;
|
||||
// Figure out where in guest code we are by looking up the
|
||||
// displacement in x64 from the JIT'ed code start to the PC.
|
||||
uint32_t host_displacement =
|
||||
uint32_t(frame.host_pc) -
|
||||
uint32_t(uint64_t(function_info->function()->machine_code()));
|
||||
auto entry =
|
||||
function_info->function()->debug_info()->LookupCodeOffset(
|
||||
host_displacement);
|
||||
frame.guest_pc = entry->source_offset;
|
||||
} else {
|
||||
frame.guest_symbol.function_info = nullptr;
|
||||
}
|
||||
} else {
|
||||
// Host symbol, which means either emulator or system.
|
||||
frame.type = StackFrame::Type::kHost;
|
||||
// TODO(benvanik): cache so that we can avoid calling into dbghelp (and
|
||||
// taking the lock).
|
||||
union {
|
||||
IMAGEHLP_SYMBOL64 info;
|
||||
uint8_t buffer[sizeof(IMAGEHLP_SYMBOL64) +
|
||||
MAX_SYM_NAME * sizeof(CHAR) + sizeof(ULONG64) - 1];
|
||||
} symbol;
|
||||
symbol.info.SizeOfStruct = sizeof(IMAGEHLP_SYMBOL64);
|
||||
symbol.info.MaxNameLength = MAX_SYM_NAME;
|
||||
uint64_t displacement = 0;
|
||||
if (sym_get_sym_from_addr_64_(GetCurrentProcess(), frame.host_pc,
|
||||
&displacement, &symbol.info)) {
|
||||
// Resolved successfully.
|
||||
// TODO(benvanik): stash: module, base, displacement, name?
|
||||
std::strncpy(frame.host_symbol.name, symbol.info.Name, 256);
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
// Function table access callback handed to StackWalk64.
// Addresses inside the code cache range are answered by the code cache's
// unwind info; everything else is forwarded to dbghelp.
static PVOID WINAPI XSymFunctionTableAccess64(__in HANDLE hProcess,
                                              __in DWORD64 AddrBase) {
  const bool outside_code_cache =
      AddrBase < code_cache_min_ || AddrBase >= code_cache_max_;
  if (outside_code_cache) {
    // Normal symbol lookup.
    return sym_function_table_access_64_(hProcess, AddrBase);
  }
  // Within our generated range so ask code cache.
  return code_cache_->LookupUnwindInfo(AddrBase);
}
|
||||
|
||||
// Module base callback handed to StackWalk64.
// Addresses inside the code cache range report the code cache base as their
// module base; everything else is forwarded to dbghelp.
static DWORD64 WINAPI XSymGetModuleBase64(_In_ HANDLE hProcess,
                                          _In_ DWORD64 dwAddr) {
  const bool outside_code_cache =
      dwAddr < code_cache_min_ || dwAddr >= code_cache_max_;
  if (outside_code_cache) {
    // Normal module base lookup.
    return sym_get_module_base_64_(hProcess, dwAddr);
  }
  // In our generated range all addresses are relative to the code cache
  // base.
  return code_cache_min_;
}
|
||||
|
||||
std::mutex dbghelp_mutex_;
|
||||
|
||||
static xe::cpu::backend::CodeCache* code_cache_;
|
||||
static uint32_t code_cache_min_;
|
||||
static uint32_t code_cache_max_;
|
||||
};
|
||||
|
||||
xe::cpu::backend::CodeCache* Win32StackWalker::code_cache_ = nullptr;
|
||||
uint32_t Win32StackWalker::code_cache_min_ = 0;
|
||||
uint32_t Win32StackWalker::code_cache_max_ = 0;
|
||||
|
||||
// Builds the Win32 stack walker for the given code cache.
// Returns nullptr (after logging) when the walker fails to initialize.
std::unique_ptr<StackWalker> StackWalker::Create(
    backend::CodeCache* code_cache) {
  auto win32_walker = std::make_unique<Win32StackWalker>(code_cache);
  if (win32_walker->Initialize()) {
    // Hand ownership over as the base interface type.
    return std::unique_ptr<StackWalker>(std::move(win32_walker));
  }
  XELOGE("Unable to initialize stack walker");
  return nullptr;
}
|
||||
|
||||
} // namespace cpu
|
||||
} // namespace xe
|
|
@ -29,6 +29,8 @@
|
|||
#include "xenia/kernel/objects/xthread.h"
|
||||
#include "xenia/kernel/objects/xuser_module.h"
|
||||
|
||||
#include "xenia/cpu/stack_walker.h"
|
||||
|
||||
#if 0 && DEBUG
|
||||
#define DEFAULT_DEBUG_FLAG true
|
||||
#else
|
||||
|
@ -164,6 +166,35 @@ uint8_t* Debugger::AllocateFunctionTraceData(size_t size) {
|
|||
return functions_trace_file_->Allocate(size);
|
||||
}
|
||||
|
||||
void Debugger::DumpThreadStacks() {
|
||||
auto stack_walker = emulator()->processor()->stack_walker();
|
||||
auto threads =
|
||||
emulator_->kernel_state()->object_table()->GetObjectsByType<XThread>(
|
||||
XObject::kTypeThread);
|
||||
for (auto& thread : threads) {
|
||||
XELOGI("Thread %s (%s)", thread->name().c_str(),
|
||||
thread->is_guest_thread() ? "guest" : "host");
|
||||
uint64_t frame_host_pcs[64];
|
||||
uint64_t hash;
|
||||
size_t count = stack_walker->CaptureStackTrace(
|
||||
thread->GetWaitHandle()->native_handle(), frame_host_pcs, 0, 64, &hash);
|
||||
cpu::StackFrame frames[64];
|
||||
stack_walker->ResolveStack(frame_host_pcs, frames, count);
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
auto& frame = frames[i];
|
||||
if (frame.type == cpu::StackFrame::Type::kHost) {
|
||||
XELOGI(" %.2lld %.16llX %s", count - i - 1, frame.host_pc,
|
||||
frame.host_symbol.name);
|
||||
} else {
|
||||
auto function_info = frame.guest_symbol.function_info;
|
||||
XELOGI(" %.2lld %.16llX %.8X %s", count - i - 1, frame.host_pc,
|
||||
frame.guest_pc,
|
||||
function_info ? function_info->name().c_str() : "?");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int Debugger::AddBreakpoint(Breakpoint* breakpoint) {
|
||||
// Add to breakpoints map.
|
||||
{
|
||||
|
@ -274,6 +305,10 @@ void Debugger::Interrupt() {
|
|||
SuspendAllThreads();
|
||||
execution_state_ = ExecutionState::kStopped;
|
||||
server_->OnExecutionInterrupted();
|
||||
|
||||
// TEST CODE.
|
||||
// TODO(benvanik): remove when UI shows threads.
|
||||
DumpThreadStacks();
|
||||
}
|
||||
|
||||
void Debugger::Continue() {
|
||||
|
|
|
@ -64,6 +64,8 @@ class Debugger {
|
|||
|
||||
ExecutionState execution_state() const { return execution_state_; }
|
||||
|
||||
void DumpThreadStacks();
|
||||
|
||||
int AddBreakpoint(Breakpoint* breakpoint);
|
||||
int RemoveBreakpoint(Breakpoint* breakpoint);
|
||||
void FindBreakpoints(uint32_t address,
|
||||
|
|
Loading…
Reference in New Issue