From 852536ae0acb01914d5d5c9a45b54619792bb80c Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sun, 20 Jan 2013 16:46:08 -0800 Subject: [PATCH] Basic function analysis. Finds basic blocks and estimates proper function bounds. Seems legit for compiled code. --- include/xenia/config.h | 1 + include/xenia/cpu/ppc/instr.h | 18 +- include/xenia/cpu/sdb.h | 41 +++- include/xenia/logging.h | 5 + src/cpu/exec_module.cc | 1 + src/cpu/ppc/instr_tables.h | 2 +- src/cpu/sdb.cc | 357 ++++++++++++++++++++++++++++++++-- 7 files changed, 400 insertions(+), 25 deletions(-) diff --git a/include/xenia/config.h b/include/xenia/config.h index ebcc4f04c..7478a3325 100644 --- a/include/xenia/config.h +++ b/include/xenia/config.h @@ -23,6 +23,7 @@ #define XE_OPTION_LOG_INFO 1 #define XE_OPTION_LOG_DEBUG 1 #define XE_OPTION_LOG_CPU 1 +#define XE_OPTION_LOG_SDB 0 #define XE_OPTION_LOG_GPU 1 #define XE_OPTION_LOG_KERNEL 1 diff --git a/include/xenia/cpu/ppc/instr.h b/include/xenia/cpu/ppc/instr.h index 8b90c4141..f9e59391a 100644 --- a/include/xenia/cpu/ppc/instr.h +++ b/include/xenia/cpu/ppc/instr.h @@ -56,6 +56,14 @@ typedef enum { class InstrType; +static inline int32_t XEEXTS16(uint32_t v) { + return (int32_t)((int16_t)v); +} +static inline int32_t XEEXTS26(uint32_t v) { + return v & 0x02000000 ? 
(int32_t)v | 0xFC000000 : (int32_t)(v); +} + + typedef struct { InstrType* type; uint32_t address; @@ -97,6 +105,14 @@ typedef struct { } DS; // kXEPPCInstrFormatX // kXEPPCInstrFormatXL + struct { + uint32_t LK : 1; + uint32_t : 10; + uint32_t BB : 5; + uint32_t BI : 5; + uint32_t BO : 5; + uint32_t OPCD : 6; + } XL; struct { uint32_t : 1; uint32_t : 10; @@ -114,7 +130,7 @@ typedef struct { // kXEPPCInstrFormatVA // kXEPPCInstrFormatVX // kXEPPCInstrFormatVXR - } data; + }; } InstrData; class Instr { diff --git a/include/xenia/cpu/sdb.h b/include/xenia/cpu/sdb.h index c7b17db46..cfae9e9c8 100644 --- a/include/xenia/cpu/sdb.h +++ b/include/xenia/cpu/sdb.h @@ -45,8 +45,9 @@ public: class Symbol { public: enum SymbolType { - Function = 0, - Variable = 1, + Function = 0, + Variable = 1, + ExceptionEntry = 2, }; virtual ~Symbol() {} @@ -57,6 +58,18 @@ protected: Symbol(SymbolType type) : symbol_type(type) {} }; +class ExceptionEntrySymbol; + +class FunctionBlock { +public: + uint32_t start_address; + uint32_t end_address; + + vector incoming_blocks; + FunctionBlock* outgoing_block; + uint32_t outgoing_address; +}; + class FunctionSymbol : public Symbol { public: enum FunctionType { @@ -64,6 +77,10 @@ public: Kernel = 1, User = 2, }; + enum Flags { + kFlagSaveGprLr = 1 << 1, + kFlagRestGprLr = 1 << 2, + }; FunctionSymbol() : Symbol(Function) {} virtual ~FunctionSymbol() {} @@ -74,9 +91,13 @@ public: FunctionType type; uint32_t flags; + ExceptionEntrySymbol* ee; + vector incoming_calls; vector outgoing_calls; vector variable_accesses; + + map blocks; }; class VariableSymbol : public Symbol { @@ -88,6 +109,15 @@ public: char *name; }; +class ExceptionEntrySymbol : public Symbol { +public: + ExceptionEntrySymbol() : Symbol(ExceptionEntry) {} + virtual ~ExceptionEntrySymbol() {} + + uint32_t address; + FunctionSymbol* function; +}; + class SymbolDatabase { public: @@ -96,6 +126,7 @@ public: int Analyze(); + ExceptionEntrySymbol* GetOrInsertExceptionEntry(uint32_t address); 
FunctionSymbol* GetOrInsertFunction(uint32_t address); VariableSymbol* GetOrInsertVariable(uint32_t address); FunctionSymbol* GetFunction(uint32_t address); @@ -105,6 +136,7 @@ public: int GetAllFunctions(vector& functions); void Dump(); + void DumpFunctionBlocks(FunctionSymbol* fn); private: typedef std::map SymbolMap; @@ -114,9 +146,12 @@ private: int AddImports(const xe_xex2_import_library_t *library); int AddMethodHints(); int AnalyzeFunction(FunctionSymbol* fn); - int FillHoles(); + bool FillHoles(); int FlushQueue(); + bool IsValueInTextRange(uint32_t value); + bool IsRestGprLr(uint32_t addr); + xe_memory_ref memory_; kernel::UserModule* module_; size_t function_count_; diff --git a/include/xenia/logging.h b/include/xenia/logging.h index 5a8e0009a..582717887 100644 --- a/include/xenia/logging.h +++ b/include/xenia/logging.h @@ -57,6 +57,11 @@ void xe_log_line(const xechar_t* file_path, const uint32_t line_number, #else #define XELOGCPU(fmt, ...) XE_EMPTY_MACRO #endif +#if XE_OPTION(LOG_SDB) +#define XELOGSDB(fmt, ...) XELOGCORE('S', fmt, ##__VA_ARGS__) +#else +#define XELOGSDB(fmt, ...) XE_EMPTY_MACRO +#endif #if XE_OPTION(LOG_GPU) #define XELOGGPU(fmt, ...) 
XELOGCORE('G', fmt, ##__VA_ARGS__) #else diff --git a/src/cpu/exec_module.cc b/src/cpu/exec_module.cc index 0a893c404..1a812b7b9 100644 --- a/src/cpu/exec_module.cc +++ b/src/cpu/exec_module.cc @@ -196,5 +196,6 @@ int ExecModule::Uninit() { } void ExecModule::Dump() { + sdb_->Dump(); gen_module_->dump(); } diff --git a/src/cpu/ppc/instr_tables.h b/src/cpu/ppc/instr_tables.h index 11306edef..80bf87134 100644 --- a/src/cpu/ppc/instr_tables.h +++ b/src/cpu/ppc/instr_tables.h @@ -82,7 +82,7 @@ static InstrType instr_table_30_unprep[] = { INSTRUCTION(rldcrx, 0x78000012, MDS, General , 0), }; static InstrType* instr_table_30 = instr_table_prep( - instr_table_30_unprep, XECOUNT(instr_table_30_unprep), 1, 5); + instr_table_30_unprep, XECOUNT(instr_table_30_unprep), 2, 4); // Opcode = 31, index = bits 10-1 (10) static InstrType instr_table_31_unprep[] = { diff --git a/src/cpu/sdb.cc b/src/cpu/sdb.cc index 601bdd419..1730f9929 100644 --- a/src/cpu/sdb.cc +++ b/src/cpu/sdb.cc @@ -12,9 +12,12 @@ #include #include +#include + using namespace xe; using namespace xe::cpu; +using namespace xe::cpu::ppc; using namespace xe::cpu::sdb; using namespace xe::kernel; @@ -58,25 +61,47 @@ int SymbolDatabase::Analyze() { // Queue entry point of the application. FunctionSymbol* fn = GetOrInsertFunction(header->exe_entry_point); - fn->name = strdup(""); + fn->name = xestrdupa(""); // Keep pumping the queue until there's nothing left to do. FlushQueue(); - // Do a pass over the functions to fill holes. - FillHoles(); - FlushQueue(); + // Do a pass over the functions to fill holes. A few times. Just to be safe. 
+ while (true) { + if (!FillHoles()) { + break; + } + FlushQueue(); + } return 0; } +ExceptionEntrySymbol* SymbolDatabase::GetOrInsertExceptionEntry( + uint32_t address) { + SymbolMap::iterator i = symbols_.find(address); + if (i != symbols_.end() && i->second->symbol_type == Symbol::ExceptionEntry) { + return static_cast<ExceptionEntrySymbol*>(i->second); + } + // No exception entry is registered at this address yet; create one. + ExceptionEntrySymbol* ee = new ExceptionEntrySymbol(); + ee->address = address; + symbols_.insert(SymbolMap::value_type(address, ee)); + return ee; +} + FunctionSymbol* SymbolDatabase::GetOrInsertFunction(uint32_t address) { FunctionSymbol* fn = GetFunction(address); if (fn) { return fn; } - printf("add fn %.8X\n", address); + // Ignore values outside of the .text range. + if (!IsValueInTextRange(address)) { + XELOGSDB("Ignoring function outside of .text: %.8X\n", address); + return NULL; + } + fn = new FunctionSymbol(); fn->start_address = address; function_count_++; @@ -91,7 +116,6 @@ VariableSymbol* SymbolDatabase::GetOrInsertVariable(uint32_t address) { return var; } - printf("add var %.8X\n", address); var = new VariableSymbol(); var->address = address; variable_count_++; @@ -132,14 +156,18 @@ void SymbolDatabase::Dump() { { FunctionSymbol* fn = static_cast(it->second); if (previous && (int)(fn->start_address - previous) > 0) { - printf("%.8X-%.8X (%5d) h\n", previous, fn->start_address, - fn->start_address - previous); + if (fn->start_address - previous > 4 || + *((uint32_t*)xe_memory_addr(memory_, previous)) != 0) { + printf("%.8X-%.8X (%5d) h\n", previous, fn->start_address, + fn->start_address - previous); + } } printf("%.8X-%.8X (%5d) f %s\n", fn->start_address, fn->end_address + 4, fn->end_address - fn->start_address + 4, fn->name ? fn->name : ""); previous = fn->end_address + 4; + DumpFunctionBlocks(fn); } break; case Symbol::Variable: @@ -149,10 +177,28 @@ void SymbolDatabase::Dump() { var->name ? 
var->name : ""); } break; + case Symbol::ExceptionEntry: + { + ExceptionEntrySymbol* ee = static_cast( + it->second); + printf("%.8X-%.8X (%5d) e of %.8X\n", + ee->address, ee->address + 8, 8, + ee->function ? ee->function->start_address : 0); + previous = ee->address + 8 + 4; + } + break; } } } +void SymbolDatabase::DumpFunctionBlocks(FunctionSymbol* fn) { + for (std::map::iterator it = fn->blocks.begin(); + it != fn->blocks.end(); ++it) { + FunctionBlock* bb = it->second; + printf(" bb %.8X\n", bb->start_address); + } +} + int SymbolDatabase::FindGplr() { // Special stack save/restore functions. // __savegprlr_14 to __savegprlr_31 @@ -233,8 +279,9 @@ int SymbolDatabase::FindGplr() { xesnprintf(name, XECOUNT(name), "__savegprlr_%d", n); FunctionSymbol* fn = GetOrInsertFunction(address); fn->end_address = fn->start_address + (31 - n) * 4 + 2 * 4; - fn->name = xestrdup(name); + fn->name = xestrdupa(name); fn->type = FunctionSymbol::User; + fn->flags |= FunctionSymbol::kFlagSaveGprLr; address += 4; } address = gplr_start + 20 * 4; @@ -242,8 +289,9 @@ int SymbolDatabase::FindGplr() { xesnprintf(name, XECOUNT(name), "__restgprlr_%d", n); FunctionSymbol* fn = GetOrInsertFunction(address); fn->end_address = fn->start_address + (31 - n) * 4 + 3 * 4; - fn->name = xestrdup(name); + fn->name = xestrdupa(name); fn->type = FunctionSymbol::User; + fn->flags |= FunctionSymbol::kFlagRestGprLr; address += 4; } @@ -267,14 +315,14 @@ int SymbolDatabase::AddImports(const xe_xex2_import_library_t* library) { // TODO(benvanik): use kernel name xesnprintf(name, XECOUNT(name), "__var_%s_%.3X", library->name, info->ordinal); - var->name = strdup(name); + var->name = xestrdupa(name); if (info->thunk_address) { FunctionSymbol* fn = GetOrInsertFunction(info->thunk_address); // TODO(benvanik): use kernel name xesnprintf(name, XECOUNT(name), "__thunk_%s_%.3X", library->name, info->ordinal); fn->end_address = fn->start_address + 16 - 4; - fn->name = strdup(name); + fn->name = xestrdupa(name); 
fn->type = FunctionSymbol::Kernel; } } @@ -301,15 +349,191 @@ int SymbolDatabase::AddMethodHints() { return 0; } +bool SymbolDatabase::IsRestGprLr(uint32_t addr) { + FunctionSymbol* fn = GetFunction(addr); + return fn && (fn->flags & FunctionSymbol::kFlagRestGprLr); +} + int SymbolDatabase::AnalyzeFunction(FunctionSymbol* fn) { // Ignore functions already analyzed. - if (fn->type != FunctionSymbol::Unknown) { + if (fn->blocks.size()) { + return 0; + } + // Ignore kernel thunks. + if (fn->type == FunctionSymbol::Kernel) { return 0; } - // TODO(benvanik): analysis. - // Search forward from start address to find the end address. - // Use branch tracking to figure that out. + // This is a simple basic block analyzer. It walks the start address to the + // end address looking for branches. Each span of instructions between + // branches is considered a basic block, and the blocks are linked up to + // create a CFG for the function. When the last blr (that has no branches + // to after it) is found the function is considered ended. If this is before + // the expected end address then the function address range is split up and + // the second half is treated as another function. + + // TODO(benvanik): special branch checks: + // bl to _XamLoaderTerminateTitle should be treated as b + // bl to KeBugCheck should be treated as b, and b KeBugCheck should die + + // TODO(benvanik): identify thunks: + // These look like: + // li r5, 0 + // [etc] + // b some_function + // Can probably be detected by lack of use of LR? + + uint8_t* p = xe_memory_addr(memory_, 0); + + if (*((uint32_t*)(p + fn->start_address)) == 0) { + // Function starts with 0x00000000 - we want to skip this and split. 
+ XELOGSDB("function starts with 0: %.8X\n", fn->start_address); + symbols_.erase(fn->start_address); + if (!GetFunction(fn->start_address + 4)) { + fn->start_address += 4; + symbols_.insert(SymbolMap::value_type(fn->start_address, fn)); + scan_queue_.push_back(fn); + } else { + delete fn; + } + return 0; + } + + XELOGSDB("Analyzing function %.8X...\n", fn->start_address); + + InstrData i; + FunctionBlock* block = NULL; + uint32_t furthest_target = fn->start_address; + uint32_t addr = fn->start_address; + while (true) { + i.code = XEGETUINT32BE(p + addr); + i.type = ppc::GetInstrType(i.code); + i.address = addr; + + // If we fetched 0 assume that we somehow hit one of the awesome + // 'no really we meant to end after that bl' functions. + if (!i.code) { + XELOGSDB("function end %.8X (0x00000000 read)\n", addr); + break; + } + + if (!i.type) { + // Invalid instruction. + XELOGSDB("Invalid instruction at %.8X: %.8X\n", addr, i.code); + return 1; + } + + // Create a new basic block, if needed. + if (!block) { + block = new FunctionBlock(); + block->start_address = addr; + block->end_address = addr; + fn->blocks.insert(std::pair( + block->start_address, block)); + } + + bool ends_block = false; + if (i.code == 0x4E800020) { + // blr -- unconditional branch to LR. + // This is generally a return. + if (furthest_target > addr) { + // Remaining targets within function, not end. + XELOGSDB("ignoring blr %.8X (branch to %.8X)\n", addr, furthest_target); + } else { + // Function end point. + XELOGSDB("function end %.8X\n", addr); + break; + } + ends_block = true; + } else if (i.type->opcode == 0x48000000) { + // b/ba/bl/bla + uint32_t target = XEEXTS26(i.I.LI << 2) + (i.I.AA ? 0 : (int32_t)addr); + + if (i.I.LK) { + XELOGSDB("bl %.8X -> %.8X\n", addr, target); + + // Queue target if needed. + } else { + XELOGSDB("b %.8X -> %.8X\n", addr, target); + // If the target is back into the function and there's no further target + // we are at the end of a function. 
+ if (target >= fn->start_address && + target < addr && furthest_target <= addr) { + XELOGSDB("function end %.8X (back b)\n", addr); + break; + } + + // If the target is a __restgprlr_* method it's the end of a function. + // Note that sometimes functions stick this in a basic block *inside* + // of the function somewhere, so ensure we don't have any branches over + // it. + if (furthest_target <= addr && IsRestGprLr(target)) { + XELOGSDB("function end %.8X (__restgprlr_*)\n", addr); + break; + } + + furthest_target = MAX(furthest_target, target); + } + ends_block = true; + } else if (i.type->opcode == 0x40000000) { + // bc/bca/bcl/bcla + uint32_t target = XEEXTS16(i.B.BD << 2) + (i.B.AA ? 0 : (int32_t)addr); + if (i.B.LK) { + XELOGSDB("bcl %.8X -> %.8X\n", addr, target); + } else { + XELOGSDB("bc %.8X -> %.8X\n", addr, target); + + furthest_target = MAX(furthest_target, target); + } + ends_block = true; + } else if (i.type->opcode == 0x4C000020) { + // bclr/bclrl + if (i.XL.LK) { + XELOGSDB("bclrl %.8X\n", addr); + } else { + XELOGSDB("bclr %.8X\n", addr); + } + ends_block = true; + } else if (i.type->opcode == 0x4C000420) { + // bcctr/bcctrl + if (i.XL.LK) { + XELOGSDB("bcctrl %.8X\n", addr); + } else { + XELOGSDB("bcctr %.8X\n", addr); + } + ends_block = true; + } + + block->end_address = addr; + if (ends_block) { + // This instruction is the end of a basic block. + // Finish up the one we are working on. The next loop around will create + // a new one to scribble into. + block = NULL; + } + + addr += 4; + if (fn->end_address && addr > fn->end_address) { + // Hmm.... + XELOGSDB("Ran over function bounds! %.8X-%.8X\n", + fn->start_address, fn->end_address); + break; + } + } + + if (addr + 4 < fn->end_address) { + // Ran under the expected value - since we probably got the initial bounds + // from someplace valid (like method hints) this may indicate an error. + // It's also possible that we guessed in hole-filling and there's another + // function below this one. 
+ XELOGSDB("Function ran under: %.8X-%.8X ended at %.8X\n", + fn->start_address, fn->end_address, addr + 4); + } + fn->end_address = addr; + + // If there's spare bits at the end, split the function. + + XELOGSDB("Finished analyzing %.8X\n", fn->start_address); return 0; } @@ -317,17 +541,110 @@ int SymbolDatabase::FlushQueue() { while (scan_queue_.size()) { FunctionSymbol* fn = scan_queue_.front(); scan_queue_.pop_front(); - if (!AnalyzeFunction(fn)) { + if (AnalyzeFunction(fn)) { + XELOGSDB("Aborting analysis!\n"); return 1; } } return 0; } -int SymbolDatabase::FillHoles() { - // TODO(benvanik): scan all holes +bool SymbolDatabase::IsValueInTextRange(uint32_t value) { + const xe_xex2_header_t* header = module_->xex_header(); + for (size_t n = 0, i = 0; n < header->section_count; n++) { + const xe_xex2_section_t* section = &header->sections[n]; + const size_t start_address = + header->exe_address + (i * xe_xex2_section_length); + const size_t end_address = + start_address + (section->info.page_count * xe_xex2_section_length); + if (value >= start_address && value < end_address) { + return section->info.type == XEX_SECTION_CODE; + } + i += section->info.page_count; + } + return false; +} + +typedef struct { + uint32_t start_address; + uint32_t end_address; +} HoleInfo; + +bool SymbolDatabase::FillHoles() { // If 4b, check if 0x00000000 and ignore (alignment padding) // If 8b, check if first value is within .text and ignore (EH entry) // Else, add to scan queue as function? - return 0; + + std::vector holes; + std::vector ees; + + uint32_t previous = 0; + for (SymbolMap::iterator it = symbols_.begin(); it != symbols_.end(); ++it) { + switch (it->second->symbol_type) { + case Symbol::Function: + { + FunctionSymbol* fn = static_cast(it->second); + if (previous && (int)(fn->start_address - previous) > 0) { + // Hole! 
+ uint32_t* p = (uint32_t*)xe_memory_addr(memory_, previous); + size_t hole_length = fn->start_address - previous; + if (hole_length == 4) { + // Likely a pointer or 0. + if (*p == 0) { + // Skip - just a zero. + } else if (IsValueInTextRange(XEGETUINT32BE(p))) { + // An address - probably an indirection data value. + } + } else if (hole_length == 8) { + // Possibly an exception handler entry. + // They look like [some value in .text] + [some pointer]. + if (*p == 0 || IsValueInTextRange(XEGETUINT32BE(p))) { + // Skip! + ees.push_back(previous); + } else { + // Probably legit. + holes.push_back((HoleInfo){previous, fn->start_address}); + } + } else { + // Probably legit. + holes.push_back((HoleInfo){previous, fn->start_address}); + } + } + previous = fn->end_address + 4; + } + break; + case Symbol::Variable: + case Symbol::ExceptionEntry: + break; + } + } + + for (std::vector::iterator it = ees.begin(); it != ees.end(); + ++it) { + ExceptionEntrySymbol* ee = GetOrInsertExceptionEntry(*it); + ee->function = GetFunction(ee->address + 8); + if (ee->function) { + ee->function->ee = ee; + } + uint32_t* p = (uint32_t*)xe_memory_addr(memory_, ee->address); + uint32_t handler_addr = XEGETUINT32BE(p); + if (handler_addr) { + GetOrInsertFunction(handler_addr); + } + uint32_t data_addr = XEGETUINT32BE(p + 1); + if (data_addr) { + VariableSymbol* var = GetOrInsertVariable(data_addr); + char name[32]; + xesnprintf(name, XECOUNT(name), "__ee_data_%.8X", *it); + var->name = xestrdupa(name); + } + } + + for (std::vector::iterator it = holes.begin(); it != holes.end(); + ++it) { + FunctionSymbol* fn = GetOrInsertFunction(it->start_address); + fn->end_address = it->end_address; + } + + return holes.size() > 0; }