Basic function analysis.

Finds basic blocks and estimates proper function bounds.
Seems legit for compiled code.
This commit is contained in:
Ben Vanik 2013-01-20 16:46:08 -08:00
parent d4b0bf73c1
commit 852536ae0a
7 changed files with 400 additions and 25 deletions

View File

@ -23,6 +23,7 @@
#define XE_OPTION_LOG_INFO 1
#define XE_OPTION_LOG_DEBUG 1
#define XE_OPTION_LOG_CPU 1
#define XE_OPTION_LOG_SDB 0
#define XE_OPTION_LOG_GPU 1
#define XE_OPTION_LOG_KERNEL 1

View File

@ -56,6 +56,14 @@ typedef enum {
class InstrType;
// Sign-extends the low 16 bits of |v| to a signed 32-bit value. Any bits above
// bit 15 are discarded by the narrowing conversion.
static inline int32_t XEEXTS16(uint32_t v) {
  const int16_t low_half = static_cast<int16_t>(v);
  return static_cast<int32_t>(low_half);
}
// Sign-extends a 26-bit quantity held in the low bits of |v|: when bit 25 (the
// 26-bit sign bit) is set, bits 26-31 are filled with ones; otherwise |v| is
// returned unchanged.
static inline int32_t XEEXTS26(uint32_t v) {
  const uint32_t kSignBit = 0x02000000u;
  const uint32_t kExtensionBits = 0xFC000000u;
  uint32_t extended = v;
  if ((v & kSignBit) != 0) {
    extended |= kExtensionBits;
  }
  return (int32_t)extended;
}
typedef struct {
InstrType* type;
uint32_t address;
@ -97,6 +105,14 @@ typedef struct {
} DS;
// kXEPPCInstrFormatX
// kXEPPCInstrFormatXL
struct {
uint32_t LK : 1;
uint32_t : 10;
uint32_t BB : 5;
uint32_t BI : 5;
uint32_t BO : 5;
uint32_t OPCD : 6;
} XL;
struct {
uint32_t : 1;
uint32_t : 10;
@ -114,7 +130,7 @@ typedef struct {
// kXEPPCInstrFormatVA
// kXEPPCInstrFormatVX
// kXEPPCInstrFormatVXR
} data;
};
} InstrData;
class Instr {

View File

@ -45,8 +45,9 @@ public:
class Symbol {
public:
enum SymbolType {
Function = 0,
Variable = 1,
Function = 0,
Variable = 1,
ExceptionEntry = 2,
};
virtual ~Symbol() {}
@ -57,6 +58,18 @@ protected:
Symbol(SymbolType type) : symbol_type(type) {}
};
class ExceptionEntrySymbol;
// A basic block found while analyzing a function: a run of instructions that
// ends at a branch.
class FunctionBlock {
public:
  // Start every block in a known state so that a default-initialized instance
  // never carries indeterminate addresses or a garbage outgoing pointer.
  FunctionBlock()
      : start_address(0),
        end_address(0),
        outgoing_block(NULL),
        outgoing_address(0) {}

  // Address of the first instruction in the block.
  uint32_t start_address;
  // Address of the last instruction recorded in the block.
  uint32_t end_address;

  // NOTE(review): the three members below are not populated by the analysis
  // code visible next to this class; presumably they hold the control-flow
  // links between blocks once those are wired up - confirm before relying on
  // them.
  std::vector<FunctionBlock*> incoming_blocks;
  FunctionBlock* outgoing_block;
  uint32_t outgoing_address;
};
class FunctionSymbol : public Symbol {
public:
enum FunctionType {
@ -64,6 +77,10 @@ public:
Kernel = 1,
User = 2,
};
enum Flags {
kFlagSaveGprLr = 1 << 1,
kFlagRestGprLr = 1 << 2,
};
FunctionSymbol() : Symbol(Function) {}
virtual ~FunctionSymbol() {}
@ -74,9 +91,13 @@ public:
FunctionType type;
uint32_t flags;
ExceptionEntrySymbol* ee;
vector<FunctionCall*> incoming_calls;
vector<FunctionCall*> outgoing_calls;
vector<VariableAccess*> variable_accesses;
map<uint32_t, FunctionBlock*> blocks;
};
class VariableSymbol : public Symbol {
@ -88,6 +109,15 @@ public:
char *name;
};
class ExceptionEntrySymbol : public Symbol {
public:
ExceptionEntrySymbol() : Symbol(ExceptionEntry) {}
virtual ~ExceptionEntrySymbol() {}
uint32_t address;
FunctionSymbol* function;
};
class SymbolDatabase {
public:
@ -96,6 +126,7 @@ public:
int Analyze();
ExceptionEntrySymbol* GetOrInsertExceptionEntry(uint32_t address);
FunctionSymbol* GetOrInsertFunction(uint32_t address);
VariableSymbol* GetOrInsertVariable(uint32_t address);
FunctionSymbol* GetFunction(uint32_t address);
@ -105,6 +136,7 @@ public:
int GetAllFunctions(vector<FunctionSymbol*>& functions);
void Dump();
void DumpFunctionBlocks(FunctionSymbol* fn);
private:
typedef std::map<uint32_t, Symbol*> SymbolMap;
@ -114,9 +146,12 @@ private:
int AddImports(const xe_xex2_import_library_t *library);
int AddMethodHints();
int AnalyzeFunction(FunctionSymbol* fn);
int FillHoles();
bool FillHoles();
int FlushQueue();
bool IsValueInTextRange(uint32_t value);
bool IsRestGprLr(uint32_t addr);
xe_memory_ref memory_;
kernel::UserModule* module_;
size_t function_count_;

View File

@ -57,6 +57,11 @@ void xe_log_line(const xechar_t* file_path, const uint32_t line_number,
#else
#define XELOGCPU(fmt, ...) XE_EMPTY_MACRO
#endif
// Symbol database logging. When the LOG_SDB option is enabled, XELOGSDB
// forwards to XELOGCORE with the 'S' tag; otherwise it expands to
// XE_EMPTY_MACRO so call sites compile away.
#if XE_OPTION(LOG_SDB)
#define XELOGSDB(fmt, ...) XELOGCORE('S', fmt, ##__VA_ARGS__)
#else
#define XELOGSDB(fmt, ...) XE_EMPTY_MACRO
#endif
#if XE_OPTION(LOG_GPU)
#define XELOGGPU(fmt, ...) XELOGCORE('G', fmt, ##__VA_ARGS__)
#else

View File

@ -196,5 +196,6 @@ int ExecModule::Uninit() {
}
// Dumps the module for debugging: first the symbol database (sdb_), then the
// generated module (gen_module_), in that order.
void ExecModule::Dump() {
sdb_->Dump();
gen_module_->dump();
}

View File

@ -82,7 +82,7 @@ static InstrType instr_table_30_unprep[] = {
INSTRUCTION(rldcrx, 0x78000012, MDS, General , 0),
};
static InstrType* instr_table_30 = instr_table_prep(
instr_table_30_unprep, XECOUNT(instr_table_30_unprep), 1, 5);
instr_table_30_unprep, XECOUNT(instr_table_30_unprep), 2, 4);
// Opcode = 31, index = bits 10-1 (10)
static InstrType instr_table_31_unprep[] = {

View File

@ -12,9 +12,12 @@
#include <list>
#include <map>
#include <xenia/cpu/ppc/instr.h>
using namespace xe;
using namespace xe::cpu;
using namespace xe::cpu::ppc;
using namespace xe::cpu::sdb;
using namespace xe::kernel;
@ -58,25 +61,47 @@ int SymbolDatabase::Analyze() {
// Queue entry point of the application.
FunctionSymbol* fn = GetOrInsertFunction(header->exe_entry_point);
fn->name = strdup("<entry>");
fn->name = xestrdupa("<entry>");
// Keep pumping the queue until there's nothing left to do.
FlushQueue();
// Do a pass over the functions to fill holes.
FillHoles();
FlushQueue();
// Do a pass over the functions to fill holes. A few times. Just to be safe.
while (true) {
if (!FillHoles()) {
break;
}
FlushQueue();
}
return 0;
}
// Returns the exception entry symbol registered at |address|, creating and
// registering a new one when none exists yet. Never returns NULL.
ExceptionEntrySymbol* SymbolDatabase::GetOrInsertExceptionEntry(
    uint32_t address) {
  SymbolMap::iterator i = symbols_.find(address);
  // Only reuse the existing symbol if it really is an exception entry. The
  // type tag must match the static_cast below, otherwise a function symbol
  // would be handed back as an ExceptionEntrySymbol.
  if (i != symbols_.end() &&
      i->second->symbol_type == Symbol::ExceptionEntry) {
    return static_cast<ExceptionEntrySymbol*>(i->second);
  }

  ExceptionEntrySymbol* ee = new ExceptionEntrySymbol();
  ee->address = address;
  // Callers test |function| for NULL, so never hand out an indeterminate value.
  ee->function = NULL;
  // NOTE(review): std::map::insert does not overwrite. If a symbol of another
  // type already occupies |address| the new entry is returned but is not
  // tracked by symbols_ - confirm whether that case can occur.
  symbols_.insert(SymbolMap::value_type(address, ee));
  return ee;
}
FunctionSymbol* SymbolDatabase::GetOrInsertFunction(uint32_t address) {
FunctionSymbol* fn = GetFunction(address);
if (fn) {
return fn;
}
printf("add fn %.8X\n", address);
// Ignore values outside of the .text range.
if (!IsValueInTextRange(address)) {
XELOGSDB("Ignoring function outside of .text: %.8X\n", address);
return NULL;
}
fn = new FunctionSymbol();
fn->start_address = address;
function_count_++;
@ -91,7 +116,6 @@ VariableSymbol* SymbolDatabase::GetOrInsertVariable(uint32_t address) {
return var;
}
printf("add var %.8X\n", address);
var = new VariableSymbol();
var->address = address;
variable_count_++;
@ -132,14 +156,18 @@ void SymbolDatabase::Dump() {
{
FunctionSymbol* fn = static_cast<FunctionSymbol*>(it->second);
if (previous && (int)(fn->start_address - previous) > 0) {
printf("%.8X-%.8X (%5d) h\n", previous, fn->start_address,
fn->start_address - previous);
if (fn->start_address - previous > 4 ||
*((uint32_t*)xe_memory_addr(memory_, previous)) != 0) {
printf("%.8X-%.8X (%5d) h\n", previous, fn->start_address,
fn->start_address - previous);
}
}
printf("%.8X-%.8X (%5d) f %s\n", fn->start_address,
fn->end_address + 4,
fn->end_address - fn->start_address + 4,
fn->name ? fn->name : "<unknown>");
previous = fn->end_address + 4;
DumpFunctionBlocks(fn);
}
break;
case Symbol::Variable:
@ -149,10 +177,28 @@ void SymbolDatabase::Dump() {
var->name ? var->name : "<unknown>");
}
break;
case Symbol::ExceptionEntry:
{
ExceptionEntrySymbol* ee = static_cast<ExceptionEntrySymbol*>(
it->second);
printf("%.8X-%.8X (%5d) e of %.8X\n",
ee->address, ee->address + 8, 8,
ee->function ? ee->function->start_address : 0);
previous = ee->address + 8 + 4;
}
break;
}
}
}
// Prints one line per basic block of |fn|, in ascending address order, showing
// the block's start address.
void SymbolDatabase::DumpFunctionBlocks(FunctionSymbol* fn) {
  typedef std::map<uint32_t, FunctionBlock*> BlockMap;

  BlockMap::iterator cursor = fn->blocks.begin();
  const BlockMap::iterator last = fn->blocks.end();
  while (cursor != last) {
    const FunctionBlock* current = cursor->second;
    printf(" bb %.8X\n", current->start_address);
    ++cursor;
  }
}
int SymbolDatabase::FindGplr() {
// Special stack save/restore functions.
// __savegprlr_14 to __savegprlr_31
@ -233,8 +279,9 @@ int SymbolDatabase::FindGplr() {
xesnprintf(name, XECOUNT(name), "__savegprlr_%d", n);
FunctionSymbol* fn = GetOrInsertFunction(address);
fn->end_address = fn->start_address + (31 - n) * 4 + 2 * 4;
fn->name = xestrdup(name);
fn->name = xestrdupa(name);
fn->type = FunctionSymbol::User;
fn->flags |= FunctionSymbol::kFlagSaveGprLr;
address += 4;
}
address = gplr_start + 20 * 4;
@ -242,8 +289,9 @@ int SymbolDatabase::FindGplr() {
xesnprintf(name, XECOUNT(name), "__restgprlr_%d", n);
FunctionSymbol* fn = GetOrInsertFunction(address);
fn->end_address = fn->start_address + (31 - n) * 4 + 3 * 4;
fn->name = xestrdup(name);
fn->name = xestrdupa(name);
fn->type = FunctionSymbol::User;
fn->flags |= FunctionSymbol::kFlagRestGprLr;
address += 4;
}
@ -267,14 +315,14 @@ int SymbolDatabase::AddImports(const xe_xex2_import_library_t* library) {
// TODO(benvanik): use kernel name
xesnprintf(name, XECOUNT(name), "__var_%s_%.3X", library->name,
info->ordinal);
var->name = strdup(name);
var->name = xestrdupa(name);
if (info->thunk_address) {
FunctionSymbol* fn = GetOrInsertFunction(info->thunk_address);
// TODO(benvanik): use kernel name
xesnprintf(name, XECOUNT(name), "__thunk_%s_%.3X", library->name,
info->ordinal);
fn->end_address = fn->start_address + 16 - 4;
fn->name = strdup(name);
fn->name = xestrdupa(name);
fn->type = FunctionSymbol::Kernel;
}
}
@ -301,15 +349,191 @@ int SymbolDatabase::AddMethodHints() {
return 0;
}
bool SymbolDatabase::IsRestGprLr(uint32_t addr) {
FunctionSymbol* fn = GetFunction(addr);
return fn && (fn->flags & FunctionSymbol::kFlagRestGprLr);
}
// Scans |fn| from its start address, splitting it into basic blocks at branch
// instructions and estimating where the function ends.
//
// Returns 0 when the function was analyzed or skipped (already analyzed,
// kernel thunk, or re-queued because it started with a zero word), and 1 when
// an instruction could not be decoded.
int SymbolDatabase::AnalyzeFunction(FunctionSymbol* fn) {
  // Ignore functions already analyzed.
  if (fn->blocks.size()) {
    return 0;
  }
  // Ignore kernel thunks.
  if (fn->type == FunctionSymbol::Kernel) {
    return 0;
  }

  // This is a simple basic block analyzer. It walks the start address to the
  // end address looking for branches. Each span of instructions between
  // branches is considered a basic block, and the blocks are linked up to
  // create a CFG for the function. When the last blr (that has no branches
  // to after it) is found the function is considered ended. If this is before
  // the expected end address then the function address range is split up and
  // the second half is treated as another function.

  // TODO(benvanik): special branch checks:
  // bl to _XamLoaderTerminateTitle should be treated as b
  // bl to KeBugCheck should be treated as b, and b KeBugCheck should die

  // TODO(benvanik): identify thunks:
  // These look like:
  //   li r5, 0
  //   [etc]
  //   b some_function
  // Can probably be detected by lack of use of LR?

  uint8_t* p = xe_memory_addr(memory_, 0);

  if (*((uint32_t*)(p + fn->start_address)) == 0) {
    // Function starts with 0x00000000 - we want to skip this and split.
    XELOGSDB("function starts with 0: %.8X\n", fn->start_address);
    symbols_.erase(fn->start_address);
    if (!GetFunction(fn->start_address + 4)) {
      // Re-register the function one word later and analyze it again.
      fn->start_address += 4;
      symbols_.insert(SymbolMap::value_type(fn->start_address, fn));
      scan_queue_.push_back(fn);
    } else {
      // A function already exists right after the zero word; drop this one.
      // NOTE(review): anything else still holding |fn| would dangle after
      // this delete - confirm no other references exist at this point.
      delete fn;
    }
    return 0;
  }

  XELOGSDB("Analyzing function %.8X...\n", fn->start_address);

  InstrData i;
  FunctionBlock* block = NULL;
  // Highest forward branch target seen so far; a return at or beyond this
  // address is treated as the end of the function.
  uint32_t furthest_target = fn->start_address;
  uint32_t addr = fn->start_address;
  while (true) {
    i.code = XEGETUINT32BE(p + addr);
    i.type = ppc::GetInstrType(i.code);
    i.address = addr;

    // If we fetched 0 assume that we somehow hit one of the awesome
    // 'no really we meant to end after that bl' functions.
    if (!i.code) {
      XELOGSDB("function end %.8X (0x00000000 read)\n", addr);
      break;
    }

    if (!i.type) {
      // Invalid instruction.
      XELOGSDB("Invalid instruction at %.8X: %.8X\n", addr, i.code);
      return 1;
    }

    // Create a new basic block, if needed.
    if (!block) {
      block = new FunctionBlock();
      block->start_address = addr;
      block->end_address = addr;
      fn->blocks.insert(std::pair<uint32_t, FunctionBlock*>(
          block->start_address, block));
    }
    // Record this instruction as part of the block before any of the
    // function-end checks below can break out of the loop, so the final block
    // always includes the instruction that terminated the function.
    block->end_address = addr;

    bool ends_block = false;
    if (i.code == 0x4E800020) {
      // blr -- unconditional branch to LR.
      // This is generally a return.
      if (furthest_target > addr) {
        // Remaining targets within function, not end.
        XELOGSDB("ignoring blr %.8X (branch to %.8X)\n", addr, furthest_target);
      } else {
        // Function end point.
        XELOGSDB("function end %.8X\n", addr);
        break;
      }
      ends_block = true;
    } else if (i.type->opcode == 0x48000000) {
      // b/ba/bl/bla
      uint32_t target = XEEXTS26(i.I.LI << 2) + (i.I.AA ? 0 : (int32_t)addr);
      if (i.I.LK) {
        XELOGSDB("bl %.8X -> %.8X\n", addr, target);
        // Queue target if needed.
      } else {
        XELOGSDB("b %.8X -> %.8X\n", addr, target);
        // If the target is back into the function and there's no further target
        // we are at the end of a function.
        if (target >= fn->start_address &&
            target < addr && furthest_target <= addr) {
          XELOGSDB("function end %.8X (back b)\n", addr);
          break;
        }

        // If the target is a __restgprlr_* method it's the end of a function.
        // Note that sometimes functions stick this in a basic block *inside*
        // of the function somewhere, so ensure we don't have any branches over
        // it.
        if (furthest_target <= addr && IsRestGprLr(target)) {
          XELOGSDB("function end %.8X (__restgprlr_*)\n", addr);
          break;
        }

        furthest_target = MAX(furthest_target, target);
      }
      ends_block = true;
    } else if (i.type->opcode == 0x40000000) {
      // bc/bca/bcl/bcla
      uint32_t target = XEEXTS16(i.B.BD << 2) + (i.B.AA ? 0 : (int32_t)addr);
      if (i.B.LK) {
        XELOGSDB("bcl %.8X -> %.8X\n", addr, target);
      } else {
        XELOGSDB("bc %.8X -> %.8X\n", addr, target);
        furthest_target = MAX(furthest_target, target);
      }
      ends_block = true;
    } else if (i.type->opcode == 0x4C000020) {
      // bclr/bclrl
      if (i.XL.LK) {
        XELOGSDB("bclrl %.8X\n", addr);
      } else {
        XELOGSDB("bclr %.8X\n", addr);
      }
      ends_block = true;
    } else if (i.type->opcode == 0x4C000420) {
      // bcctr/bcctrl
      if (i.XL.LK) {
        XELOGSDB("bcctrl %.8X\n", addr);
      } else {
        XELOGSDB("bcctr %.8X\n", addr);
      }
      ends_block = true;
    }

    if (ends_block) {
      // This instruction is the end of a basic block.
      // Finish up the one we are working on. The next loop around will create
      // a new one to scribble into.
      block = NULL;
    }

    addr += 4;
    if (fn->end_address && addr > fn->end_address) {
      // Hmm....
      XELOGSDB("Ran over function bounds! %.8X-%.8X\n",
               fn->start_address, fn->end_address);
      break;
    }
  }

  if (addr + 4 < fn->end_address) {
    // Ran under the expected value - since we probably got the initial bounds
    // from someplace valid (like method hints) this may indicate an error.
    // It's also possible that we guessed in hole-filling and there's another
    // function below this one.
    XELOGSDB("Function ran under: %.8X-%.8X ended at %.8X\n",
             fn->start_address, fn->end_address, addr + 4);
  }
  fn->end_address = addr;

  // TODO(benvanik): if there's spare bits at the end, split the function.

  XELOGSDB("Finished analyzing %.8X\n", fn->start_address);
  return 0;
}
@ -317,17 +541,110 @@ int SymbolDatabase::FlushQueue() {
while (scan_queue_.size()) {
FunctionSymbol* fn = scan_queue_.front();
scan_queue_.pop_front();
if (!AnalyzeFunction(fn)) {
if (AnalyzeFunction(fn)) {
XELOGSDB("Aborting analysis!\n");
return 1;
}
}
return 0;
}
int SymbolDatabase::FillHoles() {
// TODO(benvanik): scan all holes
// Returns true when |value| lies inside a section of the module's XEX header
// whose type is XEX_SECTION_CODE. Sections are laid out back to back starting
// at the header's exe_address, each spanning page_count pages of
// xe_xex2_section_length bytes. Values outside every section yield false.
bool SymbolDatabase::IsValueInTextRange(uint32_t value) {
  const xe_xex2_header_t* header = module_->xex_header();

  // Running count of pages occupied by the sections visited so far.
  size_t i = 0;
  size_t n = 0;
  while (n < header->section_count) {
    const xe_xex2_section_t* section = &header->sections[n];
    const size_t start_address =
        header->exe_address + (i * xe_xex2_section_length);
    const size_t end_address =
        start_address + (section->info.page_count * xe_xex2_section_length);

    const bool inside = !(value < start_address || value >= end_address);
    if (inside) {
      // The first section containing the value decides the answer.
      if (section->info.type == XEX_SECTION_CODE) {
        return true;
      }
      return false;
    }

    i += section->info.page_count;
    n++;
  }
  return false;
}
// Address range of a gap found between two known functions: |start_address| is
// the first address after the preceding function, |end_address| the start
// address of the following one.
struct HoleInfo {
  uint32_t start_address;
  uint32_t end_address;
};
bool SymbolDatabase::FillHoles() {
// If 4b, check if 0x00000000 and ignore (alignment padding)
// If 8b, check if first value is within .text and ignore (EH entry)
// Else, add to scan queue as function?
return 0;
std::vector<HoleInfo> holes;
std::vector<uint32_t> ees;
uint32_t previous = 0;
for (SymbolMap::iterator it = symbols_.begin(); it != symbols_.end(); ++it) {
switch (it->second->symbol_type) {
case Symbol::Function:
{
FunctionSymbol* fn = static_cast<FunctionSymbol*>(it->second);
if (previous && (int)(fn->start_address - previous) > 0) {
// Hole!
uint32_t* p = (uint32_t*)xe_memory_addr(memory_, previous);
size_t hole_length = fn->start_address - previous;
if (hole_length == 4) {
// Likely a pointer or 0.
if (*p == 0) {
// Skip - just a zero.
} else if (IsValueInTextRange(XEGETUINT32BE(p))) {
// An address - probably an indirection data value.
}
} else if (hole_length == 8) {
// Possibly an exception handler entry.
// They look like [some value in .text] + [some pointer].
if (*p == 0 || IsValueInTextRange(XEGETUINT32BE(p))) {
// Skip!
ees.push_back(previous);
} else {
// Probably legit.
holes.push_back((HoleInfo){previous, fn->start_address});
}
} else {
// Probably legit.
holes.push_back((HoleInfo){previous, fn->start_address});
}
}
previous = fn->end_address + 4;
}
break;
case Symbol::Variable:
case Symbol::ExceptionEntry:
break;
}
}
for (std::vector<uint32_t>::iterator it = ees.begin(); it != ees.end();
++it) {
ExceptionEntrySymbol* ee = GetOrInsertExceptionEntry(*it);
ee->function = GetFunction(ee->address + 8);
if (ee->function) {
ee->function->ee = ee;
}
uint32_t* p = (uint32_t*)xe_memory_addr(memory_, ee->address);
uint32_t handler_addr = XEGETUINT32BE(p);
if (handler_addr) {
GetOrInsertFunction(handler_addr);
}
uint32_t data_addr = XEGETUINT32BE(p + 1);
if (data_addr) {
VariableSymbol* var = GetOrInsertVariable(data_addr);
char name[32];
xesnprintf(name, XECOUNT(name), "__ee_data_%.8X", *it);
var->name = xestrdupa(name);
}
}
for (std::vector<HoleInfo>::iterator it = holes.begin(); it != holes.end();
++it) {
FunctionSymbol* fn = GetOrInsertFunction(it->start_address);
fn->end_address = it->end_address;
}
return holes.size() > 0;
}