From c18e94c5be417714f32964965c811cccb6bef648 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Thu, 17 Jan 2013 23:17:49 -0800 Subject: [PATCH] Fleshing out the symbol database. Now detecting a lot of functions via method hints and generating stub functions. A few holes (in the test xex), but enough to move forward with codegen. --- common.gypi | 3 + include/xenia/core/memory.h | 4 + include/xenia/cpu.h | 2 - include/xenia/cpu/sdb.h | 15 +- include/xenia/string.h | 4 + src/core/memory.cc | 25 +++ src/cpu/codegen.cc | 187 ++++++++++++-------- src/cpu/codegen.h | 16 +- src/cpu/cpu.cc | 38 ++-- src/cpu/sdb.cc | 336 ++++++++++++++++++++++++++++++++++-- 10 files changed, 517 insertions(+), 113 deletions(-) diff --git a/common.gypi b/common.gypi index 5f00deab8..795da2f9c 100644 --- a/common.gypi +++ b/common.gypi @@ -53,6 +53,9 @@ 'StackReserveSize': '2097152', }, }, + 'xcode_settings': { + 'GCC_OPTIMIZATION_LEVEL': '0', + }, }, 'release': { 'defines': [ diff --git a/include/xenia/core/memory.h b/include/xenia/core/memory.h index 6411bf759..58d7e5ab6 100644 --- a/include/xenia/core/memory.h +++ b/include/xenia/core/memory.h @@ -31,5 +31,9 @@ void xe_memory_release(xe_memory_ref memory); size_t xe_memory_get_length(xe_memory_ref memory); uint8_t *xe_memory_addr(xe_memory_ref memory, uint32_t guest_addr); +uint32_t xe_memory_search_aligned(xe_memory_ref memory, uint32_t start, + uint32_t end, const uint32_t *values, + const size_t value_count); + #endif // XENIA_CORE_MEMORY_H_ diff --git a/include/xenia/cpu.h b/include/xenia/cpu.h index 61f19be8c..72f598dcc 100644 --- a/include/xenia/cpu.h +++ b/include/xenia/cpu.h @@ -13,7 +13,6 @@ #include #include -#include #include #include @@ -34,7 +33,6 @@ void xe_cpu_release(xe_cpu_ref cpu); xe_pal_ref xe_cpu_get_pal(xe_cpu_ref cpu); xe_memory_ref xe_cpu_get_memory(xe_cpu_ref cpu); -xe_sdb_ref xe_cpu_get_sdb(xe_cpu_ref cpu); int xe_cpu_prepare_module(xe_cpu_ref cpu, xe_module_ref module, xe_kernel_export_resolver_ref 
export_resolver); diff --git a/include/xenia/cpu/sdb.h b/include/xenia/cpu/sdb.h index 255f80d44..5d70399d8 100644 --- a/include/xenia/cpu/sdb.h +++ b/include/xenia/cpu/sdb.h @@ -58,12 +58,20 @@ struct xe_sdb_variable { char *name; }; +typedef struct { + int type; + union { + xe_sdb_function_t* function; + xe_sdb_variable_t* variable; + }; +} xe_sdb_symbol_t; + struct xe_sdb; typedef struct xe_sdb* xe_sdb_ref; -xe_sdb_ref xe_sdb_create(xe_memory_ref memory); +xe_sdb_ref xe_sdb_create(xe_memory_ref memory, xe_module_ref module); xe_sdb_ref xe_sdb_retain(xe_sdb_ref sdb); void xe_sdb_release(xe_sdb_ref sdb); @@ -73,9 +81,10 @@ xe_sdb_variable_t* xe_sdb_insert_variable(xe_sdb_ref sdb, uint32_t address); xe_sdb_function_t* xe_sdb_get_function(xe_sdb_ref sdb, uint32_t address); xe_sdb_variable_t* xe_sdb_get_variable(xe_sdb_ref sdb, uint32_t address); -void xe_sdb_dump(xe_sdb_ref sdb); +int xe_sdb_get_functions(xe_sdb_ref sdb, xe_sdb_function_t ***out_functions, + size_t *out_function_count); -int xe_sdb_analyze_module(xe_sdb_ref sdb, xe_module_ref module); +void xe_sdb_dump(xe_sdb_ref sdb); #endif // XENIA_CPU_SDB_H_ diff --git a/include/xenia/string.h b/include/xenia/string.h index 6e5e253ab..10944ed7b 100644 --- a/include/xenia/string.h +++ b/include/xenia/string.h @@ -31,6 +31,7 @@ int strncpy_s(char* dest, size_t destLength, const char* source, size_t count); #define xestrlenw wcslen #define xestrcmpw wcscmp #define xestrcasecmpw _wcsicmp +#define xestrdupw wcsdup #define xestrchrw wcschr #define xestrrchrw wcsrchr #define xestrstrw wcsstr @@ -44,6 +45,7 @@ int strncpy_s(char* dest, size_t destLength, const char* source, size_t count); #define xestrlena strlen #define xestrcmpa strcmp #define xestrcasecmpa strcasecmp +#define xestrdupa strdup #define xestrchra strchr #define xestrrchra strrchr #define xestrstra strstr @@ -62,6 +64,7 @@ typedef wchar_t xechar_t; #define xestrlen xestrlenw #define xestrcmp xestrcmpw #define xestrcasecmp xestrcasecmpw +#define xestrdup 
xestrdupw #define xestrchr xestrchrw #define xestrrchr xestrrchrw #define xestrstr xestrstrw @@ -82,6 +85,7 @@ typedef char xechar_t; #define xestrlen xestrlena #define xestrcmp xestrcmpa #define xestrcasecmp xestrcasecmpa +#define xestrdup xestrdupa #define xestrchr xestrchra #define xestrrchr xestrrchra #define xestrstr xestrstra diff --git a/src/core/memory.cc b/src/core/memory.cc index e8e0eea28..9b98dd6a0 100644 --- a/src/core/memory.cc +++ b/src/core/memory.cc @@ -72,3 +72,28 @@ size_t xe_memory_get_length(xe_memory_ref memory) { uint8_t *xe_memory_addr(xe_memory_ref memory, uint32_t guest_addr) { return (uint8_t*)memory->ptr + guest_addr; } + +uint32_t xe_memory_search_aligned(xe_memory_ref memory, uint32_t start, + uint32_t end, const uint32_t *values, + const size_t value_count) { + XEASSERT(start <= end); + const uint32_t *p = (const uint32_t*)xe_memory_addr(memory, start); + const uint32_t *pe = (const uint32_t*)xe_memory_addr(memory, end); + while (p < pe) { + if (*p == values[0]) { + const uint32_t *pc = p + 1; + size_t matched = 1; + for (size_t n = 1; n < value_count && pc < pe; n++, pc++) { // never compare words at or beyond end + if (*pc != values[n]) { + break; + } + matched++; + } + if (matched == value_count) { + return (uint32_t)((uint8_t*)p - (uint8_t*)memory->ptr); + } + } + p++; + } + return 0; +} diff --git a/src/cpu/codegen.cc b/src/cpu/codegen.cc index c1f5f49b2..09e77c5e3 100644 --- a/src/cpu/codegen.cc +++ b/src/cpu/codegen.cc @@ -24,95 +24,71 @@ using namespace llvm; -void xe_cpu_codegen_add_imports(xe_memory_ref memory, - xe_kernel_export_resolver_ref export_resolver, - xe_module_ref module, Module *m); +// TODO(benvanik): +typedef struct { + xe_codegen_options_t options; + xe_memory_ref memory; + xe_kernel_export_resolver_ref export_resolver; + + xe_module_ref module; + xe_sdb_ref sdb; + + LLVMContext *context; + Module *shared_module; + Module *gen_module; +} xe_cpu_codegen_ctx_t; + + +void xe_cpu_codegen_add_imports(xe_codegen_ctx_t *ctx); void xe_cpu_codegen_add_missing_import( 
- Module *m, const xe_xex2_import_library_t *library, + xe_codegen_ctx_t *ctx, const xe_xex2_import_library_t *library, const xe_xex2_import_info_t* info, xe_kernel_export_t *kernel_export); void xe_cpu_codegen_add_import( - Module *m, const xe_xex2_import_library_t *library, + xe_codegen_ctx_t *ctx, const xe_xex2_import_library_t *library, const xe_xex2_import_info_t* info, xe_kernel_export_t *kernel_export); +void xe_cpu_codegen_add_function(xe_codegen_ctx_t *ctx, xe_sdb_function_t *fn); void xe_cpu_codegen_optimize(Module *m, Function *fn); -llvm::Module *xe_cpu_codegen(llvm::LLVMContext& context, xe_memory_ref memory, - xe_kernel_export_resolver_ref export_resolver, - xe_module_ref module, Module *shared_module, +llvm::Module *xe_cpu_codegen(xe_codegen_ctx_t *ctx, xe_codegen_options_t options) { + LLVMContext& context = *ctx->context; std::string error_message; // Initialize the module. Module *m = new Module("generated.xex", context); + ctx->m = m; // TODO(benavnik): addModuleFlag? // Link shared module into generated module. // This gives us a single module that we can optimize and prevents the need // for foreward declarations. - Linker::LinkModules(m, shared_module, 0, &error_message); + Linker::LinkModules(m, ctx->shared_module, 0, &error_message); // Add import thunks/etc. - xe_cpu_codegen_add_imports(memory, export_resolver, module, m); + xe_cpu_codegen_add_imports(ctx); // Add export wrappers. 
// - xe_xex2_ref xex = xe_module_get_xex(module); - const xe_xex2_header_t *header = xe_xex2_get_header(xex); - uint8_t *mem = xe_memory_addr(memory, 0); - uint32_t *pc = (uint32_t*)(mem + header->exe_entry_point); - uint32_t pcdata = XEGETUINT32BE(pc); - printf("data %.8X %.8X\n", header->exe_entry_point, pcdata); - xe_ppc_instr_type_t *instr_type = xe_ppc_get_instr_type(pcdata); - if (instr_type) { - printf("instr %.8X %s\n", header->exe_entry_point, instr_type->name); - xe_ppc_instr_t instr; - instr.data.code = pcdata; - printf("%d %d\n", instr.data.XFX.D, instr.data.XFX.spr); - } else { - printf("instr not found\n"); + // Add all functions/ + xe_sdb_function_t **functions; + size_t function_count; + if (!xe_sdb_get_functions(ctx->sdb, &functions, &function_count)) { + for (size_t n = 0; n < function_count; n++) { + // kernel functions will be handled by the add imports handlers. + if (functions[n]->type == kXESDBFunctionUser) { + xe_cpu_codegen_add_function(ctx, functions[n]); + } + } + xe_free(functions); } - Constant* c = m->getOrInsertFunction("mul_add", - /*ret type*/ IntegerType::get(context, 32), - /*args*/ IntegerType::get(context, 32), - IntegerType::get(context, 32), - IntegerType::get(context, 32), - /*varargs terminated with null*/ NULL); - - Function* mul_add = cast(c); - mul_add->setCallingConv(CallingConv::C); - - Function::arg_iterator args = mul_add->arg_begin(); - Value* x = args++; - x->setName("x"); - Value* y = args++; - y->setName("y"); - Value* z = args++; - z->setName("z"); - - BasicBlock* block = BasicBlock::Create(getGlobalContext(), "entry", mul_add); - IRBuilder<> builder(block); - - Value* tmp = builder.CreateBinOp(Instruction::Mul, - x, y, "tmp"); - Value* tmp2 = builder.CreateBinOp(Instruction::Add, - tmp, z, "tmp2"); - - builder.CreateRet(tmp2); - - // Run the optimizer on the function. - // Doing this here keeps the size of the IR small and speeds up the later - // passes. 
- xe_cpu_codegen_optimize(m, mul_add); - return m; } -void xe_cpu_codegen_add_imports(xe_memory_ref memory, - xe_kernel_export_resolver_ref export_resolver, - xe_module_ref module, Module *m) { - xe_xex2_ref xex = xe_module_get_xex(module); +void xe_cpu_codegen_add_imports(xe_codegen_ctx_t *ctx) { + xe_xex2_ref xex = xe_module_get_xex(ctx->module); const xe_xex2_header_t *header = xe_xex2_get_header(xex); for (size_t n = 0; n < header->import_library_count; n++) { @@ -127,13 +103,13 @@ void xe_cpu_codegen_add_imports(xe_memory_ref memory, const xe_xex2_import_info_t *info = &import_infos[i]; xe_kernel_export_t *kernel_export = xe_kernel_export_resolver_get_by_ordinal( - export_resolver, library->name, info->ordinal); + ctx->export_resolver, library->name, info->ordinal); if (!kernel_export || !xe_kernel_export_is_implemented(kernel_export)) { // Not implemented or known. - xe_cpu_codegen_add_missing_import(m, library, info, kernel_export); + xe_cpu_codegen_add_missing_import(ctx, library, info, kernel_export); } else { // Implemented. - xe_cpu_codegen_add_import(m, library, info, kernel_export); + xe_cpu_codegen_add_import(ctx, library, info, kernel_export); } } @@ -144,12 +120,13 @@ void xe_cpu_codegen_add_imports(xe_memory_ref memory, } void xe_cpu_codegen_add_missing_import( - Module *m, const xe_xex2_import_library_t *library, + xe_codegen_ctx_t *ctx, const xe_xex2_import_library_t *library, const xe_xex2_import_info_t* info, xe_kernel_export_t *kernel_export) { + Module *m = ctx->m; LLVMContext& context = m->getContext(); char name[128]; - xesnprintfa(name, XECOUNT(name), "__%s_%.8X", + xesnprintfa(name, XECOUNT(name), "__thunk_%s_%.8X", library->name, kernel_export->ordinal); // TODO(benvanik): add name as comment/alias? @@ -173,29 +150,89 @@ void xe_cpu_codegen_add_missing_import( f->setCallingConv(CallingConv::C); f->setVisibility(GlobalValue::DefaultVisibility); + // TODO(benvanik): log errors. 
BasicBlock* block = BasicBlock::Create(context, "entry", f); IRBuilder<> builder(block); Value *tmp = builder.getInt32(0); - builder.getInt32(123); builder.CreateRet(tmp); xe_cpu_codegen_optimize(m, f); //GlobalAlias *alias = new GlobalAlias(f->getType(), GlobalValue::InternalLinkage, name, f, m); - // printf(" F %.8X %.8X %.3X (%3d) %s %s\n", - // info->value_address, info->thunk_address, info->ordinal, - // info->ordinal, implemented ? " " : "!!", name); + // printf(" F %.8X %.8X %.3X (%3d) %s %s\n", + // info->value_address, info->thunk_address, info->ordinal, + // info->ordinal, implemented ? " " : "!!", name); } else { - // printf(" V %.8X %.3X (%3d) %s %s\n", - // info->value_address, info->ordinal, info->ordinal, - // implemented ? " " : "!!", name); + // printf(" V %.8X %.3X (%3d) %s %s\n", + // info->value_address, info->ordinal, info->ordinal, + // implemented ? " " : "!!", name); } } void xe_cpu_codegen_add_import( - Module *m, const xe_xex2_import_library_t *library, + xe_codegen_ctx_t *ctx, const xe_xex2_import_library_t *library, const xe_xex2_import_info_t* info, xe_kernel_export_t *kernel_export) { - // + // Module *m = ctx->m; + // LLVMContext& context = m->getContext(); + + // TODO(benvanik): add import thunk code. 
+} + +void xe_cpu_codegen_add_function(xe_codegen_ctx_t *ctx, xe_sdb_function_t *fn) { + Module *m = ctx->m; + LLVMContext& context = m->getContext(); + + AttributeWithIndex awi[] = { + //AttributeWithIndex::get(context, 2, Attributes::NoCapture), + AttributeWithIndex::get(context, + AttributeSet::FunctionIndex, Attribute::NoUnwind), + }; + AttributeSet attrs = AttributeSet::get(context, awi); + + std::vector<Type*> args; + Type *return_type = Type::getInt32Ty(context); + + char name[64]; + char *pname = name; + if (fn->name) { + pname = fn->name; + } else { + xesnprintfa(name, XECOUNT(name), "fn_%.8X", fn->start_address); + } + + FunctionType *ft = FunctionType::get(return_type, + ArrayRef<Type*>(args), false); + Function *f = cast<Function>(m->getOrInsertFunction( + StringRef(pname), ft, attrs)); + f->setCallingConv(CallingConv::C); + f->setVisibility(GlobalValue::DefaultVisibility); + + // TODO(benvanik): generate code! + BasicBlock* block = BasicBlock::Create(context, "entry", f); + IRBuilder<> builder(block); + Value *tmp = builder.getInt32(0); + builder.CreateRet(tmp); + + // i->setMetadata("some.name", MDNode::get(context, MDString::get(context, pname))); + + uint8_t *mem = xe_memory_addr(ctx->memory, 0); + uint32_t *pc = (uint32_t*)(mem + fn->start_address); + uint32_t pcdata = XEGETUINT32BE(pc); + printf("data %.8X %.8X\n", fn->start_address, pcdata); + xe_ppc_instr_type_t *instr_type = xe_ppc_get_instr_type(pcdata); + if (instr_type) { + printf("instr %.8X %s\n", fn->start_address, instr_type->name); + xe_ppc_instr_t instr; + instr.data.code = pcdata; + printf("%d %d\n", instr.data.XFX.D, instr.data.XFX.spr); + } else { + printf("instr not found\n"); + } + + // Run the optimizer on the function. + // Doing this here keeps the size of the IR small and speeds up the later + // passes. 
+ xe_cpu_codegen_optimize(m, f); } void xe_cpu_codegen_optimize(Module *m, Function *fn) { diff --git a/src/cpu/codegen.h b/src/cpu/codegen.h index 38860bd4e..08d5d80f9 100644 --- a/src/cpu/codegen.h +++ b/src/cpu/codegen.h @@ -10,6 +10,7 @@ #ifndef XENIA_CPU_CODEGEN_H_ #define XENIA_CPU_CODEGEN_H_ +#include #include #include @@ -21,10 +22,19 @@ typedef struct { int reserved; } xe_codegen_options_t; +typedef struct { + xe_memory_ref memory; + xe_kernel_export_resolver_ref export_resolver; + xe_module_ref module; + xe_sdb_ref sdb; -llvm::Module *xe_cpu_codegen(llvm::LLVMContext& context, xe_memory_ref memory, - xe_kernel_export_resolver_ref export_resolver, - xe_module_ref module, llvm::Module *shared_module, + llvm::LLVMContext *context; + llvm::Module *shared_module; + llvm::Module *m; +} xe_codegen_ctx_t; + + +llvm::Module *xe_cpu_codegen(xe_codegen_ctx_t *ctx, xe_codegen_options_t options); diff --git a/src/cpu/cpu.cc b/src/cpu/cpu.cc index 1378ceee7..84c1aebd8 100644 --- a/src/cpu/cpu.cc +++ b/src/cpu/cpu.cc @@ -30,6 +30,8 @@ #include #include +#include + #include "cpu/codegen.h" #include "cpu/xethunk/xethunk.h" @@ -37,9 +39,10 @@ using namespace llvm; typedef struct { - xe_module_ref module; - LLVMContext *context; - Module *m; + xe_module_ref module; + xe_sdb_ref sdb; + LLVMContext *context; + Module *m; } xe_cpu_module_entry_t; typedef struct xe_cpu { @@ -49,7 +52,6 @@ typedef struct xe_cpu { xe_pal_ref pal; xe_memory_ref memory; - xe_sdb_ref sdb; std::vector entries; @@ -71,7 +73,6 @@ xe_cpu_ref xe_cpu_create(xe_pal_ref pal, xe_memory_ref memory, cpu->pal = xe_pal_retain(pal); cpu->memory = xe_memory_retain(memory); - cpu->sdb = xe_sdb_create(memory); LLVMLinkInInterpreter(); LLVMLinkInJIT(); @@ -93,13 +94,13 @@ void xe_cpu_dealloc(xe_cpu_ref cpu) { cpu->engine->removeModule(it->m); delete it->m; delete it->context; + xe_sdb_release(it->sdb); xe_module_release(it->module); } delete cpu->engine; llvm_shutdown(); - xe_sdb_release(cpu->sdb); 
xe_memory_release(cpu->memory); xe_pal_release(cpu->pal); } @@ -121,10 +122,6 @@ xe_memory_ref xe_cpu_get_memory(xe_cpu_ref cpu) { return xe_memory_retain(cpu->memory); } -xe_sdb_ref xe_cpu_get_sdb(xe_cpu_ref cpu) { - return xe_sdb_retain(cpu->sdb); -} - int xe_cpu_setup_engine(xe_cpu_ref cpu, Module *gen_module) { if (cpu->engine) { // Engine already initialized - just add the module. @@ -144,6 +141,7 @@ int xe_cpu_prepare_module(xe_cpu_ref cpu, xe_module_ref module, int result_code = 1; std::string error_message; + xe_sdb_ref sdb = NULL; LLVMContext *context = NULL; OwningPtr shared_module_buffer; Module *gen_module = NULL; @@ -171,6 +169,7 @@ int xe_cpu_prepare_module(xe_cpu_ref cpu, xe_module_ref module, // TODO(benvanik): check cache for module bitcode and load. // if (path_exists(cache_key)) { // gen_module = load_bitcode(cache_key); + // sdb = load_symbol_table(cache_key); // } // If not found in cache, generate a new module. @@ -183,12 +182,23 @@ int xe_cpu_prepare_module(xe_cpu_ref cpu, xe_module_ref module, &error_message); XEEXPECTNOTNULL(shared_module); + // Analyze the module and add its symbols to the symbol database. + sdb = xe_sdb_create(cpu->memory, module); + XEEXPECTNOTNULL(sdb); + xe_sdb_dump(sdb); + // Build the module from the source code. xe_codegen_options_t codegen_options; xe_zero_struct(&codegen_options, sizeof(codegen_options)); - gen_module = xe_cpu_codegen(*context, cpu->memory, export_resolver, - module, shared_module, - codegen_options); + xe_codegen_ctx_t codegen_ctx; + xe_zero_struct(&codegen_ctx, sizeof(codegen_ctx)); + codegen_ctx.memory = cpu->memory; + codegen_ctx.export_resolver = export_resolver; + codegen_ctx.module = module; + codegen_ctx.sdb = sdb; + codegen_ctx.context = context; + codegen_ctx.shared_module = shared_module; + gen_module = xe_cpu_codegen(&codegen_ctx, codegen_options); // Write to cache. 
outs = new raw_fd_ostream(cache_path, error_message, @@ -239,6 +249,7 @@ int xe_cpu_prepare_module(xe_cpu_ref cpu, xe_module_ref module, // Stash the module entry to allow cleanup later. xe_cpu_module_entry_t module_entry; module_entry.module = xe_module_retain(module); + module_entry.sdb = xe_sdb_retain(sdb); module_entry.context = context; module_entry.m = gen_module; cpu->entries.push_back(module_entry); @@ -251,6 +262,7 @@ XECLEANUP: delete gen_module; delete context; } + xe_sdb_release(sdb); return result_code; } diff --git a/src/cpu/sdb.cc b/src/cpu/sdb.cc index 3452d2d76..5f8840cc5 100644 --- a/src/cpu/sdb.cc +++ b/src/cpu/sdb.cc @@ -9,24 +9,64 @@ #include +#include <list> +#include <map> + + +typedef std::map<uint32_t, xe_sdb_symbol_t> xe_sdb_symbol_map; +typedef std::list<xe_sdb_function_t*> xe_sdb_function_queue; struct xe_sdb { xe_ref_t ref; xe_memory_ref memory; + + size_t function_count; + size_t variable_count; + xe_sdb_symbol_map *symbols; + xe_sdb_function_queue *scan_queue; }; -xe_sdb_ref xe_sdb_create(xe_memory_ref memory) { +int xe_sdb_analyze_module(xe_sdb_ref sdb, xe_module_ref module); + + +xe_sdb_ref xe_sdb_create(xe_memory_ref memory, xe_module_ref module) { xe_sdb_ref sdb = (xe_sdb_ref)xe_calloc(sizeof(xe_sdb)); xe_ref_init((xe_ref)sdb); sdb->memory = xe_memory_retain(memory); + sdb->symbols = new xe_sdb_symbol_map(); + sdb->scan_queue = new xe_sdb_function_queue(); + + XEEXPECTZERO(xe_sdb_analyze_module(sdb, module)); + return sdb; + +XECLEANUP: + xe_sdb_release(sdb); + return NULL; } void xe_sdb_dealloc(xe_sdb_ref sdb) { + // TODO(benvanik): release strdup results + + for (xe_sdb_symbol_map::iterator it = sdb->symbols->begin(); it != + sdb->symbols->end(); ++it) { + switch (it->second.type) { + case 0: + xe_free(it->second.function); // allocated with xe_calloc, not new + break; + case 1: + xe_free(it->second.variable); // allocated with xe_calloc, not new + break; + } + } + + delete sdb->scan_queue; + delete sdb->symbols; + xe_memory_release(sdb->memory); } @@ -40,23 +80,251 @@ void xe_sdb_release(xe_sdb_ref sdb) { } xe_sdb_function_t* xe_sdb_insert_function(xe_sdb_ref 
sdb, uint32_t address) { - return NULL; + xe_sdb_function_t *fn = xe_sdb_get_function(sdb, address); + if (fn) { + return fn; + } + + printf("add fn %.8X\n", address); + fn = (xe_sdb_function_t*)xe_calloc(sizeof(xe_sdb_function_t)); + fn->start_address = address; + xe_sdb_symbol_t symbol; + symbol.type = 0; + symbol.function = fn; + sdb->function_count++; + sdb->symbols->insert(xe_sdb_symbol_map::value_type(address, symbol)); + sdb->scan_queue->push_back(fn); + return fn; } xe_sdb_variable_t* xe_sdb_insert_variable(xe_sdb_ref sdb, uint32_t address) { - return NULL; + xe_sdb_variable_t *var = xe_sdb_get_variable(sdb, address); + if (var) { + return var; + } + + printf("add var %.8X\n", address); + var = (xe_sdb_variable_t*)xe_calloc(sizeof(xe_sdb_variable_t)); + var->address = address; + xe_sdb_symbol_t symbol; + symbol.type = 1; + symbol.variable = var; + sdb->variable_count++; + sdb->symbols->insert(xe_sdb_symbol_map::value_type(address, symbol)); + return var; } xe_sdb_function_t* xe_sdb_get_function(xe_sdb_ref sdb, uint32_t address) { + xe_sdb_symbol_map::iterator i = sdb->symbols->find(address); + if (i != sdb->symbols->end() && + i->second.type == 0) { + return i->second.function; + } return NULL; } xe_sdb_variable_t* xe_sdb_get_variable(xe_sdb_ref sdb, uint32_t address) { + xe_sdb_symbol_map::iterator i = sdb->symbols->find(address); + if (i != sdb->symbols->end() && + i->second.type == 1) { + return i->second.variable; + } return NULL; } +int xe_sdb_get_functions(xe_sdb_ref sdb, xe_sdb_function_t ***out_functions, + size_t *out_function_count) { + xe_sdb_function_t **functions = (xe_sdb_function_t**)xe_malloc( + sizeof(xe_sdb_function_t*) * sdb->function_count); + size_t n = 0; + for (xe_sdb_symbol_map::iterator it = sdb->symbols->begin(); + it != sdb->symbols->end(); ++it) { + switch (it->second.type) { + case 0: + functions[n++] = it->second.function; + break; + } + } + *out_functions = functions; + *out_function_count = n; // entries actually written; the map may hold fewer functions than function_count + return 0; +} + 
void xe_sdb_dump(xe_sdb_ref sdb) { - // TODO(benvanik): dump all functions and symbols + uint32_t previous = 0; + for (xe_sdb_symbol_map::iterator it = sdb->symbols->begin(); + it != sdb->symbols->end(); ++it) { + switch (it->second.type) { + case 0: + { + xe_sdb_function_t *fn = it->second.function; + if (previous && (int)(fn->start_address - previous) > 0) { + printf("%.8X-%.8X (%5d) h\n", previous, fn->start_address, + fn->start_address - previous); + } + printf("%.8X-%.8X (%5d) f %s\n", fn->start_address, + fn->end_address + 4, + fn->end_address - fn->start_address + 4, + fn->name ? fn->name : ""); + previous = fn->end_address + 4; + } + break; + case 1: + { + xe_sdb_variable_t *var = it->second.variable; + printf("%.8X v %s\n", var->address, + var->name ? var->name : ""); + } + break; + } + } +} + +int xe_sdb_find_gplr(xe_sdb_ref sdb, xe_module_ref module) { + // Special stack save/restore functions. + // __savegprlr_14 to __savegprlr_31 + // __restgprlr_14 to __restgprlr_31 + // http://research.microsoft.com/en-us/um/redmond/projects/invisible/src/crt/md/ppc/xxx.s.htm + // It'd be nice to stash these away and mark them as such to allow for + // special codegen. 
+ static const uint32_t code_values[] = { + 0x68FFC1F9, // __savegprlr_14 + 0x70FFE1F9, // __savegprlr_15 + 0x78FF01FA, // __savegprlr_16 + 0x80FF21FA, // __savegprlr_17 + 0x88FF41FA, // __savegprlr_18 + 0x90FF61FA, // __savegprlr_19 + 0x98FF81FA, // __savegprlr_20 + 0xA0FFA1FA, // __savegprlr_21 + 0xA8FFC1FA, // __savegprlr_22 + 0xB0FFE1FA, // __savegprlr_23 + 0xB8FF01FB, // __savegprlr_24 + 0xC0FF21FB, // __savegprlr_25 + 0xC8FF41FB, // __savegprlr_26 + 0xD0FF61FB, // __savegprlr_27 + 0xD8FF81FB, // __savegprlr_28 + 0xE0FFA1FB, // __savegprlr_29 + 0xE8FFC1FB, // __savegprlr_30 + 0xF0FFE1FB, // __savegprlr_31 + 0xF8FF8191, + 0x2000804E, + 0x68FFC1E9, // __restgprlr_14 + 0x70FFE1E9, // __restgprlr_15 + 0x78FF01EA, // __restgprlr_16 + 0x80FF21EA, // __restgprlr_17 + 0x88FF41EA, // __restgprlr_18 + 0x90FF61EA, // __restgprlr_19 + 0x98FF81EA, // __restgprlr_20 + 0xA0FFA1EA, // __restgprlr_21 + 0xA8FFC1EA, // __restgprlr_22 + 0xB0FFE1EA, // __restgprlr_23 + 0xB8FF01EB, // __restgprlr_24 + 0xC0FF21EB, // __restgprlr_25 + 0xC8FF41EB, // __restgprlr_26 + 0xD0FF61EB, // __restgprlr_27 + 0xD8FF81EB, // __restgprlr_28 + 0xE0FFA1EB, // __restgprlr_29 + 0xE8FFC1EB, // __restgprlr_30 + 0xF0FFE1EB, // __restgprlr_31 + 0xF8FF8181, + 0xA603887D, + 0x2000804E, + }; + + uint32_t gplr_start = 0; + const xe_xex2_header_t *header = xe_module_get_xex_header(module); + for (size_t n = 0, i = 0; n < header->section_count; n++) { + const xe_xex2_section_t *section = &header->sections[n]; + const size_t start_address = header->exe_address + + (i * xe_xex2_section_length); + const size_t end_address = start_address + (section->info.page_count * + xe_xex2_section_length); + if (section->info.type == XEX_SECTION_CODE) { + gplr_start = xe_memory_search_aligned( + sdb->memory, start_address, end_address, + code_values, XECOUNT(code_values)); + if (gplr_start) { + break; + } + } + i += section->info.page_count; + } + if (!gplr_start) { + return 0; + } + + // Add function stubs. 
+ char name[32]; + uint32_t addr = gplr_start; + for (int n = 14; n <= 31; n++) { + xesnprintfa(name, XECOUNT(name), "__savegprlr_%d", n); + xe_sdb_function_t *fn = xe_sdb_insert_function(sdb, addr); + fn->end_address = fn->start_address + (31 - n) * 4 + 2 * 4; + fn->name = xestrdupa(name); + fn->type = kXESDBFunctionUser; + addr += 4; + } + addr = gplr_start + 20 * 4; + for (int n = 14; n <= 31; n++) { + xesnprintfa(name, XECOUNT(name), "__restgprlr_%d", n); + xe_sdb_function_t *fn = xe_sdb_insert_function(sdb, addr); + fn->end_address = fn->start_address + (31 - n) * 4 + 3 * 4; + fn->name = xestrdupa(name); + fn->type = kXESDBFunctionUser; + addr += 4; + } + + return 0; +} + +int xe_sdb_add_imports(xe_sdb_ref sdb, xe_module_ref module, + const xe_xex2_import_library_t *library) { + xe_xex2_ref xex = xe_module_get_xex(module); + xe_xex2_import_info_t *import_infos; + size_t import_info_count; + if (xe_xex2_get_import_infos(xex, library, &import_infos, + &import_info_count)) { + return 1; + } + + char name[64]; + for (size_t n = 0; n < import_info_count; n++) { + const xe_xex2_import_info_t *info = &import_infos[n]; + xe_sdb_variable_t *var = xe_sdb_insert_variable(sdb, info->value_address); + // TODO(benvanik): use kernel name + xesnprintfa(name, XECOUNT(name), "__var_%s_%.3X", library->name, + info->ordinal); + var->name = xestrdupa(name); + if (info->thunk_address) { + xe_sdb_function_t *fn = xe_sdb_insert_function(sdb, info->thunk_address); + // TODO(benvanik): use kernel name + xesnprintfa(name, XECOUNT(name), "__thunk_%s_%.3X", library->name, + info->ordinal); + fn->end_address = fn->start_address + 16 - 4; + fn->name = xestrdupa(name); + fn->type = kXESDBFunctionKernel; + } + } + + return 0; +} + +int xe_sdb_add_method_hints(xe_sdb_ref sdb, xe_module_ref module) { + xe_module_pe_method_info_t *method_infos; + size_t method_info_count; + if (xe_module_get_method_hints(module, &method_infos, &method_info_count)) { + return 1; + } + + for (size_t n = 0; n < 
method_info_count; n++) { + xe_module_pe_method_info_t *method_info = &method_infos[n]; + xe_sdb_function_t *fn = xe_sdb_insert_function(sdb, method_info->address); + fn->end_address = method_info->address + method_info->total_length - 4; + fn->type = kXESDBFunctionUser; + // TODO(benvanik): something with prolog_length? + } + + return 0; } int xe_sdb_analyze_function(xe_sdb_ref sdb, xe_sdb_function_t *fn) { @@ -71,18 +339,52 @@ int xe_sdb_analyze_function(xe_sdb_ref sdb, xe_sdb_function_t *fn) { return 0; } -int xe_sdb_analyze_module(xe_sdb_ref sdb, xe_module_ref module) { - // TODO(benvanik): analysis. - // Iteratively run passes over the db: - // - for each import: - // - insert fn and setup as a thunk - // - for each export - // - insert fn or variable - // - queue fn - // - insert entry point - // - queue entry point - // - while (process_queue.length()): - // - fn = shift() - // - analyze_function(fn) +int xe_sdb_flush_queue(xe_sdb_ref sdb) { + while (!sdb->scan_queue->empty()) { + xe_sdb_function_t *fn = sdb->scan_queue->front(); + sdb->scan_queue->pop_front(); + if (xe_sdb_analyze_function(sdb, fn)) { // non-zero means analysis failed; 0 is success + return 1; + } + } + return 0; +} + +int xe_sdb_analyze_module(xe_sdb_ref sdb, xe_module_ref module) { + // Iteratively run passes over the db. + // This uses a queue to do a breadth-first search of all accessible + // functions. Callbacks and such likely won't be hit. + + const xe_xex2_header_t *header = xe_module_get_xex_header(module); + + // Find __savegprlr_* and __restgprlr_*. + xe_sdb_find_gplr(sdb, module); + + // Add each import thunk. + for (size_t n = 0; n < header->import_library_count; n++) { + const xe_xex2_import_library_t *library = &header->import_libraries[n]; + xe_sdb_add_imports(sdb, module, library); + } + + // Add each export root. + // TODO(benvanik): exports. + // - insert fn or variable + // - queue fn + + // Add method hints, if available. + // Not all XEXs have these. 
+ xe_sdb_add_method_hints(sdb, module); + + // Queue entry point of the application. + xe_sdb_function_t *fn = xe_sdb_insert_function(sdb, header->exe_entry_point); + fn->name = strdup(""); + + // Keep pumping the queue until there's nothing left to do. + xe_sdb_flush_queue(sdb); + + // Do a pass over the functions to fill holes. + // TODO(benvanik): hole filling. + xe_sdb_flush_queue(sdb); + return 0; }