diff --git a/src/alloy/backend/x64/lir/lir_builder.cc b/src/alloy/backend/x64/lir/lir_builder.cc
index 20e78d7f9..f683a1bac 100644
--- a/src/alloy/backend/x64/lir/lir_builder.cc
+++ b/src/alloy/backend/x64/lir/lir_builder.cc
@@ -106,14 +106,21 @@ LIRInstr* LIRBuilder::last_instr() const {
   return NULL;
 }
 
-LIRLabel* LIRBuilder::NewLabel(bool local) {
+LIRLabel* LIRBuilder::NewLabel(const char* name, bool local) {
   LIRLabel* label = arena_->Alloc<LIRLabel>();
   label->next = label->prev = NULL;
   label->block = NULL;
   label->id = next_label_id_++;
-  label->name = NULL;
   label->local = local;
   label->tag = NULL;
+  if (!name) {
+    char label_name[32] = "l";
+    _itoa(label->id, label_name + 1, 10);
+    name = label_name;
+  }
+  size_t label_length = xestrlena(name);
+  label->name = (char*)arena_->Alloc(label_length + 1);
+  xe_copy_struct(label->name, name, label_length + 1);
   return label;
 }
 
diff --git a/src/alloy/backend/x64/lir/lir_builder.h b/src/alloy/backend/x64/lir/lir_builder.h
index 25f643ded..be7e9e2e3 100644
--- a/src/alloy/backend/x64/lir/lir_builder.h
+++ b/src/alloy/backend/x64/lir/lir_builder.h
@@ -41,8 +41,8 @@ public:
   LIRBlock* current_block() const;
   LIRInstr* last_instr() const;
 
-  LIRLabel* NewLabel(bool local = false);
-  LIRLabel* NewLocalLabel() { return NewLabel(true); }
+  LIRLabel* NewLabel(const char* name = 0, bool local = false);
+  LIRLabel* NewLocalLabel() { return NewLabel(0, true); }
   void MarkLabel(LIRLabel* label, LIRBlock* block = 0);
 
   // TODO(benvanik): allocations
diff --git a/src/alloy/backend/x64/lowering/lowering_table.cc b/src/alloy/backend/x64/lowering/lowering_table.cc
index 8073aa053..412f5b51f 100644
--- a/src/alloy/backend/x64/lowering/lowering_table.cc
+++ b/src/alloy/backend/x64/lowering/lowering_table.cc
@@ -55,12 +55,7 @@ int LoweringTable::Process(
   while (hir_block) {
     auto hir_label = hir_block->label_head;
     while (hir_label) {
-      auto lir_label = lir_builder->NewLabel();
-      if (hir_label->name) {
-        size_t label_len = xestrlena(hir_label->name);
-        lir_label->name = (char*)lir_builder->arena()->Alloc(label_len + 1);
-        xe_copy_struct(lir_label->name, hir_label->name, label_len + 1);
-      }
+      auto lir_label = lir_builder->NewLabel(hir_label->name);
       hir_label->tag = lir_label;
       hir_label = hir_label->next;
     }
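
With NewLabel now copying the name into the builder's arena (and auto-generating "l<id>" names when none is given), call sites no longer manage label-string lifetime themselves. A minimal usage sketch of the new signature (hypothetical call sites, not part of this diff):

    // The builder owns the copied name, so passing a temporary is safe.
    LIRLabel* head  = lir_builder->NewLabel("loop_head");  // explicit name
    LIRLabel* anon  = lir_builder->NewLabel();             // auto-named "l<id>"
    LIRLabel* local = lir_builder->NewLocalLabel();        // local, auto-named
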
diff --git a/src/alloy/backend/x64/x64_code_cache.cc b/src/alloy/backend/x64/x64_code_cache.cc
index 5db2dfbb4..f0f4aa1eb 100644
--- a/src/alloy/backend/x64/x64_code_cache.cc
+++ b/src/alloy/backend/x64/x64_code_cache.cc
@@ -16,6 +16,40 @@
 using namespace alloy::backend;
 using namespace alloy::backend::x64;
 
+namespace alloy {
+namespace backend {
+namespace x64 {
+
+class X64CodeChunk {
+public:
+  X64CodeChunk(size_t chunk_size);
+  ~X64CodeChunk();
+public:
+  X64CodeChunk* next;
+  size_t capacity;
+  uint8_t* buffer;
+  size_t offset;
+
+  // Estimate of function size, used to determine initial table capacity.
+  const static uint32_t ESTIMATED_FN_SIZE = 512;
+  // Size of unwind info per function.
+  // TODO(benvanik): move this to emitter.
+  const static uint32_t UNWIND_INFO_SIZE = 4 + (2 * 1);
+
+  void* fn_table_handle;
+  RUNTIME_FUNCTION* fn_table;
+  uint32_t fn_table_count;
+  uint32_t fn_table_capacity;
+
+  void AddTableEntry(uint8_t* code, size_t code_size);
+};
+
+
+}  // namespace x64
+}  // namespace backend
+}  // namespace alloy
+
+
 X64CodeCache::X64CodeCache(size_t chunk_size) :
     chunk_size_(chunk_size),
     head_chunk_(NULL), active_chunk_(NULL) {
@@ -40,6 +74,9 @@ int X64CodeCache::Initialize() {
 }
 
 void* X64CodeCache::PlaceCode(void* machine_code, size_t code_size) {
+  // Add unwind info into the allocation size. Keep things 16b aligned.
+  code_size += XEROUNDUP(X64CodeChunk::UNWIND_INFO_SIZE, 16);
+
   // Always move the code to land on 16b alignment. We do this by rounding up
   // to 16b so that all offsets are aligned.
   code_size = XEROUNDUP(code_size, 16);
@@ -51,20 +88,24 @@ void* X64CodeCache::PlaceCode(void* machine_code, size_t code_size) {
       auto next = active_chunk_->next;
       if (!next) {
         XEASSERT(code_size < chunk_size_); // need to support larger chunks
-        next = new CodeChunk(chunk_size_);
+        next = new X64CodeChunk(chunk_size_);
         active_chunk_->next = next;
       }
       active_chunk_ = next;
     }
   } else {
-    head_chunk_ = active_chunk_ = new CodeChunk(chunk_size_);
+    head_chunk_ = active_chunk_ = new X64CodeChunk(chunk_size_);
   }
 
-  void* final_address = active_chunk_->buffer + active_chunk_->offset;
+  uint8_t* final_address = active_chunk_->buffer + active_chunk_->offset;
   active_chunk_->offset += code_size;
 
+  // Add entry to fn table.
+  active_chunk_->AddTableEntry(final_address, code_size);
+
   UnlockMutex(lock_);
 
+  // Copy code.
   xe_copy_struct(final_address, machine_code, code_size);
 
   // This isn't needed on x64 (probably), but is convention.
@@ -72,17 +113,127 @@ void* X64CodeCache::PlaceCode(void* machine_code, size_t code_size) {
   return final_address;
 }
 
-X64CodeCache::CodeChunk::CodeChunk(size_t chunk_size) :
+X64CodeChunk::X64CodeChunk(size_t chunk_size) :
     next(NULL),
     capacity(chunk_size), buffer(0),
     offset(0) {
   buffer = (uint8_t*)VirtualAlloc(
       NULL, capacity,
       MEM_RESERVE | MEM_COMMIT,
       PAGE_EXECUTE_READWRITE);
+
+  fn_table_capacity = (uint32_t)XEROUNDUP(capacity / ESTIMATED_FN_SIZE, 16);
+  size_t table_size = fn_table_capacity * sizeof(RUNTIME_FUNCTION);
+  fn_table = (RUNTIME_FUNCTION*)xe_malloc(table_size);
+  fn_table_count = 0;
+  fn_table_handle = 0;
+  RtlAddGrowableFunctionTable(
+      &fn_table_handle,
+      fn_table,
+      fn_table_count,
+      fn_table_capacity,
+      (ULONG_PTR)buffer, (ULONG_PTR)buffer + capacity);
 }
 
-X64CodeCache::CodeChunk::~CodeChunk() {
+X64CodeChunk::~X64CodeChunk() {
+  if (fn_table_handle) {
+    RtlDeleteGrowableFunctionTable(fn_table_handle);
+  }
   if (buffer) {
     VirtualFree(buffer, 0, MEM_RELEASE);
   }
 }
+
+// http://msdn.microsoft.com/en-us/library/ssa62fwe.aspx
+namespace {
+typedef enum _UNWIND_OP_CODES {
+  UWOP_PUSH_NONVOL = 0, /* info == register number */
+  UWOP_ALLOC_LARGE,     /* no info, alloc size in next 2 slots */
+  UWOP_ALLOC_SMALL,     /* info == size of allocation / 8 - 1 */
+  UWOP_SET_FPREG,       /* no info, FP = RSP + UNWIND_INFO.FPRegOffset*16 */
+  UWOP_SAVE_NONVOL,     /* info == register number, offset in next slot */
+  UWOP_SAVE_NONVOL_FAR, /* info == register number, offset in next 2 slots */
+  UWOP_SAVE_XMM128,     /* info == XMM reg number, offset in next slot */
+  UWOP_SAVE_XMM128_FAR, /* info == XMM reg number, offset in next 2 slots */
+  UWOP_PUSH_MACHFRAME   /* info == 0: no error-code, 1: error-code */
+} UNWIND_CODE_OPS;
+
+typedef union _UNWIND_CODE {
+  struct {
+    uint8_t CodeOffset;
+    uint8_t UnwindOp : 4;
+    uint8_t OpInfo : 4;
+  };
+  USHORT FrameOffset;
+} UNWIND_CODE, *PUNWIND_CODE;
+
+typedef struct _UNWIND_INFO {
+  uint8_t Version : 3;
+  uint8_t Flags : 5;
+  uint8_t SizeOfProlog;
+  uint8_t CountOfCodes;
+  uint8_t FrameRegister : 4;
+  uint8_t FrameOffset : 4;
+  UNWIND_CODE UnwindCode[1];
+/*  UNWIND_CODE MoreUnwindCode[((CountOfCodes + 1) & ~1) - 1];
+*   union {
+*     OPTIONAL ULONG ExceptionHandler;
+*     OPTIONAL ULONG FunctionEntry;
+*   };
+*   OPTIONAL ULONG ExceptionData[]; */
+} UNWIND_INFO, *PUNWIND_INFO;
+}  // namespace
+
+void X64CodeChunk::AddTableEntry(uint8_t* code, size_t code_size) {
+  // NOTE: caller must hold the code cache lock.
+
+  if (fn_table_count + 1 > fn_table_capacity) {
+    // Table exhausted, need to realloc. If this happens a lot we should tune
+    // the table size to prevent this.
+    XELOGW("X64CodeCache growing FunctionTable - adjust ESTIMATED_FN_SIZE");
+    RtlDeleteGrowableFunctionTable(fn_table_handle);
+    size_t old_size = fn_table_capacity * sizeof(RUNTIME_FUNCTION);
+    size_t new_size = old_size * 2;
+    auto new_table = (RUNTIME_FUNCTION*)xe_realloc(fn_table, old_size, new_size);
+    XEASSERTNOTNULL(new_table);
+    if (!new_table) {
+      return;
+    }
+    fn_table = new_table;
+    fn_table_capacity *= 2;
+    RtlAddGrowableFunctionTable(
+        &fn_table_handle,
+        fn_table,
+        fn_table_count,
+        fn_table_capacity,
+        (ULONG_PTR)buffer, (ULONG_PTR)buffer + capacity);
+  }
+
+  // Allocate unwind data. We know we have space because we overallocated.
+  // This should be the trailing 16b with 16b alignment.
+  size_t unwind_info_offset = offset;
+  offset += UNWIND_INFO_SIZE;
+
+  // http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
+  UNWIND_INFO* unwind_info = (UNWIND_INFO*)(buffer + unwind_info_offset);
+  unwind_info->Version = 1;
+  unwind_info->Flags = 0;
+  unwind_info->SizeOfProlog = 4;
+  unwind_info->CountOfCodes = 1;
+  unwind_info->FrameRegister = 0;
+  unwind_info->FrameOffset = 0;
+
+  // http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx
+  auto& code_0 = unwind_info->UnwindCode[0];
+  code_0.CodeOffset = 4;  // end of instruction + 1 == offset of next instruction
+  code_0.UnwindOp = UWOP_ALLOC_SMALL;
+  code_0.OpInfo = 16 / 8 - 1;  // ALLOC_SMALL stores alloc size / 8 - 1
+
+  // Add entry.
+  auto& fn_entry = fn_table[fn_table_count++];
+  fn_entry.BeginAddress = (DWORD)(code - buffer);
+  fn_entry.EndAddress = (DWORD)(fn_entry.BeginAddress + code_size);
+  fn_entry.UnwindData = (DWORD)unwind_info_offset;
+
+  // Notify the function table that it has new entries.
+  RtlGrowFunctionTable(fn_table_handle, fn_table_count);
+}
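
One easy-to-miss detail above: UWOP_ALLOC_SMALL's 4-bit OpInfo field encodes the allocation size as size / 8 - 1 (covering 8 to 128 bytes), so a 16-byte frame encodes as 1, not 16; the raw size would not even fit in four bits. A worked sketch using the structs above (hypothetical helper, not project API):

    // Hypothetical helper (not project API): encode a UWOP_ALLOC_SMALL
    // slot for a fixed allocation of 8..128 bytes.
    UNWIND_CODE MakeAllocSmall(uint8_t prolog_offset, size_t alloc_size) {
      UNWIND_CODE c = {};
      c.CodeOffset = prolog_offset;              // offset of the next instruction
      c.UnwindOp = UWOP_ALLOC_SMALL;
      c.OpInfo = (uint8_t)(alloc_size / 8 - 1);  // 16 -> 1, 128 -> 15
      return c;
    }
    // MakeAllocSmall(4, 16) describes the 4-byte `sub rsp, 16` prolog the
    // emitter produces below: CodeOffset = 4, OpInfo = 1.
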
diff --git a/src/alloy/backend/x64/x64_code_cache.h b/src/alloy/backend/x64/x64_code_cache.h
index 45dfe40e8..1d6140430 100644
--- a/src/alloy/backend/x64/x64_code_cache.h
+++ b/src/alloy/backend/x64/x64_code_cache.h
@@ -17,6 +17,7 @@ namespace alloy {
 namespace backend {
 namespace x64 {
 
+class X64CodeChunk;
 
 class X64CodeCache {
 public:
@@ -32,23 +33,11 @@ public:
   void* PlaceCode(void* machine_code, size_t code_size);
 
 private:
-  class CodeChunk {
-  public:
-    CodeChunk(size_t chunk_size);
-    ~CodeChunk();
-  public:
-    CodeChunk* next;
-    size_t capacity;
-    uint8_t* buffer;
-    size_t offset;
-  };
-
-private:
-  static const size_t DEFAULT_CHUNK_SIZE = 4 * 1024 * 1024;
-  Mutex* lock_;
-  size_t chunk_size_;
-  CodeChunk* head_chunk_;
-  CodeChunk* active_chunk_;
+  const static size_t DEFAULT_CHUNK_SIZE = 4 * 1024 * 1024;
+  Mutex* lock_;
+  size_t chunk_size_;
+  X64CodeChunk* head_chunk_;
+  X64CodeChunk* active_chunk_;
 };
diff --git a/src/alloy/backend/x64/x64_emitter.cc b/src/alloy/backend/x64/x64_emitter.cc
index a0fdf33ae..66a11274d 100644
--- a/src/alloy/backend/x64/x64_emitter.cc
+++ b/src/alloy/backend/x64/x64_emitter.cc
@@ -40,6 +40,7 @@ public:
   void* Emplace(X64CodeCache* code_cache);
   int Emit(LIRBuilder* builder);
 private:
+  int EmitInstruction(LIRInstr* instr);
 };
 
 } // namespace x64
@@ -100,8 +101,54 @@ void* XbyakGenerator::Emplace(X64CodeCache* code_cache) {
 }
 
 int XbyakGenerator::Emit(LIRBuilder* builder) {
-  //
-  xor(rax, rax);
+  // Function prolog.
+  // Must be 16b aligned.
+  // Windows is very strict about the form of this and the epilog:
+  // http://msdn.microsoft.com/en-us/library/tawsa7cb.aspx
+  // TODO(benvanik): save off non-volatile registers so we can use them:
+  //     RBX, RBP, RDI, RSI, RSP, R12, R13, R14, R15
+  //     Only want to do this if we actually use them, though, otherwise
+  //     it just adds overhead.
+  const size_t stack_size = 16;
+  sub(rsp, stack_size);
+
+  // Body.
+  auto block = builder->first_block();
+  while (block) {
+    // Mark block labels.
+    auto label = block->label_head;
+    while (label) {
+      L(label->name);
+      label = label->next;
+    }
+
+    // Add instructions.
+    auto instr = block->instr_head;
+    while (instr) {
+      // Stash offset in debug info.
+      // TODO(benvanik): stash size_ value.
+
+      // Emit.
+      int result = EmitInstruction(instr);
+      if (result) {
+        return result;
+      }
+
+      instr = instr->next;
+    }
+
+    block = block->next;
+  }
+
+  // Function epilog.
+  L("epilog");
+  add(rsp, stack_size);
+  // TODO(benvanik): restore registers.
   ret();
+
+  return 0;
+}
+
+int XbyakGenerator::EmitInstruction(LIRInstr* instr) {
   return 0;
 }
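
One subtlety in the prolog above, worth keeping in mind as EmitInstruction grows a body: the Win64 ABI leaves RSP at 16n+8 on function entry (the call pushed an 8-byte return address), so a frame whose body makes calls of its own needs a total adjustment of 8 mod 16 to realign at each call site. A sketch of the arithmetic (my reading of the calling convention, not part of this change):

    // entry:        rsp = 16n + 8   (return address pushed by caller's call)
    // sub rsp, 16:  rsp = 16n - 8   still 8 mod 16; acceptable only while
    //                               the generated body makes no calls
    // sub rsp, 40:  rsp = 16n - 32  0 mod 16; what a frame that calls out
    //                               of the generated code would need
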
diff --git a/xenia.gyp b/xenia.gyp
index d87135257..6e2805599 100644
--- a/xenia.gyp
+++ b/xenia.gyp
@@ -210,6 +210,7 @@
         'kernel32',
         'user32',
         'ole32',
+        'ntdll',
       ],
     }],
     ['OS == "mac"', {
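
The new 'ntdll' link dependency exists for the growable function table APIs used by X64CodeChunk: RtlAddGrowableFunctionTable and friends are ntdll exports available from Windows 8. If the SDK headers in use don't declare them, declarations along these lines are needed — sketched from the MSDN documentation, so verify against the target SDK:

    // Prototypes per MSDN (assumed; not part of this diff).
    extern "C" {
    NTSYSAPI NTSTATUS NTAPI RtlAddGrowableFunctionTable(
        PVOID* DynamicTable, PRUNTIME_FUNCTION FunctionTable,
        DWORD EntryCount, DWORD MaximumEntryCount,
        ULONG_PTR RangeBase, ULONG_PTR RangeEnd);
    NTSYSAPI VOID NTAPI RtlGrowFunctionTable(
        PVOID DynamicTable, DWORD NewEntryCount);
    NTSYSAPI VOID NTAPI RtlDeleteGrowableFunctionTable(PVOID DynamicTable);
    }
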