From 5e5eb4778947d4f6821b2ba2d4d7be711a25a10e Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Wed, 20 May 2015 19:23:46 -0700 Subject: [PATCH] Rewriting code cache to put everything at fixed addresses. --- libxenia.vcxproj | 2 +- libxenia.vcxproj.filters | 6 +- src/xenia/cpu/backend/backend.h | 3 + src/xenia/cpu/backend/x64/x64_assembler.cc | 4 +- src/xenia/cpu/backend/x64/x64_backend.cc | 5 + src/xenia/cpu/backend/x64/x64_backend.h | 2 + src/xenia/cpu/backend/x64/x64_code_cache.cc | 287 ++++++++++++++++++ src/xenia/cpu/backend/x64/x64_code_cache.h | 50 ++- .../cpu/backend/x64/x64_code_cache_posix.cc | 98 ------ .../cpu/backend/x64/x64_code_cache_win.cc | 283 ----------------- src/xenia/cpu/backend/x64/x64_emitter.cc | 13 +- src/xenia/cpu/backend/x64/x64_emitter.h | 8 +- .../cpu/backend/x64/x64_thunk_emitter.cc | 4 +- src/xenia/cpu/xex_module.cc | 3 + 14 files changed, 360 insertions(+), 408 deletions(-) create mode 100644 src/xenia/cpu/backend/x64/x64_code_cache.cc delete mode 100644 src/xenia/cpu/backend/x64/x64_code_cache_posix.cc delete mode 100644 src/xenia/cpu/backend/x64/x64_code_cache_win.cc diff --git a/libxenia.vcxproj b/libxenia.vcxproj index 84ac2ed3e..8b104b3de 100644 --- a/libxenia.vcxproj +++ b/libxenia.vcxproj @@ -38,7 +38,7 @@ - + diff --git a/libxenia.vcxproj.filters b/libxenia.vcxproj.filters index 2fbe1ebcf..4940a259b 100644 --- a/libxenia.vcxproj.filters +++ b/libxenia.vcxproj.filters @@ -274,9 +274,6 @@ src\xenia\cpu\backend\x64 - - src\xenia\cpu\backend\x64 - src\xenia\cpu\backend\x64 @@ -703,6 +700,9 @@ src\xenia\kernel + + src\xenia\cpu\backend\x64 + diff --git a/src/xenia/cpu/backend/backend.h b/src/xenia/cpu/backend/backend.h index d4c790c9c..0777bb89f 100644 --- a/src/xenia/cpu/backend/backend.h +++ b/src/xenia/cpu/backend/backend.h @@ -39,6 +39,9 @@ class Backend { virtual void* AllocThreadData(); virtual void FreeThreadData(void* thread_data); + virtual void CommitExecutableRange(uint32_t guest_low, + uint32_t guest_high) = 0; + virtual std::unique_ptr CreateAssembler() = 0; protected: diff --git a/src/xenia/cpu/backend/x64/x64_assembler.cc b/src/xenia/cpu/backend/x64/x64_assembler.cc index e236cb53e..2f0019094 100644 --- a/src/xenia/cpu/backend/x64/x64_assembler.cc +++ b/src/xenia/cpu/backend/x64/x64_assembler.cc @@ -70,8 +70,8 @@ bool X64Assembler::Assemble(FunctionInfo* symbol_info, HIRBuilder* builder, // Lower HIR -> x64. void* machine_code = nullptr; size_t code_size = 0; - if (!emitter_->Emit(builder, debug_info_flags, debug_info.get(), machine_code, - code_size)) { + if (!emitter_->Emit(symbol_info->address(), builder, debug_info_flags, + debug_info.get(), machine_code, code_size)) { return false; } diff --git a/src/xenia/cpu/backend/x64/x64_backend.cc b/src/xenia/cpu/backend/x64/x64_backend.cc index 8cc03fb6b..b24ae174f 100644 --- a/src/xenia/cpu/backend/x64/x64_backend.cc +++ b/src/xenia/cpu/backend/x64/x64_backend.cc @@ -54,6 +54,11 @@ bool X64Backend::Initialize() { return true; } +void X64Backend::CommitExecutableRange(uint32_t guest_low, + uint32_t guest_high) { + code_cache_->CommitExecutableRange(guest_low, guest_high); +} + std::unique_ptr X64Backend::CreateAssembler() { return std::make_unique(this); } diff --git a/src/xenia/cpu/backend/x64/x64_backend.h b/src/xenia/cpu/backend/x64/x64_backend.h index f59ee4e90..b83e2a783 100644 --- a/src/xenia/cpu/backend/x64/x64_backend.h +++ b/src/xenia/cpu/backend/x64/x64_backend.h @@ -35,6 +35,8 @@ class X64Backend : public Backend { bool Initialize() override; + void CommitExecutableRange(uint32_t guest_low, uint32_t guest_high) override; + std::unique_ptr CreateAssembler() override; private: diff --git a/src/xenia/cpu/backend/x64/x64_code_cache.cc b/src/xenia/cpu/backend/x64/x64_code_cache.cc new file mode 100644 index 000000000..5d8f671c4 --- /dev/null +++ b/src/xenia/cpu/backend/x64/x64_code_cache.cc @@ -0,0 +1,287 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2013 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/cpu/backend/x64/x64_code_cache.h" + +#include "xenia/base/assert.h" +#include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/base/memory.h" + +namespace xe { +namespace cpu { +namespace backend { +namespace x64 { + +// Size of unwind info per function. +// TODO(benvanik): move this to emitter. +const static uint32_t kUnwindInfoSize = 4 + (2 * 1 + 2 + 2); + +X64CodeCache::X64CodeCache() + : indirection_table_base_(nullptr), + generated_code_base_(nullptr), + generated_code_offset_(0), + generated_code_commit_mark_(0), + unwind_table_handle_(nullptr), + unwind_table_count_(0) {} + +X64CodeCache::~X64CodeCache() { + if (unwind_table_handle_) { + RtlDeleteGrowableFunctionTable(unwind_table_handle_); + } + if (indirection_table_base_) { + VirtualFree(indirection_table_base_, kIndirectionTableSize, MEM_RELEASE); + } + if (generated_code_base_) { + VirtualFree(generated_code_base_, kIndirectionTableSize, MEM_RELEASE); + } +} + +bool X64CodeCache::Initialize() { + indirection_table_base_ = reinterpret_cast( + VirtualAlloc(reinterpret_cast(kIndirectionTableBase), + kIndirectionTableSize, MEM_RESERVE, PAGE_READWRITE)); + if (!indirection_table_base_) { + XELOGE("Unable to allocate code cache indirection table"); + XELOGE( + "This is likely because the %.8X-%.8X range is in use by some other " + "system DLL", + kIndirectionTableBase, kIndirectionTableBase + kIndirectionTableSize); + return false; + } + + generated_code_base_ = reinterpret_cast( + VirtualAlloc(reinterpret_cast(kGeneratedCodeBase), + kGeneratedCodeSize, MEM_RESERVE, PAGE_EXECUTE_READWRITE)); + if (!generated_code_base_) { + XELOGE("Unable to allocate code cache generated code storage"); + XELOGE( + "This is likely because the %.8X-%.8X range is in use by some other " + "system DLL", + kGeneratedCodeBase, kGeneratedCodeBase + kGeneratedCodeSize); + return false; + } + + // Compute total number of unwind entries we should allocate. + // We don't support reallocing right now, so this should be high. + unwind_table_.resize(30000); + + // Create table and register with the system. It's empty now, but we'll grow + // it as functions are added. + if (RtlAddGrowableFunctionTable( + &unwind_table_handle_, unwind_table_.data(), unwind_table_count_, + DWORD(unwind_table_.size()), + reinterpret_cast(generated_code_base_), + reinterpret_cast(generated_code_base_ + + kGeneratedCodeSize))) { + XELOGE("Unable to create unwind function table"); + return false; + } + + return true; +} + +void X64CodeCache::CommitExecutableRange(uint32_t guest_low, + uint32_t guest_high) { + VirtualAlloc(indirection_table_base_ + (guest_low - kIndirectionTableBase), + guest_high - guest_low, MEM_COMMIT, PAGE_READWRITE); +} + +void* X64CodeCache::PlaceCode(uint32_t guest_address, void* machine_code, + size_t code_size, size_t stack_size) { + // Hold a lock while we bump the pointers up. This is important as the + // unwind table requires entries AND code to be sorted in order. + size_t low_mark; + size_t high_mark; + uint8_t* code_address = nullptr; + uint8_t* unwind_entry_address = nullptr; + size_t unwind_table_slot = 0; + { + std::lock_guard allocation_lock(allocation_mutex_); + + low_mark = generated_code_offset_; + + // Reserve code. + // Always move the code to land on 16b alignment. + code_address = generated_code_base_ + generated_code_offset_; + generated_code_offset_ += xe::round_up(code_size, 16); + + // Reserve unwind info. + // We go on the high size of the unwind info as we don't know how big we + // need it, and a few extra bytes of padding isn't the worst thing. + unwind_entry_address = generated_code_base_ + generated_code_offset_; + generated_code_offset_ += xe::round_up(kUnwindInfoSize, 16); + unwind_table_slot = ++unwind_table_count_; + + high_mark = generated_code_offset_; + } + + // If we are going above the high water mark of committed memory, commit some + // more. It's ok if multiple threads do this, as redundant commits aren't + // harmful. + size_t old_commit_mark = generated_code_commit_mark_; + if (high_mark > old_commit_mark) { + size_t new_commit_mark = old_commit_mark + 16 * 1024 * 1024; + VirtualAlloc(generated_code_base_, new_commit_mark, MEM_COMMIT, + PAGE_EXECUTE_READWRITE); + generated_code_commit_mark_.compare_exchange_strong(old_commit_mark, + new_commit_mark); + } + + // Copy code. + std::memcpy(code_address, machine_code, code_size); + + // Add unwind info. + InitializeUnwindEntry(unwind_entry_address, unwind_table_slot, code_address, + code_size, stack_size); + + // Notify that the unwind table has grown. + // We do this outside of the lock, but with the latest total count. + RtlGrowFunctionTable(unwind_table_handle_, unwind_table_count_); + + // This isn't needed on x64 (probably), but is convention. + FlushInstructionCache(GetCurrentProcess(), code_address, code_size); + + // Now that everything is ready, fix up the indirection table. + // Note that we do support code that doesn't have an indirection fixup, so + // ignore those when we see them. + if (guest_address) { + uint32_t* indirection_slot = reinterpret_cast( + indirection_table_base_ + (guest_address - kIndirectionTableBase)); + *indirection_slot = uint32_t(reinterpret_cast(code_address)); + } + + return code_address; +} + +// http://msdn.microsoft.com/en-us/library/ssa62fwe.aspx +typedef enum _UNWIND_OP_CODES { + UWOP_PUSH_NONVOL = 0, /* info == register number */ + UWOP_ALLOC_LARGE, /* no info, alloc size in next 2 slots */ + UWOP_ALLOC_SMALL, /* info == size of allocation / 8 - 1 */ + UWOP_SET_FPREG, /* no info, FP = RSP + UNWIND_INFO.FPRegOffset*16 */ + UWOP_SAVE_NONVOL, /* info == register number, offset in next slot */ + UWOP_SAVE_NONVOL_FAR, /* info == register number, offset in next 2 slots */ + UWOP_SAVE_XMM128, /* info == XMM reg number, offset in next slot */ + UWOP_SAVE_XMM128_FAR, /* info == XMM reg number, offset in next 2 slots */ + UWOP_PUSH_MACHFRAME /* info == 0: no error-code, 1: error-code */ +} UNWIND_CODE_OPS; +class UNWIND_REGISTER { + public: + enum _ { + RAX = 0, + RCX = 1, + RDX = 2, + RBX = 3, + RSP = 4, + RBP = 5, + RSI = 6, + RDI = 7, + R8 = 8, + R9 = 9, + R10 = 10, + R11 = 11, + R12 = 12, + R13 = 13, + R14 = 14, + R15 = 15, + }; +}; + +typedef union _UNWIND_CODE { + struct { + uint8_t CodeOffset; + uint8_t UnwindOp : 4; + uint8_t OpInfo : 4; + }; + USHORT FrameOffset; +} UNWIND_CODE, *PUNWIND_CODE; + +typedef struct _UNWIND_INFO { + uint8_t Version : 3; + uint8_t Flags : 5; + uint8_t SizeOfProlog; + uint8_t CountOfCodes; + uint8_t FrameRegister : 4; + uint8_t FrameOffset : 4; + UNWIND_CODE UnwindCode[1]; + /* UNWIND_CODE MoreUnwindCode[((CountOfCodes + 1) & ~1) - 1]; + * union { + * OPTIONAL ULONG ExceptionHandler; + * OPTIONAL ULONG FunctionEntry; + * }; + * OPTIONAL ULONG ExceptionData[]; */ +} UNWIND_INFO, *PUNWIND_INFO; + +void X64CodeCache::InitializeUnwindEntry(uint8_t* unwind_entry_address, + size_t unwind_table_slot, + uint8_t* code_address, + size_t code_size, size_t stack_size) { + auto unwind_info = reinterpret_cast(unwind_entry_address); + + if (!stack_size) { + // http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx + unwind_info->Version = 1; + unwind_info->Flags = 0; + unwind_info->SizeOfProlog = 0; + unwind_info->CountOfCodes = 0; + unwind_info->FrameRegister = 0; + unwind_info->FrameOffset = 0; + } else if (stack_size <= 128) { + uint8_t prolog_size = 4; + + // http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx + unwind_info->Version = 1; + unwind_info->Flags = 0; + unwind_info->SizeOfProlog = prolog_size; + unwind_info->CountOfCodes = 1; + unwind_info->FrameRegister = 0; + unwind_info->FrameOffset = 0; + + // http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx + size_t co = 0; + auto& unwind_code = unwind_info->UnwindCode[co++]; + unwind_code.CodeOffset = + 14; // end of instruction + 1 == offset of next instruction + unwind_code.UnwindOp = UWOP_ALLOC_SMALL; + unwind_code.OpInfo = stack_size / 8 - 1; + } else { + // TODO(benvanik): take as parameters? + uint8_t prolog_size = 7; + + // http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx + unwind_info->Version = 1; + unwind_info->Flags = 0; + unwind_info->SizeOfProlog = prolog_size; + unwind_info->CountOfCodes = 3; + unwind_info->FrameRegister = 0; + unwind_info->FrameOffset = 0; + + // http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx + size_t co = 0; + auto& unwind_code = unwind_info->UnwindCode[co++]; + unwind_code.CodeOffset = + 7; // end of instruction + 1 == offset of next instruction + unwind_code.UnwindOp = UWOP_ALLOC_LARGE; + unwind_code.OpInfo = 0; + unwind_code = unwind_info->UnwindCode[co++]; + unwind_code.FrameOffset = (USHORT)(stack_size) / 8; + } + + // Add entry. + auto& fn_entry = unwind_table_[unwind_table_slot]; + fn_entry.BeginAddress = (DWORD)(code_address - generated_code_base_); + fn_entry.EndAddress = (DWORD)(fn_entry.BeginAddress + code_size); + fn_entry.UnwindData = (DWORD)(unwind_entry_address - generated_code_base_); +} + +} // namespace x64 +} // namespace backend +} // namespace cpu +} // namespace xe diff --git a/src/xenia/cpu/backend/x64/x64_code_cache.h b/src/xenia/cpu/backend/x64/x64_code_cache.h index 1fe8bff58..edf85375a 100644 --- a/src/xenia/cpu/backend/x64/x64_code_cache.h +++ b/src/xenia/cpu/backend/x64/x64_code_cache.h @@ -10,18 +10,21 @@ #ifndef XENIA_BACKEND_X64_X64_CODE_CACHE_H_ #define XENIA_BACKEND_X64_X64_CODE_CACHE_H_ +// For RUNTIME_FUNCTION: +#include "xenia/base/platform.h" + +#include #include +#include namespace xe { namespace cpu { namespace backend { namespace x64 { -class X64CodeChunk; - class X64CodeCache { public: - X64CodeCache(size_t chunk_size = DEFAULT_CHUNK_SIZE); + X64CodeCache(); virtual ~X64CodeCache(); bool Initialize(); @@ -30,14 +33,43 @@ class X64CodeCache { // TODO(benvanik): keep track of code blocks // TODO(benvanik): padding/guards/etc - void* PlaceCode(void* machine_code, size_t code_size, size_t stack_size); + void CommitExecutableRange(uint32_t guest_low, uint32_t guest_high); + + void* PlaceCode(uint32_t guest_address, void* machine_code, size_t code_size, + size_t stack_size); private: - const static size_t DEFAULT_CHUNK_SIZE = 4 * 1024 * 1024; - std::mutex lock_; - size_t chunk_size_; - X64CodeChunk* head_chunk_; - X64CodeChunk* active_chunk_; + const static uint64_t kIndirectionTableBase = 0x80000000; + const static uint64_t kIndirectionTableSize = 0x1FFFFFFF; + const static uint64_t kGeneratedCodeBase = 0xA0000000; + const static uint64_t kGeneratedCodeSize = 0x0FFFFFFF; + + void InitializeUnwindEntry(uint8_t* unwind_entry_address, + size_t unwind_table_slot, uint8_t* code_address, + size_t code_size, size_t stack_size); + + // Must be held when manipulating the offsets or counts of anything, to keep + // the tables consistent and ordered. + std::mutex allocation_mutex_; + + // Fixed at kIndirectionTableBase in host space, holding 4 byte pointers into + // the generated code table that correspond to the PPC functions in guest + // space. + uint8_t* indirection_table_base_; + // Fixed at kGeneratedCodeBase and holding all generated code, growing as + // needed. + uint8_t* generated_code_base_; + // Current offset to empty space in generated code. + size_t generated_code_offset_; + // Current high water mark of COMMITTED code. + std::atomic generated_code_commit_mark_; + + // Growable function table system handle. + void* unwind_table_handle_; + // Actual unwind table entries. + std::vector unwind_table_; + // Current number of entries in the table. + std::atomic unwind_table_count_; }; } // namespace x64 diff --git a/src/xenia/cpu/backend/x64/x64_code_cache_posix.cc b/src/xenia/cpu/backend/x64/x64_code_cache_posix.cc deleted file mode 100644 index 44ecb6ecd..000000000 --- a/src/xenia/cpu/backend/x64/x64_code_cache_posix.cc +++ /dev/null @@ -1,98 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2014 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/cpu/backend/x64/x64_code_cache.h" - -#include - -#include "xenia/base/assert.h" -#include "xenia/base/math.h" - -namespace xe { -namespace cpu { -namespace backend { -namespace x64 { - -class X64CodeChunk { - public: - X64CodeChunk(size_t chunk_size); - ~X64CodeChunk(); - - public: - X64CodeChunk* next; - size_t capacity; - uint8_t* buffer; - size_t offset; -}; - -X64CodeCache::X64CodeCache(size_t chunk_size) - : chunk_size_(chunk_size), head_chunk_(NULL), active_chunk_(NULL) {} - -X64CodeCache::~X64CodeCache() { - std::lock_guard guard(lock_); - auto chunk = head_chunk_; - while (chunk) { - auto next = chunk->next; - delete chunk; - chunk = next; - } - head_chunk_ = NULL; -} - -int X64CodeCache::Initialize() { return 0; } - -void* X64CodeCache::PlaceCode(void* machine_code, size_t code_size, - size_t stack_size) { - // Always move the code to land on 16b alignment. We do this by rounding up - // to 16b so that all offsets are aligned. - code_size = xe::round_up(code_size, 16); - - lock_.lock(); - - if (active_chunk_) { - if (active_chunk_->capacity - active_chunk_->offset < code_size) { - auto next = active_chunk_->next; - if (!next) { - assert_true(code_size < chunk_size_, "need to support larger chunks"); - next = new X64CodeChunk(chunk_size_); - active_chunk_->next = next; - } - active_chunk_ = next; - } - } else { - head_chunk_ = active_chunk_ = new X64CodeChunk(chunk_size_); - } - - uint8_t* final_address = active_chunk_->buffer + active_chunk_->offset; - active_chunk_->offset += code_size; - - lock_.unlock(); - - // Copy code. - memcpy(final_address, machine_code, code_size); - - return final_address; -} - -X64CodeChunk::X64CodeChunk(size_t chunk_size) - : next(NULL), capacity(chunk_size), buffer(0), offset(0) { - buffer = (uint8_t*)mmap(nullptr, chunk_size, PROT_WRITE | PROT_EXEC, - MAP_ANON | MAP_PRIVATE, -1, 0); -} - -X64CodeChunk::~X64CodeChunk() { - if (buffer) { - munmap(buffer, capacity); - } -} - -} // namespace x64 -} // namespace backend -} // namespace cpu -} // namespace xe diff --git a/src/xenia/cpu/backend/x64/x64_code_cache_win.cc b/src/xenia/cpu/backend/x64/x64_code_cache_win.cc deleted file mode 100644 index 2949c4cc5..000000000 --- a/src/xenia/cpu/backend/x64/x64_code_cache_win.cc +++ /dev/null @@ -1,283 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2013 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/cpu/backend/x64/x64_code_cache.h" - -#include "xenia/base/assert.h" -#include "xenia/base/logging.h" -#include "xenia/base/math.h" - -namespace xe { -namespace cpu { -namespace backend { -namespace x64 { - -class X64CodeChunk { - public: - X64CodeChunk(size_t chunk_size); - ~X64CodeChunk(); - - public: - X64CodeChunk* next; - size_t capacity; - uint8_t* buffer; - size_t offset; - - // Estimate of function sized use to determine initial table capacity. - const static uint32_t ESTIMATED_FN_SIZE = 512; - // Size of unwind info per function. - // TODO(benvanik): move this to emitter. - const static uint32_t UNWIND_INFO_SIZE = 4 + (2 * 1 + 2 + 2); - - void* fn_table_handle; - RUNTIME_FUNCTION* fn_table; - uint32_t fn_table_count; - uint32_t fn_table_capacity; - - void AddTableEntry(uint8_t* code, size_t code_size, size_t stack_size); -}; - -X64CodeCache::X64CodeCache(size_t chunk_size) - : chunk_size_(chunk_size), head_chunk_(NULL), active_chunk_(NULL) {} - -X64CodeCache::~X64CodeCache() { - std::lock_guard guard(lock_); - auto chunk = head_chunk_; - while (chunk) { - auto next = chunk->next; - delete chunk; - chunk = next; - } - head_chunk_ = NULL; -} - -bool X64CodeCache::Initialize() { return true; } - -void* X64CodeCache::PlaceCode(void* machine_code, size_t code_size, - size_t stack_size) { - size_t alloc_size = code_size; - - // Add unwind info into the allocation size. Keep things 16b aligned. - alloc_size += xe::round_up(X64CodeChunk::UNWIND_INFO_SIZE, 16); - - // Always move the code to land on 16b alignment. We do this by rounding up - // to 16b so that all offsets are aligned. - alloc_size = xe::round_up(alloc_size, 16); - - lock_.lock(); - - if (active_chunk_) { - if (active_chunk_->capacity - active_chunk_->offset < alloc_size) { - auto next = active_chunk_->next; - if (!next) { - assert_true(alloc_size < chunk_size_, "need to support larger chunks"); - next = new X64CodeChunk(chunk_size_); - active_chunk_->next = next; - } - active_chunk_ = next; - } - } else { - head_chunk_ = active_chunk_ = new X64CodeChunk(chunk_size_); - } - - uint8_t* final_address = active_chunk_->buffer + active_chunk_->offset; - active_chunk_->offset += alloc_size; - - // Add entry to fn table. - active_chunk_->AddTableEntry(final_address, alloc_size, stack_size); - - lock_.unlock(); - - // Copy code. - memcpy(final_address, machine_code, code_size); - - // This isn't needed on x64 (probably), but is convention. - FlushInstructionCache(GetCurrentProcess(), final_address, alloc_size); - return final_address; -} - -X64CodeChunk::X64CodeChunk(size_t chunk_size) - : next(NULL), capacity(chunk_size), buffer(0), offset(0) { - buffer = (uint8_t*)VirtualAlloc(NULL, capacity, MEM_RESERVE | MEM_COMMIT, - PAGE_EXECUTE_READWRITE); - - fn_table_capacity = - static_cast(xe::round_up(capacity / ESTIMATED_FN_SIZE, 16)); - size_t table_size = fn_table_capacity * sizeof(RUNTIME_FUNCTION); - fn_table = (RUNTIME_FUNCTION*)malloc(table_size); - fn_table_count = 0; - fn_table_handle = 0; - RtlAddGrowableFunctionTable(&fn_table_handle, fn_table, fn_table_count, - fn_table_capacity, (ULONG_PTR)buffer, - (ULONG_PTR)buffer + capacity); -} - -X64CodeChunk::~X64CodeChunk() { - if (fn_table_handle) { - RtlDeleteGrowableFunctionTable(fn_table_handle); - } - if (buffer) { - VirtualFree(buffer, 0, MEM_RELEASE); - } -} - -// http://msdn.microsoft.com/en-us/library/ssa62fwe.aspx -namespace { -typedef enum _UNWIND_OP_CODES { - UWOP_PUSH_NONVOL = 0, /* info == register number */ - UWOP_ALLOC_LARGE, /* no info, alloc size in next 2 slots */ - UWOP_ALLOC_SMALL, /* info == size of allocation / 8 - 1 */ - UWOP_SET_FPREG, /* no info, FP = RSP + UNWIND_INFO.FPRegOffset*16 */ - UWOP_SAVE_NONVOL, /* info == register number, offset in next slot */ - UWOP_SAVE_NONVOL_FAR, /* info == register number, offset in next 2 slots */ - UWOP_SAVE_XMM128, /* info == XMM reg number, offset in next slot */ - UWOP_SAVE_XMM128_FAR, /* info == XMM reg number, offset in next 2 slots */ - UWOP_PUSH_MACHFRAME /* info == 0: no error-code, 1: error-code */ -} UNWIND_CODE_OPS; -class UNWIND_REGISTER { - public: - enum _ { - RAX = 0, - RCX = 1, - RDX = 2, - RBX = 3, - RSP = 4, - RBP = 5, - RSI = 6, - RDI = 7, - R8 = 8, - R9 = 9, - R10 = 10, - R11 = 11, - R12 = 12, - R13 = 13, - R14 = 14, - R15 = 15, - }; -}; - -typedef union _UNWIND_CODE { - struct { - uint8_t CodeOffset; - uint8_t UnwindOp : 4; - uint8_t OpInfo : 4; - }; - USHORT FrameOffset; -} UNWIND_CODE, *PUNWIND_CODE; - -typedef struct _UNWIND_INFO { - uint8_t Version : 3; - uint8_t Flags : 5; - uint8_t SizeOfProlog; - uint8_t CountOfCodes; - uint8_t FrameRegister : 4; - uint8_t FrameOffset : 4; - UNWIND_CODE UnwindCode[1]; - /* UNWIND_CODE MoreUnwindCode[((CountOfCodes + 1) & ~1) - 1]; - * union { - * OPTIONAL ULONG ExceptionHandler; - * OPTIONAL ULONG FunctionEntry; - * }; - * OPTIONAL ULONG ExceptionData[]; */ -} UNWIND_INFO, *PUNWIND_INFO; -} // namespace - -void X64CodeChunk::AddTableEntry(uint8_t* code, size_t code_size, - size_t stack_size) { - // NOTE: we assume a chunk lock. - - if (fn_table_count + 1 > fn_table_capacity) { - // Table exhausted, need to realloc. If this happens a lot we should tune - // the table size to prevent this. - XELOGW("X64CodeCache growing FunctionTable - adjust ESTIMATED_FN_SIZE"); - RtlDeleteGrowableFunctionTable(fn_table_handle); - size_t old_size = fn_table_capacity * sizeof(RUNTIME_FUNCTION); - size_t new_size = old_size * 2; - auto new_table = (RUNTIME_FUNCTION*)realloc(fn_table, new_size); - assert_not_null(new_table); - if (!new_table) { - return; - } - fn_table = new_table; - fn_table_capacity *= 2; - RtlAddGrowableFunctionTable(&fn_table_handle, fn_table, fn_table_count, - fn_table_capacity, (ULONG_PTR)buffer, - (ULONG_PTR)buffer + capacity); - } - - // Allocate unwind data. We know we have space because we overallocated. - // This should be the tailing 16b with 16b alignment. - size_t unwind_info_offset = offset - UNWIND_INFO_SIZE; - - if (!stack_size) { - // http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx - UNWIND_INFO* unwind_info = (UNWIND_INFO*)(buffer + unwind_info_offset); - unwind_info->Version = 1; - unwind_info->Flags = 0; - unwind_info->SizeOfProlog = 0; - unwind_info->CountOfCodes = 0; - unwind_info->FrameRegister = 0; - unwind_info->FrameOffset = 0; - } else if (stack_size <= 128) { - uint8_t prolog_size = 4; - - // http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx - UNWIND_INFO* unwind_info = (UNWIND_INFO*)(buffer + unwind_info_offset); - unwind_info->Version = 1; - unwind_info->Flags = 0; - unwind_info->SizeOfProlog = prolog_size; - unwind_info->CountOfCodes = 1; - unwind_info->FrameRegister = 0; - unwind_info->FrameOffset = 0; - - // http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx - size_t co = 0; - auto& unwind_code = unwind_info->UnwindCode[co++]; - unwind_code.CodeOffset = - 14; // end of instruction + 1 == offset of next instruction - unwind_code.UnwindOp = UWOP_ALLOC_SMALL; - unwind_code.OpInfo = stack_size / 8 - 1; - } else { - // TODO(benvanik): take as parameters? - uint8_t prolog_size = 7; - - // http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx - UNWIND_INFO* unwind_info = (UNWIND_INFO*)(buffer + unwind_info_offset); - unwind_info->Version = 1; - unwind_info->Flags = 0; - unwind_info->SizeOfProlog = prolog_size; - unwind_info->CountOfCodes = 3; - unwind_info->FrameRegister = 0; - unwind_info->FrameOffset = 0; - - // http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx - size_t co = 0; - auto& unwind_code = unwind_info->UnwindCode[co++]; - unwind_code.CodeOffset = - 7; // end of instruction + 1 == offset of next instruction - unwind_code.UnwindOp = UWOP_ALLOC_LARGE; - unwind_code.OpInfo = 0; - unwind_code = unwind_info->UnwindCode[co++]; - unwind_code.FrameOffset = (USHORT)(stack_size) / 8; - } - - // Add entry. - auto& fn_entry = fn_table[fn_table_count++]; - fn_entry.BeginAddress = (DWORD)(code - buffer); - fn_entry.EndAddress = (DWORD)(fn_entry.BeginAddress + code_size); - fn_entry.UnwindData = (DWORD)unwind_info_offset; - - // Notify the function table that it has new entries. - RtlGrowFunctionTable(fn_table_handle, fn_table_count); -} - -} // namespace x64 -} // namespace backend -} // namespace cpu -} // namespace xe diff --git a/src/xenia/cpu/backend/x64/x64_emitter.cc b/src/xenia/cpu/backend/x64/x64_emitter.cc index f8144931f..de0cf3186 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.cc +++ b/src/xenia/cpu/backend/x64/x64_emitter.cc @@ -87,9 +87,9 @@ X64Emitter::X64Emitter(X64Backend* backend, XbyakAllocator* allocator) X64Emitter::~X64Emitter() = default; -bool X64Emitter::Emit(HIRBuilder* builder, uint32_t debug_info_flags, - DebugInfo* debug_info, void*& out_code_address, - size_t& out_code_size) { +bool X64Emitter::Emit(uint32_t guest_address, HIRBuilder* builder, + uint32_t debug_info_flags, DebugInfo* debug_info, + void*& out_code_address, size_t& out_code_size) { SCOPE_profile_cpu_f("cpu"); // Reset. @@ -108,7 +108,7 @@ bool X64Emitter::Emit(HIRBuilder* builder, uint32_t debug_info_flags, // Copy the final code to the cache and relocate it. out_code_size = getSize(); - out_code_address = Emplace(stack_size); + out_code_address = Emplace(guest_address, stack_size); // Stash source map. if (debug_info_flags_ & DebugInfoFlags::kDebugInfoSourceMap) { @@ -119,13 +119,14 @@ bool X64Emitter::Emit(HIRBuilder* builder, uint32_t debug_info_flags, return true; } -void* X64Emitter::Emplace(size_t stack_size) { +void* X64Emitter::Emplace(uint32_t guest_address, size_t stack_size) { // To avoid changing xbyak, we do a switcharoo here. // top_ points to the Xbyak buffer, and since we are in AutoGrow mode // it has pending relocations. We copy the top_ to our buffer, swap the // pointer, relocate, then return the original scratch pointer for use. uint8_t* old_address = top_; - void* new_address = code_cache_->PlaceCode(top_, size_, stack_size); + void* new_address = + code_cache_->PlaceCode(guest_address, top_, size_, stack_size); top_ = (uint8_t*)new_address; ready(); top_ = old_address; diff --git a/src/xenia/cpu/backend/x64/x64_emitter.h b/src/xenia/cpu/backend/x64/x64_emitter.h index 6573d3975..8834d1813 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.h +++ b/src/xenia/cpu/backend/x64/x64_emitter.h @@ -112,9 +112,9 @@ class X64Emitter : public Xbyak::CodeGenerator { Processor* processor() const { return processor_; } X64Backend* backend() const { return backend_; } - bool Emit(hir::HIRBuilder* builder, uint32_t debug_info_flags, - DebugInfo* debug_info, void*& out_code_address, - size_t& out_code_size); + bool Emit(uint32_t guest_address, hir::HIRBuilder* builder, + uint32_t debug_info_flags, DebugInfo* debug_info, + void*& out_code_address, size_t& out_code_size); public: // Reserved: rsp @@ -192,7 +192,7 @@ class X64Emitter : public Xbyak::CodeGenerator { size_t stack_size() const { return stack_size_; } protected: - void* Emplace(size_t stack_size); + void* Emplace(uint32_t guest_address, size_t stack_size); bool Emit(hir::HIRBuilder* builder, size_t& out_stack_size); void EmitGetCurrentThreadId(); void EmitTraceUserCallReturn(); diff --git a/src/xenia/cpu/backend/x64/x64_thunk_emitter.cc b/src/xenia/cpu/backend/x64/x64_thunk_emitter.cc index 55ab32506..3f8e8ce9c 100644 --- a/src/xenia/cpu/backend/x64/x64_thunk_emitter.cc +++ b/src/xenia/cpu/backend/x64/x64_thunk_emitter.cc @@ -88,7 +88,7 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() { mov(r8, qword[rsp + 8 * 3]); ret(); - void* fn = Emplace(stack_size); + void* fn = Emplace(0, stack_size); return (HostToGuestThunk)fn; } @@ -137,7 +137,7 @@ GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() { mov(rdx, qword[rsp + 8 * 2]); ret(); - void* fn = Emplace(stack_size); + void* fn = Emplace(0, stack_size); return (HostToGuestThunk)fn; } diff --git a/src/xenia/cpu/xex_module.cc b/src/xenia/cpu/xex_module.cc index 3fbdebc17..9fda22aaf 100644 --- a/src/xenia/cpu/xex_module.cc +++ b/src/xenia/cpu/xex_module.cc @@ -66,6 +66,9 @@ bool XexModule::Load(const std::string& name, const std::string& path, i += section->info.page_count; } + // Notify backend that we have an executable range. + processor_->backend()->CommitExecutableRange(low_address_, high_address_); + // Add all imports (variables/functions). for (size_t n = 0; n < header->import_library_count; n++) { if (!SetupLibraryImports(&header->import_libraries[n])) {