Rewriting code cache to put everything at fixed addresses.
This commit is contained in:
parent
20a3172ebb
commit
5e5eb47789
|
@ -38,7 +38,7 @@
|
|||
<ClCompile Include="src\xenia\cpu\backend\backend.cc" />
|
||||
<ClCompile Include="src\xenia\cpu\backend\x64\x64_assembler.cc" />
|
||||
<ClCompile Include="src\xenia\cpu\backend\x64\x64_backend.cc" />
|
||||
<ClCompile Include="src\xenia\cpu\backend\x64\x64_code_cache_win.cc" />
|
||||
<ClCompile Include="src\xenia\cpu\backend\x64\x64_code_cache.cc" />
|
||||
<ClCompile Include="src\xenia\cpu\backend\x64\x64_emitter.cc" />
|
||||
<ClCompile Include="src\xenia\cpu\backend\x64\x64_function.cc" />
|
||||
<ClCompile Include="src\xenia\cpu\backend\x64\x64_sequences.cc" />
|
||||
|
|
|
@ -274,9 +274,6 @@
|
|||
<ClCompile Include="src\xenia\cpu\backend\x64\x64_backend.cc">
|
||||
<Filter>src\xenia\cpu\backend\x64</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="src\xenia\cpu\backend\x64\x64_code_cache_win.cc">
|
||||
<Filter>src\xenia\cpu\backend\x64</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="src\xenia\cpu\backend\x64\x64_emitter.cc">
|
||||
<Filter>src\xenia\cpu\backend\x64</Filter>
|
||||
</ClCompile>
|
||||
|
@ -703,6 +700,9 @@
|
|||
<ClCompile Include="src\xenia\kernel\xboxkrnl_error.cc">
|
||||
<Filter>src\xenia\kernel</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="src\xenia\cpu\backend\x64\x64_code_cache.cc">
|
||||
<Filter>src\xenia\cpu\backend\x64</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="src\xenia\emulator.h">
|
||||
|
|
|
@ -39,6 +39,9 @@ class Backend {
|
|||
virtual void* AllocThreadData();
|
||||
virtual void FreeThreadData(void* thread_data);
|
||||
|
||||
virtual void CommitExecutableRange(uint32_t guest_low,
|
||||
uint32_t guest_high) = 0;
|
||||
|
||||
virtual std::unique_ptr<Assembler> CreateAssembler() = 0;
|
||||
|
||||
protected:
|
||||
|
|
|
@ -70,8 +70,8 @@ bool X64Assembler::Assemble(FunctionInfo* symbol_info, HIRBuilder* builder,
|
|||
// Lower HIR -> x64.
|
||||
void* machine_code = nullptr;
|
||||
size_t code_size = 0;
|
||||
if (!emitter_->Emit(builder, debug_info_flags, debug_info.get(), machine_code,
|
||||
code_size)) {
|
||||
if (!emitter_->Emit(symbol_info->address(), builder, debug_info_flags,
|
||||
debug_info.get(), machine_code, code_size)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -54,6 +54,11 @@ bool X64Backend::Initialize() {
|
|||
return true;
|
||||
}
|
||||
|
||||
void X64Backend::CommitExecutableRange(uint32_t guest_low,
|
||||
uint32_t guest_high) {
|
||||
code_cache_->CommitExecutableRange(guest_low, guest_high);
|
||||
}
|
||||
|
||||
std::unique_ptr<Assembler> X64Backend::CreateAssembler() {
|
||||
return std::make_unique<X64Assembler>(this);
|
||||
}
|
||||
|
|
|
@ -35,6 +35,8 @@ class X64Backend : public Backend {
|
|||
|
||||
bool Initialize() override;
|
||||
|
||||
void CommitExecutableRange(uint32_t guest_low, uint32_t guest_high) override;
|
||||
|
||||
std::unique_ptr<Assembler> CreateAssembler() override;
|
||||
|
||||
private:
|
||||
|
|
|
@ -0,0 +1,287 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2013 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/cpu/backend/x64/x64_code_cache.h"
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/base/memory.h"
|
||||
|
||||
namespace xe {
|
||||
namespace cpu {
|
||||
namespace backend {
|
||||
namespace x64 {
|
||||
|
||||
// Number of bytes set aside for each function's UNWIND_INFO blob: the 4 byte
// UNWIND_INFO header followed by 6 bytes of unwind code storage.
// TODO(benvanik): move this to emitter.
static constexpr uint32_t kUnwindInfoSize = 4 + (2 * 1 + 2 + 2);
|
||||
|
||||
X64CodeCache::X64CodeCache()
|
||||
: indirection_table_base_(nullptr),
|
||||
generated_code_base_(nullptr),
|
||||
generated_code_offset_(0),
|
||||
generated_code_commit_mark_(0),
|
||||
unwind_table_handle_(nullptr),
|
||||
unwind_table_count_(0) {}
|
||||
|
||||
// Unregisters the growable unwind table and releases both fixed-address
// reservations made by Initialize(). Safe to call when Initialize() failed
// part way through, as every resource is checked before it is released.
X64CodeCache::~X64CodeCache() {
  // Remove the unwind table first so the system stops referencing entries
  // that point into the code region released below.
  if (unwind_table_handle_) {
    RtlDeleteGrowableFunctionTable(unwind_table_handle_);
  }

  // MEM_RELEASE requires the size argument to be 0: the call releases the
  // whole region originally reserved by VirtualAlloc. Passing a non-zero size
  // makes VirtualFree fail and leaves the reservation in place.
  if (indirection_table_base_) {
    VirtualFree(indirection_table_base_, 0, MEM_RELEASE);
  }
  if (generated_code_base_) {
    VirtualFree(generated_code_base_, 0, MEM_RELEASE);
  }
}
|
||||
|
||||
bool X64CodeCache::Initialize() {
|
||||
indirection_table_base_ = reinterpret_cast<uint8_t*>(
|
||||
VirtualAlloc(reinterpret_cast<void*>(kIndirectionTableBase),
|
||||
kIndirectionTableSize, MEM_RESERVE, PAGE_READWRITE));
|
||||
if (!indirection_table_base_) {
|
||||
XELOGE("Unable to allocate code cache indirection table");
|
||||
XELOGE(
|
||||
"This is likely because the %.8X-%.8X range is in use by some other "
|
||||
"system DLL",
|
||||
kIndirectionTableBase, kIndirectionTableBase + kIndirectionTableSize);
|
||||
return false;
|
||||
}
|
||||
|
||||
generated_code_base_ = reinterpret_cast<uint8_t*>(
|
||||
VirtualAlloc(reinterpret_cast<void*>(kGeneratedCodeBase),
|
||||
kGeneratedCodeSize, MEM_RESERVE, PAGE_EXECUTE_READWRITE));
|
||||
if (!generated_code_base_) {
|
||||
XELOGE("Unable to allocate code cache generated code storage");
|
||||
XELOGE(
|
||||
"This is likely because the %.8X-%.8X range is in use by some other "
|
||||
"system DLL",
|
||||
kGeneratedCodeBase, kGeneratedCodeBase + kGeneratedCodeSize);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Compute total number of unwind entries we should allocate.
|
||||
// We don't support reallocing right now, so this should be high.
|
||||
unwind_table_.resize(30000);
|
||||
|
||||
// Create table and register with the system. It's empty now, but we'll grow
|
||||
// it as functions are added.
|
||||
if (RtlAddGrowableFunctionTable(
|
||||
&unwind_table_handle_, unwind_table_.data(), unwind_table_count_,
|
||||
DWORD(unwind_table_.size()),
|
||||
reinterpret_cast<ULONG_PTR>(generated_code_base_),
|
||||
reinterpret_cast<ULONG_PTR>(generated_code_base_ +
|
||||
kGeneratedCodeSize))) {
|
||||
XELOGE("Unable to create unwind function table");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void X64CodeCache::CommitExecutableRange(uint32_t guest_low,
|
||||
uint32_t guest_high) {
|
||||
VirtualAlloc(indirection_table_base_ + (guest_low - kIndirectionTableBase),
|
||||
guest_high - guest_low, MEM_COMMIT, PAGE_READWRITE);
|
||||
}
|
||||
|
||||
void* X64CodeCache::PlaceCode(uint32_t guest_address, void* machine_code,
|
||||
size_t code_size, size_t stack_size) {
|
||||
// Hold a lock while we bump the pointers up. This is important as the
|
||||
// unwind table requires entries AND code to be sorted in order.
|
||||
size_t low_mark;
|
||||
size_t high_mark;
|
||||
uint8_t* code_address = nullptr;
|
||||
uint8_t* unwind_entry_address = nullptr;
|
||||
size_t unwind_table_slot = 0;
|
||||
{
|
||||
std::lock_guard<std::mutex> allocation_lock(allocation_mutex_);
|
||||
|
||||
low_mark = generated_code_offset_;
|
||||
|
||||
// Reserve code.
|
||||
// Always move the code to land on 16b alignment.
|
||||
code_address = generated_code_base_ + generated_code_offset_;
|
||||
generated_code_offset_ += xe::round_up(code_size, 16);
|
||||
|
||||
// Reserve unwind info.
|
||||
// We go on the high size of the unwind info as we don't know how big we
|
||||
// need it, and a few extra bytes of padding isn't the worst thing.
|
||||
unwind_entry_address = generated_code_base_ + generated_code_offset_;
|
||||
generated_code_offset_ += xe::round_up(kUnwindInfoSize, 16);
|
||||
unwind_table_slot = ++unwind_table_count_;
|
||||
|
||||
high_mark = generated_code_offset_;
|
||||
}
|
||||
|
||||
// If we are going above the high water mark of committed memory, commit some
|
||||
// more. It's ok if multiple threads do this, as redundant commits aren't
|
||||
// harmful.
|
||||
size_t old_commit_mark = generated_code_commit_mark_;
|
||||
if (high_mark > old_commit_mark) {
|
||||
size_t new_commit_mark = old_commit_mark + 16 * 1024 * 1024;
|
||||
VirtualAlloc(generated_code_base_, new_commit_mark, MEM_COMMIT,
|
||||
PAGE_EXECUTE_READWRITE);
|
||||
generated_code_commit_mark_.compare_exchange_strong(old_commit_mark,
|
||||
new_commit_mark);
|
||||
}
|
||||
|
||||
// Copy code.
|
||||
std::memcpy(code_address, machine_code, code_size);
|
||||
|
||||
// Add unwind info.
|
||||
InitializeUnwindEntry(unwind_entry_address, unwind_table_slot, code_address,
|
||||
code_size, stack_size);
|
||||
|
||||
// Notify that the unwind table has grown.
|
||||
// We do this outside of the lock, but with the latest total count.
|
||||
RtlGrowFunctionTable(unwind_table_handle_, unwind_table_count_);
|
||||
|
||||
// This isn't needed on x64 (probably), but is convention.
|
||||
FlushInstructionCache(GetCurrentProcess(), code_address, code_size);
|
||||
|
||||
// Now that everything is ready, fix up the indirection table.
|
||||
// Note that we do support code that doesn't have an indirection fixup, so
|
||||
// ignore those when we see them.
|
||||
if (guest_address) {
|
||||
uint32_t* indirection_slot = reinterpret_cast<uint32_t*>(
|
||||
indirection_table_base_ + (guest_address - kIndirectionTableBase));
|
||||
*indirection_slot = uint32_t(reinterpret_cast<uint64_t>(code_address));
|
||||
}
|
||||
|
||||
return code_address;
|
||||
}
|
||||
|
||||
// http://msdn.microsoft.com/en-us/library/ssa62fwe.aspx
|
||||
// Operation codes stored in UNWIND_CODE::UnwindOp. The trailing note on each
// entry describes how the accompanying OpInfo field / following slots are used.
enum _UNWIND_OP_CODES {
  UWOP_PUSH_NONVOL = 0,      // info == register number
  UWOP_ALLOC_LARGE = 1,      // no info, alloc size in next 2 slots
  UWOP_ALLOC_SMALL = 2,      // info == size of allocation / 8 - 1
  UWOP_SET_FPREG = 3,        // no info, FP = RSP + UNWIND_INFO.FPRegOffset*16
  UWOP_SAVE_NONVOL = 4,      // info == register number, offset in next slot
  UWOP_SAVE_NONVOL_FAR = 5,  // info == register number, offset in next 2 slots
  UWOP_SAVE_XMM128 = 6,      // info == XMM reg number, offset in next slot
  UWOP_SAVE_XMM128_FAR = 7,  // info == XMM reg number, offset in next 2 slots
  UWOP_PUSH_MACHFRAME = 8    // info == 0: no error-code, 1: error-code
};
using UNWIND_CODE_OPS = _UNWIND_OP_CODES;
|
||||
// Register numbers as used by unwind codes whose info field is a register
// number (see the UWOP_* notes). Wrapped in a class purely to scope the names.
class UNWIND_REGISTER {
 public:
  enum _ {
    // Legacy general purpose registers.
    RAX = 0, RCX = 1, RDX = 2, RBX = 3,
    RSP = 4, RBP = 5, RSI = 6, RDI = 7,
    // Extended general purpose registers.
    R8 = 8,   R9 = 9,   R10 = 10, R11 = 11,
    R12 = 12, R13 = 13, R14 = 14, R15 = 15,
  };
};
|
||||
|
||||
typedef union _UNWIND_CODE {
|
||||
struct {
|
||||
uint8_t CodeOffset;
|
||||
uint8_t UnwindOp : 4;
|
||||
uint8_t OpInfo : 4;
|
||||
};
|
||||
USHORT FrameOffset;
|
||||
} UNWIND_CODE, *PUNWIND_CODE;
|
||||
|
||||
typedef struct _UNWIND_INFO {
|
||||
uint8_t Version : 3;
|
||||
uint8_t Flags : 5;
|
||||
uint8_t SizeOfProlog;
|
||||
uint8_t CountOfCodes;
|
||||
uint8_t FrameRegister : 4;
|
||||
uint8_t FrameOffset : 4;
|
||||
UNWIND_CODE UnwindCode[1];
|
||||
/* UNWIND_CODE MoreUnwindCode[((CountOfCodes + 1) & ~1) - 1];
|
||||
* union {
|
||||
* OPTIONAL ULONG ExceptionHandler;
|
||||
* OPTIONAL ULONG FunctionEntry;
|
||||
* };
|
||||
* OPTIONAL ULONG ExceptionData[]; */
|
||||
} UNWIND_INFO, *PUNWIND_INFO;
|
||||
|
||||
void X64CodeCache::InitializeUnwindEntry(uint8_t* unwind_entry_address,
|
||||
size_t unwind_table_slot,
|
||||
uint8_t* code_address,
|
||||
size_t code_size, size_t stack_size) {
|
||||
auto unwind_info = reinterpret_cast<UNWIND_INFO*>(unwind_entry_address);
|
||||
|
||||
if (!stack_size) {
|
||||
// http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
|
||||
unwind_info->Version = 1;
|
||||
unwind_info->Flags = 0;
|
||||
unwind_info->SizeOfProlog = 0;
|
||||
unwind_info->CountOfCodes = 0;
|
||||
unwind_info->FrameRegister = 0;
|
||||
unwind_info->FrameOffset = 0;
|
||||
} else if (stack_size <= 128) {
|
||||
uint8_t prolog_size = 4;
|
||||
|
||||
// http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
|
||||
unwind_info->Version = 1;
|
||||
unwind_info->Flags = 0;
|
||||
unwind_info->SizeOfProlog = prolog_size;
|
||||
unwind_info->CountOfCodes = 1;
|
||||
unwind_info->FrameRegister = 0;
|
||||
unwind_info->FrameOffset = 0;
|
||||
|
||||
// http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx
|
||||
size_t co = 0;
|
||||
auto& unwind_code = unwind_info->UnwindCode[co++];
|
||||
unwind_code.CodeOffset =
|
||||
14; // end of instruction + 1 == offset of next instruction
|
||||
unwind_code.UnwindOp = UWOP_ALLOC_SMALL;
|
||||
unwind_code.OpInfo = stack_size / 8 - 1;
|
||||
} else {
|
||||
// TODO(benvanik): take as parameters?
|
||||
uint8_t prolog_size = 7;
|
||||
|
||||
// http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
|
||||
unwind_info->Version = 1;
|
||||
unwind_info->Flags = 0;
|
||||
unwind_info->SizeOfProlog = prolog_size;
|
||||
unwind_info->CountOfCodes = 3;
|
||||
unwind_info->FrameRegister = 0;
|
||||
unwind_info->FrameOffset = 0;
|
||||
|
||||
// http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx
|
||||
size_t co = 0;
|
||||
auto& unwind_code = unwind_info->UnwindCode[co++];
|
||||
unwind_code.CodeOffset =
|
||||
7; // end of instruction + 1 == offset of next instruction
|
||||
unwind_code.UnwindOp = UWOP_ALLOC_LARGE;
|
||||
unwind_code.OpInfo = 0;
|
||||
unwind_code = unwind_info->UnwindCode[co++];
|
||||
unwind_code.FrameOffset = (USHORT)(stack_size) / 8;
|
||||
}
|
||||
|
||||
// Add entry.
|
||||
auto& fn_entry = unwind_table_[unwind_table_slot];
|
||||
fn_entry.BeginAddress = (DWORD)(code_address - generated_code_base_);
|
||||
fn_entry.EndAddress = (DWORD)(fn_entry.BeginAddress + code_size);
|
||||
fn_entry.UnwindData = (DWORD)(unwind_entry_address - generated_code_base_);
|
||||
}
|
||||
|
||||
} // namespace x64
|
||||
} // namespace backend
|
||||
} // namespace cpu
|
||||
} // namespace xe
|
|
@ -10,18 +10,21 @@
|
|||
#ifndef XENIA_BACKEND_X64_X64_CODE_CACHE_H_
|
||||
#define XENIA_BACKEND_X64_X64_CODE_CACHE_H_
|
||||
|
||||
// For RUNTIME_FUNCTION:
|
||||
#include "xenia/base/platform.h"
|
||||
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
namespace xe {
|
||||
namespace cpu {
|
||||
namespace backend {
|
||||
namespace x64 {
|
||||
|
||||
class X64CodeChunk;
|
||||
|
||||
class X64CodeCache {
|
||||
public:
|
||||
X64CodeCache(size_t chunk_size = DEFAULT_CHUNK_SIZE);
|
||||
X64CodeCache();
|
||||
virtual ~X64CodeCache();
|
||||
|
||||
bool Initialize();
|
||||
|
@ -30,14 +33,43 @@ class X64CodeCache {
|
|||
// TODO(benvanik): keep track of code blocks
|
||||
// TODO(benvanik): padding/guards/etc
|
||||
|
||||
void* PlaceCode(void* machine_code, size_t code_size, size_t stack_size);
|
||||
void CommitExecutableRange(uint32_t guest_low, uint32_t guest_high);
|
||||
|
||||
void* PlaceCode(uint32_t guest_address, void* machine_code, size_t code_size,
|
||||
size_t stack_size);
|
||||
|
||||
private:
|
||||
const static size_t DEFAULT_CHUNK_SIZE = 4 * 1024 * 1024;
|
||||
std::mutex lock_;
|
||||
size_t chunk_size_;
|
||||
X64CodeChunk* head_chunk_;
|
||||
X64CodeChunk* active_chunk_;
|
||||
const static uint64_t kIndirectionTableBase = 0x80000000;
|
||||
const static uint64_t kIndirectionTableSize = 0x1FFFFFFF;
|
||||
const static uint64_t kGeneratedCodeBase = 0xA0000000;
|
||||
const static uint64_t kGeneratedCodeSize = 0x0FFFFFFF;
|
||||
|
||||
void InitializeUnwindEntry(uint8_t* unwind_entry_address,
|
||||
size_t unwind_table_slot, uint8_t* code_address,
|
||||
size_t code_size, size_t stack_size);
|
||||
|
||||
// Must be held when manipulating the offsets or counts of anything, to keep
|
||||
// the tables consistent and ordered.
|
||||
std::mutex allocation_mutex_;
|
||||
|
||||
// Fixed at kIndirectionTableBase in host space, holding 4 byte pointers into
|
||||
// the generated code table that correspond to the PPC functions in guest
|
||||
// space.
|
||||
uint8_t* indirection_table_base_;
|
||||
// Fixed at kGeneratedCodeBase and holding all generated code, growing as
|
||||
// needed.
|
||||
uint8_t* generated_code_base_;
|
||||
// Current offset to empty space in generated code.
|
||||
size_t generated_code_offset_;
|
||||
// Current high water mark of COMMITTED code.
|
||||
std::atomic<size_t> generated_code_commit_mark_;
|
||||
|
||||
// Growable function table system handle.
|
||||
void* unwind_table_handle_;
|
||||
// Actual unwind table entries.
|
||||
std::vector<RUNTIME_FUNCTION> unwind_table_;
|
||||
// Current number of entries in the table.
|
||||
std::atomic<uint32_t> unwind_table_count_;
|
||||
};
|
||||
|
||||
} // namespace x64
|
||||
|
|
|
@ -1,98 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/cpu/backend/x64/x64_code_cache.h"
|
||||
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/math.h"
|
||||
|
||||
namespace xe {
|
||||
namespace cpu {
|
||||
namespace backend {
|
||||
namespace x64 {
|
||||
|
||||
class X64CodeChunk {
|
||||
public:
|
||||
X64CodeChunk(size_t chunk_size);
|
||||
~X64CodeChunk();
|
||||
|
||||
public:
|
||||
X64CodeChunk* next;
|
||||
size_t capacity;
|
||||
uint8_t* buffer;
|
||||
size_t offset;
|
||||
};
|
||||
|
||||
X64CodeCache::X64CodeCache(size_t chunk_size)
|
||||
: chunk_size_(chunk_size), head_chunk_(NULL), active_chunk_(NULL) {}
|
||||
|
||||
X64CodeCache::~X64CodeCache() {
|
||||
std::lock_guard<std::mutex> guard(lock_);
|
||||
auto chunk = head_chunk_;
|
||||
while (chunk) {
|
||||
auto next = chunk->next;
|
||||
delete chunk;
|
||||
chunk = next;
|
||||
}
|
||||
head_chunk_ = NULL;
|
||||
}
|
||||
|
||||
int X64CodeCache::Initialize() { return 0; }
|
||||
|
||||
void* X64CodeCache::PlaceCode(void* machine_code, size_t code_size,
|
||||
size_t stack_size) {
|
||||
// Always move the code to land on 16b alignment. We do this by rounding up
|
||||
// to 16b so that all offsets are aligned.
|
||||
code_size = xe::round_up(code_size, 16);
|
||||
|
||||
lock_.lock();
|
||||
|
||||
if (active_chunk_) {
|
||||
if (active_chunk_->capacity - active_chunk_->offset < code_size) {
|
||||
auto next = active_chunk_->next;
|
||||
if (!next) {
|
||||
assert_true(code_size < chunk_size_, "need to support larger chunks");
|
||||
next = new X64CodeChunk(chunk_size_);
|
||||
active_chunk_->next = next;
|
||||
}
|
||||
active_chunk_ = next;
|
||||
}
|
||||
} else {
|
||||
head_chunk_ = active_chunk_ = new X64CodeChunk(chunk_size_);
|
||||
}
|
||||
|
||||
uint8_t* final_address = active_chunk_->buffer + active_chunk_->offset;
|
||||
active_chunk_->offset += code_size;
|
||||
|
||||
lock_.unlock();
|
||||
|
||||
// Copy code.
|
||||
memcpy(final_address, machine_code, code_size);
|
||||
|
||||
return final_address;
|
||||
}
|
||||
|
||||
X64CodeChunk::X64CodeChunk(size_t chunk_size)
|
||||
: next(NULL), capacity(chunk_size), buffer(0), offset(0) {
|
||||
buffer = (uint8_t*)mmap(nullptr, chunk_size, PROT_WRITE | PROT_EXEC,
|
||||
MAP_ANON | MAP_PRIVATE, -1, 0);
|
||||
}
|
||||
|
||||
X64CodeChunk::~X64CodeChunk() {
|
||||
if (buffer) {
|
||||
munmap(buffer, capacity);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace x64
|
||||
} // namespace backend
|
||||
} // namespace cpu
|
||||
} // namespace xe
|
|
@ -1,283 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2013 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/cpu/backend/x64/x64_code_cache.h"
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/math.h"
|
||||
|
||||
namespace xe {
|
||||
namespace cpu {
|
||||
namespace backend {
|
||||
namespace x64 {
|
||||
|
||||
class X64CodeChunk {
|
||||
public:
|
||||
X64CodeChunk(size_t chunk_size);
|
||||
~X64CodeChunk();
|
||||
|
||||
public:
|
||||
X64CodeChunk* next;
|
||||
size_t capacity;
|
||||
uint8_t* buffer;
|
||||
size_t offset;
|
||||
|
||||
// Estimate of function sized use to determine initial table capacity.
|
||||
const static uint32_t ESTIMATED_FN_SIZE = 512;
|
||||
// Size of unwind info per function.
|
||||
// TODO(benvanik): move this to emitter.
|
||||
const static uint32_t UNWIND_INFO_SIZE = 4 + (2 * 1 + 2 + 2);
|
||||
|
||||
void* fn_table_handle;
|
||||
RUNTIME_FUNCTION* fn_table;
|
||||
uint32_t fn_table_count;
|
||||
uint32_t fn_table_capacity;
|
||||
|
||||
void AddTableEntry(uint8_t* code, size_t code_size, size_t stack_size);
|
||||
};
|
||||
|
||||
X64CodeCache::X64CodeCache(size_t chunk_size)
|
||||
: chunk_size_(chunk_size), head_chunk_(NULL), active_chunk_(NULL) {}
|
||||
|
||||
X64CodeCache::~X64CodeCache() {
|
||||
std::lock_guard<std::mutex> guard(lock_);
|
||||
auto chunk = head_chunk_;
|
||||
while (chunk) {
|
||||
auto next = chunk->next;
|
||||
delete chunk;
|
||||
chunk = next;
|
||||
}
|
||||
head_chunk_ = NULL;
|
||||
}
|
||||
|
||||
bool X64CodeCache::Initialize() { return true; }
|
||||
|
||||
void* X64CodeCache::PlaceCode(void* machine_code, size_t code_size,
|
||||
size_t stack_size) {
|
||||
size_t alloc_size = code_size;
|
||||
|
||||
// Add unwind info into the allocation size. Keep things 16b aligned.
|
||||
alloc_size += xe::round_up(X64CodeChunk::UNWIND_INFO_SIZE, 16);
|
||||
|
||||
// Always move the code to land on 16b alignment. We do this by rounding up
|
||||
// to 16b so that all offsets are aligned.
|
||||
alloc_size = xe::round_up(alloc_size, 16);
|
||||
|
||||
lock_.lock();
|
||||
|
||||
if (active_chunk_) {
|
||||
if (active_chunk_->capacity - active_chunk_->offset < alloc_size) {
|
||||
auto next = active_chunk_->next;
|
||||
if (!next) {
|
||||
assert_true(alloc_size < chunk_size_, "need to support larger chunks");
|
||||
next = new X64CodeChunk(chunk_size_);
|
||||
active_chunk_->next = next;
|
||||
}
|
||||
active_chunk_ = next;
|
||||
}
|
||||
} else {
|
||||
head_chunk_ = active_chunk_ = new X64CodeChunk(chunk_size_);
|
||||
}
|
||||
|
||||
uint8_t* final_address = active_chunk_->buffer + active_chunk_->offset;
|
||||
active_chunk_->offset += alloc_size;
|
||||
|
||||
// Add entry to fn table.
|
||||
active_chunk_->AddTableEntry(final_address, alloc_size, stack_size);
|
||||
|
||||
lock_.unlock();
|
||||
|
||||
// Copy code.
|
||||
memcpy(final_address, machine_code, code_size);
|
||||
|
||||
// This isn't needed on x64 (probably), but is convention.
|
||||
FlushInstructionCache(GetCurrentProcess(), final_address, alloc_size);
|
||||
return final_address;
|
||||
}
|
||||
|
||||
X64CodeChunk::X64CodeChunk(size_t chunk_size)
|
||||
: next(NULL), capacity(chunk_size), buffer(0), offset(0) {
|
||||
buffer = (uint8_t*)VirtualAlloc(NULL, capacity, MEM_RESERVE | MEM_COMMIT,
|
||||
PAGE_EXECUTE_READWRITE);
|
||||
|
||||
fn_table_capacity =
|
||||
static_cast<uint32_t>(xe::round_up(capacity / ESTIMATED_FN_SIZE, 16));
|
||||
size_t table_size = fn_table_capacity * sizeof(RUNTIME_FUNCTION);
|
||||
fn_table = (RUNTIME_FUNCTION*)malloc(table_size);
|
||||
fn_table_count = 0;
|
||||
fn_table_handle = 0;
|
||||
RtlAddGrowableFunctionTable(&fn_table_handle, fn_table, fn_table_count,
|
||||
fn_table_capacity, (ULONG_PTR)buffer,
|
||||
(ULONG_PTR)buffer + capacity);
|
||||
}
|
||||
|
||||
X64CodeChunk::~X64CodeChunk() {
|
||||
if (fn_table_handle) {
|
||||
RtlDeleteGrowableFunctionTable(fn_table_handle);
|
||||
}
|
||||
if (buffer) {
|
||||
VirtualFree(buffer, 0, MEM_RELEASE);
|
||||
}
|
||||
}
|
||||
|
||||
// http://msdn.microsoft.com/en-us/library/ssa62fwe.aspx
|
||||
namespace {
|
||||
typedef enum _UNWIND_OP_CODES {
|
||||
UWOP_PUSH_NONVOL = 0, /* info == register number */
|
||||
UWOP_ALLOC_LARGE, /* no info, alloc size in next 2 slots */
|
||||
UWOP_ALLOC_SMALL, /* info == size of allocation / 8 - 1 */
|
||||
UWOP_SET_FPREG, /* no info, FP = RSP + UNWIND_INFO.FPRegOffset*16 */
|
||||
UWOP_SAVE_NONVOL, /* info == register number, offset in next slot */
|
||||
UWOP_SAVE_NONVOL_FAR, /* info == register number, offset in next 2 slots */
|
||||
UWOP_SAVE_XMM128, /* info == XMM reg number, offset in next slot */
|
||||
UWOP_SAVE_XMM128_FAR, /* info == XMM reg number, offset in next 2 slots */
|
||||
UWOP_PUSH_MACHFRAME /* info == 0: no error-code, 1: error-code */
|
||||
} UNWIND_CODE_OPS;
|
||||
class UNWIND_REGISTER {
|
||||
public:
|
||||
enum _ {
|
||||
RAX = 0,
|
||||
RCX = 1,
|
||||
RDX = 2,
|
||||
RBX = 3,
|
||||
RSP = 4,
|
||||
RBP = 5,
|
||||
RSI = 6,
|
||||
RDI = 7,
|
||||
R8 = 8,
|
||||
R9 = 9,
|
||||
R10 = 10,
|
||||
R11 = 11,
|
||||
R12 = 12,
|
||||
R13 = 13,
|
||||
R14 = 14,
|
||||
R15 = 15,
|
||||
};
|
||||
};
|
||||
|
||||
typedef union _UNWIND_CODE {
|
||||
struct {
|
||||
uint8_t CodeOffset;
|
||||
uint8_t UnwindOp : 4;
|
||||
uint8_t OpInfo : 4;
|
||||
};
|
||||
USHORT FrameOffset;
|
||||
} UNWIND_CODE, *PUNWIND_CODE;
|
||||
|
||||
typedef struct _UNWIND_INFO {
|
||||
uint8_t Version : 3;
|
||||
uint8_t Flags : 5;
|
||||
uint8_t SizeOfProlog;
|
||||
uint8_t CountOfCodes;
|
||||
uint8_t FrameRegister : 4;
|
||||
uint8_t FrameOffset : 4;
|
||||
UNWIND_CODE UnwindCode[1];
|
||||
/* UNWIND_CODE MoreUnwindCode[((CountOfCodes + 1) & ~1) - 1];
|
||||
* union {
|
||||
* OPTIONAL ULONG ExceptionHandler;
|
||||
* OPTIONAL ULONG FunctionEntry;
|
||||
* };
|
||||
* OPTIONAL ULONG ExceptionData[]; */
|
||||
} UNWIND_INFO, *PUNWIND_INFO;
|
||||
} // namespace
|
||||
|
||||
void X64CodeChunk::AddTableEntry(uint8_t* code, size_t code_size,
|
||||
size_t stack_size) {
|
||||
// NOTE: we assume a chunk lock.
|
||||
|
||||
if (fn_table_count + 1 > fn_table_capacity) {
|
||||
// Table exhausted, need to realloc. If this happens a lot we should tune
|
||||
// the table size to prevent this.
|
||||
XELOGW("X64CodeCache growing FunctionTable - adjust ESTIMATED_FN_SIZE");
|
||||
RtlDeleteGrowableFunctionTable(fn_table_handle);
|
||||
size_t old_size = fn_table_capacity * sizeof(RUNTIME_FUNCTION);
|
||||
size_t new_size = old_size * 2;
|
||||
auto new_table = (RUNTIME_FUNCTION*)realloc(fn_table, new_size);
|
||||
assert_not_null(new_table);
|
||||
if (!new_table) {
|
||||
return;
|
||||
}
|
||||
fn_table = new_table;
|
||||
fn_table_capacity *= 2;
|
||||
RtlAddGrowableFunctionTable(&fn_table_handle, fn_table, fn_table_count,
|
||||
fn_table_capacity, (ULONG_PTR)buffer,
|
||||
(ULONG_PTR)buffer + capacity);
|
||||
}
|
||||
|
||||
// Allocate unwind data. We know we have space because we overallocated.
|
||||
// This should be the tailing 16b with 16b alignment.
|
||||
size_t unwind_info_offset = offset - UNWIND_INFO_SIZE;
|
||||
|
||||
if (!stack_size) {
|
||||
// http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
|
||||
UNWIND_INFO* unwind_info = (UNWIND_INFO*)(buffer + unwind_info_offset);
|
||||
unwind_info->Version = 1;
|
||||
unwind_info->Flags = 0;
|
||||
unwind_info->SizeOfProlog = 0;
|
||||
unwind_info->CountOfCodes = 0;
|
||||
unwind_info->FrameRegister = 0;
|
||||
unwind_info->FrameOffset = 0;
|
||||
} else if (stack_size <= 128) {
|
||||
uint8_t prolog_size = 4;
|
||||
|
||||
// http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
|
||||
UNWIND_INFO* unwind_info = (UNWIND_INFO*)(buffer + unwind_info_offset);
|
||||
unwind_info->Version = 1;
|
||||
unwind_info->Flags = 0;
|
||||
unwind_info->SizeOfProlog = prolog_size;
|
||||
unwind_info->CountOfCodes = 1;
|
||||
unwind_info->FrameRegister = 0;
|
||||
unwind_info->FrameOffset = 0;
|
||||
|
||||
// http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx
|
||||
size_t co = 0;
|
||||
auto& unwind_code = unwind_info->UnwindCode[co++];
|
||||
unwind_code.CodeOffset =
|
||||
14; // end of instruction + 1 == offset of next instruction
|
||||
unwind_code.UnwindOp = UWOP_ALLOC_SMALL;
|
||||
unwind_code.OpInfo = stack_size / 8 - 1;
|
||||
} else {
|
||||
// TODO(benvanik): take as parameters?
|
||||
uint8_t prolog_size = 7;
|
||||
|
||||
// http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
|
||||
UNWIND_INFO* unwind_info = (UNWIND_INFO*)(buffer + unwind_info_offset);
|
||||
unwind_info->Version = 1;
|
||||
unwind_info->Flags = 0;
|
||||
unwind_info->SizeOfProlog = prolog_size;
|
||||
unwind_info->CountOfCodes = 3;
|
||||
unwind_info->FrameRegister = 0;
|
||||
unwind_info->FrameOffset = 0;
|
||||
|
||||
// http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx
|
||||
size_t co = 0;
|
||||
auto& unwind_code = unwind_info->UnwindCode[co++];
|
||||
unwind_code.CodeOffset =
|
||||
7; // end of instruction + 1 == offset of next instruction
|
||||
unwind_code.UnwindOp = UWOP_ALLOC_LARGE;
|
||||
unwind_code.OpInfo = 0;
|
||||
unwind_code = unwind_info->UnwindCode[co++];
|
||||
unwind_code.FrameOffset = (USHORT)(stack_size) / 8;
|
||||
}
|
||||
|
||||
// Add entry.
|
||||
auto& fn_entry = fn_table[fn_table_count++];
|
||||
fn_entry.BeginAddress = (DWORD)(code - buffer);
|
||||
fn_entry.EndAddress = (DWORD)(fn_entry.BeginAddress + code_size);
|
||||
fn_entry.UnwindData = (DWORD)unwind_info_offset;
|
||||
|
||||
// Notify the function table that it has new entries.
|
||||
RtlGrowFunctionTable(fn_table_handle, fn_table_count);
|
||||
}
|
||||
|
||||
} // namespace x64
|
||||
} // namespace backend
|
||||
} // namespace cpu
|
||||
} // namespace xe
|
|
@ -87,9 +87,9 @@ X64Emitter::X64Emitter(X64Backend* backend, XbyakAllocator* allocator)
|
|||
|
||||
X64Emitter::~X64Emitter() = default;
|
||||
|
||||
bool X64Emitter::Emit(HIRBuilder* builder, uint32_t debug_info_flags,
|
||||
DebugInfo* debug_info, void*& out_code_address,
|
||||
size_t& out_code_size) {
|
||||
bool X64Emitter::Emit(uint32_t guest_address, HIRBuilder* builder,
|
||||
uint32_t debug_info_flags, DebugInfo* debug_info,
|
||||
void*& out_code_address, size_t& out_code_size) {
|
||||
SCOPE_profile_cpu_f("cpu");
|
||||
|
||||
// Reset.
|
||||
|
@ -108,7 +108,7 @@ bool X64Emitter::Emit(HIRBuilder* builder, uint32_t debug_info_flags,
|
|||
|
||||
// Copy the final code to the cache and relocate it.
|
||||
out_code_size = getSize();
|
||||
out_code_address = Emplace(stack_size);
|
||||
out_code_address = Emplace(guest_address, stack_size);
|
||||
|
||||
// Stash source map.
|
||||
if (debug_info_flags_ & DebugInfoFlags::kDebugInfoSourceMap) {
|
||||
|
@ -119,13 +119,14 @@ bool X64Emitter::Emit(HIRBuilder* builder, uint32_t debug_info_flags,
|
|||
return true;
|
||||
}
|
||||
|
||||
void* X64Emitter::Emplace(size_t stack_size) {
|
||||
void* X64Emitter::Emplace(uint32_t guest_address, size_t stack_size) {
|
||||
// To avoid changing xbyak, we do a switcharoo here.
|
||||
// top_ points to the Xbyak buffer, and since we are in AutoGrow mode
|
||||
// it has pending relocations. We copy the top_ to our buffer, swap the
|
||||
// pointer, relocate, then return the original scratch pointer for use.
|
||||
uint8_t* old_address = top_;
|
||||
void* new_address = code_cache_->PlaceCode(top_, size_, stack_size);
|
||||
void* new_address =
|
||||
code_cache_->PlaceCode(guest_address, top_, size_, stack_size);
|
||||
top_ = (uint8_t*)new_address;
|
||||
ready();
|
||||
top_ = old_address;
|
||||
|
|
|
@ -112,9 +112,9 @@ class X64Emitter : public Xbyak::CodeGenerator {
|
|||
Processor* processor() const { return processor_; }
|
||||
X64Backend* backend() const { return backend_; }
|
||||
|
||||
bool Emit(hir::HIRBuilder* builder, uint32_t debug_info_flags,
|
||||
DebugInfo* debug_info, void*& out_code_address,
|
||||
size_t& out_code_size);
|
||||
bool Emit(uint32_t guest_address, hir::HIRBuilder* builder,
|
||||
uint32_t debug_info_flags, DebugInfo* debug_info,
|
||||
void*& out_code_address, size_t& out_code_size);
|
||||
|
||||
public:
|
||||
// Reserved: rsp
|
||||
|
@ -192,7 +192,7 @@ class X64Emitter : public Xbyak::CodeGenerator {
|
|||
size_t stack_size() const { return stack_size_; }
|
||||
|
||||
protected:
|
||||
void* Emplace(size_t stack_size);
|
||||
void* Emplace(uint32_t guest_address, size_t stack_size);
|
||||
bool Emit(hir::HIRBuilder* builder, size_t& out_stack_size);
|
||||
void EmitGetCurrentThreadId();
|
||||
void EmitTraceUserCallReturn();
|
||||
|
|
|
@ -88,7 +88,7 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
|
|||
mov(r8, qword[rsp + 8 * 3]);
|
||||
ret();
|
||||
|
||||
void* fn = Emplace(stack_size);
|
||||
void* fn = Emplace(0, stack_size);
|
||||
return (HostToGuestThunk)fn;
|
||||
}
|
||||
|
||||
|
@ -137,7 +137,7 @@ GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() {
|
|||
mov(rdx, qword[rsp + 8 * 2]);
|
||||
ret();
|
||||
|
||||
void* fn = Emplace(stack_size);
|
||||
void* fn = Emplace(0, stack_size);
|
||||
return (HostToGuestThunk)fn;
|
||||
}
|
||||
|
||||
|
|
|
@ -66,6 +66,9 @@ bool XexModule::Load(const std::string& name, const std::string& path,
|
|||
i += section->info.page_count;
|
||||
}
|
||||
|
||||
// Notify backend that we have an executable range.
|
||||
processor_->backend()->CommitExecutableRange(low_address_, high_address_);
|
||||
|
||||
// Add all imports (variables/functions).
|
||||
for (size_t n = 0; n < header->import_library_count; n++) {
|
||||
if (!SetupLibraryImports(&header->import_libraries[n])) {
|
||||
|
|
Loading…
Reference in New Issue