Rewriting code cache to put everything at fixed addresses.

2015-05-20 19:23:46 -07:00 · 2015-05-20 19:23:46 -07:00 · 5e5eb47789
parent 20a3172ebb
commit 5e5eb47789
14 changed files with 360 additions and 408 deletions
--- a/libxenia.vcxproj
+++ b/libxenia.vcxproj
@ -38,7 +38,7 @@
    <ClCompile Include="src\xenia\cpu\backend\backend.cc" />
    <ClCompile Include="src\xenia\cpu\backend\x64\x64_assembler.cc" />
    <ClCompile Include="src\xenia\cpu\backend\x64\x64_backend.cc" />
-    <ClCompile Include="src\xenia\cpu\backend\x64\x64_code_cache_win.cc" />
+    <ClCompile Include="src\xenia\cpu\backend\x64\x64_code_cache.cc" />
    <ClCompile Include="src\xenia\cpu\backend\x64\x64_emitter.cc" />
    <ClCompile Include="src\xenia\cpu\backend\x64\x64_function.cc" />
    <ClCompile Include="src\xenia\cpu\backend\x64\x64_sequences.cc" />
--- a/libxenia.vcxproj.filters
+++ b/libxenia.vcxproj.filters
@ -274,9 +274,6 @@
    <ClCompile Include="src\xenia\cpu\backend\x64\x64_backend.cc">
      <Filter>src\xenia\cpu\backend\x64</Filter>
    </ClCompile>
-    <ClCompile Include="src\xenia\cpu\backend\x64\x64_code_cache_win.cc">
-      <Filter>src\xenia\cpu\backend\x64</Filter>
-    </ClCompile>
    <ClCompile Include="src\xenia\cpu\backend\x64\x64_emitter.cc">
      <Filter>src\xenia\cpu\backend\x64</Filter>
    </ClCompile>
@ -703,6 +700,9 @@
    <ClCompile Include="src\xenia\kernel\xboxkrnl_error.cc">
      <Filter>src\xenia\kernel</Filter>
    </ClCompile>
+    <ClCompile Include="src\xenia\cpu\backend\x64\x64_code_cache.cc">
+      <Filter>src\xenia\cpu\backend\x64</Filter>
+    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="src\xenia\emulator.h">
--- a/src/xenia/cpu/backend/backend.h
+++ b/src/xenia/cpu/backend/backend.h
@ -39,6 +39,9 @@ class Backend {
  virtual void* AllocThreadData();
  virtual void FreeThreadData(void* thread_data);

+  virtual void CommitExecutableRange(uint32_t guest_low,
+                                     uint32_t guest_high) = 0;
+
  virtual std::unique_ptr<Assembler> CreateAssembler() = 0;

 protected:
--- a/src/xenia/cpu/backend/x64/x64_assembler.cc
+++ b/src/xenia/cpu/backend/x64/x64_assembler.cc
@ -70,8 +70,8 @@ bool X64Assembler::Assemble(FunctionInfo* symbol_info, HIRBuilder* builder,
  // Lower HIR -> x64.
  void* machine_code = nullptr;
  size_t code_size = 0;
-  if (!emitter_->Emit(builder, debug_info_flags, debug_info.get(), machine_code,
-                      code_size)) {
+  if (!emitter_->Emit(symbol_info->address(), builder, debug_info_flags,
+                      debug_info.get(), machine_code, code_size)) {
    return false;
  }

--- a/src/xenia/cpu/backend/x64/x64_backend.cc
+++ b/src/xenia/cpu/backend/x64/x64_backend.cc
@ -54,6 +54,11 @@ bool X64Backend::Initialize() {
  return true;
 }

+// Backend hook for newly loaded executable guest ranges: forwards the range
+// unchanged to the code cache's CommitExecutableRange.
+void X64Backend::CommitExecutableRange(uint32_t guest_low,
+                                       uint32_t guest_high) {
+  code_cache_->CommitExecutableRange(guest_low, guest_high);
+}
+
 std::unique_ptr<Assembler> X64Backend::CreateAssembler() {
  return std::make_unique<X64Assembler>(this);
 }
--- a/src/xenia/cpu/backend/x64/x64_backend.h
+++ b/src/xenia/cpu/backend/x64/x64_backend.h
@ -35,6 +35,8 @@ class X64Backend : public Backend {

  bool Initialize() override;

+  void CommitExecutableRange(uint32_t guest_low, uint32_t guest_high) override;
+
  std::unique_ptr<Assembler> CreateAssembler() override;

 private:
--- a/src/xenia/cpu/backend/x64/x64_code_cache.cc
+++ b/src/xenia/cpu/backend/x64/x64_code_cache.cc
@ -0,0 +1,287 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2013 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include "xenia/cpu/backend/x64/x64_code_cache.h"
+
+#include "xenia/base/assert.h"
+#include "xenia/base/logging.h"
+#include "xenia/base/math.h"
+#include "xenia/base/memory.h"
+
+namespace xe {
+namespace cpu {
+namespace backend {
+namespace x64 {
+
+// Size of unwind info per function.
+// The leading 4 covers the four one-byte header fields of UNWIND_INFO; the
+// parenthesized term adds 6 more bytes, i.e. room for three 2-byte
+// UNWIND_CODE slots. PlaceCode rounds this up to 16 bytes when reserving.
+// TODO(benvanik): move this to emitter.
+const static uint32_t kUnwindInfoSize = 4 + (2 * 1 + 2 + 2);
+
+X64CodeCache::X64CodeCache()
+    : indirection_table_base_(nullptr),
+      generated_code_base_(nullptr),
+      generated_code_offset_(0),
+      generated_code_commit_mark_(0),
+      unwind_table_handle_(nullptr),
+      unwind_table_count_(0) {}
+
+// Undoes Initialize(): unregisters the growable unwind table, then releases
+// both fixed-address reservations. Safe to run after a failed or skipped
+// Initialize() since each resource is checked before it is released.
+X64CodeCache::~X64CodeCache() {
+  if (unwind_table_handle_) {
+    RtlDeleteGrowableFunctionTable(unwind_table_handle_);
+  }
+  // VirtualFree with MEM_RELEASE always releases the entire reservation and
+  // requires dwSize to be 0; a non-zero size makes the call fail, leaving the
+  // region allocated.
+  if (indirection_table_base_) {
+    VirtualFree(indirection_table_base_, 0, MEM_RELEASE);
+  }
+  if (generated_code_base_) {
+    VirtualFree(generated_code_base_, 0, MEM_RELEASE);
+  }
+}
+
+// Reserves the indirection table and generated code regions at their fixed
+// host addresses and registers an (initially empty) growable unwind table
+// spanning the generated code region.
+// Returns false, after logging the reason, if either reservation or the
+// unwind table registration fails.
+bool X64CodeCache::Initialize() {
+  // Address space is only reserved here; pages are committed later by
+  // CommitExecutableRange and PlaceCode.
+  indirection_table_base_ = reinterpret_cast<uint8_t*>(
+      VirtualAlloc(reinterpret_cast<void*>(kIndirectionTableBase),
+                   kIndirectionTableSize, MEM_RESERVE, PAGE_READWRITE));
+  if (!indirection_table_base_) {
+    XELOGE("Unable to allocate code cache indirection table");
+    // The constants are 64-bit while %X consumes a 32-bit argument, so narrow
+    // explicitly; both ends of the range fit in 32 bits.
+    XELOGE(
+        "This is likely because the %.8X-%.8X range is in use by some other "
+        "system DLL",
+        static_cast<uint32_t>(kIndirectionTableBase),
+        static_cast<uint32_t>(kIndirectionTableBase + kIndirectionTableSize));
+    return false;
+  }
+
+  generated_code_base_ = reinterpret_cast<uint8_t*>(
+      VirtualAlloc(reinterpret_cast<void*>(kGeneratedCodeBase),
+                   kGeneratedCodeSize, MEM_RESERVE, PAGE_EXECUTE_READWRITE));
+  if (!generated_code_base_) {
+    XELOGE("Unable to allocate code cache generated code storage");
+    XELOGE(
+        "This is likely because the %.8X-%.8X range is in use by some other "
+        "system DLL",
+        static_cast<uint32_t>(kGeneratedCodeBase),
+        static_cast<uint32_t>(kGeneratedCodeBase + kGeneratedCodeSize));
+    return false;
+  }
+
+  // Compute total number of unwind entries we should allocate.
+  // We don't support reallocing right now, so this should be high.
+  unwind_table_.resize(30000);
+
+  // Create table and register with the system. It's empty now, but we'll grow
+  // it as functions are added. A non-zero result is treated as failure.
+  if (RtlAddGrowableFunctionTable(
+          &unwind_table_handle_, unwind_table_.data(), unwind_table_count_,
+          DWORD(unwind_table_.size()),
+          reinterpret_cast<ULONG_PTR>(generated_code_base_),
+          reinterpret_cast<ULONG_PTR>(generated_code_base_ +
+                                      kGeneratedCodeSize))) {
+    XELOGE("Unable to create unwind function table");
+    return false;
+  }
+
+  return true;
+}
+
+// Commits the part of the indirection table that backs the guest addresses
+// from guest_low up to guest_high (guest_high - guest_low bytes), so that
+// PlaceCode can later write indirection slots for functions in that range.
+// NOTE(review): whether guest_high is meant to be inclusive is not visible
+// here - confirm against the caller.
+void X64CodeCache::CommitExecutableRange(uint32_t guest_low,
+                                         uint32_t guest_high) {
+  void* committed = VirtualAlloc(
+      indirection_table_base_ + (guest_low - kIndirectionTableBase),
+      guest_high - guest_low, MEM_COMMIT, PAGE_READWRITE);
+  if (!committed) {
+    // Nothing can be returned to the caller, but a failed commit would
+    // otherwise only surface later as a fault when a slot is written.
+    XELOGE("Unable to commit code cache indirection table range %.8X-%.8X",
+           guest_low, guest_high);
+  }
+}
+
+// Places a block of machine code into the generated code region.
+//
+// Reserves 16-byte-aligned space for the code followed by its unwind info,
+// commits more memory if the reservation passes the committed high water
+// mark, copies the code, fills in and publishes the unwind table entry and,
+// when guest_address is non-zero, stores the code's (32-bit) host address in
+// the matching indirection table slot.
+//
+// guest_address: guest function address, or 0 for code with no indirection
+//                slot.
+// machine_code/code_size: source bytes to copy.
+// stack_size: stack allocation of the function, used to build unwind info.
+// Returns the host address the code now lives at.
+void* X64CodeCache::PlaceCode(uint32_t guest_address, void* machine_code,
+                              size_t code_size, size_t stack_size) {
+  // Hold a lock while we bump the pointers up. This is important as the
+  // unwind table requires entries AND code to be sorted in order.
+  size_t high_mark;
+  uint8_t* code_address = nullptr;
+  uint8_t* unwind_entry_address = nullptr;
+  size_t unwind_table_slot = 0;
+  {
+    std::lock_guard<std::mutex> allocation_lock(allocation_mutex_);
+
+    // Reserve code.
+    // Always move the code to land on 16b alignment.
+    code_address = generated_code_base_ + generated_code_offset_;
+    generated_code_offset_ += xe::round_up(code_size, 16);
+
+    // Reserve unwind info.
+    // We go on the high side of the unwind info size as we don't know how big
+    // we need it, and a few extra bytes of padding isn't the worst thing.
+    unwind_entry_address = generated_code_base_ + generated_code_offset_;
+    generated_code_offset_ += xe::round_up(kUnwindInfoSize, 16);
+    // Post-increment: slots are zero-based while RtlGrowFunctionTable below
+    // takes the entry COUNT. Pre-incrementing would leave slot 0 empty and
+    // keep the newest entry just past the range the system was told about.
+    unwind_table_slot = unwind_table_count_++;
+
+    high_mark = generated_code_offset_;
+  }
+
+  // If we are going above the high water mark of committed memory, commit some
+  // more. It's ok if multiple threads do this, as redundant commits aren't
+  // harmful.
+  size_t old_commit_mark = generated_code_commit_mark_;
+  if (high_mark > old_commit_mark) {
+    const size_t commit_step = 16 * 1024 * 1024;
+    // Advance in whole steps until the new mark actually covers high_mark.
+    size_t new_commit_mark = old_commit_mark + commit_step;
+    while (new_commit_mark < high_mark) {
+      new_commit_mark += commit_step;
+    }
+    VirtualAlloc(generated_code_base_, new_commit_mark, MEM_COMMIT,
+                 PAGE_EXECUTE_READWRITE);
+    // Only publish the new mark if nobody else moved it in the meantime.
+    generated_code_commit_mark_.compare_exchange_strong(old_commit_mark,
+                                                        new_commit_mark);
+  }
+
+  // Copy code.
+  std::memcpy(code_address, machine_code, code_size);
+
+  // Add unwind info.
+  InitializeUnwindEntry(unwind_entry_address, unwind_table_slot, code_address,
+                        code_size, stack_size);
+
+  // Notify that the unwind table has grown.
+  // We do this outside of the lock, but with the latest total count.
+  RtlGrowFunctionTable(unwind_table_handle_, unwind_table_count_);
+
+  // This isn't needed on x64 (probably), but is convention.
+  FlushInstructionCache(GetCurrentProcess(), code_address, code_size);
+
+  // Now that everything is ready, fix up the indirection table.
+  // Note that we do support code that doesn't have an indirection fixup, so
+  // ignore those when we see them.
+  if (guest_address) {
+    uint32_t* indirection_slot = reinterpret_cast<uint32_t*>(
+        indirection_table_base_ + (guest_address - kIndirectionTableBase));
+    // Slots hold only the low 32 bits of the host code address.
+    *indirection_slot = uint32_t(reinterpret_cast<uint64_t>(code_address));
+  }
+
+  return code_address;
+}
+
+// http://msdn.microsoft.com/en-us/library/ssa62fwe.aspx
+// Operation codes written into UNWIND_CODE::UnwindOp. The enumerators take
+// implicit sequential values starting at 0, so their order defines the
+// numeric encoding and must be kept as-is.
+typedef enum _UNWIND_OP_CODES {
+  UWOP_PUSH_NONVOL = 0, /* info == register number */
+  UWOP_ALLOC_LARGE,     /* no info, alloc size in next 2 slots */
+  UWOP_ALLOC_SMALL,     /* info == size of allocation / 8 - 1 */
+  UWOP_SET_FPREG,       /* no info, FP = RSP + UNWIND_INFO.FPRegOffset*16 */
+  UWOP_SAVE_NONVOL,     /* info == register number, offset in next slot */
+  UWOP_SAVE_NONVOL_FAR, /* info == register number, offset in next 2 slots */
+  UWOP_SAVE_XMM128,     /* info == XMM reg number, offset in next slot */
+  UWOP_SAVE_XMM128_FAR, /* info == XMM reg number, offset in next 2 slots */
+  UWOP_PUSH_MACHFRAME   /* info == 0: no error-code, 1: error-code */
+} UNWIND_CODE_OPS;
+// Scoping wrapper around the general purpose register numbers (0-15), for
+// unwind operations whose info field is a register number.
+class UNWIND_REGISTER {
+ public:
+  enum _ {
+    RAX = 0,
+    RCX = 1,
+    RDX = 2,
+    RBX = 3,
+    RSP = 4,
+    RBP = 5,
+    RSI = 6,
+    RDI = 7,
+    R8 = 8,
+    R9 = 9,
+    R10 = 10,
+    R11 = 11,
+    R12 = 12,
+    R13 = 13,
+    R14 = 14,
+    R15 = 15,
+  };
+};
+
+// One 2-byte slot of the unwind code array. A slot is either an operation
+// (code offset plus a 4-bit op and 4-bit info) or, for operations that take
+// extra data in following slots, a raw 16-bit value read via FrameOffset.
+typedef union _UNWIND_CODE {
+  struct {
+    uint8_t CodeOffset;    // Offset of the instruction after the one described.
+    uint8_t UnwindOp : 4;  // One of UNWIND_CODE_OPS.
+    uint8_t OpInfo : 4;    // Operation-specific info (see UNWIND_CODE_OPS).
+  };
+  USHORT FrameOffset;
+} UNWIND_CODE, *PUNWIND_CODE;
+
+// Per-function unwind description: a 4-byte header followed by CountOfCodes
+// UNWIND_CODE slots. Only the first slot is declared; further slots are
+// reached by indexing past UnwindCode[0] into the space reserved after the
+// struct (see the commented-out trailing members below).
+typedef struct _UNWIND_INFO {
+  uint8_t Version : 3;
+  uint8_t Flags : 5;
+  uint8_t SizeOfProlog;
+  uint8_t CountOfCodes;
+  uint8_t FrameRegister : 4;
+  uint8_t FrameOffset : 4;
+  UNWIND_CODE UnwindCode[1];
+  /*  UNWIND_CODE MoreUnwindCode[((CountOfCodes + 1) & ~1) - 1];
+  *   union {
+  *       OPTIONAL ULONG ExceptionHandler;
+  *       OPTIONAL ULONG FunctionEntry;
+  *   };
+  *   OPTIONAL ULONG ExceptionData[]; */
+} UNWIND_INFO, *PUNWIND_INFO;
+
+// Writes the UNWIND_INFO for one function at unwind_entry_address and fills
+// in its RUNTIME_FUNCTION entry at unwind_table_[unwind_table_slot].
+//
+// The unwind codes emitted depend on stack_size:
+//   0       - no codes (no stack allocation to undo).
+//   <= 128  - a single UWOP_ALLOC_SMALL.
+//   larger  - UWOP_ALLOC_LARGE plus one slot holding stack_size / 8.
+// All addresses stored in the table entry are offsets from
+// generated_code_base_.
+void X64CodeCache::InitializeUnwindEntry(uint8_t* unwind_entry_address,
+                                         size_t unwind_table_slot,
+                                         uint8_t* code_address,
+                                         size_t code_size, size_t stack_size) {
+  auto unwind_info = reinterpret_cast<UNWIND_INFO*>(unwind_entry_address);
+
+  // Header fields shared by every case.
+  // http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
+  unwind_info->Version = 1;
+  unwind_info->Flags = 0;
+  unwind_info->FrameRegister = 0;
+  unwind_info->FrameOffset = 0;
+
+  if (!stack_size) {
+    unwind_info->SizeOfProlog = 0;
+    unwind_info->CountOfCodes = 0;
+  } else if (stack_size <= 128) {
+    unwind_info->SizeOfProlog = 4;
+    unwind_info->CountOfCodes = 1;
+
+    // http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx
+    UNWIND_CODE* alloc_code = &unwind_info->UnwindCode[0];
+    alloc_code->CodeOffset =
+        14;  // end of instruction + 1 == offset of next instruction
+    alloc_code->UnwindOp = UWOP_ALLOC_SMALL;
+    alloc_code->OpInfo = stack_size / 8 - 1;
+  } else {
+    // TODO(benvanik): take as parameters?
+    unwind_info->SizeOfProlog = 7;
+    // UWOP_ALLOC_LARGE with OpInfo == 0 occupies two slots: the operation
+    // itself and one slot holding the allocation size / 8.
+    unwind_info->CountOfCodes = 2;
+
+    // http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx
+    UNWIND_CODE* alloc_code = &unwind_info->UnwindCode[0];
+    alloc_code->CodeOffset =
+        7;  // end of instruction + 1 == offset of next instruction
+    alloc_code->UnwindOp = UWOP_ALLOC_LARGE;
+    alloc_code->OpInfo = 0;
+
+    // Address the second slot through its own pointer. Assigning through a
+    // reference bound to slot 0 would copy slot 1 over slot 0 and then
+    // overwrite the operation that was just written.
+    UNWIND_CODE* size_code = &unwind_info->UnwindCode[1];
+    // Divide before narrowing so large sizes are not truncated first.
+    size_code->FrameOffset = static_cast<USHORT>(stack_size / 8);
+  }
+
+  // Add entry.
+  auto& fn_entry = unwind_table_[unwind_table_slot];
+  fn_entry.BeginAddress = (DWORD)(code_address - generated_code_base_);
+  fn_entry.EndAddress = (DWORD)(fn_entry.BeginAddress + code_size);
+  fn_entry.UnwindData = (DWORD)(unwind_entry_address - generated_code_base_);
+}
+
+}  // namespace x64
+}  // namespace backend
+}  // namespace cpu
+}  // namespace xe
--- a/src/xenia/cpu/backend/x64/x64_code_cache.h
+++ b/src/xenia/cpu/backend/x64/x64_code_cache.h
@ -10,18 +10,21 @@
 #ifndef XENIA_BACKEND_X64_X64_CODE_CACHE_H_
 #define XENIA_BACKEND_X64_X64_CODE_CACHE_H_

+// For RUNTIME_FUNCTION:
+#include "xenia/base/platform.h"
+
+#include <atomic>
 #include <mutex>
+#include <vector>

 namespace xe {
 namespace cpu {
 namespace backend {
 namespace x64 {

-class X64CodeChunk;
-
 class X64CodeCache {
 public:
-  X64CodeCache(size_t chunk_size = DEFAULT_CHUNK_SIZE);
+  X64CodeCache();
  virtual ~X64CodeCache();

  bool Initialize();
@ -30,14 +33,43 @@ class X64CodeCache {
  // TODO(benvanik): keep track of code blocks
  // TODO(benvanik): padding/guards/etc

-  void* PlaceCode(void* machine_code, size_t code_size, size_t stack_size);
+  void CommitExecutableRange(uint32_t guest_low, uint32_t guest_high);
+
+  void* PlaceCode(uint32_t guest_address, void* machine_code, size_t code_size,
+                  size_t stack_size);

 private:
-  const static size_t DEFAULT_CHUNK_SIZE = 4 * 1024 * 1024;
-  std::mutex lock_;
-  size_t chunk_size_;
-  X64CodeChunk* head_chunk_;
-  X64CodeChunk* active_chunk_;
+  const static uint64_t kIndirectionTableBase = 0x80000000;
+  const static uint64_t kIndirectionTableSize = 0x1FFFFFFF;
+  const static uint64_t kGeneratedCodeBase = 0xA0000000;
+  const static uint64_t kGeneratedCodeSize = 0x0FFFFFFF;
+
+  void InitializeUnwindEntry(uint8_t* unwind_entry_address,
+                             size_t unwind_table_slot, uint8_t* code_address,
+                             size_t code_size, size_t stack_size);
+
+  // Must be held when manipulating the offsets or counts of anything, to keep
+  // the tables consistent and ordered.
+  std::mutex allocation_mutex_;
+
+  // Fixed at kIndirectionTableBase in host space, holding 4 byte pointers into
+  // the generated code table that correspond to the PPC functions in guest
+  // space.
+  uint8_t* indirection_table_base_;
+  // Fixed at kGeneratedCodeBase and holding all generated code, growing as
+  // needed.
+  uint8_t* generated_code_base_;
+  // Current offset to empty space in generated code.
+  size_t generated_code_offset_;
+  // Current high water mark of COMMITTED code.
+  std::atomic<size_t> generated_code_commit_mark_;
+
+  // Growable function table system handle.
+  void* unwind_table_handle_;
+  // Actual unwind table entries.
+  std::vector<RUNTIME_FUNCTION> unwind_table_;
+  // Current number of entries in the table.
+  std::atomic<uint32_t> unwind_table_count_;
 };

 }  // namespace x64
--- a/src/xenia/cpu/backend/x64/x64_code_cache_posix.cc
+++ b/src/xenia/cpu/backend/x64/x64_code_cache_posix.cc
@ -1,98 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2014 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#include "xenia/cpu/backend/x64/x64_code_cache.h"
-
-#include <sys/mman.h>
-
-#include "xenia/base/assert.h"
-#include "xenia/base/math.h"
-
-namespace xe {
-namespace cpu {
-namespace backend {
-namespace x64 {
-
-class X64CodeChunk {
- public:
-  X64CodeChunk(size_t chunk_size);
-  ~X64CodeChunk();
-
- public:
-  X64CodeChunk* next;
-  size_t capacity;
-  uint8_t* buffer;
-  size_t offset;
-};
-
-X64CodeCache::X64CodeCache(size_t chunk_size)
-    : chunk_size_(chunk_size), head_chunk_(NULL), active_chunk_(NULL) {}
-
-X64CodeCache::~X64CodeCache() {
-  std::lock_guard<std::mutex> guard(lock_);
-  auto chunk = head_chunk_;
-  while (chunk) {
-    auto next = chunk->next;
-    delete chunk;
-    chunk = next;
-  }
-  head_chunk_ = NULL;
-}
-
-int X64CodeCache::Initialize() { return 0; }
-
-void* X64CodeCache::PlaceCode(void* machine_code, size_t code_size,
-                              size_t stack_size) {
-  // Always move the code to land on 16b alignment. We do this by rounding up
-  // to 16b so that all offsets are aligned.
-  code_size = xe::round_up(code_size, 16);
-
-  lock_.lock();
-
-  if (active_chunk_) {
-    if (active_chunk_->capacity - active_chunk_->offset < code_size) {
-      auto next = active_chunk_->next;
-      if (!next) {
-        assert_true(code_size < chunk_size_, "need to support larger chunks");
-        next = new X64CodeChunk(chunk_size_);
-        active_chunk_->next = next;
-      }
-      active_chunk_ = next;
-    }
-  } else {
-    head_chunk_ = active_chunk_ = new X64CodeChunk(chunk_size_);
-  }
-
-  uint8_t* final_address = active_chunk_->buffer + active_chunk_->offset;
-  active_chunk_->offset += code_size;
-
-  lock_.unlock();
-
-  // Copy code.
-  memcpy(final_address, machine_code, code_size);
-
-  return final_address;
-}
-
-X64CodeChunk::X64CodeChunk(size_t chunk_size)
-    : next(NULL), capacity(chunk_size), buffer(0), offset(0) {
-  buffer = (uint8_t*)mmap(nullptr, chunk_size, PROT_WRITE | PROT_EXEC,
-                          MAP_ANON | MAP_PRIVATE, -1, 0);
-}
-
-X64CodeChunk::~X64CodeChunk() {
-  if (buffer) {
-    munmap(buffer, capacity);
-  }
-}
-
-}  // namespace x64
-}  // namespace backend
-}  // namespace cpu
-}  // namespace xe
--- a/src/xenia/cpu/backend/x64/x64_code_cache_win.cc
+++ b/src/xenia/cpu/backend/x64/x64_code_cache_win.cc
@ -1,283 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project                                 *
- ******************************************************************************
- * Copyright 2013 Ben Vanik. All rights reserved.                             *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#include "xenia/cpu/backend/x64/x64_code_cache.h"
-
-#include "xenia/base/assert.h"
-#include "xenia/base/logging.h"
-#include "xenia/base/math.h"
-
-namespace xe {
-namespace cpu {
-namespace backend {
-namespace x64 {
-
-class X64CodeChunk {
- public:
-  X64CodeChunk(size_t chunk_size);
-  ~X64CodeChunk();
-
- public:
-  X64CodeChunk* next;
-  size_t capacity;
-  uint8_t* buffer;
-  size_t offset;
-
-  // Estimate of function sized use to determine initial table capacity.
-  const static uint32_t ESTIMATED_FN_SIZE = 512;
-  // Size of unwind info per function.
-  // TODO(benvanik): move this to emitter.
-  const static uint32_t UNWIND_INFO_SIZE = 4 + (2 * 1 + 2 + 2);
-
-  void* fn_table_handle;
-  RUNTIME_FUNCTION* fn_table;
-  uint32_t fn_table_count;
-  uint32_t fn_table_capacity;
-
-  void AddTableEntry(uint8_t* code, size_t code_size, size_t stack_size);
-};
-
-X64CodeCache::X64CodeCache(size_t chunk_size)
-    : chunk_size_(chunk_size), head_chunk_(NULL), active_chunk_(NULL) {}
-
-X64CodeCache::~X64CodeCache() {
-  std::lock_guard<std::mutex> guard(lock_);
-  auto chunk = head_chunk_;
-  while (chunk) {
-    auto next = chunk->next;
-    delete chunk;
-    chunk = next;
-  }
-  head_chunk_ = NULL;
-}
-
-bool X64CodeCache::Initialize() { return true; }
-
-void* X64CodeCache::PlaceCode(void* machine_code, size_t code_size,
-                              size_t stack_size) {
-  size_t alloc_size = code_size;
-
-  // Add unwind info into the allocation size. Keep things 16b aligned.
-  alloc_size += xe::round_up(X64CodeChunk::UNWIND_INFO_SIZE, 16);
-
-  // Always move the code to land on 16b alignment. We do this by rounding up
-  // to 16b so that all offsets are aligned.
-  alloc_size = xe::round_up(alloc_size, 16);
-
-  lock_.lock();
-
-  if (active_chunk_) {
-    if (active_chunk_->capacity - active_chunk_->offset < alloc_size) {
-      auto next = active_chunk_->next;
-      if (!next) {
-        assert_true(alloc_size < chunk_size_, "need to support larger chunks");
-        next = new X64CodeChunk(chunk_size_);
-        active_chunk_->next = next;
-      }
-      active_chunk_ = next;
-    }
-  } else {
-    head_chunk_ = active_chunk_ = new X64CodeChunk(chunk_size_);
-  }
-
-  uint8_t* final_address = active_chunk_->buffer + active_chunk_->offset;
-  active_chunk_->offset += alloc_size;
-
-  // Add entry to fn table.
-  active_chunk_->AddTableEntry(final_address, alloc_size, stack_size);
-
-  lock_.unlock();
-
-  // Copy code.
-  memcpy(final_address, machine_code, code_size);
-
-  // This isn't needed on x64 (probably), but is convention.
-  FlushInstructionCache(GetCurrentProcess(), final_address, alloc_size);
-  return final_address;
-}
-
-X64CodeChunk::X64CodeChunk(size_t chunk_size)
-    : next(NULL), capacity(chunk_size), buffer(0), offset(0) {
-  buffer = (uint8_t*)VirtualAlloc(NULL, capacity, MEM_RESERVE | MEM_COMMIT,
-                                  PAGE_EXECUTE_READWRITE);
-
-  fn_table_capacity =
-      static_cast<uint32_t>(xe::round_up(capacity / ESTIMATED_FN_SIZE, 16));
-  size_t table_size = fn_table_capacity * sizeof(RUNTIME_FUNCTION);
-  fn_table = (RUNTIME_FUNCTION*)malloc(table_size);
-  fn_table_count = 0;
-  fn_table_handle = 0;
-  RtlAddGrowableFunctionTable(&fn_table_handle, fn_table, fn_table_count,
-                              fn_table_capacity, (ULONG_PTR)buffer,
-                              (ULONG_PTR)buffer + capacity);
-}
-
-X64CodeChunk::~X64CodeChunk() {
-  if (fn_table_handle) {
-    RtlDeleteGrowableFunctionTable(fn_table_handle);
-  }
-  if (buffer) {
-    VirtualFree(buffer, 0, MEM_RELEASE);
-  }
-}
-
-// http://msdn.microsoft.com/en-us/library/ssa62fwe.aspx
-namespace {
-typedef enum _UNWIND_OP_CODES {
-  UWOP_PUSH_NONVOL = 0, /* info == register number */
-  UWOP_ALLOC_LARGE,     /* no info, alloc size in next 2 slots */
-  UWOP_ALLOC_SMALL,     /* info == size of allocation / 8 - 1 */
-  UWOP_SET_FPREG,       /* no info, FP = RSP + UNWIND_INFO.FPRegOffset*16 */
-  UWOP_SAVE_NONVOL,     /* info == register number, offset in next slot */
-  UWOP_SAVE_NONVOL_FAR, /* info == register number, offset in next 2 slots */
-  UWOP_SAVE_XMM128,     /* info == XMM reg number, offset in next slot */
-  UWOP_SAVE_XMM128_FAR, /* info == XMM reg number, offset in next 2 slots */
-  UWOP_PUSH_MACHFRAME   /* info == 0: no error-code, 1: error-code */
-} UNWIND_CODE_OPS;
-class UNWIND_REGISTER {
- public:
-  enum _ {
-    RAX = 0,
-    RCX = 1,
-    RDX = 2,
-    RBX = 3,
-    RSP = 4,
-    RBP = 5,
-    RSI = 6,
-    RDI = 7,
-    R8 = 8,
-    R9 = 9,
-    R10 = 10,
-    R11 = 11,
-    R12 = 12,
-    R13 = 13,
-    R14 = 14,
-    R15 = 15,
-  };
-};
-
-typedef union _UNWIND_CODE {
-  struct {
-    uint8_t CodeOffset;
-    uint8_t UnwindOp : 4;
-    uint8_t OpInfo : 4;
-  };
-  USHORT FrameOffset;
-} UNWIND_CODE, *PUNWIND_CODE;
-
-typedef struct _UNWIND_INFO {
-  uint8_t Version : 3;
-  uint8_t Flags : 5;
-  uint8_t SizeOfProlog;
-  uint8_t CountOfCodes;
-  uint8_t FrameRegister : 4;
-  uint8_t FrameOffset : 4;
-  UNWIND_CODE UnwindCode[1];
-  /*  UNWIND_CODE MoreUnwindCode[((CountOfCodes + 1) & ~1) - 1];
-  *   union {
-  *       OPTIONAL ULONG ExceptionHandler;
-  *       OPTIONAL ULONG FunctionEntry;
-  *   };
-  *   OPTIONAL ULONG ExceptionData[]; */
-} UNWIND_INFO, *PUNWIND_INFO;
-}  // namespace
-
-void X64CodeChunk::AddTableEntry(uint8_t* code, size_t code_size,
-                                 size_t stack_size) {
-  // NOTE: we assume a chunk lock.
-
-  if (fn_table_count + 1 > fn_table_capacity) {
-    // Table exhausted, need to realloc. If this happens a lot we should tune
-    // the table size to prevent this.
-    XELOGW("X64CodeCache growing FunctionTable - adjust ESTIMATED_FN_SIZE");
-    RtlDeleteGrowableFunctionTable(fn_table_handle);
-    size_t old_size = fn_table_capacity * sizeof(RUNTIME_FUNCTION);
-    size_t new_size = old_size * 2;
-    auto new_table = (RUNTIME_FUNCTION*)realloc(fn_table, new_size);
-    assert_not_null(new_table);
-    if (!new_table) {
-      return;
-    }
-    fn_table = new_table;
-    fn_table_capacity *= 2;
-    RtlAddGrowableFunctionTable(&fn_table_handle, fn_table, fn_table_count,
-                                fn_table_capacity, (ULONG_PTR)buffer,
-                                (ULONG_PTR)buffer + capacity);
-  }
-
-  // Allocate unwind data. We know we have space because we overallocated.
-  // This should be the tailing 16b with 16b alignment.
-  size_t unwind_info_offset = offset - UNWIND_INFO_SIZE;
-
-  if (!stack_size) {
-    // http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
-    UNWIND_INFO* unwind_info = (UNWIND_INFO*)(buffer + unwind_info_offset);
-    unwind_info->Version = 1;
-    unwind_info->Flags = 0;
-    unwind_info->SizeOfProlog = 0;
-    unwind_info->CountOfCodes = 0;
-    unwind_info->FrameRegister = 0;
-    unwind_info->FrameOffset = 0;
-  } else if (stack_size <= 128) {
-    uint8_t prolog_size = 4;
-
-    // http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
-    UNWIND_INFO* unwind_info = (UNWIND_INFO*)(buffer + unwind_info_offset);
-    unwind_info->Version = 1;
-    unwind_info->Flags = 0;
-    unwind_info->SizeOfProlog = prolog_size;
-    unwind_info->CountOfCodes = 1;
-    unwind_info->FrameRegister = 0;
-    unwind_info->FrameOffset = 0;
-
-    // http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx
-    size_t co = 0;
-    auto& unwind_code = unwind_info->UnwindCode[co++];
-    unwind_code.CodeOffset =
-        14;  // end of instruction + 1 == offset of next instruction
-    unwind_code.UnwindOp = UWOP_ALLOC_SMALL;
-    unwind_code.OpInfo = stack_size / 8 - 1;
-  } else {
-    // TODO(benvanik): take as parameters?
-    uint8_t prolog_size = 7;
-
-    // http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
-    UNWIND_INFO* unwind_info = (UNWIND_INFO*)(buffer + unwind_info_offset);
-    unwind_info->Version = 1;
-    unwind_info->Flags = 0;
-    unwind_info->SizeOfProlog = prolog_size;
-    unwind_info->CountOfCodes = 3;
-    unwind_info->FrameRegister = 0;
-    unwind_info->FrameOffset = 0;
-
-    // http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx
-    size_t co = 0;
-    auto& unwind_code = unwind_info->UnwindCode[co++];
-    unwind_code.CodeOffset =
-        7;  // end of instruction + 1 == offset of next instruction
-    unwind_code.UnwindOp = UWOP_ALLOC_LARGE;
-    unwind_code.OpInfo = 0;
-    unwind_code = unwind_info->UnwindCode[co++];
-    unwind_code.FrameOffset = (USHORT)(stack_size) / 8;
-  }
-
-  // Add entry.
-  auto& fn_entry = fn_table[fn_table_count++];
-  fn_entry.BeginAddress = (DWORD)(code - buffer);
-  fn_entry.EndAddress = (DWORD)(fn_entry.BeginAddress + code_size);
-  fn_entry.UnwindData = (DWORD)unwind_info_offset;
-
-  // Notify the function table that it has new entries.
-  RtlGrowFunctionTable(fn_table_handle, fn_table_count);
-}
-
-}  // namespace x64
-}  // namespace backend
-}  // namespace cpu
-}  // namespace xe
--- a/src/xenia/cpu/backend/x64/x64_emitter.cc
+++ b/src/xenia/cpu/backend/x64/x64_emitter.cc
@ -87,9 +87,9 @@ X64Emitter::X64Emitter(X64Backend* backend, XbyakAllocator* allocator)

 X64Emitter::~X64Emitter() = default;

-bool X64Emitter::Emit(HIRBuilder* builder, uint32_t debug_info_flags,
-                      DebugInfo* debug_info, void*& out_code_address,
-                      size_t& out_code_size) {
+bool X64Emitter::Emit(uint32_t guest_address, HIRBuilder* builder,
+                      uint32_t debug_info_flags, DebugInfo* debug_info,
+                      void*& out_code_address, size_t& out_code_size) {
  SCOPE_profile_cpu_f("cpu");

  // Reset.
@ -108,7 +108,7 @@ bool X64Emitter::Emit(HIRBuilder* builder, uint32_t debug_info_flags,

  // Copy the final code to the cache and relocate it.
  out_code_size = getSize();
-  out_code_address = Emplace(stack_size);
+  out_code_address = Emplace(guest_address, stack_size);

  // Stash source map.
  if (debug_info_flags_ & DebugInfoFlags::kDebugInfoSourceMap) {
@ -119,13 +119,14 @@ bool X64Emitter::Emit(HIRBuilder* builder, uint32_t debug_info_flags,
  return true;
 }

-void* X64Emitter::Emplace(size_t stack_size) {
+void* X64Emitter::Emplace(uint32_t guest_address, size_t stack_size) {
  // To avoid changing xbyak, we do a switcharoo here.
  // top_ points to the Xbyak buffer, and since we are in AutoGrow mode
  // it has pending relocations. We copy the top_ to our buffer, swap the
  // pointer, relocate, then return the original scratch pointer for use.
  uint8_t* old_address = top_;
-  void* new_address = code_cache_->PlaceCode(top_, size_, stack_size);
+  void* new_address =
+      code_cache_->PlaceCode(guest_address, top_, size_, stack_size);
  top_ = (uint8_t*)new_address;
  ready();
  top_ = old_address;
--- a/src/xenia/cpu/backend/x64/x64_emitter.h
+++ b/src/xenia/cpu/backend/x64/x64_emitter.h
@ -112,9 +112,9 @@ class X64Emitter : public Xbyak::CodeGenerator {
  Processor* processor() const { return processor_; }
  X64Backend* backend() const { return backend_; }

-  bool Emit(hir::HIRBuilder* builder, uint32_t debug_info_flags,
-            DebugInfo* debug_info, void*& out_code_address,
-            size_t& out_code_size);
+  bool Emit(uint32_t guest_address, hir::HIRBuilder* builder,
+            uint32_t debug_info_flags, DebugInfo* debug_info,
+            void*& out_code_address, size_t& out_code_size);

 public:
  // Reserved:  rsp
@ -192,7 +192,7 @@ class X64Emitter : public Xbyak::CodeGenerator {
  size_t stack_size() const { return stack_size_; }

 protected:
-  void* Emplace(size_t stack_size);
+  void* Emplace(uint32_t guest_address, size_t stack_size);
  bool Emit(hir::HIRBuilder* builder, size_t& out_stack_size);
  void EmitGetCurrentThreadId();
  void EmitTraceUserCallReturn();
--- a/src/xenia/cpu/backend/x64/x64_thunk_emitter.cc
+++ b/src/xenia/cpu/backend/x64/x64_thunk_emitter.cc
@ -88,7 +88,7 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
  mov(r8, qword[rsp + 8 * 3]);
  ret();

-  void* fn = Emplace(stack_size);
+  void* fn = Emplace(0, stack_size);
  return (HostToGuestThunk)fn;
 }

@ -137,7 +137,7 @@ GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() {
  mov(rdx, qword[rsp + 8 * 2]);
  ret();

-  void* fn = Emplace(stack_size);
+  void* fn = Emplace(0, stack_size);
  return (HostToGuestThunk)fn;
 }

--- a/src/xenia/cpu/xex_module.cc
+++ b/src/xenia/cpu/xex_module.cc
@ -66,6 +66,9 @@ bool XexModule::Load(const std::string& name, const std::string& path,
    i += section->info.page_count;
  }

+  // Notify backend that we have an executable range.
+  processor_->backend()->CommitExecutableRange(low_address_, high_address_);
+
  // Add all imports (variables/functions).
  for (size_t n = 0; n < header->import_library_count; n++) {
    if (!SetupLibraryImports(&header->import_libraries[n])) {