diff --git a/src/xenia/cpu/backend/x64/x64_backend.cc b/src/xenia/cpu/backend/x64/x64_backend.cc
index 19527a3a1..c5976f84f 100644
--- a/src/xenia/cpu/backend/x64/x64_backend.cc
+++ b/src/xenia/cpu/backend/x64/x64_backend.cc
@@ -388,8 +388,8 @@ X64ThunkEmitter::~X64ThunkEmitter() {}
 
 HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
   // rcx = target
-  // rdx = arg0
-  // r8 = arg1
+  // rdx = arg0 (context)
+  // r8 = arg1 (guest return address)
 
   const size_t stack_size = StackLayout::THUNK_STACK_SIZE;
   // rsp + 0 = return address
@@ -399,52 +399,52 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
   sub(rsp, stack_size);
 
   // Preserve nonvolatile registers.
-  mov(qword[rsp + 40], rbx);
-  mov(qword[rsp + 48], rcx);
-  mov(qword[rsp + 56], rbp);
-  mov(qword[rsp + 64], rsi);
-  mov(qword[rsp + 72], rdi);
-  mov(qword[rsp + 80], r12);
-  mov(qword[rsp + 88], r13);
-  mov(qword[rsp + 96], r14);
-  mov(qword[rsp + 104], r15);
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rbx);
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rcx);
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rbp);
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], rsi);
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], rdi);
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r12);
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[6])], r13);
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[7])], r14);
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[8])], r15);
 
-  movaps(ptr[rsp + 112], xmm6);
-  movaps(ptr[rsp + 128], xmm7);
-  movaps(ptr[rsp + 144], xmm8);
-  movaps(ptr[rsp + 160], xmm9);
-  movaps(ptr[rsp + 176], xmm10);
-  movaps(ptr[rsp + 192], xmm11);
-  movaps(ptr[rsp + 208], xmm12);
-  movaps(ptr[rsp + 224], xmm13);
-  movaps(ptr[rsp + 240], xmm14);
-  movaps(ptr[rsp + 256], xmm15);
+  movaps(ptr[rsp + offsetof(StackLayout::Thunk, xmm[0])], xmm6);
+  movaps(ptr[rsp + offsetof(StackLayout::Thunk, xmm[1])], xmm7);
+  movaps(ptr[rsp + offsetof(StackLayout::Thunk, xmm[2])], xmm8);
+  movaps(ptr[rsp + offsetof(StackLayout::Thunk, xmm[3])], xmm9);
+  movaps(ptr[rsp + offsetof(StackLayout::Thunk, xmm[4])], xmm10);
+  movaps(ptr[rsp + offsetof(StackLayout::Thunk, xmm[5])], xmm11);
+  movaps(ptr[rsp + offsetof(StackLayout::Thunk, xmm[6])], xmm12);
+  movaps(ptr[rsp + offsetof(StackLayout::Thunk, xmm[7])], xmm13);
+  movaps(ptr[rsp + offsetof(StackLayout::Thunk, xmm[8])], xmm14);
+  movaps(ptr[rsp + offsetof(StackLayout::Thunk, xmm[9])], xmm15);
 
   mov(rax, rcx);
-  mov(rcx, rdx);
+  mov(rsi, rdx);  // context
   mov(rdx, r8);
   call(rax);
 
-  movaps(xmm6, ptr[rsp + 112]);
-  movaps(xmm7, ptr[rsp + 128]);
-  movaps(xmm8, ptr[rsp + 144]);
-  movaps(xmm9, ptr[rsp + 160]);
-  movaps(xmm10, ptr[rsp + 176]);
-  movaps(xmm11, ptr[rsp + 192]);
-  movaps(xmm12, ptr[rsp + 208]);
-  movaps(xmm13, ptr[rsp + 224]);
-  movaps(xmm14, ptr[rsp + 240]);
-  movaps(xmm15, ptr[rsp + 256]);
+  movaps(xmm6, ptr[rsp + offsetof(StackLayout::Thunk, xmm[0])]);
+  movaps(xmm7, ptr[rsp + offsetof(StackLayout::Thunk, xmm[1])]);
+  movaps(xmm8, ptr[rsp + offsetof(StackLayout::Thunk, xmm[2])]);
+  movaps(xmm9, ptr[rsp + offsetof(StackLayout::Thunk, xmm[3])]);
+  movaps(xmm10, ptr[rsp + offsetof(StackLayout::Thunk, xmm[4])]);
+  movaps(xmm11, ptr[rsp + offsetof(StackLayout::Thunk, xmm[5])]);
+  movaps(xmm12, ptr[rsp + offsetof(StackLayout::Thunk, xmm[6])]);
+  movaps(xmm13, ptr[rsp + offsetof(StackLayout::Thunk, xmm[7])]);
+  movaps(xmm14, ptr[rsp + offsetof(StackLayout::Thunk, xmm[8])]);
+  movaps(xmm15, ptr[rsp + offsetof(StackLayout::Thunk, xmm[9])]);
 
-  mov(rbx, qword[rsp + 40]);
-  mov(rcx, qword[rsp + 48]);
-  mov(rbp, qword[rsp + 56]);
-  mov(rsi, qword[rsp + 64]);
-  mov(rdi, qword[rsp + 72]);
-  mov(r12, qword[rsp + 80]);
-  mov(r13, qword[rsp + 88]);
-  mov(r14, qword[rsp + 96]);
-  mov(r15, qword[rsp + 104]);
+  mov(rbx, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
+  mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
+  mov(rbp, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
+  mov(rsi, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
+  mov(rdi, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
+  mov(r12, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
+  mov(r13, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
+  mov(r14, qword[rsp + offsetof(StackLayout::Thunk, r[7])]);
+  mov(r15, qword[rsp + offsetof(StackLayout::Thunk, r[8])]);
 
   add(rsp, stack_size);
   mov(rcx, qword[rsp + 8 * 1]);
@@ -469,34 +469,40 @@ GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() {
   mov(qword[rsp + 8 * 1], rcx);
   sub(rsp, stack_size);
 
-  mov(qword[rsp + 40], rbx);
-  mov(qword[rsp + 48], rcx);
-  mov(qword[rsp + 56], rbp);
-  mov(qword[rsp + 64], rsi);
-  mov(qword[rsp + 72], rdi);
-  mov(qword[rsp + 80], r12);
-  mov(qword[rsp + 88], r13);
-  mov(qword[rsp + 96], r14);
-  mov(qword[rsp + 104], r15);
+  // Save off volatile registers.
+  // TODO(DrChat): Enable this when we actually need this.
+  // mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rcx);
+  // mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rdx);
+  // mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], r8);
+  // mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], r9);
+  // mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], r10);
+  // mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r11);
 
-  // TODO(benvanik): save things? XMM0-5?
-  // HACK: Some emulated vector instructions require that we don't touch xmm0.
+  // movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[1])], xmm1);
+  // movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[2])], xmm2);
+  // movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[3])], xmm3);
+  // movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[4])], xmm4);
+  // movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[5])], xmm5);
 
   mov(rax, rdx);
+  mov(rcx, rsi);  // context
   mov(rdx, r8);
   mov(r8, r9);
   mov(r9, r10);
   call(rax);
 
-  mov(rbx, qword[rsp + 40]);
-  mov(rcx, qword[rsp + 48]);
-  mov(rbp, qword[rsp + 56]);
-  mov(rsi, qword[rsp + 64]);
-  mov(rdi, qword[rsp + 72]);
-  mov(r12, qword[rsp + 80]);
-  mov(r13, qword[rsp + 88]);
-  mov(r14, qword[rsp + 96]);
-  mov(r15, qword[rsp + 104]);
+  // movaps(xmm1, qword[rsp + offsetof(StackLayout::Thunk, xmm[1])]);
+  // movaps(xmm2, qword[rsp + offsetof(StackLayout::Thunk, xmm[2])]);
+  // movaps(xmm3, qword[rsp + offsetof(StackLayout::Thunk, xmm[3])]);
+  // movaps(xmm4, qword[rsp + offsetof(StackLayout::Thunk, xmm[4])]);
+  // movaps(xmm5, qword[rsp + offsetof(StackLayout::Thunk, xmm[5])]);
+
+  // mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
+  // mov(rdx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
+  // mov(r8, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
+  // mov(r9, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
+  // mov(r10, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
+  // mov(r11, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
 
   add(rsp, stack_size);
   mov(rcx, qword[rsp + 8 * 1]);
@@ -514,36 +520,18 @@ ResolveFunctionThunk X64ThunkEmitter::EmitResolveFunctionThunk() {
   // ebx = target PPC address
-  // rcx = context
+  // rsi = context
 
-  const size_t stack_size = StackLayout::THUNK_STACK_SIZE;
+  // 0x20 bytes of home space for the callee plus 0x8 to keep rsp 16b-aligned.
+  uint32_t stack_size = 0x28;
   // rsp + 0 = return address
   mov(qword[rsp + 8 * 2], rdx);
   mov(qword[rsp + 8 * 1], rcx);
   sub(rsp, stack_size);
 
-  mov(qword[rsp + 40], rbx);
-  mov(qword[rsp + 48], rcx);
-  mov(qword[rsp + 56], rbp);
-  mov(qword[rsp + 64], rsi);
-  mov(qword[rsp + 72], rdi);
-  mov(qword[rsp + 80], r12);
-  mov(qword[rsp + 88], r13);
-  mov(qword[rsp + 96], r14);
-  mov(qword[rsp + 104], r15);
-
+  mov(rcx, rsi);  // context
   mov(rdx, rbx);
   mov(rax, uint64_t(&ResolveFunction));
   call(rax);
 
-  mov(rbx, qword[rsp + 40]);
-  mov(rcx, qword[rsp + 48]);
-  mov(rbp, qword[rsp + 56]);
-  mov(rsi, qword[rsp + 64]);
-  mov(rdi, qword[rsp + 72]);
-  mov(r12, qword[rsp + 80]);
-  mov(r13, qword[rsp + 88]);
-  mov(r14, qword[rsp + 96]);
-  mov(r15, qword[rsp + 104]);
-
   add(rsp, stack_size);
   mov(rcx, qword[rsp + 8 * 1]);
   mov(rdx, qword[rsp + 8 * 2]);
diff --git a/src/xenia/cpu/backend/x64/x64_code_cache.cc b/src/xenia/cpu/backend/x64/x64_code_cache.cc
index fc90102d2..01d098db8 100644
--- a/src/xenia/cpu/backend/x64/x64_code_cache.cc
+++ b/src/xenia/cpu/backend/x64/x64_code_cache.cc
@@ -181,6 +181,10 @@ void* X64CodeCache::PlaceGuestCode(uint32_t guest_address, void* machine_code,
     // Copy code.
     std::memcpy(code_address, machine_code, code_size);
 
+    // Pad the tail up to the next 16-byte boundary with 0xCC (int3).
+    std::memset(code_address + code_size, 0xCC,
+                xe::round_up(code_size, 16) - code_size);
+
     // Notify subclasses of placed code.
     PlaceCode(guest_address, machine_code, code_size, stack_size, code_address,
               unwind_reservation);
diff --git a/src/xenia/cpu/backend/x64/x64_emitter.cc b/src/xenia/cpu/backend/x64/x64_emitter.cc
index dc087c3ca..b19579cc4 100644
--- a/src/xenia/cpu/backend/x64/x64_emitter.cc
+++ b/src/xenia/cpu/backend/x64/x64_emitter.cc
@@ -168,7 +168,7 @@ bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) {
   *out_stack_size = stack_size;
   stack_size_ = stack_size;
   sub(rsp, (uint32_t)stack_size);
-  mov(qword[rsp + StackLayout::GUEST_CTX_HOME], rcx);
+  mov(qword[rsp + StackLayout::GUEST_CTX_HOME], GetContextReg());
   mov(qword[rsp + StackLayout::GUEST_RET_ADDR], rdx);
   mov(qword[rsp + StackLayout::GUEST_CALL_RET_ADDR], 0);
 
@@ -201,7 +201,8 @@ bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) {
   }
 
   // Load membase.
-  mov(rdx, qword[rcx + offsetof(ppc::PPCContext, virtual_membase)]);
+  mov(GetMembaseReg(),
+      qword[GetContextReg() + offsetof(ppc::PPCContext, virtual_membase)]);
 
   // Body.
   auto block = builder->first_block();
@@ -233,7 +234,7 @@ bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) {
   L(epilog_label);
   epilog_label_ = nullptr;
   EmitTraceUserCallReturn();
-  mov(rcx, qword[rsp + StackLayout::GUEST_CTX_HOME]);
+  mov(GetContextReg(), qword[rsp + StackLayout::GUEST_CTX_HOME]);
   add(rsp, (uint32_t)stack_size);
   ret();
 
@@ -272,8 +273,8 @@ void X64Emitter::MarkSourceOffset(const Instr* i) {
 }
 
 void X64Emitter::EmitGetCurrentThreadId() {
-  // rcx must point to context. We could fetch from the stack if needed.
-  mov(ax, word[rcx + offsetof(ppc::PPCContext, thread_id)]);
+  // GetContextReg() must hold the context; it could be reloaded from the stack.
+  mov(ax, word[GetContextReg() + offsetof(ppc::PPCContext, thread_id)]);
 }
 
 void X64Emitter::EmitTraceUserCallReturn() {}
@@ -372,10 +373,9 @@ void X64Emitter::Call(const hir::Instr* instr, GuestFunction* function) {
     // Not too important because indirection table is almost always available.
     // TODO: Overwrite the call-site with a straight call.
     mov(rax, reinterpret_cast<uint64_t>(ResolveFunction));
+    mov(rcx, GetContextReg());
     mov(rdx, function->address());
     call(rax);
-    ReloadECX();
-    ReloadEDX();
   }
 
   // Actually jump/call to rax.
@@ -417,9 +417,8 @@ void X64Emitter::CallIndirect(const hir::Instr* instr,
     // Not too important because indirection table is almost always available.
     mov(edx, reg.cvt32());
     mov(rax, reinterpret_cast<uint64_t>(ResolveFunction));
+    mov(rcx, GetContextReg());
     call(rax);
-    ReloadECX();
-    ReloadEDX();
   }
 
   // Actually jump/call to rax.
@@ -461,14 +460,13 @@ void X64Emitter::CallExtern(const hir::Instr* instr, const Function* function) {
       // rdx = target host function
       // r8  = arg0
       // r9  = arg1
+      mov(rcx, GetContextReg());
       mov(rdx, reinterpret_cast<uint64_t>(builtin_function->handler()));
       mov(r8, reinterpret_cast<uint64_t>(builtin_function->arg0()));
       mov(r9, reinterpret_cast<uint64_t>(builtin_function->arg1()));
       auto thunk = backend()->guest_to_host_thunk();
       mov(rax, reinterpret_cast<uint64_t>(thunk));
       call(rax);
-      ReloadECX();
-      ReloadEDX();
       // rax = host return
     }
   } else if (function->behavior() == Function::Behavior::kExtern) {
@@ -477,13 +475,12 @@ void X64Emitter::CallExtern(const hir::Instr* instr, const Function* function) {
       undefined = false;
       // rcx = context
       // rdx = target host function
+      mov(rcx, GetContextReg());
       mov(rdx, reinterpret_cast<uint64_t>(extern_function->extern_handler()));
-      mov(r8, qword[rcx + offsetof(ppc::PPCContext, kernel_state)]);
+      mov(r8, qword[GetContextReg() + offsetof(ppc::PPCContext, kernel_state)]);
       auto thunk = backend()->guest_to_host_thunk();
       mov(rax, reinterpret_cast<uint64_t>(thunk));
       call(rax);
-      ReloadECX();
-      ReloadEDX();
       // rax = host return
     }
   }
@@ -494,32 +491,28 @@ void X64Emitter::CallExtern(const hir::Instr* instr, const Function* function) {
 
 void X64Emitter::CallNative(void* fn) {
   mov(rax, reinterpret_cast<uint64_t>(fn));
+  mov(rcx, GetContextReg());
   call(rax);
-  ReloadECX();
-  ReloadEDX();
 }
 
 void X64Emitter::CallNative(uint64_t (*fn)(void* raw_context)) {
   mov(rax, reinterpret_cast<uint64_t>(fn));
+  mov(rcx, GetContextReg());
   call(rax);
-  ReloadECX();
-  ReloadEDX();
 }
 
 void X64Emitter::CallNative(uint64_t (*fn)(void* raw_context, uint64_t arg0)) {
   mov(rax, reinterpret_cast<uint64_t>(fn));
+  mov(rcx, GetContextReg());
   call(rax);
-  ReloadECX();
-  ReloadEDX();
 }
 
 void X64Emitter::CallNative(uint64_t (*fn)(void* raw_context, uint64_t arg0),
                             uint64_t arg0) {
-  mov(rdx, arg0);
   mov(rax, reinterpret_cast<uint64_t>(fn));
+  mov(rcx, GetContextReg());
+  mov(rdx, arg0);
   call(rax);
-  ReloadECX();
-  ReloadEDX();
 }
 
 void X64Emitter::CallNativeSafe(void* fn) {
@@ -528,12 +521,11 @@ void X64Emitter::CallNativeSafe(void* fn) {
   // r8  = arg0
   // r9  = arg1
   // r10 = arg2
-  mov(rdx, reinterpret_cast<uint64_t>(fn));
   auto thunk = backend()->guest_to_host_thunk();
   mov(rax, reinterpret_cast<uint64_t>(thunk));
+  mov(rcx, GetContextReg());
+  mov(rdx, reinterpret_cast<uint64_t>(fn));
   call(rax);
-  ReloadECX();
-  ReloadEDX();
   // rax = host return
 }
 
@@ -542,15 +534,16 @@ void X64Emitter::SetReturnAddress(uint64_t value) {
   mov(qword[rsp + StackLayout::GUEST_CALL_RET_ADDR], rax);
 }
 
-Xbyak::Reg64 X64Emitter::GetContextReg() { return rcx; }
-Xbyak::Reg64 X64Emitter::GetMembaseReg() { return rdx; }
+// Important: If you change these, you must update the thunks in x64_backend.cc!
+Xbyak::Reg64 X64Emitter::GetContextReg() { return rsi; }
+Xbyak::Reg64 X64Emitter::GetMembaseReg() { return rdi; }
 
-void X64Emitter::ReloadECX() {
-  mov(rcx, qword[rsp + StackLayout::GUEST_CTX_HOME]);
+void X64Emitter::ReloadContext() {
+  mov(GetContextReg(), qword[rsp + StackLayout::GUEST_CTX_HOME]);
 }
 
-void X64Emitter::ReloadEDX() {
-  mov(rdx, qword[rcx + 8]);  // membase
+void X64Emitter::ReloadMembase() {
+  mov(GetMembaseReg(), qword[GetContextReg() + 8]);  // membase
 }
 
 // Len Assembly                                   Byte Sequence
diff --git a/src/xenia/cpu/backend/x64/x64_emitter.h b/src/xenia/cpu/backend/x64/x64_emitter.h
index 5de5985f2..c3d3a4356 100644
--- a/src/xenia/cpu/backend/x64/x64_emitter.h
+++ b/src/xenia/cpu/backend/x64/x64_emitter.h
@@ -173,8 +173,8 @@ class X64Emitter : public Xbyak::CodeGenerator {
 
   Xbyak::Reg64 GetContextReg();
   Xbyak::Reg64 GetMembaseReg();
-  void ReloadECX();
-  void ReloadEDX();
+  void ReloadContext();
+  void ReloadMembase();
 
   void nop(size_t length = 1);
 
diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc
index ac8237e7c..7185e8aa4 100644
--- a/src/xenia/cpu/backend/x64/x64_sequences.cc
+++ b/src/xenia/cpu/backend/x64/x64_sequences.cc
@@ -1611,7 +1611,7 @@ struct LOAD_VECTOR_SHL_I8
       e.shl(e.dx, 4);
       e.mov(e.rax, (uintptr_t)lvsl_table);
       e.vmovaps(i.dest, e.ptr[e.rax + e.rdx]);
-      e.ReloadEDX();
+      e.ReloadMembase();
     }
   }
 };
@@ -1653,7 +1653,7 @@ struct LOAD_VECTOR_SHR_I8
       e.shl(e.dx, 4);
       e.mov(e.rax, (uintptr_t)lvsr_table);
       e.vmovaps(i.dest, e.ptr[e.rax + e.rdx]);
-      e.ReloadEDX();
+      e.ReloadMembase();
     }
   }
 };
@@ -3788,7 +3788,7 @@ struct MUL_I8 : Sequence<MUL_I8, I<OPCODE_MUL, I8Op, I8Op, I8Op>> {
       }
     }
 
-    e.ReloadEDX();
+    e.ReloadMembase();
   }
 };
 struct MUL_I16 : Sequence<MUL_I16, I<OPCODE_MUL, I16Op, I16Op, I16Op>> {
@@ -3831,7 +3831,7 @@ struct MUL_I16 : Sequence<MUL_I16, I<OPCODE_MUL, I16Op, I16Op, I16Op>> {
       }
     }
 
-    e.ReloadEDX();
+    e.ReloadMembase();
   }
 };
 struct MUL_I32 : Sequence<MUL_I32, I<OPCODE_MUL, I32Op, I32Op, I32Op>> {
@@ -3875,7 +3875,7 @@ struct MUL_I32 : Sequence<MUL_I32, I<OPCODE_MUL, I32Op, I32Op, I32Op>> {
       }
     }
 
-    e.ReloadEDX();
+    e.ReloadMembase();
   }
 };
 struct MUL_I64 : Sequence<MUL_I64, I<OPCODE_MUL, I64Op, I64Op, I64Op>> {
@@ -3918,7 +3918,7 @@ struct MUL_I64 : Sequence<MUL_I64, I<OPCODE_MUL, I64Op, I64Op, I64Op>> {
       }
     }
 
-    e.ReloadEDX();
+    e.ReloadMembase();
   }
 };
 struct MUL_F32 : Sequence<MUL_F32, I<OPCODE_MUL, F32Op, F32Op, F32Op>> {
@@ -3996,7 +3996,7 @@ struct MUL_HI_I8 : Sequence<MUL_HI_I8, I<OPCODE_MUL_HI, I8Op, I8Op, I8Op>> {
       }
       e.mov(i.dest, e.ah);
     }
-    e.ReloadEDX();
+    e.ReloadMembase();
   }
 };
 struct MUL_HI_I16
@@ -4040,7 +4040,7 @@ struct MUL_HI_I16
       }
       e.mov(i.dest, e.dx);
     }
-    e.ReloadEDX();
+    e.ReloadMembase();
   }
 };
 struct MUL_HI_I32
@@ -4089,7 +4089,7 @@ struct MUL_HI_I32
       }
       e.mov(i.dest, e.edx);
     }
-    e.ReloadEDX();
+    e.ReloadMembase();
   }
 };
 struct MUL_HI_I64
@@ -4138,7 +4138,7 @@ struct MUL_HI_I64
       }
       e.mov(i.dest, e.rdx);
     }
-    e.ReloadEDX();
+    e.ReloadMembase();
   }
 };
 EMITTER_OPCODE_TABLE(OPCODE_MUL_HI, MUL_HI_I8, MUL_HI_I16, MUL_HI_I32,
@@ -4193,9 +4193,9 @@ struct DIV_I8 : Sequence<DIV_I8, I<OPCODE_DIV, I8Op, I8Op, I8Op>> {
     e.outLocalLabel();
     e.mov(i.dest, e.al);
     if (clobbered_rcx) {
-      e.ReloadECX();
+      e.ReloadContext();
     }
-    e.ReloadEDX();
+    e.ReloadMembase();
   }
 };
 struct DIV_I16 : Sequence<DIV_I16, I<OPCODE_DIV, I16Op, I16Op, I16Op>> {
@@ -4248,9 +4248,9 @@ struct DIV_I16 : Sequence<DIV_I16, I<OPCODE_DIV, I16Op, I16Op, I16Op>> {
     e.outLocalLabel();
     e.mov(i.dest, e.ax);
     if (clobbered_rcx) {
-      e.ReloadECX();
+      e.ReloadContext();
     }
-    e.ReloadEDX();
+    e.ReloadMembase();
   }
 };
 struct DIV_I32 : Sequence<DIV_I32, I<OPCODE_DIV, I32Op, I32Op, I32Op>> {
@@ -4303,9 +4303,9 @@ struct DIV_I32 : Sequence<DIV_I32, I<OPCODE_DIV, I32Op, I32Op, I32Op>> {
     e.outLocalLabel();
     e.mov(i.dest, e.eax);
     if (clobbered_rcx) {
-      e.ReloadECX();
+      e.ReloadContext();
     }
-    e.ReloadEDX();
+    e.ReloadMembase();
   }
 };
 struct DIV_I64 : Sequence<DIV_I64, I<OPCODE_DIV, I64Op, I64Op, I64Op>> {
@@ -4358,9 +4358,9 @@ struct DIV_I64 : Sequence<DIV_I64, I<OPCODE_DIV, I64Op, I64Op, I64Op>> {
     e.outLocalLabel();
     e.mov(i.dest, e.rax);
     if (clobbered_rcx) {
-      e.ReloadECX();
+      e.ReloadContext();
     }
-    e.ReloadEDX();
+    e.ReloadMembase();
   }
 };
 struct DIV_F32 : Sequence<DIV_F32, I<OPCODE_DIV, F32Op, F32Op, F32Op>> {
@@ -5225,7 +5225,7 @@ void EmitShlXX(X64Emitter& e, const ARGS& i) {
         } else {
           e.mov(e.cl, src);
           e.shl(dest_src, e.cl);
-          e.ReloadECX();
+          e.ReloadContext();
         }
       },
       [](X64Emitter& e, const REG& dest_src, int8_t constant) {
@@ -5303,7 +5303,7 @@ void EmitShrXX(X64Emitter& e, const ARGS& i) {
         } else {
           e.mov(e.cl, src);
           e.shr(dest_src, e.cl);
-          e.ReloadECX();
+          e.ReloadContext();
         }
       },
       [](X64Emitter& e, const REG& dest_src, int8_t constant) {
@@ -5379,7 +5379,7 @@ void EmitSarXX(X64Emitter& e, const ARGS& i) {
         } else {
           e.mov(e.cl, src);
           e.sar(dest_src, e.cl);
-          e.ReloadECX();
+          e.ReloadContext();
         }
       },
       [](X64Emitter& e, const REG& dest_src, int8_t constant) {
@@ -5988,7 +5988,7 @@ void EmitRotateLeftXX(X64Emitter& e, const ARGS& i) {
       }
     }
     e.rol(i.dest, e.cl);
-    e.ReloadECX();
+    e.ReloadContext();
   }
 }
 struct ROTATE_LEFT_I8
@@ -6469,7 +6469,7 @@ struct EXTRACT_I32
       e.vmovaps(e.xmm0, e.ptr[e.rdx + e.rax]);
       e.vpshufb(e.xmm0, i.src1, e.xmm0);
       e.vpextrd(i.dest, e.xmm0, 0);
-      e.ReloadEDX();
+      e.ReloadMembase();
     }
   }
 };
@@ -7508,10 +7508,10 @@ struct ATOMIC_COMPARE_EXCHANGE_I32
     e.mov(e.eax, i.src2);
     e.mov(e.ecx, i.src1.reg().cvt32());
     e.lock();
-    e.cmpxchg(e.dword[e.rdx + e.rcx], i.src3);
+    e.cmpxchg(e.dword[e.GetMembaseReg() + e.rcx], i.src3);
     e.sete(i.dest);
 
-    e.ReloadECX();
+    e.ReloadContext();
   }
 };
 struct ATOMIC_COMPARE_EXCHANGE_I64
@@ -7521,10 +7521,10 @@ struct ATOMIC_COMPARE_EXCHANGE_I64
     e.mov(e.rax, i.src2);
     e.mov(e.ecx, i.src1.reg().cvt32());
     e.lock();
-    e.cmpxchg(e.qword[e.rdx + e.rcx], i.src3);
+    e.cmpxchg(e.qword[e.GetMembaseReg() + e.rcx], i.src3);
     e.sete(i.dest);
 
-    e.ReloadECX();
+    e.ReloadContext();
   }
 };
 EMITTER_OPCODE_TABLE(OPCODE_ATOMIC_COMPARE_EXCHANGE,
diff --git a/src/xenia/cpu/backend/x64/x64_stack_layout.h b/src/xenia/cpu/backend/x64/x64_stack_layout.h
index 439e1d708..3835bb82e 100644
--- a/src/xenia/cpu/backend/x64/x64_stack_layout.h
+++ b/src/xenia/cpu/backend/x64/x64_stack_layout.h
@@ -10,6 +10,7 @@
 #ifndef XENIA_CPU_BACKEND_X64_X64_STACK_LAYOUT_H_
 #define XENIA_CPU_BACKEND_X64_X64_STACK_LAYOUT_H_
 
+#include "xenia/base/vec128.h"
 #include "xenia/cpu/backend/x64/x64_backend.h"
 #include "xenia/cpu/backend/x64/x64_emitter.h"
 
@@ -18,105 +19,115 @@ namespace cpu {
 namespace backend {
 namespace x64 {
 
-/**
- * Stack Layout
- * ----------------------------
- * NOTE: stack must always be 16b aligned.
- *
- * Thunk stack:
- *  +------------------+
- *  | arg temp, 3 * 8  | rsp + 0
- *  |                  |
- *  |                  |
- *  +------------------+
- *  | scratch, 16b     | rsp + 24
- *  |                  |
- *  +------------------+
- *  | rbx              | rsp + 40
- *  +------------------+
- *  | rcx / context    | rsp + 48
- *  +------------------+
- *  | rbp              | rsp + 56
- *  +------------------+
- *  | rsi              | rsp + 64
- *  +------------------+
- *  | rdi              | rsp + 72
- *  +------------------+
- *  | r12              | rsp + 80
- *  +------------------+
- *  | r13              | rsp + 88
- *  +------------------+
- *  | r14              | rsp + 96
- *  +------------------+
- *  | r15              | rsp + 104
- *  +------------------+
- *  | xmm6/0           | rsp + 112
- *  |                  |
- *  +------------------+
- *  | xmm7/1           | rsp + 128
- *  |                  |
- *  +------------------+
- *  | xmm8/2           | rsp + 144
- *  |                  |
- *  +------------------+
- *  | xmm9/3           | rsp + 160
- *  |                  |
- *  +------------------+
- *  | xmm10/4          | rsp + 176
- *  |                  |
- *  +------------------+
- *  | xmm11/5          | rsp + 192
- *  |                  |
- *  +------------------+
- *  | xmm12            | rsp + 208
- *  |                  |
- *  +------------------+
- *  | xmm13            | rsp + 224
- *  |                  |
- *  +------------------+
- *  | xmm14            | rsp + 240
- *  |                  |
- *  +------------------+
- *  | xmm15            | rsp + 256
- *  |                  |
- *  +------------------+
- *  | scratch, 8b      | rsp + 272
- *  |                  |
- *  +------------------+
- *  | (return address) | rsp + 280
- *  +------------------+
- *  | (rcx home)       | rsp + 288
- *  +------------------+
- *  | (rdx home)       | rsp + 296
- *  +------------------+
- *
- *
- * Guest stack:
- *  +------------------+
- *  | arg temp, 3 * 8  | rsp + 0
- *  |                  |
- *  |                  |
- *  +------------------+
- *  | scratch, 48b     | rsp + 32
- *  |                  |
- *  +------------------+
- *  | rcx / context    | rsp + 80
- *  +------------------+
- *  | guest ret addr   | rsp + 88
- *  +------------------+
- *  | call ret addr    | rsp + 96
- *  +------------------+
- *    ... locals ...
- *  +------------------+
- *  | (return address) |
- *  +------------------+
- *
- */
-
 class StackLayout {
  public:
-  static const size_t THUNK_STACK_SIZE = 280;
+  /**
+   * Stack Layout
+   * ----------------------------
+   * NOTE: stack must always be 16b aligned.
+   * NOTE: diagram predates struct Thunk (r[10] moves xmm[] past rsp + 112).
+   * Thunk stack:
+   *  +------------------+
+   *  | arg temp, 3 * 8  | rsp + 0
+   *  |                  |
+   *  |                  |
+   *  +------------------+
+   *  | scratch, 16b     | rsp + 24
+   *  |                  |
+   *  +------------------+
+   *  | rbx              | rsp + 40
+   *  +------------------+
+   *  | rcx / context    | rsp + 48
+   *  +------------------+
+   *  | rbp              | rsp + 56
+   *  +------------------+
+   *  | rsi              | rsp + 64
+   *  +------------------+
+   *  | rdi              | rsp + 72
+   *  +------------------+
+   *  | r12              | rsp + 80
+   *  +------------------+
+   *  | r13              | rsp + 88
+   *  +------------------+
+   *  | r14              | rsp + 96
+   *  +------------------+
+   *  | r15              | rsp + 104
+   *  +------------------+
+   *  | xmm6/0           | rsp + 112
+   *  |                  |
+   *  +------------------+
+   *  | xmm7/1           | rsp + 128
+   *  |                  |
+   *  +------------------+
+   *  | xmm8/2           | rsp + 144
+   *  |                  |
+   *  +------------------+
+   *  | xmm9/3           | rsp + 160
+   *  |                  |
+   *  +------------------+
+   *  | xmm10/4          | rsp + 176
+   *  |                  |
+   *  +------------------+
+   *  | xmm11/5          | rsp + 192
+   *  |                  |
+   *  +------------------+
+   *  | xmm12            | rsp + 208
+   *  |                  |
+   *  +------------------+
+   *  | xmm13            | rsp + 224
+   *  |                  |
+   *  +------------------+
+   *  | xmm14            | rsp + 240
+   *  |                  |
+   *  +------------------+
+   *  | xmm15            | rsp + 256
+   *  |                  |
+   *  +------------------+
+   *  | scratch, 8b      | rsp + 272
+   *  |                  |
+   *  +------------------+
+   *  | (return address) | rsp + 280
+   *  +------------------+
+   *  | (rcx home)       | rsp + 288
+   *  +------------------+
+   *  | (rdx home)       | rsp + 296
+   *  +------------------+
+   */
+  XEPACKEDSTRUCT(Thunk, {
+    uint64_t arg_temp[3];
+    uint8_t scratch[16];
+    uint64_t r[10];
+    vec128_t xmm[10];
+    uint64_t dummy;
+  });
+  static_assert(sizeof(Thunk) % 16 == 0,
+                "sizeof(Thunk) must be a multiple of 16!");
+  static const size_t THUNK_STACK_SIZE = sizeof(Thunk) + 8;
 
+  /**
+   *
+   *
+   * Guest stack:
+   *  +------------------+
+   *  | arg temp, 3 * 8  | rsp + 0
+   *  |                  |
+   *  |                  |
+   *  +------------------+
+   *  | scratch, 48b     | rsp + 32
+   *  |                  |
+   *  +------------------+
+   *  | rcx / context    | rsp + 80
+   *  +------------------+
+   *  | guest ret addr   | rsp + 88
+   *  +------------------+
+   *  | call ret addr    | rsp + 96
+   *  +------------------+
+   *    ... locals ...
+   *  +------------------+
+   *  | (return address) |
+   *  +------------------+
+   *
+   */
   static const size_t GUEST_STACK_SIZE = 104;
   static const size_t GUEST_CTX_HOME = 80;
   static const size_t GUEST_RET_ADDR = 88;