diff --git a/src/xenia/cpu/backend/x64/x64_backend.cc b/src/xenia/cpu/backend/x64/x64_backend.cc index 19527a3a1..c5976f84f 100644 --- a/src/xenia/cpu/backend/x64/x64_backend.cc +++ b/src/xenia/cpu/backend/x64/x64_backend.cc @@ -388,8 +388,8 @@ X64ThunkEmitter::~X64ThunkEmitter() {} HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() { // rcx = target - // rdx = arg0 - // r8 = arg1 + // rdx = arg0 (context) + // r8 = arg1 (guest return address) const size_t stack_size = StackLayout::THUNK_STACK_SIZE; // rsp + 0 = return address @@ -399,52 +399,52 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() { sub(rsp, stack_size); // Preserve nonvolatile registers. - mov(qword[rsp + 40], rbx); - mov(qword[rsp + 48], rcx); - mov(qword[rsp + 56], rbp); - mov(qword[rsp + 64], rsi); - mov(qword[rsp + 72], rdi); - mov(qword[rsp + 80], r12); - mov(qword[rsp + 88], r13); - mov(qword[rsp + 96], r14); - mov(qword[rsp + 104], r15); + mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rbx); + mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rcx); + mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rbp); + mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], rsi); + mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], rdi); + mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r12); + mov(qword[rsp + offsetof(StackLayout::Thunk, r[6])], r13); + mov(qword[rsp + offsetof(StackLayout::Thunk, r[7])], r14); + mov(qword[rsp + offsetof(StackLayout::Thunk, r[8])], r15); - movaps(ptr[rsp + 112], xmm6); - movaps(ptr[rsp + 128], xmm7); - movaps(ptr[rsp + 144], xmm8); - movaps(ptr[rsp + 160], xmm9); - movaps(ptr[rsp + 176], xmm10); - movaps(ptr[rsp + 192], xmm11); - movaps(ptr[rsp + 208], xmm12); - movaps(ptr[rsp + 224], xmm13); - movaps(ptr[rsp + 240], xmm14); - movaps(ptr[rsp + 256], xmm15); + movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[0])], xmm6); + movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[1])], xmm7); + movaps(qword[rsp + offsetof(StackLayout::Thunk, 
xmm[2])], xmm8); + movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[3])], xmm9); + movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[4])], xmm10); + movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[5])], xmm11); + movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[6])], xmm12); + movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[7])], xmm13); + movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[8])], xmm14); + movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[9])], xmm15); mov(rax, rcx); - mov(rcx, rdx); + mov(rsi, rdx); // context mov(rdx, r8); call(rax); - movaps(xmm6, ptr[rsp + 112]); - movaps(xmm7, ptr[rsp + 128]); - movaps(xmm8, ptr[rsp + 144]); - movaps(xmm9, ptr[rsp + 160]); - movaps(xmm10, ptr[rsp + 176]); - movaps(xmm11, ptr[rsp + 192]); - movaps(xmm12, ptr[rsp + 208]); - movaps(xmm13, ptr[rsp + 224]); - movaps(xmm14, ptr[rsp + 240]); - movaps(xmm15, ptr[rsp + 256]); + movaps(xmm6, qword[rsp + offsetof(StackLayout::Thunk, xmm[0])]); + movaps(xmm7, qword[rsp + offsetof(StackLayout::Thunk, xmm[1])]); + movaps(xmm8, qword[rsp + offsetof(StackLayout::Thunk, xmm[2])]); + movaps(xmm9, qword[rsp + offsetof(StackLayout::Thunk, xmm[3])]); + movaps(xmm10, qword[rsp + offsetof(StackLayout::Thunk, xmm[4])]); + movaps(xmm11, qword[rsp + offsetof(StackLayout::Thunk, xmm[5])]); + movaps(xmm12, qword[rsp + offsetof(StackLayout::Thunk, xmm[6])]); + movaps(xmm13, qword[rsp + offsetof(StackLayout::Thunk, xmm[7])]); + movaps(xmm14, qword[rsp + offsetof(StackLayout::Thunk, xmm[8])]); + movaps(xmm15, qword[rsp + offsetof(StackLayout::Thunk, xmm[9])]); - mov(rbx, qword[rsp + 40]); - mov(rcx, qword[rsp + 48]); - mov(rbp, qword[rsp + 56]); - mov(rsi, qword[rsp + 64]); - mov(rdi, qword[rsp + 72]); - mov(r12, qword[rsp + 80]); - mov(r13, qword[rsp + 88]); - mov(r14, qword[rsp + 96]); - mov(r15, qword[rsp + 104]); + mov(rbx, qword[rsp + offsetof(StackLayout::Thunk, r[0])]); + mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]); + mov(rbp, qword[rsp + 
offsetof(StackLayout::Thunk, r[2])]); + mov(rsi, qword[rsp + offsetof(StackLayout::Thunk, r[3])]); + mov(rdi, qword[rsp + offsetof(StackLayout::Thunk, r[4])]); + mov(r12, qword[rsp + offsetof(StackLayout::Thunk, r[5])]); + mov(r13, qword[rsp + offsetof(StackLayout::Thunk, r[6])]); + mov(r14, qword[rsp + offsetof(StackLayout::Thunk, r[7])]); + mov(r15, qword[rsp + offsetof(StackLayout::Thunk, r[8])]); add(rsp, stack_size); mov(rcx, qword[rsp + 8 * 1]); @@ -469,34 +469,40 @@ GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() { mov(qword[rsp + 8 * 1], rcx); sub(rsp, stack_size); - mov(qword[rsp + 40], rbx); - mov(qword[rsp + 48], rcx); - mov(qword[rsp + 56], rbp); - mov(qword[rsp + 64], rsi); - mov(qword[rsp + 72], rdi); - mov(qword[rsp + 80], r12); - mov(qword[rsp + 88], r13); - mov(qword[rsp + 96], r14); - mov(qword[rsp + 104], r15); + // Save off volatile registers. + // TODO(DrChat): Enable this when we actually need this. + // mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rcx); + // mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rdx); + // mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], r8); + // mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], r9); + // mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], r10); + // mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r11); - // TODO(benvanik): save things? XMM0-5? - // HACK: Some emulated vector instructions require that we don't touch xmm0. 
+ // movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[1])], xmm1); + // movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[2])], xmm2); + // movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[3])], xmm3); + // movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[4])], xmm4); + // movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[5])], xmm5); mov(rax, rdx); + mov(rcx, rsi); // context mov(rdx, r8); mov(r8, r9); mov(r9, r10); call(rax); - mov(rbx, qword[rsp + 40]); - mov(rcx, qword[rsp + 48]); - mov(rbp, qword[rsp + 56]); - mov(rsi, qword[rsp + 64]); - mov(rdi, qword[rsp + 72]); - mov(r12, qword[rsp + 80]); - mov(r13, qword[rsp + 88]); - mov(r14, qword[rsp + 96]); - mov(r15, qword[rsp + 104]); + // movaps(xmm1, qword[rsp + offsetof(StackLayout::Thunk, xmm[1])]); + // movaps(xmm2, qword[rsp + offsetof(StackLayout::Thunk, xmm[2])]); + // movaps(xmm3, qword[rsp + offsetof(StackLayout::Thunk, xmm[3])]); + // movaps(xmm4, qword[rsp + offsetof(StackLayout::Thunk, xmm[4])]); + // movaps(xmm5, qword[rsp + offsetof(StackLayout::Thunk, xmm[5])]); + + // mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[0])]); + // mov(rdx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]); + // mov(r8, qword[rsp + offsetof(StackLayout::Thunk, r[2])]); + // mov(r9, qword[rsp + offsetof(StackLayout::Thunk, r[3])]); + // mov(r10, qword[rsp + offsetof(StackLayout::Thunk, r[4])]); + // mov(r11, qword[rsp + offsetof(StackLayout::Thunk, r[5])]); add(rsp, stack_size); mov(rcx, qword[rsp + 8 * 1]); @@ -514,36 +520,18 @@ ResolveFunctionThunk X64ThunkEmitter::EmitResolveFunctionThunk() { // ebx = target PPC address // rcx = context - const size_t stack_size = StackLayout::THUNK_STACK_SIZE; + uint32_t stack_size = 0x18; + // rsp + 0 = return address mov(qword[rsp + 8 * 2], rdx); mov(qword[rsp + 8 * 1], rcx); sub(rsp, stack_size); - mov(qword[rsp + 40], rbx); - mov(qword[rsp + 48], rcx); - mov(qword[rsp + 56], rbp); - mov(qword[rsp + 64], rsi); - mov(qword[rsp + 72], rdi); - mov(qword[rsp + 80], 
r12); - mov(qword[rsp + 88], r13); - mov(qword[rsp + 96], r14); - mov(qword[rsp + 104], r15); - + mov(rcx, rsi); // context mov(rdx, rbx); mov(rax, uint64_t(&ResolveFunction)); call(rax); - mov(rbx, qword[rsp + 40]); - mov(rcx, qword[rsp + 48]); - mov(rbp, qword[rsp + 56]); - mov(rsi, qword[rsp + 64]); - mov(rdi, qword[rsp + 72]); - mov(r12, qword[rsp + 80]); - mov(r13, qword[rsp + 88]); - mov(r14, qword[rsp + 96]); - mov(r15, qword[rsp + 104]); - add(rsp, stack_size); mov(rcx, qword[rsp + 8 * 1]); mov(rdx, qword[rsp + 8 * 2]); diff --git a/src/xenia/cpu/backend/x64/x64_code_cache.cc b/src/xenia/cpu/backend/x64/x64_code_cache.cc index fc90102d2..01d098db8 100644 --- a/src/xenia/cpu/backend/x64/x64_code_cache.cc +++ b/src/xenia/cpu/backend/x64/x64_code_cache.cc @@ -181,6 +181,10 @@ void* X64CodeCache::PlaceGuestCode(uint32_t guest_address, void* machine_code, // Copy code. std::memcpy(code_address, machine_code, code_size); + // Fill unused slots with 0xCC + std::memset(code_address + code_size, 0xCC, + xe::round_up(code_size, 16) - code_size); + // Notify subclasses of placed code. PlaceCode(guest_address, machine_code, code_size, stack_size, code_address, unwind_reservation); diff --git a/src/xenia/cpu/backend/x64/x64_emitter.cc b/src/xenia/cpu/backend/x64/x64_emitter.cc index dc087c3ca..b19579cc4 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.cc +++ b/src/xenia/cpu/backend/x64/x64_emitter.cc @@ -168,7 +168,7 @@ bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) { *out_stack_size = stack_size; stack_size_ = stack_size; sub(rsp, (uint32_t)stack_size); - mov(qword[rsp + StackLayout::GUEST_CTX_HOME], rcx); + mov(qword[rsp + StackLayout::GUEST_CTX_HOME], GetContextReg()); mov(qword[rsp + StackLayout::GUEST_RET_ADDR], rdx); mov(qword[rsp + StackLayout::GUEST_CALL_RET_ADDR], 0); @@ -201,7 +201,8 @@ bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) { } // Load membase. 
- mov(rdx, qword[rcx + offsetof(ppc::PPCContext, virtual_membase)]); + mov(GetMembaseReg(), + qword[GetContextReg() + offsetof(ppc::PPCContext, virtual_membase)]); // Body. auto block = builder->first_block(); @@ -233,7 +234,7 @@ bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) { L(epilog_label); epilog_label_ = nullptr; EmitTraceUserCallReturn(); - mov(rcx, qword[rsp + StackLayout::GUEST_CTX_HOME]); + mov(GetContextReg(), qword[rsp + StackLayout::GUEST_CTX_HOME]); add(rsp, (uint32_t)stack_size); ret(); @@ -272,8 +273,8 @@ void X64Emitter::MarkSourceOffset(const Instr* i) { } void X64Emitter::EmitGetCurrentThreadId() { - // rcx must point to context. We could fetch from the stack if needed. - mov(ax, word[rcx + offsetof(ppc::PPCContext, thread_id)]); + // rsi must point to context. We could fetch from the stack if needed. + mov(ax, word[GetContextReg() + offsetof(ppc::PPCContext, thread_id)]); } void X64Emitter::EmitTraceUserCallReturn() {} @@ -372,10 +373,9 @@ void X64Emitter::Call(const hir::Instr* instr, GuestFunction* function) { // Not too important because indirection table is almost always available. // TODO: Overwrite the call-site with a straight call. mov(rax, reinterpret_cast(ResolveFunction)); + mov(rcx, GetContextReg()); mov(rdx, function->address()); call(rax); - ReloadECX(); - ReloadEDX(); } // Actually jump/call to rax. @@ -417,9 +417,8 @@ void X64Emitter::CallIndirect(const hir::Instr* instr, // Not too important because indirection table is almost always available. mov(edx, reg.cvt32()); mov(rax, reinterpret_cast(ResolveFunction)); + mov(rcx, GetContextReg()); call(rax); - ReloadECX(); - ReloadEDX(); } // Actually jump/call to rax. 
@@ -461,14 +460,13 @@ void X64Emitter::CallExtern(const hir::Instr* instr, const Function* function) { // rdx = target host function // r8 = arg0 // r9 = arg1 + mov(rcx, GetContextReg()); mov(rdx, reinterpret_cast(builtin_function->handler())); mov(r8, reinterpret_cast(builtin_function->arg0())); mov(r9, reinterpret_cast(builtin_function->arg1())); auto thunk = backend()->guest_to_host_thunk(); mov(rax, reinterpret_cast(thunk)); call(rax); - ReloadECX(); - ReloadEDX(); // rax = host return } } else if (function->behavior() == Function::Behavior::kExtern) { @@ -477,13 +475,12 @@ void X64Emitter::CallExtern(const hir::Instr* instr, const Function* function) { undefined = false; // rcx = context // rdx = target host function + mov(rcx, GetContextReg()); mov(rdx, reinterpret_cast(extern_function->extern_handler())); - mov(r8, qword[rcx + offsetof(ppc::PPCContext, kernel_state)]); + mov(r8, qword[GetContextReg() + offsetof(ppc::PPCContext, kernel_state)]); auto thunk = backend()->guest_to_host_thunk(); mov(rax, reinterpret_cast(thunk)); call(rax); - ReloadECX(); - ReloadEDX(); // rax = host return } } @@ -494,32 +491,28 @@ void X64Emitter::CallExtern(const hir::Instr* instr, const Function* function) { void X64Emitter::CallNative(void* fn) { mov(rax, reinterpret_cast(fn)); + mov(rcx, GetContextReg()); call(rax); - ReloadECX(); - ReloadEDX(); } void X64Emitter::CallNative(uint64_t (*fn)(void* raw_context)) { mov(rax, reinterpret_cast(fn)); + mov(rcx, GetContextReg()); call(rax); - ReloadECX(); - ReloadEDX(); } void X64Emitter::CallNative(uint64_t (*fn)(void* raw_context, uint64_t arg0)) { mov(rax, reinterpret_cast(fn)); + mov(rcx, GetContextReg()); call(rax); - ReloadECX(); - ReloadEDX(); } void X64Emitter::CallNative(uint64_t (*fn)(void* raw_context, uint64_t arg0), uint64_t arg0) { - mov(rdx, arg0); mov(rax, reinterpret_cast(fn)); + mov(rcx, GetContextReg()); + mov(rdx, arg0); call(rax); - ReloadECX(); - ReloadEDX(); } void X64Emitter::CallNativeSafe(void* fn) { @@ 
-528,12 +521,11 @@ void X64Emitter::CallNativeSafe(void* fn) { // r8 = arg0 // r9 = arg1 // r10 = arg2 - mov(rdx, reinterpret_cast(fn)); auto thunk = backend()->guest_to_host_thunk(); mov(rax, reinterpret_cast(thunk)); + mov(rcx, GetContextReg()); + mov(rdx, reinterpret_cast(fn)); call(rax); - ReloadECX(); - ReloadEDX(); // rax = host return } @@ -542,15 +534,18 @@ void X64Emitter::SetReturnAddress(uint64_t value) { mov(qword[rsp + StackLayout::GUEST_CALL_RET_ADDR], rax); } -Xbyak::Reg64 X64Emitter::GetContextReg() { return rcx; } -Xbyak::Reg64 X64Emitter::GetMembaseReg() { return rdx; } +// Important: If you change these, you must update the thunks in x64_backend.cc! +Xbyak::Reg64 X64Emitter::GetContextReg() { return rsi; } +Xbyak::Reg64 X64Emitter::GetMembaseReg() { return rdi; } -void X64Emitter::ReloadECX() { - mov(rcx, qword[rsp + StackLayout::GUEST_CTX_HOME]); +void X64Emitter::ReloadContext() { + mov(GetContextReg(), qword[rsp + StackLayout::GUEST_CTX_HOME]); } -void X64Emitter::ReloadEDX() { - mov(rdx, qword[rcx + 8]); // membase +// Re-reads virtual_membase from the context into the membase register. +void X64Emitter::ReloadMembase() { + mov(GetMembaseReg(), + qword[GetContextReg() + offsetof(ppc::PPCContext, virtual_membase)]); } // Len Assembly Byte Sequence diff --git a/src/xenia/cpu/backend/x64/x64_emitter.h b/src/xenia/cpu/backend/x64/x64_emitter.h index 5de5985f2..c3d3a4356 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.h +++ b/src/xenia/cpu/backend/x64/x64_emitter.h @@ -173,8 +173,8 @@ class X64Emitter : public Xbyak::CodeGenerator { Xbyak::Reg64 GetContextReg(); Xbyak::Reg64 GetMembaseReg(); - void ReloadECX(); - void ReloadEDX(); + void ReloadContext(); + void ReloadMembase(); void nop(size_t length = 1); diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc index ac8237e7c..7185e8aa4 100644 --- a/src/xenia/cpu/backend/x64/x64_sequences.cc +++ b/src/xenia/cpu/backend/x64/x64_sequences.cc @@ -1611,7 +1611,7 @@ struct LOAD_VECTOR_SHL_I8 e.shl(e.dx, 4); e.mov(e.rax, (uintptr_t)lvsl_table); 
e.vmovaps(i.dest, e.ptr[e.rax + e.rdx]); - e.ReloadEDX(); + e.ReloadMembase(); } } }; @@ -1653,7 +1653,7 @@ struct LOAD_VECTOR_SHR_I8 e.shl(e.dx, 4); e.mov(e.rax, (uintptr_t)lvsr_table); e.vmovaps(i.dest, e.ptr[e.rax + e.rdx]); - e.ReloadEDX(); + e.ReloadMembase(); } } }; @@ -3788,7 +3788,7 @@ struct MUL_I8 : Sequence> { } } - e.ReloadEDX(); + e.ReloadMembase(); } }; struct MUL_I16 : Sequence> { @@ -3831,7 +3831,7 @@ struct MUL_I16 : Sequence> { } } - e.ReloadEDX(); + e.ReloadMembase(); } }; struct MUL_I32 : Sequence> { @@ -3875,7 +3875,7 @@ struct MUL_I32 : Sequence> { } } - e.ReloadEDX(); + e.ReloadMembase(); } }; struct MUL_I64 : Sequence> { @@ -3918,7 +3918,7 @@ struct MUL_I64 : Sequence> { } } - e.ReloadEDX(); + e.ReloadMembase(); } }; struct MUL_F32 : Sequence> { @@ -3996,7 +3996,7 @@ struct MUL_HI_I8 : Sequence> { } e.mov(i.dest, e.ah); } - e.ReloadEDX(); + e.ReloadMembase(); } }; struct MUL_HI_I16 @@ -4040,7 +4040,7 @@ struct MUL_HI_I16 } e.mov(i.dest, e.dx); } - e.ReloadEDX(); + e.ReloadMembase(); } }; struct MUL_HI_I32 @@ -4089,7 +4089,7 @@ struct MUL_HI_I32 } e.mov(i.dest, e.edx); } - e.ReloadEDX(); + e.ReloadMembase(); } }; struct MUL_HI_I64 @@ -4138,7 +4138,7 @@ struct MUL_HI_I64 } e.mov(i.dest, e.rdx); } - e.ReloadEDX(); + e.ReloadMembase(); } }; EMITTER_OPCODE_TABLE(OPCODE_MUL_HI, MUL_HI_I8, MUL_HI_I16, MUL_HI_I32, @@ -4193,9 +4193,9 @@ struct DIV_I8 : Sequence> { e.outLocalLabel(); e.mov(i.dest, e.al); if (clobbered_rcx) { - e.ReloadECX(); + e.ReloadContext(); } - e.ReloadEDX(); + e.ReloadMembase(); } }; struct DIV_I16 : Sequence> { @@ -4248,9 +4248,9 @@ struct DIV_I16 : Sequence> { e.outLocalLabel(); e.mov(i.dest, e.ax); if (clobbered_rcx) { - e.ReloadECX(); + e.ReloadContext(); } - e.ReloadEDX(); + e.ReloadMembase(); } }; struct DIV_I32 : Sequence> { @@ -4303,9 +4303,9 @@ struct DIV_I32 : Sequence> { e.outLocalLabel(); e.mov(i.dest, e.eax); if (clobbered_rcx) { - e.ReloadECX(); + e.ReloadContext(); } - e.ReloadEDX(); + e.ReloadMembase(); } }; 
struct DIV_I64 : Sequence> { @@ -4358,9 +4358,9 @@ struct DIV_I64 : Sequence> { e.outLocalLabel(); e.mov(i.dest, e.rax); if (clobbered_rcx) { - e.ReloadECX(); + e.ReloadContext(); } - e.ReloadEDX(); + e.ReloadMembase(); } }; struct DIV_F32 : Sequence> { @@ -5225,7 +5225,7 @@ void EmitShlXX(X64Emitter& e, const ARGS& i) { } else { e.mov(e.cl, src); e.shl(dest_src, e.cl); - e.ReloadECX(); + e.ReloadContext(); } }, [](X64Emitter& e, const REG& dest_src, int8_t constant) { @@ -5303,7 +5303,7 @@ void EmitShrXX(X64Emitter& e, const ARGS& i) { } else { e.mov(e.cl, src); e.shr(dest_src, e.cl); - e.ReloadECX(); + e.ReloadContext(); } }, [](X64Emitter& e, const REG& dest_src, int8_t constant) { @@ -5379,7 +5379,7 @@ void EmitSarXX(X64Emitter& e, const ARGS& i) { } else { e.mov(e.cl, src); e.sar(dest_src, e.cl); - e.ReloadECX(); + e.ReloadContext(); } }, [](X64Emitter& e, const REG& dest_src, int8_t constant) { @@ -5988,7 +5988,7 @@ void EmitRotateLeftXX(X64Emitter& e, const ARGS& i) { } } e.rol(i.dest, e.cl); - e.ReloadECX(); + e.ReloadContext(); } } struct ROTATE_LEFT_I8 @@ -6469,7 +6469,7 @@ struct EXTRACT_I32 e.vmovaps(e.xmm0, e.ptr[e.rdx + e.rax]); e.vpshufb(e.xmm0, i.src1, e.xmm0); e.vpextrd(i.dest, e.xmm0, 0); - e.ReloadEDX(); + e.ReloadMembase(); } } }; @@ -7508,10 +7508,10 @@ struct ATOMIC_COMPARE_EXCHANGE_I32 e.mov(e.eax, i.src2); e.mov(e.ecx, i.src1.reg().cvt32()); e.lock(); - e.cmpxchg(e.dword[e.rdx + e.rcx], i.src3); + e.cmpxchg(e.dword[e.GetMembaseReg() + e.rcx], i.src3); e.sete(i.dest); - e.ReloadECX(); + e.ReloadContext(); } }; struct ATOMIC_COMPARE_EXCHANGE_I64 @@ -7521,10 +7521,10 @@ struct ATOMIC_COMPARE_EXCHANGE_I64 e.mov(e.rax, i.src2); e.mov(e.ecx, i.src1.reg().cvt32()); e.lock(); - e.cmpxchg(e.qword[e.rdx + e.rcx], i.src3); + e.cmpxchg(e.qword[e.GetMembaseReg() + e.rcx], i.src3); e.sete(i.dest); - e.ReloadECX(); + e.ReloadContext(); } }; EMITTER_OPCODE_TABLE(OPCODE_ATOMIC_COMPARE_EXCHANGE, diff --git a/src/xenia/cpu/backend/x64/x64_stack_layout.h 
b/src/xenia/cpu/backend/x64/x64_stack_layout.h index 439e1d708..3835bb82e 100644 --- a/src/xenia/cpu/backend/x64/x64_stack_layout.h +++ b/src/xenia/cpu/backend/x64/x64_stack_layout.h @@ -10,6 +10,7 @@ #ifndef XENIA_CPU_BACKEND_X64_X64_STACK_LAYOUT_H_ #define XENIA_CPU_BACKEND_X64_X64_STACK_LAYOUT_H_ +#include "xenia/base/vec128.h" #include "xenia/cpu/backend/x64/x64_backend.h" #include "xenia/cpu/backend/x64/x64_emitter.h" @@ -18,105 +19,115 @@ namespace cpu { namespace backend { namespace x64 { -/** - * Stack Layout - * ---------------------------- - * NOTE: stack must always be 16b aligned. - * - * Thunk stack: - * +------------------+ - * | arg temp, 3 * 8 | rsp + 0 - * | | - * | | - * +------------------+ - * | scratch, 16b | rsp + 24 - * | | - * +------------------+ - * | rbx | rsp + 40 - * +------------------+ - * | rcx / context | rsp + 48 - * +------------------+ - * | rbp | rsp + 56 - * +------------------+ - * | rsi | rsp + 64 - * +------------------+ - * | rdi | rsp + 72 - * +------------------+ - * | r12 | rsp + 80 - * +------------------+ - * | r13 | rsp + 88 - * +------------------+ - * | r14 | rsp + 96 - * +------------------+ - * | r15 | rsp + 104 - * +------------------+ - * | xmm6/0 | rsp + 112 - * | | - * +------------------+ - * | xmm7/1 | rsp + 128 - * | | - * +------------------+ - * | xmm8/2 | rsp + 144 - * | | - * +------------------+ - * | xmm9/3 | rsp + 160 - * | | - * +------------------+ - * | xmm10/4 | rsp + 176 - * | | - * +------------------+ - * | xmm11/5 | rsp + 192 - * | | - * +------------------+ - * | xmm12 | rsp + 208 - * | | - * +------------------+ - * | xmm13 | rsp + 224 - * | | - * +------------------+ - * | xmm14 | rsp + 240 - * | | - * +------------------+ - * | xmm15 | rsp + 256 - * | | - * +------------------+ - * | scratch, 8b | rsp + 272 - * | | - * +------------------+ - * | (return address) | rsp + 280 - * +------------------+ - * | (rcx home) | rsp + 288 - * +------------------+ - * | (rdx home) | rsp + 296 - * 
+------------------+ - * - * - * Guest stack: - * +------------------+ - * | arg temp, 3 * 8 | rsp + 0 - * | | - * | | - * +------------------+ - * | scratch, 48b | rsp + 32 - * | | - * +------------------+ - * | rcx / context | rsp + 80 - * +------------------+ - * | guest ret addr | rsp + 88 - * +------------------+ - * | call ret addr | rsp + 96 - * +------------------+ - * ... locals ... - * +------------------+ - * | (return address) | - * +------------------+ - * - */ - class StackLayout { public: - static const size_t THUNK_STACK_SIZE = 280; + /** + * Stack Layout + * ---------------------------- + * NOTE: stack must always be 16b aligned. + * + * Thunk stack: + * +------------------+ + * | arg temp, 3 * 8 | rsp + 0 + * | | + * | | + * +------------------+ + * | scratch, 16b | rsp + 24 + * | | + * +------------------+ + * | rbx | rsp + 40 + * +------------------+ + * | rcx / context | rsp + 48 + * +------------------+ + * | rbp | rsp + 56 + * +------------------+ + * | rsi | rsp + 64 + * +------------------+ + * | rdi | rsp + 72 + * +------------------+ + * | r12 | rsp + 80 + * +------------------+ + * | r13 | rsp + 88 + * +------------------+ + * | r14 | rsp + 96 + * +------------------+ + * | r15 | rsp + 104 + * +------------------+ + * | xmm6/0 | rsp + 112 + * | | + * +------------------+ + * | xmm7/1 | rsp + 128 + * | | + * +------------------+ + * | xmm8/2 | rsp + 144 + * | | + * +------------------+ + * | xmm9/3 | rsp + 160 + * | | + * +------------------+ + * | xmm10/4 | rsp + 176 + * | | + * +------------------+ + * | xmm11/5 | rsp + 192 + * | | + * +------------------+ + * | xmm12 | rsp + 208 + * | | + * +------------------+ + * | xmm13 | rsp + 224 + * | | + * +------------------+ + * | xmm14 | rsp + 240 + * | | + * +------------------+ + * | xmm15 | rsp + 256 + * | | + * +------------------+ + * | scratch, 8b | rsp + 272 + * | | + * +------------------+ + * | (return address) | rsp + 280 + * +------------------+ + * | (rcx home) | rsp + 288 + * 
+------------------+ + * | (rdx home) | rsp + 296 + * +------------------+ + */ + XEPACKEDSTRUCT(Thunk, { + uint64_t arg_temp[3]; + uint8_t scratch[16]; + uint64_t r[10]; + uint64_t dummy; // Padding: keeps xmm[] at a 16b-aligned offset (128). + vec128_t xmm[10]; + }); + static_assert(sizeof(Thunk) % 16 == 0, + "sizeof(Thunk) must be a multiple of 16!"); + static const size_t THUNK_STACK_SIZE = sizeof(Thunk) + 8; + /** + * + * + * Guest stack: + * +------------------+ + * | arg temp, 3 * 8 | rsp + 0 + * | | + * | | + * +------------------+ + * | scratch, 48b | rsp + 32 + * | | + * +------------------+ + * | rsi / context | rsp + 80 + * +------------------+ + * | guest ret addr | rsp + 88 + * +------------------+ + * | call ret addr | rsp + 96 + * +------------------+ + * ... locals ... + * +------------------+ + * | (return address) | + * +------------------+ + * + */ static const size_t GUEST_STACK_SIZE = 104; static const size_t GUEST_CTX_HOME = 80; static const size_t GUEST_RET_ADDR = 88;