diff --git a/src/xenia/cpu/backend/x64/x64_backend.cc b/src/xenia/cpu/backend/x64/x64_backend.cc
index 8d3d48cee..f379fcbff 100644
--- a/src/xenia/cpu/backend/x64/x64_backend.cc
+++ b/src/xenia/cpu/backend/x64/x64_backend.cc
@@ -403,9 +403,9 @@ X64ThunkEmitter::X64ThunkEmitter(X64Backend* backend, XbyakAllocator* allocator)
 X64ThunkEmitter::~X64ThunkEmitter() {}
 
 HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
-  // rcx = target
-  // rdx = arg0 (context)
-  // r8 = arg1 (guest return address)
+  // rcx (win), rdi (linux) = target
+  // rdx (win), rsi (linux) = arg0 (context)
+  // r8 (win), rdx (linux) = arg1 (guest return address)
 
   struct _code_offsets {
     size_t prolog;
@@ -420,9 +420,15 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
   code_offsets.prolog = getSize();
 
   // rsp + 0 = return address
+#if XE_PLATFORM_LINUX
+  mov(qword[rsp + 8 * 3], rdx);
+  mov(qword[rsp + 8 * 2], rsi);
+  mov(qword[rsp + 8 * 1], rdi);
+#else
   mov(qword[rsp + 8 * 3], r8);
   mov(qword[rsp + 8 * 2], rdx);
   mov(qword[rsp + 8 * 1], rcx);
+#endif
   sub(rsp, stack_size);
 
   code_offsets.prolog_stack_alloc = getSize();
@@ -431,9 +437,15 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
   // Save nonvolatile registers.
   EmitSaveNonvolatileRegs();
 
+#ifdef XE_PLATFORM_LINUX
+  mov(rax, rdi);
+  // context already in rsi
+  mov(rcx, rdx);  // return address
+#else
   mov(rax, rcx);
   mov(rsi, rdx);  // context
   mov(rcx, r8);   // return address
+#endif
   call(rax);
 
   EmitLoadNonvolatileRegs();
@@ -441,9 +453,15 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
   code_offsets.epilog = getSize();
 
   add(rsp, stack_size);
+#if XE_PLATFORM_LINUX
+  mov(rdi, qword[rsp + 8 * 1]);
+  mov(rsi, qword[rsp + 8 * 2]);
+  mov(rdx, qword[rsp + 8 * 3]);
+#else
   mov(rcx, qword[rsp + 8 * 1]);
   mov(rdx, qword[rsp + 8 * 2]);
   mov(r8, qword[rsp + 8 * 3]);
+#endif
   ret();
 
   code_offsets.tail = getSize();
@@ -464,10 +482,12 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
 }
 
 GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() {
-  // rcx = target function
-  // rdx = arg0
-  // r8 = arg1
-  // r9 = arg2
+  // rcx (windows), rdi (linux) = target function
+  // rdx (windows), rsi (linux) = arg0
+  // r8 (windows), rdx (linux) = arg1
+  // r9 (windows), rcx (linux) = arg2
+  // --- (windows), r8 (linux) = arg3
+  // --- (windows), r9 (linux) = arg4
 
   struct _code_offsets {
     size_t prolog;
@@ -490,8 +510,13 @@ GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() {
   // Save off volatile registers.
   EmitSaveVolatileRegs();
 
-  mov(rax, rcx);              // function
+  mov(rax, rcx);              // function
+#if XE_PLATFORM_LINUX
+  mov(rdi, GetContextReg());  // context
+  mov(rsi, rbx);
+#else
   mov(rcx, GetContextReg());  // context
+#endif
   call(rax);
 
   EmitLoadVolatileRegs();
@@ -546,8 +571,13 @@ ResolveFunctionThunk X64ThunkEmitter::EmitResolveFunctionThunk() {
   // Save volatile registers
   EmitSaveVolatileRegs();
 
-  mov(rcx, rsi);  // context
+#if XE_PLATFORM_LINUX
+  mov(rdi, rsi);  // context
+  mov(rsi, rbx);
+#else
+  mov(rcx, rsi);  // context
   mov(rdx, rbx);
+#endif
   mov(rax, uint64_t(&ResolveFunction));
   call(rax);
 
@@ -578,6 +608,12 @@ ResolveFunctionThunk X64ThunkEmitter::EmitResolveFunctionThunk() {
 void X64ThunkEmitter::EmitSaveVolatileRegs() {
   // Save off volatile registers.
   // mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rax);
+#if XE_PLATFORM_LINUX
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rdi);
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rsi);
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], rcx);
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], rdx);
+#else
   mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rcx);
   mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rdx);
   mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], r8);
@@ -591,10 +627,17 @@ void X64ThunkEmitter::EmitSaveVolatileRegs() {
   vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[3])], xmm3);
   vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[4])], xmm4);
   vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[5])], xmm5);
+#endif
 }
 
 void X64ThunkEmitter::EmitLoadVolatileRegs() {
-  // Load volatile registers from our stack frame.
+#if XE_PLATFORM_LINUX
+  // mov(rax, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
+  mov(rdi, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
+  mov(rsi, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
+  mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
+  mov(rdx, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
+#else
   // vmovaps(xmm0, qword[rsp + offsetof(StackLayout::Thunk, xmm[0])]);
   vmovaps(xmm1, qword[rsp + offsetof(StackLayout::Thunk, xmm[1])]);
   vmovaps(xmm2, qword[rsp + offsetof(StackLayout::Thunk, xmm[2])]);
@@ -609,10 +652,18 @@ void X64ThunkEmitter::EmitLoadVolatileRegs() {
   mov(r9, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
   mov(r10, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
   mov(r11, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
+#endif
 }
 
 void X64ThunkEmitter::EmitSaveNonvolatileRegs() {
-  // Preserve nonvolatile registers.
+#if XE_PLATFORM_LINUX
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rbx);
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rbp);
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], r12);
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], r13);
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], r14);
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r15);
+#else
   mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rbx);
   mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rcx);
   mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rbp);
@@ -633,9 +684,18 @@ void X64ThunkEmitter::EmitSaveNonvolatileRegs() {
   vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[7])], xmm13);
   vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[8])], xmm14);
   vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[9])], xmm15);
+#endif
 }
 
 void X64ThunkEmitter::EmitLoadNonvolatileRegs() {
+#ifdef XE_PLATFORM_LINUX
+  mov(rbx, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
+  mov(rbp, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
+  mov(r12, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
+  mov(r13, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
+  mov(r14, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
+  mov(r15, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
+#else
   vmovaps(xmm6, qword[rsp + offsetof(StackLayout::Thunk, xmm[0])]);
   vmovaps(xmm7, qword[rsp + offsetof(StackLayout::Thunk, xmm[1])]);
   vmovaps(xmm8, qword[rsp + offsetof(StackLayout::Thunk, xmm[2])]);
@@ -656,6 +716,7 @@ void X64ThunkEmitter::EmitLoadNonvolatileRegs() {
   mov(r13, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
   mov(r14, qword[rsp + offsetof(StackLayout::Thunk, r[7])]);
   mov(r15, qword[rsp + offsetof(StackLayout::Thunk, r[8])]);
+#endif
 }
 
 }  // namespace x64
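
Note (not part of the patch): the save/restore changes above exist because the two ABIs disagree about which general-purpose registers a callee may clobber. On Microsoft x64, rdi/rsi and xmm6-xmm15 are callee-saved; on System V (Linux), rdi/rsi are argument registers and every xmm register is caller-saved, so the Linux thunks spill only the integer registers. A minimal reference sketch of the callee-saved sets, assuming the standard ABI rules; the identifiers (kWin64CalleeSaved, etc.) are illustrative, not Xenia names:

```cpp
#include <cstdio>

// Microsoft x64: rdi/rsi are callee-saved, and xmm6-xmm15 must also be
// preserved across calls (hence the ten xmm slots in the Windows paths).
static const char* kWin64CalleeSaved[] = {"rbx", "rbp", "rdi", "rsi",
                                          "r12", "r13", "r14", "r15"};
// System V AMD64 (Linux): rdi/rsi carry arguments (caller-saved) and all
// xmm registers are caller-saved, so only these GPRs need to be preserved.
static const char* kSysVCalleeSaved[] = {"rbx", "rbp", "r12",
                                         "r13", "r14", "r15"};

int main() {
  // A thunk must spill these registers before clobbering them and reload
  // them before returning to its caller.
  std::printf("win64 callee-saved:");
  for (const char* r : kWin64CalleeSaved) std::printf(" %s", r);
  std::printf("\nsysv  callee-saved:");
  for (const char* r : kSysVCalleeSaved) std::printf(" %s", r);
  std::printf("\n");
  return 0;
}
```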
diff --git a/src/xenia/cpu/backend/x64/x64_emitter.cc b/src/xenia/cpu/backend/x64/x64_emitter.cc
index 216efd9bc..a57cada36 100644
--- a/src/xenia/cpu/backend/x64/x64_emitter.cc
+++ b/src/xenia/cpu/backend/x64/x64_emitter.cc
@@ -491,9 +491,9 @@ void X64Emitter::CallExtern(const hir::Instr* instr, const Function* function) {
     if (builtin_function->handler()) {
       undefined = false;
       // rcx = target function
-      // rdx = arg0
-      // r8 = arg1
-      // r9 = arg2
+      // rdx (windows), r8 (linux) = arg0
+      // r8 (windows), rdx (linux) = arg1
+      // r9 (windows), rcx (linux) = arg2
       auto thunk = backend()->guest_to_host_thunk();
       mov(rax, reinterpret_cast<uint64_t>(thunk));
       mov(rcx, reinterpret_cast<uint64_t>(builtin_function->handler()));
@@ -507,9 +507,9 @@ void X64Emitter::CallExtern(const hir::Instr* instr, const Function* function) {
     if (extern_function->extern_handler()) {
       undefined = false;
       // rcx = target function
-      // rdx = arg0
-      // r8 = arg1
-      // r9 = arg2
+      // rdx (windows), r8 (linux) = arg0
+      // r8 (windows), rdx (linux) = arg1
+      // r9 (windows), rcx (linux) = arg2
       auto thunk = backend()->guest_to_host_thunk();
       mov(rax, reinterpret_cast<uint64_t>(thunk));
       mov(rcx, reinterpret_cast<uint64_t>(extern_function->extern_handler()));
@@ -542,9 +542,9 @@ void X64Emitter::CallNative(uint64_t (*fn)(void* raw_context, uint64_t arg0),
 
 void X64Emitter::CallNativeSafe(void* fn) {
   // rcx = target function
-  // rdx = arg0
-  // r8 = arg1
-  // r9 = arg2
+  // rdx (windows), r8 (linux) = arg0
+  // r8 (windows), rdx (linux) = arg1
+  // r9 (windows), rcx (linux) = arg2
   auto thunk = backend()->guest_to_host_thunk();
   mov(rax, reinterpret_cast<uint64_t>(thunk));
   mov(rcx, reinterpret_cast<uint64_t>(fn));
@@ -558,6 +558,17 @@ void X64Emitter::SetReturnAddress(uint64_t value) {
 }
 
 Xbyak::Reg64 X64Emitter::GetNativeParam(uint32_t param) {
+#if XE_PLATFORM_LINUX
+  if (param == 0)
+    return rbx;
+  else if (param == 1)
+    return rdx;
+  else if (param == 2)
+    return rcx;
+
+  assert_always();
+  return rcx;
+#else
   if (param == 0)
     return rdx;
   else if (param == 1)
@@ -567,6 +578,7 @@ Xbyak::Reg64 X64Emitter::GetNativeParam(uint32_t param) {
 
   assert_always();
   return r9;
+#endif
 }
 
 // Important: If you change these, you must update the thunks in x64_backend.cc!
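
Note (not part of the patch): the comment updates and the GetNativeParam change reflect the two conventions' different integer-argument register orders (rcx, rdx, r8, r9 on Windows; rdi, rsi, rdx, rcx, r8, r9 on System V). The sketch below uses a hypothetical helper, ArgReg, not a Xenia function, to show where the host-to-guest thunk's three incoming values land on each platform:

```cpp
#include <cassert>
#include <cstdio>

enum class Abi { kWin64, kSysV };

// Returns the register that carries the Nth integer/pointer argument under
// the given calling convention (register arguments only; the rest go on the
// stack).
const char* ArgReg(Abi abi, int index) {
  static const char* win64[] = {"rcx", "rdx", "r8", "r9"};
  static const char* sysv[] = {"rdi", "rsi", "rdx", "rcx", "r8", "r9"};
  if (abi == Abi::kWin64) {
    assert(index >= 0 && index < 4);
    return win64[index];
  }
  assert(index >= 0 && index < 6);
  return sysv[index];
}

int main() {
  // Host-to-guest thunk arguments: (target, context, guest return address).
  const char* names[] = {"target", "context", "guest_ret"};
  for (int i = 0; i < 3; ++i) {
    std::printf("%-9s win64=%-3s sysv=%-3s\n", names[i],
                ArgReg(Abi::kWin64, i), ArgReg(Abi::kSysV, i));
  }
  return 0;
}
```

Running it prints target=rcx/rdi, context=rdx/rsi, guest_ret=r8/rdx, which matches the updated comments in EmitHostToGuestThunk above.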