From 009a6d0745f5a756e56e303415a2534576aecb62 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Fri, 31 Jan 2014 22:16:05 -0800 Subject: [PATCH] Hacking together proper stack saving. Can't get >128b frames working. --- .../x64/lowering/lowering_sequences.cc | 53 ++++--- src/alloy/backend/x64/lowering/op_utils.inl | 4 +- src/alloy/backend/x64/sources.gypi | 2 + src/alloy/backend/x64/x64_assembler.cc | 7 +- src/alloy/backend/x64/x64_assembler.h | 2 + src/alloy/backend/x64/x64_backend.cc | 8 + src/alloy/backend/x64/x64_backend.h | 9 ++ src/alloy/backend/x64/x64_code_cache.cc | 103 ++++++++++--- src/alloy/backend/x64/x64_code_cache.h | 2 +- src/alloy/backend/x64/x64_emitter.cc | 29 ++-- src/alloy/backend/x64/x64_emitter.h | 13 +- src/alloy/backend/x64/x64_function.cc | 9 +- src/alloy/backend/x64/x64_thunk_emitter.cc | 139 ++++++++++++++++++ src/alloy/backend/x64/x64_thunk_emitter.h | 124 ++++++++++++++++ 14 files changed, 424 insertions(+), 80 deletions(-) create mode 100644 src/alloy/backend/x64/x64_thunk_emitter.cc create mode 100644 src/alloy/backend/x64/x64_thunk_emitter.h diff --git a/src/alloy/backend/x64/lowering/lowering_sequences.cc b/src/alloy/backend/x64/lowering/lowering_sequences.cc index f161424a8..d2420c740 100644 --- a/src/alloy/backend/x64/lowering/lowering_sequences.cc +++ b/src/alloy/backend/x64/lowering/lowering_sequences.cc @@ -9,8 +9,10 @@ #include +#include #include #include +#include #include #include #include @@ -44,6 +46,11 @@ namespace { #define SHUFPS_SWAP_DWORDS 0x1B + +// Major templating foo lives in here. +#include + + enum XmmConst { XMMZero = 0, XMMOne = 1, @@ -156,25 +163,31 @@ void* ResolveFunctionAddress(void* raw_context, uint32_t target_address) { auto x64_fn = (X64Function*)fn; return x64_fn->machine_code(); } +void TransitionToHost(X64Emitter& e) { + // Expects: + // rcx = context + // rdx = target host function + // r8 = arg0 + // r9 = arg1 + // Returns: + // rax = host return + auto thunk = e.backend()->guest_to_host_thunk(); + e.mov(e.rax, (uint64_t)thunk); + e.call(e.rax); +} void IssueCall(X64Emitter& e, FunctionInfo* symbol_info, uint32_t flags) { auto fn = symbol_info->function(); // Resolve address to the function to call and store in rax. // TODO(benvanik): caching/etc. For now this makes debugging easier. e.mov(e.rdx, (uint64_t)symbol_info); - e.mov(e.rax, (uint64_t)ResolveFunctionSymbol); - e.call(e.rax); - e.mov(e.rcx, e.qword[e.rsp + 0]); - e.mov(e.rdx, e.qword[e.rcx + 8]); // membase + CallNative(e, ResolveFunctionSymbol); // Actually jump/call to rax. if (flags & CALL_TAIL) { - // TODO(benvanik): adjust stack? - e.add(e.rsp, 72); + e.add(e.rsp, StackLayout::GUEST_STACK_SIZE); e.jmp(e.rax); } else { e.call(e.rax); - e.mov(e.rcx, e.qword[e.rsp + 0]); - e.mov(e.rdx, e.qword[e.rcx + 8]); // membase } } void IssueCallIndirect(X64Emitter& e, Value* target, uint32_t flags) { @@ -186,30 +199,20 @@ void IssueCallIndirect(X64Emitter& e, Value* target, uint32_t flags) { e.mov(e.rdx, r); } e.EndOp(r); - e.mov(e.rax, (uint64_t)ResolveFunctionAddress); - e.call(e.rax); - e.mov(e.rcx, e.qword[e.rsp + 0]); - e.mov(e.rdx, e.qword[e.rcx + 8]); // membase + CallNative(e, ResolveFunctionAddress); // Actually jump/call to rax. if (flags & CALL_TAIL) { - // TODO(benvanik): adjust stack? - e.add(e.rsp, 72); + e.add(e.rsp, StackLayout::GUEST_STACK_SIZE); e.jmp(e.rax); } else { e.call(e.rax); - e.mov(e.rcx, e.qword[e.rsp + 0]); - e.mov(e.rdx, e.qword[e.rcx + 8]); // membase } } } // namespace -// Major templating foo lives in here. -#include - - void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { // -------------------------------------------------------------------------- // General @@ -337,9 +340,13 @@ table->AddSequence(OPCODE_CALL_EXTERN, [](X64Emitter& e, Instr*& i) { auto symbol_info = i->src1.symbol_info; XEASSERT(symbol_info->behavior() == FunctionInfo::BEHAVIOR_EXTERN); XEASSERTNOTNULL(symbol_info->extern_handler()); - e.mov(e.rdx, (uint64_t)symbol_info->extern_arg0()); - e.mov(e.r8, (uint64_t)symbol_info->extern_arg1()); - CallNative(e, symbol_info->extern_handler()); + // rdx = target host function + // r8 = arg0 + // r9 = arg1 + e.mov(e.rdx, (uint64_t)symbol_info->extern_handler()); + e.mov(e.r8, (uint64_t)symbol_info->extern_arg0()); + e.mov(e.r9, (uint64_t)symbol_info->extern_arg1()); + TransitionToHost(e); i = e.Advance(i); return true; }); diff --git a/src/alloy/backend/x64/lowering/op_utils.inl b/src/alloy/backend/x64/lowering/op_utils.inl index 52bf39d7d..7fe1bda6d 100644 --- a/src/alloy/backend/x64/lowering/op_utils.inl +++ b/src/alloy/backend/x64/lowering/op_utils.inl @@ -17,7 +17,7 @@ namespace { #define LIKE_REG(dest, like) Reg(dest.getIdx(), dest.getKind(), like.getBit(), false) #define NAX_LIKE(like) Reg(e.rax.getIdx(), e.rax.getKind(), like.getBit(), false) -#define STASH_OFFSET 48 +#define STASH_OFFSET 0 // If we are running with tracing on we have to store the EFLAGS in the stack, // otherwise our calls out to C to print will clear it before DID_CARRY/etc @@ -68,7 +68,7 @@ void MovMem64(X64Emitter& e, RegExp& addr, uint64_t v) { void CallNative(X64Emitter& e, void* target) { e.mov(e.rax, (uint64_t)target); e.call(e.rax); - e.mov(e.rcx, e.qword[e.rsp + 0]); + e.mov(e.rcx, e.qword[e.rsp + StackLayout::RCX_HOME]); e.mov(e.rdx, e.qword[e.rcx + 8]); // membase } diff --git a/src/alloy/backend/x64/sources.gypi b/src/alloy/backend/x64/sources.gypi index 0a3ead5a9..7ca63e25d 100644 --- a/src/alloy/backend/x64/sources.gypi +++ b/src/alloy/backend/x64/sources.gypi @@ -12,6 +12,8 @@ 'x64_emitter.h', 'x64_function.cc', 'x64_function.h', + 'x64_thunk_emitter.cc', + 'x64_thunk_emitter.h', ], 'includes': [ diff --git a/src/alloy/backend/x64/x64_assembler.cc b/src/alloy/backend/x64/x64_assembler.cc index d4e88e621..3f90b077b 100644 --- a/src/alloy/backend/x64/x64_assembler.cc +++ b/src/alloy/backend/x64/x64_assembler.cc @@ -30,7 +30,7 @@ using namespace alloy::runtime; X64Assembler::X64Assembler(X64Backend* backend) : x64_backend_(backend), - emitter_(0), + emitter_(0), allocator_(0), Assembler(backend) { } @@ -39,6 +39,7 @@ X64Assembler::~X64Assembler() { })); delete emitter_; + delete allocator_; } int X64Assembler::Initialize() { @@ -47,8 +48,8 @@ int X64Assembler::Initialize() { return result; } - emitter_ = new X64Emitter(x64_backend_, - new XbyakAllocator()); + allocator_ = new XbyakAllocator(); + emitter_ = new X64Emitter(x64_backend_, allocator_); alloy::tracing::WriteEvent(EventType::AssemblerInit({ })); diff --git a/src/alloy/backend/x64/x64_assembler.h b/src/alloy/backend/x64/x64_assembler.h index 3d6235254..063e19c63 100644 --- a/src/alloy/backend/x64/x64_assembler.h +++ b/src/alloy/backend/x64/x64_assembler.h @@ -21,6 +21,7 @@ namespace x64 { class X64Backend; class X64Emitter; +class XbyakAllocator; class X64Assembler : public Assembler { @@ -45,6 +46,7 @@ private: private: X64Backend* x64_backend_; X64Emitter* emitter_; + XbyakAllocator* allocator_; StringBuffer string_buffer_; }; diff --git a/src/alloy/backend/x64/x64_backend.cc b/src/alloy/backend/x64/x64_backend.cc index 560328750..031dc6bda 100644 --- a/src/alloy/backend/x64/x64_backend.cc +++ b/src/alloy/backend/x64/x64_backend.cc @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -46,6 +47,13 @@ int X64Backend::Initialize() { return result; } + auto allocator = new XbyakAllocator(); + auto thunk_emitter = new X64ThunkEmitter(this, allocator); + host_to_guest_thunk_ = thunk_emitter->EmitHostToGuestThunk(); + guest_to_host_thunk_ = thunk_emitter->EmitGuestToHostThunk(); + delete thunk_emitter; + delete allocator; + lowering_table_ = new LoweringTable(this); RegisterSequences(lowering_table_); diff --git a/src/alloy/backend/x64/x64_backend.h b/src/alloy/backend/x64/x64_backend.h index b10f7e571..dd12c0347 100644 --- a/src/alloy/backend/x64/x64_backend.h +++ b/src/alloy/backend/x64/x64_backend.h @@ -26,12 +26,18 @@ namespace lowering { class LoweringTable; } #define ALLOY_HAS_X64_BACKEND 1 +typedef void* (*HostToGuestThunk)(void* target, void* arg0, void* arg1); +typedef void* (*GuestToHostThunk)(void* target, void* arg0, void* arg1); + class X64Backend : public Backend { public: X64Backend(runtime::Runtime* runtime); virtual ~X64Backend(); X64CodeCache* code_cache() const { return code_cache_; } + HostToGuestThunk host_to_guest_thunk() const { return host_to_guest_thunk_; } + GuestToHostThunk guest_to_host_thunk() const { return guest_to_host_thunk_; } + lowering::LoweringTable* lowering_table() const { return lowering_table_; } virtual int Initialize(); @@ -40,6 +46,9 @@ public: private: X64CodeCache* code_cache_; + HostToGuestThunk host_to_guest_thunk_; + GuestToHostThunk guest_to_host_thunk_; + lowering::LoweringTable* lowering_table_; }; diff --git a/src/alloy/backend/x64/x64_code_cache.cc b/src/alloy/backend/x64/x64_code_cache.cc index c7a456830..2b2bf322d 100644 --- a/src/alloy/backend/x64/x64_code_cache.cc +++ b/src/alloy/backend/x64/x64_code_cache.cc @@ -34,14 +34,14 @@ public: const static uint32_t ESTIMATED_FN_SIZE = 512; // Size of unwind info per function. // TODO(benvanik): move this to emitter. - const static uint32_t UNWIND_INFO_SIZE = 4 + (2 * 1); + const static uint32_t UNWIND_INFO_SIZE = 4 + (2 * 1 + 2 + 2); void* fn_table_handle; RUNTIME_FUNCTION* fn_table; uint32_t fn_table_count; uint32_t fn_table_capacity; - void AddTableEntry(uint8_t* code, size_t code_size); + void AddTableEntry(uint8_t* code, size_t code_size, size_t stack_size); }; @@ -73,7 +73,8 @@ int X64CodeCache::Initialize() { return 0; } -void* X64CodeCache::PlaceCode(void* machine_code, size_t code_size) { +void* X64CodeCache::PlaceCode(void* machine_code, size_t code_size, + size_t stack_size) { // Add unwind info into the allocation size. Keep things 16b aligned. code_size += XEROUNDUP(X64CodeChunk::UNWIND_INFO_SIZE, 16); @@ -101,7 +102,7 @@ void* X64CodeCache::PlaceCode(void* machine_code, size_t code_size) { active_chunk_->offset += code_size; // Add entry to fn table. - active_chunk_->AddTableEntry(final_address, code_size); + active_chunk_->AddTableEntry(final_address, code_size, stack_size); UnlockMutex(lock_); @@ -156,6 +157,27 @@ typedef enum _UNWIND_OP_CODES { UWOP_SAVE_XMM128_FAR, /* info == XMM reg number, offset in next 2 slots */ UWOP_PUSH_MACHFRAME /* info == 0: no error-code, 1: error-code */ } UNWIND_CODE_OPS; +class UNWIND_REGISTER { +public: + enum _ { + RAX = 0, + RCX = 1, + RDX = 2, + RBX = 3, + RSP = 4, + RBP = 5, + RSI = 6, + RDI = 7, + R8 = 8, + R9 = 9, + R10 = 10, + R11 = 11, + R12 = 12, + R13 = 13, + R14 = 14, + R15 = 15, + }; +}; typedef union _UNWIND_CODE { struct { @@ -183,7 +205,8 @@ typedef struct _UNWIND_INFO { } UNWIND_INFO, *PUNWIND_INFO; } // namespace -void X64CodeChunk::AddTableEntry(uint8_t* code, size_t code_size) { +void X64CodeChunk::AddTableEntry(uint8_t* code, size_t code_size, + size_t stack_size) { // NOTE: we assume a chunk lock. if (fn_table_count + 1 > fn_table_capacity) { @@ -213,26 +236,60 @@ void X64CodeChunk::AddTableEntry(uint8_t* code, size_t code_size) { size_t unwind_info_offset = offset; offset += UNWIND_INFO_SIZE; - // TODO(benvanik): take as parameters? - bool has_prolog = true; - uint8_t prolog_size = 4; - uint8_t stack_bytes = 72; + if (!stack_size) { + uint8_t prolog_size = 0; - // http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx - UNWIND_INFO* unwind_info = (UNWIND_INFO*)(buffer + unwind_info_offset); - unwind_info->Version = 1; - unwind_info->Flags = 0; - unwind_info->SizeOfProlog = has_prolog ? prolog_size : 0; - unwind_info->CountOfCodes = has_prolog ? 1 : 0; - unwind_info->FrameRegister = 0; - unwind_info->FrameOffset = 0; + // http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx + UNWIND_INFO* unwind_info = (UNWIND_INFO*)(buffer + unwind_info_offset); + unwind_info->Version = 1; + unwind_info->Flags = 0; + unwind_info->SizeOfProlog = 0; + unwind_info->CountOfCodes = 0; + unwind_info->FrameRegister = 0; + unwind_info->FrameOffset = 0; + } else if (stack_size <= 128) { + uint8_t prolog_size = 4; - // http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx - auto& code_0 = unwind_info->UnwindCode[0]; - code_0.CodeOffset = 4; // end of instruction + 1 == offset of next instruction - code_0.UnwindOp = UWOP_ALLOC_SMALL; - code_0.OpInfo = stack_bytes / 8 - 1; - XEASSERT(stack_bytes < 128); + // http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx + UNWIND_INFO* unwind_info = (UNWIND_INFO*)(buffer + unwind_info_offset); + unwind_info->Version = 1; + unwind_info->Flags = 0; + unwind_info->SizeOfProlog = prolog_size; + unwind_info->CountOfCodes = 1; + unwind_info->FrameRegister = 0; + unwind_info->FrameOffset = 0; + + // http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx + size_t co = 0; + auto& unwind_code = unwind_info->UnwindCode[co++]; + unwind_code.CodeOffset = 14; // end of instruction + 1 == offset of next instruction + unwind_code.UnwindOp = UWOP_ALLOC_SMALL; + unwind_code.OpInfo = stack_size / 8 - 1; + } else { + // TODO(benvanik): take as parameters? + uint8_t prolog_size = 17; + + // This doesn't work, for some reason. + XEASSERTALWAYS(); + + // http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx + UNWIND_INFO* unwind_info = (UNWIND_INFO*)(buffer + unwind_info_offset); + unwind_info->Version = 1; + unwind_info->Flags = 0; + unwind_info->SizeOfProlog = prolog_size; + unwind_info->CountOfCodes = 3; + unwind_info->FrameRegister = 0; + unwind_info->FrameOffset = 0; + + // http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx + size_t co = 0; + auto& unwind_code = unwind_info->UnwindCode[co++]; + unwind_code.CodeOffset = 17; // end of instruction + 1 == offset of next instruction + unwind_code.UnwindOp = UWOP_ALLOC_LARGE; + unwind_code.OpInfo = 0; + unwind_code = unwind_info->UnwindCode[co++]; + unwind_code.FrameOffset = (USHORT)(stack_size) / 8; + } // Add entry. auto& fn_entry = fn_table[fn_table_count++]; diff --git a/src/alloy/backend/x64/x64_code_cache.h b/src/alloy/backend/x64/x64_code_cache.h index 1d6140430..23ba2e639 100644 --- a/src/alloy/backend/x64/x64_code_cache.h +++ b/src/alloy/backend/x64/x64_code_cache.h @@ -30,7 +30,7 @@ public: // TODO(benvanik): keep track of code blocks // TODO(benvanik): padding/guards/etc - void* PlaceCode(void* machine_code, size_t code_size); + void* PlaceCode(void* machine_code, size_t code_size, size_t stack_size); private: const static size_t DEFAULT_CHUNK_SIZE = 4 * 1024 * 1024; diff --git a/src/alloy/backend/x64/x64_emitter.cc b/src/alloy/backend/x64/x64_emitter.cc index fc5dce840..1e938b084 100644 --- a/src/alloy/backend/x64/x64_emitter.cc +++ b/src/alloy/backend/x64/x64_emitter.cc @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -46,7 +47,6 @@ X64Emitter::X64Emitter(X64Backend* backend, XbyakAllocator* allocator) : } X64Emitter::~X64Emitter() { - delete allocator_; } int X64Emitter::Initialize() { @@ -71,7 +71,7 @@ int X64Emitter::Emit( // Copy the final code to the cache and relocate it. out_code_size = getSize(); - out_code_address = Emplace(code_cache_); + out_code_address = Emplace(StackLayout::GUEST_STACK_SIZE); // Stash source map. if (debug_info_flags & DEBUG_INFO_SOURCE_MAP) { @@ -83,13 +83,13 @@ int X64Emitter::Emit( return 0; } -void* X64Emitter::Emplace(X64CodeCache* code_cache) { +void* X64Emitter::Emplace(size_t stack_size) { // To avoid changing xbyak, we do a switcharoo here. // top_ points to the Xbyak buffer, and since we are in AutoGrow mode // it has pending relocations. We copy the top_ to our buffer, swap the // pointer, relocate, then return the original scratch pointer for use. uint8_t* old_address = top_; - void* new_address = code_cache->PlaceCode(top_, size_); + void* new_address = code_cache_->PlaceCode(top_, size_, stack_size); top_ = (uint8_t*)new_address; ready(); top_ = old_address; @@ -132,21 +132,13 @@ int X64Emitter::Emit(HIRBuilder* builder) { // X64CodeCache, which dynamically generates exception information. // Adding or changing anything here must be matched! const bool emit_prolog = true; - const size_t stack_size = 72; + const size_t stack_size = StackLayout::GUEST_STACK_SIZE; if (emit_prolog) { - mov(qword[rsp + 8], rcx); + mov(qword[rsp + 8 * 2], rdx); + mov(qword[rsp + 8 * 1], rcx); sub(rsp, stack_size); - mov(qword[rsp + 8 * 0], rbx); - mov(qword[rsp + 8 * 1], r12); - mov(qword[rsp + 8 * 2], r13); - mov(qword[rsp + 8 * 3], r14); - mov(qword[rsp + 8 * 4], r15); } - // membase stays in rdx. If we evict it (like on function calls) we - // must put it back. - mov(rdx, qword[rcx + 8]); - auto lowering_table = backend_->lowering_table(); reg_state_.active_regs = reg_state_.live_regs = reserved_regs; @@ -180,12 +172,9 @@ int X64Emitter::Emit(HIRBuilder* builder) { // Function epilog. L("epilog"); if (emit_prolog) { - mov(rbx, qword[rsp + 8 * 0]); - mov(r12, qword[rsp + 8 * 1]); - mov(r13, qword[rsp + 8 * 2]); - mov(r14, qword[rsp + 8 * 3]); - mov(r15, qword[rsp + 8 * 4]); add(rsp, stack_size); + mov(rcx, qword[rsp + 8 * 1]); + mov(rdx, qword[rsp + 8 * 2]); } ret(); diff --git a/src/alloy/backend/x64/x64_emitter.h b/src/alloy/backend/x64/x64_emitter.h index fe458b8cb..c5bc51e05 100644 --- a/src/alloy/backend/x64/x64_emitter.h +++ b/src/alloy/backend/x64/x64_emitter.h @@ -45,6 +45,7 @@ public: virtual ~X64Emitter(); runtime::Runtime* runtime() const { return runtime_; } + X64Backend* backend() const { return backend_; } int Initialize(); @@ -144,15 +145,15 @@ public: void MarkSourceOffset(hir::Instr* i); -private: - void* Emplace(X64CodeCache* code_cache); +protected: + void* Emplace(size_t stack_size); int Emit(hir::HIRBuilder* builder); -private: +protected: runtime::Runtime* runtime_; - X64Backend* backend_; - X64CodeCache* code_cache_; - XbyakAllocator* allocator_; + X64Backend* backend_; + X64CodeCache* code_cache_; + XbyakAllocator* allocator_; struct { // Registers currently active within a begin/end op block. These diff --git a/src/alloy/backend/x64/x64_function.cc b/src/alloy/backend/x64/x64_function.cc index b8172247e..3f7f4bc57 100644 --- a/src/alloy/backend/x64/x64_function.cc +++ b/src/alloy/backend/x64/x64_function.cc @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -42,7 +43,11 @@ int X64Function::RemoveBreakpointImpl(Breakpoint* breakpoint) { } int X64Function::CallImpl(ThreadState* thread_state) { - typedef void(*call_t)(void* raw_context, uint8_t* membase); - ((call_t)machine_code_)(thread_state->raw_context(), thread_state->memory()->membase()); + auto backend = (X64Backend*)thread_state->runtime()->backend(); + auto thunk = backend->host_to_guest_thunk(); + thunk( + machine_code_, + thread_state->raw_context(), + thread_state->memory()->membase()); return 0; } diff --git a/src/alloy/backend/x64/x64_thunk_emitter.cc b/src/alloy/backend/x64/x64_thunk_emitter.cc new file mode 100644 index 000000000..0bd7239f6 --- /dev/null +++ b/src/alloy/backend/x64/x64_thunk_emitter.cc @@ -0,0 +1,139 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include + +#include + + +using namespace alloy; +using namespace alloy::backend; +using namespace alloy::backend::x64; + +using namespace Xbyak; + + +X64ThunkEmitter::X64ThunkEmitter( + X64Backend* backend, XbyakAllocator* allocator) : + X64Emitter(backend, allocator) { +} + +X64ThunkEmitter::~X64ThunkEmitter() { +} + +HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() { + // rcx = target + // rdx = arg0 + // r8 = arg1 + + const size_t stack_size = StackLayout::THUNK_STACK_SIZE; + // rsp + 0 = return address + mov(qword[rsp + 8 * 2], rdx); + mov(qword[rsp + 8 * 1], rcx); + sub(rsp, stack_size); + + mov(qword[rsp + 56], rbx); + mov(qword[rsp + 64], rbp); + mov(qword[rsp + 72], rsi); + mov(qword[rsp + 80], rdi); + mov(qword[rsp + 88], r12); + mov(qword[rsp + 96], r13); + mov(qword[rsp + 104], r14); + mov(qword[rsp + 112], r15); + + /*movaps(ptr[rsp + 128], xmm6); + movaps(ptr[rsp + 144], xmm7); + movaps(ptr[rsp + 160], xmm8); + movaps(ptr[rsp + 176], xmm9); + movaps(ptr[rsp + 192], xmm10); + movaps(ptr[rsp + 208], xmm11); + movaps(ptr[rsp + 224], xmm12); + movaps(ptr[rsp + 240], xmm13); + movaps(ptr[rsp + 256], xmm14); + movaps(ptr[rsp + 272], xmm15);*/ + + mov(rax, rcx); + mov(rcx, rdx); + mov(rdx, r8); + call(rax); + + /*movaps(xmm6, ptr[rsp + 128]); + movaps(xmm7, ptr[rsp + 144]); + movaps(xmm8, ptr[rsp + 160]); + movaps(xmm9, ptr[rsp + 176]); + movaps(xmm10, ptr[rsp + 192]); + movaps(xmm11, ptr[rsp + 208]); + movaps(xmm12, ptr[rsp + 224]); + movaps(xmm13, ptr[rsp + 240]); + movaps(xmm14, ptr[rsp + 256]); + movaps(xmm15, ptr[rsp + 272]);*/ + + mov(rbx, qword[rsp + 56]); + mov(rbp, qword[rsp + 64]); + mov(rsi, qword[rsp + 72]); + mov(rdi, qword[rsp + 80]); + mov(r12, qword[rsp + 88]); + mov(r13, qword[rsp + 96]); + mov(r14, qword[rsp + 104]); + mov(r15, qword[rsp + 112]); + + add(rsp, stack_size); + mov(rcx, qword[rsp + 8 * 1]); + mov(rdx, qword[rsp + 8 * 2]); + ret(); + + void* fn = Emplace(stack_size); + return (HostToGuestThunk)fn; +} + +GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() { + // rcx = context + // rdx = target function + // r8 = arg0 + // r9 = arg1 + + const size_t stack_size = StackLayout::THUNK_STACK_SIZE; + // rsp + 0 = return address + mov(qword[rsp + 8 * 2], rdx); + mov(qword[rsp + 8 * 1], rcx); + sub(rsp, stack_size); + + mov(qword[rsp + 56], rbx); + mov(qword[rsp + 64], rbp); + mov(qword[rsp + 72], rsi); + mov(qword[rsp + 80], rdi); + mov(qword[rsp + 88], r12); + mov(qword[rsp + 96], r13); + mov(qword[rsp + 104], r14); + mov(qword[rsp + 112], r15); + + // TODO(benvanik): save things? XMM0-5? + + mov(rax, rdx); + mov(rdx, r8); + mov(r8, r9); + call(rax); + + mov(rbx, qword[rsp + 56]); + mov(rbp, qword[rsp + 64]); + mov(rsi, qword[rsp + 72]); + mov(rdi, qword[rsp + 80]); + mov(r12, qword[rsp + 88]); + mov(r13, qword[rsp + 96]); + mov(r14, qword[rsp + 104]); + mov(r15, qword[rsp + 112]); + + add(rsp, stack_size); + mov(rcx, qword[rsp + 8 * 1]); + mov(rdx, qword[rsp + 8 * 2]); + ret(); + + void* fn = Emplace(stack_size); + return (HostToGuestThunk)fn; +} diff --git a/src/alloy/backend/x64/x64_thunk_emitter.h b/src/alloy/backend/x64/x64_thunk_emitter.h new file mode 100644 index 000000000..4ce0669a7 --- /dev/null +++ b/src/alloy/backend/x64/x64_thunk_emitter.h @@ -0,0 +1,124 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_CPU_X64_X64_THUNK_EMITTER_H_ +#define XENIA_CPU_X64_X64_THUNK_EMITTER_H_ + +#include +#include +#include + + +namespace alloy { +namespace backend { +namespace x64 { + + +/** + * Stack Layout + * ---------------------------- + * NOTE: stack must always be 16b aligned. + * + * +------------------+ + * | scratch, 56b | rsp + 0 + * | | + * | .... | + * | | + * | | + * +------------------+ + * | rbx | rsp + 56 + * +------------------+ + * | rbp | rsp + 64 + * +------------------+ + * | rsi | rsp + 72 + * +------------------+ + * | rdi | rsp + 80 + * +------------------+ + * | r12 | rsp + 88 + * +------------------+ + * | r13 | rsp + 96 + * +------------------+ + * | r14 | rsp + 104 + * +------------------+ + * | r15 | rsp + 112 + * +------------------+ + * | (return address) | rsp + 120 + * +------------------+ + * | (rcx home) | rsp + 128 + * +------------------+ + * | (rdx home) | rsp + 136 + * +------------------+ + * + * + * TODO: + * +------------------+ + * | xmm6 | rsp + 128 + * | | + * +------------------+ + * | xmm7 | rsp + 144 + * | | + * +------------------+ + * | xmm8 | rsp + 160 + * | | + * +------------------+ + * | xmm9 | rsp + 176 + * | | + * +------------------+ + * | xmm10 | rsp + 192 + * | | + * +------------------+ + * | xmm11 | rsp + 208 + * | | + * +------------------+ + * | xmm12 | rsp + 224 + * | | + * +------------------+ + * | xmm13 | rsp + 240 + * | | + * +------------------+ + * | xmm14 | rsp + 256 + * | | + * +------------------+ + * | xmm15 | rsp + 272 + * | | + * +------------------+ + * + */ + +class StackLayout { +public: + const static size_t GUEST_STACK_SIZE = 120; + + const static size_t THUNK_STACK_SIZE = 120; + + const static size_t RETURN_ADDRESS = 120; + const static size_t RCX_HOME = 128; + const static size_t RDX_HOME = 136; +}; + + +class X64ThunkEmitter : public X64Emitter { +public: + X64ThunkEmitter(X64Backend* backend, XbyakAllocator* allocator); + virtual ~X64ThunkEmitter(); + + // Call a generated function, saving all stack parameters. + HostToGuestThunk EmitHostToGuestThunk(); + + // Function that guest code can call to transition into host code. + GuestToHostThunk EmitGuestToHostThunk(); +}; + + +} // namespace x64 +} // namespace backend +} // namespace alloy + + +#endif // XENIA_CPU_X64_X64_THUNK_EMITTER_H_