Hacking together proper stack saving. Can't get >128-byte stack frames working yet.

This commit is contained in:
Ben Vanik 2014-01-31 22:16:05 -08:00
parent f85b83709e
commit 009a6d0745
14 changed files with 424 additions and 80 deletions

View File

@ -9,8 +9,10 @@
#include <alloy/backend/x64/lowering/lowering_sequences.h>
#include <alloy/backend/x64/x64_backend.h>
#include <alloy/backend/x64/x64_emitter.h>
#include <alloy/backend/x64/x64_function.h>
#include <alloy/backend/x64/x64_thunk_emitter.h>
#include <alloy/backend/x64/lowering/lowering_table.h>
#include <alloy/backend/x64/lowering/tracers.h>
#include <alloy/runtime/symbol_info.h>
@ -44,6 +46,11 @@ namespace {
#define SHUFPS_SWAP_DWORDS 0x1B
// Major templating foo lives in here.
#include <alloy/backend/x64/lowering/op_utils.inl>
enum XmmConst {
XMMZero = 0,
XMMOne = 1,
@ -156,25 +163,31 @@ void* ResolveFunctionAddress(void* raw_context, uint32_t target_address) {
auto x64_fn = (X64Function*)fn;
return x64_fn->machine_code();
}
// Emits the transition from generated guest code into host code by calling
// through the backend's guest-to-host thunk.
// Register contract on entry (set up by the caller's emitted code):
//   rcx = context, rdx = target host function, r8 = arg0, r9 = arg1
// On return, rax holds the host function's return value.
void TransitionToHost(X64Emitter& e) {
  const auto host_thunk = e.backend()->guest_to_host_thunk();
  e.mov(e.rax, (uint64_t)host_thunk);
  e.call(e.rax);
}
// Emits a call (or tail jump) from guest code to the guest function described
// by |symbol_info|. The target address is resolved at runtime by
// ResolveFunctionSymbol, which leaves the machine-code address in rax.
// NOTE: this span contained diff residue — both the pre-change inline
// mov/call/reload sequence and the post-change CallNative() call were present;
// only the updated form is kept here.
void IssueCall(X64Emitter& e, FunctionInfo* symbol_info, uint32_t flags) {
  auto fn = symbol_info->function();
  // Resolve address to the function to call and store in rax.
  // TODO(benvanik): caching/etc. For now this makes debugging easier.
  e.mov(e.rdx, (uint64_t)symbol_info);
  CallNative(e, ResolveFunctionSymbol);
  // Actually jump/call to rax.
  if (flags & CALL_TAIL) {
    // Pop the caller's guest frame before tail-jumping so the target sees
    // the same stack as it would on a direct call.
    e.add(e.rsp, StackLayout::GUEST_STACK_SIZE);
    e.jmp(e.rax);
  } else {
    e.call(e.rax);
    // NOTE(review): rcx is reloaded from [rsp + 0] here while CallNative now
    // uses StackLayout::RCX_HOME — confirm which offset is correct for the
    // new stack layout.
    e.mov(e.rcx, e.qword[e.rsp + 0]);
    e.mov(e.rdx, e.qword[e.rcx + 8]);  // membase
  }
}
void IssueCallIndirect(X64Emitter& e, Value* target, uint32_t flags) {
@ -186,30 +199,20 @@ void IssueCallIndirect(X64Emitter& e, Value* target, uint32_t flags) {
e.mov(e.rdx, r);
}
e.EndOp(r);
e.mov(e.rax, (uint64_t)ResolveFunctionAddress);
e.call(e.rax);
e.mov(e.rcx, e.qword[e.rsp + 0]);
e.mov(e.rdx, e.qword[e.rcx + 8]); // membase
CallNative(e, ResolveFunctionAddress);
// Actually jump/call to rax.
if (flags & CALL_TAIL) {
// TODO(benvanik): adjust stack?
e.add(e.rsp, 72);
e.add(e.rsp, StackLayout::GUEST_STACK_SIZE);
e.jmp(e.rax);
} else {
e.call(e.rax);
e.mov(e.rcx, e.qword[e.rsp + 0]);
e.mov(e.rdx, e.qword[e.rcx + 8]); // membase
}
}
} // namespace
// Major templating foo lives in here.
#include <alloy/backend/x64/lowering/op_utils.inl>
void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) {
// --------------------------------------------------------------------------
// General
@ -337,9 +340,13 @@ table->AddSequence(OPCODE_CALL_EXTERN, [](X64Emitter& e, Instr*& i) {
auto symbol_info = i->src1.symbol_info;
XEASSERT(symbol_info->behavior() == FunctionInfo::BEHAVIOR_EXTERN);
XEASSERTNOTNULL(symbol_info->extern_handler());
e.mov(e.rdx, (uint64_t)symbol_info->extern_arg0());
e.mov(e.r8, (uint64_t)symbol_info->extern_arg1());
CallNative(e, symbol_info->extern_handler());
// rdx = target host function
// r8 = arg0
// r9 = arg1
e.mov(e.rdx, (uint64_t)symbol_info->extern_handler());
e.mov(e.r8, (uint64_t)symbol_info->extern_arg0());
e.mov(e.r9, (uint64_t)symbol_info->extern_arg1());
TransitionToHost(e);
i = e.Advance(i);
return true;
});

View File

@ -17,7 +17,7 @@ namespace {
#define LIKE_REG(dest, like) Reg(dest.getIdx(), dest.getKind(), like.getBit(), false)
#define NAX_LIKE(like) Reg(e.rax.getIdx(), e.rax.getKind(), like.getBit(), false)
#define STASH_OFFSET 48
#define STASH_OFFSET 0
// If we are running with tracing on we have to store the EFLAGS in the stack,
// otherwise our calls out to C to print will clear it before DID_CARRY/etc
@ -68,7 +68,7 @@ void MovMem64(X64Emitter& e, RegExp& addr, uint64_t v) {
// Calls the host C function at |target| from emitted guest code, then
// restores the two registers the guest convention keeps live and which the
// host callee may clobber: rcx (context) and rdx (membase).
// NOTE: this span contained diff residue — rcx was loaded twice, once from
// the stale [rsp + 0] offset and once from StackLayout::RCX_HOME; only the
// updated load is kept.
void CallNative(X64Emitter& e, void* target) {
  e.mov(e.rax, (uint64_t)target);
  e.call(e.rax);
  // Reload the context pointer from its home slot in the guest frame, then
  // membase from the context structure (context + 8).
  e.mov(e.rcx, e.qword[e.rsp + StackLayout::RCX_HOME]);
  e.mov(e.rdx, e.qword[e.rcx + 8]);  // membase
}

View File

@ -12,6 +12,8 @@
'x64_emitter.h',
'x64_function.cc',
'x64_function.h',
'x64_thunk_emitter.cc',
'x64_thunk_emitter.h',
],
'includes': [

View File

@ -30,7 +30,7 @@ using namespace alloy::runtime;
X64Assembler::X64Assembler(X64Backend* backend) :
x64_backend_(backend),
emitter_(0),
emitter_(0), allocator_(0),
Assembler(backend) {
}
@ -39,6 +39,7 @@ X64Assembler::~X64Assembler() {
}));
delete emitter_;
delete allocator_;
}
int X64Assembler::Initialize() {
@ -47,8 +48,8 @@ int X64Assembler::Initialize() {
return result;
}
emitter_ = new X64Emitter(x64_backend_,
new XbyakAllocator());
allocator_ = new XbyakAllocator();
emitter_ = new X64Emitter(x64_backend_, allocator_);
alloy::tracing::WriteEvent(EventType::AssemblerInit({
}));

View File

@ -21,6 +21,7 @@ namespace x64 {
class X64Backend;
class X64Emitter;
class XbyakAllocator;
class X64Assembler : public Assembler {
@ -45,6 +46,7 @@ private:
private:
X64Backend* x64_backend_;
X64Emitter* emitter_;
XbyakAllocator* allocator_;
StringBuffer string_buffer_;
};

View File

@ -12,6 +12,7 @@
#include <alloy/backend/x64/tracing.h>
#include <alloy/backend/x64/x64_assembler.h>
#include <alloy/backend/x64/x64_code_cache.h>
#include <alloy/backend/x64/x64_thunk_emitter.h>
#include <alloy/backend/x64/lowering/lowering_table.h>
#include <alloy/backend/x64/lowering/lowering_sequences.h>
@ -46,6 +47,13 @@ int X64Backend::Initialize() {
return result;
}
auto allocator = new XbyakAllocator();
auto thunk_emitter = new X64ThunkEmitter(this, allocator);
host_to_guest_thunk_ = thunk_emitter->EmitHostToGuestThunk();
guest_to_host_thunk_ = thunk_emitter->EmitGuestToHostThunk();
delete thunk_emitter;
delete allocator;
lowering_table_ = new LoweringTable(this);
RegisterSequences(lowering_table_);

View File

@ -26,12 +26,18 @@ namespace lowering { class LoweringTable; }
#define ALLOY_HAS_X64_BACKEND 1
typedef void* (*HostToGuestThunk)(void* target, void* arg0, void* arg1);
typedef void* (*GuestToHostThunk)(void* target, void* arg0, void* arg1);
class X64Backend : public Backend {
public:
X64Backend(runtime::Runtime* runtime);
virtual ~X64Backend();
X64CodeCache* code_cache() const { return code_cache_; }
HostToGuestThunk host_to_guest_thunk() const { return host_to_guest_thunk_; }
GuestToHostThunk guest_to_host_thunk() const { return guest_to_host_thunk_; }
lowering::LoweringTable* lowering_table() const { return lowering_table_; }
virtual int Initialize();
@ -40,6 +46,9 @@ public:
private:
X64CodeCache* code_cache_;
HostToGuestThunk host_to_guest_thunk_;
GuestToHostThunk guest_to_host_thunk_;
lowering::LoweringTable* lowering_table_;
};

View File

@ -34,14 +34,14 @@ public:
const static uint32_t ESTIMATED_FN_SIZE = 512;
// Size of unwind info per function.
// TODO(benvanik): move this to emitter.
const static uint32_t UNWIND_INFO_SIZE = 4 + (2 * 1);
const static uint32_t UNWIND_INFO_SIZE = 4 + (2 * 1 + 2 + 2);
void* fn_table_handle;
RUNTIME_FUNCTION* fn_table;
uint32_t fn_table_count;
uint32_t fn_table_capacity;
void AddTableEntry(uint8_t* code, size_t code_size);
void AddTableEntry(uint8_t* code, size_t code_size, size_t stack_size);
};
@ -73,7 +73,8 @@ int X64CodeCache::Initialize() {
return 0;
}
void* X64CodeCache::PlaceCode(void* machine_code, size_t code_size) {
void* X64CodeCache::PlaceCode(void* machine_code, size_t code_size,
size_t stack_size) {
// Add unwind info into the allocation size. Keep things 16b aligned.
code_size += XEROUNDUP(X64CodeChunk::UNWIND_INFO_SIZE, 16);
@ -101,7 +102,7 @@ void* X64CodeCache::PlaceCode(void* machine_code, size_t code_size) {
active_chunk_->offset += code_size;
// Add entry to fn table.
active_chunk_->AddTableEntry(final_address, code_size);
active_chunk_->AddTableEntry(final_address, code_size, stack_size);
UnlockMutex(lock_);
@ -156,6 +157,27 @@ typedef enum _UNWIND_OP_CODES {
UWOP_SAVE_XMM128_FAR, /* info == XMM reg number, offset in next 2 slots */
UWOP_PUSH_MACHFRAME /* info == 0: no error-code, 1: error-code */
} UNWIND_CODE_OPS;
// x64 register numbering as used by the Windows unwind-code encoding
// (the register number stored in an UNWIND_CODE OpInfo field). The values
// follow the architectural encoding, so sequential enumeration suffices.
class UNWIND_REGISTER {
public:
  enum _ {
    RAX = 0,
    RCX,
    RDX,
    RBX,
    RSP,
    RBP,
    RSI,
    RDI,
    R8,
    R9,
    R10,
    R11,
    R12,
    R13,
    R14,
    R15,
  };
};
typedef union _UNWIND_CODE {
struct {
@ -183,7 +205,8 @@ typedef struct _UNWIND_INFO {
} UNWIND_INFO, *PUNWIND_INFO;
} // namespace
void X64CodeChunk::AddTableEntry(uint8_t* code, size_t code_size) {
void X64CodeChunk::AddTableEntry(uint8_t* code, size_t code_size,
size_t stack_size) {
// NOTE: we assume a chunk lock.
if (fn_table_count + 1 > fn_table_capacity) {
@ -213,26 +236,60 @@ void X64CodeChunk::AddTableEntry(uint8_t* code, size_t code_size) {
size_t unwind_info_offset = offset;
offset += UNWIND_INFO_SIZE;
// TODO(benvanik): take as parameters?
bool has_prolog = true;
uint8_t prolog_size = 4;
uint8_t stack_bytes = 72;
if (!stack_size) {
uint8_t prolog_size = 0;
// http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
UNWIND_INFO* unwind_info = (UNWIND_INFO*)(buffer + unwind_info_offset);
unwind_info->Version = 1;
unwind_info->Flags = 0;
unwind_info->SizeOfProlog = has_prolog ? prolog_size : 0;
unwind_info->CountOfCodes = has_prolog ? 1 : 0;
unwind_info->FrameRegister = 0;
unwind_info->FrameOffset = 0;
// http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
UNWIND_INFO* unwind_info = (UNWIND_INFO*)(buffer + unwind_info_offset);
unwind_info->Version = 1;
unwind_info->Flags = 0;
unwind_info->SizeOfProlog = 0;
unwind_info->CountOfCodes = 0;
unwind_info->FrameRegister = 0;
unwind_info->FrameOffset = 0;
} else if (stack_size <= 128) {
uint8_t prolog_size = 4;
// http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx
auto& code_0 = unwind_info->UnwindCode[0];
code_0.CodeOffset = 4; // end of instruction + 1 == offset of next instruction
code_0.UnwindOp = UWOP_ALLOC_SMALL;
code_0.OpInfo = stack_bytes / 8 - 1;
XEASSERT(stack_bytes < 128);
// http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
UNWIND_INFO* unwind_info = (UNWIND_INFO*)(buffer + unwind_info_offset);
unwind_info->Version = 1;
unwind_info->Flags = 0;
unwind_info->SizeOfProlog = prolog_size;
unwind_info->CountOfCodes = 1;
unwind_info->FrameRegister = 0;
unwind_info->FrameOffset = 0;
// http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx
size_t co = 0;
auto& unwind_code = unwind_info->UnwindCode[co++];
unwind_code.CodeOffset = 14; // end of instruction + 1 == offset of next instruction
unwind_code.UnwindOp = UWOP_ALLOC_SMALL;
unwind_code.OpInfo = stack_size / 8 - 1;
} else {
// TODO(benvanik): take as parameters?
uint8_t prolog_size = 17;
// This doesn't work, for some reason.
XEASSERTALWAYS();
// http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
UNWIND_INFO* unwind_info = (UNWIND_INFO*)(buffer + unwind_info_offset);
unwind_info->Version = 1;
unwind_info->Flags = 0;
unwind_info->SizeOfProlog = prolog_size;
unwind_info->CountOfCodes = 3;
unwind_info->FrameRegister = 0;
unwind_info->FrameOffset = 0;
// http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx
size_t co = 0;
auto& unwind_code = unwind_info->UnwindCode[co++];
unwind_code.CodeOffset = 17; // end of instruction + 1 == offset of next instruction
unwind_code.UnwindOp = UWOP_ALLOC_LARGE;
unwind_code.OpInfo = 0;
unwind_code = unwind_info->UnwindCode[co++];
unwind_code.FrameOffset = (USHORT)(stack_size) / 8;
}
// Add entry.
auto& fn_entry = fn_table[fn_table_count++];

View File

@ -30,7 +30,7 @@ public:
// TODO(benvanik): keep track of code blocks
// TODO(benvanik): padding/guards/etc
void* PlaceCode(void* machine_code, size_t code_size);
void* PlaceCode(void* machine_code, size_t code_size, size_t stack_size);
private:
const static size_t DEFAULT_CHUNK_SIZE = 4 * 1024 * 1024;

View File

@ -11,6 +11,7 @@
#include <alloy/backend/x64/x64_backend.h>
#include <alloy/backend/x64/x64_code_cache.h>
#include <alloy/backend/x64/x64_thunk_emitter.h>
#include <alloy/backend/x64/lowering/lowering_table.h>
#include <alloy/hir/hir_builder.h>
#include <alloy/runtime/debug_info.h>
@ -46,7 +47,6 @@ X64Emitter::X64Emitter(X64Backend* backend, XbyakAllocator* allocator) :
}
X64Emitter::~X64Emitter() {
delete allocator_;
}
int X64Emitter::Initialize() {
@ -71,7 +71,7 @@ int X64Emitter::Emit(
// Copy the final code to the cache and relocate it.
out_code_size = getSize();
out_code_address = Emplace(code_cache_);
out_code_address = Emplace(StackLayout::GUEST_STACK_SIZE);
// Stash source map.
if (debug_info_flags & DEBUG_INFO_SOURCE_MAP) {
@ -83,13 +83,13 @@ int X64Emitter::Emit(
return 0;
}
void* X64Emitter::Emplace(X64CodeCache* code_cache) {
void* X64Emitter::Emplace(size_t stack_size) {
// To avoid changing xbyak, we do a switcharoo here.
// top_ points to the Xbyak buffer, and since we are in AutoGrow mode
// it has pending relocations. We copy the top_ to our buffer, swap the
// pointer, relocate, then return the original scratch pointer for use.
uint8_t* old_address = top_;
void* new_address = code_cache->PlaceCode(top_, size_);
void* new_address = code_cache_->PlaceCode(top_, size_, stack_size);
top_ = (uint8_t*)new_address;
ready();
top_ = old_address;
@ -132,21 +132,13 @@ int X64Emitter::Emit(HIRBuilder* builder) {
// X64CodeCache, which dynamically generates exception information.
// Adding or changing anything here must be matched!
const bool emit_prolog = true;
const size_t stack_size = 72;
const size_t stack_size = StackLayout::GUEST_STACK_SIZE;
if (emit_prolog) {
mov(qword[rsp + 8], rcx);
mov(qword[rsp + 8 * 2], rdx);
mov(qword[rsp + 8 * 1], rcx);
sub(rsp, stack_size);
mov(qword[rsp + 8 * 0], rbx);
mov(qword[rsp + 8 * 1], r12);
mov(qword[rsp + 8 * 2], r13);
mov(qword[rsp + 8 * 3], r14);
mov(qword[rsp + 8 * 4], r15);
}
// membase stays in rdx. If we evict it (like on function calls) we
// must put it back.
mov(rdx, qword[rcx + 8]);
auto lowering_table = backend_->lowering_table();
reg_state_.active_regs = reg_state_.live_regs = reserved_regs;
@ -180,12 +172,9 @@ int X64Emitter::Emit(HIRBuilder* builder) {
// Function epilog.
L("epilog");
if (emit_prolog) {
mov(rbx, qword[rsp + 8 * 0]);
mov(r12, qword[rsp + 8 * 1]);
mov(r13, qword[rsp + 8 * 2]);
mov(r14, qword[rsp + 8 * 3]);
mov(r15, qword[rsp + 8 * 4]);
add(rsp, stack_size);
mov(rcx, qword[rsp + 8 * 1]);
mov(rdx, qword[rsp + 8 * 2]);
}
ret();

View File

@ -45,6 +45,7 @@ public:
virtual ~X64Emitter();
runtime::Runtime* runtime() const { return runtime_; }
X64Backend* backend() const { return backend_; }
int Initialize();
@ -144,15 +145,15 @@ public:
void MarkSourceOffset(hir::Instr* i);
private:
void* Emplace(X64CodeCache* code_cache);
protected:
void* Emplace(size_t stack_size);
int Emit(hir::HIRBuilder* builder);
private:
protected:
runtime::Runtime* runtime_;
X64Backend* backend_;
X64CodeCache* code_cache_;
XbyakAllocator* allocator_;
X64Backend* backend_;
X64CodeCache* code_cache_;
XbyakAllocator* allocator_;
struct {
// Registers currently active within a begin/end op block. These

View File

@ -10,6 +10,7 @@
#include <alloy/backend/x64/x64_function.h>
#include <alloy/backend/x64/tracing.h>
#include <alloy/backend/x64/x64_backend.h>
#include <alloy/runtime/runtime.h>
#include <alloy/runtime/thread_state.h>
@ -42,7 +43,11 @@ int X64Function::RemoveBreakpointImpl(Breakpoint* breakpoint) {
}
// Invokes this function's generated machine code on the given thread.
// Dispatch goes through the backend's host-to-guest thunk, which saves the
// callee-saved registers and sets up the guest stack frame before jumping
// into |machine_code_|.
// NOTE: this span contained diff residue — the pre-change direct call through
// a casted function pointer AND the post-change thunk call were both present,
// which would have executed the function twice; only the thunk path is kept.
int X64Function::CallImpl(ThreadState* thread_state) {
  auto backend = (X64Backend*)thread_state->runtime()->backend();
  auto thunk = backend->host_to_guest_thunk();
  thunk(
      machine_code_,
      thread_state->raw_context(),
      thread_state->memory()->membase());
  return 0;
}

View File

@ -0,0 +1,139 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <alloy/backend/x64/x64_thunk_emitter.h>
#include <third_party/xbyak/xbyak/xbyak.h>
using namespace alloy;
using namespace alloy::backend;
using namespace alloy::backend::x64;
using namespace Xbyak;
// Thunks are emitted with the regular X64Emitter machinery; this subclass
// only adds the EmitHostToGuestThunk/EmitGuestToHostThunk entry points.
X64ThunkEmitter::X64ThunkEmitter(
X64Backend* backend, XbyakAllocator* allocator) :
X64Emitter(backend, allocator) {
}
X64ThunkEmitter::~X64ThunkEmitter() {
}
// Builds the host->guest trampoline. Host code calls it with
// (target, arg0, arg1) in rcx/rdx/r8 per the Win64 convention; it saves all
// callee-saved GPRs, moves the arguments into the guest convention
// (rax = target, rcx = arg0/context, rdx = arg1/membase), calls the generated
// guest code, restores registers, and returns (guest leaves its result in
// rax, which passes through the ret untouched).
HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
// rcx = target
// rdx = arg0
// r8 = arg1
const size_t stack_size = StackLayout::THUNK_STACK_SIZE;
// rsp + 0 = return address
// Spill rcx/rdx into their caller-provided home slots (rsp+8/rsp+16) before
// the frame is allocated; they are reloaded after the frame is torn down.
mov(qword[rsp + 8 * 2], rdx);
mov(qword[rsp + 8 * 1], rcx);
sub(rsp, stack_size);
// Save all callee-saved GPRs at the offsets documented in StackLayout.
mov(qword[rsp + 56], rbx);
mov(qword[rsp + 64], rbp);
mov(qword[rsp + 72], rsi);
mov(qword[rsp + 80], rdi);
mov(qword[rsp + 88], r12);
mov(qword[rsp + 96], r13);
mov(qword[rsp + 104], r14);
mov(qword[rsp + 112], r15);
// Saving of callee-saved xmm6-15 is stubbed out; the offsets exceed the
// current 120-byte frame (see the ">128b frames" note in the commit message).
/*movaps(ptr[rsp + 128], xmm6);
movaps(ptr[rsp + 144], xmm7);
movaps(ptr[rsp + 160], xmm8);
movaps(ptr[rsp + 176], xmm9);
movaps(ptr[rsp + 192], xmm10);
movaps(ptr[rsp + 208], xmm11);
movaps(ptr[rsp + 224], xmm12);
movaps(ptr[rsp + 240], xmm13);
movaps(ptr[rsp + 256], xmm14);
movaps(ptr[rsp + 272], xmm15);*/
// Shuffle into the guest calling convention: target into rax,
// arg0 (context) into rcx, arg1 (membase) into rdx.
mov(rax, rcx);
mov(rcx, rdx);
mov(rdx, r8);
call(rax);
/*movaps(xmm6, ptr[rsp + 128]);
movaps(xmm7, ptr[rsp + 144]);
movaps(xmm8, ptr[rsp + 160]);
movaps(xmm9, ptr[rsp + 176]);
movaps(xmm10, ptr[rsp + 192]);
movaps(xmm11, ptr[rsp + 208]);
movaps(xmm12, ptr[rsp + 224]);
movaps(xmm13, ptr[rsp + 240]);
movaps(xmm14, ptr[rsp + 256]);
movaps(xmm15, ptr[rsp + 272]);*/
// Restore callee-saved GPRs and tear down the frame.
mov(rbx, qword[rsp + 56]);
mov(rbp, qword[rsp + 64]);
mov(rsi, qword[rsp + 72]);
mov(rdi, qword[rsp + 80]);
mov(r12, qword[rsp + 88]);
mov(r13, qword[rsp + 96]);
mov(r14, qword[rsp + 104]);
mov(r15, qword[rsp + 112]);
add(rsp, stack_size);
// Reload the original rcx/rdx from their home slots (visible again at
// rsp+8/rsp+16 now that the frame is gone).
mov(rcx, qword[rsp + 8 * 1]);
mov(rdx, qword[rsp + 8 * 2]);
ret();
// Place into the code cache; stack_size is used to emit unwind info.
void* fn = Emplace(stack_size);
return (HostToGuestThunk)fn;
}
// Builds the guest->host trampoline. Generated guest code calls it with
// rcx = context (already live), rdx = target host function, r8 = arg0,
// r9 = arg1. Callee-saved GPRs are preserved around the host call and the
// host's return value is left in rax for the guest.
GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() {
  // rcx = context
  // rdx = target function
  // r8 = arg0
  // r9 = arg1
  const size_t stack_size = StackLayout::THUNK_STACK_SIZE;
  // rsp + 0 = return address
  // Spill rcx/rdx into their home slots before allocating the frame; they are
  // reloaded after the frame is torn down.
  mov(qword[rsp + 8 * 2], rdx);
  mov(qword[rsp + 8 * 1], rcx);
  sub(rsp, stack_size);
  // Save callee-saved GPRs at the offsets documented in StackLayout.
  mov(qword[rsp + 56], rbx);
  mov(qword[rsp + 64], rbp);
  mov(qword[rsp + 72], rsi);
  mov(qword[rsp + 80], rdi);
  mov(qword[rsp + 88], r12);
  mov(qword[rsp + 96], r13);
  mov(qword[rsp + 104], r14);
  mov(qword[rsp + 112], r15);
  // TODO(benvanik): save things? XMM0-5?
  // Move the target into rax and shift the args into the Win64 host
  // convention: rcx stays context, rdx = arg0, r8 = arg1.
  mov(rax, rdx);
  mov(rdx, r8);
  mov(r8, r9);
  call(rax);
  // Restore callee-saved GPRs and tear down the frame.
  mov(rbx, qword[rsp + 56]);
  mov(rbp, qword[rsp + 64]);
  mov(rsi, qword[rsp + 72]);
  mov(rdi, qword[rsp + 80]);
  mov(r12, qword[rsp + 88]);
  mov(r13, qword[rsp + 96]);
  mov(r14, qword[rsp + 104]);
  mov(r15, qword[rsp + 112]);
  add(rsp, stack_size);
  mov(rcx, qword[rsp + 8 * 1]);
  mov(rdx, qword[rsp + 8 * 2]);
  ret();
  void* fn = Emplace(stack_size);
  // Fixed: was cast to HostToGuestThunk (copy-paste from the host->guest
  // emitter); it only compiled because the two typedefs share a signature.
  return (GuestToHostThunk)fn;
}

View File

@ -0,0 +1,124 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_CPU_X64_X64_THUNK_EMITTER_H_
#define XENIA_CPU_X64_X64_THUNK_EMITTER_H_
#include <alloy/core.h>
#include <alloy/backend/x64/x64_backend.h>
#include <alloy/backend/x64/x64_emitter.h>
namespace alloy {
namespace backend {
namespace x64 {
/**
* Stack Layout
* ----------------------------
* NOTE: stack must always be 16b aligned.
*
* +------------------+
* | scratch, 56b | rsp + 0
* | |
* | .... |
* | |
* | |
* +------------------+
* | rbx | rsp + 56
* +------------------+
* | rbp | rsp + 64
* +------------------+
* | rsi | rsp + 72
* +------------------+
* | rdi | rsp + 80
* +------------------+
* | r12 | rsp + 88
* +------------------+
* | r13 | rsp + 96
* +------------------+
* | r14 | rsp + 104
* +------------------+
* | r15 | rsp + 112
* +------------------+
* | (return address) | rsp + 120
* +------------------+
* | (rcx home) | rsp + 128
* +------------------+
* | (rdx home) | rsp + 136
* +------------------+
*
*
* TODO:
* +------------------+
* | xmm6 | rsp + 128
* | |
* +------------------+
* | xmm7 | rsp + 144
* | |
* +------------------+
* | xmm8 | rsp + 160
* | |
* +------------------+
* | xmm9 | rsp + 176
* | |
* +------------------+
* | xmm10 | rsp + 192
* | |
* +------------------+
* | xmm11 | rsp + 208
* | |
* +------------------+
* | xmm12 | rsp + 224
* | |
* +------------------+
* | xmm13 | rsp + 240
* | |
* +------------------+
* | xmm14 | rsp + 256
* | |
* +------------------+
* | xmm15 | rsp + 272
* | |
* +------------------+
*
*/
// Byte sizes/offsets describing the stack frame shared by generated guest
// code and the host<->guest transition thunks (see the diagram above).
// All offsets are relative to rsp after the prolog's `sub rsp, frame size`.
class StackLayout {
public:
  // Frame bytes reserved by generated guest functions.
  static const size_t GUEST_STACK_SIZE = 120;
  // Frame bytes reserved by the transition thunks.
  static const size_t THUNK_STACK_SIZE = 120;
  // Offset of the caller's return address (just past the frame).
  static const size_t RETURN_ADDRESS = 120;
  // Offsets of the caller-provided rcx/rdx home slots.
  static const size_t RCX_HOME = 128;
  static const size_t RDX_HOME = 136;
};
// Emits the fixed host<->guest transition stubs. The backend constructs one
// of these during initialization, emits both thunks into the code cache, and
// deletes the emitter; the emitted code outlives it.
class X64ThunkEmitter : public X64Emitter {
public:
X64ThunkEmitter(X64Backend* backend, XbyakAllocator* allocator);
virtual ~X64ThunkEmitter();
// Builds the thunk host code uses to call into a generated guest function,
// saving/restoring callee-saved registers around the call.
HostToGuestThunk EmitHostToGuestThunk();
// Builds the thunk guest code uses to transition into host code.
GuestToHostThunk EmitGuestToHostThunk();
};
} // namespace x64
} // namespace backend
} // namespace alloy
#endif // XENIA_CPU_X64_X64_THUNK_EMITTER_H_