diff --git a/src/alloy/backend/x64/lowering/lowering_sequences.cc b/src/alloy/backend/x64/lowering/lowering_sequences.cc
index 45bbaa32e..576ad65a0 100644
--- a/src/alloy/backend/x64/lowering/lowering_sequences.cc
+++ b/src/alloy/backend/x64/lowering/lowering_sequences.cc
@@ -231,7 +231,7 @@ void IssueCall(X64Emitter& e, FunctionInfo* symbol_info, uint32_t flags) {
 
   // Actually jump/call to rax.
   if (flags & CALL_TAIL) {
-    e.add(e.rsp, StackLayout::GUEST_STACK_SIZE);
+    e.add(e.rsp, (uint32_t)e.stack_size());
     e.jmp(e.rax);
   } else {
     e.call(e.rax);
@@ -250,7 +250,7 @@ void IssueCallIndirect(X64Emitter& e, Value* target, uint32_t flags) {
 
   // Actually jump/call to rax.
   if (flags & CALL_TAIL) {
-    e.add(e.rsp, StackLayout::GUEST_STACK_SIZE);
+    e.add(e.rsp, (uint32_t)e.stack_size());
     e.jmp(e.rax);
   } else {
     e.call(e.rax);
@@ -397,6 +397,7 @@ table->AddSequence(OPCODE_CALL_EXTERN, [](X64Emitter& e, Instr*& i) {
       e.mov(e.r8, (uint64_t)symbol_info->extern_arg0());
       e.mov(e.r9, (uint64_t)symbol_info->extern_arg1());
       TransitionToHost(e);
+      ReloadRDX(e);
     }
     i = e.Advance(i);
     return true;
diff --git a/src/alloy/backend/x64/lowering/op_utils.inl b/src/alloy/backend/x64/lowering/op_utils.inl
index 3e0ed6789..5ec86b65d 100644
--- a/src/alloy/backend/x64/lowering/op_utils.inl
+++ b/src/alloy/backend/x64/lowering/op_utils.inl
@@ -28,7 +28,7 @@ namespace {
 void LoadEflags(X64Emitter& e) {
 #if STORE_EFLAGS
   e.mov(e.eax, e.dword[e.rsp + STASH_OFFSET]);
-  e.push(e.ax);
+  e.push(e.rax);
   e.popf();
 #else
   // EFLAGS already present.
@@ -37,7 +37,7 @@ void LoadEflags(X64Emitter& e) {
 void StoreEflags(X64Emitter& e) {
 #if STORE_EFLAGS
   e.pushf();
-  e.pop(e.word[e.rsp + STASH_OFFSET]);
+  e.pop(e.qword[e.rsp + STASH_OFFSET]);
 #else
   // EFLAGS should have CA set?
   // (so long as we don't fuck with it)
@@ -84,7 +84,7 @@ void MovMem64(X64Emitter& e, RegExp& addr, uint64_t v) {
 void CallNative(X64Emitter& e, void* target) {
   e.mov(e.rax, (uint64_t)target);
   e.call(e.rax);
-  e.mov(e.rcx, e.qword[e.rsp + StackLayout::RCX_HOME]);
+  e.mov(e.rcx, e.qword[e.rsp + StackLayout::GUEST_RCX_HOME]);
   e.mov(e.rdx, e.qword[e.rcx + 8]); // membase
 }
 
diff --git a/src/alloy/backend/x64/x64_code_cache.cc b/src/alloy/backend/x64/x64_code_cache.cc
index 2b2bf322d..7282c2e23 100644
--- a/src/alloy/backend/x64/x64_code_cache.cc
+++ b/src/alloy/backend/x64/x64_code_cache.cc
@@ -267,10 +267,7 @@ void X64CodeChunk::AddTableEntry(uint8_t* code, size_t code_size,
     unwind_code.OpInfo = stack_size / 8 - 1;
   } else {
     // TODO(benvanik): take as parameters?
-    uint8_t prolog_size = 17;
-
-    // This doesn't work, for some reason.
-    XEASSERTALWAYS();
+    uint8_t prolog_size = 7;
 
     // http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
     UNWIND_INFO* unwind_info = (UNWIND_INFO*)(buffer + unwind_info_offset);
@@ -284,7 +281,7 @@ void X64CodeChunk::AddTableEntry(uint8_t* code, size_t code_size,
     // http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx
     size_t co = 0;
     auto& unwind_code = unwind_info->UnwindCode[co++];
-    unwind_code.CodeOffset = 17; // end of instruction + 1 == offset of next instruction
+    unwind_code.CodeOffset = 7; // end of instruction + 1 == offset of next instruction
     unwind_code.UnwindOp = UWOP_ALLOC_LARGE;
     unwind_code.OpInfo = 0;
     unwind_code = unwind_info->UnwindCode[co++];
diff --git a/src/alloy/backend/x64/x64_emitter.cc b/src/alloy/backend/x64/x64_emitter.cc
index 5aee9fb59..c8ddaddb4 100644
--- a/src/alloy/backend/x64/x64_emitter.cc
+++ b/src/alloy/backend/x64/x64_emitter.cc
@@ -64,14 +64,15 @@ int X64Emitter::Emit(
   }
 
   // Fill the generator with code.
-  int result = Emit(builder);
+  size_t stack_size = 0;
+  int result = Emit(builder, stack_size);
   if (result) {
     return result;
   }
 
   // Copy the final code to the cache and relocate it.
   out_code_size = getSize();
-  out_code_address = Emplace(StackLayout::GUEST_STACK_SIZE);
+  out_code_address = Emplace(stack_size);
 
   // Stash source map.
   if (debug_info_flags & DEBUG_INFO_SOURCE_MAP) {
@@ -99,7 +100,7 @@ void* X64Emitter::Emplace(size_t stack_size) {
 
 #define XEALIGN(value, align) ((value + align - 1) & ~(align - 1))
 
-int X64Emitter::Emit(HIRBuilder* builder) {
+int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) {
   // These are the registers we will not be using. All others are fair game.
   const uint32_t reserved_regs =
       GetRegBit(rax) |  // scratch
@@ -125,7 +126,7 @@ int X64Emitter::Emit(HIRBuilder* builder) {
   // Calculate stack size. We need to align things to their natural sizes.
   // This could be much better (sort by type/etc).
   auto locals = builder->locals();
-  size_t stack_offset = 0;
+  size_t stack_offset = StackLayout::GUEST_STACK_SIZE;
   for (auto it = locals.begin(); it != locals.end(); ++it) {
     auto slot = *it;
     size_t type_size = GetTypeSize(slot->type);
@@ -134,6 +135,9 @@ int X64Emitter::Emit(HIRBuilder* builder) {
     slot->set_constant(stack_offset);
     stack_offset += type_size;
   }
+  // Ensure 16b alignment.
+  stack_offset -= StackLayout::GUEST_STACK_SIZE;
+  stack_offset = XEALIGN(stack_offset, 16);
 
   // Function prolog.
   // Must be 16b aligned.
@@ -147,11 +151,13 @@ int X64Emitter::Emit(HIRBuilder* builder) {
   // X64CodeCache, which dynamically generates exception information.
   // Adding or changing anything here must be matched!
   const bool emit_prolog = true;
-  const size_t stack_size = StackLayout::GUEST_STACK_SIZE;
+  const size_t stack_size = StackLayout::GUEST_STACK_SIZE + stack_offset;
+  XEASSERT((stack_size + 8) % 16 == 0);
+  out_stack_size = stack_size;
+  stack_size_ = stack_size;
   if (emit_prolog) {
-    mov(qword[rsp + 8 * 2], rdx);
-    mov(qword[rsp + 8 * 1], rcx);
-    sub(rsp, stack_size);
+    sub(rsp, (uint32_t)stack_size);
+    mov(qword[rsp + StackLayout::GUEST_RCX_HOME], rcx);
   }
 
   auto lowering_table = backend_->lowering_table();
@@ -187,9 +193,8 @@ int X64Emitter::Emit(HIRBuilder* builder) {
 
   // Function epilog.
   L("epilog");
   if (emit_prolog) {
-    add(rsp, stack_size);
-    mov(rcx, qword[rsp + 8 * 1]);
-    mov(rdx, qword[rsp + 8 * 2]);
+    mov(rcx, qword[rsp + StackLayout::GUEST_RCX_HOME]);
+    add(rsp, (uint32_t)stack_size);
   }
   ret();
diff --git a/src/alloy/backend/x64/x64_emitter.h b/src/alloy/backend/x64/x64_emitter.h
index c5bc51e05..4962dab14 100644
--- a/src/alloy/backend/x64/x64_emitter.h
+++ b/src/alloy/backend/x64/x64_emitter.h
@@ -145,9 +145,11 @@ public:
 
   void MarkSourceOffset(hir::Instr* i);
 
+  size_t stack_size() const { return stack_size_; }
+
 protected:
   void* Emplace(size_t stack_size);
-  int Emit(hir::HIRBuilder* builder);
+  int Emit(hir::HIRBuilder* builder, size_t& out_stack_size);
 
 protected:
   runtime::Runtime* runtime_;
@@ -168,6 +170,8 @@ protected:
 
   size_t source_map_count_;
   Arena source_map_arena_;
+
+  size_t stack_size_;
 };
diff --git a/src/alloy/backend/x64/x64_thunk_emitter.cc b/src/alloy/backend/x64/x64_thunk_emitter.cc
index 0bd7239f6..7fc6fab60 100644
--- a/src/alloy/backend/x64/x64_thunk_emitter.cc
+++ b/src/alloy/backend/x64/x64_thunk_emitter.cc
@@ -38,7 +38,8 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
   mov(qword[rsp + 8 * 1], rcx);
   sub(rsp, stack_size);
 
-  mov(qword[rsp + 56], rbx);
+  mov(qword[rsp + 48], rbx);
+  mov(qword[rsp + 56], rcx);
   mov(qword[rsp + 64], rbp);
   mov(qword[rsp + 72], rsi);
   mov(qword[rsp + 80], rdi);
@@ -74,7 +75,8 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
   movaps(xmm14, ptr[rsp + 256]);
   movaps(xmm15, ptr[rsp + 272]);*/
 
-  mov(rbx, qword[rsp + 56]);
+  mov(rbx, qword[rsp + 48]);
+  mov(rcx, qword[rsp + 56]);
   mov(rbp, qword[rsp + 64]);
   mov(rsi, qword[rsp + 72]);
   mov(rdi, qword[rsp + 80]);
@@ -104,7 +106,8 @@ GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() {
   mov(qword[rsp + 8 * 1], rcx);
   sub(rsp, stack_size);
 
-  mov(qword[rsp + 56], rbx);
+  mov(qword[rsp + 48], rbx);
+  mov(qword[rsp + 56], rcx);
   mov(qword[rsp + 64], rbp);
   mov(qword[rsp + 72], rsi);
   mov(qword[rsp + 80], rdi);
@@ -120,7 +123,8 @@ GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() {
   mov(r8, r9);
   call(rax);
 
-  mov(rbx, qword[rsp + 56]);
+  mov(rbx, qword[rsp + 48]);
+  mov(rcx, qword[rsp + 56]);
   mov(rbp, qword[rsp + 64]);
   mov(rsi, qword[rsp + 72]);
   mov(rdi, qword[rsp + 80]);
@@ -128,7 +132,7 @@ GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() {
   mov(r13, qword[rsp + 96]);
   mov(r14, qword[rsp + 104]);
   mov(r15, qword[rsp + 112]);
-
+  add(rsp, stack_size);
   mov(rcx, qword[rsp + 8 * 1]);
   mov(rdx, qword[rsp + 8 * 2]);
diff --git a/src/alloy/backend/x64/x64_thunk_emitter.h b/src/alloy/backend/x64/x64_thunk_emitter.h
index 6559ab9a5..a9f27650b 100644
--- a/src/alloy/backend/x64/x64_thunk_emitter.h
+++ b/src/alloy/backend/x64/x64_thunk_emitter.h
@@ -25,15 +25,18 @@ namespace x64 {
  * ----------------------------
 * NOTE: stack must always be 16b aligned.
 *
+ * Thunk stack:
 * +------------------+
 * | arg temp, 3 * 8  | rsp + 0
 * |                  |
 * |                  |
 * +------------------+
- * | scratch, 24b     | rsp + 32
+ * | scratch, 16b     | rsp + 32
 * |                  |
 * +------------------+
- * | rbx              | rsp + 56
+ * | rbx              | rsp + 48
+ * +------------------+
+ * | rcx / context    | rsp + 56
 * +------------------+
 * | rbp              | rsp + 64
 * +------------------+
@@ -90,17 +93,30 @@ namespace x64 {
 * |                  |
 * +------------------+
 *
+ * Guest stack:
+ * +------------------+
+ * | arg temp, 3 * 8  | rsp + 0
+ * |                  |
+ * |                  |
+ * +------------------+
+ * | scratch, 32b     | rsp + 32
+ * |                  |
+ * +------------------+
+ * | rcx / context    | rsp + 64
+ * +------------------+
+ * ... locals ...
+ * +------------------+
+ * | (return address) |
+ * +------------------+
+ *
 */
 class StackLayout {
 public:
-  const static size_t GUEST_STACK_SIZE = 120;
-  const static size_t THUNK_STACK_SIZE = 120;
-  const static size_t RETURN_ADDRESS = 120;
-  const static size_t RCX_HOME = 128;
-  const static size_t RDX_HOME = 136;
+  const static size_t GUEST_STACK_SIZE = 72;
+  const static size_t GUEST_RCX_HOME = 64;
 };
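
A note on the new frame math in x64_emitter.cc, since the XEASSERT is easy to misread: StackLayout::GUEST_STACK_SIZE is now 72, which is 8 mod 16, and the locals area on its own is padded up to a 16-byte multiple, so every computed stack_size is 8 mod 16. The `+ 8` in the assert accounts for the 8-byte return address the caller's `call` already pushed; after `sub rsp, stack_size` the stack lands back on a 16-byte boundary. Below is a minimal standalone sketch of the same arithmetic. The local type sizes and the per-slot XEALIGN call are illustrative (the hunk above elides the slot-alignment line); the rest mirrors the emitter.

#include <cassert>
#include <cstddef>
#include <cstdio>

// Copied from x64_emitter.cc.
#define XEALIGN(value, align) ((value + align - 1) & ~(align - 1))

int main() {
  const size_t GUEST_STACK_SIZE = 72;  // fixed header: arg temp + scratch + rcx home

  // Illustrative HIR locals: an i64 slot, an i32 slot, and a v128 slot.
  const size_t type_sizes[] = {8, 4, 16};

  size_t stack_offset = GUEST_STACK_SIZE;  // locals sit above the fixed header
  for (size_t type_size : type_sizes) {
    stack_offset = XEALIGN(stack_offset, type_size);  // natural alignment per slot
    stack_offset += type_size;  // stack_offset before the add is the slot's offset
  }

  // Pad the locals area (alone) out to a 16b multiple, as the diff does.
  stack_offset -= GUEST_STACK_SIZE;
  stack_offset = XEALIGN(stack_offset, 16);

  const size_t stack_size = GUEST_STACK_SIZE + stack_offset;
  // 72 % 16 == 8 and the locals are a 16b multiple, so stack_size % 16 == 8.
  // The caller's `call` already pushed an 8-byte return address, so after
  // `sub rsp, stack_size` the stack is 16b aligned again.
  assert((stack_size + 8) % 16 == 0);
  printf("stack_size = %zu\n", stack_size);  // 120 for this set of locals
  return 0;
}

With these three example locals the frame comes out to 120 bytes, which happens to be exactly what the old fixed GUEST_STACK_SIZE scheme always reserved.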
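On the x64_code_cache.cc change: prolog_size drops from 17 to 7 because the prolog is now the single `sub rsp, imm32`, which encodes as REX.W + 81 /5 + imm32, 7 bytes. The old 17 covered the two 5-byte home stores (`mov [rsp+16], rdx` / `mov [rsp+8], rcx`) plus the sub. The sketch below shows the two unwind nodes the cache has to emit for that one instruction; the union is abbreviated from the MSDN UNWIND_CODE definition linked in the source, and WriteAllocLarge is a hypothetical helper for illustration, not code in the tree.

#include <cstdint>

// Abbreviated from the MSDN UNWIND_CODE definition (ck9asaa9.aspx).
union UNWIND_CODE {
  struct {
    uint8_t CodeOffset;    // prolog offset of the end of this op's instruction
    uint8_t UnwindOp : 4;  // UWOP_* opcode
    uint8_t OpInfo : 4;    // opcode-specific info
  };
  uint16_t FrameOffset;    // scaled data slot used by multi-node ops
};

enum { UWOP_ALLOC_LARGE = 1 };

// Hypothetical helper: fills the two nodes describing `sub rsp, stack_size`
// for allocations up to 512K-8 bytes (the form used here).
void WriteAllocLarge(UNWIND_CODE* codes, uint32_t stack_size) {
  codes[0].CodeOffset = 7;  // end of the 7-byte sub + 1 == offset of next instruction
  codes[0].UnwindOp = UWOP_ALLOC_LARGE;
  codes[0].OpInfo = 0;      // 0 => next node holds the allocation size / 8
  codes[1].FrameOffset = (uint16_t)(stack_size / 8);
}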
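Finally, the shape of the emitted guest frame, and why the tail-call paths in lowering_sequences.cc switch from StackLayout::GUEST_STACK_SIZE to e.stack_size(): the frame torn down before a tail jmp is now per-function sized, so the old fixed constant would unbalance rsp for any function with locals. This is an illustrative Xbyak-style sketch (GuestFrameSketch is hypothetical; 64 and 72 are the new GUEST_RCX_HOME and GUEST_STACK_SIZE), not the emitter's own code path.

#include "xbyak/xbyak.h"

// Shows the frame shape only; real codegen goes through X64Emitter.
struct GuestFrameSketch : Xbyak::CodeGenerator {
  explicit GuestFrameSketch(uint32_t stack_size) {
    // Prolog: a single sub (7 bytes in its imm32 form, which is what the
    // unwind info assumes), then home rcx (the context pointer) inside the
    // new frame instead of the caller's shadow space.
    sub(rsp, stack_size);
    mov(qword[rsp + 64], rcx);  // StackLayout::GUEST_RCX_HOME
    // ... function body; locals start at rsp + 72 (GUEST_STACK_SIZE) ...
    // Epilog: reload rcx while the frame is still live, then release it.
    mov(rcx, qword[rsp + 64]);
    add(rsp, stack_size);
    ret();
    // A tail call replaces the epilog with:
    //   add(rsp, stack_size);  // this function's size, hence e.stack_size()
    //   jmp(rax);
  }
};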