From dc6666d4d2a51f223460ecdf005919330706288e Mon Sep 17 00:00:00 2001
From: Wunkolo
Date: Wed, 8 May 2024 11:34:26 -0700
Subject: [PATCH] [a64] Update guest calling conventions

Guest-function calls will use W17 for indirect calls
---
 src/xenia/cpu/backend/a64/a64_backend.cc     | 73 +++++---------------
 src/xenia/cpu/backend/a64/a64_emitter.cc     | 58 +++++-----------
 src/xenia/cpu/backend/a64/a64_seq_memory.cc  |  3 +-
 src/xenia/cpu/backend/a64/a64_sequences.cc   | 22 ++++--
 src/xenia/cpu/backend/a64/a64_stack_layout.h | 58 +++++++---------
 5 files changed, 81 insertions(+), 133 deletions(-)
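Reviewer note: the heart of this patch is the new dispatch contract. A minimal
sketch of the sequence it standardizes on (illustrative only, using the same
oaknut-style emitter calls as the diffs below): W17 carries the guest PPC
address at every indirect call site, and W16 receives the host entry point
loaded through the indirection table. If the table slot still points at the
resolver thunk rather than compiled code, the thunk can rely on W17 holding
the target address.

    // Sketch, not part of the patch: canonical indirect guest call.
    MOV(W17, function->address());  // W17 = target PPC address (convention)
    LDR(W16, X17);                  // W16 = entry from indirection table slot
    BLR(X16);                       // or BR(X16) when tail-calling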
diff --git a/src/xenia/cpu/backend/a64/a64_backend.cc b/src/xenia/cpu/backend/a64/a64_backend.cc
index db332ec87..60e6c6236 100644
--- a/src/xenia/cpu/backend/a64/a64_backend.cc
+++ b/src/xenia/cpu/backend/a64/a64_backend.cc
@@ -52,8 +52,8 @@ class A64ThunkEmitter : public A64Emitter {
   // Caller saved:
   // Don't assume these registers will survive a subroutine call
-  // x0, v0 is not saved/preserved since this is used to return values from
-  // subroutines x1-x15, x30 | d0-d7 and d16-v31
+  // x0, v0 is not saved for use as arg0/return
+  // x1-x15, x30 | v0-v7 and v16-v31
   void EmitSaveVolatileRegs();
   void EmitLoadVolatileRegs();
@@ -223,47 +223,23 @@ HostToGuestThunk A64ThunkEmitter::EmitHostToGuestThunk() {

   code_offsets.prolog = offset();

-  // mov(qword[rsp + 8 * 3], r8);
-  // mov(qword[rsp + 8 * 2], rdx);
-  // mov(qword[rsp + 8 * 1], rcx);
-  // sub(rsp, stack_size);
-
-  STR(X2, SP, 8 * 3);
-  STR(X1, SP, 8 * 2);
-  STR(X0, SP, 8 * 1);
   SUB(SP, SP, stack_size);

   code_offsets.prolog_stack_alloc = offset();
   code_offsets.body = offset();

-  // Save nonvolatile registers.
   EmitSaveNonvolatileRegs();

-  // mov(rax, rcx);
-  // mov(rsi, rdx);  // context
-  // mov(rcx, r8);  // return address
-  // call(rax);
   MOV(X16, X0);
-  MOV(A64Emitter::GetContextReg(), X1);  // context
-  MOV(X0, X2);  // return address
-
+  MOV(GetContextReg(), X1);  // context
+  MOV(X0, X2);               // return address
   BLR(X16);

   EmitLoadNonvolatileRegs();

   code_offsets.epilog = offset();

-  // add(rsp, stack_size);
-  // mov(rcx, qword[rsp + 8 * 1]);
-  // mov(rdx, qword[rsp + 8 * 2]);
-  // mov(r8, qword[rsp + 8 * 3]);
-  // ret();
   ADD(SP, SP, stack_size);
-  LDR(X0, SP, 8 * 1);
-  LDR(X1, SP, 8 * 2);
-  LDR(X2, SP, 8 * 3);

   RET();
@@ -302,19 +278,13 @@ GuestToHostThunk A64ThunkEmitter::EmitGuestToHostThunk() {

   code_offsets.prolog = offset();

-  // rsp + 0 = return address
-  // sub(rsp, stack_size);
   SUB(SP, SP, stack_size);

   code_offsets.prolog_stack_alloc = offset();
   code_offsets.body = offset();

-  // Save off volatile registers.
   EmitSaveVolatileRegs();

-  // mov(rax, rcx);              // function
-  // mov(rcx, GetContextReg());  // context
-  // call(rax);
   MOV(X16, X0);              // function
   MOV(X0, GetContextReg());  // context
   BLR(X16);
@@ -323,8 +293,6 @@ GuestToHostThunk A64ThunkEmitter::EmitGuestToHostThunk() {

   code_offsets.epilog = offset();

-  // add(rsp, stack_size);
-  // ret();
   ADD(SP, SP, stack_size);
   RET();
@@ -350,11 +318,8 @@ uint64_t ResolveFunction(void* raw_context, uint64_t target_address);

 ResolveFunctionThunk A64ThunkEmitter::EmitResolveFunctionThunk() {
   // Entry:
-  // X0 = target PPC address
-
-  // Resolve Function:
+  // W17 = target PPC address
   // X0 = context
-  // X1 = target PPC address

   struct _code_offsets {
     size_t prolog;
@@ -369,22 +334,20 @@ ResolveFunctionThunk A64ThunkEmitter::EmitResolveFunctionThunk() {
   code_offsets.prolog = offset();

   // rsp + 0 = return address
-  // sub(rsp, stack_size);
   SUB(SP, SP, stack_size);

   code_offsets.prolog_stack_alloc = offset();
   code_offsets.body = offset();

-  // Save volatile registers
   EmitSaveVolatileRegs();

   // mov(rcx, rsi);  // context
   // mov(rdx, rbx);
   // mov(rax, reinterpret_cast<uint64_t>(&ResolveFunction));
   // call(rax)
-  MOV(X1, X0);
   MOV(X0, GetContextReg());  // context
-  MOVP2R(X16, &ResolveFunction);
+  MOV(W1, W17);
+  MOV(X16, reinterpret_cast<uint64_t>(&ResolveFunction));
   BLR(X16);

   EmitLoadVolatileRegs();
@@ -432,7 +395,6 @@ void A64ThunkEmitter::EmitSaveVolatileRegs() {
   STP(Q3, Q4, SP, offsetof(StackLayout::Thunk, xmm[2]));
   STP(Q5, Q6, SP, offsetof(StackLayout::Thunk, xmm[4]));
   STP(Q7, Q16, SP, offsetof(StackLayout::Thunk, xmm[6]));
-  STP(Q7, Q16, SP, offsetof(StackLayout::Thunk, xmm[6]));
   STP(Q17, Q18, SP, offsetof(StackLayout::Thunk, xmm[8]));
   STP(Q19, Q20, SP, offsetof(StackLayout::Thunk, xmm[10]));
   STP(Q21, Q22, SP, offsetof(StackLayout::Thunk, xmm[12]));
@@ -461,7 +423,6 @@ void A64ThunkEmitter::EmitLoadVolatileRegs() {
   LDP(Q3, Q4, SP, offsetof(StackLayout::Thunk, xmm[2]));
   LDP(Q5, Q6, SP, offsetof(StackLayout::Thunk, xmm[4]));
   LDP(Q7, Q16, SP, offsetof(StackLayout::Thunk, xmm[6]));
-  LDP(Q7, Q16, SP, offsetof(StackLayout::Thunk, xmm[6]));
   LDP(Q17, Q18, SP, offsetof(StackLayout::Thunk, xmm[8]));
   LDP(Q19, Q20, SP, offsetof(StackLayout::Thunk, xmm[10]));
   LDP(Q21, Q22, SP, offsetof(StackLayout::Thunk, xmm[12]));
@@ -480,10 +441,12 @@ void A64ThunkEmitter::EmitSaveNonvolatileRegs() {
   STP(X27, X28, SP, offsetof(StackLayout::Thunk, r[8]));
   STP(X29, X30, SP, offsetof(StackLayout::Thunk, r[10]));

-  STP(Q8, Q9, SP, offsetof(StackLayout::Thunk, xmm[0]));
-  STP(Q10, Q11, SP, offsetof(StackLayout::Thunk, xmm[2]));
-  STP(Q12, Q13, SP, offsetof(StackLayout::Thunk, xmm[4]));
-  STP(Q14, Q15, SP, offsetof(StackLayout::Thunk, xmm[6]));
+  STR(X17, SP, offsetof(StackLayout::Thunk, r[12]));
+
+  STP(D8, D9, SP, offsetof(StackLayout::Thunk, xmm[0]));
+  STP(D10, D11, SP, offsetof(StackLayout::Thunk, xmm[1]));
+  STP(D12, D13, SP, offsetof(StackLayout::Thunk, xmm[2]));
+  STP(D14, D15, SP, offsetof(StackLayout::Thunk, xmm[3]));
 }

 void A64ThunkEmitter::EmitLoadNonvolatileRegs() {
@@ -494,10 +457,12 @@ void A64ThunkEmitter::EmitLoadNonvolatileRegs() {
   LDP(X27, X28, SP, offsetof(StackLayout::Thunk, r[8]));
   LDP(X29, X30, SP, offsetof(StackLayout::Thunk, r[10]));

-  LDP(Q8, Q9, SP, offsetof(StackLayout::Thunk, xmm[0]));
-  LDP(Q10, Q11, SP, offsetof(StackLayout::Thunk, xmm[2]));
-  LDP(Q12, Q13, SP, offsetof(StackLayout::Thunk, xmm[4]));
-  LDP(Q14, Q15, SP, offsetof(StackLayout::Thunk, xmm[6]));
+  LDR(X17, SP, offsetof(StackLayout::Thunk, r[12]));
+
+  LDP(D8, D9, SP, offsetof(StackLayout::Thunk, xmm[0]));
+  LDP(D10, D11, SP, offsetof(StackLayout::Thunk, xmm[1]));
+  LDP(D12, D13, SP, offsetof(StackLayout::Thunk, xmm[2]));
+  LDP(D14, D15, SP, offsetof(StackLayout::Thunk, xmm[3]));
 }

 }  // namespace a64
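Reviewer note on the Q-to-D change above: AAPCS64 only requires the low 64
bits of v8-v15 to survive a call, so the thunk now saves D8-D15 rather than
the full Q registers, and a pair of D registers fits in a single 16-byte
vec128_t slot (hence the slot stride dropping from 2 to 1). A compressed
sketch of the layout math, assuming the StackLayout::Thunk definition later
in this patch:

    // Sketch only: why the slot stride changed.
    //   STP(Q8, Q9, ...)  stores 32 bytes -> used xmm[n] and xmm[n+1]
    //   STP(D8, D9, ...)  stores 16 bytes -> fits entirely inside xmm[n]
    static_assert(sizeof(vec128_t) == 16, "one slot per D-register pair");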
diff --git a/src/xenia/cpu/backend/a64/a64_emitter.cc b/src/xenia/cpu/backend/a64/a64_emitter.cc
index ab7ea125e..d57bae253 100644
--- a/src/xenia/cpu/backend/a64/a64_emitter.cc
+++ b/src/xenia/cpu/backend/a64/a64_emitter.cc
@@ -83,20 +83,6 @@ A64Emitter::A64Emitter(A64Backend* backend)
     feature_flags_ |= (cpu_.has(ext) ? emit : 0); \
   }

-  // TEST_EMIT_FEATURE(kA64EmitAVX2, oaknut::util::Cpu::tAVX2);
-  // TEST_EMIT_FEATURE(kA64EmitFMA, oaknut::util::Cpu::tFMA);
-  // TEST_EMIT_FEATURE(kA64EmitLZCNT, oaknut::util::Cpu::tLZCNT);
-  // TEST_EMIT_FEATURE(kA64EmitBMI1, oaknut::util::Cpu::tBMI1);
-  // TEST_EMIT_FEATURE(kA64EmitBMI2, oaknut::util::Cpu::tBMI2);
-  // TEST_EMIT_FEATURE(kA64EmitF16C, oaknut::util::Cpu::tF16C);
-  // TEST_EMIT_FEATURE(kA64EmitMovbe, oaknut::util::Cpu::tMOVBE);
-  // TEST_EMIT_FEATURE(kA64EmitGFNI, oaknut::util::Cpu::tGFNI);
-  // TEST_EMIT_FEATURE(kA64EmitAVX512F, oaknut::util::Cpu::tAVX512F);
-  // TEST_EMIT_FEATURE(kA64EmitAVX512VL, oaknut::util::Cpu::tAVX512VL);
-  // TEST_EMIT_FEATURE(kA64EmitAVX512BW, oaknut::util::Cpu::tAVX512BW);
-  // TEST_EMIT_FEATURE(kA64EmitAVX512DQ, oaknut::util::Cpu::tAVX512DQ);
-  // TEST_EMIT_FEATURE(kA64EmitAVX512VBMI, oaknut::util::Cpu::tAVX512_VBMI);
-
 #undef TEST_EMIT_FEATURE
 }
@@ -218,15 +204,11 @@ bool A64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) {
   STP(X29, X30, SP, PRE_INDEXED, -32);
   MOV(X29, SP);

-  // sub(rsp, (uint32_t)stack_size);
   SUB(SP, SP, (uint32_t)stack_size);

   code_offsets.prolog_stack_alloc = offset();
   code_offsets.body = offset();

-  // mov(qword[rsp + StackLayout::GUEST_CTX_HOME], GetContextReg());
-  // mov(qword[rsp + StackLayout::GUEST_RET_ADDR], rcx);
-  // mov(qword[rsp + StackLayout::GUEST_CALL_RET_ADDR], 0);
   STR(GetContextReg(), SP, StackLayout::GUEST_CTX_HOME);
   STR(X0, SP, StackLayout::GUEST_RET_ADDR);
   STR(XZR, SP, StackLayout::GUEST_CALL_RET_ADDR);
@@ -260,8 +242,6 @@ bool A64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) {
   }

   // Load membase.
-  // mov(GetMembaseReg(),
-  //     qword[GetContextReg() + offsetof(ppc::PPCContext, virtual_membase)]);
   LDR(GetMembaseReg(), GetContextReg(),
       offsetof(ppc::PPCContext, virtual_membase));
@@ -297,13 +277,10 @@ bool A64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) {
     l(epilog_label);
     epilog_label_ = nullptr;
     EmitTraceUserCallReturn();
-    // mov(GetContextReg(), qword[rsp + StackLayout::GUEST_CTX_HOME]);
     LDR(GetContextReg(), SP, StackLayout::GUEST_CTX_HOME);

     code_offsets.epilog = offset();

-    // add(rsp, (uint32_t)stack_size);
-    // ret();
     ADD(SP, SP, (uint32_t)stack_size);

     MOV(SP, X29);
@@ -342,7 +319,6 @@ void A64Emitter::MarkSourceOffset(const Instr* i) {
   if (cvars::emit_source_annotations) {
     NOP();
     NOP();
-    // mov(eax, entry->guest_address);
     MOV(X0, entry->guest_address);
     NOP();
     NOP();
@@ -451,8 +427,8 @@ void A64Emitter::Call(const hir::Instr* instr, GuestFunction* function) {
     // or a thunk to ResolveAddress.
     // mov(ebx, function->address());
     // mov(eax, dword[ebx]);
-    MOV(W1, function->address());
-    LDR(W16, X1);
+    MOV(W17, function->address());
+    LDR(W16, X17);
   } else {
     // Old-style resolve.
     // Not too important because indirection table is almost always available.
@@ -472,7 +448,11 @@ void A64Emitter::Call(const hir::Instr* instr, GuestFunction* function) {

     // add(rsp, static_cast<size_t>(stack_size()));
     // jmp(rax);
-    ADD(SP, SP, stack_size());
+    ADD(SP, SP, static_cast<uint32_t>(stack_size()));
+
+    MOV(SP, X29);
+    LDP(X29, X30, SP, POST_INDEXED, 32);
+
     BR(X16);
   } else {
     // Return address is from the previous SET_RETURN_ADDRESS.
@@ -499,10 +479,11 @@ void A64Emitter::CallIndirect(const hir::Instr* instr,
   // The target dword will either contain the address of the generated code
   // or a thunk to ResolveAddress.
   if (code_cache_->has_indirection_table()) {
-    if (reg.toW().index() != W1.index()) {
+    if (reg.toW().index() != W17.index()) {
       // mov(ebx, reg.cvt32());
-      MOV(W1, reg.toW());
+      MOV(W17, reg.toW());
     }
+    LDR(W16, X17);
     // mov(eax, dword[ebx]);
   } else {
     // Old-style resolve.
@@ -515,7 +496,7 @@ void A64Emitter::CallIndirect(const hir::Instr* instr,

     MOV(X0, GetContextReg());
     MOV(W1, reg.toW());
-    ADRP(X16, ResolveFunction);
+    MOV(X16, reinterpret_cast<uint64_t>(ResolveFunction));
     BLR(X16);
     MOV(X16, X0);
   }
@@ -526,18 +507,16 @@ void A64Emitter::CallIndirect(const hir::Instr* instr,
     EmitTraceUserCallReturn();

     // Pass the caller's return address over.
-    // mov(rcx, qword[rsp + StackLayout::GUEST_RET_ADDR]);
     LDR(X0, SP, StackLayout::GUEST_RET_ADDR);

-    // add(rsp, static_cast<size_t>(stack_size()));
     ADD(SP, SP, static_cast<uint32_t>(stack_size()));
-    // jmp(rax);
+
+    MOV(SP, X29);
+    LDP(X29, X30, SP, POST_INDEXED, 32);
+
     BR(X16);
   } else {
     // Return address is from the previous SET_RETURN_ADDRESS.
-    // mov(rcx, qword[rsp + StackLayout::GUEST_CALL_RET_ADDR]);
-    // call(rax);
     LDR(X0, SP, StackLayout::GUEST_CALL_RET_ADDR);
     BLR(X16);
@@ -571,7 +550,6 @@ void A64Emitter::CallExtern(const hir::Instr* instr, const Function* function) {
     auto thunk = backend()->guest_to_host_thunk();

     MOV(X16, reinterpret_cast<uint64_t>(thunk));
-
     BLR(X16);

     // x0 = host return
@@ -589,7 +567,6 @@ void A64Emitter::CallExtern(const hir::Instr* instr, const Function* function) {
     auto thunk = backend()->guest_to_host_thunk();

     MOV(X16, reinterpret_cast<uint64_t>(thunk));
-
     BLR(X16);

     // x0 = host return
@@ -612,7 +589,6 @@ void A64Emitter::CallNative(uint64_t (*fn)(void* raw_context, uint64_t arg0)) {

 void A64Emitter::CallNative(uint64_t (*fn)(void* raw_context, uint64_t arg0),
                             uint64_t arg0) {
-  // mov(GetNativeParam(0), arg0);
   MOV(GetNativeParam(0), arg0);
   CallNativeSafe(reinterpret_cast<void*>(fn));
 }
@@ -698,7 +674,7 @@ void A64Emitter::MovMem64(const oaknut::XRegSp& addr, intptr_t offset,
   }
 }

-static const vec128_t xmm_consts[] = {
+static const vec128_t v_consts[] = {
     /* VZero                */ vec128f(0.0f),
     /* VOne                 */ vec128f(1.0f),
     /* VOnePD               */ vec128d(1.0),
@@ -813,7 +789,7 @@ static const vec128_t v_consts[] = {

 // First location to try and place constants.
 static const uintptr_t kConstDataLocation = 0x20000000;
-static const uintptr_t kConstDataSize = sizeof(xmm_consts);
+static const uintptr_t kConstDataSize = sizeof(v_consts);
 // Increment the location by this amount for every allocation failure.
 static const uintptr_t kConstDataIncrement = 0x00001000;
@@ -837,7 +813,7 @@ uintptr_t A64Emitter::PlaceConstData() {
   // The pointer must not be greater than 31 bits.
   assert_zero(reinterpret_cast<uintptr_t>(mem) & ~0x7FFFFFFF);
-  std::memcpy(mem, xmm_consts, sizeof(xmm_consts));
+  std::memcpy(mem, v_consts, sizeof(v_consts));
   memory::Protect(mem, kConstDataSize, memory::PageAccess::kReadOnly, nullptr);

   return reinterpret_cast<uintptr_t>(mem);
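Reviewer note on the Call/CallIndirect epilogues above: because Emit() now
establishes a real frame (STP(X29, X30, SP, PRE_INDEXED, -32) followed by
MOV(X29, SP)), a tail call has to unwind that frame before branching. A
minimal sketch of the teardown, assuming the prolog shown in this diff:

    // Sketch only: guest tail-call epilogue paired with the new prolog.
    ADD(SP, SP, static_cast<uint32_t>(stack_size()));  // free locals
    MOV(SP, X29);                         // restore SP to the frame base
    LDP(X29, X30, SP, POST_INDEXED, 32);  // restore caller FP/LR, pop frame
    BR(X16);                              // branch without pushing a new LR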
diff --git a/src/xenia/cpu/backend/a64/a64_seq_memory.cc b/src/xenia/cpu/backend/a64/a64_seq_memory.cc
index 0bf67e8c9..cf31e42b5 100644
--- a/src/xenia/cpu/backend/a64/a64_seq_memory.cc
+++ b/src/xenia/cpu/backend/a64/a64_seq_memory.cc
@@ -33,7 +33,8 @@ XReg ComputeMemoryAddressOffset(A64Emitter& e, const T& guest, const T& offset,
     uint32_t address = static_cast<uint32_t>(guest.constant());
     address += offset_const;
     if (address < 0x80000000) {
-      e.ADD(address_register.toX(), e.GetMembaseReg(), address);
+      e.MOV(address_register.toX(), address);
+      e.ADD(address_register.toX(), e.GetMembaseReg(), address_register.toX());
       return address_register.toX();
     } else {
       if (address >= 0xE0000000 &&
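Reviewer note on the ComputeMemoryAddressOffset change: AArch64 ADD
(immediate) only encodes a 12-bit immediate, optionally shifted left by 12,
so an arbitrary 32-bit guest address cannot be folded into a single ADD.
Materializing the constant first (the emitter's MOV pseudo-instruction,
assumed here to expand to MOVZ/MOVK as needed) and using the
register-register ADD is always encodable. Illustrative sketch, not from
the patch:

    e.ADD(X0, e.GetMembaseReg(), 0xFFF);     // ok: fits in imm12
    e.ADD(X0, e.GetMembaseReg(), 0xFFF000);  // ok: imm12 shifted left by 12
    // e.ADD(X0, e.GetMembaseReg(), 0x82010000);  // not encodable as imm12
    e.MOV(X0, 0x82010000);             // MOVZ/MOVK expansion instead
    e.ADD(X0, e.GetMembaseReg(), X0);  // register form always works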
diff --git a/src/xenia/cpu/backend/a64/a64_sequences.cc b/src/xenia/cpu/backend/a64/a64_sequences.cc
index 5e67a744d..4125d10fa 100644
--- a/src/xenia/cpu/backend/a64/a64_sequences.cc
+++ b/src/xenia/cpu/backend/a64/a64_sequences.cc
@@ -1199,7 +1199,15 @@ void EmitAddCarryXX(A64Emitter& e, const ARGS& i) {
     e.BFI(X1, X0, 61, 1);
     e.MSR(SystemReg::NZCV, X1);
   }
-  e.ADC(i.dest, i.src1, i.src2);
+  SEQ::EmitCommutativeBinaryOp(
+      e, i,
+      [](A64Emitter& e, const REG& dest_src, const REG& src) {
+        e.ADC(dest_src, dest_src, src);
+      },
+      [](A64Emitter& e, const REG& dest_src, int32_t constant) {
+        e.MOV(REG(1), constant);
+        e.ADC(dest_src, dest_src, REG(1));
+      });
 }
 struct ADD_CARRY_I8
     : Sequence<ADD_CARRY_I8, I<OPCODE_ADD_CARRY, I8Op, I8Op, I8Op, I8Op>> {
@@ -1240,7 +1248,8 @@ void EmitSubXX(A64Emitter& e, const ARGS& i) {
         e.SUB(dest_src, dest_src, src);
       },
       [](A64Emitter& e, REG dest_src, int32_t constant) {
-        e.SUB(dest_src, dest_src, constant);
+        e.MOV(REG(1), constant);
+        e.SUB(dest_src, dest_src, REG(1));
       });
 }
 struct SUB_I8 : Sequence<SUB_I8, I<OPCODE_SUB, I8Op, I8Op, I8Op>> {
@@ -2157,7 +2166,8 @@ void EmitAndXX(A64Emitter& e, const ARGS& i) {
         e.AND(dest_src, dest_src, src);
       },
       [](A64Emitter& e, REG dest_src, int32_t constant) {
-        e.AND(dest_src, dest_src, constant);
+        e.MOV(REG(1), constant);
+        e.AND(dest_src, dest_src, REG(1));
       });
 }
 struct AND_I8 : Sequence<AND_I8, I<OPCODE_AND, I8Op, I8Op, I8Op>> {
@@ -2264,7 +2274,8 @@ void EmitOrXX(A64Emitter& e, const ARGS& i) {
         e.ORR(dest_src, dest_src, src);
       },
       [](A64Emitter& e, REG dest_src, int32_t constant) {
-        e.ORR(dest_src, dest_src, constant);
+        e.MOV(REG(1), constant);
+        e.ORR(dest_src, dest_src, REG(1));
       });
 }
 struct OR_I8 : Sequence<OR_I8, I<OPCODE_OR, I8Op, I8Op, I8Op>> {
@@ -2309,7 +2320,8 @@ void EmitXorXX(A64Emitter& e, const ARGS& i) {
         e.EOR(dest_src, dest_src, src);
       },
       [](A64Emitter& e, REG dest_src, int32_t constant) {
-        e.EOR(dest_src, dest_src, constant);
+        e.MOV(REG(1), constant);
+        e.EOR(dest_src, dest_src, REG(1));
       });
 }
 struct XOR_I8 : Sequence<XOR_I8, I<OPCODE_XOR, I8Op, I8Op, I8Op>> {
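Reviewer note on the constant-operand lambdas above: ADC has no immediate
form at all on AArch64, and the logical instructions (AND/ORR/EOR) only
accept immediates expressible as repeating bitmask patterns, so an arbitrary
HIR constant is first materialized into the scratch register (REG(1), i.e.
W1/X1, volatile under this backend's convention) and the register-register
encoding is used. Illustrative sketch, not from the patch:

    // A valid bitmask immediate is encodable directly:
    e.EOR(W0, W0, 0xFF00FF00);  // repeating 16-bit pattern: ok
    // 0x12345678 is not a valid logical immediate; go through scratch:
    e.MOV(W1, 0x12345678);
    e.EOR(W0, W0, W1);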
diff --git a/src/xenia/cpu/backend/a64/a64_stack_layout.h b/src/xenia/cpu/backend/a64/a64_stack_layout.h
index 72ded80dc..ee8cbcfac 100644
--- a/src/xenia/cpu/backend/a64/a64_stack_layout.h
+++ b/src/xenia/cpu/backend/a64/a64_stack_layout.h
@@ -29,64 +29,58 @@ class StackLayout {
    * Thunk stack:
    *  Non-Volatile         Volatile
    *  +------------------+------------------+
    *  | arg temp, 3 * 8  | arg temp, 3 * 8  | sp + 0x000
    *  |                  |                  |
    *  |                  |                  |
    *  +------------------+------------------+
    *  | rbx              | (unused)         | sp + 0x018
    *  +------------------+------------------+
    *  | rbp              | X1               | sp + 0x020
    *  +------------------+------------------+
    *  | rcx (Win32)      | X2               | sp + 0x028
    *  +------------------+------------------+
    *  | rsi (Win32)      | X3               | sp + 0x030
    *  +------------------+------------------+
    *  | rdi (Win32)      | X4               | sp + 0x038
    *  +------------------+------------------+
    *  | r12              | X5               | sp + 0x040
    *  +------------------+------------------+
    *  | r13              | X6               | sp + 0x048
    *  +------------------+------------------+
    *  | r14              | X7               | sp + 0x050
    *  +------------------+------------------+
    *  | r15              | X8               | sp + 0x058
    *  +------------------+------------------+
    *  | xmm6 (Win32)     | X9               | sp + 0x060
    *  |                  |                  |
    *  +------------------+------------------+
    *  | xmm7 (Win32)     | X10              | sp + 0x070
    *  |                  |                  |
    *  +------------------+------------------+
    *  | xmm8 (Win32)     | X11              | sp + 0x080
    *  |                  |                  |
    *  +------------------+------------------+
    *  | xmm9 (Win32)     | X12              | sp + 0x090
    *  |                  |                  |
    *  +------------------+------------------+
    *  | xmm10 (Win32)    | X13              | sp + 0x0A0
    *  |                  |                  |
    *  +------------------+------------------+
    *  | xmm11 (Win32)    | X14              | sp + 0x0B0
    *  |                  |                  |
    *  +------------------+------------------+
    *  | xmm12 (Win32)    | X15              | sp + 0x0C0
    *  |                  |                  |
    *  +------------------+------------------+
    *  | xmm13 (Win32)    | X16              | sp + 0x0D0
    *  |                  |                  |
    *  +------------------+------------------+
    *  | xmm14 (Win32)    | X17              | sp + 0x0E0
    *  |                  |                  |
    *  +------------------+------------------+
    *  | xmm15 (Win32)    | X18              | sp + 0x0F0
    *  |                  |                  |
    *  +------------------+------------------+
-   *  | (return address) | (return address) | xsp + 0x100
-   *  +------------------+------------------+
-   *  | (rcx home)       | (rcx home)       | xsp + 0x108
-   *  +------------------+------------------+
-   *  | (rdx home)       | (rdx home)       | xsp + 0x110
-   *  +------------------+------------------+
    */
   XEPACKEDSTRUCT(Thunk, {
     uint64_t arg_temp[3];
     uint64_t r[13];
     vec128_t xmm[23];
   });
   static_assert(sizeof(Thunk) % 16 == 0,
                 "sizeof(Thunk) must be a multiple of 16!");
-  static const size_t THUNK_STACK_SIZE = sizeof(Thunk) + 16;
+  static const size_t THUNK_STACK_SIZE = sizeof(Thunk);

   /**
    *
    * Guest stack:
    *  +------------------+
    *  | arg temp, 3 * 8  | sp + 0
    *  |                  |
    *  |                  |
    *  +------------------+
    *  | scratch, 48b     | sp + 32 (kStashOffset)
    *  |                  |
    *  +------------------+
    *  | X0 / context     | sp + 80
    *  +------------------+
    *  | guest ret addr   | sp + 88
    *  +------------------+
    *  | call ret addr    | sp + 96
    *  +------------------+
    *  ... locals ...
    *  +------------------+
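Reviewer note on the Thunk struct: the save/restore code in a64_backend.cc
addresses slots purely by offsetof, so the layout can be pinned with asserts.
A hypothetical sanity check (not in the patch), assuming the member sizes
shown above:

    // arg_temp = 3 * 8 = 24 bytes, r[13] = 104 bytes -> xmm starts at 128.
    static_assert(offsetof(StackLayout::Thunk, r[12]) == 24 + 12 * 8,
                  "X17 home slot sits at the end of the GPR area");
    static_assert(offsetof(StackLayout::Thunk, xmm) % 16 == 0,
                  "vector slots must stay 16-byte aligned for STP of Q regs");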