[a64] Update guest calling conventions
Guest-function calls will use W17 to carry the target address for indirect calls.
parent fd32c0e959
commit dc6666d4d2
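The change in a nutshell: indirect guest calls now stage the target PPC address in W17 (an AArch64 intra-procedure-call scratch register) instead of W1, with W16 holding the host code pointer loaded from the indirection table. A minimal sketch of the emitted pattern, using the same emitter calls that appear in the hunks below (the wrapper function is illustrative, not backend code):

    // Sketch only: the W16/W17 pairing used throughout this commit.
    void EmitIndirectGuestCall(A64Emitter& e, uint32_t guest_address) {
      e.MOV(W17, guest_address);  // target PPC address travels in W17
      e.LDR(W16, X17);            // indirection table: host code ptr -> W16
      e.BR(X16);                  // jump; W17 stays live for the resolver
    }

If the table entry still points at the resolve thunk, W17 carries the guest address into ResolveFunction unchanged, which is why the thunk no longer needs X0 for it.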
@@ -52,8 +52,8 @@ class A64ThunkEmitter : public A64Emitter {
   // Caller saved:
   // Dont assume these registers will survive a subroutine call
-  // x0, v0 is not saved/preserved since this is used to return values from
-  // subroutines x1-x15, x30 | d0-d7 and d16-v31
+  // x0, v0 is not saved for use as arg0/return
+  // x1-x15, x30 | v0-v7 and v16-v31
   void EmitSaveVolatileRegs();
   void EmitLoadVolatileRegs();
@@ -223,47 +223,23 @@ HostToGuestThunk A64ThunkEmitter::EmitHostToGuestThunk() {
   code_offsets.prolog = offset();

-  // mov(qword[rsp + 8 * 3], r8);
-  // mov(qword[rsp + 8 * 2], rdx);
-  // mov(qword[rsp + 8 * 1], rcx);
-  // sub(rsp, stack_size);
   STR(X2, SP, 8 * 3);
   STR(X1, SP, 8 * 2);
   STR(X0, SP, 8 * 1);
   SUB(SP, SP, stack_size);

   code_offsets.prolog_stack_alloc = offset();
   code_offsets.body = offset();

   // Save nonvolatile registers.
   EmitSaveNonvolatileRegs();

-  // mov(rax, rcx);
-  // mov(rsi, rdx);  // context
-  // mov(rcx, r8);   // return address
-  // call(rax);
   MOV(X16, X0);
-  MOV(A64Emitter::GetContextReg(), X1);  // context
-  MOV(X0, X2);                           // return address
+  MOV(GetContextReg(), X1);  // context
+  MOV(X0, X2);               // return address
   BLR(X16);

   EmitLoadNonvolatileRegs();

   code_offsets.epilog = offset();

-  // add(rsp, stack_size);
-  // mov(rcx, qword[rsp + 8 * 1]);
-  // mov(rdx, qword[rsp + 8 * 2]);
-  // mov(r8, qword[rsp + 8 * 3]);
-  // ret();
   ADD(SP, SP, stack_size);
   LDR(X0, SP, 8 * 1);
   LDR(X1, SP, 8 * 2);
   LDR(X2, SP, 8 * 3);

   RET();
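For orientation: after this change the host-to-guest thunk receives X0 = guest code address, X1 = PPC context, X2 = return address, homes X0-X2 above the frame, saves the callee-saved registers, and enters the generated code through X16. A sketch of the host-side signature this register protocol implies (the actual typedef lives outside this diff, so treat the names as assumptions):

    // Hypothetical mirror of the thunk's register protocol:
    //   X0 = target          (generated guest code to enter)
    //   X1 = context         (PPC context, moved into the context register)
    //   X2 = return address  (moved into X0 for the guest)
    using HostToGuestThunk = uint64_t (*)(void* target, void* context,
                                          void* return_address);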
@@ -302,19 +278,13 @@ GuestToHostThunk A64ThunkEmitter::EmitGuestToHostThunk() {
   code_offsets.prolog = offset();

-  // rsp + 0 = return address
-  // sub(rsp, stack_size);
   SUB(SP, SP, stack_size);

   code_offsets.prolog_stack_alloc = offset();
   code_offsets.body = offset();

   // Save off volatile registers.
   EmitSaveVolatileRegs();

-  // mov(rax, rcx);              // function
-  // mov(rcx, GetContextReg());  // context
-  // call(rax);
   MOV(X16, X0);              // function
   MOV(X0, GetContextReg());  // context
   BLR(X16);
@@ -323,8 +293,6 @@ GuestToHostThunk A64ThunkEmitter::EmitGuestToHostThunk() {
   code_offsets.epilog = offset();

-  // add(rsp, stack_size);
-  // ret();
   ADD(SP, SP, stack_size);
   RET();
@@ -350,11 +318,8 @@ uint64_t ResolveFunction(void* raw_context, uint64_t target_address);

 ResolveFunctionThunk A64ThunkEmitter::EmitResolveFunctionThunk() {
   // Entry:
-  //   X0 = target PPC address
+  //   W17 = target PPC address
   // Resolve Function:
   //   X0 = context
   //   X1 = target PPC address

   struct _code_offsets {
     size_t prolog;
@@ -369,22 +334,20 @@ ResolveFunctionThunk A64ThunkEmitter::EmitResolveFunctionThunk() {
   code_offsets.prolog = offset();

-  // rsp + 0 = return address
-  // sub(rsp, stack_size);
   SUB(SP, SP, stack_size);

   code_offsets.prolog_stack_alloc = offset();
   code_offsets.body = offset();

   // Save volatile registers
   EmitSaveVolatileRegs();

-  // mov(rcx, rsi);  // context
-  // mov(rdx, rbx);
-  // mov(rax, reinterpret_cast<uint64_t>(&ResolveFunction));
-  // call(rax)
-  MOV(X1, X0);
   MOV(X0, GetContextReg());  // context
-  MOVP2R(X16, &ResolveFunction);
+  MOV(W1, W17);
+  MOV(X16, reinterpret_cast<uint64_t>(&ResolveFunction));
   BLR(X16);

   EmitLoadVolatileRegs();
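Putting the resolver pieces together: entry from an unresolved indirection-table slot happens with the guest address already staged in W17, so the thunk only has to marshal it into the C calling convention of the ResolveFunction declaration quoted in the hunk header above. A sketch mirroring the replacement lines:

    // Sketch (comments are mine, calls mirror the diff):
    MOV(X0, GetContextReg());  // arg0: PPC context
    MOV(W1, W17);              // arg1: target PPC address from the call site
    MOV(X16, reinterpret_cast<uint64_t>(&ResolveFunction));
    BLR(X16);                  // X0 = host code address for the target

ResolveFunction(context, target) looks up or JITs the host code; the thunk then restores volatiles and presumably branches through the returned pointer (the tail of the thunk falls outside this hunk).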
@@ -432,7 +395,6 @@ void A64ThunkEmitter::EmitSaveVolatileRegs() {
   STP(Q3, Q4, SP, offsetof(StackLayout::Thunk, xmm[2]));
   STP(Q5, Q6, SP, offsetof(StackLayout::Thunk, xmm[4]));
   STP(Q7, Q16, SP, offsetof(StackLayout::Thunk, xmm[6]));
-  STP(Q7, Q16, SP, offsetof(StackLayout::Thunk, xmm[6]));
   STP(Q17, Q18, SP, offsetof(StackLayout::Thunk, xmm[8]));
   STP(Q19, Q20, SP, offsetof(StackLayout::Thunk, xmm[10]));
   STP(Q21, Q22, SP, offsetof(StackLayout::Thunk, xmm[12]));
@@ -461,7 +423,6 @@ void A64ThunkEmitter::EmitLoadVolatileRegs() {
   LDP(Q3, Q4, SP, offsetof(StackLayout::Thunk, xmm[2]));
   LDP(Q5, Q6, SP, offsetof(StackLayout::Thunk, xmm[4]));
   LDP(Q7, Q16, SP, offsetof(StackLayout::Thunk, xmm[6]));
-  LDP(Q7, Q16, SP, offsetof(StackLayout::Thunk, xmm[6]));
   LDP(Q17, Q18, SP, offsetof(StackLayout::Thunk, xmm[8]));
   LDP(Q19, Q20, SP, offsetof(StackLayout::Thunk, xmm[10]));
   LDP(Q21, Q22, SP, offsetof(StackLayout::Thunk, xmm[12]));
@@ -480,10 +441,12 @@ void A64ThunkEmitter::EmitSaveNonvolatileRegs() {
   STP(X27, X28, SP, offsetof(StackLayout::Thunk, r[8]));
   STP(X29, X30, SP, offsetof(StackLayout::Thunk, r[10]));

-  STP(Q8, Q9, SP, offsetof(StackLayout::Thunk, xmm[0]));
-  STP(Q10, Q11, SP, offsetof(StackLayout::Thunk, xmm[2]));
-  STP(Q12, Q13, SP, offsetof(StackLayout::Thunk, xmm[4]));
-  STP(Q14, Q15, SP, offsetof(StackLayout::Thunk, xmm[6]));
+  STR(X17, SP, offsetof(StackLayout::Thunk, r[12]));
+
+  STP(D8, D9, SP, offsetof(StackLayout::Thunk, xmm[0]));
+  STP(D10, D11, SP, offsetof(StackLayout::Thunk, xmm[1]));
+  STP(D12, D13, SP, offsetof(StackLayout::Thunk, xmm[2]));
+  STP(D14, D15, SP, offsetof(StackLayout::Thunk, xmm[3]));
 }

 void A64ThunkEmitter::EmitLoadNonvolatileRegs() {
@@ -494,10 +457,12 @@ void A64ThunkEmitter::EmitLoadNonvolatileRegs() {
   LDP(X27, X28, SP, offsetof(StackLayout::Thunk, r[8]));
   LDP(X29, X30, SP, offsetof(StackLayout::Thunk, r[10]));

-  LDP(Q8, Q9, SP, offsetof(StackLayout::Thunk, xmm[0]));
-  LDP(Q10, Q11, SP, offsetof(StackLayout::Thunk, xmm[2]));
-  LDP(Q12, Q13, SP, offsetof(StackLayout::Thunk, xmm[4]));
-  LDP(Q14, Q15, SP, offsetof(StackLayout::Thunk, xmm[6]));
+  LDR(X17, SP, offsetof(StackLayout::Thunk, r[12]));
+
+  LDP(D8, D9, SP, offsetof(StackLayout::Thunk, xmm[0]));
+  LDP(D10, D11, SP, offsetof(StackLayout::Thunk, xmm[1]));
+  LDP(D12, D13, SP, offsetof(StackLayout::Thunk, xmm[2]));
+  LDP(D14, D15, SP, offsetof(StackLayout::Thunk, xmm[3]));
 }

 }  // namespace a64
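Two things change in the nonvolatile save/restore. First, X17 gains a slot (r[12]) so the staged guest target survives the thunk. Second, the vector saves drop from Q- to D-register pairs: under AAPCS64 a callee must preserve only the low 64 bits of v8-v15 (that is, d8-d15); the upper halves are caller-saved, so spilling full 128-bit Q registers was wasted stack traffic. The slot arithmetic behind the new xmm[] indices, sketched under the assumption that each xmm[] slot is 16 bytes (which the old stride implies):

    // Not part of the diff; why the xmm[] index stride changes.
    #include <cstddef>
    constexpr std::size_t kSlotBytes  = 16;      // one StackLayout xmm[] slot
    constexpr std::size_t kQPairBytes = 2 * 16;  // STP(Q8, Q9, ...) writes 32B
    constexpr std::size_t kDPairBytes = 2 * 8;   // STP(D8, D9, ...) writes 16B
    static_assert(kQPairBytes / kSlotBytes == 2, "Q pairs step two slots");
    static_assert(kDPairBytes / kSlotBytes == 1, "D pairs step one slot");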
@@ -83,20 +83,6 @@ A64Emitter::A64Emitter(A64Backend* backend)
     feature_flags_ |= (cpu_.has(ext) ? emit : 0); \
   }

-  // TEST_EMIT_FEATURE(kA64EmitAVX2, oaknut::util::Cpu::tAVX2);
-  // TEST_EMIT_FEATURE(kA64EmitFMA, oaknut::util::Cpu::tFMA);
-  // TEST_EMIT_FEATURE(kA64EmitLZCNT, oaknut::util::Cpu::tLZCNT);
-  // TEST_EMIT_FEATURE(kA64EmitBMI1, oaknut::util::Cpu::tBMI1);
-  // TEST_EMIT_FEATURE(kA64EmitBMI2, oaknut::util::Cpu::tBMI2);
-  // TEST_EMIT_FEATURE(kA64EmitF16C, oaknut::util::Cpu::tF16C);
-  // TEST_EMIT_FEATURE(kA64EmitMovbe, oaknut::util::Cpu::tMOVBE);
-  // TEST_EMIT_FEATURE(kA64EmitGFNI, oaknut::util::Cpu::tGFNI);
-  // TEST_EMIT_FEATURE(kA64EmitAVX512F, oaknut::util::Cpu::tAVX512F);
-  // TEST_EMIT_FEATURE(kA64EmitAVX512VL, oaknut::util::Cpu::tAVX512VL);
-  // TEST_EMIT_FEATURE(kA64EmitAVX512BW, oaknut::util::Cpu::tAVX512BW);
-  // TEST_EMIT_FEATURE(kA64EmitAVX512DQ, oaknut::util::Cpu::tAVX512DQ);
-  // TEST_EMIT_FEATURE(kA64EmitAVX512VBMI, oaknut::util::Cpu::tAVX512_VBMI);
-
 #undef TEST_EMIT_FEATURE
 }
@@ -218,15 +204,11 @@ bool A64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) {
   STP(X29, X30, SP, PRE_INDEXED, -32);
   MOV(X29, SP);

-  // sub(rsp, (uint32_t)stack_size);
   SUB(SP, SP, (uint32_t)stack_size);

   code_offsets.prolog_stack_alloc = offset();
   code_offsets.body = offset();

-  // mov(qword[rsp + StackLayout::GUEST_CTX_HOME], GetContextReg());
-  // mov(qword[rsp + StackLayout::GUEST_RET_ADDR], rcx);
-  // mov(qword[rsp + StackLayout::GUEST_CALL_RET_ADDR], 0);
   STR(GetContextReg(), SP, StackLayout::GUEST_CTX_HOME);
   STR(X0, SP, StackLayout::GUEST_RET_ADDR);
   STR(XZR, SP, StackLayout::GUEST_CALL_RET_ADDR);
@@ -260,8 +242,6 @@ bool A64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) {
   }

   // Load membase.
-  // mov(GetMembaseReg(),
-  //     qword[GetContextReg() + offsetof(ppc::PPCContext, virtual_membase)]);
   LDR(GetMembaseReg(), GetContextReg(),
       offsetof(ppc::PPCContext, virtual_membase));
@@ -297,13 +277,10 @@ bool A64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) {
   l(epilog_label);
   epilog_label_ = nullptr;
   EmitTraceUserCallReturn();
-  // mov(GetContextReg(), qword[rsp + StackLayout::GUEST_CTX_HOME]);
   LDR(GetContextReg(), SP, StackLayout::GUEST_CTX_HOME);

   code_offsets.epilog = offset();

-  // add(rsp, (uint32_t)stack_size);
-  // ret();
   ADD(SP, SP, (uint32_t)stack_size);

   MOV(SP, X29);
@@ -342,7 +319,6 @@ void A64Emitter::MarkSourceOffset(const Instr* i) {
   if (cvars::emit_source_annotations) {
     NOP();
     NOP();
-    // mov(eax, entry->guest_address);
    MOV(X0, entry->guest_address);
     NOP();
     NOP();
@@ -451,8 +427,8 @@ void A64Emitter::Call(const hir::Instr* instr, GuestFunction* function) {
     // or a thunk to ResolveAddress.
     // mov(ebx, function->address());
     // mov(eax, dword[ebx]);
-    MOV(W1, function->address());
-    LDR(W16, X1);
+    MOV(W17, function->address());
+    LDR(W16, X17);
   } else {
     // Old-style resolve.
     // Not too important because indirection table is almost always available.
@@ -472,7 +448,11 @@ void A64Emitter::Call(const hir::Instr* instr, GuestFunction* function) {

     // add(rsp, static_cast<uint32_t>(stack_size()));
     // jmp(rax);
-    ADD(SP, SP, stack_size());
+    ADD(SP, SP, static_cast<uint32_t>(stack_size()));
+
+    MOV(SP, X29);
+    LDP(X29, X30, SP, POST_INDEXED, 32);
+
     BR(X16);
   } else {
     // Return address is from the previous SET_RETURN_ADDRESS.
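The added MOV(SP, X29) / LDP(X29, X30, SP, POST_INDEXED, 32) pair undoes the frame built in A64Emitter::Emit()'s prolog, so a tail BR into the next function leaves SP, X29, and X30 exactly as the original caller established them. Side by side, as a sketch assembled from the hunks in this commit (comments are mine):

    // Prolog (from the Emit() hunk above):
    STP(X29, X30, SP, PRE_INDEXED, -32);  // push frame pointer + link register
    MOV(X29, SP);                         // establish the frame
    SUB(SP, SP, (uint32_t)stack_size);    // allocate locals

    // Tail-call epilogue (this hunk):
    ADD(SP, SP, static_cast<uint32_t>(stack_size()));  // release locals
    MOV(SP, X29);                         // rewind to the frame base
    LDP(X29, X30, SP, POST_INDEXED, 32);  // pop frame pointer + link register
    BR(X16);                              // tail-jump, no frame of our own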
@@ -499,10 +479,11 @@ void A64Emitter::CallIndirect(const hir::Instr* instr,
   // The target dword will either contain the address of the generated code
   // or a thunk to ResolveAddress.
   if (code_cache_->has_indirection_table()) {
-    if (reg.toW().index() != W1.index()) {
+    if (reg.toW().index() != W17.index()) {
       // mov(ebx, reg.cvt32());
-      MOV(W1, reg.toW());
+      MOV(W17, reg.toW());
     }
+    LDR(W16, X17);
     // mov(eax, dword[ebx]);
   } else {
     // Old-style resolve.
@@ -515,7 +496,7 @@ void A64Emitter::CallIndirect(const hir::Instr* instr,
     MOV(X0, GetContextReg());
     MOV(W1, reg.toW());

-    ADRP(X16, ResolveFunction);
+    MOV(X16, reinterpret_cast<uint64_t>(ResolveFunction));
     BLR(X16);
     MOV(X16, X0);
   }
@@ -526,18 +507,16 @@ void A64Emitter::CallIndirect(const hir::Instr* instr,
     EmitTraceUserCallReturn();

     // Pass the callers return address over.
-    // mov(rcx, qword[rsp + StackLayout::GUEST_RET_ADDR]);
     LDR(X0, SP, StackLayout::GUEST_RET_ADDR);

-    // add(rsp, static_cast<uint32_t>(stack_size()));
     ADD(SP, SP, static_cast<uint32_t>(stack_size()));

-    // jmp(rax);
+    MOV(SP, X29);
+    LDP(X29, X30, SP, POST_INDEXED, 32);
+
     BR(X16);
   } else {
     // Return address is from the previous SET_RETURN_ADDRESS.
-    // mov(rcx, qword[rsp + StackLayout::GUEST_CALL_RET_ADDR]);
-    // call(rax);
     LDR(X0, SP, StackLayout::GUEST_CALL_RET_ADDR);

     BLR(X16);
@@ -571,7 +550,6 @@ void A64Emitter::CallExtern(const hir::Instr* instr, const Function* function) {

     auto thunk = backend()->guest_to_host_thunk();
     MOV(X16, reinterpret_cast<uint64_t>(thunk));
-
     BLR(X16);

     // x0 = host return
@@ -589,7 +567,6 @@ void A64Emitter::CallExtern(const hir::Instr* instr, const Function* function) {

     auto thunk = backend()->guest_to_host_thunk();
     MOV(X16, reinterpret_cast<uint64_t>(thunk));
-
     BLR(X16);

     // x0 = host return
@@ -612,7 +589,6 @@ void A64Emitter::CallNative(uint64_t (*fn)(void* raw_context, uint64_t arg0)) {

 void A64Emitter::CallNative(uint64_t (*fn)(void* raw_context, uint64_t arg0),
                             uint64_t arg0) {
-  // mov(GetNativeParam(0), arg0);
   MOV(GetNativeParam(0), arg0);
   CallNativeSafe(reinterpret_cast<void*>(fn));
 }
@@ -698,7 +674,7 @@ void A64Emitter::MovMem64(const oaknut::XRegSp& addr, intptr_t offset,
   }
 }

-static const vec128_t xmm_consts[] = {
+static const vec128_t v_consts[] = {
     /* VZero  */ vec128f(0.0f),
     /* VOne   */ vec128f(1.0f),
     /* VOnePD */ vec128d(1.0),
@@ -813,7 +789,7 @@ static const vec128_t xmm_consts[] = {

 // First location to try and place constants.
 static const uintptr_t kConstDataLocation = 0x20000000;
-static const uintptr_t kConstDataSize = sizeof(xmm_consts);
+static const uintptr_t kConstDataSize = sizeof(v_consts);

 // Increment the location by this amount for every allocation failure.
 static const uintptr_t kConstDataIncrement = 0x00001000;
@@ -837,7 +813,7 @@ uintptr_t A64Emitter::PlaceConstData() {

   // The pointer must not be greater than 31 bits.
   assert_zero(reinterpret_cast<uintptr_t>(mem) & ~0x7FFFFFFF);
-  std::memcpy(mem, xmm_consts, sizeof(xmm_consts));
+  std::memcpy(mem, v_consts, sizeof(v_consts));
   memory::Protect(mem, kConstDataSize, memory::PageAccess::kReadOnly, nullptr);

   return reinterpret_cast<uintptr_t>(mem);
@@ -33,7 +33,8 @@ XReg ComputeMemoryAddressOffset(A64Emitter& e, const T& guest, const T& offset,
     uint32_t address = static_cast<uint32_t>(guest.constant());
     address += offset_const;
     if (address < 0x80000000) {
-      e.ADD(address_register.toX(), e.GetMembaseReg(), address);
+      e.MOV(address_register.toX(), address);
+      e.ADD(address_register.toX(), e.GetMembaseReg(), address_register.toX());
       return address_register.toX();
     } else {
       if (address >= 0xE0000000 &&
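The single ADD is split in two because AArch64's ADD (immediate) encodes only a 12-bit value, optionally shifted left by 12; an arbitrary guest address below 0x80000000 generally fits neither form. Materializing the constant with MOV (which the assembler expands to a MOVZ/MOVK sequence as needed) and then using the register-register ADD handles any 32-bit value. Sketched with the same names as the hunk above:

    // Encoding constraint, sketched. ADD Xd, Xn, #imm accepts imm in
    // [0, 4095] or [0, 4095] << 12 - e.g. 0x12345678 fits neither, so:
    e.MOV(address_register.toX(), address);       // MOVZ/MOVK expansion
    e.ADD(address_register.toX(), e.GetMembaseReg(),
          address_register.toX());                // membase + guest offset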
@@ -1199,7 +1199,15 @@ void EmitAddCarryXX(A64Emitter& e, const ARGS& i) {
     e.BFI(X1, X0, 61, 1);
     e.MSR(SystemReg::NZCV, X1);
   }
-  e.ADC(i.dest, i.src1, i.src2);
+  SEQ::EmitCommutativeBinaryOp(
+      e, i,
+      [](A64Emitter& e, const REG& dest_src, const REG& src) {
+        e.ADC(dest_src, dest_src, src);
+      },
+      [](A64Emitter& e, const REG& dest_src, int32_t constant) {
+        e.MOV(REG(1), constant);
+        e.ADC(dest_src, dest_src, REG(1));
+      });
 }
 struct ADD_CARRY_I8
     : Sequence<ADD_CARRY_I8, I<OPCODE_ADD_CARRY, I8Op, I8Op, I8Op, I8Op>> {
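OPCODE_ADD_CARRY now goes through the same EmitCommutativeBinaryOp helper as the other ALU sequences, with separate register and constant paths. The constant path materializes the value into scratch register 1 first because ADC has no immediate form on AArch64 at all, and (in the SUB/AND/ORR/EOR hunks below) because the arithmetic and logical immediate encodings cover only restricted value patterns. The shape shared by all of these, as a sketch:

    // The two lambdas EmitCommutativeBinaryOp dispatches to:
    [](A64Emitter& e, const REG& dest_src, const REG& src) {
      e.ADC(dest_src, dest_src, src);   // register-register form
    },
    [](A64Emitter& e, const REG& dest_src, int32_t constant) {
      e.MOV(REG(1), constant);          // materialize: no ADC #imm exists
      e.ADC(dest_src, dest_src, REG(1));
    }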
@@ -1240,7 +1248,8 @@ void EmitSubXX(A64Emitter& e, const ARGS& i) {
         e.SUB(dest_src, dest_src, src);
       },
       [](A64Emitter& e, REG dest_src, int32_t constant) {
-        e.SUB(dest_src, dest_src, constant);
+        e.MOV(REG(1), constant);
+        e.SUB(dest_src, dest_src, REG(1));
       });
 }
 struct SUB_I8 : Sequence<SUB_I8, I<OPCODE_SUB, I8Op, I8Op, I8Op>> {
@@ -2157,7 +2166,8 @@ void EmitAndXX(A64Emitter& e, const ARGS& i) {
         e.AND(dest_src, dest_src, src);
       },
       [](A64Emitter& e, REG dest_src, int32_t constant) {
-        e.AND(dest_src, dest_src, constant);
+        e.MOV(REG(1), constant);
+        e.AND(dest_src, dest_src, REG(1));
       });
 }
 struct AND_I8 : Sequence<AND_I8, I<OPCODE_AND, I8Op, I8Op, I8Op>> {
@@ -2264,7 +2274,8 @@ void EmitOrXX(A64Emitter& e, const ARGS& i) {
         e.ORR(dest_src, dest_src, src);
       },
       [](A64Emitter& e, REG dest_src, int32_t constant) {
-        e.ORR(dest_src, dest_src, constant);
+        e.MOV(REG(1), constant);
+        e.ORR(dest_src, dest_src, REG(1));
       });
 }
 struct OR_I8 : Sequence<OR_I8, I<OPCODE_OR, I8Op, I8Op, I8Op>> {
@@ -2309,7 +2320,8 @@ void EmitXorXX(A64Emitter& e, const ARGS& i) {
         e.EOR(dest_src, dest_src, src);
       },
       [](A64Emitter& e, REG dest_src, int32_t constant) {
-        e.EOR(dest_src, dest_src, constant);
+        e.MOV(REG(1), constant);
+        e.EOR(dest_src, dest_src, REG(1));
       });
 }
 struct XOR_I8 : Sequence<XOR_I8, I<OPCODE_XOR, I8Op, I8Op, I8Op>> {
@@ -29,64 +29,58 @@ class StackLayout {
   * Thunk stack:
   *  Non-Volatile         Volatile
   *  +------------------+------------------+
-  *  | arg temp, 3 * 8  | arg temp, 3 * 8  | xsp + 0x000
+  *  | arg temp, 3 * 8  | arg temp, 3 * 8  | sp + 0x000
   *  |                  |                  |
   *  |                  |                  |
   *  +------------------+------------------+
-  *  | rbx              | (unused)         | xsp + 0x018
+  *  | rbx              | (unused)         | sp + 0x018
   *  +------------------+------------------+
-  *  | rbp              | X1               | xsp + 0x020
+  *  | rbp              | X1               | sp + 0x020
   *  +------------------+------------------+
-  *  | rcx (Win32)      | X2               | xsp + 0x028
+  *  | rcx (Win32)      | X2               | sp + 0x028
   *  +------------------+------------------+
-  *  | rsi (Win32)      | X3               | xsp + 0x030
+  *  | rsi (Win32)      | X3               | sp + 0x030
   *  +------------------+------------------+
-  *  | rdi (Win32)      | X4               | xsp + 0x038
+  *  | rdi (Win32)      | X4               | sp + 0x038
   *  +------------------+------------------+
-  *  | r12              | X5               | xsp + 0x040
+  *  | r12              | X5               | sp + 0x040
   *  +------------------+------------------+
-  *  | r13              | X6               | xsp + 0x048
+  *  | r13              | X6               | sp + 0x048
   *  +------------------+------------------+
-  *  | r14              | X7               | xsp + 0x050
+  *  | r14              | X7               | sp + 0x050
   *  +------------------+------------------+
-  *  | r15              | X8               | xsp + 0x058
+  *  | r15              | X8               | sp + 0x058
   *  +------------------+------------------+
-  *  | xmm6 (Win32)     | X9               | xsp + 0x060
+  *  | xmm6 (Win32)     | X9               | sp + 0x060
   *  |                  |                  |
   *  +------------------+------------------+
-  *  | xmm7 (Win32)     | X10              | xsp + 0x070
+  *  | xmm7 (Win32)     | X10              | sp + 0x070
   *  |                  |                  |
   *  +------------------+------------------+
-  *  | xmm8 (Win32)     | X11              | xsp + 0x080
+  *  | xmm8 (Win32)     | X11              | sp + 0x080
   *  |                  |                  |
   *  +------------------+------------------+
-  *  | xmm9 (Win32)     | X12              | xsp + 0x090
+  *  | xmm9 (Win32)     | X12              | sp + 0x090
   *  |                  |                  |
   *  +------------------+------------------+
-  *  | xmm10 (Win32)    | X13              | xsp + 0x0A0
+  *  | xmm10 (Win32)    | X13              | sp + 0x0A0
   *  |                  |                  |
   *  +------------------+------------------+
-  *  | xmm11 (Win32)    | X14              | xsp + 0x0B0
+  *  | xmm11 (Win32)    | X14              | sp + 0x0B0
   *  |                  |                  |
   *  +------------------+------------------+
-  *  | xmm12 (Win32)    | X15              | xsp + 0x0C0
+  *  | xmm12 (Win32)    | X15              | sp + 0x0C0
   *  |                  |                  |
   *  +------------------+------------------+
-  *  | xmm13 (Win32)    | X16              | xsp + 0x0D0
+  *  | xmm13 (Win32)    | X16              | sp + 0x0D0
   *  |                  |                  |
   *  +------------------+------------------+
-  *  | xmm14 (Win32)    | X17              | xsp + 0x0E0
+  *  | xmm14 (Win32)    | X17              | sp + 0x0E0
   *  |                  |                  |
   *  +------------------+------------------+
-  *  | xmm15 (Win32)    | X18              | xsp + 0x0F0
+  *  | xmm15 (Win32)    | X18              | sp + 0x0F0
   *  |                  |                  |
   *  +------------------+------------------+
-  *  | (return address) | (return address) | xsp + 0x100
-  *  +------------------+------------------+
-  *  | (rcx home)       | (rcx home)       | xsp + 0x108
-  *  +------------------+------------------+
-  *  | (rdx home)       | (rdx home)       | xsp + 0x110
-  *  +------------------+------------------+
   */
  XEPACKEDSTRUCT(Thunk, {
    uint64_t arg_temp[3];
@@ -95,25 +89,25 @@ class StackLayout {
   });
   static_assert(sizeof(Thunk) % 16 == 0,
                 "sizeof(Thunk) must be a multiple of 16!");
-  static const size_t THUNK_STACK_SIZE = sizeof(Thunk) + 16;
+  static const size_t THUNK_STACK_SIZE = sizeof(Thunk);

  /**
   *
   *
   * Guest stack:
   *  +------------------+
-  *  | arg temp, 3 * 8  | xsp + 0
+  *  | arg temp, 3 * 8  | sp + 0
   *  |                  |
   *  |                  |
   *  +------------------+
-  *  | scratch, 48b     | xsp + 32
+  *  | scratch, 48b     | sp + 32 (kStashOffset)
   *  |                  |
   *  +------------------+
-  *  | X0 / context     | xsp + 80
+  *  | X0 / context     | sp + 80
   *  +------------------+
-  *  | guest ret addr   | xsp + 88
+  *  | guest ret addr   | sp + 88
   *  +------------------+
-  *  | call ret addr    | xsp + 96
+  *  | call ret addr    | sp + 96
   *  +------------------+
   *  ... locals ...
   *  +------------------+