[a64] Reduce function prolog/epilog to 16 bytes

Only `fp` and `lr` need to be stored, so a 16-byte frame record is sufficient.
This commit is contained in:
Wunkolo 2024-05-16 09:22:28 -07:00
parent a54226578e
commit eb0736eb25
3 changed files with 11 additions and 17 deletions

View File

@@ -437,7 +437,9 @@ ResolveFunctionThunk A64ThunkEmitter::EmitResolveFunctionThunk() {
code_offsets.prolog = offset();
// Preserve context register
STP(ZR, X0, SP, PRE_INDEXED, -16);
SUB(SP, SP, stack_size);
code_offsets.prolog_stack_alloc = offset();
@@ -462,6 +464,8 @@ ResolveFunctionThunk A64ThunkEmitter::EmitResolveFunctionThunk() {
// add(rsp, stack_size);
// jmp(rax);
ADD(SP, SP, stack_size);
// Reload context register
LDP(ZR, X0, SP, POST_INDEXED, 16);
BR(X16);

View File

@@ -273,18 +273,18 @@ void Win32A64CodeCache::InitializeUnwindEntry(
// function was called.
// Function frames are generally:
// STP(X29, X30, SP, PRE_INDEXED, -32);
// STP(X29, X30, SP, PRE_INDEXED, -16);
// MOV(X29, XSP);
// SUB(XSP, XSP, stack_size);
// ... function body ...
// ADD(XSP, XSP, stack_size);
// MOV(XSP, X29);
// LDP(X29, X30, SP, POST_INDEXED, 32);
// LDP(X29, X30, SP, POST_INDEXED, 16);
// These opcodes must undo the prolog and put the return address within lr
unwind_info->UnwindCodes[0] = OpAllocL(func_info.stack_size);
unwind_info->UnwindCodes[1] =
UnwindOpWord(UWOP_SET_FP, OpSaveFpLrX(-32), UWOP_END);
UnwindOpWord(UWOP_SET_FP, OpSaveFpLrX(-16), UWOP_END);
// Add entry.
RUNTIME_FUNCTION& fn_entry = unwind_table_[unwind_table_slot];

View File

@@ -201,7 +201,7 @@ bool A64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) {
func_info.stack_size = stack_size;
stack_size_ = stack_size;
STP(X29, X30, SP, PRE_INDEXED, -32);
STP(X29, X30, SP, PRE_INDEXED, -16);
MOV(X29, SP);
SUB(SP, SP, (uint32_t)stack_size);
@@ -287,7 +287,7 @@ bool A64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) {
ADD(SP, SP, (uint32_t)stack_size);
MOV(SP, X29);
LDP(X29, X30, SP, POST_INDEXED, 32);
LDP(X29, X30, SP, POST_INDEXED, 16);
RET();
@@ -447,15 +447,12 @@ void A64Emitter::Call(const hir::Instr* instr, GuestFunction* function) {
EmitTraceUserCallReturn();
// Pass the callers return address over.
// mov(rcx, qword[rsp + StackLayout::GUEST_RET_ADDR]);
LDR(X0, SP, StackLayout::GUEST_RET_ADDR);
// add(rsp, static_cast<uint32_t>(stack_size()));
// jmp(rax);
ADD(SP, SP, static_cast<uint32_t>(stack_size()));
MOV(SP, X29);
LDP(X29, X30, SP, POST_INDEXED, 32);
LDP(X29, X30, SP, POST_INDEXED, 16);
BR(X16);
} else {
@@ -484,19 +481,12 @@ void A64Emitter::CallIndirect(const hir::Instr* instr,
// or a thunk to ResolveAddress.
if (code_cache_->has_indirection_table()) {
if (reg.toW().index() != W17.index()) {
// mov(ebx, reg.cvt32());
MOV(W17, reg.toW());
}
LDR(W16, X17);
// mov(eax, dword[ebx]);
} else {
// Old-style resolve.
// Not too important because indirection table is almost always available.
// mov(rcx, GetContextReg());
// mov(edx, reg.cvt32());
//
// mov(rax, reinterpret_cast<uint64_t>(ResolveFunction));
// call(rax);
MOV(X0, GetContextReg());
MOV(W1, reg.toW());
@@ -516,7 +506,7 @@ void A64Emitter::CallIndirect(const hir::Instr* instr,
ADD(SP, SP, static_cast<uint32_t>(stack_size()));
MOV(SP, X29);
LDP(X29, X30, SP, POST_INDEXED, 32);
LDP(X29, X30, SP, POST_INDEXED, 16);
BR(X16);
} else {