[x64] Fix up unwind info for emitted functions.

- [x64] Track size of code within emitted functions (prolog, body, epilog).
- [x64] Don't use hardcoded prolog size in generated unwind info.
- [x64] Update URLs to MSDN documentation on UNWIND_INFO/UNWIND_CODE.
This commit is contained in:
gibbed 2019-08-23 05:34:19 -05:00 committed by Rick Gibbed
parent 918a7d4365
commit 0dc4a13db3
6 changed files with 166 additions and 54 deletions

View File

@ -407,13 +407,25 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
// rdx = arg0 (context)
// r8 = arg1 (guest return address)
struct _code_offsets {
size_t prolog;
size_t body;
size_t epilog;
size_t tail;
} code_offsets = {};
const size_t stack_size = StackLayout::THUNK_STACK_SIZE;
code_offsets.prolog = getSize();
// rsp + 0 = return address
mov(qword[rsp + 8 * 3], r8);
mov(qword[rsp + 8 * 2], rdx);
mov(qword[rsp + 8 * 1], rcx);
sub(rsp, stack_size);
code_offsets.body = getSize();
// Save nonvolatile registers.
EmitSaveNonvolatileRegs();
@ -424,13 +436,26 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
EmitLoadNonvolatileRegs();
code_offsets.epilog = getSize();
add(rsp, stack_size);
mov(rcx, qword[rsp + 8 * 1]);
mov(rdx, qword[rsp + 8 * 2]);
mov(r8, qword[rsp + 8 * 3]);
ret();
void* fn = Emplace(stack_size);
code_offsets.tail = getSize();
assert_zero(code_offsets.prolog);
EmitFunctionInfo func_info = {};
func_info.code_size.total = getSize();
func_info.code_size.prolog = code_offsets.body - code_offsets.prolog;
func_info.code_size.body = code_offsets.epilog - code_offsets.body;
func_info.code_size.epilog = code_offsets.tail - code_offsets.epilog;
func_info.code_size.tail = getSize() - code_offsets.tail;
func_info.stack_size = stack_size;
void* fn = Emplace(func_info);
return (HostToGuestThunk)fn;
}
@ -440,10 +465,22 @@ GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() {
// r8 = arg1
// r9 = arg2
struct _code_offsets {
size_t prolog;
size_t body;
size_t epilog;
size_t tail;
} code_offsets = {};
const size_t stack_size = StackLayout::THUNK_STACK_SIZE;
code_offsets.prolog = getSize();
// rsp + 0 = return address
sub(rsp, stack_size);
code_offsets.body = getSize();
// Save off volatile registers.
EmitSaveVolatileRegs();
@ -453,10 +490,23 @@ GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() {
EmitLoadVolatileRegs();
code_offsets.epilog = getSize();
add(rsp, stack_size);
ret();
void* fn = Emplace(stack_size);
code_offsets.tail = getSize();
assert_zero(code_offsets.prolog);
EmitFunctionInfo func_info = {};
func_info.code_size.total = getSize();
func_info.code_size.prolog = code_offsets.body - code_offsets.prolog;
func_info.code_size.body = code_offsets.epilog - code_offsets.body;
func_info.code_size.epilog = code_offsets.tail - code_offsets.epilog;
func_info.code_size.tail = getSize() - code_offsets.tail;
func_info.stack_size = stack_size;
void* fn = Emplace(func_info);
return (GuestToHostThunk)fn;
}
@ -466,11 +516,23 @@ extern "C" uint64_t ResolveFunction(void* raw_context, uint32_t target_address);
ResolveFunctionThunk X64ThunkEmitter::EmitResolveFunctionThunk() {
// ebx = target PPC address
// rcx = context
struct _code_offsets {
size_t prolog;
size_t body;
size_t epilog;
size_t tail;
} code_offsets = {};
const size_t stack_size = StackLayout::THUNK_STACK_SIZE;
code_offsets.prolog = getSize();
// rsp + 0 = return address
sub(rsp, stack_size);
code_offsets.body = getSize();
// Save volatile registers
EmitSaveVolatileRegs();
@ -481,10 +543,23 @@ ResolveFunctionThunk X64ThunkEmitter::EmitResolveFunctionThunk() {
EmitLoadVolatileRegs();
code_offsets.epilog = getSize();
add(rsp, stack_size);
jmp(rax);
void* fn = Emplace(stack_size);
code_offsets.tail = getSize();
assert_zero(code_offsets.prolog);
EmitFunctionInfo func_info = {};
func_info.code_size.total = getSize();
func_info.code_size.prolog = code_offsets.body - code_offsets.prolog;
func_info.code_size.body = code_offsets.epilog - code_offsets.body;
func_info.code_size.epilog = code_offsets.tail - code_offsets.epilog;
func_info.code_size.tail = getSize() - code_offsets.tail;
func_info.stack_size = stack_size;
void* fn = Emplace(func_info);
return (ResolveFunctionThunk)fn;
}

View File

@ -125,15 +125,14 @@ void X64CodeCache::CommitExecutableRange(uint32_t guest_low,
}
void* X64CodeCache::PlaceHostCode(uint32_t guest_address, void* machine_code,
size_t code_size, size_t stack_size) {
const EmitFunctionInfo& func_info) {
// Same for now. We may use different pools or whatnot later on, like when
// we only want to place guest code in a serialized cache on disk.
return PlaceGuestCode(guest_address, machine_code, code_size, stack_size,
nullptr);
return PlaceGuestCode(guest_address, machine_code, func_info, nullptr);
}
void* X64CodeCache::PlaceGuestCode(uint32_t guest_address, void* machine_code,
size_t code_size, size_t stack_size,
const EmitFunctionInfo& func_info,
GuestFunction* function_info) {
// Hold a lock while we bump the pointers up. This is important as the
// unwind table requires entries AND code to be sorted in order.
@ -149,7 +148,7 @@ void* X64CodeCache::PlaceGuestCode(uint32_t guest_address, void* machine_code,
// Reserve code.
// Always move the code to land on 16b alignment.
code_address = generated_code_base_ + generated_code_offset_;
generated_code_offset_ += xe::round_up(code_size, 16);
generated_code_offset_ += xe::round_up(func_info.code_size.total, 16);
// Reserve unwind info.
// We go on the high size of the unwind info as we don't know how big we
@ -187,15 +186,17 @@ void* X64CodeCache::PlaceGuestCode(uint32_t guest_address, void* machine_code,
old_commit_mark, new_commit_mark));
// Copy code.
std::memcpy(code_address, machine_code, code_size);
std::memcpy(code_address, machine_code, func_info.code_size.total);
// Fill unused slots with 0xCC
std::memset(
code_address + code_size, 0xCC,
xe::round_up(code_size + unwind_reservation.data_size, 16) - code_size);
code_address + func_info.code_size.total, 0xCC,
xe::round_up(func_info.code_size.total + unwind_reservation.data_size,
16) -
func_info.code_size.total);
// Notify subclasses of placed code.
PlaceCode(guest_address, machine_code, code_size, stack_size, code_address,
PlaceCode(guest_address, machine_code, func_info, code_address,
unwind_reservation);
}

View File

@ -25,6 +25,17 @@ namespace cpu {
namespace backend {
namespace x64 {
// Describes one emitted function for the code cache: how many bytes of
// machine code each region occupies and how much stack the prolog reserves.
// All fields default to zero so an instance is never read uninitialized, even
// when it is declared without `= {}`.
struct EmitFunctionInfo {
  // Byte sizes of the consecutive code regions of the emitted function.
  struct _code_size {
    // Bytes from the start of the function up to the start of the body.
    // Written into UNWIND_INFO::SizeOfProlog, so it has to fit in a uint8_t.
    size_t prolog = 0;
    // Bytes between the end of the prolog and the start of the epilog.
    size_t body = 0;
    // Bytes of the epilog (stack restore and the final ret/jmp).
    size_t epilog = 0;
    // Bytes emitted after the epilog (e.g. trailing nop padding); may be 0.
    size_t tail = 0;
    // Size of the whole emitted function in bytes. This is what gets copied
    // into the code cache and what bounds the function table entry.
    size_t total = 0;
  } code_size;
  // Bytes the prolog subtracts from rsp. Zero means no stack allocation, and
  // the value selects which unwind code form is generated.
  size_t stack_size = 0;
};
class X64CodeCache : public CodeCache {
public:
~X64CodeCache() override;
@ -48,9 +59,9 @@ class X64CodeCache : public CodeCache {
void CommitExecutableRange(uint32_t guest_low, uint32_t guest_high);
void* PlaceHostCode(uint32_t guest_address, void* machine_code,
size_t code_size, size_t stack_size);
const EmitFunctionInfo& func_info);
void* PlaceGuestCode(uint32_t guest_address, void* machine_code,
size_t code_size, size_t stack_size,
const EmitFunctionInfo& func_info,
GuestFunction* function_info);
uint32_t PlaceData(const void* data, size_t length);
@ -84,8 +95,7 @@ class X64CodeCache : public CodeCache {
return UnwindReservation();
}
virtual void PlaceCode(uint32_t guest_address, void* machine_code,
size_t code_size, size_t stack_size,
void* code_address,
const EmitFunctionInfo& func_info, void* code_address,
UnwindReservation unwind_reservation) {}
std::wstring file_name_;

View File

@ -112,13 +112,13 @@ class Win32X64CodeCache : public X64CodeCache {
private:
UnwindReservation RequestUnwindReservation(uint8_t* entry_address) override;
void PlaceCode(uint32_t guest_address, void* machine_code, size_t code_size,
size_t stack_size, void* code_address,
void PlaceCode(uint32_t guest_address, void* machine_code,
const EmitFunctionInfo& func_info, void* code_address,
UnwindReservation unwind_reservation) override;
void InitializeUnwindEntry(uint8_t* unwind_entry_address,
size_t unwind_table_slot, void* code_address,
size_t code_size, size_t stack_size);
const EmitFunctionInfo& func_info);
// Growable function table system handle.
void* unwind_table_handle_ = nullptr;
@ -222,13 +222,12 @@ Win32X64CodeCache::RequestUnwindReservation(uint8_t* entry_address) {
}
void Win32X64CodeCache::PlaceCode(uint32_t guest_address, void* machine_code,
size_t code_size, size_t stack_size,
const EmitFunctionInfo& func_info,
void* code_address,
UnwindReservation unwind_reservation) {
// Add unwind info.
InitializeUnwindEntry(unwind_reservation.entry_address,
unwind_reservation.table_slot, code_address, code_size,
stack_size);
unwind_reservation.table_slot, code_address, func_info);
if (supports_growable_table_) {
// Notify that the unwind table has grown.
@ -237,29 +236,29 @@ void Win32X64CodeCache::PlaceCode(uint32_t guest_address, void* machine_code,
}
// This isn't needed on x64 (probably), but is convention.
FlushInstructionCache(GetCurrentProcess(), code_address, code_size);
FlushInstructionCache(GetCurrentProcess(), code_address,
func_info.code_size.total);
}
void Win32X64CodeCache::InitializeUnwindEntry(uint8_t* unwind_entry_address,
size_t unwind_table_slot,
void* code_address,
size_t code_size,
size_t stack_size) {
void Win32X64CodeCache::InitializeUnwindEntry(
uint8_t* unwind_entry_address, size_t unwind_table_slot, void* code_address,
const EmitFunctionInfo& func_info) {
auto unwind_info = reinterpret_cast<UNWIND_INFO*>(unwind_entry_address);
UNWIND_CODE* unwind_code = nullptr;
if (!stack_size) {
// https://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
assert_true(func_info.code_size.prolog < 256); // needs to fit into a uint8_t
auto prolog_size = static_cast<uint8_t>(func_info.code_size.prolog);
if (!func_info.stack_size) {
// https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64#struct-unwind_info
unwind_info->Version = 1;
unwind_info->Flags = 0;
unwind_info->SizeOfProlog = 0;
unwind_info->SizeOfProlog = prolog_size;
unwind_info->CountOfCodes = 0;
unwind_info->FrameRegister = 0;
unwind_info->FrameOffset = 0;
} else if (stack_size <= 128) {
uint8_t prolog_size = 4;
// https://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
} else if (func_info.stack_size <= 128) {
// https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64#struct-unwind_info
unwind_info->Version = 1;
unwind_info->Flags = 0;
unwind_info->SizeOfProlog = prolog_size;
@ -267,17 +266,16 @@ void Win32X64CodeCache::InitializeUnwindEntry(uint8_t* unwind_entry_address,
unwind_info->FrameRegister = 0;
unwind_info->FrameOffset = 0;
// https://msdn.microsoft.com/en-us/library/ck9asaa9.aspx
// https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64#struct-unwind_code
unwind_code = &unwind_info->UnwindCode[unwind_info->CountOfCodes++];
unwind_code->CodeOffset =
14; // end of instruction + 1 == offset of next instruction
unwind_code->UnwindOp = UWOP_ALLOC_SMALL;
unwind_code->OpInfo = stack_size / 8 - 1;
unwind_code->OpInfo = func_info.stack_size / 8 - 1;
} else {
// TODO(benvanik): take as parameters?
uint8_t prolog_size = 7;
// https://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
// https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64#struct-unwind_info
unwind_info->Version = 1;
unwind_info->Flags = 0;
unwind_info->SizeOfProlog = prolog_size;
@ -285,16 +283,16 @@ void Win32X64CodeCache::InitializeUnwindEntry(uint8_t* unwind_entry_address,
unwind_info->FrameRegister = 0;
unwind_info->FrameOffset = 0;
// https://msdn.microsoft.com/en-us/library/ck9asaa9.aspx
// https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64#struct-unwind_code
unwind_code = &unwind_info->UnwindCode[unwind_info->CountOfCodes++];
unwind_code->CodeOffset =
7; // end of instruction + 1 == offset of next instruction
unwind_code->UnwindOp = UWOP_ALLOC_LARGE;
unwind_code->OpInfo = 0; // One slot for size
assert_true((stack_size / 8) < 65536u);
assert_true((func_info.stack_size / 8) < 65536u);
unwind_code = &unwind_info->UnwindCode[unwind_info->CountOfCodes++];
unwind_code->FrameOffset = (USHORT)(stack_size) / 8;
unwind_code->FrameOffset = (USHORT)(func_info.stack_size) / 8;
}
if (unwind_info->CountOfCodes % 1) {
@ -307,7 +305,8 @@ void Win32X64CodeCache::InitializeUnwindEntry(uint8_t* unwind_entry_address,
auto& fn_entry = unwind_table_[unwind_table_slot];
fn_entry.BeginAddress =
(DWORD)(reinterpret_cast<uint8_t*>(code_address) - generated_code_base_);
fn_entry.EndAddress = (DWORD)(fn_entry.BeginAddress + code_size);
fn_entry.EndAddress =
(DWORD)(fn_entry.BeginAddress + func_info.code_size.total);
fn_entry.UnwindData = (DWORD)(unwind_entry_address - generated_code_base_);
}

View File

@ -102,14 +102,14 @@ bool X64Emitter::Emit(GuestFunction* function, HIRBuilder* builder,
source_map_arena_.Reset();
// Fill the generator with code.
size_t stack_size = 0;
if (!Emit(builder, &stack_size)) {
EmitFunctionInfo func_info = {};
if (!Emit(builder, func_info)) {
return false;
}
// Copy the final code to the cache and relocate it.
*out_code_size = getSize();
*out_code_address = Emplace(stack_size, function);
*out_code_address = Emplace(func_info, function);
// Stash source map.
source_map_arena_.CloneContents(out_source_map);
@ -117,18 +117,20 @@ bool X64Emitter::Emit(GuestFunction* function, HIRBuilder* builder,
return true;
}
void* X64Emitter::Emplace(size_t stack_size, GuestFunction* function) {
void* X64Emitter::Emplace(const EmitFunctionInfo& func_info,
GuestFunction* function) {
// To avoid changing xbyak, we do a switcharoo here.
// top_ points to the Xbyak buffer, and since we are in AutoGrow mode
// it has pending relocations. We copy the top_ to our buffer, swap the
// pointer, relocate, then return the original scratch pointer for use.
uint8_t* old_address = top_;
void* new_address;
assert_true(func_info.code_size.total == size_);
if (function) {
new_address = code_cache_->PlaceGuestCode(function->address(), top_, size_,
stack_size, function);
new_address = code_cache_->PlaceGuestCode(function->address(), top_,
func_info, function);
} else {
new_address = code_cache_->PlaceHostCode(0, top_, size_, stack_size);
new_address = code_cache_->PlaceHostCode(0, top_, func_info);
}
top_ = reinterpret_cast<uint8_t*>(new_address);
ready();
@ -137,7 +139,7 @@ void* X64Emitter::Emplace(size_t stack_size, GuestFunction* function) {
return new_address;
}
bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) {
bool X64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) {
Xbyak::Label epilog_label;
epilog_label_ = &epilog_label;
@ -159,6 +161,15 @@ bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) {
stack_offset -= StackLayout::GUEST_STACK_SIZE;
stack_offset = xe::align(stack_offset, static_cast<size_t>(16));
struct _code_offsets {
size_t prolog;
size_t body;
size_t epilog;
size_t tail;
} code_offsets = {};
code_offsets.prolog = getSize();
// Function prolog.
// Must be 16b aligned.
// Windows is very strict about the form of this and the epilog:
@ -168,7 +179,7 @@ bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) {
// Adding or changing anything here must be matched!
const size_t stack_size = StackLayout::GUEST_STACK_SIZE + stack_offset;
assert_true((stack_size + 8) % 16 == 0);
*out_stack_size = stack_size;
func_info.stack_size = stack_size;
stack_size_ = stack_size;
sub(rsp, (uint32_t)stack_size);
@ -208,6 +219,8 @@ bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) {
mov(GetMembaseReg(),
qword[GetContextReg() + offsetof(ppc::PPCContext, virtual_membase)]);
code_offsets.body = getSize();
// Body.
auto block = builder->first_block();
while (block) {
@ -236,6 +249,8 @@ bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) {
block = block->next;
}
code_offsets.epilog = getSize();
// Function epilog.
L(epilog_label);
epilog_label_ = nullptr;
@ -244,6 +259,8 @@ bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) {
add(rsp, (uint32_t)stack_size);
ret();
code_offsets.tail = getSize();
if (cvars::emit_source_annotations) {
nop();
nop();
@ -252,6 +269,13 @@ bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) {
nop();
}
assert_zero(code_offsets.prolog);
func_info.code_size.total = getSize();
func_info.code_size.prolog = code_offsets.body - code_offsets.prolog;
func_info.code_size.body = code_offsets.epilog - code_offsets.body;
func_info.code_size.epilog = code_offsets.tail - code_offsets.epilog;
func_info.code_size.tail = getSize() - code_offsets.tail;
return true;
}

View File

@ -39,6 +39,8 @@ namespace x64 {
class X64Backend;
class X64CodeCache;
struct EmitFunctionInfo;
enum RegisterFlags {
REG_DEST = (1 << 0),
REG_ABCD = (1 << 1),
@ -222,8 +224,9 @@ class X64Emitter : public Xbyak::CodeGenerator {
size_t stack_size() const { return stack_size_; }
protected:
void* Emplace(size_t stack_size, GuestFunction* function = nullptr);
bool Emit(hir::HIRBuilder* builder, size_t* out_stack_size);
void* Emplace(const EmitFunctionInfo& func_info,
GuestFunction* function = nullptr);
bool Emit(hir::HIRBuilder* builder, EmitFunctionInfo& func_info);
void EmitGetCurrentThreadId();
void EmitTraceUserCallReturn();