From 1cce25d5e7ef617cef4e9b4059f5690a6eecdff8 Mon Sep 17 00:00:00 2001 From: Prism Tutaj Date: Mon, 2 Dec 2019 23:30:41 -0600 Subject: [PATCH] [CPU] Change thunk layout and fix Linux thunk misalignment --- src/xenia/cpu/backend/x64/x64_backend.cc | 67 ++++++---- src/xenia/cpu/backend/x64/x64_stack_layout.h | 129 +++++++++---------- 2 files changed, 101 insertions(+), 95 deletions(-) diff --git a/src/xenia/cpu/backend/x64/x64_backend.cc b/src/xenia/cpu/backend/x64/x64_backend.cc index 8d3d48cee..60ee17dd6 100644 --- a/src/xenia/cpu/backend/x64/x64_backend.cc +++ b/src/xenia/cpu/backend/x64/x64_backend.cc @@ -580,11 +580,14 @@ void X64ThunkEmitter::EmitSaveVolatileRegs() { // mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rax); mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rcx); mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rdx); - mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], r8); - mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], r9); - mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r10); - mov(qword[rsp + offsetof(StackLayout::Thunk, r[6])], r11); - +#if XE_PLATFORM_LINUX + mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], rsi); + mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], rdi); +#endif + mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r8); + mov(qword[rsp + offsetof(StackLayout::Thunk, r[6])], r9); + mov(qword[rsp + offsetof(StackLayout::Thunk, r[7])], r10); + mov(qword[rsp + offsetof(StackLayout::Thunk, r[8])], r11); // vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[0])], xmm0); vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[1])], xmm1); vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[2])], xmm2); @@ -594,35 +597,40 @@ void X64ThunkEmitter::EmitSaveVolatileRegs() { } void X64ThunkEmitter::EmitLoadVolatileRegs() { - // Load volatile registers from our stack frame. + // mov(rax, qword[rsp + offsetof(StackLayout::Thunk, r[0])]); + mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]); + mov(rdx, qword[rsp + offsetof(StackLayout::Thunk, r[2])]); +#if XE_PLATFORM_LINUX + mov(rsi, qword[rsp + offsetof(StackLayout::Thunk, r[3])]); + mov(rdi, qword[rsp + offsetof(StackLayout::Thunk, r[4])]); +#endif + mov(r8, qword[rsp + offsetof(StackLayout::Thunk, r[5])]); + mov(r9, qword[rsp + offsetof(StackLayout::Thunk, r[6])]); + mov(r10, qword[rsp + offsetof(StackLayout::Thunk, r[7])]); + mov(r11, qword[rsp + offsetof(StackLayout::Thunk, r[8])]); // vmovaps(xmm0, qword[rsp + offsetof(StackLayout::Thunk, xmm[0])]); vmovaps(xmm1, qword[rsp + offsetof(StackLayout::Thunk, xmm[1])]); vmovaps(xmm2, qword[rsp + offsetof(StackLayout::Thunk, xmm[2])]); vmovaps(xmm3, qword[rsp + offsetof(StackLayout::Thunk, xmm[3])]); vmovaps(xmm4, qword[rsp + offsetof(StackLayout::Thunk, xmm[4])]); vmovaps(xmm5, qword[rsp + offsetof(StackLayout::Thunk, xmm[5])]); - - // mov(rax, qword[rsp + offsetof(StackLayout::Thunk, r[0])]); - mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]); - mov(rdx, qword[rsp + offsetof(StackLayout::Thunk, r[2])]); - mov(r8, qword[rsp + offsetof(StackLayout::Thunk, r[3])]); - mov(r9, qword[rsp + offsetof(StackLayout::Thunk, r[4])]); - mov(r10, qword[rsp + offsetof(StackLayout::Thunk, r[5])]); - mov(r11, qword[rsp + offsetof(StackLayout::Thunk, r[6])]); } void X64ThunkEmitter::EmitSaveNonvolatileRegs() { - // Preserve nonvolatile registers. mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rbx); - mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rcx); - mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rbp); + mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rbp); +#if XE_PLATFORM_WIN32 + mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rcx); mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], rsi); mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], rdi); +#endif mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r12); mov(qword[rsp + offsetof(StackLayout::Thunk, r[6])], r13); mov(qword[rsp + offsetof(StackLayout::Thunk, r[7])], r14); mov(qword[rsp + offsetof(StackLayout::Thunk, r[8])], r15); + // SysV does not have nonvolatile XMM registers. +#if XE_PLATFORM_WIN32 vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[0])], xmm6); vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[1])], xmm7); vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[2])], xmm8); @@ -633,9 +641,23 @@ void X64ThunkEmitter::EmitSaveNonvolatileRegs() { vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[7])], xmm13); vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[8])], xmm14); vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[9])], xmm15); +#endif } void X64ThunkEmitter::EmitLoadNonvolatileRegs() { + mov(rbx, qword[rsp + offsetof(StackLayout::Thunk, r[0])]); + mov(rbp, qword[rsp + offsetof(StackLayout::Thunk, r[1])]); +#if XE_PLATFORM_WIN32 + mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[2])]); + mov(rsi, qword[rsp + offsetof(StackLayout::Thunk, r[3])]); + mov(rdi, qword[rsp + offsetof(StackLayout::Thunk, r[4])]); +#endif + mov(r12, qword[rsp + offsetof(StackLayout::Thunk, r[5])]); + mov(r13, qword[rsp + offsetof(StackLayout::Thunk, r[6])]); + mov(r14, qword[rsp + offsetof(StackLayout::Thunk, r[7])]); + mov(r15, qword[rsp + offsetof(StackLayout::Thunk, r[8])]); + +#if XE_PLATFORM_WIN32 vmovaps(xmm6, qword[rsp + offsetof(StackLayout::Thunk, xmm[0])]); vmovaps(xmm7, qword[rsp + offsetof(StackLayout::Thunk, xmm[1])]); vmovaps(xmm8, qword[rsp + offsetof(StackLayout::Thunk, xmm[2])]); @@ -646,16 +668,7 @@ void X64ThunkEmitter::EmitLoadNonvolatileRegs() { vmovaps(xmm13, qword[rsp + offsetof(StackLayout::Thunk, xmm[7])]); vmovaps(xmm14, qword[rsp + offsetof(StackLayout::Thunk, xmm[8])]); vmovaps(xmm15, qword[rsp + offsetof(StackLayout::Thunk, xmm[9])]); - - mov(rbx, qword[rsp + offsetof(StackLayout::Thunk, r[0])]); - mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]); - mov(rbp, qword[rsp + offsetof(StackLayout::Thunk, r[2])]); - mov(rsi, qword[rsp + offsetof(StackLayout::Thunk, r[3])]); - mov(rdi, qword[rsp + offsetof(StackLayout::Thunk, r[4])]); - mov(r12, qword[rsp + offsetof(StackLayout::Thunk, r[5])]); - mov(r13, qword[rsp + offsetof(StackLayout::Thunk, r[6])]); - mov(r14, qword[rsp + offsetof(StackLayout::Thunk, r[7])]); - mov(r15, qword[rsp + offsetof(StackLayout::Thunk, r[8])]); +#endif } } // namespace x64 diff --git a/src/xenia/cpu/backend/x64/x64_stack_layout.h b/src/xenia/cpu/backend/x64/x64_stack_layout.h index 3835bb82e..1736dc02a 100644 --- a/src/xenia/cpu/backend/x64/x64_stack_layout.h +++ b/src/xenia/cpu/backend/x64/x64_stack_layout.h @@ -27,78 +27,71 @@ class StackLayout { * NOTE: stack must always be 16b aligned. * * Thunk stack: - * +------------------+ - * | arg temp, 3 * 8 | rsp + 0 - * | | - * | | - * +------------------+ - * | scratch, 16b | rsp + 24 - * | | - * +------------------+ - * | rbx | rsp + 40 - * +------------------+ - * | rcx / context | rsp + 48 - * +------------------+ - * | rbp | rsp + 56 - * +------------------+ - * | rsi | rsp + 64 - * +------------------+ - * | rdi | rsp + 72 - * +------------------+ - * | r12 | rsp + 80 - * +------------------+ - * | r13 | rsp + 88 - * +------------------+ - * | r14 | rsp + 96 - * +------------------+ - * | r15 | rsp + 104 - * +------------------+ - * | xmm6/0 | rsp + 112 - * | | - * +------------------+ - * | xmm7/1 | rsp + 128 - * | | - * +------------------+ - * | xmm8/2 | rsp + 144 - * | | - * +------------------+ - * | xmm9/3 | rsp + 160 - * | | - * +------------------+ - * | xmm10/4 | rsp + 176 - * | | - * +------------------+ - * | xmm11/5 | rsp + 192 - * | | - * +------------------+ - * | xmm12 | rsp + 208 - * | | - * +------------------+ - * | xmm13 | rsp + 224 - * | | - * +------------------+ - * | xmm14 | rsp + 240 - * | | - * +------------------+ - * | xmm15 | rsp + 256 - * | | - * +------------------+ - * | scratch, 8b | rsp + 272 - * | | - * +------------------+ - * | (return address) | rsp + 280 - * +------------------+ - * | (rcx home) | rsp + 288 - * +------------------+ - * | (rdx home) | rsp + 296 - * +------------------+ + * Non-Volatile Volatile + * +------------------+------------------+ + * | arg temp, 3 * 8 | arg temp, 3 * 8 | rsp + 0x000 + * | | | + * | | | + * +------------------+------------------+ + * | rbx | (unused) | rsp + 0x018 + * +------------------+------------------+ + * | rbp | rcx | rsp + 0x020 + * +------------------+------------------+ + * | rcx (Win32) | rdx | rsp + 0x028 + * +------------------+------------------+ + * | rsi (Win32) | rsi (Linux) | rsp + 0x030 + * +------------------+------------------+ + * | rdi (Win32) | rdi (Linux) | rsp + 0x038 + * +------------------+------------------+ + * | r12 | r8 | rsp + 0x040 + * +------------------+------------------+ + * | r13 | r9 | rsp + 0x048 + * +------------------+------------------+ + * | r14 | r10 | rsp + 0x050 + * +------------------+------------------+ + * | r15 | r11 | rsp + 0x058 + * +------------------+------------------+ + * | xmm6 (Win32) | (unused) | rsp + 0x060 + * | | | + * +------------------+------------------+ + * | xmm7 (Win32) | xmm1 | rsp + 0x070 + * | | | + * +------------------+------------------+ + * | xmm8 (Win32) | xmm2 | rsp + 0x080 + * | | | + * +------------------+------------------+ + * | xmm9 (Win32) | xmm3 | rsp + 0x090 + * | | | + * +------------------+------------------+ + * | xmm10 (Win32) | xmm4 | rsp + 0x0A0 + * | | | + * +------------------+------------------+ + * | xmm11 (Win32) | xmm5 | rsp + 0x0B0 + * | | | + * +------------------+------------------+ + * | xmm12 (Win32) | (unused) | rsp + 0x0C0 + * | | | + * +------------------+------------------+ + * | xmm13 (Win32) | (unused) | rsp + 0x0D0 + * | | | + * +------------------+------------------+ + * | xmm14 (Win32) | (unused) | rsp + 0x0E0 + * | | | + * +------------------+------------------+ + * | xmm15 (Win32) | (unused) | rsp + 0x0F0 + * | | | + * +------------------+------------------+ + * | (return address) | (return address) | rsp + 0x100 + * +------------------+------------------+ + * | (rcx home) | (rcx home) | rsp + 0x108 + * +------------------+------------------+ + * | (rdx home) | (rdx home) | rsp + 0x110 + * +------------------+------------------+ */ XEPACKEDSTRUCT(Thunk, { uint64_t arg_temp[3]; - uint8_t scratch[16]; - uint64_t r[10]; + uint64_t r[9]; vec128_t xmm[10]; - uint64_t dummy; }); static_assert(sizeof(Thunk) % 16 == 0, "sizeof(Thunk) must be a multiple of 16!");