[CPU] Change thunk layout and fix Linux thunk misalignment

Prism Tutaj 2019-12-02 23:30:41 -06:00 committed by Rick Gibbed
parent f0723c211b
commit 1cce25d5e7
2 changed files with 101 additions and 95 deletions


@@ -580,11 +580,14 @@ void X64ThunkEmitter::EmitSaveVolatileRegs() {
   // mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rax);
   mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rcx);
   mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rdx);
-  mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], r8);
-  mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], r9);
-  mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r10);
-  mov(qword[rsp + offsetof(StackLayout::Thunk, r[6])], r11);
+#if XE_PLATFORM_LINUX
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], rsi);
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], rdi);
+#endif
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r8);
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[6])], r9);
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[7])], r10);
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[8])], r11);
   // vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[0])], xmm0);
   vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[1])], xmm1);
   vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[2])], xmm2);
@@ -594,35 +597,40 @@ void X64ThunkEmitter::EmitSaveVolatileRegs() {
 }

 void X64ThunkEmitter::EmitLoadVolatileRegs() {
-  // Load volatile registers from our stack frame.
+  // mov(rax, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
+  mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
+  mov(rdx, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
+#if XE_PLATFORM_LINUX
+  mov(rsi, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
+  mov(rdi, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
+#endif
+  mov(r8, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
+  mov(r9, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
+  mov(r10, qword[rsp + offsetof(StackLayout::Thunk, r[7])]);
+  mov(r11, qword[rsp + offsetof(StackLayout::Thunk, r[8])]);
   // vmovaps(xmm0, qword[rsp + offsetof(StackLayout::Thunk, xmm[0])]);
   vmovaps(xmm1, qword[rsp + offsetof(StackLayout::Thunk, xmm[1])]);
   vmovaps(xmm2, qword[rsp + offsetof(StackLayout::Thunk, xmm[2])]);
   vmovaps(xmm3, qword[rsp + offsetof(StackLayout::Thunk, xmm[3])]);
   vmovaps(xmm4, qword[rsp + offsetof(StackLayout::Thunk, xmm[4])]);
   vmovaps(xmm5, qword[rsp + offsetof(StackLayout::Thunk, xmm[5])]);
-  // mov(rax, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
-  mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
-  mov(rdx, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
-  mov(r8, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
-  mov(r9, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
-  mov(r10, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
-  mov(r11, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
 }

 void X64ThunkEmitter::EmitSaveNonvolatileRegs() {
-  // Preserve nonvolatile registers.
   mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rbx);
-  mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rcx);
-  mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rbp);
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rbp);
+#if XE_PLATFORM_WIN32
+  mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rcx);
   mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], rsi);
   mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], rdi);
+#endif
   mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r12);
   mov(qword[rsp + offsetof(StackLayout::Thunk, r[6])], r13);
   mov(qword[rsp + offsetof(StackLayout::Thunk, r[7])], r14);
   mov(qword[rsp + offsetof(StackLayout::Thunk, r[8])], r15);
+  // SysV does not have nonvolatile XMM registers.
+#if XE_PLATFORM_WIN32
   vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[0])], xmm6);
   vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[1])], xmm7);
   vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[2])], xmm8);
@@ -633,9 +641,23 @@ void X64ThunkEmitter::EmitSaveNonvolatileRegs() {
   vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[7])], xmm13);
   vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[8])], xmm14);
   vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[9])], xmm15);
+#endif
 }

 void X64ThunkEmitter::EmitLoadNonvolatileRegs() {
+  mov(rbx, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
+  mov(rbp, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
+#if XE_PLATFORM_WIN32
+  mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
+  mov(rsi, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
+  mov(rdi, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
+#endif
+  mov(r12, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
+  mov(r13, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
+  mov(r14, qword[rsp + offsetof(StackLayout::Thunk, r[7])]);
+  mov(r15, qword[rsp + offsetof(StackLayout::Thunk, r[8])]);
+#if XE_PLATFORM_WIN32
   vmovaps(xmm6, qword[rsp + offsetof(StackLayout::Thunk, xmm[0])]);
   vmovaps(xmm7, qword[rsp + offsetof(StackLayout::Thunk, xmm[1])]);
   vmovaps(xmm8, qword[rsp + offsetof(StackLayout::Thunk, xmm[2])]);
@@ -646,16 +668,7 @@ void X64ThunkEmitter::EmitLoadNonvolatileRegs() {
   vmovaps(xmm13, qword[rsp + offsetof(StackLayout::Thunk, xmm[7])]);
   vmovaps(xmm14, qword[rsp + offsetof(StackLayout::Thunk, xmm[8])]);
   vmovaps(xmm15, qword[rsp + offsetof(StackLayout::Thunk, xmm[9])]);
-  mov(rbx, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
-  mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
-  mov(rbp, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
-  mov(rsi, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
-  mov(rdi, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
-  mov(r12, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
-  mov(r13, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
-  mov(r14, qword[rsp + offsetof(StackLayout::Thunk, r[7])]);
-  mov(r15, qword[rsp + offsetof(StackLayout::Thunk, r[8])]);
+#endif
 }

 } // namespace x64
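
The #if guards above follow the two host calling conventions: under the Microsoft x64 ABI, rsi, rdi, and xmm6 through xmm15 are callee-saved, so they belong in the nonvolatile save/restore pair, while under the System V AMD64 ABI used on Linux, rsi and rdi are caller-saved argument registers and no XMM register is callee-saved, which is why they move into the volatile pair and the nonvolatile XMM block is compiled only for Win32. A minimal standalone sketch of those register classes (illustrative only, not part of this commit):

// Illustrative only: restates the Win64 vs. SysV register classes that the
// XE_PLATFORM_WIN32 / XE_PLATFORM_LINUX guards in the thunks above encode.
#include <cstddef>
#include <cstdio>

namespace {

// Caller-saved (volatile) GPRs: clobbered across a call, so the volatile
// thunk helpers must spill and reload them around the host call.
constexpr const char* kVolatileWin64[] = {"rax", "rcx", "rdx",
                                          "r8",  "r9",  "r10", "r11"};
constexpr const char* kVolatileSysV[] = {"rax", "rcx", "rdx", "rsi", "rdi",
                                         "r8",  "r9",  "r10", "r11"};

// Callee-saved (nonvolatile) GPRs: the nonvolatile thunk helpers must
// preserve these for the caller. Win64 additionally treats xmm6-xmm15 as
// callee-saved; SysV has no callee-saved XMM registers at all.
constexpr const char* kNonvolatileWin64[] = {"rbx", "rbp", "rsi", "rdi",
                                             "r12", "r13", "r14", "r15"};
constexpr const char* kNonvolatileSysV[] = {"rbx", "rbp", "r12",
                                            "r13", "r14", "r15"};

template <std::size_t N>
void Print(const char* title, const char* const (&regs)[N]) {
  std::printf("%s:", title);
  for (const char* reg : regs) {
    std::printf(" %s", reg);
  }
  std::printf("\n");
}

}  // namespace

int main() {
  Print("Win64 volatile", kVolatileWin64);
  Print("SysV volatile", kVolatileSysV);
  Print("Win64 nonvolatile", kNonvolatileWin64);
  Print("SysV nonvolatile", kNonvolatileSysV);
  return 0;
}

Note that rax is saved by neither helper, which lines up with the commented-out r[0] save/restore above and the "(unused)" r[0] slot in the layout comment below.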


@@ -27,78 +27,71 @@ class StackLayout {
   * NOTE: stack must always be 16b aligned.
   *
   * Thunk stack:
-  *  +------------------+
-  *  | arg temp, 3 * 8  | rsp + 0
-  *  |                  |
-  *  |                  |
-  *  +------------------+
-  *  | scratch, 16b     | rsp + 24
-  *  |                  |
-  *  +------------------+
-  *  | rbx              | rsp + 40
-  *  +------------------+
-  *  | rcx / context    | rsp + 48
-  *  +------------------+
-  *  | rbp              | rsp + 56
-  *  +------------------+
-  *  | rsi              | rsp + 64
-  *  +------------------+
-  *  | rdi              | rsp + 72
-  *  +------------------+
-  *  | r12              | rsp + 80
-  *  +------------------+
-  *  | r13              | rsp + 88
-  *  +------------------+
-  *  | r14              | rsp + 96
-  *  +------------------+
-  *  | r15              | rsp + 104
-  *  +------------------+
-  *  | xmm6/0           | rsp + 112
-  *  |                  |
-  *  +------------------+
-  *  | xmm7/1           | rsp + 128
-  *  |                  |
-  *  +------------------+
-  *  | xmm8/2           | rsp + 144
-  *  |                  |
-  *  +------------------+
-  *  | xmm9/3           | rsp + 160
-  *  |                  |
-  *  +------------------+
-  *  | xmm10/4          | rsp + 176
-  *  |                  |
-  *  +------------------+
-  *  | xmm11/5          | rsp + 192
-  *  |                  |
-  *  +------------------+
-  *  | xmm12            | rsp + 208
-  *  |                  |
-  *  +------------------+
-  *  | xmm13            | rsp + 224
-  *  |                  |
-  *  +------------------+
-  *  | xmm14            | rsp + 240
-  *  |                  |
-  *  +------------------+
-  *  | xmm15            | rsp + 256
-  *  |                  |
-  *  +------------------+
-  *  | scratch, 8b      | rsp + 272
-  *  |                  |
-  *  +------------------+
-  *  | (return address) | rsp + 280
-  *  +------------------+
-  *  | (rcx home)       | rsp + 288
-  *  +------------------+
-  *  | (rdx home)       | rsp + 296
-  *  +------------------+
+  *       Non-Volatile         Volatile
+  *  +------------------+------------------+
+  *  | arg temp, 3 * 8  | arg temp, 3 * 8  | rsp + 0x000
+  *  |                  |                  |
+  *  |                  |                  |
+  *  +------------------+------------------+
+  *  | rbx              | (unused)         | rsp + 0x018
+  *  +------------------+------------------+
+  *  | rbp              | rcx              | rsp + 0x020
+  *  +------------------+------------------+
+  *  | rcx (Win32)      | rdx              | rsp + 0x028
+  *  +------------------+------------------+
+  *  | rsi (Win32)      | rsi (Linux)      | rsp + 0x030
+  *  +------------------+------------------+
+  *  | rdi (Win32)      | rdi (Linux)      | rsp + 0x038
+  *  +------------------+------------------+
+  *  | r12              | r8               | rsp + 0x040
+  *  +------------------+------------------+
+  *  | r13              | r9               | rsp + 0x048
+  *  +------------------+------------------+
+  *  | r14              | r10              | rsp + 0x050
+  *  +------------------+------------------+
+  *  | r15              | r11              | rsp + 0x058
+  *  +------------------+------------------+
+  *  | xmm6 (Win32)     | (unused)         | rsp + 0x060
+  *  |                  |                  |
+  *  +------------------+------------------+
+  *  | xmm7 (Win32)     | xmm1             | rsp + 0x070
+  *  |                  |                  |
+  *  +------------------+------------------+
+  *  | xmm8 (Win32)     | xmm2             | rsp + 0x080
+  *  |                  |                  |
+  *  +------------------+------------------+
+  *  | xmm9 (Win32)     | xmm3             | rsp + 0x090
+  *  |                  |                  |
+  *  +------------------+------------------+
+  *  | xmm10 (Win32)    | xmm4             | rsp + 0x0A0
+  *  |                  |                  |
+  *  +------------------+------------------+
+  *  | xmm11 (Win32)    | xmm5             | rsp + 0x0B0
+  *  |                  |                  |
+  *  +------------------+------------------+
+  *  | xmm12 (Win32)    | (unused)         | rsp + 0x0C0
+  *  |                  |                  |
+  *  +------------------+------------------+
+  *  | xmm13 (Win32)    | (unused)         | rsp + 0x0D0
+  *  |                  |                  |
+  *  +------------------+------------------+
+  *  | xmm14 (Win32)    | (unused)         | rsp + 0x0E0
+  *  |                  |                  |
+  *  +------------------+------------------+
+  *  | xmm15 (Win32)    | (unused)         | rsp + 0x0F0
+  *  |                  |                  |
+  *  +------------------+------------------+
+  *  | (return address) | (return address) | rsp + 0x100
+  *  +------------------+------------------+
+  *  | (rcx home)       | (rcx home)       | rsp + 0x108
+  *  +------------------+------------------+
+  *  | (rdx home)       | (rdx home)       | rsp + 0x110
+  *  +------------------+------------------+
   */
  XEPACKEDSTRUCT(Thunk, {
    uint64_t arg_temp[3];
-   uint8_t scratch[16];
-   uint64_t r[10];
+   uint64_t r[9];
    vec128_t xmm[10];
-   uint64_t dummy;
  });
  static_assert(sizeof(Thunk) % 16 == 0,
                "sizeof(Thunk) must be a multiple of 16!");