[CPU] Change thunk layout and fix Linux thunk misalignment

This commit is contained in:
Prism Tutaj 2019-12-02 23:30:41 -06:00 committed by Rick Gibbed
parent f0723c211b
commit 1cce25d5e7
2 changed files with 101 additions and 95 deletions

View File

@ -580,11 +580,14 @@ void X64ThunkEmitter::EmitSaveVolatileRegs() {
// mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rax);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rcx);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rdx);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], r8);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], r9);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r10);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[6])], r11);
#if XE_PLATFORM_LINUX
mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], rsi);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], rdi);
#endif
mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r8);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[6])], r9);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[7])], r10);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[8])], r11);
// vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[0])], xmm0);
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[1])], xmm1);
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[2])], xmm2);
@ -594,35 +597,40 @@ void X64ThunkEmitter::EmitSaveVolatileRegs() {
}
void X64ThunkEmitter::EmitLoadVolatileRegs() {
  // Restores the volatile registers from their StackLayout::Thunk slots.
  //
  // The slot indices used here must match the ones written by
  // EmitSaveVolatileRegs:
  //   r[1] = rcx, r[2] = rdx, r[3] = rsi (Linux), r[4] = rdi (Linux),
  //   r[5] = r8,  r[6] = r9,  r[7] = r10,         r[8] = r11
  //   xmm[1]..xmm[5] = xmm1..xmm5
  //
  // Every register is loaded exactly once. Reading r8-r11 from r[3]..r[6]
  // would pick up the rsi/rdi/r8/r9 slots instead and hand back the wrong
  // values.

  // Slot 0 (rax) is left disabled, same as on the save side.
  // mov(rax, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
  mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
  mov(rdx, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
#if XE_PLATFORM_LINUX
  // rsi/rdi are only saved and restored as volatile registers on Linux.
  mov(rsi, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
  mov(rdi, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
#endif
  mov(r8, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
  mov(r9, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
  mov(r10, qword[rsp + offsetof(StackLayout::Thunk, r[7])]);
  mov(r11, qword[rsp + offsetof(StackLayout::Thunk, r[8])]);

  // Slot 0 (xmm0) is left disabled, same as on the save side.
  // vmovaps(xmm0, qword[rsp + offsetof(StackLayout::Thunk, xmm[0])]);
  vmovaps(xmm1, qword[rsp + offsetof(StackLayout::Thunk, xmm[1])]);
  vmovaps(xmm2, qword[rsp + offsetof(StackLayout::Thunk, xmm[2])]);
  vmovaps(xmm3, qword[rsp + offsetof(StackLayout::Thunk, xmm[3])]);
  vmovaps(xmm4, qword[rsp + offsetof(StackLayout::Thunk, xmm[4])]);
  vmovaps(xmm5, qword[rsp + offsetof(StackLayout::Thunk, xmm[5])]);
}
void X64ThunkEmitter::EmitSaveNonvolatileRegs() {
// Preserve nonvolatile registers.
mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rbx);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rcx);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rbp);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rbp);
#if XE_PLATFORM_WIN32
mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rcx);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], rsi);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], rdi);
#endif
mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r12);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[6])], r13);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[7])], r14);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[8])], r15);
// SysV does not have nonvolatile XMM registers.
#if XE_PLATFORM_WIN32
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[0])], xmm6);
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[1])], xmm7);
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[2])], xmm8);
@ -633,9 +641,23 @@ void X64ThunkEmitter::EmitSaveNonvolatileRegs() {
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[7])], xmm13);
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[8])], xmm14);
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[9])], xmm15);
#endif
}
void X64ThunkEmitter::EmitLoadNonvolatileRegs() {
mov(rbx, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
mov(rbp, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
#if XE_PLATFORM_WIN32
mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
mov(rsi, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
mov(rdi, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
#endif
mov(r12, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
mov(r13, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
mov(r14, qword[rsp + offsetof(StackLayout::Thunk, r[7])]);
mov(r15, qword[rsp + offsetof(StackLayout::Thunk, r[8])]);
#if XE_PLATFORM_WIN32
vmovaps(xmm6, qword[rsp + offsetof(StackLayout::Thunk, xmm[0])]);
vmovaps(xmm7, qword[rsp + offsetof(StackLayout::Thunk, xmm[1])]);
vmovaps(xmm8, qword[rsp + offsetof(StackLayout::Thunk, xmm[2])]);
@ -646,16 +668,7 @@ void X64ThunkEmitter::EmitLoadNonvolatileRegs() {
vmovaps(xmm13, qword[rsp + offsetof(StackLayout::Thunk, xmm[7])]);
vmovaps(xmm14, qword[rsp + offsetof(StackLayout::Thunk, xmm[8])]);
vmovaps(xmm15, qword[rsp + offsetof(StackLayout::Thunk, xmm[9])]);
mov(rbx, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
mov(rbp, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
mov(rsi, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
mov(rdi, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
mov(r12, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
mov(r13, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
mov(r14, qword[rsp + offsetof(StackLayout::Thunk, r[7])]);
mov(r15, qword[rsp + offsetof(StackLayout::Thunk, r[8])]);
#endif
}
} // namespace x64

View File

@ -27,78 +27,71 @@ class StackLayout {
* NOTE: stack must always be 16b aligned.
*
* Thunk stack:
* +------------------+
* | arg temp, 3 * 8 | rsp + 0
* | |
* | |
* +------------------+
* | scratch, 16b | rsp + 24
* | |
* +------------------+
* | rbx | rsp + 40
* +------------------+
* | rcx / context | rsp + 48
* +------------------+
* | rbp | rsp + 56
* +------------------+
* | rsi | rsp + 64
* +------------------+
* | rdi | rsp + 72
* +------------------+
* | r12 | rsp + 80
* +------------------+
* | r13 | rsp + 88
* +------------------+
* | r14 | rsp + 96
* +------------------+
* | r15 | rsp + 104
* +------------------+
* | xmm6/0 | rsp + 112
* | |
* +------------------+
* | xmm7/1 | rsp + 128
* | |
* +------------------+
* | xmm8/2 | rsp + 144
* | |
* +------------------+
* | xmm9/3 | rsp + 160
* | |
* +------------------+
* | xmm10/4 | rsp + 176
* | |
* +------------------+
* | xmm11/5 | rsp + 192
* | |
* +------------------+
* | xmm12 | rsp + 208
* | |
* +------------------+
* | xmm13 | rsp + 224
* | |
* +------------------+
* | xmm14 | rsp + 240
* | |
* +------------------+
* | xmm15 | rsp + 256
* | |
* +------------------+
* | scratch, 8b | rsp + 272
* | |
* +------------------+
* | (return address) | rsp + 280
* +------------------+
* | (rcx home) | rsp + 288
* +------------------+
* | (rdx home) | rsp + 296
* +------------------+
* Non-Volatile Volatile
* +------------------+------------------+
* | arg temp, 3 * 8 | arg temp, 3 * 8 | rsp + 0x000
* | | |
* | | |
* +------------------+------------------+
* | rbx | (unused) | rsp + 0x018
* +------------------+------------------+
* | rbp | rcx | rsp + 0x020
* +------------------+------------------+
* | rcx (Win32) | rdx | rsp + 0x028
* +------------------+------------------+
* | rsi (Win32) | rsi (Linux) | rsp + 0x030
* +------------------+------------------+
* | rdi (Win32) | rdi (Linux) | rsp + 0x038
* +------------------+------------------+
* | r12 | r8 | rsp + 0x040
* +------------------+------------------+
* | r13 | r9 | rsp + 0x048
* +------------------+------------------+
* | r14 | r10 | rsp + 0x050
* +------------------+------------------+
* | r15 | r11 | rsp + 0x058
* +------------------+------------------+
* | xmm6 (Win32) | (unused) | rsp + 0x060
* | | |
* +------------------+------------------+
* | xmm7 (Win32) | xmm1 | rsp + 0x070
* | | |
* +------------------+------------------+
* | xmm8 (Win32) | xmm2 | rsp + 0x080
* | | |
* +------------------+------------------+
* | xmm9 (Win32) | xmm3 | rsp + 0x090
* | | |
* +------------------+------------------+
* | xmm10 (Win32) | xmm4 | rsp + 0x0A0
* | | |
* +------------------+------------------+
* | xmm11 (Win32) | xmm5 | rsp + 0x0B0
* | | |
* +------------------+------------------+
* | xmm12 (Win32) | (unused) | rsp + 0x0C0
* | | |
* +------------------+------------------+
* | xmm13 (Win32) | (unused) | rsp + 0x0D0
* | | |
* +------------------+------------------+
* | xmm14 (Win32) | (unused) | rsp + 0x0E0
* | | |
* +------------------+------------------+
* | xmm15 (Win32) | (unused) | rsp + 0x0F0
* | | |
* +------------------+------------------+
* | (return address) | (return address) | rsp + 0x100
* +------------------+------------------+
* | (rcx home) | (rcx home) | rsp + 0x108
* +------------------+------------------+
* | (rdx home) | (rdx home) | rsp + 0x110
* +------------------+------------------+
*/
// Stack frame used by the thunk emitters. Field order and sizes follow the
// "Thunk stack" diagram above: argument temporaries first, then nine 8-byte
// general-purpose register slots, then ten 16-byte XMM slots. The same r[] /
// xmm[] slots are reused for the non-volatile and the volatile register sets.
XEPACKEDSTRUCT(Thunk, {
  uint64_t arg_temp[3];  // rsp + 0x000
  uint64_t r[9];         // rsp + 0x018 (indexed r[0]..r[8] by the emitters)
  vec128_t xmm[10];      // rsp + 0x060 (indexed xmm[0]..xmm[9])
});
// The stack must stay 16-byte aligned (see NOTE above), so the frame size has
// to be a multiple of 16.
static_assert(sizeof(Thunk) % 16 == 0,
              "sizeof(Thunk) must be a multiple of 16!");