[CPU] Change thunk layout and fix Linux thunk misalignment
This commit is contained in:
parent
f0723c211b
commit
1cce25d5e7
|
@ -580,11 +580,14 @@ void X64ThunkEmitter::EmitSaveVolatileRegs() {
|
|||
// mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rax);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rcx);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rdx);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], r8);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], r9);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r10);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[6])], r11);
|
||||
|
||||
#if XE_PLATFORM_LINUX
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], rsi);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], rdi);
|
||||
#endif
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r8);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[6])], r9);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[7])], r10);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[8])], r11);
|
||||
// vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[0])], xmm0);
|
||||
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[1])], xmm1);
|
||||
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[2])], xmm2);
|
||||
|
@ -594,35 +597,40 @@ void X64ThunkEmitter::EmitSaveVolatileRegs() {
|
|||
}
|
||||
|
||||
void X64ThunkEmitter::EmitLoadVolatileRegs() {
|
||||
// Load volatile registers from our stack frame.
|
||||
// mov(rax, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
|
||||
mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
|
||||
mov(rdx, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
|
||||
#if XE_PLATFORM_LINUX
|
||||
mov(rsi, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
|
||||
mov(rdi, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
|
||||
#endif
|
||||
mov(r8, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
|
||||
mov(r9, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
|
||||
mov(r10, qword[rsp + offsetof(StackLayout::Thunk, r[7])]);
|
||||
mov(r11, qword[rsp + offsetof(StackLayout::Thunk, r[8])]);
|
||||
// vmovaps(xmm0, qword[rsp + offsetof(StackLayout::Thunk, xmm[0])]);
|
||||
vmovaps(xmm1, qword[rsp + offsetof(StackLayout::Thunk, xmm[1])]);
|
||||
vmovaps(xmm2, qword[rsp + offsetof(StackLayout::Thunk, xmm[2])]);
|
||||
vmovaps(xmm3, qword[rsp + offsetof(StackLayout::Thunk, xmm[3])]);
|
||||
vmovaps(xmm4, qword[rsp + offsetof(StackLayout::Thunk, xmm[4])]);
|
||||
vmovaps(xmm5, qword[rsp + offsetof(StackLayout::Thunk, xmm[5])]);
|
||||
|
||||
// mov(rax, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
|
||||
mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
|
||||
mov(rdx, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
|
||||
mov(r8, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
|
||||
mov(r9, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
|
||||
mov(r10, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
|
||||
mov(r11, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
|
||||
}
|
||||
|
||||
void X64ThunkEmitter::EmitSaveNonvolatileRegs() {
|
||||
// Preserve nonvolatile registers.
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rbx);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rcx);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rbp);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rbp);
|
||||
#if XE_PLATFORM_WIN32
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rcx);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], rsi);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], rdi);
|
||||
#endif
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r12);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[6])], r13);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[7])], r14);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[8])], r15);
|
||||
|
||||
// SysV does not have nonvolatile XMM registers.
|
||||
#if XE_PLATFORM_WIN32
|
||||
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[0])], xmm6);
|
||||
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[1])], xmm7);
|
||||
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[2])], xmm8);
|
||||
|
@ -633,9 +641,23 @@ void X64ThunkEmitter::EmitSaveNonvolatileRegs() {
|
|||
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[7])], xmm13);
|
||||
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[8])], xmm14);
|
||||
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[9])], xmm15);
|
||||
#endif
|
||||
}
|
||||
|
||||
void X64ThunkEmitter::EmitLoadNonvolatileRegs() {
|
||||
mov(rbx, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
|
||||
mov(rbp, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
|
||||
#if XE_PLATFORM_WIN32
|
||||
mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
|
||||
mov(rsi, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
|
||||
mov(rdi, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
|
||||
#endif
|
||||
mov(r12, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
|
||||
mov(r13, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
|
||||
mov(r14, qword[rsp + offsetof(StackLayout::Thunk, r[7])]);
|
||||
mov(r15, qword[rsp + offsetof(StackLayout::Thunk, r[8])]);
|
||||
|
||||
#if XE_PLATFORM_WIN32
|
||||
vmovaps(xmm6, qword[rsp + offsetof(StackLayout::Thunk, xmm[0])]);
|
||||
vmovaps(xmm7, qword[rsp + offsetof(StackLayout::Thunk, xmm[1])]);
|
||||
vmovaps(xmm8, qword[rsp + offsetof(StackLayout::Thunk, xmm[2])]);
|
||||
|
@ -646,16 +668,7 @@ void X64ThunkEmitter::EmitLoadNonvolatileRegs() {
|
|||
vmovaps(xmm13, qword[rsp + offsetof(StackLayout::Thunk, xmm[7])]);
|
||||
vmovaps(xmm14, qword[rsp + offsetof(StackLayout::Thunk, xmm[8])]);
|
||||
vmovaps(xmm15, qword[rsp + offsetof(StackLayout::Thunk, xmm[9])]);
|
||||
|
||||
mov(rbx, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
|
||||
mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
|
||||
mov(rbp, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
|
||||
mov(rsi, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
|
||||
mov(rdi, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
|
||||
mov(r12, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
|
||||
mov(r13, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
|
||||
mov(r14, qword[rsp + offsetof(StackLayout::Thunk, r[7])]);
|
||||
mov(r15, qword[rsp + offsetof(StackLayout::Thunk, r[8])]);
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace x64
|
||||
|
|
|
@ -27,78 +27,71 @@ class StackLayout {
|
|||
* NOTE: stack must always be 16b aligned.
|
||||
*
|
||||
* Thunk stack:
|
||||
* +------------------+
|
||||
* | arg temp, 3 * 8 | rsp + 0
|
||||
* | |
|
||||
* | |
|
||||
* +------------------+
|
||||
* | scratch, 16b | rsp + 24
|
||||
* | |
|
||||
* +------------------+
|
||||
* | rbx | rsp + 40
|
||||
* +------------------+
|
||||
* | rcx / context | rsp + 48
|
||||
* +------------------+
|
||||
* | rbp | rsp + 56
|
||||
* +------------------+
|
||||
* | rsi | rsp + 64
|
||||
* +------------------+
|
||||
* | rdi | rsp + 72
|
||||
* +------------------+
|
||||
* | r12 | rsp + 80
|
||||
* +------------------+
|
||||
* | r13 | rsp + 88
|
||||
* +------------------+
|
||||
* | r14 | rsp + 96
|
||||
* +------------------+
|
||||
* | r15 | rsp + 104
|
||||
* +------------------+
|
||||
* | xmm6/0 | rsp + 112
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm7/1 | rsp + 128
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm8/2 | rsp + 144
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm9/3 | rsp + 160
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm10/4 | rsp + 176
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm11/5 | rsp + 192
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm12 | rsp + 208
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm13 | rsp + 224
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm14 | rsp + 240
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm15 | rsp + 256
|
||||
* | |
|
||||
* +------------------+
|
||||
* | scratch, 8b | rsp + 272
|
||||
* | |
|
||||
* +------------------+
|
||||
* | (return address) | rsp + 280
|
||||
* +------------------+
|
||||
* | (rcx home) | rsp + 288
|
||||
* +------------------+
|
||||
* | (rdx home) | rsp + 296
|
||||
* +------------------+
|
||||
* Non-Volatile Volatile
|
||||
* +------------------+------------------+
|
||||
* | arg temp, 3 * 8 | arg temp, 3 * 8 | rsp + 0x000
|
||||
* | | |
|
||||
* | | |
|
||||
* +------------------+------------------+
|
||||
* | rbx | (unused) | rsp + 0x018
|
||||
* +------------------+------------------+
|
||||
* | rbp | rcx | rsp + 0x020
|
||||
* +------------------+------------------+
|
||||
* | rcx (Win32) | rdx | rsp + 0x028
|
||||
* +------------------+------------------+
|
||||
* | rsi (Win32) | rsi (Linux) | rsp + 0x030
|
||||
* +------------------+------------------+
|
||||
* | rdi (Win32) | rdi (Linux) | rsp + 0x038
|
||||
* +------------------+------------------+
|
||||
* | r12 | r8 | rsp + 0x040
|
||||
* +------------------+------------------+
|
||||
* | r13 | r9 | rsp + 0x048
|
||||
* +------------------+------------------+
|
||||
* | r14 | r10 | rsp + 0x050
|
||||
* +------------------+------------------+
|
||||
* | r15 | r11 | rsp + 0x058
|
||||
* +------------------+------------------+
|
||||
* | xmm6 (Win32) | (unused) | rsp + 0x060
|
||||
* | | |
|
||||
* +------------------+------------------+
|
||||
* | xmm7 (Win32) | xmm1 | rsp + 0x070
|
||||
* | | |
|
||||
* +------------------+------------------+
|
||||
* | xmm8 (Win32) | xmm2 | rsp + 0x080
|
||||
* | | |
|
||||
* +------------------+------------------+
|
||||
* | xmm9 (Win32) | xmm3 | rsp + 0x090
|
||||
* | | |
|
||||
* +------------------+------------------+
|
||||
* | xmm10 (Win32) | xmm4 | rsp + 0x0A0
|
||||
* | | |
|
||||
* +------------------+------------------+
|
||||
* | xmm11 (Win32) | xmm5 | rsp + 0x0B0
|
||||
* | | |
|
||||
* +------------------+------------------+
|
||||
* | xmm12 (Win32) | (unused) | rsp + 0x0C0
|
||||
* | | |
|
||||
* +------------------+------------------+
|
||||
* | xmm13 (Win32) | (unused) | rsp + 0x0D0
|
||||
* | | |
|
||||
* +------------------+------------------+
|
||||
* | xmm14 (Win32) | (unused) | rsp + 0x0E0
|
||||
* | | |
|
||||
* +------------------+------------------+
|
||||
* | xmm15 (Win32) | (unused) | rsp + 0x0F0
|
||||
* | | |
|
||||
* +------------------+------------------+
|
||||
* | (return address) | (return address) | rsp + 0x100
|
||||
* +------------------+------------------+
|
||||
* | (rcx home) | (rcx home) | rsp + 0x108
|
||||
* +------------------+------------------+
|
||||
* | (rdx home) | (rdx home) | rsp + 0x110
|
||||
* +------------------+------------------+
|
||||
*/
|
||||
XEPACKEDSTRUCT(Thunk, {
|
||||
uint64_t arg_temp[3];
|
||||
uint8_t scratch[16];
|
||||
uint64_t r[10];
|
||||
uint64_t r[9];
|
||||
vec128_t xmm[10];
|
||||
uint64_t dummy;
|
||||
});
|
||||
static_assert(sizeof(Thunk) % 16 == 0,
|
||||
"sizeof(Thunk) must be a multiple of 16!");
|
||||
|
|
Loading…
Reference in New Issue