x64 backend: Save nonvolatile XMM registers on host -> guest transitions
Define the context and membase registers in the x64 emitter.
This commit is contained in:
parent
e3fdb08ad7
commit
5f4416ee2f
|
@ -398,52 +398,53 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
|
|||
mov(qword[rsp + 8 * 1], rcx);
|
||||
sub(rsp, stack_size);
|
||||
|
||||
mov(qword[rsp + 48], rbx);
|
||||
mov(qword[rsp + 56], rcx);
|
||||
mov(qword[rsp + 64], rbp);
|
||||
mov(qword[rsp + 72], rsi);
|
||||
mov(qword[rsp + 80], rdi);
|
||||
mov(qword[rsp + 88], r12);
|
||||
mov(qword[rsp + 96], r13);
|
||||
mov(qword[rsp + 104], r14);
|
||||
mov(qword[rsp + 112], r15);
|
||||
// Preserve nonvolatile registers.
|
||||
mov(qword[rsp + 40], rbx);
|
||||
mov(qword[rsp + 48], rcx);
|
||||
mov(qword[rsp + 56], rbp);
|
||||
mov(qword[rsp + 64], rsi);
|
||||
mov(qword[rsp + 72], rdi);
|
||||
mov(qword[rsp + 80], r12);
|
||||
mov(qword[rsp + 88], r13);
|
||||
mov(qword[rsp + 96], r14);
|
||||
mov(qword[rsp + 104], r15);
|
||||
|
||||
/*movaps(ptr[rsp + 128], xmm6);
|
||||
movaps(ptr[rsp + 144], xmm7);
|
||||
movaps(ptr[rsp + 160], xmm8);
|
||||
movaps(ptr[rsp + 176], xmm9);
|
||||
movaps(ptr[rsp + 192], xmm10);
|
||||
movaps(ptr[rsp + 208], xmm11);
|
||||
movaps(ptr[rsp + 224], xmm12);
|
||||
movaps(ptr[rsp + 240], xmm13);
|
||||
movaps(ptr[rsp + 256], xmm14);
|
||||
movaps(ptr[rsp + 272], xmm15);*/
|
||||
movaps(ptr[rsp + 112], xmm6);
|
||||
movaps(ptr[rsp + 128], xmm7);
|
||||
movaps(ptr[rsp + 144], xmm8);
|
||||
movaps(ptr[rsp + 160], xmm9);
|
||||
movaps(ptr[rsp + 176], xmm10);
|
||||
movaps(ptr[rsp + 192], xmm11);
|
||||
movaps(ptr[rsp + 208], xmm12);
|
||||
movaps(ptr[rsp + 224], xmm13);
|
||||
movaps(ptr[rsp + 240], xmm14);
|
||||
movaps(ptr[rsp + 256], xmm15);
|
||||
|
||||
mov(rax, rcx);
|
||||
mov(rcx, rdx);
|
||||
mov(rdx, r8);
|
||||
call(rax);
|
||||
|
||||
/*movaps(xmm6, ptr[rsp + 128]);
|
||||
movaps(xmm7, ptr[rsp + 144]);
|
||||
movaps(xmm8, ptr[rsp + 160]);
|
||||
movaps(xmm9, ptr[rsp + 176]);
|
||||
movaps(xmm10, ptr[rsp + 192]);
|
||||
movaps(xmm11, ptr[rsp + 208]);
|
||||
movaps(xmm12, ptr[rsp + 224]);
|
||||
movaps(xmm13, ptr[rsp + 240]);
|
||||
movaps(xmm14, ptr[rsp + 256]);
|
||||
movaps(xmm15, ptr[rsp + 272]);*/
|
||||
movaps(xmm6, ptr[rsp + 112]);
|
||||
movaps(xmm7, ptr[rsp + 128]);
|
||||
movaps(xmm8, ptr[rsp + 144]);
|
||||
movaps(xmm9, ptr[rsp + 160]);
|
||||
movaps(xmm10, ptr[rsp + 176]);
|
||||
movaps(xmm11, ptr[rsp + 192]);
|
||||
movaps(xmm12, ptr[rsp + 208]);
|
||||
movaps(xmm13, ptr[rsp + 224]);
|
||||
movaps(xmm14, ptr[rsp + 240]);
|
||||
movaps(xmm15, ptr[rsp + 256]);
|
||||
|
||||
mov(rbx, qword[rsp + 48]);
|
||||
mov(rcx, qword[rsp + 56]);
|
||||
mov(rbp, qword[rsp + 64]);
|
||||
mov(rsi, qword[rsp + 72]);
|
||||
mov(rdi, qword[rsp + 80]);
|
||||
mov(r12, qword[rsp + 88]);
|
||||
mov(r13, qword[rsp + 96]);
|
||||
mov(r14, qword[rsp + 104]);
|
||||
mov(r15, qword[rsp + 112]);
|
||||
mov(rbx, qword[rsp + 40]);
|
||||
mov(rcx, qword[rsp + 48]);
|
||||
mov(rbp, qword[rsp + 56]);
|
||||
mov(rsi, qword[rsp + 64]);
|
||||
mov(rdi, qword[rsp + 72]);
|
||||
mov(r12, qword[rsp + 80]);
|
||||
mov(r13, qword[rsp + 88]);
|
||||
mov(r14, qword[rsp + 96]);
|
||||
mov(r15, qword[rsp + 104]);
|
||||
|
||||
add(rsp, stack_size);
|
||||
mov(rcx, qword[rsp + 8 * 1]);
|
||||
|
@ -468,17 +469,18 @@ GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() {
|
|||
mov(qword[rsp + 8 * 1], rcx);
|
||||
sub(rsp, stack_size);
|
||||
|
||||
mov(qword[rsp + 48], rbx);
|
||||
mov(qword[rsp + 56], rcx);
|
||||
mov(qword[rsp + 64], rbp);
|
||||
mov(qword[rsp + 72], rsi);
|
||||
mov(qword[rsp + 80], rdi);
|
||||
mov(qword[rsp + 88], r12);
|
||||
mov(qword[rsp + 96], r13);
|
||||
mov(qword[rsp + 104], r14);
|
||||
mov(qword[rsp + 112], r15);
|
||||
mov(qword[rsp + 40], rbx);
|
||||
mov(qword[rsp + 48], rcx);
|
||||
mov(qword[rsp + 56], rbp);
|
||||
mov(qword[rsp + 64], rsi);
|
||||
mov(qword[rsp + 72], rdi);
|
||||
mov(qword[rsp + 80], r12);
|
||||
mov(qword[rsp + 88], r13);
|
||||
mov(qword[rsp + 96], r14);
|
||||
mov(qword[rsp + 104], r15);
|
||||
|
||||
// TODO(benvanik): save things? XMM0-5?
|
||||
// HACK: Some emulated vector instructions require that we don't touch xmm0.
|
||||
|
||||
mov(rax, rdx);
|
||||
mov(rdx, r8);
|
||||
|
@ -486,15 +488,15 @@ GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() {
|
|||
mov(r9, r10);
|
||||
call(rax);
|
||||
|
||||
mov(rbx, qword[rsp + 48]);
|
||||
mov(rcx, qword[rsp + 56]);
|
||||
mov(rbp, qword[rsp + 64]);
|
||||
mov(rsi, qword[rsp + 72]);
|
||||
mov(rdi, qword[rsp + 80]);
|
||||
mov(r12, qword[rsp + 88]);
|
||||
mov(r13, qword[rsp + 96]);
|
||||
mov(r14, qword[rsp + 104]);
|
||||
mov(r15, qword[rsp + 112]);
|
||||
mov(rbx, qword[rsp + 40]);
|
||||
mov(rcx, qword[rsp + 48]);
|
||||
mov(rbp, qword[rsp + 56]);
|
||||
mov(rsi, qword[rsp + 64]);
|
||||
mov(rdi, qword[rsp + 72]);
|
||||
mov(r12, qword[rsp + 80]);
|
||||
mov(r13, qword[rsp + 88]);
|
||||
mov(r14, qword[rsp + 96]);
|
||||
mov(r15, qword[rsp + 104]);
|
||||
|
||||
add(rsp, stack_size);
|
||||
mov(rcx, qword[rsp + 8 * 1]);
|
||||
|
@ -502,7 +504,7 @@ GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() {
|
|||
ret();
|
||||
|
||||
void* fn = Emplace(stack_size);
|
||||
return (HostToGuestThunk)fn;
|
||||
return (GuestToHostThunk)fn;
|
||||
}
|
||||
|
||||
// X64Emitter handles actually resolving functions.
|
||||
|
@ -518,29 +520,29 @@ ResolveFunctionThunk X64ThunkEmitter::EmitResolveFunctionThunk() {
|
|||
mov(qword[rsp + 8 * 1], rcx);
|
||||
sub(rsp, stack_size);
|
||||
|
||||
mov(qword[rsp + 48], rbx);
|
||||
mov(qword[rsp + 56], rcx);
|
||||
mov(qword[rsp + 64], rbp);
|
||||
mov(qword[rsp + 72], rsi);
|
||||
mov(qword[rsp + 80], rdi);
|
||||
mov(qword[rsp + 88], r12);
|
||||
mov(qword[rsp + 96], r13);
|
||||
mov(qword[rsp + 104], r14);
|
||||
mov(qword[rsp + 112], r15);
|
||||
mov(qword[rsp + 40], rbx);
|
||||
mov(qword[rsp + 48], rcx);
|
||||
mov(qword[rsp + 56], rbp);
|
||||
mov(qword[rsp + 64], rsi);
|
||||
mov(qword[rsp + 72], rdi);
|
||||
mov(qword[rsp + 80], r12);
|
||||
mov(qword[rsp + 88], r13);
|
||||
mov(qword[rsp + 96], r14);
|
||||
mov(qword[rsp + 104], r15);
|
||||
|
||||
mov(rdx, rbx);
|
||||
mov(rax, uint64_t(&ResolveFunction));
|
||||
call(rax);
|
||||
|
||||
mov(rbx, qword[rsp + 48]);
|
||||
mov(rcx, qword[rsp + 56]);
|
||||
mov(rbp, qword[rsp + 64]);
|
||||
mov(rsi, qword[rsp + 72]);
|
||||
mov(rdi, qword[rsp + 80]);
|
||||
mov(r12, qword[rsp + 88]);
|
||||
mov(r13, qword[rsp + 96]);
|
||||
mov(r14, qword[rsp + 104]);
|
||||
mov(r15, qword[rsp + 112]);
|
||||
mov(rbx, qword[rsp + 40]);
|
||||
mov(rcx, qword[rsp + 48]);
|
||||
mov(rbp, qword[rsp + 56]);
|
||||
mov(rsi, qword[rsp + 64]);
|
||||
mov(rdi, qword[rsp + 72]);
|
||||
mov(r12, qword[rsp + 80]);
|
||||
mov(r13, qword[rsp + 88]);
|
||||
mov(r14, qword[rsp + 96]);
|
||||
mov(r15, qword[rsp + 104]);
|
||||
|
||||
add(rsp, stack_size);
|
||||
mov(rcx, qword[rsp + 8 * 1]);
|
||||
|
|
|
@ -160,10 +160,6 @@ bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) {
|
|||
// Must be 16b aligned.
|
||||
// Windows is very strict about the form of this and the epilog:
|
||||
// http://msdn.microsoft.com/en-us/library/tawsa7cb.aspx
|
||||
// TODO(benvanik): save off non-volatile registers so we can use them:
|
||||
// RBX, RBP, RDI, RSI, RSP, R12, R13, R14, R15
|
||||
// Only want to do this if we actually use them, though, otherwise
|
||||
// it just adds overhead.
|
||||
// IMPORTANT: any changes to the prolog must be kept in sync with
|
||||
// X64CodeCache, which dynamically generates exception information.
|
||||
// Adding or changing anything here must be matched!
|
||||
|
@ -172,7 +168,7 @@ bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) {
|
|||
*out_stack_size = stack_size;
|
||||
stack_size_ = stack_size;
|
||||
sub(rsp, (uint32_t)stack_size);
|
||||
mov(qword[rsp + StackLayout::GUEST_RCX_HOME], rcx);
|
||||
mov(qword[rsp + StackLayout::GUEST_CTX_HOME], rcx);
|
||||
mov(qword[rsp + StackLayout::GUEST_RET_ADDR], rdx);
|
||||
mov(qword[rsp + StackLayout::GUEST_CALL_RET_ADDR], 0);
|
||||
|
||||
|
@ -205,7 +201,7 @@ bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) {
|
|||
}
|
||||
|
||||
// Load membase.
|
||||
mov(rdx, qword[rcx + 8]);
|
||||
mov(rdx, qword[rcx + offsetof(ppc::PPCContext, virtual_membase)]);
|
||||
|
||||
// Body.
|
||||
auto block = builder->first_block();
|
||||
|
@ -237,7 +233,7 @@ bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) {
|
|||
L(epilog_label);
|
||||
epilog_label_ = nullptr;
|
||||
EmitTraceUserCallReturn();
|
||||
mov(rcx, qword[rsp + StackLayout::GUEST_RCX_HOME]);
|
||||
mov(rcx, qword[rsp + StackLayout::GUEST_CTX_HOME]);
|
||||
add(rsp, (uint32_t)stack_size);
|
||||
ret();
|
||||
|
||||
|
@ -546,8 +542,11 @@ void X64Emitter::SetReturnAddress(uint64_t value) {
|
|||
mov(qword[rsp + StackLayout::GUEST_CALL_RET_ADDR], rax);
|
||||
}
|
||||
|
||||
// Returns the host register that carries the guest context pointer in
// emitted code (rcx). Context loads/stores are addressed relative to it.
Xbyak::Reg64 X64Emitter::GetContextReg() {
  return rcx;
}
|
||||
// Returns the host register that carries the guest memory base in emitted
// code (rdx). Guest memory addresses are formed as this register plus an
// offset.
Xbyak::Reg64 X64Emitter::GetMembaseReg() {
  return rdx;
}
|
||||
|
||||
void X64Emitter::ReloadECX() {
|
||||
mov(rcx, qword[rsp + StackLayout::GUEST_RCX_HOME]);
|
||||
mov(rcx, qword[rsp + StackLayout::GUEST_CTX_HOME]);
|
||||
}
|
||||
|
||||
void X64Emitter::ReloadEDX() {
|
||||
|
|
|
@ -125,7 +125,7 @@ class X64Emitter : public Xbyak::CodeGenerator {
|
|||
public:
|
||||
// Reserved: rsp
|
||||
// Scratch: rax/rcx/rdx
|
||||
// xmm0-2 (could be only xmm0 with some trickery)
|
||||
// xmm0-2
|
||||
// Available: rbx, r12-r15 (save to get r8-r11, rbp, rsi, rdi?)
|
||||
// xmm6-xmm15 (save to get xmm3-xmm5)
|
||||
static const int GPR_COUNT = 5;
|
||||
|
@ -170,6 +170,9 @@ class X64Emitter : public Xbyak::CodeGenerator {
|
|||
uint64_t arg0);
|
||||
void CallNativeSafe(void* fn);
|
||||
void SetReturnAddress(uint64_t value);
|
||||
|
||||
Xbyak::Reg64 GetContextReg();
|
||||
Xbyak::Reg64 GetMembaseReg();
|
||||
void ReloadECX();
|
||||
void ReloadEDX();
|
||||
|
||||
|
|
|
@ -1793,7 +1793,7 @@ EMITTER_OPCODE_TABLE(OPCODE_STORE_LOCAL, STORE_LOCAL_I8, STORE_LOCAL_I16,
|
|||
// ============================================================================
|
||||
// Note: all types are always aligned in the context.
|
||||
RegExp ComputeContextAddress(X64Emitter& e, const OffsetOp& offset) {
|
||||
return e.rcx + offset.value;
|
||||
return e.GetContextReg() + offset.value;
|
||||
}
|
||||
struct LOAD_CONTEXT_I8
|
||||
: Sequence<LOAD_CONTEXT_I8, I<OPCODE_LOAD_CONTEXT, I8Op, OffsetOp>> {
|
||||
|
@ -2088,12 +2088,12 @@ RegExp ComputeMemoryAddress(X64Emitter& e, const T& guest) {
|
|||
// Since the constant is often 0x8... if we tried to use that as a
|
||||
// displacement it would be sign extended and mess things up.
|
||||
e.mov(e.eax, static_cast<uint32_t>(guest.constant()));
|
||||
return e.rdx + e.rax;
|
||||
return e.GetMembaseReg() + e.rax;
|
||||
} else {
|
||||
// Clear the top 32 bits, as they are likely garbage.
|
||||
// TODO(benvanik): find a way to avoid doing this.
|
||||
e.mov(e.eax, guest.reg().cvt32());
|
||||
return e.rdx + e.rax;
|
||||
return e.GetMembaseReg() + e.rax;
|
||||
}
|
||||
}
|
||||
struct LOAD_I8 : Sequence<LOAD_I8, I<OPCODE_LOAD, I8Op, I64Op>> {
|
||||
|
@ -3959,7 +3959,6 @@ struct MUL_HI_I8 : Sequence<MUL_HI_I8, I<OPCODE_MUL_HI, I8Op, I8Op, I8Op>> {
|
|||
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
|
||||
// mulx: $1:$2 = EDX * $3
|
||||
|
||||
// TODO(justin): Find a way to shorten this feature-check call.
|
||||
if (e.IsFeatureEnabled(kX64EmitBMI2)) {
|
||||
// TODO(benvanik): place src1 in eax? still need to sign extend
|
||||
e.movzx(e.edx, i.src1);
|
||||
|
@ -4004,7 +4003,6 @@ struct MUL_HI_I16
|
|||
: Sequence<MUL_HI_I16, I<OPCODE_MUL_HI, I16Op, I16Op, I16Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
|
||||
// TODO(justin): Find a way to shorten this feature-check call.
|
||||
if (e.IsFeatureEnabled(kX64EmitBMI2)) {
|
||||
// TODO(benvanik): place src1 in eax? still need to sign extend
|
||||
e.movzx(e.edx, i.src1);
|
||||
|
@ -4049,7 +4047,6 @@ struct MUL_HI_I32
|
|||
: Sequence<MUL_HI_I32, I<OPCODE_MUL_HI, I32Op, I32Op, I32Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
|
||||
// TODO(justin): Find a way to shorten this feature-check call.
|
||||
if (e.IsFeatureEnabled(kX64EmitBMI2)) {
|
||||
// TODO(benvanik): place src1 in eax? still need to sign extend
|
||||
e.mov(e.edx, i.src1);
|
||||
|
@ -4099,7 +4096,6 @@ struct MUL_HI_I64
|
|||
: Sequence<MUL_HI_I64, I<OPCODE_MUL_HI, I64Op, I64Op, I64Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
|
||||
// TODO(justin): Find a way to shorten this feature-check call.
|
||||
if (e.IsFeatureEnabled(kX64EmitBMI2)) {
|
||||
// TODO(benvanik): place src1 in eax? still need to sign extend
|
||||
e.mov(e.rdx, i.src1);
|
||||
|
|
|
@ -29,68 +29,68 @@ namespace x64 {
|
|||
* | |
|
||||
* | |
|
||||
* +------------------+
|
||||
* | scratch, 16b | rsp + 32
|
||||
* | scratch, 16b | rsp + 24
|
||||
* | |
|
||||
* +------------------+
|
||||
* | rbx | rsp + 48
|
||||
* | rbx | rsp + 40
|
||||
* +------------------+
|
||||
* | rcx / context | rsp + 56
|
||||
* | rcx / context | rsp + 48
|
||||
* +------------------+
|
||||
* | rbp | rsp + 64
|
||||
* | rbp | rsp + 56
|
||||
* +------------------+
|
||||
* | rsi | rsp + 72
|
||||
* | rsi | rsp + 64
|
||||
* +------------------+
|
||||
* | rdi | rsp + 80
|
||||
* | rdi | rsp + 72
|
||||
* +------------------+
|
||||
* | r12 | rsp + 88
|
||||
* | r12 | rsp + 80
|
||||
* +------------------+
|
||||
* | r13 | rsp + 96
|
||||
* | r13 | rsp + 88
|
||||
* +------------------+
|
||||
* | r14 | rsp + 104
|
||||
* | r14 | rsp + 96
|
||||
* +------------------+
|
||||
* | r15 | rsp + 112
|
||||
* | r15 | rsp + 104
|
||||
* +------------------+
|
||||
* | (return address) | rsp + 120
|
||||
* | xmm6/0 | rsp + 112
|
||||
* | |
|
||||
* +------------------+
|
||||
* | (rcx home) | rsp + 128
|
||||
* | xmm7/1 | rsp + 128
|
||||
* | |
|
||||
* +------------------+
|
||||
* | (rdx home) | rsp + 136
|
||||
* | xmm8/2 | rsp + 144
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm9/3 | rsp + 160
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm10/4 | rsp + 176
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm11/5 | rsp + 192
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm12 | rsp + 208
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm13 | rsp + 224
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm14 | rsp + 240
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm15 | rsp + 256
|
||||
* | |
|
||||
* +------------------+
|
||||
* | scratch, 8b | rsp + 272
|
||||
* | |
|
||||
* +------------------+
|
||||
* | (return address) | rsp + 280
|
||||
* +------------------+
|
||||
* | (rcx home) | rsp + 288
|
||||
* +------------------+
|
||||
* | (rdx home) | rsp + 296
|
||||
* +------------------+
|
||||
*
|
||||
*
|
||||
* TODO:
|
||||
* +------------------+
|
||||
* | xmm6 | rsp + 128
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm7 | rsp + 144
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm8 | rsp + 160
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm9 | rsp + 176
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm10 | rsp + 192
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm11 | rsp + 208
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm12 | rsp + 224
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm13 | rsp + 240
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm14 | rsp + 256
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm15 | rsp + 272
|
||||
* | |
|
||||
* +------------------+
|
||||
*
|
||||
* Guest stack:
|
||||
* +------------------+
|
||||
* | arg temp, 3 * 8 | rsp + 0
|
||||
|
@ -115,10 +115,10 @@ namespace x64 {
|
|||
|
||||
class StackLayout {
|
||||
public:
|
||||
static const size_t THUNK_STACK_SIZE = 120;
|
||||
static const size_t THUNK_STACK_SIZE = 280;
|
||||
|
||||
static const size_t GUEST_STACK_SIZE = 104;
|
||||
static const size_t GUEST_RCX_HOME = 80;
|
||||
static const size_t GUEST_CTX_HOME = 80;
|
||||
static const size_t GUEST_RET_ADDR = 88;
|
||||
static const size_t GUEST_CALL_RET_ADDR = 96;
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue