place locals on backend pages
This commit is contained in:
parent
67f16c4e31
commit
fe7dc26e3f
|
@ -208,7 +208,26 @@ bool X64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) {
|
|||
// IMPORTANT: any changes to the prolog must be kept in sync with
|
||||
// X64CodeCache, which dynamically generates exception information.
|
||||
// Adding or changing anything here must be matched!
|
||||
const size_t stack_size = StackLayout::GUEST_STACK_SIZE + stack_offset;
|
||||
|
||||
/*
|
||||
Pick a page to use as the locals base that is as close as possible to the commonly accessed page containing most backend fields.
|
||||
The page sizes checked below are chosen based on PTE coalescing sizes; Zen coalesces at 16k or 32k.
|
||||
*/
|
||||
size_t stack_size = StackLayout::GUEST_STACK_SIZE;
|
||||
if (stack_offset < (4096 - sizeof(X64BackendContext))) {
|
||||
locals_page_delta_ = 4096;
|
||||
} else if (stack_offset < (16384 - sizeof(X64BackendContext))) {//16k PTE coalescing
|
||||
locals_page_delta_ = 16384;
|
||||
} else if (stack_offset < (32768 - sizeof(X64BackendContext))) {
|
||||
locals_page_delta_ = 32768;
|
||||
} else if (stack_offset < (65536 - sizeof(X64BackendContext))) {
|
||||
locals_page_delta_ = 65536;
|
||||
} else {
|
||||
// Extremely unlikely: no page delta fits, so fall back to keeping the locals on the stack.
|
||||
stack_size = xe::align<size_t>(StackLayout::GUEST_STACK_SIZE + stack_offset, 16);
|
||||
locals_page_delta_ = 0;
|
||||
}
|
||||
|
||||
assert_true((stack_size + 8) % 16 == 0);
|
||||
func_info.stack_size = stack_size;
|
||||
stack_size_ = stack_size;
|
||||
|
@ -1591,6 +1610,9 @@ SimdDomain X64Emitter::DeduceSimdDomain(const hir::Value* for_value) {
|
|||
|
||||
return SimdDomain::DONTCARE;
|
||||
}
|
||||
// Returns the base address expression that the LOAD_LOCAL / STORE_LOCAL
// sequences add their constant local offset to.
// When locals_page_delta_ is zero the base is rsp; otherwise it is the
// context register minus locals_page_delta_.
Xbyak::RegExp X64Emitter::GetLocalsBase() const {
|
||||
return !locals_page_delta_ ? rsp : GetContextReg() - locals_page_delta_;
|
||||
}
|
||||
Xbyak::Address X64Emitter::GetBackendCtxPtr(int offset_in_x64backendctx) const {
|
||||
/*
|
||||
index context ptr negatively to get to backend ctx field
|
||||
|
|
|
@ -309,6 +309,7 @@ class X64Emitter : public Xbyak::CodeGenerator {
|
|||
FunctionDebugInfo* debug_info() const { return debug_info_; }
|
||||
|
||||
size_t stack_size() const { return stack_size_; }
|
||||
Xbyak::RegExp GetLocalsBase() const;
|
||||
SimdDomain DeduceSimdDomain(const hir::Value* for_value);
|
||||
|
||||
void ForgetMxcsrMode() { mxcsr_mode_ = MXCSRMode::Unknown; }
|
||||
|
@ -396,6 +397,7 @@ class X64Emitter : public Xbyak::CodeGenerator {
|
|||
XbyakAllocator* allocator_ = nullptr;
|
||||
XexModule* guest_module_ = nullptr;
|
||||
bool synchronize_stack_on_next_instruction_ = false;
|
||||
int locals_page_delta_ = 0;
|
||||
Xbyak::util::Cpu cpu_;
|
||||
uint64_t feature_flags_ = 0;
|
||||
uint32_t current_guest_function_ = 0;
|
||||
|
|
|
@ -633,49 +633,49 @@ EMITTER_OPCODE_TABLE(OPCODE_ATOMIC_COMPARE_EXCHANGE,
|
|||
// Emitter sequence for OPCODE_LOAD_LOCAL with an I8 destination: emits a
// byte-sized mov from the local slot at constant offset i.src1 into i.dest.
// NOTE(review): this is a diff rendering, so both addressing forms appear;
// presumably the rsp-relative mov is the removed line and the
// GetLocalsBase()-relative mov is its replacement - confirm against the commit.
struct LOAD_LOCAL_I8
|
||||
: Sequence<LOAD_LOCAL_I8, I<OPCODE_LOAD_LOCAL, I8Op, I32Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
e.mov(i.dest, e.byte[e.rsp + i.src1.constant()]);
|
||||
e.mov(i.dest, e.byte[e.GetLocalsBase() + i.src1.constant()]);
|
||||
// e.TraceLoadI8(DATA_LOCAL, i.src1.constant, i.dest);
|
||||
}
|
||||
};
|
||||
// Emitter sequence for OPCODE_LOAD_LOCAL with an I16 destination: emits a
// word-sized mov from the local slot at constant offset i.src1 into i.dest.
// NOTE(review): this is a diff rendering, so both addressing forms appear;
// presumably the rsp-relative mov is the removed line and the
// GetLocalsBase()-relative mov is its replacement - confirm against the commit.
struct LOAD_LOCAL_I16
|
||||
: Sequence<LOAD_LOCAL_I16, I<OPCODE_LOAD_LOCAL, I16Op, I32Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
e.mov(i.dest, e.word[e.rsp + i.src1.constant()]);
|
||||
e.mov(i.dest, e.word[e.GetLocalsBase() + i.src1.constant()]);
|
||||
// e.TraceLoadI16(DATA_LOCAL, i.src1.constant, i.dest);
|
||||
}
|
||||
};
|
||||
// Emitter sequence for OPCODE_LOAD_LOCAL with an I32 destination: emits a
// dword-sized mov from the local slot at constant offset i.src1 into i.dest.
// NOTE(review): this is a diff rendering, so both addressing forms appear;
// presumably the rsp-relative mov is the removed line and the
// GetLocalsBase()-relative mov is its replacement - confirm against the commit.
struct LOAD_LOCAL_I32
|
||||
: Sequence<LOAD_LOCAL_I32, I<OPCODE_LOAD_LOCAL, I32Op, I32Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
e.mov(i.dest, e.dword[e.rsp + i.src1.constant()]);
|
||||
e.mov(i.dest, e.dword[e.GetLocalsBase() + i.src1.constant()]);
|
||||
// e.TraceLoadI32(DATA_LOCAL, i.src1.constant, i.dest);
|
||||
}
|
||||
};
|
||||
// Emitter sequence for OPCODE_LOAD_LOCAL with an I64 destination: emits a
// qword-sized mov from the local slot at constant offset i.src1 into i.dest.
// NOTE(review): this is a diff rendering, so both addressing forms appear;
// presumably the rsp-relative mov is the removed line and the
// GetLocalsBase()-relative mov is its replacement - confirm against the commit.
struct LOAD_LOCAL_I64
|
||||
: Sequence<LOAD_LOCAL_I64, I<OPCODE_LOAD_LOCAL, I64Op, I32Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
e.mov(i.dest, e.qword[e.rsp + i.src1.constant()]);
|
||||
e.mov(i.dest, e.qword[e.GetLocalsBase() + i.src1.constant()]);
|
||||
// e.TraceLoadI64(DATA_LOCAL, i.src1.constant, i.dest);
|
||||
}
|
||||
};
|
||||
// Emitter sequence for OPCODE_LOAD_LOCAL with an F32 destination: emits a
// vmovss from the dword local slot at constant offset i.src1 into i.dest.
// NOTE(review): this is a diff rendering, so both addressing forms appear;
// presumably the rsp-relative vmovss is the removed line and the
// GetLocalsBase()-relative vmovss is its replacement - confirm against the commit.
struct LOAD_LOCAL_F32
|
||||
: Sequence<LOAD_LOCAL_F32, I<OPCODE_LOAD_LOCAL, F32Op, I32Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
e.vmovss(i.dest, e.dword[e.rsp + i.src1.constant()]);
|
||||
e.vmovss(i.dest, e.dword[e.GetLocalsBase() + i.src1.constant()]);
|
||||
// e.TraceLoadF32(DATA_LOCAL, i.src1.constant, i.dest);
|
||||
}
|
||||
};
|
||||
// Emitter sequence for OPCODE_LOAD_LOCAL with an F64 destination: emits a
// vmovsd from the qword local slot at constant offset i.src1 into i.dest.
// NOTE(review): this is a diff rendering, so both addressing forms appear;
// presumably the rsp-relative vmovsd is the removed line and the
// GetLocalsBase()-relative vmovsd is its replacement - confirm against the commit.
struct LOAD_LOCAL_F64
|
||||
: Sequence<LOAD_LOCAL_F64, I<OPCODE_LOAD_LOCAL, F64Op, I32Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
e.vmovsd(i.dest, e.qword[e.rsp + i.src1.constant()]);
|
||||
e.vmovsd(i.dest, e.qword[e.GetLocalsBase() + i.src1.constant()]);
|
||||
// e.TraceLoadF64(DATA_LOCAL, i.src1.constant, i.dest);
|
||||
}
|
||||
};
|
||||
// Emitter sequence for OPCODE_LOAD_LOCAL with a V128 destination: emits a
// vmovaps from the local slot at constant offset i.src1 into i.dest.
// NOTE(review): this is a diff rendering, so both addressing forms appear;
// presumably the rsp-relative vmovaps is the removed line and the
// GetLocalsBase()-relative vmovaps is its replacement - confirm against the commit.
struct LOAD_LOCAL_V128
|
||||
: Sequence<LOAD_LOCAL_V128, I<OPCODE_LOAD_LOCAL, V128Op, I32Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
e.vmovaps(i.dest, e.ptr[e.rsp + i.src1.constant()]);
|
||||
e.vmovaps(i.dest, e.ptr[e.GetLocalsBase() + i.src1.constant()]);
|
||||
// e.TraceLoadV128(DATA_LOCAL, i.src1.constant, i.dest);
|
||||
}
|
||||
};
|
||||
|
@ -691,7 +691,7 @@ struct STORE_LOCAL_I8
|
|||
: Sequence<STORE_LOCAL_I8, I<OPCODE_STORE_LOCAL, VoidOp, I32Op, I8Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
// e.TraceStoreI8(DATA_LOCAL, i.src1.constant, i.src2);
|
||||
e.mov(e.byte[e.rsp + i.src1.constant()], i.src2);
|
||||
e.mov(e.byte[e.GetLocalsBase() + i.src1.constant()], i.src2);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -705,9 +705,9 @@ struct STORE_LOCAL_I16
|
|||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
// e.TraceStoreI16(DATA_LOCAL, i.src1.constant, i.src2);
|
||||
if (LocalStoreMayUseMembaseLow(e, i)) {
|
||||
e.mov(e.word[e.rsp + i.src1.constant()], e.GetMembaseReg().cvt16());
|
||||
e.mov(e.word[e.GetLocalsBase() + i.src1.constant()], e.GetMembaseReg().cvt16());
|
||||
} else {
|
||||
e.mov(e.word[e.rsp + i.src1.constant()], i.src2);
|
||||
e.mov(e.word[e.GetLocalsBase() + i.src1.constant()], i.src2);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -716,9 +716,9 @@ struct STORE_LOCAL_I32
|
|||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
// e.TraceStoreI32(DATA_LOCAL, i.src1.constant, i.src2);
|
||||
if (LocalStoreMayUseMembaseLow(e, i)) {
|
||||
e.mov(e.dword[e.rsp + i.src1.constant()], e.GetMembaseReg().cvt32());
|
||||
e.mov(e.dword[e.GetLocalsBase() + i.src1.constant()], e.GetMembaseReg().cvt32());
|
||||
} else {
|
||||
e.mov(e.dword[e.rsp + i.src1.constant()], i.src2);
|
||||
e.mov(e.dword[e.GetLocalsBase() + i.src1.constant()], i.src2);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -728,9 +728,9 @@ struct STORE_LOCAL_I64
|
|||
// e.TraceStoreI64(DATA_LOCAL, i.src1.constant, i.src2);
|
||||
if (i.src2.is_constant && i.src2.constant() == 0) {
|
||||
e.xor_(e.eax, e.eax);
|
||||
e.mov(e.qword[e.rsp + i.src1.constant()], e.rax);
|
||||
e.mov(e.qword[e.GetLocalsBase() + i.src1.constant()], e.rax);
|
||||
} else {
|
||||
e.mov(e.qword[e.rsp + i.src1.constant()], i.src2);
|
||||
e.mov(e.qword[e.GetLocalsBase() + i.src1.constant()], i.src2);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -738,21 +738,21 @@ struct STORE_LOCAL_F32
|
|||
: Sequence<STORE_LOCAL_F32, I<OPCODE_STORE_LOCAL, VoidOp, I32Op, F32Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
// e.TraceStoreF32(DATA_LOCAL, i.src1.constant, i.src2);
|
||||
e.vmovss(e.dword[e.rsp + i.src1.constant()], i.src2);
|
||||
e.vmovss(e.dword[e.GetLocalsBase() + i.src1.constant()], i.src2);
|
||||
}
|
||||
};
|
||||
// Emitter sequence for OPCODE_STORE_LOCAL with an F64 source: emits a vmovsd
// of i.src2 into the qword local slot at constant offset i.src1.
// NOTE(review): this is a diff rendering, so both addressing forms appear;
// presumably the rsp-relative vmovsd is the removed line and the
// GetLocalsBase()-relative vmovsd is its replacement - confirm against the commit.
struct STORE_LOCAL_F64
|
||||
: Sequence<STORE_LOCAL_F64, I<OPCODE_STORE_LOCAL, VoidOp, I32Op, F64Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
// e.TraceStoreF64(DATA_LOCAL, i.src1.constant, i.src2);
|
||||
e.vmovsd(e.qword[e.rsp + i.src1.constant()], i.src2);
|
||||
e.vmovsd(e.qword[e.GetLocalsBase() + i.src1.constant()], i.src2);
|
||||
}
|
||||
};
|
||||
// Emitter sequence for OPCODE_STORE_LOCAL with a V128 source: emits a vmovaps
// of i.src2 into the local slot at constant offset i.src1.
// NOTE(review): this is a diff rendering, so both addressing forms appear;
// presumably the rsp-relative vmovaps is the removed line and the
// GetLocalsBase()-relative vmovaps is its replacement - confirm against the commit.
struct STORE_LOCAL_V128
|
||||
: Sequence<STORE_LOCAL_V128, I<OPCODE_STORE_LOCAL, VoidOp, I32Op, V128Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
// e.TraceStoreV128(DATA_LOCAL, i.src1.constant, i.src2);
|
||||
e.vmovaps(e.ptr[e.rsp + i.src1.constant()], i.src2);
|
||||
e.vmovaps(e.ptr[e.GetLocalsBase() + i.src1.constant()], i.src2);
|
||||
}
|
||||
};
|
||||
EMITTER_OPCODE_TABLE(OPCODE_STORE_LOCAL, STORE_LOCAL_I8, STORE_LOCAL_I16,
|
||||
|
|
Loading…
Reference in New Issue