place locals on backend pages
This commit is contained in:
parent
67f16c4e31
commit
fe7dc26e3f
|
@ -208,7 +208,26 @@ bool X64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) {
|
||||||
// IMPORTANT: any changes to the prolog must be kept in sync with
|
// IMPORTANT: any changes to the prolog must be kept in sync with
|
||||||
// X64CodeCache, which dynamically generates exception information.
|
// X64CodeCache, which dynamically generates exception information.
|
||||||
// Adding or changing anything here must be matched!
|
// Adding or changing anything here must be matched!
|
||||||
const size_t stack_size = StackLayout::GUEST_STACK_SIZE + stack_offset;
|
|
||||||
|
/*
|
||||||
|
Pick a page to use as the locals base that is as close as possible to the commonly accessed page that contains most backend fields.
|
||||||
|
The sizes that are checked are chosen based on PTE coalescing sizes; Zen coalesces 16k or 32k.
|
||||||
|
*/
|
||||||
|
size_t stack_size = StackLayout::GUEST_STACK_SIZE;
|
||||||
|
if (stack_offset < (4096 - sizeof(X64BackendContext))) {
|
||||||
|
locals_page_delta_ = 4096;
|
||||||
|
} else if (stack_offset < (16384 - sizeof(X64BackendContext))) {//16k PTE coalescing
|
||||||
|
locals_page_delta_ = 16384;
|
||||||
|
} else if (stack_offset < (32768 - sizeof(X64BackendContext))) {
|
||||||
|
locals_page_delta_ = 32768;
|
||||||
|
} else if (stack_offset < (65536 - sizeof(X64BackendContext))) {
|
||||||
|
locals_page_delta_ = 65536;
|
||||||
|
} else {
|
||||||
|
// Extremely unlikely: locals do not fit in any candidate page window, so fall back to keeping them on the stack.
|
||||||
|
stack_size = xe::align<size_t>(StackLayout::GUEST_STACK_SIZE + stack_offset, 16);
|
||||||
|
locals_page_delta_ = 0;
|
||||||
|
}
|
||||||
|
|
||||||
assert_true((stack_size + 8) % 16 == 0);
|
assert_true((stack_size + 8) % 16 == 0);
|
||||||
func_info.stack_size = stack_size;
|
func_info.stack_size = stack_size;
|
||||||
stack_size_ = stack_size;
|
stack_size_ = stack_size;
|
||||||
|
@ -1591,6 +1610,9 @@ SimdDomain X64Emitter::DeduceSimdDomain(const hir::Value* for_value) {
|
||||||
|
|
||||||
return SimdDomain::DONTCARE;
|
return SimdDomain::DONTCARE;
|
||||||
}
|
}
|
||||||
|
// Returns the base address expression used to address locals.
// When locals_page_delta_ is zero the base is rsp; otherwise it is the
// context register minus locals_page_delta_.
Xbyak::RegExp X64Emitter::GetLocalsBase() const {
|
||||||
|
return !locals_page_delta_ ? rsp : GetContextReg() - locals_page_delta_;
|
||||||
|
}
|
||||||
Xbyak::Address X64Emitter::GetBackendCtxPtr(int offset_in_x64backendctx) const {
|
Xbyak::Address X64Emitter::GetBackendCtxPtr(int offset_in_x64backendctx) const {
|
||||||
/*
|
/*
|
||||||
index context ptr negatively to get to backend ctx field
|
index context ptr negatively to get to backend ctx field
|
||||||
|
|
|
@ -309,6 +309,7 @@ class X64Emitter : public Xbyak::CodeGenerator {
|
||||||
FunctionDebugInfo* debug_info() const { return debug_info_; }
|
FunctionDebugInfo* debug_info() const { return debug_info_; }
|
||||||
|
|
||||||
size_t stack_size() const { return stack_size_; }
|
size_t stack_size() const { return stack_size_; }
|
||||||
|
Xbyak::RegExp GetLocalsBase() const;
|
||||||
SimdDomain DeduceSimdDomain(const hir::Value* for_value);
|
SimdDomain DeduceSimdDomain(const hir::Value* for_value);
|
||||||
|
|
||||||
void ForgetMxcsrMode() { mxcsr_mode_ = MXCSRMode::Unknown; }
|
void ForgetMxcsrMode() { mxcsr_mode_ = MXCSRMode::Unknown; }
|
||||||
|
@ -396,6 +397,7 @@ class X64Emitter : public Xbyak::CodeGenerator {
|
||||||
XbyakAllocator* allocator_ = nullptr;
|
XbyakAllocator* allocator_ = nullptr;
|
||||||
XexModule* guest_module_ = nullptr;
|
XexModule* guest_module_ = nullptr;
|
||||||
bool synchronize_stack_on_next_instruction_ = false;
|
bool synchronize_stack_on_next_instruction_ = false;
|
||||||
|
int locals_page_delta_ = 0;
|
||||||
Xbyak::util::Cpu cpu_;
|
Xbyak::util::Cpu cpu_;
|
||||||
uint64_t feature_flags_ = 0;
|
uint64_t feature_flags_ = 0;
|
||||||
uint32_t current_guest_function_ = 0;
|
uint32_t current_guest_function_ = 0;
|
||||||
|
|
|
@ -633,49 +633,49 @@ EMITTER_OPCODE_TABLE(OPCODE_ATOMIC_COMPARE_EXCHANGE,
|
||||||
struct LOAD_LOCAL_I8
|
struct LOAD_LOCAL_I8
|
||||||
: Sequence<LOAD_LOCAL_I8, I<OPCODE_LOAD_LOCAL, I8Op, I32Op>> {
|
: Sequence<LOAD_LOCAL_I8, I<OPCODE_LOAD_LOCAL, I8Op, I32Op>> {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
e.mov(i.dest, e.byte[e.rsp + i.src1.constant()]);
|
e.mov(i.dest, e.byte[e.GetLocalsBase() + i.src1.constant()]);
|
||||||
// e.TraceLoadI8(DATA_LOCAL, i.src1.constant, i.dest);
|
// e.TraceLoadI8(DATA_LOCAL, i.src1.constant, i.dest);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
struct LOAD_LOCAL_I16
|
struct LOAD_LOCAL_I16
|
||||||
: Sequence<LOAD_LOCAL_I16, I<OPCODE_LOAD_LOCAL, I16Op, I32Op>> {
|
: Sequence<LOAD_LOCAL_I16, I<OPCODE_LOAD_LOCAL, I16Op, I32Op>> {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
e.mov(i.dest, e.word[e.rsp + i.src1.constant()]);
|
e.mov(i.dest, e.word[e.GetLocalsBase() + i.src1.constant()]);
|
||||||
// e.TraceLoadI16(DATA_LOCAL, i.src1.constant, i.dest);
|
// e.TraceLoadI16(DATA_LOCAL, i.src1.constant, i.dest);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
struct LOAD_LOCAL_I32
|
struct LOAD_LOCAL_I32
|
||||||
: Sequence<LOAD_LOCAL_I32, I<OPCODE_LOAD_LOCAL, I32Op, I32Op>> {
|
: Sequence<LOAD_LOCAL_I32, I<OPCODE_LOAD_LOCAL, I32Op, I32Op>> {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
e.mov(i.dest, e.dword[e.rsp + i.src1.constant()]);
|
e.mov(i.dest, e.dword[e.GetLocalsBase() + i.src1.constant()]);
|
||||||
// e.TraceLoadI32(DATA_LOCAL, i.src1.constant, i.dest);
|
// e.TraceLoadI32(DATA_LOCAL, i.src1.constant, i.dest);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
struct LOAD_LOCAL_I64
|
struct LOAD_LOCAL_I64
|
||||||
: Sequence<LOAD_LOCAL_I64, I<OPCODE_LOAD_LOCAL, I64Op, I32Op>> {
|
: Sequence<LOAD_LOCAL_I64, I<OPCODE_LOAD_LOCAL, I64Op, I32Op>> {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
e.mov(i.dest, e.qword[e.rsp + i.src1.constant()]);
|
e.mov(i.dest, e.qword[e.GetLocalsBase() + i.src1.constant()]);
|
||||||
// e.TraceLoadI64(DATA_LOCAL, i.src1.constant, i.dest);
|
// e.TraceLoadI64(DATA_LOCAL, i.src1.constant, i.dest);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
struct LOAD_LOCAL_F32
|
struct LOAD_LOCAL_F32
|
||||||
: Sequence<LOAD_LOCAL_F32, I<OPCODE_LOAD_LOCAL, F32Op, I32Op>> {
|
: Sequence<LOAD_LOCAL_F32, I<OPCODE_LOAD_LOCAL, F32Op, I32Op>> {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
e.vmovss(i.dest, e.dword[e.rsp + i.src1.constant()]);
|
e.vmovss(i.dest, e.dword[e.GetLocalsBase() + i.src1.constant()]);
|
||||||
// e.TraceLoadF32(DATA_LOCAL, i.src1.constant, i.dest);
|
// e.TraceLoadF32(DATA_LOCAL, i.src1.constant, i.dest);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
struct LOAD_LOCAL_F64
|
struct LOAD_LOCAL_F64
|
||||||
: Sequence<LOAD_LOCAL_F64, I<OPCODE_LOAD_LOCAL, F64Op, I32Op>> {
|
: Sequence<LOAD_LOCAL_F64, I<OPCODE_LOAD_LOCAL, F64Op, I32Op>> {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
e.vmovsd(i.dest, e.qword[e.rsp + i.src1.constant()]);
|
e.vmovsd(i.dest, e.qword[e.GetLocalsBase() + i.src1.constant()]);
|
||||||
// e.TraceLoadF64(DATA_LOCAL, i.src1.constant, i.dest);
|
// e.TraceLoadF64(DATA_LOCAL, i.src1.constant, i.dest);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
struct LOAD_LOCAL_V128
|
struct LOAD_LOCAL_V128
|
||||||
: Sequence<LOAD_LOCAL_V128, I<OPCODE_LOAD_LOCAL, V128Op, I32Op>> {
|
: Sequence<LOAD_LOCAL_V128, I<OPCODE_LOAD_LOCAL, V128Op, I32Op>> {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
e.vmovaps(i.dest, e.ptr[e.rsp + i.src1.constant()]);
|
e.vmovaps(i.dest, e.ptr[e.GetLocalsBase() + i.src1.constant()]);
|
||||||
// e.TraceLoadV128(DATA_LOCAL, i.src1.constant, i.dest);
|
// e.TraceLoadV128(DATA_LOCAL, i.src1.constant, i.dest);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -691,7 +691,7 @@ struct STORE_LOCAL_I8
|
||||||
: Sequence<STORE_LOCAL_I8, I<OPCODE_STORE_LOCAL, VoidOp, I32Op, I8Op>> {
|
: Sequence<STORE_LOCAL_I8, I<OPCODE_STORE_LOCAL, VoidOp, I32Op, I8Op>> {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
// e.TraceStoreI8(DATA_LOCAL, i.src1.constant, i.src2);
|
// e.TraceStoreI8(DATA_LOCAL, i.src1.constant, i.src2);
|
||||||
e.mov(e.byte[e.rsp + i.src1.constant()], i.src2);
|
e.mov(e.byte[e.GetLocalsBase() + i.src1.constant()], i.src2);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -705,9 +705,9 @@ struct STORE_LOCAL_I16
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
// e.TraceStoreI16(DATA_LOCAL, i.src1.constant, i.src2);
|
// e.TraceStoreI16(DATA_LOCAL, i.src1.constant, i.src2);
|
||||||
if (LocalStoreMayUseMembaseLow(e, i)) {
|
if (LocalStoreMayUseMembaseLow(e, i)) {
|
||||||
e.mov(e.word[e.rsp + i.src1.constant()], e.GetMembaseReg().cvt16());
|
e.mov(e.word[e.GetLocalsBase() + i.src1.constant()], e.GetMembaseReg().cvt16());
|
||||||
} else {
|
} else {
|
||||||
e.mov(e.word[e.rsp + i.src1.constant()], i.src2);
|
e.mov(e.word[e.GetLocalsBase() + i.src1.constant()], i.src2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -716,9 +716,9 @@ struct STORE_LOCAL_I32
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
// e.TraceStoreI32(DATA_LOCAL, i.src1.constant, i.src2);
|
// e.TraceStoreI32(DATA_LOCAL, i.src1.constant, i.src2);
|
||||||
if (LocalStoreMayUseMembaseLow(e, i)) {
|
if (LocalStoreMayUseMembaseLow(e, i)) {
|
||||||
e.mov(e.dword[e.rsp + i.src1.constant()], e.GetMembaseReg().cvt32());
|
e.mov(e.dword[e.GetLocalsBase() + i.src1.constant()], e.GetMembaseReg().cvt32());
|
||||||
} else {
|
} else {
|
||||||
e.mov(e.dword[e.rsp + i.src1.constant()], i.src2);
|
e.mov(e.dword[e.GetLocalsBase() + i.src1.constant()], i.src2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -728,9 +728,9 @@ struct STORE_LOCAL_I64
|
||||||
// e.TraceStoreI64(DATA_LOCAL, i.src1.constant, i.src2);
|
// e.TraceStoreI64(DATA_LOCAL, i.src1.constant, i.src2);
|
||||||
if (i.src2.is_constant && i.src2.constant() == 0) {
|
if (i.src2.is_constant && i.src2.constant() == 0) {
|
||||||
e.xor_(e.eax, e.eax);
|
e.xor_(e.eax, e.eax);
|
||||||
e.mov(e.qword[e.rsp + i.src1.constant()], e.rax);
|
e.mov(e.qword[e.GetLocalsBase() + i.src1.constant()], e.rax);
|
||||||
} else {
|
} else {
|
||||||
e.mov(e.qword[e.rsp + i.src1.constant()], i.src2);
|
e.mov(e.qword[e.GetLocalsBase() + i.src1.constant()], i.src2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -738,21 +738,21 @@ struct STORE_LOCAL_F32
|
||||||
: Sequence<STORE_LOCAL_F32, I<OPCODE_STORE_LOCAL, VoidOp, I32Op, F32Op>> {
|
: Sequence<STORE_LOCAL_F32, I<OPCODE_STORE_LOCAL, VoidOp, I32Op, F32Op>> {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
// e.TraceStoreF32(DATA_LOCAL, i.src1.constant, i.src2);
|
// e.TraceStoreF32(DATA_LOCAL, i.src1.constant, i.src2);
|
||||||
e.vmovss(e.dword[e.rsp + i.src1.constant()], i.src2);
|
e.vmovss(e.dword[e.GetLocalsBase() + i.src1.constant()], i.src2);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
struct STORE_LOCAL_F64
|
struct STORE_LOCAL_F64
|
||||||
: Sequence<STORE_LOCAL_F64, I<OPCODE_STORE_LOCAL, VoidOp, I32Op, F64Op>> {
|
: Sequence<STORE_LOCAL_F64, I<OPCODE_STORE_LOCAL, VoidOp, I32Op, F64Op>> {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
// e.TraceStoreF64(DATA_LOCAL, i.src1.constant, i.src2);
|
// e.TraceStoreF64(DATA_LOCAL, i.src1.constant, i.src2);
|
||||||
e.vmovsd(e.qword[e.rsp + i.src1.constant()], i.src2);
|
e.vmovsd(e.qword[e.GetLocalsBase() + i.src1.constant()], i.src2);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
struct STORE_LOCAL_V128
|
struct STORE_LOCAL_V128
|
||||||
: Sequence<STORE_LOCAL_V128, I<OPCODE_STORE_LOCAL, VoidOp, I32Op, V128Op>> {
|
: Sequence<STORE_LOCAL_V128, I<OPCODE_STORE_LOCAL, VoidOp, I32Op, V128Op>> {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
// e.TraceStoreV128(DATA_LOCAL, i.src1.constant, i.src2);
|
// e.TraceStoreV128(DATA_LOCAL, i.src1.constant, i.src2);
|
||||||
e.vmovaps(e.ptr[e.rsp + i.src1.constant()], i.src2);
|
e.vmovaps(e.ptr[e.GetLocalsBase() + i.src1.constant()], i.src2);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
EMITTER_OPCODE_TABLE(OPCODE_STORE_LOCAL, STORE_LOCAL_I8, STORE_LOCAL_I16,
|
EMITTER_OPCODE_TABLE(OPCODE_STORE_LOCAL, STORE_LOCAL_I8, STORE_LOCAL_I16,
|
||||||
|
|
Loading…
Reference in New Issue