place locals on backend pages

This commit is contained in:
disjtqz 2023-10-01 09:14:41 -04:00 committed by Radosław Gliński
parent 67f16c4e31
commit fe7dc26e3f
3 changed files with 42 additions and 18 deletions

View File

@ -208,7 +208,26 @@ bool X64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) {
// IMPORTANT: any changes to the prolog must be kept in sync with
// X64CodeCache, which dynamically generates exception information.
// Adding or changing anything here must be matched!
const size_t stack_size = StackLayout::GUEST_STACK_SIZE + stack_offset;
/*
Pick the page to use as the locals base so that it is as close as possible to
the commonly accessed page that contains most of the backend fields.
The sizes checked below are chosen based on PTE coalescing sizes; Zen does 16k
or 32k.
*/
size_t stack_size = StackLayout::GUEST_STACK_SIZE;
if (stack_offset < (4096 - sizeof(X64BackendContext))) {
locals_page_delta_ = 4096;
} else if (stack_offset < (16384 - sizeof(X64BackendContext))) {//16k PTE coalescing
locals_page_delta_ = 16384;
} else if (stack_offset < (32768 - sizeof(X64BackendContext))) {
locals_page_delta_ = 32768;
} else if (stack_offset < (65536 - sizeof(X64BackendContext))) {
locals_page_delta_ = 65536;
} else {
//extremely unlikely, fall back to stack
stack_size = xe::align<size_t>(StackLayout::GUEST_STACK_SIZE + stack_offset, 16);
locals_page_delta_ = 0;
}
assert_true((stack_size + 8) % 16 == 0);
func_info.stack_size = stack_size;
stack_size_ = stack_size;
@ -1591,6 +1610,9 @@ SimdDomain X64Emitter::DeduceSimdDomain(const hir::Value* for_value) {
return SimdDomain::DONTCARE;
}
// Returns the address expression that local-variable accesses are based on.
// With a zero locals_page_delta_ the locals are addressed from rsp; otherwise
// they are addressed locals_page_delta_ bytes below the context register.
Xbyak::RegExp X64Emitter::GetLocalsBase() const {
  if (locals_page_delta_ == 0) {
    return rsp;
  }
  return GetContextReg() - locals_page_delta_;
}
Xbyak::Address X64Emitter::GetBackendCtxPtr(int offset_in_x64backendctx) const {
/*
index context ptr negatively to get to backend ctx field

View File

@ -309,6 +309,7 @@ class X64Emitter : public Xbyak::CodeGenerator {
FunctionDebugInfo* debug_info() const { return debug_info_; }
size_t stack_size() const { return stack_size_; }
Xbyak::RegExp GetLocalsBase() const;
SimdDomain DeduceSimdDomain(const hir::Value* for_value);
void ForgetMxcsrMode() { mxcsr_mode_ = MXCSRMode::Unknown; }
@ -396,6 +397,7 @@ class X64Emitter : public Xbyak::CodeGenerator {
XbyakAllocator* allocator_ = nullptr;
XexModule* guest_module_ = nullptr;
bool synchronize_stack_on_next_instruction_ = false;
int locals_page_delta_ = 0;
Xbyak::util::Cpu cpu_;
uint64_t feature_flags_ = 0;
uint32_t current_guest_function_ = 0;

View File

@ -633,49 +633,49 @@ EMITTER_OPCODE_TABLE(OPCODE_ATOMIC_COMPARE_EXCHANGE,
// Emitter sequence for OPCODE_LOAD_LOCAL producing an 8-bit integer in i.dest;
// the local's offset is the constant held by src1.
// NOTE(review): this text looks like a diff rendering with the +/- markers
// stripped, so the rsp-relative mov is presumably the removed line and the
// GetLocalsBase() mov its replacement -- confirm against the real file.
struct LOAD_LOCAL_I8
: Sequence<LOAD_LOCAL_I8, I<OPCODE_LOAD_LOCAL, I8Op, I32Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
// Byte load into i.dest addressed from rsp.
e.mov(i.dest, e.byte[e.rsp + i.src1.constant()]);
// Same byte load, addressed from e.GetLocalsBase() instead of rsp.
e.mov(i.dest, e.byte[e.GetLocalsBase() + i.src1.constant()]);
// e.TraceLoadI8(DATA_LOCAL, i.src1.constant, i.dest);
}
};
// Emitter sequence for OPCODE_LOAD_LOCAL producing a 16-bit integer in i.dest;
// the local's offset is the constant held by src1.
// NOTE(review): this text looks like a diff rendering with the +/- markers
// stripped, so the rsp-relative mov is presumably the removed line and the
// GetLocalsBase() mov its replacement -- confirm against the real file.
struct LOAD_LOCAL_I16
: Sequence<LOAD_LOCAL_I16, I<OPCODE_LOAD_LOCAL, I16Op, I32Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
// Word load into i.dest addressed from rsp.
e.mov(i.dest, e.word[e.rsp + i.src1.constant()]);
// Same word load, addressed from e.GetLocalsBase() instead of rsp.
e.mov(i.dest, e.word[e.GetLocalsBase() + i.src1.constant()]);
// e.TraceLoadI16(DATA_LOCAL, i.src1.constant, i.dest);
}
};
// Emitter sequence for OPCODE_LOAD_LOCAL producing a 32-bit integer in i.dest;
// the local's offset is the constant held by src1.
// NOTE(review): this text looks like a diff rendering with the +/- markers
// stripped, so the rsp-relative mov is presumably the removed line and the
// GetLocalsBase() mov its replacement -- confirm against the real file.
struct LOAD_LOCAL_I32
: Sequence<LOAD_LOCAL_I32, I<OPCODE_LOAD_LOCAL, I32Op, I32Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
// Dword load into i.dest addressed from rsp.
e.mov(i.dest, e.dword[e.rsp + i.src1.constant()]);
// Same dword load, addressed from e.GetLocalsBase() instead of rsp.
e.mov(i.dest, e.dword[e.GetLocalsBase() + i.src1.constant()]);
// e.TraceLoadI32(DATA_LOCAL, i.src1.constant, i.dest);
}
};
// Emitter sequence for OPCODE_LOAD_LOCAL producing a 64-bit integer in i.dest;
// the local's offset is the constant held by src1.
// NOTE(review): this text looks like a diff rendering with the +/- markers
// stripped, so the rsp-relative mov is presumably the removed line and the
// GetLocalsBase() mov its replacement -- confirm against the real file.
struct LOAD_LOCAL_I64
: Sequence<LOAD_LOCAL_I64, I<OPCODE_LOAD_LOCAL, I64Op, I32Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
// Qword load into i.dest addressed from rsp.
e.mov(i.dest, e.qword[e.rsp + i.src1.constant()]);
// Same qword load, addressed from e.GetLocalsBase() instead of rsp.
e.mov(i.dest, e.qword[e.GetLocalsBase() + i.src1.constant()]);
// e.TraceLoadI64(DATA_LOCAL, i.src1.constant, i.dest);
}
};
// Emitter sequence for OPCODE_LOAD_LOCAL producing a 32-bit float in i.dest;
// the local's offset is the constant held by src1.
// NOTE(review): this text looks like a diff rendering with the +/- markers
// stripped, so the rsp-relative vmovss is presumably the removed line and the
// GetLocalsBase() vmovss its replacement -- confirm against the real file.
struct LOAD_LOCAL_F32
: Sequence<LOAD_LOCAL_F32, I<OPCODE_LOAD_LOCAL, F32Op, I32Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
// Scalar single-precision load from a dword addressed from rsp.
e.vmovss(i.dest, e.dword[e.rsp + i.src1.constant()]);
// Same scalar load, addressed from e.GetLocalsBase() instead of rsp.
e.vmovss(i.dest, e.dword[e.GetLocalsBase() + i.src1.constant()]);
// e.TraceLoadF32(DATA_LOCAL, i.src1.constant, i.dest);
}
};
// Emitter sequence for OPCODE_LOAD_LOCAL producing a 64-bit float in i.dest;
// the local's offset is the constant held by src1.
// NOTE(review): this text looks like a diff rendering with the +/- markers
// stripped, so the rsp-relative vmovsd is presumably the removed line and the
// GetLocalsBase() vmovsd its replacement -- confirm against the real file.
struct LOAD_LOCAL_F64
: Sequence<LOAD_LOCAL_F64, I<OPCODE_LOAD_LOCAL, F64Op, I32Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
// Scalar double-precision load from a qword addressed from rsp.
e.vmovsd(i.dest, e.qword[e.rsp + i.src1.constant()]);
// Same scalar load, addressed from e.GetLocalsBase() instead of rsp.
e.vmovsd(i.dest, e.qword[e.GetLocalsBase() + i.src1.constant()]);
// e.TraceLoadF64(DATA_LOCAL, i.src1.constant, i.dest);
}
};
// Emitter sequence for OPCODE_LOAD_LOCAL producing a 128-bit vector in i.dest;
// the local's offset is the constant held by src1.
// NOTE(review): this text looks like a diff rendering with the +/- markers
// stripped, so the rsp-relative vmovaps is presumably the removed line and the
// GetLocalsBase() vmovaps its replacement -- confirm against the real file.
struct LOAD_LOCAL_V128
: Sequence<LOAD_LOCAL_V128, I<OPCODE_LOAD_LOCAL, V128Op, I32Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
// Aligned vector load (vmovaps) addressed from rsp.
e.vmovaps(i.dest, e.ptr[e.rsp + i.src1.constant()]);
// Same aligned load, addressed from e.GetLocalsBase() instead of rsp.
// NOTE(review): vmovaps needs an aligned address -- presumably the locals
// base and offset keep 16-byte alignment; verify.
e.vmovaps(i.dest, e.ptr[e.GetLocalsBase() + i.src1.constant()]);
// e.TraceLoadV128(DATA_LOCAL, i.src1.constant, i.dest);
}
};
@ -691,7 +691,7 @@ struct STORE_LOCAL_I8
: Sequence<STORE_LOCAL_I8, I<OPCODE_STORE_LOCAL, VoidOp, I32Op, I8Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
// e.TraceStoreI8(DATA_LOCAL, i.src1.constant, i.src2);
e.mov(e.byte[e.rsp + i.src1.constant()], i.src2);
e.mov(e.byte[e.GetLocalsBase() + i.src1.constant()], i.src2);
}
};
@ -705,9 +705,9 @@ struct STORE_LOCAL_I16
static void Emit(X64Emitter& e, const EmitArgType& i) {
// e.TraceStoreI16(DATA_LOCAL, i.src1.constant, i.src2);
if (LocalStoreMayUseMembaseLow(e, i)) {
e.mov(e.word[e.rsp + i.src1.constant()], e.GetMembaseReg().cvt16());
e.mov(e.word[e.GetLocalsBase() + i.src1.constant()], e.GetMembaseReg().cvt16());
} else {
e.mov(e.word[e.rsp + i.src1.constant()], i.src2);
e.mov(e.word[e.GetLocalsBase() + i.src1.constant()], i.src2);
}
}
};
@ -716,9 +716,9 @@ struct STORE_LOCAL_I32
static void Emit(X64Emitter& e, const EmitArgType& i) {
// e.TraceStoreI32(DATA_LOCAL, i.src1.constant, i.src2);
if (LocalStoreMayUseMembaseLow(e, i)) {
e.mov(e.dword[e.rsp + i.src1.constant()], e.GetMembaseReg().cvt32());
e.mov(e.dword[e.GetLocalsBase() + i.src1.constant()], e.GetMembaseReg().cvt32());
} else {
e.mov(e.dword[e.rsp + i.src1.constant()], i.src2);
e.mov(e.dword[e.GetLocalsBase() + i.src1.constant()], i.src2);
}
}
};
@ -728,9 +728,9 @@ struct STORE_LOCAL_I64
// e.TraceStoreI64(DATA_LOCAL, i.src1.constant, i.src2);
if (i.src2.is_constant && i.src2.constant() == 0) {
e.xor_(e.eax, e.eax);
e.mov(e.qword[e.rsp + i.src1.constant()], e.rax);
e.mov(e.qword[e.GetLocalsBase() + i.src1.constant()], e.rax);
} else {
e.mov(e.qword[e.rsp + i.src1.constant()], i.src2);
e.mov(e.qword[e.GetLocalsBase() + i.src1.constant()], i.src2);
}
}
};
@ -738,21 +738,21 @@ struct STORE_LOCAL_F32
: Sequence<STORE_LOCAL_F32, I<OPCODE_STORE_LOCAL, VoidOp, I32Op, F32Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
// e.TraceStoreF32(DATA_LOCAL, i.src1.constant, i.src2);
e.vmovss(e.dword[e.rsp + i.src1.constant()], i.src2);
e.vmovss(e.dword[e.GetLocalsBase() + i.src1.constant()], i.src2);
}
};
// Emitter sequence for OPCODE_STORE_LOCAL writing the 64-bit float i.src2 to
// the local whose offset is the constant held by src1.
// NOTE(review): this text looks like a diff rendering with the +/- markers
// stripped, so the rsp-relative vmovsd is presumably the removed line and the
// GetLocalsBase() vmovsd its replacement -- confirm against the real file.
struct STORE_LOCAL_F64
: Sequence<STORE_LOCAL_F64, I<OPCODE_STORE_LOCAL, VoidOp, I32Op, F64Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
// e.TraceStoreF64(DATA_LOCAL, i.src1.constant, i.src2);
// Scalar double-precision store to a qword addressed from rsp.
e.vmovsd(e.qword[e.rsp + i.src1.constant()], i.src2);
// Same scalar store, addressed from e.GetLocalsBase() instead of rsp.
e.vmovsd(e.qword[e.GetLocalsBase() + i.src1.constant()], i.src2);
}
};
// Emitter sequence for OPCODE_STORE_LOCAL writing the 128-bit vector i.src2 to
// the local whose offset is the constant held by src1.
// NOTE(review): this text looks like a diff rendering with the +/- markers
// stripped, so the rsp-relative vmovaps is presumably the removed line and the
// GetLocalsBase() vmovaps its replacement -- confirm against the real file.
struct STORE_LOCAL_V128
: Sequence<STORE_LOCAL_V128, I<OPCODE_STORE_LOCAL, VoidOp, I32Op, V128Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
// e.TraceStoreV128(DATA_LOCAL, i.src1.constant, i.src2);
// Aligned vector store (vmovaps) addressed from rsp.
e.vmovaps(e.ptr[e.rsp + i.src1.constant()], i.src2);
// Same aligned store, addressed from e.GetLocalsBase() instead of rsp.
// NOTE(review): vmovaps needs an aligned address -- presumably the locals
// base and offset keep 16-byte alignment; verify.
e.vmovaps(e.ptr[e.GetLocalsBase() + i.src1.constant()], i.src2);
}
};
EMITTER_OPCODE_TABLE(OPCODE_STORE_LOCAL, STORE_LOCAL_I8, STORE_LOCAL_I16,