From fe7dc26e3f3690ec235b0ff7c925f3df07da9d74 Mon Sep 17 00:00:00 2001 From: disjtqz Date: Sun, 1 Oct 2023 09:14:41 -0400 Subject: [PATCH] place locals on backend pages --- src/xenia/cpu/backend/x64/x64_emitter.cc | 24 ++++++++++++++- src/xenia/cpu/backend/x64/x64_emitter.h | 2 ++ src/xenia/cpu/backend/x64/x64_seq_memory.cc | 34 ++++++++++----------- 3 files changed, 42 insertions(+), 18 deletions(-) diff --git a/src/xenia/cpu/backend/x64/x64_emitter.cc b/src/xenia/cpu/backend/x64/x64_emitter.cc index 239c14b75..39ee0e25b 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.cc +++ b/src/xenia/cpu/backend/x64/x64_emitter.cc @@ -208,7 +208,26 @@ bool X64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) { // IMPORTANT: any changes to the prolog must be kept in sync with // X64CodeCache, which dynamically generates exception information. // Adding or changing anything here must be matched! - const size_t stack_size = StackLayout::GUEST_STACK_SIZE + stack_offset; + + /* + Pick a page to use as the base for locals, as close as possible to the commonly accessed page that contains most backend fields. + The sizes that are checked are chosen based on PTE coalescing sizes. 
zen does 16k or 32k + */ + size_t stack_size = StackLayout::GUEST_STACK_SIZE; + if (stack_offset < (4096 - sizeof(X64BackendContext))) { + locals_page_delta_ = 4096; + } else if (stack_offset < (16384 - sizeof(X64BackendContext))) {//16k PTE coalescing + locals_page_delta_ = 16384; + } else if (stack_offset < (32768 - sizeof(X64BackendContext))) { + locals_page_delta_ = 32768; + } else if (stack_offset < (65536 - sizeof(X64BackendContext))) { + locals_page_delta_ = 65536; + } else { + //extremely unlikely, fall back to stack + stack_size = xe::align(StackLayout::GUEST_STACK_SIZE + stack_offset, 16); + locals_page_delta_ = 0; + } + assert_true((stack_size + 8) % 16 == 0); func_info.stack_size = stack_size; stack_size_ = stack_size; @@ -1591,6 +1610,9 @@ SimdDomain X64Emitter::DeduceSimdDomain(const hir::Value* for_value) { return SimdDomain::DONTCARE; } +Xbyak::RegExp X64Emitter::GetLocalsBase() const { + return !locals_page_delta_ ? rsp : GetContextReg() - locals_page_delta_; +} Xbyak::Address X64Emitter::GetBackendCtxPtr(int offset_in_x64backendctx) const { /* index context ptr negatively to get to backend ctx field diff --git a/src/xenia/cpu/backend/x64/x64_emitter.h b/src/xenia/cpu/backend/x64/x64_emitter.h index d6ca1e028..3806e9ee2 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.h +++ b/src/xenia/cpu/backend/x64/x64_emitter.h @@ -309,6 +309,7 @@ class X64Emitter : public Xbyak::CodeGenerator { FunctionDebugInfo* debug_info() const { return debug_info_; } size_t stack_size() const { return stack_size_; } + Xbyak::RegExp GetLocalsBase() const; SimdDomain DeduceSimdDomain(const hir::Value* for_value); void ForgetMxcsrMode() { mxcsr_mode_ = MXCSRMode::Unknown; } @@ -396,6 +397,7 @@ class X64Emitter : public Xbyak::CodeGenerator { XbyakAllocator* allocator_ = nullptr; XexModule* guest_module_ = nullptr; bool synchronize_stack_on_next_instruction_ = false; + int locals_page_delta_ = 0; Xbyak::util::Cpu cpu_; uint64_t feature_flags_ = 0; uint32_t 
current_guest_function_ = 0; diff --git a/src/xenia/cpu/backend/x64/x64_seq_memory.cc b/src/xenia/cpu/backend/x64/x64_seq_memory.cc index 75986b355..61fcbdfb9 100644 --- a/src/xenia/cpu/backend/x64/x64_seq_memory.cc +++ b/src/xenia/cpu/backend/x64/x64_seq_memory.cc @@ -633,49 +633,49 @@ EMITTER_OPCODE_TABLE(OPCODE_ATOMIC_COMPARE_EXCHANGE, struct LOAD_LOCAL_I8 : Sequence> { static void Emit(X64Emitter& e, const EmitArgType& i) { - e.mov(i.dest, e.byte[e.rsp + i.src1.constant()]); + e.mov(i.dest, e.byte[e.GetLocalsBase() + i.src1.constant()]); // e.TraceLoadI8(DATA_LOCAL, i.src1.constant, i.dest); } }; struct LOAD_LOCAL_I16 : Sequence> { static void Emit(X64Emitter& e, const EmitArgType& i) { - e.mov(i.dest, e.word[e.rsp + i.src1.constant()]); + e.mov(i.dest, e.word[e.GetLocalsBase() + i.src1.constant()]); // e.TraceLoadI16(DATA_LOCAL, i.src1.constant, i.dest); } }; struct LOAD_LOCAL_I32 : Sequence> { static void Emit(X64Emitter& e, const EmitArgType& i) { - e.mov(i.dest, e.dword[e.rsp + i.src1.constant()]); + e.mov(i.dest, e.dword[e.GetLocalsBase() + i.src1.constant()]); // e.TraceLoadI32(DATA_LOCAL, i.src1.constant, i.dest); } }; struct LOAD_LOCAL_I64 : Sequence> { static void Emit(X64Emitter& e, const EmitArgType& i) { - e.mov(i.dest, e.qword[e.rsp + i.src1.constant()]); + e.mov(i.dest, e.qword[e.GetLocalsBase() + i.src1.constant()]); // e.TraceLoadI64(DATA_LOCAL, i.src1.constant, i.dest); } }; struct LOAD_LOCAL_F32 : Sequence> { static void Emit(X64Emitter& e, const EmitArgType& i) { - e.vmovss(i.dest, e.dword[e.rsp + i.src1.constant()]); + e.vmovss(i.dest, e.dword[e.GetLocalsBase() + i.src1.constant()]); // e.TraceLoadF32(DATA_LOCAL, i.src1.constant, i.dest); } }; struct LOAD_LOCAL_F64 : Sequence> { static void Emit(X64Emitter& e, const EmitArgType& i) { - e.vmovsd(i.dest, e.qword[e.rsp + i.src1.constant()]); + e.vmovsd(i.dest, e.qword[e.GetLocalsBase() + i.src1.constant()]); // e.TraceLoadF64(DATA_LOCAL, i.src1.constant, i.dest); } }; struct LOAD_LOCAL_V128 : 
Sequence> { static void Emit(X64Emitter& e, const EmitArgType& i) { - e.vmovaps(i.dest, e.ptr[e.rsp + i.src1.constant()]); + e.vmovaps(i.dest, e.ptr[e.GetLocalsBase() + i.src1.constant()]); // e.TraceLoadV128(DATA_LOCAL, i.src1.constant, i.dest); } }; @@ -691,7 +691,7 @@ struct STORE_LOCAL_I8 : Sequence> { static void Emit(X64Emitter& e, const EmitArgType& i) { // e.TraceStoreI8(DATA_LOCAL, i.src1.constant, i.src2); - e.mov(e.byte[e.rsp + i.src1.constant()], i.src2); + e.mov(e.byte[e.GetLocalsBase() + i.src1.constant()], i.src2); } }; @@ -705,9 +705,9 @@ struct STORE_LOCAL_I16 static void Emit(X64Emitter& e, const EmitArgType& i) { // e.TraceStoreI16(DATA_LOCAL, i.src1.constant, i.src2); if (LocalStoreMayUseMembaseLow(e, i)) { - e.mov(e.word[e.rsp + i.src1.constant()], e.GetMembaseReg().cvt16()); + e.mov(e.word[e.GetLocalsBase() + i.src1.constant()], e.GetMembaseReg().cvt16()); } else { - e.mov(e.word[e.rsp + i.src1.constant()], i.src2); + e.mov(e.word[e.GetLocalsBase() + i.src1.constant()], i.src2); } } }; @@ -716,9 +716,9 @@ struct STORE_LOCAL_I32 static void Emit(X64Emitter& e, const EmitArgType& i) { // e.TraceStoreI32(DATA_LOCAL, i.src1.constant, i.src2); if (LocalStoreMayUseMembaseLow(e, i)) { - e.mov(e.dword[e.rsp + i.src1.constant()], e.GetMembaseReg().cvt32()); + e.mov(e.dword[e.GetLocalsBase() + i.src1.constant()], e.GetMembaseReg().cvt32()); } else { - e.mov(e.dword[e.rsp + i.src1.constant()], i.src2); + e.mov(e.dword[e.GetLocalsBase() + i.src1.constant()], i.src2); } } }; @@ -728,9 +728,9 @@ struct STORE_LOCAL_I64 // e.TraceStoreI64(DATA_LOCAL, i.src1.constant, i.src2); if (i.src2.is_constant && i.src2.constant() == 0) { e.xor_(e.eax, e.eax); - e.mov(e.qword[e.rsp + i.src1.constant()], e.rax); + e.mov(e.qword[e.GetLocalsBase() + i.src1.constant()], e.rax); } else { - e.mov(e.qword[e.rsp + i.src1.constant()], i.src2); + e.mov(e.qword[e.GetLocalsBase() + i.src1.constant()], i.src2); } } }; @@ -738,21 +738,21 @@ struct STORE_LOCAL_F32 : Sequence> { static 
void Emit(X64Emitter& e, const EmitArgType& i) { // e.TraceStoreF32(DATA_LOCAL, i.src1.constant, i.src2); - e.vmovss(e.dword[e.rsp + i.src1.constant()], i.src2); + e.vmovss(e.dword[e.GetLocalsBase() + i.src1.constant()], i.src2); } }; struct STORE_LOCAL_F64 : Sequence> { static void Emit(X64Emitter& e, const EmitArgType& i) { // e.TraceStoreF64(DATA_LOCAL, i.src1.constant, i.src2); - e.vmovsd(e.qword[e.rsp + i.src1.constant()], i.src2); + e.vmovsd(e.qword[e.GetLocalsBase() + i.src1.constant()], i.src2); } }; struct STORE_LOCAL_V128 : Sequence> { static void Emit(X64Emitter& e, const EmitArgType& i) { // e.TraceStoreV128(DATA_LOCAL, i.src1.constant, i.src2); - e.vmovaps(e.ptr[e.rsp + i.src1.constant()], i.src2); + e.vmovaps(e.ptr[e.GetLocalsBase() + i.src1.constant()], i.src2); } }; EMITTER_OPCODE_TABLE(OPCODE_STORE_LOCAL, STORE_LOCAL_I8, STORE_LOCAL_I16,