diff --git a/Source/Android/jni/MainAndroid.cpp b/Source/Android/jni/MainAndroid.cpp index 7902a9973f..01728ce014 100644 --- a/Source/Android/jni/MainAndroid.cpp +++ b/Source/Android/jni/MainAndroid.cpp @@ -195,26 +195,18 @@ std::unique_ptr Host_CreateGBAHost(std::weak_ptrCallStaticBooleanMethod( + IDCache::GetNativeLibraryClass(), IDCache::GetDisplayAlertMsg(), j_caption, j_text, yes_no, + style == Common::MsgType::Warning, s_need_nonblocking_alert_msg); - jstring j_caption = ToJString(env, caption); - jstring j_text = ToJString(env, text); - - // Execute the Java method. - result = env->CallStaticBooleanMethod( - IDCache::GetNativeLibraryClass(), IDCache::GetDisplayAlertMsg(), j_caption, j_text, yes_no, - style == Common::MsgType::Warning, s_need_nonblocking_alert_msg); - - env->DeleteLocalRef(j_caption); - env->DeleteLocalRef(j_text); - }).join(); + env->DeleteLocalRef(j_caption); + env->DeleteLocalRef(j_text); return result != JNI_FALSE; } diff --git a/Source/Core/Common/Thread.cpp b/Source/Core/Common/Thread.cpp index 7bbc27e5f4..810db15d78 100644 --- a/Source/Core/Common/Thread.cpp +++ b/Source/Core/Common/Thread.cpp @@ -7,6 +7,7 @@ #include #include #else +#include #include #endif @@ -185,6 +186,41 @@ void SetCurrentThreadName(const char* name) #endif } +std::tuple GetCurrentThreadStack() +{ + void* stack_addr; + size_t stack_size; + + pthread_t self = pthread_self(); + +#ifdef __APPLE__ + stack_size = pthread_get_stacksize_np(self); + stack_addr = reinterpret_cast(pthread_get_stackaddr_np(self)) - stack_size; +#elif defined __OpenBSD__ + stack_t stack; + pthread_stackseg_np(self, &stack); + + stack_addr = reinterpret_cast(stack.ss_sp) - stack.ss_size; + stack_size = stack.ss_size; +#else + pthread_attr_t attr; + +#ifdef __FreeBSD__ + pthread_attr_init(&attr); + pthread_attr_get_np(self, &attr); +#else + // Linux and NetBSD + pthread_getattr_np(self, &attr); +#endif + + pthread_attr_getstack(&attr, &stack_addr, &stack_size); + 
pthread_attr_destroy(&attr); +#endif + + return std::make_tuple(stack_addr, stack_size); +} + #endif } // namespace Common diff --git a/Source/Core/Common/Thread.h b/Source/Core/Common/Thread.h index 7239df131b..fbba27a22c 100644 --- a/Source/Core/Common/Thread.h +++ b/Source/Core/Common/Thread.h @@ -5,6 +5,10 @@ #include +#ifndef _WIN32 +#include +#endif + // Don't include Common.h here as it will break LogManager #include "Common/CommonTypes.h" @@ -35,4 +39,9 @@ inline void YieldCPU() void SetCurrentThreadName(const char* name); +#ifndef _WIN32 +// Returns the lowest address of the stack and the size of the stack +std::tuple GetCurrentThreadStack(); +#endif + } // namespace Common diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index fd855b3893..6c9a616913 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -10,11 +10,14 @@ #include #include -// for the PROFILER stuff #ifdef _WIN32 #include +#include +#else +#include #endif +#include "Common/Align.h" #include "Common/CommonTypes.h" #include "Common/GekkoDisassembler.h" #include "Common/IOFile.h" @@ -23,6 +26,7 @@ #include "Common/PerformanceCounter.h" #include "Common/StringUtil.h" #include "Common/Swap.h" +#include "Common/Thread.h" #include "Common/x64ABI.h" #include "Core/Core.h" #include "Core/CoreTiming.h" @@ -140,15 +144,16 @@ using namespace PowerPC; // But when windows reaches the last guard page, it raises a "Stack Overflow" // exception which we can hook into, however by default it leaves you with less // than 4kb of stack. So we use SetThreadStackGuarantee to trigger the Stack -// Overflow early while we still have 512kb of stack remaining. +// Overflow early while we still have 256kb of stack remaining. // After resetting the stack to the top, we call _resetstkoflw() to restore -// the guard page at the 512kb mark. +// the guard page at the 256kb mark. 
enum { - STACK_SIZE = 2 * 1024 * 1024, - SAFE_STACK_SIZE = 512 * 1024, - GUARD_SIZE = 0x10000, // two guards - bottom (permanent) and middle (see above) + SAFE_STACK_SIZE = 256 * 1024, + MIN_UNSAFE_STACK_SIZE = 192 * 1024, + MIN_STACK_SIZE = SAFE_STACK_SIZE + MIN_UNSAFE_STACK_SIZE, + GUARD_SIZE = 64 * 1024, GUARD_OFFSET = SAFE_STACK_SIZE - GUARD_SIZE, }; @@ -158,27 +163,57 @@ Jit64::Jit64() : QuantizedMemoryRoutines(*this) Jit64::~Jit64() = default; -void Jit64::AllocStack() +void Jit64::ProtectStack() { -#ifndef _WIN32 - m_stack = static_cast(Common::AllocateMemoryPages(STACK_SIZE)); - Common::ReadProtectMemory(m_stack, GUARD_SIZE); - Common::ReadProtectMemory(m_stack + GUARD_OFFSET, GUARD_SIZE); -#else - // For windows we just keep using the system stack and reserve a large amount of memory at the end - // of the stack. + if (!m_enable_blr_optimization) + return; + +#ifdef _WIN32 ULONG reserveSize = SAFE_STACK_SIZE; SetThreadStackGuarantee(&reserveSize); +#else + auto [stack_addr, stack_size] = Common::GetCurrentThreadStack(); + + const uintptr_t stack_base_addr = reinterpret_cast(stack_addr); + const uintptr_t stack_middle_addr = reinterpret_cast(&stack_addr); + if (stack_middle_addr < stack_base_addr || stack_middle_addr >= stack_base_addr + stack_size) + { + PanicAlertFmt("Failed to get correct stack base"); + m_enable_blr_optimization = false; + return; + } + + const long page_size = sysconf(_SC_PAGESIZE); + if (page_size <= 0) + { + PanicAlertFmt("Failed to get page size"); + m_enable_blr_optimization = false; + return; + } + + const uintptr_t stack_guard_addr = Common::AlignUp(stack_base_addr + GUARD_OFFSET, page_size); + if (stack_guard_addr >= stack_middle_addr || + stack_middle_addr - stack_guard_addr < GUARD_SIZE + MIN_UNSAFE_STACK_SIZE) + { + PanicAlertFmt("Stack is too small for BLR optimization (size {:x}, base {:x}, current stack " + "pointer {:x}, alignment {:x})", + stack_size, stack_base_addr, stack_middle_addr, page_size); + 
m_enable_blr_optimization = false; + return; + } + + m_stack_guard = reinterpret_cast(stack_guard_addr); + Common::ReadProtectMemory(m_stack_guard, GUARD_SIZE); #endif } -void Jit64::FreeStack() +void Jit64::UnprotectStack() { #ifndef _WIN32 - if (m_stack) + if (m_stack_guard) { - Common::FreeMemoryPages(m_stack, STACK_SIZE); - m_stack = nullptr; + Common::UnWriteProtectMemory(m_stack_guard, GUARD_SIZE); + m_stack_guard = nullptr; } #endif } @@ -194,11 +229,9 @@ bool Jit64::HandleStackFault() WARN_LOG_FMT(POWERPC, "BLR cache disabled due to excessive BL in the emulated program."); + UnprotectStack(); m_enable_blr_optimization = false; -#ifndef _WIN32 - // Windows does this automatically. - Common::UnWriteProtectMemory(m_stack + GUARD_OFFSET, GUARD_SIZE); -#endif + // We're going to need to clear the whole cache to get rid of the bad // CALLs, but we can't yet. Fake the downcount so we're forced to the // dispatcher (no block linking), and clear the cache so we're sent to @@ -214,11 +247,13 @@ bool Jit64::HandleStackFault() bool Jit64::HandleFault(uintptr_t access_address, SContext* ctx) { - uintptr_t stack = (uintptr_t)m_stack; - uintptr_t diff = access_address - stack; + const uintptr_t stack_guard = reinterpret_cast(m_stack_guard); // In the trap region? - if (m_enable_blr_optimization && diff >= GUARD_OFFSET && diff < GUARD_OFFSET + GUARD_SIZE) + if (m_enable_blr_optimization && access_address >= stack_guard && + access_address < stack_guard + GUARD_SIZE) + { return HandleStackFault(); + } // This generates some fairly heavy trampolines, but it doesn't really hurt. // Only instructions that access I/O will get these, and there won't be that @@ -370,12 +405,10 @@ void Jit64::Init() m_enable_blr_optimization = jo.enableBlocklink && m_fastmem_enabled && !m_enable_debugging; m_cleanup_after_stackfault = false; - m_stack = nullptr; - if (m_enable_blr_optimization) - AllocStack(); + m_stack_guard = nullptr; blocks.Init(); - asm_routines.Init(m_stack ? 
(m_stack + STACK_SIZE) : nullptr); + asm_routines.Init(); // important: do this *after* generating the global asm routines, because we can't use farcode in // them. @@ -415,7 +448,6 @@ void Jit64::ResetFreeMemoryRanges() void Jit64::Shutdown() { - FreeStack(); FreeCodeSpace(); auto& system = Core::System::GetInstance(); @@ -735,14 +767,22 @@ void Jit64::WriteExternalExceptionExit() void Jit64::Run() { + ProtectStack(); + CompiledCode pExecAddr = (CompiledCode)asm_routines.enter_code; pExecAddr(); + + UnprotectStack(); } void Jit64::SingleStep() { + ProtectStack(); + CompiledCode pExecAddr = (CompiledCode)asm_routines.enter_code; pExecAddr(); + + UnprotectStack(); } void Jit64::Trace() diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index 521c8f5f35..4d96ea518d 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -255,8 +255,8 @@ private: bool HandleFunctionHooking(u32 address); - void AllocStack(); - void FreeStack(); + void ProtectStack(); + void UnprotectStack(); void ResetFreeMemoryRanges(); @@ -270,7 +270,7 @@ private: bool m_enable_blr_optimization = false; bool m_cleanup_after_stackfault = false; - u8* m_stack = nullptr; + u8* m_stack_guard = nullptr; HyoutaUtilities::RangeSizeSet m_free_ranges_near; HyoutaUtilities::RangeSizeSet m_free_ranges_far; diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp index 3dc98a317e..61f6f43d7b 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp @@ -24,10 +24,9 @@ Jit64AsmRoutineManager::Jit64AsmRoutineManager(Jit64& jit) : CommonAsmRoutines(j { } -void Jit64AsmRoutineManager::Init(u8* stack_top) +void Jit64AsmRoutineManager::Init() { m_const_pool.Init(AllocChildCodeSpace(4096), 4096); - m_stack_top = stack_top; Generate(); WriteProtect(); } @@ -50,17 +49,8 @@ void Jit64AsmRoutineManager::Generate() // MOV(64, R(RMEM), 
Imm64((u64)Memory::physical_base)); MOV(64, R(RPPCSTATE), Imm64((u64)&PowerPC::ppcState + 0x80)); - if (m_stack_top) - { - // Pivot the stack to our custom one. - MOV(64, R(RSCRATCH), R(RSP)); - MOV(64, R(RSP), ImmPtr(m_stack_top - 0x20)); - MOV(64, MDisp(RSP, 0x18), R(RSCRATCH)); - } - else - { - MOV(64, PPCSTATE(stored_stack_pointer), R(RSP)); - } + MOV(64, PPCSTATE(stored_stack_pointer), R(RSP)); + // something that can't pass the BLR test MOV(64, MDisp(RSP, 8), Imm32((u32)-1)); @@ -209,12 +199,9 @@ void Jit64AsmRoutineManager::Generate() if (enable_debugging) SetJumpTarget(dbg_exit); + // Reset the stack pointer, since the BLR optimization may have pushed things onto the stack + // without popping them. ResetStack(*this); - if (m_stack_top) - { - ADD(64, R(RSP), Imm8(0x18)); - POP(RSP); - } ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8, 16); RET(); @@ -226,10 +213,7 @@ void Jit64AsmRoutineManager::Generate() void Jit64AsmRoutineManager::ResetStack(X64CodeBlock& emitter) { - if (m_stack_top) - emitter.MOV(64, R(RSP), Imm64((u64)m_stack_top - 0x20)); - else - emitter.MOV(64, R(RSP), PPCSTATE(stored_stack_pointer)); + emitter.MOV(64, R(RSP), PPCSTATE(stored_stack_pointer)); } void Jit64AsmRoutineManager::GenerateCommon() diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.h b/Source/Core/Core/PowerPC/Jit64/JitAsm.h index b0d44cb867..06713b1766 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.h +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.h @@ -36,7 +36,7 @@ public: explicit Jit64AsmRoutineManager(Jit64& jit); - void Init(u8* stack_top); + void Init(); void ResetStack(Gen::X64CodeBlock& emitter); @@ -44,6 +44,5 @@ private: void Generate(); void GenerateCommon(); - u8* m_stack_top = nullptr; JitBase& m_jit; }; diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 5770e30acf..78b837bfaf 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -5,6 +5,13 @@ 
#include +#ifdef _WIN32 +#include +#else +#include +#endif + +#include "Common/Align.h" #include "Common/Arm64Emitter.h" #include "Common/CommonTypes.h" #include "Common/Logging/Log.h" @@ -12,6 +19,7 @@ #include "Common/MsgHandler.h" #include "Common/PerformanceCounter.h" #include "Common/StringUtil.h" +#include "Common/Thread.h" #include "Core/ConfigManager.h" #include "Core/Core.h" @@ -38,9 +46,10 @@ constexpr size_t CODE_SIZE = 1024 * 1024 * 32; constexpr size_t FARCODE_SIZE = 1024 * 1024 * 64; constexpr size_t FARCODE_SIZE_MMU = 1024 * 1024 * 64; -constexpr size_t STACK_SIZE = 2 * 1024 * 1024; -constexpr size_t SAFE_STACK_SIZE = 512 * 1024; -constexpr size_t GUARD_SIZE = 64 * 1024; // two guards - bottom (permanent) and middle (see above) +constexpr size_t SAFE_STACK_SIZE = 256 * 1024; +constexpr size_t MIN_UNSAFE_STACK_SIZE = 192 * 1024; +constexpr size_t MIN_STACK_SIZE = SAFE_STACK_SIZE + MIN_UNSAFE_STACK_SIZE; +constexpr size_t GUARD_SIZE = 64 * 1024; constexpr size_t GUARD_OFFSET = SAFE_STACK_SIZE - GUARD_SIZE; JitArm64::JitArm64() : m_float_emit(this) @@ -74,7 +83,6 @@ void JitArm64::Init() m_enable_blr_optimization = jo.enableBlocklink && m_fastmem_enabled && !m_enable_debugging; m_cleanup_after_stackfault = false; - AllocStack(); GenerateAsm(); ResetFreeMemoryRanges(); @@ -117,9 +125,8 @@ bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx) bool success = false; // Handle BLR stack faults, may happen in C++ code. - uintptr_t stack = (uintptr_t)m_stack_base; - uintptr_t diff = access_address - stack; - if (diff >= GUARD_OFFSET && diff < GUARD_OFFSET + GUARD_SIZE) + const uintptr_t stack_guard = reinterpret_cast(m_stack_guard); + if (access_address >= stack_guard && access_address < stack_guard + GUARD_SIZE) success = HandleStackFault(); // If the fault is in JIT code space, look for fastmem areas. 
@@ -162,10 +169,10 @@ bool JitArm64::HandleStackFault() return false; ERROR_LOG_FMT(POWERPC, "BLR cache disabled due to excessive BL in the emulated program."); + + UnprotectStack(); m_enable_blr_optimization = false; -#ifndef _WIN32 - Common::UnWriteProtectMemory(m_stack_base + GUARD_OFFSET, GUARD_SIZE); -#endif + GetBlockCache()->InvalidateICache(0, 0xffffffff, true); Core::System::GetInstance().GetCoreTiming().ForceExceptionCheck(0); m_cleanup_after_stackfault = true; @@ -205,7 +212,6 @@ void JitArm64::Shutdown() memory.ShutdownFastmemArena(); FreeCodeSpace(); blocks.Shutdown(); - FreeStack(); } void JitArm64::FallBackToInterpreter(UGeckoInstruction inst) @@ -337,37 +343,56 @@ void JitArm64::ResetStack() ADD(ARM64Reg::SP, ARM64Reg::X0, 0); } -void JitArm64::AllocStack() +void JitArm64::ProtectStack() { if (!m_enable_blr_optimization) return; -#ifndef _WIN32 - m_stack_base = static_cast(Common::AllocateMemoryPages(STACK_SIZE)); - if (!m_stack_base) +#ifdef _WIN32 + ULONG reserveSize = SAFE_STACK_SIZE; + SetThreadStackGuarantee(&reserveSize); +#else + auto [stack_addr, stack_size] = Common::GetCurrentThreadStack(); + + const uintptr_t stack_base_addr = reinterpret_cast(stack_addr); + const uintptr_t stack_middle_addr = reinterpret_cast(&stack_addr); + if (stack_middle_addr < stack_base_addr || stack_middle_addr >= stack_base_addr + stack_size) { + PanicAlertFmt("Failed to get correct stack base"); m_enable_blr_optimization = false; return; } - m_stack_pointer = m_stack_base + STACK_SIZE; - Common::ReadProtectMemory(m_stack_base, GUARD_SIZE); - Common::ReadProtectMemory(m_stack_base + GUARD_OFFSET, GUARD_SIZE); -#else - // For windows we just keep using the system stack and reserve a large amount of memory at the end - // of the stack. 
- ULONG reserveSize = SAFE_STACK_SIZE; - SetThreadStackGuarantee(&reserveSize); + const long page_size = sysconf(_SC_PAGESIZE); + if (page_size <= 0) + { + PanicAlertFmt("Failed to get page size"); + m_enable_blr_optimization = false; + return; + } + + const uintptr_t stack_guard_addr = Common::AlignUp(stack_base_addr + GUARD_OFFSET, page_size); + if (stack_guard_addr >= stack_middle_addr || + stack_middle_addr - stack_guard_addr < GUARD_SIZE + MIN_UNSAFE_STACK_SIZE) + { + PanicAlertFmt("Stack is too small for BLR optimization (size {:x}, base {:x}, current stack " + "pointer {:x}, alignment {:x})", + stack_size, stack_base_addr, stack_middle_addr, page_size); + m_enable_blr_optimization = false; + return; + } + + m_stack_guard = reinterpret_cast(stack_guard_addr); + Common::ReadProtectMemory(m_stack_guard, GUARD_SIZE); #endif } -void JitArm64::FreeStack() +void JitArm64::UnprotectStack() { #ifndef _WIN32 - if (m_stack_base) - Common::FreeMemoryPages(m_stack_base, STACK_SIZE); - m_stack_base = nullptr; - m_stack_pointer = nullptr; + if (m_stack_guard) + Common::UnWriteProtectMemory(m_stack_guard, GUARD_SIZE); + m_stack_guard = nullptr; #endif } @@ -696,14 +721,22 @@ void JitArm64::EndTimeProfile(JitBlock* b) void JitArm64::Run() { + ProtectStack(); + CompiledCode pExecAddr = (CompiledCode)enter_code; pExecAddr(); + + UnprotectStack(); } void JitArm64::SingleStep() { + ProtectStack(); + CompiledCode pExecAddr = (CompiledCode)enter_code; pExecAddr(); + + UnprotectStack(); } void JitArm64::Trace() diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 665f234490..ca8fd80a30 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -288,8 +288,8 @@ protected: void DoDownCount(); void Cleanup(); void ResetStack(); - void AllocStack(); - void FreeStack(); + void ProtectStack(); + void UnprotectStack(); void ResetFreeMemoryRanges(); @@ -365,9 +365,7 @@ protected: bool 
m_enable_blr_optimization = false; bool m_cleanup_after_stackfault = false; - u8* m_stack_base = nullptr; - u8* m_stack_pointer = nullptr; - u8* m_saved_stack_pointer = nullptr; + u8* m_stack_guard = nullptr; HyoutaUtilities::RangeSizeSet m_free_ranges_near; HyoutaUtilities::RangeSizeSet m_free_ranges_far; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index 7d4a25f7f8..bfc0de7449 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -45,23 +45,14 @@ void JitArm64::GenerateAsm() MOVP2R(PPC_REG, &PowerPC::ppcState); - // Swap the stack pointer, so we have proper guard pages. + // Store the stack pointer, so we can reset it if the BLR optimization fails. ADD(ARM64Reg::X0, ARM64Reg::SP, 0); - STR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, - MOVPage2R(ARM64Reg::X1, &m_saved_stack_pointer)); - LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, MOVPage2R(ARM64Reg::X1, &m_stack_pointer)); - FixupBranch no_fake_stack = CBZ(ARM64Reg::X0); - ADD(ARM64Reg::SP, ARM64Reg::X0, 0); - SetJumpTarget(no_fake_stack); + STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(stored_stack_pointer)); // Push {nullptr; -1} as invalid destination on the stack. MOVI2R(ARM64Reg::X0, 0xFFFFFFFF); STP(IndexType::Pre, ARM64Reg::ZR, ARM64Reg::X0, ARM64Reg::SP, -16); - // Store the stack pointer, so we can reset it if the BLR optimization fails. - ADD(ARM64Reg::X0, ARM64Reg::SP, 0); - STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(stored_stack_pointer)); - // The PC will be loaded into DISPATCHER_PC after the call to CoreTiming::Advance(). // Advance() does an exception check so we don't know what PC to use until afterwards. FixupBranch to_start_of_timing_slice = B(); @@ -204,9 +195,9 @@ void JitArm64::GenerateAsm() if (enable_debugging) SetJumpTarget(debug_exit); - // Reset the stack pointer, as the BLR optimization have touched it. 
- LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, - MOVPage2R(ARM64Reg::X1, &m_saved_stack_pointer)); + // Reset the stack pointer, since the BLR optimization may have pushed things onto the stack + // without popping them. + LDR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(stored_stack_pointer)); ADD(ARM64Reg::SP, ARM64Reg::X0, 0); m_float_emit.ABI_PopRegisters(regs_to_save_fpr, ARM64Reg::X30);