From c5b3a2efacd987e6a65b83ca17a6c37377a39ab0 Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Mon, 15 Feb 2016 14:25:34 +1300 Subject: [PATCH] Implement BLR Overflow handling for Windows. --- Source/Core/Core/MemTools.cpp | 6 +- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 85 +++++++++++++------- Source/Core/Core/PowerPC/Jit64/Jit.h | 4 +- Source/Core/Core/PowerPC/Jit64/JitAsm.cpp | 9 ++- Source/Core/Core/PowerPC/JitCommon/JitBase.h | 1 + Source/Core/Core/PowerPC/JitInterface.cpp | 10 +++ Source/Core/Core/PowerPC/JitInterface.h | 1 + 7 files changed, 83 insertions(+), 33 deletions(-) diff --git a/Source/Core/Core/MemTools.cpp b/Source/Core/Core/MemTools.cpp index 3ec3028ea4..1f11fd55e4 100644 --- a/Source/Core/Core/MemTools.cpp +++ b/Source/Core/Core/MemTools.cpp @@ -58,8 +58,10 @@ LONG NTAPI Handler(PEXCEPTION_POINTERS pPtrs) } case EXCEPTION_STACK_OVERFLOW: - MessageBox(nullptr, _T("Stack overflow!"), nullptr, 0); - return EXCEPTION_CONTINUE_SEARCH; + if (JitInterface::HandleStackFault()) + return EXCEPTION_CONTINUE_EXECUTION; + else + return EXCEPTION_CONTINUE_SEARCH; case EXCEPTION_ILLEGAL_INSTRUCTION: //No SSE support? Or simply bad codegen? diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 1314b8b49b..111dcb46ab 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -15,6 +15,7 @@ #include "Common/StringUtil.h" #include "Common/x64ABI.h" #include "Common/Logging/Log.h" +#include "Core/Core.h" #include "Core/CoreTiming.h" #include "Core/PatchEngine.h" #include "Core/HLE/HLE.h" @@ -119,12 +120,16 @@ using namespace PowerPC; // bad RSP, because on Linux we can use sigaltstack and on OS X we're already // on a separate thread. -// On Windows, the OS gets upset if RSP doesn't work, and I don't know any -// equivalent of sigaltstack. Windows supports guard pages which, when -// accessed, immediately turn into regular pages but cause a trap... but -// putting them in the path of RSP just leads to something (in the kernel?) -// thinking a regular stack extension is required. So this protection is not -// supported on Windows yet... +// Windows is... under-documented. +// It already puts guard pages so it can automatically grow the stack and it +// doesn't look like there is a way to hook into a guard page fault and implement +// our own logic. +// But when windows reaches the last guard page, it raises a "Stack Overflow" +// exception which we can hook into, however by default it leaves you with less +// than 4kb of stack. So we use SetThreadStackGuarantee to trigger the Stack +// Overflow early while we still have 512kb of stack remaining. +// After resetting the stack to the top, we call _resetstkoflw() to restore +// the guard page at the 512kb mark. enum { @@ -140,6 +145,10 @@ void Jit64::AllocStack() m_stack = (u8*)AllocateMemoryPages(STACK_SIZE); ReadProtectMemory(m_stack, GUARD_SIZE); ReadProtectMemory(m_stack + GUARD_OFFSET, GUARD_SIZE); +#else + // For windows we just keep using the system stack and reserve a large amount of memory at the end of the stack. + ULONG reserveSize = SAFE_STACK_SIZE; + SetThreadStackGuarantee(&reserveSize); #endif } @@ -154,31 +163,45 @@ void Jit64::FreeStack() #endif } +bool Jit64::HandleStackFault() +{ + // It's possible the stack fault might have been caused by something other than + // the BLR optimization. If the fault was triggered from another thread, or + // when BLR optimization isn't enabled then there is nothing we can do about the fault. + // Return false so the regular stack overflow handler can trigger (which crashes) + if (!m_enable_blr_optimization || !Core::IsCPUThread()) + return false; + + WARN_LOG(POWERPC, "BLR cache disabled due to excessive BL in the emulated program."); + m_enable_blr_optimization = false; +#ifndef _WIN32 + // Windows does this automatically. + UnWriteProtectMemory(m_stack + GUARD_OFFSET, GUARD_SIZE); +#endif + // We're going to need to clear the whole cache to get rid of the bad + // CALLs, but we can't yet. Fake the downcount so we're forced to the + // dispatcher (no block linking), and clear the cache so we're sent to + // Jit. In the case of Windows, we will also need to call _resetstkoflw() + // to reset the guard page. + // Yeah, it's kind of gross. + GetBlockCache()->InvalidateICache(0, 0xffffffff, true); + CoreTiming::ForceExceptionCheck(0); + m_cleanup_after_stackfault = true; + + return true; +} + bool Jit64::HandleFault(uintptr_t access_address, SContext* ctx) { - uintptr_t stack = (uintptr_t)m_stack, diff = access_address - stack; + uintptr_t stack = (uintptr_t)m_stack; + uintptr_t diff = access_address - stack; // In the trap region? if (m_enable_blr_optimization && diff >= GUARD_OFFSET && diff < GUARD_OFFSET + GUARD_SIZE) - { - WARN_LOG(POWERPC, "BLR cache disabled due to excessive BL in the emulated program."); - m_enable_blr_optimization = false; - UnWriteProtectMemory(m_stack + GUARD_OFFSET, GUARD_SIZE); - // We're going to need to clear the whole cache to get rid of the bad - // CALLs, but we can't yet. Fake the downcount so we're forced to the - // dispatcher (no block linking), and clear the cache so we're sent to - // Jit. Yeah, it's kind of gross. - GetBlockCache()->InvalidateICache(0, 0xffffffff, true); - CoreTiming::ForceExceptionCheck(0); - m_clear_cache_asap = true; - - return true; - } + return HandleStackFault(); return Jitx86Base::HandleFault(access_address, ctx); } - - void Jit64::Init() { EnableBlockLink(); @@ -198,7 +221,7 @@ void Jit64::Init() // BLR optimization has the same consequences as block linking, as well as // depending on the fault handler to be safe in the event of excessive BL. m_enable_blr_optimization = jo.enableBlocklink && SConfig::GetInstance().bFastmem && !SConfig::GetInstance().bEnableDebugging; - m_clear_cache_asap = false; + m_cleanup_after_stackfault = false; m_stack = nullptr; if (m_enable_blr_optimization) @@ -226,7 +249,6 @@ void Jit64::ClearCache() ClearCodeSpace(); Clear(); UpdateMemoryOptions(); - m_clear_cache_asap = false; } void Jit64::Shutdown() @@ -511,10 +533,19 @@ void Jit64::Trace() void Jit64::Jit(u32 em_address) { + if (m_cleanup_after_stackfault) + { + ClearCache(); + m_cleanup_after_stackfault = false; +#ifdef _WIN32 + // The stack is in an invalid state with no guard page, reset it. + _resetstkoflw(); +#endif + } + if (IsAlmostFull() || farcode.IsAlmostFull() || trampolines.IsAlmostFull() || blocks.IsFull() || - SConfig::GetInstance().bJITNoBlockCache || - m_clear_cache_asap) + SConfig::GetInstance().bJITNoBlockCache) { ClearCache(); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index 6af36a916d..e7d169bf86 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -41,7 +41,7 @@ private: Jit64AsmRoutineManager asm_routines; bool m_enable_blr_optimization; - bool m_clear_cache_asap; + bool m_cleanup_after_stackfault; u8* m_stack; public: @@ -58,6 +58,8 @@ public: bool HandleFault(uintptr_t access_address, SContext* ctx) override; + bool HandleStackFault() override; + // Jit! void Jit(u32 em_address) override; diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp index 0bea34a617..556b65282a 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp @@ -186,14 +186,17 @@ void Jit64AsmRoutineManager::Generate() } SetJumpTarget(notfound); + // We reset the stack because Jit might clear the code cache. + // Also if we are in the middle of disabling BLR optimization on windows + // we need to reset the stack before _resetstkoflw() is called in Jit + // otherwise we will generate a second stack overflow exception during DoJit() + ResetStack(); + //Ok, no block, let's jit ABI_PushRegistersAndAdjustStack({}, 0); ABI_CallFunctionA(32, (void *)&Jit, PPCSTATE(pc)); ABI_PopRegistersAndAdjustStack({}, 0); - // Jit might have cleared the code cache - ResetStack(); - JMP(dispatcherNoCheck, true); // no point in special casing this SetJumpTarget(bail); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index 1878aed5ae..39c37caca6 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -118,6 +118,7 @@ public: virtual const CommonAsmRoutinesBase *GetAsmRoutines() = 0; virtual bool HandleFault(uintptr_t access_address, SContext* ctx) = 0; + virtual bool HandleStackFault() { return false; } }; class Jitx86Base : public JitBase, public EmuCodeBlock diff --git a/Source/Core/Core/PowerPC/JitInterface.cpp b/Source/Core/Core/PowerPC/JitInterface.cpp index 7295ea2a8a..7f161b5499 100644 --- a/Source/Core/Core/PowerPC/JitInterface.cpp +++ b/Source/Core/Core/PowerPC/JitInterface.cpp @@ -218,6 +218,16 @@ namespace JitInterface return jit->HandleFault(access_address, ctx); } + bool HandleStackFault() + { + if (!jit) + { + return false; + } + + return jit->HandleStackFault(); + } + void ClearCache() { if (jit) diff --git a/Source/Core/Core/PowerPC/JitInterface.h b/Source/Core/Core/PowerPC/JitInterface.h index e73cc9c8f0..54249e1a3f 100644 --- a/Source/Core/Core/PowerPC/JitInterface.h +++ b/Source/Core/Core/PowerPC/JitInterface.h @@ -31,6 +31,7 @@ namespace JitInterface // Memory Utilities bool HandleFault(uintptr_t access_address, SContext* ctx); + bool HandleStackFault(); // Clearing CodeCache void ClearCache();