Implement BLR Overflow handling for Windows.

2016-02-15 14:25:34 +13:00 · 2016-02-15 14:25:34 +13:00 · c5b3a2efac
parent b164b4c475
commit c5b3a2efac
7 changed files with 83 additions and 33 deletions
--- a/Source/Core/Core/MemTools.cpp
+++ b/Source/Core/Core/MemTools.cpp
@ -58,8 +58,10 @@ LONG NTAPI Handler(PEXCEPTION_POINTERS pPtrs)
 		}

 	case EXCEPTION_STACK_OVERFLOW:
-		MessageBox(nullptr, _T("Stack overflow!"), nullptr, 0);
-		return EXCEPTION_CONTINUE_SEARCH;
+		if (JitInterface::HandleStackFault())
+			return EXCEPTION_CONTINUE_EXECUTION;
+		else
+			return EXCEPTION_CONTINUE_SEARCH;

 	case EXCEPTION_ILLEGAL_INSTRUCTION:
 		//No SSE support? Or simply bad codegen?
--- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
@ -15,6 +15,7 @@
 #include "Common/StringUtil.h"
 #include "Common/x64ABI.h"
 #include "Common/Logging/Log.h"
+#include "Core/Core.h"
 #include "Core/CoreTiming.h"
 #include "Core/PatchEngine.h"
 #include "Core/HLE/HLE.h"
@ -119,12 +120,16 @@ using namespace PowerPC;
 // bad RSP, because on Linux we can use sigaltstack and on OS X we're already
 // on a separate thread.

-// On Windows, the OS gets upset if RSP doesn't work, and I don't know any
-// equivalent of sigaltstack.  Windows supports guard pages which, when
-// accessed, immediately turn into regular pages but cause a trap... but
-// putting them in the path of RSP just leads to something (in the kernel?)
-// thinking a regular stack extension is required.  So this protection is not
-// supported on Windows yet...
+// Windows is... under-documented.
+// It already places guard pages so it can automatically grow the stack, and it
+// doesn't look like there is a way to hook into a guard page fault and implement
+// our own logic.
+// But when Windows reaches the last guard page, it raises a "Stack Overflow"
+// exception which we can hook into. However, by default it leaves you with less
+// than 4 KB of stack, so we use SetThreadStackGuarantee to trigger the Stack
+// Overflow early while we still have 512 KB of stack remaining.
+// After resetting the stack to the top, we call _resetstkoflw() to restore
+// the guard page at the 512 KB mark.

 enum
 {
@ -140,6 +145,10 @@ void Jit64::AllocStack()
 	m_stack = (u8*)AllocateMemoryPages(STACK_SIZE);
 	ReadProtectMemory(m_stack, GUARD_SIZE);
 	ReadProtectMemory(m_stack + GUARD_OFFSET, GUARD_SIZE);
+#else
+	// On Windows we just keep using the system stack and reserve a large amount of memory at the end of the stack.
+	ULONG reserveSize = SAFE_STACK_SIZE;
+	SetThreadStackGuarantee(&reserveSize);
 #endif
 }

@ -154,31 +163,45 @@ void Jit64::FreeStack()
 #endif
 }

+bool Jit64::HandleStackFault()
+{
+	// The stack fault may have been caused by something other than the BLR
+	// optimization. If BLR optimization isn't enabled, or the fault was triggered
+	// from another thread (not the CPU thread), there is nothing we can do about it.
+	// Return false so the regular stack overflow handler can trigger (which crashes).
+	if (!m_enable_blr_optimization || !Core::IsCPUThread())
+		return false;
+
+	WARN_LOG(POWERPC, "BLR cache disabled due to excessive BL in the emulated program.");
+	m_enable_blr_optimization = false;
+#ifndef _WIN32
+	// Not needed on Windows, which does this automatically.
+	UnWriteProtectMemory(m_stack + GUARD_OFFSET, GUARD_SIZE);
+#endif
+	// We're going to need to clear the whole cache to get rid of the bad
+	// CALLs, but we can't do that yet.  Fake the downcount so we're forced to the
+	// dispatcher (no block linking), and invalidate the icache so we're sent to
+	// Jit, which does the deferred cleanup when m_cleanup_after_stackfault is set.
+	// On Windows, that cleanup also calls _resetstkoflw() to reset the guard page.
+	// Yeah, it's kind of gross.
+	GetBlockCache()->InvalidateICache(0, 0xffffffff, true);
+	CoreTiming::ForceExceptionCheck(0);
+	m_cleanup_after_stackfault = true;
+
+	return true;
+}
+
 bool Jit64::HandleFault(uintptr_t access_address, SContext* ctx)
 {
-	uintptr_t stack = (uintptr_t)m_stack, diff = access_address - stack;
+	uintptr_t stack = (uintptr_t)m_stack;
+	uintptr_t diff = access_address - stack;
 	// In the trap region?
 	if (m_enable_blr_optimization && diff >= GUARD_OFFSET && diff < GUARD_OFFSET + GUARD_SIZE)
-	{
-		WARN_LOG(POWERPC, "BLR cache disabled due to excessive BL in the emulated program.");
-		m_enable_blr_optimization = false;
-		UnWriteProtectMemory(m_stack + GUARD_OFFSET, GUARD_SIZE);
-		// We're going to need to clear the whole cache to get rid of the bad
-		// CALLs, but we can't yet.  Fake the downcount so we're forced to the
-		// dispatcher (no block linking), and clear the cache so we're sent to
-		// Jit.  Yeah, it's kind of gross.
-		GetBlockCache()->InvalidateICache(0, 0xffffffff, true);
-		CoreTiming::ForceExceptionCheck(0);
-		m_clear_cache_asap = true;
-
-		return true;
-	}
+		return HandleStackFault();

 	return Jitx86Base::HandleFault(access_address, ctx);
 }

-
-
 void Jit64::Init()
 {
 	EnableBlockLink();
@ -198,7 +221,7 @@ void Jit64::Init()
 	// BLR optimization has the same consequences as block linking, as well as
 	// depending on the fault handler to be safe in the event of excessive BL.
 	m_enable_blr_optimization = jo.enableBlocklink && SConfig::GetInstance().bFastmem && !SConfig::GetInstance().bEnableDebugging;
-	m_clear_cache_asap = false;
+	m_cleanup_after_stackfault = false;

 	m_stack = nullptr;
 	if (m_enable_blr_optimization)
@ -226,7 +249,6 @@ void Jit64::ClearCache()
 	ClearCodeSpace();
 	Clear();
 	UpdateMemoryOptions();
-	m_clear_cache_asap = false;
 }

 void Jit64::Shutdown()
@ -511,10 +533,19 @@ void Jit64::Trace()

 void Jit64::Jit(u32 em_address)
 {
+	if (m_cleanup_after_stackfault)
+	{
+		ClearCache();
+		m_cleanup_after_stackfault = false;
+#ifdef _WIN32
+		// The stack is in an invalid state with no guard page; reset it.
+		_resetstkoflw();
+#endif
+	}
+
 	if (IsAlmostFull() || farcode.IsAlmostFull() || trampolines.IsAlmostFull() ||
 	    blocks.IsFull() ||
-	    SConfig::GetInstance().bJITNoBlockCache ||
-	    m_clear_cache_asap)
+	    SConfig::GetInstance().bJITNoBlockCache)
 	{
 		ClearCache();
 	}
--- a/Source/Core/Core/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.h
@ -41,7 +41,7 @@ private:
 	Jit64AsmRoutineManager asm_routines;

 	bool m_enable_blr_optimization;
-	bool m_clear_cache_asap;
+	bool m_cleanup_after_stackfault;
 	u8* m_stack;

 public:
@ -58,6 +58,8 @@ public:

 	bool HandleFault(uintptr_t access_address, SContext* ctx) override;

+	bool HandleStackFault() override;
+
 	// Jit!

 	void Jit(u32 em_address) override;
--- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
@ -186,14 +186,17 @@ void Jit64AsmRoutineManager::Generate()
 			}
 			SetJumpTarget(notfound);

+			// We reset the stack because Jit might clear the code cache.
+			// Also, if we are in the middle of disabling BLR optimization on Windows,
+			// we need to reset the stack before _resetstkoflw() is called in Jit;
+			// otherwise we will generate a second stack overflow exception during DoJit().
+			ResetStack();
+
 			//Ok, no block, let's jit
 			ABI_PushRegistersAndAdjustStack({}, 0);
 			ABI_CallFunctionA(32, (void *)&Jit, PPCSTATE(pc));
 			ABI_PopRegistersAndAdjustStack({}, 0);

-			// Jit might have cleared the code cache
-			ResetStack();
-
 			JMP(dispatcherNoCheck, true); // no point in special casing this

 		SetJumpTarget(bail);
--- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h
+++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h
@ -118,6 +118,7 @@ public:
 	virtual const CommonAsmRoutinesBase *GetAsmRoutines() = 0;

 	virtual bool HandleFault(uintptr_t access_address, SContext* ctx) = 0;
+	virtual bool HandleStackFault() { return false; }
 };

 class Jitx86Base : public JitBase, public EmuCodeBlock
--- a/Source/Core/Core/PowerPC/JitInterface.cpp
+++ b/Source/Core/Core/PowerPC/JitInterface.cpp
@ -218,6 +218,16 @@ namespace JitInterface
 		return jit->HandleFault(access_address, ctx);
 	}

+	bool HandleStackFault()  // Forwards a stack fault to the current JIT instance, if there is one.
+	{
+		if (!jit)
+		{
+			return false;  // No JIT instance, so the fault is reported as unhandled.
+		}
+
+		return jit->HandleStackFault();  // Pass the JIT's own result back to the caller.
+	}
+
 	void ClearCache()
 	{
 		if (jit)
--- a/Source/Core/Core/PowerPC/JitInterface.h
+++ b/Source/Core/Core/PowerPC/JitInterface.h
@ -31,6 +31,7 @@ namespace JitInterface

 	// Memory Utilities
 	bool HandleFault(uintptr_t access_address, SContext* ctx);
+	bool HandleStackFault();

 	// Clearing CodeCache
 	void ClearCache();