Merge pull request #3635 from phire/lazy_comex

Implement BLR Overflow handling for Windows.
This commit is contained in:
Pierre Bourdon 2016-03-26 01:42:54 +01:00
commit 21eb1cd158
7 changed files with 83 additions and 33 deletions

View File

@ -58,8 +58,10 @@ LONG NTAPI Handler(PEXCEPTION_POINTERS pPtrs)
} }
case EXCEPTION_STACK_OVERFLOW: case EXCEPTION_STACK_OVERFLOW:
MessageBox(nullptr, _T("Stack overflow!"), nullptr, 0); if (JitInterface::HandleStackFault())
return EXCEPTION_CONTINUE_SEARCH; return EXCEPTION_CONTINUE_EXECUTION;
else
return EXCEPTION_CONTINUE_SEARCH;
case EXCEPTION_ILLEGAL_INSTRUCTION: case EXCEPTION_ILLEGAL_INSTRUCTION:
//No SSE support? Or simply bad codegen? //No SSE support? Or simply bad codegen?

View File

@ -15,6 +15,7 @@
#include "Common/StringUtil.h" #include "Common/StringUtil.h"
#include "Common/x64ABI.h" #include "Common/x64ABI.h"
#include "Common/Logging/Log.h" #include "Common/Logging/Log.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h" #include "Core/CoreTiming.h"
#include "Core/PatchEngine.h" #include "Core/PatchEngine.h"
#include "Core/HLE/HLE.h" #include "Core/HLE/HLE.h"
@ -119,12 +120,16 @@ using namespace PowerPC;
// bad RSP, because on Linux we can use sigaltstack and on OS X we're already // bad RSP, because on Linux we can use sigaltstack and on OS X we're already
// on a separate thread. // on a separate thread.
// On Windows, the OS gets upset if RSP doesn't work, and I don't know any // Windows is... under-documented.
// equivalent of sigaltstack. Windows supports guard pages which, when // It already puts guard pages so it can automatically grow the stack and it
// accessed, immediately turn into regular pages but cause a trap... but // doesn't look like there is a way to hook into a guard page fault and implement
// putting them in the path of RSP just leads to something (in the kernel?) // our own logic.
// thinking a regular stack extension is required. So this protection is not // But when windows reaches the last guard page, it raises a "Stack Overflow"
// supported on Windows yet... // exception which we can hook into, however by default it leaves you with less
// than 4 KB of stack. So we use SetThreadStackGuarantee to trigger the Stack
// Overflow early while we still have 512 KB of stack remaining.
// After resetting the stack to the top, we call _resetstkoflw() to restore
// the guard page at the 512 KB mark.
enum enum
{ {
@ -140,6 +145,10 @@ void Jit64::AllocStack()
m_stack = (u8*)AllocateMemoryPages(STACK_SIZE); m_stack = (u8*)AllocateMemoryPages(STACK_SIZE);
ReadProtectMemory(m_stack, GUARD_SIZE); ReadProtectMemory(m_stack, GUARD_SIZE);
ReadProtectMemory(m_stack + GUARD_OFFSET, GUARD_SIZE); ReadProtectMemory(m_stack + GUARD_OFFSET, GUARD_SIZE);
#else
// On Windows we just keep using the system stack and reserve a large amount of memory at the end of the stack.
ULONG reserveSize = SAFE_STACK_SIZE;
SetThreadStackGuarantee(&reserveSize);
#endif #endif
} }
@ -154,31 +163,45 @@ void Jit64::FreeStack()
#endif #endif
} }
// Tries to recover from a stack overflow that was provoked by the BLR
// optimization. Returns true when the fault was dealt with here, and false when
// it is not ours to fix, in which case the regular stack overflow handling
// (which crashes) is expected to take over.
bool Jit64::HandleStackFault()
{
	// The overflow may have nothing to do with the BLR optimization: it could
	// come from a different thread, or the optimization may already be off.
	// Nothing can be done about the fault in either situation.
	const bool can_recover = m_enable_blr_optimization && Core::IsCPUThread();
	if (!can_recover)
		return false;

	WARN_LOG(POWERPC, "BLR cache disabled due to excessive BL in the emulated program.");
	m_enable_blr_optimization = false;

#ifndef _WIN32
	// Lift the protection on the guard region so it can be used again.
	// No equivalent call is needed on Windows, which does this automatically.
	UnWriteProtectMemory(m_stack + GUARD_OFFSET, GUARD_SIZE);
#endif

	// The entire code cache must eventually be thrown away to get rid of the
	// bad CALLs, but that cannot be done from here. Instead:
	//  - invalidate the block cache so that execution gets routed to Jit,
	//  - fake the downcount so we are forced back to the dispatcher (no block
	//    linking),
	//  - raise a flag so that Jit performs the remaining cleanup, which on
	//    Windows also includes calling _resetstkoflw() to restore the guard page.
	// Yes, this is somewhat gross.
	GetBlockCache()->InvalidateICache(0, 0xffffffff, true);
	CoreTiming::ForceExceptionCheck(0);
	m_cleanup_after_stackfault = true;

	return true;
}
bool Jit64::HandleFault(uintptr_t access_address, SContext* ctx) bool Jit64::HandleFault(uintptr_t access_address, SContext* ctx)
{ {
uintptr_t stack = (uintptr_t)m_stack, diff = access_address - stack; uintptr_t stack = (uintptr_t)m_stack;
uintptr_t diff = access_address - stack;
// In the trap region? // In the trap region?
if (m_enable_blr_optimization && diff >= GUARD_OFFSET && diff < GUARD_OFFSET + GUARD_SIZE) if (m_enable_blr_optimization && diff >= GUARD_OFFSET && diff < GUARD_OFFSET + GUARD_SIZE)
{ return HandleStackFault();
WARN_LOG(POWERPC, "BLR cache disabled due to excessive BL in the emulated program.");
m_enable_blr_optimization = false;
UnWriteProtectMemory(m_stack + GUARD_OFFSET, GUARD_SIZE);
// We're going to need to clear the whole cache to get rid of the bad
// CALLs, but we can't yet. Fake the downcount so we're forced to the
// dispatcher (no block linking), and clear the cache so we're sent to
// Jit. Yeah, it's kind of gross.
GetBlockCache()->InvalidateICache(0, 0xffffffff, true);
CoreTiming::ForceExceptionCheck(0);
m_clear_cache_asap = true;
return true;
}
return Jitx86Base::HandleFault(access_address, ctx); return Jitx86Base::HandleFault(access_address, ctx);
} }
void Jit64::Init() void Jit64::Init()
{ {
EnableBlockLink(); EnableBlockLink();
@ -198,7 +221,7 @@ void Jit64::Init()
// BLR optimization has the same consequences as block linking, as well as // BLR optimization has the same consequences as block linking, as well as
// depending on the fault handler to be safe in the event of excessive BL. // depending on the fault handler to be safe in the event of excessive BL.
m_enable_blr_optimization = jo.enableBlocklink && SConfig::GetInstance().bFastmem && !SConfig::GetInstance().bEnableDebugging; m_enable_blr_optimization = jo.enableBlocklink && SConfig::GetInstance().bFastmem && !SConfig::GetInstance().bEnableDebugging;
m_clear_cache_asap = false; m_cleanup_after_stackfault = false;
m_stack = nullptr; m_stack = nullptr;
if (m_enable_blr_optimization) if (m_enable_blr_optimization)
@ -226,7 +249,6 @@ void Jit64::ClearCache()
ClearCodeSpace(); ClearCodeSpace();
Clear(); Clear();
UpdateMemoryOptions(); UpdateMemoryOptions();
m_clear_cache_asap = false;
} }
void Jit64::Shutdown() void Jit64::Shutdown()
@ -511,10 +533,19 @@ void Jit64::Trace()
void Jit64::Jit(u32 em_address) void Jit64::Jit(u32 em_address)
{ {
if (m_cleanup_after_stackfault)
{
ClearCache();
m_cleanup_after_stackfault = false;
#ifdef _WIN32
// The stack is in an invalid state with no guard page, reset it.
_resetstkoflw();
#endif
}
if (IsAlmostFull() || farcode.IsAlmostFull() || trampolines.IsAlmostFull() || if (IsAlmostFull() || farcode.IsAlmostFull() || trampolines.IsAlmostFull() ||
blocks.IsFull() || blocks.IsFull() ||
SConfig::GetInstance().bJITNoBlockCache || SConfig::GetInstance().bJITNoBlockCache)
m_clear_cache_asap)
{ {
ClearCache(); ClearCache();
} }

View File

@ -41,7 +41,7 @@ private:
Jit64AsmRoutineManager asm_routines; Jit64AsmRoutineManager asm_routines;
bool m_enable_blr_optimization; bool m_enable_blr_optimization;
bool m_clear_cache_asap; bool m_cleanup_after_stackfault;
u8* m_stack; u8* m_stack;
public: public:
@ -58,6 +58,8 @@ public:
bool HandleFault(uintptr_t access_address, SContext* ctx) override; bool HandleFault(uintptr_t access_address, SContext* ctx) override;
bool HandleStackFault() override;
// Jit! // Jit!
void Jit(u32 em_address) override; void Jit(u32 em_address) override;

View File

@ -186,14 +186,17 @@ void Jit64AsmRoutineManager::Generate()
} }
SetJumpTarget(notfound); SetJumpTarget(notfound);
// We reset the stack because Jit might clear the code cache.
// Also, if we are in the middle of disabling BLR optimization on Windows,
// we need to reset the stack before _resetstkoflw() is called in Jit;
// otherwise we will generate a second stack overflow exception during DoJit().
ResetStack();
//Ok, no block, let's jit //Ok, no block, let's jit
ABI_PushRegistersAndAdjustStack({}, 0); ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunctionA(32, (void *)&Jit, PPCSTATE(pc)); ABI_CallFunctionA(32, (void *)&Jit, PPCSTATE(pc));
ABI_PopRegistersAndAdjustStack({}, 0); ABI_PopRegistersAndAdjustStack({}, 0);
// Jit might have cleared the code cache
ResetStack();
JMP(dispatcherNoCheck, true); // no point in special casing this JMP(dispatcherNoCheck, true); // no point in special casing this
SetJumpTarget(bail); SetJumpTarget(bail);

View File

@ -118,6 +118,7 @@ public:
virtual const CommonAsmRoutinesBase *GetAsmRoutines() = 0; virtual const CommonAsmRoutinesBase *GetAsmRoutines() = 0;
virtual bool HandleFault(uintptr_t access_address, SContext* ctx) = 0; virtual bool HandleFault(uintptr_t access_address, SContext* ctx) = 0;
// Hook invoked when a stack fault is reported to the JIT. The default
// implementation handles nothing and reports the fault as unhandled.
virtual bool HandleStackFault()
{
	return false;
}
}; };
class Jitx86Base : public JitBase, public EmuCodeBlock class Jitx86Base : public JitBase, public EmuCodeBlock

View File

@ -218,6 +218,16 @@ namespace JitInterface
return jit->HandleFault(access_address, ctx); return jit->HandleFault(access_address, ctx);
} }
// Forwards a stack fault to the current JIT, if there is one.
// Returns the JIT's verdict, or false (not handled) when no JIT exists.
bool HandleStackFault()
{
	return jit ? jit->HandleStackFault() : false;
}
void ClearCache() void ClearCache()
{ {
if (jit) if (jit)

View File

@ -31,6 +31,7 @@ namespace JitInterface
// Memory Utilities // Memory Utilities
bool HandleFault(uintptr_t access_address, SContext* ctx); bool HandleFault(uintptr_t access_address, SContext* ctx);
bool HandleStackFault();
// Clearing CodeCache // Clearing CodeCache
void ClearCache(); void ClearCache();