ee|iop: use xScopedStackFrame to handle dynarec frame

* Rework MVU a bit to support xScopedStackFrame. The stack frame
could potentially be optimized further (saving 5 instructions)

* I removed the recompiler stack check. Address Sanitizer is more efficient anyway.
This commit is contained in:
Gregory Hainaut 2015-12-04 19:36:29 +01:00
parent 6e66bea152
commit 859d62d2a7
4 changed files with 74 additions and 365 deletions

View File

@ -120,50 +120,6 @@ static void recEventTest()
_cpuEventTest_Shared();
}
// parameters:
// espORebp - 0 for ESP, or 1 for EBP.
// regval - current value of the register at the time the fault was detected (predates the
// stackframe setup code in this function)
static void __fastcall StackFrameCheckFailed( int espORebp, int regval )
{
pxFailDev( pxsFmt( L"(R3000A Recompiler Stackframe) Sanity check failed on %ls\n\tCurrent=%d; Saved=%d",
(espORebp==0) ? L"ESP" : L"EBP", regval, (espORebp==0) ? s_store_esp : s_store_ebp )
);
// Note: The recompiler will attempt to recover ESP and EBP after returning from this function,
// so typically selecting Continue/Ignore/Cancel for this assertion should allow PCSX2 to con-
// tinue to run with some degree of stability.
}
static void _DynGen_StackFrameCheck()
{
if( !IsDevBuild ) return;
// --------- EBP Here -----------
xCMP( ebp, ptr[&s_store_ebp] );
xForwardJE8 skipassert_ebp;
xMOV( ecx, 1 ); // 1 specifies EBP
xMOV( edx, ebp );
xCALL( StackFrameCheckFailed );
xMOV( ebp, ptr[&s_store_ebp] ); // half-hearted frame recovery attempt!
skipassert_ebp.SetTarget();
// --------- ESP There -----------
xCMP( esp, ptr[&s_store_esp] );
xForwardJE8 skipassert_esp;
xXOR( ecx, ecx ); // 0 specifies ESP
xMOV( edx, esp );
xCALL( StackFrameCheckFailed );
xMOV( esp, ptr[&s_store_esp] ); // half-hearted frame recovery attempt!
skipassert_esp.SetTarget();
}
// The address for all cleared blocks. It recompiles the current pc and then
// dispatches to the recompiled block address.
static DynGenFunc* _DynGen_JITCompile()
@ -171,7 +127,6 @@ static DynGenFunc* _DynGen_JITCompile()
pxAssertMsg( iopDispatcherReg != NULL, "Please compile the DispatcherReg subroutine *before* JITComple. Thanks." );
u8* retval = xGetPtr();
_DynGen_StackFrameCheck();
xMOV( ecx, ptr[&psxRegs.pc] );
xCALL( iopRecRecompile );
@ -196,7 +151,6 @@ static DynGenFunc* _DynGen_JITCompileInBlock()
static DynGenFunc* _DynGen_DispatcherReg()
{
u8* retval = xGetPtr();
_DynGen_StackFrameCheck();
xMOV( eax, ptr[&psxRegs.pc] );
xMOV( ebx, eax );
@ -210,128 +164,21 @@ static DynGenFunc* _DynGen_DispatcherReg()
// --------------------------------------------------------------------------------------
// EnterRecompiledCode - dynamic compilation stub!
// --------------------------------------------------------------------------------------
// In Release Builds this literally generates the following code:
// push edi
// push esi
// push ebx
// jmp DispatcherReg
// pop ebx
// pop esi
// pop edi
//
// See notes on why this works in both GCC (aligned stack!) and other compilers (not-so-
// aligned stack!). In debug/dev builds the code gen is more complicated, as it constructs
// ebp stackframe mess, which allows for a complete backtrace from debug breakpoints (yay).
//
// Also, if you set PCSX2_IOP_FORCED_ALIGN_STACK to 1, the codegen for MSVC becomes slightly
// more complicated since it has to perform a full stack alignment on entry.
//
#if defined(__GNUG__) || defined(__DARWIN__)
# define PCSX2_ASSUME_ALIGNED_STACK 1
#else
# define PCSX2_ASSUME_ALIGNED_STACK 0
#endif
// Set to 0 for a speedup in release builds.
// [doesn't apply to GCC/Mac, which must always align]
#define PCSX2_IOP_FORCED_ALIGN_STACK 0 //1
// For overriding stackframe generation options in Debug builds (possibly useful for troubleshooting)
// Typically this value should be the same as IsDevBuild.
static const bool GenerateStackFrame = IsDevBuild;
static DynGenFunc* _DynGen_EnterRecompiledCode()
{
u8* retval = xGetPtr();
bool allocatedStack = GenerateStackFrame || PCSX2_IOP_FORCED_ALIGN_STACK;
// Optimization: The IOP never uses stack-based parameter invocation, so we can avoid
// allocating any room on the stack for it (which is important since the IOP's entry
// code gets invoked quite a lot).
if( allocatedStack )
{
xPUSH( ebp );
xMOV( ebp, esp );
xAND( esp, -0x10 );
u8* retval = xGetPtr();
xSUB( esp, 0x20 );
{ // Properly scope the frame prologue/epilogue
xScopedStackFrame frame(IsDevBuild);
xMOV( ptr[ebp-12], edi );
xMOV( ptr[ebp-8], esi );
xMOV( ptr[ebp-4], ebx );
}
else
{
// GCC Compiler:
// The frame pointer coming in from the EE's event test can be safely assumed to be
// aligned, since GCC always aligns stackframes. While handy in x86-64, where CALL + PUSH EBP
// results in a neatly realigned stack on entry to every function, unfortunately in x86-32
// this is usually worthless because CALL+PUSH leaves us 8 byte aligned instead (fail). So
// we have to do the usual set of stackframe alignments and simulated callstack mess
// *regardless*.
xJMP(iopDispatcherReg);
// MSVC/Intel compilers:
// The PCSX2_IOP_FORCED_ALIGN_STACK setting is 0, so we don't care. Just push regs like
// the good old days! (stack alignment will be indeterminate)
xPUSH( edi );
xPUSH( esi );
xPUSH( ebx );
allocatedStack = false;
}
uptr* imm = NULL;
if( allocatedStack )
{
if( GenerateStackFrame )
{
// Simulate a CALL function by pushing the call address and EBP onto the stack.
// This retains proper stacktrace and stack unwinding (handy in devbuilds!)
xMOV( ptr32[esp+0x0c], 0xffeeff );
imm = (uptr*)(xGetPtr()-4);
// This part simulates the "normal" stackframe prep of "push ebp, mov ebp, esp"
xMOV( ptr32[esp+0x08], ebp );
xLEA( ebp, ptr32[esp+0x08] );
}
}
if( IsDevBuild )
{
xMOV( ptr[&s_store_esp], esp );
xMOV( ptr[&s_store_ebp], ebp );
}
xJMP( iopDispatcherReg );
if( imm != NULL )
*imm = (uptr)xGetPtr();
// ----------------------
// ----> Cleanup! ---->
iopExitRecompiledCode = (DynGenFunc*)xGetPtr();
if( allocatedStack )
{
// pop the nested "simulated call" stackframe, if needed:
if( GenerateStackFrame ) xLEAVE();
xMOV( edi, ptr[ebp-12] );
xMOV( esi, ptr[ebp-8] );
xMOV( ebx, ptr[ebp-4] );
xLEAVE();
}
else
{
xPOP( ebx );
xPOP( esi );
xPOP( edi );
// Save an exit point
iopExitRecompiledCode = (DynGenFunc*)xGetPtr();
}
xRET();

View File

@ -372,50 +372,6 @@ static void recEventTest()
_cpuEventTest_Shared();
}
// parameters:
// espORebp - 0 for ESP, or 1 for EBP.
// regval - current value of the register at the time the fault was detected (predates the
// stackframe setup code in this function)
static void __fastcall StackFrameCheckFailed( int espORebp, int regval )
{
pxFailDev( wxsFormat( L"(R5900 Recompiler Stackframe) Sanity check failed on %s\n\tCurrent=%d; Saved=%d",
(espORebp==0) ? L"ESP" : L"EBP", regval, (espORebp==0) ? s_store_esp : s_store_ebp )
);
// Note: The recompiler will attempt to recover ESP and EBP after returning from this function,
// so typically selecting Continue/Ignore/Cancel for this assertion should allow PCSX2 to con-
// tinue to run with some degree of stability.
}
static void _DynGen_StackFrameCheck()
{
if( !EmuConfig.Cpu.Recompiler.StackFrameChecks ) return;
// --------- EBP Here -----------
xCMP( ebp, ptr[&s_store_ebp] );
xForwardJE8 skipassert_ebp;
xMOV( ecx, 1 ); // 1 specifies EBP
xMOV( edx, ebp );
xCALL( StackFrameCheckFailed );
xMOV( ebp, ptr[&s_store_ebp] ); // half-hearted frame recovery attempt!
skipassert_ebp.SetTarget();
// --------- ESP There -----------
xCMP( esp, ptr[&s_store_esp] );
xForwardJE8 skipassert_esp;
xXOR( ecx, ecx ); // 0 specifies ESP
xMOV( edx, esp );
xCALL( StackFrameCheckFailed );
xMOV( esp, ptr[&s_store_esp] ); // half-hearted frame recovery attempt!
skipassert_esp.SetTarget();
}
// The address for all cleared blocks. It recompiles the current pc and then
// dispatches to the recompiled block address.
static DynGenFunc* _DynGen_JITCompile()
@ -423,7 +379,6 @@ static DynGenFunc* _DynGen_JITCompile()
pxAssertMsg( DispatcherReg != NULL, "Please compile the DispatcherReg subroutine *before* JITComple. Thanks." );
u8* retval = xGetAlignedCallTarget();
_DynGen_StackFrameCheck();
xMOV( ecx, ptr[&cpuRegs.pc] );
xCALL( recRecompile );
@ -448,7 +403,6 @@ static DynGenFunc* _DynGen_JITCompileInBlock()
static DynGenFunc* _DynGen_DispatcherReg()
{
u8* retval = xGetPtr(); // fallthrough target, can't align it!
_DynGen_StackFrameCheck();
xMOV( eax, ptr[&cpuRegs.pc] );
xMOV( ebx, eax );
@ -471,63 +425,18 @@ static DynGenFunc* _DynGen_DispatcherEvent()
static DynGenFunc* _DynGen_EnterRecompiledCode()
{
pxAssertDev( DispatcherReg != NULL, "Dynamically generated dispatchers are required prior to generating EnterRecompiledCode!" );
u8* retval = xGetAlignedCallTarget();
// "standard" frame pointer setup for aligned stack: Record the original
// esp into ebp, and then align esp. ebp references the original esp base
// for the duration of our function, and is used to restore the original
// esp before returning from the function
{ // Properly scope the frame prologue/epilogue
xScopedStackFrame frame(IsDevBuild);
xPUSH( ebp );
xMOV( ebp, esp );
xAND( esp, -0x10 );
xJMP(DispatcherReg);
// First 0x10 is for esi, edi, etc. Second 0x10 is for the return address and ebp. The
// third 0x10 is an optimization for C-style CDECL calls we might make from the recompiler
// (parameters for those calls can be stored there!) [currently no cdecl functions are
// used -- we do everything through __fastcall)
static const int cdecl_reserve = 0x00;
xSUB( esp, 0x20 + cdecl_reserve );
xMOV( ptr[ebp-12], edi );
xMOV( ptr[ebp-8], esi );
xMOV( ptr[ebp-4], ebx );
// Simulate a CALL function by pushing the call address and EBP onto the stack.
// (the dummy address here is filled in later right before we generate the LEAVE code)
xMOV( ptr32[esp+0x0c+cdecl_reserve], 0xdeadbeef );
uptr& imm = *(uptr*)(xGetPtr()-4);
// This part simulates the "normal" stackframe prep of "push ebp, mov ebp, esp"
// It is done here because we can't really generate that stuff from the Dispatchers themselves.
xMOV( ptr32[esp+0x08+cdecl_reserve], ebp );
xLEA( ebp, ptr32[esp+0x08+cdecl_reserve] );
if (EmuConfig.Cpu.Recompiler.StackFrameChecks) {
xMOV( ptr[&s_store_esp], esp );
xMOV( ptr[&s_store_ebp], ebp );
// Save an exit point
ExitRecompiledCode = (DynGenFunc*)xGetPtr();
}
xJMP( DispatcherReg );
xAlignCallTarget();
// This dummy CALL is unreachable code that some debuggers (MSVC2008) need in order to
// unwind the stack properly. This is effectively the call that we simulate above.
if( IsDevBuild ) xCALL( DispatcherReg );
imm = (uptr)xGetPtr();
ExitRecompiledCode = (DynGenFunc*)xGetPtr();
xLEAVE();
xMOV( edi, ptr[ebp-12] );
xMOV( esi, ptr[ebp-8] );
xMOV( ebx, ptr[ebp-4] );
xLEAVE();
xRET();
return (DynGenFunc*)retval;
@ -1149,8 +1058,6 @@ static u32 scaleblockcycles()
// setting "g_branch = 2";
static void iBranchTest(u32 newpc)
{
_DynGen_StackFrameCheck();
// Check the Event scheduler if our "cycle target" has been reached.
// Equiv code to:
// cpuRegs.cycle += blockcycles;

View File

@ -80,10 +80,8 @@ void mVUreset(microVU& mVU, bool resetReserve) {
else Perf::any.map((uptr)&mVU.dispCache, mVUdispCacheSize, "mVU0 Dispatcher");
x86SetPtr(mVU.dispCache);
mVUdispatcherA(mVU);
mVUdispatcherB(mVU);
mVUdispatcherC(mVU);
mVUdispatcherD(mVU);
mVUdispatcherAB(mVU);
mVUdispatcherCD(mVU);
mVUemitSearch();
// Clear All Program Data

View File

@ -19,139 +19,96 @@
// Dispatcher Functions
//------------------------------------------------------------------
// Generates the code for entering recompiled blocks
void mVUdispatcherA(mV) {
// Generates the code for entering/exit recompiled blocks
void mVUdispatcherAB(mV) {
mVU.startFunct = x86Ptr;
// Backup cpu state
xPUSH(ebp);
xPUSH(ebx);
xPUSH(esi);
xPUSH(edi);
{
xScopedStackFrame frame(false, true);
// Align the stackframe (GCC only, since GCC assumes stackframe is always aligned)
#ifdef __GNUC__
xSUB(esp, 12);
#endif
// __fastcall = The caller has already put the needed parameters in ecx/edx:
if (!isVU1) { xCALL(mVUexecuteVU0); }
else { xCALL(mVUexecuteVU1); }
// __fastcall = The caller has already put the needed parameters in ecx/edx:
if (!isVU1) { xCALL(mVUexecuteVU0); }
else { xCALL(mVUexecuteVU1); }
// Load VU's MXCSR state
xLDMXCSR(g_sseVUMXCSR);
// Load VU's MXCSR state
xLDMXCSR(g_sseVUMXCSR);
// Load Regs
xMOV(gprF0, ptr32[&mVU.regs().VI[REG_STATUS_FLAG].UL]);
xMOV(gprF1, gprF0);
xMOV(gprF2, gprF0);
xMOV(gprF3, gprF0);
// Load Regs
xMOV(gprF0, ptr32[&mVU.regs().VI[REG_STATUS_FLAG].UL]);
xMOV(gprF1, gprF0);
xMOV(gprF2, gprF0);
xMOV(gprF3, gprF0);
xMOVAPS (xmmT1, ptr128[&mVU.regs().VI[REG_MAC_FLAG].UL]);
xSHUF.PS(xmmT1, xmmT1, 0);
xMOVAPS (ptr128[mVU.macFlag], xmmT1);
xMOVAPS (xmmT1, ptr128[&mVU.regs().VI[REG_MAC_FLAG].UL]);
xSHUF.PS(xmmT1, xmmT1, 0);
xMOVAPS (ptr128[mVU.macFlag], xmmT1);
xMOVAPS (xmmT1, ptr128[&mVU.regs().VI[REG_CLIP_FLAG].UL]);
xSHUF.PS(xmmT1, xmmT1, 0);
xMOVAPS (ptr128[mVU.clipFlag], xmmT1);
xMOVAPS (xmmT1, ptr128[&mVU.regs().VI[REG_CLIP_FLAG].UL]);
xSHUF.PS(xmmT1, xmmT1, 0);
xMOVAPS (ptr128[mVU.clipFlag], xmmT1);
xMOVAPS (xmmT1, ptr128[&mVU.regs().VI[REG_P].UL]);
xMOVAPS (xmmPQ, ptr128[&mVU.regs().VI[REG_Q].UL]);
xSHUF.PS(xmmPQ, xmmT1, 0); // wzyx = PPQQ
xMOVAPS (xmmT1, ptr128[&mVU.regs().VI[REG_P].UL]);
xMOVAPS (xmmPQ, ptr128[&mVU.regs().VI[REG_Q].UL]);
xSHUF.PS(xmmPQ, xmmT1, 0); // wzyx = PPQQ
// Jump to Recompiled Code Block
xJMP(eax);
// Jump to Recompiled Code Block
xJMP(eax);
pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
"microVU: Dispatcher generation exceeded reserved cache area!");
}
mVU.exitFunct = x86Ptr;
// Generates the code to exit from recompiled blocks
void mVUdispatcherB(mV) {
mVU.exitFunct = x86Ptr;
// Load EE's MXCSR state
xLDMXCSR(g_sseMXCSR);
// Load EE's MXCSR state
xLDMXCSR(g_sseMXCSR);
// __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers;
// all other arguments are passed right to left.
if (!isVU1) { xCALL(mVUcleanUpVU0); }
else { xCALL(mVUcleanUpVU1); }
// Unalign the stackframe:
#ifdef __GNUC__
xADD( esp, 12 );
#endif
// Restore cpu state
xPOP(edi);
xPOP(esi);
xPOP(ebx);
xPOP(ebp);
// __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers;
// all other arguments are passed right to left.
if (!isVU1) { xCALL(mVUcleanUpVU0); }
else { xCALL(mVUcleanUpVU1); }
}
xRET();
pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
"microVU: Dispatcher generation exceeded reserved cache area!");
"microVU: Dispatcher generation exceeded reserved cache area!");
}
// Generates the code for resuming xgkick
void mVUdispatcherC(mV) {
// Generates the code for resuming/exit xgkick
void mVUdispatcherCD(mV) {
mVU.startFunctXG = x86Ptr;
// Backup cpu state
xPUSH(ebp);
xPUSH(ebx);
xPUSH(esi);
xPUSH(edi);
{
xScopedStackFrame frame(false, true);
// Align the stackframe (GCC only, since GCC assumes stackframe is always aligned)
#ifdef __GNUC__
xSUB(esp, 12);
#endif
// Load VU's MXCSR state
xLDMXCSR(g_sseVUMXCSR);
// Load VU's MXCSR state
xLDMXCSR(g_sseVUMXCSR);
mVUrestoreRegs(mVU);
mVUrestoreRegs(mVU);
xMOV(gprF0, ptr32[&mVU.statFlag[0]]);
xMOV(gprF1, ptr32[&mVU.statFlag[1]]);
xMOV(gprF2, ptr32[&mVU.statFlag[2]]);
xMOV(gprF3, ptr32[&mVU.statFlag[3]]);
xMOV(gprF0, ptr32[&mVU.statFlag[0]]);
xMOV(gprF1, ptr32[&mVU.statFlag[1]]);
xMOV(gprF2, ptr32[&mVU.statFlag[2]]);
xMOV(gprF3, ptr32[&mVU.statFlag[3]]);
// Jump to Recompiled Code Block
xJMP(ptr32[&mVU.resumePtrXG]);
// Jump to Recompiled Code Block
xJMP(ptr32[&mVU.resumePtrXG]);
pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
"microVU: Dispatcher generation exceeded reserved cache area!");
}
mVU.exitFunctXG = x86Ptr;
// Generates the code to exit from xgkick
void mVUdispatcherD(mV) {
mVU.exitFunctXG = x86Ptr;
//xPOP(gprT1); // Pop return address
//xMOV(ptr32[&mVU.resumePtrXG], gprT1);
//xPOP(gprT1); // Pop return address
//xMOV(ptr32[&mVU.resumePtrXG], gprT1);
// Backup Status Flag (other regs were backed up on xgkick)
xMOV(ptr32[&mVU.statFlag[0]], gprF0);
xMOV(ptr32[&mVU.statFlag[1]], gprF1);
xMOV(ptr32[&mVU.statFlag[2]], gprF2);
xMOV(ptr32[&mVU.statFlag[3]], gprF3);
// Backup Status Flag (other regs were backed up on xgkick)
xMOV(ptr32[&mVU.statFlag[0]], gprF0);
xMOV(ptr32[&mVU.statFlag[1]], gprF1);
xMOV(ptr32[&mVU.statFlag[2]], gprF2);
xMOV(ptr32[&mVU.statFlag[3]], gprF3);
// Load EE's MXCSR state
xLDMXCSR(g_sseMXCSR);
// Load EE's MXCSR state
xLDMXCSR(g_sseMXCSR);
// Unalign the stackframe:
#ifdef __GNUC__
xADD( esp, 12 );
#endif
// Restore cpu state
xPOP(edi);
xPOP(esi);
xPOP(ebx);
xPOP(ebp);
}
xRET();
pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
"microVU: Dispatcher generation exceeded reserved cache area!");
}