diff --git a/common/include/x86emitter/inlines.inl b/common/include/x86emitter/inlines.inl index 5ad9e5f971..8d6e9f1854 100644 --- a/common/include/x86emitter/inlines.inl +++ b/common/include/x86emitter/inlines.inl @@ -232,10 +232,12 @@ namespace x86Emitter else if( Index.IsEmpty() ) { Index = src.Index; - Factor = 1; + Factor = src.Factor; } else if( Index == src.Index ) - Factor++; + { + Factor += src.Factor; + } else pxFailDev( L"x86Emitter: address modifiers cannot have more than two index registers." ); // oops, only 2 regs allowed per ModRm! diff --git a/pcsx2/NakedAsm.h b/pcsx2/NakedAsm.h index 06fe4dae4d..dceaf77859 100644 --- a/pcsx2/NakedAsm.h +++ b/pcsx2/NakedAsm.h @@ -47,11 +47,6 @@ extern "C" void iopJITCompileInBlock(); void iopDispatcherReg(); - // aR5900-32.S - void JITCompile(); - void JITCompileInBlock(); - void DispatcherReg(); - void DispatcherEvent(); } #endif diff --git a/pcsx2/gui/AppAssert.cpp b/pcsx2/gui/AppAssert.cpp index 1277d0b7b9..5597c14471 100644 --- a/pcsx2/gui/AppAssert.cpp +++ b/pcsx2/gui/AppAssert.cpp @@ -74,6 +74,10 @@ static __threadlocal bool _reentrant_lock = false; // via messages. void Pcsx2App::OnAssertFailure( const wxChar *file, int line, const wxChar *func, const wxChar *cond, const wxChar *msg ) { + // Used to allow the user to suppress future assertions during this application's session. + static bool disableAsserts = false; + if( disableAsserts ) return; + if( _reentrant_lock ) { // Re-entrant assertions are bad mojo -- trap immediately. @@ -82,9 +86,6 @@ void Pcsx2App::OnAssertFailure( const wxChar *file, int line, const wxChar *func _reentrant_lock = true; - // Used to allow the user to suppress future assertions during this application's session. 
- static bool disableAsserts = false; - wxString dbgmsg; dbgmsg.reserve( 2048 ); diff --git a/pcsx2/x86/BaseblockEx.h b/pcsx2/x86/BaseblockEx.h index 605641f887..b98503a844 100644 --- a/pcsx2/x86/BaseblockEx.h +++ b/pcsx2/x86/BaseblockEx.h @@ -46,20 +46,33 @@ struct BASEBLOCKEX class BaseBlocks { -private: +protected: + typedef std::multimap::iterator linkiter_t; + // switch to a hash map later? std::multimap links; - typedef std::multimap::iterator linkiter_t; uptr recompiler; std::vector blocks; public: + BaseBlocks() : + recompiler( NULL ) + , blocks(0) + { + blocks.reserve(0x4000); + } + BaseBlocks(uptr recompiler_) : recompiler(recompiler_), blocks(0) { blocks.reserve(0x4000); } + + void SetJITCompile( void (*recompiler_)() ) + { + recompiler = (uptr)recompiler_; + } BASEBLOCKEX* New(u32 startpc, uptr fnptr); int LastIndex (u32 startpc) const; diff --git a/pcsx2/x86/ix86-32/aR5900-32.S b/pcsx2/x86/ix86-32/aR5900-32.S deleted file mode 100644 index 13db39b2b3..0000000000 --- a/pcsx2/x86/ix86-32/aR5900-32.S +++ /dev/null @@ -1,57 +0,0 @@ - -// iR5900.c assembly routines - -.intel_syntax noprefix - -////////////////////////////////////////////////////////////////////////// -// Note that iR3000A.S and iR5900.S asm code is now identical. Only some -// function names and the following two defines should ever differ: - -#define REGINFO cpuRegs -#define RECLUT recLUT -#define PCOFFSET 0x2a8 // this must always match what Pcsx2 displays at startup - -////////////////////////////////////////////////////////////////////////// -// Preprocessor Mess! - -.extern REGINFO -.extern RECLUT -.extern recRecompile -.extern recEventTest - -////////////////////////////////////////////////////////////////////////// -// The address for all cleared blocks. It recompiles the current pc and then -// dispatches to the recompiled block address. 
-.global JITCompile -JITCompile: - - mov esi, dword ptr [REGINFO + PCOFFSET] - push esi - call recRecompile - add esp, 4 - mov ebx, esi - shr esi, 16 - mov ecx, dword ptr [RECLUT+esi*4] - jmp dword ptr [ecx+ebx] - -.global JITCompileInBlock -JITCompileInBlock: - jmp JITCompile - -////////////////////////////////////////////////////////////////////////// -// called when jumping to variable pc address. - -.globl DispatcherReg -DispatcherReg: - - mov eax, dword ptr [REGINFO + PCOFFSET] - mov ebx, eax - shr eax, 16 - mov ecx, dword ptr [RECLUT+eax*4] - jmp dword ptr [ecx+ebx] - -.globl DispatcherEvent -DispatcherEvent: - - call recEventTest - jmp DispatcherReg diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index a9c349138d..92254332bb 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -79,8 +79,7 @@ static BASEBLOCK *recRAM = NULL; // and the ptr to the blocks here static BASEBLOCK *recROM = NULL; // and here static BASEBLOCK *recROM1 = NULL; // also here static u32 *recRAMCopy = NULL; -void JITCompile(); -static BaseBlocks recBlocks((uptr)JITCompile); +static BaseBlocks recBlocks; static u8* recPtr = NULL; static u32 *recConstBufPtr = NULL; EEINST* s_pInstCache = NULL; @@ -310,6 +309,188 @@ u32* recGetImm64(u32 hi, u32 lo) return imm64; } +// ===================================================================================================== +// R5900 Dispatchers +// ===================================================================================================== + +extern "C" void recEventTest(); + +static u32 g_lastpc = 0; +static u32 s_store_ebp, s_store_esp; + +// Recompiled code buffer for EE recompiler dispatchers! 
+static u8 __pagealigned eeRecDispatchers[0x1000]; + +typedef void DynGenFunc(); + +static DynGenFunc* DispatcherEvent = NULL; +static DynGenFunc* DispatcherReg = NULL; +static DynGenFunc* JITCompile = NULL; +static DynGenFunc* JITCompileInBlock = NULL; +static DynGenFunc* EnterRecompiledCode = NULL; + +// parameters: +// espORebp - 0 for ESP, or 1 for EBP. +// regval - current value of the register at the time the fault was detected (predates the +// stackframe setup code in this function) +static void __fastcall StackFrameCheckFailed( int espORebp, int regval ) +{ + pxFailDev( wxsFormat( L"(Stackframe) Sanitycheck Failed on %s\n\tCurrent=%d; Saved=%d", + (espORebp==0) ? L"ESP" : L"EBP", regval, (espORebp==0) ? s_store_esp : s_store_ebp ) + ); + + // Note: The recompiler will attempt to recover ESP and EBP after returning from this function, + // so typically selecting Continue/Ignore/Cancel for this assertion should allow PCSX2 to con- + // tinue to run with some degree of stability. +} + +static void _DynGen_StackFrameCheck() +{ + if( true ) return; // NOTE(review): unconditional early-out, so none of the checks below are emitted. + + // --------- EBP Here ----------- + + xCMP( ebp, &s_store_ebp ); + xForwardJE8 skipassert_ebp; + + xMOV( ecx, 1 ); // espORebp = 1 (EBP); __fastcall passes the first argument in ecx + xMOV( edx, ebp ); // regval = current ebp; __fastcall passes the second argument in edx + xCALL( StackFrameCheckFailed ); + xMOV( ebp, &s_store_ebp ); // half-hearted frame recovery attempt! + + skipassert_ebp.SetTarget(); + + // --------- ESP There ----------- + + xCMP( esp, &s_store_esp ); + xForwardJE8 skipassert_esp; + + xMOV( ecx, 0 ); // espORebp = 0 (ESP), so the failure is reported against ESP and s_store_esp + xMOV( edx, esp ); // regval = current esp + xCALL( StackFrameCheckFailed ); + xMOV( esp, &s_store_esp ); // half-hearted frame recovery attempt! + + skipassert_esp.SetTarget(); +} + +// The address for all cleared blocks. It recompiles the current pc and then +// dispatches to the recompiled block address. 
+static DynGenFunc* _DynGen_JITCompile() +{ + u8* retval = xGetPtr(); + _DynGen_StackFrameCheck(); + + xMOV( esi, &cpuRegs.pc ); + xPUSH( esi ); // pc is the single stack argument to recRecompile + xCALL( recRecompile ); + xADD( esp, 4 ); // discard the pushed argument + xMOV( ebx, esi ); // NOTE(review): relies on recRecompile preserving esi -- confirm calling convention + xSHR( esi, 16 ); + xMOV( ecx, ptr32[recLUT + (esi*4)] ); // ecx = recLUT[pc >> 16] + xJMP( ptr32[ecx+ebx] ); // jump to the address stored at ecx + pc + + return (DynGenFunc*)retval; +} + +static DynGenFunc* _DynGen_JITCompileInBlock() +{ + u8* retval = xGetPtr(); + xJMP( JITCompile ); // JITCompile must already hold the generated stub when this is emitted + return (DynGenFunc*)retval; +} + +// called when jumping to variable pc address +static DynGenFunc* _DynGen_DispatcherReg() +{ + u8* retval = xGetPtr(); + _DynGen_StackFrameCheck(); + + xMOV( eax, &cpuRegs.pc ); + xMOV( ebx, eax ); + xSHR( eax, 16 ); + xMOV( ecx, ptr32[recLUT + (eax*4)] ); // ecx = recLUT[pc >> 16]; same ptr32 form as _DynGen_JITCompile + xJMP( ptr32[ecx+ebx] ); // jump to the address stored at ecx + pc + + return (DynGenFunc*)retval; +} + +static DynGenFunc* _DynGen_EnterRecompiledCode() +{ + u8* retval = xGetPtr(); + + // "standard" frame pointer setup for aligned stack: Record the original + // esp into ebp, and then align esp. ebp references the original esp base + // for the duration of our function, and is used to restore the original + // esp before returning from the function + + // Optimization: We "allocate" 0x20 bytes of stack ahead of time here. The first + // 16 bytes are used for saving esi, edi, and ebx. The second 16 bytes are used + // for passing parameters to stdcall/cdecl functions. 
+ + xPUSH( ebp ); + xMOV( ebp, esp ); + xAND( esp, -0x10 ); // round esp down to a 16-byte boundary + xSUB( esp, 0x20 ); // reserve the 0x20-byte scratch area described above + + xMOV( &s_store_ebp, ebp ); + xMOV( &s_store_esp, esp ); + + xMOV( ptr[esp+0x18], edi ); + xMOV( ptr[esp+0x14], esi ); + xMOV( ptr[esp+0x10], ebx ); + + //xPUSH( edi ); + //xPUSH( esi ); + //xPUSH( ebx ); + + xCALL( ptr32[&DispatcherReg] ); // indirect call through the DispatcherReg pointer variable + + //xPOP( ebx ); + //xPOP( esi ); + //xPOP( edi ); + + //xMOV( esp, ebp ); + //xPOP( ebp ); + //xRET(); + + _DynGen_StackFrameCheck(); + + xMOV( edi, ptr[esp+0x18] ); + xMOV( esi, ptr[esp+0x14] ); + xMOV( ebx, ptr[esp+0x10] ); + xMOV( esp, ebp ); // restore the pre-alignment esp recorded in ebp + xPOP( ebp ); + xRET(); + + return (DynGenFunc*)retval; +} + +static void _DynGen_Dispatchers() +{ + // In case init gets called multiple times: + HostSys::MemProtect( eeRecDispatchers, 0x1000, Protect_ReadWrite, false ); + + // clear the buffer to 0xcc (easier debugging). + memset_8<0xcc,0x1000>( eeRecDispatchers ); + + xSetPtr( eeRecDispatchers ); + + // Place the EventTest and DispatcherReg stuff at the top, because they get called the + // most and stand to benefit from strong alignment and direct referencing. + DispatcherEvent = (DynGenFunc*)xGetPtr(); + xCALL( recEventTest ); // no jump is emitted after this call: execution continues into the DispatcherReg code generated next + DispatcherReg = _DynGen_DispatcherReg(); + + JITCompile = _DynGen_JITCompile(); + JITCompileInBlock = _DynGen_JITCompileInBlock(); + EnterRecompiledCode = _DynGen_EnterRecompiledCode(); + + HostSys::MemProtect( eeRecDispatchers, 0x1000, Protect_ReadOnly, true ); // NOTE(review): trailing 'true' presumably grants execute permission -- confirm against HostSys::MemProtect + + recBlocks.SetJITCompile( JITCompile ); // give recBlocks the address of the generated JITCompile stub +} + + ////////////////////////////////////////////////////////////////////////////////////////// // static const int REC_CACHEMEM = 0x01000000; @@ -377,6 +558,7 @@ static void recAlloc() // No errors.. 
Proceed with initialization: ProfilerRegisterSource( "EERec", recMem, REC_CACHEMEM+0x1000 ); + _DynGen_Dispatchers(); x86FpuState = FPU_STATE; } @@ -479,9 +661,8 @@ void recStep( void ) { } -extern "C" -{ -void recEventTest() + +extern "C" void recEventTest() { #ifdef PCSX2_DEVBUILD // dont' remove this check unless doing an official release @@ -500,62 +681,6 @@ void recEventTest() assert( !g_globalXMMSaved && !g_globalMMXSaved); #endif } -} - -//////////////////////////////////////////////////// - -static u32 g_lastpc = 0; - -#ifdef _MSC_VER - -// The address for all cleared blocks. It recompiles the current pc and then -// dispatches to the recompiled block address. -static __naked void JITCompile() -{ - __asm { - mov esi, dword ptr [cpuRegs.pc] - push esi - call recRecompile - add esp, 4 - mov ebx, esi - shr esi, 16 - mov ecx, dword ptr [recLUT+esi*4] - jmp dword ptr [ecx+ebx] - } -} - -static __naked void JITCompileInBlock() -{ - __asm { - jmp JITCompile - } -} - -// called when jumping to variable pc address -static void __naked DispatcherReg() -{ - __asm { - mov eax, dword ptr [cpuRegs.pc] - mov ebx, eax - shr eax, 16 - mov ecx, dword ptr [recLUT+eax*4] - jmp dword ptr [ecx+ebx] - } -} - -// [TODO] : Replace these functions with x86Emitter-generated code and we can compound this -// function and DispatcherReg() into a fast fall-through case (removes the DispatcerReg jump -// in this function, since execution will just fall right into the DispatcherReg implementation). 
-// -static void __naked DispatcherEvent() -{ - __asm - { - call recEventTest; - jmp DispatcherReg; - } -} -#endif static void recExecute() { @@ -575,26 +700,8 @@ static void recExecute() g_EEFreezeRegs = true; - try - { - - #ifdef _MSC_VER - __asm - { - push ebx - push esi - push edi - - call DispatcherReg - - pop edi - pop esi - pop ebx - } - - #else // _MSC_VER - DispatcherReg(); - #endif + try { + EnterRecompiledCode(); } catch( Exception::ForceDispatcherReg& ) { @@ -608,12 +715,9 @@ static void recExecute() g_EEFreezeRegs = false; } -namespace R5900 { -namespace Dynarec { -namespace OpcodeImpl { - //////////////////////////////////////////////////// -void recSYSCALL( void ) { +void R5900::Dynarec::OpcodeImpl::recSYSCALL( void ) +{ MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code ); MOV32ItoM( (uptr)&cpuRegs.pc, pc ); iFlushCall(FLUSH_NODESTROY); @@ -622,13 +726,14 @@ void recSYSCALL( void ) { CMP32ItoM((uptr)&cpuRegs.pc, pc); j8Ptr[0] = JE8(0); ADD32ItoM((uptr)&cpuRegs.cycle, eeScaleBlockCycles()); - JMP32((uptr)DispatcherReg - ( (uptr)x86Ptr + 5 )); + xJMP( DispatcherReg ); x86SetJ8(j8Ptr[0]); //branch = 2; } //////////////////////////////////////////////////// -void recBREAK( void ) { +void R5900::Dynarec::OpcodeImpl::recBREAK( void ) +{ MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code ); MOV32ItoM( (uptr)&cpuRegs.pc, pc ); iFlushCall(FLUSH_EVERYTHING); @@ -637,13 +742,11 @@ void recBREAK( void ) { CMP32ItoM((uptr)&cpuRegs.pc, pc); j8Ptr[0] = JE8(0); ADD32ItoM((uptr)&cpuRegs.cycle, eeScaleBlockCycles()); - RET(); + xJMP( DispatcherEvent ); x86SetJ8(j8Ptr[0]); //branch = 2; } -} } } // end namespace R5900::Dynarec::OpcodeImpl - // Clears the recLUT table so that all blocks are mapped to the JIT recompiler by default. 
static __releaseinline void ClearRecLUT(BASEBLOCK* base, int count) { @@ -1136,8 +1239,9 @@ static void printfn() static int curcount = 0; const int skip = 0; - assert( !g_globalMMXSaved ); - assert( !g_globalXMMSaved ); + pxAssert( !g_globalMMXSaved && !g_globalXMMSaved ); + + //pxAssert( cpuRegs.pc != 0x80001300 ); if( (dumplog&2) && g_lastpc != 0x81fc0 ) {//&& lastrec != g_lastpc ) { curcount++; @@ -1151,18 +1255,16 @@ static void printfn() } } -u32 s_recblocks[] = {0}; - -void badespfn() { - Console.Error("Bad esp!"); - assert(0); -} +static u32 s_recblocks[] = {0}; // Called when a block under manual protection fails it's pre-execution integrity check. void __fastcall dyna_block_discard(u32 start,u32 sz) { DevCon.WriteLn("dyna_block_discard .. start=0x%08X size=%d", start, sz*4); recClear(start, sz); + + // Note: this function is accessed via a JMP, and thus the RET here will exit + // recompiled code and take us back to recExecute. } // called when a block under manual protection has been run enough times to be a @@ -1172,6 +1274,9 @@ void __fastcall dyna_page_reset(u32 start,u32 sz) recClear(start & ~0xfffUL, 0x400); manual_counter[start >> 12]++; mmap_MarkCountedRamPage( start ); + + // Note: this function is accessed via a JMP, and thus the RET here will exit + // recompiled code and take us back to recExecute. } void recRecompile( const u32 startpc )