mirror of https://github.com/PCSX2/pcsx2.git
aligned_stack branch: Preliminary work for having the recompilers align the stack to 16 bytes, to appease the expectations of GCC and Mac OSX.
Done: * EEcore aligns stack on entry and performs routine checks of ESP and EBP integrity. * Dispatchers are recoded using the PCSX2 Emitter. Todo: * Convert all C-code function calls from the recompiler to use the new aligned stack setup. * Implement stack-alignment checks and emitter-based dispatchers for the IOP. git-svn-id: http://pcsx2.googlecode.com/svn/branches/aligned_stack@2024 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
0880cadf64
commit
87cbaaf5ca
|
@ -232,10 +232,12 @@ namespace x86Emitter
|
|||
else if( Index.IsEmpty() )
|
||||
{
|
||||
Index = src.Index;
|
||||
Factor = 1;
|
||||
Factor = src.Factor;
|
||||
}
|
||||
else if( Index == src.Index )
|
||||
Factor++;
|
||||
{
|
||||
Factor += src.Factor;
|
||||
}
|
||||
else
|
||||
pxFailDev( L"x86Emitter: address modifiers cannot have more than two index registers." ); // oops, only 2 regs allowed per ModRm!
|
||||
|
||||
|
|
|
@ -47,11 +47,6 @@ extern "C"
|
|||
void iopJITCompileInBlock();
|
||||
void iopDispatcherReg();
|
||||
|
||||
// aR5900-32.S
|
||||
void JITCompile();
|
||||
void JITCompileInBlock();
|
||||
void DispatcherReg();
|
||||
void DispatcherEvent();
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -74,6 +74,10 @@ static __threadlocal bool _reentrant_lock = false;
|
|||
// via messages.
|
||||
void Pcsx2App::OnAssertFailure( const wxChar *file, int line, const wxChar *func, const wxChar *cond, const wxChar *msg )
|
||||
{
|
||||
// Used to allow the user to suppress future assertions during this application's session.
|
||||
static bool disableAsserts = false;
|
||||
if( disableAsserts ) return;
|
||||
|
||||
if( _reentrant_lock )
|
||||
{
|
||||
// Re-entrant assertions are bad mojo -- trap immediately.
|
||||
|
@ -82,9 +86,6 @@ void Pcsx2App::OnAssertFailure( const wxChar *file, int line, const wxChar *func
|
|||
|
||||
_reentrant_lock = true;
|
||||
|
||||
// Used to allow the user to suppress future assertions during this application's session.
|
||||
static bool disableAsserts = false;
|
||||
|
||||
wxString dbgmsg;
|
||||
dbgmsg.reserve( 2048 );
|
||||
|
||||
|
|
|
@ -46,20 +46,33 @@ struct BASEBLOCKEX
|
|||
|
||||
class BaseBlocks
|
||||
{
|
||||
private:
|
||||
protected:
|
||||
typedef std::multimap<u32, uptr>::iterator linkiter_t;
|
||||
|
||||
// switch to a hash map later?
|
||||
std::multimap<u32, uptr> links;
|
||||
typedef std::multimap<u32, uptr>::iterator linkiter_t;
|
||||
uptr recompiler;
|
||||
std::vector<BASEBLOCKEX> blocks;
|
||||
|
||||
public:
|
||||
BaseBlocks() :
|
||||
recompiler( NULL )
|
||||
, blocks(0)
|
||||
{
|
||||
blocks.reserve(0x4000);
|
||||
}
|
||||
|
||||
BaseBlocks(uptr recompiler_) :
|
||||
recompiler(recompiler_),
|
||||
blocks(0)
|
||||
{
|
||||
blocks.reserve(0x4000);
|
||||
}
|
||||
|
||||
void SetJITCompile( void (*recompiler_)() )
|
||||
{
|
||||
recompiler = (uptr)recompiler_;
|
||||
}
|
||||
|
||||
BASEBLOCKEX* New(u32 startpc, uptr fnptr);
|
||||
int LastIndex (u32 startpc) const;
|
||||
|
|
|
@ -1,57 +0,0 @@
|
|||
|
||||
// iR5900.c assembly routines
|
||||
|
||||
.intel_syntax noprefix
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Note that iR3000A.S and iR5900.S asm code is now identical. Only some
|
||||
// function names and the following two defines should ever differ:
|
||||
|
||||
#define REGINFO cpuRegs
|
||||
#define RECLUT recLUT
|
||||
#define PCOFFSET 0x2a8 // this must always match what Pcsx2 displays at startup
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Preprocessor Mess!
|
||||
|
||||
.extern REGINFO
|
||||
.extern RECLUT
|
||||
.extern recRecompile
|
||||
.extern recEventTest
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// The address for all cleared blocks. It recompiles the current pc and then
|
||||
// dispatches to the recompiled block address.
|
||||
.global JITCompile
|
||||
JITCompile:
|
||||
|
||||
mov esi, dword ptr [REGINFO + PCOFFSET]
|
||||
push esi
|
||||
call recRecompile
|
||||
add esp, 4
|
||||
mov ebx, esi
|
||||
shr esi, 16
|
||||
mov ecx, dword ptr [RECLUT+esi*4]
|
||||
jmp dword ptr [ecx+ebx]
|
||||
|
||||
.global JITCompileInBlock
|
||||
JITCompileInBlock:
|
||||
jmp JITCompile
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// called when jumping to variable pc address.
|
||||
|
||||
.globl DispatcherReg
|
||||
DispatcherReg:
|
||||
|
||||
mov eax, dword ptr [REGINFO + PCOFFSET]
|
||||
mov ebx, eax
|
||||
shr eax, 16
|
||||
mov ecx, dword ptr [RECLUT+eax*4]
|
||||
jmp dword ptr [ecx+ebx]
|
||||
|
||||
.globl DispatcherEvent
|
||||
DispatcherEvent:
|
||||
|
||||
call recEventTest
|
||||
jmp DispatcherReg
|
|
@ -79,8 +79,7 @@ static BASEBLOCK *recRAM = NULL; // and the ptr to the blocks here
|
|||
static BASEBLOCK *recROM = NULL; // and here
|
||||
static BASEBLOCK *recROM1 = NULL; // also here
|
||||
static u32 *recRAMCopy = NULL;
|
||||
void JITCompile();
|
||||
static BaseBlocks recBlocks((uptr)JITCompile);
|
||||
static BaseBlocks recBlocks;
|
||||
static u8* recPtr = NULL;
|
||||
static u32 *recConstBufPtr = NULL;
|
||||
EEINST* s_pInstCache = NULL;
|
||||
|
@ -310,6 +309,188 @@ u32* recGetImm64(u32 hi, u32 lo)
|
|||
return imm64;
|
||||
}
|
||||
|
||||
// =====================================================================================================
|
||||
// R5900 Dispatchers
|
||||
// =====================================================================================================
|
||||
|
||||
extern "C" void recEventTest();
|
||||
|
||||
static u32 g_lastpc = 0;
|
||||
static u32 s_store_ebp, s_store_esp;
|
||||
|
||||
// Recompiled code buffer for EE recompiler dispatchers!
|
||||
static u8 __pagealigned eeRecDispatchers[0x1000];
|
||||
|
||||
typedef void DynGenFunc();
|
||||
|
||||
static DynGenFunc* DispatcherEvent = NULL;
|
||||
static DynGenFunc* DispatcherReg = NULL;
|
||||
static DynGenFunc* JITCompile = NULL;
|
||||
static DynGenFunc* JITCompileInBlock = NULL;
|
||||
static DynGenFunc* EnterRecompiledCode = NULL;
|
||||
|
||||
// parameters:
|
||||
// espORebp - 0 for ESP, or 1 for EBP.
|
||||
// regval - current value of the register at the time the fault was detected (predates the
|
||||
// stackframe setup code in this function)
|
||||
static void __fastcall StackFrameCheckFailed( int espORebp, int regval )
|
||||
{
|
||||
pxFailDev( wxsFormat( L"(Stackframe) Sanitycheck Failed on %s\n\tCurrent=%d; Saved=%d",
|
||||
(espORebp==0) ? L"ESP" : L"EBP", regval, (espORebp==0) ? s_store_esp : s_store_ebp )
|
||||
);
|
||||
|
||||
// Note: The recompiler will attempt to recover ESP and EBP after returning from this function,
|
||||
// so typically selecting Continue/Ignore/Cancel for this assertion should allow PCSX2 to con-
|
||||
// tinue to run with some degree of stability.
|
||||
}
|
||||
|
||||
static void _DynGen_StackFrameCheck()
|
||||
{
|
||||
if( true ) return;
|
||||
|
||||
// --------- EBP Here -----------
|
||||
|
||||
xCMP( ebp, &s_store_ebp );
|
||||
xForwardJE8 skipassert_ebp;
|
||||
|
||||
xMOV( ecx, 1 );
|
||||
xMOV( edx, ebp );
|
||||
xCALL( StackFrameCheckFailed );
|
||||
xMOV( ebp, &s_store_ebp ); // half-hearted frame recovery attempt!
|
||||
|
||||
skipassert_ebp.SetTarget();
|
||||
|
||||
// --------- ESP There -----------
|
||||
|
||||
xCMP( esp, &s_store_esp );
|
||||
xForwardJE8 skipassert_esp;
|
||||
|
||||
xMOV( ecx, 1 );
|
||||
xMOV( edx, esp );
|
||||
xCALL( StackFrameCheckFailed );
|
||||
xMOV( esp, &s_store_esp ); // half-hearted frame recovery attempt!
|
||||
|
||||
skipassert_esp.SetTarget();
|
||||
}
|
||||
|
||||
// The address for all cleared blocks. It recompiles the current pc and then
|
||||
// dispatches to the recompiled block address.
|
||||
static DynGenFunc* _DynGen_JITCompile()
|
||||
{
|
||||
u8* retval = xGetPtr();
|
||||
_DynGen_StackFrameCheck();
|
||||
|
||||
xMOV( esi, &cpuRegs.pc );
|
||||
xPUSH( esi );
|
||||
xCALL( recRecompile );
|
||||
xADD( esp, 4 );
|
||||
xMOV( ebx, esi );
|
||||
xSHR( esi, 16 );
|
||||
xMOV( ecx, ptr32[recLUT + (esi*4)] );
|
||||
xJMP( ptr32[ecx+ebx] );
|
||||
|
||||
return (DynGenFunc*)retval;
|
||||
}
|
||||
|
||||
static DynGenFunc* _DynGen_JITCompileInBlock()
|
||||
{
|
||||
u8* retval = xGetPtr();
|
||||
xJMP( JITCompile );
|
||||
return (DynGenFunc*)retval;
|
||||
}
|
||||
|
||||
// called when jumping to variable pc address
|
||||
static DynGenFunc* _DynGen_DispatcherReg()
|
||||
{
|
||||
u8* retval = xGetPtr();
|
||||
_DynGen_StackFrameCheck();
|
||||
|
||||
xMOV( eax, &cpuRegs.pc );
|
||||
xMOV( ebx, eax );
|
||||
xSHR( eax, 16 );
|
||||
xMOV( ecx, ptr[recLUT + (eax*4)] );
|
||||
xJMP( ptr32[ecx+ebx] );
|
||||
|
||||
return (DynGenFunc*)retval;
|
||||
}
|
||||
|
||||
static DynGenFunc* _DynGen_EnterRecompiledCode()
|
||||
{
|
||||
u8* retval = xGetPtr();
|
||||
|
||||
// "standard" frame pointer setup for aligned stack: Record the original
|
||||
// esp into ebp, and then align esp. ebp references the original esp base
|
||||
// for the duration of our function, and is used to restore the original
|
||||
// esp before returning from the function
|
||||
|
||||
// Optimization: We "allocate" 0x20 bytes of stack ahead of time here. The first
|
||||
// 16 bytes are used for saving esi, edi, and ebx. The second 16 bytes are used
|
||||
// for passing parameters to stdcall/cdecl functions.
|
||||
|
||||
xPUSH( ebp );
|
||||
xMOV( ebp, esp );
|
||||
xAND( esp, -0x10 );
|
||||
xSUB( esp, 0x20 );
|
||||
|
||||
xMOV( &s_store_ebp, ebp );
|
||||
xMOV( &s_store_esp, esp );
|
||||
|
||||
xMOV( ptr[esp+0x18], edi );
|
||||
xMOV( ptr[esp+0x14], esi );
|
||||
xMOV( ptr[esp+0x10], ebx );
|
||||
|
||||
//xPUSH( edi );
|
||||
//xPUSH( esi );
|
||||
//xPUSH( ebx );
|
||||
|
||||
xCALL( ptr32[&DispatcherReg] );
|
||||
|
||||
//xPOP( ebx );
|
||||
//xPOP( esi );
|
||||
//xPOP( edi );
|
||||
|
||||
//xMOV( esp, ebp );
|
||||
//xPOP( ebp );
|
||||
//xRET();
|
||||
|
||||
_DynGen_StackFrameCheck();
|
||||
|
||||
xMOV( edi, ptr[esp+0x18] );
|
||||
xMOV( esi, ptr[esp+0x14] );
|
||||
xMOV( ebx, ptr[esp+0x10] );
|
||||
xMOV( esp, ebp );
|
||||
xPOP( ebp );
|
||||
xRET();
|
||||
|
||||
return (DynGenFunc*)retval;
|
||||
}
|
||||
|
||||
static void _DynGen_Dispatchers()
|
||||
{
|
||||
// In case init gets called multiple times:
|
||||
HostSys::MemProtect( eeRecDispatchers, 0x1000, Protect_ReadWrite, false );
|
||||
|
||||
// clear the buffer to 0xcc (easier debugging).
|
||||
memset_8<0xcc,0x1000>( eeRecDispatchers );
|
||||
|
||||
xSetPtr( eeRecDispatchers );
|
||||
|
||||
// Place the EventTest and DispatcherReg stuff at the top, because they get called the
|
||||
// most and stand to benefit from strong alignment and direct referencing.
|
||||
DispatcherEvent = (DynGenFunc*)xGetPtr();
|
||||
xCALL( recEventTest );
|
||||
DispatcherReg = _DynGen_DispatcherReg();
|
||||
|
||||
JITCompile = _DynGen_JITCompile();
|
||||
JITCompileInBlock = _DynGen_JITCompileInBlock();
|
||||
EnterRecompiledCode = _DynGen_EnterRecompiledCode();
|
||||
|
||||
HostSys::MemProtect( eeRecDispatchers, 0x1000, Protect_ReadOnly, true );
|
||||
|
||||
recBlocks.SetJITCompile( JITCompile );
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
static const int REC_CACHEMEM = 0x01000000;
|
||||
|
@ -377,6 +558,7 @@ static void recAlloc()
|
|||
// No errors.. Proceed with initialization:
|
||||
|
||||
ProfilerRegisterSource( "EERec", recMem, REC_CACHEMEM+0x1000 );
|
||||
_DynGen_Dispatchers();
|
||||
|
||||
x86FpuState = FPU_STATE;
|
||||
}
|
||||
|
@ -479,9 +661,8 @@ void recStep( void )
|
|||
{
|
||||
}
|
||||
|
||||
extern "C"
|
||||
{
|
||||
void recEventTest()
|
||||
|
||||
extern "C" void recEventTest()
|
||||
{
|
||||
#ifdef PCSX2_DEVBUILD
|
||||
// dont' remove this check unless doing an official release
|
||||
|
@ -500,62 +681,6 @@ void recEventTest()
|
|||
assert( !g_globalXMMSaved && !g_globalMMXSaved);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
|
||||
static u32 g_lastpc = 0;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
||||
// The address for all cleared blocks. It recompiles the current pc and then
|
||||
// dispatches to the recompiled block address.
|
||||
static __naked void JITCompile()
|
||||
{
|
||||
__asm {
|
||||
mov esi, dword ptr [cpuRegs.pc]
|
||||
push esi
|
||||
call recRecompile
|
||||
add esp, 4
|
||||
mov ebx, esi
|
||||
shr esi, 16
|
||||
mov ecx, dword ptr [recLUT+esi*4]
|
||||
jmp dword ptr [ecx+ebx]
|
||||
}
|
||||
}
|
||||
|
||||
static __naked void JITCompileInBlock()
|
||||
{
|
||||
__asm {
|
||||
jmp JITCompile
|
||||
}
|
||||
}
|
||||
|
||||
// called when jumping to variable pc address
|
||||
static void __naked DispatcherReg()
|
||||
{
|
||||
__asm {
|
||||
mov eax, dword ptr [cpuRegs.pc]
|
||||
mov ebx, eax
|
||||
shr eax, 16
|
||||
mov ecx, dword ptr [recLUT+eax*4]
|
||||
jmp dword ptr [ecx+ebx]
|
||||
}
|
||||
}
|
||||
|
||||
// [TODO] : Replace these functions with x86Emitter-generated code and we can compound this
|
||||
// function and DispatcherReg() into a fast fall-through case (removes the DispatcerReg jump
|
||||
// in this function, since execution will just fall right into the DispatcherReg implementation).
|
||||
//
|
||||
static void __naked DispatcherEvent()
|
||||
{
|
||||
__asm
|
||||
{
|
||||
call recEventTest;
|
||||
jmp DispatcherReg;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static void recExecute()
|
||||
{
|
||||
|
@ -575,26 +700,8 @@ static void recExecute()
|
|||
|
||||
g_EEFreezeRegs = true;
|
||||
|
||||
try
|
||||
{
|
||||
|
||||
#ifdef _MSC_VER
|
||||
__asm
|
||||
{
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
|
||||
call DispatcherReg
|
||||
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebx
|
||||
}
|
||||
|
||||
#else // _MSC_VER
|
||||
DispatcherReg();
|
||||
#endif
|
||||
try {
|
||||
EnterRecompiledCode();
|
||||
}
|
||||
catch( Exception::ForceDispatcherReg& )
|
||||
{
|
||||
|
@ -608,12 +715,9 @@ static void recExecute()
|
|||
g_EEFreezeRegs = false;
|
||||
}
|
||||
|
||||
namespace R5900 {
|
||||
namespace Dynarec {
|
||||
namespace OpcodeImpl {
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
void recSYSCALL( void ) {
|
||||
void R5900::Dynarec::OpcodeImpl::recSYSCALL( void )
|
||||
{
|
||||
MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code );
|
||||
MOV32ItoM( (uptr)&cpuRegs.pc, pc );
|
||||
iFlushCall(FLUSH_NODESTROY);
|
||||
|
@ -622,13 +726,14 @@ void recSYSCALL( void ) {
|
|||
CMP32ItoM((uptr)&cpuRegs.pc, pc);
|
||||
j8Ptr[0] = JE8(0);
|
||||
ADD32ItoM((uptr)&cpuRegs.cycle, eeScaleBlockCycles());
|
||||
JMP32((uptr)DispatcherReg - ( (uptr)x86Ptr + 5 ));
|
||||
xJMP( DispatcherReg );
|
||||
x86SetJ8(j8Ptr[0]);
|
||||
//branch = 2;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
void recBREAK( void ) {
|
||||
void R5900::Dynarec::OpcodeImpl::recBREAK( void )
|
||||
{
|
||||
MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code );
|
||||
MOV32ItoM( (uptr)&cpuRegs.pc, pc );
|
||||
iFlushCall(FLUSH_EVERYTHING);
|
||||
|
@ -637,13 +742,11 @@ void recBREAK( void ) {
|
|||
CMP32ItoM((uptr)&cpuRegs.pc, pc);
|
||||
j8Ptr[0] = JE8(0);
|
||||
ADD32ItoM((uptr)&cpuRegs.cycle, eeScaleBlockCycles());
|
||||
RET();
|
||||
xJMP( DispatcherEvent );
|
||||
x86SetJ8(j8Ptr[0]);
|
||||
//branch = 2;
|
||||
}
|
||||
|
||||
} } } // end namespace R5900::Dynarec::OpcodeImpl
|
||||
|
||||
// Clears the recLUT table so that all blocks are mapped to the JIT recompiler by default.
|
||||
static __releaseinline void ClearRecLUT(BASEBLOCK* base, int count)
|
||||
{
|
||||
|
@ -1136,8 +1239,9 @@ static void printfn()
|
|||
static int curcount = 0;
|
||||
const int skip = 0;
|
||||
|
||||
assert( !g_globalMMXSaved );
|
||||
assert( !g_globalXMMSaved );
|
||||
pxAssert( !g_globalMMXSaved && !g_globalXMMSaved );
|
||||
|
||||
//pxAssert( cpuRegs.pc != 0x80001300 );
|
||||
|
||||
if( (dumplog&2) && g_lastpc != 0x81fc0 ) {//&& lastrec != g_lastpc ) {
|
||||
curcount++;
|
||||
|
@ -1151,18 +1255,16 @@ static void printfn()
|
|||
}
|
||||
}
|
||||
|
||||
u32 s_recblocks[] = {0};
|
||||
|
||||
void badespfn() {
|
||||
Console.Error("Bad esp!");
|
||||
assert(0);
|
||||
}
|
||||
static u32 s_recblocks[] = {0};
|
||||
|
||||
// Called when a block under manual protection fails it's pre-execution integrity check.
|
||||
void __fastcall dyna_block_discard(u32 start,u32 sz)
|
||||
{
|
||||
DevCon.WriteLn("dyna_block_discard .. start=0x%08X size=%d", start, sz*4);
|
||||
recClear(start, sz);
|
||||
|
||||
// Note: this function is accessed via a JMP, and thus the RET here will exit
|
||||
// recompiled code and take us back to recExecute.
|
||||
}
|
||||
|
||||
// called when a block under manual protection has been run enough times to be a
|
||||
|
@ -1172,6 +1274,9 @@ void __fastcall dyna_page_reset(u32 start,u32 sz)
|
|||
recClear(start & ~0xfffUL, 0x400);
|
||||
manual_counter[start >> 12]++;
|
||||
mmap_MarkCountedRamPage( start );
|
||||
|
||||
// Note: this function is accessed via a JMP, and thus the RET here will exit
|
||||
// recompiled code and take us back to recExecute.
|
||||
}
|
||||
|
||||
void recRecompile( const u32 startpc )
|
||||
|
|
Loading…
Reference in New Issue