Merge pull request #1100 from PCSX2/recompiler-abi-wrapper

Recompiler abi wrapper
This commit is contained in:
Gregory Hainaut 2016-01-14 19:21:27 +01:00
commit a7a8c542f5
20 changed files with 362 additions and 579 deletions

View File

@ -68,5 +68,134 @@ struct xImpl_JmpCall
} }
}; };
// Yes, it is awful: the template code lives in a header that has a nice circular dependency, hence the forward declarations below.
extern const xImpl_Mov xMOV;
extern const xImpl_JmpCall xCALL;
struct xImpl_FastCall
{
	// Functor that emits a call to `func`, loading up to two arguments into
	// registers first: rdi/rsi when __x86_64__ is defined, ecx/edx otherwise.
	//
	// FIXME: the 64-bit variant is mostly a copy/paste of the 32-bit one;
	// potentially it would also need to push/pop some registers. It should be
	// enough to handle a first call though.
	//
	// The emission helpers are deliberately macros (type unsafety is nice):
	// they are expanded textually inside every operator() and pick up the
	// local names `func`, `a1` and `a2` whatever their types are.

#ifdef __x86_64__
#define XFASTCALL_ARG1 rdi
#define XFASTCALL_ARG2 rsi
#else
#define XFASTCALL_ARG1 ecx
#define XFASTCALL_ARG2 edx
#endif

	// Call without argument.
#define XFASTCALL \
	xCALL(func);

	// Call with one argument.
#define XFASTCALL1 \
	xMOV(XFASTCALL_ARG1, a1); \
	xCALL(func);

	// Call with two arguments.
#define XFASTCALL2 \
	xMOV(XFASTCALL_ARG1, a1); \
	xMOV(XFASTCALL_ARG2, a2); \
	xCALL(func);

	// Function pointer target, zero to two register arguments.
	// An empty register stands for "argument not supplied"; when a1 is empty
	// a2 is ignored.
	template< typename T > __fi __always_inline_tmpl_fail
	void operator()( T* func, const xRegister32& a1 = xEmptyReg, const xRegister32& a2 = xEmptyReg) const
	{
		if (a1.IsEmpty()) {
			XFASTCALL;
			return;
		}
		if (a2.IsEmpty()) {
			XFASTCALL1;
			return;
		}
		XFASTCALL2;
	}

	// Function pointer target, immediate first argument and register second argument.
	template< typename T > __fi __always_inline_tmpl_fail
	void operator()( T* func, u32 a1, const xRegister32& a2) const
	{
		XFASTCALL2;
	}

	// Function pointer target, single argument given as an indirect operand.
	template< typename T > __fi __always_inline_tmpl_fail
	void operator()( T* func, const xIndirectVoid& a1) const
	{
		XFASTCALL1;
	}

	// Function pointer target, two immediate arguments.
	template< typename T > __fi __always_inline_tmpl_fail
	void operator()( T* func, u32 a1, u32 a2) const
	{
		XFASTCALL2;
	}

	// Function pointer target, one immediate argument.
	template< typename T > __fi __always_inline_tmpl_fail
	void operator()( T* func, u32 a1) const
	{
		XFASTCALL1;
	}

	// Indirect call target, zero to two register arguments (same empty-register
	// convention as the function pointer overload).
	void operator()(const xIndirect32& func, const xRegister32& a1 = xEmptyReg, const xRegister32& a2 = xEmptyReg) const
	{
		if (a1.IsEmpty()) {
			XFASTCALL;
			return;
		}
		if (a2.IsEmpty()) {
			XFASTCALL1;
			return;
		}
		XFASTCALL2;
	}

	// The helpers are private to this struct: do not leak them to includers.
#undef XFASTCALL
#undef XFASTCALL1
#undef XFASTCALL2
#undef XFASTCALL_ARG1
#undef XFASTCALL_ARG2
};
} // End namespace x86Emitter } // End namespace x86Emitter

View File

@ -93,6 +93,7 @@ namespace x86Emitter
#else #else
extern const xImpl_JmpCall xCALL; extern const xImpl_JmpCall xCALL;
#endif #endif
extern const xImpl_FastCall xFastCall;
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
extern const xImpl_CMov extern const xImpl_CMov
@ -183,19 +184,15 @@ namespace x86Emitter
extern void xINTO(); extern void xINTO();
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
// Helper function to handle the various functions ABI // Helper object to handle the various functions ABI
extern void xFastCall(void* func, const xRegister32& a1 = xEmptyReg, const xRegister32& a2 = xEmptyReg);
extern void xFastCall(void* func, const xRegisterSSE& a1, const xRegisterSSE& a2);
extern void xFastCall(void* func, u32 a1, u32 a2);
extern void xFastCall(void* func, u32 a1);
extern void xStdCall(void* func, u32 a1);
class xScopedStackFrame class xScopedStackFrame
{ {
bool m_base_frame; bool m_base_frame;
bool m_save_base_pointer;
int m_offset;
xScopedStackFrame(bool base_frame); public:
xScopedStackFrame(bool base_frame, bool save_base_pointer = false, int offset = 0);
~xScopedStackFrame(); ~xScopedStackFrame();
}; };

View File

@ -42,6 +42,8 @@ void xImpl_JmpCall::operator()( const xIndirect16& src ) const { xOpWrite( 0x6
const xImpl_JmpCall xJMP = { true }; const xImpl_JmpCall xJMP = { true };
const xImpl_JmpCall xCALL = { false }; const xImpl_JmpCall xCALL = { false };
const xImpl_FastCall xFastCall = { };
void xSmartJump::SetTarget() void xSmartJump::SetTarget()
{ {
u8* target = xGetPtr(); u8* target = xGetPtr();

View File

@ -1022,123 +1022,99 @@ __emitinline void xRestoreReg( const xRegisterSSE& dest )
xMOVDQA( dest, ptr[&xmm_data[dest.Id*2]] ); xMOVDQA( dest, ptr[&xmm_data[dest.Id*2]] );
} }
//////////////////////////////////////////////////////////////////////////////////////////
// Helper function to handle the various functions ABI
// Emits a call to `func` with up to two register arguments placed in ecx/edx.
// An empty register means the corresponding argument is not supplied.
__emitinline void xFastCall(void *func, const xRegister32& a1, const xRegister32& a2)
{
#ifndef __x86_64__
	if (!a1.IsEmpty()) {
		xMOV(ecx, a1);
	}
	if (!a2.IsEmpty()) {
		xMOV(edx, a2);
	}
	xCALL(func);
#else
	// NEW ABI: nothing is emitted for x86-64 here, only an assertion.
	pxAssert(0);
#endif
}
// Emits a call to `func` whose two arguments are moved (xMOVD) from SSE
// registers into ecx and edx.
__emitinline void xFastCall(void *func, const xRegisterSSE& a1, const xRegisterSSE& a2)
{
#ifndef __x86_64__
	xMOVD(ecx, a1);
	xMOVD(edx, a2);
	xCALL(func);
#else
	// NEW ABI: nothing is emitted for x86-64 here, only an assertion.
	pxAssert(0);
#endif
}
// Emits a call to `func` with two immediate arguments loaded into ecx and edx.
__emitinline void xFastCall(void *func, u32 a1, u32 a2)
{
#ifndef __x86_64__
	xMOV(ecx, a1);
	xMOV(edx, a2);
	xCALL(func);
#else
	// NEW ABI: nothing is emitted for x86-64 here, only an assertion.
	pxAssert(0);
#endif
}
// Emits a call to `func` with one immediate argument loaded into ecx.
__emitinline void xFastCall(void *func, u32 a1)
{
#ifndef __x86_64__
	xMOV(ecx, a1);
	xCALL(func);
#else
	// NEW ABI: nothing is emitted for x86-64 here, only an assertion.
	pxAssert(0);
#endif
}
// Emits a call to `func` with one immediate argument pushed on the stack.
__emitinline void xStdCall(void *func, u32 a1)
{
#ifndef __x86_64__
	// GCC note: unlike a C call, GCC doesn't require
	// strict 16B alignment on a std call
	xPUSH(a1);
	xCALL(func);
#else
	// NEW ABI: nothing is emitted for x86-64 here, only an assertion.
	pxAssert(0);
#endif
}
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
// Helper object to handle ABI frame // Helper object to handle ABI frame
#ifdef __GNUC__
xScopedStackFrame::xScopedStackFrame(bool base_frame) #ifdef __x86_64__
// GCC ensures/requires stack to be 16 bytes aligned (but when?)
#define ALIGN_STACK(v) xADD(rsp, v)
#else
// GCC ensures/requires stack to be 16 bytes aligned before the call
// Call will store 4 bytes. EDI/ESI/EBX will take another 12 bytes.
// EBP will take 4 bytes if m_base_frame is enabled
#define ALIGN_STACK(v) xADD(esp, v)
#endif
#else
#define ALIGN_STACK(v)
#endif
xScopedStackFrame::xScopedStackFrame(bool base_frame, bool save_base_pointer, int offset)
{ {
m_base_frame = base_frame; m_base_frame = base_frame;
m_save_base_pointer = save_base_pointer;
m_offset = offset;
#ifdef __x86_64__ #ifdef __x86_64__
// NEW ABI
pxAssert(0); m_offset += 8; // Call stores the return address (8 bytes)
// Note rbp can surely be optimized in 64 bits
if (m_base_frame) {
xPUSH( rbp );
xMOV( rbp, rsp );
m_offset += 8;
} else if (m_save_base_pointer) {
xPUSH( rbp );
m_offset += 8;
}
xPUSH( rbx );
xPUSH( r12 );
xPUSH( r13 );
xPUSH( r14 );
xPUSH( r15 );
m_offset += 40;
#else #else
m_offset += 4; // Call stores the return address (4 bytes)
// Create a new frame // Create a new frame
if (m_base_frame) { if (m_base_frame) {
xPUSH( ebp ); xPUSH( ebp );
xMOV( ebp, esp ); xMOV( ebp, esp );
m_offset += 4;
} else if (m_save_base_pointer) {
xPUSH( ebp );
m_offset += 4;
} }
// Save the register context // Save the register context
xPUSH( edi ); xPUSH( edi );
xPUSH( esi ); xPUSH( esi );
xPUSH( ebx ); xPUSH( ebx );
m_offset += 12;
#ifdef __GNUC__
// Realign the stack to 16 byte
if (m_base_frame) {
xSUB( esp, 12);
}
#endif
#endif #endif
ALIGN_STACK(-(16 - m_offset % 16));
} }
xScopedStackFrame::~xScopedStackFrame() xScopedStackFrame::~xScopedStackFrame()
{ {
#ifdef __x86_64__ ALIGN_STACK(16 - m_offset % 16);
// NEW ABI
pxAssert(0);
#else
#ifdef __GNUC__ #ifdef __x86_64__
// Restore the stack (due to the above alignment)
// Potentially it can be restored from ebp // Restore the register context
xPOP( r15 );
xPOP( r14 );
xPOP( r13 );
xPOP( r12 );
xPOP( rbx );
// Destroy the frame
if (m_base_frame) { if (m_base_frame) {
xADD( esp, 12); xLEAVE();
} else if (m_save_base_pointer) {
xPOP( rbp );
} }
#endif
#else
// Restore the register context // Restore the register context
xPOP( ebx ); xPOP( ebx );
@ -1148,6 +1124,8 @@ xScopedStackFrame::~xScopedStackFrame()
// Destroy the frame // Destroy the frame
if (m_base_frame) { if (m_base_frame) {
xLEAVE(); xLEAVE();
} else if (m_save_base_pointer) {
xPOP( ebp );
} }
#endif #endif

View File

@ -112,7 +112,7 @@ void recDI()
//xMOV(eax, ptr[&cpuRegs.cycle ]); //xMOV(eax, ptr[&cpuRegs.cycle ]);
//xMOV(ptr[&g_nextBranchCycle], eax); //xMOV(ptr[&g_nextBranchCycle], eax);
//xCALL((void*)(uptr)Interp::DI ); //xFastCall((void*)(uptr)Interp::DI );
xMOV(eax, ptr[&cpuRegs.CP0.n.Status]); xMOV(eax, ptr[&cpuRegs.CP0.n.Status]);
xTEST(eax, 0x20006); // EXL | ERL | EDI xTEST(eax, 0x20006); // EXL | ERL | EDI
@ -170,12 +170,12 @@ void recMFC0()
case 1: case 1:
iFlushCall(FLUSH_INTERPRETER); iFlushCall(FLUSH_INTERPRETER);
xCALL( COP0_UpdatePCCR ); xFastCall(COP0_UpdatePCCR );
xMOV(eax, ptr[&cpuRegs.PERF.n.pcr0]); xMOV(eax, ptr[&cpuRegs.PERF.n.pcr0]);
break; break;
case 3: case 3:
iFlushCall(FLUSH_INTERPRETER); iFlushCall(FLUSH_INTERPRETER);
xCALL( COP0_UpdatePCCR ); xFastCall(COP0_UpdatePCCR );
xMOV(eax, ptr[&cpuRegs.PERF.n.pcr1]); xMOV(eax, ptr[&cpuRegs.PERF.n.pcr1]);
break; break;
} }
@ -207,8 +207,7 @@ void recMTC0()
{ {
case 12: case 12:
iFlushCall(FLUSH_INTERPRETER); iFlushCall(FLUSH_INTERPRETER);
xMOV( ecx, g_cpuConstRegs[_Rt_].UL[0] ); xFastCall(WriteCP0Status, g_cpuConstRegs[_Rt_].UL[0] );
xCALL( WriteCP0Status );
break; break;
case 9: case 9:
@ -222,9 +221,9 @@ void recMTC0()
{ {
case 0: case 0:
iFlushCall(FLUSH_INTERPRETER); iFlushCall(FLUSH_INTERPRETER);
xCALL( COP0_UpdatePCCR ); xFastCall(COP0_UpdatePCCR );
xMOV( ptr32[&cpuRegs.PERF.n.pccr], g_cpuConstRegs[_Rt_].UL[0] ); xMOV( ptr32[&cpuRegs.PERF.n.pccr], g_cpuConstRegs[_Rt_].UL[0] );
xCALL( COP0_DiagnosticPCCR ); xFastCall(COP0_DiagnosticPCCR );
break; break;
case 1: case 1:
@ -257,7 +256,7 @@ void recMTC0()
case 12: case 12:
iFlushCall(FLUSH_INTERPRETER); iFlushCall(FLUSH_INTERPRETER);
_eeMoveGPRtoR(ecx, _Rt_); _eeMoveGPRtoR(ecx, _Rt_);
xCALL( WriteCP0Status ); xFastCall(WriteCP0Status, ecx );
break; break;
case 9: case 9:
@ -271,9 +270,9 @@ void recMTC0()
{ {
case 0: case 0:
iFlushCall(FLUSH_INTERPRETER); iFlushCall(FLUSH_INTERPRETER);
xCALL( COP0_UpdatePCCR ); xFastCall(COP0_UpdatePCCR );
_eeMoveGPRtoM((uptr)&cpuRegs.PERF.n.pccr, _Rt_); _eeMoveGPRtoM((uptr)&cpuRegs.PERF.n.pccr, _Rt_);
xCALL( COP0_DiagnosticPCCR ); xFastCall(COP0_DiagnosticPCCR );
break; break;
case 1: case 1:

View File

@ -92,7 +92,7 @@ static const __aligned16 u32 s_pos[4] = { 0x7fffffff, 0xffffffff, 0xffffffff, 0x
void f(); \ void f(); \
void rec##f() { \ void rec##f() { \
iFlushCall(FLUSH_INTERPRETER); \ iFlushCall(FLUSH_INTERPRETER); \
xCALL((void*)(uptr)R5900::Interpreter::OpcodeImpl::COP1::f); \ xFastCall((void*)(uptr)R5900::Interpreter::OpcodeImpl::COP1::f); \
branch = 2; \ branch = 2; \
} }
@ -100,7 +100,7 @@ static const __aligned16 u32 s_pos[4] = { 0x7fffffff, 0xffffffff, 0xffffffff, 0x
void f(); \ void f(); \
void rec##f() { \ void rec##f() { \
iFlushCall(FLUSH_INTERPRETER); \ iFlushCall(FLUSH_INTERPRETER); \
xCALL((void*)(uptr)R5900::Interpreter::OpcodeImpl::COP1::f); \ xFastCall((void*)(uptr)R5900::Interpreter::OpcodeImpl::COP1::f); \
} }
//------------------------------------------------------------------ //------------------------------------------------------------------
@ -550,7 +550,7 @@ void FPU_MUL(int regd, int regt, bool reverseOperands)
{ {
xMOVD(ecx, xRegisterSSE(reverseOperands ? regt : regd)); xMOVD(ecx, xRegisterSSE(reverseOperands ? regt : regd));
xMOVD(edx, xRegisterSSE(reverseOperands ? regd : regt)); xMOVD(edx, xRegisterSSE(reverseOperands ? regd : regt));
xCALL((void*)(uptr)&FPU_MUL_HACK ); //returns the hacked result or 0 xFastCall((void*)(uptr)&FPU_MUL_HACK, ecx, edx); //returns the hacked result or 0
xTEST(eax, eax); xTEST(eax, eax);
noHack = JZ8(0); noHack = JZ8(0);
xMOVDZX(xRegisterSSE(regd), eax); xMOVDZX(xRegisterSSE(regd), eax);

View File

@ -89,20 +89,6 @@ namespace DOUBLE {
#define FPUflagSO 0X00000010 #define FPUflagSO 0X00000010
#define FPUflagSU 0X00000008 #define FPUflagSU 0X00000008
// REC_FPUBRANCH(f): declares f() and defines rec<f>(), which flushes with
// iFlushCall(FLUSH_INTERPRETER), emits a call to the COP1 interpreter
// implementation of f, and then sets `branch` to 2.
#define REC_FPUBRANCH(f) \
void f(); \
void rec##f() { \
iFlushCall(FLUSH_INTERPRETER); \
xCALL((void*)(uptr)R5900::Interpreter::OpcodeImpl::COP1::f); \
branch = 2; \
}
// REC_FPUFUNC(f): declares f() and defines rec<f>(), which flushes with
// iFlushCall(FLUSH_INTERPRETER) and emits a call to the COP1 interpreter
// implementation of f. Unlike REC_FPUBRANCH it does not touch `branch`.
#define REC_FPUFUNC(f) \
void f(); \
void rec##f() { \
iFlushCall(FLUSH_INTERPRETER); \
xCALL((void*)(uptr)R5900::Interpreter::OpcodeImpl::COP1::f); \
}
//------------------------------------------------------------------ //------------------------------------------------------------------
//------------------------------------------------------------------ //------------------------------------------------------------------
@ -416,7 +402,7 @@ void FPU_MUL(int info, int regd, int sreg, int treg, bool acc)
{ {
xMOVD(ecx, xRegisterSSE(sreg)); xMOVD(ecx, xRegisterSSE(sreg));
xMOVD(edx, xRegisterSSE(treg)); xMOVD(edx, xRegisterSSE(treg));
xCALL((void*)(uptr)&FPU_MUL_HACK ); //returns the hacked result or 0 xFastCall((void*)(uptr)&FPU_MUL_HACK, ecx, edx); //returns the hacked result or 0
xTEST(eax, eax); xTEST(eax, eax);
noHack = JZ8(0); noHack = JZ8(0);
xMOVDZX(xRegisterSSE(regd), eax); xMOVDZX(xRegisterSSE(regd), eax);

View File

@ -185,7 +185,7 @@ void recPMFHL()
// fall to interp // fall to interp
_deleteEEreg(_Rd_, 0); _deleteEEreg(_Rd_, 0);
iFlushCall(FLUSH_INTERPRETER); // since calling CALLFunc iFlushCall(FLUSH_INTERPRETER); // since calling CALLFunc
xCALL((void*)(uptr)R5900::Interpreter::OpcodeImpl::MMI::PMFHL ); xFastCall((void*)(uptr)R5900::Interpreter::OpcodeImpl::MMI::PMFHL );
break; break;
case 0x03: // LH case 0x03: // LH

View File

@ -120,50 +120,6 @@ static void recEventTest()
_cpuEventTest_Shared(); _cpuEventTest_Shared();
} }
// Reports a failed stackframe sanity check through a dev-build assertion.
//
// parameters:
//   espORebp - selects the register that failed the check: 0 for ESP, anything else for EBP.
//   regval   - value the register held when the fault was detected (it predates the
//              stackframe setup code of this function)
//
// Note: after this function returns, the recompiler attempts to recover ESP and EBP,
// so selecting Continue/Ignore/Cancel on the assertion should typically let PCSX2
// continue to run with some degree of stability.
static void __fastcall StackFrameCheckFailed( int espORebp, int regval )
{
	pxFailDev( pxsFmt( L"(R3000A Recompiler Stackframe) Sanity check failed on %ls\n\tCurrent=%d; Saved=%d",
		(espORebp != 0) ? L"EBP" : L"ESP",
		regval,
		(espORebp != 0) ? s_store_ebp : s_store_esp )
	);
}
// Emits a runtime sanity check of the stackframe registers: ebp and esp are each
// compared against the values saved in s_store_ebp / s_store_esp. On a mismatch the
// emitted code calls StackFrameCheckFailed (ecx = register selector, edx = current
// value) and then reloads the register from its saved value.
// Nothing is emitted unless IsDevBuild is set.
static void _DynGen_StackFrameCheck()
{
if( !IsDevBuild ) return;
// --------- EBP Here -----------
xCMP( ebp, ptr[&s_store_ebp] );
// Forward jump over the failure path; its target is bound by SetTarget() below.
// NOTE(review): presumably a short jump-if-equal, going by the type name -- confirm.
xForwardJE8 skipassert_ebp;
xMOV( ecx, 1 ); // 1 specifies EBP
xMOV( edx, ebp );
xCALL( StackFrameCheckFailed );
xMOV( ebp, ptr[&s_store_ebp] ); // half-hearted frame recovery attempt!
skipassert_ebp.SetTarget();
// --------- ESP There -----------
xCMP( esp, ptr[&s_store_esp] );
// Same pattern as above, this time for esp.
xForwardJE8 skipassert_esp;
xXOR( ecx, ecx ); // 0 specifies ESP
xMOV( edx, esp );
xCALL( StackFrameCheckFailed );
xMOV( esp, ptr[&s_store_esp] ); // half-hearted frame recovery attempt!
skipassert_esp.SetTarget();
}
// The address for all cleared blocks. It recompiles the current pc and then // The address for all cleared blocks. It recompiles the current pc and then
// dispatches to the recompiled block address. // dispatches to the recompiled block address.
static DynGenFunc* _DynGen_JITCompile() static DynGenFunc* _DynGen_JITCompile()
@ -171,10 +127,8 @@ static DynGenFunc* _DynGen_JITCompile()
pxAssertMsg( iopDispatcherReg != NULL, "Please compile the DispatcherReg subroutine *before* JITComple. Thanks." ); pxAssertMsg( iopDispatcherReg != NULL, "Please compile the DispatcherReg subroutine *before* JITComple. Thanks." );
u8* retval = xGetPtr(); u8* retval = xGetPtr();
_DynGen_StackFrameCheck();
xMOV( ecx, ptr[&psxRegs.pc] ); xFastCall(iopRecRecompile, ptr[&psxRegs.pc] );
xCALL( iopRecRecompile );
xMOV( eax, ptr[&psxRegs.pc] ); xMOV( eax, ptr[&psxRegs.pc] );
xMOV( ebx, eax ); xMOV( ebx, eax );
@ -196,7 +150,6 @@ static DynGenFunc* _DynGen_JITCompileInBlock()
static DynGenFunc* _DynGen_DispatcherReg() static DynGenFunc* _DynGen_DispatcherReg()
{ {
u8* retval = xGetPtr(); u8* retval = xGetPtr();
_DynGen_StackFrameCheck();
xMOV( eax, ptr[&psxRegs.pc] ); xMOV( eax, ptr[&psxRegs.pc] );
xMOV( ebx, eax ); xMOV( ebx, eax );
@ -210,128 +163,21 @@ static DynGenFunc* _DynGen_DispatcherReg()
// -------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------
// EnterRecompiledCode - dynamic compilation stub! // EnterRecompiledCode - dynamic compilation stub!
// -------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------
// In Release Builds this literally generates the following code:
// push edi
// push esi
// push ebx
// jmp DispatcherReg
// pop ebx
// pop esi
// pop edi
//
// See notes on why this works in both GCC (aligned stack!) and other compilers (not-so-
// aligned stack!). In debug/dev builds the code gen is more complicated, as it constructs
// ebp stackframe mess, which allows for a complete backtrace from debug breakpoints (yay).
//
// Also, if you set PCSX2_IOP_FORCED_ALIGN_STACK to 1, the codegen for MSVC becomes slightly
// more complicated since it has to perform a full stack alignment on entry.
//
#if defined(__GNUG__) || defined(__DARWIN__)
# define PCSX2_ASSUME_ALIGNED_STACK 1
#else
# define PCSX2_ASSUME_ALIGNED_STACK 0
#endif
// Set to 0 for a speedup in release builds.
// [doesn't apply to GCC/Mac, which must always align]
#define PCSX2_IOP_FORCED_ALIGN_STACK 0 //1
// For overriding stackframe generation options in Debug builds (possibly useful for troubleshooting)
// Typically this value should be the same as IsDevBuild.
static const bool GenerateStackFrame = IsDevBuild;
static DynGenFunc* _DynGen_EnterRecompiledCode() static DynGenFunc* _DynGen_EnterRecompiledCode()
{ {
u8* retval = xGetPtr();
bool allocatedStack = GenerateStackFrame || PCSX2_IOP_FORCED_ALIGN_STACK;
// Optimization: The IOP never uses stack-based parameter invocation, so we can avoid // Optimization: The IOP never uses stack-based parameter invocation, so we can avoid
// allocating any room on the stack for it (which is important since the IOP's entry // allocating any room on the stack for it (which is important since the IOP's entry
// code gets invoked quite a lot). // code gets invoked quite a lot).
if( allocatedStack ) u8* retval = xGetPtr();
{
xPUSH( ebp );
xMOV( ebp, esp );
xAND( esp, -0x10 );
xSUB( esp, 0x20 ); { // Properly scope the frame prologue/epilogue
xScopedStackFrame frame(IsDevBuild);
xMOV( ptr[ebp-12], edi ); xJMP(iopDispatcherReg);
xMOV( ptr[ebp-8], esi );
xMOV( ptr[ebp-4], ebx );
}
else
{
// GCC Compiler:
// The frame pointer coming in from the EE's event test can be safely assumed to be
// aligned, since GCC always aligns stackframes. While handy in x86-64, where CALL + PUSH EBP
// results in a neatly realigned stack on entry to every function, unfortunately in x86-32
// this is usually worthless because CALL+PUSH leaves us 8 byte aligned instead (fail). So
// we have to do the usual set of stackframe alignments and simulated callstack mess
// *regardless*.
// MSVC/Intel compilers: // Save an exit point
// The PCSX2_IOP_FORCED_ALIGN_STACK setting is 0, so we don't care. Just push regs like iopExitRecompiledCode = (DynGenFunc*)xGetPtr();
// the good old days! (stack alignment will be indeterminate)
xPUSH( edi );
xPUSH( esi );
xPUSH( ebx );
allocatedStack = false;
}
uptr* imm = NULL;
if( allocatedStack )
{
if( GenerateStackFrame )
{
// Simulate a CALL function by pushing the call address and EBP onto the stack.
// This retains proper stacktrace and stack unwinding (handy in devbuilds!)
xMOV( ptr32[esp+0x0c], 0xffeeff );
imm = (uptr*)(xGetPtr()-4);
// This part simulates the "normal" stackframe prep of "push ebp, mov ebp, esp"
xMOV( ptr32[esp+0x08], ebp );
xLEA( ebp, ptr32[esp+0x08] );
}
}
if( IsDevBuild )
{
xMOV( ptr[&s_store_esp], esp );
xMOV( ptr[&s_store_ebp], ebp );
}
xJMP( iopDispatcherReg );
if( imm != NULL )
*imm = (uptr)xGetPtr();
// ----------------------
// ----> Cleanup! ---->
iopExitRecompiledCode = (DynGenFunc*)xGetPtr();
if( allocatedStack )
{
// pop the nested "simulated call" stackframe, if needed:
if( GenerateStackFrame ) xLEAVE();
xMOV( edi, ptr[ebp-12] );
xMOV( esi, ptr[ebp-8] );
xMOV( ebx, ptr[ebp-4] );
xLEAVE();
}
else
{
xPOP( ebx );
xPOP( esi );
xPOP( edi );
} }
xRET(); xRET();
@ -352,7 +198,7 @@ static void _DynGen_Dispatchers()
// Place the EventTest and DispatcherReg stuff at the top, because they get called the // Place the EventTest and DispatcherReg stuff at the top, because they get called the
// most and stand to benefit from strong alignment and direct referencing. // most and stand to benefit from strong alignment and direct referencing.
iopDispatcherEvent = (DynGenFunc*)xGetPtr(); iopDispatcherEvent = (DynGenFunc*)xGetPtr();
xCALL( recEventTest ); xFastCall(recEventTest );
iopDispatcherReg = _DynGen_DispatcherReg(); iopDispatcherReg = _DynGen_DispatcherReg();
iopJITCompile = _DynGen_JITCompile(); iopJITCompile = _DynGen_JITCompile();
@ -676,11 +522,11 @@ void psxRecompileCodeConst1(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode)
} }
if (debug) if (debug)
xCALL(debug); xFastCall(debug);
#endif #endif
irxHLE hle = irxImportHLE(libname, index); irxHLE hle = irxImportHLE(libname, index);
if (hle) { if (hle) {
xCALL(hle); xFastCall(hle);
xCMP(eax, 0); xCMP(eax, 0);
xJNE(iopDispatcherReg); xJNE(iopDispatcherReg);
} }
@ -1060,7 +906,7 @@ static void iPsxBranchTest(u32 newpc, u32 cpuBranch)
xSUB(ptr32[&iopCycleEE], eax); xSUB(ptr32[&iopCycleEE], eax);
xJLE(iopExitRecompiledCode); xJLE(iopExitRecompiledCode);
xCALL(iopEventTest); xFastCall(iopEventTest);
if( newpc != 0xffffffff ) if( newpc != 0xffffffff )
{ {
@ -1082,7 +928,7 @@ static void iPsxBranchTest(u32 newpc, u32 cpuBranch)
xSUB(eax, ptr32[&g_iopNextEventCycle]); xSUB(eax, ptr32[&g_iopNextEventCycle]);
xForwardJS<u8> nointerruptpending; xForwardJS<u8> nointerruptpending;
xCALL(iopEventTest); xFastCall(iopEventTest);
if( newpc != 0xffffffff ) { if( newpc != 0xffffffff ) {
xCMP(ptr32[&psxRegs.pc], newpc); xCMP(ptr32[&psxRegs.pc], newpc);
@ -1117,9 +963,9 @@ void rpsxSYSCALL()
xMOV(ptr32[&psxRegs.pc], psxpc - 4); xMOV(ptr32[&psxRegs.pc], psxpc - 4);
_psxFlushCall(FLUSH_NODESTROY); _psxFlushCall(FLUSH_NODESTROY);
xMOV( ecx, 0x20 ); // exception code //xMOV( ecx, 0x20 ); // exception code
xMOV( edx, psxbranch==1 ); // branch delay slot? //xMOV( edx, psxbranch==1 ); // branch delay slot?
xCALL( psxException ); xFastCall(psxException, 0x20, psxbranch == 1 );
xCMP(ptr32[&psxRegs.pc], psxpc-4); xCMP(ptr32[&psxRegs.pc], psxpc-4);
j8Ptr[0] = JE8(0); j8Ptr[0] = JE8(0);
@ -1140,9 +986,9 @@ void rpsxBREAK()
xMOV(ptr32[&psxRegs.pc], psxpc - 4); xMOV(ptr32[&psxRegs.pc], psxpc - 4);
_psxFlushCall(FLUSH_NODESTROY); _psxFlushCall(FLUSH_NODESTROY);
xMOV( ecx, 0x24 ); // exception code //xMOV( ecx, 0x24 ); // exception code
xMOV( edx, psxbranch==1 ); // branch delay slot? //xMOV( edx, psxbranch==1 ); // branch delay slot?
xCALL( psxException ); xFastCall(psxException, 0x24, psxbranch == 1 );
xCMP(ptr32[&psxRegs.pc], psxpc-4); xCMP(ptr32[&psxRegs.pc], psxpc-4);
j8Ptr[0] = JE8(0); j8Ptr[0] = JE8(0);
@ -1255,8 +1101,7 @@ static void __fastcall iopRecRecompile( const u32 startpc )
if( IsDebugBuild ) if( IsDebugBuild )
{ {
xMOV(ecx, psxpc); xFastCall(PreBlockCheck, psxpc);
xCALL(PreBlockCheck);
} }
// go until the next branch // go until the next branch

View File

@ -32,7 +32,7 @@ extern u32 g_psxMaxRecMem;
static void rpsx##f() { \ static void rpsx##f() { \
xMOV(ptr32[&psxRegs.code], (u32)psxRegs.code); \ xMOV(ptr32[&psxRegs.code], (u32)psxRegs.code); \
_psxFlushCall(FLUSH_EVERYTHING); \ _psxFlushCall(FLUSH_EVERYTHING); \
xCALL((void*)(uptr)psx##f); \ xFastCall((void*)(uptr)psx##f); \
PSX_DEL_CONST(_Rt_); \ PSX_DEL_CONST(_Rt_); \
/* branch = 2; */\ /* branch = 2; */\
} }
@ -626,7 +626,7 @@ static void rpsxLB()
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_); if (_Imm_) xADD(ecx, _Imm_);
xCALL( iopMemRead8 ); // returns value in EAX xFastCall(iopMemRead8, ecx ); // returns value in EAX
if (_Rt_) { if (_Rt_) {
xMOVSX(eax, al); xMOVSX(eax, al);
xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax); xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax);
@ -642,7 +642,7 @@ static void rpsxLBU()
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_); if (_Imm_) xADD(ecx, _Imm_);
xCALL( iopMemRead8 ); // returns value in EAX xFastCall(iopMemRead8, ecx ); // returns value in EAX
if (_Rt_) { if (_Rt_) {
xMOVZX(eax, al); xMOVZX(eax, al);
xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax); xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax);
@ -658,7 +658,7 @@ static void rpsxLH()
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_); if (_Imm_) xADD(ecx, _Imm_);
xCALL( iopMemRead16 ); // returns value in EAX xFastCall(iopMemRead16, ecx ); // returns value in EAX
if (_Rt_) { if (_Rt_) {
xMOVSX(eax, ax); xMOVSX(eax, ax);
xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax); xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax);
@ -674,7 +674,7 @@ static void rpsxLHU()
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_); if (_Imm_) xADD(ecx, _Imm_);
xCALL( iopMemRead16 ); // returns value in EAX xFastCall(iopMemRead16, ecx ); // returns value in EAX
if (_Rt_) { if (_Rt_) {
xMOVZX(eax, ax); xMOVZX(eax, ax);
xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax); xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax);
@ -695,7 +695,7 @@ static void rpsxLW()
xTEST(ecx, 0x10000000); xTEST(ecx, 0x10000000);
j8Ptr[0] = JZ8(0); j8Ptr[0] = JZ8(0);
xCALL( iopMemRead32 ); // returns value in EAX xFastCall(iopMemRead32, ecx ); // returns value in EAX
if (_Rt_) { if (_Rt_) {
xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax); xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax);
} }
@ -721,7 +721,7 @@ static void rpsxSB()
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_); if (_Imm_) xADD(ecx, _Imm_);
xMOV( edx, ptr[&psxRegs.GPR.r[_Rt_]] ); xMOV( edx, ptr[&psxRegs.GPR.r[_Rt_]] );
xCALL( iopMemWrite8 ); xFastCall(iopMemWrite8, ecx, edx );
} }
static void rpsxSH() static void rpsxSH()
@ -732,7 +732,7 @@ static void rpsxSH()
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_); if (_Imm_) xADD(ecx, _Imm_);
xMOV( edx, ptr[&psxRegs.GPR.r[_Rt_]] ); xMOV( edx, ptr[&psxRegs.GPR.r[_Rt_]] );
xCALL( iopMemWrite16 ); xFastCall(iopMemWrite16, ecx, edx );
} }
static void rpsxSW() static void rpsxSW()
@ -743,7 +743,7 @@ static void rpsxSW()
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_); if (_Imm_) xADD(ecx, _Imm_);
xMOV( edx, ptr[&psxRegs.GPR.r[_Rt_]] ); xMOV( edx, ptr[&psxRegs.GPR.r[_Rt_]] );
xCALL( iopMemWrite32 ); xFastCall(iopMemWrite32, ecx, edx );
} }
//// SLL //// SLL
@ -1371,7 +1371,7 @@ void rpsxRFE()
// Test the IOP's INTC status, so that any pending ints get raised. // Test the IOP's INTC status, so that any pending ints get raised.
_psxFlushCall(0); _psxFlushCall(0);
xCALL((void*)(uptr)&iopTestIntc ); xFastCall((void*)(uptr)&iopTestIntc );
} }
// R3000A tables // R3000A tables

View File

@ -71,7 +71,7 @@ namespace OpcodeImpl {
// xMOV(ptr32[&cpuRegs.code], cpuRegs.code ); // xMOV(ptr32[&cpuRegs.code], cpuRegs.code );
// xMOV(ptr32[&cpuRegs.pc], pc ); // xMOV(ptr32[&cpuRegs.pc], pc );
// iFlushCall(FLUSH_EVERYTHING); // iFlushCall(FLUSH_EVERYTHING);
// xCALL((void*)(uptr)CACHE ); // xFastCall((void*)(uptr)CACHE );
// //branch = 2; // //branch = 2;
// //
// xCMP(ptr32[(u32*)((int)&cpuRegs.pc)], pc); // xCMP(ptr32[(u32*)((int)&cpuRegs.pc)], pc);
@ -203,7 +203,7 @@ void recMTSAH()
//xMOV(ptr32[&cpuRegs.code], (u32)cpuRegs.code ); //xMOV(ptr32[&cpuRegs.code], (u32)cpuRegs.code );
//xMOV(ptr32[&cpuRegs.pc], (u32)pc ); //xMOV(ptr32[&cpuRegs.pc], (u32)pc );
//iFlushCall(FLUSH_EVERYTHING); //iFlushCall(FLUSH_EVERYTHING);
//xCALL((void*)(uptr)R5900::Interpreter::OpcodeImpl::CACHE ); //xFastCall((void*)(uptr)R5900::Interpreter::OpcodeImpl::CACHE );
//branch = 2; //branch = 2;
} }

View File

@ -340,7 +340,7 @@ void recBranchCall( void (*func)() )
void recCall( void (*func)() ) void recCall( void (*func)() )
{ {
iFlushCall(FLUSH_INTERPRETER); iFlushCall(FLUSH_INTERPRETER);
xCALL(func); xFastCall(func);
} }
// ===================================================================================================== // =====================================================================================================
@ -372,50 +372,6 @@ static void recEventTest()
_cpuEventTest_Shared(); _cpuEventTest_Shared();
} }
// Reports a failed stackframe sanity check through a dev-build assertion.
//
// parameters:
//   espORebp - selects the register that failed the check: 0 for ESP, anything else for EBP.
//   regval   - value the register held when the fault was detected (it predates the
//              stackframe setup code of this function)
//
// Note: after this function returns, the recompiler attempts to recover ESP and EBP,
// so selecting Continue/Ignore/Cancel on the assertion should typically let PCSX2
// continue to run with some degree of stability.
static void __fastcall StackFrameCheckFailed( int espORebp, int regval )
{
	pxFailDev( wxsFormat( L"(R5900 Recompiler Stackframe) Sanity check failed on %s\n\tCurrent=%d; Saved=%d",
		(espORebp != 0) ? L"EBP" : L"ESP",
		regval,
		(espORebp != 0) ? s_store_ebp : s_store_esp )
	);
}
// Emits a runtime sanity check of the stackframe registers: ebp and esp are each
// compared against the values saved in s_store_ebp / s_store_esp. On a mismatch the
// emitted code calls StackFrameCheckFailed (ecx = register selector, edx = current
// value) and then reloads the register from its saved value.
// Nothing is emitted unless EmuConfig.Cpu.Recompiler.StackFrameChecks is set.
static void _DynGen_StackFrameCheck()
{
if( !EmuConfig.Cpu.Recompiler.StackFrameChecks ) return;
// --------- EBP Here -----------
xCMP( ebp, ptr[&s_store_ebp] );
// Forward jump over the failure path; its target is bound by SetTarget() below.
// NOTE(review): presumably a short jump-if-equal, going by the type name -- confirm.
xForwardJE8 skipassert_ebp;
xMOV( ecx, 1 ); // 1 specifies EBP
xMOV( edx, ebp );
xCALL( StackFrameCheckFailed );
xMOV( ebp, ptr[&s_store_ebp] ); // half-hearted frame recovery attempt!
skipassert_ebp.SetTarget();
// --------- ESP There -----------
xCMP( esp, ptr[&s_store_esp] );
// Same pattern as above, this time for esp.
xForwardJE8 skipassert_esp;
xXOR( ecx, ecx ); // 0 specifies ESP
xMOV( edx, esp );
xCALL( StackFrameCheckFailed );
xMOV( esp, ptr[&s_store_esp] ); // half-hearted frame recovery attempt!
skipassert_esp.SetTarget();
}
// The address for all cleared blocks. It recompiles the current pc and then // The address for all cleared blocks. It recompiles the current pc and then
// dispatches to the recompiled block address. // dispatches to the recompiled block address.
static DynGenFunc* _DynGen_JITCompile() static DynGenFunc* _DynGen_JITCompile()
@ -423,10 +379,8 @@ static DynGenFunc* _DynGen_JITCompile()
pxAssertMsg( DispatcherReg != NULL, "Please compile the DispatcherReg subroutine *before* JITComple. Thanks." ); pxAssertMsg( DispatcherReg != NULL, "Please compile the DispatcherReg subroutine *before* JITComple. Thanks." );
u8* retval = xGetAlignedCallTarget(); u8* retval = xGetAlignedCallTarget();
_DynGen_StackFrameCheck();
xMOV( ecx, ptr[&cpuRegs.pc] ); xFastCall(recRecompile, ptr[&cpuRegs.pc] );
xCALL( recRecompile );
xMOV( eax, ptr[&cpuRegs.pc] ); xMOV( eax, ptr[&cpuRegs.pc] );
xMOV( ebx, eax ); xMOV( ebx, eax );
@ -448,7 +402,6 @@ static DynGenFunc* _DynGen_JITCompileInBlock()
static DynGenFunc* _DynGen_DispatcherReg() static DynGenFunc* _DynGen_DispatcherReg()
{ {
u8* retval = xGetPtr(); // fallthrough target, can't align it! u8* retval = xGetPtr(); // fallthrough target, can't align it!
_DynGen_StackFrameCheck();
xMOV( eax, ptr[&cpuRegs.pc] ); xMOV( eax, ptr[&cpuRegs.pc] );
xMOV( ebx, eax ); xMOV( ebx, eax );
@ -463,7 +416,7 @@ static DynGenFunc* _DynGen_DispatcherEvent()
{ {
u8* retval = xGetPtr(); u8* retval = xGetPtr();
xCALL( recEventTest ); xFastCall(recEventTest );
return (DynGenFunc*)retval; return (DynGenFunc*)retval;
} }
@ -474,60 +427,15 @@ static DynGenFunc* _DynGen_EnterRecompiledCode()
u8* retval = xGetAlignedCallTarget(); u8* retval = xGetAlignedCallTarget();
// "standard" frame pointer setup for aligned stack: Record the original { // Properly scope the frame prologue/epilogue
// esp into ebp, and then align esp. ebp references the original esp base xScopedStackFrame frame(IsDevBuild);
// for the duration of our function, and is used to restore the original
// esp before returning from the function
xPUSH( ebp ); xJMP(DispatcherReg);
xMOV( ebp, esp );
xAND( esp, -0x10 );
// First 0x10 is for esi, edi, etc. Second 0x10 is for the return address and ebp. The // Save an exit point
// third 0x10 is an optimization for C-style CDECL calls we might make from the recompiler ExitRecompiledCode = (DynGenFunc*)xGetPtr();
// (parameters for those calls can be stored there!) [currently no cdecl functions are
// used -- we do everything through __fastcall)
static const int cdecl_reserve = 0x00;
xSUB( esp, 0x20 + cdecl_reserve );
xMOV( ptr[ebp-12], edi );
xMOV( ptr[ebp-8], esi );
xMOV( ptr[ebp-4], ebx );
// Simulate a CALL function by pushing the call address and EBP onto the stack.
// (the dummy address here is filled in later right before we generate the LEAVE code)
xMOV( ptr32[esp+0x0c+cdecl_reserve], 0xdeadbeef );
uptr& imm = *(uptr*)(xGetPtr()-4);
// This part simulates the "normal" stackframe prep of "push ebp, mov ebp, esp"
// It is done here because we can't really generate that stuff from the Dispatchers themselves.
xMOV( ptr32[esp+0x08+cdecl_reserve], ebp );
xLEA( ebp, ptr32[esp+0x08+cdecl_reserve] );
if (EmuConfig.Cpu.Recompiler.StackFrameChecks) {
xMOV( ptr[&s_store_esp], esp );
xMOV( ptr[&s_store_ebp], ebp );
} }
xJMP( DispatcherReg );
xAlignCallTarget();
// This dummy CALL is unreachable code that some debuggers (MSVC2008) need in order to
// unwind the stack properly. This is effectively the call that we simulate above.
if( IsDevBuild ) xCALL( DispatcherReg );
imm = (uptr)xGetPtr();
ExitRecompiledCode = (DynGenFunc*)xGetPtr();
xLEAVE();
xMOV( edi, ptr[ebp-12] );
xMOV( esi, ptr[ebp-8] );
xMOV( ebx, ptr[ebp-4] );
xLEAVE();
xRET(); xRET();
return (DynGenFunc*)retval; return (DynGenFunc*)retval;
@ -537,7 +445,7 @@ static DynGenFunc* _DynGen_DispatchBlockDiscard()
{ {
u8* retval = xGetPtr(); u8* retval = xGetPtr();
xEMMS(); xEMMS();
xCALL(dyna_block_discard); xFastCall(dyna_block_discard);
xJMP(ExitRecompiledCode); xJMP(ExitRecompiledCode);
return (DynGenFunc*)retval; return (DynGenFunc*)retval;
} }
@ -546,7 +454,7 @@ static DynGenFunc* _DynGen_DispatchPageReset()
{ {
u8* retval = xGetPtr(); u8* retval = xGetPtr();
xEMMS(); xEMMS();
xCALL(dyna_page_reset); xFastCall(dyna_page_reset);
xJMP(ExitRecompiledCode); xJMP(ExitRecompiledCode);
return (DynGenFunc*)retval; return (DynGenFunc*)retval;
} }
@ -1007,7 +915,7 @@ void SetBranchReg( u32 reg )
// xCMP(ptr32[&cpuRegs.pc], 0); // xCMP(ptr32[&cpuRegs.pc], 0);
// j8Ptr[5] = JNE8(0); // j8Ptr[5] = JNE8(0);
// xCALL((void*)(uptr)tempfn); // xFastCall((void*)(uptr)tempfn);
// x86SetJ8( j8Ptr[5] ); // x86SetJ8( j8Ptr[5] );
iFlushCall(FLUSH_EVERYTHING); iFlushCall(FLUSH_EVERYTHING);
@ -1149,8 +1057,6 @@ static u32 scaleblockcycles()
// setting "g_branch = 2"; // setting "g_branch = 2";
static void iBranchTest(u32 newpc) static void iBranchTest(u32 newpc)
{ {
_DynGen_StackFrameCheck();
// Check the Event scheduler if our "cycle target" has been reached. // Check the Event scheduler if our "cycle target" has been reached.
// Equiv code to: // Equiv code to:
// cpuRegs.cycle += blockcycles; // cpuRegs.cycle += blockcycles;
@ -1294,7 +1200,7 @@ void recMemcheck(u32 op, u32 bits, bool store)
if (bits == 128) if (bits == 128)
xAND(ecx, ~0x0F); xAND(ecx, ~0x0F);
xCALL(standardizeBreakpointAddress); xFastCall(standardizeBreakpointAddress, ecx);
xMOV(ecx,eax); xMOV(ecx,eax);
xMOV(edx,eax); xMOV(edx,eax);
xADD(edx,bits/8); xADD(edx,bits/8);
@ -1325,10 +1231,10 @@ void recMemcheck(u32 op, u32 bits, bool store)
// hit the breakpoint // hit the breakpoint
if (checks[i].result & MEMCHECK_LOG) { if (checks[i].result & MEMCHECK_LOG) {
xMOV(edx, store); xMOV(edx, store);
xCALL(&dynarecMemLogcheck); xFastCall(dynarecMemLogcheck, ecx, edx);
} }
if (checks[i].result & MEMCHECK_BREAK) { if (checks[i].result & MEMCHECK_BREAK) {
xCALL(&dynarecMemcheck); xFastCall(dynarecMemcheck);
} }
next1.SetTarget(); next1.SetTarget();
@ -1341,7 +1247,7 @@ void encodeBreakpoint()
if (isBreakpointNeeded(pc) != 0) if (isBreakpointNeeded(pc) != 0)
{ {
iFlushCall(FLUSH_EVERYTHING|FLUSH_PC); iFlushCall(FLUSH_EVERYTHING|FLUSH_PC);
xCALL(&dynarecCheckBreakpoint); xFastCall(dynarecCheckBreakpoint);
} }
} }
@ -1767,14 +1673,14 @@ static void __fastcall recRecompile( const u32 startpc )
pxAssert(s_pCurBlockEx); pxAssert(s_pCurBlockEx);
if (g_SkipBiosHack && HWADDR(startpc) == EELOAD_START) { if (g_SkipBiosHack && HWADDR(startpc) == EELOAD_START) {
xCALL(eeloadReplaceOSDSYS); xFastCall(eeloadReplaceOSDSYS);
xCMP(ptr32[&cpuRegs.pc], startpc); xCMP(ptr32[&cpuRegs.pc], startpc);
xJNE(DispatcherReg); xJNE(DispatcherReg);
} }
// this is the only way patches get applied, doesn't depend on a hack // this is the only way patches get applied, doesn't depend on a hack
if (HWADDR(startpc) == ElfEntry) { if (HWADDR(startpc) == ElfEntry) {
xCALL(eeGameStarting); xFastCall(eeGameStarting);
// Apply patch as soon as possible. Normally it is done in // Apply patch as soon as possible. Normally it is done in
// eeGameStarting but first block is already compiled. // eeGameStarting but first block is already compiled.
// //
@ -1804,20 +1710,18 @@ static void __fastcall recRecompile( const u32 startpc )
// [TODO] : These must be enabled from the GUI or INI to be used, otherwise the // [TODO] : These must be enabled from the GUI or INI to be used, otherwise the
// code that calls PreBlockCheck will not be generated. // code that calls PreBlockCheck will not be generated.
xMOV(ecx, pc); xFastCall(PreBlockCheck, pc);
xCALL(PreBlockCheck);
} }
if (EmuConfig.Gamefixes.GoemonTlbHack) { if (EmuConfig.Gamefixes.GoemonTlbHack) {
if (pc == 0x33ad48 || pc == 0x35060c) { if (pc == 0x33ad48 || pc == 0x35060c) {
// 0x33ad48 and 0x35060c are the return address of the function (0x356250) that populate the TLB cache // 0x33ad48 and 0x35060c are the return address of the function (0x356250) that populate the TLB cache
xCALL(GoemonPreloadTlb); xFastCall(GoemonPreloadTlb);
} else if (pc == 0x3563b8) { } else if (pc == 0x3563b8) {
// Game will unmap some virtual addresses. If a constant address were hardcoded in the block, we would be in a bad situation. // Game will unmap some virtual addresses. If a constant address were hardcoded in the block, we would be in a bad situation.
AtomicExchange( eeRecNeedsReset, true ); AtomicExchange( eeRecNeedsReset, true );
// 0x3563b8 is the start address of the function that invalidate entry in TLB cache // 0x3563b8 is the start address of the function that invalidate entry in TLB cache
xMOV(ecx, ptr[&cpuRegs.GPR.n.a0.UL[ 0 ] ]); xFastCall(GoemonUnloadTlb, ptr[&cpuRegs.GPR.n.a0.UL[0]]);
xCALL(GoemonUnloadTlb);
} }
} }

View File

@ -396,7 +396,7 @@ EERECOMPILE_CODE0(BNEL, XMMINFO_READS|XMMINFO_READT);
// xMOV(ptr32[(u32*)((int)&cpuRegs.code)], cpuRegs.code ); // xMOV(ptr32[(u32*)((int)&cpuRegs.code)], cpuRegs.code );
// xMOV(ptr32[(u32*)((int)&cpuRegs.pc)], pc ); // xMOV(ptr32[(u32*)((int)&cpuRegs.pc)], pc );
// iFlushCall(FLUSH_EVERYTHING); // iFlushCall(FLUSH_EVERYTHING);
// xCALL((void*)(int)BLTZAL ); // xFastCall((void*)(int)BLTZAL );
// branch = 2; // branch = 2;
//} //}

View File

@ -314,7 +314,7 @@ void vtlb_dynarec_init()
// jump to the indirect handler, which is a __fastcall C++ function. // jump to the indirect handler, which is a __fastcall C++ function.
// [ecx is address, edx is data] // [ecx is address, edx is data]
xCALL( ptr32[(eax*4) + vtlbdata.RWFT[bits][mode]] ); xFastCall(ptr32[(eax*4) + vtlbdata.RWFT[bits][mode]], ecx, edx);
if (!mode) if (!mode)
{ {
@ -410,8 +410,7 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const )
} }
iFlushCall(FLUSH_FULLVTLB); iFlushCall(FLUSH_FULLVTLB);
xMOV( ecx, paddr ); xFastCall( vtlbdata.RWFT[szidx][0][handler], paddr );
xCALL( vtlbdata.RWFT[szidx][0][handler] );
} }
} }
@ -474,8 +473,7 @@ void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const )
else else
{ {
iFlushCall(FLUSH_FULLVTLB); iFlushCall(FLUSH_FULLVTLB);
xMOV( ecx, paddr ); xFastCall( vtlbdata.RWFT[szidx][0][handler], paddr );
xCALL( vtlbdata.RWFT[szidx][0][handler] );
// perform sign extension on the result: // perform sign extension on the result:
@ -565,8 +563,7 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const )
} }
iFlushCall(FLUSH_FULLVTLB); iFlushCall(FLUSH_FULLVTLB);
xMOV( ecx, paddr ); xFastCall( vtlbdata.RWFT[szidx][1][handler], paddr, edx );
xCALL( vtlbdata.RWFT[szidx][1][handler] );
} }
} }

View File

@ -80,10 +80,8 @@ void mVUreset(microVU& mVU, bool resetReserve) {
else Perf::any.map((uptr)&mVU.dispCache, mVUdispCacheSize, "mVU0 Dispatcher"); else Perf::any.map((uptr)&mVU.dispCache, mVUdispCacheSize, "mVU0 Dispatcher");
x86SetPtr(mVU.dispCache); x86SetPtr(mVU.dispCache);
mVUdispatcherA(mVU); mVUdispatcherAB(mVU);
mVUdispatcherB(mVU); mVUdispatcherCD(mVU);
mVUdispatcherC(mVU);
mVUdispatcherD(mVU);
mVUemitSearch(); mVUemitSearch();
// Clear All Program Data // Clear All Program Data

View File

@ -57,8 +57,8 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit) {
mVU_XGKICK_DELAY(mVU); mVU_XGKICK_DELAY(mVU);
} }
if (doEarlyExit(mVU)) { if (doEarlyExit(mVU)) {
if (!isVU1) xCALL(mVU0clearlpStateJIT); if (!isVU1) xFastCall(mVU0clearlpStateJIT);
else xCALL(mVU1clearlpStateJIT); else xFastCall(mVU1clearlpStateJIT);
} }
} }
@ -117,9 +117,9 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit) {
} }
if (doEarlyExit(mVU)) { if (doEarlyExit(mVU)) {
if (!isVU1) if (!isVU1)
xCALL(mVU0clearlpStateJIT); xFastCall(mVU0clearlpStateJIT);
else else
xCALL(mVU1clearlpStateJIT); xFastCall(mVU1clearlpStateJIT);
} }
} }
@ -192,8 +192,8 @@ void normJumpCompile(mV, microFlagCycles& mFC, bool isEvilJump) {
xJMP(mVU.exitFunct); xJMP(mVU.exitFunct);
} }
if (!mVU.index) xCALL(mVUcompileJIT<0>); //(u32 startPC, uptr pState) if (!mVU.index) xFastCall(mVUcompileJIT<0>, gprT2, gprT3); //(u32 startPC, uptr pState)
else xCALL(mVUcompileJIT<1>); else xFastCall(mVUcompileJIT<1>, gprT2, gprT3);
mVUrestoreRegs(mVU); mVUrestoreRegs(mVU);
xJMP(gprT1); // Jump to rec-code address xJMP(gprT1); // Jump to rec-code address

View File

@ -199,10 +199,8 @@ __fi void handleBadOp(mV, int count) {
#ifdef PCSX2_DEVBUILD #ifdef PCSX2_DEVBUILD
if (mVUinfo.isBadOp) { if (mVUinfo.isBadOp) {
mVUbackupRegs(mVU, true); mVUbackupRegs(mVU, true);
xMOV(gprT2, mVU.prog.cur->idx); if (!isVU1) xFastCall(mVUbadOp0, mVU.prog.cur->idx, xPC);
xMOV(gprT3, xPC); else xFastCall(mVUbadOp1, mVU.prog.cur->idx, xPC);
if (!isVU1) xCALL(mVUbadOp0);
else xCALL(mVUbadOp1);
mVUrestoreRegs(mVU, true); mVUrestoreRegs(mVU, true);
} }
#endif #endif
@ -350,9 +348,8 @@ void mVUsetCycles(mV) {
void mVUdebugPrintBlocks(microVU& mVU, bool isEndPC) { void mVUdebugPrintBlocks(microVU& mVU, bool isEndPC) {
if (mVUdebugNow) { if (mVUdebugNow) {
mVUbackupRegs(mVU, true); mVUbackupRegs(mVU, true);
xMOV(gprT2, xPC); if (isEndPC) xFastCall(mVUprintPC2, xPC);
if (isEndPC) xCALL(mVUprintPC2); else xFastCall(mVUprintPC1, xPC);
else xCALL(mVUprintPC1);
mVUrestoreRegs(mVU, true); mVUrestoreRegs(mVU, true);
} }
} }
@ -380,9 +377,7 @@ void mVUtestCycles(microVU& mVU) {
// TEST32ItoM((uptr)&mVU.regs().flags, VUFLAG_MFLAGSET); // TEST32ItoM((uptr)&mVU.regs().flags, VUFLAG_MFLAGSET);
// xFowardJZ32 vu0jmp; // xFowardJZ32 vu0jmp;
// mVUbackupRegs(mVU, true); // mVUbackupRegs(mVU, true);
// xMOV(gprT2, mVU.prog.cur->idx); // xFastCall(mVUwarning0, mVU.prog.cur->idx, xPC); // VU0 is allowed early exit for COP2 Interlock Simulation
// xMOV(gprT3, xPC);
// xCALL(mVUwarning0); // VU0 is allowed early exit for COP2 Interlock Simulation
// mVUrestoreRegs(mVU, true); // mVUrestoreRegs(mVU, true);
mVUsavePipelineState(mVU); mVUsavePipelineState(mVU);
mVUendProgram(mVU, NULL, 0); mVUendProgram(mVU, NULL, 0);
@ -390,9 +385,7 @@ void mVUtestCycles(microVU& mVU) {
} }
else { else {
mVUbackupRegs(mVU, true); mVUbackupRegs(mVU, true);
xMOV(gprT2, mVU.prog.cur->idx); xFastCall(mVUwarning1, mVU.prog.cur->idx, xPC);
xMOV(gprT3, xPC);
xCALL(mVUwarning1);
mVUrestoreRegs(mVU, true); mVUrestoreRegs(mVU, true);
mVUsavePipelineState(mVU); mVUsavePipelineState(mVU);
mVUendProgram(mVU, NULL, 0); mVUendProgram(mVU, NULL, 0);

View File

@ -19,139 +19,96 @@
// Dispatcher Functions // Dispatcher Functions
//------------------------------------------------------------------ //------------------------------------------------------------------
// Generates the code for entering recompiled blocks // Generates the code for entering/exit recompiled blocks
void mVUdispatcherA(mV) { void mVUdispatcherAB(mV) {
mVU.startFunct = x86Ptr; mVU.startFunct = x86Ptr;
// Backup cpu state {
xPUSH(ebp); xScopedStackFrame frame(false, true);
xPUSH(ebx);
xPUSH(esi);
xPUSH(edi);
// Align the stackframe (GCC only, since GCC assumes stackframe is always aligned) // __fastcall = The caller has already put the needed parameters in ecx/edx:
#ifdef __GNUC__ if (!isVU1) { xFastCall(mVUexecuteVU0, ecx, edx); }
xSUB(esp, 12); else { xFastCall(mVUexecuteVU1, ecx, edx); }
#endif
// __fastcall = The caller has already put the needed parameters in ecx/edx: // Load VU's MXCSR state
if (!isVU1) { xCALL(mVUexecuteVU0); } xLDMXCSR(g_sseVUMXCSR);
else { xCALL(mVUexecuteVU1); }
// Load VU's MXCSR state // Load Regs
xLDMXCSR(g_sseVUMXCSR); xMOV(gprF0, ptr32[&mVU.regs().VI[REG_STATUS_FLAG].UL]);
xMOV(gprF1, gprF0);
xMOV(gprF2, gprF0);
xMOV(gprF3, gprF0);
// Load Regs xMOVAPS (xmmT1, ptr128[&mVU.regs().VI[REG_MAC_FLAG].UL]);
xMOV(gprF0, ptr32[&mVU.regs().VI[REG_STATUS_FLAG].UL]); xSHUF.PS(xmmT1, xmmT1, 0);
xMOV(gprF1, gprF0); xMOVAPS (ptr128[mVU.macFlag], xmmT1);
xMOV(gprF2, gprF0);
xMOV(gprF3, gprF0);
xMOVAPS (xmmT1, ptr128[&mVU.regs().VI[REG_MAC_FLAG].UL]); xMOVAPS (xmmT1, ptr128[&mVU.regs().VI[REG_CLIP_FLAG].UL]);
xSHUF.PS(xmmT1, xmmT1, 0); xSHUF.PS(xmmT1, xmmT1, 0);
xMOVAPS (ptr128[mVU.macFlag], xmmT1); xMOVAPS (ptr128[mVU.clipFlag], xmmT1);
xMOVAPS (xmmT1, ptr128[&mVU.regs().VI[REG_CLIP_FLAG].UL]); xMOVAPS (xmmT1, ptr128[&mVU.regs().VI[REG_P].UL]);
xSHUF.PS(xmmT1, xmmT1, 0); xMOVAPS (xmmPQ, ptr128[&mVU.regs().VI[REG_Q].UL]);
xMOVAPS (ptr128[mVU.clipFlag], xmmT1); xSHUF.PS(xmmPQ, xmmT1, 0); // wzyx = PPQQ
xMOVAPS (xmmT1, ptr128[&mVU.regs().VI[REG_P].UL]); // Jump to Recompiled Code Block
xMOVAPS (xmmPQ, ptr128[&mVU.regs().VI[REG_Q].UL]); xJMP(eax);
xSHUF.PS(xmmPQ, xmmT1, 0); // wzyx = PPQQ
// Jump to Recompiled Code Block mVU.exitFunct = x86Ptr;
xJMP(eax);
pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
"microVU: Dispatcher generation exceeded reserved cache area!");
}
// Generates the code to exit from recompiled blocks // Load EE's MXCSR state
void mVUdispatcherB(mV) { xLDMXCSR(g_sseMXCSR);
mVU.exitFunct = x86Ptr;
// Load EE's MXCSR state // __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers;
xLDMXCSR(g_sseMXCSR); // all other arguments are passed right to left.
if (!isVU1) { xFastCall(mVUcleanUpVU0); }
// __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; else { xFastCall(mVUcleanUpVU1); }
// all other arguments are passed right to left. }
if (!isVU1) { xCALL(mVUcleanUpVU0); }
else { xCALL(mVUcleanUpVU1); }
// Unalign the stackframe:
#ifdef __GNUC__
xADD( esp, 12 );
#endif
// Restore cpu state
xPOP(edi);
xPOP(esi);
xPOP(ebx);
xPOP(ebp);
xRET(); xRET();
pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize), pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
"microVU: Dispatcher generation exceeded reserved cache area!"); "microVU: Dispatcher generation exceeded reserved cache area!");
} }
// Generates the code for resuming xgkick // Generates the code for resuming/exit xgkick
void mVUdispatcherC(mV) { void mVUdispatcherCD(mV) {
mVU.startFunctXG = x86Ptr; mVU.startFunctXG = x86Ptr;
// Backup cpu state {
xPUSH(ebp); xScopedStackFrame frame(false, true);
xPUSH(ebx);
xPUSH(esi);
xPUSH(edi);
// Align the stackframe (GCC only, since GCC assumes stackframe is always aligned) // Load VU's MXCSR state
#ifdef __GNUC__ xLDMXCSR(g_sseVUMXCSR);
xSUB(esp, 12);
#endif
// Load VU's MXCSR state mVUrestoreRegs(mVU);
xLDMXCSR(g_sseVUMXCSR);
mVUrestoreRegs(mVU); xMOV(gprF0, ptr32[&mVU.statFlag[0]]);
xMOV(gprF1, ptr32[&mVU.statFlag[1]]);
xMOV(gprF2, ptr32[&mVU.statFlag[2]]);
xMOV(gprF3, ptr32[&mVU.statFlag[3]]);
xMOV(gprF0, ptr32[&mVU.statFlag[0]]); // Jump to Recompiled Code Block
xMOV(gprF1, ptr32[&mVU.statFlag[1]]); xJMP(ptr32[&mVU.resumePtrXG]);
xMOV(gprF2, ptr32[&mVU.statFlag[2]]);
xMOV(gprF3, ptr32[&mVU.statFlag[3]]);
// Jump to Recompiled Code Block mVU.exitFunctXG = x86Ptr;
xJMP(ptr32[&mVU.resumePtrXG]);
pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
"microVU: Dispatcher generation exceeded reserved cache area!");
}
// Generates the code to exit from xgkick //xPOP(gprT1); // Pop return address
void mVUdispatcherD(mV) { //xMOV(ptr32[&mVU.resumePtrXG], gprT1);
mVU.exitFunctXG = x86Ptr;
//xPOP(gprT1); // Pop return address // Backup Status Flag (other regs were backed up on xgkick)
//xMOV(ptr32[&mVU.resumePtrXG], gprT1); xMOV(ptr32[&mVU.statFlag[0]], gprF0);
xMOV(ptr32[&mVU.statFlag[1]], gprF1);
xMOV(ptr32[&mVU.statFlag[2]], gprF2);
xMOV(ptr32[&mVU.statFlag[3]], gprF3);
// Backup Status Flag (other regs were backed up on xgkick) // Load EE's MXCSR state
xMOV(ptr32[&mVU.statFlag[0]], gprF0); xLDMXCSR(g_sseMXCSR);
xMOV(ptr32[&mVU.statFlag[1]], gprF1);
xMOV(ptr32[&mVU.statFlag[2]], gprF2);
xMOV(ptr32[&mVU.statFlag[3]], gprF3);
// Load EE's MXCSR state }
xLDMXCSR(g_sseMXCSR);
// Unalign the stackframe:
#ifdef __GNUC__
xADD( esp, 12 );
#endif
// Restore cpu state
xPOP(edi);
xPOP(esi);
xPOP(ebx);
xPOP(ebp);
xRET(); xRET();
pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize), pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
"microVU: Dispatcher generation exceeded reserved cache area!"); "microVU: Dispatcher generation exceeded reserved cache area!");
} }

View File

@ -1219,8 +1219,7 @@ static __fi void mVU_XGKICK_DELAY(mV) {
xMOV (ptr32[&mVU.resumePtrXG], (uptr)xGetPtr() + 10 + 6); xMOV (ptr32[&mVU.resumePtrXG], (uptr)xGetPtr() + 10 + 6);
xJcc32(Jcc_NotZero, (uptr)mVU.exitFunctXG - ((uptr)xGetPtr()+6)); xJcc32(Jcc_NotZero, (uptr)mVU.exitFunctXG - ((uptr)xGetPtr()+6));
#endif #endif
xMOV(gprT2, ptr32[&mVU.VIxgkick]); xFastCall(mVU_XGKICK_, ptr32[&mVU.VIxgkick]);
xCALL(mVU_XGKICK_);
mVUrestoreRegs(mVU); mVUrestoreRegs(mVU);
} }

View File

@ -249,15 +249,15 @@ void recBC2TL() { _setupBranchTest(JZ32, true); }
void COP2_Interlock(bool mBitSync) { void COP2_Interlock(bool mBitSync) {
if (cpuRegs.code & 1) { if (cpuRegs.code & 1) {
iFlushCall(FLUSH_EVERYTHING | FLUSH_PC); iFlushCall(FLUSH_EVERYTHING | FLUSH_PC);
if (mBitSync) xCALL(_vu0WaitMicro); if (mBitSync) xFastCall(_vu0WaitMicro);
else xCALL(_vu0FinishMicro); else xFastCall(_vu0FinishMicro);
} }
} }
void TEST_FBRST_RESET(FnType_Void* resetFunct, int vuIndex) { void TEST_FBRST_RESET(FnType_Void* resetFunct, int vuIndex) {
xTEST(eax, (vuIndex) ? 0x200 : 0x002); xTEST(eax, (vuIndex) ? 0x200 : 0x002);
xForwardJZ8 skip; xForwardJZ8 skip;
xCALL(resetFunct); xFastCall(resetFunct);
xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
skip.SetTarget(); skip.SetTarget();
} }
@ -316,8 +316,8 @@ static void recCTC2() {
xMOV(ecx, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); xMOV(ecx, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
} }
else xXOR(ecx, ecx); else xXOR(ecx, ecx);
xCALL(vu1ExecMicro); xFastCall(vu1ExecMicro, ecx);
xCALL(vif1VUFinish); xFastCall(vif1VUFinish);
break; break;
case REG_FBRST: case REG_FBRST:
if (!_Rt_) { if (!_Rt_) {
@ -336,8 +336,7 @@ static void recCTC2() {
// Executing vu0 block here fixes the intro of Ratchet and Clank // Executing vu0 block here fixes the intro of Ratchet and Clank
// sVU's COP2 has a comment that "Donald Duck" needs this too... // sVU's COP2 has a comment that "Donald Duck" needs this too...
if (_Rd_) _eeMoveGPRtoM((uptr)&vu0Regs.VI[_Rd_].UL, _Rt_); if (_Rd_) _eeMoveGPRtoM((uptr)&vu0Regs.VI[_Rd_].UL, _Rt_);
xMOV(ecx, (uptr)CpuVU0); xFastCall(BaseVUmicroCPU::ExecuteBlockJIT, (uptr)CpuVU0);
xCALL(BaseVUmicroCPU::ExecuteBlockJIT);
break; break;
} }
} }