Merge pull request #1100 from PCSX2/recompiler-abi-wrapper

Recompiler abi wrapper
This commit is contained in:
Gregory Hainaut 2016-01-14 19:21:27 +01:00
commit a7a8c542f5
20 changed files with 362 additions and 579 deletions

View File

@ -68,5 +68,134 @@ struct xImpl_JmpCall
}
};
// Yes, this is awful: the template code below lives in a header with a nice circular dependency, hence these forward declarations.
extern const xImpl_Mov xMOV;
extern const xImpl_JmpCall xCALL;
// Emitter helper that loads up to two arguments into the argument registers
// and then emits the call itself:
//   x86-64 build : a1 -> rdi, a2 -> rsi
//   32-bit build : a1 -> ecx, a2 -> edx
// NOTE(review): a1 is always moved before a2, so a second argument supplied in
// the register that receives a1 would be overwritten first -- confirm that no
// caller relies on that.
struct xImpl_FastCall
{
	// FIXME: the current 64-bit path is mostly a copy/paste of the 32-bit one;
	// potentially it would need to push/pop some registers, but it is probably
	// enough to handle the first call.

	// The call sequences are macros (type unsafety is nice) so that the xMOV/xCALL
	// overloads are picked from the argument types of each operator(). They expect
	// `func`, `a1` and `a2` to be in scope where they are expanded.
#ifdef __x86_64__
#define XFASTCALL_ARG1 rdi
#define XFASTCALL_ARG2 rsi
#else
#define XFASTCALL_ARG1 ecx
#define XFASTCALL_ARG2 edx
#endif

#define XFASTCALL \
	xCALL(func);

#define XFASTCALL1 \
	xMOV(XFASTCALL_ARG1, a1); \
	xCALL(func);

#define XFASTCALL2 \
	xMOV(XFASTCALL_ARG1, a1); \
	xMOV(XFASTCALL_ARG2, a2); \
	xCALL(func);

	// Function pointer target with zero, one or two register arguments.
	// An empty a1 emits a bare call (a2 is then ignored); an empty a2 emits
	// the one-argument sequence.
	template< typename T > __fi __always_inline_tmpl_fail
	void operator()( T* func, const xRegister32& a1 = xEmptyReg, const xRegister32& a2 = xEmptyReg) const
	{
		if (a1.IsEmpty()) {
			XFASTCALL;
			return;
		}
		if (a2.IsEmpty()) {
			XFASTCALL1;
			return;
		}
		XFASTCALL2;
	}

	// Function pointer target; immediate first argument, register second argument.
	template< typename T > __fi __always_inline_tmpl_fail
	void operator()( T* func, u32 a1, const xRegister32& a2) const
	{
		XFASTCALL2;
	}

	// Function pointer target; single argument given as an xIndirectVoid operand.
	template< typename T > __fi __always_inline_tmpl_fail
	void operator()( T* func, const xIndirectVoid& a1) const
	{
		XFASTCALL1;
	}

	// Function pointer target; two immediate arguments.
	template< typename T > __fi __always_inline_tmpl_fail
	void operator()( T* func, u32 a1, u32 a2) const
	{
		XFASTCALL2;
	}

	// Function pointer target; one immediate argument.
	template< typename T > __fi __always_inline_tmpl_fail
	void operator()( T* func, u32 a1) const
	{
		XFASTCALL1;
	}

	// Call target given as an xIndirect32 operand, with zero, one or two
	// register arguments (same empty-register rules as the pointer overload).
	void operator()(const xIndirect32& func, const xRegister32& a1 = xEmptyReg, const xRegister32& a2 = xEmptyReg) const
	{
		if (a1.IsEmpty()) {
			XFASTCALL;
			return;
		}
		if (a2.IsEmpty()) {
			XFASTCALL1;
			return;
		}
		XFASTCALL2;
	}

#undef XFASTCALL
#undef XFASTCALL1
#undef XFASTCALL2
#undef XFASTCALL_ARG1
#undef XFASTCALL_ARG2
};
} // End namespace x86Emitter

View File

@ -93,6 +93,7 @@ namespace x86Emitter
#else
extern const xImpl_JmpCall xCALL;
#endif
extern const xImpl_FastCall xFastCall;
// ------------------------------------------------------------------------
extern const xImpl_CMov
@ -183,19 +184,15 @@ namespace x86Emitter
extern void xINTO();
//////////////////////////////////////////////////////////////////////////////////////////
// Helper function to handle the various functions ABI
extern void xFastCall(void* func, const xRegister32& a1 = xEmptyReg, const xRegister32& a2 = xEmptyReg);
extern void xFastCall(void* func, const xRegisterSSE& a1, const xRegisterSSE& a2);
extern void xFastCall(void* func, u32 a1, u32 a2);
extern void xFastCall(void* func, u32 a1);
extern void xStdCall(void* func, u32 a1);
// Helper object to handle the various functions ABI
// Scoped helper: the constructor emits the stack-frame prologue and the
// destructor emits the matching epilogue.
class xScopedStackFrame
{
bool m_base_frame; // when set, the prologue pushes the base pointer and copies the stack pointer into it
bool m_save_base_pointer; // when set (and m_base_frame is not), the base pointer is only pushed/popped
int m_offset; // byte count assumed to be on the stack (starts at the caller-supplied offset); used to compute the alignment adjustment
xScopedStackFrame(bool base_frame);
public:
xScopedStackFrame(bool base_frame, bool save_base_pointer = false, int offset = 0);
~xScopedStackFrame();
};

View File

@ -42,6 +42,8 @@ void xImpl_JmpCall::operator()( const xIndirect16& src ) const { xOpWrite( 0x6
const xImpl_JmpCall xJMP = { true };
const xImpl_JmpCall xCALL = { false };
const xImpl_FastCall xFastCall = { };
void xSmartJump::SetTarget()
{
u8* target = xGetPtr();

View File

@ -1022,123 +1022,99 @@ __emitinline void xRestoreReg( const xRegisterSSE& dest )
xMOVDQA( dest, ptr[&xmm_data[dest.Id*2]] );
}
//////////////////////////////////////////////////////////////////////////////////////////
// Helper function to handle the various functions ABI
// Emits a call to `func`, first copying every non-empty argument register into
// its slot (a1 -> ecx, a2 -> edx). The x86-64 build has no implementation yet
// and only asserts.
__emitinline void xFastCall(void *func, const xRegister32& a1, const xRegister32& a2)
{
#ifndef __x86_64__
	if (!a1.IsEmpty()) {
		xMOV(ecx, a1);
	}
	if (!a2.IsEmpty()) {
		xMOV(edx, a2);
	}
	xCALL(func);
#else
	// NEW ABI
	pxAssert(0);
#endif
}
// Emits a call to `func` after moving the two SSE register arguments into
// ecx (a1) and edx (a2) with xMOVD. The x86-64 build has no implementation
// yet and only asserts.
__emitinline void xFastCall(void *func, const xRegisterSSE& a1, const xRegisterSSE& a2)
{
#ifndef __x86_64__
	xMOVD(ecx, a1);
	xMOVD(edx, a2);
	xCALL(func);
#else
	// NEW ABI
	pxAssert(0);
#endif
}
// Emits a call to `func` with two immediate arguments: a1 is loaded into ecx
// and a2 into edx. The x86-64 build has no implementation yet and only asserts.
__emitinline void xFastCall(void *func, u32 a1, u32 a2)
{
#ifndef __x86_64__
	xMOV(ecx, a1);
	xMOV(edx, a2);
	xCALL(func);
#else
	// NEW ABI
	pxAssert(0);
#endif
}
// Emits a call to `func` with a single immediate argument loaded into ecx.
// The x86-64 build has no implementation yet and only asserts.
__emitinline void xFastCall(void *func, u32 a1)
{
#ifndef __x86_64__
	xMOV(ecx, a1);
	xCALL(func);
#else
	// NEW ABI
	pxAssert(0);
#endif
}
// Emits a call to `func` with a single immediate argument pushed on the stack.
// The x86-64 build has no implementation yet and only asserts.
__emitinline void xStdCall(void *func, u32 a1)
{
#ifndef __x86_64__
	// GCC note: unlike a C call, GCC does not require
	// strict 16B alignment on a std call
	xPUSH(a1);
	xCALL(func);
#else
	// NEW ABI
	pxAssert(0);
#endif
}
//////////////////////////////////////////////////////////////////////////////////////////
// Helper object to handle ABI frame
#ifdef __GNUC__
xScopedStackFrame::xScopedStackFrame(bool base_frame)
#ifdef __x86_64__
// GCC ensures/requires stack to be 16 bytes aligned (but when?)
#define ALIGN_STACK(v) xADD(rsp, v)
#else
// GCC ensures/requires stack to be 16 bytes aligned before the call
// Call will store 4 bytes. EDI/ESI/EBX will take another 12 bytes.
// EBP will take 4 bytes if m_base_frame is enabled
#define ALIGN_STACK(v) xADD(esp, v)
#endif
#else
#define ALIGN_STACK(v)
#endif
// Emits the frame prologue: optionally sets up / saves the base pointer, pushes
// the saved register context, then adjusts the stack pointer through ALIGN_STACK.
//   base_frame        - push the base pointer and copy the stack pointer into it
//   save_base_pointer - only push the base pointer (used when base_frame is false)
//   offset            - initial value of m_offset, the byte count used to compute
//                       the alignment adjustment
xScopedStackFrame::xScopedStackFrame(bool base_frame, bool save_base_pointer, int offset)
{
m_base_frame = base_frame;
m_save_base_pointer = save_base_pointer;
m_offset = offset;
#ifdef __x86_64__
// NEW ABI
pxAssert(0);
m_offset += 8; // Call stores the return address (8 bytes)
// Note rbp can surely be optimized in 64 bits
if (m_base_frame) {
xPUSH( rbp );
xMOV( rbp, rsp );
m_offset += 8;
} else if (m_save_base_pointer) {
xPUSH( rbp );
m_offset += 8;
}
// Save the register context (five 8-byte pushes, accounted for below)
xPUSH( rbx );
xPUSH( r12 );
xPUSH( r13 );
xPUSH( r14 );
xPUSH( r15 );
m_offset += 40;
#else
m_offset += 4; // Call stores the return address (4 bytes)
// Create a new frame
if (m_base_frame) {
xPUSH( ebp );
xMOV( ebp, esp );
m_offset += 4;
} else if (m_save_base_pointer) {
xPUSH( ebp );
m_offset += 4;
}
// Save the register context
xPUSH( edi );
xPUSH( esi );
xPUSH( ebx );
#ifdef __GNUC__
// Realign the stack to 16 byte
if (m_base_frame) {
xSUB( esp, 12);
}
#endif
m_offset += 12; // the three 4-byte pushes above
#endif
// Adds -(16 - m_offset % 16) to the stack pointer, i.e. lowers it by between
// 1 and 16 bytes (a full 16 when m_offset is already a multiple of 16).
ALIGN_STACK(-(16 - m_offset % 16));
}
xScopedStackFrame::~xScopedStackFrame()
{
#ifdef __x86_64__
// NEW ABI
pxAssert(0);
#else
ALIGN_STACK(16 - m_offset % 16);
#ifdef __GNUC__
// Restore the stack (due to the above alignment)
// Potentially it can be restored from ebp
#ifdef __x86_64__
// Restore the register context
xPOP( r15 );
xPOP( r14 );
xPOP( r13 );
xPOP( r12 );
xPOP( rbx );
// Destroy the frame
if (m_base_frame) {
xADD( esp, 12);
xLEAVE();
} else if (m_save_base_pointer) {
xPOP( rbp );
}
#endif
#else
// Restore the register context
xPOP( ebx );
@ -1148,6 +1124,8 @@ xScopedStackFrame::~xScopedStackFrame()
// Destroy the frame
if (m_base_frame) {
xLEAVE();
} else if (m_save_base_pointer) {
xPOP( ebp );
}
#endif

View File

@ -112,7 +112,7 @@ void recDI()
//xMOV(eax, ptr[&cpuRegs.cycle ]);
//xMOV(ptr[&g_nextBranchCycle], eax);
//xCALL((void*)(uptr)Interp::DI );
//xFastCall((void*)(uptr)Interp::DI );
xMOV(eax, ptr[&cpuRegs.CP0.n.Status]);
xTEST(eax, 0x20006); // EXL | ERL | EDI
@ -170,12 +170,12 @@ void recMFC0()
case 1:
iFlushCall(FLUSH_INTERPRETER);
xCALL( COP0_UpdatePCCR );
xFastCall(COP0_UpdatePCCR );
xMOV(eax, ptr[&cpuRegs.PERF.n.pcr0]);
break;
case 3:
iFlushCall(FLUSH_INTERPRETER);
xCALL( COP0_UpdatePCCR );
xFastCall(COP0_UpdatePCCR );
xMOV(eax, ptr[&cpuRegs.PERF.n.pcr1]);
break;
}
@ -207,8 +207,7 @@ void recMTC0()
{
case 12:
iFlushCall(FLUSH_INTERPRETER);
xMOV( ecx, g_cpuConstRegs[_Rt_].UL[0] );
xCALL( WriteCP0Status );
xFastCall(WriteCP0Status, g_cpuConstRegs[_Rt_].UL[0] );
break;
case 9:
@ -222,9 +221,9 @@ void recMTC0()
{
case 0:
iFlushCall(FLUSH_INTERPRETER);
xCALL( COP0_UpdatePCCR );
xFastCall(COP0_UpdatePCCR );
xMOV( ptr32[&cpuRegs.PERF.n.pccr], g_cpuConstRegs[_Rt_].UL[0] );
xCALL( COP0_DiagnosticPCCR );
xFastCall(COP0_DiagnosticPCCR );
break;
case 1:
@ -257,7 +256,7 @@ void recMTC0()
case 12:
iFlushCall(FLUSH_INTERPRETER);
_eeMoveGPRtoR(ecx, _Rt_);
xCALL( WriteCP0Status );
xFastCall(WriteCP0Status, ecx );
break;
case 9:
@ -271,9 +270,9 @@ void recMTC0()
{
case 0:
iFlushCall(FLUSH_INTERPRETER);
xCALL( COP0_UpdatePCCR );
xFastCall(COP0_UpdatePCCR );
_eeMoveGPRtoM((uptr)&cpuRegs.PERF.n.pccr, _Rt_);
xCALL( COP0_DiagnosticPCCR );
xFastCall(COP0_DiagnosticPCCR );
break;
case 1:

View File

@ -92,7 +92,7 @@ static const __aligned16 u32 s_pos[4] = { 0x7fffffff, 0xffffffff, 0xffffffff, 0x
void f(); \
void rec##f() { \
iFlushCall(FLUSH_INTERPRETER); \
xCALL((void*)(uptr)R5900::Interpreter::OpcodeImpl::COP1::f); \
xFastCall((void*)(uptr)R5900::Interpreter::OpcodeImpl::COP1::f); \
branch = 2; \
}
@ -100,7 +100,7 @@ static const __aligned16 u32 s_pos[4] = { 0x7fffffff, 0xffffffff, 0xffffffff, 0x
void f(); \
void rec##f() { \
iFlushCall(FLUSH_INTERPRETER); \
xCALL((void*)(uptr)R5900::Interpreter::OpcodeImpl::COP1::f); \
xFastCall((void*)(uptr)R5900::Interpreter::OpcodeImpl::COP1::f); \
}
//------------------------------------------------------------------
@ -550,7 +550,7 @@ void FPU_MUL(int regd, int regt, bool reverseOperands)
{
xMOVD(ecx, xRegisterSSE(reverseOperands ? regt : regd));
xMOVD(edx, xRegisterSSE(reverseOperands ? regd : regt));
xCALL((void*)(uptr)&FPU_MUL_HACK ); //returns the hacked result or 0
xFastCall((void*)(uptr)&FPU_MUL_HACK, ecx, edx); //returns the hacked result or 0
xTEST(eax, eax);
noHack = JZ8(0);
xMOVDZX(xRegisterSSE(regd), eax);

View File

@ -89,20 +89,6 @@ namespace DOUBLE {
#define FPUflagSO 0X00000010
#define FPUflagSU 0X00000008
#define REC_FPUBRANCH(f) \
void f(); \
void rec##f() { \
iFlushCall(FLUSH_INTERPRETER); \
xCALL((void*)(uptr)R5900::Interpreter::OpcodeImpl::COP1::f); \
branch = 2; \
}
#define REC_FPUFUNC(f) \
void f(); \
void rec##f() { \
iFlushCall(FLUSH_INTERPRETER); \
xCALL((void*)(uptr)R5900::Interpreter::OpcodeImpl::COP1::f); \
}
//------------------------------------------------------------------
//------------------------------------------------------------------
@ -416,7 +402,7 @@ void FPU_MUL(int info, int regd, int sreg, int treg, bool acc)
{
xMOVD(ecx, xRegisterSSE(sreg));
xMOVD(edx, xRegisterSSE(treg));
xCALL((void*)(uptr)&FPU_MUL_HACK ); //returns the hacked result or 0
xFastCall((void*)(uptr)&FPU_MUL_HACK, ecx, edx); //returns the hacked result or 0
xTEST(eax, eax);
noHack = JZ8(0);
xMOVDZX(xRegisterSSE(regd), eax);

View File

@ -185,7 +185,7 @@ void recPMFHL()
// fall to interp
_deleteEEreg(_Rd_, 0);
iFlushCall(FLUSH_INTERPRETER); // since calling CALLFunc
xCALL((void*)(uptr)R5900::Interpreter::OpcodeImpl::MMI::PMFHL );
xFastCall((void*)(uptr)R5900::Interpreter::OpcodeImpl::MMI::PMFHL );
break;
case 0x03: // LH

View File

@ -120,50 +120,6 @@ static void recEventTest()
_cpuEventTest_Shared();
}
// Reports a failed stack-frame sanity check through pxFailDev.
//   espORebp - 0 when ESP failed the check, anything else means EBP (callers pass 1).
//   regval   - current value of the register at the time the fault was detected
//              (predates the stackframe setup code in this function).
static void __fastcall StackFrameCheckFailed( int espORebp, int regval )
{
	const bool isEsp = (espORebp == 0);

	pxFailDev( pxsFmt( L"(R3000A Recompiler Stackframe) Sanity check failed on %ls\n\tCurrent=%d; Saved=%d",
		isEsp ? L"ESP" : L"EBP", regval, isEsp ? s_store_esp : s_store_ebp )
	);

	// Note: The recompiler will attempt to recover ESP and EBP after returning from
	// this function, so typically selecting Continue/Ignore/Cancel for this assertion
	// should allow PCSX2 to continue to run with some degree of stability.
}
// Emits code that compares ebp and esp against the values stored in s_store_ebp /
// s_store_esp. For each register, a mismatch makes the emitted code call
// StackFrameCheckFailed (ecx = which register, edx = its current value) and then
// reload the register from the stored value. Emits nothing when IsDevBuild is false.
static void _DynGen_StackFrameCheck()
{
if( !IsDevBuild ) return;
// --------- EBP Here -----------
xCMP( ebp, ptr[&s_store_ebp] );
xForwardJE8 skipassert_ebp;
xMOV( ecx, 1 ); // 1 specifies EBP
xMOV( edx, ebp );
xCALL( StackFrameCheckFailed );
xMOV( ebp, ptr[&s_store_ebp] ); // half-hearted frame recovery attempt!
skipassert_ebp.SetTarget();
// --------- ESP There -----------
xCMP( esp, ptr[&s_store_esp] );
xForwardJE8 skipassert_esp;
xXOR( ecx, ecx ); // 0 specifies ESP
xMOV( edx, esp );
xCALL( StackFrameCheckFailed );
xMOV( esp, ptr[&s_store_esp] ); // half-hearted frame recovery attempt!
skipassert_esp.SetTarget();
}
// The address for all cleared blocks. It recompiles the current pc and then
// dispatches to the recompiled block address.
static DynGenFunc* _DynGen_JITCompile()
@ -171,10 +127,8 @@ static DynGenFunc* _DynGen_JITCompile()
pxAssertMsg( iopDispatcherReg != NULL, "Please compile the DispatcherReg subroutine *before* JITComple. Thanks." );
u8* retval = xGetPtr();
_DynGen_StackFrameCheck();
xMOV( ecx, ptr[&psxRegs.pc] );
xCALL( iopRecRecompile );
xFastCall(iopRecRecompile, ptr[&psxRegs.pc] );
xMOV( eax, ptr[&psxRegs.pc] );
xMOV( ebx, eax );
@ -196,7 +150,6 @@ static DynGenFunc* _DynGen_JITCompileInBlock()
static DynGenFunc* _DynGen_DispatcherReg()
{
u8* retval = xGetPtr();
_DynGen_StackFrameCheck();
xMOV( eax, ptr[&psxRegs.pc] );
xMOV( ebx, eax );
@ -210,128 +163,21 @@ static DynGenFunc* _DynGen_DispatcherReg()
// --------------------------------------------------------------------------------------
// EnterRecompiledCode - dynamic compilation stub!
// --------------------------------------------------------------------------------------
// In Release Builds this literally generates the following code:
// push edi
// push esi
// push ebx
// jmp DispatcherReg
// pop ebx
// pop esi
// pop edi
//
// See notes on why this works in both GCC (aligned stack!) and other compilers (not-so-
// aligned stack!). In debug/dev builds the code gen is more complicated, as it constructs
// ebp stackframe mess, which allows for a complete backtrace from debug breakpoints (yay).
//
// Also, if you set PCSX2_IOP_FORCED_ALIGN_STACK to 1, the codegen for MSVC becomes slightly
// more complicated since it has to perform a full stack alignment on entry.
//
#if defined(__GNUG__) || defined(__DARWIN__)
# define PCSX2_ASSUME_ALIGNED_STACK 1
#else
# define PCSX2_ASSUME_ALIGNED_STACK 0
#endif
// Set to 0 for a speedup in release builds.
// [doesn't apply to GCC/Mac, which must always align]
#define PCSX2_IOP_FORCED_ALIGN_STACK 0 //1
// For overriding stackframe generation options in Debug builds (possibly useful for troubleshooting)
// Typically this value should be the same as IsDevBuild.
static const bool GenerateStackFrame = IsDevBuild;
static DynGenFunc* _DynGen_EnterRecompiledCode()
{
u8* retval = xGetPtr();
bool allocatedStack = GenerateStackFrame || PCSX2_IOP_FORCED_ALIGN_STACK;
// Optimization: The IOP never uses stack-based parameter invocation, so we can avoid
// allocating any room on the stack for it (which is important since the IOP's entry
// code gets invoked quite a lot).
if( allocatedStack )
{
xPUSH( ebp );
xMOV( ebp, esp );
xAND( esp, -0x10 );
u8* retval = xGetPtr();
xSUB( esp, 0x20 );
xMOV( ptr[ebp-12], edi );
xMOV( ptr[ebp-8], esi );
xMOV( ptr[ebp-4], ebx );
}
else
{
// GCC Compiler:
// The frame pointer coming in from the EE's event test can be safely assumed to be
// aligned, since GCC always aligns stackframes. While handy in x86-64, where CALL + PUSH EBP
// results in a neatly realigned stack on entry to every function, unfortunately in x86-32
// this is usually worthless because CALL+PUSH leaves us 8 byte aligned instead (fail). So
// we have to do the usual set of stackframe alignments and simulated callstack mess
// *regardless*.
// MSVC/Intel compilers:
// The PCSX2_IOP_FORCED_ALIGN_STACK setting is 0, so we don't care. Just push regs like
// the good old days! (stack alignment will be indeterminate)
xPUSH( edi );
xPUSH( esi );
xPUSH( ebx );
allocatedStack = false;
}
uptr* imm = NULL;
if( allocatedStack )
{
if( GenerateStackFrame )
{
// Simulate a CALL function by pushing the call address and EBP onto the stack.
// This retains proper stacktrace and stack unwinding (handy in devbuilds!)
xMOV( ptr32[esp+0x0c], 0xffeeff );
imm = (uptr*)(xGetPtr()-4);
// This part simulates the "normal" stackframe prep of "push ebp, mov ebp, esp"
xMOV( ptr32[esp+0x08], ebp );
xLEA( ebp, ptr32[esp+0x08] );
}
}
if( IsDevBuild )
{
xMOV( ptr[&s_store_esp], esp );
xMOV( ptr[&s_store_ebp], ebp );
}
{ // Properly scope the frame prologue/epilogue
xScopedStackFrame frame(IsDevBuild);
xJMP(iopDispatcherReg);
if( imm != NULL )
*imm = (uptr)xGetPtr();
// ----------------------
// ----> Cleanup! ---->
// Save an exit point
iopExitRecompiledCode = (DynGenFunc*)xGetPtr();
if( allocatedStack )
{
// pop the nested "simulated call" stackframe, if needed:
if( GenerateStackFrame ) xLEAVE();
xMOV( edi, ptr[ebp-12] );
xMOV( esi, ptr[ebp-8] );
xMOV( ebx, ptr[ebp-4] );
xLEAVE();
}
else
{
xPOP( ebx );
xPOP( esi );
xPOP( edi );
}
xRET();
@ -352,7 +198,7 @@ static void _DynGen_Dispatchers()
// Place the EventTest and DispatcherReg stuff at the top, because they get called the
// most and stand to benefit from strong alignment and direct referencing.
iopDispatcherEvent = (DynGenFunc*)xGetPtr();
xCALL( recEventTest );
xFastCall(recEventTest );
iopDispatcherReg = _DynGen_DispatcherReg();
iopJITCompile = _DynGen_JITCompile();
@ -676,11 +522,11 @@ void psxRecompileCodeConst1(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode)
}
if (debug)
xCALL(debug);
xFastCall(debug);
#endif
irxHLE hle = irxImportHLE(libname, index);
if (hle) {
xCALL(hle);
xFastCall(hle);
xCMP(eax, 0);
xJNE(iopDispatcherReg);
}
@ -1060,7 +906,7 @@ static void iPsxBranchTest(u32 newpc, u32 cpuBranch)
xSUB(ptr32[&iopCycleEE], eax);
xJLE(iopExitRecompiledCode);
xCALL(iopEventTest);
xFastCall(iopEventTest);
if( newpc != 0xffffffff )
{
@ -1082,7 +928,7 @@ static void iPsxBranchTest(u32 newpc, u32 cpuBranch)
xSUB(eax, ptr32[&g_iopNextEventCycle]);
xForwardJS<u8> nointerruptpending;
xCALL(iopEventTest);
xFastCall(iopEventTest);
if( newpc != 0xffffffff ) {
xCMP(ptr32[&psxRegs.pc], newpc);
@ -1117,9 +963,9 @@ void rpsxSYSCALL()
xMOV(ptr32[&psxRegs.pc], psxpc - 4);
_psxFlushCall(FLUSH_NODESTROY);
xMOV( ecx, 0x20 ); // exception code
xMOV( edx, psxbranch==1 ); // branch delay slot?
xCALL( psxException );
//xMOV( ecx, 0x20 ); // exception code
//xMOV( edx, psxbranch==1 ); // branch delay slot?
xFastCall(psxException, 0x20, psxbranch == 1 );
xCMP(ptr32[&psxRegs.pc], psxpc-4);
j8Ptr[0] = JE8(0);
@ -1140,9 +986,9 @@ void rpsxBREAK()
xMOV(ptr32[&psxRegs.pc], psxpc - 4);
_psxFlushCall(FLUSH_NODESTROY);
xMOV( ecx, 0x24 ); // exception code
xMOV( edx, psxbranch==1 ); // branch delay slot?
xCALL( psxException );
//xMOV( ecx, 0x24 ); // exception code
//xMOV( edx, psxbranch==1 ); // branch delay slot?
xFastCall(psxException, 0x24, psxbranch == 1 );
xCMP(ptr32[&psxRegs.pc], psxpc-4);
j8Ptr[0] = JE8(0);
@ -1255,8 +1101,7 @@ static void __fastcall iopRecRecompile( const u32 startpc )
if( IsDebugBuild )
{
xMOV(ecx, psxpc);
xCALL(PreBlockCheck);
xFastCall(PreBlockCheck, psxpc);
}
// go until the next branch

View File

@ -32,7 +32,7 @@ extern u32 g_psxMaxRecMem;
static void rpsx##f() { \
xMOV(ptr32[&psxRegs.code], (u32)psxRegs.code); \
_psxFlushCall(FLUSH_EVERYTHING); \
xCALL((void*)(uptr)psx##f); \
xFastCall((void*)(uptr)psx##f); \
PSX_DEL_CONST(_Rt_); \
/* branch = 2; */\
}
@ -626,7 +626,7 @@ static void rpsxLB()
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_);
xCALL( iopMemRead8 ); // returns value in EAX
xFastCall(iopMemRead8, ecx ); // returns value in EAX
if (_Rt_) {
xMOVSX(eax, al);
xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax);
@ -642,7 +642,7 @@ static void rpsxLBU()
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_);
xCALL( iopMemRead8 ); // returns value in EAX
xFastCall(iopMemRead8, ecx ); // returns value in EAX
if (_Rt_) {
xMOVZX(eax, al);
xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax);
@ -658,7 +658,7 @@ static void rpsxLH()
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_);
xCALL( iopMemRead16 ); // returns value in EAX
xFastCall(iopMemRead16, ecx ); // returns value in EAX
if (_Rt_) {
xMOVSX(eax, ax);
xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax);
@ -674,7 +674,7 @@ static void rpsxLHU()
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_);
xCALL( iopMemRead16 ); // returns value in EAX
xFastCall(iopMemRead16, ecx ); // returns value in EAX
if (_Rt_) {
xMOVZX(eax, ax);
xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax);
@ -695,7 +695,7 @@ static void rpsxLW()
xTEST(ecx, 0x10000000);
j8Ptr[0] = JZ8(0);
xCALL( iopMemRead32 ); // returns value in EAX
xFastCall(iopMemRead32, ecx ); // returns value in EAX
if (_Rt_) {
xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax);
}
@ -721,7 +721,7 @@ static void rpsxSB()
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_);
xMOV( edx, ptr[&psxRegs.GPR.r[_Rt_]] );
xCALL( iopMemWrite8 );
xFastCall(iopMemWrite8, ecx, edx );
}
static void rpsxSH()
@ -732,7 +732,7 @@ static void rpsxSH()
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_);
xMOV( edx, ptr[&psxRegs.GPR.r[_Rt_]] );
xCALL( iopMemWrite16 );
xFastCall(iopMemWrite16, ecx, edx );
}
static void rpsxSW()
@ -743,7 +743,7 @@ static void rpsxSW()
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_);
xMOV( edx, ptr[&psxRegs.GPR.r[_Rt_]] );
xCALL( iopMemWrite32 );
xFastCall(iopMemWrite32, ecx, edx );
}
//// SLL
@ -1371,7 +1371,7 @@ void rpsxRFE()
// Test the IOP's INTC status, so that any pending ints get raised.
_psxFlushCall(0);
xCALL((void*)(uptr)&iopTestIntc );
xFastCall((void*)(uptr)&iopTestIntc );
}
// R3000A tables

View File

@ -71,7 +71,7 @@ namespace OpcodeImpl {
// xMOV(ptr32[&cpuRegs.code], cpuRegs.code );
// xMOV(ptr32[&cpuRegs.pc], pc );
// iFlushCall(FLUSH_EVERYTHING);
// xCALL((void*)(uptr)CACHE );
// xFastCall((void*)(uptr)CACHE );
// //branch = 2;
//
// xCMP(ptr32[(u32*)((int)&cpuRegs.pc)], pc);
@ -203,7 +203,7 @@ void recMTSAH()
//xMOV(ptr32[&cpuRegs.code], (u32)cpuRegs.code );
//xMOV(ptr32[&cpuRegs.pc], (u32)pc );
//iFlushCall(FLUSH_EVERYTHING);
//xCALL((void*)(uptr)R5900::Interpreter::OpcodeImpl::CACHE );
//xFastCall((void*)(uptr)R5900::Interpreter::OpcodeImpl::CACHE );
//branch = 2;
}

View File

@ -340,7 +340,7 @@ void recBranchCall( void (*func)() )
void recCall( void (*func)() )
{
iFlushCall(FLUSH_INTERPRETER);
xCALL(func);
xFastCall(func);
}
// =====================================================================================================
@ -372,50 +372,6 @@ static void recEventTest()
_cpuEventTest_Shared();
}
// Reports a failed stack-frame sanity check through pxFailDev.
//   espORebp - 0 when ESP failed the check, anything else means EBP (callers pass 1).
//   regval   - current value of the register at the time the fault was detected
//              (predates the stackframe setup code in this function).
static void __fastcall StackFrameCheckFailed( int espORebp, int regval )
{
	const bool isEsp = (espORebp == 0);

	pxFailDev( wxsFormat( L"(R5900 Recompiler Stackframe) Sanity check failed on %s\n\tCurrent=%d; Saved=%d",
		isEsp ? L"ESP" : L"EBP", regval, isEsp ? s_store_esp : s_store_ebp )
	);

	// Note: The recompiler will attempt to recover ESP and EBP after returning from
	// this function, so typically selecting Continue/Ignore/Cancel for this assertion
	// should allow PCSX2 to continue to run with some degree of stability.
}
// Emits code that compares ebp and esp against the values stored in s_store_ebp /
// s_store_esp. For each register, a mismatch makes the emitted code call
// StackFrameCheckFailed (ecx = which register, edx = its current value) and then
// reload the register from the stored value. Emits nothing unless
// EmuConfig.Cpu.Recompiler.StackFrameChecks is enabled.
static void _DynGen_StackFrameCheck()
{
if( !EmuConfig.Cpu.Recompiler.StackFrameChecks ) return;
// --------- EBP Here -----------
xCMP( ebp, ptr[&s_store_ebp] );
xForwardJE8 skipassert_ebp;
xMOV( ecx, 1 ); // 1 specifies EBP
xMOV( edx, ebp );
xCALL( StackFrameCheckFailed );
xMOV( ebp, ptr[&s_store_ebp] ); // half-hearted frame recovery attempt!
skipassert_ebp.SetTarget();
// --------- ESP There -----------
xCMP( esp, ptr[&s_store_esp] );
xForwardJE8 skipassert_esp;
xXOR( ecx, ecx ); // 0 specifies ESP
xMOV( edx, esp );
xCALL( StackFrameCheckFailed );
xMOV( esp, ptr[&s_store_esp] ); // half-hearted frame recovery attempt!
skipassert_esp.SetTarget();
}
// The address for all cleared blocks. It recompiles the current pc and then
// dispatches to the recompiled block address.
static DynGenFunc* _DynGen_JITCompile()
@ -423,10 +379,8 @@ static DynGenFunc* _DynGen_JITCompile()
pxAssertMsg( DispatcherReg != NULL, "Please compile the DispatcherReg subroutine *before* JITComple. Thanks." );
u8* retval = xGetAlignedCallTarget();
_DynGen_StackFrameCheck();
xMOV( ecx, ptr[&cpuRegs.pc] );
xCALL( recRecompile );
xFastCall(recRecompile, ptr[&cpuRegs.pc] );
xMOV( eax, ptr[&cpuRegs.pc] );
xMOV( ebx, eax );
@ -448,7 +402,6 @@ static DynGenFunc* _DynGen_JITCompileInBlock()
static DynGenFunc* _DynGen_DispatcherReg()
{
u8* retval = xGetPtr(); // fallthrough target, can't align it!
_DynGen_StackFrameCheck();
xMOV( eax, ptr[&cpuRegs.pc] );
xMOV( ebx, eax );
@ -463,7 +416,7 @@ static DynGenFunc* _DynGen_DispatcherEvent()
{
u8* retval = xGetPtr();
xCALL( recEventTest );
xFastCall(recEventTest );
return (DynGenFunc*)retval;
}
@ -474,60 +427,15 @@ static DynGenFunc* _DynGen_EnterRecompiledCode()
u8* retval = xGetAlignedCallTarget();
// "standard" frame pointer setup for aligned stack: Record the original
// esp into ebp, and then align esp. ebp references the original esp base
// for the duration of our function, and is used to restore the original
// esp before returning from the function
xPUSH( ebp );
xMOV( ebp, esp );
xAND( esp, -0x10 );
// First 0x10 is for esi, edi, etc. Second 0x10 is for the return address and ebp. The
// third 0x10 is an optimization for C-style CDECL calls we might make from the recompiler
// (parameters for those calls can be stored there!) [currently no cdecl functions are
// used -- we do everything through __fastcall)
static const int cdecl_reserve = 0x00;
xSUB( esp, 0x20 + cdecl_reserve );
xMOV( ptr[ebp-12], edi );
xMOV( ptr[ebp-8], esi );
xMOV( ptr[ebp-4], ebx );
// Simulate a CALL function by pushing the call address and EBP onto the stack.
// (the dummy address here is filled in later right before we generate the LEAVE code)
xMOV( ptr32[esp+0x0c+cdecl_reserve], 0xdeadbeef );
uptr& imm = *(uptr*)(xGetPtr()-4);
// This part simulates the "normal" stackframe prep of "push ebp, mov ebp, esp"
// It is done here because we can't really generate that stuff from the Dispatchers themselves.
xMOV( ptr32[esp+0x08+cdecl_reserve], ebp );
xLEA( ebp, ptr32[esp+0x08+cdecl_reserve] );
if (EmuConfig.Cpu.Recompiler.StackFrameChecks) {
xMOV( ptr[&s_store_esp], esp );
xMOV( ptr[&s_store_ebp], ebp );
}
{ // Properly scope the frame prologue/epilogue
xScopedStackFrame frame(IsDevBuild);
xJMP(DispatcherReg);
xAlignCallTarget();
// This dummy CALL is unreachable code that some debuggers (MSVC2008) need in order to
// unwind the stack properly. This is effectively the call that we simulate above.
if( IsDevBuild ) xCALL( DispatcherReg );
imm = (uptr)xGetPtr();
// Save an exit point
ExitRecompiledCode = (DynGenFunc*)xGetPtr();
}
xLEAVE();
xMOV( edi, ptr[ebp-12] );
xMOV( esi, ptr[ebp-8] );
xMOV( ebx, ptr[ebp-4] );
xLEAVE();
xRET();
return (DynGenFunc*)retval;
@ -537,7 +445,7 @@ static DynGenFunc* _DynGen_DispatchBlockDiscard()
{
u8* retval = xGetPtr();
xEMMS();
xCALL(dyna_block_discard);
xFastCall(dyna_block_discard);
xJMP(ExitRecompiledCode);
return (DynGenFunc*)retval;
}
@ -546,7 +454,7 @@ static DynGenFunc* _DynGen_DispatchPageReset()
{
u8* retval = xGetPtr();
xEMMS();
xCALL(dyna_page_reset);
xFastCall(dyna_page_reset);
xJMP(ExitRecompiledCode);
return (DynGenFunc*)retval;
}
@ -1007,7 +915,7 @@ void SetBranchReg( u32 reg )
// xCMP(ptr32[&cpuRegs.pc], 0);
// j8Ptr[5] = JNE8(0);
// xCALL((void*)(uptr)tempfn);
// xFastCall((void*)(uptr)tempfn);
// x86SetJ8( j8Ptr[5] );
iFlushCall(FLUSH_EVERYTHING);
@ -1149,8 +1057,6 @@ static u32 scaleblockcycles()
// setting "g_branch = 2";
static void iBranchTest(u32 newpc)
{
_DynGen_StackFrameCheck();
// Check the Event scheduler if our "cycle target" has been reached.
// Equiv code to:
// cpuRegs.cycle += blockcycles;
@ -1294,7 +1200,7 @@ void recMemcheck(u32 op, u32 bits, bool store)
if (bits == 128)
xAND(ecx, ~0x0F);
xCALL(standardizeBreakpointAddress);
xFastCall(standardizeBreakpointAddress, ecx);
xMOV(ecx,eax);
xMOV(edx,eax);
xADD(edx,bits/8);
@ -1325,10 +1231,10 @@ void recMemcheck(u32 op, u32 bits, bool store)
// hit the breakpoint
if (checks[i].result & MEMCHECK_LOG) {
xMOV(edx, store);
xCALL(&dynarecMemLogcheck);
xFastCall(dynarecMemLogcheck, ecx, edx);
}
if (checks[i].result & MEMCHECK_BREAK) {
xCALL(&dynarecMemcheck);
xFastCall(dynarecMemcheck);
}
next1.SetTarget();
@ -1341,7 +1247,7 @@ void encodeBreakpoint()
if (isBreakpointNeeded(pc) != 0)
{
iFlushCall(FLUSH_EVERYTHING|FLUSH_PC);
xCALL(&dynarecCheckBreakpoint);
xFastCall(dynarecCheckBreakpoint);
}
}
@ -1767,14 +1673,14 @@ static void __fastcall recRecompile( const u32 startpc )
pxAssert(s_pCurBlockEx);
if (g_SkipBiosHack && HWADDR(startpc) == EELOAD_START) {
xCALL(eeloadReplaceOSDSYS);
xFastCall(eeloadReplaceOSDSYS);
xCMP(ptr32[&cpuRegs.pc], startpc);
xJNE(DispatcherReg);
}
// this is the only way patches get applied, doesn't depend on a hack
if (HWADDR(startpc) == ElfEntry) {
xCALL(eeGameStarting);
xFastCall(eeGameStarting);
// Apply patch as soon as possible. Normally it is done in
// eeGameStarting but first block is already compiled.
//
@ -1804,20 +1710,18 @@ static void __fastcall recRecompile( const u32 startpc )
// [TODO] : These must be enabled from the GUI or INI to be used, otherwise the
// code that calls PreBlockCheck will not be generated.
xMOV(ecx, pc);
xCALL(PreBlockCheck);
xFastCall(PreBlockCheck, pc);
}
if (EmuConfig.Gamefixes.GoemonTlbHack) {
if (pc == 0x33ad48 || pc == 0x35060c) {
// 0x33ad48 and 0x35060c are the return address of the function (0x356250) that populate the TLB cache
xCALL(GoemonPreloadTlb);
xFastCall(GoemonPreloadTlb);
} else if (pc == 0x3563b8) {
// Game will unmap some virtual addresses. If a constant address were hardcoded in the block, we would be in a bad situation.
AtomicExchange( eeRecNeedsReset, true );
// 0x3563b8 is the start address of the function that invalidate entry in TLB cache
xMOV(ecx, ptr[&cpuRegs.GPR.n.a0.UL[ 0 ] ]);
xCALL(GoemonUnloadTlb);
xFastCall(GoemonUnloadTlb, ptr[&cpuRegs.GPR.n.a0.UL[0]]);
}
}

View File

@ -396,7 +396,7 @@ EERECOMPILE_CODE0(BNEL, XMMINFO_READS|XMMINFO_READT);
// xMOV(ptr32[(u32*)((int)&cpuRegs.code)], cpuRegs.code );
// xMOV(ptr32[(u32*)((int)&cpuRegs.pc)], pc );
// iFlushCall(FLUSH_EVERYTHING);
// xCALL((void*)(int)BLTZAL );
// xFastCall((void*)(int)BLTZAL );
// branch = 2;
//}

View File

@ -314,7 +314,7 @@ void vtlb_dynarec_init()
// jump to the indirect handler, which is a __fastcall C++ function.
// [ecx is address, edx is data]
xCALL( ptr32[(eax*4) + vtlbdata.RWFT[bits][mode]] );
xFastCall(ptr32[(eax*4) + vtlbdata.RWFT[bits][mode]], ecx, edx);
if (!mode)
{
@ -410,8 +410,7 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const )
}
iFlushCall(FLUSH_FULLVTLB);
xMOV( ecx, paddr );
xCALL( vtlbdata.RWFT[szidx][0][handler] );
xFastCall( vtlbdata.RWFT[szidx][0][handler], paddr );
}
}
@ -474,8 +473,7 @@ void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const )
else
{
iFlushCall(FLUSH_FULLVTLB);
xMOV( ecx, paddr );
xCALL( vtlbdata.RWFT[szidx][0][handler] );
xFastCall( vtlbdata.RWFT[szidx][0][handler], paddr );
// perform sign extension on the result:
@ -565,8 +563,7 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const )
}
iFlushCall(FLUSH_FULLVTLB);
xMOV( ecx, paddr );
xCALL( vtlbdata.RWFT[szidx][1][handler] );
xFastCall( vtlbdata.RWFT[szidx][1][handler], paddr, edx );
}
}

View File

@ -80,10 +80,8 @@ void mVUreset(microVU& mVU, bool resetReserve) {
else Perf::any.map((uptr)&mVU.dispCache, mVUdispCacheSize, "mVU0 Dispatcher");
x86SetPtr(mVU.dispCache);
mVUdispatcherA(mVU);
mVUdispatcherB(mVU);
mVUdispatcherC(mVU);
mVUdispatcherD(mVU);
mVUdispatcherAB(mVU);
mVUdispatcherCD(mVU);
mVUemitSearch();
// Clear All Program Data

View File

@ -57,8 +57,8 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit) {
mVU_XGKICK_DELAY(mVU);
}
if (doEarlyExit(mVU)) {
if (!isVU1) xCALL(mVU0clearlpStateJIT);
else xCALL(mVU1clearlpStateJIT);
if (!isVU1) xFastCall(mVU0clearlpStateJIT);
else xFastCall(mVU1clearlpStateJIT);
}
}
@ -117,9 +117,9 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit) {
}
if (doEarlyExit(mVU)) {
if (!isVU1)
xCALL(mVU0clearlpStateJIT);
xFastCall(mVU0clearlpStateJIT);
else
xCALL(mVU1clearlpStateJIT);
xFastCall(mVU1clearlpStateJIT);
}
}
@ -192,8 +192,8 @@ void normJumpCompile(mV, microFlagCycles& mFC, bool isEvilJump) {
xJMP(mVU.exitFunct);
}
if (!mVU.index) xCALL(mVUcompileJIT<0>); //(u32 startPC, uptr pState)
else xCALL(mVUcompileJIT<1>);
if (!mVU.index) xFastCall(mVUcompileJIT<0>, gprT2, gprT3); //(u32 startPC, uptr pState)
else xFastCall(mVUcompileJIT<1>, gprT2, gprT3);
mVUrestoreRegs(mVU);
xJMP(gprT1); // Jump to rec-code address

View File

@ -199,10 +199,8 @@ __fi void handleBadOp(mV, int count) {
#ifdef PCSX2_DEVBUILD
if (mVUinfo.isBadOp) {
mVUbackupRegs(mVU, true);
xMOV(gprT2, mVU.prog.cur->idx);
xMOV(gprT3, xPC);
if (!isVU1) xCALL(mVUbadOp0);
else xCALL(mVUbadOp1);
if (!isVU1) xFastCall(mVUbadOp0, mVU.prog.cur->idx, xPC);
else xFastCall(mVUbadOp1, mVU.prog.cur->idx, xPC);
mVUrestoreRegs(mVU, true);
}
#endif
@ -350,9 +348,8 @@ void mVUsetCycles(mV) {
void mVUdebugPrintBlocks(microVU& mVU, bool isEndPC) {
if (mVUdebugNow) {
mVUbackupRegs(mVU, true);
xMOV(gprT2, xPC);
if (isEndPC) xCALL(mVUprintPC2);
else xCALL(mVUprintPC1);
if (isEndPC) xFastCall(mVUprintPC2, xPC);
else xFastCall(mVUprintPC1, xPC);
mVUrestoreRegs(mVU, true);
}
}
@ -380,9 +377,7 @@ void mVUtestCycles(microVU& mVU) {
// TEST32ItoM((uptr)&mVU.regs().flags, VUFLAG_MFLAGSET);
// xFowardJZ32 vu0jmp;
// mVUbackupRegs(mVU, true);
// xMOV(gprT2, mVU.prog.cur->idx);
// xMOV(gprT3, xPC);
// xCALL(mVUwarning0); // VU0 is allowed early exit for COP2 Interlock Simulation
// xFastCall(mVUwarning0, mVU.prog.cur->idx, xPC); // VU0 is allowed early exit for COP2 Interlock Simulation
// mVUrestoreRegs(mVU, true);
mVUsavePipelineState(mVU);
mVUendProgram(mVU, NULL, 0);
@ -390,9 +385,7 @@ void mVUtestCycles(microVU& mVU) {
}
else {
mVUbackupRegs(mVU, true);
xMOV(gprT2, mVU.prog.cur->idx);
xMOV(gprT3, xPC);
xCALL(mVUwarning1);
xFastCall(mVUwarning1, mVU.prog.cur->idx, xPC);
mVUrestoreRegs(mVU, true);
mVUsavePipelineState(mVU);
mVUendProgram(mVU, NULL, 0);

View File

@ -19,24 +19,16 @@
// Dispatcher Functions
//------------------------------------------------------------------
// Generates the code for entering recompiled blocks
void mVUdispatcherA(mV) {
// Generates the code for entering/exiting recompiled blocks
void mVUdispatcherAB(mV) {
mVU.startFunct = x86Ptr;
// Backup cpu state
xPUSH(ebp);
xPUSH(ebx);
xPUSH(esi);
xPUSH(edi);
// Align the stackframe (GCC only, since GCC assumes stackframe is always aligned)
#ifdef __GNUC__
xSUB(esp, 12);
#endif
{
xScopedStackFrame frame(false, true);
// __fastcall = The caller has already put the needed parameters in ecx/edx:
if (!isVU1) { xCALL(mVUexecuteVU0); }
else { xCALL(mVUexecuteVU1); }
if (!isVU1) { xFastCall(mVUexecuteVU0, ecx, edx); }
else { xFastCall(mVUexecuteVU1, ecx, edx); }
// Load VU's MXCSR state
xLDMXCSR(g_sseVUMXCSR);
@ -61,12 +53,7 @@ void mVUdispatcherA(mV) {
// Jump to Recompiled Code Block
xJMP(eax);
pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
"microVU: Dispatcher generation exceeded reserved cache area!");
}
// Generates the code to exit from recompiled blocks
void mVUdispatcherB(mV) {
mVU.exitFunct = x86Ptr;
// Load EE's MXCSR state
@ -74,39 +61,22 @@ void mVUdispatcherB(mV) {
// __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers;
// all other arguments are passed right to left.
if (!isVU1) { xCALL(mVUcleanUpVU0); }
else { xCALL(mVUcleanUpVU1); }
// Unalign the stackframe:
#ifdef __GNUC__
xADD( esp, 12 );
#endif
// Restore cpu state
xPOP(edi);
xPOP(esi);
xPOP(ebx);
xPOP(ebp);
if (!isVU1) { xFastCall(mVUcleanUpVU0); }
else { xFastCall(mVUcleanUpVU1); }
}
xRET();
pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
"microVU: Dispatcher generation exceeded reserved cache area!");
}
// Generates the code for resuming xgkick
void mVUdispatcherC(mV) {
// Generates the code for resuming/exiting xgkick
void mVUdispatcherCD(mV) {
mVU.startFunctXG = x86Ptr;
// Backup cpu state
xPUSH(ebp);
xPUSH(ebx);
xPUSH(esi);
xPUSH(edi);
// Align the stackframe (GCC only, since GCC assumes stackframe is always aligned)
#ifdef __GNUC__
xSUB(esp, 12);
#endif
{
xScopedStackFrame frame(false, true);
// Load VU's MXCSR state
xLDMXCSR(g_sseVUMXCSR);
@ -120,12 +90,7 @@ void mVUdispatcherC(mV) {
// Jump to Recompiled Code Block
xJMP(ptr32[&mVU.resumePtrXG]);
pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
"microVU: Dispatcher generation exceeded reserved cache area!");
}
// Generates the code to exit from xgkick
void mVUdispatcherD(mV) {
mVU.exitFunctXG = x86Ptr;
//xPOP(gprT1); // Pop return address
@ -140,18 +105,10 @@ void mVUdispatcherD(mV) {
// Load EE's MXCSR state
xLDMXCSR(g_sseMXCSR);
// Unalign the stackframe:
#ifdef __GNUC__
xADD( esp, 12 );
#endif
// Restore cpu state
xPOP(edi);
xPOP(esi);
xPOP(ebx);
xPOP(ebp);
}
xRET();
pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
"microVU: Dispatcher generation exceeded reserved cache area!");
}

View File

@ -1219,8 +1219,7 @@ static __fi void mVU_XGKICK_DELAY(mV) {
xMOV (ptr32[&mVU.resumePtrXG], (uptr)xGetPtr() + 10 + 6);
xJcc32(Jcc_NotZero, (uptr)mVU.exitFunctXG - ((uptr)xGetPtr()+6));
#endif
xMOV(gprT2, ptr32[&mVU.VIxgkick]);
xCALL(mVU_XGKICK_);
xFastCall(mVU_XGKICK_, ptr32[&mVU.VIxgkick]);
mVUrestoreRegs(mVU);
}

View File

@ -249,15 +249,15 @@ void recBC2TL() { _setupBranchTest(JZ32, true); }
void COP2_Interlock(bool mBitSync) {
if (cpuRegs.code & 1) {
iFlushCall(FLUSH_EVERYTHING | FLUSH_PC);
if (mBitSync) xCALL(_vu0WaitMicro);
else xCALL(_vu0FinishMicro);
if (mBitSync) xFastCall(_vu0WaitMicro);
else xFastCall(_vu0FinishMicro);
}
}
void TEST_FBRST_RESET(FnType_Void* resetFunct, int vuIndex) {
xTEST(eax, (vuIndex) ? 0x200 : 0x002);
xForwardJZ8 skip;
xCALL(resetFunct);
xFastCall(resetFunct);
xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
skip.SetTarget();
}
@ -316,8 +316,8 @@ static void recCTC2() {
xMOV(ecx, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
}
else xXOR(ecx, ecx);
xCALL(vu1ExecMicro);
xCALL(vif1VUFinish);
xFastCall(vu1ExecMicro, ecx);
xFastCall(vif1VUFinish);
break;
case REG_FBRST:
if (!_Rt_) {
@ -336,8 +336,7 @@ static void recCTC2() {
// Executing vu0 block here fixes the intro of Ratchet and Clank
// sVU's COP2 has a comment that "Donald Duck" needs this too...
if (_Rd_) _eeMoveGPRtoM((uptr)&vu0Regs.VI[_Rd_].UL, _Rt_);
xMOV(ecx, (uptr)CpuVU0);
xCALL(BaseVUmicroCPU::ExecuteBlockJIT);
xFastCall(BaseVUmicroCPU::ExecuteBlockJIT, (uptr)CpuVU0);
break;
}
}