R5900: Flush complete machine state before all loads and stores, primarily because they can raise exceptions. This is slower, but not significantly so in any game tested, provided that the two recommended speedhacks are enabled, and it allows us to make another optimisation later that should more than make up for the small drop. We have an alternative implementation in mind should it prove too costly in any game even with both recommended speedhacks enabled.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2767 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
sudonim1 2010-03-26 00:33:41 +00:00
parent 928816732b
commit 5a6a1d5a01
9 changed files with 74 additions and 97 deletions

View File

@ -86,10 +86,10 @@ void recBC0TL()
recDoBranchImm_Likely(JNE32(0));
}
void recTLBR() { recCall( Interp::TLBR, -1 ); }
void recTLBP() { recCall( Interp::TLBP, -1 ); }
void recTLBWI() { recCall( Interp::TLBWI, -1 ); }
void recTLBWR() { recCall( Interp::TLBWR, -1 ); }
void recTLBR() { recCall(Interp::TLBR); }
void recTLBP() { recCall(Interp::TLBP); }
void recTLBWI() { recCall(Interp::TLBWI); }
void recTLBWR() { recCall(Interp::TLBWR); }
void recERET()
{

View File

@ -355,8 +355,14 @@ extern u16 x86FpuState;
#define FLUSH_FREE_TEMPX86 64 // flush and free temporary x86 regs
#define FLUSH_FREE_ALLX86 128 // free all x86 regs
#define FLUSH_FREE_VU0 0x100 // free all vu0 related regs
#define FLUSH_PC 0x200 // program counter
#define FLUSH_CAUSE 0x400 // cause register, only the branch delay bit
#define FLUSH_CODE 0x800 // opcode for interpreter
#define FLUSH_EVERYTHING 0x1ff
#define FLUSH_EXCEPTION 0x7ff
#define FLUSH_INTERPRETER 0xfff
#define FLUSH_EVERYTHING 0xfff
// no freeing, used when callee won't destroy mmx/xmm regs
#define FLUSH_NODESTROY (FLUSH_CACHED_REGS|FLUSH_FLUSH_XMM|FLUSH_FLUSH_MMX|FLUSH_FLUSH_ALLX86)
// used when regs aren't going to be changed by callee

View File

@ -84,9 +84,7 @@ static const __aligned16 u32 s_pos[4] = { 0x7fffffff, 0xffffffff, 0xffffffff, 0x
#define REC_FPUBRANCH(f) \
void f(); \
void rec##f() { \
MOV32ItoM((uptr)&cpuRegs.code, cpuRegs.code); \
MOV32ItoM((uptr)&cpuRegs.pc, pc); \
iFlushCall(FLUSH_EVERYTHING); \
iFlushCall(FLUSH_INTERPRETER); \
CALLFunc((uptr)R5900::Interpreter::OpcodeImpl::COP1::f); \
branch = 2; \
}
@ -94,9 +92,7 @@ static const __aligned16 u32 s_pos[4] = { 0x7fffffff, 0xffffffff, 0xffffffff, 0x
#define REC_FPUFUNC(f) \
void f(); \
void rec##f() { \
MOV32ItoM((uptr)&cpuRegs.code, cpuRegs.code); \
MOV32ItoM((uptr)&cpuRegs.pc, pc); \
iFlushCall(FLUSH_EVERYTHING); \
iFlushCall(FLUSH_INTERPRETER); \
CALLFunc((uptr)R5900::Interpreter::OpcodeImpl::COP1::f); \
}
//------------------------------------------------------------------

View File

@ -88,9 +88,7 @@ namespace DOUBLE {
#define REC_FPUBRANCH(f) \
void f(); \
void rec##f() { \
MOV32ItoM((uptr)&cpuRegs.code, cpuRegs.code); \
MOV32ItoM((uptr)&cpuRegs.pc, pc); \
iFlushCall(FLUSH_EVERYTHING); \
iFlushCall(FLUSH_INTERPRETER); \
CALLFunc((uptr)R5900::Interpreter::OpcodeImpl::COP1::f); \
branch = 2; \
}
@ -98,9 +96,7 @@ namespace DOUBLE {
#define REC_FPUFUNC(f) \
void f(); \
void rec##f() { \
MOV32ItoM((uptr)&cpuRegs.code, cpuRegs.code); \
MOV32ItoM((uptr)&cpuRegs.pc, pc); \
iFlushCall(FLUSH_EVERYTHING); \
iFlushCall(FLUSH_INTERPRETER); \
CALLFunc((uptr)R5900::Interpreter::OpcodeImpl::COP1::f); \
}
//------------------------------------------------------------------

View File

@ -187,13 +187,8 @@ void recPMFHL()
case 0x02: // SLW
// fall to interp
MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code );
MOV32ItoM( (uptr)&cpuRegs.pc, pc );
_flushCachedRegs();
_deleteEEreg(_Rd_, 0);
_deleteEEreg(XMMGPR_LO, 1);
_deleteEEreg(XMMGPR_HI, 1);
iFlushCall(FLUSH_CACHED_REGS); // since calling CALLFunc
iFlushCall(FLUSH_INTERPRETER); // since calling CALLFunc
CALLFunc( (uptr)R5900::Interpreter::OpcodeImpl::MMI::PMFHL );
break;
@ -1710,7 +1705,8 @@ REC_FUNC_DEL( PROT3W, _Rd_ );
void recPMADDW()
{
if( !x86caps.hasStreamingSIMD4Extensions ) {
recCall( Interp::PMADDW, _Rd_ );
_deleteEEreg(_Rd_, 0);
recCall(Interp::PMADDW);
return;
}
@ -1888,7 +1884,8 @@ void recPSRLVW()
void recPMSUBW()
{
if( !x86caps.hasStreamingSIMD4Extensions ) {
recCall( Interp::PMSUBW, _Rd_ );
_deleteEEreg(_Rd_, 0);
recCall(Interp::PMSUBW);
return;
}
int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI );
@ -1939,7 +1936,8 @@ void recPMSUBW()
void recPMULTW()
{
if( !x86caps.hasStreamingSIMD4Extensions ) {
recCall( Interp::PMULTW, _Rd_ );
_deleteEEreg(_Rd_, 0);
recCall(Interp::PMULTW);
return;
}
int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI );
@ -1979,13 +1977,15 @@ void recPMULTW()
////////////////////////////////////////////////////
void recPDIVW()
{
recCall( Interp::PDIVW, _Rd_ );
_deleteEEreg(_Rd_, 0);
recCall(Interp::PDIVW);
}
////////////////////////////////////////////////////
void recPDIVBW()
{
recCall( Interp::PDIVBW, _Rd_ ); //--
_deleteEEreg(_Rd_, 0);
recCall(Interp::PDIVBW); //--
}
////////////////////////////////////////////////////
@ -2608,7 +2608,8 @@ void recPMADDUW()
////////////////////////////////////////////////////
void recPDIVUW()
{
recCall( Interp::PDIVUW, _Rd_ );
_deleteEEreg(_Rd_, 0);
recCall(Interp::PDIVUW);
}
////////////////////////////////////////////////////

View File

@ -37,41 +37,27 @@ extern u32 s_nBlockCycles; // cycles of current block recompiling
#define REC_FUNC( f ) \
void rec##f( void ) \
{ \
MOV32ItoM( (uptr)&cpuRegs.code, (u32)cpuRegs.code ); \
MOV32ItoM( (uptr)&cpuRegs.pc, (u32)pc ); \
iFlushCall(FLUSH_EVERYTHING); \
CALLFunc( (uptr)Interp::f ); \
recCall(Interp::f); \
}
#define REC_FUNC_DEL( f, delreg ) \
void rec##f( void ) \
{ \
MOV32ItoM( (uptr)&cpuRegs.code, (u32)cpuRegs.code ); \
MOV32ItoM( (uptr)&cpuRegs.pc, (u32)pc ); \
iFlushCall(FLUSH_EVERYTHING); \
if( (delreg) > 0 ) _deleteEEreg(delreg, 0); \
CALLFunc( (uptr)Interp::f ); \
recCall(Interp::f); \
}
#define REC_SYS( f ) \
void rec##f( void ) \
{ \
MOV32ItoM( (uptr)&cpuRegs.code, (u32)cpuRegs.code ); \
MOV32ItoM( (uptr)&cpuRegs.pc, (u32)pc ); \
iFlushCall(FLUSH_EVERYTHING); \
CALLFunc( (uptr)Interp::f ); \
branch = 2; \
recBranchCall(Interp::f); \
}
#define REC_SYS_DEL( f, delreg ) \
void rec##f( void ) \
{ \
MOV32ItoM( (uptr)&cpuRegs.code, (u32)cpuRegs.code ); \
MOV32ItoM( (uptr)&cpuRegs.pc, (u32)pc ); \
iFlushCall(FLUSH_EVERYTHING); \
if( (delreg) > 0 ) _deleteEEreg(delreg, 0); \
CALLFunc( (uptr)Interp::f ); \
branch = 2; \
recBranchCall(Interp::f); \
}
@ -89,7 +75,7 @@ void SetBranchImm( u32 imm );
void iFlushCall(int flushtype);
void recBranchCall( void (*func)() );
void recCall( void (*func)(), int delreg );
void recCall( void (*func)() );
namespace R5900{
namespace Dynarec {

View File

@ -50,6 +50,7 @@ int branch; // set for branch
__aligned16 GPR_reg64 g_cpuConstRegs[32] = {0};
u32 g_cpuHasConstReg = 0, g_cpuFlushedConstReg = 0;
bool g_cpuFlushedPC;
////////////////////////////////////////////////////////////////
// Static Private Variables - R5900 Dynarec
@ -308,26 +309,17 @@ void recBranchCall( void (*func)() )
// In order to make sure a branch test is performed, the nextBranchCycle is set
// to the current cpu cycle.
MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code );
MOV32MtoR( EAX, (uptr)&cpuRegs.cycle );
MOV32ItoM( (uptr)&cpuRegs.pc, pc );
MOV32RtoM( (uptr)&g_nextBranchCycle, EAX );
// Might as well flush everything -- it'll all get flushed when the
// recompiler inserts the branchtest anyway.
iFlushCall(FLUSH_EVERYTHING);
CALLFunc( (uptr)func );
recCall(func);
branch = 2;
}
void recCall( void (*func)(), int delreg )
void recCall( void (*func)() )
{
MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code );
MOV32ItoM( (uptr)&cpuRegs.pc, pc );
iFlushCall(FLUSH_EVERYTHING);
if( delreg > 0 ) _deleteEEreg(delreg, 0);
CALLFunc( (uptr)func );
iFlushCall(FLUSH_INTERPRETER);
xCALL(func);
}
// =====================================================================================================
@ -786,10 +778,7 @@ static void recExecuteBiosStub()
////////////////////////////////////////////////////
void R5900::Dynarec::OpcodeImpl::recSYSCALL( void )
{
MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code );
MOV32ItoM( (uptr)&cpuRegs.pc, pc );
iFlushCall(FLUSH_NODESTROY);
CALLFunc( (uptr)R5900::Interpreter::OpcodeImpl::SYSCALL );
recCall(R5900::Interpreter::OpcodeImpl::SYSCALL);
CMP32ItoM((uptr)&cpuRegs.pc, pc);
j8Ptr[0] = JE8(0);
@ -802,10 +791,7 @@ void R5900::Dynarec::OpcodeImpl::recSYSCALL( void )
////////////////////////////////////////////////////
void R5900::Dynarec::OpcodeImpl::recBREAK( void )
{
MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code );
MOV32ItoM( (uptr)&cpuRegs.pc, pc );
iFlushCall(FLUSH_EVERYTHING);
CALLFunc( (uptr)R5900::Interpreter::OpcodeImpl::BREAK );
recCall(R5900::Interpreter::OpcodeImpl::BREAK);
CMP32ItoM((uptr)&cpuRegs.pc, pc);
j8Ptr[0] = JE8(0);
@ -1026,6 +1012,15 @@ void iFlushCall(int flushtype)
_freeX86reg(ECX);
_freeX86reg(EDX);
if (flushtype & FLUSH_PC && !g_cpuFlushedPC) {
xMOV(ptr32[&cpuRegs.pc], pc);
g_cpuFlushedPC = true;
}
if (flushtype & FLUSH_CODE)
xMOV(ptr32[&cpuRegs.code], cpuRegs.code);
if (flushtype & FLUSH_CAUSE)
; // TODO
if( flushtype & FLUSH_FREE_XMM )
_freeXMMregs();
else if( flushtype & FLUSH_FLUSH_XMM)
@ -1208,6 +1203,7 @@ void recompileNextInstruction(int delayslot)
cpuRegs.code = *(int *)s_pCode;
pc += 4;
g_cpuFlushedPC = false;
g_pCurInstInfo++;

View File

@ -123,6 +123,7 @@ void recLoad64( u32 bits, bool sign )
}
else
{
iFlushCall(FLUSH_EXCEPTION);
// Load ECX with the source memory address that we're reading from.
_eeMoveGPRtoR(ECX, _Rs_);
if ( _Imm_ != 0 )
@ -158,6 +159,7 @@ void recLoad32( u32 bits, bool sign )
}
else
{
iFlushCall(FLUSH_EXCEPTION);
// Load ECX with the source memory address that we're reading from.
_eeMoveGPRtoR(ECX, _Rs_);
if ( _Imm_ != 0 )
@ -219,6 +221,7 @@ void recStore(u32 sz, bool edxAlreadyAssigned=false)
}
else
{
iFlushCall(FLUSH_EXCEPTION);
_eeMoveGPRtoR(ECX, _Rs_);
if ( _Imm_ != 0 )
@ -254,25 +257,23 @@ void recSD( void ) { recStore(64); }
////////////////////////////////////////////////////
void recLWL( void )
{
iFlushCall(FLUSH_EXCEPTION);
_deleteEEreg(_Rs_, 1);
_eeOnLoadWrite(_Rt_);
_deleteEEreg(_Rt_, 1);
MOV32ItoM( (int)&cpuRegs.code, cpuRegs.code );
//MOV32ItoM( (int)&cpuRegs.pc, pc );
CALLFunc( (int)LWL );
recCall(LWL);
}
////////////////////////////////////////////////////
void recLWR( void )
{
iFlushCall(FLUSH_EXCEPTION);
_deleteEEreg(_Rs_, 1);
_eeOnLoadWrite(_Rt_);
_deleteEEreg(_Rt_, 1);
MOV32ItoM( (int)&cpuRegs.code, cpuRegs.code );
//MOV32ItoM( (int)&cpuRegs.pc, pc );
CALLFunc( (int)LWR );
recCall(LWR);
}
static const u32 SWL_MASK[4] = { 0xffffff00, 0xffff0000, 0xff000000, 0x00000000 };
@ -312,64 +313,58 @@ void recSWL( void )
}
else
{
iFlushCall(FLUSH_EXCEPTION);
_deleteEEreg(_Rs_, 1);
_deleteEEreg(_Rt_, 1);
MOV32ItoM( (int)&cpuRegs.code, cpuRegs.code );
//MOV32ItoM( (int)&cpuRegs.pc, pc ); // pc's not needed by SWL
CALLFunc( (int)SWL );
recCall(SWL);
}
}
////////////////////////////////////////////////////
void recSWR( void )
{
iFlushCall(FLUSH_EXCEPTION);
_deleteEEreg(_Rs_, 1);
_deleteEEreg(_Rt_, 1);
MOV32ItoM( (int)&cpuRegs.code, cpuRegs.code );
//MOV32ItoM( (int)&cpuRegs.pc, pc );
CALLFunc( (int)SWR );
recCall(SWR);
}
////////////////////////////////////////////////////
void recLDL( void )
{
iFlushCall(FLUSH_EXCEPTION);
_deleteEEreg(_Rs_, 1);
_eeOnLoadWrite(_Rt_);
_deleteEEreg(_Rt_, 1);
MOV32ItoM( (int)&cpuRegs.code, cpuRegs.code );
//MOV32ItoM( (int)&cpuRegs.pc, pc );
CALLFunc( (int)LDL );
recCall(LDL);
}
////////////////////////////////////////////////////
void recLDR( void )
{
iFlushCall(FLUSH_EXCEPTION);
_deleteEEreg(_Rs_, 1);
_eeOnLoadWrite(_Rt_);
_deleteEEreg(_Rt_, 1);
MOV32ItoM( (int)&cpuRegs.code, cpuRegs.code );
//MOV32ItoM( (int)&cpuRegs.pc, pc );
CALLFunc( (int)LDR );
recCall(LDR);
}
////////////////////////////////////////////////////
void recSDL( void )
{
iFlushCall(FLUSH_EXCEPTION);
_deleteEEreg(_Rs_, 1);
_deleteEEreg(_Rt_, 1);
MOV32ItoM( (int)&cpuRegs.code, cpuRegs.code );
//MOV32ItoM( (int)&cpuRegs.pc, pc );
CALLFunc( (int)SDL );
recCall(SDL);
}
////////////////////////////////////////////////////
void recSDR( void )
{
iFlushCall(FLUSH_EXCEPTION);
_deleteEEreg(_Rs_, 1);
_deleteEEreg(_Rt_, 1);
MOV32ItoM( (int)&cpuRegs.code, cpuRegs.code );
//MOV32ItoM( (int)&cpuRegs.pc, pc );
CALLFunc( (int)SDR );
recCall(SDR);
}
//////////////////////////////////////////////////////////////////////////////////////////
@ -381,6 +376,7 @@ void recSDR( void )
////////////////////////////////////////////////////
void recLWC1( void )
{
iFlushCall(FLUSH_EXCEPTION);
_deleteEEreg(_Rs_, 1);
_deleteFPtoXMMreg(_Rt_, 2);
@ -395,6 +391,7 @@ void recLWC1( void )
////////////////////////////////////////////////////
void recSWC1( void )
{
iFlushCall(FLUSH_EXCEPTION);
_deleteEEreg(_Rs_, 1);
_deleteFPtoXMMreg(_Rt_, 0);
@ -419,6 +416,7 @@ void recSWC1( void )
void recLQC2( void )
{
iFlushCall(FLUSH_EXCEPTION);
_deleteEEreg(_Rs_, 1);
_deleteVFtoXMMreg(_Ft_, 0, 2);
@ -437,6 +435,7 @@ void recLQC2( void )
////////////////////////////////////////////////////
void recSQC2( void )
{
iFlushCall(FLUSH_EXCEPTION);
_deleteEEreg(_Rs_, 1);
_deleteVFtoXMMreg(_Ft_, 0, 0);

View File

@ -79,10 +79,7 @@ void endMacroOp(int mode) {
#define INTERPRETATE_COP2_FUNC(f) \
void recV##f() { \
MOV32ItoM((uptr)&cpuRegs.code, cpuRegs.code); \
MOV32ItoM((uptr)&cpuRegs.pc, pc); \
iFlushCall(FLUSH_EVERYTHING); \
CALLFunc((uptr)V##f); \
recCall(V##f); \
_freeX86regs(); \
}