From 1a02e889978339f67eab7378ab82a4cf56139284 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Wed, 17 Dec 2008 15:12:35 +0000 Subject: [PATCH] Added somewhat more "accurate" cycle timings, by weighting mul/div and load/store instructions to more closely match typical/avg cycle counts for those instructions on the PS2. In addition to helping fix some obscure timing glitches, it also allows the emulator to run a little more efficiently. True accuracy is impossible since we can't realistically emulate the MIPs superscalar 8-stage pipeline, branch predictions, or data cache hits/misses. But at least the "average" cycle timings should be a little closer to the real thing now. Improved stability of both EE/X2 and EE/X3 sync hacks. In particular, some FMVs that broke under the X2 hack should work a bit better now, and the X3 hack should behave a lot better now too (but will still break pretty much any FMV/audio sync). X2 sync hack performance also improved, by maybe 6% or so. git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@446 a6443dda-0b58-4228-96e9-037be469359c --- pcsx2/x86/iFPU.c | 8 +- pcsx2/x86/iMMI.c | 25 ++++- pcsx2/x86/iR3000A.cpp | 4 + pcsx2/x86/iR3000A.h | 26 ++++- pcsx2/x86/iR3000Atables.cpp | 8 +- pcsx2/x86/iR5900.h | 40 ++++++- pcsx2/x86/ir5900tables.c | 167 +--------------------------- pcsx2/x86/ix86-32/iR5900-32.c | 72 ++++++++---- pcsx2/x86/ix86-32/iR5900LoadStore.c | 76 ++++++++----- pcsx2/x86/ix86-32/iR5900MultDiv.c | 16 +-- 10 files changed, 199 insertions(+), 243 deletions(-) diff --git a/pcsx2/x86/iFPU.c b/pcsx2/x86/iFPU.c index c043aec7c1..ec0501f88d 100644 --- a/pcsx2/x86/iFPU.c +++ b/pcsx2/x86/iFPU.c @@ -1284,7 +1284,7 @@ void recDIV_S_xmm(int info) _freeXMMreg(t0reg); } -FPURECOMPILE_CONSTCODE(DIV_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE_PENALTY(DIV_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT,InstCycles_FPU_Sqrt); //------------------------------------------------------------------ @@ -1670,7 
+1670,7 @@ void recMUL_S_xmm(int info) ClampValues(recCommutativeOp(info, EEREC_D, 1)); } -FPURECOMPILE_CONSTCODE(MUL_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE_PENALTY(MUL_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT,InstCycles_FPU_Sqrt/2); void recMULA_S_xmm(int info) { @@ -1811,7 +1811,7 @@ void recSQRT_S_xmm(int info) _freeX86reg(tempReg); } -FPURECOMPILE_CONSTCODE(SQRT_S, XMMINFO_WRITED|XMMINFO_READT); +FPURECOMPILE_CONSTCODE_PENALTY(SQRT_S, XMMINFO_WRITED|XMMINFO_READT, InstCycles_FPU_Sqrt); //------------------------------------------------------------------ @@ -1916,7 +1916,7 @@ void recRSQRT_S_xmm(int info) _freeXMMreg(t0reg); } -FPURECOMPILE_CONSTCODE(RSQRT_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE_PENALTY(RSQRT_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT,InstCycles_FPU_Sqrt*2); #endif // FPU_RECOMPILE diff --git a/pcsx2/x86/iMMI.c b/pcsx2/x86/iMMI.c index e2bb4543e2..f80cbcbac7 100644 --- a/pcsx2/x86/iMMI.c +++ b/pcsx2/x86/iMMI.c @@ -2222,6 +2222,8 @@ void recPSRLVW() //////////////////////////////////////////////////// void recPMSUBW() { + g_eeCyclePenalty = InstCycles_MMI_Mult; + EEINST_SETSIGNEXT(_Rs_); EEINST_SETSIGNEXT(_Rt_); if( _Rd_ ) EEINST_SETSIGNEXT(_Rd_); @@ -2262,6 +2264,7 @@ void recPMSUBW() //////////////////////////////////////////////////// void recPMULTW() { + g_eeCyclePenalty = InstCycles_MMI_Mult; EEINST_SETSIGNEXT(_Rs_); EEINST_SETSIGNEXT(_Rt_); if( _Rd_ ) EEINST_SETSIGNEXT(_Rd_); @@ -2270,6 +2273,7 @@ void recPMULTW() //////////////////////////////////////////////////// void recPDIVW() { + g_eeCyclePenalty = InstCycles_MMI_Div; EEINST_SETSIGNEXT(_Rs_); EEINST_SETSIGNEXT(_Rt_); REC_FUNC_INLINE( PDIVW, _Rd_ ); @@ -2278,6 +2282,7 @@ void recPDIVW() //////////////////////////////////////////////////// void recPDIVBW() { + g_eeCyclePenalty = InstCycles_MMI_Div; REC_FUNC_INLINE( PDIVBW, _Rd_ ); //-- } @@ -2286,6 +2291,8 @@ PCSX2_ALIGNED16(int s_mask[4]) = {~0, 0, ~0, 0}; void 
recPHMADH() { + g_eeCyclePenalty = InstCycles_MMI_Mult; + CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI) int t0reg = _Rd_ ? EEREC_D : _allocTempXMMreg(XMMT_INT, -1); @@ -2345,6 +2352,8 @@ CPU_SSE_XMMCACHE_END //////////////////////////////////////////////////// void recPMSUBH() { + g_eeCyclePenalty = InstCycles_MMI_Mult; + CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI) int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); @@ -2388,6 +2397,8 @@ CPU_SSE_XMMCACHE_END //////////////////////////////////////////////////// void recPHMSBH() { + g_eeCyclePenalty = InstCycles_MMI_Mult; + CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI) int t0reg = _allocTempXMMreg(XMMT_INT, -1); @@ -2542,6 +2553,8 @@ CPU_SSE_XMMCACHE_END void recPMULTH( void ) { + g_eeCyclePenalty = InstCycles_MMI_Mult; + CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI) int t0reg = _allocTempXMMreg(XMMT_INT, -1); @@ -2794,6 +2807,8 @@ CPU_SSE_XMMCACHE_END void recPMADDH( void ) { + g_eeCyclePenalty = InstCycles_MMI_Mult; + CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI) int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); @@ -2978,6 +2993,8 @@ CPU_SSE_XMMCACHE_END //////////////////////////////////////////////////// void recPMULTUW() { + g_eeCyclePenalty = InstCycles_MMI_Mult; + CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED|XMMINFO_WRITELO|XMMINFO_WRITEHI) int t0reg = _allocTempXMMreg(XMMT_INT, -1); EEINST_SETSIGNEXT(_Rs_); @@ -3011,6 +3028,8 @@ CPU_SSE_XMMCACHE_END //////////////////////////////////////////////////// void 
recPMADDUW() { + g_eeCyclePenalty = InstCycles_MMI_Mult; + CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI) int t0reg = _allocTempXMMreg(XMMT_INT, -1); EEINST_SETSIGNEXT(_Rs_); @@ -3049,7 +3068,11 @@ CPU_SSE_XMMCACHE_END //////////////////////////////////////////////////// //do EEINST_SETSIGNEXT -REC_FUNC( PDIVUW, _Rd_ ); +void recPDIVUW() +{ + g_eeCyclePenalty = InstCycles_MMI_Div; + REC_FUNC_INLINE( PDIVUW, _Rd_ ); +} //////////////////////////////////////////////////// void recPEXCW() diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index d5a8e969cb..1d28726aa3 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -74,6 +74,8 @@ static BASEBLOCKEX *recBlocks = NULL; static u8 *recPtr; u32 psxpc; // recompiler psxpc int psxbranch; // set for branch +u32 g_iopCyclePenalty; + static EEINST* s_pInstCache = NULL; static u32 s_nInstCacheSize = 0; @@ -1154,7 +1156,9 @@ void psxRecompileNextInstruction(int delayslot) } else { assert( !(g_pCurInstInfo->info & EEINSTINFO_NOREC) ); + g_iopCyclePenalty = 0; rpsxBSC[ psxRegs.code >> 26 ](); + s_psxBlockCycles += g_iopCyclePenalty; } if( !delayslot ) { diff --git a/pcsx2/x86/iR3000A.h b/pcsx2/x86/iR3000A.h index 3a2c1472bc..ced713bc36 100644 --- a/pcsx2/x86/iR3000A.h +++ b/pcsx2/x86/iR3000A.h @@ -20,6 +20,16 @@ extern void __Log(const char *fmt, ...); +// Cycle penalties for particularly slow instructions. 
+static const int psxInstCycles_Mult = 8; +static const int psxInstCycles_Div = 60; + +// Currently unused (iop mod incomplete) +static const int psxInstCycles_Peephole_Store = 0; +static const int psxInstCycles_Store = 0; +static const int psxInstCycles_Load = 0; + + // to be consistent with EE #define PSX_HI XMMGPR_HI #define PSX_LO XMMGPR_LO @@ -43,6 +53,7 @@ void PSX_CHECK_SAVE_REG(int reg); extern u32 psxpc; // recompiler pc extern int psxbranch; // set for branch +extern u32 g_iopCyclePenalty; void psxSaveBranchState(); void psxLoadBranchState(); @@ -64,28 +75,35 @@ void psxRecClearMem(BASEBLOCK* p); void rpsx##fn(void) \ { \ psxRecompileCodeConst0(rpsx##fn##_const, rpsx##fn##_consts, rpsx##fn##_constt, rpsx##fn##_); \ -} \ +} // rt = rs op imm16 #define PSXRECOMPILE_CONSTCODE1(fn) \ void rpsx##fn(void) \ { \ psxRecompileCodeConst1(rpsx##fn##_const, rpsx##fn##_); \ -} \ +} // rd = rt op sa #define PSXRECOMPILE_CONSTCODE2(fn) \ void rpsx##fn(void) \ { \ psxRecompileCodeConst2(rpsx##fn##_const, rpsx##fn##_); \ -} \ +} // [lo,hi] = rt op rs #define PSXRECOMPILE_CONSTCODE3(fn, LOHI) \ void rpsx##fn(void) \ { \ psxRecompileCodeConst3(rpsx##fn##_const, rpsx##fn##_consts, rpsx##fn##_constt, rpsx##fn##_, LOHI); \ -} \ +} + +#define PSXRECOMPILE_CONSTCODE3_PENALTY(fn, LOHI, cycles) \ +void rpsx##fn(void) \ +{ \ + psxRecompileCodeConst3(rpsx##fn##_const, rpsx##fn##_consts, rpsx##fn##_constt, rpsx##fn##_, LOHI); \ + g_iopCyclePenalty = cycles; \ +} // rd = rs op rt void psxRecompileCodeConst0(R3000AFNPTR constcode, R3000AFNPTR_INFO constscode, R3000AFNPTR_INFO consttcode, R3000AFNPTR_INFO noconstcode); diff --git a/pcsx2/x86/iR3000Atables.cpp b/pcsx2/x86/iR3000Atables.cpp index 7578461cab..6d50088079 100644 --- a/pcsx2/x86/iR3000Atables.cpp +++ b/pcsx2/x86/iR3000Atables.cpp @@ -469,7 +469,7 @@ void rpsxMULT_consts(int info) { rpsxMULTsuperconst(info, _Rt_, g_psxConstRegs[_ void rpsxMULT_constt(int info) { rpsxMULTsuperconst(info, _Rs_, g_psxConstRegs[_Rt_], 1); } void 
rpsxMULT_(int info) { rpsxMULTsuper(info, 1); } -PSXRECOMPILE_CONSTCODE3(MULT, 1); +PSXRECOMPILE_CONSTCODE3_PENALTY(MULT, 1, psxInstCycles_Mult); //// MULTU void rpsxMULTU_const() @@ -484,7 +484,7 @@ void rpsxMULTU_consts(int info) { rpsxMULTsuperconst(info, _Rt_, g_psxConstRegs[ void rpsxMULTU_constt(int info) { rpsxMULTsuperconst(info, _Rs_, g_psxConstRegs[_Rt_], 0); } void rpsxMULTU_(int info) { rpsxMULTsuper(info, 0); } -PSXRECOMPILE_CONSTCODE3(MULTU, 1); +PSXRECOMPILE_CONSTCODE3_PENALTY(MULTU, 1, psxInstCycles_Mult); //// DIV void rpsxDIV_const() @@ -582,7 +582,7 @@ void rpsxDIV_consts(int info) { rpsxDIVsuperconsts(info, 1); } void rpsxDIV_constt(int info) { rpsxDIVsuperconstt(info, 1); } void rpsxDIV_(int info) { rpsxDIVsuper(info, 1); } -PSXRECOMPILE_CONSTCODE3(DIV, 1); +PSXRECOMPILE_CONSTCODE3_PENALTY(DIV, 1, psxInstCycles_Div); //// DIVU void rpsxDIVU_const() @@ -601,7 +601,7 @@ void rpsxDIVU_consts(int info) { rpsxDIVsuperconsts(info, 0); } void rpsxDIVU_constt(int info) { rpsxDIVsuperconstt(info, 0); } void rpsxDIVU_(int info) { rpsxDIVsuper(info, 0); } -PSXRECOMPILE_CONSTCODE3(DIVU, 1); +PSXRECOMPILE_CONSTCODE3_PENALTY(DIVU, 1, psxInstCycles_Div); //// LoadStores #ifdef PCSX2_VIRTUAL_MEM diff --git a/pcsx2/x86/iR5900.h b/pcsx2/x86/iR5900.h index 091b2a19c4..c753255c8a 100644 --- a/pcsx2/x86/iR5900.h +++ b/pcsx2/x86/iR5900.h @@ -40,6 +40,20 @@ #define CP0_RECOMPILE #define CP2_RECOMPILE +// Cycle penalties for particularly slow instructions. +static const int InstCycles_Mult = 1*4; +static const int InstCycles_Div = 12*4; +static const int InstCycles_FPU_Sqrt = 3*4; +static const int InstCycles_MMI_Mult = 2*4; +static const int InstCycles_MMI_Div = 20*4; + +// Setting Loads to 1 or higher breaks Disgaea 2 FMV audio syncs. 
+static const int InstCycles_Peephole_Store = 7; // 1.75 cycle penalty +static const int InstCycles_Peephole_Load = 1; // 0.25 cycle penalty +static const int InstCycles_Store = 7; // 1.75 cycle penalty +static const int InstCycles_Load = 1; // 0.25 cycle penalty + + #define EE_CONST_PROP // rec2 - enables constant propagation (faster) //#define EE_FPU_REGCACHING 1 // Not used anymore, its always on! @@ -63,6 +77,7 @@ extern u32 target; // branch target extern u16 x86FpuState; extern u16 iCWstate; extern u32 s_nBlockCycles; // cycles of current block recompiling +extern u32 g_eeCyclePenalty; void recBranchCall( void (*func)() ); @@ -146,13 +161,27 @@ typedef void (*R5900FNPTR_INFO)(int info); void rec##fn(void) \ { \ eeRecompileCode0(rec##fn##_const, rec##fn##_consts, rec##fn##_constt, rec##fn##_, xmminfo); \ -} \ +} + +#define EERECOMPILE_CODE0_PENALTY(fn, xmminfo, cycles) \ +void rec##fn(void) \ +{ \ + eeRecompileCode0(rec##fn##_const, rec##fn##_consts, rec##fn##_constt, rec##fn##_, xmminfo); \ + g_eeCyclePenalty = (cycles); \ +} + +#define EERECOMPILE_CODE0_PENALTY(fn, xmminfo, cycles) \ +void rec##fn(void) \ +{ \ + eeRecompileCode0(rec##fn##_const, rec##fn##_consts, rec##fn##_constt, rec##fn##_, xmminfo); \ + g_eeCyclePenalty = (cycles); \ +} #define EERECOMPILE_CODEX(codename, fn) \ void rec##fn(void) \ { \ codename(rec##fn##_const, rec##fn##_); \ -} \ +} // // MMX/XMM caching helpers @@ -233,19 +262,18 @@ void eeRecompileCodeConstSPECIAL(R5900FNPTR constcode, R5900FNPTR_INFO multicode return; \ } \ -#ifdef __x86_64__ #define FPURECOMPILE_CONSTCODE(fn, xmminfo) \ void rec##fn(void) \ { \ eeFPURecompileCode(rec##fn##_xmm, fn, xmminfo); \ } -#else -#define FPURECOMPILE_CONSTCODE(fn, xmminfo) \ + +#define FPURECOMPILE_CONSTCODE_PENALTY(fn, xmminfo, cycles) \ void rec##fn(void) \ { \ eeFPURecompileCode(rec##fn##_xmm, fn, xmminfo); \ + g_eeCyclePenalty = (cycles); \ } -#endif // rd = rs op rt (all regs need to be in xmm) int eeRecompileCodeXMM(int xmminfo); diff 
--git a/pcsx2/x86/ir5900tables.c b/pcsx2/x86/ir5900tables.c index 25337ca602..46612881ea 100644 --- a/pcsx2/x86/ir5900tables.c +++ b/pcsx2/x86/ir5900tables.c @@ -331,7 +331,7 @@ void (*recCP1W[64] )() = { }; void (*recMMIt[64] )() = { - recMADD, recMADDU, recNULL, recNULL, recPLZCW, recNULL, recNULL, recNULL, + recMADD, recMADDU, recNULL, recNULL, recPLZCW, recNULL, recNULL, recNULL, recMMI0, recMMI2, recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, recMFHI1, recMTHI1, recMFLO1, recMTLO1, recNULL, recNULL, recNULL, recNULL, recMULT1, recMULTU1, recDIV1, recDIVU1, recNULL, recNULL, recNULL, recNULL, @@ -471,171 +471,6 @@ __forceinline void BSCPropagate::rpropSetFPUWrite( int reg, int mask ) #define EEINST_REALXMM EEINST_XMM - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - //SLL, NULL, SRL, SRA, SLLV, NULL, SRLV, SRAV, //JR, JALR, MOVZ, MOVN, SYSCALL, BREAK, NULL, SYNC, //MFHI, MTHI, MFLO, MTLO, DSLLV, NULL, DSRLV, DSRAV, diff --git a/pcsx2/x86/ix86-32/iR5900-32.c b/pcsx2/x86/ix86-32/iR5900-32.c index 975548bbda..aec6465969 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.c +++ b/pcsx2/x86/ix86-32/iR5900-32.c @@ -95,6 +95,8 @@ static u32 s_saveConstGPRreg = 0, s_saveHasConstReg = 0, s_saveFlushedConstReg = static EEINST* s_psaveInstInfo = NULL; u32 s_nBlockCycles = 0; // cycles of current block recompiling +u32 g_eeCyclePenalty; // cycle penalty of the current recompiled instruction + static u32 s_savenBlockCycles = 0; void recCOP2RecompileInst(); @@ -2250,19 +2252,36 @@ void iFlushCall(int flushtype) // } //} -//static void cleanup() -//{ -// assert( !g_globalMMXSaved ); -// assert( !g_globalXMMSaved ); -//} -//fixme : this is all a huge hack, we base the counter advancements on the average an 
opcode should take (wtf?) -// If that wasn't bad enough we have default values like 9/8 which will get cast to int later -// (yeah, that means all sync code couldn't have worked to beginn with) -// So for now these are new settings that work. I would've set 1 for default but that seemed too low -// (rama) +static u32 eeScaleBlockCycles() +{ + // Note: s_nBlockCycles is 2 bit fixed point. Divide by 4 when done! -#define EECYCLE_MULT (CHECK_EESYNC_HACK ? (CHECK_EE_IOP_EXTRA ? 3 : 2) : (1.2)) + // Let's not scale blocks under 5-ish cycles. This fixes countless "problems" + // caused by sync hacks and such, since games seem to care a lot more about + // these small blocks having accurate cycle counts. + + if( s_nBlockCycles <= 5*4 || !CHECK_EESYNC_HACK ) return s_nBlockCycles / 4; + + u32 scalar = CHECK_EE_IOP_EXTRA ? 14 : 9; // 3.5 and 2.25 scales + + if( s_nBlockCycles <= 10*4 ) + { + // Mid-size blocks should get a mid-sized scale: + // (using an additional 2 bits fixed point math here) + + scalar = CHECK_EE_IOP_EXTRA ? 9 : 7; // 2.25 and 1.75 scales + } + else if( s_nBlockCycles >= 22*4 ) + { + // larger blocks get a smaller scalar as well, to help keep + // them from becoming "too fat" and delaying branch tests. + scalar = CHECK_EE_IOP_EXTRA ? 
10 : 7; // 2.5 and 1.75 scales + } + + s_nBlockCycles *= scalar; + return s_nBlockCycles / (4*4); +} static void iBranchTest(u32 newpc, u32 cpuBranch) { @@ -2278,7 +2297,7 @@ static void iBranchTest(u32 newpc, u32 cpuBranch) #endif MOV32MtoR(ECX, (uptr)&cpuRegs.cycle); - ADD32ItoR(ECX, s_nBlockCycles*EECYCLE_MULT); + ADD32ItoR(ECX, eeScaleBlockCycles()); MOV32RtoM((uptr)&cpuRegs.cycle, ECX); // update cycles SUB32MtoR(ECX, (uptr)&g_nextBranchCycle); @@ -2322,7 +2341,7 @@ void recSYSCALL( void ) { CMP32ItoM((uptr)&cpuRegs.pc, pc); j8Ptr[0] = JE8(0); - ADD32ItoM((uptr)&cpuRegs.cycle, s_nBlockCycles*EECYCLE_MULT); + ADD32ItoM((uptr)&cpuRegs.cycle, eeScaleBlockCycles()); JMP32((uptr)DispatcherReg - ( (uptr)x86Ptr + 5 )); x86SetJ8(j8Ptr[0]); //branch = 2; @@ -2337,7 +2356,7 @@ void recBREAK( void ) { CMP32ItoM((uptr)&cpuRegs.pc, pc); j8Ptr[0] = JE8(0); - ADD32ItoM((uptr)&cpuRegs.cycle, s_nBlockCycles*EECYCLE_MULT); + ADD32ItoM((uptr)&cpuRegs.cycle, eeScaleBlockCycles()); RET(); x86SetJ8(j8Ptr[0]); //branch = 2; @@ -2532,7 +2551,7 @@ void recompileNextInstruction(int delayslot) #endif cpuRegs.code = *(int *)s_pCode; - s_nBlockCycles++; + s_nBlockCycles+=4; pc += 4; //#ifdef _DEBUG @@ -2589,26 +2608,33 @@ void recompileNextInstruction(int delayslot) #ifdef PCSX2_VIRTUAL_MEM if( g_pCurInstInfo->numpeeps > 1 ) { + g_eeCyclePenalty = InstCycles_Store; switch(cpuRegs.code>>26) { - case 30: recLQ_coX(g_pCurInstInfo->numpeeps); break; + case 30: recLQ_coX(g_pCurInstInfo->numpeeps); g_eeCyclePenalty = InstCycles_Load; break; case 31: recSQ_coX(g_pCurInstInfo->numpeeps); break; - case 49: recLWC1_coX(g_pCurInstInfo->numpeeps); break; + case 49: recLWC1_coX(g_pCurInstInfo->numpeeps); g_eeCyclePenalty = InstCycles_Load; break; case 57: recSWC1_coX(g_pCurInstInfo->numpeeps); break; - case 55: recLD_coX(g_pCurInstInfo->numpeeps); break; + case 55: recLD_coX(g_pCurInstInfo->numpeeps); g_eeCyclePenalty = InstCycles_Load; break; case 63: recSD_coX(g_pCurInstInfo->numpeeps, 1); break; 
//not sure if should be set to 1 or 0; looks like "1" handles alignment, so i'm going with that for now default: assert(0); } - pc += g_pCurInstInfo->numpeeps*4; - s_nBlockCycles += g_pCurInstInfo->numpeeps; + s_nBlockCycles += g_pCurInstInfo->numpeeps * (g_eeCyclePenalty+4); g_pCurInstInfo += g_pCurInstInfo->numpeeps; } else { + g_eeCyclePenalty = 0; recBSC_co[cpuRegs.code>>26](); pc += 4; - s_nBlockCycles++; g_pCurInstInfo++; + + // ugh! we're actually writing two instructions as one load/store opt here, + // so we need to factor the cycle penalty*2, and add 1 for the actual instruction + // base cycle counter. We don't add 2 because s_nBlockCycles was already + // incremented above. + + s_nBlockCycles += (g_eeCyclePenalty*2) + 4; } #else assert(0); @@ -2639,7 +2665,9 @@ void recompileNextInstruction(int delayslot) return; } } + g_eeCyclePenalty = 0; recBSC[ cpuRegs.code >> 26 ](); + s_nBlockCycles += g_eeCyclePenalty; } if( !delayslot ) { @@ -3237,7 +3265,7 @@ StartRecomp: else { assert( branch != 3 ); if( branch ) assert( !willbranch3 ); - else ADD32ItoM((int)&cpuRegs.cycle, s_nBlockCycles*EECYCLE_MULT); + else ADD32ItoM((int)&cpuRegs.cycle, eeScaleBlockCycles() ); if( willbranch3 ) { BASEBLOCK* pblock = PC_GETBLOCK(s_nEndBlock); diff --git a/pcsx2/x86/ix86-32/iR5900LoadStore.c b/pcsx2/x86/ix86-32/iR5900LoadStore.c index 19b2582e04..e0df5aeac0 100644 --- a/pcsx2/x86/ix86-32/iR5900LoadStore.c +++ b/pcsx2/x86/ix86-32/iR5900LoadStore.c @@ -630,18 +630,18 @@ void recLoad32_co(u32 bit, u32 sign) } } -void recLB( void ) { recLoad32(8, _Imm_, 1); } -void recLB_co( void ) { recLoad32_co(8, 1); } -void recLBU( void ) { recLoad32(8, _Imm_, 0); } -void recLBU_co( void ) { recLoad32_co(8, 0); } -void recLH( void ) { recLoad32(16, _Imm_, 1); } -void recLH_co( void ) { recLoad32_co(16, 1); } -void recLHU( void ) { recLoad32(16, _Imm_, 0); } -void recLHU_co( void ) { recLoad32_co(16, 0); } -void recLW( void ) { recLoad32(32, _Imm_, 1); } -void recLW_co( void ) { 
recLoad32_co(32, 1); } -void recLWU( void ) { recLoad32(32, _Imm_, 0); } -void recLWU_co( void ) { recLoad32_co(32, 0); } +void recLB( void ) { recLoad32(8, _Imm_, 1); g_eeCyclePenalty = InstCycles_Load; } +void recLB_co( void ) { recLoad32_co(8, 1); g_eeCyclePenalty = InstCycles_Load; } +void recLBU( void ) { recLoad32(8, _Imm_, 0); g_eeCyclePenalty = InstCycles_Load; } +void recLBU_co( void ) { recLoad32_co(8, 0); g_eeCyclePenalty = InstCycles_Load; } +void recLH( void ) { recLoad32(16, _Imm_, 1); g_eeCyclePenalty = InstCycles_Load; } +void recLH_co( void ) { recLoad32_co(16, 1); g_eeCyclePenalty = InstCycles_Load; } +void recLHU( void ) { recLoad32(16, _Imm_, 0); g_eeCyclePenalty = InstCycles_Load; } +void recLHU_co( void ) { recLoad32_co(16, 0); g_eeCyclePenalty = InstCycles_Load; } +void recLW( void ) { recLoad32(32, _Imm_, 1); g_eeCyclePenalty = InstCycles_Load; } +void recLW_co( void ) { recLoad32_co(32, 1); g_eeCyclePenalty = InstCycles_Load; } +void recLWU( void ) { recLoad32(32, _Imm_, 0); g_eeCyclePenalty = InstCycles_Load; } +void recLWU_co( void ) { recLoad32_co(32, 0); g_eeCyclePenalty = InstCycles_Load; } //////////////////////////////////////////////////// @@ -650,6 +650,8 @@ void recLWL_co(void) { recLoad32(32, _Imm_-3, 1); } void recLWL( void ) { + g_eeCyclePenalty = InstCycles_Load; + #ifdef REC_SLOWREAD _flushConstReg(_Rs_); #else @@ -734,6 +736,7 @@ void recLWR_co(void) { recLoad32(32, _Imm_, 1); } void recLWR( void ) { + g_eeCyclePenalty = InstCycles_Load; #ifdef REC_SLOWREAD _flushConstReg(_Rs_); #else @@ -931,7 +934,7 @@ void recLoad64(u32 imm, int align) if( _Rt_ ) _eeOnWriteReg(_Rt_, 0); } -void recLD(void) { recLoad64(_Imm_, 1); } +void recLD(void) { recLoad64(_Imm_, 1); g_eeCyclePenalty = InstCycles_Load; } void recLD_co( void ) { @@ -1191,11 +1194,15 @@ void recLD_coX( int num ) } //////////////////////////////////////////////////// -void recLDL_co(void) { - recLoad64(_Imm_-7, 0); } +void recLDL_co(void) +{ + g_eeCyclePenalty = 
InstCycles_Load; + recLoad64(_Imm_-7, 0); +} void recLDL( void ) { + g_eeCyclePenalty = InstCycles_Load; iFlushCall(FLUSH_NOCONST); if( GPR_IS_CONST1( _Rs_ ) ) { @@ -1217,10 +1224,11 @@ void recLDL( void ) } //////////////////////////////////////////////////// -void recLDR_co(void) { recLoad64(_Imm_, 0); } +void recLDR_co(void) { recLoad64(_Imm_, 0); g_eeCyclePenalty = InstCycles_Load; } void recLDR( void ) { + g_eeCyclePenalty = InstCycles_Load; iFlushCall(FLUSH_NOCONST); if( GPR_IS_CONST1( _Rs_ ) ) { @@ -1244,6 +1252,8 @@ void recLDR( void ) //////////////////////////////////////////////////// void recLQ( void ) { + g_eeCyclePenalty = InstCycles_Load; + int mmreg = -1; #ifdef REC_SLOWREAD _flushConstReg(_Rs_); @@ -1373,6 +1383,8 @@ void recLQ( void ) void recLQ_co( void ) { + g_eeCyclePenalty = InstCycles_Load; + #ifdef REC_SLOWREAD _flushConstReg(_Rs_); #else @@ -2412,18 +2424,20 @@ void recStore_co(int bit, int align) _clearNeededXMMregs(); // needed since allocing } -void recSB( void ) { recStore(8, _Imm_, 1); } -void recSB_co( void ) { recStore_co(8, 1); } -void recSH( void ) { recStore(16, _Imm_, 1); } -void recSH_co( void ) { recStore_co(16, 1); } -void recSW( void ) { recStore(32, _Imm_, 1); } -void recSW_co( void ) { recStore_co(32, 1); } +void recSB( void ) { recStore(8, _Imm_, 1); g_eeCyclePenalty = InstCycles_Store; } +void recSB_co( void ) { recStore_co(8, 1); g_eeCyclePenalty = InstCycles_Store; } +void recSH( void ) { recStore(16, _Imm_, 1); g_eeCyclePenalty = InstCycles_Store; } +void recSH_co( void ) { recStore_co(16, 1); g_eeCyclePenalty = InstCycles_Store; } +void recSW( void ) { recStore(32, _Imm_, 1); g_eeCyclePenalty = InstCycles_Store; } +void recSW_co( void ) { recStore_co(32, 1); g_eeCyclePenalty = InstCycles_Store; } //////////////////////////////////////////////////// -void recSWL_co(void) { recStore(32, _Imm_-3, 0); } +void recSWL_co(void) { recStore(32, _Imm_-3, 0); g_eeCyclePenalty = InstCycles_Store; } void recSWL( void ) { + 
g_eeCyclePenalty = InstCycles_Store; + #ifdef REC_SLOWWRITE _flushConstReg(_Rs_); #else @@ -2511,10 +2525,12 @@ void recSWL( void ) } //////////////////////////////////////////////////// -void recSWR_co(void) { recStore(32, _Imm_, 0); } +void recSWR_co(void) { recStore(32, _Imm_, 0); g_eeCyclePenalty = InstCycles_Store; } void recSWR( void ) { + g_eeCyclePenalty = InstCycles_Store; + #ifdef REC_SLOWWRITE _flushConstReg(_Rs_); #else @@ -2720,10 +2736,12 @@ void recSD_coX(int num, int align) } //////////////////////////////////////////////////// -void recSDL_co(void) { recStore(64, _Imm_-7, 0); } +void recSDL_co(void) { recStore(64, _Imm_-7, 0); g_eeCyclePenalty = InstCycles_Store; } void recSDL( void ) { + g_eeCyclePenalty = InstCycles_Store; + iFlushCall(FLUSH_NOCONST); if( GPR_IS_CONST1( _Rs_ ) ) { @@ -2744,10 +2762,12 @@ void recSDL( void ) } //////////////////////////////////////////////////// -void recSDR_co(void) { recStore(64, _Imm_, 0); } +void recSDR_co(void) { recStore(64, _Imm_, 0); g_eeCyclePenalty = InstCycles_Store; } void recSDR( void ) { + g_eeCyclePenalty = InstCycles_Store; + iFlushCall(FLUSH_NOCONST); if( GPR_IS_CONST1( _Rs_ ) ) { @@ -2768,8 +2788,8 @@ void recSDR( void ) } //////////////////////////////////////////////////// -void recSQ( void ) { recStore(128, _Imm_, 1); } -void recSQ_co( void ) { recStore_co(128, 1); } +void recSQ( void ) { recStore(128, _Imm_, 1); g_eeCyclePenalty = InstCycles_Store; } +void recSQ_co( void ) { recStore_co(128, 1); g_eeCyclePenalty = InstCycles_Store; } // coissues more than 2 SQs void recSQ_coX(int num) diff --git a/pcsx2/x86/ix86-32/iR5900MultDiv.c b/pcsx2/x86/ix86-32/iR5900MultDiv.c index e77083d214..c72b4d0b00 100644 --- a/pcsx2/x86/ix86-32/iR5900MultDiv.c +++ b/pcsx2/x86/ix86-32/iR5900MultDiv.c @@ -424,7 +424,7 @@ void recMULT_constt(int info) } // don't set XMMINFO_WRITED|XMMINFO_WRITELO|XMMINFO_WRITEHI -EERECOMPILE_CODE0(MULT, XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0)); 
+EERECOMPILE_CODE0_PENALTY(MULT, XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0), InstCycles_Mult ); //// MULTU void recMULTU_const() @@ -537,7 +537,7 @@ void recMULTU_constt(int info) } // don't specify XMMINFO_WRITELO or XMMINFO_WRITEHI, that is taken care of -EERECOMPILE_CODE0(MULTU, XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0)); +EERECOMPILE_CODE0_PENALTY(MULTU, XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0), InstCycles_Mult); //////////////////////////////////////////////////// void recMULT1_const() @@ -571,7 +571,7 @@ void recMULT1_constt(int info) else recMULTUsuper(info, 1, PROCESS_CONSTT); } -EERECOMPILE_CODE0(MULT1, XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0)); +EERECOMPILE_CODE0_PENALTY(MULT1, XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0), InstCycles_Mult ); //////////////////////////////////////////////////// void recMULTU1_const() @@ -596,7 +596,7 @@ void recMULTU1_constt(int info) recMULTUsuper(info, 1, PROCESS_CONSTT); } -EERECOMPILE_CODE0(MULTU1, XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0)); +EERECOMPILE_CODE0_PENALTY(MULTU1, XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0), InstCycles_Mult); //// DIV void recDIV_const() @@ -661,7 +661,7 @@ void recDIV_constt(int info) recDIVsuper(info, 1, 0, PROCESS_CONSTT); } -EERECOMPILE_CODE0(DIV, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI); +EERECOMPILE_CODE0_PENALTY(DIV, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI, InstCycles_Div); //// DIVU void recDIVU_const() @@ -689,7 +689,7 @@ void recDIVU_constt(int info) recDIVsuper(info, 0, 0, PROCESS_CONSTT); } -EERECOMPILE_CODE0(DIVU, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI); +EERECOMPILE_CODE0_PENALTY(DIVU, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI, InstCycles_Div); void recDIV1_const() { @@ -716,7 +716,7 @@ void recDIV1_constt(int info) recDIVsuper(info, 1, 1, PROCESS_CONSTT); } -EERECOMPILE_CODE0(DIV1, XMMINFO_READS|XMMINFO_READT); 
+EERECOMPILE_CODE0_PENALTY(DIV1, XMMINFO_READS|XMMINFO_READT, InstCycles_Div); void recDIVU1_const() { @@ -743,7 +743,7 @@ void recDIVU1_constt(int info) recDIVsuper(info, 0, 1, PROCESS_CONSTT); } -EERECOMPILE_CODE0(DIVU1, XMMINFO_READS|XMMINFO_READT); +EERECOMPILE_CODE0_PENALTY(DIVU1, XMMINFO_READS|XMMINFO_READT, InstCycles_Div); //do EEINST_SETSIGNEXT REC_FUNC( MADD, _Rd_ );