Added somewhat more "accurate" cycle timings, by weighting mul/div and load/store instructions to more closely match typical/avg cycle counts for those instructions on the PS2. In addition to helping fix some obscure timing glitches, it also allows the emulator to run a little more efficiently. True accuracy is impossible since we can't realistically emulate the MIPS superscalar 8-stage pipeline, branch predictions, or data cache hits/misses. But at least the "average" cycle timings should be a little closer to the real thing now.

Improved stability of both EE/X2 and EE/X3 sync hacks.  In particular, some FMVs that broke under the X2 hack should work a bit better now, and the X3 hack should behave a lot better now too (but will still break pretty much any FMV/audio sync).

X2 sync hack performance also improved, by maybe 6% or so.

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@446 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
Jake.Stine 2008-12-17 15:12:35 +00:00 committed by Gregory Hainaut
parent 6696e776e5
commit 1a02e88997
10 changed files with 199 additions and 243 deletions

View File

@ -1284,7 +1284,7 @@ void recDIV_S_xmm(int info)
_freeXMMreg(t0reg);
}
FPURECOMPILE_CONSTCODE(DIV_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
FPURECOMPILE_CONSTCODE_PENALTY(DIV_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT,InstCycles_FPU_Sqrt);
//------------------------------------------------------------------
@ -1670,7 +1670,7 @@ void recMUL_S_xmm(int info)
ClampValues(recCommutativeOp(info, EEREC_D, 1));
}
FPURECOMPILE_CONSTCODE(MUL_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
FPURECOMPILE_CONSTCODE_PENALTY(MUL_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT,InstCycles_FPU_Sqrt/2);
void recMULA_S_xmm(int info)
{
@ -1811,7 +1811,7 @@ void recSQRT_S_xmm(int info)
_freeX86reg(tempReg);
}
FPURECOMPILE_CONSTCODE(SQRT_S, XMMINFO_WRITED|XMMINFO_READT);
FPURECOMPILE_CONSTCODE_PENALTY(SQRT_S, XMMINFO_WRITED|XMMINFO_READT, InstCycles_FPU_Sqrt);
//------------------------------------------------------------------
@ -1916,7 +1916,7 @@ void recRSQRT_S_xmm(int info)
_freeXMMreg(t0reg);
}
FPURECOMPILE_CONSTCODE(RSQRT_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
FPURECOMPILE_CONSTCODE_PENALTY(RSQRT_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT,InstCycles_FPU_Sqrt*2);
#endif // FPU_RECOMPILE

View File

@ -2222,6 +2222,8 @@ void recPSRLVW()
////////////////////////////////////////////////////
void recPMSUBW()
{
g_eeCyclePenalty = InstCycles_MMI_Mult;
EEINST_SETSIGNEXT(_Rs_);
EEINST_SETSIGNEXT(_Rt_);
if( _Rd_ ) EEINST_SETSIGNEXT(_Rd_);
@ -2262,6 +2264,7 @@ void recPMSUBW()
////////////////////////////////////////////////////
void recPMULTW()
{
g_eeCyclePenalty = InstCycles_MMI_Mult;
EEINST_SETSIGNEXT(_Rs_);
EEINST_SETSIGNEXT(_Rt_);
if( _Rd_ ) EEINST_SETSIGNEXT(_Rd_);
@ -2270,6 +2273,7 @@ void recPMULTW()
////////////////////////////////////////////////////
void recPDIVW()
{
g_eeCyclePenalty = InstCycles_MMI_Div;
EEINST_SETSIGNEXT(_Rs_);
EEINST_SETSIGNEXT(_Rt_);
REC_FUNC_INLINE( PDIVW, _Rd_ );
@ -2278,6 +2282,7 @@ void recPDIVW()
////////////////////////////////////////////////////
// Recompiles the MMI PDIVBW instruction.
// Records the MMI divide cycle penalty (InstCycles_MMI_Div) in g_eeCyclePenalty
// for the instruction being recompiled, then emits the operation through the
// REC_FUNC_INLINE macro.
// NOTE(review): REC_FUNC_INLINE is defined elsewhere; presumably it emits a
// call to the interpreter implementation of PDIVBW -- confirm against the macro.
void recPDIVBW()
{
g_eeCyclePenalty = InstCycles_MMI_Div;
REC_FUNC_INLINE( PDIVBW, _Rd_ ); //--
}
@ -2286,6 +2291,8 @@ PCSX2_ALIGNED16(int s_mask[4]) = {~0, 0, ~0, 0};
void recPHMADH()
{
g_eeCyclePenalty = InstCycles_MMI_Mult;
CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI)
int t0reg = _Rd_ ? EEREC_D : _allocTempXMMreg(XMMT_INT, -1);
@ -2345,6 +2352,8 @@ CPU_SSE_XMMCACHE_END
////////////////////////////////////////////////////
void recPMSUBH()
{
g_eeCyclePenalty = InstCycles_MMI_Mult;
CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI)
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
@ -2388,6 +2397,8 @@ CPU_SSE_XMMCACHE_END
////////////////////////////////////////////////////
void recPHMSBH()
{
g_eeCyclePenalty = InstCycles_MMI_Mult;
CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI)
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
@ -2542,6 +2553,8 @@ CPU_SSE_XMMCACHE_END
void recPMULTH( void )
{
g_eeCyclePenalty = InstCycles_MMI_Mult;
CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI)
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
@ -2794,6 +2807,8 @@ CPU_SSE_XMMCACHE_END
void recPMADDH( void )
{
g_eeCyclePenalty = InstCycles_MMI_Mult;
CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI)
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
@ -2978,6 +2993,8 @@ CPU_SSE_XMMCACHE_END
////////////////////////////////////////////////////
void recPMULTUW()
{
g_eeCyclePenalty = InstCycles_MMI_Mult;
CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED|XMMINFO_WRITELO|XMMINFO_WRITEHI)
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
EEINST_SETSIGNEXT(_Rs_);
@ -3011,6 +3028,8 @@ CPU_SSE_XMMCACHE_END
////////////////////////////////////////////////////
void recPMADDUW()
{
g_eeCyclePenalty = InstCycles_MMI_Mult;
CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI)
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
EEINST_SETSIGNEXT(_Rs_);
@ -3049,7 +3068,11 @@ CPU_SSE_XMMCACHE_END
////////////////////////////////////////////////////
//do EEINST_SETSIGNEXT
REC_FUNC( PDIVUW, _Rd_ );
// Recompiles the MMI PDIVUW instruction.
// Records the MMI divide cycle penalty (InstCycles_MMI_Div) in g_eeCyclePenalty
// for the instruction being recompiled, then emits the operation through the
// REC_FUNC_INLINE macro.
// NOTE(review): REC_FUNC_INLINE is defined elsewhere; presumably it emits a
// call to the interpreter implementation of PDIVUW -- confirm against the macro.
void recPDIVUW()
{
g_eeCyclePenalty = InstCycles_MMI_Div;
REC_FUNC_INLINE( PDIVUW, _Rd_ );
}
////////////////////////////////////////////////////
void recPEXCW()

View File

@ -74,6 +74,8 @@ static BASEBLOCKEX *recBlocks = NULL;
static u8 *recPtr;
u32 psxpc; // recompiler psxpc
int psxbranch; // set for branch
u32 g_iopCyclePenalty;
static EEINST* s_pInstCache = NULL;
static u32 s_nInstCacheSize = 0;
@ -1154,7 +1156,9 @@ void psxRecompileNextInstruction(int delayslot)
}
else {
assert( !(g_pCurInstInfo->info & EEINSTINFO_NOREC) );
g_iopCyclePenalty = 0;
rpsxBSC[ psxRegs.code >> 26 ]();
s_psxBlockCycles += g_iopCyclePenalty;
}
if( !delayslot ) {

View File

@ -20,6 +20,16 @@
extern void __Log(const char *fmt, ...);
// Cycle penalties for particularly slow instructions.
static const int psxInstCycles_Mult = 8;
static const int psxInstCycles_Div = 60;
// Currently unused (iop mod incomplete)
static const int psxInstCycles_Peephole_Store = 0;
static const int psxInstCycles_Store = 0;
static const int psxInstCycles_Load = 0;
// to be consistent with EE
#define PSX_HI XMMGPR_HI
#define PSX_LO XMMGPR_LO
@ -43,6 +53,7 @@ void PSX_CHECK_SAVE_REG(int reg);
extern u32 psxpc; // recompiler pc
extern int psxbranch; // set for branch
extern u32 g_iopCyclePenalty;
void psxSaveBranchState();
void psxLoadBranchState();
@ -64,28 +75,35 @@ void psxRecClearMem(BASEBLOCK* p);
void rpsx##fn(void) \
{ \
psxRecompileCodeConst0(rpsx##fn##_const, rpsx##fn##_consts, rpsx##fn##_constt, rpsx##fn##_); \
} \
}
// rt = rs op imm16
#define PSXRECOMPILE_CONSTCODE1(fn) \
void rpsx##fn(void) \
{ \
psxRecompileCodeConst1(rpsx##fn##_const, rpsx##fn##_); \
} \
}
// rd = rt op sa
#define PSXRECOMPILE_CONSTCODE2(fn) \
void rpsx##fn(void) \
{ \
psxRecompileCodeConst2(rpsx##fn##_const, rpsx##fn##_); \
} \
}
// [lo,hi] = rt op rs
#define PSXRECOMPILE_CONSTCODE3(fn, LOHI) \
void rpsx##fn(void) \
{ \
psxRecompileCodeConst3(rpsx##fn##_const, rpsx##fn##_consts, rpsx##fn##_constt, rpsx##fn##_, LOHI); \
} \
}
// [lo,hi] = rt op rs, with an extra cycle penalty.
// Defines rpsx<fn>() exactly like PSXRECOMPILE_CONSTCODE3, and additionally
// stores `cycles` in g_iopCyclePenalty once the instruction has been
// recompiled.  psxRecompileNextInstruction zeroes g_iopCyclePenalty before
// dispatching and adds it to s_psxBlockCycles afterwards.
//   fn     - instruction name; expects rpsx<fn>_const/_consts/_constt/_ to exist
//   LOHI   - forwarded unchanged to psxRecompileCodeConst3
//   cycles - penalty expression; parenthesized so any expression is safe
#define PSXRECOMPILE_CONSTCODE3_PENALTY(fn, LOHI, cycles) \
void rpsx##fn(void) \
{ \
	psxRecompileCodeConst3(rpsx##fn##_const, rpsx##fn##_consts, rpsx##fn##_constt, rpsx##fn##_, LOHI); \
	g_iopCyclePenalty = (cycles); \
}
// rd = rs op rt
void psxRecompileCodeConst0(R3000AFNPTR constcode, R3000AFNPTR_INFO constscode, R3000AFNPTR_INFO consttcode, R3000AFNPTR_INFO noconstcode);

View File

@ -469,7 +469,7 @@ void rpsxMULT_consts(int info) { rpsxMULTsuperconst(info, _Rt_, g_psxConstRegs[_
void rpsxMULT_constt(int info) { rpsxMULTsuperconst(info, _Rs_, g_psxConstRegs[_Rt_], 1); }
void rpsxMULT_(int info) { rpsxMULTsuper(info, 1); }
PSXRECOMPILE_CONSTCODE3(MULT, 1);
PSXRECOMPILE_CONSTCODE3_PENALTY(MULT, 1, psxInstCycles_Mult);
//// MULTU
void rpsxMULTU_const()
@ -484,7 +484,7 @@ void rpsxMULTU_consts(int info) { rpsxMULTsuperconst(info, _Rt_, g_psxConstRegs[
void rpsxMULTU_constt(int info) { rpsxMULTsuperconst(info, _Rs_, g_psxConstRegs[_Rt_], 0); }
void rpsxMULTU_(int info) { rpsxMULTsuper(info, 0); }
PSXRECOMPILE_CONSTCODE3(MULTU, 1);
PSXRECOMPILE_CONSTCODE3_PENALTY(MULTU, 1, psxInstCycles_Mult);
//// DIV
void rpsxDIV_const()
@ -582,7 +582,7 @@ void rpsxDIV_consts(int info) { rpsxDIVsuperconsts(info, 1); }
void rpsxDIV_constt(int info) { rpsxDIVsuperconstt(info, 1); }
void rpsxDIV_(int info) { rpsxDIVsuper(info, 1); }
PSXRECOMPILE_CONSTCODE3(DIV, 1);
PSXRECOMPILE_CONSTCODE3_PENALTY(DIV, 1, psxInstCycles_Div);
//// DIVU
void rpsxDIVU_const()
@ -601,7 +601,7 @@ void rpsxDIVU_consts(int info) { rpsxDIVsuperconsts(info, 0); }
void rpsxDIVU_constt(int info) { rpsxDIVsuperconstt(info, 0); }
void rpsxDIVU_(int info) { rpsxDIVsuper(info, 0); }
PSXRECOMPILE_CONSTCODE3(DIVU, 1);
PSXRECOMPILE_CONSTCODE3_PENALTY(DIVU, 1, psxInstCycles_Div);
//// LoadStores
#ifdef PCSX2_VIRTUAL_MEM

View File

@ -40,6 +40,20 @@
#define CP0_RECOMPILE
#define CP2_RECOMPILE
// Cycle penalties for particularly slow instructions.
static const int InstCycles_Mult = 1*4;
static const int InstCycles_Div = 12*4;
static const int InstCycles_FPU_Sqrt = 3*4;
static const int InstCycles_MMI_Mult = 2*4;
static const int InstCycles_MMI_Div = 20*4;
// Setting Loads to 1 or higher breaks Disgaea 2 FMV audio syncs.
static const int InstCycles_Peephole_Store = 7; // 1.75 cycle penalty
static const int InstCycles_Peephole_Load = 1; // 0.25 cycle penalty
static const int InstCycles_Store = 7; // 1.75 cycle penalty
static const int InstCycles_Load = 1; // 0.25 cycle penalty
#define EE_CONST_PROP // rec2 - enables constant propagation (faster)
//#define EE_FPU_REGCACHING 1 // Not used anymore, its always on!
@ -63,6 +77,7 @@ extern u32 target; // branch target
extern u16 x86FpuState;
extern u16 iCWstate;
extern u32 s_nBlockCycles; // cycles of current block recompiling
extern u32 g_eeCyclePenalty;
void recBranchCall( void (*func)() );
@ -146,13 +161,27 @@ typedef void (*R5900FNPTR_INFO)(int info);
void rec##fn(void) \
{ \
eeRecompileCode0(rec##fn##_const, rec##fn##_consts, rec##fn##_constt, rec##fn##_, xmminfo); \
} \
}
// Defines rec<fn>() like EERECOMPILE_CODE0, and additionally stores `cycles`
// in g_eeCyclePenalty once the instruction has been recompiled.
//   fn      - instruction name; expects rec<fn>_const/_consts/_constt/_ to exist
//   xmminfo - XMMINFO_* flags forwarded unchanged to eeRecompileCode0
//   cycles  - penalty expression, in the same units as the InstCycles_* constants
// (This macro was previously defined twice with identical text; one
// definition is sufficient.)
#define EERECOMPILE_CODE0_PENALTY(fn, xmminfo, cycles) \
void rec##fn(void) \
{ \
	eeRecompileCode0(rec##fn##_const, rec##fn##_consts, rec##fn##_constt, rec##fn##_, xmminfo); \
	g_eeCyclePenalty = (cycles); \
}
#define EERECOMPILE_CODEX(codename, fn) \
void rec##fn(void) \
{ \
codename(rec##fn##_const, rec##fn##_); \
} \
}
//
// MMX/XMM caching helpers
@ -233,19 +262,18 @@ void eeRecompileCodeConstSPECIAL(R5900FNPTR constcode, R5900FNPTR_INFO multicode
return; \
} \
#ifdef __x86_64__
#define FPURECOMPILE_CONSTCODE(fn, xmminfo) \
void rec##fn(void) \
{ \
eeFPURecompileCode(rec##fn##_xmm, fn, xmminfo); \
}
#else
#define FPURECOMPILE_CONSTCODE(fn, xmminfo) \
#define FPURECOMPILE_CONSTCODE_PENALTY(fn, xmminfo, cycles) \
void rec##fn(void) \
{ \
eeFPURecompileCode(rec##fn##_xmm, fn, xmminfo); \
g_eeCyclePenalty = (cycles); \
}
#endif
// rd = rs op rt (all regs need to be in xmm)
int eeRecompileCodeXMM(int xmminfo);

View File

@ -331,7 +331,7 @@ void (*recCP1W[64] )() = {
};
void (*recMMIt[64] )() = {
recMADD, recMADDU, recNULL, recNULL, recPLZCW, recNULL, recNULL, recNULL,
recMADD, recMADDU, recNULL, recNULL, recPLZCW, recNULL, recNULL, recNULL,
recMMI0, recMMI2, recNULL, recNULL, recNULL, recNULL, recNULL, recNULL,
recMFHI1, recMTHI1, recMFLO1, recMTLO1, recNULL, recNULL, recNULL, recNULL,
recMULT1, recMULTU1, recDIV1, recDIVU1, recNULL, recNULL, recNULL, recNULL,
@ -471,171 +471,6 @@ __forceinline void BSCPropagate::rpropSetFPUWrite( int reg, int mask )
#define EEINST_REALXMM EEINST_XMM
//SLL, NULL, SRL, SRA, SLLV, NULL, SRLV, SRAV,
//JR, JALR, MOVZ, MOVN, SYSCALL, BREAK, NULL, SYNC,
//MFHI, MTHI, MFLO, MTLO, DSLLV, NULL, DSRLV, DSRAV,

View File

@ -95,6 +95,8 @@ static u32 s_saveConstGPRreg = 0, s_saveHasConstReg = 0, s_saveFlushedConstReg =
static EEINST* s_psaveInstInfo = NULL;
u32 s_nBlockCycles = 0; // cycles of current block recompiling
u32 g_eeCyclePenalty; // cycle penalty of the current recompiled instruction
static u32 s_savenBlockCycles = 0;
void recCOP2RecompileInst();
@ -2250,19 +2252,36 @@ void iFlushCall(int flushtype)
// }
//}
//static void cleanup()
//{
// assert( !g_globalMMXSaved );
// assert( !g_globalXMMSaved );
//}
//fixme : this is all a huge hack, we base the counter advancements on the average an opcode should take (wtf?)
// If that wasn't bad enough we have default values like 9/8 which will get cast to int later
// (yeah, that means all sync code couldn't have worked to begin with)
// So for now these are new settings that work. I would've set 1 for default but that seemed too low
// (rama)
// Converts the cycle count of the block currently being recompiled
// (s_nBlockCycles) into the number of whole cycles to add to cpuRegs.cycle,
// applying the EE sync-hack scaling when CHECK_EESYNC_HACK is enabled.
//
// s_nBlockCycles is 2 bit fixed point (4 units per cycle).  The scale factors
// below carry an additional 2 bits of fixed point, hence the final divide by
// 4*4 on the scaled path.
//
// This function is invoked from several emission sites (iBranchTest,
// recSYSCALL, recBREAK and the block epilogue) and may therefore run more than
// once for the same block.  It must not modify s_nBlockCycles: scaling the
// counter in place would compound the scalar on every later call.
//
// Returns: the (possibly scaled) block cycle count, in whole cycles.
static u32 eeScaleBlockCycles()
{
	// Legacy multiplier; left defined because other lines in this file still
	// reference EECYCLE_MULT.
#define EECYCLE_MULT (CHECK_EESYNC_HACK ? (CHECK_EE_IOP_EXTRA ? 3 : 2) : (1.2))

	// Let's not scale blocks under 5-ish cycles.  This fixes countless "problems"
	// caused by sync hacks and such, since games seem to care a lot more about
	// these small blocks having accurate cycle counts.
	if( s_nBlockCycles <= 5*4 || !CHECK_EESYNC_HACK )
		return s_nBlockCycles / 4;

	u32 scalar = CHECK_EE_IOP_EXTRA ? 14 : 9;	// 3.5 and 2.25 scales

	if( s_nBlockCycles <= 10*4 )
	{
		// Mid-size blocks should get a mid-sized scale:
		// (using an additional 2 bits fixed point math here)
		scalar = CHECK_EE_IOP_EXTRA ? 9 : 7;	// 2.25 and 1.75 scales
	}
	else if( s_nBlockCycles >= 22*4 )
	{
		// larger blocks get a smaller scalar as well, to help keep
		// them from becoming "too fat" and delaying branch tests.
		scalar = CHECK_EE_IOP_EXTRA ? 10 : 7;	// 2.5 and 1.75 scales
	}

	// Scale into the return value only; s_nBlockCycles itself stays untouched.
	return (s_nBlockCycles * scalar) / (4*4);
}
static void iBranchTest(u32 newpc, u32 cpuBranch)
{
@ -2278,7 +2297,7 @@ static void iBranchTest(u32 newpc, u32 cpuBranch)
#endif
MOV32MtoR(ECX, (uptr)&cpuRegs.cycle);
ADD32ItoR(ECX, s_nBlockCycles*EECYCLE_MULT);
ADD32ItoR(ECX, eeScaleBlockCycles());
MOV32RtoM((uptr)&cpuRegs.cycle, ECX); // update cycles
SUB32MtoR(ECX, (uptr)&g_nextBranchCycle);
@ -2322,7 +2341,7 @@ void recSYSCALL( void ) {
CMP32ItoM((uptr)&cpuRegs.pc, pc);
j8Ptr[0] = JE8(0);
ADD32ItoM((uptr)&cpuRegs.cycle, s_nBlockCycles*EECYCLE_MULT);
ADD32ItoM((uptr)&cpuRegs.cycle, eeScaleBlockCycles());
JMP32((uptr)DispatcherReg - ( (uptr)x86Ptr + 5 ));
x86SetJ8(j8Ptr[0]);
//branch = 2;
@ -2337,7 +2356,7 @@ void recBREAK( void ) {
CMP32ItoM((uptr)&cpuRegs.pc, pc);
j8Ptr[0] = JE8(0);
ADD32ItoM((uptr)&cpuRegs.cycle, s_nBlockCycles*EECYCLE_MULT);
ADD32ItoM((uptr)&cpuRegs.cycle, eeScaleBlockCycles());
RET();
x86SetJ8(j8Ptr[0]);
//branch = 2;
@ -2532,7 +2551,7 @@ void recompileNextInstruction(int delayslot)
#endif
cpuRegs.code = *(int *)s_pCode;
s_nBlockCycles++;
s_nBlockCycles+=4;
pc += 4;
//#ifdef _DEBUG
@ -2589,26 +2608,33 @@ void recompileNextInstruction(int delayslot)
#ifdef PCSX2_VIRTUAL_MEM
if( g_pCurInstInfo->numpeeps > 1 ) {
g_eeCyclePenalty = InstCycles_Store;
switch(cpuRegs.code>>26) {
case 30: recLQ_coX(g_pCurInstInfo->numpeeps); break;
case 30: recLQ_coX(g_pCurInstInfo->numpeeps); g_eeCyclePenalty = InstCycles_Load; break;
case 31: recSQ_coX(g_pCurInstInfo->numpeeps); break;
case 49: recLWC1_coX(g_pCurInstInfo->numpeeps); break;
case 49: recLWC1_coX(g_pCurInstInfo->numpeeps); g_eeCyclePenalty = InstCycles_Load; break;
case 57: recSWC1_coX(g_pCurInstInfo->numpeeps); break;
case 55: recLD_coX(g_pCurInstInfo->numpeeps); break;
case 55: recLD_coX(g_pCurInstInfo->numpeeps); g_eeCyclePenalty = InstCycles_Load; break;
case 63: recSD_coX(g_pCurInstInfo->numpeeps, 1); break; //not sure if should be set to 1 or 0; looks like "1" handles alignment, so i'm going with that for now
default:
assert(0);
}
pc += g_pCurInstInfo->numpeeps*4;
s_nBlockCycles += g_pCurInstInfo->numpeeps;
s_nBlockCycles += g_pCurInstInfo->numpeeps * (g_eeCyclePenalty+4);
g_pCurInstInfo += g_pCurInstInfo->numpeeps;
}
else {
g_eeCyclePenalty = 0;
recBSC_co[cpuRegs.code>>26]();
pc += 4;
s_nBlockCycles++;
g_pCurInstInfo++;
// ugh! we're actually writing two instructions as one load/store opt here,
// so we need to factor the cycle penalty*2, and add 1 cycle (4, in fixed point)
// for the actual instruction base cycle counter. We don't add 2 cycles because
// s_nBlockCycles was already incremented above.
s_nBlockCycles += (g_eeCyclePenalty*2) + 4;
}
#else
assert(0);
@ -2639,7 +2665,9 @@ void recompileNextInstruction(int delayslot)
return;
}
}
g_eeCyclePenalty = 0;
recBSC[ cpuRegs.code >> 26 ]();
s_nBlockCycles += g_eeCyclePenalty;
}
if( !delayslot ) {
@ -3237,7 +3265,7 @@ StartRecomp:
else {
assert( branch != 3 );
if( branch ) assert( !willbranch3 );
else ADD32ItoM((int)&cpuRegs.cycle, s_nBlockCycles*EECYCLE_MULT);
else ADD32ItoM((int)&cpuRegs.cycle, eeScaleBlockCycles() );
if( willbranch3 ) {
BASEBLOCK* pblock = PC_GETBLOCK(s_nEndBlock);

View File

@ -630,18 +630,18 @@ void recLoad32_co(u32 bit, u32 sign)
}
}
void recLB( void ) { recLoad32(8, _Imm_, 1); }
void recLB_co( void ) { recLoad32_co(8, 1); }
void recLBU( void ) { recLoad32(8, _Imm_, 0); }
void recLBU_co( void ) { recLoad32_co(8, 0); }
void recLH( void ) { recLoad32(16, _Imm_, 1); }
void recLH_co( void ) { recLoad32_co(16, 1); }
void recLHU( void ) { recLoad32(16, _Imm_, 0); }
void recLHU_co( void ) { recLoad32_co(16, 0); }
void recLW( void ) { recLoad32(32, _Imm_, 1); }
void recLW_co( void ) { recLoad32_co(32, 1); }
void recLWU( void ) { recLoad32(32, _Imm_, 0); }
void recLWU_co( void ) { recLoad32_co(32, 0); }
// 8/16/32-bit load recompilers (LB/LBU/LH/LHU/LW/LWU) and their "_co" variants.
// Each wrapper emits the load through recLoad32 / recLoad32_co and then records
// the load cycle penalty (InstCycles_Load) in g_eeCyclePenalty.
// For recLoad32_co the arguments are (bit width, sign flag); recLoad32 appears
// to take (bit width, immediate offset, sign flag) -- confirm against its
// definition.
void recLB( void ) { recLoad32(8, _Imm_, 1); g_eeCyclePenalty = InstCycles_Load; }
void recLB_co( void ) { recLoad32_co(8, 1); g_eeCyclePenalty = InstCycles_Load; }
void recLBU( void ) { recLoad32(8, _Imm_, 0); g_eeCyclePenalty = InstCycles_Load; }
void recLBU_co( void ) { recLoad32_co(8, 0); g_eeCyclePenalty = InstCycles_Load; }
void recLH( void ) { recLoad32(16, _Imm_, 1); g_eeCyclePenalty = InstCycles_Load; }
void recLH_co( void ) { recLoad32_co(16, 1); g_eeCyclePenalty = InstCycles_Load; }
void recLHU( void ) { recLoad32(16, _Imm_, 0); g_eeCyclePenalty = InstCycles_Load; }
void recLHU_co( void ) { recLoad32_co(16, 0); g_eeCyclePenalty = InstCycles_Load; }
void recLW( void ) { recLoad32(32, _Imm_, 1); g_eeCyclePenalty = InstCycles_Load; }
void recLW_co( void ) { recLoad32_co(32, 1); g_eeCyclePenalty = InstCycles_Load; }
void recLWU( void ) { recLoad32(32, _Imm_, 0); g_eeCyclePenalty = InstCycles_Load; }
void recLWU_co( void ) { recLoad32_co(32, 0); g_eeCyclePenalty = InstCycles_Load; }
////////////////////////////////////////////////////
@ -650,6 +650,8 @@ void recLWL_co(void) { recLoad32(32, _Imm_-3, 1); }
void recLWL( void )
{
g_eeCyclePenalty = InstCycles_Load;
#ifdef REC_SLOWREAD
_flushConstReg(_Rs_);
#else
@ -734,6 +736,7 @@ void recLWR_co(void) { recLoad32(32, _Imm_, 1); }
void recLWR( void )
{
g_eeCyclePenalty = InstCycles_Load;
#ifdef REC_SLOWREAD
_flushConstReg(_Rs_);
#else
@ -931,7 +934,7 @@ void recLoad64(u32 imm, int align)
if( _Rt_ ) _eeOnWriteReg(_Rt_, 0);
}
void recLD(void) { recLoad64(_Imm_, 1); }
void recLD(void) { recLoad64(_Imm_, 1); g_eeCyclePenalty = InstCycles_Load; }
void recLD_co( void )
{
@ -1191,11 +1194,15 @@ void recLD_coX( int num )
}
////////////////////////////////////////////////////
void recLDL_co(void) {
recLoad64(_Imm_-7, 0); }
// "_co" variant of the LDL recompiler.
// Records the load cycle penalty (InstCycles_Load) in g_eeCyclePenalty, then
// emits a 64-bit load through recLoad64 at offset _Imm_-7 with align = 0.
void recLDL_co(void)
{
g_eeCyclePenalty = InstCycles_Load;
recLoad64(_Imm_-7, 0);
}
void recLDL( void )
{
g_eeCyclePenalty = InstCycles_Load;
iFlushCall(FLUSH_NOCONST);
if( GPR_IS_CONST1( _Rs_ ) ) {
@ -1217,10 +1224,11 @@ void recLDL( void )
}
////////////////////////////////////////////////////
void recLDR_co(void) { recLoad64(_Imm_, 0); }
void recLDR_co(void) { recLoad64(_Imm_, 0); g_eeCyclePenalty = InstCycles_Load; }
void recLDR( void )
{
g_eeCyclePenalty = InstCycles_Load;
iFlushCall(FLUSH_NOCONST);
if( GPR_IS_CONST1( _Rs_ ) ) {
@ -1244,6 +1252,8 @@ void recLDR( void )
////////////////////////////////////////////////////
void recLQ( void )
{
g_eeCyclePenalty = InstCycles_Load;
int mmreg = -1;
#ifdef REC_SLOWREAD
_flushConstReg(_Rs_);
@ -1373,6 +1383,8 @@ void recLQ( void )
void recLQ_co( void )
{
g_eeCyclePenalty = InstCycles_Load;
#ifdef REC_SLOWREAD
_flushConstReg(_Rs_);
#else
@ -2412,18 +2424,20 @@ void recStore_co(int bit, int align)
_clearNeededXMMregs(); // needed since allocing
}
void recSB( void ) { recStore(8, _Imm_, 1); }
void recSB_co( void ) { recStore_co(8, 1); }
void recSH( void ) { recStore(16, _Imm_, 1); }
void recSH_co( void ) { recStore_co(16, 1); }
void recSW( void ) { recStore(32, _Imm_, 1); }
void recSW_co( void ) { recStore_co(32, 1); }
// 8/16/32-bit store recompilers (SB/SH/SW) and their "_co" variants.
// Each wrapper emits the store through recStore / recStore_co and then records
// the store cycle penalty (InstCycles_Store) in g_eeCyclePenalty.
// For recStore_co the arguments are (bit width, align flag); recStore appears
// to take (bit width, immediate offset, align flag) -- confirm against its
// definition.
void recSB( void ) { recStore(8, _Imm_, 1); g_eeCyclePenalty = InstCycles_Store; }
void recSB_co( void ) { recStore_co(8, 1); g_eeCyclePenalty = InstCycles_Store; }
void recSH( void ) { recStore(16, _Imm_, 1); g_eeCyclePenalty = InstCycles_Store; }
void recSH_co( void ) { recStore_co(16, 1); g_eeCyclePenalty = InstCycles_Store; }
void recSW( void ) { recStore(32, _Imm_, 1); g_eeCyclePenalty = InstCycles_Store; }
void recSW_co( void ) { recStore_co(32, 1); g_eeCyclePenalty = InstCycles_Store; }
////////////////////////////////////////////////////
void recSWL_co(void) { recStore(32, _Imm_-3, 0); }
void recSWL_co(void) { recStore(32, _Imm_-3, 0); g_eeCyclePenalty = InstCycles_Store; }
void recSWL( void )
{
g_eeCyclePenalty = InstCycles_Store;
#ifdef REC_SLOWWRITE
_flushConstReg(_Rs_);
#else
@ -2511,10 +2525,12 @@ void recSWL( void )
}
////////////////////////////////////////////////////
void recSWR_co(void) { recStore(32, _Imm_, 0); }
void recSWR_co(void) { recStore(32, _Imm_, 0); g_eeCyclePenalty = InstCycles_Store; }
void recSWR( void )
{
g_eeCyclePenalty = InstCycles_Store;
#ifdef REC_SLOWWRITE
_flushConstReg(_Rs_);
#else
@ -2720,10 +2736,12 @@ void recSD_coX(int num, int align)
}
////////////////////////////////////////////////////
void recSDL_co(void) { recStore(64, _Imm_-7, 0); }
void recSDL_co(void) { recStore(64, _Imm_-7, 0); g_eeCyclePenalty = InstCycles_Store; }
void recSDL( void )
{
g_eeCyclePenalty = InstCycles_Store;
iFlushCall(FLUSH_NOCONST);
if( GPR_IS_CONST1( _Rs_ ) ) {
@ -2744,10 +2762,12 @@ void recSDL( void )
}
////////////////////////////////////////////////////
void recSDR_co(void) { recStore(64, _Imm_, 0); }
void recSDR_co(void) { recStore(64, _Imm_, 0); g_eeCyclePenalty = InstCycles_Store; }
void recSDR( void )
{
g_eeCyclePenalty = InstCycles_Store;
iFlushCall(FLUSH_NOCONST);
if( GPR_IS_CONST1( _Rs_ ) ) {
@ -2768,8 +2788,8 @@ void recSDR( void )
}
////////////////////////////////////////////////////
void recSQ( void ) { recStore(128, _Imm_, 1); }
void recSQ_co( void ) { recStore_co(128, 1); }
// 128-bit store recompilers (SQ and its "_co" variant).
// Each emits the store through recStore / recStore_co with a bit width of 128
// and then records the store cycle penalty (InstCycles_Store) in
// g_eeCyclePenalty.
void recSQ( void ) { recStore(128, _Imm_, 1); g_eeCyclePenalty = InstCycles_Store; }
void recSQ_co( void ) { recStore_co(128, 1); g_eeCyclePenalty = InstCycles_Store; }
// coissues more than 2 SQs
void recSQ_coX(int num)

View File

@ -424,7 +424,7 @@ void recMULT_constt(int info)
}
// don't set XMMINFO_WRITED|XMMINFO_WRITELO|XMMINFO_WRITEHI
EERECOMPILE_CODE0(MULT, XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0));
EERECOMPILE_CODE0_PENALTY(MULT, XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0), InstCycles_Mult );
//// MULTU
void recMULTU_const()
@ -537,7 +537,7 @@ void recMULTU_constt(int info)
}
// don't specify XMMINFO_WRITELO or XMMINFO_WRITEHI, that is taken care of
EERECOMPILE_CODE0(MULTU, XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0));
EERECOMPILE_CODE0_PENALTY(MULTU, XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0), InstCycles_Mult);
////////////////////////////////////////////////////
void recMULT1_const()
@ -571,7 +571,7 @@ void recMULT1_constt(int info)
else recMULTUsuper(info, 1, PROCESS_CONSTT);
}
EERECOMPILE_CODE0(MULT1, XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0));
EERECOMPILE_CODE0_PENALTY(MULT1, XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0), InstCycles_Mult );
////////////////////////////////////////////////////
void recMULTU1_const()
@ -596,7 +596,7 @@ void recMULTU1_constt(int info)
recMULTUsuper(info, 1, PROCESS_CONSTT);
}
EERECOMPILE_CODE0(MULTU1, XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0));
EERECOMPILE_CODE0_PENALTY(MULTU1, XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0), InstCycles_Mult);
//// DIV
void recDIV_const()
@ -661,7 +661,7 @@ void recDIV_constt(int info)
recDIVsuper(info, 1, 0, PROCESS_CONSTT);
}
EERECOMPILE_CODE0(DIV, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI);
EERECOMPILE_CODE0_PENALTY(DIV, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI, InstCycles_Div);
//// DIVU
void recDIVU_const()
@ -689,7 +689,7 @@ void recDIVU_constt(int info)
recDIVsuper(info, 0, 0, PROCESS_CONSTT);
}
EERECOMPILE_CODE0(DIVU, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI);
EERECOMPILE_CODE0_PENALTY(DIVU, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI, InstCycles_Div);
void recDIV1_const()
{
@ -716,7 +716,7 @@ void recDIV1_constt(int info)
recDIVsuper(info, 1, 1, PROCESS_CONSTT);
}
EERECOMPILE_CODE0(DIV1, XMMINFO_READS|XMMINFO_READT);
EERECOMPILE_CODE0_PENALTY(DIV1, XMMINFO_READS|XMMINFO_READT, InstCycles_Div);
void recDIVU1_const()
{
@ -743,7 +743,7 @@ void recDIVU1_constt(int info)
recDIVsuper(info, 0, 1, PROCESS_CONSTT);
}
EERECOMPILE_CODE0(DIVU1, XMMINFO_READS|XMMINFO_READT);
EERECOMPILE_CODE0_PENALTY(DIVU1, XMMINFO_READS|XMMINFO_READT, InstCycles_Div);
//do EEINST_SETSIGNEXT
REC_FUNC( MADD, _Rd_ );