fixed 1-cycle branch delays
fixed an issue with FDIV instructions whose operands are vf00
EE:
fixed recompilation of SRA and SRL when shift amount is 0
fixed the conditional-recompilation #defines (added an FPU_RECOMPILE guard; interpreter fallbacks now use REC_FUNC_DEL / REC_SYS_DEL)

(as an aside, this fixes a crash in Ratchet and Clank, but the game has other issues)

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@895 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Nneeve 2009-04-03 18:49:23 +00:00
parent 33d7e6e4f4
commit 006b81aabf
10 changed files with 106 additions and 65 deletions

View File

@ -73,6 +73,8 @@ void LoadCW() {
iCWstate = 0; iCWstate = 0;
} }
*/ */
#ifdef FPU_RECOMPILE
//------------------------------------------------------------------ //------------------------------------------------------------------
namespace R5900 { namespace R5900 {
namespace Dynarec { namespace Dynarec {
@ -1058,3 +1060,4 @@ FPURECOMPILE_CONSTCODE(RSQRT_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
} } } } } } } } } }
#endif

View File

@ -88,6 +88,17 @@ extern u32 s_nBlockCycles; // cycles of current block recompiling
branch = 2; \ branch = 2; \
} }
// REC_SYS_DEL( f, delreg ): defines `void rec<f>( void )`, a variant of the
// interpreter-fallback stub that also takes a register index to delete.
// The generated function:
//   - passes the current cpuRegs.code and pc to MOV32ItoM, targeting
//     cpuRegs.code and cpuRegs.pc respectively,
//   - calls iFlushCall(FLUSH_EVERYTHING),
//   - calls _deleteEEreg(delreg, 0) when delreg is greater than 0,
//   - passes Interp::f to CALLFunc,
//   - sets branch = 2.
// Visible call sites pass 31 or _Rd_ as delreg.
// NOTE(review): presumably delreg is the GPR the instruction writes (the link
// register for the *AL / JAL / JALR forms) — confirm against _deleteEEreg.
#define REC_SYS_DEL( f, delreg ) \
void rec##f( void ) \
{ \
MOV32ItoM( (uptr)&cpuRegs.code, (u32)cpuRegs.code ); \
MOV32ItoM( (uptr)&cpuRegs.pc, (u32)pc ); \
iFlushCall(FLUSH_EVERYTHING); \
if( (delreg) > 0 ) _deleteEEreg(delreg, 0); \
CALLFunc( (uptr)Interp::f ); \
branch = 2; \
}
// Used to clear recompiled code blocks during memory/dma write operations. // Used to clear recompiled code blocks during memory/dma write operations.
u32 recClearMem(u32 pc); u32 recClearMem(u32 pc);

View File

@ -918,20 +918,39 @@ static VuInstruction* getDelayInst(VuInstruction* pInst)
// ibeq vi05, vi03 // ibeq vi05, vi03
// The ibeq should read the vi05 before the first sqi // The ibeq should read the vi05 before the first sqi
// More info -- three separate examples:
// Example 1:
//   iaddiu vi01, 0, 1
//   ibeq vi01, 0 <- reads vi01 before the iaddiu
// Example 2:
//   iaddiu vi01, 0, 1
//   iaddiu vi01, vi01, 1
//   iaddiu vi01, vi01, 1
//   ibeq vi01, 0 <- reads vi01 before the last two iaddiu's (so the value read is 1)
// Example 3:
//   ilw vi02, addr
//   iaddiu vi01, 0, 1
//   ibeq vi01, vi02 <- reads current values of both vi01 and vi02 because the branch instruction stalls
int delay = 1; int delay = 1;
VuInstruction* pDelayInst = NULL; VuInstruction* pDelayInst = NULL;
VuInstruction* pTargetInst = pInst->pPrevInst; VuInstruction* pTargetInst = pInst->pPrevInst;
while( 1 ) { // fixme: is 3-cycle delay really maximum? while( 1 ) {
if( pTargetInst != NULL if( pTargetInst != NULL
&& pTargetInst->info.cycle+delay==pInst->info.cycle && pTargetInst->info.cycle+delay==pInst->info.cycle
&& (pTargetInst->regs[0].pipe == VUPIPE_IALU||pTargetInst->regs[0].pipe == VUPIPE_FMAC) && (pTargetInst->regs[0].pipe == VUPIPE_IALU||pTargetInst->regs[0].pipe == VUPIPE_FMAC)
&& ((pTargetInst->regs[0].VIwrite & pInst->regs[0].VIread) & 0xffff) && ((pTargetInst->regs[0].VIwrite & pInst->regs[0].VIread) & 0xffff)
&& ((pTargetInst->regs[0].VIwrite & pInst->regs[0].VIread) & 0xffff) == ((pTargetInst->regs[0].VIwrite & pInst->pPrevInst->regs[0].VIread) & 0xffff) && (delay == 1 || ((pTargetInst->regs[0].VIwrite & pInst->regs[0].VIread) & 0xffff) == ((pTargetInst->regs[0].VIwrite & pInst->pPrevInst->regs[0].VIread) & 0xffff))
&& !(pTargetInst->regs[0].VIread&((1<<REG_STATUS_FLAG)|(1<<REG_MAC_FLAG)|(1<<REG_CLIP_FLAG))) ) && !(pTargetInst->regs[0].VIread&((1<<REG_STATUS_FLAG)|(1<<REG_MAC_FLAG)|(1<<REG_CLIP_FLAG))) )
{ {
pDelayInst = pTargetInst; pDelayInst = pTargetInst;
pTargetInst = pTargetInst->pPrevInst; pTargetInst = pTargetInst->pPrevInst;
delay++; delay++;
if (delay == 5) //maximum delay is 4 (length of the pipeline)
{
DevCon::WriteLn("supervu: cycle branch delay maximum (4) is reached");
break;
}
} }
else break; else break;
} }
@ -2041,9 +2060,9 @@ void VuBaseBlock::AssignVFRegs()
_freeXMMreg(free1); _freeXMMreg(free1);
_freeXMMreg(free2); _freeXMMreg(free2);
} }
else if( regs->VIwrite & (1<<REG_P) ) { else if( regs->VIwrite & (1<<REG_P) || regs->VIwrite & (1<<REG_Q)) {
free1 = _allocTempXMMreg(XMMT_FPS, -1); free1 = _allocTempXMMreg(XMMT_FPS, -1);
// protects against insts like esadd vf0 // protects against insts like esadd vf0 and sqrt vf0
if( free0 == -1 ) if( free0 == -1 )
free0 = free1; free0 = free1;
_freeXMMreg(free1); _freeXMMreg(free1);

View File

@ -40,20 +40,20 @@ namespace OpcodeImpl
namespace Interp = R5900::Interpreter::OpcodeImpl; namespace Interp = R5900::Interpreter::OpcodeImpl;
REC_FUNC(ADD); REC_FUNC_DEL(ADD, _Rd_);
REC_FUNC(ADDU); REC_FUNC_DEL(ADDU, _Rd_);
REC_FUNC(DADD); REC_FUNC_DEL(DADD, _Rd_);
REC_FUNC(DADDU); REC_FUNC_DEL(DADDU, _Rd_);
REC_FUNC(SUB); REC_FUNC_DEL(SUB, _Rd_);
REC_FUNC(SUBU); REC_FUNC_DEL(SUBU, _Rd_);
REC_FUNC(DSUB); REC_FUNC_DEL(DSUB, _Rd_);
REC_FUNC(DSUBU); REC_FUNC_DEL(DSUBU, _Rd_);
REC_FUNC(AND); REC_FUNC_DEL(AND, _Rd_);
REC_FUNC(OR); REC_FUNC_DEL(OR, _Rd_);
REC_FUNC(XOR); REC_FUNC_DEL(XOR, _Rd_);
REC_FUNC(NOR); REC_FUNC_DEL(NOR, _Rd_);
REC_FUNC(SLT); REC_FUNC_DEL(SLT, _Rd_);
REC_FUNC(SLTU); REC_FUNC_DEL(SLTU, _Rd_);
#elif defined(EE_CONST_PROP) #elif defined(EE_CONST_PROP)

View File

@ -47,12 +47,12 @@ REC_SYS(BLEZ);
REC_SYS(BGEZ); REC_SYS(BGEZ);
REC_SYS(BGTZL); REC_SYS(BGTZL);
REC_SYS(BLTZL); REC_SYS(BLTZL);
REC_SYS(BLTZAL); REC_SYS_DEL(BLTZAL, 31);
REC_SYS(BLTZALL); REC_SYS_DEL(BLTZALL, 31);
REC_SYS(BLEZL); REC_SYS(BLEZL);
REC_SYS(BGEZL); REC_SYS(BGEZL);
REC_SYS(BGEZAL); REC_SYS_DEL(BGEZAL, 31);
REC_SYS(BGEZALL); REC_SYS_DEL(BGEZALL, 31);
#else #else

View File

@ -38,9 +38,9 @@ namespace OpcodeImpl
namespace Interp = R5900::Interpreter::OpcodeImpl; namespace Interp = R5900::Interpreter::OpcodeImpl;
REC_SYS(J); REC_SYS(J);
REC_SYS(JAL); REC_SYS_DEL(JAL, 31);
REC_SYS(JR); REC_SYS(JR);
REC_SYS(JALR); REC_SYS_DEL(JALR, _Rd_);
#else #else

View File

@ -38,18 +38,18 @@ namespace OpcodeImpl {
namespace Interp = R5900::Interpreter::OpcodeImpl; namespace Interp = R5900::Interpreter::OpcodeImpl;
REC_FUNC(LB); REC_FUNC_DEL(LB, _Rt_);
REC_FUNC(LBU); REC_FUNC_DEL(LBU, _Rt_);
REC_FUNC(LH); REC_FUNC_DEL(LH, _Rt_);
REC_FUNC(LHU); REC_FUNC_DEL(LHU, _Rt_);
REC_FUNC(LW); REC_FUNC_DEL(LW, _Rt_);
REC_FUNC(LWU); REC_FUNC_DEL(LWU, _Rt_);
REC_FUNC(LWL); REC_FUNC_DEL(LWL, _Rt_);
REC_FUNC(LWR); REC_FUNC_DEL(LWR, _Rt_);
REC_FUNC(LD); REC_FUNC_DEL(LD, _Rt_);
REC_FUNC(LDR); REC_FUNC_DEL(LDR, _Rt_);
REC_FUNC(LDL); REC_FUNC_DEL(LDL, _Rt_);
REC_FUNC(LQ); REC_FUNC_DEL(LQ, _Rt_);
REC_FUNC(SB); REC_FUNC(SB);
REC_FUNC(SH); REC_FUNC(SH);
REC_FUNC(SW); REC_FUNC(SW);

View File

@ -42,18 +42,18 @@ namespace OpcodeImpl
namespace Interp = R5900::Interpreter::OpcodeImpl; namespace Interp = R5900::Interpreter::OpcodeImpl;
REC_FUNC_DEL(LUI,_Rt_); REC_FUNC_DEL(LUI,_Rt_);
REC_FUNC(MFLO); REC_FUNC_DEL(MFLO, _Rd_);
REC_FUNC(MFHI); REC_FUNC_DEL(MFHI, _Rd_);
REC_FUNC(MTLO); REC_FUNC(MTLO);
REC_FUNC(MTHI); REC_FUNC(MTHI);
REC_FUNC( MFHI1 ); REC_FUNC_DEL(MFLO1, _Rd_);
REC_FUNC( MFLO1 ); REC_FUNC_DEL(MFHI1, _Rd_);
REC_FUNC( MTHI1 ); REC_FUNC( MTHI1 );
REC_FUNC( MTLO1 ); REC_FUNC( MTLO1 );
REC_FUNC(MOVZ); REC_FUNC_DEL(MOVZ, _Rd_);
REC_FUNC(MOVN); REC_FUNC_DEL(MOVN, _Rd_);
#elif defined(EE_CONST_PROP) #elif defined(EE_CONST_PROP)

View File

@ -35,20 +35,20 @@ namespace OpcodeImpl
*********************************************************/ *********************************************************/
#ifndef MULTDIV_RECOMPILE #ifndef MULTDIV_RECOMPILE
REC_FUNC(MULT); REC_FUNC_DEL(MULT , _Rd_);
REC_FUNC(MULTU); REC_FUNC_DEL(MULTU , _Rd_);
REC_FUNC( MULT1 ); REC_FUNC_DEL( MULT1 , _Rd_);
REC_FUNC( MULTU1 ); REC_FUNC_DEL( MULTU1 , _Rd_);
REC_FUNC(DIV); REC_FUNC(DIV);
REC_FUNC(DIVU); REC_FUNC(DIVU);
REC_FUNC( DIV1 ); REC_FUNC( DIV1 );
REC_FUNC( DIVU1 ); REC_FUNC( DIVU1 );
REC_FUNC( MADD ); REC_FUNC_DEL( MADD , _Rd_ );
REC_FUNC( MADDU ); REC_FUNC_DEL( MADDU , _Rd_);
REC_FUNC( MADD1 ); REC_FUNC_DEL( MADD1 , _Rd_);
REC_FUNC( MADDU1 ); REC_FUNC_DEL( MADDU1 , _Rd_ );
#elif defined(EE_CONST_PROP) #elif defined(EE_CONST_PROP)

View File

@ -36,22 +36,22 @@ namespace OpcodeImpl
namespace Interp = R5900::Interpreter::OpcodeImpl; namespace Interp = R5900::Interpreter::OpcodeImpl;
REC_FUNC(SLL); REC_FUNC_DEL(SLL, _Rd_);
REC_FUNC(SRL); REC_FUNC_DEL(SRL, _Rd_);
REC_FUNC(SRA); REC_FUNC_DEL(SRA, _Rd_);
REC_FUNC(DSLL); REC_FUNC_DEL(DSLL, _Rd_);
REC_FUNC(DSRL); REC_FUNC_DEL(DSRL, _Rd_);
REC_FUNC(DSRA); REC_FUNC_DEL(DSRA, _Rd_);
REC_FUNC(DSLL32); REC_FUNC_DEL(DSLL32, _Rd_);
REC_FUNC(DSRL32); REC_FUNC_DEL(DSRL32, _Rd_);
REC_FUNC(DSRA32); REC_FUNC_DEL(DSRA32, _Rd_);
REC_FUNC(SLLV); REC_FUNC_DEL(SLLV, _Rd_);
REC_FUNC(SRLV); REC_FUNC_DEL(SRLV, _Rd_);
REC_FUNC(SRAV); REC_FUNC_DEL(SRAV, _Rd_);
REC_FUNC(DSLLV); REC_FUNC_DEL(DSLLV, _Rd_);
REC_FUNC(DSRLV); REC_FUNC_DEL(DSRLV, _Rd_);
REC_FUNC(DSRAV); REC_FUNC_DEL(DSRAV, _Rd_);
#elif defined(EE_CONST_PROP) #elif defined(EE_CONST_PROP)
@ -189,6 +189,10 @@ void recSRLs_(int info, int sa)
// PUNPCKLDQRtoR(rdreg, t0reg); // PUNPCKLDQRtoR(rdreg, t0reg);
// _freeMMXreg(t0reg); // _freeMMXreg(t0reg);
} }
else {
if( EEINST_ISLIVE1(_Rd_) ) _signExtendGPRtoMMX(rdreg, _Rd_, 0);
else EEINST_RESETHASLIVE1(_Rd_);
}
} }
void recSRL_(int info) void recSRL_(int info)
@ -264,6 +268,10 @@ void recSRAs_(int info, int sa)
mmxregs[t0reg] = mmxregs[rdreg]; mmxregs[t0reg] = mmxregs[rdreg];
mmxregs[rdreg].inuse = 0; mmxregs[rdreg].inuse = 0;
} }
else {
if( EEINST_ISLIVE1(_Rd_) ) _signExtendGPRtoMMX(rdreg, _Rd_, 0);
else EEINST_RESETHASLIVE1(_Rd_);
}
} }
void recSRA_(int info) void recSRA_(int info)