New EE/IOP synchronization system -- found the root cause of many synchronization problems by issuing an IOP branch test after BIOS calls. That allowed me to get rid of the overly complicated adaptive EE_WAIT_CYCLES mess from r291 and replace it with a much cleaner and more efficient branching system. Also fixed a few bugs whereby the IOP would run waaaay ahead of the EE (leading to skippy sound, slowdowns, and other problems), and a bug that caused crashes when resetting or starting new games.

Note: New IOP Counters code is a Work-in-Progress.  Should be fine for most games, but if a game uses the IOP counter gates it might break.  Will finish them soon!
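For reference, a minimal C sketch of the "branch test after BIOS calls" idea (the helper name iopForceBranchTest is hypothetical; the actual change emits the equivalent x86 in the IOP recompiler, see iR3000A.cpp further down):

// Hedged sketch: after a BIOS syscall the recompiled block effectively does
// this, forcing psxBranchTest() to run as soon as the current IOP block ends.
static __forceinline void iopForceBranchTest()
{
	g_psxNextBranchCycle = psxRegs.cycle;	// next IOP branch test is due "now"
}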

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@345 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
Jake.Stine 2008-11-19 21:23:55 +00:00 committed by Gregory Hainaut
parent 2f23574430
commit f6ecf0bd68
15 changed files with 1112 additions and 1063 deletions

View File

@ -157,43 +157,6 @@ extern TESTRUNARGS g_TestRun;
VBlank non-interlaced 59.82 Hz
HBlank 15.73426573 KHz */
//VBlanks per second
#define VBLANK_NTSC ((Config.PsxType & 2) ? 59.94 : 59.82) //59.94005994 is more precise
#define VBLANK_PAL ((Config.PsxType & 2) ? 50.00 : 49.76)
//HBlanks per second
#define HBLANK_NTSC (15734.26573)
#define HBLANK_PAL (15625)
//VBlank timers for EE, bit more accurate.
#define VBLANKCNT(count) ((u32)((Config.PsxType & 1) ? (VBLANKPALSELECT * count) : (VBLANKNTSCSELECT * count)))
#define VBLANKPALSELECT ((Config.PsxType & 2) ? (PS2CLK / 50.00) : (PS2CLK / 49.76))
#define VBLANKNTSCSELECT ((Config.PsxType & 2) ? (PS2CLK / 59.94) : (PS2CLK / 59.82)) //59.94005994 is more precise
//EE VBlank speeds
#define PS2VBLANK_NTSC_INT ((PS2CLK / 59.94005994))
#define PS2VBLANK_NTSC ((PS2CLK / 59.82))
#define PS2VBLANK_PAL_INT ((PS2CLK / 50.00))
#define PS2VBLANK_PAL ((PS2CLK / 49.76))
//HBlank timer for EE, bit more accurate.
#define HBLANKCNT(count) ((u32)(PS2HBLANK * count))
//EE HBlank speeds
#define PS2HBLANK_NTSC ((int)(PS2CLK / HBLANK_NTSC))
#define PS2HBLANK_PAL ((int)(PS2CLK / HBLANK_PAL))
#define PS2HBLANK ((int)((Config.PsxType & 1) ? PS2HBLANK_PAL : PS2HBLANK_NTSC))
//IOP VBlank speeds
#define PSXVBLANK_NTSC ((int)(PSXCLK / VBLANK_NTSC))
#define PSXVBLANK_PAL ((int)(PSXCLK / VBLANK_PAL))
#define PSXVBLANK ((int)((Config.PsxType & 1) ? PSXVBLANK_PAL : PSXVBLANK_NTSC))
//IOP HBlank speeds
#define PSXHBLANK_NTSC ((int)(PSXCLK / HBLANK_NTSC))
#define PSXHBLANK_PAL ((int)(PSXCLK / HBLANK_PAL))
#define PSXHBLANK ((int)((Config.PsxType & 1) ? PSXHBLANK_PAL : PSXHBLANK_NTSC))
//Misc Clocks
#define PSXPIXEL ((int)(PSXCLK / 13500000))
#define PSXSOUNDCLK ((int)(48000))

View File

@ -29,23 +29,20 @@ u64 profile_totalticks = 0;
int gates = 0;
extern u8 psxhblankgate;
// Counter 4 takes care of scanlines - hSync/hBlanks
// Counter 5 takes care of vSync/vBlanks
Counter counters[6];
u32 nextCounter, nextsCounter;
u32 nextsCounter; // records the cpuRegs.cycle value of the last call to rcntUpdate()
s32 nextCounter; // delta from nextsCounter, in cycles, until the next rcntUpdate()
static void (*s_prevExecuteVU1Block)() = NULL;
LARGE_INTEGER lfreq;
void rcntUpdTarget(int index) {
counters[index].sCycleT = cpuRegs.cycle;
}
void rcntUpd(int index) {
counters[index].sCycle = cpuRegs.cycle;
rcntUpdTarget(index);
}
void rcntReset(int index) {
counters[index].count = 0;
rcntUpd(index);
counters[index].sCycleT = cpuRegs.cycle;
}
// Updates the state of the nextCounter value (if needed) to serve
@ -53,8 +50,11 @@ void rcntReset(int index) {
// Call this method after any modifications to the state of a counter.
static __forceinline void _rcntSet( int i )
{
u32 c;
if (!(counters[i].mode & 0x80) || (counters[i].mode & 0x3) == 0x3) return; // Stopped
s32 c;
assert( i <= 4 ); // rcntSet isn't valid for h/vsync counters.
// Stopped or special hsync gate?
if (!(counters[i].mode & 0x80) || (counters[i].mode & 0x3) == 0x3) return;
// nextCounter is relative to the cpuRegs.cycle when rcntUpdate() was last called.
// However, the current _rcntSet could be called at any cycle count, so we need to take
@ -65,31 +65,37 @@ static __forceinline void _rcntSet( int i )
c += cpuRegs.cycle - nextsCounter; // adjust for time passed since last rcntUpdate();
if (c < nextCounter) nextCounter = c;
//if(!(counters[i].mode & 0x100) || counters[i].target > 0xffff) continue;
// Ignore target diff if target is currently disabled.
// (the overflow is all we care about since it goes first, and then the
// target will be turned on afterward).
if( counters[i].target & 0x10000000 ) return;
c = ((counters[i].target - counters[i].count) * counters[i].rate) - (cpuRegs.cycle - counters[i].sCycleT);
c += cpuRegs.cycle - nextsCounter; // adjust for time passed since last rcntUpdate();
if (c < nextCounter) nextCounter = c;
}
static __forceinline void cpuRcntSet() {
// Calculate our target cycle deltas.
// This must be done regardless of if the hblank/vblank counters updated since
// cpuRegs.cycle changes, even if sCycle hasn't!
u32 counter4CycleT = ( counters[4].mode == MODE_HBLANK ) ? HBLANK_TIME_ : HRENDER_TIME_;
u32 counter5CycleT = VSYNC_HALF_ - (cpuRegs.cycle - counters[5].sCycle);
counter4CycleT -= (cpuRegs.cycle - counters[4].sCycle);
nextCounter = (counter4CycleT < counter5CycleT) ? counter4CycleT : counter5CycleT;
static __forceinline void cpuRcntSet()
{
int i;
nextsCounter = cpuRegs.cycle;
_rcntSet( 0 );
_rcntSet( 1 );
_rcntSet( 2 );
_rcntSet( 3 );
nextCounter = (counters[5].sCycle + counters[5].CycleT) - cpuRegs.cycle;
// if we're running behind, the diff will be negative.
// (and running behind means we need to branch again ASAP)
if( nextCounter <= 0 )
{
nextCounter = 0;
return;
}
for (i = 0; i < 4; i++)
_rcntSet( i );
// sanity check!
if( nextCounter < 0 ) nextCounter = 0;
}
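As an aside, a hedged sketch of how these two values get consumed (it mirrors the cpuTestCycle/cpuSetNextBranch calls in _cpuBranchTest_Shared later in this commit):

// nextsCounter is the absolute cycle of the last rcntUpdate(); nextCounter is the
// signed delta to the next counter event, so:
//   next event (absolute) = nextsCounter + nextCounter
//   event due?            = (s32)(cpuRegs.cycle - nextsCounter) >= nextCounter
if( cpuTestCycle( nextsCounter, nextCounter ) )
	rcntUpdate();		// service EE counters (reschedules via cpuRcntSet)
cpuSetNextBranch( nextsCounter, nextCounter );	// schedule the next EE branch test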
void rcntInit() {
@ -106,13 +112,18 @@ void rcntInit() {
counters[2].interrupt = 11;
counters[3].interrupt = 12;
counters[4].mode = MODE_HRENDER;
counters[4].sCycle = cpuRegs.cycle;
counters[5].mode = MODE_VRENDER;
counters[5].sCycle = cpuRegs.cycle;
UpdateVSyncRate();
#ifdef _WIN32
QueryPerformanceFrequency(&lfreq);
#endif
for (i=0; i<4; i++) rcntUpd(i);
for (i=0; i<4; i++) rcntReset(i);
cpuRcntSet();
assert(Cpu != NULL && Cpu->ExecuteVU1Block != NULL );
@ -151,29 +162,120 @@ u64 GetCPUTicks()
#endif
}
void UpdateVSyncRate() {
typedef struct
{
u32 Framerate; // frames per second * 100 (so 2500 for PAL and 2997 for NTSC)
u32 Render; // time from vblank end to vblank start (cycles)
u32 Blank; // time from vblank start to vblank end (cycles)
counters[4].mode = MODE_HRENDER; // Counter 4 takes care of scanlines, so set the mode to HRENDER (drawing part of scanline)
counters[4].sCycle = cpuRegs.cycle; // Update Counter 4's Start Cycle to match CPU's cycle
counters[4].CycleT = HRENDER_TIME_; // Amount of cycles before the counter will be updated
counters[5].mode = MODE_VRENDER; // Counter 5 takes care of vSync/vBlanks
counters[5].sCycle = cpuRegs.cycle; // Update Counter 5's Start Cycle to match CPU's cycle
counters[5].CycleT = VSYNC_HALF_; // Amount of cycles before the counter will be updated
u32 hSyncError; // rounding error after the duration of a rendered frame (cycles)
u32 hRender; // time from hblank end to hblank start (cycles)
u32 hBlank; // time from hblank start to hblank end (cycles)
u32 hScanlinesPerFrame; // number of scanlines per frame (525/625 for NTSC/PAL)
} vSyncTimingInfo;
if (Config.CustomFps > 0) {
iTicks = GetTickFrequency() / Config.CustomFps;
SysPrintf("Framelimiter rate updated (UpdateVSyncRate): %d fps\n", Config.CustomFps);
static vSyncTimingInfo vSyncInfo;
static __forceinline void vSyncInfoCalc( vSyncTimingInfo* info, u32 framesPerSecond, u32 scansPerFrame )
{
// Important: Cannot use floats or doubles here. The emulator changes rounding modes
// depending on user-set speedhack options, and it can break float/double code
// (as in returning infinities and junk)
u64 Frame = ((u64)PS2CLK * 1000000ULL) / framesPerSecond;
u64 HalfFrame = Frame / 2;
u64 Blank = HalfFrame / 4; // two blanks and renders per frame
u64 Render = HalfFrame - Blank; // so use the half-frame value for these...
// Important! The hRender/hBlank timers should be 50/50 for best results.
// In theory a 70%/30% ratio would be more correct but in practice it runs
// like crap and totally screws audio synchronization and other things.
u64 Scanline = Frame / scansPerFrame;
u64 hBlank = Scanline / 2;
u64 hRender = Scanline - hBlank;
info->Framerate = framesPerSecond;
info->Render = (u32)(Render/10000);
info->Blank = (u32)(Blank/10000);
info->hRender = (u32)(hRender/10000);
info->hBlank = (u32)(hBlank/10000);
info->hScanlinesPerFrame = scansPerFrame;
// Apply rounding:
if( ( Render - info->Render ) >= 5000 ) info->Render++;
else if( ( Blank - info->Blank ) >= 5000 ) info->Blank++;
if( ( hRender - info->hRender ) >= 5000 ) info->hRender++;
else if( ( hBlank - info->hBlank ) >= 5000 ) info->hBlank++;
// Calculate accumulative hSync rounding error per half-frame:
{
u32 hSyncCycles = ((info->hRender + info->hBlank) * scansPerFrame) / 2;
u32 vSyncCycles = (info->Render + info->Blank);
info->hSyncError = vSyncCycles - hSyncCycles;
}
else if (Config.PsxType & 1) {
iTicks = (GetTickFrequency() * 100) / 5000;
SysPrintf("Framelimiter rate updated (UpdateVSyncRate): 50 fps Pal\n");
// Note: In NTSC modes there is some small rounding error in the vsync too,
// however it would take thousands of frames for it to amount to anything and
// is thus not worth the effort at this time.
}
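A quick worked example of the fixed-point math above (assuming PS2CLK is the usual 294912000, i.e. the 294.912 MHz EE clock; values rounded):

//   NTSC: framesPerSecond = FRAMERATE_NTSC = 2997 (29.97 fps * 100)
//   Frame     = 294912000 * 1000000 / 2997 ~= 98,402,402,402   (EE cycles * 10000)
//   HalfFrame = Frame / 2                  ~= 49,201,201,201
//   Blank     = HalfFrame / 4              ~= 12,300,300,300   -> Blank  ~= 1,230,030 cycles
//   Render    = HalfFrame - Blank          ~= 36,900,900,901   -> Render ~= 3,690,090 cycles
// i.e. one NTSC half-frame is roughly 4.92 million EE cycles, carried in
// 1/10000th-of-a-cycle fixed point until the final divide.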
void UpdateVSyncRate()
{
const char *limiterMsg = "Framelimiter rate updated (UpdateVSyncRate): %s fps\n";
// fixme - According to some docs, progressive-scan modes actually refresh slower than
// interlaced modes. But I can't fathom how, since the refresh rate is a function of
// the television and all the docs I found on TVs made no indication that they ever
// run anything except their native refresh rate.
//#define VBLANK_NTSC ((Config.PsxType & 2) ? 59.94 : 59.82) //59.94 is more precise
//#define VBLANK_PAL ((Config.PsxType & 2) ? 50.00 : 49.76)
if(Config.PsxType & 1)
{
if( vSyncInfo.Framerate != FRAMERATE_PAL )
{
SysPrintf( "PCSX2: Switching to PAL display timings.\n" );
vSyncInfoCalc( &vSyncInfo, FRAMERATE_PAL, SCANLINES_TOTAL_PAL );
}
}
else {
iTicks = (GetTickFrequency() / 5994) * 100;
SysPrintf("Framelimiter rate updated (UpdateVSyncRate): 59.94 fps NTSC\n");
else
{
if( vSyncInfo.Framerate != FRAMERATE_NTSC )
{
SysPrintf( "PCSX2: Switching to NTSC display timings.\n" );
vSyncInfoCalc( &vSyncInfo, FRAMERATE_NTSC, SCANLINES_TOTAL_NTSC );
}
}
counters[4].CycleT = vSyncInfo.hRender; // Amount of cycles before the counter will be updated
counters[5].CycleT = vSyncInfo.Render; // Amount of cycles before the counter will be updated
if (Config.CustomFps > 0)
{
u32 ticks = (u32)(GetTickFrequency() / Config.CustomFps);
if( iTicks != ticks )
{
iTicks = ticks;
SysPrintf( limiterMsg, Config.CustomFps );
}
}
else
{
u32 ticks = (u32)((GetTickFrequency() * 50) / vSyncInfo.Framerate);
if( iTicks != ticks )
{
iTicks = ticks;
SysPrintf( limiterMsg, (Config.PsxType & 1) ? "50" : "59.94" );
}
}
cpuRcntSet();
}
@ -189,7 +291,10 @@ void FrameLimiter()
if (iEnd>=iExpectedEnd) {
// Compensation: If the framerate drops too low, reset the
// expected value. This avoids "fast forward" syndrome.
// expected value. This avoids excessive amounts of
// "fast forward" syndrome which would occur if we tried to
// catch up too much.
u64 diff = iEnd-iExpectedEnd;
if ((diff>>3)>iTicks) iExpectedEnd=iEnd;
}
@ -209,8 +314,6 @@ extern u32 vu0time;
extern void DummyExecuteVU1Block(void);
//static u32 lastWasSkip=0;
//extern u32 unpacktotal;
void vSyncDebugStuff() {
#ifdef EE_PROFILING
@ -394,104 +497,102 @@ static __forceinline void VSyncEnd(u32 sCycle) // VSync End
frameLimit(); // limit FPS (also handles frameskip and VUskip)
}
static __forceinline void hScanlineNextCycle( u32 diff, u32 cyclesAmount )
#ifndef PCSX2_PUBLIC
static u32 hsc=0;
static int vblankinc = 0;
#endif
__forceinline void rcntUpdate_hScanline()
{
// This function: Now Unneeded?
// This code doesn't appear to be run anymore after fixing the CycleT bug
// and fixing the EE/IOP code execution sync issues (tested on 6 games,
// with EEx3 hack too).
// And it makes sense -- bad behavior by the counters would have led
// to cpuBranchTest being delayed beyond the span of a full hsync.
// It could still happen in some isolated part of some particular game,
// but probably we're better off letting that game lose a couple hsyncs
// once in a while rather than slow everyone else down needlessly.
u32 scanlineCycles = SCANLINE_;
diff -= cyclesAmount;
if (diff >= scanlineCycles)
{
u32 increment = diff / scanlineCycles;
// Counter Optimization:
// If the time passed is beyond a single scanline, then increment all scanline
// counters as a set here.
SysPrintf("Counters Optimization %d\n", diff / scanlineCycles);
/* if counter's count increases on hblank gate's off signal OR if counter increases every hblank, THEN add to the counter */
if ( (!(counters[0].mode & 0x30) && (gates & (1<<0))) || (((counters[0].mode & 0x83) == 0x83) && !(gates & (1<<0))) ) counters[0].count += (increment * HBLANK_COUNTER_SPEED);
if ( (!(counters[1].mode & 0x30) && (gates & (1<<1))) || (((counters[1].mode & 0x83) == 0x83) && !(gates & (1<<1))) ) counters[1].count += (increment * HBLANK_COUNTER_SPEED);
if ( (!(counters[2].mode & 0x30) && (gates & (1<<2))) || (((counters[2].mode & 0x83) == 0x83) && !(gates & (1<<2))) ) counters[2].count += (increment * HBLANK_COUNTER_SPEED);
if ( (!(counters[3].mode & 0x30) && (gates & (1<<3))) || (((counters[3].mode & 0x83) == 0x83) && !(gates & (1<<3))) ) counters[3].count += (increment * HBLANK_COUNTER_SPEED);
counters[4].sCycle += (increment * scanlineCycles);
}
}
static __forceinline void hScanline()
{
u32 difference = (cpuRegs.cycle - counters[4].sCycle);
if( !cpuTestCycle( counters[4].sCycle, counters[4].CycleT ) ) return;
iopBranchAction = 1;
if (counters[4].mode & MODE_HBLANK) { //HBLANK Start
const u32 modeCycles = HBLANK_TIME_;
if (difference >= modeCycles ) {
//hScanlineNextCycle(difference, modeCycles);
counters[4].sCycle += modeCycles;
rcntStartGate(0, counters[4].sCycle);
psxCheckStartGate16(0);
counters[4].mode = MODE_HRENDER;
}
//hScanlineNextCycle(difference, modeCycles);
rcntStartGate(0, counters[4].sCycle);
psxCheckStartGate16(0);
// Setup the hRender's start and end cycle information:
counters[4].sCycle += vSyncInfo.hBlank; // start (absolute cycle value)
counters[4].CycleT = vSyncInfo.hRender; // endpoint (delta from start value)
counters[4].mode = MODE_HRENDER;
}
else { //HBLANK END / HRENDER Begin
const u32 modeCycles = HRENDER_TIME_;
if (difference >= modeCycles) {
//hScanlineNextCycle(difference, modeCycles);
counters[4].sCycle += modeCycles;
if (CSRw & 0x4) GSCSRr |= 4; // signal
if (!(GSIMR&0x400)) gsIrq();
if (gates) rcntEndGate(0, counters[4].sCycle);
if (psxhblankgate) psxCheckEndGate16(0);
counters[4].mode = MODE_HBLANK;
}
//hScanlineNextCycle(difference, modeCycles);
if (CSRw & 0x4) GSCSRr |= 4; // signal
if (!(GSIMR&0x400)) gsIrq();
if (gates) rcntEndGate(0, counters[4].sCycle);
if (psxhblankgate) psxCheckEndGate16(0);
// set up the hblank's start and end cycle information:
counters[4].sCycle += vSyncInfo.hRender; // start (absolute cycle value)
counters[4].CycleT = vSyncInfo.hBlank; // endpoint (delta from start value)
counters[4].mode = MODE_HBLANK;
# ifndef PCSX2_PUBLIC
hsc++;
# endif
}
/*if(counters[4].CycleT < 0) {
counters[4].sCycle += -counters[4].CycleT;
counters[4].CycleT = 0;
}*/
}
// Only called from one place so might as well inline it.
static __forceinline void vSync()
__forceinline void rcntUpdate_vSync()
{
u32 diff = (cpuRegs.cycle - counters[5].sCycle);
s32 diff = (cpuRegs.cycle - counters[5].sCycle);
if( diff < counters[5].CycleT ) return;
hScanline();
iopBranchAction = 1;
if (counters[5].mode == MODE_VSYNC)
{
VSyncEnd(counters[5].sCycle);
if (diff >= (VSYNC_HALF_)) {
counters[5].sCycle += VSYNC_HALF_; // * (u32)(diff / VSYNC_HALF_));
counters[5].sCycle += vSyncInfo.Blank;
counters[5].CycleT = vSyncInfo.Render;
counters[5].mode = MODE_VRENDER;
}
else // VSYNC end / VRENDER begin
{
VSyncStart(counters[5].sCycle);
if (counters[5].mode == MODE_VSYNC) {
VSyncEnd(counters[5].sCycle);
counters[5].mode = MODE_VRENDER;
counters[5].sCycle += vSyncInfo.Render;
counters[5].CycleT = vSyncInfo.Blank;
counters[5].mode = MODE_VSYNC;
// Accumulate hsync rounding errors:
counters[4].sCycle += vSyncInfo.hSyncError;
# ifndef PCSX2_PUBLIC
vblankinc++;
if( vblankinc > 1 )
{
if( hsc != vSyncInfo.hScanlinesPerFrame )
SysPrintf( " ** vSync > Abnornal Scanline Count: %d\n", hsc );
hsc = 0;
vblankinc = 0;
}
else {
VSyncStart(counters[5].sCycle);
counters[5].mode = MODE_VSYNC;
# endif
// Accumulate hsync rounding errors:
counters[4].sCycle += HSYNC_ERROR;
// Tighten up EE/IOP responsiveness for a wee bit.
// Games are usually most sensitive to vSync sessions since that's
// when the hard thinking usually occurs.
g_eeTightenSync += 2;
}
g_nextBranchCycle = cpuRegs.cycle + 384;
}
}
static __forceinline void __fastcall _cpuTestTarget( int i )
{
//counters[i].target &= 0xffff;
if(counters[i].mode & 0x100) {
EECNT_LOG("EE counter %d target reached mode %x count %x target %x\n", i, counters[i].mode, counters[i].count, counters[i].target);
counters[i].mode|= 0x0400; // Equal Target flag
hwIntcIrq(counters[i].interrupt);
if (counters[i].mode & 0x40) { //The PS2 only resets if the interrupt is enabled - Tested on PS2
counters[i].count -= counters[i].target; // Reset on target
}
else counters[i].target |= 0x10000000;
}
else counters[i].target |= 0x10000000;
}
// forceinline note: this method is called from two locations, but one
// of them is the interpreter, which doesn't count. ;) So might as
// well forceinline it!
@ -499,42 +600,36 @@ __forceinline void rcntUpdate()
{
int i;
vSync(); //hBlank and vSync Timers
rcntUpdate_vSync();
// Update all counters?
// This code shouldn't be needed. Counters are updated as needed when
// Reads, Writes, and Target/Overflow events occur. The rest of the
// time the counters can be left unmodified.
for (i=0; i<=3; i++) {
if ( gates & (1<<i) ) continue;
if ((counters[i].mode & 0x80) && (counters[i].mode & 0x3) != 0x3) {
//counters[i].count += (cpuRegs.cycle - counters[i].sCycleT) / counters[i].rate;
//counters[i].sCycleT = cpuRegs.cycle;
u32 change = cpuRegs.cycle - counters[i].sCycleT;
counters[i].count += (int)(change / counters[i].rate);
change -= (change / counters[i].rate) * counters[i].rate;
counters[i].sCycleT = cpuRegs.cycle - change;
s32 change = cpuRegs.cycle - counters[i].sCycleT;
if( change > 0 ) {
counters[i].count += change / counters[i].rate;
change -= (change / counters[i].rate) * counters[i].rate;
counters[i].sCycleT = cpuRegs.cycle - change;
}
}
else counters[i].sCycleT = cpuRegs.cycle;
}
for (i=0; i<=3; i++) {
// Check Counter Targets and Overflows:
for (i=0; i<=3; i++)
{
if (!(counters[i].mode & 0x80)) continue; // Stopped
if (counters[i].count >= counters[i].target) { // Target interrupt
counters[i].target &= 0xffff;
if(counters[i].mode & 0x100) {
EECNT_LOG("EE counter %d target reached mode %x count %x target %x\n", i, counters[i].mode, counters[i].count, counters[i].target);
counters[i].mode|= 0x0400; // Equal Target flag
hwIntcIrq(counters[i].interrupt);
if (counters[i].mode & 0x40) { //The PS2 only resets if the interrupt is enabled - Tested on PS2
counters[i].count -= counters[i].target; // Reset on target
}
else counters[i].target |= 0x10000000;
}
else counters[i].target |= 0x10000000;
}
// Target reached?
if (counters[i].count >= counters[i].target)
_cpuTestTarget( i );
if (counters[i].count > 0xffff) {
@ -545,9 +640,14 @@ __forceinline void rcntUpdate()
}
counters[i].count -= 0x10000;
counters[i].target &= 0xffff;
// Target reached after overflow?
// It's possible that a Target very near zero (1-10, etc) could have already been reached.
// Checking for it now
//if (counters[i].count >= counters[i].target)
// _cpuTestTarget( i );
}
}
cpuRcntSet();
}
@ -556,15 +656,17 @@ void rcntWcount(int index, u32 value)
EECNT_LOG("EE count write %d count %x with %x target %x eecycle %x\n", index, counters[index].count, value, counters[index].target, cpuRegs.eCycle);
counters[index].count = value & 0xffff;
counters[index].target &= 0xffff;
//rcntUpd(index);
if((counters[index].mode & 0x3) != 0x3) {
//counters[index].sCycleT = cpuRegs.cycle;
u32 change = cpuRegs.cycle - counters[index].sCycleT;
change -= (change / counters[index].rate) * counters[index].rate;
counters[index].sCycleT = cpuRegs.cycle - change;
}
if(counters[index].mode & 0x80) {
if((counters[index].mode & 0x3) != 0x3) {
s32 change = cpuRegs.cycle - counters[index].sCycleT;
if( change > 0 ) {
change -= (change / counters[index].rate) * counters[index].rate;
counters[index].sCycleT = cpuRegs.cycle - change;
}
}
}
else counters[index].sCycleT = cpuRegs.cycle;
_rcntSet( index );
}
@ -573,13 +675,14 @@ void rcntWmode(int index, u32 value)
{
if(counters[index].mode & 0x80) {
if((counters[index].mode & 0x3) != 0x3) {
//counters[index].count += (cpuRegs.cycle - counters[index].sCycleT) / counters[index].rate;
//counters[index].sCycleT = cpuRegs.cycle;
u32 change = cpuRegs.cycle - counters[index].sCycleT;
counters[index].count += (int)(change / counters[index].rate);
change -= (change / counters[index].rate) * counters[index].rate;
counters[index].sCycleT = cpuRegs.cycle - change;
if( change > 0 )
{
counters[index].count += change / counters[index].rate;
change -= (change / counters[index].rate) * counters[index].rate;
counters[index].sCycleT = cpuRegs.cycle - change;
}
}
}
else counters[index].sCycleT = cpuRegs.cycle;
@ -592,7 +695,7 @@ void rcntWmode(int index, u32 value)
case 0: counters[index].rate = 2; break;
case 1: counters[index].rate = 32; break;
case 2: counters[index].rate = 512; break;
case 3: counters[index].rate = SCANLINE_; break;
case 3: counters[index].rate = vSyncInfo.hBlank+vSyncInfo.hRender; break;
}
if((counters[index].mode & 0xF) == 0x7) {
@ -607,11 +710,6 @@ void rcntWmode(int index, u32 value)
}
else gates &= ~(1<<index);
/*if ((value & 0x580) == 0x580) { // If we need to compare the target value again, correct the target
//SysPrintf("EE Correcting target %x after mode write\n", index);
counters[index].target &= 0xffff;
}*/
_rcntSet( index );
}
@ -620,33 +718,41 @@ void rcntStartGate(unsigned int mode, u32 sCycle) {
for (i=0; i <=3; i++) { //Gates for counters
if ((mode == 0) && ((counters[i].mode & 0x83) == 0x83)) counters[i].count += HBLANK_COUNTER_SPEED; //Update counters using the hblank as the clock
if ((mode == 0) && ((counters[i].mode & 0x83) == 0x83))
counters[i].count += HBLANK_COUNTER_SPEED; //Update counters using the hblank as the clock
if (!(gates & (1<<i))) continue;
if ((counters[i].mode & 0x8) != mode) continue;
switch (counters[i].mode & 0x30) {
case 0x00: //Count When Signal is low (off)
// Just set the start cycle (sCycleT) -- counting will be done as needed
// for events (overflows, targets, mode changes, and the gate off below)
counters[i].mode |= 0x80;
counters[i].count += (int)((cpuRegs.cycle - counters[i].sCycleT) / counters[i].rate);
counters[i].sCycle = sCycle;
counters[i].sCycleT = sCycle;
break;
case 0x20:
counters[i].count = rcntRcount(i);
case 0x20: // reset and start counting on vsync end
// this is the vsync start so do nothing.
break;
case 0x10: //Reset and start counting on Vsync start
case 0x30: //Reset and start counting on Vsync start and end
counters[i].mode |= 0x80;
counters[i].count = 0;
counters[i].sCycle = sCycle;
counters[i].sCycleT = sCycle;
counters[i].target &= 0xffff;
break;
}
}
// Note: No need to set counters here.
// They'll get set later on in rcntUpdate, since we're
// being called from there anyway.
// No need to update actual counts here. Counts are calculated as needed by reads to
// rcntRcount(). And so long as sCycleT is set properly, any targets or overflows
// will be scheduled and handled.
// Note: No need to set counters here. They'll get set when control returns to
// rcntUpdate, since we're being called from there anyway.
}
void rcntEndGate(unsigned int mode, u32 sCycle) {
@ -658,26 +764,28 @@ void rcntEndGate(unsigned int mode, u32 sCycle) {
switch (counters[i].mode & 0x30) {
case 0x00: //Count When Signal is low (off)
counters[i].mode &= ~0x80;
counters[i].sCycle = sCycle;
counters[i].sCycleT = sCycle;
break;
case 0x10:
// Set the count here. Since the timer is being turned off it's
// important to record its count at this point.
counters[i].count = rcntRcount(i);
break; // skip the _rcntSet
counters[i].mode &= ~0x80;
counters[i].sCycleT = sCycle;
break;
case 0x10: // Reset and start counting on Vsync start
// this is the vsync end so do nothing
break;
case 0x20: //Reset and start counting on Vsync end
case 0x30: //Reset and start counting on Vsync start and end
counters[i].mode |= 0x80;
counters[i].count = 0;
counters[i].sCycle = sCycle;
counters[i].sCycleT = sCycle;
counters[i].target &= 0xffff;
break;
}
}
// Note: No need to set counters here.
// They'll get set later on in rcntUpdate, since we're
// being called from there anyway.
// Note: No need to set counters here. They'll get set when control returns to
// rcntUpdate, since we're being called from there anyway.
}
void rcntWtarget(int index, u32 value) {
@ -685,10 +793,13 @@ void rcntWtarget(int index, u32 value) {
EECNT_LOG("EE target write %d target %x value %x\n", index, counters[index].target, value);
counters[index].target = value & 0xffff;
if (counters[index].target <= rcntCycle(index)/* && counters[index].target != 0*/) {
//SysPrintf("EE Saving target %d from early trigger, target = %x, count = %x\n", index, counters[index].target, rcntCycle(index));
// guard against premature (instant) targeting.
// If the target is behind the current count, set it up so that the counter must
// overflow first before the target fires:
if( counters[index].target <= rcntCycle(index) )
counters[index].target |= 0x10000000;
}
_rcntSet( index );
}
@ -700,8 +811,9 @@ void rcntWhold(int index, u32 value) {
u32 rcntRcount(int index) {
u32 ret;
if ((counters[index].mode & 0x80))
ret = counters[index].count + (int)((cpuRegs.cycle - counters[index].sCycleT) / counters[index].rate);
// only count if the counter is turned on (0x80) and is not an hsync gate (!0x03)
if ((counters[index].mode & 0x80) && ((counters[index].mode & 0x3) != 0x3))
ret = counters[index].count + ((cpuRegs.cycle - counters[index].sCycleT) / counters[index].rate);
else
ret = counters[index].count;
@ -711,13 +823,24 @@ u32 rcntRcount(int index) {
u32 rcntCycle(int index) {
if ((counters[index].mode & 0x80))
return (u32)counters[index].count + (int)((cpuRegs.cycle - counters[index].sCycleT) / counters[index].rate);
if ((counters[index].mode & 0x80) && ((counters[index].mode & 0x3) != 0x3))
return counters[index].count + ((cpuRegs.cycle - counters[index].sCycleT) / counters[index].rate);
else
return (u32)counters[index].count;
return counters[index].count;
}
int rcntFreeze(gzFile f, int Mode) {
if( Mode == 1 )
{
// Temp Hack Fix: Adjust some values so that they'll load properly
// in the future (this should be removed when a new savestate version
// is introduced).
counters[4].sCycle += vSyncInfo.hRender;
counters[5].sCycle += vSyncInfo.Render;
}
gzfreezel(counters);
gzfreeze(&nextCounter, sizeof(nextCounter));
gzfreeze(&nextsCounter, sizeof(nextsCounter));
@ -725,6 +848,7 @@ int rcntFreeze(gzFile f, int Mode) {
if( Mode == 0 )
{
// Sanity check for loading older savestates:
if( counters[4].sCycle == 0 )
counters[4].sCycle = cpuRegs.cycle;
@ -732,6 +856,15 @@ int rcntFreeze(gzFile f, int Mode) {
counters[5].sCycle = cpuRegs.cycle;
}
// Old versions of PCSX2 saved the counters *after* incrementing them.
// So if we don't roll back here, the counters move past cpuRegs.cycle
// and everything explodes!
// Note: Roll back regardless of load or save, since we roll them forward
// when saving (above). It's a hack, but it works.
counters[4].sCycle -= vSyncInfo.hRender;
counters[5].sCycle -= vSyncInfo.Render;
return 0;
}

View File

@ -19,11 +19,14 @@
#ifndef __COUNTERS_H__
#define __COUNTERS_H__
// fixme: Cycle and sCycleT members are unused.
// But they can't be removed without making a new savestate version.
typedef struct {
u32 count, mode, target, hold;
u32 rate, interrupt;
u32 Cycle, sCycle;
u32 CycleT, sCycleT;
s32 CycleT;
u32 sCycleT;
} Counter;
//------------------------------------------------------------------
@ -35,11 +38,7 @@ typedef struct {
//------------------------------------------------------------------
// NTSC Timing Information!!! (some scanline info is guessed)
//------------------------------------------------------------------
#define SCANLINE_NTSC (u32)(PS2CLK / 15734.25)//18743 //when using 59.94005994 it rounds to 15734.27 :p (rama)
#define HRENDER_TIME_NTSC (u32)(SCANLINE_NTSC / 2)//15528 //time from hblank end to hblank start (PS2CLK / 18991.368423051722991900181367568)
#define HBLANK_TIME_NTSC (u32)(SCANLINE_NTSC / 2)//3215 //time from hblank start to hblank end (PS2CLK / 91738.91105912572817760653181028)
#define VSYNC_NTSC (u32)(PS2CLK / 59.94) //hz //59.94005994 is more precise
#define VSYNC_HALF_NTSC (u32)(VSYNC_NTSC / 2) //hz
#define FRAMERATE_NTSC 2997// frames per second * 100 (29.97)
#define SCANLINES_TOTAL_NTSC 525 // total number of scanlines
#define SCANLINES_VSYNC_NTSC 3 // scanlines that are used for syncing every half-frame
@ -52,37 +51,14 @@ typedef struct {
//------------------------------------------------------------------
// PAL Timing Information!!! (some scanline info is guessed)
//------------------------------------------------------------------
#define SCANLINE_PAL (u32)(PS2CLK / 15625)//18874
#define HRENDER_TIME_PAL (u32)(SCANLINE_PAL / 2)//15335 //time from hblank end to hblank start
#define HBLANK_TIME_PAL (u32)(SCANLINE_PAL / 2)//3539 //time from hblank start to hblank end
#define VSYNC_PAL (u32)(PS2CLK / 50) //hz
#define VSYNC_HALF_PAL (u32)(VSYNC_PAL / 2) //hz
#define FRAMERATE_PAL 2500// frames per second * 100 (25)
#define SCANLINES_TOTAL_PAL 625 // total number of scanlines
#define SCANLINES_TOTAL_PAL 625 // total number of scanlines per frame
#define SCANLINES_VSYNC_PAL 5 // scanlines that are used for syncing every half-frame
#define SCANLINES_VRENDER_PAL 288 // scanlines in a half-frame (because of interlacing)
#define SCANLINES_VBLANK1_PAL 19 // scanlines used for vblank1 (even interlace)
#define SCANLINES_VBLANK2_PAL 20 // scanlines used for vblank2 (odd interlace)
#define HSYNC_ERROR_PAL ((s32)VSYNC_PAL - (s32)((SCANLINE_PAL * SCANLINES_TOTAL_PAL) / 2))
//------------------------------------------------------------------
// Timing (PAL/NTSC) Information!!!
//------------------------------------------------------------------
#define SCANLINE_ ((Config.PsxType&1) ? SCANLINE_PAL : SCANLINE_NTSC)
#define HRENDER_TIME_ ((Config.PsxType&1) ? HRENDER_TIME_PAL : HRENDER_TIME_NTSC)
#define HBLANK_TIME_ ((Config.PsxType&1) ? HBLANK_TIME_PAL : HBLANK_TIME_NTSC)
#define VSYNC_ ((Config.PsxType&1) ? VSYNC_PAL : VSYNC_NTSC)
#define VSYNC_HALF_ ((Config.PsxType&1) ? VSYNC_HALF_PAL : VSYNC_HALF_NTSC)
#define HSYNC_ERROR ((Config.PsxType&1) ? HSYNC_ERROR_PAL : HSYNC_ERROR_NTSC)
#define SCANLINES_TOTAL_ ((Config.PsxType&1) ? SCANLINES_TOTAL_PAL : SCANLINES_TOTAL_NTSC)
#define SCANLINES_VSYNC_ ((Config.PsxType&1) ? SCANLINES_VSYNC_PAL : SCANLINES_VSYNC_NTSC)
#define SCANLINES_VRENDER_ ((Config.PsxType&1) ? SCANLINES_VRENDER_PAL : SCANLINES_VRENDER_NTSC)
#define SCANLINES_VBLANK1_ ((Config.PsxType&1) ? SCANLINES_VBLANK1_PAL : SCANLINES_VBLANK1_NTSC)
#define SCANLINES_VBLANK2_ ((Config.PsxType&1) ? SCANLINES_VBLANK2_PAL : SCANLINES_VBLANK2_NTSC)
//------------------------------------------------------------------
// vSync and hBlank Timing Modes
//------------------------------------------------------------------
@ -91,13 +67,19 @@ typedef struct {
#define MODE_VSYNC 0x3 //Set during the Syncing Scanlines
#define MODE_VBLANK1 0x0 //Set during the Blanking Scanlines (half-frame 1)
#define MODE_VBLANK2 0x1 //Set during the Blanking Scanlines (half-frame 2)
#define MODE_HRENDER 0x0 //Set for ~5/6 of 1 Scanline
#define MODE_HBLANK 0x1 //Set for the remaining ~1/6 of 1 Scanline
extern Counter counters[6];
extern u32 nextCounter, nextsCounter;
extern s32 nextCounter; // delta until the next counter event (must be signed)
extern u32 nextsCounter;
extern u32 g_lastVSyncCycle;
extern u32 g_deltaVSyncCycle;
extern void rcntUpdate_hScanline();
extern void rcntUpdate_vSync();
extern void rcntUpdate();
void rcntInit();

View File

@ -1140,7 +1140,7 @@ void hwWrite128(u32 mem, u64 *value) {
}
}
void intcInterrupt() {
__forceinline void intcInterrupt() {
cpuRegs.interrupt &= ~(1 << 30);
if ((cpuRegs.CP0.n.Status.val & 0x400) != 0x400) return;
@ -1159,6 +1159,7 @@ void intcInterrupt() {
cpuException(0x400, cpuRegs.branch);
}
// fixme: dead/unused code?
void dmacTestInterrupt() {
cpuRegs.interrupt &= ~(1 << 31);
if ((cpuRegs.CP0.n.Status.val & 0x800) != 0x800) return;
@ -1169,9 +1170,7 @@ void dmacTestInterrupt() {
if((psHu32(DMAC_CTRL) & 0x1) == 0) return;
}
void dmacInterrupt()
__forceinline void dmacInterrupt()
{
cpuRegs.interrupt &= ~(1 << 31);
if ((cpuRegs.CP0.n.Status.val & 0x10807) != 0x10801) return;

View File

@ -39,7 +39,11 @@ extern u64 *psHD;
#define psHu64(mem) (*(u64*)&PS2MEM_HW[(mem) & 0xffff])
extern u32 g_nextBranchCycle;
extern void CPU_INT( u32 n, u32 ecycle );
extern int cpuSetNextBranch( u32 startCycle, s32 delta );
extern int cpuSetNextBranchDelta( s32 delta );
extern int cpuTestCycle( u32 startCycle, s32 delta );
extern void CPU_INT( u32 n, s32 ecycle );
// VIF0 -- 0x10004000 -- psH[0x4000]
// VIF1 -- 0x10005000 -- psH[0x5000]
@ -404,7 +408,7 @@ int hwMFIFOWrite(u32 addr, u8 *data, u32 size);
int hwDmacSrcChainWithStack(DMACh *dma, int id);
int hwDmacSrcChain(DMACh *dma, int id);
void intcInterrupt();
void dmacInterrupt();
extern void intcInterrupt();
extern void dmacInterrupt();
#endif /* __HW_H__ */

File diff suppressed because it is too large

View File

@ -19,16 +19,21 @@
#ifndef __PSXCOUNTERS_H__
#define __PSXCOUNTERS_H__
// fixme: sCycle and Cycle are unused.
// Can't remove them without making a new savestate version though.
typedef struct {
u64 count, target;
u32 mode;
u32 rate, interrupt, otarget;
u32 sCycle, Cycle;
u32 sCycleT, CycleT;
u32 sCycleT;
s32 CycleT;
} psxCounter;
extern psxCounter psxCounters[8];
extern u32 psxNextCounter, psxNextsCounter;
extern s32 psxNextCounter;
extern u32 psxNextsCounter;
void psxRcntInit();
void psxRcntUpdate();

View File

@ -78,7 +78,7 @@
#define HW_DMA_PCR2 (psxHu32(0x1570))
#define HW_DMA_ICR2 (psxHu32(0x1574))
extern void PSX_INT( int n, u32 ecycle);
extern void PSX_INT( int n, s32 ecycle);
void psxHwReset();
u8 psxHwRead8 (u32 add);

View File

@ -385,7 +385,7 @@ void spyFunctions(){
* Format: OP rt, rs, immediate *
*********************************************************/
void psxADDI() { if (!_Rt_) return; _rRt_ = _u32(_rRs_) + _Imm_ ; } // Rt = Rs + Im (Exception on Integer Overflow)
void psxADDIU() { if (!_Rt_) { g_eeTightenSync+=3; zeroEx(); return; } _rRt_ = _u32(_rRs_) + _Imm_ ; } // Rt = Rs + Im
void psxADDIU() { if (!_Rt_) { g_psxNextBranchCycle=psxRegs.cycle; zeroEx(); return; } _rRt_ = _u32(_rRs_) + _Imm_ ; } // Rt = Rs + Im
void psxANDI() { if (!_Rt_) return; _rRt_ = _u32(_rRs_) & _ImmU_; } // Rt = Rs And Im
void psxORI() { if (!_Rt_) return; _rRt_ = _u32(_rRs_) | _ImmU_; } // Rt = Rs Or Im
void psxXORI() { if (!_Rt_) return; _rRt_ = _u32(_rRs_) ^ _ImmU_; } // Rt = Rs Xor Im

View File

@ -23,10 +23,13 @@
#include "PsxCommon.h"
#include "Misc.h"
// used for constant propagation
R3000Acpu *psxCpu;
// used for constant propagation
u32 g_psxConstRegs[32];
u32 g_psxHasConstReg, g_psxFlushedConstReg;
// Controls when branch tests are performed.
u32 g_psxNextBranchCycle = 0;
// This value is used when the IOP execution is broken to return control to the EE.
@ -39,6 +42,8 @@ s32 psxBreak = 0;
// control is returned to the EE.
s32 psxCycleEE = -1;
int iopBranchAction = 0;
PCSX2_ALIGNED16(psxRegisters psxRegs);
@ -46,6 +51,10 @@ int psxInit()
{
psxCpu = CHECK_EEREC ? &psxRec : &psxInt;
g_psxNextBranchCycle = 8;
psxBreak = 0;
psxCycleEE = -1;
#ifdef PCSX2_DEVBUILD
Log=0;
#endif
@ -144,52 +153,68 @@ void psxException(u32 code, u32 bd) {
}*/
}
__forceinline void PSX_INT( int n, u32 ecycle )
__forceinline void psxSetNextBranch( u32 startCycle, s32 delta )
{
//assert( startCycle <= psxRegs.cycle );
// typecast the conditional to signed so that things don't blow up
// if startCycle is greater than our next branch cycle.
if( (int)(g_psxNextBranchCycle - startCycle) > delta )
g_psxNextBranchCycle = startCycle + delta;
}
__forceinline void psxSetNextBranchDelta( s32 delta )
{
psxSetNextBranch( psxRegs.cycle, delta );
}
__forceinline int psxTestCycle( u32 startCycle, s32 delta )
{
// typecast the conditional to signed so that things don't explode
// if the startCycle is ahead of our current cpu cycle.
return (int)(psxRegs.cycle - startCycle) >= delta;
}
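A small standalone illustration of why the signed cast matters (plain C; cycleElapsed is a hypothetical helper that mirrors psxTestCycle/cpuTestCycle):

#include <assert.h>
#include <stdint.h>

// Same comparison as psxTestCycle(): subtract in u32, reinterpret as signed.
static int cycleElapsed( uint32_t now, uint32_t start, int32_t delta )
{
	return (int32_t)(now - start) >= delta;
}

int main()
{
	// start just below the u32 limit; the cycle counter has since wrapped past zero.
	assert( cycleElapsed( 0x00000200u, 0xFFFFFF00u, 500 ) );	// 768 cycles elapsed
	assert( !cycleElapsed( 0x00000200u, 0xFFFFFF00u, 1000 ) );	// not yet 1000
	return 0;
}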
__forceinline void PSX_INT( int n, s32 ecycle )
{
psxRegs.interrupt |= 1 << n;
psxRegs.sCycle[n] = psxRegs.cycle;
psxRegs.eCycle[n] = ecycle;
if( (g_psxNextBranchCycle - psxRegs.sCycle[n]) <= psxRegs.eCycle[n] ) return;
// Interrupt is happening soon: make sure everyone is aware (including the EE!)
g_psxNextBranchCycle = psxRegs.sCycle[n] + psxRegs.eCycle[n];
psxSetNextBranchDelta( ecycle );
if( psxCycleEE < 0 )
{
// The EE called this int, so inform it to branch as needed:
u32 iopDelta = (g_psxNextBranchCycle-psxRegs.cycle)*8;
if( g_nextBranchCycle - cpuRegs.cycle > iopDelta )
{
// Optimization note: This method inlines with 'n' as a constant, so the
// following conditionals will optimize nicely.
g_nextBranchCycle = cpuRegs.cycle + iopDelta;
if( n > 12 ) g_eeTightenSync += 5;
if( n >= 19 ) g_eeTightenSync += 2;
}
s32 iopDelta = (g_psxNextBranchCycle-psxRegs.cycle)*8;
cpuSetNextBranchDelta( iopDelta );
}
}
static __forceinline void PSX_TESTINT( u32 n, void (*callback)(), int runIOPcode )
{
if( !(psxRegs.interrupt & (1 << n)) ) return;
#define PSX_TESTINT(n, callback) \
if (psxRegs.interrupt & (1 << n)) { \
if ((int)(psxRegs.cycle - psxRegs.sCycle[n]) >= psxRegs.eCycle[n]) { \
callback(); \
} \
else if( (int)(g_psxNextBranchCycle - psxRegs.sCycle[n]) > psxRegs.eCycle[n] ) \
g_psxNextBranchCycle = psxRegs.sCycle[n] + psxRegs.eCycle[n]; \
if( psxTestCycle( psxRegs.sCycle[n], psxRegs.eCycle[n] ) )
{
callback();
//if( runIOPcode ) iopBranchAction = 1;
}
else
psxSetNextBranch( psxRegs.sCycle[n], psxRegs.eCycle[n] );
}
static __forceinline void _psxTestInterrupts()
{
PSX_TESTINT(9, sif0Interrupt); // SIF0
PSX_TESTINT(10, sif1Interrupt); // SIF1
PSX_TESTINT(16, sioInterrupt);
PSX_TESTINT(19, cdvdReadInterrupt);
PSX_TESTINT(9, sif0Interrupt, 1); // SIF0
PSX_TESTINT(10, sif1Interrupt, 1); // SIF1
PSX_TESTINT(16, sioInterrupt, 0);
PSX_TESTINT(19, cdvdReadInterrupt, 1);
// Profile-guided Optimization (sorta)
// The following ints are rarely called. Encasing them in a conditional
@ -197,24 +222,27 @@ static __forceinline void _psxTestInterrupts()
if( psxRegs.interrupt & ( (3ul<<11) | (3ul<<20) | (3ul<<17) ) )
{
PSX_TESTINT(17, cdrInterrupt);
PSX_TESTINT(18, cdrReadInterrupt);
PSX_TESTINT(11, psxDMA11Interrupt); // SIO2
PSX_TESTINT(12, psxDMA12Interrupt); // SIO2
PSX_TESTINT(20, dev9Interrupt);
PSX_TESTINT(21, usbInterrupt);
PSX_TESTINT(11, psxDMA11Interrupt,0); // SIO2
PSX_TESTINT(12, psxDMA12Interrupt,0); // SIO2
PSX_TESTINT(17, cdrInterrupt,0);
PSX_TESTINT(18, cdrReadInterrupt,0);
PSX_TESTINT(20, dev9Interrupt,1);
PSX_TESTINT(21, usbInterrupt,1);
}
}
void psxBranchTest()
{
if ((psxRegs.cycle - psxNextsCounter) >= psxNextCounter)
if( psxTestCycle( psxNextsCounter, psxNextCounter ) )
{
psxRcntUpdate();
iopBranchAction = 1;
}
// start the next branch at the next counter event by default
// the int code below will assign nearer branches if needed.
// the interrupt code below will assign nearer branches if needed.
g_psxNextBranchCycle = psxNextsCounter+psxNextCounter;
if (psxRegs.interrupt) _psxTestInterrupts();
if (psxHu32(0x1078)) {
@ -223,6 +251,7 @@ void psxBranchTest()
{
// PSXCPU_LOG("Interrupt: %x %x\n", HWMu32(0x1070), HWMu32(0x1074));
psxException(0, 0);
iopBranchAction = 1;
}
}
}

View File

@ -127,8 +127,8 @@ typedef struct psxRegisters_t {
u32 code; /* The instruction */
u32 cycle;
u32 interrupt;
u32 sCycle[64];
u32 eCycle[64];
u32 sCycle[64]; // start cycle for signaled ints
s32 eCycle[64]; // cycle delta for signaled ints (sCycle + eCycle == branch cycle)
u32 _msflag[32];
u32 _smflag[32];
} psxRegisters;
@ -213,4 +213,8 @@ void psxBranchTest();
void psxExecuteBios();
void psxRestartCPU();
extern s32 psxNextCounter;
extern u32 psxNextsCounter;
extern int iopBranchAction;
#endif /* __R3000A_H__ */

View File

@ -131,7 +131,7 @@ void cpuReset()
fpuRegs.fprc[0] = 0x00002e00; // fpu Revision..
fpuRegs.fprc[31] = 0x01000001; // fpu Status/Control
vu0Reset();
vu0Reset();
vu1Reset();
hwReset();
vif0Reset();
@ -325,16 +325,44 @@ void cpuTestMissingHwInts() {
}
}
// sets a branch test to occur some time from an arbitrary starting point.
__forceinline int cpuSetNextBranch( u32 startCycle, s32 delta )
{
// typecast the conditional to signed so that things don't blow up
// if startCycle is greater than our next branch cycle.
if( (int)(g_nextBranchCycle - startCycle) > delta )
{
g_nextBranchCycle = startCycle + delta;
return 1;
}
return 0;
}
// sets a branch to occur some time from the current cycle
__forceinline int cpuSetNextBranchDelta( s32 delta )
{
return cpuSetNextBranch( cpuRegs.cycle, delta );
}
// tests the cpu cycle against the given start and delta values.
// Returns true if the delta time has passed.
__forceinline int cpuTestCycle( u32 startCycle, s32 delta )
{
// typecast the conditional to signed so that things don't explode
// if the startCycle is ahead of our current cpu cycle.
return (int)(cpuRegs.cycle - startCycle) >= delta;
}
static __forceinline void TESTINT( u8 n, void (*callback)() )
{
if( !(cpuRegs.interrupt & (1 << n)) ) return;
if( (cpuRegs.cycle - cpuRegs.sCycle[n]) >= cpuRegs.eCycle[n] )
{
if( cpuTestCycle( cpuRegs.sCycle[n], cpuRegs.eCycle[n] ) )
callback();
}
else if( (g_nextBranchCycle - cpuRegs.sCycle[n]) > cpuRegs.eCycle[n] )
g_nextBranchCycle = cpuRegs.sCycle[n] + cpuRegs.eCycle[n];
else
cpuSetNextBranch( cpuRegs.sCycle[n], cpuRegs.eCycle[n] );
}
static __forceinline void _cpuTestInterrupts()
@ -373,14 +401,13 @@ u32 s_iLastPERFCycle[2] = {0,0};
static __forceinline void _cpuTestTIMR()
{
cpuRegs.CP0.n.Count += cpuRegs.cycle-s_iLastCOP0Cycle;
s_iLastCOP0Cycle = cpuRegs.cycle;
// The interpreter and recompiler both re-calculate these values whenever they
// are read, so updating them at regular intervals is merely a common courtesy.
// For that reason they're part of the Counters event, since it's guaranteed
// to be called at least 100 times a second.
// [Air] : Are these necessary? The recompiler and interpreter code both appear
// to recalculate them whenever they're read. (although if they were not read
// for a long time they could overflow). Maybe these checks could be moved to
// the Ints or Counters so they they get called less frequently, but still get
// called enough to avoid overflows.
// Updating them more frequently is pointless and, in fact, they could
// just as well be updated 20 times a second if it were convenient to do so.
if((cpuRegs.PERF.n.pccr & 0x800003E0) == 0x80000020) {
cpuRegs.PERF.n.pcr0 += cpuRegs.cycle-s_iLastPERFCycle[0];
@ -391,6 +418,9 @@ static __forceinline void _cpuTestTIMR()
s_iLastPERFCycle[1] = cpuRegs.cycle;
}
cpuRegs.CP0.n.Count += cpuRegs.cycle-s_iLastCOP0Cycle;
s_iLastCOP0Cycle = cpuRegs.cycle;
if ( (cpuRegs.CP0.n.Status.val & 0x8000) &&
cpuRegs.CP0.n.Count >= cpuRegs.CP0.n.Compare && cpuRegs.CP0.n.Count < cpuRegs.CP0.n.Compare+1000 ) {
SysPrintf("timr intr: %x, %x\n", cpuRegs.CP0.n.Count, cpuRegs.CP0.n.Compare);
@ -398,17 +428,10 @@ static __forceinline void _cpuTestTIMR()
}
}
// maximum wait between branches. Lower values provide a tighter synchronization between
// Maximum wait between branches. Lower values provide a tighter synchronization between
// the EE and the IOP, but incur more execution overhead.
#define EE_WAIT_CYCLE 2048
// maximum wait between branches when EE/IOP sync is tightened via g_eeTightenSync.
// Lower values don't always make games better, since places where this value is set
// will have to use higher numbers to achieve the same cycle count.
#define EE_ACTIVE_CYCLE 192
#define EE_ACTIVE_CYCLE_SUB (EE_WAIT_CYCLE - EE_ACTIVE_CYCLE)
// if cpuRegs.cycle is greater than this cycle, should check cpuBranchTest for updates
u32 g_nextBranchCycle = 0;
@ -416,13 +439,79 @@ u32 g_nextBranchCycle = 0;
// synchronization). Value decremented each branch test.
u32 g_eeTightenSync = 0;
#if !defined( PCSX2_NORECBUILD ) && !defined( PCSX2_PUBLIC )
// Shared portion of the branch test, called from both the Interpreter
// and the recompiler. (moved here to help alleviate redundant code)
static __forceinline void _cpuBranchTest_Shared()
{
g_nextBranchCycle = cpuRegs.cycle + EE_WAIT_CYCLE;
EEsCycle += cpuRegs.cycle - EEoCycle;
EEoCycle = cpuRegs.cycle;
iopBranchAction = ( EEsCycle > 0 );
// ---- Counters -------------
rcntUpdate_hScanline();
if( cpuTestCycle( nextsCounter, nextCounter ) )
{
rcntUpdate();
_cpuTestTIMR();
}
//#ifdef CPU_LOG
// cpuTestMissingHwInts();
//#endif
// ---- Interrupts -------------
if( cpuRegs.interrupt )
_cpuTestInterrupts();
// ---- IOP -------------
// * It's important to run a psxBranchTest before calling ExecuteBlock. This
// is because the IOP does not always perform branch tests before returning
// (during the prev branch) and also so it can act on the state the EE has
// given it before executing any code.
//
// * The IOP cannot always be run. If we run IOP code every time through the
// cpuBranchTest, the IOP generally starts to run way ahead of the EE.
//
// * However! The IOP should be run during certain important events: vsync/hsync
// events and IOP interrupts / exceptions -- even if it's already getting
// a little ahead of the EE. The iopBranchAction global will flag true if
// something like that happens.
psxBranchTest();
if( iopBranchAction )
{
//if( EEsCycle < -400 )
// SysPrintf( " IOP ahead by: %d\n", -EEsCycle );
psxCpu->ExecuteBlock();
}
// The IOP could be running ahead of us, so adjust the IOP's next branch by its
// relative position to the EE (via EEsCycle)
cpuSetNextBranchDelta( ((g_psxNextBranchCycle-psxRegs.cycle)*8) - EEsCycle );
// Apply the hsync counter's nextCycle
cpuSetNextBranch( counters[4].sCycle, counters[4].CycleT );
// Apply other counter nextCycles
cpuSetNextBranch( nextsCounter, nextCounter );
}
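Side note on the *8 factor used above: the EE clock (294.912 MHz) is exactly eight times the IOP clock (36.864 MHz), so IOP cycle deltas are scaled by 8 to express them in EE cycles. A hedged helper form of the same conversion (hypothetical name, equivalent to the inline math in _cpuBranchTest_Shared and PSX_INT):

static __forceinline s32 iopDeltaToEECycles( s32 iopCycles )
{
	return iopCycles * 8;	// PS2CLK / PSXCLK == 294912000 / 36864000 == 8
}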
#ifndef PCSX2_NORECBUILD
#ifndef PCSX2_PUBLIC
extern u8 g_globalXMMSaved;
X86_32CODE(extern u8 g_globalMMXSaved;)
#endif
#endif
u32 g_MTGSVifStart = 0, g_MTGSVifCount=0;
extern void gsWaitGS();
void cpuBranchTest()
{
@ -436,67 +525,19 @@ void cpuBranchTest()
g_EEFreezeRegs = 0;
#endif
g_nextBranchCycle = cpuRegs.cycle + EE_WAIT_CYCLE;
if( g_eeTightenSync != 0 )
{
// This means we're running "sync-sensitive" code.
// tighten up the EE's cycle rate to ensure a more responsive
// EE/IOP operation:
g_eeTightenSync--;
g_nextBranchCycle -= EE_ACTIVE_CYCLE_SUB;
}
// ---- Counters -------------
if( (cpuRegs.cycle - nextsCounter) >= nextCounter )
rcntUpdate();
if( (g_nextBranchCycle-nextsCounter) >= nextCounter )
g_nextBranchCycle = nextsCounter+nextCounter;
// ---- Interrupts -------------
if( cpuRegs.interrupt )
_cpuTestInterrupts();
// Perform counters, ints, and IOP updates:
_cpuBranchTest_Shared();
// ---- MTGS -------------
// stall mtgs if it is taking too long
if( g_MTGSVifCount > 0 ) {
if( cpuRegs.cycle-g_MTGSVifStart > g_MTGSVifCount ) {
if( (int)(cpuRegs.cycle-g_MTGSVifStart) > g_MTGSVifCount ) {
gsWaitGS();
g_MTGSVifCount = 0;
}
}
//#ifdef CPU_LOG
// cpuTestMissingHwInts();
//#endif
_cpuTestTIMR();
// ---- IOP -------------
// Signal for an immediate branch test! This is important! The IOP must
// be able to act on the state the EE has given it before executing any
// additional code.
psxBranchTest();
EEsCycle += cpuRegs.cycle - EEoCycle;
EEoCycle = cpuRegs.cycle;
psxCpu->ExecuteBlock();
// IOP Synchronization:
// If the IOP needs to branch soon then so should the EE.
// As the master of all, the EE should look out for its children and
// assure them the love they deserve:
{
u32 iopDelta = (g_psxNextBranchCycle-psxRegs.cycle)*8;
if( g_nextBranchCycle - cpuRegs.cycle > iopDelta )
g_nextBranchCycle = cpuRegs.cycle + iopDelta;
}
// ---- VU0 -------------
if (VU0.VI[REG_VPU_STAT].UL & 0x1)
@ -514,82 +555,64 @@ void cpuBranchTest()
#endif
}
__forceinline void CPU_INT( u32 n, u32 ecycle)
__forceinline void CPU_INT( u32 n, s32 ecycle)
{
cpuRegs.interrupt|= 1 << n;
cpuRegs.sCycle[n] = cpuRegs.cycle;
cpuRegs.eCycle[n] = ecycle;
if( (g_nextBranchCycle - cpuRegs.sCycle[n]) <= cpuRegs.eCycle[n] ) return;
// Interrupt is happening soon: make sure both EE and IOP are aware.
// Interrupt is happening soon: make sure everyone's aware!
g_nextBranchCycle = cpuRegs.sCycle[n] + cpuRegs.eCycle[n];
// Optimization note: this method inlines nicely since 'n' is almost always a
// constant. The following conditional optimizes to virtually nothing in most
// cases.
if( ( n == 3 || n == 4 || n == 30 || n == 31 ) &&
ecycle <= 28 && psxCycleEE > 0 )
if( ecycle <= 28 && psxCycleEE > 0 )
{
// If running in the IOP, force it to break immediately into the EE, since
// the EE's branch test is due to run.
psxBreak += psxCycleEE; // number of cycles we didn't run.
psxBreak += psxCycleEE; // record the number of cycles the IOP didn't run.
psxCycleEE = 0;
if( n == 3 || n == 4 )
g_eeTightenSync += 1; // only tighten IPU a bit, otherwise it's too slow!
else
g_eeTightenSync += 4;
}
cpuSetNextBranchDelta( cpuRegs.eCycle[n] );
}
static void _cpuTestINTC() {
if ((cpuRegs.CP0.n.Status.val & 0x10407) == 0x10401){
if (psHu32(INTC_STAT) & psHu32(INTC_MASK)) {
if ((cpuRegs.interrupt & (1 << 30)) == 0) {
CPU_INT(30,4);
}
}
}
__forceinline void cpuTestINTCInts() {
if( (cpuRegs.CP0.n.Status.val & 0x10407) != 0x10401 ) return;
if( (psHu32(INTC_STAT) & psHu32(INTC_MASK)) == 0 ) return;
if( cpuRegs.interrupt & (1 << 30) ) return;
// fixme: The counters code throws INT30's a lot, and most of the time they're
// "late" already, so firing them immediately instead of after the next branch
// (in which case they'll be really late) would be a lot better in theory.
// However, setting this to zero for everything breaks games, so if it's done
// it needs to be done for counters only.
CPU_INT(30,4);
}
static void _cpuTestDMAC() {
if ((cpuRegs.CP0.n.Status.val & 0x10807) == 0x10801){
if (psHu16(0xe012) & psHu16(0xe010) ||
psHu16(0xe010) & 0x8000) {
if ( (cpuRegs.interrupt & (1 << 31)) == 0) {
CPU_INT(31, 4);
}
}
}
__forceinline void cpuTestDMACInts() {
if ((cpuRegs.CP0.n.Status.val & 0x10807) != 0x10801) return;
if ( cpuRegs.interrupt & (1 << 31) ) return;
if ( ( (psHu16(0xe012) & psHu16(0xe010)) == 0) &&
( (psHu16(0xe010) & 0x8000) == 0) ) return;
CPU_INT(31, 4);
}
void cpuTestHwInts() {
//if ((cpuRegs.CP0.n.Status.val & 0x10007) != 0x10001) return;
_cpuTestINTC();
_cpuTestDMAC();
_cpuTestTIMR();
}
void cpuTestINTCInts() {
//if ((cpuRegs.CP0.n.Status.val & 0x10407) == 0x10401) {
_cpuTestINTC();
//}
}
void cpuTestDMACInts() {
//if ((cpuRegs.CP0.n.Status.val & 0x10807) == 0x10801) {
_cpuTestDMAC();
//}
}
void cpuTestTIMRInts() {
// fixme: Unused code. delete or find its true purpose?
__forceinline void cpuTestTIMRInts() {
if ((cpuRegs.CP0.n.Status.val & 0x10007) == 0x10001) {
_cpuTestTIMR();
}
}
// fixme: unused code. delete or find its true purpose?
void cpuTestHwInts() {
cpuTestINTCInts();
cpuTestDMACInts();
cpuTestTIMRInts();
}
void cpuExecuteBios()
{
// filter CPU options
@ -601,41 +624,20 @@ void cpuExecuteBios()
case PCSX2_FRAMELIMIT_SKIP:
case PCSX2_FRAMELIMIT_VUSKIP:
if( GSsetFrameSkip == NULL )
{
Config.Options &= ~PCSX2_FRAMELIMIT_MASK;
SysPrintf("Notice: Disabling frameskip -- GS plugin does not support it.\n");
}
break;
}
SysPrintf("Using Frame Skipping: ");
switch(CHECK_FRAMELIMIT) {
case PCSX2_FRAMELIMIT_NORMAL: SysPrintf("Normal\n"); break;
case PCSX2_FRAMELIMIT_LIMIT: SysPrintf("Limit\n"); break;
case PCSX2_FRAMELIMIT_SKIP: SysPrintf("Skip\n"); break;
case PCSX2_FRAMELIMIT_VUSKIP: SysPrintf("VU Skip\n"); break;
}
//? if(CHECK_FRAMELIMIT==PCSX2_FRAMELIMIT_LIMIT)
{
extern u64 GetTickFrequency();
extern u64 iTicks;
if (Config.CustomFps > 0) {
iTicks = GetTickFrequency() / Config.CustomFps;
SysPrintf("Framelimiter rate updated (UpdateVSyncRate): %d fps\n", Config.CustomFps);
}
else if (Config.PsxType & 1) {
iTicks = (GetTickFrequency() / 5000) * 100;
SysPrintf("Framelimiter rate updated (UpdateVSyncRate): 50 fps\n");
}
else {
iTicks = (GetTickFrequency() / 5994) * 100;
SysPrintf("Framelimiter rate updated (UpdateVSyncRate): 59.94 fps\n");
}
}
UpdateVSyncRate();
SysPrintf("* PCSX2 *: ExecuteBios\n");
bExecBIOS = TRUE;
while (cpuRegs.pc != 0x00200008 &&
cpuRegs.pc != 0x00100008) {
g_nextBranchCycle = cpuRegs.cycle;
Cpu->ExecuteBlock();
}
@ -687,30 +689,9 @@ void IntcpuBranchTest()
g_EEFreezeRegs = 0;
#endif
// Interpreter uses a high-resolution wait cycle all the time:
g_nextBranchCycle = cpuRegs.cycle + 256;
// Perform counters, ints, and IOP updates:
_cpuBranchTest_Shared();
if ((cpuRegs.cycle - nextsCounter) >= nextCounter)
rcntUpdate();
if (cpuRegs.interrupt)
_cpuTestInterrupts();
if( (g_nextBranchCycle-nextsCounter) >= nextCounter )
g_nextBranchCycle = nextsCounter+nextCounter;
//#ifdef CPU_LOG
// cpuTestMissingHwInts();
//#endif
_cpuTestTIMR();
psxBranchTest();
EEsCycle += cpuRegs.cycle - EEoCycle;
EEoCycle = cpuRegs.cycle;
psxCpu->ExecuteBlock();
if (VU0.VI[REG_VPU_STAT].UL & 0x1) {
Cpu->ExecuteVU0Block();
}

View File

@ -233,9 +233,9 @@ void cpuTlbMissW(u32 addr, u32 bd);
void IntcpuBranchTest();
void cpuBranchTest();
void cpuTestHwInts();
void cpuTestINTCInts();
void cpuTestDMACInts();
void cpuTestTIMRInts();
extern void cpuTestINTCInts();
extern void cpuTestDMACInts();
extern void cpuTestTIMRInts();
void cpuExecuteBios();
void cpuRestartCPU();
@ -251,7 +251,6 @@ void JumpCheckSym(u32 addr, u32 pc);
void JumpCheckSymRet(u32 addr);
extern u32 g_EEFreezeRegs;
extern u32 g_eeTightenSync; // non-zero values tighten EE/IOP code synchronization for short periods.
//exception code
#define EXC_CODE(x) ((x)<<2)

View File

@ -59,7 +59,6 @@ extern "C" {
#include "iR3000A.h"
#include "PsxCounters.h"
extern u32 psxNextCounter, psxNextsCounter;
u32 g_psxMaxRecMem = 0;
extern char *disRNameGPR[];
extern char* disR3000Fasm(u32 code, u32 pc);
@ -445,7 +444,6 @@ void psxRecompileCodeConst0(R3000AFNPTR constcode, R3000AFNPTR_INFO constscode,
}
extern "C" void zeroEx();
extern "C" u32 g_eeTightenSync;
// rt = rs op imm16
void psxRecompileCodeConst1(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode)
@ -459,11 +457,10 @@ void psxRecompileCodeConst1(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode)
_psxFlushCall(FLUSH_NODESTROY);
CALLFunc((uptr)zeroEx);
#endif
// Tighten up the EE/IOP sync (helps prevent crashes)
// [TODO] should probably invoke a branch test or EE code control break here,
// but it would require the use of registers and I have no eff'ing idea how
// the register allocation stuff works in the recompiler. :/
ADD32ItoM( (uptr)&g_eeTightenSync, 3 );
// Bios Call: Force the IOP to do a Branch Test ASAP.
// Important! This helps prevent game freeze-ups during boot-up and stage loads.
MOV32MtoR( EAX, (uptr)&psxRegs.cycle );
MOV32RtoM( (uptr)&g_psxNextBranchCycle, EAX );
}
return;
}
@ -1021,7 +1018,7 @@ static void iPsxBranchTest(u32 newpc, u32 cpuBranch)
MOV32RtoM((uptr)&psxRegs.cycle, ECX); // update cycles
MOV32RtoM((uptr)&psxCycleEE, EAX);
j8Ptr[2] = JNS8( 0 ); // jump if no, on (psxCycleEE - blockCycles*8) < 0
j8Ptr[2] = JG8( 0 ); // jump if psxCycleEE > blockCycles*8
if( REC_INC_STACK )
ADD64ItoR(ESP, REC_INC_STACK);
@ -1072,9 +1069,12 @@ void rpsxSYSCALL()
CMP32ItoM((uptr)&psxRegs.pc, psxpc-4);
j8Ptr[0] = JE8(0);
ADD32ItoM((uptr)&psxRegs.cycle, psxScaleBlockCycles() );
SUB32ItoM((uptr)&psxCycleEE, psxScaleBlockCycles()*8 );
JMP32((uptr)psxDispatcherReg - ( (uptr)x86Ptr + 5 ));
// jump target for skipping blockCycle updates
x86SetJ8(j8Ptr[0]);
//if (!psxbranch) psxbranch = 2;
@ -1117,9 +1117,6 @@ void psxRecompileNextInstruction(int delayslot)
BASEBLOCK* pblock = PSX_GETBLOCK(psxpc);
//if( psxpc == 0x5264 )
// SysPrintf( "Woot!" );
// need *ppblock != s_pCurBlock because of branches
if( pblock->pFnptr != 0 && pblock->startpc != s_pCurBlock->startpc ) {

View File

@ -1776,16 +1776,15 @@ static void iBranchTest(u32 newpc, u32 cpuBranch)
//CALLFunc((uptr)testfpu);
#endif
if( !USE_FAST_BRANCHES || cpuBranch ) {
MOV32MtoR(ECX, (uptr)&cpuRegs.cycle);
ADD32ItoR(ECX, s_nBlockCycles * EECYCLE_MULT); // NOTE: multiply cycles here, 6/5 ratio stops pal ffx from randomly crashing, but crashes jakI
MOV32RtoM((uptr)&cpuRegs.cycle, ECX); // update cycles
}
else {
if( USE_FAST_BRANCHES && (cpuBranch==0) )
{
ADD32ItoM((uptr)&cpuRegs.cycle, s_nBlockCycles*9/8);
return;
}
MOV32MtoR(ECX, (uptr)&cpuRegs.cycle);
ADD32ItoR(ECX, s_nBlockCycles * EECYCLE_MULT);
MOV32RtoM((uptr)&cpuRegs.cycle, ECX); // update cycles
SUB32MtoR(ECX, (uptr)&g_nextBranchCycle);
// check if should branch