From 07a6979176471f03d3d0e0c56732041709f0d1d2 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Thu, 20 Nov 2008 06:30:26 +0000 Subject: [PATCH] Resolved random crashing in MTGS modes, and added some handy debugging items to the MTGS code along the way. git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@349 a6443dda-0b58-4228-96e9-037be469359c --- pcsx2/Counters.c | 12 +-- pcsx2/GS.cpp | 268 +++++++++++++++++++++++++++++++---------------- pcsx2/GS.h | 1 - pcsx2/Misc.c | 8 ++ pcsx2/Misc.h | 9 +- pcsx2/R5900.c | 6 +- 6 files changed, 191 insertions(+), 113 deletions(-) diff --git a/pcsx2/Counters.c b/pcsx2/Counters.c index f5393d779e..788d7e270f 100644 --- a/pcsx2/Counters.c +++ b/pcsx2/Counters.c @@ -476,19 +476,13 @@ static __forceinline void VSyncStart(u32 sCycle) // VSync Start if (Config.Patch) applypatch(1); // Apply patches (ToDo: clean up patch code) } +extern void GSPostVsyncEnd(); + static __forceinline void VSyncEnd(u32 sCycle) // VSync End { iFrame++; - *(u32*)(PS2MEM_GS+0x1000) ^= 0x2000; // swap the vsync field - // wait until GS stops - if( CHECK_MULTIGS ) GSRingBufSimplePacket(GS_RINGTYPE_VSYNC, (*(u32*)(PS2MEM_GS+0x1000)&0x2000), 0, 0); - else { - GSvsync((*(u32*)(PS2MEM_GS+0x1000)&0x2000)); - // update here on single thread mode *OBSOLETE* - if( PAD1update != NULL ) PAD1update(0); - if( PAD2update != NULL ) PAD2update(1); - } + GSPostVsyncEnd(); hwIntcIrq(3); // HW Irq psxVBlankEnd(); // psxCounters vBlank End diff --git a/pcsx2/GS.cpp b/pcsx2/GS.cpp index 5cc8b56cf5..bbe397c3d6 100644 --- a/pcsx2/GS.cpp +++ b/pcsx2/GS.cpp @@ -32,6 +32,20 @@ using namespace std; +#ifndef _WIN32 +// fixme - Linux needs a proper implementation of locks using pthreads. +// a set of placebo types and functions might do for now though. + +typedef int CRITICAL_SECTION; + +void EnterCriticalSection( CRITICAL_SECTION* handle ) {} +void LeaveCriticalSection( CRITICAL_SECTION* handle ) {} + +void InitializeCriticalSection( CRITICAL_SECTION* handle ) {} +void DeleteCriticalSection( CRITICAL_SECTION* handle ) {} +#endif + + extern "C" { #define PLUGINtypedefs // for GSgifTransfer1 @@ -106,9 +120,11 @@ void* GSThreadProc(void* idp); #endif -bool gsHasToExit=false; int g_FFXHack=0; +static bool gsHasToExit=false; +static LONG g_pGSvSyncCount = 0; + #ifdef PCSX2_DEVBUILD // GS Playback @@ -162,11 +178,19 @@ static GIFTAG g_path[3]; static PCSX2_ALIGNED16(u8 s_byRegs[3][16]); // g_pGSRingPos == g_pGSWritePos => fifo is empty -u8* g_pGSRingPos = NULL, // cur pos ring is at +static u8* g_pGSRingPos = NULL, // cur pos ring is at *g_pGSWritePos = NULL; // cur pos ee thread is at +CRITICAL_SECTION gsRestartLock; + extern int g_nCounters[]; +#ifdef RINGBUF_DEBUG_STACK +#include +std::list ringposStack; +CRITICAL_SECTION stackLock; +#endif + void gsInit() { if( CHECK_MULTIGS ) { @@ -182,11 +206,10 @@ void gsInit() exit(0); } - // I guess the InterlockedExchange below is just for practice... - // ... seeing how we haven't even STARTED the thread yet! - memcpy(g_MTGSMem, PS2MEM_GS, sizeof(g_MTGSMem)); - InterlockedExchangePointer((volatile PVOID*)&g_pGSWritePos, GS_RINGBUFFERBASE); + + g_pGSWritePos = GS_RINGBUFFERBASE; + //InterlockedExchangePointer((volatile PVOID*)&g_pGSWritePos, GS_RINGBUFFERBASE); if( GSsetBaseMem != NULL ) GSsetBaseMem(g_MTGSMem); @@ -206,6 +229,12 @@ void gsInit() g_hGSOpen = CreateEvent(NULL, FALSE, FALSE, NULL); g_hGSDone = CreateEvent(NULL, FALSE, FALSE, NULL); + InitializeCriticalSection( &gsRestartLock ); + +#ifdef RINGBUF_DEBUG_STACK + InitializeCriticalSection( &stackLock ); +#endif + SysPrintf("gsInit\n"); g_hVuGsThread = CreateThread(NULL, 0, GSThreadProc, NULL, 0, NULL); @@ -265,7 +294,6 @@ void GSRINGBUF_DONECOPY(const u8 *mem, u32 size) if( !CHECK_DUALCORE ) GS_SETEVENT(); } - void gsShutdown() { if( CHECK_MULTIGS ) { @@ -281,6 +309,12 @@ void gsShutdown() CloseHandle(g_hVuGSExit); CloseHandle(g_hGSOpen); CloseHandle(g_hGSDone); + + DeleteCriticalSection(&gsRestartLock); +#ifdef RINGBUF_DEBUG_STACK + DeleteCriticalSection(&stackLock); +#endif + #else InterlockedExchange((long*)&g_nGsThreadExit, 1); sem_post(&g_semGsThread); @@ -315,6 +349,7 @@ void gsShutdown() GSclose(); } + u8* GSRingBufCopy(void* mem, u32 size, u32 type) { // Note on volatiles: g_pGSWritePos is not modified by the GS thread, @@ -335,23 +370,36 @@ u8* GSRingBufCopy(void* mem, u32 size, u32 type) // the start of the ring buffer (it's a lot easier than trying // to wrap the packet around the end of the buffer). - // We have to be careful not to leapfrog our readposition. If it's + // We have to be careful not to leapfrog our read-position. If it's // greater than the current write position then we need to stall - // until it loops around: + // until it loops around to the beginning of the buffer - //const u8* readps = *(volatile PU8*)&g_pGSRingPos; - while( *(volatile PU8*)&g_pGSRingPos > writepos ) // && readpos == GS_RINGBUFFERBASE ) + while( *(volatile PU8*)&g_pGSRingPos > writepos ) gsSetEventWait(); + // Wait for the readpos to go past the start of the buffer + // Otherwise it'll stop dead in its tracks when we set the new write + // position below (bad!) + while( *(volatile PU8*)&g_pGSRingPos == GS_RINGBUFFERBASE) + gsSetEventWait(); + + EnterCriticalSection( &gsRestartLock ); GSRingBufSimplePacket( GS_RINGTYPE_RESTART, 0, 0, 0 ); + g_pGSWritePos = writepos = GS_RINGBUFFERBASE; + LeaveCriticalSection( &gsRestartLock ); - writepos = GS_RINGBUFFERBASE; + // two conditionals in the following while() loop, so precache + // the readpos for more efficient behavior: + const u8* readpos = *(volatile PU8*)&g_pGSRingPos; - // stall until the read position is past the end of our incoming block: - while( writepos+size >= *(volatile PU8*)&g_pGSRingPos ) + // stall until the read position is past the end of our incoming block, + // or until it reaches the new write position (signals an empty buffer) + // (the second part should never happen actually, but safe is safe!) + while( writepos+size >= readpos && readpos != writepos ) + { gsSetEventWait(); - - InterlockedExchangePointer((void**)&g_pGSWritePos, GS_RINGBUFFERBASE); + readpos = *(volatile PU8*)&g_pGSRingPos; + } } else if( writepos + size == GS_RINGBUFFEREND ) { @@ -360,23 +408,31 @@ u8* GSRingBufCopy(void* mem, u32 size, u32 type) // of a gsWaitGS (stalling the GS until the ring buffer emptied completely) and // no one noticed enough to fix it. :) + //SysPrintf( "MTGS > Perfect Fit!\n"); while( writepos < *(volatile PU8*)&g_pGSRingPos ) gsSetEventWait(); } else { - // two conditionals in the following while() loop, so precache - // the readpos for more efficient behavior: - const u8* readpos = *(volatile PU8*)&g_pGSRingPos; - // generic gs wait/stall. // Waits until the readpos is outside the scope of the write area. - while( writepos < readpos && writepos+size >= readpos ) // || (writepos+size == GS_RINGBUFFEREND && readpos == GS_RINGBUFFERBASE)) ) { + while( true ) { + // three conditionals in the following while() loop, so precache + // the readpos for more efficient behavior: + const u8* readpos = *(volatile PU8*)&g_pGSRingPos; + + if( writepos >= readpos ) break; + if( writepos+size < readpos ) break; + gsSetEventWait(); - readpos = *(volatile PU8*)&g_pGSRingPos; } } +#ifdef RINGBUF_DEBUG_STACK + EnterCriticalSection( &stackLock ); + ringposStack.push_front( (long)writepos ); + LeaveCriticalSection( &stackLock ); +#endif // just copy *(u32*)writepos = type | (((size-16)>>4)<<16); @@ -396,11 +452,18 @@ void GSRingBufSimplePacket(int type, int data0, int data1, int data2) while( future_writepos == *(volatile PU8*)&g_pGSRingPos ) gsSetEventWait(); +#ifdef RINGBUF_DEBUG_STACK + EnterCriticalSection( &stackLock ); + ringposStack.push_front( (long)writepos ); + LeaveCriticalSection( &stackLock ); +#endif + *(u32*)writepos = type; *(u32*)(writepos+4) = data0; *(u32*)(writepos+8) = data1; *(u32*)(writepos+12) = data2; + assert( future_writepos != *(volatile PU8*)&g_pGSRingPos ); InterlockedExchangePointer((void**)&g_pGSWritePos, future_writepos); if( !CHECK_DUALCORE ) @@ -424,6 +487,7 @@ void gsReset() #endif gsHasToExit=false; g_pGSRingPos = g_pGSWritePos; + g_pGSvSyncCount = 0; } memset(g_path, 0, sizeof(g_path)); @@ -474,7 +538,7 @@ void CSRwrite(u32 value) } if (value & 0x200) { // resetGS - //GSCSRr = 0x400E; // The host FIFO neeeds to be empty too or GSsync will fail (saqib) + //GSCSRr = 0x400E; // The host FIFO needs to be empty too or GSsync will fail (saqib) //GSIMR = 0xff00; if( GSgifSoftReset != NULL ) { GSgifSoftReset(7); @@ -525,15 +589,12 @@ extern void UpdateVSyncRate(); void gsWrite16(u32 mem, u16 value) { + GIF_LOG("GS write 16 at %8.8lx with data %8.8lx\n", mem, value); + switch (mem) { case 0x12000010: // GS_SMODE1 if((value & 0x6000) == 0x6000) Config.PsxType |= 1; // PAL else Config.PsxType &= ~1; // NTSC - *(u16*)PS2GS_BASE(mem) = value; - - if( CHECK_MULTIGS ) { - GSRingBufSimplePacket(GS_RINGTYPE_MEMWRITE16, mem&0x13ff, value, 0); - } UpdateVSyncRate(); break; @@ -541,38 +602,33 @@ void gsWrite16(u32 mem, u16 value) { case 0x12000020: // GS_SMODE2 if(value & 0x1) Config.PsxType |= 2; // Interlaced else Config.PsxType &= ~2; // Non-Interlaced - *(u16*)PS2GS_BASE(mem) = value; - - if( CHECK_MULTIGS ) { - GSRingBufSimplePacket(GS_RINGTYPE_MEMWRITE16, mem&0x13ff, value, 0); - } break; case 0x12001000: // GS_CSR CSRwrite( (CSRw&0xffff0000) | value); - break; + return; // do not write to MTGS memory case 0x12001002: // GS_CSR CSRwrite( (CSRw&0xffff) | ((u32)value<<16)); - break; + return; // do not write to MTGS memory case 0x12001010: // GS_IMR - SysPrintf("writing to IMR 16\n"); + //SysPrintf("writing to IMR 16\n"); IMRwrite(value); - break; - - default: - *(u16*)PS2GS_BASE(mem) = value; - - if( CHECK_MULTIGS ) { - GSRingBufSimplePacket(GS_RINGTYPE_MEMWRITE16, mem&0x13ff, value, 0); - } + return; // do not write to MTGS memory + } + + *(u16*)PS2GS_BASE(mem) = value; + + if( CHECK_MULTIGS ) { + GSRingBufSimplePacket(GS_RINGTYPE_MEMWRITE16, mem&0x13ff, value, 0); } - GIF_LOG("GS write 16 at %8.8lx with data %8.8lx\n", mem, value); } void gsWrite32(u32 mem, u32 value) { assert( !(mem&3)); + GIF_LOG("GS write 32 at %8.8lx with data %8.8lx\n", mem, value); + switch (mem) { case 0x12000010: // GS_SMODE1 if((value & 0x6000) == 0x6000) Config.PsxType |= 1; // PAL @@ -581,79 +637,60 @@ void gsWrite32(u32 mem, u32 value) UpdateVSyncRate(); - if( CHECK_MULTIGS ) { - GSRingBufSimplePacket(GS_RINGTYPE_MEMWRITE32, mem&0x13ff, value, 0); - } - break; case 0x12000020: // GS_SMODE2 if(value & 0x1) Config.PsxType |= 2; // Interlaced else Config.PsxType &= ~2; // Non-Interlaced - *(u32*)PS2GS_BASE(mem) = value; - if( CHECK_MULTIGS ) { - GSRingBufSimplePacket(GS_RINGTYPE_MEMWRITE32, mem&0x13ff, value, 0); - } break; case 0x12001000: // GS_CSR CSRwrite(value); - break; + return; case 0x12001010: // GS_IMR IMRwrite(value); - break; - default: - *(u32*)PS2GS_BASE(mem) = value; - - if( CHECK_MULTIGS ) { - GSRingBufSimplePacket(GS_RINGTYPE_MEMWRITE32, mem&0x13ff, value, 0); - } + return; + } + + *(u32*)PS2GS_BASE(mem) = value; + + if( CHECK_MULTIGS ) { + GSRingBufSimplePacket(GS_RINGTYPE_MEMWRITE32, mem&0x13ff, value, 0); } - GIF_LOG("GS write 32 at %8.8lx with data %8.8lx\n", mem, value); } void gsWrite64(u32 mem, u64 value) { + GIF_LOG("GS write 64 at %8.8lx with data %8.8lx_%8.8lx\n", mem, ((u32*)&value)[1], (u32)value); + switch (mem) { case 0x12000010: // GS_SMODE1 if((value & 0x6000) == 0x6000) Config.PsxType |= 1; // PAL else Config.PsxType &= ~1; // NTSC UpdateVSyncRate(); - *(u64*)PS2GS_BASE(mem) = value; - - if( CHECK_MULTIGS ) { - GSRingBufSimplePacket(GS_RINGTYPE_MEMWRITE64, mem&0x13ff, (u32)value, (u32)(value>>32)); - } break; case 0x12000020: // GS_SMODE2 if(value & 0x1) Config.PsxType |= 2; // Interlaced else Config.PsxType &= ~2; // Non-Interlaced - *(u64*)PS2GS_BASE(mem) = value; - - if( CHECK_MULTIGS ) { - GSRingBufSimplePacket(GS_RINGTYPE_MEMWRITE64, mem&0x13ff, (u32)value, 0); - } break; case 0x12001000: // GS_CSR CSRwrite((u32)value); - break; + return; case 0x12001010: // GS_IMR IMRwrite((u32)value); - break; - - default: - *(u64*)PS2GS_BASE(mem) = value; - - if( CHECK_MULTIGS ) { - GSRingBufSimplePacket(GS_RINGTYPE_MEMWRITE64, mem&0x13ff, (u32)value, (u32)(value>>32)); - } + return; + } + + *(u64*)PS2GS_BASE(mem) = value; + + if( CHECK_MULTIGS ) { + GSRingBufSimplePacket(GS_RINGTYPE_MEMWRITE64, mem&0x13ff, (u32)value, (u32)(value>>32)); } - GIF_LOG("GS write 64 at %8.8lx with data %8.8lx_%8.8lx\n", mem, ((u32*)&value)[1], (u32)value); } u8 gsRead8(u32 mem) @@ -1399,10 +1436,32 @@ int HasToExit() return (gsHasToExit!=0); } +extern "C" void GSPostVsyncEnd() +{ + *(u32*)(PS2MEM_GS+0x1000) ^= 0x2000; // swap the vsync field + + if( CHECK_MULTIGS ) + { + //while( *(volatile LONG*)&g_pGSvSyncCount >= 8 ) + // gsSetEventWait(); + + //InterlockedIncrement( (volatile LONG*)&g_pGSvSyncCount ); + //SysPrintf( " Sending VSync : %d \n", *(volatile LONG*)&g_pGSvSyncCount ); + GSRingBufSimplePacket(GS_RINGTYPE_VSYNC, (*(u32*)(PS2MEM_GS+0x1000)&0x2000), 0, 0); + } + else { + GSvsync((*(u32*)(PS2MEM_GS+0x1000)&0x2000)); + // update here on single thread mode *OBSOLETE* + if( PAD1update != NULL ) PAD1update(0); + if( PAD2update != NULL ) PAD2update(1); + } +} + #if defined(_WIN32) && !defined(WIN32_PTHREADS) //#pragma optimize ("",off) //needed for a working PGO build DWORD WINAPI GSThreadProc(LPVOID lpParam) { + u32 prevCmd=0; HANDLE handles[2] = { g_hGsEvent, g_hVuGSExit }; //SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_ABOVE_NORMAL); @@ -1479,18 +1538,30 @@ void* GSThreadProc(void* lpParam) u32 tag = *(u32*)g_pGSRingPos; u32 ringposinc = 16; +#ifdef RINGBUF_DEBUG_STACK + // pop a ringpos off the stack. It should match this one! + + EnterCriticalSection( &stackLock ); + long stackpos = ringposStack.back(); + assert( stackpos == (long)g_pGSRingPos ); + if( stackpos != (long)g_pGSRingPos ) + { + SysPrintf( "Holy Fuck ---------------> %x to %x\n", stackpos, (long)g_pGSRingPos ); + SysPrintf( " Prev Command : %x\n", prevCmd ); + } + prevCmd = tag; + ringposStack.pop_back(); + LeaveCriticalSection( &stackLock ); +#endif + switch( tag&0xffff ) { case GS_RINGTYPE_RESTART: InterlockedExchangePointer((volatile PVOID*)&g_pGSRingPos, GS_RINGBUFFERBASE); - - /*if( GS_RINGBUFFERBASE == writepos ) - { - // force the loop to break: - writepos = g_pGSRingPos; - break; - }*/ - + + // stall for a bit to let the MainThread have time to update the g_pGSWritePos. + EnterCriticalSection( &gsRestartLock ); + LeaveCriticalSection( &gsRestartLock ); continue; case GS_RINGTYPE_P1: @@ -1516,6 +1587,12 @@ void* GSThreadProc(void* lpParam) GSvsync(*(int*)(g_pGSRingPos+4)); if( PAD1update != NULL ) PAD1update(0); if( PAD2update != NULL ) PAD2update(1); + + //SysPrintf( " Receiving VSync : %d \n", *(volatile LONG*)&g_pGSvSyncCount ); + //InterlockedDecrement( (volatile LONG*)&g_pGSvSyncCount ); + + // vSyncCount should never dip below zero. + assert( *(volatile LONG*)&g_pGSvSyncCount >= 0 ); break; case GS_RINGTYPE_FRAMESKIP: @@ -1648,10 +1725,21 @@ void* GSThreadProc(void* lpParam) InterlockedExchangeAdd( (long*)&g_pGSRingPos, ringposinc ); assert( g_pGSRingPos <= GS_RINGBUFFEREND ); +#ifdef _WIN32 + InterlockedCompareExchangePointer( (volatile PVOID*)&g_pGSRingPos, GS_RINGBUFFERBASE, GS_RINGBUFFEREND ); +#else + // fixme - [TODO] - InterlockedCompareExchangePointer needs a linux implementation! if( g_pGSRingPos == GS_RINGBUFFEREND ) InterlockedExchangePointer((volatile PVOID*)&g_pGSRingPos, GS_RINGBUFFERBASE); +#endif } + // buffer is empty so our vsync must be zero. + + //if( *(volatile LONG*)&g_pGSvSyncCount != 0 ) + // SysPrintf( "MTGS > vSync count mismatch: %d\n", g_pGSvSyncCount ); + + //InterlockedExchange( (volatile LONG*)&g_pGSvSyncCount, 0 ); // process vu1 } diff --git a/pcsx2/GS.h b/pcsx2/GS.h index d3cbc1e208..1935e2a500 100644 --- a/pcsx2/GS.h +++ b/pcsx2/GS.h @@ -66,7 +66,6 @@ extern u8 g_RealGSMem[0x2000]; u8* GSRingBufCopy(void* mem, u32 size, u32 type); void GSRingBufSimplePacket(int type, int data0, int data1, int data2); -extern u8* g_pGSWritePos; //#ifdef PCSX2_DEVBUILD // use for debugging MTGS diff --git a/pcsx2/Misc.c b/pcsx2/Misc.c index 1bcfce399e..e1a26a663a 100644 --- a/pcsx2/Misc.c +++ b/pcsx2/Misc.c @@ -1061,3 +1061,11 @@ void injectIRX(char *filename){ rd[i].extInfoSize=0; } +__forceinline void _TIMESLICE() +{ +#ifdef _WIN32 + Sleep(0); +#else + usleep(500); +#endif +} diff --git a/pcsx2/Misc.h b/pcsx2/Misc.h index fd3e8ffc24..447f776363 100644 --- a/pcsx2/Misc.h +++ b/pcsx2/Misc.h @@ -384,14 +384,7 @@ static __forceinline long InterlockedExchangeAdd(long volatile* Addend, long Val //#endif // Timeslice releaser for those many idle loop spots through out PCSX2. -static __forceinline void _TIMESLICE() -{ -#ifdef _WIN32 - Sleep(0); -#else - usleep(500); -#endif -} +extern void _TIMESLICE(); #endif /* __MISC_H__ */ diff --git a/pcsx2/R5900.c b/pcsx2/R5900.c index 86040fa455..f9eadf47a2 100644 --- a/pcsx2/R5900.c +++ b/pcsx2/R5900.c @@ -416,7 +416,7 @@ static __forceinline void _cpuTestPERF() // fixme - The interpreter and recompiler both re-calculate these values // whenever they are read, so updating them at regular intervals *should be* // merely a common courtesy. But when I set them up to be called less - // frequently crashes happened. I'd like to figure out why someday. [Air] + // frequently some games would crash. I'd like to figure out why someday. [Air] if((cpuRegs.PERF.n.pccr & 0x800003E0) == 0x80000020) { cpuRegs.PERF.n.pcr0 += cpuRegs.cycle-s_iLastPERFCycle[0]; @@ -435,10 +435,6 @@ static __forceinline void _cpuTestPERF() // if cpuRegs.cycle is greater than this cycle, should check cpuBranchTest for updates u32 g_nextBranchCycle = 0; -// if non-zero, the EE uses a shorter wait cycle (effectively tightening EE/IOP code -// synchronization). Value decremented each branch test. -u32 g_eeTightenSync = 0; - // Shared portion of the branch test, called from both the Interpreter // and the recompiler. (moved here to help alleviate redundant code) static __forceinline void _cpuBranchTest_Shared()