Reworked the register freezes and mutex locks in the MTGS so that multiple threads can send packets to the GS concurrently without corrupting the ringbuffer. (Packet locking is currently commented out, since it is not actually needed yet.)

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1456 96395faa-99c1-11dd-bbfe-3dabce05a288
2009-07-03 06:05:48 +00:00 · 2009-07-03 06:05:48 +00:00 · 25aa15a0a2
parent c13cc555be
commit 25aa15a0a2
7 changed files with 66 additions and 38 deletions
--- a/pcsx2/FiFo.cpp
+++ b/pcsx2/FiFo.cpp
@ -173,6 +173,7 @@ void __fastcall WriteFIFO_page_6(u32 mem, const mem128_t *value)
 	psHu64(0x6000) = value[0];
 	psHu64(0x6008) = value[1];
 	FreezeRegs(1);
 	if( mtgsThread != NULL )
 	{
 		const uint count = mtgsThread->PrepDataPacket( GIF_PATH_3, value, 1 );
@ -184,10 +185,9 @@ void __fastcall WriteFIFO_page_6(u32 mem, const mem128_t *value)
 	}
 	else
 	{
 		FreezeRegs(1);
 		GSGIFTRANSFER3((u32*)value, 1);
 		FreezeRegs(0);
 	}
 	FreezeRegs(0);
 }
 void __fastcall WriteFIFO_page_7(u32 mem, const mem128_t *value)
--- a/pcsx2/GS.h
+++ b/pcsx2/GS.h
@ -176,9 +176,14 @@ protected:
 	uint m_RingPos;		// cur pos gs is reading from
 	uint m_WritePos;	// cur pos ee thread is writing to
-	Threading::Semaphore m_post_InitDone;	// used to regulate thread startup and gsInit
+	// used to regulate thread startup and gsInit
 	Threading::Semaphore m_post_InitDone;
 	Threading::MutexLock m_lock_RingRestart;
 	// used to keep multiple threads from sending packets to the ringbuffer concurrently.
 	Threading::MutexLock m_PacketLocker;
 	// Used to delay the sending of events.  Performance is better if the ringbuffer
 	// has more than one command in it when the thread is kicked.
 	int m_CopyCommandTally;
--- a/pcsx2/MTGS.cpp
+++ b/pcsx2/MTGS.cpp
@ -197,6 +197,7 @@ mtgsThreadObject::mtgsThreadObject() :
 ,	m_post_InitDone()
 ,	m_lock_RingRestart()
 ,	m_PacketLocker( true )		// true - makes it a recursive lock
 ,	m_CopyCommandTally( 0 )
 ,	m_CopyDataTally( 0 )
@ -712,14 +713,12 @@ int mtgsThreadObject::Callback()
 // Signals the GS thread via SetEvent() and then loops, yielding a timeslice on
 // each pass, until m_RingPos equals m_WritePos.
 // NOTE(review): presumably equality means the GS thread has consumed everything
 // queued in the ringbuffer -- confirm against the ring reader.
 void mtgsThreadObject::WaitGS()
 {
 	// Freeze registers because some kernel code likes to destroy them
 	FreezeRegs(1);
 	SetEvent();
 	// Both positions are read through volatize() on every iteration
 	// (presumably to force a fresh load each time -- TODO confirm).
 	while( volatize(m_RingPos) != volatize(m_WritePos) )
 	{
 		Timeslice();
 		//SpinWait();
 	}
 	FreezeRegs(0);
 }
 // Sets the gsEvent flag and releases a timeslice.
@ -733,8 +732,6 @@ void mtgsThreadObject::SetEvent()
 void mtgsThreadObject::PrepEventWait()
 {
 	// Freeze registers because some kernel code likes to destroy them
 	FreezeRegs(1);
 	//Console::Notice( "MTGS Stall!  EE waits for nothing! ... except your GPU sometimes." );
 	SetEvent();
 	Timeslice();
@ -742,7 +739,6 @@ void mtgsThreadObject::PrepEventWait()
 // Calls FreezeRegs(0) and does nothing else.
 // NOTE(review): looks like the counterpart to the FreezeRegs(1) issued by
 // PrepEventWait() -- confirm that callers always pair the two.
 void mtgsThreadObject::PostEventWait() const
 {
 	FreezeRegs(0);
 }
 u8* mtgsThreadObject::GetDataPacketPtr() const
@ -784,29 +780,29 @@ void mtgsThreadObject::SendDataPacket()
 	m_packet_size = 0;
-	if( m_RingBufferIsBusy ) return;
+	if( !m_RingBufferIsBusy )
 	// The ringbuffer is currently in a resting state, so if enough copies have
 	// queued up then go ahead and initiate the GS thread..
 	// Optimization notes:  What we're doing here is initiating a "burst" mode on
 	// the thread, which improves its cache hit performance and makes it more friendly
 	// to other threads in Pcsx2 and such.  Primary is the Command Tally, and then a 
 	// secondary data size threshold for games that do lots of texture swizzling.
 	// 16 was the best value I found so far.
 	// tested values:
 	//  24 - very slow on HT machines (+5% drop in fps)
 	//  8 - roughly 2% slower on HT machines.
 	m_CopyDataTally += m_packet_size;
 	if( ( m_CopyDataTally > 0x8000 ) || ( ++m_CopyCommandTally > 16 ) )
 	{
-		FreezeRegs(1);
+		// The ringbuffer is currently in a resting state, so if enough copies have
-		//Console::Status( "MTGS Kick! DataSize : 0x%5.8x, CommandTally : %d", m_CopyDataTally, m_CopyCommandTally );
+		// queued up then go ahead and initiate the GS thread..
-		SetEvent();
+		
-		FreezeRegs(0);
+		// Optimization notes:  What we're doing here is initiating a "burst" mode on
 		// the thread, which improves its cache hit performance and makes it more friendly
 		// to other threads in Pcsx2 and such.  Primary is the Command Tally, and then a 
 		// secondary data size threshold for games that do lots of texture swizzling.
 		// 16 was the best value I found so far.
 		// tested values:
 		//  24 - very slow on HT machines (+5% drop in fps)
 		//  8 - roughly 2% slower on HT machines.
 		m_CopyDataTally += m_packet_size;
 		if( ( m_CopyDataTally > 0x8000 ) || ( ++m_CopyCommandTally > 16 ) )
 		{
 			//Console::Status( "MTGS Kick! DataSize : 0x%5.8x, CommandTally : %d", m_CopyDataTally, m_CopyCommandTally );
 			SetEvent();
 		}
 	}
 	//m_PacketLocker.Unlock();
 }
 int mtgsThreadObject::PrepDataPacket( GIF_PATH pathidx, const u64* srcdata, u32 size )
@ -840,6 +836,8 @@ static u32 GSRingBufCopySz = 0;
 //  size - size of the packet data, in smd128's
 int mtgsThreadObject::PrepDataPacket( GIF_PATH pathidx, const u8* srcdata, u32 size )
 {
 	//m_PacketLocker.Lock();
 #ifdef PCSX2_GSRING_TX_STATS
 	ringtx_s += size;
 	ringtx_s_ulg += size&0x7F;
@ -1064,6 +1062,8 @@ __forceinline void mtgsThreadObject::_FinishSimplePacket( uint future_writepos )
 void mtgsThreadObject::SendSimplePacket( GS_RINGTYPE type, int data0, int data1, int data2 )
 {
 	//ScopedLock locker( m_PacketLocker );
 	const uint thefuture = _PrepForSimplePacket();
 	PacketTagType& tag = (PacketTagType&)m_RingBuffer[m_WritePos];
@ -1077,6 +1077,8 @@ void mtgsThreadObject::SendSimplePacket( GS_RINGTYPE type, int data0, int data1,
 void mtgsThreadObject::SendPointerPacket( GS_RINGTYPE type, u32 data0, void* data1 )
 {
 	//ScopedLock locker( m_PacketLocker );
 	const uint thefuture = _PrepForSimplePacket();
 	PacketTagType& tag = (PacketTagType&)m_RingBuffer[m_WritePos];
--- a/pcsx2/ThreadTools.cpp
+++ b/pcsx2/ThreadTools.cpp
@ -38,6 +38,7 @@ namespace Threading
 	// Clears the m_terminated flag and spawns the underlying pthread, which runs
 	// _internal_callback with this object as its argument.
 	// Throws Exception::ThreadCreationError when pthread_create reports failure.
 	void Thread::Start()
 	{
 		m_terminated = false;
 		const int createResult = pthread_create( &m_thread, NULL, _internal_callback, this );
 		if( createResult != 0 )
 		{
 			throw Exception::ThreadCreationError();
 		}
 	}
@ -135,6 +136,26 @@ namespace Threading
 		err = pthread_mutex_init( &mutex, NULL );
 	}
 	// Constructs the lock, optionally as a recursive mutex.
 	//   isRecursive - when true the mutex is created with PTHREAD_MUTEX_RECURSIVE;
 	//                 when false it is created with the default attributes
 	//                 (NULL attribute pointer).
 	// Errors from the pthread calls are not reported to the caller, as before.
 	MutexLock::MutexLock( bool isRecursive )
 	{
 		if( !isRecursive )
 		{
 			pthread_mutex_init( &mutex, NULL );
 			return;
 		}
 		pthread_mutexattr_t mutexAttribute;
 		if( pthread_mutexattr_init( &mutexAttribute ) != 0 )
 		{
 			// The attribute object could not be set up.  Handing an uninitialized
 			// attribute to pthread_mutex_init is undefined, so fall back to a
 			// default mutex instead of leaving `mutex` in an unusable state.
 			pthread_mutex_init( &mutex, NULL );
 			return;
 		}
 		pthread_mutexattr_settype( &mutexAttribute, PTHREAD_MUTEX_RECURSIVE );
 		pthread_mutex_init( &mutex, &mutexAttribute );
 		// The mutex keeps its own copy of the settings; release the attribute
 		// object so implementations that allocate for it do not leak.
 		pthread_mutexattr_destroy( &mutexAttribute );
 	}
 	MutexLock::~MutexLock()
 	{
 		pthread_mutex_destroy( &mutex );
--- a/pcsx2/Threading.h
+++ b/pcsx2/Threading.h
@ -61,6 +61,7 @@ namespace Threading
 		pthread_mutex_t mutex;
 		MutexLock();
 		MutexLock( bool isRecursive );
 		~MutexLock();
 		void Lock();
--- a/pcsx2/VU0.cpp
+++ b/pcsx2/VU0.cpp
@ -178,8 +178,7 @@ void CTC2() {
 			break;
 		case REG_CMSAR1: // REG_CMSAR1
 			if (!(VU0.VI[REG_VPU_STAT].UL & 0x100) ) {
-				VU1.VI[REG_TPC].UL = cpuRegs.GPR.r[_Rt_].US[0];
+				vu1ExecMicro(cpuRegs.GPR.r[_Rt_].US[0]);	// Execute VU1 Micro SubRoutine
 				vu1ExecMicro(VU1.VI[REG_TPC].UL);	// Execute VU1 Micro SubRoutine
 			}
 			break;
 		default:
--- a/pcsx2/VifDma.cpp
+++ b/pcsx2/VifDma.cpp
@ -1896,6 +1896,7 @@ static int __fastcall Vif1TransDirectHL(u32 *data)
 			}
 		}
 		FreezeRegs(1);
 		if (mtgsThread != NULL)
 		{
 			// copy 16 bytes the fast way:
@ -1910,10 +1911,9 @@ static int __fastcall Vif1TransDirectHL(u32 *data)
 		}
 		else
 		{
 			FreezeRegs(1);
 			GSGIFTRANSFER2((u32*)splittransfer[0], 1);
 			FreezeRegs(0);
 		}
 		FreezeRegs(0);
 		if (vif1.tag.size == 0) vif1.cmd = 0;
 		splitptr = 0;
@ -1945,6 +1945,7 @@ static int __fastcall Vif1TransDirectHL(u32 *data)
 	//TODO: ret is guaranteed to be qword aligned ?
 	FreezeRegs(1);
 	if (mtgsThread != NULL)
 	{
 		//unaligned copy.VIF handling is -very- messy, so i'l use this code til i fix it :)
@ -1955,10 +1956,9 @@ static int __fastcall Vif1TransDirectHL(u32 *data)
 	}
 	else
 	{
 		FreezeRegs(1);
 		GSGIFTRANSFER2(data, (ret >> 2));
 		FreezeRegs(0);
 	}
 	FreezeRegs(0);
 	return ret;
 }