Reworked the register freezes and mutex locks in the MTGS to make it thread-safe for concurrent threads sending packets to the GS. (Packet locking is currently commented out since it's not actually needed yet.)

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1456 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2009-07-03 06:05:48 +00:00
parent c13cc555be
commit 25aa15a0a2
7 changed files with 66 additions and 38 deletions

View File

@ -173,6 +173,7 @@ void __fastcall WriteFIFO_page_6(u32 mem, const mem128_t *value)
psHu64(0x6000) = value[0];
psHu64(0x6008) = value[1];
FreezeRegs(1);
if( mtgsThread != NULL )
{
const uint count = mtgsThread->PrepDataPacket( GIF_PATH_3, value, 1 );
@ -184,10 +185,9 @@ void __fastcall WriteFIFO_page_6(u32 mem, const mem128_t *value)
}
else
{
FreezeRegs(1);
GSGIFTRANSFER3((u32*)value, 1);
FreezeRegs(0);
}
FreezeRegs(0);
}
void __fastcall WriteFIFO_page_7(u32 mem, const mem128_t *value)

View File

@ -176,9 +176,14 @@ protected:
uint m_RingPos; // cur pos gs is reading from
uint m_WritePos; // cur pos ee thread is writing to
Threading::Semaphore m_post_InitDone; // used to regulate thread startup and gsInit
Threading::MutexLock m_lock_RingRestart;
// used to regulate thread startup and gsInit
Threading::Semaphore m_post_InitDone;
Threading::MutexLock m_lock_RingRestart;
// used to keep multiple threads from sending packets to the ringbuffer concurrently.
Threading::MutexLock m_PacketLocker;
// Used to delay the sending of events. Performance is better if the ringbuffer
// has more than one command in it when the thread is kicked.
int m_CopyCommandTally;

View File

@ -197,6 +197,7 @@ mtgsThreadObject::mtgsThreadObject() :
, m_post_InitDone()
, m_lock_RingRestart()
, m_PacketLocker( true ) // true - makes it a recursive lock
, m_CopyCommandTally( 0 )
, m_CopyDataTally( 0 )
@ -712,14 +713,12 @@ int mtgsThreadObject::Callback()
// Calls SetEvent() and then repeatedly yields via Timeslice() until the ring
// read position (m_RingPos) equals the ring write position (m_WritePos).
// The whole wait is bracketed by FreezeRegs(1) / FreezeRegs(0).
void mtgsThreadObject::WaitGS()
{
// Freeze registers because some kernel code likes to destroy them
FreezeRegs(1);
SetEvent();
// Both positions are passed through volatize() on every pass of the loop.
// NOTE(review): presumably volatize() forces a fresh memory read each time — confirm.
while( volatize(m_RingPos) != volatize(m_WritePos) )
{
Timeslice();
//SpinWait();
}
FreezeRegs(0);
}
// Sets the gsEvent flag and releases a timeslice.
@ -733,8 +732,6 @@ void mtgsThreadObject::SetEvent()
void mtgsThreadObject::PrepEventWait()
{
// Freeze registers because some kernel code likes to destroy them
FreezeRegs(1);
//Console::Notice( "MTGS Stall! EE waits for nothing! ... except your GPU sometimes." );
SetEvent();
Timeslice();
@ -742,7 +739,6 @@ void mtgsThreadObject::PrepEventWait()
// Issues FreezeRegs(0) and does nothing else.
// NOTE(review): presumably this ends a register freeze begun with FreezeRegs(1)
// before the event wait — confirm against the callers and FreezeRegs itself.
void mtgsThreadObject::PostEventWait() const
{
FreezeRegs(0);
}
u8* mtgsThreadObject::GetDataPacketPtr() const
@ -784,29 +780,29 @@ void mtgsThreadObject::SendDataPacket()
m_packet_size = 0;
if( m_RingBufferIsBusy ) return;
// The ringbuffer is currently in a resting state, so if enough copies have
// queued up then go ahead and initiate the GS thread..
// Optimization notes: What we're doing here is initiating a "burst" mode on
// the thread, which improves its cache hit performance and makes it more friendly
// to other threads in Pcsx2 and such. Primary is the Command Tally, and then a
// secondary data size threshold for games that do lots of texture swizzling.
// 16 was the best value I found so far.
// tested values:
// 24 - very slow on HT machines (+5% drop in fps)
// 8 - roughly 2% slower on HT machines.
m_CopyDataTally += m_packet_size;
if( ( m_CopyDataTally > 0x8000 ) || ( ++m_CopyCommandTally > 16 ) )
if( !m_RingBufferIsBusy )
{
FreezeRegs(1);
//Console::Status( "MTGS Kick! DataSize : 0x%5.8x, CommandTally : %d", m_CopyDataTally, m_CopyCommandTally );
SetEvent();
FreezeRegs(0);
// The ringbuffer is currently in a resting state, so if enough copies have
// queued up then go ahead and initiate the GS thread..
// Optimization notes: What we're doing here is initiating a "burst" mode on
// the thread, which improves its cache hit performance and makes it more friendly
// to other threads in Pcsx2 and such. Primary is the Command Tally, and then a
// secondary data size threshold for games that do lots of texture swizzling.
// 16 was the best value I found so far.
// tested values:
// 24 - very slow on HT machines (+5% drop in fps)
// 8 - roughly 2% slower on HT machines.
m_CopyDataTally += m_packet_size;
if( ( m_CopyDataTally > 0x8000 ) || ( ++m_CopyCommandTally > 16 ) )
{
//Console::Status( "MTGS Kick! DataSize : 0x%5.8x, CommandTally : %d", m_CopyDataTally, m_CopyCommandTally );
SetEvent();
}
}
//m_PacketLocker.Unlock();
}
int mtgsThreadObject::PrepDataPacket( GIF_PATH pathidx, const u64* srcdata, u32 size )
@ -840,6 +836,8 @@ static u32 GSRingBufCopySz = 0;
// size - size of the packet data, in smd128's
int mtgsThreadObject::PrepDataPacket( GIF_PATH pathidx, const u8* srcdata, u32 size )
{
//m_PacketLocker.Lock();
#ifdef PCSX2_GSRING_TX_STATS
ringtx_s += size;
ringtx_s_ulg += size&0x7F;
@ -1064,6 +1062,8 @@ __forceinline void mtgsThreadObject::_FinishSimplePacket( uint future_writepos )
void mtgsThreadObject::SendSimplePacket( GS_RINGTYPE type, int data0, int data1, int data2 )
{
//ScopedLock locker( m_PacketLocker );
const uint thefuture = _PrepForSimplePacket();
PacketTagType& tag = (PacketTagType&)m_RingBuffer[m_WritePos];
@ -1072,11 +1072,13 @@ void mtgsThreadObject::SendSimplePacket( GS_RINGTYPE type, int data0, int data1,
tag.data[1] = data1;
tag.data[2] = data2;
_FinishSimplePacket( thefuture );
_FinishSimplePacket( thefuture );
}
void mtgsThreadObject::SendPointerPacket( GS_RINGTYPE type, u32 data0, void* data1 )
{
//ScopedLock locker( m_PacketLocker );
const uint thefuture = _PrepForSimplePacket();
PacketTagType& tag = (PacketTagType&)m_RingBuffer[m_WritePos];

View File

@ -38,6 +38,7 @@ namespace Threading
// Clears the m_terminated flag and launches the thread through pthread_create,
// using _internal_callback as the entry point and this object as its argument.
// Throws Exception::ThreadCreationError when pthread_create reports a failure.
void Thread::Start()
{
	m_terminated = false;

	const int createResult = pthread_create( &m_thread, NULL, _internal_callback, this );
	if( createResult != 0 )
	{
		throw Exception::ThreadCreationError();
	}
}
@ -135,6 +136,26 @@ namespace Threading
err = pthread_mutex_init( &mutex, NULL );
}
// Constructs the mutex, optionally as a recursive one.
//
//   isRecursive - when true the mutex is created with PTHREAD_MUTEX_RECURSIVE;
//                 when false it is created with the default attributes
//                 (same as the parameterless constructor).
//
// Errors from the pthread calls are not reported to the caller, matching the
// parameterless constructor which also ignores the pthread_mutex_init result.
MutexLock::MutexLock( bool isRecursive )
{
	if( !isRecursive )
	{
		pthread_mutex_init( &mutex, NULL );
		return;
	}

	pthread_mutexattr_t mutexAttribute;
	if( pthread_mutexattr_init( &mutexAttribute ) != 0 )
	{
		// The attribute object is unusable if its init failed, so passing it on to
		// pthread_mutex_init would be undefined.  Fall back to a default mutex so
		// that 'mutex' is at least a valid object.
		// NOTE(review): the fallback mutex is not recursive — decide whether this
		// should raise an error instead.
		pthread_mutex_init( &mutex, NULL );
		return;
	}

	// A failure here leaves the attribute at its default type; it is ignored,
	// as before.
	pthread_mutexattr_settype( &mutexAttribute, PTHREAD_MUTEX_RECURSIVE );
	pthread_mutex_init( &mutex, &mutexAttribute );

	// The attribute object is only needed while the mutex is being initialized;
	// release it so that every mutexattr_init is paired with a destroy.
	pthread_mutexattr_destroy( &mutexAttribute );
}
MutexLock::~MutexLock()
{
pthread_mutex_destroy( &mutex );
@ -149,7 +170,7 @@ namespace Threading
{
pthread_mutex_unlock( &mutex );
}
//////////////////////////////////////////////////////////////////////
// define some overloads for InterlockedExchanges
// for commonly used types, like u32 and s32.

View File

@ -61,6 +61,7 @@ namespace Threading
pthread_mutex_t mutex;
MutexLock();
MutexLock( bool isRecursive );
~MutexLock();
void Lock();

View File

@ -178,8 +178,7 @@ void CTC2() {
break;
case REG_CMSAR1: // REG_CMSAR1
if (!(VU0.VI[REG_VPU_STAT].UL & 0x100) ) {
VU1.VI[REG_TPC].UL = cpuRegs.GPR.r[_Rt_].US[0];
vu1ExecMicro(VU1.VI[REG_TPC].UL); // Execute VU1 Micro SubRoutine
vu1ExecMicro(cpuRegs.GPR.r[_Rt_].US[0]); // Execute VU1 Micro SubRoutine
}
break;
default:

View File

@ -1896,6 +1896,7 @@ static int __fastcall Vif1TransDirectHL(u32 *data)
}
}
FreezeRegs(1);
if (mtgsThread != NULL)
{
// copy 16 bytes the fast way:
@ -1910,10 +1911,9 @@ static int __fastcall Vif1TransDirectHL(u32 *data)
}
else
{
FreezeRegs(1);
GSGIFTRANSFER2((u32*)splittransfer[0], 1);
FreezeRegs(0);
}
FreezeRegs(0);
if (vif1.tag.size == 0) vif1.cmd = 0;
splitptr = 0;
@ -1945,6 +1945,7 @@ static int __fastcall Vif1TransDirectHL(u32 *data)
//TODO: ret is guaranteed to be qword aligned ?
FreezeRegs(1);
if (mtgsThread != NULL)
{
//unaligned copy.VIF handling is -very- messy, so i'l use this code til i fix it :)
@ -1955,10 +1956,9 @@ static int __fastcall Vif1TransDirectHL(u32 *data)
}
else
{
FreezeRegs(1);
GSGIFTRANSFER2(data, (ret >> 2));
FreezeRegs(0);
}
FreezeRegs(0);
return ret;
}