mirror of https://github.com/PCSX2/pcsx2.git
Reworked the register freezes and mutex locks in the MTGS to make it thread-safe when concurrent threads send packets to the GS. (Packet locking is currently commented out since it's not actually needed yet.)
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1456 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
c13cc555be
commit
25aa15a0a2
|
@ -173,6 +173,7 @@ void __fastcall WriteFIFO_page_6(u32 mem, const mem128_t *value)
|
|||
psHu64(0x6000) = value[0];
|
||||
psHu64(0x6008) = value[1];
|
||||
|
||||
FreezeRegs(1);
|
||||
if( mtgsThread != NULL )
|
||||
{
|
||||
const uint count = mtgsThread->PrepDataPacket( GIF_PATH_3, value, 1 );
|
||||
|
@ -184,10 +185,9 @@ void __fastcall WriteFIFO_page_6(u32 mem, const mem128_t *value)
|
|||
}
|
||||
else
|
||||
{
|
||||
FreezeRegs(1);
|
||||
GSGIFTRANSFER3((u32*)value, 1);
|
||||
FreezeRegs(0);
|
||||
}
|
||||
FreezeRegs(0);
|
||||
}
|
||||
|
||||
void __fastcall WriteFIFO_page_7(u32 mem, const mem128_t *value)
|
||||
|
|
|
@ -176,9 +176,14 @@ protected:
|
|||
uint m_RingPos; // cur pos gs is reading from
|
||||
uint m_WritePos; // cur pos ee thread is writing to
|
||||
|
||||
Threading::Semaphore m_post_InitDone; // used to regulate thread startup and gsInit
|
||||
Threading::MutexLock m_lock_RingRestart;
|
||||
// used to regulate thread startup and gsInit
|
||||
Threading::Semaphore m_post_InitDone;
|
||||
|
||||
Threading::MutexLock m_lock_RingRestart;
|
||||
|
||||
// used to keep multiple threads from sending packets to the ringbuffer concurrently.
|
||||
Threading::MutexLock m_PacketLocker;
|
||||
|
||||
// Used to delay the sending of events. Performance is better if the ringbuffer
|
||||
// has more than one command in it when the thread is kicked.
|
||||
int m_CopyCommandTally;
|
||||
|
|
|
@ -197,6 +197,7 @@ mtgsThreadObject::mtgsThreadObject() :
|
|||
|
||||
, m_post_InitDone()
|
||||
, m_lock_RingRestart()
|
||||
, m_PacketLocker( true ) // true - makes it a recursive lock
|
||||
|
||||
, m_CopyCommandTally( 0 )
|
||||
, m_CopyDataTally( 0 )
|
||||
|
@ -712,14 +713,12 @@ int mtgsThreadObject::Callback()
|
|||
void mtgsThreadObject::WaitGS()
|
||||
{
|
||||
// Freeze registers because some kernel code likes to destroy them
|
||||
FreezeRegs(1);
|
||||
SetEvent();
|
||||
while( volatize(m_RingPos) != volatize(m_WritePos) )
|
||||
{
|
||||
Timeslice();
|
||||
//SpinWait();
|
||||
}
|
||||
FreezeRegs(0);
|
||||
}
|
||||
|
||||
// Sets the gsEvent flag and releases a timeslice.
|
||||
|
@ -733,8 +732,6 @@ void mtgsThreadObject::SetEvent()
|
|||
|
||||
void mtgsThreadObject::PrepEventWait()
|
||||
{
|
||||
// Freeze registers because some kernel code likes to destroy them
|
||||
FreezeRegs(1);
|
||||
//Console::Notice( "MTGS Stall! EE waits for nothing! ... except your GPU sometimes." );
|
||||
SetEvent();
|
||||
Timeslice();
|
||||
|
@ -742,7 +739,6 @@ void mtgsThreadObject::PrepEventWait()
|
|||
|
||||
void mtgsThreadObject::PostEventWait() const
|
||||
{
|
||||
FreezeRegs(0);
|
||||
}
|
||||
|
||||
u8* mtgsThreadObject::GetDataPacketPtr() const
|
||||
|
@ -784,29 +780,29 @@ void mtgsThreadObject::SendDataPacket()
|
|||
|
||||
m_packet_size = 0;
|
||||
|
||||
if( m_RingBufferIsBusy ) return;
|
||||
|
||||
// The ringbuffer is current in a resting state, so if enough copies have
|
||||
// queued up then go ahead and initiate the GS thread..
|
||||
|
||||
// Optimization notes: What we're doing here is initiating a "burst" mode on
|
||||
// the thread, which improves its cache hit performance and makes it more friendly
|
||||
// to other threads in Pcsx2 and such. Primary is the Command Tally, and then a
|
||||
// secondary data size threshold for games that do lots of texture swizzling.
|
||||
|
||||
// 16 was the best value I found so far.
|
||||
// tested values:
|
||||
// 24 - very slow on HT machines (+5% drop in fps)
|
||||
// 8 - roughly 2% slower on HT machines.
|
||||
|
||||
m_CopyDataTally += m_packet_size;
|
||||
if( ( m_CopyDataTally > 0x8000 ) || ( ++m_CopyCommandTally > 16 ) )
|
||||
if( !m_RingBufferIsBusy )
|
||||
{
|
||||
FreezeRegs(1);
|
||||
//Console::Status( "MTGS Kick! DataSize : 0x%5.8x, CommandTally : %d", m_CopyDataTally, m_CopyCommandTally );
|
||||
SetEvent();
|
||||
FreezeRegs(0);
|
||||
// The ringbuffer is current in a resting state, so if enough copies have
|
||||
// queued up then go ahead and initiate the GS thread..
|
||||
|
||||
// Optimization notes: What we're doing here is initiating a "burst" mode on
|
||||
// the thread, which improves its cache hit performance and makes it more friendly
|
||||
// to other threads in Pcsx2 and such. Primary is the Command Tally, and then a
|
||||
// secondary data size threshold for games that do lots of texture swizzling.
|
||||
|
||||
// 16 was the best value I found so far.
|
||||
// tested values:
|
||||
// 24 - very slow on HT machines (+5% drop in fps)
|
||||
// 8 - roughly 2% slower on HT machines.
|
||||
|
||||
m_CopyDataTally += m_packet_size;
|
||||
if( ( m_CopyDataTally > 0x8000 ) || ( ++m_CopyCommandTally > 16 ) )
|
||||
{
|
||||
//Console::Status( "MTGS Kick! DataSize : 0x%5.8x, CommandTally : %d", m_CopyDataTally, m_CopyCommandTally );
|
||||
SetEvent();
|
||||
}
|
||||
}
|
||||
//m_PacketLocker.Unlock();
|
||||
}
|
||||
|
||||
int mtgsThreadObject::PrepDataPacket( GIF_PATH pathidx, const u64* srcdata, u32 size )
|
||||
|
@ -840,6 +836,8 @@ static u32 GSRingBufCopySz = 0;
|
|||
// size - size of the packet data, in smd128's
|
||||
int mtgsThreadObject::PrepDataPacket( GIF_PATH pathidx, const u8* srcdata, u32 size )
|
||||
{
|
||||
//m_PacketLocker.Lock();
|
||||
|
||||
#ifdef PCSX2_GSRING_TX_STATS
|
||||
ringtx_s += size;
|
||||
ringtx_s_ulg += size&0x7F;
|
||||
|
@ -1064,6 +1062,8 @@ __forceinline void mtgsThreadObject::_FinishSimplePacket( uint future_writepos )
|
|||
|
||||
void mtgsThreadObject::SendSimplePacket( GS_RINGTYPE type, int data0, int data1, int data2 )
|
||||
{
|
||||
//ScopedLock locker( m_PacketLocker );
|
||||
|
||||
const uint thefuture = _PrepForSimplePacket();
|
||||
PacketTagType& tag = (PacketTagType&)m_RingBuffer[m_WritePos];
|
||||
|
||||
|
@ -1072,11 +1072,13 @@ void mtgsThreadObject::SendSimplePacket( GS_RINGTYPE type, int data0, int data1,
|
|||
tag.data[1] = data1;
|
||||
tag.data[2] = data2;
|
||||
|
||||
_FinishSimplePacket( thefuture );
|
||||
_FinishSimplePacket( thefuture );
|
||||
}
|
||||
|
||||
void mtgsThreadObject::SendPointerPacket( GS_RINGTYPE type, u32 data0, void* data1 )
|
||||
{
|
||||
//ScopedLock locker( m_PacketLocker );
|
||||
|
||||
const uint thefuture = _PrepForSimplePacket();
|
||||
PacketTagType& tag = (PacketTagType&)m_RingBuffer[m_WritePos];
|
||||
|
||||
|
|
|
@ -38,6 +38,7 @@ namespace Threading
|
|||
|
||||
void Thread::Start()
|
||||
{
|
||||
m_terminated = false;
|
||||
if( pthread_create( &m_thread, NULL, _internal_callback, this ) != 0 )
|
||||
throw Exception::ThreadCreationError();
|
||||
}
|
||||
|
@ -135,6 +136,26 @@ namespace Threading
|
|||
err = pthread_mutex_init( &mutex, NULL );
|
||||
}
|
||||
|
||||
// Initializes the wrapped pthread mutex, optionally as a recursive mutex.
//
// Parameters:
//   isRecursive - when true, the mutex is created with the
//                 PTHREAD_MUTEX_RECURSIVE type; when false, it is created
//                 with the default attributes (NULL attribute pointer).
//
// Failures reported by the pthread calls are not propagated to the caller.
MutexLock::MutexLock( bool isRecursive )
{
	if( isRecursive )
	{
		pthread_mutexattr_t mutexAttribute;

		if( pthread_mutexattr_init( &mutexAttribute ) == 0 )
		{
			// If settype fails the attribute keeps its default type; the mutex
			// is still created from it below.
			pthread_mutexattr_settype( &mutexAttribute, PTHREAD_MUTEX_RECURSIVE );
			pthread_mutex_init( &mutex, &mutexAttribute );

			// The attribute object is only needed while the mutex is being
			// initialized, so release it here instead of leaking it.
			pthread_mutexattr_destroy( &mutexAttribute );
			return;
		}

		// Attribute initialization failed: never hand an uninitialized
		// attribute object to pthread_mutex_init.  Fall through and create
		// the mutex with default attributes so that it is at least valid.
	}

	pthread_mutex_init( &mutex, NULL );
}
|
||||
|
||||
MutexLock::~MutexLock()
|
||||
{
|
||||
pthread_mutex_destroy( &mutex );
|
||||
|
@ -149,7 +170,7 @@ namespace Threading
|
|||
{
|
||||
pthread_mutex_unlock( &mutex );
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// define some overloads for InterlockedExchanges
|
||||
// for commonly used types, like u32 and s32.
|
||||
|
|
|
@ -61,6 +61,7 @@ namespace Threading
|
|||
pthread_mutex_t mutex;
|
||||
|
||||
MutexLock();
|
||||
MutexLock( bool isRecursive );
|
||||
~MutexLock();
|
||||
|
||||
void Lock();
|
||||
|
|
|
@ -178,8 +178,7 @@ void CTC2() {
|
|||
break;
|
||||
case REG_CMSAR1: // REG_CMSAR1
|
||||
if (!(VU0.VI[REG_VPU_STAT].UL & 0x100) ) {
|
||||
VU1.VI[REG_TPC].UL = cpuRegs.GPR.r[_Rt_].US[0];
|
||||
vu1ExecMicro(VU1.VI[REG_TPC].UL); // Execute VU1 Micro SubRoutine
|
||||
vu1ExecMicro(cpuRegs.GPR.r[_Rt_].US[0]); // Execute VU1 Micro SubRoutine
|
||||
}
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -1896,6 +1896,7 @@ static int __fastcall Vif1TransDirectHL(u32 *data)
|
|||
}
|
||||
}
|
||||
|
||||
FreezeRegs(1);
|
||||
if (mtgsThread != NULL)
|
||||
{
|
||||
// copy 16 bytes the fast way:
|
||||
|
@ -1910,10 +1911,9 @@ static int __fastcall Vif1TransDirectHL(u32 *data)
|
|||
}
|
||||
else
|
||||
{
|
||||
FreezeRegs(1);
|
||||
GSGIFTRANSFER2((u32*)splittransfer[0], 1);
|
||||
FreezeRegs(0);
|
||||
}
|
||||
FreezeRegs(0);
|
||||
|
||||
if (vif1.tag.size == 0) vif1.cmd = 0;
|
||||
splitptr = 0;
|
||||
|
@ -1945,6 +1945,7 @@ static int __fastcall Vif1TransDirectHL(u32 *data)
|
|||
|
||||
//TODO: ret is guaranteed to be qword aligned ?
|
||||
|
||||
FreezeRegs(1);
|
||||
if (mtgsThread != NULL)
|
||||
{
|
||||
//unaligned copy.VIF handling is -very- messy, so i'l use this code til i fix it :)
|
||||
|
@ -1955,10 +1956,9 @@ static int __fastcall Vif1TransDirectHL(u32 *data)
|
|||
}
|
||||
else
|
||||
{
|
||||
FreezeRegs(1);
|
||||
GSGIFTRANSFER2(data, (ret >> 2));
|
||||
FreezeRegs(0);
|
||||
}
|
||||
FreezeRegs(0);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue