diff --git a/pcsx2/FiFo.cpp b/pcsx2/FiFo.cpp index 2b0a022404..1523702820 100644 --- a/pcsx2/FiFo.cpp +++ b/pcsx2/FiFo.cpp @@ -173,6 +173,7 @@ void __fastcall WriteFIFO_page_6(u32 mem, const mem128_t *value) psHu64(0x6000) = value[0]; psHu64(0x6008) = value[1]; + FreezeRegs(1); if( mtgsThread != NULL ) { const uint count = mtgsThread->PrepDataPacket( GIF_PATH_3, value, 1 ); @@ -184,10 +185,9 @@ void __fastcall WriteFIFO_page_6(u32 mem, const mem128_t *value) } else { - FreezeRegs(1); GSGIFTRANSFER3((u32*)value, 1); - FreezeRegs(0); } + FreezeRegs(0); } void __fastcall WriteFIFO_page_7(u32 mem, const mem128_t *value) diff --git a/pcsx2/GS.h b/pcsx2/GS.h index 39c2221844..7fd14d8561 100644 --- a/pcsx2/GS.h +++ b/pcsx2/GS.h @@ -176,9 +176,14 @@ protected: uint m_RingPos; // cur pos gs is reading from uint m_WritePos; // cur pos ee thread is writing to - Threading::Semaphore m_post_InitDone; // used to regulate thread startup and gsInit - Threading::MutexLock m_lock_RingRestart; + // used to regulate thread startup and gsInit + Threading::Semaphore m_post_InitDone; + Threading::MutexLock m_lock_RingRestart; + + // used to keep multiple threads from sending packets to the ringbuffer concurrently. + Threading::MutexLock m_PacketLocker; + // Used to delay the sending of events. Performance is better if the ringbuffer // has more than one command in it when the thread is kicked. 
int m_CopyCommandTally; diff --git a/pcsx2/MTGS.cpp b/pcsx2/MTGS.cpp index 4b18c5d001..4463e75b07 100644 --- a/pcsx2/MTGS.cpp +++ b/pcsx2/MTGS.cpp @@ -197,6 +197,7 @@ mtgsThreadObject::mtgsThreadObject() : , m_post_InitDone() , m_lock_RingRestart() +, m_PacketLocker( true ) // true - makes it a recursive lock , m_CopyCommandTally( 0 ) , m_CopyDataTally( 0 ) @@ -712,14 +713,12 @@ int mtgsThreadObject::Callback() void mtgsThreadObject::WaitGS() { // Freeze registers because some kernel code likes to destroy them - FreezeRegs(1); SetEvent(); while( volatize(m_RingPos) != volatize(m_WritePos) ) { Timeslice(); //SpinWait(); } - FreezeRegs(0); } // Sets the gsEvent flag and releases a timeslice. @@ -733,8 +732,6 @@ void mtgsThreadObject::SetEvent() void mtgsThreadObject::PrepEventWait() { - // Freeze registers because some kernel code likes to destroy them - FreezeRegs(1); //Console::Notice( "MTGS Stall! EE waits for nothing! ... except your GPU sometimes." ); SetEvent(); Timeslice(); @@ -742,7 +739,6 @@ void mtgsThreadObject::PrepEventWait() void mtgsThreadObject::PostEventWait() const { - FreezeRegs(0); } u8* mtgsThreadObject::GetDataPacketPtr() const @@ -784,29 +780,29 @@ void mtgsThreadObject::SendDataPacket() m_packet_size = 0; - if( m_RingBufferIsBusy ) return; - - // The ringbuffer is current in a resting state, so if enough copies have - // queued up then go ahead and initiate the GS thread.. - - // Optimization notes: What we're doing here is initiating a "burst" mode on - // the thread, which improves its cache hit performance and makes it more friendly - // to other threads in Pcsx2 and such. Primary is the Command Tally, and then a - // secondary data size threshold for games that do lots of texture swizzling. - - // 16 was the best value I found so far. - // tested values: - // 24 - very slow on HT machines (+5% drop in fps) - // 8 - roughly 2% slower on HT machines. 
- - m_CopyDataTally += m_packet_size; - if( ( m_CopyDataTally > 0x8000 ) || ( ++m_CopyCommandTally > 16 ) ) + if( !m_RingBufferIsBusy ) { - FreezeRegs(1); - //Console::Status( "MTGS Kick! DataSize : 0x%5.8x, CommandTally : %d", m_CopyDataTally, m_CopyCommandTally ); - SetEvent(); - FreezeRegs(0); + // The ringbuffer is currently in a resting state, so if enough copies have + // queued up then go ahead and initiate the GS thread. + + // Optimization notes: What we're doing here is initiating a "burst" mode on + // the thread, which improves its cache hit performance and makes it more friendly + // to other threads in Pcsx2 and such. Primary is the Command Tally, and then a + // secondary data size threshold for games that do lots of texture swizzling. + + // 16 was the best value I found so far. + // tested values: + // 24 - very slow on HT machines (+5% drop in fps) + // 8 - roughly 2% slower on HT machines. + + m_CopyDataTally += m_packet_size; + if( ( m_CopyDataTally > 0x8000 ) || ( ++m_CopyCommandTally > 16 ) ) + { + //Console::Status( "MTGS Kick! 
DataSize : 0x%5.8x, CommandTally : %d", m_CopyDataTally, m_CopyCommandTally ); + SetEvent(); + } } + //m_PacketLocker.Unlock(); } int mtgsThreadObject::PrepDataPacket( GIF_PATH pathidx, const u64* srcdata, u32 size ) @@ -840,6 +836,8 @@ static u32 GSRingBufCopySz = 0; // size - size of the packet data, in smd128's int mtgsThreadObject::PrepDataPacket( GIF_PATH pathidx, const u8* srcdata, u32 size ) { + //m_PacketLocker.Lock(); + #ifdef PCSX2_GSRING_TX_STATS ringtx_s += size; ringtx_s_ulg += size&0x7F; @@ -1064,6 +1062,8 @@ __forceinline void mtgsThreadObject::_FinishSimplePacket( uint future_writepos ) void mtgsThreadObject::SendSimplePacket( GS_RINGTYPE type, int data0, int data1, int data2 ) { + //ScopedLock locker( m_PacketLocker ); + const uint thefuture = _PrepForSimplePacket(); PacketTagType& tag = (PacketTagType&)m_RingBuffer[m_WritePos]; @@ -1072,11 +1072,13 @@ void mtgsThreadObject::SendSimplePacket( GS_RINGTYPE type, int data0, int data1, tag.data[1] = data1; tag.data[2] = data2; - _FinishSimplePacket( thefuture ); + _FinishSimplePacket( thefuture ); } void mtgsThreadObject::SendPointerPacket( GS_RINGTYPE type, u32 data0, void* data1 ) { + //ScopedLock locker( m_PacketLocker ); + const uint thefuture = _PrepForSimplePacket(); PacketTagType& tag = (PacketTagType&)m_RingBuffer[m_WritePos]; diff --git a/pcsx2/ThreadTools.cpp b/pcsx2/ThreadTools.cpp index 665ad569bc..dcb0bead91 100644 --- a/pcsx2/ThreadTools.cpp +++ b/pcsx2/ThreadTools.cpp @@ -38,6 +38,7 @@ namespace Threading void Thread::Start() { + m_terminated = false; if( pthread_create( &m_thread, NULL, _internal_callback, this ) != 0 ) throw Exception::ThreadCreationError(); } @@ -135,6 +136,26 @@ namespace Threading err = pthread_mutex_init( &mutex, NULL ); } + MutexLock::MutexLock( bool isRecursive ) + { + if( isRecursive ) + { + pthread_mutexattr_t mutexAttribute; + int status = pthread_mutexattr_init( &mutexAttribute ); + if (status != 0) { /* ... 
*/ } + status = pthread_mutexattr_settype( &mutexAttribute, PTHREAD_MUTEX_RECURSIVE); + if (status != 0) { /* ... */} + + int err = 0; + err = pthread_mutex_init( &mutex, &mutexAttribute ); + } + else + { + int err = 0; + err = pthread_mutex_init( &mutex, NULL ); + } + } + MutexLock::~MutexLock() { pthread_mutex_destroy( &mutex ); @@ -149,7 +170,7 @@ namespace Threading { pthread_mutex_unlock( &mutex ); } - + ////////////////////////////////////////////////////////////////////// // define some overloads for InterlockedExchanges // for commonly used types, like u32 and s32. diff --git a/pcsx2/Threading.h b/pcsx2/Threading.h index f75d3e4262..f731676757 100644 --- a/pcsx2/Threading.h +++ b/pcsx2/Threading.h @@ -61,6 +61,7 @@ namespace Threading pthread_mutex_t mutex; MutexLock(); + MutexLock( bool isRecursive ); ~MutexLock(); void Lock(); diff --git a/pcsx2/VU0.cpp b/pcsx2/VU0.cpp index a017527406..823d4827a2 100644 --- a/pcsx2/VU0.cpp +++ b/pcsx2/VU0.cpp @@ -178,8 +178,7 @@ void CTC2() { break; case REG_CMSAR1: // REG_CMSAR1 if (!(VU0.VI[REG_VPU_STAT].UL & 0x100) ) { - VU1.VI[REG_TPC].UL = cpuRegs.GPR.r[_Rt_].US[0]; - vu1ExecMicro(VU1.VI[REG_TPC].UL); // Execute VU1 Micro SubRoutine + vu1ExecMicro(cpuRegs.GPR.r[_Rt_].US[0]); // Execute VU1 Micro SubRoutine } break; default: diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 9ae6a768e7..797be0c452 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -1896,6 +1896,7 @@ static int __fastcall Vif1TransDirectHL(u32 *data) } } + FreezeRegs(1); if (mtgsThread != NULL) { // copy 16 bytes the fast way: @@ -1910,10 +1911,9 @@ static int __fastcall Vif1TransDirectHL(u32 *data) } else { - FreezeRegs(1); GSGIFTRANSFER2((u32*)splittransfer[0], 1); - FreezeRegs(0); } + FreezeRegs(0); if (vif1.tag.size == 0) vif1.cmd = 0; splitptr = 0; @@ -1945,6 +1945,7 @@ static int __fastcall Vif1TransDirectHL(u32 *data) //TODO: ret is guaranteed to be qword aligned ? 
+ FreezeRegs(1); if (mtgsThread != NULL) { //unaligned copy.VIF handling is -very- messy, so i'l use this code til i fix it :) @@ -1955,10 +1956,9 @@ static int __fastcall Vif1TransDirectHL(u32 *data) } else { - FreezeRegs(1); GSGIFTRANSFER2(data, (ret >> 2)); - FreezeRegs(0); } + FreezeRegs(0); return ret; }