Merge pull request #1444 from PCSX2/atomic-relax

Atomic relax
Gregory Hainaut 2016-07-22 18:36:02 +02:00 committed by GitHub
commit 7d35e15fea
10 changed files with 125 additions and 143 deletions

View File

@@ -159,9 +159,6 @@ namespace Threading
 	// For use in spin/wait loops.
 	extern void SpinWait();
-	// Use prior to committing data to another thread
-	extern void StoreFence();
 	// Optional implementation to enable hires thread/process scheduler for the operating system.
 	// Needed by Windows, but might not be relevant to other platforms.
 	extern void EnableHiresScheduler();
@@ -170,18 +167,6 @@ namespace Threading
 	// sleeps the current thread for the given number of milliseconds.
 	extern void Sleep( int ms );
-	// --------------------------------------------------------------------------------------
-	// AtomicExchange / AtomicIncrement
-	// --------------------------------------------------------------------------------------
-	// Our fundamental interlocking functions. All other useful interlocks can be derived
-	// from these little beasties! (these are all implemented internally using cross-platform
-	// implementations of _InterlockedExchange and such)
-	extern u32 AtomicRead( volatile u32& Target );
-	extern s32 AtomicRead( volatile s32& Target );
-	extern u32 AtomicExchange( volatile u32& Target, u32 value );
-	extern s32 AtomicExchange( volatile s32& Target, s32 value );
 	// pthread Cond is an evil api that is not suited for Pcsx2 needs.
 	// Let's not use it. Use mutexes and semaphores instead to create waits. (Air)
 	#if 0
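For reference, the removed helpers map one-to-one onto std::atomic operations; a minimal illustrative sketch (the variable and function names are hypothetical, not part of PCSX2):

#include <atomic>
#include <cstdint>

// Hypothetical example variable; u32/s32 are PCSX2 typedefs for fixed-width integers.
std::atomic<uint32_t> g_example{0};

// AtomicRead(x)        -> x.load()
// AtomicExchange(x, v) -> x.exchange(v)
uint32_t ReadExample()             { return g_example.load(); }
uint32_t SwapExample(uint32_t v)   { return g_example.exchange(v); }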
@@ -309,7 +294,7 @@ namespace Threading
 	// will be automatically released on any return or exit from the function.
 	//
 	// Const qualification note:
 	// ScopedLock takes const instances of the mutex, even though the mutex is modified
 	// by locking and unlocking. Two rationales:
 	//
 	// 1) when designing classes with accessors (GetString, GetValue, etc) that need mutexes,
@@ -399,17 +384,17 @@ namespace Threading
 	ScopedLockBool(Mutex& mutexToLock, std::atomic<bool>& isLockedBool)
 		: m_lock(mutexToLock),
 		  m_bool(isLockedBool) {
-		m_bool = m_lock.IsLocked();
+		m_bool.store(m_lock.IsLocked(), std::memory_order_relaxed);
 	}
 	virtual ~ScopedLockBool() throw() {
-		m_bool = false;
+		m_bool.store(false, std::memory_order_relaxed);
 	}
 	void Acquire() {
 		m_lock.Acquire();
-		m_bool = m_lock.IsLocked();
+		m_bool.store(m_lock.IsLocked(), std::memory_order_relaxed);
 	}
 	void Release() {
-		m_bool = false;
+		m_bool.store(false, std::memory_order_relaxed);
 		m_lock.Release();
 	}
 };

View File

@@ -327,7 +327,7 @@ void Threading::ScopedLock::AssignAndLock( const Mutex* locker )
 	if( !m_lock ) return;
 	m_IsLocked = true;
 	m_lock->Acquire();
 }
 void Threading::ScopedLock::Assign( const Mutex& locker )

View File

@@ -782,27 +782,6 @@ void Threading::WaitEvent::Wait()
 }
 #endif
-// --------------------------------------------------------------------------------------
-// InterlockedExchanges / AtomicExchanges (PCSX2's Helper versions)
-// --------------------------------------------------------------------------------------
-// define some overloads for InterlockedExchanges for commonly used types, like u32 and s32.
-// Note: For all of these atomic operations below to be atomic, the variables need to be 4-byte
-// aligned. Read: http://msdn.microsoft.com/en-us/library/ms684122%28v=vs.85%29.aspx
-__fi u32 Threading::AtomicRead(volatile u32& Target) {
-	return Target; // Properly-aligned 32-bit reads are atomic
-}
-__fi s32 Threading::AtomicRead(volatile s32& Target) {
-	return Target; // Properly-aligned 32-bit reads are atomic
-}
-__fi u32 Threading::AtomicExchange(volatile u32& Target, u32 value ) {
-	return _InterlockedExchange( (volatile vol_t*)&Target, value );
-}
-__fi s32 Threading::AtomicExchange( volatile s32& Target, s32 value ) {
-	return _InterlockedExchange( (volatile vol_t*)&Target, value );
-}
 // --------------------------------------------------------------------------------------
 // BaseThreadError
 // --------------------------------------------------------------------------------------

View File

@@ -36,11 +36,6 @@ __fi void Threading::SpinWait()
 	__asm pause;
 }
-__fi void Threading::StoreFence()
-{
-	__asm sfence;
-}
 __fi void Threading::EnableHiresScheduler()
 {
 	// This improves accuracy of Sleep() by some amount, and only adds a negligible amount of

View File

@@ -66,7 +66,7 @@ BaseDeletableObject::BaseDeletableObject()
 	//pxAssertDev( _CrtIsValidHeapPointer( this ), "BaseDeletableObject types cannot be created on the stack or as temporaries!" );
 #endif
-	m_IsBeingDeleted = false;
+	m_IsBeingDeleted.store(false, std::memory_order_relaxed);
 }
 BaseDeletableObject::~BaseDeletableObject() throw()

View File

@@ -275,8 +275,9 @@ class SysMtgsThread : public SysThreadBase
 public:
 	// note: when m_ReadPos == m_WritePos, the fifo is empty
-	__aligned(4) uint m_ReadPos;  // cur pos gs is reading from
-	__aligned(4) uint m_WritePos; // cur pos ee thread is writing to
+	// Threading info: m_ReadPos is updated by the MTGS thread. m_WritePos is updated by the EE thread
+	std::atomic<unsigned int> m_ReadPos;  // cur pos gs is reading from
+	std::atomic<unsigned int> m_WritePos; // cur pos ee thread is writing to
 	std::atomic<bool> m_RingBufferIsBusy;
 	std::atomic<bool> m_SignalRingEnable;
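The comment added above is the invariant the whole PR leans on: each index has exactly one writing thread, so the owner may read its own index relaxed while the other thread pairs an acquire load with the owner's release store. A minimal single-producer/single-consumer sketch of that protocol (names and sizes hypothetical, not the PCSX2 code):

#include <atomic>
#include <cstdint>

constexpr unsigned kSize = 256;          // hypothetical ring size (power of two)
uint32_t ring[kSize];
std::atomic<unsigned> read_pos{0};       // written only by the consumer
std::atomic<unsigned> write_pos{0};      // written only by the producer

void produce(uint32_t value)             // producer side (EE-thread role)
{
    unsigned wr = write_pos.load(std::memory_order_relaxed);  // own index: relaxed is enough
    ring[wr % kSize] = value;                                  // fill the slot first
    write_pos.store(wr + 1, std::memory_order_release);        // then publish it
}

bool consume(uint32_t& out)              // consumer side (MTGS-thread role)
{
    unsigned rd = read_pos.load(std::memory_order_relaxed);   // own index: relaxed is enough
    if (rd == write_pos.load(std::memory_order_acquire))      // acquire pairs with the release above
        return false;                                         // ring is empty
    out = ring[rd % kSize];
    read_pos.store(rd + 1, std::memory_order_release);        // hand the slot back to the producer
    return true;
}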

View File

@@ -37,13 +37,6 @@ using namespace Threading;
 #	define MTGS_LOG(...) do {} while (0)
 #endif
-// forces the compiler to treat a non-volatile value as volatile.
-// This allows us to declare the vars as non-volatile and only use
-// them as volatile when appropriate (more optimized).
-#define volatize(x) (*reinterpret_cast<volatile uint*>(&(x)))
 // =====================================================================================================
 // MTGS Threaded Class Implementation
 // =====================================================================================================
@@ -74,7 +67,7 @@ void SysMtgsThread::OnStart()
 	m_ReadPos = 0;
 	m_WritePos = 0;
 	m_RingBufferIsBusy = false;
 	m_packet_size = 0;
 	m_packet_writepos = 0;
@@ -110,9 +103,9 @@ void SysMtgsThread::ResetGS()
 	// * Signal a reset.
 	// * clear the path and byRegs structs (used by GIFtagDummy)
-	m_ReadPos = m_WritePos;
+	m_ReadPos = m_WritePos.load();
 	m_QueuedFrameCount = 0;
-	m_VsyncSignalListener = false;
+	m_VsyncSignalListener = 0;
 	MTGS_LOG( "MTGS: Sending Reset..." );
 	SendSimplePacket( GS_RINGTYPE_RESET, 0, 0, 0 );
@@ -163,8 +156,15 @@ void SysMtgsThread::PostVsyncStart()
 	if ((m_QueuedFrameCount.fetch_add(1) < EmuConfig.GS.VsyncQueueSize) /*|| (!EmuConfig.GS.VsyncEnable && !EmuConfig.GS.FrameLimitEnable)*/) return;
-	m_VsyncSignalListener = true;
-	//Console.WriteLn( Color_Blue, "(EEcore Sleep) Vsync\t\tringpos=0x%06x, writepos=0x%06x", volatize(m_ReadPos), m_WritePos );
+	m_VsyncSignalListener.store(true, std::memory_order_release);
+	//Console.WriteLn( Color_Blue, "(EEcore Sleep) Vsync\t\tringpos=0x%06x, writepos=0x%06x", m_ReadPos.load(), m_WritePos.load() );
+	// We will wait a vsync event from the MTGS ring. If the ring is already purged, the event will never come !
+	// To avoid this potential deadlock, ring must be wake up after m_VsyncSignalListener
+	// Note: potentially we can also miss the previous wake up if we optimize away the post just before the release of busy signal of the ring
+	// So let's ensure the ring doesn't sleep
+	m_sem_event.Post();
 	m_sem_Vsync.WaitNoCancel();
 }
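The ordering the new comment insists on — publish the listener flag, then wake the worker, only then block — is what avoids the lost-wakeup deadlock. A minimal sketch of that pattern with C++20 semaphores standing in for the MTGS ones (all names hypothetical):

#include <atomic>
#include <semaphore>   // C++20

std::atomic<bool> vsync_listener{false};    // stand-in for m_VsyncSignalListener
std::counting_semaphore<> ring_event{0};    // stand-in for m_sem_event (worker wakeup)
std::counting_semaphore<> vsync_signal{0};  // stand-in for m_sem_Vsync

void wait_for_vsync_drain()
{
    vsync_listener.store(true, std::memory_order_release);  // 1) publish the request first
    ring_event.release();                                    // 2) wake the worker in case it already went idle
    vsync_signal.acquire();                                  // 3) only now is it safe to block
}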
@@ -238,34 +238,45 @@ void SysMtgsThread::OpenPlugin()
 	GSsetGameCRC( ElfCRC, 0 );
 }
-struct RingBufferLock {
+class RingBufferLock {
 	ScopedLock m_lock1;
 	ScopedLock m_lock2;
 	SysMtgsThread& m_mtgs;
+public:
 	RingBufferLock(SysMtgsThread& mtgs)
 		: m_lock1(mtgs.m_mtx_RingBufferBusy),
 		  m_lock2(mtgs.m_mtx_RingBufferBusy2),
 		  m_mtgs(mtgs) {
-		m_mtgs.m_RingBufferIsBusy = true;
+		m_mtgs.m_RingBufferIsBusy.store(true, std::memory_order_relaxed);
 	}
 	virtual ~RingBufferLock() throw() {
-		m_mtgs.m_RingBufferIsBusy = false;
+		m_mtgs.m_RingBufferIsBusy.store(false, std::memory_order_relaxed);
 	}
 	void Acquire() {
 		m_lock1.Acquire();
 		m_lock2.Acquire();
-		m_mtgs.m_RingBufferIsBusy = true;
+		m_mtgs.m_RingBufferIsBusy.store(true, std::memory_order_relaxed);
 	}
 	void Release() {
-		m_mtgs.m_RingBufferIsBusy = false;
+		m_mtgs.m_RingBufferIsBusy.store(false, std::memory_order_relaxed);
 		m_lock2.Release();
 		m_lock1.Release();
 	}
+	void PartialAcquire() {
+		m_lock2.Acquire();
+	}
+	void PartialRelease() {
+		m_lock2.Release();
+	}
 };
 void SysMtgsThread::ExecuteTaskInThread()
 {
+	// Threading info: run in MTGS thread
+	// m_ReadPos is only update by the MTGS thread so it is safe to load it with a relaxed atomic
 #ifdef RINGBUF_DEBUG_STACK
 	PacketTagType prevCmd;
 #endif
@@ -285,16 +296,18 @@ void SysMtgsThread::ExecuteTaskInThread()
 	// note: m_ReadPos is intentionally not volatile, because it should only
 	// ever be modified by this thread.
-	while( m_ReadPos != volatize(m_WritePos))
+	while( m_ReadPos.load(std::memory_order_relaxed) != m_WritePos.load(std::memory_order_acquire))
 	{
 		if (EmuConfig.GS.DisableOutput) {
-			m_ReadPos = m_WritePos;
+			m_ReadPos = m_WritePos.load();
 			continue;
 		}
-		pxAssert( m_ReadPos < RingBufferSize );
-		const PacketTagType& tag = (PacketTagType&)RingBuffer[m_ReadPos];
+		const unsigned int local_ReadPos = m_ReadPos.load(std::memory_order_relaxed);
+		pxAssert( local_ReadPos < RingBufferSize );
+		const PacketTagType& tag = (PacketTagType&)RingBuffer[local_ReadPos];
 		u32 ringposinc = 1;
 #ifdef RINGBUF_DEBUG_STACK
@@ -302,11 +315,11 @@ void SysMtgsThread::ExecuteTaskInThread()
 		m_lock_Stack.Lock();
 		uptr stackpos = ringposStack.back();
-		if( stackpos != m_ReadPos )
+		if( stackpos != local_ReadPos )
 		{
-			Console.Error( "MTGS Ringbuffer Critical Failure ---> %x to %x (prevCmd: %x)\n", stackpos, m_ReadPos, prevCmd.command );
+			Console.Error( "MTGS Ringbuffer Critical Failure ---> %x to %x (prevCmd: %x)\n", stackpos, local_ReadPos, prevCmd.command );
 		}
-		pxAssert( stackpos == m_ReadPos );
+		pxAssert( stackpos == local_ReadPos );
 		prevCmd = tag;
 		ringposStack.pop_back();
 		m_lock_Stack.Release();
@@ -317,7 +330,7 @@ void SysMtgsThread::ExecuteTaskInThread()
 #if COPY_GS_PACKET_TO_MTGS == 1
 			case GS_RINGTYPE_P1:
 			{
-				uint datapos = (m_ReadPos+1) & RingBufferMask;
+				uint datapos = (local_ReadPos+1) & RingBufferMask;
 				const int qsize = tag.data[0];
 				const u128* data = &RingBuffer[datapos];
@@ -342,7 +355,7 @@ void SysMtgsThread::ExecuteTaskInThread()
 			case GS_RINGTYPE_P2:
 			{
-				uint datapos = (m_ReadPos+1) & RingBufferMask;
+				uint datapos = (local_ReadPos+1) & RingBufferMask;
 				const int qsize = tag.data[0];
 				const u128* data = &RingBuffer[datapos];
@@ -367,7 +380,7 @@ void SysMtgsThread::ExecuteTaskInThread()
 			case GS_RINGTYPE_P3:
 			{
-				uint datapos = (m_ReadPos+1) & RingBufferMask;
+				uint datapos = (local_ReadPos+1) & RingBufferMask;
 				const int qsize = tag.data[0];
 				const u128* data = &RingBuffer[datapos];
@@ -402,10 +415,10 @@ void SysMtgsThread::ExecuteTaskInThread()
 			case GS_RINGTYPE_MTVU_GSPACKET: {
 				MTVU_LOG("MTGS - Waiting on semaXGkick!");
 				vu1Thread.KickStart(true);
-				busy.m_lock2.Release();
+				busy.PartialRelease();
 				// Wait for MTVU to complete vu1 program
 				vu1Thread.semaXGkick.WaitWithoutYield();
-				busy.m_lock2.Acquire();
+				busy.PartialAcquire();
 				Gif_Path& path = gifUnit.gifPath[GIF_PATH_1];
 				GS_Packet gsPack = path.GetGSPacketMTVU(); // Get vu1 program's xgkick packet(s)
 				if (gsPack.size) GSgifTransfer((u32*)&path.buffer[gsPack.offset], gsPack.size/16);
@@ -429,7 +442,7 @@ void SysMtgsThread::ExecuteTaskInThread()
 					// This seemingly obtuse system is needed in order to handle cases where the vsync data wraps
 					// around the edge of the ringbuffer. If not for that I'd just use a struct. >_<
-					uint datapos = (m_ReadPos+1) & RingBufferMask;
+					uint datapos = (local_ReadPos+1) & RingBufferMask;
 					MemCopy_WrappedSrc( RingBuffer.m_Ring, datapos, RingBufferSize, (u128*)RingBuffer.Regs, 0xf );
 					u32* remainder = (u32*)&RingBuffer[datapos];
@@ -504,9 +517,9 @@ void SysMtgsThread::ExecuteTaskInThread()
 #ifdef PCSX2_DEVBUILD
 			default:
-				Console.Error("GSThreadProc, bad packet (%x) at m_ReadPos: %x, m_WritePos: %x", tag.command, m_ReadPos, m_WritePos);
+				Console.Error("GSThreadProc, bad packet (%x) at m_ReadPos: %x, m_WritePos: %x", tag.command, local_ReadPos, m_WritePos.load());
 				pxFail( "Bad packet encountered in the MTGS Ringbuffer." );
-				m_ReadPos = m_WritePos;
+				m_ReadPos.store(m_WritePos.load(std::memory_order_acquire), std::memory_order_release);
 				continue;
 #else
 			// Optimized performance in non-Dev builds.
@@ -516,22 +529,22 @@ void SysMtgsThread::ExecuteTaskInThread()
 			}
 		}
-		uint newringpos = (m_ReadPos + ringposinc) & RingBufferMask;
+		uint newringpos = (m_ReadPos.load(std::memory_order_relaxed) + ringposinc) & RingBufferMask;
 		if( EmuConfig.GS.SynchronousMTGS )
 		{
 			pxAssert( m_WritePos == newringpos );
 		}
-		m_ReadPos = newringpos;
-		if( m_SignalRingEnable )
+		m_ReadPos.store(newringpos, std::memory_order_release);
+		if(m_SignalRingEnable.load(std::memory_order_acquire))
 		{
 			// The EEcore has requested a signal after some amount of processed data.
 			if( m_SignalRingPosition.fetch_sub( ringposinc ) <= 0 )
 			{
 				// Make sure to post the signal after the m_ReadPos has been updated...
-				m_SignalRingEnable = false;
+				m_SignalRingEnable.store(false, std::memory_order_release);
 				m_sem_OnRingReset.Post();
 				continue;
 			}
@@ -547,7 +560,7 @@ void SysMtgsThread::ExecuteTaskInThread()
 		if( m_SignalRingEnable.exchange(false) )
 		{
 			//Console.Warning( "(MTGS Thread) Dangling RingSignal on empty buffer! signalpos=0x%06x", m_SignalRingPosition.exchange(0) ) );
-			m_SignalRingPosition = 0;
+			m_SignalRingPosition.store(0, std::memory_order_release);
 			m_sem_OnRingReset.Post();
 		}
@@ -599,14 +612,17 @@ void SysMtgsThread::WaitGS(bool syncRegs, bool weakWait, bool isMTVU)
 	Gif_Path& path = gifUnit.gifPath[GIF_PATH_1];
 	u32 startP1Packs = weakWait ? path.GetPendingGSPackets() : 0;
-	if (isMTVU || volatize(m_ReadPos) != m_WritePos) {
+	// Both m_ReadPos and m_WritePos can be relaxed as we only want to test if the queue is empty but
+	// we don't want to access the content of the queue
+	if (isMTVU || m_ReadPos.load(std::memory_order_relaxed) != m_WritePos.load(std::memory_order_relaxed)) {
 		SetEvent();
 		RethrowException();
 		for(;;) {
 			if (weakWait) m_mtx_RingBufferBusy2.Wait();
 			else          m_mtx_RingBufferBusy .Wait();
 			RethrowException();
-			if(!isMTVU && volatize(m_ReadPos) == m_WritePos) break;
+			if(!isMTVU && m_ReadPos.load(std::memory_order_relaxed) == m_WritePos.load(std::memory_order_relaxed)) break;
 			u32 curP1Packs = weakWait ? path.GetPendingGSPackets() : 0;
 			if (weakWait && ((startP1Packs-curP1Packs) || !curP1Packs)) break;
 			// On weakWait we will stop waiting on the MTGS thread if the
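The relaxed loads in the emptiness test above are safe precisely because the result is never used to index into the ring; acquire ordering only matters when the payload behind the index will actually be read. A minimal illustration of that distinction (names hypothetical):

#include <atomic>

// Pure emptiness test: compares the two indices, never touches the buffer,
// so relaxed loads are sufficient.
bool ring_is_empty(const std::atomic<unsigned>& rd, const std::atomic<unsigned>& wr)
{
    return rd.load(std::memory_order_relaxed) == wr.load(std::memory_order_relaxed);
}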
@@ -629,7 +645,7 @@ void SysMtgsThread::WaitGS(bool syncRegs, bool weakWait, bool isMTVU)
 // For use in loops that wait on the GS thread to do certain things.
 void SysMtgsThread::SetEvent()
 {
-	if(!m_RingBufferIsBusy)
+	if(!m_RingBufferIsBusy.load(std::memory_order_relaxed))
 		m_sem_event.Post();
 	m_CopyDataTally = 0;
@@ -653,13 +669,13 @@ void SysMtgsThread::SendDataPacket()
 	PacketTagType& tag = (PacketTagType&)RingBuffer[m_packet_startpos];
 	tag.data[0] = actualSize;
-	m_WritePos = m_packet_writepos;
+	m_WritePos.store(m_packet_writepos, std::memory_order_release);
-	if( EmuConfig.GS.SynchronousMTGS )
+	if(EmuConfig.GS.SynchronousMTGS)
 	{
 		WaitGS();
 	}
-	else if( !m_RingBufferIsBusy )
+	else if(!m_RingBufferIsBusy.load(std::memory_order_relaxed))
 	{
 		m_CopyDataTally += m_packet_size;
 		if( m_CopyDataTally > 0x2000 ) SetEvent();
@@ -675,7 +691,7 @@ void SysMtgsThread::GenericStall( uint size )
 	// Note on volatiles: m_WritePos is not modified by the GS thread, so there's no need
 	// to use volatile reads here. We do cache it though, since we know it never changes,
 	// except for calls to RingbufferRestert() -- handled below.
-	const uint writepos = m_WritePos;
+	const uint writepos = m_WritePos.load(std::memory_order_relaxed);
 	// Sanity checks! (within the confines of our ringbuffer please!)
 	pxAssert( size < RingBufferSize );
@@ -686,7 +702,7 @@ void SysMtgsThread::GenericStall( uint size )
 	// But if not then we need to make sure the readpos is outside the scope of
 	// the block about to be written (writepos + size)
-	uint readpos = volatize(m_ReadPos);
+	uint readpos = m_ReadPos.load(std::memory_order_acquire);
 	uint freeroom;
 	if (writepos < readpos)
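The free-room calculation that this branch feeds is the usual circular-buffer arithmetic; a minimal sketch of the idea with concrete numbers (the exact branch bodies are outside this hunk, so this is an illustration, not the PCSX2 code):

// Illustration only: free space between a writer and a reader in a ring of 'size' slots.
unsigned free_room(unsigned writepos, unsigned readpos, unsigned size)
{
    if (writepos < readpos)
        return readpos - writepos;            // reader is ahead: gap between them
    else
        return size - (writepos - readpos);   // reader is behind: wrap-around space
}

// e.g. size = 16: free_room(3, 10, 16) == 7, free_room(10, 3, 16) == 9.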
@@ -714,15 +730,15 @@ void SysMtgsThread::GenericStall( uint size )
 		if( somedone > 0x80 )
 		{
 			pxAssertDev( m_SignalRingEnable == 0, "MTGS Thread Synchronization Error" );
-			m_SignalRingPosition = somedone;
+			m_SignalRingPosition.store(somedone, std::memory_order_release);
 			//Console.WriteLn( Color_Blue, "(EEcore Sleep) PrepDataPacker \tringpos=0x%06x, writepos=0x%06x, signalpos=0x%06x", readpos, writepos, m_SignalRingPosition );
 			while(true) {
-				m_SignalRingEnable = true;
+				m_SignalRingEnable.store(true, std::memory_order_release);
 				SetEvent();
 				m_sem_OnRingReset.WaitWithoutYield();
-				readpos = volatize(m_ReadPos);
+				readpos = m_ReadPos.load(std::memory_order_acquire);
 				//Console.WriteLn( Color_Blue, "(EEcore Awake) Report!\tringpos=0x%06x", readpos );
 				if (writepos < readpos)
@@ -741,7 +757,7 @@ void SysMtgsThread::GenericStall( uint size )
 			SetEvent();
 			while(true) {
 				SpinWait();
-				readpos = volatize(m_ReadPos);
+				readpos = m_ReadPos.load(std::memory_order_acquire);
 				if (writepos < readpos)
 					freeroom = readpos - writepos;
@@ -762,12 +778,13 @@ void SysMtgsThread::PrepDataPacket( MTGS_RingCommand cmd, u32 size )
 	// Command qword: Low word is the command, and the high word is the packet
 	// length in SIMDs (128 bits).
-	PacketTagType& tag = (PacketTagType&)RingBuffer[m_WritePos];
+	const unsigned int local_WritePos = m_WritePos.load(std::memory_order_relaxed);
+	PacketTagType& tag = (PacketTagType&)RingBuffer[local_WritePos];
 	tag.command = cmd;
 	tag.data[0] = m_packet_size;
-	m_packet_startpos = m_WritePos;
-	m_packet_writepos = (m_WritePos + 1) & RingBufferMask;
+	m_packet_startpos = local_WritePos;
+	m_packet_writepos = (local_WritePos + 1) & RingBufferMask;
 }
 // Returns the amount of giftag data processed (in simd128 values).
@@ -784,9 +801,9 @@ void SysMtgsThread::PrepDataPacket( GIF_PATH pathidx, u32 size )
 __fi void SysMtgsThread::_FinishSimplePacket()
 {
-	uint future_writepos = (m_WritePos+1) & RingBufferMask;
-	pxAssert( future_writepos != volatize(m_ReadPos) );
-	m_WritePos = future_writepos;
+	uint future_writepos = (m_WritePos.load(std::memory_order_relaxed) +1) & RingBufferMask;
+	pxAssert( future_writepos != m_ReadPos.load(std::memory_order_acquire) );
+	m_WritePos.store(future_writepos, std::memory_order_release);
 	if( EmuConfig.GS.SynchronousMTGS )
 		WaitGS();
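This is the producer half of the ring protocol: the packet slot is filled first (by the Send*Packet caller) and only then is the new write index published with a release store, so the MTGS thread's acquire load is guaranteed to observe a fully written packet. A minimal sketch of the same publish step (names and sizes hypothetical):

#include <atomic>
#include <cstdint>

struct Packet { uint32_t command; uint32_t data[3]; };

constexpr unsigned kMask = 255;            // hypothetical ring of 256 slots
Packet ring[kMask + 1];
std::atomic<unsigned> write_pos{0};

void push_packet(const Packet& p)
{
    unsigned wr = write_pos.load(std::memory_order_relaxed);      // only this thread writes write_pos
    ring[wr] = p;                                                  // 1) fill the slot
    write_pos.store((wr + 1) & kMask, std::memory_order_release);  // 2) then publish the new index
}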
@@ -799,7 +816,7 @@ void SysMtgsThread::SendSimplePacket( MTGS_RingCommand type, int data0, int data
 	//ScopedLock locker( m_PacketLocker );
 	GenericStall(1);
-	PacketTagType& tag = (PacketTagType&)RingBuffer[m_WritePos];
+	PacketTagType& tag = (PacketTagType&)RingBuffer[m_WritePos.load(std::memory_order_relaxed)];
 	tag.command = type;
 	tag.data[0] = data0;
@@ -814,7 +831,7 @@ void SysMtgsThread::SendSimpleGSPacket(MTGS_RingCommand type, u32 offset, u32 si
 	SendSimplePacket(type, (int)offset, (int)size, (int)path);
 	if(!EmuConfig.GS.SynchronousMTGS) {
-		if(!m_RingBufferIsBusy) {
+		if(!m_RingBufferIsBusy.load(std::memory_order_relaxed)) {
 			m_CopyDataTally += size / 16;
 			if (m_CopyDataTally > 0x2000) SetEvent();
 		}
@@ -826,7 +843,7 @@ void SysMtgsThread::SendPointerPacket( MTGS_RingCommand type, u32 data0, void* d
 	//ScopedLock locker( m_PacketLocker );
 	GenericStall(1);
-	PacketTagType& tag = (PacketTagType&)RingBuffer[m_WritePos];
+	PacketTagType& tag = (PacketTagType&)RingBuffer[m_WritePos.load(std::memory_order_relaxed)];
 	tag.command = type;
 	tag.data[0] = data0;

View File

@@ -21,7 +21,6 @@
 __aligned16 VU_Thread vu1Thread(CpuVU1, VU1);
-#define volatize(x) (*reinterpret_cast<volatile uint*>(&(x)))
 #define size_u32(x) (((u32)x+3u)>>2) // Rounds up a size in bytes for size in u32's
 #define MTVU_ALWAYS_KICK 0
 #define MTVU_SYNC_MODE 0
@@ -52,7 +51,10 @@ void SaveStateBase::mtvuFreeze()
 	FreezeTag("MTVU");
 	pxAssert(vu1Thread.IsDone());
 	if (!IsSaving()) vu1Thread.Reset();
-	Freeze(vu1Thread.vuCycles);
+	for (size_t i = 0; i < 4; ++i) {
+		unsigned int v = vu1Thread.vuCycles[i].load();
+		Freeze(v);
+	}
 	Freeze(vu1Thread.vuCycleIdx);
 }
@@ -75,14 +77,15 @@ void VU_Thread::Reset()
 {
 	ScopedLock lock(mtxBusy);
-	read_pos = 0;
-	write_pos = 0;
 	write_offset = 0;
 	vuCycleIdx = 0;
-	isBusy = false;
+	read_pos = 0;
+	isBusy = false;
+	write_pos = 0;
 	memzero(vif);
 	memzero(vifRegs);
-	memzero(vuCycles);
+	for (size_t i = 0; i < 4; ++i)
+		vu1Thread.vuCycles[i] = 0;
 }
 void VU_Thread::ExecuteTaskInThread()
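memzero over vuCycles (and a direct Freeze of the array in mtvuFreeze above) no longer works once the elements are std::atomic, since atomics are not copyable and are meant to be accessed only through their own interface; hence the element-wise loops. A minimal sketch of the same idea (names hypothetical):

#include <atomic>
#include <cstddef>

std::atomic<unsigned int> cycles[4];   // hypothetical stand-in for vuCycles

void reset_cycles()
{
    // No memset/memzero over std::atomic objects; clear each one through the atomic API.
    for (std::size_t i = 0; i < 4; ++i)
        cycles[i].store(0, std::memory_order_relaxed);
}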
@@ -97,7 +100,7 @@ void VU_Thread::ExecuteRingBuffer()
 	for(;;) {
 		semaEvent.WaitWithoutYield();
 		ScopedLockBool lock(mtxBusy, isBusy);
-		while (read_pos != GetWritePos()) {
+		while (read_pos.load(std::memory_order_relaxed) != GetWritePos()) {
 			u32 tag = Read();
 			switch (tag) {
 				case MTVU_VU_EXECUTE: {
@@ -109,7 +112,7 @@ void VU_Thread::ExecuteRingBuffer()
 					vuCPU->Execute(vu1RunCycles);
 					gifUnit.gifPath[GIF_PATH_1].FinishGSPacketMTVU();
 					semaXGkick.Post(); // Tell MTGS a path1 packet is complete
-					AtomicExchange(vuCycles[vuCycleIdx], vuRegs.cycle);
+					vuCycles[vuCycleIdx].store(vuRegs.cycle, std::memory_order_relaxed);
 					vuCycleIdx = (vuCycleIdx + 1) & 3;
 					break;
 				}
@@ -137,12 +140,12 @@ void VU_Thread::ExecuteRingBuffer()
 					Read(&vif.tag, vif_copy_size);
 					ReadRegs(&vifRegs);
 					u32 size = Read();
-					MTVU_Unpack(&buffer[read_pos], vifRegs);
+					MTVU_Unpack(&buffer[read_pos.load(std::memory_order_relaxed)], vifRegs);
 					incReadPos(size_u32(size));
 					break;
 				}
 				case MTVU_NULL_PACKET:
-					read_pos = 0;
+					read_pos.store(0, std::memory_order_release);
 					break;
 				jNO_DEFAULT;
 			}
@@ -156,8 +159,8 @@ __ri void VU_Thread::WaitOnSize(s32 size)
 {
 	for(;;) {
 		s32 readPos = GetReadPos();
-		if (readPos <= write_pos) break; // MTVU is reading in back of write_pos
-		if (readPos > write_pos + size) break; // Enough free front space
+		if (readPos <= write_pos.load(std::memory_order_relaxed)) break; // MTVU is reading in back of write_pos
+		if (readPos > write_pos.load(std::memory_order_relaxed) + size) break; // Enough free front space
 		if (1) { // Let MTVU run to free up buffer space
 			KickStart();
 			if (IsDevBuild) DevCon.WriteLn("WaitOnSize()");
@@ -174,12 +177,12 @@ void VU_Thread::ReserveSpace(s32 size)
 	pxAssert(size < buffer_size);
 	pxAssert(size > 0);
 	pxAssert(write_offset == 0);
-	if (write_pos + size > buffer_size) {
+	if (write_pos.load(std::memory_order_relaxed) + size > buffer_size) {
 		pxAssert(write_pos > 0);
 		WaitOnSize(1); // Size of MTVU_NULL_PACKET
 		Write(MTVU_NULL_PACKET);
 		write_offset = 0;
-		AtomicExchange(volatize(write_pos), 0);
+		write_pos.store(0, std::memory_order_release);
 	}
 	WaitOnSize(size);
 }
@@ -187,48 +190,48 @@ void VU_Thread::ReserveSpace(s32 size)
 // Use this when reading read_pos from ee thread
 __fi s32 VU_Thread::GetReadPos()
 {
-	return read_pos.load();
+	return read_pos.load(std::memory_order_acquire);
 }
 // Use this when reading write_pos from vu thread
 __fi s32 VU_Thread::GetWritePos()
 {
-	return AtomicRead(volatize(write_pos));
+	return write_pos.load(std::memory_order_acquire);
 }
 // Gets the effective write pointer after adding write_offset
 __fi u32* VU_Thread::GetWritePtr()
 {
-	return &buffer[(write_pos + write_offset) & buffer_mask];
+	return &buffer[(write_pos.load(std::memory_order_relaxed) + write_offset) & buffer_mask];
 }
 __fi void VU_Thread::incReadPos(s32 offset)
 { // Offset in u32 sizes
-	read_pos = (read_pos + offset) & buffer_mask;
+	read_pos.store((read_pos.load(std::memory_order_relaxed) + offset) & buffer_mask, std::memory_order_release);
 }
 __fi void VU_Thread::incWritePos()
 { // Adds write_offset
-	s32 temp = (write_pos + write_offset) & buffer_mask;
+	s32 temp = (write_pos.load(std::memory_order_relaxed) + write_offset) & buffer_mask;
 	write_offset = 0;
-	AtomicExchange(volatize(write_pos), temp);
+	write_pos.store(temp, std::memory_order_release);
 	if (MTVU_ALWAYS_KICK) KickStart();
 	if (MTVU_SYNC_MODE) WaitVU();
 }
 __fi u32 VU_Thread::Read()
 {
-	u32 ret = buffer[read_pos];
+	u32 ret = buffer[read_pos.load(std::memory_order_relaxed)];
 	incReadPos(1);
 	return ret;
 }
 __fi void VU_Thread::Read(void* dest, u32 size)
 {
-	memcpy(dest, &buffer[read_pos], size);
+	memcpy(dest, &buffer[read_pos.load(std::memory_order_relaxed)], size);
 	incReadPos(size_u32(size));
 }
 __fi void VU_Thread::ReadRegs(VIFregisters* dest)
 {
-	VIFregistersMTVU* src = (VIFregistersMTVU*)&buffer[read_pos];
+	VIFregistersMTVU* src = (VIFregistersMTVU*)&buffer[read_pos.load(std::memory_order_relaxed)];
 	dest->cycle = src->cycle;
 	dest->mode = src->mode;
 	dest->num = src->num;
@@ -265,19 +268,21 @@ __fi void VU_Thread::WriteRegs(VIFregisters* src)
 // Used for vu cycle stealing hack
 u32 VU_Thread::Get_vuCycles()
 {
-	return (AtomicRead(vuCycles[0]) + AtomicRead(vuCycles[1])
-			+ AtomicRead(vuCycles[2]) + AtomicRead(vuCycles[3])) >> 2;
+	return (vuCycles[0].load(std::memory_order_relaxed) +
+			vuCycles[1].load(std::memory_order_relaxed) +
+			vuCycles[2].load(std::memory_order_relaxed) +
+			vuCycles[3].load(std::memory_order_relaxed)) >> 2;
 }
 void VU_Thread::KickStart(bool forceKick)
 {
 	if ((forceKick && !semaEvent.Count())
-		|| (!isBusy && GetReadPos() != write_pos)) semaEvent.Post();
+		|| (!isBusy.load(std::memory_order_relaxed) && GetReadPos() != write_pos.load(std::memory_order_relaxed))) semaEvent.Post();
 }
 bool VU_Thread::IsDone()
 {
-	return !isBusy && GetReadPos() == GetWritePos();
+	return !isBusy.load(std::memory_order_relaxed) && GetReadPos() == GetWritePos();
 }
 void VU_Thread::WaitVU()

View File

@@ -30,9 +30,9 @@ class VU_Thread : public pxThread {
 	static const s32 buffer_size = (_1mb * 16) / sizeof(s32);
 	static const u32 buffer_mask = buffer_size - 1;
 	__aligned(4) u32 buffer[buffer_size];
-	__aligned(4) std::atomic<int> read_pos; // Only modified by VU thread
-	__aligned(4) std::atomic<bool> isBusy; // Is thread processing data?
-	__aligned(4) s32 write_pos; // Only modified by EE thread
+	std::atomic<int> read_pos; // Only modified by VU thread
+	std::atomic<bool> isBusy; // Is thread processing data?
+	std::atomic<int> write_pos; // Only modified by EE thread
 	__aligned(4) s32 write_offset; // Only modified by EE thread
 	__aligned(4) Mutex mtxBusy;
 	__aligned(4) Semaphore semaEvent;
@@ -43,7 +43,7 @@ public:
 	__aligned16 vifStruct vif;
 	__aligned16 VIFregisters vifRegs;
 	__aligned(4) Semaphore semaXGkick;
-	__aligned(4) u32 vuCycles[4]; // Used for VU cycle stealing hack
+	__aligned(4) std::atomic<unsigned int> vuCycles[4]; // Used for VU cycle stealing hack
 	__aligned(4) u32 vuCycleIdx; // Used for VU cycle stealing hack
 	VU_Thread(BaseVUmicroCPU*& _vuCPU, VURegs& _vuRegs);

View File

@@ -25,7 +25,7 @@
 GSTextureSW::GSTextureSW(int type, int width, int height)
 {
-	m_mapped.clear();
+	m_mapped.clear(std::memory_order_release);
 	m_size = GSVector2i(width, height);
 	m_type = type;
 	m_format = 0;
@@ -68,7 +68,7 @@ bool GSTextureSW::Map(GSMap& m, const GSVector4i* r)
 	if(m_data != NULL && r2.left >= 0 && r2.right <= m_size.x && r2.top >= 0 && r2.bottom <= m_size.y)
 	{
-		if (!m_mapped.test_and_set())
+		if (!m_mapped.test_and_set(std::memory_order_acquire))
 		{
 			m.bits = (uint8*)m_data + m_pitch * r2.top + (r2.left << 2);
 			m.pitch = m_pitch;
@@ -82,7 +82,7 @@ bool GSTextureSW::Map(GSMap& m, const GSVector4i* r)
 void GSTextureSW::Unmap()
 {
-	m_mapped.clear();
+	m_mapped.clear(std::memory_order_release);
 }
 bool GSTextureSW::Save(const string& fn, bool user_image, bool dds)
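The acquire on test_and_set and the release on clear make the mapped flag behave like a tiny try-lock: a successful test_and_set cannot be reordered after the accesses to the mapped pixels, and clear cannot be reordered before them. A minimal sketch of the same pattern (names hypothetical):

#include <atomic>

std::atomic_flag mapped = ATOMIC_FLAG_INIT;   // hypothetical stand-in for m_mapped

bool try_map()
{
    // Returns false if somebody already holds the mapping.
    // acquire: subsequent reads/writes of the texture data stay after this point.
    return !mapped.test_and_set(std::memory_order_acquire);
}

void unmap()
{
    // release: all accesses made while mapped become visible before the flag is cleared.
    mapped.clear(std::memory_order_release);
}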