mirror of https://github.com/PCSX2/pcsx2.git
ReorderingMTGS:

* Make PCSX2 bare minimum reqs include SSE as well as MMX.
* Minor bugfix which could have affected MTGS performance.
* Default GIFpath stuff to use SSE opts.

git-svn-id: http://pcsx2.googlecode.com/svn/branches/ReorderingMTGS@3491 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent c8f16a1cde
commit 2f3452ec25
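For orientation (this note is not part of the commit): the renames in this diff (m_RingPos to m_ReadPos, m_packet_ringpos to m_packet_writepos) concern the single-producer/single-consumer ring buffer shared between the EE and GS threads. A minimal, hypothetical sketch of the cursor convention the code relies on -- equal cursors mean empty, and indices wrap with a power-of-two mask; RingSize/RingMask stand in for RingBufferSize/RingBufferMask from the diff:

```cpp
#include <cstdint>

// Illustrative only; not PCSX2 code.
static const uint32_t RingSize = 0x10000;       // must be a power of two
static const uint32_t RingMask = RingSize - 1;

struct RingCursors
{
    uint32_t readpos  = 0;   // advanced only by the consumer (GS thread)
    uint32_t writepos = 0;   // advanced only by the producer (EE thread)

    // note: when readpos == writepos, the fifo is empty (same convention as the diff)
    bool IsEmpty() const { return readpos == writepos; }

    void AdvanceRead(uint32_t qwc)  { readpos  = (readpos  + qwc) & RingMask; }
    void AdvanceWrite(uint32_t qwc) { writepos = (writepos + qwc) & RingMask; }
};
```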
@@ -49,7 +49,6 @@ void gsSetRegionMode( GS_RegionMode region )
 void gsInit()
 {
 	memzero(g_RealGSMem);
-	GIFPath_Initialize();
 }
 
 extern bool SIGNAL_IMR_Pending;
20	pcsx2/GS.h
@@ -18,15 +18,6 @@
 #include "Common.h"
 #include "System/SysThreads.h"
 
-enum CpuExtType
-{
-	CpuExt_Base,
-	CpuExt_MMX,
-	CpuExt_SSE,
-	CpuExt_SSE2,
-	CpuExt_SSE41,
-};
-
 extern __aligned16 u8 g_RealGSMem[Ps2MemSize::GSregs];
 
 enum CSR_FifoState
@@ -282,8 +273,8 @@ class SysMtgsThread : public SysThreadBase
 	typedef SysThreadBase _parent;
 
 public:
-	// note: when m_RingPos == m_WritePos, the fifo is empty
-	uint m_RingPos;		// cur pos gs is reading from
+	// note: when m_ReadPos == m_WritePos, the fifo is empty
+	uint m_ReadPos;		// cur pos gs is reading from
 	uint m_WritePos;	// cur pos ee thread is writing to
 
 	volatile bool m_RingBufferIsBusy;
@@ -313,7 +304,7 @@ public:
 
 	uint m_packet_startpos;	// size of the packet (data only, ie. not including the 16 byte command!)
 	uint m_packet_size;		// size of the packet (data only, ie. not including the 16 byte command!)
-	uint m_packet_ringpos;	// index of the data location in the ringbuffer.
+	uint m_packet_writepos;	// index of the data location in the ringbuffer.
 
 #ifdef RINGBUF_DEBUG_STACK
 	Threading::Mutex m_lock_Stack;
@@ -356,9 +347,10 @@ protected:
 	void OnResumeInThread( bool IsSuspended );
 	void OnCleanupInThread();
 
+	void GenericStall( uint size );
+
 	// Used internally by SendSimplePacket type functions
-	uint _PrepForSimplePacket();
-	void _FinishSimplePacket( uint future_writepos );
+	void _FinishSimplePacket();
 
 	void ExecuteTaskInThread();
 };
201	pcsx2/MTGS.cpp
@@ -70,11 +70,11 @@ void SysMtgsThread::OnStart()
 {
 	m_PluginOpened = false;
 
-	m_RingPos = 0;
+	m_ReadPos = 0;
 	m_WritePos = 0;
 	m_RingBufferIsBusy = false;
 	m_packet_size = 0;
-	m_packet_ringpos = 0;
+	m_packet_writepos = 0;
 
 	m_QueuedFrameCount = 0;
 	m_VsyncSignalListener = false;
@@ -98,14 +98,14 @@ void SysMtgsThread::OnResumeReady()
 
 void SysMtgsThread::ResetGS()
 {
-	pxAssertDev( !IsOpen() || (m_RingPos == m_WritePos), "Must close or terminate the GS thread prior to gsReset." );
+	pxAssertDev( !IsOpen() || (m_ReadPos == m_WritePos), "Must close or terminate the GS thread prior to gsReset." );
 
 	// MTGS Reset process:
 	//  * clear the ringbuffer.
 	//  * Signal a reset.
 	//  * clear the path and byRegs structs (used by GIFtagDummy)
 
-	m_RingPos = m_WritePos;
+	m_ReadPos = m_WritePos;
 	m_QueuedFrameCount = 0;
 	m_VsyncSignalListener = false;
 
@@ -134,13 +134,13 @@ void SysMtgsThread::PostVsyncEnd()
 
 	uint packsize = sizeof(RingCmdPacket_Vsync) / 16;
 	PrepDataPacket(GS_RINGTYPE_VSYNC, packsize);
-	MemCopy_WrappedDest( (u128*)PS2MEM_GS, RingBuffer.m_Ring, m_packet_ringpos, RingBufferSize, 0xf );
+	MemCopy_WrappedDest( (u128*)PS2MEM_GS, RingBuffer.m_Ring, m_packet_writepos, RingBufferSize, 0xf );
 
 	u32* remainder = (u32*)GetDataPacketPtr();
 	remainder[0] = GSCSRr;
 	remainder[1] = GSIMR;
 	(GSRegSIGBLID&)remainder[2] = GSSIGLBLID;
-	m_packet_ringpos = (m_packet_ringpos + 1) & RingBufferMask;
+	m_packet_writepos = (m_packet_writepos + 1) & RingBufferMask;
 
 	SendDataPacket();
 
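The PostVsyncEnd change above writes the vsync register block through MemCopy_WrappedDest, a helper that copies quadwords into the ring while wrapping the destination index at the end of the buffer. A hedged sketch of what such a wrapped copy does (hypothetical helper; the real signature and mask handling live in PCSX2's own headers):

```cpp
#include <cstdint>
#include <cstring>

struct u128 { uint64_t lo, hi; };   // stand-in for PCSX2's u128

// Hypothetical sketch: copy 'qwc' quadwords from a linear source into a ring
// buffer whose size is a power of two, wrapping the destination cursor.
static void WrappedDestCopy(const u128* src, u128* ringBase, uint32_t& destStart,
                            uint32_t ringSize, uint32_t qwc)
{
    uint32_t firstChunk = ringSize - destStart;       // room before the wrap point
    if (firstChunk > qwc) firstChunk = qwc;

    std::memcpy(&ringBase[destStart], src, firstChunk * sizeof(u128));
    std::memcpy(ringBase, src + firstChunk, (qwc - firstChunk) * sizeof(u128));

    destStart = (destStart + qwc) & (ringSize - 1);   // cursor ends just past the copy
}
```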
@@ -155,7 +155,7 @@ void SysMtgsThread::PostVsyncEnd()
 	if ((AtomicIncrement(m_QueuedFrameCount) < EmuConfig.GS.VsyncQueueSize) || (!EmuConfig.GS.VsyncEnable && !EmuConfig.GS.FrameLimitEnable)) return;
 
 	m_VsyncSignalListener = true;
-	//Console.WriteLn( Color_Blue, "(EEcore Sleep) Vsync\t\tringpos=0x%06x, writepos=0x%06x", volatize(m_RingPos), m_WritePos );
+	//Console.WriteLn( Color_Blue, "(EEcore Sleep) Vsync\t\tringpos=0x%06x, writepos=0x%06x", volatize(m_ReadPos), m_WritePos );
 	m_sem_Vsync.WaitNoCancel();
 }
 
@@ -239,6 +239,8 @@ void SysMtgsThread::OpenPlugin()
 
 class RingBufferLock : public ScopedLock
 {
+	typedef ScopedLock _parent;
+
 protected:
 	SysMtgsThread&	m_mtgs;
 
@@ -254,6 +256,18 @@ public:
 	{
 		m_mtgs.m_RingBufferIsBusy = false;
 	}
+
+	void Acquire()
+	{
+		_parent::Acquire();
+		m_mtgs.m_RingBufferIsBusy = true;
+	}
+
+	void Release()
+	{
+		m_mtgs.m_RingBufferIsBusy = false;
+		_parent::Release();
+	}
 };
 
 void SysMtgsThread::ExecuteTaskInThread()
@@ -262,31 +276,33 @@ void SysMtgsThread::ExecuteTaskInThread()
 	PacketTagType prevCmd;
 #endif
 
+	RingBufferLock busy( *this );
+
 	while( true )
 	{
+		busy.Release();
+
 		// Performance note: Both of these perform cancellation tests, but pthread_testcancel
 		// is very optimized (only 1 instruction test in most cases), so no point in trying
 		// to avoid it.
 
 		m_sem_event.WaitWithoutYield();
 		StateCheckInThread();
+		busy.Acquire();
 
-		{
-			RingBufferLock busy( *this );
-
-			// note: m_RingPos is intentionally not volatile, because it should only
-			// ever be modified by this thread.
-			while( m_RingPos != volatize(m_WritePos))
+		// note: m_ReadPos is intentionally not volatile, because it should only
+		// ever be modified by this thread.
+		while( m_ReadPos != volatize(m_WritePos))
 		{
 			if( EmuConfig.GS.DisableOutput )
 			{
-				m_RingPos = m_WritePos;
+				m_ReadPos = m_WritePos;
 				continue;
 			}
 
-			pxAssert( m_RingPos < RingBufferSize );
+			pxAssert( m_ReadPos < RingBufferSize );
 
-			const PacketTagType& tag = (PacketTagType&)RingBuffer[m_RingPos];
+			const PacketTagType& tag = (PacketTagType&)RingBuffer[m_ReadPos];
 			u32 ringposinc = 1;
 
 #ifdef RINGBUF_DEBUG_STACK
|
||||||
|
|
||||||
m_lock_Stack.Lock();
|
m_lock_Stack.Lock();
|
||||||
uptr stackpos = ringposStack.back();
|
uptr stackpos = ringposStack.back();
|
||||||
if( stackpos != m_RingPos )
|
if( stackpos != m_ReadPos )
|
||||||
{
|
{
|
||||||
Console.Error( "MTGS Ringbuffer Critical Failure ---> %x to %x (prevCmd: %x)\n", stackpos, m_RingPos, prevCmd.command );
|
Console.Error( "MTGS Ringbuffer Critical Failure ---> %x to %x (prevCmd: %x)\n", stackpos, m_ReadPos, prevCmd.command );
|
||||||
}
|
}
|
||||||
pxAssert( stackpos == m_RingPos );
|
pxAssert( stackpos == m_ReadPos );
|
||||||
prevCmd = tag;
|
prevCmd = tag;
|
||||||
ringposStack.pop_back();
|
ringposStack.pop_back();
|
||||||
m_lock_Stack.Release();
|
m_lock_Stack.Release();
|
||||||
|
@@ -308,7 +324,7 @@ void SysMtgsThread::ExecuteTaskInThread()
 			{
 				case GS_RINGTYPE_P1:
 				{
-					uint datapos = (m_RingPos+1) & RingBufferMask;
+					uint datapos = (m_ReadPos+1) & RingBufferMask;
 					const int qsize = tag.data[0];
 					const u128* data = &RingBuffer[datapos];
 
@@ -333,7 +349,7 @@ void SysMtgsThread::ExecuteTaskInThread()
 
 				case GS_RINGTYPE_P2:
 				{
-					uint datapos = (m_RingPos+1) & RingBufferMask;
+					uint datapos = (m_ReadPos+1) & RingBufferMask;
 					const int qsize = tag.data[0];
 					const u128* data = &RingBuffer[datapos];
 
@@ -358,7 +374,7 @@ void SysMtgsThread::ExecuteTaskInThread()
 
 				case GS_RINGTYPE_P3:
 				{
-					uint datapos = (m_RingPos+1) & RingBufferMask;
+					uint datapos = (m_ReadPos+1) & RingBufferMask;
 					const int qsize = tag.data[0];
 					const u128* data = &RingBuffer[datapos];
 
@@ -393,11 +409,13 @@ void SysMtgsThread::ExecuteTaskInThread()
 					MTGS_LOG( "(MTGS Packet Read) ringtype=Vsync, field=%u, skip=%s", !!(((u32&)RingBuffer.Regs[0x1000]) & 0x2000) ? 0 : 1, tag.data[1] ? "true" : "false" );
 
 					// Mail in the important GS registers.
-					RingCmdPacket_Vsync& local((RingCmdPacket_Vsync&)RingBuffer[m_RingPos+1]);
-					memcpy_fast( RingBuffer.Regs, local.regset1, sizeof(local.regset1));
-					((u32&)RingBuffer.Regs[0x1000]) = local.csr;
-					((u32&)RingBuffer.Regs[0x1010]) = local.imr;
-					((GSRegSIGBLID&)RingBuffer.Regs[0x1080]) = local.siglblid;
+					uint datapos = (m_ReadPos+1) & RingBufferMask;
+					MemCopy_WrappedSrc( RingBuffer.m_Ring, datapos, RingBufferSize, (u128*)RingBuffer.Regs, 0xf );
+
+					u32* remainder = (u32*)&RingBuffer[datapos];
+					GSCSRr = remainder[0];
+					GSIMR = remainder[1];
+					GSSIGLBLID = (GSRegSIGBLID&)remainder[2];
 
 					// CSR & 0x2000; is the pageflip id.
 					GSvsync(((u32&)RingBuffer.Regs[0x1000]) & 0x2000);
@@ -454,9 +472,9 @@ void SysMtgsThread::ExecuteTaskInThread()
 
 #ifdef PCSX2_DEVBUILD
 				default:
-					Console.Error("GSThreadProc, bad packet (%x) at m_RingPos: %x, m_WritePos: %x", tag.command, m_RingPos, m_WritePos);
+					Console.Error("GSThreadProc, bad packet (%x) at m_ReadPos: %x, m_WritePos: %x", tag.command, m_ReadPos, m_WritePos);
 					pxFail( "Bad packet encountered in the MTGS Ringbuffer." );
-					m_RingPos = m_WritePos;
+					m_ReadPos = m_WritePos;
 					continue;
 #else
 				// Optimized performance in non-Dev builds.
@@ -466,28 +484,29 @@ void SysMtgsThread::ExecuteTaskInThread()
 				}
 			}
 
-			uint newringpos = (m_RingPos + ringposinc) & RingBufferMask;
+			uint newringpos = (m_ReadPos + ringposinc) & RingBufferMask;
 
 			if( EmuConfig.GS.SynchronousMTGS )
 			{
 				pxAssert( m_WritePos == newringpos );
 			}
 
-			m_RingPos = newringpos;
+			m_ReadPos = newringpos;
 
 			if( m_SignalRingEnable != 0 )
 			{
 				// The EEcore has requested a signal after some amount of processed data.
 				if( AtomicExchangeSub( m_SignalRingPosition, ringposinc ) <= 0 )
 				{
-					// Make sure to post the signal after the m_RingPos has been updated...
+					// Make sure to post the signal after the m_ReadPos has been updated...
 					AtomicExchange( m_SignalRingEnable, 0 );
 					m_sem_OnRingReset.Post();
 					continue;
 				}
 			}
 		}
-		}
+
+		busy.Release();
 
 		// Safety valve in case standard signals fail for some reason -- this ensures the EEcore
 		// won't sleep the eternity, even if SignalRingPosition didn't reach 0 for some reason.
@@ -503,7 +522,7 @@ void SysMtgsThread::ExecuteTaskInThread()
 		if (!!AtomicExchange(m_VsyncSignalListener, false))
 			m_sem_Vsync.Post();
 
-		//Console.Warning( "(MTGS Thread) Nothing to do! ringpos=0x%06x", m_RingPos );
+		//Console.Warning( "(MTGS Thread) Nothing to do! ringpos=0x%06x", m_ReadPos );
 	}
 }
 
@@ -543,7 +562,7 @@ void SysMtgsThread::WaitGS()
 	if( m_ExecMode == ExecMode_NoThreadYet || !IsRunning() ) return;
 	if( !pxAssertDev( IsOpen(), "MTGS Warning! WaitGS issued on a closed thread." ) ) return;
 
-	if( volatize(m_RingPos) != m_WritePos )
+	if( volatize(m_ReadPos) != m_WritePos )
 	{
 		SetEvent();
 		RethrowException();
@@ -551,7 +570,7 @@ void SysMtgsThread::WaitGS()
 		do {
 			m_mtx_RingBufferBusy.Wait();
 			RethrowException();
-		} while( volatize(m_RingPos) != m_WritePos );
+		} while( volatize(m_ReadPos) != m_WritePos );
 	}
 
 	// Completely synchronize GS and MTGS register states.
@@ -570,7 +589,7 @@ void SysMtgsThread::SetEvent()
 
 u8* SysMtgsThread::GetDataPacketPtr() const
 {
-	return (u8*)&RingBuffer[m_packet_ringpos & RingBufferMask];
+	return (u8*)&RingBuffer[m_packet_writepos & RingBufferMask];
 }
 
 // Closes the data packet send command, and initiates the gs thread (if needed).
@@ -579,14 +598,14 @@ void SysMtgsThread::SendDataPacket()
 	// make sure a previous copy block has been started somewhere.
 	pxAssert( m_packet_size != 0 );
 
-	uint actualSize = ((m_packet_ringpos - m_packet_startpos) & RingBufferMask)-1;
+	uint actualSize = ((m_packet_writepos - m_packet_startpos) & RingBufferMask)-1;
 	pxAssert( actualSize <= m_packet_size );
-	pxAssert( m_packet_ringpos < RingBufferSize );
+	pxAssert( m_packet_writepos < RingBufferSize );
 
 	PacketTagType& tag = (PacketTagType&)RingBuffer[m_packet_startpos];
 	tag.data[0] = actualSize;
 
-	m_WritePos = m_packet_ringpos;
+	m_WritePos = m_packet_writepos;
 
 	if( EmuConfig.GS.SynchronousMTGS )
 	{
@@ -603,29 +622,23 @@ void SysMtgsThread::SendDataPacket()
 	//m_PacketLocker.Release();
 }
 
-void SysMtgsThread::PrepDataPacket( MTGS_RingCommand cmd, u32 size )
+void SysMtgsThread::GenericStall( uint size )
 {
 	// Note on volatiles: m_WritePos is not modified by the GS thread, so there's no need
 	// to use volatile reads here. We do cache it though, since we know it never changes,
 	// except for calls to RingbufferRestert() -- handled below.
-	uint writepos = m_WritePos;
-
-	// Checks if a previous copy was started without an accompanying call to GSRINGBUF_DONECOPY
-	pxAssert( m_packet_size == 0 );
+	const uint writepos = m_WritePos;
 
 	// Sanity checks! (within the confines of our ringbuffer please!)
 	pxAssert( size < RingBufferSize );
 	pxAssert( writepos < RingBufferSize );
 
-	m_packet_size = size;
-	++size;		// takes into account our RingCommand QWC.
-
 	// generic gs wait/stall.
 	// if the writepos is past the readpos then we're safe.
 	// But if not then we need to make sure the readpos is outside the scope of
 	// the block about to be written (writepos + size)
 
-	uint readpos = volatize(m_RingPos);
+	uint readpos = volatize(m_ReadPos);
 	uint endpos = writepos+size;
 	uint freeroom;
 
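GenericStall, split out above, is the producer-side wait: it measures how much room remains between the write cursor and the (volatile) read cursor and stalls until a block of `size` quadwords fits. A hedged sketch of that calculation and wait (names are illustrative; the real code picks between a spin-wait and the m_SignalRingEnable / m_sem_OnRingReset handshake shown in the following hunks):

```cpp
#include <atomic>
#include <cstdint>
#include <thread>

static const uint32_t RingBufferSize = 0x10000;   // power of two, as in the diff

// Free quadwords between the producer's write cursor and the consumer's read cursor.
static uint32_t FreeRoom(uint32_t writepos, uint32_t readpos)
{
    return (writepos < readpos) ? (readpos - writepos)
                                : (RingBufferSize - (writepos - readpos));
}

// Hypothetical stand-in for GenericStall: block the producer until 'size'
// quadwords (command tag included by the caller) fit ahead of the read cursor.
static void StallForRoom(const std::atomic<uint32_t>& readpos, uint32_t writepos, uint32_t size)
{
    while (FreeRoom(writepos, readpos.load(std::memory_order_acquire)) <= size)
        std::this_thread::yield();   // real code: spin-wait or semaphore handshake
}
```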
@@ -662,7 +675,7 @@ void SysMtgsThread::PrepDataPacket( MTGS_RingCommand cmd, u32 size )
 			AtomicExchange( m_SignalRingEnable, 1 );
 			SetEvent();
 			m_sem_OnRingReset.WaitWithoutYield();
-			readpos = volatize(m_RingPos);
+			readpos = volatize(m_ReadPos);
 			//Console.WriteLn( Color_Blue, "(EEcore Awake) Report!\tringpos=0x%06x", readpos );
 		} while( (writepos < readpos) && (writepos+size >= readpos) );
 
@@ -674,16 +687,17 @@ void SysMtgsThread::PrepDataPacket( MTGS_RingCommand cmd, u32 size )
 			SetEvent();
 			do {
 				SpinWait();
-				readpos = volatize(m_RingPos);
+				readpos = volatize(m_ReadPos);
 			} while( (writepos < readpos) && (writepos+size >= readpos) );
 		}
 	}
+}
 
-#ifdef RINGBUF_DEBUG_STACK
-	m_lock_Stack.Lock();
-	ringposStack.push_front( writepos );
-	m_lock_Stack.Release();
-#endif
+void SysMtgsThread::PrepDataPacket( MTGS_RingCommand cmd, u32 size )
+{
+	m_packet_size = size;
+	++size;		// takes into account our RingCommand QWC.
+	GenericStall(size);
 
 	// Command qword: Low word is the command, and the high word is the packet
 	// length in SIMDs (128 bits).
@@ -692,7 +706,7 @@ void SysMtgsThread::PrepDataPacket( MTGS_RingCommand cmd, u32 size )
 	tag.command = cmd;
 	tag.data[0] = m_packet_size;
 	m_packet_startpos = m_WritePos;
-	m_packet_ringpos = (m_WritePos + 1) & RingBufferMask;
+	m_packet_writepos = (m_WritePos + 1) & RingBufferMask;
 }
 
 // Returns the amount of giftag data processed (in simd128 values).
@@ -707,71 +721,10 @@ void SysMtgsThread::PrepDataPacket( GIF_PATH pathidx, u32 size )
 	PrepDataPacket( (MTGS_RingCommand)pathidx, size );
 }
 
-__forceinline uint SysMtgsThread::_PrepForSimplePacket()
+__forceinline void SysMtgsThread::_FinishSimplePacket()
 {
-#ifdef RINGBUF_DEBUG_STACK
-	m_lock_Stack.Lock();
-	ringposStack.push_front( m_WritePos );
-	m_lock_Stack.Release();
-#endif
-
-	uint future_writepos = m_WritePos+1;
-	pxAssert( future_writepos <= RingBufferSize );
-
-	future_writepos &= RingBufferMask;
-	if( future_writepos == 0 )
-		m_QueuedFrameCount = 0;
-
-	uint readpos = volatize(m_RingPos);
-	if( future_writepos == readpos )
-	{
-		// The ringbuffer read pos is blocking the future write position, so stall out
-		// until the read position has moved.
-
-		uint freeroom;
-
-		if (future_writepos < readpos)
-			freeroom = readpos - future_writepos;
-		else
-			freeroom = RingBufferSize - (future_writepos - readpos);
-
-		uint totalAccum = RingBufferSize - freeroom;
-
-		uint somedone = totalAccum / 4;
-
-		if( somedone > 0x80 )
-		{
-			m_SignalRingPosition = somedone;
-
-			//Console.WriteLn( Color_Blue, "(EEcore Sleep) PrepSimplePacket\tringpos=0x%06x, writepos=0x%06x, signalpos=0x%06x", readpos, m_WritePos, m_SignalRingPosition );
-
-			do {
-				AtomicExchange( m_SignalRingEnable, 1 );
-				SetEvent();
-				m_sem_OnRingReset.WaitWithoutYield();
-				readpos = volatize(m_RingPos);
-				//Console.WriteLn( Color_Blue, "(MTGS Sync) EEcore Simple Post-sleep Report!\tringpos=0x%06x", readpos );
-			} while( future_writepos == readpos );
-
-			pxAssertDev( m_SignalRingPosition <= 0, "MTGS Thread Synchronization Error" );
-		}
-		else
-		{
-			//Console.WriteLn( Color_StrongGray, "(EEcore Spin) PrepSimplePacket!" );
-
-			SetEvent();
-			do {
-				SpinWait();
-			} while( future_writepos == volatize(m_RingPos) );
-		}
-	}
-
-	return future_writepos;
-}
-
-__forceinline void SysMtgsThread::_FinishSimplePacket( uint future_writepos )
-{
-	pxAssert( future_writepos != volatize(m_RingPos) );
+	uint future_writepos = (m_WritePos+1) & RingBufferMask;
+	pxAssert( future_writepos != volatize(m_ReadPos) );
+
 	m_WritePos = future_writepos;
 
 	if( EmuConfig.GS.SynchronousMTGS )
@@ -784,7 +737,7 @@ void SysMtgsThread::SendSimplePacket( MTGS_RingCommand type, int data0, int data
 {
 	//ScopedLock locker( m_PacketLocker );
 
-	const uint thefuture = _PrepForSimplePacket();
+	GenericStall(1);
 	PacketTagType& tag = (PacketTagType&)RingBuffer[m_WritePos];
 
 	tag.command = type;
@@ -792,21 +745,21 @@
 	tag.data[1] = data1;
 	tag.data[2] = data2;
 
-	_FinishSimplePacket( thefuture );
+	_FinishSimplePacket();
 }
 
 void SysMtgsThread::SendPointerPacket( MTGS_RingCommand type, u32 data0, void* data1 )
 {
 	//ScopedLock locker( m_PacketLocker );
 
-	const uint thefuture = _PrepForSimplePacket();
+	GenericStall(1);
 	PacketTagType& tag = (PacketTagType&)RingBuffer[m_WritePos];
 
 	tag.command = type;
	tag.data[0] = data0;
 	*(uptr*)&tag.data[1] = (uptr)data1;
 
-	_FinishSimplePacket( thefuture );
+	_FinishSimplePacket();
 }
 
 void SysMtgsThread::SendGameCRC( u32 crc )
@@ -189,13 +189,13 @@ void Pcsx2App::DetectCpuAndUserMode()
 	x86caps.CountCores();
 	x86caps.SIMD_EstablishMXCSRmask();
 
-	if( !x86caps.hasMultimediaExtensions )
+	if( !x86caps.hasMultimediaExtensions || !x86caps.hasStreamingSIMDExtensions )
 	{
-		// Note: due to memcpy_fast, we need minimum MMX even for interpreters. This will
-		// hopefully change later once we have a dynamically recompiled memcpy.
+		// Note: Due to optimizations to GIFpath parsers, memcpy, and possibly other things, we need
+		// a bare minimum of SSE supported by the CPU.
 		throw Exception::HardwareDeficiency()
-			.SetDiagMsg(L"Critical Failure: MMX Extensions not available.")
-			.SetUserMsg(_("MMX extensions are not available. PCSX2 requires cpu with MMX extension support to run."));
+			.SetDiagMsg(L"Critical Failure: SSE Extensions not available.")
+			.SetUserMsg(_("SSE extensions are not available. PCSX2 requires a cpu that supports the SSE instruction set."));
 	}
 
 	ReadUserModeSettings();
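For comparison only: the startup check above uses PCSX2's own x86caps flags (hasMultimediaExtensions, hasStreamingSIMDExtensions). A hedged, minimal equivalent using the GCC/Clang builtin that queries cached CPUID results; this is not how PCSX2 does it:

```cpp
#include <cstdio>
#include <cstdlib>

// Illustrative only. __builtin_cpu_supports is a GCC/Clang builtin; MSVC would
// need __cpuid from <intrin.h> instead.
static void RequireMmxAndSse()
{
    if (!__builtin_cpu_supports("mmx") || !__builtin_cpu_supports("sse"))
    {
        std::fputs("SSE extensions are not available. "
                   "PCSX2 requires a cpu that supports the SSE instruction set.\n", stderr);
        std::exit(EXIT_FAILURE);
    }
}
```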
@@ -97,10 +97,10 @@ struct GIFPath
 	u8 GetReg();
 	bool IsActive() const;
 
-	template< CpuExtType CpuExt, bool Aligned >
+	template< bool Aligned >
 	void SetTag(const void* mem);
 
-	template< CpuExtType CpuExt, int pathidx >
+	template< GIF_PATH pathidx, bool Aligned >
 	int CopyTag(const u128* pMem, u32 size);
 
 	int ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size);
@@ -291,13 +291,10 @@ __forceinline void GIFPath::PrepPackedRegs()
 }
 
 
-template< CpuExtType CpuExt, bool Aligned >
+template< bool Aligned >
 __forceinline void GIFPath::SetTag(const void* mem)
 {
-	if( CpuExt >= CpuExt_SSE )
 	_mm_store_ps( (float*)&tag, Aligned ? _mm_load_ps((const float*)mem) : _mm_loadu_ps((const float*)mem) );
-	else
-		const_cast<GIFTAG&>(tag) = *((GIFTAG*)mem);
 
 	nloop = tag.NLOOP;
 	curreg = 0;
@@ -391,7 +388,7 @@ __forceinline int GIFPath::ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 s
 	while (size > 0) {
 		if (!nloop) {
 
-			SetTag<CpuExt_Base,false>(pMem);
+			SetTag<false>(pMem);
 			incTag(1);
 		}
 		else
@@ -567,10 +564,7 @@ __forceinline void MemCopy_WrappedSrc( const u128* srcBase, uint& srcStart, uint
 }
 
 #define copyTag() do { \
-	if( CpuExt >= CpuExt_SSE ) \
-		_mm_store_ps( (float*)&RingBuffer.m_Ring[ringpos], (pathidx!=GIF_PATH_2) ? _mm_load_ps((float*)pMem128) : _mm_loadu_ps((float*)pMem128)); \
-	else \
-		RingBuffer.m_Ring[ringpos] = *pMem128; \
+	_mm_store_ps( (float*)&RingBuffer.m_Ring[ringpos], Aligned ? _mm_load_ps((float*)pMem128) : _mm_loadu_ps((float*)pMem128)); \
 	++pMem128; --size; \
 	ringpos = (ringpos+1)&RingBufferMask; \
 } while(false)
|
||||||
// size - max size of incoming data stream, in qwc (simd128). If the path is PATH1, and the
|
// size - max size of incoming data stream, in qwc (simd128). If the path is PATH1, and the
|
||||||
// path does not terminate (EOP) within the specified size, it is assumed that the path must
|
// path does not terminate (EOP) within the specified size, it is assumed that the path must
|
||||||
// loop around to the start of VU memory and continue processing.
|
// loop around to the start of VU memory and continue processing.
|
||||||
template< CpuExtType CpuExt, int pathidx >
|
template< GIF_PATH pathidx, bool Aligned >
|
||||||
__forceinline int GIFPath::CopyTag(const u128* pMem128, u32 size)
|
__forceinline int GIFPath::CopyTag(const u128* pMem128, u32 size)
|
||||||
{
|
{
|
||||||
uint& ringpos = GetMTGS().m_packet_ringpos;
|
uint& ringpos = GetMTGS().m_packet_writepos;
|
||||||
const uint original_ringpos = ringpos;
|
const uint original_ringpos = ringpos;
|
||||||
|
|
||||||
u32 startSize = size; // Start Size
|
u32 startSize = size; // Start Size
|
||||||
|
@ -590,7 +584,7 @@ __forceinline int GIFPath::CopyTag(const u128* pMem128, u32 size)
|
||||||
while (size > 0) {
|
while (size > 0) {
|
||||||
if (!nloop) {
|
if (!nloop) {
|
||||||
|
|
||||||
SetTag<CpuExt, (pathidx!=GIF_PATH_2)>((u8*)pMem128);
|
SetTag<Aligned>((u8*)pMem128);
|
||||||
copyTag();
|
copyTag();
|
||||||
|
|
||||||
if(nloop > 0)
|
if(nloop > 0)
|
||||||
|
@ -795,6 +789,7 @@ __forceinline int GIFPath::CopyTag(const u128* pMem128, u32 size)
|
||||||
|
|
||||||
Console.Warning("GIFTAG error, size exceeded VU memory size %x", startSize);
|
Console.Warning("GIFTAG error, size exceeded VU memory size %x", startSize);
|
||||||
nloop = 0;
|
nloop = 0;
|
||||||
|
const_cast<GIFTAG&>(tag).EOP = 1;
|
||||||
|
|
||||||
// Don't send the packet to the GS -- its incomplete and might cause the GS plugin
|
// Don't send the packet to the GS -- its incomplete and might cause the GS plugin
|
||||||
// to get confused and die. >_<
|
// to get confused and die. >_<
|
||||||
|
@ -870,41 +865,25 @@ __forceinline int GIFPath::CopyTag(const u128* pMem128, u32 size)
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef int __fastcall FnType_CopyTag(const u128* pMem, u32 size);
|
|
||||||
|
|
||||||
static __aligned16 FnType_CopyTag* tbl_CopyTag[3];
|
|
||||||
|
|
||||||
// Parameters:
|
// Parameters:
|
||||||
// size - max size of incoming data stream, in qwc (simd128). If the path is PATH1, and the
|
// size - max size of incoming data stream, in qwc (simd128). If the path is PATH1, and the
|
||||||
// path does not terminate (EOP) within the specified size, it is assumed that the path must
|
// path does not terminate (EOP) within the specified size, it is assumed that the path must
|
||||||
// loop around to the start of VU memory and continue processing.
|
// loop around to the start of VU memory and continue processing.
|
||||||
template< CpuExtType CpuExt, int pathidx >
|
|
||||||
static int __fastcall _CopyTag_tmpl(const u128* pMem, u32 size)
|
|
||||||
{
|
|
||||||
return s_gifPath[pathidx].CopyTag<CpuExt,pathidx>(pMem, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void GIFPath_Initialize()
|
|
||||||
{
|
|
||||||
#ifdef __LINUX__
|
|
||||||
// It's already thrown an exception if it isn't SSE, and the check was giving me a compilation error.
|
|
||||||
// I could fix it, but why bother?
|
|
||||||
tbl_CopyTag[0] = _CopyTag_tmpl<CpuExt_SSE, 0>;
|
|
||||||
tbl_CopyTag[1] = _CopyTag_tmpl<CpuExt_SSE, 1>;
|
|
||||||
tbl_CopyTag[2] = _CopyTag_tmpl<CpuExt_SSE, 2>;
|
|
||||||
#else
|
|
||||||
tbl_CopyTag[0] = x86caps.hasStreamingSIMDExtensions ? _CopyTag_tmpl<CpuExt_SSE, 0> : _CopyTag_tmpl<CpuExt_Base, 0>;
|
|
||||||
tbl_CopyTag[1] = x86caps.hasStreamingSIMDExtensions ? _CopyTag_tmpl<CpuExt_SSE, 1> : _CopyTag_tmpl<CpuExt_Base, 1>;
|
|
||||||
tbl_CopyTag[2] = x86caps.hasStreamingSIMDExtensions ? _CopyTag_tmpl<CpuExt_SSE, 2> : _CopyTag_tmpl<CpuExt_Base, 2>;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
__forceinline int GIFPath_CopyTag(GIF_PATH pathidx, const u128* pMem, u32 size)
|
__forceinline int GIFPath_CopyTag(GIF_PATH pathidx, const u128* pMem, u32 size)
|
||||||
{
|
{
|
||||||
return tbl_CopyTag[pathidx](pMem, size);
|
switch( pathidx )
|
||||||
|
{
|
||||||
|
case GIF_PATH_1: return s_gifPath[GIF_PATH_1].CopyTag<GIF_PATH_1,true>(pMem, size);
|
||||||
|
case GIF_PATH_2: return s_gifPath[GIF_PATH_2].CopyTag<GIF_PATH_2,false>(pMem, size);
|
||||||
|
case GIF_PATH_3: return s_gifPath[GIF_PATH_3].CopyTag<GIF_PATH_3,true>(pMem, size);
|
||||||
|
|
||||||
|
jNO_DEFAULT;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Quick version for queueing PATH1 data.
|
return 0; // unreachable
|
||||||
|
}
|
||||||
|
|
||||||
|
// Quick version for queuing PATH1 data.
|
||||||
// This version calculates the real length of the packet data only. It does not process
|
// This version calculates the real length of the packet data only. It does not process
|
||||||
// IRQs or DMA status updates.
|
// IRQs or DMA status updates.
|
||||||
__forceinline int GIFPath_ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size)
|
__forceinline int GIFPath_ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size)
|
||||||
|
|
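The final hunk replaces the runtime CpuExt dispatch table with a switch that instantiates CopyTag per path, baking the alignment choice in at compile time (PATH1/PATH3 sources are 16-byte aligned, PATH2 is not). A hedged sketch of that pattern with the same intrinsics (illustrative, not the PCSX2 implementation):

```cpp
#include <xmmintrin.h>

// Illustrative only: the Aligned flag is a template parameter, so the
// load/loadu choice is resolved at compile time, as in SetTag<Aligned> and the
// reworked copyTag() macro. 'dst' must be 16-byte aligned in either case.
template< bool Aligned >
static void CopyQword(float* dst, const float* src)
{
    __m128 v = Aligned ? _mm_load_ps(src) : _mm_loadu_ps(src);
    _mm_store_ps(dst, v);
}

// Usage mirroring GIFPath_CopyTag's dispatch (hypothetical buffers):
//   CopyQword<true>(ringSlot, path1Data);    // PATH1/PATH3: aligned source
//   CopyQword<false>(ringSlot, path2Data);   // PATH2: unaligned source
```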