ReorderingMTGS:

* Make PCSX2 bare minimum requirements include SSE as well as MMX.
* Minor bugfix which could have affected MTGS performance.
* Default GIFpath code to use SSE optimizations.

git-svn-id: http://pcsx2.googlecode.com/svn/branches/ReorderingMTGS@3491 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2010-07-15 05:21:26 +00:00
parent c8f16a1cde
commit 2f3452ec25
5 changed files with 110 additions and 187 deletions

View File

@ -49,7 +49,6 @@ void gsSetRegionMode( GS_RegionMode region )
void gsInit()
{
memzero(g_RealGSMem);
GIFPath_Initialize();
}
extern bool SIGNAL_IMR_Pending;

View File

@ -18,15 +18,6 @@
#include "Common.h"
#include "System/SysThreads.h"
enum CpuExtType
{
CpuExt_Base,
CpuExt_MMX,
CpuExt_SSE,
CpuExt_SSE2,
CpuExt_SSE41,
};
extern __aligned16 u8 g_RealGSMem[Ps2MemSize::GSregs];
enum CSR_FifoState
@ -282,8 +273,8 @@ class SysMtgsThread : public SysThreadBase
typedef SysThreadBase _parent;
public:
// note: when m_RingPos == m_WritePos, the fifo is empty
uint m_RingPos; // cur pos gs is reading from
// note: when m_ReadPos == m_WritePos, the fifo is empty
uint m_ReadPos; // cur pos gs is reading from
uint m_WritePos; // cur pos ee thread is writing to
volatile bool m_RingBufferIsBusy;
@ -313,7 +304,7 @@ public:
uint m_packet_startpos; // size of the packet (data only, ie. not including the 16 byte command!)
uint m_packet_size; // size of the packet (data only, ie. not including the 16 byte command!)
uint m_packet_ringpos; // index of the data location in the ringbuffer.
uint m_packet_writepos; // index of the data location in the ringbuffer.
#ifdef RINGBUF_DEBUG_STACK
Threading::Mutex m_lock_Stack;
@ -356,9 +347,10 @@ protected:
void OnResumeInThread( bool IsSuspended );
void OnCleanupInThread();
void GenericStall( uint size );
// Used internally by SendSimplePacket type functions
uint _PrepForSimplePacket();
void _FinishSimplePacket( uint future_writepos );
void _FinishSimplePacket();
void ExecuteTaskInThread();
};

View File

@ -70,11 +70,11 @@ void SysMtgsThread::OnStart()
{
m_PluginOpened = false;
m_RingPos = 0;
m_ReadPos = 0;
m_WritePos = 0;
m_RingBufferIsBusy = false;
m_packet_size = 0;
m_packet_ringpos = 0;
m_packet_writepos = 0;
m_QueuedFrameCount = 0;
m_VsyncSignalListener = false;
@ -98,14 +98,14 @@ void SysMtgsThread::OnResumeReady()
void SysMtgsThread::ResetGS()
{
pxAssertDev( !IsOpen() || (m_RingPos == m_WritePos), "Must close or terminate the GS thread prior to gsReset." );
pxAssertDev( !IsOpen() || (m_ReadPos == m_WritePos), "Must close or terminate the GS thread prior to gsReset." );
// MTGS Reset process:
// * clear the ringbuffer.
// * Signal a reset.
// * clear the path and byRegs structs (used by GIFtagDummy)
m_RingPos = m_WritePos;
m_ReadPos = m_WritePos;
m_QueuedFrameCount = 0;
m_VsyncSignalListener = false;
@ -134,13 +134,13 @@ void SysMtgsThread::PostVsyncEnd()
uint packsize = sizeof(RingCmdPacket_Vsync) / 16;
PrepDataPacket(GS_RINGTYPE_VSYNC, packsize);
MemCopy_WrappedDest( (u128*)PS2MEM_GS, RingBuffer.m_Ring, m_packet_ringpos, RingBufferSize, 0xf );
MemCopy_WrappedDest( (u128*)PS2MEM_GS, RingBuffer.m_Ring, m_packet_writepos, RingBufferSize, 0xf );
u32* remainder = (u32*)GetDataPacketPtr();
remainder[0] = GSCSRr;
remainder[1] = GSIMR;
(GSRegSIGBLID&)remainder[2] = GSSIGLBLID;
m_packet_ringpos = (m_packet_ringpos + 1) & RingBufferMask;
m_packet_writepos = (m_packet_writepos + 1) & RingBufferMask;
SendDataPacket();
@ -155,7 +155,7 @@ void SysMtgsThread::PostVsyncEnd()
if ((AtomicIncrement(m_QueuedFrameCount) < EmuConfig.GS.VsyncQueueSize) || (!EmuConfig.GS.VsyncEnable && !EmuConfig.GS.FrameLimitEnable)) return;
m_VsyncSignalListener = true;
//Console.WriteLn( Color_Blue, "(EEcore Sleep) Vsync\t\tringpos=0x%06x, writepos=0x%06x", volatize(m_RingPos), m_WritePos );
//Console.WriteLn( Color_Blue, "(EEcore Sleep) Vsync\t\tringpos=0x%06x, writepos=0x%06x", volatize(m_ReadPos), m_WritePos );
m_sem_Vsync.WaitNoCancel();
}
@ -239,6 +239,8 @@ void SysMtgsThread::OpenPlugin()
class RingBufferLock : public ScopedLock
{
typedef ScopedLock _parent;
protected:
SysMtgsThread& m_mtgs;
@ -254,6 +256,18 @@ public:
{
m_mtgs.m_RingBufferIsBusy = false;
}
void Acquire()
{
_parent::Acquire();
m_mtgs.m_RingBufferIsBusy = true;
}
void Release()
{
m_mtgs.m_RingBufferIsBusy = false;
_parent::Release();
}
};
void SysMtgsThread::ExecuteTaskInThread()
@ -262,31 +276,33 @@ void SysMtgsThread::ExecuteTaskInThread()
PacketTagType prevCmd;
#endif
RingBufferLock busy( *this );
while( true )
{
busy.Release();
// Performance note: Both of these perform cancellation tests, but pthread_testcancel
// is very optimized (only 1 instruction test in most cases), so no point in trying
// to avoid it.
m_sem_event.WaitWithoutYield();
StateCheckInThread();
busy.Acquire();
{
RingBufferLock busy( *this );
// note: m_RingPos is intentionally not volatile, because it should only
// note: m_ReadPos is intentionally not volatile, because it should only
// ever be modified by this thread.
while( m_RingPos != volatize(m_WritePos))
while( m_ReadPos != volatize(m_WritePos))
{
if( EmuConfig.GS.DisableOutput )
{
m_RingPos = m_WritePos;
m_ReadPos = m_WritePos;
continue;
}
pxAssert( m_RingPos < RingBufferSize );
pxAssert( m_ReadPos < RingBufferSize );
const PacketTagType& tag = (PacketTagType&)RingBuffer[m_RingPos];
const PacketTagType& tag = (PacketTagType&)RingBuffer[m_ReadPos];
u32 ringposinc = 1;
#ifdef RINGBUF_DEBUG_STACK
@ -294,11 +310,11 @@ void SysMtgsThread::ExecuteTaskInThread()
m_lock_Stack.Lock();
uptr stackpos = ringposStack.back();
if( stackpos != m_RingPos )
if( stackpos != m_ReadPos )
{
Console.Error( "MTGS Ringbuffer Critical Failure ---> %x to %x (prevCmd: %x)\n", stackpos, m_RingPos, prevCmd.command );
Console.Error( "MTGS Ringbuffer Critical Failure ---> %x to %x (prevCmd: %x)\n", stackpos, m_ReadPos, prevCmd.command );
}
pxAssert( stackpos == m_RingPos );
pxAssert( stackpos == m_ReadPos );
prevCmd = tag;
ringposStack.pop_back();
m_lock_Stack.Release();
@ -308,7 +324,7 @@ void SysMtgsThread::ExecuteTaskInThread()
{
case GS_RINGTYPE_P1:
{
uint datapos = (m_RingPos+1) & RingBufferMask;
uint datapos = (m_ReadPos+1) & RingBufferMask;
const int qsize = tag.data[0];
const u128* data = &RingBuffer[datapos];
@ -333,7 +349,7 @@ void SysMtgsThread::ExecuteTaskInThread()
case GS_RINGTYPE_P2:
{
uint datapos = (m_RingPos+1) & RingBufferMask;
uint datapos = (m_ReadPos+1) & RingBufferMask;
const int qsize = tag.data[0];
const u128* data = &RingBuffer[datapos];
@ -358,7 +374,7 @@ void SysMtgsThread::ExecuteTaskInThread()
case GS_RINGTYPE_P3:
{
uint datapos = (m_RingPos+1) & RingBufferMask;
uint datapos = (m_ReadPos+1) & RingBufferMask;
const int qsize = tag.data[0];
const u128* data = &RingBuffer[datapos];
@ -393,11 +409,13 @@ void SysMtgsThread::ExecuteTaskInThread()
MTGS_LOG( "(MTGS Packet Read) ringtype=Vsync, field=%u, skip=%s", !!(((u32&)RingBuffer.Regs[0x1000]) & 0x2000) ? 0 : 1, tag.data[1] ? "true" : "false" );
// Mail in the important GS registers.
RingCmdPacket_Vsync& local((RingCmdPacket_Vsync&)RingBuffer[m_RingPos+1]);
memcpy_fast( RingBuffer.Regs, local.regset1, sizeof(local.regset1));
((u32&)RingBuffer.Regs[0x1000]) = local.csr;
((u32&)RingBuffer.Regs[0x1010]) = local.imr;
((GSRegSIGBLID&)RingBuffer.Regs[0x1080]) = local.siglblid;
uint datapos = (m_ReadPos+1) & RingBufferMask;
MemCopy_WrappedSrc( RingBuffer.m_Ring, datapos, RingBufferSize, (u128*)RingBuffer.Regs, 0xf );
u32* remainder = (u32*)&RingBuffer[datapos];
GSCSRr = remainder[0];
GSIMR = remainder[1];
GSSIGLBLID = (GSRegSIGBLID&)remainder[2];
// CSR & 0x2000; is the pageflip id.
GSvsync(((u32&)RingBuffer.Regs[0x1000]) & 0x2000);
@ -454,9 +472,9 @@ void SysMtgsThread::ExecuteTaskInThread()
#ifdef PCSX2_DEVBUILD
default:
Console.Error("GSThreadProc, bad packet (%x) at m_RingPos: %x, m_WritePos: %x", tag.command, m_RingPos, m_WritePos);
Console.Error("GSThreadProc, bad packet (%x) at m_ReadPos: %x, m_WritePos: %x", tag.command, m_ReadPos, m_WritePos);
pxFail( "Bad packet encountered in the MTGS Ringbuffer." );
m_RingPos = m_WritePos;
m_ReadPos = m_WritePos;
continue;
#else
// Optimized performance in non-Dev builds.
@ -466,28 +484,29 @@ void SysMtgsThread::ExecuteTaskInThread()
}
}
uint newringpos = (m_RingPos + ringposinc) & RingBufferMask;
uint newringpos = (m_ReadPos + ringposinc) & RingBufferMask;
if( EmuConfig.GS.SynchronousMTGS )
{
pxAssert( m_WritePos == newringpos );
}
m_RingPos = newringpos;
m_ReadPos = newringpos;
if( m_SignalRingEnable != 0 )
{
// The EEcore has requested a signal after some amount of processed data.
if( AtomicExchangeSub( m_SignalRingPosition, ringposinc ) <= 0 )
{
// Make sure to post the signal after the m_RingPos has been updated...
// Make sure to post the signal after the m_ReadPos has been updated...
AtomicExchange( m_SignalRingEnable, 0 );
m_sem_OnRingReset.Post();
continue;
}
}
}
}
busy.Release();
// Safety valve in case standard signals fail for some reason -- this ensures the EEcore
// won't sleep the eternity, even if SignalRingPosition didn't reach 0 for some reason.
@ -503,7 +522,7 @@ void SysMtgsThread::ExecuteTaskInThread()
if (!!AtomicExchange(m_VsyncSignalListener, false))
m_sem_Vsync.Post();
//Console.Warning( "(MTGS Thread) Nothing to do! ringpos=0x%06x", m_RingPos );
//Console.Warning( "(MTGS Thread) Nothing to do! ringpos=0x%06x", m_ReadPos );
}
}
@ -543,7 +562,7 @@ void SysMtgsThread::WaitGS()
if( m_ExecMode == ExecMode_NoThreadYet || !IsRunning() ) return;
if( !pxAssertDev( IsOpen(), "MTGS Warning! WaitGS issued on a closed thread." ) ) return;
if( volatize(m_RingPos) != m_WritePos )
if( volatize(m_ReadPos) != m_WritePos )
{
SetEvent();
RethrowException();
@ -551,7 +570,7 @@ void SysMtgsThread::WaitGS()
do {
m_mtx_RingBufferBusy.Wait();
RethrowException();
} while( volatize(m_RingPos) != m_WritePos );
} while( volatize(m_ReadPos) != m_WritePos );
}
// Completely synchronize GS and MTGS register states.
@ -570,7 +589,7 @@ void SysMtgsThread::SetEvent()
u8* SysMtgsThread::GetDataPacketPtr() const
{
return (u8*)&RingBuffer[m_packet_ringpos & RingBufferMask];
return (u8*)&RingBuffer[m_packet_writepos & RingBufferMask];
}
// Closes the data packet send command, and initiates the gs thread (if needed).
@ -579,14 +598,14 @@ void SysMtgsThread::SendDataPacket()
// make sure a previous copy block has been started somewhere.
pxAssert( m_packet_size != 0 );
uint actualSize = ((m_packet_ringpos - m_packet_startpos) & RingBufferMask)-1;
uint actualSize = ((m_packet_writepos - m_packet_startpos) & RingBufferMask)-1;
pxAssert( actualSize <= m_packet_size );
pxAssert( m_packet_ringpos < RingBufferSize );
pxAssert( m_packet_writepos < RingBufferSize );
PacketTagType& tag = (PacketTagType&)RingBuffer[m_packet_startpos];
tag.data[0] = actualSize;
m_WritePos = m_packet_ringpos;
m_WritePos = m_packet_writepos;
if( EmuConfig.GS.SynchronousMTGS )
{
@ -603,29 +622,23 @@ void SysMtgsThread::SendDataPacket()
//m_PacketLocker.Release();
}
void SysMtgsThread::PrepDataPacket( MTGS_RingCommand cmd, u32 size )
void SysMtgsThread::GenericStall( uint size )
{
// Note on volatiles: m_WritePos is not modified by the GS thread, so there's no need
// to use volatile reads here. We do cache it though, since we know it never changes,
// except for calls to RingbufferRestert() -- handled below.
uint writepos = m_WritePos;
// Checks if a previous copy was started without an accompanying call to GSRINGBUF_DONECOPY
pxAssert( m_packet_size == 0 );
const uint writepos = m_WritePos;
// Sanity checks! (within the confines of our ringbuffer please!)
pxAssert( size < RingBufferSize );
pxAssert( writepos < RingBufferSize );
m_packet_size = size;
++size; // takes into account our RingCommand QWC.
// generic gs wait/stall.
// if the writepos is past the readpos then we're safe.
// But if not then we need to make sure the readpos is outside the scope of
// the block about to be written (writepos + size)
uint readpos = volatize(m_RingPos);
uint readpos = volatize(m_ReadPos);
uint endpos = writepos+size;
uint freeroom;
@ -662,7 +675,7 @@ void SysMtgsThread::PrepDataPacket( MTGS_RingCommand cmd, u32 size )
AtomicExchange( m_SignalRingEnable, 1 );
SetEvent();
m_sem_OnRingReset.WaitWithoutYield();
readpos = volatize(m_RingPos);
readpos = volatize(m_ReadPos);
//Console.WriteLn( Color_Blue, "(EEcore Awake) Report!\tringpos=0x%06x", readpos );
} while( (writepos < readpos) && (writepos+size >= readpos) );
@ -674,16 +687,17 @@ void SysMtgsThread::PrepDataPacket( MTGS_RingCommand cmd, u32 size )
SetEvent();
do {
SpinWait();
readpos = volatize(m_RingPos);
readpos = volatize(m_ReadPos);
} while( (writepos < readpos) && (writepos+size >= readpos) );
}
}
}
#ifdef RINGBUF_DEBUG_STACK
m_lock_Stack.Lock();
ringposStack.push_front( writepos );
m_lock_Stack.Release();
#endif
void SysMtgsThread::PrepDataPacket( MTGS_RingCommand cmd, u32 size )
{
m_packet_size = size;
++size; // takes into account our RingCommand QWC.
GenericStall(size);
// Command qword: Low word is the command, and the high word is the packet
// length in SIMDs (128 bits).
@ -692,7 +706,7 @@ void SysMtgsThread::PrepDataPacket( MTGS_RingCommand cmd, u32 size )
tag.command = cmd;
tag.data[0] = m_packet_size;
m_packet_startpos = m_WritePos;
m_packet_ringpos = (m_WritePos + 1) & RingBufferMask;
m_packet_writepos = (m_WritePos + 1) & RingBufferMask;
}
// Returns the amount of giftag data processed (in simd128 values).
@ -707,71 +721,10 @@ void SysMtgsThread::PrepDataPacket( GIF_PATH pathidx, u32 size )
PrepDataPacket( (MTGS_RingCommand)pathidx, size );
}
__forceinline uint SysMtgsThread::_PrepForSimplePacket()
__forceinline void SysMtgsThread::_FinishSimplePacket()
{
#ifdef RINGBUF_DEBUG_STACK
m_lock_Stack.Lock();
ringposStack.push_front( m_WritePos );
m_lock_Stack.Release();
#endif
uint future_writepos = m_WritePos+1;
pxAssert( future_writepos <= RingBufferSize );
future_writepos &= RingBufferMask;
if( future_writepos == 0 )
m_QueuedFrameCount = 0;
uint readpos = volatize(m_RingPos);
if( future_writepos == readpos )
{
// The ringbuffer read pos is blocking the future write position, so stall out
// until the read position has moved.
uint freeroom;
if (future_writepos < readpos)
freeroom = readpos - future_writepos;
else
freeroom = RingBufferSize - (future_writepos - readpos);
uint totalAccum = RingBufferSize - freeroom;
uint somedone = totalAccum / 4;
if( somedone > 0x80 )
{
m_SignalRingPosition = somedone;
//Console.WriteLn( Color_Blue, "(EEcore Sleep) PrepSimplePacket\tringpos=0x%06x, writepos=0x%06x, signalpos=0x%06x", readpos, m_WritePos, m_SignalRingPosition );
do {
AtomicExchange( m_SignalRingEnable, 1 );
SetEvent();
m_sem_OnRingReset.WaitWithoutYield();
readpos = volatize(m_RingPos);
//Console.WriteLn( Color_Blue, "(MTGS Sync) EEcore Simple Post-sleep Report!\tringpos=0x%06x", readpos );
} while( future_writepos == readpos );
pxAssertDev( m_SignalRingPosition <= 0, "MTGS Thread Synchronization Error" );
}
else
{
//Console.WriteLn( Color_StrongGray, "(EEcore Spin) PrepSimplePacket!" );
SetEvent();
do {
SpinWait();
} while( future_writepos == volatize(m_RingPos) );
}
}
return future_writepos;
}
__forceinline void SysMtgsThread::_FinishSimplePacket( uint future_writepos )
{
pxAssert( future_writepos != volatize(m_RingPos) );
uint future_writepos = (m_WritePos+1) & RingBufferMask;
pxAssert( future_writepos != volatize(m_ReadPos) );
m_WritePos = future_writepos;
if( EmuConfig.GS.SynchronousMTGS )
@ -784,7 +737,7 @@ void SysMtgsThread::SendSimplePacket( MTGS_RingCommand type, int data0, int data
{
//ScopedLock locker( m_PacketLocker );
const uint thefuture = _PrepForSimplePacket();
GenericStall(1);
PacketTagType& tag = (PacketTagType&)RingBuffer[m_WritePos];
tag.command = type;
@ -792,21 +745,21 @@ void SysMtgsThread::SendSimplePacket( MTGS_RingCommand type, int data0, int data
tag.data[1] = data1;
tag.data[2] = data2;
_FinishSimplePacket( thefuture );
_FinishSimplePacket();
}
void SysMtgsThread::SendPointerPacket( MTGS_RingCommand type, u32 data0, void* data1 )
{
//ScopedLock locker( m_PacketLocker );
const uint thefuture = _PrepForSimplePacket();
GenericStall(1);
PacketTagType& tag = (PacketTagType&)RingBuffer[m_WritePos];
tag.command = type;
tag.data[0] = data0;
*(uptr*)&tag.data[1] = (uptr)data1;
_FinishSimplePacket( thefuture );
_FinishSimplePacket();
}
void SysMtgsThread::SendGameCRC( u32 crc )

View File

@ -189,13 +189,13 @@ void Pcsx2App::DetectCpuAndUserMode()
x86caps.CountCores();
x86caps.SIMD_EstablishMXCSRmask();
if( !x86caps.hasMultimediaExtensions )
if( !x86caps.hasMultimediaExtensions || !x86caps.hasStreamingSIMDExtensions )
{
// Note: due to memcpy_fast, we need minimum MMX even for interpreters. This will
// hopefully change later once we have a dynamically recompiled memcpy.
// Note: Due to optimizations to GIFpath parsers, memcpy, and possibly other things, we need
// a bare minimum of SSE supported by the CPU.
throw Exception::HardwareDeficiency()
.SetDiagMsg(L"Critical Failure: MMX Extensions not available.")
.SetUserMsg(_("MMX extensions are not available. PCSX2 requires cpu with MMX extension support to run."));
.SetDiagMsg(L"Critical Failure: SSE Extensions not available.")
.SetUserMsg(_("SSE extensions are not available. PCSX2 requires a cpu that supports the SSE instruction set."));
}
ReadUserModeSettings();

View File

@ -97,10 +97,10 @@ struct GIFPath
u8 GetReg();
bool IsActive() const;
template< CpuExtType CpuExt, bool Aligned >
template< bool Aligned >
void SetTag(const void* mem);
template< CpuExtType CpuExt, int pathidx >
template< GIF_PATH pathidx, bool Aligned >
int CopyTag(const u128* pMem, u32 size);
int ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size);
@ -291,13 +291,10 @@ __forceinline void GIFPath::PrepPackedRegs()
}
template< CpuExtType CpuExt, bool Aligned >
template< bool Aligned >
__forceinline void GIFPath::SetTag(const void* mem)
{
if( CpuExt >= CpuExt_SSE )
_mm_store_ps( (float*)&tag, Aligned ? _mm_load_ps((const float*)mem) : _mm_loadu_ps((const float*)mem) );
else
const_cast<GIFTAG&>(tag) = *((GIFTAG*)mem);
nloop = tag.NLOOP;
curreg = 0;
@ -391,7 +388,7 @@ __forceinline int GIFPath::ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 s
while (size > 0) {
if (!nloop) {
SetTag<CpuExt_Base,false>(pMem);
SetTag<false>(pMem);
incTag(1);
}
else
@ -567,10 +564,7 @@ __forceinline void MemCopy_WrappedSrc( const u128* srcBase, uint& srcStart, uint
}
#define copyTag() do { \
if( CpuExt >= CpuExt_SSE ) \
_mm_store_ps( (float*)&RingBuffer.m_Ring[ringpos], (pathidx!=GIF_PATH_2) ? _mm_load_ps((float*)pMem128) : _mm_loadu_ps((float*)pMem128)); \
else \
RingBuffer.m_Ring[ringpos] = *pMem128; \
_mm_store_ps( (float*)&RingBuffer.m_Ring[ringpos], Aligned ? _mm_load_ps((float*)pMem128) : _mm_loadu_ps((float*)pMem128)); \
++pMem128; --size; \
ringpos = (ringpos+1)&RingBufferMask; \
} while(false)
@ -579,10 +573,10 @@ __forceinline void MemCopy_WrappedSrc( const u128* srcBase, uint& srcStart, uint
// size - max size of incoming data stream, in qwc (simd128). If the path is PATH1, and the
// path does not terminate (EOP) within the specified size, it is assumed that the path must
// loop around to the start of VU memory and continue processing.
template< CpuExtType CpuExt, int pathidx >
template< GIF_PATH pathidx, bool Aligned >
__forceinline int GIFPath::CopyTag(const u128* pMem128, u32 size)
{
uint& ringpos = GetMTGS().m_packet_ringpos;
uint& ringpos = GetMTGS().m_packet_writepos;
const uint original_ringpos = ringpos;
u32 startSize = size; // Start Size
@ -590,7 +584,7 @@ __forceinline int GIFPath::CopyTag(const u128* pMem128, u32 size)
while (size > 0) {
if (!nloop) {
SetTag<CpuExt, (pathidx!=GIF_PATH_2)>((u8*)pMem128);
SetTag<Aligned>((u8*)pMem128);
copyTag();
if(nloop > 0)
@ -795,6 +789,7 @@ __forceinline int GIFPath::CopyTag(const u128* pMem128, u32 size)
Console.Warning("GIFTAG error, size exceeded VU memory size %x", startSize);
nloop = 0;
const_cast<GIFTAG&>(tag).EOP = 1;
// Don't send the packet to the GS -- its incomplete and might cause the GS plugin
// to get confused and die. >_<
@ -870,41 +865,25 @@ __forceinline int GIFPath::CopyTag(const u128* pMem128, u32 size)
return size;
}
typedef int __fastcall FnType_CopyTag(const u128* pMem, u32 size);
static __aligned16 FnType_CopyTag* tbl_CopyTag[3];
// Parameters:
// size - max size of incoming data stream, in qwc (simd128). If the path is PATH1, and the
// path does not terminate (EOP) within the specified size, it is assumed that the path must
// loop around to the start of VU memory and continue processing.
template< CpuExtType CpuExt, int pathidx >
static int __fastcall _CopyTag_tmpl(const u128* pMem, u32 size)
{
return s_gifPath[pathidx].CopyTag<CpuExt,pathidx>(pMem, size);
}
void GIFPath_Initialize()
{
#ifdef __LINUX__
// It's already thrown an exception if it isn't SSE, and the check was giving me a compilation error.
// I could fix it, but why bother?
tbl_CopyTag[0] = _CopyTag_tmpl<CpuExt_SSE, 0>;
tbl_CopyTag[1] = _CopyTag_tmpl<CpuExt_SSE, 1>;
tbl_CopyTag[2] = _CopyTag_tmpl<CpuExt_SSE, 2>;
#else
tbl_CopyTag[0] = x86caps.hasStreamingSIMDExtensions ? _CopyTag_tmpl<CpuExt_SSE, 0> : _CopyTag_tmpl<CpuExt_Base, 0>;
tbl_CopyTag[1] = x86caps.hasStreamingSIMDExtensions ? _CopyTag_tmpl<CpuExt_SSE, 1> : _CopyTag_tmpl<CpuExt_Base, 1>;
tbl_CopyTag[2] = x86caps.hasStreamingSIMDExtensions ? _CopyTag_tmpl<CpuExt_SSE, 2> : _CopyTag_tmpl<CpuExt_Base, 2>;
#endif
}
__forceinline int GIFPath_CopyTag(GIF_PATH pathidx, const u128* pMem, u32 size)
{
return tbl_CopyTag[pathidx](pMem, size);
switch( pathidx )
{
case GIF_PATH_1: return s_gifPath[GIF_PATH_1].CopyTag<GIF_PATH_1,true>(pMem, size);
case GIF_PATH_2: return s_gifPath[GIF_PATH_2].CopyTag<GIF_PATH_2,false>(pMem, size);
case GIF_PATH_3: return s_gifPath[GIF_PATH_3].CopyTag<GIF_PATH_3,true>(pMem, size);
jNO_DEFAULT;
}
return 0; // unreachable
}
// Quick version for queueing PATH1 data.
// Quick version for queuing PATH1 data.
// This version calculates the real length of the packet data only. It does not process
// IRQs or DMA status updates.
__forceinline int GIFPath_ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size)