mirror of https://github.com/PCSX2/pcsx2.git
ReorderingMTGS:
* Implemented GIFPath_CopyTag, which performs a "copy-in-place" while parsing tags (big speedup over the old parse-then-copy strategy, especially with the SSE intrinsics I've included for kicks). * Removed the old ringbuffer 'restart' mechanism and replaced it with a truly free-flowing wrapping mechanism. Utilizes the ringbuffer more efficiently, and removes quite a bit of overhead from the MTGS's PrepDataPacket call. git-svn-id: http://pcsx2.googlecode.com/svn/branches/ReorderingMTGS@3458 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
65f50f009f
commit
a9084741bc
|
@ -564,6 +564,7 @@ typedef void (CALLBACK* _PS2EsetEmuVersion)(const char* emuId, u32 version); //
|
|||
typedef s32 (CALLBACK* _GSopen)(void *pDsp, char *Title, int multithread);
|
||||
typedef s32 (CALLBACK* _GSopen2)( void *pDsp, u32 flags );
|
||||
typedef void (CALLBACK* _GSvsync)(int field);
|
||||
typedef void (CALLBACK* _GSgifTransfer)(u32 *pMem, u32 size);
|
||||
typedef void (CALLBACK* _GSgifTransfer1)(u32 *pMem, u32 addr);
|
||||
typedef void (CALLBACK* _GSgifTransfer2)(u32 *pMem, u32 size);
|
||||
typedef void (CALLBACK* _GSgifTransfer3)(u32 *pMem, u32 size);
|
||||
|
@ -723,6 +724,7 @@ typedef void (CALLBACK* _FWirqCallback)(void (*callback)());
|
|||
extern _GSopen GSopen;
|
||||
extern _GSopen2 GSopen2;
|
||||
extern _GSvsync GSvsync;
|
||||
extern _GSgifTransfer GSgifTransfer;
|
||||
extern _GSgifTransfer1 GSgifTransfer1;
|
||||
extern _GSgifTransfer2 GSgifTransfer2;
|
||||
extern _GSgifTransfer3 GSgifTransfer3;
|
||||
|
|
|
@ -36,7 +36,7 @@
|
|||
// Only used in the Windows version of memzero.h. But it's in Misc.cpp for some reason.
|
||||
void _memset16_unaligned( void* dest, u16 data, size_t size );
|
||||
|
||||
#define memcpy_fast memcpy_amd_ // Fast memcpy
|
||||
#define memcpy_aligned memcpy_amd_ // Memcpy with 16-byte Aligned addresses
|
||||
#define memcpy_const memcpy_amd_ // Memcpy with constant size
|
||||
#define memcpy_constA memcpy_amd_ // Memcpy with constant size and 16-byte aligned
|
||||
#define memcpy_fast memcpy_amd_ // Fast memcpy
|
||||
// Memcpy with 16-byte Aligned addresses.
// The count `c` is given in 16-byte units and is scaled to bytes here.  Every argument is
// parenthesized so that an expression argument (e.g. `a+b`) is scaled as a whole instead of
// being split by operator precedence.
#define memcpy_aligned(d,s,c) memcpy_amd_((d),(s),(c)*16) // Memcpy with 16-byte Aligned addresses
|
||||
#define memcpy_const memcpy_amd_ // Memcpy with constant size
|
||||
#define memcpy_constA memcpy_amd_ // Memcpy with constant size and 16-byte aligned
|
||||
|
|
|
@ -195,10 +195,9 @@ void __fastcall WriteFIFO_page_6(u32 mem, const mem128_t *value)
|
|||
nloop0_packet[1] = psHu32(GIF_FIFO + 4);
|
||||
nloop0_packet[2] = psHu32(GIF_FIFO + 8);
|
||||
nloop0_packet[3] = psHu32(GIF_FIFO + 12);
|
||||
GetMTGS().PrepDataPacket(GIF_PATH_3, (u8*)nloop0_packet, 1);
|
||||
GetMTGS().PrepDataPacket(GIF_PATH_3, 1);
|
||||
u64* data = (u64*)GetMTGS().GetDataPacketPtr();
|
||||
data[0] = value[0];
|
||||
data[1] = value[1];
|
||||
GIFPath_CopyTag( GIF_PATH_3, (u128*)nloop0_packet, 1 );
|
||||
GetMTGS().SendDataPacket();
|
||||
if(GSTransferStatus.PTH3 == STOPPED_MODE && gifRegs->stat.APATH == GIF_APATH3 )
|
||||
{
|
||||
|
|
37
pcsx2/GS.h
37
pcsx2/GS.h
|
@ -229,7 +229,7 @@ enum GIF_PATH
|
|||
GIF_PATH_3,
|
||||
};
|
||||
|
||||
extern int GIFPath_ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size);
|
||||
extern int GIFPath_CopyTag(GIF_PATH pathidx, const u128* pMem, u32 size);
|
||||
extern int GIFPath_ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size);
|
||||
extern void GIFPath_Reset();
|
||||
extern void GIFPath_Clear( GIF_PATH pathidx );
|
||||
|
@ -282,7 +282,6 @@ public:
|
|||
volatile s32 m_SignalRingPosition;
|
||||
|
||||
int m_QueuedFrameCount;
|
||||
u32 m_RingWrapSpot;
|
||||
|
||||
Mutex m_lock_RingBufferBusy;
|
||||
Semaphore m_sem_OnRingReset;
|
||||
|
@ -301,6 +300,7 @@ public:
|
|||
// These vars maintain instance data for sending Data Packets.
|
||||
// Only one data packet can be constructed and uploaded at a time.
|
||||
|
||||
uint m_packet_startpos; // index in the ringbuffer of this packet's command tag (the write position when the packet was prepared)
|
||||
uint m_packet_size; // size of the packet (data only, ie. not including the 16 byte command!)
|
||||
uint m_packet_ringpos; // index of the data location in the ringbuffer.
|
||||
|
||||
|
@ -317,14 +317,13 @@ public:
|
|||
void WaitGS();
|
||||
void ResetGS();
|
||||
|
||||
int PrepDataPacket( MTGS_RingCommand cmd, u32 size );
|
||||
int PrepDataPacket( GIF_PATH pathidx, const u8* srcdata, u32 size );
|
||||
void PrepDataPacket( MTGS_RingCommand cmd, u32 size );
|
||||
void PrepDataPacket( GIF_PATH pathidx, u32 size );
|
||||
void SendDataPacket();
|
||||
void SendGameCRC( u32 crc );
|
||||
void WaitForOpen();
|
||||
void Freeze( int mode, MTGS_FreezeData& data );
|
||||
|
||||
void RestartRingbuffer( uint packsize=0 );
|
||||
void SendSimplePacket( MTGS_RingCommand type, int data0, int data1, int data2 );
|
||||
void SendPointerPacket( MTGS_RingCommand type, u32 data0, void* data1 );
|
||||
|
||||
|
@ -416,3 +415,31 @@ extern int g_nLeftGSFrames;
|
|||
|
||||
#endif
|
||||
|
||||
// Size of the ringbuffer as a power of 2 -- size is a multiple of simd128s.
|
||||
// (actual size is 1<<RingBufferSizeFactor simd vectors [128-bit values])
|
||||
// A value of 19 is an 8meg ring buffer. 18 would be 4 megs, and 20 would be 16 megs.
|
||||
// Default was 2mb, but some games with lots of MTGS activity want 8mb to run fast (rama)
|
||||
static const uint RingBufferSizeFactor = 19;
|
||||
|
||||
// size of the ringbuffer in simd128's.
|
||||
static const uint RingBufferSize = 1<<RingBufferSizeFactor;
|
||||
|
||||
// Mask to apply to ring buffer indices to wrap the pointer from end to
|
||||
// start (the wrapping is what makes it a ringbuffer, yo!)
|
||||
static const uint RingBufferMask = RingBufferSize - 1;
|
||||
|
||||
// Backing storage for the MTGS ring buffer, together with a byte buffer for GS registers.
// A single instance is declared as the global `RingBuffer`.
struct MTGS_BufferedData
|
||||
{
|
||||
// The ring itself: RingBufferSize entries, indexed in u128 units.
u128 m_Ring[RingBufferSize];
|
||||
// Byte buffer of Ps2MemSize::GSregs bytes.
// NOTE(review): appears to be the GS thread's private copy of the GS register block -- confirm against OpenPlugin.
u8 Regs[Ps2MemSize::GSregs];
|
||||
|
||||
// Empty constructor: neither array is initialized here.
MTGS_BufferedData() {}
|
||||
|
||||
// Returns a reference to ring entry `idx`.
// The index is only checked by pxAssert and is not wrapped here, so callers must pass
// an index that is already below RingBufferSize (e.g. masked with RingBufferMask).
u128& operator[]( uint idx )
|
||||
{
|
||||
pxAssert( idx < RingBufferSize );
|
||||
return m_Ring[idx];
|
||||
}
|
||||
};
|
||||
|
||||
extern __aligned(32) MTGS_BufferedData RingBuffer;
|
||||
|
|
|
@ -59,16 +59,15 @@ void gsPath1Interrupt()
|
|||
gifRegs->stat.P1Q = false;
|
||||
while(Path1WritePos > 0)
|
||||
{
|
||||
u32 size = GetMTGS().PrepDataPacket(GIF_PATH_1, Path1Buffer + (Path1ReadPos * 16), (Path1WritePos - Path1ReadPos));
|
||||
u8* pDest = GetMTGS().GetDataPacketPtr();
|
||||
uint size = (Path1WritePos - Path1ReadPos);
|
||||
GetMTGS().PrepDataPacket(GIF_PATH_1, size);
|
||||
//DevCon.Warning("Flush Size = %x", size);
|
||||
|
||||
memcpy_aligned(pDest, Path1Buffer + (Path1ReadPos * 16), size * 16);
|
||||
GetMTGS().SendDataPacket();
|
||||
|
||||
|
||||
Path1ReadPos += size;
|
||||
|
||||
uint count = GIFPath_CopyTag(GIF_PATH_1, ((u128*)Path1Buffer) + Path1ReadPos, size);
|
||||
GetMTGS().SendDataPacket();
|
||||
pxAssume( count == size );
|
||||
Path1ReadPos += count;
|
||||
|
||||
if(GSTransferStatus.PTH1 == STOPPED_MODE)
|
||||
{
|
||||
gifRegs->stat.OPH = false;
|
||||
|
@ -150,11 +149,9 @@ __forceinline void gsInterrupt()
|
|||
|
||||
static u32 WRITERING_DMA(u32 *pMem, u32 qwc)
|
||||
{
|
||||
int size = GetMTGS().PrepDataPacket(GIF_PATH_3, (u8*)pMem, qwc);
|
||||
u8* pgsmem = GetMTGS().GetDataPacketPtr();
|
||||
|
||||
memcpy_aligned(pgsmem, pMem, size<<4);
|
||||
|
||||
GetMTGS().PrepDataPacket(GIF_PATH_3, qwc);
|
||||
//uint len1 = GIFPath_ParseTag(GIF_PATH_3, (u8*)pMem, qwc );
|
||||
uint size = GIFPath_CopyTag(GIF_PATH_3, (u128*)pMem, qwc );
|
||||
GetMTGS().SendDataPacket();
|
||||
return size;
|
||||
}
|
||||
|
|
329
pcsx2/MTGS.cpp
329
pcsx2/MTGS.cpp
|
@ -29,7 +29,7 @@
|
|||
|
||||
using namespace Threading;
|
||||
|
||||
#if 0 // PCSX2_DEBUG
|
||||
#if 0 //PCSX2_DEBUG
|
||||
# define MTGS_LOG Console.WriteLn
|
||||
#else
|
||||
# define MTGS_LOG 0&&
|
||||
|
@ -46,34 +46,7 @@ using namespace Threading;
|
|||
// MTGS Threaded Class Implementation
|
||||
// =====================================================================================================
|
||||
|
||||
// Size of the ringbuffer as a power of 2 -- size is a multiple of simd128s.
|
||||
// (actual size is 1<<m_RingBufferSizeFactor simd vectors [128-bit values])
|
||||
// A value of 19 is a 8meg ring buffer. 18 would be 4 megs, and 20 would be 16 megs.
|
||||
// Default was 2mb, but some games with lots of MTGS activity want 8mb to run fast (rama)
|
||||
static const uint RingBufferSizeFactor = 19;
|
||||
|
||||
// size of the ringbuffer in simd128's.
|
||||
static const uint RingBufferSize = 1<<RingBufferSizeFactor;
|
||||
|
||||
// Mask to apply to ring buffer indices to wrap the pointer from end to
|
||||
// start (the wrapping is what makes it a ringbuffer, yo!)
|
||||
static const uint RingBufferMask = RingBufferSize - 1;
|
||||
|
||||
struct MTGS_BufferedData
|
||||
{
|
||||
u128 m_Ring[RingBufferSize];
|
||||
u8 Regs[Ps2MemSize::GSregs];
|
||||
|
||||
MTGS_BufferedData() {}
|
||||
|
||||
u128& operator[]( uint idx )
|
||||
{
|
||||
pxAssert( idx < RingBufferSize );
|
||||
return m_Ring[idx];
|
||||
}
|
||||
};
|
||||
|
||||
static __aligned(32) MTGS_BufferedData RingBuffer;
|
||||
__aligned(32) MTGS_BufferedData RingBuffer;
|
||||
extern bool renderswitch;
|
||||
|
||||
|
||||
|
@ -106,7 +79,6 @@ void SysMtgsThread::OnStart()
|
|||
m_QueuedFrameCount = 0;
|
||||
m_SignalRingEnable = 0;
|
||||
m_SignalRingPosition= 0;
|
||||
m_RingWrapSpot = 0;
|
||||
|
||||
m_CopyDataTally = 0;
|
||||
|
||||
|
@ -125,12 +97,15 @@ void SysMtgsThread::OnResumeReady()
|
|||
|
||||
void SysMtgsThread::ResetGS()
|
||||
{
|
||||
pxAssertDev( !IsOpen() || (m_RingPos == m_WritePos), "Must close or terminate the GS thread prior to gsReset." );
|
||||
|
||||
// MTGS Reset process:
|
||||
// * clear the ringbuffer.
|
||||
// * Signal a reset.
|
||||
// * clear the path and byRegs structs (used by GIFtagDummy)
|
||||
|
||||
m_RingPos = m_WritePos;
|
||||
m_QueuedFrameCount = 0;
|
||||
|
||||
MTGS_LOG( "MTGS: Sending Reset..." );
|
||||
SendSimplePacket( GS_RINGTYPE_RESET, 0, 0, 0 );
|
||||
|
@ -155,7 +130,8 @@ void SysMtgsThread::PostVsyncEnd()
|
|||
// 256-byte copy is only a few dozen cycles -- executed 60 times a second -- so probably
|
||||
// not worth the effort or overhead of trying to selectively avoid it.
|
||||
|
||||
PrepDataPacket(GS_RINGTYPE_VSYNC, sizeof(RingCmdPacket_Vsync));
|
||||
uint packsize = sizeof(RingCmdPacket_Vsync) / 16;
|
||||
PrepDataPacket(GS_RINGTYPE_VSYNC, packsize);
|
||||
RingCmdPacket_Vsync& local( *(RingCmdPacket_Vsync*)GetDataPacketPtr() );
|
||||
|
||||
memcpy_fast( local.regset1, PS2MEM_GS, sizeof(local.regset1) );
|
||||
|
@ -163,6 +139,7 @@ void SysMtgsThread::PostVsyncEnd()
|
|||
local.imr = GSIMR;
|
||||
local.siglblid = GSSIGLBLID;
|
||||
|
||||
m_packet_ringpos += packsize;
|
||||
SendDataPacket();
|
||||
|
||||
// Alter-frame flushing! Restarts the ringbuffer (wraps) on every other frame. This is a
|
||||
|
@ -172,13 +149,29 @@ void SysMtgsThread::PostVsyncEnd()
|
|||
// and they also allow us to reuse the front of the ringbuffer more often, which should improve
|
||||
// L2 cache performance.
|
||||
|
||||
if( m_QueuedFrameCount > 0 )
|
||||
RestartRingbuffer();
|
||||
if( AtomicIncrement(m_QueuedFrameCount) == 0 ) return;
|
||||
|
||||
uint readpos = volatize(m_RingPos);
|
||||
uint freeroom;
|
||||
|
||||
if (m_WritePos < readpos)
|
||||
freeroom = readpos - m_WritePos;
|
||||
else
|
||||
{
|
||||
m_QueuedFrameCount++;
|
||||
SetEvent();
|
||||
}
|
||||
freeroom = RingBufferSize - (m_WritePos - readpos);
|
||||
|
||||
uint totalAccum = RingBufferSize - freeroom;
|
||||
uint somedone = totalAccum / 4;
|
||||
|
||||
m_SignalRingPosition = totalAccum;
|
||||
|
||||
//Console.WriteLn( Color_Blue, "(MTGS Sync) EEcore Vsync Sleep!\t\twrapspot=0x%06x, ringpos=0x%06x, writepos=0x%06x, signalpos=0x%06x", m_RingWrapSpot, readpos, writepos, m_SignalRingPosition );
|
||||
|
||||
AtomicExchange( m_SignalRingEnable, 1 );
|
||||
SetEvent();
|
||||
m_sem_OnRingReset.WaitWithoutYield();
|
||||
readpos = volatize(m_RingPos);
|
||||
|
||||
pxAssertDev( m_SignalRingPosition <= 0, "MTGS Thread Synchronization Error" );
|
||||
}
|
||||
|
||||
struct PacketTagType
|
||||
|
@ -197,7 +190,7 @@ void SysMtgsThread::OpenPlugin()
|
|||
{
|
||||
if( m_PluginOpened ) return;
|
||||
|
||||
memcpy_aligned( RingBuffer.Regs, PS2MEM_GS, sizeof(PS2MEM_GS) );
|
||||
memcpy_aligned( RingBuffer.Regs, PS2MEM_GS, sizeof(PS2MEM_GS)/16 );
|
||||
GSsetBaseMem( RingBuffer.Regs );
|
||||
GSirqCallback( dummyIrqCallback );
|
||||
|
||||
|
@ -330,38 +323,75 @@ void SysMtgsThread::ExecuteTaskInThread()
|
|||
{
|
||||
case GS_RINGTYPE_P1:
|
||||
{
|
||||
uint datapos = (m_RingPos+1) & RingBufferMask;
|
||||
const int qsize = tag.data[0];
|
||||
const u128* data = &RingBuffer[m_RingPos+1];
|
||||
const u128* data = &RingBuffer[datapos];
|
||||
|
||||
MTGS_LOG( "(MTGS Packet Read) ringtype=P1, qwc=%u", qsize );
|
||||
|
||||
// make sure that tag>>16 is the MAX size readable
|
||||
GSgifTransfer1((u32*)(data - 0x400 + qsize), 0x4000-qsize*16);
|
||||
//GSgifTransfer1((u32*)data, qsize);
|
||||
uint endpos = datapos + qsize;
|
||||
if( endpos >= RingBufferSize )
|
||||
{
|
||||
uint firstcopylen = RingBufferSize - datapos;
|
||||
GSgifTransfer( (u32*)data, firstcopylen );
|
||||
datapos = endpos & RingBufferMask;
|
||||
GSgifTransfer( (u32*)RingBuffer.m_Ring, datapos );
|
||||
}
|
||||
else
|
||||
{
|
||||
GSgifTransfer( (u32*)data, qsize );
|
||||
}
|
||||
|
||||
ringposinc += qsize;
|
||||
}
|
||||
break;
|
||||
|
||||
case GS_RINGTYPE_P2:
|
||||
{
|
||||
uint datapos = (m_RingPos+1) & RingBufferMask;
|
||||
const int qsize = tag.data[0];
|
||||
const u128* data = &RingBuffer[m_RingPos+1];
|
||||
const u128* data = &RingBuffer[datapos];
|
||||
|
||||
MTGS_LOG( "(MTGS Packet Read) ringtype=P2, qwc=%u", qsize );
|
||||
|
||||
GSgifTransfer2((u32*)data, qsize);
|
||||
uint endpos = datapos + qsize;
|
||||
if( endpos >= RingBufferSize )
|
||||
{
|
||||
uint firstcopylen = RingBufferSize - datapos;
|
||||
GSgifTransfer2( (u32*)data, firstcopylen );
|
||||
datapos = endpos & RingBufferMask;
|
||||
GSgifTransfer2( (u32*)RingBuffer.m_Ring, datapos );
|
||||
}
|
||||
else
|
||||
{
|
||||
GSgifTransfer2( (u32*)data, qsize );
|
||||
}
|
||||
|
||||
ringposinc += qsize;
|
||||
}
|
||||
break;
|
||||
|
||||
case GS_RINGTYPE_P3:
|
||||
{
|
||||
uint datapos = (m_RingPos+1) & RingBufferMask;
|
||||
const int qsize = tag.data[0];
|
||||
const u128* data = &RingBuffer[m_RingPos+1];
|
||||
const u128* data = &RingBuffer[datapos];
|
||||
|
||||
MTGS_LOG( "(MTGS Packet Read) ringtype=P3, qwc=%u", qsize );
|
||||
|
||||
GSgifTransfer3((u32*)data, qsize);
|
||||
uint endpos = datapos + qsize;
|
||||
if( endpos >= RingBufferSize )
|
||||
{
|
||||
uint firstcopylen = RingBufferSize - datapos;
|
||||
GSgifTransfer3( (u32*)data, firstcopylen );
|
||||
datapos = endpos & RingBufferMask;
|
||||
GSgifTransfer3( (u32*)RingBuffer.m_Ring, datapos );
|
||||
}
|
||||
else
|
||||
{
|
||||
GSgifTransfer3( (u32*)data, qsize );
|
||||
}
|
||||
|
||||
ringposinc += qsize;
|
||||
}
|
||||
break;
|
||||
|
@ -380,7 +410,7 @@ void SysMtgsThread::ExecuteTaskInThread()
|
|||
const int qsize = tag.data[0];
|
||||
ringposinc += qsize;
|
||||
|
||||
MTGS_LOG( "(MTGS Packet Read) ringtype=Vsync, field=%u, skip=%s", tag.data[0], tag.data[1] ? "true" : "false" );
|
||||
MTGS_LOG( "(MTGS Packet Read) ringtype=Vsync, field=%u, skip=%s", !!(((u32&)RingBuffer.Regs[0x1000]) & 0x2000) ? 0 : 1, tag.data[1] ? "true" : "false" );
|
||||
|
||||
// Mail in the important GS registers.
|
||||
RingCmdPacket_Vsync& local((RingCmdPacket_Vsync&)RingBuffer[m_RingPos+1]);
|
||||
|
@ -398,6 +428,7 @@ void SysMtgsThread::ExecuteTaskInThread()
|
|||
if( (GSopen2 == NULL) && (PADupdate != NULL) )
|
||||
PADupdate(0);
|
||||
|
||||
AtomicDecrement( m_QueuedFrameCount );
|
||||
StateCheckInThread();
|
||||
}
|
||||
break;
|
||||
|
@ -450,9 +481,14 @@ void SysMtgsThread::ExecuteTaskInThread()
|
|||
}
|
||||
}
|
||||
|
||||
uint newringpos = m_RingPos + ringposinc;
|
||||
pxAssert( newringpos <= RingBufferSize );
|
||||
m_RingPos = newringpos & RingBufferMask;
|
||||
uint newringpos = (m_RingPos + ringposinc) & RingBufferMask;
|
||||
|
||||
if( EmuConfig.GS.SynchronousMTGS )
|
||||
{
|
||||
pxAssert( m_WritePos == newringpos );
|
||||
}
|
||||
|
||||
m_RingPos = newringpos;
|
||||
|
||||
if( m_SignalRingEnable != 0 )
|
||||
{
|
||||
|
@ -546,7 +582,7 @@ void SysMtgsThread::SetEvent()
|
|||
|
||||
u8* SysMtgsThread::GetDataPacketPtr() const
|
||||
{
|
||||
return (u8*)&RingBuffer[m_packet_ringpos];
|
||||
return (u8*)&RingBuffer[m_packet_ringpos & RingBufferMask];
|
||||
}
|
||||
|
||||
// Closes the data packet send command, and initiates the gs thread (if needed).
|
||||
|
@ -555,6 +591,7 @@ void SysMtgsThread::SendDataPacket()
|
|||
// make sure a previous copy block has been started somewhere.
|
||||
pxAssert( m_packet_size != 0 );
|
||||
|
||||
#if 0
|
||||
uint temp = m_packet_ringpos + m_packet_size;
|
||||
pxAssert( temp <= RingBufferSize );
|
||||
temp &= RingBufferMask;
|
||||
|
@ -578,8 +615,16 @@ void SysMtgsThread::SendDataPacket()
|
|||
pxAssert( readpos != temp );
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
m_WritePos = temp;
|
||||
uint actualSize = ((m_packet_ringpos - m_packet_startpos) & RingBufferMask)-1;
|
||||
pxAssert( actualSize <= m_packet_size );
|
||||
pxAssert( m_packet_ringpos < RingBufferSize );
|
||||
|
||||
PacketTagType& tag = (PacketTagType&)RingBuffer[m_packet_startpos];
|
||||
tag.data[0] = actualSize;
|
||||
|
||||
m_WritePos = m_packet_ringpos;
|
||||
|
||||
if( EmuConfig.GS.SynchronousMTGS )
|
||||
{
|
||||
|
@ -596,7 +641,7 @@ void SysMtgsThread::SendDataPacket()
|
|||
//m_PacketLocker.Release();
|
||||
}
|
||||
|
||||
int SysMtgsThread::PrepDataPacket( MTGS_RingCommand cmd, u32 size )
|
||||
void SysMtgsThread::PrepDataPacket( MTGS_RingCommand cmd, u32 size )
|
||||
{
|
||||
// Note on volatiles: m_WritePos is not modified by the GS thread, so there's no need
|
||||
// to use volatile reads here. We do cache it though, since we know it never changes,
|
||||
|
@ -613,119 +658,63 @@ int SysMtgsThread::PrepDataPacket( MTGS_RingCommand cmd, u32 size )
|
|||
m_packet_size = size;
|
||||
++size; // takes into account our RingCommand QWC.
|
||||
|
||||
if( writepos + size < RingBufferSize )
|
||||
// generic gs wait/stall.
|
||||
// if the writepos is past the readpos then we're safe.
|
||||
// But if not then we need to make sure the readpos is outside the scope of
|
||||
// the block about to be written (writepos + size)
|
||||
|
||||
uint readpos = volatize(m_RingPos);
|
||||
uint endpos = writepos+size;
|
||||
uint freeroom;
|
||||
|
||||
if (writepos < readpos)
|
||||
freeroom = readpos - writepos;
|
||||
else
|
||||
freeroom = RingBufferSize - (writepos - readpos);
|
||||
|
||||
if (freeroom < size)
|
||||
{
|
||||
// generic gs wait/stall.
|
||||
// if the writepos is past the readpos then we're safe.
|
||||
// But if not then we need to make sure the readpos is outside the scope of
|
||||
// the block about to be written (writepos + size)
|
||||
// writepos will overlap readpos if we commit the data, so we need to wait until
|
||||
// readpos is out past the end of the future write pos, or until it wraps around
|
||||
// (in which case writepos will be >= readpos).
|
||||
|
||||
uint readpos = volatize(m_RingPos);
|
||||
if( (writepos < readpos) && (writepos+size >= readpos) )
|
||||
// Ideally though we want to wait longer, because if we just toss in this packet
|
||||
// the next packet will likely stall up too. So lets set a condition for the MTGS
|
||||
// thread to wake up the EE once there's a sizable chunk of the ringbuffer emptied.
|
||||
|
||||
uint somedone = (RingBufferSize - freeroom) / 4;
|
||||
if( somedone < size+1 ) somedone = size + 1;
|
||||
|
||||
// FMV Optimization: FMVs typically send *very* little data to the GS, in some cases
|
||||
// every other frame is nothing more than a page swap. Sleeping the EEcore is a
|
||||
// waste of time, and we get better results using a spinwait.
|
||||
|
||||
if( somedone > 0x80 )
|
||||
{
|
||||
// writepos is behind the readpos and will overlap it if we commit the data,
|
||||
// so we need to wait until readpos is out past the end of the future write pos,
|
||||
// or until it wraps around (in which case writepos will be >= readpos).
|
||||
pxAssertDev( m_SignalRingEnable == 0, "MTGS Thread Synchronization Error" );
|
||||
m_SignalRingPosition = somedone;
|
||||
|
||||
// Ideally though we want to wait longer, because if we just toss in this packet
|
||||
// the next packet will likely stall up too. So lets set a condition for the MTGS
|
||||
// thread to wake up the EE once there's a sizable chunk of the ringbuffer emptied.
|
||||
//Console.WriteLn( Color_Blue, "(EEcore Sleep) GenStall \tringpos=0x%06x, writepos=0x%06x, wrapspot=0x%06x, signalpos=0x%06x", readpos, writepos, m_RingWrapSpot, m_SignalRingPosition );
|
||||
|
||||
uint totalAccum = (m_RingWrapSpot - readpos) + writepos;
|
||||
uint somedone = totalAccum / 4;
|
||||
if( somedone < size+1 ) somedone = size + 1;
|
||||
|
||||
// FMV Optimization: FMVs typically send *very* little data to the GS, in some cases
|
||||
// every other frame is nothing more than a page swap. Sleeping the EEcore is a
|
||||
// waste of time, and we get better results using a spinwait.
|
||||
|
||||
if( somedone > 0x80 )
|
||||
{
|
||||
pxAssertDev( m_SignalRingEnable == 0, "MTGS Thread Synchronization Error" );
|
||||
m_SignalRingPosition = somedone;
|
||||
|
||||
//Console.WriteLn( Color_Blue, "(EEcore Sleep) GenStall \tringpos=0x%06x, writepos=0x%06x, wrapspot=0x%06x, signalpos=0x%06x", readpos, writepos, m_RingWrapSpot, m_SignalRingPosition );
|
||||
|
||||
do {
|
||||
AtomicExchange( m_SignalRingEnable, 1 );
|
||||
SetEvent();
|
||||
m_sem_OnRingReset.WaitWithoutYield();
|
||||
readpos = volatize(m_RingPos);
|
||||
//Console.WriteLn( Color_Blue, "(EEcore Awake) Report!\tringpos=0x%06x", readpos );
|
||||
} while( (writepos < readpos) && (writepos+size >= readpos) );
|
||||
|
||||
pxAssertDev( m_SignalRingPosition <= 0, "MTGS Thread Synchronization Error" );
|
||||
}
|
||||
else
|
||||
{
|
||||
do {
|
||||
AtomicExchange( m_SignalRingEnable, 1 );
|
||||
SetEvent();
|
||||
do {
|
||||
SpinWait();
|
||||
readpos = volatize(m_RingPos);
|
||||
} while( (writepos < readpos) && (writepos+size >= readpos) );
|
||||
}
|
||||
m_sem_OnRingReset.WaitWithoutYield();
|
||||
readpos = volatize(m_RingPos);
|
||||
//Console.WriteLn( Color_Blue, "(EEcore Awake) Report!\tringpos=0x%06x", readpos );
|
||||
} while( (writepos < readpos) && (writepos+size >= readpos) );
|
||||
|
||||
pxAssertDev( m_SignalRingPosition <= 0, "MTGS Thread Synchronization Error" );
|
||||
}
|
||||
else
|
||||
{
|
||||
SetEvent();
|
||||
do {
|
||||
SpinWait();
|
||||
readpos = volatize(m_RingPos);
|
||||
} while( (writepos < readpos) && (writepos+size >= readpos) );
|
||||
}
|
||||
}
|
||||
else if( writepos + size > RingBufferSize )
|
||||
{
|
||||
pxAssert( writepos != 0 );
|
||||
|
||||
// If the incoming packet doesn't fit, then start over from the start of the ring
|
||||
// buffer (it's a lot easier than trying to wrap the packet around the end of the
|
||||
// buffer).
|
||||
|
||||
//Console.WriteLn( "MTGS > Ringbuffer Got Filled!");
|
||||
RestartRingbuffer( size );
|
||||
writepos = m_WritePos;
|
||||
}
|
||||
else // always true - if( writepos + size == MTGS_RINGBUFFEREND )
|
||||
{
|
||||
// Yay. Perfect fit. What are the odds?
|
||||
// Copy is ready so long as readpos is less than writepos and *not* equal to the
|
||||
// base of the ringbuffer (otherwise the buffer will stop when the writepos is
|
||||
// wrapped around to zero later-on in SendDataPacket).
|
||||
|
||||
uint readpos = volatize(m_RingPos);
|
||||
//Console.WriteLn( "MTGS > Perfect Fit!\tringpos=0x%06x, writepos=0x%06x", readpos, writepos );
|
||||
if( readpos > writepos || readpos == 0 )
|
||||
{
|
||||
uint totalAccum = (readpos == 0) ? RingBufferSize : ((m_RingWrapSpot - readpos) + writepos);
|
||||
uint somedone = totalAccum / 4;
|
||||
if( somedone < size+1 ) somedone = size + 1;
|
||||
|
||||
// FMV Optimization: (see above) This condition of a perfect fit is so rare that optimizing
|
||||
// for it is pointless -- but it was also mindlessly simple copy-paste. So there. :p
|
||||
|
||||
if( somedone > 0x80 )
|
||||
{
|
||||
m_SignalRingPosition = somedone;
|
||||
|
||||
//Console.WriteLn( Color_Blue, "(MTGS Sync) EEcore Perfect Sleep!\twrapspot=0x%06x, ringpos=0x%06x, writepos=0x%06x, signalpos=0x%06x", m_RingWrapSpot, readpos, writepos, m_SignalRingPosition );
|
||||
|
||||
do {
|
||||
AtomicExchange( m_SignalRingEnable, 1 );
|
||||
SetEvent();
|
||||
m_sem_OnRingReset.WaitWithoutYield();
|
||||
readpos = volatize(m_RingPos);
|
||||
//Console.WriteLn( Color_Blue, "(MTGS Sync) EEcore Perfect Post-sleep Report!\tringpos=0x%06x", readpos );
|
||||
} while( (writepos < readpos) || (readpos==0) );
|
||||
|
||||
pxAssertDev( m_SignalRingPosition <= 0, "MTGS Thread Synchronization Error" );
|
||||
}
|
||||
else
|
||||
{
|
||||
//Console.WriteLn( Color_Blue, "(MTGS Sync) EEcore Perfect Spin!" );
|
||||
SetEvent();
|
||||
do {
|
||||
SpinWait();
|
||||
readpos = volatize(m_RingPos);
|
||||
} while( (writepos < readpos) || (readpos==0) );
|
||||
}
|
||||
}
|
||||
|
||||
m_QueuedFrameCount = 0;
|
||||
m_RingWrapSpot = RingBufferSize;
|
||||
}
|
||||
|
||||
#ifdef RINGBUF_DEBUG_STACK
|
||||
m_lock_Stack.Lock();
|
||||
|
@ -739,9 +728,8 @@ int SysMtgsThread::PrepDataPacket( MTGS_RingCommand cmd, u32 size )
|
|||
PacketTagType& tag = (PacketTagType&)RingBuffer[m_WritePos];
|
||||
tag.command = cmd;
|
||||
tag.data[0] = m_packet_size;
|
||||
m_packet_ringpos = m_WritePos + 1;
|
||||
|
||||
return m_packet_size;
|
||||
m_packet_startpos = m_WritePos;
|
||||
m_packet_ringpos = (m_WritePos + 1) & RingBufferMask;
|
||||
}
|
||||
|
||||
// Returns the amount of giftag data processed (in simd128 values).
|
||||
|
@ -749,13 +737,14 @@ int SysMtgsThread::PrepDataPacket( MTGS_RingCommand cmd, u32 size )
|
|||
// around VU memory instead of having buffer overflow...
|
||||
// Parameters:
|
||||
// size - size of the packet data, in simd128's
|
||||
int SysMtgsThread::PrepDataPacket( GIF_PATH pathidx, const u8* srcdata, u32 size )
|
||||
void SysMtgsThread::PrepDataPacket( GIF_PATH pathidx, u32 size )
|
||||
{
|
||||
//m_PacketLocker.Acquire();
|
||||
|
||||
return PrepDataPacket( (MTGS_RingCommand)pathidx, GIFPath_ParseTag(pathidx, srcdata, size) );
|
||||
PrepDataPacket( (MTGS_RingCommand)pathidx, size );
|
||||
}
|
||||
|
||||
#if 0
|
||||
void SysMtgsThread::RestartRingbuffer( uint packsize )
|
||||
{
|
||||
if( m_WritePos == 0 ) return;
|
||||
|
@ -816,6 +805,7 @@ void SysMtgsThread::RestartRingbuffer( uint packsize )
|
|||
if( EmuConfig.GS.SynchronousMTGS )
|
||||
WaitGS();
|
||||
}
|
||||
#endif
|
||||
|
||||
__forceinline uint SysMtgsThread::_PrepForSimplePacket()
|
||||
{
|
||||
|
@ -830,10 +820,7 @@ __forceinline uint SysMtgsThread::_PrepForSimplePacket()
|
|||
|
||||
future_writepos &= RingBufferMask;
|
||||
if( future_writepos == 0 )
|
||||
{
|
||||
m_QueuedFrameCount = 0;
|
||||
m_RingWrapSpot = RingBufferSize;
|
||||
}
|
||||
|
||||
uint readpos = volatize(m_RingPos);
|
||||
if( future_writepos == readpos )
|
||||
|
@ -841,7 +828,15 @@ __forceinline uint SysMtgsThread::_PrepForSimplePacket()
|
|||
// The ringbuffer read pos is blocking the future write position, so stall out
|
||||
// until the read position has moved.
|
||||
|
||||
uint totalAccum = (m_RingWrapSpot - readpos) + future_writepos;
|
||||
uint freeroom;
|
||||
|
||||
if (future_writepos < readpos)
|
||||
freeroom = readpos - future_writepos;
|
||||
else
|
||||
freeroom = RingBufferSize - (future_writepos - readpos);
|
||||
|
||||
uint totalAccum = RingBufferSize - freeroom;
|
||||
|
||||
uint somedone = totalAccum / 4;
|
||||
|
||||
if( somedone > 0x80 )
|
||||
|
|
|
@ -144,6 +144,7 @@ static s32 CALLBACK fallback_test() { return 0; }
|
|||
_GSvsync GSvsync;
|
||||
_GSopen GSopen;
|
||||
_GSopen2 GSopen2;
|
||||
_GSgifTransfer GSgifTransfer;
|
||||
_GSgifTransfer1 GSgifTransfer1;
|
||||
_GSgifTransfer2 GSgifTransfer2;
|
||||
_GSgifTransfer3 GSgifTransfer3;
|
||||
|
@ -309,7 +310,8 @@ static const LegacyApi_ReqMethod s_MethMessReq_GS[] =
|
|||
{
|
||||
{ "GSopen", (vMeth**)&GSopen, NULL },
|
||||
{ "GSvsync", (vMeth**)&GSvsync, NULL },
|
||||
{ "GSgifTransfer1", (vMeth**)&GSgifTransfer1, NULL },
|
||||
{ "GSgifTransfer", (vMeth**)&GSgifTransfer, NULL },
|
||||
//{ "GSgifTransfer1", (vMeth**)&GSgifTransfer1, NULL },
|
||||
{ "GSgifTransfer2", (vMeth**)&GSgifTransfer2, NULL },
|
||||
{ "GSgifTransfer3", (vMeth**)&GSgifTransfer3, NULL },
|
||||
{ "GSreadFIFO2", (vMeth**)&GSreadFIFO2, NULL },
|
||||
|
|
|
@ -2057,21 +2057,8 @@ void _vuXGKICK(VURegs * VU)
|
|||
|
||||
u8* data = ((u8*)VU->Mem + ((VU->VI[_Is_].US[0]*16) & 0x3fff));
|
||||
u32 size;
|
||||
size = GetMTGS().PrepDataPacket( GIF_PATH_1, data, (0x4000-((VU->VI[_Is_].US[0]*16) & 0x3fff)) >> 4);
|
||||
u8* pmem = GetMTGS().GetDataPacketPtr();
|
||||
|
||||
if((size << 4) > (u32)(0x4000-((VU->VI[_Is_].US[0]*16) & 0x3fff)))
|
||||
{
|
||||
//DevCon.Warning("addr + Size = 0x%x, transferring %x then doing %x", ((VU->VI[_Is_].US[0]*16) & 0x3fff) + (size << 4), (0x4000-((VU->VI[_Is_].US[0]*16) & 0x3fff)) >> 4, size - (0x4000-((VU->VI[_Is_].US[0]*16) & 0x3fff) >> 4));
|
||||
memcpy_aligned(pmem, (u8*)VU->Mem+((VU->VI[_Is_].US[0]*16) & 0x3fff), 0x4000-((VU->VI[_Is_].US[0]*16) & 0x3fff));
|
||||
size -= (0x4000-((VU->VI[_Is_].US[0]*16) & 0x3fff)) >> 4;
|
||||
//DevCon.Warning("Size left %x", size);
|
||||
pmem += 0x4000-((VU->VI[_Is_].US[0]*16) & 0x3fff);
|
||||
memcpy_aligned(pmem, (u8*)VU->Mem, size<<4);
|
||||
}
|
||||
else {
|
||||
memcpy_aligned(pmem, (u8*)VU->Mem+((VU->VI[_Is_].US[0]*16) & 0x3fff), size<<4);
|
||||
}
|
||||
GetMTGS().PrepDataPacket( GIF_PATH_1, 0x400 );
|
||||
size = GIFPath_CopyTag( GIF_PATH_1, (u128*)data, (0x400-(VU->VI[_Is_].US[0] & 0x3ff)) );
|
||||
GetMTGS().SendDataPacket();
|
||||
}
|
||||
|
||||
|
|
|
@ -213,8 +213,8 @@ template<int idx> _f int _vifCode_Direct(int pass, u8* data, bool isDirectHL) {
|
|||
v.bSize = 0;
|
||||
v.bPtr = 0;
|
||||
}
|
||||
const uint count = GetMTGS().PrepDataPacket(GIF_PATH_2, v.buffer, 1);
|
||||
memcpy_fast(GetMTGS().GetDataPacketPtr(), v.buffer, count << 4);
|
||||
GetMTGS().PrepDataPacket(GIF_PATH_2, 1);
|
||||
GIFPath_CopyTag(GIF_PATH_2, (u128*)v.buffer, 1);
|
||||
GetMTGS().SendDataPacket();
|
||||
|
||||
if(vif1.tag.size == 0)
|
||||
|
@ -226,16 +226,17 @@ template<int idx> _f int _vifCode_Direct(int pass, u8* data, bool isDirectHL) {
|
|||
}
|
||||
else
|
||||
{
|
||||
const uint count = GetMTGS().PrepDataPacket(GIF_PATH_2, data, size >> 4);
|
||||
memcpy_fast(GetMTGS().GetDataPacketPtr(), data, count << 4);
|
||||
GetMTGS().PrepDataPacket(GIF_PATH_2, size/16);
|
||||
uint count = GIFPath_CopyTag(GIF_PATH_2, (u128*)data, size/16) * 4;
|
||||
GetMTGS().SendDataPacket();
|
||||
vif1.tag.size -= count << 2;
|
||||
|
||||
vif1.tag.size -= count;
|
||||
if(vif1.tag.size == 0)
|
||||
{
|
||||
vif1.cmd = 0;
|
||||
}
|
||||
vif1.vifstalled = true;
|
||||
return count << 2;
|
||||
return count;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -97,7 +97,7 @@ struct GIFPath
|
|||
u8 GetReg();
|
||||
bool IsActive() const;
|
||||
|
||||
int ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size);
|
||||
int CopyTag(GIF_PATH pathidx, const u128* pMem, u32 size);
|
||||
int ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size);
|
||||
};
|
||||
|
||||
|
@ -287,7 +287,8 @@ __forceinline void GIFPath::PrepPackedRegs()
|
|||
|
||||
__forceinline void GIFPath::SetTag(const void* mem)
|
||||
{
|
||||
const_cast<GIFTAG&>(tag) = *((GIFTAG*)mem);
|
||||
_mm_store_ps( (float*)&tag, _mm_loadu_ps((float*)mem) );
|
||||
//const_cast<GIFTAG&>(tag) = *((GIFTAG*)mem);
|
||||
|
||||
nloop = tag.NLOOP;
|
||||
curreg = 0;
|
||||
|
@ -521,15 +522,50 @@ __forceinline int GIFPath::ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 s
|
|||
return size;
|
||||
}
|
||||
|
||||
__forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
|
||||
void MemCopy_WrappedDest( const u128* src, u128* destBase, uint& destStart, uint destSize, uint len )
|
||||
{
|
||||
uint endpos = destStart + len;
|
||||
if( endpos >= destSize )
|
||||
{
|
||||
uint firstcopylen = RingBufferSize - destStart;
|
||||
memcpy_aligned(&destBase[destStart], src, firstcopylen );
|
||||
|
||||
destStart = endpos & RingBufferMask;
|
||||
memcpy_aligned(destBase, src+firstcopylen, destStart );
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy_aligned(&destBase[destStart], src, len );
|
||||
destStart += len;
|
||||
}
|
||||
}
|
||||
|
||||
// copyTag: moves a single quadword from pMem128 into the ring buffer slot at ringpos, then
// steps the source pointer forward, decrements the remaining size, and advances ringpos
// (wrapping through RingBufferMask).  The macro expands in place, so the enclosing scope
// must provide pMem128, size and ringpos.  The source is read with an unaligned load and
// the ring buffer slot is written with an aligned store.
//
// [TODO] optimization: If later templated, we can have Paths 1 and 3 use aligned SSE movs,
// since only PATH2 can feed us unaligned source data.
#define copyTag() \
	do { \
		_mm_store_ps( (float*)&RingBuffer.m_Ring[ringpos], _mm_loadu_ps((float*)pMem128) ); \
		++pMem128; \
		--size; \
		ringpos = (ringpos+1) & RingBufferMask; \
	} while(false)
|
||||
|
||||
__forceinline int GIFPath::CopyTag(GIF_PATH pathidx, const u128* pMem128, u32 size)
|
||||
{
|
||||
uint& ringpos = GetMTGS().m_packet_ringpos;
|
||||
const uint original_ringpos = ringpos;
|
||||
|
||||
u32 startSize = size; // Start Size
|
||||
|
||||
while (size > 0) {
|
||||
if (!nloop) {
|
||||
|
||||
SetTag(pMem);
|
||||
incTag(1);
|
||||
// [TODO] Optimization: Use MMX intrinsics for SetTag and CopyTag, which both currently
|
||||
// produce a series of mov eax,[src]; mov [dest],eax instructions to copy these
|
||||
// individual qwcs. Warning: Path2 transfers are not always QWC-aligned, but they are
|
||||
// always aligned on an 8-byte boundary; so it's probably best to use MMX here.
|
||||
|
||||
SetTag((u8*)pMem128);
|
||||
copyTag();
|
||||
|
||||
if(nloop > 0)
|
||||
{
|
||||
|
@ -599,9 +635,9 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
|
|||
{
|
||||
do {
|
||||
if (GetReg() == 0xe) {
|
||||
gsHandler(pMem);
|
||||
gsHandler((u8*)pMem128);
|
||||
}
|
||||
incTag(1);
|
||||
copyTag();
|
||||
} while(StepReg() && size > 0 && SIGNAL_IMR_Pending == false);
|
||||
}
|
||||
else
|
||||
|
@ -644,11 +680,14 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
|
|||
curreg = 0;
|
||||
nloop = 0;
|
||||
}
|
||||
incTag(len);
|
||||
|
||||
MemCopy_WrappedDest( pMem128, RingBuffer.m_Ring, ringpos, RingBufferSize, len );
|
||||
pMem128 += len;
|
||||
size -= len;
|
||||
}
|
||||
break;
|
||||
case GIF_FLG_REGLIST:
|
||||
{
|
||||
{
|
||||
GIF_LOG("Reglist Mode EOP %x", tag.EOP);
|
||||
|
||||
// In reglist mode, the GIF packs 2 registers into each QWC. The nloop however
|
||||
|
@ -687,8 +726,9 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
|
|||
nloop = 0;
|
||||
}
|
||||
|
||||
incTag(len);
|
||||
|
||||
MemCopy_WrappedDest( pMem128, RingBuffer.m_Ring, ringpos, RingBufferSize, len );
|
||||
pMem128 += len;
|
||||
size -= len;
|
||||
}
|
||||
break;
|
||||
case GIF_FLG_IMAGE:
|
||||
|
@ -696,13 +736,15 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
|
|||
{
|
||||
GIF_LOG("IMAGE Mode EOP %x", tag.EOP);
|
||||
int len = aMin(size, nloop);
|
||||
incTag(len);
|
||||
|
||||
MemCopy_WrappedDest( pMem128, RingBuffer.m_Ring, ringpos, RingBufferSize, len );
|
||||
|
||||
pMem128 += len;
|
||||
size -= len;
|
||||
nloop -= len;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
if(pathidx == GIF_PATH_1)
|
||||
|
@ -713,11 +755,11 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
|
|||
{
|
||||
size = 0x3ff - startSize;
|
||||
startSize = 0x3ff;
|
||||
pMem -= 0x4000;
|
||||
pMem128 -= 0x400;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Note: The BIOS does an XGKICK on the VU1 and lets yt DMA to the GS without an EOP
|
||||
// Note: The BIOS does an XGKICK on the VU1 and lets it DMA to the GS without an EOP
|
||||
// (seemingly to loop forever), only to write an EOP later on. No other game is known to
|
||||
// do anything of the sort.
|
||||
// So let's just cap the DMA at 16k, and force it to "look" like it's terminated for now.
|
||||
|
@ -727,6 +769,11 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
|
|||
|
||||
Console.Warning("GIFTAG error, size exceeded VU memory size %x", startSize);
|
||||
nloop = 0;
|
||||
|
||||
// Don't send the packet to the GS -- it's incomplete and might cause the GS plugin
|
||||
// to get confused and die. >_<
|
||||
|
||||
ringpos = original_ringpos;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -793,47 +840,18 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
|
|||
gif->qwc -= size;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
// Processes a GIFtag & packet, and throws out some gsIRQs as needed.
|
||||
// Used to keep interrupts in sync with the EE, while the GS itself
|
||||
// runs potentially several frames behind.
|
||||
// Parameters:
|
||||
// size - max size of incoming data stream, in qwc (simd128)
|
||||
__forceinline int GIFPath_ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
|
||||
__forceinline int GIFPath_CopyTag(GIF_PATH pathidx, const u128* pMem, u32 size)
|
||||
{
|
||||
#ifdef PCSX2_GSRING_SAMPLING_STATS
|
||||
static uptr profStartPtr = 0;
|
||||
static uptr profEndPtr = 0;
|
||||
if (profStartPtr == 0) {
|
||||
__asm
|
||||
{
|
||||
__beginfunc:
|
||||
mov profStartPtr, offset __beginfunc;
|
||||
mov profEndPtr, offset __endfunc;
|
||||
}
|
||||
ProfilerRegisterSource( "GSRingBufCopy", (void*)profStartPtr, profEndPtr - profStartPtr );
|
||||
}
|
||||
#endif
|
||||
|
||||
int retSize = s_gifPath[pathidx].ParseTag(pathidx, pMem, size);
|
||||
|
||||
#ifdef PCSX2_GSRING_SAMPLING_STATS
|
||||
__asm
|
||||
{
|
||||
__endfunc:
|
||||
nop;
|
||||
}
|
||||
#endif
|
||||
return retSize;
|
||||
return s_gifPath[pathidx].CopyTag(pathidx, pMem, size);
|
||||
}
|
||||
|
||||
//Quick version for queueing PATH1 data
|
||||
|
||||
// Quick version for queueing PATH1 data.
|
||||
// This version calculates the real length of the packet data only. It does not process
|
||||
// IRQs or DMA status updates.
|
||||
__forceinline int GIFPath_ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size)
|
||||
{
|
||||
int retSize = s_gifPath[pathidx].ParseTagQuick(pathidx, pMem, size);
|
||||
|
|
|
@ -1101,27 +1101,15 @@ void __fastcall mVU_XGKICK_(u32 addr) {
|
|||
|
||||
if(gifRegs->stat.APATH <= GIF_APATH1 || (gifRegs->stat.APATH == GIF_APATH3 && gifRegs->stat.IP3 == true) && SIGNAL_IMR_Pending == false)
|
||||
{
|
||||
|
||||
if(Path1WritePos != 0)
|
||||
{
|
||||
// Flush any pending transfers so things don't go up in the wrong order
|
||||
while(gifRegs->stat.P1Q == true) gsPath1Interrupt();
|
||||
}
|
||||
size = GetMTGS().PrepDataPacket(GIF_PATH_1, data, diff);
|
||||
pDest = GetMTGS().GetDataPacketPtr();
|
||||
if (size > diff) {
|
||||
// fixme: one of these days the following *16's will get cleaned up when we introduce
|
||||
// a special qwc/simd16 optimized version of memcpy_aligned. :)
|
||||
//DevCon.Status("XGkick Wrap!");
|
||||
memcpy_aligned(pDest, microVU1.regs->Mem + (addr*16), diff*16);
|
||||
size -= diff;
|
||||
pDest += diff*16;
|
||||
memcpy_aligned(pDest, microVU1.regs->Mem, size*16);
|
||||
}
|
||||
else {
|
||||
memcpy_aligned(pDest, microVU1.regs->Mem + (addr*16), size*16);
|
||||
}
|
||||
GetMTGS().PrepDataPacket(GIF_PATH_1, 0x400);
|
||||
size = GIFPath_CopyTag(GIF_PATH_1, (u128*)data, diff);
|
||||
GetMTGS().SendDataPacket();
|
||||
|
||||
if(GSTransferStatus.PTH1 == STOPPED_MODE)
|
||||
{
|
||||
gifRegs->stat.OPH = false;
|
||||
|
@ -1141,14 +1129,14 @@ void __fastcall mVU_XGKICK_(u32 addr) {
|
|||
// fixme: one of these days the following *16's will get cleaned up when we introduce
|
||||
// a special qwc/simd16 optimized version of memcpy_aligned. :)
|
||||
//DevCon.Status("XGkick Wrap!");
|
||||
memcpy_aligned(pDest, microVU1.regs->Mem + (addr*16), diff*16);
|
||||
memcpy_aligned(pDest, microVU1.regs->Mem + (addr*16), diff);
|
||||
Path1WritePos += size;
|
||||
size -= diff;
|
||||
pDest += diff*16;
|
||||
memcpy_aligned(pDest, microVU1.regs->Mem, size*16);
|
||||
memcpy_aligned(pDest, microVU1.regs->Mem, size);
|
||||
}
|
||||
else {
|
||||
memcpy_aligned(pDest, microVU1.regs->Mem + (addr*16), size*16);
|
||||
memcpy_aligned(pDest, microVU1.regs->Mem + (addr*16), size);
|
||||
Path1WritePos += size;
|
||||
}
|
||||
//if(!gifRegs->stat.P1Q) CPU_INT(28, 128);
|
||||
|
|
|
@ -1988,21 +1988,10 @@ void __fastcall VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr)
|
|||
// Flush any pending transfers so things don't go up in the wrong order
|
||||
while(gifRegs->stat.P1Q == true) gsPath1Interrupt();
|
||||
}
|
||||
size = GetMTGS().PrepDataPacket(GIF_PATH_1, data, diff);
|
||||
pDest = GetMTGS().GetDataPacketPtr();
|
||||
if (size > diff) {
|
||||
// fixme: one of these days the following *16's will get cleaned up when we introduce
|
||||
// a special qwc/simd16 optimized version of memcpy_aligned. :)
|
||||
|
||||
memcpy_aligned(pDest, VU1.Mem + addr, diff*16);
|
||||
size -= diff;
|
||||
pDest += diff*16;
|
||||
memcpy_aligned(pDest, VU1.Mem, size*16);
|
||||
}
|
||||
else {
|
||||
memcpy_aligned(pDest, VU1.Mem + addr, size*16);
|
||||
}
|
||||
GetMTGS().PrepDataPacket(GIF_PATH_1, 0x400);
|
||||
size = GIFPath_CopyTag(GIF_PATH_1, (u128*)data, diff);
|
||||
GetMTGS().SendDataPacket();
|
||||
|
||||
if(GSTransferStatus.PTH1 == STOPPED_MODE )
|
||||
{
|
||||
gifRegs->stat.OPH = false;
|
||||
|
@ -2015,8 +2004,6 @@ void __fastcall VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr)
|
|||
size = GIFPath_ParseTagQuick(GIF_PATH_1, data, diff);
|
||||
pDest = &Path1Buffer[Path1WritePos*16];
|
||||
|
||||
|
||||
|
||||
pxAssumeMsg((Path1WritePos+size < sizeof(Path1Buffer)), "XGKick Buffer Overflow detected on Path1Buffer!");
|
||||
|
||||
//DevCon.Warning("Storing size %x PATH 1", size);
|
||||
|
@ -2024,14 +2011,14 @@ void __fastcall VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr)
|
|||
// fixme: one of these days the following *16's will get cleaned up when we introduce
|
||||
// a special qwc/simd16 optimized version of memcpy_aligned. :)
|
||||
//DevCon.Status("XGkick Wrap!");
|
||||
memcpy_aligned(pDest, VU1.Mem + addr, diff*16);
|
||||
memcpy_aligned(pDest, VU1.Mem + addr, diff);
|
||||
Path1WritePos += size;
|
||||
size -= diff;
|
||||
pDest += diff*16;
|
||||
memcpy_aligned(pDest, VU1.Mem, size*16);
|
||||
memcpy_aligned(pDest, VU1.Mem, size);
|
||||
}
|
||||
else {
|
||||
memcpy_aligned(pDest, VU1.Mem + addr, size*16);
|
||||
memcpy_aligned(pDest, VU1.Mem + addr, size);
|
||||
Path1WritePos += size;
|
||||
}
|
||||
//if(!gifRegs->stat.P1Q) CPU_INT(28, 128);
|
||||
|
|
Loading…
Reference in New Issue