Merge optimizations and code cleanups from the ReorderingMTGS branch (r3523) into Trunk. Summary of changes:

* GIFpath parsing copies as it parses (speedup!)
* Improved memcpy for 128-bit copies (speedup!)
* MTGS ringbuffer uses free-flowing wrapping now, which simplified ringbuffer management logic considerably (speedup!); a sketch of the wrapping scheme follows below
* Various MTGS-related refactoring (var renames and such)

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3532 96395faa-99c1-11dd-bbfe-3dabce05a288
Jake.Stine 2010-07-19 03:48:37 +00:00
commit 856ffe4c65
30 changed files with 754 additions and 677 deletions
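
The "free-flowing wrapping" mentioned in the summary above comes down to letting the read/write indices advance modulo a power-of-two ring size instead of restarting the buffer at zero via a RESTART packet. A minimal sketch of the idea, using the names introduced by this commit (the advance() helper itself is illustrative, not part of the diff):

	static const uint RingBufferSizeFactor = 19;
	static const uint RingBufferSize = 1 << RingBufferSizeFactor;   // in simd128 units (8 MB)
	static const uint RingBufferMask = RingBufferSize - 1;

	// advance a ring index by 'qwc' slots, wrapping past the end of the buffer
	static uint advance( uint pos, uint qwc )
	{
		return (pos + qwc) & RingBufferMask;
	}

Because indices always wrap through the mask, the old GS_RINGTYPE_RESTART command and RestartRingbuffer() logic become unnecessary, which accounts for most of the simplification claimed above.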

View File

@ -248,6 +248,7 @@ void CALLBACK GSsetSettingsDir( const char* dir );
void CALLBACK GSsetLogDir( const char* dir );
void CALLBACK GSvsync(int field);
void CALLBACK GSgifTransfer(u32 *pMem, u32 addr);
void CALLBACK GSgifTransfer1(u32 *pMem, u32 addr);
void CALLBACK GSgifTransfer2(u32 *pMem, u32 size);
void CALLBACK GSgifTransfer3(u32 *pMem, u32 size);

View File

@ -22,12 +22,14 @@
extern "C" void __fastcall memcpy_amd_(void *dest, const void *src, size_t bytes);
extern "C" u8 memcmp_mmx(const void* src1, const void* src2, int cmpsize);
extern "C" void memxor_mmx(void* dst, const void* src1, int cmpsize);
extern void memcpy_amd_qwc(void *dest, const void *src, size_t bytes);
#else
# include "win_memzero.h"
extern void __fastcall memcpy_amd_(void *dest, const void *src, size_t bytes);
extern void memcpy_amd_qwc(void *dest, const void *src, size_t bytes);
extern u8 memcmp_mmx(const void* src1, const void* src2, int cmpsize);
extern void memxor_mmx(void* dst, const void* src1, int cmpsize);
@ -40,9 +42,12 @@ void _memset16_unaligned( void* dest, u16 data, size_t size );
extern void memcpy_vibes(void * dest, const void * src, int size);
extern void gen_memcpy_vibes();
#define memcpy_fast memcpy_amd_ // Fast memcpy
#define memcpy_aligned memcpy_amd_ // Memcpy with 16-byte Aligned addresses
#define memcpy_const memcpy_amd_ // Memcpy with constant size
#define memcpy_constA memcpy_amd_ // Memcpy with constant size and 16-byte aligned
#define memcpy_qwc_ memcpy_vibes // Memcpy in aligned qwc increments, with 0x400 qwc or less
#define memcpy_qwc(x,y,z) memcpy_amd_(x, y, z*16) // Memcpy in aligned qwc increments
#define memcpy_fast memcpy_amd_ // Fast memcpy
#define memcpy_aligned(d,s,c) memcpy_amd_(d,s,c) // Memcpy with 16-byte Aligned addresses
#define memcpy_const memcpy_amd_ // Memcpy with constant size
#define memcpy_constA memcpy_amd_ // Memcpy with constant size and 16-byte aligned
#define memcpy_qwc_ memcpy_vibes // Memcpy in aligned qwc increments, with 0x400 qwc or less
#define memcpy_qwc(d,s,c) memcpy_amd_qwc(d,s,c)
// Useful alternative if we think memcpy_amd_qwc is buggy
//#define memcpy_qwc(d,s,c) memcpy_amd_(d,s,c*16)
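
For readers not used to the unit: a qwc is one 128-bit quadword (16 bytes), so the two forms below are expected to move the same 64 bytes. A quick sanity-check sketch only, not part of the diff:

	memcpy_qwc (dst, src, 4);        // copy 4 quadwords = 64 bytes via memcpy_amd_qwc
	memcpy_amd_(dst, src, 4 * 16);   // byte-count equivalent, the fallback spelled out above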

View File

@ -129,6 +129,10 @@ namespace Threading
// For use in spin/wait loops.
extern void SpinWait();
// Use prior to committing data to another thread (internal memcpy_qwc does not use fencing,
// so that many memcpys can be issued in a row more efficiently)
extern void StoreFence();
// Optional implementation to enable hires thread/process scheduler for the operating system.
// Needed by Windows, but might not be relevant to other platforms.
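
As the comment above notes, the internal memcpy_qwc path is meant to skip fencing so that several copies can be batched; StoreFence() is the explicit fence a producer issues before handing that data to another thread. A rough sketch of the intended usage (buffer and variable names here are illustrative only):

	memcpy_qwc( &ring[wpos],        srcA, lenA );   // several copies in a row, no fence between them
	memcpy_qwc( &ring[wpos + lenA], srcB, lenB );
	Threading::StoreFence();                        // flush write-combining buffers once
	writePos = wpos + lenA + lenB;                  // only now publish the new position to the reader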

View File

@ -71,7 +71,7 @@ wxString DiagnosticOrigin::ToString( const wxChar* msg ) const
bool pxAssertImpl_LogIt( const DiagnosticOrigin& origin, const wxChar *msg )
{
wxLogError( origin.ToString( msg ) );
wxLogError( L"%s", origin.ToString( msg ) );
return false;
}

View File

@ -36,6 +36,11 @@ __forceinline void Threading::SpinWait()
__asm pause;
}
__forceinline void Threading::StoreFence()
{
__asm sfence;
}
__forceinline void Threading::EnableHiresScheduler()
{
// This improves accuracy of Sleep() by some amount, and only adds a negligible amount of

View File

@ -41,12 +41,10 @@
MEMCPY_AMD.CPP
******************************************************************************/
// Very optimized memcpy() routine for AMD Athlon and Duron family.
// This code uses any of FOUR different basic copy methods, depending
// on the transfer size.
// NOTE: Since this code uses MOVNTQ (also known as "Non-Temporal MOV" or
// "Streaming Store"), and also uses the software prefetch instructions,
// be sure you're running on Athlon/Duron or other recent CPU before calling!
// be sure you're running on P4/Core2/i7, Athlon/Phenom or newer CPUs before
// calling!
#define TINY_BLOCK_COPY 64 // upper limit for movsd type copy
// The smallest copy uses the X86 "movsd" instruction, in an optimized
@ -68,10 +66,8 @@ MEMCPY_AMD.CPP
#if defined(_MSC_VER)
// --------------------------------------------------------------------------------------
// Fast memcpy as coded by AMD, and then improved by air.
// --------------------------------------------------------------------------------------
// Fast memcpy as coded by AMD, and then improved by air for PCSX2 needs.
__declspec(naked) void __fastcall memcpy_amd_(void *dest, const void *src, size_t n)
{
__asm
@ -92,6 +88,7 @@ __declspec(naked) void __fastcall memcpy_amd_(void *dest, const void *src, size_
jbe $memcpy_do_align ; it appears to be slower
cmp eax, 64*1024
jbe $memcpy_align_done
$memcpy_do_align:
mov eax, 8 ; a trick that's faster than rep movsb...
sub eax, edi ; align destination to qword
@ -146,7 +143,7 @@ $memcpy_ic_1: ; 64-byte block copies, in-cache copy
add esi, 64 ; update source pointer
add edi, 64 ; update destination pointer
dec eax ; count down
sub eax, 1
jnz $memcpy_ic_1 ; last 64-byte block?
$memcpy_ic_2:
@ -189,64 +186,15 @@ $memcpy_uc_1: ; 64-byte blocks, uncached copy
movq mm1,[esi-8]
movntq [edi-24], mm2
movntq [edi-16], mm0
dec eax
movntq [edi-8], mm1
sub eax, 1
jnz $memcpy_uc_1 ; last 64-byte block?
jmp $memcpy_ic_2 ; almost done (not needed because large copy below was removed)
// For the largest size blocks, a special technique called Block Prefetch
// can be used to accelerate the read operations. Block Prefetch reads
// one address per cache line, for a series of cache lines, in a short loop.
// This is faster than using software prefetch. The technique is great for
// getting maximum read bandwidth, especially in DDR memory systems.
// Note: Pcsx2 rarely invokes large copies, so this mode has been disabled to
// help keep the code cache footprint of memcpy_fast to a minimum.
/*
$memcpy_bp_1: ; large blocks, block prefetch copy
cmp ecx, CACHEBLOCK ; big enough to run another prefetch loop?
jl $memcpy_64_test ; no, back to regular uncached copy
mov eax, CACHEBLOCK / 2 ; block prefetch loop, unrolled 2X
add esi, CACHEBLOCK * 64 ; move to the top of the block
align 16
$memcpy_bp_2:
mov edx, [esi-64] ; grab one address per cache line
mov edx, [esi-128] ; grab one address per cache line
sub esi, 128 ; go reverse order to suppress HW prefetcher
dec eax ; count down the cache lines
jnz $memcpy_bp_2 ; keep grabbing more lines into cache
mov eax, CACHEBLOCK ; now that it's in cache, do the copy
align 16
$memcpy_bp_3:
movq mm0, [esi ] ; read 64 bits
movq mm1, [esi+ 8]
movq mm2, [esi+16]
movq mm3, [esi+24]
movq mm4, [esi+32]
movq mm5, [esi+40]
movq mm6, [esi+48]
movq mm7, [esi+56]
add esi, 64 ; update source pointer
movntq [edi ], mm0 ; write 64 bits, bypassing cache
movntq [edi+ 8], mm1 ; note: movntq also prevents the CPU
movntq [edi+16], mm2 ; from READING the destination address
movntq [edi+24], mm3 ; into the cache, only to be over-written,
movntq [edi+32], mm4 ; so that also helps performance
movntq [edi+40], mm5
movntq [edi+48], mm6
movntq [edi+56], mm7
add edi, 64 ; update dest pointer
dec eax ; count down
jnz $memcpy_bp_3 ; keep copying
sub ecx, CACHEBLOCK ; update the 64-byte block count
jmp $memcpy_bp_1 ; keep processing chunks
*/
// Note: Pcsx2 rarely invokes large copies, so the large copy "block prefetch" mode has been
// disabled to help keep the code cache footprint of memcpy_fast to a minimum.
// The smallest copy uses the X86 "movsd" instruction, in an optimized
// form which is an "unrolled loop". Then it handles the last few bytes.
@ -274,17 +222,99 @@ $memcpy_last_few: ; dword aligned from before movsd's
rep movsb ; the last 1, 2, or 3 bytes
$memcpy_final:
pop esi
pop edi
emms ; clean up the MMX state
sfence ; flush the write buffer
//mov eax, [dest] ; ret value = destination pointer
pop esi
pop edi
ret 4
}
}
// Quadword Copy! Count is in QWCs (128 bits). Neither source nor dest need to be aligned.
__forceinline void memcpy_amd_qwc(void *dest, const void *src, size_t qwc)
{
// Optimization Analysis: This code is *nearly* optimal. Do not think that using XMM
// registers will improve copy performance, because they won't. Use of XMMs is only
// warranted in situations where both source and dest are guaranteed aligned to 16 bytes,
// and even then the benefits are typically minimal (sometimes slower depending on the
// amount of data being copied).
//
// Thus: MMX are alignment safe, fast, and widely available. Lets just stick with them.
// --air
// Linux Conversion note:
// This code would benefit nicely from having inline-able GAS syntax, since it should
// allow GCC to optimize the first 3 instructions out of existence in many scenarios.
// And its called enough times to probably merit the extra effort to ensure proper
// optimization. --air
__asm
{
mov ecx, dest
mov edx, src
mov eax, qwc ; keep a copy of count
shr eax, 1
jz $memcpy_qwc_1 ; only one 16 byte block to copy?
cmp eax, IN_CACHE_COPY/32
jb $memcpy_qwc_loop1 ; small copies should be cached (definite speedup --air)
$memcpy_qwc_loop2: ; 32-byte blocks, uncached copy
prefetchnta [edx + 568] ; start reading ahead (tested: it helps! --air)
movq mm0,[edx+0] ; read 64 bits
movq mm1,[edx+8]
movq mm2,[edx+16]
movntq [ecx+0], mm0 ; write 64 bits, bypassing the cache
movntq [ecx+8], mm1
movq mm3,[edx+24]
movntq [ecx+16], mm2
movntq [ecx+24], mm3
add edx,32 ; update source pointer
add ecx,32 ; update destination pointer
sub eax,1
jnz $memcpy_qwc_loop2 ; last 64-byte block?
sfence ; flush the write buffer
jmp $memcpy_qwc_1
; 32-byte blocks, cached!
; This *is* important. Removing this and using exclusively non-temporal stores
; results in noticeable speed loss!
$memcpy_qwc_loop1:
prefetchnta [edx + 568] ; start reading ahead (tested: it helps! --air)
movq mm0,[edx+0] ; read 64 bits
movq mm1,[edx+8]
movq mm2,[edx+16]
movq [ecx+0], mm0 ; write 64 bits, bypassing the cache
movq [ecx+8], mm1
movq mm3,[edx+24]
movq [ecx+16], mm2
movq [ecx+24], mm3
add edx,32 ; update source pointer
add ecx,32 ; update destination pointer
sub eax,1
jnz $memcpy_qwc_loop1 ; last 64-byte block?
$memcpy_qwc_1:
test qwc,1
jz $memcpy_qwc_final
movq mm0,[edx]
movq mm1,[edx+8]
movq [ecx], mm0
movq [ecx+8], mm1
$memcpy_qwc_final:
emms ; clean up the MMX state
}
}
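
As a plain-C reference for what the assembly above is contractually doing (copy qwc quadwords of 16 bytes each, with a 32-byte main loop plus an optional trailing quadword), here is an illustrative equivalent; it is a sanity-check sketch only, not a replacement for the MMX version:

	void memcpy_qwc_reference( void* dest, const void* src, size_t qwc )
	{
		u64* d = (u64*)dest;
		const u64* s = (const u64*)src;
		for( size_t i = 0; i < qwc / 2; ++i )    // 32-byte blocks, like the loops above
		{
			d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; d[3] = s[3];
			d += 4; s += 4;
		}
		if( qwc & 1 )                            // trailing 16-byte quadword
		{
			d[0] = s[0]; d[1] = s[1];
		}
	}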
// mmx mem-compare implementation, size has to be a multiple of 8
// returns 0 if equal, nonzero value if not equal
// ~10 times faster than standard memcmp

View File

@ -156,3 +156,95 @@ __forceinline void memcpy_vibes(void * dest, const void * src, int size) {
#endif
#endif
// Since MemcpyVibes is already in the project, I'll just tuck the Linux version of memcpy_amd_qwc here for the moment,
// to get around compilation issues with having it in the headers.
#ifdef __LINUX__
// This can be moved later, but Linux doesn't even compile memcpyFast.cpp, so I figured I'd stick it here for now.
// Quadword Copy! Count is in QWCs (128 bits). Neither source nor dest need to be aligned.
__forceinline void memcpy_amd_qwc(void *dest, const void *src, size_t qwc)
{
// Optimization Analysis: This code is *nearly* optimal. Do not think that using XMM
// registers will improve copy performance, because they won't. Use of XMMs is only
// warranted in situations where both source and dest are guaranteed aligned to 16 bytes,
// and even then the benefits are typically minimal (sometimes slower depending on the
// amount of data being copied).
//
// Thus: MMX are alignment safe, fast, and widely available. Lets just stick with them.
// --air
// Linux Conversion note:
// This code would benefit nicely from having inline-able GAS syntax, since it should
// allow GCC to optimize the first 3 instructions out of existence in many scenarios.
// And its called enough times to probably merit the extra effort to ensure proper
// optimization. --air
__asm__
(
".intel_syntax noprefix\n"
"mov eax, %[qwc]\n" // keep a copy of count for looping
"shr eax, 1\n"
"jz memcpy_qwc_1\n" // only one 16 byte block to copy?
"cmp eax, 64\n" // "IN_CACHE_COPY/32"
"jb memcpy_qwc_loop1\n" // small copies should be cached (definite speedup --air)
"memcpy_qwc_loop2:\n" // 32-byte blocks, uncached copy
"prefetchnta [%[src] + 568]\n" // start reading ahead (tested: it helps! --air)
"movq mm0,[%[src]+0]\n" // read 64 bits
"movq mm1,[%[src]+8]\n"
"movq mm2,[%[src]+16]\n"
"movntq [%[dest]+0], mm0\n" // write 64 bits, bypassing the cache
"movntq [%[dest]+8], mm1\n"
"movq mm3,[%[src]+24]\n"
"movntq [%[dest]+16], mm2\n"
"movntq [%[dest]+24], mm3\n"
"add %[src],32\n" // update source pointer
"add %[dest],32\n" // update destination pointer
"sub eax,1\n"
"jnz memcpy_qwc_loop2\n" // last 64-byte block?
"sfence\n" // flush the write buffer
"jmp memcpy_qwc_1\n"
// 32-byte blocks, cached!
// This *is* important. Removing this and using exclusively non-temporal stores
// results in noticeable speed loss!
"memcpy_qwc_loop1:\n"
"prefetchnta [%[src] + 568]\n" // start reading ahead (tested: it helps! --air)
"movq mm0,[%[src]+0]\n" // read 64 bits
"movq mm1,[%[src]+8]\n"
"movq mm2,[%[src]+16]\n"
"movq [%[dest]+0], mm0\n" // write 64 bits, bypassing the cache
"movq [%[dest]+8], mm1\n"
"movq mm3,[%[src]+24]\n"
"movq [%[dest]+16], mm2\n"
"movq [%[dest]+24], mm3\n"
"add %[src],32\n" // update source pointer
"add %[dest],32\n" // update destination pointer
"sub eax,1\n"
"jnz memcpy_qwc_loop1\n" // last 64-byte block?
"memcpy_qwc_1:\n"
"test %[qwc],1\n"
"jz memcpy_qwc_final\n"
"movq mm0,[%[src]]\n"
"movq mm1,[%[src]+8]\n"
"movq [%[dest]], mm0\n"
"movq [%[dest]+8], mm1\n"
"memcpy_qwc_final:\n"
"emms\n" // clean up the MMX state
".att_syntax\n"
: "=&r"(dest), "=&r"(src), "=&r"(qwc)
: [dest]"0"(dest), [src]"1"(src), [qwc]"2"(qwc)
: "memory", "eax", "mm0", "mm1", "mm2", "mm3"
);
}
#endif

View File

@ -395,6 +395,7 @@ struct Pcsx2Config
// style. Useful for debugging potential bugs in the MTGS pipeline.
bool SynchronousMTGS;
bool DisableOutput;
int VsyncQueueSize;
bool FrameLimitEnable;
bool FrameSkipEnable;
@ -420,6 +421,8 @@ struct Pcsx2Config
return
OpEqu( SynchronousMTGS ) &&
OpEqu( DisableOutput ) &&
OpEqu( VsyncQueueSize ) &&
OpEqu( FrameSkipEnable ) &&
OpEqu( FrameLimitEnable ) &&
OpEqu( VsyncEnable ) &&
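
The new VsyncQueueSize works together with m_QueuedFrameCount to cap how many vsyncs the EE may queue ahead of the GS thread; the gist, lightly paraphrased from the PostVsyncEnd() changes later in this commit:

	if( (AtomicIncrement(m_QueuedFrameCount) < EmuConfig.GS.VsyncQueueSize)
	 || (!EmuConfig.GS.VsyncEnable && !EmuConfig.GS.FrameLimitEnable) )
		return;                          // still within the allowed queue depth (or benchmarking)

	m_VsyncSignalListener = true;        // otherwise stall the EE until the GS thread catches up
	m_sem_Vsync.WaitNoCancel();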

View File

@ -164,7 +164,6 @@ void __fastcall WriteFIFO_page_5(u32 mem, const mem128_t *value)
if(GSTransferStatus.PTH2 == STOPPED_MODE && gifRegs->stat.APATH == GIF_APATH2)
{
if(gifRegs->stat.DIR == 0)gifRegs->stat.OPH = false;
gifRegs->stat.APATH = GIF_APATH_IDLE;
if(gifRegs->stat.P1Q) gsPath1Interrupt();
}
@ -195,14 +194,12 @@ void __fastcall WriteFIFO_page_6(u32 mem, const mem128_t *value)
nloop0_packet[1] = psHu32(GIF_FIFO + 4);
nloop0_packet[2] = psHu32(GIF_FIFO + 8);
nloop0_packet[3] = psHu32(GIF_FIFO + 12);
GetMTGS().PrepDataPacket(GIF_PATH_3, (u8*)nloop0_packet, 1);
u64* data = (u64*)GetMTGS().GetDataPacketPtr();
data[0] = value[0];
data[1] = value[1];
GetMTGS().PrepDataPacket(GIF_PATH_3, 1);
//u64* data = (u64*)GetMTGS().GetDataPacketPtr();
GIFPath_CopyTag( GIF_PATH_3, (u128*)nloop0_packet, 1 );
GetMTGS().SendDataPacket();
if(GSTransferStatus.PTH3 == STOPPED_MODE && gifRegs->stat.APATH == GIF_APATH3 )
{
if(gifRegs->stat.DIR == 0)gifRegs->stat.OPH = false;
gifRegs->stat.APATH = GIF_APATH_IDLE;
if(gifRegs->stat.P1Q) gsPath1Interrupt();
}

View File

@ -282,14 +282,19 @@ void __fastcall gsWrite64_page_01( u32 mem, const mem64_t* value )
{
case 0x12001040: //busdir
//This is probably a complete hack, however writing to BUSDIR "should" start a transfer (Bleach Blade Battlers)
//Only problem is it kills killzone :( leaving it commented out for now.
//This is probably a complete hack, however writing to BUSDIR "should" start a transfer
//(Bleach Blade Battlers, Growlanser 2 and 3, Wizardry)
//Only problem is it kills killzone :(.
// (yes it *is* a complete hack; both lines here in fact --air)
//=========================================================================
//gifRegs->stat.OPH = true;
//Console.Warning("BUSDIR write! Setting OPH and DIR to = %x",(u32)value[0]);
if ((u32)value[0] == 1)
gifRegs->stat.OPH = true;
else
gifRegs->stat.OPH = false;
gifRegs->stat.DIR = (u32)value[0];
//=========================================================================
gifRegs->stat.DIR = (u32)value;
// BUSDIR INSANITY !! MTGS FLUSH NEEDED
//
// Yup folks. BUSDIR is evil. The only safe way to handle it is to flush the whole MTGS

View File

@ -229,7 +229,8 @@ enum GIF_PATH
GIF_PATH_3,
};
extern int GIFPath_ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size);
extern void GIFPath_Initialize();
extern int GIFPath_CopyTag(GIF_PATH pathidx, const u128* pMem, u32 size);
extern int GIFPath_ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size);
extern void GIFPath_Reset();
extern void GIFPath_Clear( GIF_PATH pathidx );
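
GIFPath_CopyTag is the core of the "copies as it parses" change: callers now reserve ringbuffer space, let the GIFpath parser copy directly into the ring while it parses, and then publish the packet. The typical calling pattern, as it appears in the reworked WRITERING_DMA later in this commit:

	GetMTGS().PrepDataPacket( GIF_PATH_3, qwc );                   // reserve space in the MTGS ring
	uint copied = GIFPath_CopyTag( GIF_PATH_3, (u128*)pMem, qwc ); // parse and copy straight into the ring
	GetMTGS().SendDataPacket();                                    // commit the packet to the GS thread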
@ -248,7 +249,6 @@ enum MTGS_RingCommand
GS_RINGTYPE_P1
, GS_RINGTYPE_P2
, GS_RINGTYPE_P3
, GS_RINGTYPE_RESTART
, GS_RINGTYPE_VSYNC
, GS_RINGTYPE_FRAMESKIP
, GS_RINGTYPE_FREEZE
@ -273,19 +273,20 @@ class SysMtgsThread : public SysThreadBase
typedef SysThreadBase _parent;
public:
// note: when m_RingPos == m_WritePos, the fifo is empty
uint m_RingPos; // cur pos gs is reading from
// note: when m_ReadPos == m_WritePos, the fifo is empty
uint m_ReadPos; // cur pos gs is reading from
uint m_WritePos; // cur pos ee thread is writing to
volatile bool m_RingBufferIsBusy;
volatile u32 m_SignalRingEnable;
volatile s32 m_SignalRingPosition;
int m_QueuedFrameCount;
u32 m_RingWrapSpot;
volatile s32 m_QueuedFrameCount;
volatile u32 m_VsyncSignalListener;
Mutex m_lock_RingBufferBusy;
Mutex m_mtx_RingBufferBusy;
Semaphore m_sem_OnRingReset;
Semaphore m_sem_Vsync;
// used to keep multiple threads from sending packets to the ringbuffer concurrently.
// (currently not used or implemented -- is a planned feature for a future threaded VU1)
@ -301,8 +302,9 @@ public:
// These vars maintain instance data for sending Data Packets.
// Only one data packet can be constructed and uploaded at a time.
uint m_packet_startpos; // ringbuffer index where the packet's command tag was written
uint m_packet_size; // size of the packet (data only, ie. not including the 16 byte command!)
uint m_packet_ringpos; // index of the data location in the ringbuffer.
uint m_packet_writepos; // index of the data location in the ringbuffer.
#ifdef RINGBUF_DEBUG_STACK
Threading::Mutex m_lock_Stack;
@ -317,14 +319,13 @@ public:
void WaitGS();
void ResetGS();
int PrepDataPacket( MTGS_RingCommand cmd, u32 size );
int PrepDataPacket( GIF_PATH pathidx, const u8* srcdata, u32 size );
void PrepDataPacket( MTGS_RingCommand cmd, u32 size );
void PrepDataPacket( GIF_PATH pathidx, u32 size );
void SendDataPacket();
void SendGameCRC( u32 crc );
void WaitForOpen();
void Freeze( int mode, MTGS_FreezeData& data );
void RestartRingbuffer( uint packsize=0 );
void SendSimplePacket( MTGS_RingCommand type, int data0, int data1, int data2 );
void SendPointerPacket( MTGS_RingCommand type, u32 data0, void* data1 );
@ -346,9 +347,10 @@ protected:
void OnResumeInThread( bool IsSuspended );
void OnCleanupInThread();
void GenericStall( uint size );
// Used internally by SendSimplePacket type functions
uint _PrepForSimplePacket();
void _FinishSimplePacket( uint future_writepos );
void _FinishSimplePacket();
void ExecuteTaskInThread();
};
@ -416,3 +418,36 @@ extern int g_nLeftGSFrames;
#endif
// Size of the ringbuffer as a power of 2 -- size is a multiple of simd128s.
// (actual size is 1<<m_RingBufferSizeFactor simd vectors [128-bit values])
// A value of 19 is a 8meg ring buffer. 18 would be 4 megs, and 20 would be 16 megs.
// Default was 2mb, but some games with lots of MTGS activity want 8mb to run fast (rama)
static const uint RingBufferSizeFactor = 19;
// size of the ringbuffer in simd128's.
static const uint RingBufferSize = 1<<RingBufferSizeFactor;
// Mask to apply to ring buffer indices to wrap the pointer from end to
// start (the wrapping is what makes it a ringbuffer, yo!)
static const uint RingBufferMask = RingBufferSize - 1;
struct MTGS_BufferedData
{
u128 m_Ring[RingBufferSize];
u8 Regs[Ps2MemSize::GSregs];
MTGS_BufferedData() {}
u128& operator[]( uint idx )
{
pxAssert( idx < RingBufferSize );
return m_Ring[idx];
}
};
extern __aligned(32) MTGS_BufferedData RingBuffer;
// FIXME: These belong in common with other memcpy tools. Will move them there later if no one
// else beats me to it. --air
extern void MemCopy_WrappedDest( const u128* src, u128* destBase, uint& destStart, uint destSize, uint len );
extern void MemCopy_WrappedSrc( const u128* srcBase, uint& srcStart, uint srcSize, u128* dest, uint len );
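
MemCopy_WrappedDest and MemCopy_WrappedSrc are what allow a data packet to spill over the end of the ring without a restart. Their bodies are not shown in this hunk, so the following is only a rough sketch of the destination-wrapping variant, assuming destSize is the power-of-two ring size and destStart is advanced in place:

	void MemCopy_WrappedDest( const u128* src, u128* destBase, uint& destStart, uint destSize, uint len )
	{
		uint endpos = destStart + len;
		if( endpos < destSize )
		{
			memcpy_qwc( &destBase[destStart], src, len );
			destStart += len;
		}
		else
		{
			uint firstcopylen = destSize - destStart;
			memcpy_qwc( &destBase[destStart], src, firstcopylen );
			destStart = endpos & (destSize - 1);                  // wrap (destSize is a power of two)
			memcpy_qwc( destBase, src + firstcopylen, destStart );
		}
	}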

View File

@ -36,7 +36,7 @@ static u32 gifqwc = 0;
static bool gifmfifoirq = false;
//Just some temporary bits to store Path1 transfers if another is in progress.
u8 Path1Buffer[0x1000000];
__aligned16 u8 Path1Buffer[0x1000000];
u32 Path1WritePos = 0;
u32 Path1ReadPos = 0;
@ -57,23 +57,23 @@ void gsPath1Interrupt()
if((gifRegs->stat.APATH <= GIF_APATH1 || (gifRegs->stat.IP3 == true && gifRegs->stat.APATH == GIF_APATH3)) && Path1WritePos > 0 && !gifRegs->stat.PSE)
{
gifRegs->stat.P1Q = false;
while(Path1WritePos > 0)
{
u32 size = GetMTGS().PrepDataPacket(GIF_PATH_1, Path1Buffer + (Path1ReadPos * 16), (Path1WritePos - Path1ReadPos));
u8* pDest = GetMTGS().GetDataPacketPtr();
//DevCon.Warning("Flush Size = %x", size);
memcpy_aligned(pDest, Path1Buffer + (Path1ReadPos * 16), size * 16);
GetMTGS().SendDataPacket();
Path1ReadPos += size;
if(GSTransferStatus.PTH1 == STOPPED_MODE)
if (uint size = (Path1WritePos - Path1ReadPos))
{
GetMTGS().PrepDataPacket(GIF_PATH_1, size);
//DevCon.Warning("Flush Size = %x", size);
while(size > 0)
{
gifRegs->stat.OPH = false;
gifRegs->stat.APATH = GIF_APATH_IDLE;
uint count = GIFPath_CopyTag(GIF_PATH_1, ((u128*)Path1Buffer) + Path1ReadPos, size);
Path1ReadPos += count;
size -= count;
if(GSTransferStatus.PTH1 == STOPPED_MODE)
{
gifRegs->stat.APATH = GIF_APATH_IDLE;
}
}
GetMTGS().SendDataPacket();
if(Path1ReadPos == Path1WritePos)
{
@ -105,7 +105,6 @@ __forceinline void gsInterrupt()
if(GSTransferStatus.PTH3 >= PENDINGSTOP_MODE && gifRegs->stat.APATH == GIF_APATH3 )
{
gifRegs->stat.OPH = false;
GSTransferStatus.PTH3 = STOPPED_MODE;
gifRegs->stat.APATH = GIF_APATH_IDLE;
if(gifRegs->stat.P1Q) gsPath1Interrupt();
@ -150,11 +149,8 @@ __forceinline void gsInterrupt()
static u32 WRITERING_DMA(u32 *pMem, u32 qwc)
{
int size = GetMTGS().PrepDataPacket(GIF_PATH_3, (u8*)pMem, qwc);
u8* pgsmem = GetMTGS().GetDataPacketPtr();
memcpy_aligned(pgsmem, pMem, size<<4);
GetMTGS().PrepDataPacket(GIF_PATH_3, qwc);
uint size = GIFPath_CopyTag(GIF_PATH_3, (u128*)pMem, qwc );
GetMTGS().SendDataPacket();
return size;
}
@ -167,7 +163,6 @@ static u32 WRITERING_DMA(tDMA_TAG *pMem, u32 qwc)
int _GIFchain()
{
tDMA_TAG *pMem;
int qwc = 0;
pMem = dmaGetAddr(gif->madr, false);
if (pMem == NULL)
@ -182,11 +177,6 @@ int _GIFchain()
return -1;
}
//in Intermittent Mode it enabled, IMAGE_MODE transfers are sliced.
///(gifRegs->stat.IMT && GSTransferStatus.PTH3 <= IMAGE_MODE) qwc = min((int)gif->qwc, 8);
/*else qwc = gif->qwc;*/
return WRITERING_DMA(pMem, gif->qwc);
}
@ -327,7 +317,7 @@ void GIFdma()
//gifRegs->stat.OPH = true;
//gifRegs->stat.OPH = true; // why set the GS output path flag here? (rama)
gifRegs->stat.FQC = min((u16)0x10, gif->qwc);// FQC=31, hack ;) (for values of 31 that equal 16) [ used to be 0xE00; // APATH=3]
//Check with Path3 masking games
if (gif->qwc > 0) {
@ -346,7 +336,7 @@ void GIFdma()
}
//gifRegs->stat.OPH = true;
//gifRegs->stat.OPH = true; // why set the GS output path flag here? (rama)
// Transfer Dn_QWC from Dn_MADR to GIF
if ((gif->chcr.MOD == NORMAL_MODE) || (gif->qwc > 0)) // Normal Mode
{
@ -450,42 +440,44 @@ static __forceinline bool mfifoGIFrbTransfer()
u32 mfifoqwc = min(gifqwc, (u32)gif->qwc);
u32 *src;
GetMTGS().PrepDataPacket(GIF_PATH_3, mfifoqwc);
// TODO (minor optimization): The new GIFpath parser can do rather efficient wrapping of
// its own internally now. We just need to groom a version of it that can wrap around MFIFO
// memory similarly to how it wraps VU1 memory on PATH1.
/* Check if the transfer should wrap around the ring buffer */
if ((gif->madr + mfifoqwc * 16) > (dmacRegs->rbor.ADDR + dmacRegs->rbsr.RMSK + 16))
{
uint s1 = ((dmacRegs->rbor.ADDR + dmacRegs->rbsr.RMSK + 16) - gif->madr) >> 4;
uint s2 = (mfifoqwc - s1);
// fixme - I don't think these should use WRITERING_DMA, since our source
// isn't the DmaGetAddr(gif->madr) address that WRITERING_DMA expects.
/* it does (wrap around), so first copy 's1' bytes from 'addr' to 'data' */
/* and second copy 's2' bytes from 'maddr' to '&data[s1]' */
src = (u32*)PSM(gif->madr);
if (src == NULL) return false;
s1 = WRITERING_DMA(src, s1);
uint copied = GIFPath_CopyTag(GIF_PATH_3, (u128*)src, s1);
if (s1 == (mfifoqwc - s2))
if (copied == s1) // but only copy second if first didn't abort prematurely for some reason.
{
/* and second copy 's2' bytes from 'maddr' to '&data[s1]' */
src = (u32*)PSM(dmacRegs->rbor.ADDR);
if (src == NULL) return false;
s2 = WRITERING_DMA(src, s2);
}
else
{
s2 = 0;
copied += GIFPath_CopyTag(GIF_PATH_3, (u128*)src, s2);
}
mfifoqwc = s1 + s2;
mfifoqwc = copied;
}
else
{
/* it doesn't, so just transfer 'qwc*16' words from 'gif->madr' to GS */
src = (u32*)PSM(gif->madr);
if (src == NULL) return false;
mfifoqwc = WRITERING_DMA(src, mfifoqwc);
mfifoqwc = GIFPath_CopyTag(GIF_PATH_3, (u128*)src, mfifoqwc);
gif->madr = dmacRegs->rbor.ADDR + (gif->madr & dmacRegs->rbsr.RMSK);
}
GetMTGS().SendDataPacket();
gifqwc -= mfifoqwc;
return true;
@ -571,36 +563,36 @@ void mfifoGIFtransfer(int qwc)
switch (ptag->ID)
{
case TAG_REFE: // Refe - Transfer Packet According to ADDR field
case TAG_REFE: // Refe - Transfer Packet According to ADDR field
gif->tadr = qwctag(gif->tadr + 16);
gifstate = GIF_STATE_DONE; //End Transfer
break;
case TAG_CNT: // CNT - Transfer QWC following the tag.
case TAG_CNT: // CNT - Transfer QWC following the tag.
gif->madr = qwctag(gif->tadr + 16); //Set MADR to QW after Tag
gif->tadr = qwctag(gif->madr + (gif->qwc << 4)); //Set TADR to QW following the data
gif->tadr = qwctag(gif->madr + (gif->qwc << 4)); //Set TADR to QW following the data
gifstate = GIF_STATE_READY;
break;
case TAG_NEXT: // Next - Transfer QWC following tag. TADR = ADDR
case TAG_NEXT: // Next - Transfer QWC following tag. TADR = ADDR
{
u32 temp = gif->madr; //Temporarily Store ADDR
gif->madr = qwctag(gif->tadr + 16); //Set MADR to QW following the tag
gif->tadr = temp; //Copy temporarily stored ADDR to Tag
u32 temp = gif->madr; //Temporarily Store ADDR
gif->madr = qwctag(gif->tadr + 16); //Set MADR to QW following the tag
gif->tadr = temp; //Copy temporarily stored ADDR to Tag
gifstate = GIF_STATE_READY;
break;
}
case TAG_REF: // Ref - Transfer QWC from ADDR field
case TAG_REFS: // Refs - Transfer QWC from ADDR field (Stall Control)
case TAG_REF: // Ref - Transfer QWC from ADDR field
case TAG_REFS: // Refs - Transfer QWC from ADDR field (Stall Control)
gif->tadr = qwctag(gif->tadr + 16); //Set TADR to next tag
gifstate = GIF_STATE_READY;
break;
case TAG_END: // End - Transfer QWC following the tag
gif->madr = qwctag(gif->tadr + 16); //Set MADR to data following the tag
gif->tadr = qwctag(gif->madr + (gif->qwc << 4)); //Set TADR to QW following the data
gifstate = GIF_STATE_DONE; //End Transfer
case TAG_END: // End - Transfer QWC following the tag
gif->madr = qwctag(gif->tadr + 16); //Set MADR to data following the tag
gif->tadr = qwctag(gif->madr + (gif->qwc << 4)); //Set TADR to QW following the data
gifstate = GIF_STATE_DONE; //End Transfer
break;
}
@ -638,7 +630,6 @@ void gifMFIFOInterrupt()
if(GSTransferStatus.PTH3 == STOPPED_MODE && gifRegs->stat.APATH == GIF_APATH3 )
{
gifRegs->stat.OPH = false;
gifRegs->stat.APATH = GIF_APATH_IDLE;
if(gifRegs->stat.P1Q) gsPath1Interrupt();
}

View File

@ -290,7 +290,7 @@ extern void gifMFIFOInterrupt();
//Just some temporary bits to store Path1 transfers if another is in progress.
extern void gsPath1Interrupt();
extern u8 Path1Buffer[0x1000000];
extern __aligned16 u8 Path1Buffer[0x1000000];
extern u32 Path1WritePos;
extern u32 Path1ReadPos;
#endif

View File

@ -29,7 +29,7 @@
using namespace Threading;
#if 0 // PCSX2_DEBUG
#if 0 //PCSX2_DEBUG
# define MTGS_LOG Console.WriteLn
#else
# define MTGS_LOG 0&&
@ -46,34 +46,7 @@ using namespace Threading;
// MTGS Threaded Class Implementation
// =====================================================================================================
// Size of the ringbuffer as a power of 2 -- size is a multiple of simd128s.
// (actual size is 1<<m_RingBufferSizeFactor simd vectors [128-bit values])
// A value of 19 is a 8meg ring buffer. 18 would be 4 megs, and 20 would be 16 megs.
// Default was 2mb, but some games with lots of MTGS activity want 8mb to run fast (rama)
static const uint RingBufferSizeFactor = 19;
// size of the ringbuffer in simd128's.
static const uint RingBufferSize = 1<<RingBufferSizeFactor;
// Mask to apply to ring buffer indices to wrap the pointer from end to
// start (the wrapping is what makes it a ringbuffer, yo!)
static const uint RingBufferMask = RingBufferSize - 1;
struct MTGS_BufferedData
{
u128 m_Ring[RingBufferSize];
u8 Regs[Ps2MemSize::GSregs];
MTGS_BufferedData() {}
u128& operator[]( uint idx )
{
pxAssert( idx < RingBufferSize );
return m_Ring[idx];
}
};
static __aligned(32) MTGS_BufferedData RingBuffer;
__aligned(32) MTGS_BufferedData RingBuffer;
extern bool renderswitch;
@ -97,16 +70,16 @@ void SysMtgsThread::OnStart()
{
m_PluginOpened = false;
m_RingPos = 0;
m_ReadPos = 0;
m_WritePos = 0;
m_RingBufferIsBusy = false;
m_packet_size = 0;
m_packet_ringpos = 0;
m_packet_writepos = 0;
m_QueuedFrameCount = 0;
m_VsyncSignalListener = false;
m_SignalRingEnable = 0;
m_SignalRingPosition= 0;
m_RingWrapSpot = 0;
m_CopyDataTally = 0;
@ -125,12 +98,16 @@ void SysMtgsThread::OnResumeReady()
void SysMtgsThread::ResetGS()
{
pxAssertDev( !IsOpen() || (m_ReadPos == m_WritePos), "Must close or terminate the GS thread prior to gsReset." );
// MTGS Reset process:
// * clear the ringbuffer.
// * Signal a reset.
// * clear the path and byRegs structs (used by GIFtagDummy)
m_RingPos = m_WritePos;
m_ReadPos = m_WritePos;
m_QueuedFrameCount = 0;
m_VsyncSignalListener = false;
MTGS_LOG( "MTGS: Sending Reset..." );
SendSimplePacket( GS_RINGTYPE_RESET, 0, 0, 0 );
@ -155,30 +132,31 @@ void SysMtgsThread::PostVsyncEnd()
// 256-byte copy is only a few dozen cycles -- executed 60 times a second -- so probably
// not worth the effort or overhead of trying to selectively avoid it.
PrepDataPacket(GS_RINGTYPE_VSYNC, sizeof(RingCmdPacket_Vsync));
RingCmdPacket_Vsync& local( *(RingCmdPacket_Vsync*)GetDataPacketPtr() );
uint packsize = sizeof(RingCmdPacket_Vsync) / 16;
PrepDataPacket(GS_RINGTYPE_VSYNC, packsize);
MemCopy_WrappedDest( (u128*)PS2MEM_GS, RingBuffer.m_Ring, m_packet_writepos, RingBufferSize, 0xf );
memcpy_fast( local.regset1, PS2MEM_GS, sizeof(local.regset1) );
local.csr = GSCSRr;
local.imr = GSIMR;
local.siglblid = GSSIGLBLID;
u32* remainder = (u32*)GetDataPacketPtr();
remainder[0] = GSCSRr;
remainder[1] = GSIMR;
(GSRegSIGBLID&)remainder[2] = GSSIGLBLID;
m_packet_writepos = (m_packet_writepos + 1) & RingBufferMask;
SendDataPacket();
// Alter-frame flushing! Restarts the ringbuffer (wraps) on every other frame. This is a
// mandatory feature that prevents the MTGS from queuing more than 2 frames at any time.
// (queued frames cause input lag and desynced audio -- bad!). Ring restarts work for this
// because they act as sync points where the EE must stall to wait for the GS to catch-up,
// and they also allow us to reuse the front of the ringbuffer more often, which should improve
// L2 cache performance.
// Vsyncs should always start the GS thread, regardless of how little has actually been queued.
if (m_CopyDataTally != 0) SetEvent();
if( m_QueuedFrameCount > 0 )
RestartRingbuffer();
else
{
m_QueuedFrameCount++;
SetEvent();
}
// If the MTGS is allowed to queue a lot of frames in advance, it creates input lag.
// Use the Queued FrameCount to stall the EE if another vsync (or two) are already queued
// in the ringbuffer. The queue limit is disabled when both FrameLimiting and Vsync are
// disabled, since the queue can have perverse effects on framerate benchmarking.
if ((AtomicIncrement(m_QueuedFrameCount) < EmuConfig.GS.VsyncQueueSize) || (!EmuConfig.GS.VsyncEnable && !EmuConfig.GS.FrameLimitEnable)) return;
m_VsyncSignalListener = true;
//Console.WriteLn( Color_Blue, "(EEcore Sleep) Vsync\t\tringpos=0x%06x, writepos=0x%06x", volatize(m_ReadPos), m_WritePos );
m_sem_Vsync.WaitNoCancel();
}
struct PacketTagType
@ -261,12 +239,14 @@ void SysMtgsThread::OpenPlugin()
class RingBufferLock : public ScopedLock
{
typedef ScopedLock _parent;
protected:
SysMtgsThread& m_mtgs;
public:
RingBufferLock( SysMtgsThread& mtgs )
: ScopedLock( mtgs.m_lock_RingBufferBusy )
: ScopedLock( mtgs.m_mtx_RingBufferBusy )
, m_mtgs( mtgs )
{
m_mtgs.m_RingBufferIsBusy = true;
@ -276,6 +256,18 @@ public:
{
m_mtgs.m_RingBufferIsBusy = false;
}
void Acquire()
{
_parent::Acquire();
m_mtgs.m_RingBufferIsBusy = true;
}
void Release()
{
m_mtgs.m_RingBufferIsBusy = false;
_parent::Release();
}
};
void SysMtgsThread::ExecuteTaskInThread()
@ -284,31 +276,33 @@ void SysMtgsThread::ExecuteTaskInThread()
PacketTagType prevCmd;
#endif
RingBufferLock busy( *this );
while( true )
{
busy.Release();
// Performance note: Both of these perform cancellation tests, but pthread_testcancel
// is very optimized (only 1 instruction test in most cases), so no point in trying
// to avoid it.
m_sem_event.WaitWithoutYield();
StateCheckInThread();
busy.Acquire();
{
RingBufferLock busy( *this );
// note: m_RingPos is intentionally not volatile, because it should only
// note: m_ReadPos is intentionally not volatile, because it should only
// ever be modified by this thread.
while( m_RingPos != volatize(m_WritePos))
while( m_ReadPos != volatize(m_WritePos))
{
if( EmuConfig.GS.DisableOutput )
{
m_RingPos = m_WritePos;
m_ReadPos = m_WritePos;
continue;
}
pxAssert( m_RingPos < RingBufferSize );
pxAssert( m_ReadPos < RingBufferSize );
const PacketTagType& tag = (PacketTagType&)RingBuffer[m_RingPos];
const PacketTagType& tag = (PacketTagType&)RingBuffer[m_ReadPos];
u32 ringposinc = 1;
#ifdef RINGBUF_DEBUG_STACK
@ -316,11 +310,11 @@ void SysMtgsThread::ExecuteTaskInThread()
m_lock_Stack.Lock();
uptr stackpos = ringposStack.back();
if( stackpos != m_RingPos )
if( stackpos != m_ReadPos )
{
Console.Error( "MTGS Ringbuffer Critical Failure ---> %x to %x (prevCmd: %x)\n", stackpos, m_RingPos, prevCmd.command );
Console.Error( "MTGS Ringbuffer Critical Failure ---> %x to %x (prevCmd: %x)\n", stackpos, m_ReadPos, prevCmd.command );
}
pxAssert( stackpos == m_RingPos );
pxAssert( stackpos == m_ReadPos );
prevCmd = tag;
ringposStack.pop_back();
m_lock_Stack.Release();
@ -330,38 +324,75 @@ void SysMtgsThread::ExecuteTaskInThread()
{
case GS_RINGTYPE_P1:
{
uint datapos = (m_ReadPos+1) & RingBufferMask;
const int qsize = tag.data[0];
const u128* data = &RingBuffer[m_RingPos+1];
const u128* data = &RingBuffer[datapos];
MTGS_LOG( "(MTGS Packet Read) ringtype=P1, qwc=%u", qsize );
// make sure that tag>>16 is the MAX size readable
GSgifTransfer1((u32*)(data - 0x400 + qsize), 0x4000-qsize*16);
//GSgifTransfer1((u32*)data, qsize);
uint endpos = datapos + qsize;
if( endpos >= RingBufferSize )
{
uint firstcopylen = RingBufferSize - datapos;
GSgifTransfer( (u32*)data, firstcopylen );
datapos = endpos & RingBufferMask;
GSgifTransfer( (u32*)RingBuffer.m_Ring, datapos );
}
else
{
GSgifTransfer( (u32*)data, qsize );
}
ringposinc += qsize;
}
break;
case GS_RINGTYPE_P2:
{
uint datapos = (m_ReadPos+1) & RingBufferMask;
const int qsize = tag.data[0];
const u128* data = &RingBuffer[m_RingPos+1];
const u128* data = &RingBuffer[datapos];
MTGS_LOG( "(MTGS Packet Read) ringtype=P2, qwc=%u", qsize );
GSgifTransfer2((u32*)data, qsize);
uint endpos = datapos + qsize;
if( endpos >= RingBufferSize )
{
uint firstcopylen = RingBufferSize - datapos;
GSgifTransfer2( (u32*)data, firstcopylen );
datapos = endpos & RingBufferMask;
GSgifTransfer2( (u32*)RingBuffer.m_Ring, datapos );
}
else
{
GSgifTransfer2( (u32*)data, qsize );
}
ringposinc += qsize;
}
break;
case GS_RINGTYPE_P3:
{
uint datapos = (m_ReadPos+1) & RingBufferMask;
const int qsize = tag.data[0];
const u128* data = &RingBuffer[m_RingPos+1];
const u128* data = &RingBuffer[datapos];
MTGS_LOG( "(MTGS Packet Read) ringtype=P3, qwc=%u", qsize );
GSgifTransfer3((u32*)data, qsize);
uint endpos = datapos + qsize;
if( endpos >= RingBufferSize )
{
uint firstcopylen = RingBufferSize - datapos;
GSgifTransfer3( (u32*)data, firstcopylen );
datapos = endpos & RingBufferMask;
GSgifTransfer3( (u32*)RingBuffer.m_Ring, datapos );
}
else
{
GSgifTransfer3( (u32*)data, qsize );
}
ringposinc += qsize;
}
break;
@ -370,25 +401,25 @@ void SysMtgsThread::ExecuteTaskInThread()
{
switch( tag.command )
{
case GS_RINGTYPE_RESTART:
//MTGS_LOG( "(MTGS Packet Read) ringtype=Restart" );
m_RingPos = 0;
continue;
case GS_RINGTYPE_VSYNC:
{
const int qsize = tag.data[0];
ringposinc += qsize;
MTGS_LOG( "(MTGS Packet Read) ringtype=Vsync, field=%u, skip=%s", tag.data[0], tag.data[1] ? "true" : "false" );
MTGS_LOG( "(MTGS Packet Read) ringtype=Vsync, field=%u, skip=%s", !!(((u32&)RingBuffer.Regs[0x1000]) & 0x2000) ? 0 : 1, tag.data[1] ? "true" : "false" );
// Mail in the important GS registers.
RingCmdPacket_Vsync& local((RingCmdPacket_Vsync&)RingBuffer[m_RingPos+1]);
memcpy_fast( RingBuffer.Regs, local.regset1, sizeof(local.regset1));
((u32&)RingBuffer.Regs[0x1000]) = local.csr;
((u32&)RingBuffer.Regs[0x1010]) = local.imr;
((GSRegSIGBLID&)RingBuffer.Regs[0x1080]) = local.siglblid;
// This seemingly obtuse system is needed in order to handle cases where the vsync data wraps
// around the edge of the ringbuffer. If not for that I'd just use a struct. >_<
uint datapos = (m_ReadPos+1) & RingBufferMask;
MemCopy_WrappedSrc( RingBuffer.m_Ring, datapos, RingBufferSize, (u128*)RingBuffer.Regs, 0xf );
u32* remainder = (u32*)&RingBuffer[datapos];
((u32&)RingBuffer.Regs[0x1000]) = remainder[0];
((u32&)RingBuffer.Regs[0x1010]) = remainder[1];
((GSRegSIGBLID&)RingBuffer.Regs[0x1080]) = (GSRegSIGBLID&)remainder[2];
// CSR & 0x2000; is the pageflip id.
GSvsync(((u32&)RingBuffer.Regs[0x1000]) & 0x2000);
gsFrameSkip();
@ -398,7 +429,13 @@ void SysMtgsThread::ExecuteTaskInThread()
if( (GSopen2 == NULL) && (PADupdate != NULL) )
PADupdate(0);
AtomicDecrement( m_QueuedFrameCount );
if (!!AtomicExchange(m_VsyncSignalListener, false))
m_sem_Vsync.Post();
busy.Release();
StateCheckInThread();
busy.Acquire();
}
break;
@ -438,9 +475,9 @@ void SysMtgsThread::ExecuteTaskInThread()
#ifdef PCSX2_DEVBUILD
default:
Console.Error("GSThreadProc, bad packet (%x) at m_RingPos: %x, m_WritePos: %x", tag.command, m_RingPos, m_WritePos);
Console.Error("GSThreadProc, bad packet (%x) at m_ReadPos: %x, m_WritePos: %x", tag.command, m_ReadPos, m_WritePos);
pxFail( "Bad packet encountered in the MTGS Ringbuffer." );
m_RingPos = m_WritePos;
m_ReadPos = m_WritePos;
continue;
#else
// Optimized performance in non-Dev builds.
@ -450,23 +487,29 @@ void SysMtgsThread::ExecuteTaskInThread()
}
}
uint newringpos = m_RingPos + ringposinc;
pxAssert( newringpos <= RingBufferSize );
m_RingPos = newringpos & RingBufferMask;
uint newringpos = (m_ReadPos + ringposinc) & RingBufferMask;
if( EmuConfig.GS.SynchronousMTGS )
{
pxAssert( m_WritePos == newringpos );
}
m_ReadPos = newringpos;
if( m_SignalRingEnable != 0 )
{
// The EEcore has requested a signal after some amount of processed data.
if( AtomicExchangeSub( m_SignalRingPosition, ringposinc ) <= 0 )
{
// Make sure to post the signal after the m_RingPos has been updated...
// Make sure to post the signal after the m_ReadPos has been updated...
AtomicExchange( m_SignalRingEnable, 0 );
m_sem_OnRingReset.Post();
continue;
}
}
}
}
busy.Release();
// Safety valve in case standard signals fail for some reason -- this ensures the EEcore
// won't sleep the eternity, even if SignalRingPosition didn't reach 0 for some reason.
@ -479,7 +522,10 @@ void SysMtgsThread::ExecuteTaskInThread()
m_sem_OnRingReset.Post();
}
//Console.Warning( "(MTGS Thread) Nothing to do! ringpos=0x%06x", m_RingPos );
if (!!AtomicExchange(m_VsyncSignalListener, false))
m_sem_Vsync.Post();
//Console.Warning( "(MTGS Thread) Nothing to do! ringpos=0x%06x", m_ReadPos );
}
}
@ -519,15 +565,15 @@ void SysMtgsThread::WaitGS()
if( m_ExecMode == ExecMode_NoThreadYet || !IsRunning() ) return;
if( !pxAssertDev( IsOpen(), "MTGS Warning! WaitGS issued on a closed thread." ) ) return;
if( volatize(m_RingPos) != m_WritePos )
if( volatize(m_ReadPos) != m_WritePos )
{
SetEvent();
RethrowException();
do {
m_lock_RingBufferBusy.Wait();
m_mtx_RingBufferBusy.Wait();
RethrowException();
} while( volatize(m_RingPos) != m_WritePos );
} while( volatize(m_ReadPos) != m_WritePos );
}
// Completely synchronize GS and MTGS register states.
@ -546,7 +592,7 @@ void SysMtgsThread::SetEvent()
u8* SysMtgsThread::GetDataPacketPtr() const
{
return (u8*)&RingBuffer[m_packet_ringpos];
return (u8*)&RingBuffer[m_packet_writepos & RingBufferMask];
}
// Closes the data packet send command, and initiates the gs thread (if needed).
@ -555,31 +601,14 @@ void SysMtgsThread::SendDataPacket()
// make sure a previous copy block has been started somewhere.
pxAssert( m_packet_size != 0 );
uint temp = m_packet_ringpos + m_packet_size;
pxAssert( temp <= RingBufferSize );
temp &= RingBufferMask;
uint actualSize = ((m_packet_writepos - m_packet_startpos) & RingBufferMask)-1;
pxAssert( actualSize <= m_packet_size );
pxAssert( m_packet_writepos < RingBufferSize );
if( IsDebugBuild )
{
if( m_packet_ringpos + m_packet_size < RingBufferSize )
{
uint readpos = volatize(m_RingPos);
if( readpos != m_WritePos )
{
// The writepos should never leapfrog the readpos
// since that indicates a bad write.
if( m_packet_ringpos < readpos )
pxAssert( temp < readpos );
}
PacketTagType& tag = (PacketTagType&)RingBuffer[m_packet_startpos];
tag.data[0] = actualSize;
// Updating the writepos should never make it equal the readpos, since
// that would stop the buffer prematurely (and indicates bad code in the
// ringbuffer manager)
pxAssert( readpos != temp );
}
}
m_WritePos = temp;
m_WritePos = m_packet_writepos;
if( EmuConfig.GS.SynchronousMTGS )
{
@ -596,142 +625,95 @@ void SysMtgsThread::SendDataPacket()
//m_PacketLocker.Release();
}
int SysMtgsThread::PrepDataPacket( MTGS_RingCommand cmd, u32 size )
void SysMtgsThread::GenericStall( uint size )
{
// Note on volatiles: m_WritePos is not modified by the GS thread, so there's no need
// to use volatile reads here. We do cache it though, since we know it never changes,
// except for calls to RingbufferRestert() -- handled below.
uint writepos = m_WritePos;
// Checks if a previous copy was started without an accompanying call to GSRINGBUF_DONECOPY
pxAssert( m_packet_size == 0 );
const uint writepos = m_WritePos;
// Sanity checks! (within the confines of our ringbuffer please!)
pxAssert( size < RingBufferSize );
pxAssert( writepos < RingBufferSize );
// generic gs wait/stall.
// if the writepos is past the readpos then we're safe.
// But if not then we need to make sure the readpos is outside the scope of
// the block about to be written (writepos + size)
uint readpos = volatize(m_ReadPos);
uint freeroom;
if (writepos < readpos)
freeroom = readpos - writepos;
else
freeroom = RingBufferSize - (writepos - readpos);
if (freeroom <= size)
{
// writepos will overlap readpos if we commit the data, so we need to wait until
// readpos is out past the end of the future write pos, or until it wraps around
// (in which case writepos will be >= readpos).
// Ideally though we want to wait longer, because if we just toss in this packet
// the next packet will likely stall up too. So lets set a condition for the MTGS
// thread to wake up the EE once there's a sizable chunk of the ringbuffer emptied.
uint somedone = (RingBufferSize - freeroom) / 4;
if( somedone < size+1 ) somedone = size + 1;
// FMV Optimization: FMVs typically send *very* little data to the GS, in some cases
// every other frame is nothing more than a page swap. Sleeping the EEcore is a
// waste of time, and we get better results using a spinwait.
if( somedone > 0x80 )
{
pxAssertDev( m_SignalRingEnable == 0, "MTGS Thread Synchronization Error" );
m_SignalRingPosition = somedone;
//Console.WriteLn( Color_Blue, "(EEcore Sleep) PrepDataPacker \tringpos=0x%06x, writepos=0x%06x, signalpos=0x%06x", readpos, writepos, m_SignalRingPosition );
while(true) {
AtomicExchange( m_SignalRingEnable, 1 );
SetEvent();
m_sem_OnRingReset.WaitWithoutYield();
readpos = volatize(m_ReadPos);
//Console.WriteLn( Color_Blue, "(EEcore Awake) Report!\tringpos=0x%06x", readpos );
if (writepos < readpos)
freeroom = readpos - writepos;
else
freeroom = RingBufferSize - (writepos - readpos);
if (freeroom > size) break;
}
pxAssertDev( m_SignalRingPosition <= 0, "MTGS Thread Synchronization Error" );
}
else
{
//Console.WriteLn( Color_StrongGray, "(EEcore Spin) PrepDataPacket!" );
SetEvent();
while(true) {
SpinWait();
readpos = volatize(m_ReadPos);
if (writepos < readpos)
freeroom = readpos - writepos;
else
freeroom = RingBufferSize - (writepos - readpos);
if (freeroom > size) break;
}
}
}
}
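
One consequence of the free-flowing indices is that GenericStall never lets the writer fully catch the reader: it stalls whenever freeroom <= size, so read and write positions can only be equal when the ring is empty. A quick worked example with a toy 16-slot ring, purely for intuition:

	// readpos = 3,  writepos = 12  ->  freeroom = 16 - (12 - 3) = 7 slots
	// readpos = 12, writepos = 3   ->  freeroom = 12 - 3        = 9 slots
	// a packet needing freeroom or more slots waits (sleep or spin) until the GS thread consumes more data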
void SysMtgsThread::PrepDataPacket( MTGS_RingCommand cmd, u32 size )
{
m_packet_size = size;
++size; // takes into account our RingCommand QWC.
if( writepos + size < RingBufferSize )
{
// generic gs wait/stall.
// if the writepos is past the readpos then we're safe.
// But if not then we need to make sure the readpos is outside the scope of
// the block about to be written (writepos + size)
uint readpos = volatize(m_RingPos);
if( (writepos < readpos) && (writepos+size >= readpos) )
{
// writepos is behind the readpos and will overlap it if we commit the data,
// so we need to wait until readpos is out past the end of the future write pos,
// or until it wraps around (in which case writepos will be >= readpos).
// Ideally though we want to wait longer, because if we just toss in this packet
// the next packet will likely stall up too. So lets set a condition for the MTGS
// thread to wake up the EE once there's a sizable chunk of the ringbuffer emptied.
uint totalAccum = (m_RingWrapSpot - readpos) + writepos;
uint somedone = totalAccum / 4;
if( somedone < size+1 ) somedone = size + 1;
// FMV Optimization: FMVs typically send *very* little data to the GS, in some cases
// every other frame is nothing more than a page swap. Sleeping the EEcore is a
// waste of time, and we get better results using a spinwait.
if( somedone > 0x80 )
{
pxAssertDev( m_SignalRingEnable == 0, "MTGS Thread Synchronization Error" );
m_SignalRingPosition = somedone;
//Console.WriteLn( Color_Blue, "(EEcore Sleep) GenStall \tringpos=0x%06x, writepos=0x%06x, wrapspot=0x%06x, signalpos=0x%06x", readpos, writepos, m_RingWrapSpot, m_SignalRingPosition );
do {
AtomicExchange( m_SignalRingEnable, 1 );
SetEvent();
m_sem_OnRingReset.WaitWithoutYield();
readpos = volatize(m_RingPos);
//Console.WriteLn( Color_Blue, "(EEcore Awake) Report!\tringpos=0x%06x", readpos );
} while( (writepos < readpos) && (writepos+size >= readpos) );
pxAssertDev( m_SignalRingPosition <= 0, "MTGS Thread Synchronization Error" );
}
else
{
SetEvent();
do {
SpinWait();
readpos = volatize(m_RingPos);
} while( (writepos < readpos) && (writepos+size >= readpos) );
}
}
}
else if( writepos + size > RingBufferSize )
{
pxAssert( writepos != 0 );
// If the incoming packet doesn't fit, then start over from the start of the ring
// buffer (it's a lot easier than trying to wrap the packet around the end of the
// buffer).
//Console.WriteLn( "MTGS > Ringbuffer Got Filled!");
RestartRingbuffer( size );
writepos = m_WritePos;
}
else // always true - if( writepos + size == MTGS_RINGBUFFEREND )
{
// Yay. Perfect fit. What are the odds?
// Copy is ready so long as readpos is less than writepos and *not* equal to the
// base of the ringbuffer (otherwise the buffer will stop when the writepos is
// wrapped around to zero later-on in SendDataPacket).
uint readpos = volatize(m_RingPos);
//Console.WriteLn( "MTGS > Perfect Fit!\tringpos=0x%06x, writepos=0x%06x", readpos, writepos );
if( readpos > writepos || readpos == 0 )
{
uint totalAccum = (readpos == 0) ? RingBufferSize : ((m_RingWrapSpot - readpos) + writepos);
uint somedone = totalAccum / 4;
if( somedone < size+1 ) somedone = size + 1;
// FMV Optimization: (see above) This condition of a perfect fit is so rare that optimizing
// for it is pointless -- but it was also mindlessly simple copy-paste. So there. :p
if( somedone > 0x80 )
{
m_SignalRingPosition = somedone;
//Console.WriteLn( Color_Blue, "(MTGS Sync) EEcore Perfect Sleep!\twrapspot=0x%06x, ringpos=0x%06x, writepos=0x%06x, signalpos=0x%06x", m_RingWrapSpot, readpos, writepos, m_SignalRingPosition );
do {
AtomicExchange( m_SignalRingEnable, 1 );
SetEvent();
m_sem_OnRingReset.WaitWithoutYield();
readpos = volatize(m_RingPos);
//Console.WriteLn( Color_Blue, "(MTGS Sync) EEcore Perfect Post-sleep Report!\tringpos=0x%06x", readpos );
} while( (writepos < readpos) || (readpos==0) );
pxAssertDev( m_SignalRingPosition <= 0, "MTGS Thread Synchronization Error" );
}
else
{
//Console.WriteLn( Color_Blue, "(MTGS Sync) EEcore Perfect Spin!" );
SetEvent();
do {
SpinWait();
readpos = volatize(m_RingPos);
} while( (writepos < readpos) || (readpos==0) );
}
}
m_QueuedFrameCount = 0;
m_RingWrapSpot = RingBufferSize;
}
#ifdef RINGBUF_DEBUG_STACK
m_lock_Stack.Lock();
ringposStack.push_front( writepos );
m_lock_Stack.Release();
#endif
GenericStall(size);
// Command qword: Low word is the command, and the high word is the packet
// length in SIMDs (128 bits).
@ -739,9 +721,8 @@ int SysMtgsThread::PrepDataPacket( MTGS_RingCommand cmd, u32 size )
PacketTagType& tag = (PacketTagType&)RingBuffer[m_WritePos];
tag.command = cmd;
tag.data[0] = m_packet_size;
m_packet_ringpos = m_WritePos + 1;
return m_packet_size;
m_packet_startpos = m_WritePos;
m_packet_writepos = (m_WritePos + 1) & RingBufferMask;
}
// Returns the amount of giftag data processed (in simd128 values).
@ -749,132 +730,17 @@ int SysMtgsThread::PrepDataPacket( MTGS_RingCommand cmd, u32 size )
// around VU memory instead of having buffer overflow...
// Parameters:
// size - size of the packet data, in simd128's
int SysMtgsThread::PrepDataPacket( GIF_PATH pathidx, const u8* srcdata, u32 size )
void SysMtgsThread::PrepDataPacket( GIF_PATH pathidx, u32 size )
{
//m_PacketLocker.Acquire();
return PrepDataPacket( (MTGS_RingCommand)pathidx, GIFPath_ParseTag(pathidx, srcdata, size) );
PrepDataPacket( (MTGS_RingCommand)pathidx, size );
}
void SysMtgsThread::RestartRingbuffer( uint packsize )
__forceinline void SysMtgsThread::_FinishSimplePacket()
{
if( m_WritePos == 0 ) return;
const uint thefuture = packsize;
//Console.WriteLn( Color_Magenta, "**** Ringbuffer Restart!!" );
// Always kick the MTGS into action for a ringbuffer restart.
SetEvent();
uint readpos = volatize(m_RingPos);
if( (readpos > m_WritePos) || (readpos <= thefuture) )
{
// We have to be careful not to leapfrog our read-position, which would happen if
// it's greater than the current write position (since wrapping writepos to 0 would
// be the act of skipping PAST readpos). Stall until it loops around to the
// beginning of the buffer, and past the size of our packet allocation.
uint somedone;
if( readpos > m_WritePos )
somedone = (m_RingWrapSpot - readpos) + packsize + 1;
else
somedone = (packsize + 1) - readpos;
if( somedone > 0x80 )
{
m_SignalRingPosition = somedone;
//Console.WriteLn( Color_Blue, "(EEcore Sleep) Restart!\tringpos=0x%06x, writepos=0x%06x, wrapspot=0x%06x, signalpos=0x%06x",
// readpos, m_WritePos, m_RingWrapSpot, m_SignalRingPosition );
do {
AtomicExchange( m_SignalRingEnable, 1 );
SetEvent();
m_sem_OnRingReset.WaitWithoutYield();
readpos = volatize(m_RingPos);
//Console.WriteLn( Color_Blue, "(EEcore Awake) Report!\tringpos=0x%06x", readpos );
} while( (readpos > m_WritePos) || (readpos <= thefuture) );
}
else
{
SetEvent();
do {
SpinWait();
readpos = volatize(m_RingPos);
} while( (readpos > m_WritePos) || (readpos <= thefuture) );
}
}
PacketTagType& tag = (PacketTagType&)RingBuffer[m_WritePos];
tag.command = GS_RINGTYPE_RESTART;
m_RingWrapSpot = m_WritePos;
m_WritePos = 0;
m_QueuedFrameCount = 0;
if( EmuConfig.GS.SynchronousMTGS )
WaitGS();
}
__forceinline uint SysMtgsThread::_PrepForSimplePacket()
{
#ifdef RINGBUF_DEBUG_STACK
m_lock_Stack.Lock();
ringposStack.push_front( m_WritePos );
m_lock_Stack.Release();
#endif
uint future_writepos = m_WritePos+1;
pxAssert( future_writepos <= RingBufferSize );
future_writepos &= RingBufferMask;
if( future_writepos == 0 )
{
m_QueuedFrameCount = 0;
m_RingWrapSpot = RingBufferSize;
}
uint readpos = volatize(m_RingPos);
if( future_writepos == readpos )
{
// The ringbuffer read pos is blocking the future write position, so stall out
// until the read position has moved.
uint totalAccum = (m_RingWrapSpot - readpos) + future_writepos;
uint somedone = totalAccum / 4;
if( somedone > 0x80 )
{
m_SignalRingPosition = somedone;
//Console.WriteLn( Color_Blue, "(MTGS Sync) EEcore Simple Sleep!\t\twrapspot=0x%06x, ringpos=0x%06x, writepos=0x%06x, signalpos=0x%06x", m_RingWrapSpot, readpos, writepos, m_SignalRingPosition );
do {
AtomicExchange( m_SignalRingEnable, 1 );
SetEvent();
m_sem_OnRingReset.WaitWithoutYield();
readpos = volatize(m_RingPos);
//Console.WriteLn( Color_Blue, "(MTGS Sync) EEcore Simple Post-sleep Report!\tringpos=0x%06x", readpos );
} while( future_writepos == readpos );
pxAssertDev( m_SignalRingPosition <= 0, "MTGS Thread Synchronization Error" );
}
else
{
SetEvent();
do {
SpinWait();
} while( future_writepos == volatize(m_RingPos) );
}
}
return future_writepos;
}
__forceinline void SysMtgsThread::_FinishSimplePacket( uint future_writepos )
{
pxAssert( future_writepos != volatize(m_RingPos) );
uint future_writepos = (m_WritePos+1) & RingBufferMask;
pxAssert( future_writepos != volatize(m_ReadPos) );
m_WritePos = future_writepos;
if( EmuConfig.GS.SynchronousMTGS )
@ -887,7 +753,7 @@ void SysMtgsThread::SendSimplePacket( MTGS_RingCommand type, int data0, int data
{
//ScopedLock locker( m_PacketLocker );
const uint thefuture = _PrepForSimplePacket();
GenericStall(1);
PacketTagType& tag = (PacketTagType&)RingBuffer[m_WritePos];
tag.command = type;
@ -895,21 +761,21 @@ void SysMtgsThread::SendSimplePacket( MTGS_RingCommand type, int data0, int data
tag.data[1] = data1;
tag.data[2] = data2;
_FinishSimplePacket( thefuture );
_FinishSimplePacket();
}
void SysMtgsThread::SendPointerPacket( MTGS_RingCommand type, u32 data0, void* data1 )
{
//ScopedLock locker( m_PacketLocker );
const uint thefuture = _PrepForSimplePacket();
GenericStall(1);
PacketTagType& tag = (PacketTagType&)RingBuffer[m_WritePos];
tag.command = type;
tag.data[0] = data0;
*(uptr*)&tag.data[1] = (uptr)data1;
_FinishSimplePacket( thefuture );
_FinishSimplePacket();
}
void SysMtgsThread::SendGameCRC( u32 crc )

View File

@ -217,6 +217,7 @@ Pcsx2Config::GSOptions::GSOptions()
SynchronousMTGS = false;
DisableOutput = false;
VsyncQueueSize = 2;
DefaultRegionMode = Region_NTSC;
FramesToDraw = 2;
@ -234,6 +235,7 @@ void Pcsx2Config::GSOptions::LoadSave( IniInterface& ini )
IniEntry( SynchronousMTGS );
IniEntry( DisableOutput );
IniEntry( VsyncQueueSize );
IniEntry( FrameLimitEnable );
IniEntry( FrameSkipEnable );

View File

@ -144,6 +144,7 @@ static s32 CALLBACK fallback_test() { return 0; }
_GSvsync GSvsync;
_GSopen GSopen;
_GSopen2 GSopen2;
_GSgifTransfer GSgifTransfer;
_GSgifTransfer1 GSgifTransfer1;
_GSgifTransfer2 GSgifTransfer2;
_GSgifTransfer3 GSgifTransfer3;
@ -309,7 +310,8 @@ static const LegacyApi_ReqMethod s_MethMessReq_GS[] =
{
{ "GSopen", (vMeth**)&GSopen, NULL },
{ "GSvsync", (vMeth**)&GSvsync, NULL },
{ "GSgifTransfer1", (vMeth**)&GSgifTransfer1, NULL },
{ "GSgifTransfer", (vMeth**)&GSgifTransfer, NULL },
//{ "GSgifTransfer1", (vMeth**)&GSgifTransfer1, NULL },
{ "GSgifTransfer2", (vMeth**)&GSgifTransfer2, NULL },
{ "GSgifTransfer3", (vMeth**)&GSgifTransfer3, NULL },
{ "GSreadFIFO2", (vMeth**)&GSreadFIFO2, NULL },

View File

@ -2057,21 +2057,8 @@ void _vuXGKICK(VURegs * VU)
u8* data = ((u8*)VU->Mem + ((VU->VI[_Is_].US[0]*16) & 0x3fff));
u32 size;
size = GetMTGS().PrepDataPacket( GIF_PATH_1, data, (0x4000-((VU->VI[_Is_].US[0]*16) & 0x3fff)) >> 4);
u8* pmem = GetMTGS().GetDataPacketPtr();
if((size << 4) > (u32)(0x4000-((VU->VI[_Is_].US[0]*16) & 0x3fff)))
{
//DevCon.Warning("addr + Size = 0x%x, transferring %x then doing %x", ((VU->VI[_Is_].US[0]*16) & 0x3fff) + (size << 4), (0x4000-((VU->VI[_Is_].US[0]*16) & 0x3fff)) >> 4, size - (0x4000-((VU->VI[_Is_].US[0]*16) & 0x3fff) >> 4));
memcpy_aligned(pmem, (u8*)VU->Mem+((VU->VI[_Is_].US[0]*16) & 0x3fff), 0x4000-((VU->VI[_Is_].US[0]*16) & 0x3fff));
size -= (0x4000-((VU->VI[_Is_].US[0]*16) & 0x3fff)) >> 4;
//DevCon.Warning("Size left %x", size);
pmem += 0x4000-((VU->VI[_Is_].US[0]*16) & 0x3fff);
memcpy_aligned(pmem, (u8*)VU->Mem, size<<4);
}
else {
memcpy_aligned(pmem, (u8*)VU->Mem+((VU->VI[_Is_].US[0]*16) & 0x3fff), size<<4);
}
GetMTGS().PrepDataPacket( GIF_PATH_1, 0x400 );
size = GIFPath_CopyTag( GIF_PATH_1, (u128*)data, (0x400-(VU->VI[_Is_].US[0] & 0x3ff)) );
GetMTGS().SendDataPacket();
}

View File

@ -345,7 +345,6 @@ __forceinline void vif1Interrupt()
if(GSTransferStatus.PTH2 == STOPPED_MODE && gifRegs->stat.APATH == GIF_APATH2)
{
gifRegs->stat.OPH = false;
gifRegs->stat.APATH = GIF_APATH_IDLE;
if(gifRegs->stat.P1Q) gsPath1Interrupt();
}
@ -440,11 +439,6 @@ __forceinline void vif1Interrupt()
if (vif1.cmd != 0) Console.WriteLn("vif1.cmd still set %x tag size %x", vif1.cmd, vif1.tag.size);
#endif
if((vif1ch->chcr.DIR == VIF_NORMAL_TO_MEM_MODE) && vif1.GSLastDownloadSize <= 16)
{ //Reverse fifo has finished and nothing is left, so lets clear the outputting flag
gifRegs->stat.OPH = false;
}
vif1ch->chcr.STR = false;
vif1.vifstalled = false;
g_vifCycles = 0;

View File

@ -239,7 +239,6 @@ void vifMFIFOInterrupt()
if(GSTransferStatus.PTH2 == STOPPED_MODE && gifRegs->stat.APATH == GIF_APATH2)
{
GSTransferStatus.PTH2 = STOPPED_MODE;
if(gifRegs->stat.DIR == 0)gifRegs->stat.OPH = false;
gifRegs->stat.APATH = GIF_APATH_IDLE;
if(gifRegs->stat.P1Q) gsPath1Interrupt();
/*gifRegs->stat.APATH = GIF_APATH_IDLE;

View File

@ -167,10 +167,16 @@ template<int idx> _f int _vifCode_Direct(int pass, u8* data, bool isDirectHL) {
return 0;
}
// HACK ATTACK!
// we shouldn't be clearing the queue flag here at all. Ideally, the queue statuses
// should be checked, handled, and cleared from the EOP check in GIFPath only. --air
gifRegs->stat.clear_flags(GIF_STAT_P2Q);
// the tag size should ALWAYS be 128 bits (qwc). If it isn't, it means there's a serious bug
// somewhere in the VIF (likely relating to +/-'ing the tag.size during processing).
// NOTE: ICO [PAL] exploits this during bootup. Needs investigation. --air
//pxAssumeMsg( (vif1.tag.size & 3) == 0, "Invalid Vif1 DIRECT packet size detected!" );
nVifStruct& v = nVif[1];
const int ret = aMin(vif1.vifpacketsize, vif1.tag.size);
u32 size = ret << 2;
@ -184,8 +190,6 @@ template<int idx> _f int _vifCode_Direct(int pass, u8* data, bool isDirectHL) {
if(vif1.vifpacketsize < 4 && v.bSize < 16)
{
nVifStruct& v = nVif[idx];
memcpy(&v.buffer[v.bPtr], data, vif1.vifpacketsize << 2);
v.bSize += vif1.vifpacketsize << 2;
v.bPtr += vif1.vifpacketsize << 2;
@ -199,7 +203,6 @@ template<int idx> _f int _vifCode_Direct(int pass, u8* data, bool isDirectHL) {
}
else
{
nVifStruct& v = nVif[idx];
if(v.bSize)
{
int ret = 0;
@ -213,8 +216,8 @@ template<int idx> _f int _vifCode_Direct(int pass, u8* data, bool isDirectHL) {
v.bSize = 0;
v.bPtr = 0;
}
const uint count = GetMTGS().PrepDataPacket(GIF_PATH_2, v.buffer, 1);
memcpy_fast(GetMTGS().GetDataPacketPtr(), v.buffer, count << 4);
GetMTGS().PrepDataPacket(GIF_PATH_2, 1);
GIFPath_CopyTag(GIF_PATH_2, (u128*)v.buffer, 1);
GetMTGS().SendDataPacket();
if(vif1.tag.size == 0)
@ -226,16 +229,17 @@ template<int idx> _f int _vifCode_Direct(int pass, u8* data, bool isDirectHL) {
}
else
{
const uint count = GetMTGS().PrepDataPacket(GIF_PATH_2, data, size >> 4);
memcpy_fast(GetMTGS().GetDataPacketPtr(), data, count << 4);
GetMTGS().PrepDataPacket(GIF_PATH_2, size/16);
uint count = GIFPath_CopyTag(GIF_PATH_2, (u128*)data, size/16) * 4;
GetMTGS().SendDataPacket();
vif1.tag.size -= count << 2;
vif1.tag.size -= count;
if(vif1.tag.size == 0)
{
vif1.cmd = 0;
}
vif1.vifstalled = true;
return count << 2;
return count;
}
}

View File

@ -36,16 +36,8 @@ _vifT bool analyzeIbit(u32* &data, int iBit) {
if (iBit && !vifX.cmd && !vifXRegs->err.MII) {
//DevCon.WriteLn("Vif I-Bit IRQ");
vifX.irq++;
// On i-bit, the command is run, vif stalls etc,
// however if the vifcode is MARK, you do NOT stall, just send IRQ. - Max Payne shows this up.
//if(((vifXRegs->code >> 24) & 0x7f) == 0x7) return 0;
// If we have a vifcode with i-bit, the following instruction
// should stall unless its MARK?.. we test that case here...
// Not 100% sure if this is the correct behavior, so printing
// a console message to see games that use this. (cottonvibes)
// Okay did some testing with Max Payne, it does this
// Okay did some testing with Max Payne, it does this:
// VifMark value = 0x666 (i know, evil!)
// NOP with I Bit
// VifMark value = 0
@ -53,6 +45,23 @@ _vifT bool analyzeIbit(u32* &data, int iBit) {
// If you break after the 2nd Mark has run, the game reports invalid mark 0 and the game dies.
// So it has to occur here, testing a theory that it only doesn't stall if the command with
// the iBit IS mark, but still sends the IRQ to let the cpu know the mark is there. (Refraction)
//
// --------------------------
//
// This is how it probably works: the i-bit sets the IRQ flag, and the VIF keeps running until it encounters
// a non-MARK instruction. This includes the *current* instruction; i.e., execution only continues
// unimpeded if MARK[i] is specified, and keeps going unimpeded until any non-MARK command.
// Any other command with an i-bit should stall immediately.
// Example:
//
// VifMark[i] value = 0x321 (with I bit)
// VifMark value = 0
// VifMark value = 0x333
// NOP
//
// ... the VIF should not stall and raise the interrupt until after the NOP is processed.
// So the final value for MARK as the game sees it will be 0x333. --air
return runMark<idx>(data);
}
return 0;
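The stall rule described in the note above can be condensed into a small decision helper. The following is an illustrative sketch only (hypothetical names, assumes the project's u8 typedef), not code from this commit:

// Hedged sketch of the rule in the note above: an i-bit latches the IRQ flag, MARK
// commands keep executing, and the first non-MARK command encountered after the latch
// is the one that stalls the VIF.
static bool ShouldStallOnCommand(bool& irqLatched, u8 cmd, bool iBit)
{
	const u8 VIFcmd_MARK = 0x07;      // MARK opcode (the 0x7 check in the removed code above)
	if (iBit) irqLatched = true;      // the i-bit always raises the IRQ flag
	return irqLatched && (cmd != VIFcmd_MARK);
}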

View File

@ -134,10 +134,10 @@ bool AppDoAssert( const DiagnosticOrigin& origin, const wxChar *msg )
wxString trace( pxGetStackTrace(origin.function) );
wxString dbgmsg( origin.ToString( msg ) );
wxMessageOutputDebug().Printf( dbgmsg );
wxMessageOutputDebug().Printf( L"%s", dbgmsg );
Console.Error( dbgmsg );
Console.WriteLn( trace );
Console.Error( L"%s", dbgmsg );
Console.WriteLn( L"%s", trace );
wxString windowmsg( L"Assertion failed: " );
if( msg != NULL )

View File

@ -189,13 +189,13 @@ void Pcsx2App::DetectCpuAndUserMode()
x86caps.CountCores();
x86caps.SIMD_EstablishMXCSRmask();
if( !x86caps.hasMultimediaExtensions )
if( !x86caps.hasMultimediaExtensions || !x86caps.hasStreamingSIMDExtensions )
{
// Note: due to memcpy_fast, we need minimum MMX even for interpreters. This will
// hopefully change later once we have a dynamically recompiled memcpy.
// Note: Due to optimizations to GIFpath parsers, memcpy, and possibly other things, we need
// a bare minimum of SSE supported by the CPU.
throw Exception::HardwareDeficiency()
.SetDiagMsg(L"Critical Failure: MMX Extensions not available.")
.SetUserMsg(_("MMX extensions are not available. PCSX2 requires cpu with MMX extension support to run."));
.SetDiagMsg(L"Critical Failure: SSE Extensions not available.")
.SetUserMsg(_("SSE extensions are not available. PCSX2 requires a cpu that supports the SSE instruction set."));
}
ReadUserModeSettings();

View File

@ -19,6 +19,7 @@
#include "Gif.h"
#include "Vif_Dma.h"
#include "Vif.h"
#include <xmmintrin.h>
// --------------------------------------------------------------------------------------
// GIFpath -- the GIFtag Parser
@ -92,12 +93,16 @@ struct GIFPath
void Reset();
void PrepPackedRegs();
void SetTag(const void* mem);
bool StepReg();
u8 GetReg();
bool IsActive() const;
int ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size);
template< bool Aligned >
void SetTag(const void* mem);
template< GIF_PATH pathidx, bool Aligned >
int CopyTag(const u128* pMem, u32 size);
int ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size);
};
@ -285,9 +290,11 @@ __forceinline void GIFPath::PrepPackedRegs()
}
}
template< bool Aligned >
__forceinline void GIFPath::SetTag(const void* mem)
{
const_cast<GIFTAG&>(tag) = *((GIFTAG*)mem);
_mm_store_ps( (float*)&tag, Aligned ? _mm_load_ps((const float*)mem) : _mm_loadu_ps((const float*)mem) );
nloop = tag.NLOOP;
curreg = 0;
@ -350,7 +357,8 @@ static __forceinline void gsHandler(const u8* pMem)
// qwords, rounded down; any extra bits are lost
// games must take care to ensure transfer rectangles are exact multiples of a qword
vif1.GSLastDownloadSize = vif1.TRXREG.RRW * vif1.TRXREG.RRH * bpp >> 7;
gifRegs->stat.OPH = true;
//DevCon.Warning("GS download in progress. OPH = %x", gifRegs->stat.OPH);
//gifRegs->stat.OPH = true; // Too early to set it here. It should be done on a BUSDIR call (rama)
}
}
if (reg >= 0x60)
@ -371,10 +379,9 @@ static __forceinline void gsHandler(const u8* pMem)
#define aMin(x, y) std::min(x, y)
// Parameters:
// size (path1) - difference between the end of VU memory and pMem.
// size (path2/3) - max size of incoming data stream, in qwc (simd128)
// size - max size of incoming data stream, in qwc (simd128). If the path is PATH1, and the
// path does not terminate (EOP) within the specified size, it is assumed that the path must
// loop around to the start of VU memory and continue processing.
__forceinline int GIFPath::ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size)
{
u32 startSize = size; // Start Size
@ -382,7 +389,7 @@ __forceinline int GIFPath::ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 s
while (size > 0) {
if (!nloop) {
SetTag(pMem);
SetTag<false>(pMem);
incTag(1);
}
else
@ -509,6 +516,7 @@ __forceinline int GIFPath::ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 s
Console.Warning("GIFTAG error, size exceeded VU memory size %x", startSize);
nloop = 0;
const_cast<GIFTAG&>(tag).EOP = 1;
}
}
}
@ -521,15 +529,65 @@ __forceinline int GIFPath::ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 s
return size;
}
__forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
__forceinline void MemCopy_WrappedDest( const u128* src, u128* destBase, uint& destStart, uint destSize, uint len )
{
uint endpos = destStart + len;
if( endpos < destSize )
{
memcpy_qwc(&destBase[destStart], src, len );
destStart += len;
}
else
{
uint firstcopylen = destSize - destStart;
memcpy_qwc(&destBase[destStart], src, firstcopylen );
destStart = endpos % destSize;
memcpy_qwc(destBase, src+firstcopylen, destStart );
}
}
__forceinline void MemCopy_WrappedSrc( const u128* srcBase, uint& srcStart, uint srcSize, u128* dest, uint len )
{
uint endpos = srcStart + len;
if( endpos < srcSize )
{
memcpy_qwc(dest, &srcBase[srcStart], len );
srcStart += len;
}
else
{
uint firstcopylen = srcSize - srcStart;
memcpy_qwc(dest, &srcBase[srcStart], firstcopylen );
srcStart = endpos % srcSize;
memcpy_qwc(dest+firstcopylen, srcBase, srcStart );
}
}
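A quick way to sanity-check the wrap split above: with a toy 8-slot ring, a write position of 6, and 5 incoming elements, the helper issues a 2-element copy to the tail of the ring and a 3-element copy back at the start, leaving the write position at 3. Minimal standalone sketch (plain std::memcpy and int elements stand in for memcpy_qwc and u128; not PCSX2 code):

#include <cstdio>
#include <cstring>

// Same wrap arithmetic as MemCopy_WrappedDest, reduced to plain ints for illustration.
static void wrapped_dest_copy(const int* src, int* ringBase, unsigned& destStart,
                              unsigned ringSize, unsigned len)
{
	unsigned endpos = destStart + len;
	if (endpos < ringSize) {
		std::memcpy(&ringBase[destStart], src, len * sizeof(int));
		destStart += len;
	}
	else {
		unsigned firstcopylen = ringSize - destStart;                 // fill to the end of the ring
		std::memcpy(&ringBase[destStart], src, firstcopylen * sizeof(int));
		destStart = endpos % ringSize;                                // new write position after wrap
		std::memcpy(ringBase, src + firstcopylen, destStart * sizeof(int));
	}
}

int main()
{
	int ring[8] = {0};
	int src[5]  = {1, 2, 3, 4, 5};
	unsigned writepos = 6;
	wrapped_dest_copy(src, ring, writepos, 8, 5);
	for (int i = 0; i < 8; ++i) std::printf("%d ", ring[i]);   // 3 4 5 0 0 0 1 2
	std::printf("| writepos=%u\n", writepos);                  // writepos=3
	return 0;
}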
#define copyTag() do { \
_mm_store_ps( (float*)&RingBuffer.m_Ring[ringpos], Aligned ? _mm_load_ps((float*)pMem128) : _mm_loadu_ps((float*)pMem128)); \
++pMem128; --size; \
ringpos = (ringpos+1)&RingBufferMask; \
} while(false)
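Note that the (ringpos+1) & RingBufferMask wrap in copyTag() relies on the ring size being a power of two, so the mask replaces a modulo. Tiny illustration (toy size, hypothetical names):

const unsigned RingSize = 8;                 // must be a power of two
const unsigned RingMask = RingSize - 1;      // 0b111
unsigned pos = 7;
pos = (pos + 1) & RingMask;                  // 8 & 7 == 0 -> wraps to the start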
// Parameters:
// size - max size of incoming data stream, in qwc (simd128). If the path is PATH1, and the
// path does not terminate (EOP) within the specified size, it is assumed that the path must
// loop around to the start of VU memory and continue processing.
template< GIF_PATH pathidx, bool Aligned >
__forceinline int GIFPath::CopyTag(const u128* pMem128, u32 size)
{
uint& ringpos = GetMTGS().m_packet_writepos;
const uint original_ringpos = ringpos;
u32 startSize = size; // Start Size
while (size > 0) {
if (!nloop) {
SetTag(pMem);
incTag(1);
SetTag<Aligned>((u8*)pMem128);
copyTag();
if(nloop > 0)
{
@ -560,7 +618,7 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
}
if(GSTransferStatus.PTH3 < PENDINGSTOP_MODE || pathidx != 2)
{
gifRegs->stat.OPH = true;
//gifRegs->stat.OPH = true; // why set the GS output path flag here? (rama)
gifRegs->stat.APATH = pathidx + 1;
}
@ -588,7 +646,7 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
break;
}
gifRegs->stat.APATH = pathidx + 1;
gifRegs->stat.OPH = true;
//gifRegs->stat.OPH = true; // why set the GS output path flag here? (rama)
switch(tag.FLG) {
case GIF_FLG_PACKED:
@ -599,9 +657,9 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
{
do {
if (GetReg() == 0xe) {
gsHandler(pMem);
gsHandler((u8*)pMem128);
}
incTag(1);
copyTag();
} while(StepReg() && size > 0 && SIGNAL_IMR_Pending == false);
}
else
@ -644,11 +702,14 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
curreg = 0;
nloop = 0;
}
incTag(len);
MemCopy_WrappedDest( pMem128, RingBuffer.m_Ring, ringpos, RingBufferSize, len );
pMem128 += len;
size -= len;
}
break;
case GIF_FLG_REGLIST:
{
{
GIF_LOG("Reglist Mode EOP %x", tag.EOP);
// In reglist mode, the GIF packs 2 registers into each QWC. The nloop however
@ -687,8 +748,9 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
nloop = 0;
}
incTag(len);
MemCopy_WrappedDest( pMem128, RingBuffer.m_Ring, ringpos, RingBufferSize, len );
pMem128 += len;
size -= len;
}
break;
case GIF_FLG_IMAGE:
@ -696,13 +758,15 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
{
GIF_LOG("IMAGE Mode EOP %x", tag.EOP);
int len = aMin(size, nloop);
incTag(len);
MemCopy_WrappedDest( pMem128, RingBuffer.m_Ring, ringpos, RingBufferSize, len );
pMem128 += len;
size -= len;
nloop -= len;
}
break;
}
}
if(pathidx == GIF_PATH_1)
@ -713,11 +777,11 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
{
size = 0x3ff - startSize;
startSize = 0x3ff;
pMem -= 0x4000;
pMem128 -= 0x400;
}
else
{
// Note: The BIOS does an XGKICK on the VU1 and lets yt DMA to the GS without an EOP
// Note: The BIOS does an XGKICK on the VU1 and lets it DMA to the GS without an EOP
// (seemingly to loop forever), only to write an EOP later on. No other game is known to
// do anything of the sort.
// So lets just cap the DMA at 16k, and force it to "look" like it's terminated for now.
@ -727,6 +791,12 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
Console.Warning("GIFTAG error, size exceeded VU memory size %x", startSize);
nloop = 0;
const_cast<GIFTAG&>(tag).EOP = 1;
// Don't send the packet to the GS -- it's incomplete and might cause the GS plugin
// to get confused and die. >_<
ringpos = original_ringpos;
}
}
}
@ -749,6 +819,9 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
gsIrq();
}
}
// [TODO] : DMAC Arbitration rights should select the next queued GIF transfer here.
break;
}
if(SIGNAL_IMR_Pending == true)
@ -793,47 +866,40 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
gif->qwc -= size;
}
}
return size;
}
// Processes a GIFtag & packet, and throws out some gsIRQs as needed.
// Used to keep interrupts in sync with the EE, while the GS itself
// runs potentially several frames behind.
// Parameters:
// size - max size of incoming data stream, in qwc (simd128)
__forceinline int GIFPath_ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
// size - max size of incoming data stream, in qwc (simd128). If the path is PATH1, and the
// path does not terminate (EOP) within the specified size, it is assumed that the path must
// loop around to the start of VU memory and continue processing.
__forceinline int GIFPath_CopyTag(GIF_PATH pathidx, const u128* pMem, u32 size)
{
#ifdef PCSX2_GSRING_SAMPLING_STATS
static uptr profStartPtr = 0;
static uptr profEndPtr = 0;
if (profStartPtr == 0) {
__asm
{
__beginfunc:
mov profStartPtr, offset __beginfunc;
mov profEndPtr, offset __endfunc;
}
ProfilerRegisterSource( "GSRingBufCopy", (void*)profStartPtr, profEndPtr - profStartPtr );
}
#endif
int retSize = s_gifPath[pathidx].ParseTag(pathidx, pMem, size);
#ifdef PCSX2_GSRING_SAMPLING_STATS
__asm
switch( pathidx )
{
__endfunc:
nop;
case GIF_PATH_1:
pxAssertMsg(!s_gifPath[GIF_PATH_2].IsActive(), "GIFpath conflict: Attempted to start PATH1 while PATH2 is already active.");
pxAssertMsg(!s_gifPath[GIF_PATH_3].IsActive() || (GSTransferStatus.PTH3 == IMAGE_MODE), "GIFpath conflict: Attempted to start PATH1 while PATH3 is already active.");
return s_gifPath[GIF_PATH_1].CopyTag<GIF_PATH_1,true>(pMem, size);
case GIF_PATH_2:
pxAssertMsg(!s_gifPath[GIF_PATH_1].IsActive(), "GIFpath conflict: Attempted to start PATH2 while PATH1 is already active.");
pxAssertMsg(!s_gifPath[GIF_PATH_3].IsActive() || (GSTransferStatus.PTH3 == IMAGE_MODE), "GIFpath conflict: Attempted to start PATH2 while PATH3 is already active.");
return s_gifPath[GIF_PATH_2].CopyTag<GIF_PATH_2,false>(pMem, size);
case GIF_PATH_3:
pxAssertMsg(!s_gifPath[GIF_PATH_1].IsActive(), "GIFpath conflict: Attempted to start PATH3 while PATH1 is already active.");
pxAssertMsg(!s_gifPath[GIF_PATH_2].IsActive(), "GIFpath conflict: Attempted to start PATH3 while PATH2 is already active.");
return s_gifPath[GIF_PATH_3].CopyTag<GIF_PATH_3,true>(pMem, size);
jNO_DEFAULT;
}
#endif
return retSize;
return 0; // unreachable
}
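For reference, the caller-side pattern that replaces the old PrepDataPacket/GetDataPacketPtr/memcpy sequence is visible at the XGKICK sites elsewhere in this commit. A condensed sketch of that flow (PATH1 from VU1 memory; vu1Mem and startQwc are illustrative stand-ins, not real PCSX2 symbols):

// Sketch only -- mirrors the new call sequence used by the XGKICK handlers in this commit.
void Example_Path1Kick(u128* vu1Mem, uint startQwc)
{
	// Reserve up to 0x400 qwc of ring space; GIFPath_CopyTag copies as it parses,
	// so the separate GetDataPacketPtr()/memcpy step is gone.
	GetMTGS().PrepDataPacket(GIF_PATH_1, 0x400);

	// Parse the GIF tag stream and copy it straight into the MTGS ring.  For PATH1,
	// size is the distance to the end of VU memory; the parser wraps around on its own.
	uint copied = GIFPath_CopyTag(GIF_PATH_1, vu1Mem + startQwc, 0x400 - startQwc);

	// Publish the packet to the GS thread.
	GetMTGS().SendDataPacket();
	(void)copied;
}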
//Quick version for queueing PATH1 data
// Quick version for queuing PATH1 data.
// This version calculates the real length of the packet data only. It does not process
// IRQs or DMA status updates.
__forceinline int GIFPath_ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size)
{
int retSize = s_gifPath[pathidx].ParseTagQuick(pathidx, pMem, size);

View File

@ -1258,11 +1258,11 @@ void recompileNextInstruction(int delayslot)
// Calling of this function can be enabled or disabled through the use of EmuConfig.Recompiler.PreBlockChecks
static void __fastcall PreBlockCheck( u32 blockpc )
{
static int lastrec = 0;
/*static int lastrec = 0;
static int curcount = 0;
const int skip = 0;
/*if( blockpc != 0x81fc0 ) {//&& lastrec != g_lastpc ) {
if( blockpc != 0x81fc0 ) {//&& lastrec != g_lastpc ) {
curcount++;
if( curcount > skip ) {

View File

@ -1097,7 +1097,6 @@ void __fastcall mVU_XGKICK_(u32 addr) {
u8* data = microVU1.regs->Mem + (addr*16);
u32 diff = 0x400 - addr;
u32 size;
u8* pDest;
if(gifRegs->stat.APATH <= GIF_APATH1 || (gifRegs->stat.APATH == GIF_APATH3 && gifRegs->stat.IP3 == true) && SIGNAL_IMR_Pending == false)
{
@ -1106,23 +1105,12 @@ void __fastcall mVU_XGKICK_(u32 addr) {
//Flush any pending transfers so things dont go up in the wrong order
while(gifRegs->stat.P1Q == true) gsPath1Interrupt();
}
size = GetMTGS().PrepDataPacket(GIF_PATH_1, data, diff);
pDest = GetMTGS().GetDataPacketPtr();
if (size > diff) {
//DevCon.WriteLn("XGkick Wrap!");
memcpy_qwc(pDest, microVU1.regs->Mem + (addr*16), diff);
size -= diff;
pDest += diff*16;
memcpy_qwc(pDest, microVU1.regs->Mem, size);
}
else {
memcpy_qwc(pDest, microVU1.regs->Mem + (addr*16), size);
}
GetMTGS().PrepDataPacket(GIF_PATH_1, 0x400);
size = GIFPath_CopyTag(GIF_PATH_1, (u128*)data, diff);
GetMTGS().SendDataPacket();
if(GSTransferStatus.PTH1 == STOPPED_MODE)
{
gifRegs->stat.OPH = false;
gifRegs->stat.APATH = GIF_APATH_IDLE;
}
}
@ -1130,17 +1118,16 @@ void __fastcall mVU_XGKICK_(u32 addr) {
{
//DevCon.Warning("GIF APATH busy %x Holding for later W %x, R %x", gifRegs->stat.APATH, Path1WritePos, Path1ReadPos);
size = GIFPath_ParseTagQuick(GIF_PATH_1, data, diff);
pDest = &Path1Buffer[Path1WritePos*16];
u8* pDest = &Path1Buffer[Path1WritePos*16];
pxAssumeMsg((Path1WritePos+size < sizeof(Path1Buffer)), "XGKick Buffer Overflow detected on Path1Buffer!");
Path1WritePos += size;
pxAssumeMsg((Path1WritePos < sizeof(Path1Buffer)), "XGKick Buffer Overflow detected on Path1Buffer!");
//DevCon.Warning("Storing size %x PATH 1", size);
if (size > diff) {
// fixme: one of these days the following *16's will get cleaned up when we introduce
// a special qwc/simd16 optimized version of memcpy_aligned. :)
//DevCon.Status("XGkick Wrap!");
memcpy_qwc(pDest, microVU1.regs->Mem + (addr*16), diff);
Path1WritePos += size;
size -= diff;
pDest += diff*16;
memcpy_qwc(pDest, microVU1.regs->Mem, size);

View File

@ -1988,24 +1988,12 @@ void __fastcall VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr)
//Flush any pending transfers so things dont go up in the wrong order
while(gifRegs->stat.P1Q == true) gsPath1Interrupt();
}
size = GetMTGS().PrepDataPacket(GIF_PATH_1, data, diff);
pDest = GetMTGS().GetDataPacketPtr();
if (size > diff) {
// fixme: one of these days the following *16's will get cleaned up when we introduce
// a special qwc/simd16 optimized version of memcpy_aligned. :)
memcpy_aligned(pDest, VU1.Mem + addr, diff*16);
size -= diff;
pDest += diff*16;
memcpy_aligned(pDest, VU1.Mem, size*16);
}
else {
memcpy_aligned(pDest, VU1.Mem + addr, size*16);
}
GetMTGS().PrepDataPacket(GIF_PATH_1, 0x400);
size = GIFPath_CopyTag(GIF_PATH_1, (u128*)data, diff);
GetMTGS().SendDataPacket();
if(GSTransferStatus.PTH1 == STOPPED_MODE )
{
gifRegs->stat.OPH = false;
gifRegs->stat.APATH = GIF_APATH_IDLE;
}
}
@ -2015,8 +2003,6 @@ void __fastcall VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr)
size = GIFPath_ParseTagQuick(GIF_PATH_1, data, diff);
pDest = &Path1Buffer[Path1WritePos*16];
pxAssumeMsg((Path1WritePos+size < sizeof(Path1Buffer)), "XGKick Buffer Overflow detected on Path1Buffer!");
//DevCon.Warning("Storing size %x PATH 1", size);
@ -2024,14 +2010,14 @@ void __fastcall VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr)
// fixme: one of these days the following *16's will get cleaned up when we introduce
// a special qwc/simd16 optimized version of memcpy_aligned. :)
//DevCon.Status("XGkick Wrap!");
memcpy_aligned(pDest, VU1.Mem + addr, diff*16);
memcpy_aligned(pDest, VU1.Mem + addr, diff);
Path1WritePos += size;
size -= diff;
pDest += diff*16;
memcpy_aligned(pDest, VU1.Mem, size*16);
memcpy_aligned(pDest, VU1.Mem, size);
}
else {
memcpy_aligned(pDest, VU1.Mem + addr, size*16);
memcpy_aligned(pDest, VU1.Mem + addr, size);
Path1WritePos += size;
}
//if(!gifRegs->stat.P1Q) CPU_INT(28, 128);

View File

@ -195,8 +195,6 @@
<Unit filename="../spdif.h" />
<Unit filename="../spu2freeze.cpp" />
<Unit filename="../spu2sys.cpp" />
<Unit filename="../utf8.cpp" />
<Unit filename="../utf8.h" />
<Extensions>
<code_completion />
<debugger />

View File

@ -635,7 +635,7 @@ typedef struct
int imageTransfer;
int imageWnew, imageHnew, imageX, imageY, imageEndX, imageEndY;
pathInfo path[3];
pathInfo path[4];
GIFRegDIMX dimx;
void setRGBA(u32 r, u32 g, u32 b, u32 a)
{

View File

@ -265,8 +265,17 @@ void CALLBACK GSgifTransfer3(u32 *pMem, u32 size)
_GSgifTransfer<2>(pMem, size);
}
void InitPath()
void CALLBACK GSgifTransfer(u32 *pMem, u32 size)
{
gs.path[0].mode = gs.path[1].mode = gs.path[2].mode = 0;
FUNCLOG
//ZZLog::GS_Log("GSgifTransfer3 size = %lx (mode %d, gs.path3.tag.nloop = %d).", size, gs.path[2].mode, gs.path[2].tag.nloop);
_GSgifTransfer<3>(pMem, size);
}
void InitPath()
{
gs.path[0].mode = gs.path[1].mode = gs.path[2].mode = gs.path[3].mode = 0;
}