mirror of https://github.com/PCSX2/pcsx2.git
Merge optimizations and code cleanups from the ReorderingMTGS branch (r3523) into Trunk. Summary of changes:

 * GIFpath parsing copies as it parses (speedup!)
 * Improved memcpy for 128-bit copies (speedup!)
 * MTGS ringbuffer uses free-flowing wrapping now, which simplified ringbuffer management logic considerably (speedup!)
 * Various MTGS-related refactoring (var renames and such)

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3532 96395faa-99c1-11dd-bbfe-3dabce05a288
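The "free-flowing wrapping" named above means the ring indices simply advance modulo a power-of-two size instead of being reset through an explicit RESTART command. A minimal sketch of the idea, with illustrative names (the constants mirror RingBufferSizeFactor/RingBufferMask from the diff below, but this block is not part of the commit):

    // Sketch only: power-of-two ring with mask-based wrapping.
    static const unsigned RingSizeFactor = 19;            // 1<<19 u128 slots = 8MB
    static const unsigned RingSize       = 1u << RingSizeFactor;
    static const unsigned RingMask       = RingSize - 1;  // wrap with a single AND

    struct RingIndices
    {
        unsigned readPos;   // consumer (GS thread)
        unsigned writePos;  // producer (EE thread)

        bool empty() const { return readPos == writePos; }

        // Free slots left before the writer would overtake the reader.
        unsigned freeRoom() const
        {
            return (writePos < readPos) ? (readPos - writePos)
                                        : (RingSize - (writePos - readPos));
        }

        void advanceWrite(unsigned qwc) { writePos = (writePos + qwc) & RingMask; }
    };

The freeRoom() arithmetic here is the same calculation the new GenericStall() performs near the end of this diff.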
This commit is contained in: 856ffe4c65
Changed paths:
  common/include
  common/src/Utilities
  pcsx2: Config.h, FiFo.cpp, GS.cpp, GS.h, Gif.cpp, Gif.h, MTGS.cpp, Pcsx2Config.cpp, PluginManager.cpp, VUops.cpp, Vif1_Dma.cpp, Vif1_MFIFO.cpp, Vif_Codes.cpp, Vif_Transfer.cpp
  pcsx2/gui
  pcsx2/ps2
  pcsx2/x86
  plugins
@@ -248,6 +248,7 @@ void CALLBACK GSsetSettingsDir( const char* dir );

void CALLBACK GSsetLogDir( const char* dir );
void CALLBACK GSvsync(int field);
void CALLBACK GSgifTransfer(u32 *pMem, u32 addr);
void CALLBACK GSgifTransfer1(u32 *pMem, u32 addr);
void CALLBACK GSgifTransfer2(u32 *pMem, u32 size);
void CALLBACK GSgifTransfer3(u32 *pMem, u32 size);
@@ -22,12 +22,14 @@

extern "C" void __fastcall memcpy_amd_(void *dest, const void *src, size_t bytes);
extern "C" u8 memcmp_mmx(const void* src1, const void* src2, int cmpsize);
extern "C" void memxor_mmx(void* dst, const void* src1, int cmpsize);
extern void memcpy_amd_qwc(void *dest, const void *src, size_t bytes);

#else

# include "win_memzero.h"

extern void __fastcall memcpy_amd_(void *dest, const void *src, size_t bytes);
extern void memcpy_amd_qwc(void *dest, const void *src, size_t bytes);
extern u8 memcmp_mmx(const void* src1, const void* src2, int cmpsize);
extern void memxor_mmx(void* dst, const void* src1, int cmpsize);

@@ -40,9 +42,12 @@ void _memset16_unaligned( void* dest, u16 data, size_t size );

extern void memcpy_vibes(void * dest, const void * src, int size);
extern void gen_memcpy_vibes();

#define memcpy_fast memcpy_amd_ // Fast memcpy
#define memcpy_aligned memcpy_amd_ // Memcpy with 16-byte Aligned addresses
#define memcpy_const memcpy_amd_ // Memcpy with constant size
#define memcpy_constA memcpy_amd_ // Memcpy with constant size and 16-byte aligned
#define memcpy_qwc_ memcpy_vibes // Memcpy in aligned qwc increments, with 0x400 qwc or less
#define memcpy_qwc(x,y,z) memcpy_amd_(x, y, z*16) // Memcpy in aligned qwc increments
#define memcpy_fast memcpy_amd_ // Fast memcpy
#define memcpy_aligned(d,s,c) memcpy_amd_(d,s,c) // Memcpy with 16-byte Aligned addresses
#define memcpy_const memcpy_amd_ // Memcpy with constant size
#define memcpy_constA memcpy_amd_ // Memcpy with constant size and 16-byte aligned
#define memcpy_qwc_ memcpy_vibes // Memcpy in aligned qwc increments, with 0x400 qwc or less
#define memcpy_qwc(d,s,c) memcpy_amd_qwc(d,s,c)

// Useful alternative if we think memcpy_amd_qwc is buggy
//#define memcpy_qwc(d,s,c) memcpy_amd_(d,s,c*16)
@@ -129,6 +129,10 @@ namespace Threading

// For use in spin/wait loops.
extern void SpinWait();

// Use prior to committing data to another thread (internal memcpy_qwc does not use fencing,
// so that many memcpys can be issued in a row more efficiently)
extern void StoreFence();

// Optional implementation to enable hires thread/process scheduler for the operating system.
// Needed by Windows, but might not be relevant to other platforms.
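A usage sketch of the pattern this comment describes: because memcpy_qwc issues stores without its own fence, a producer can batch several copies and fence once before publishing. The buffer names and counts here are hypothetical, not from the commit:

    // Sketch only: dstRing/srcA/srcB are hypothetical u128* buffers.
    memcpy_qwc( dstRing,        srcA, 0x40 );  // counts are in 128-bit quadwords
    memcpy_qwc( dstRing + 0x40, srcB, 0x20 );
    Threading::StoreFence();                   // one fence for the whole batch
    GetMTGS().SetEvent();                      // now safe to wake the consumer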
@@ -71,7 +71,7 @@ wxString DiagnosticOrigin::ToString( const wxChar* msg ) const

bool pxAssertImpl_LogIt( const DiagnosticOrigin& origin, const wxChar *msg )
{
    wxLogError( origin.ToString( msg ) );
    wxLogError( L"%s", origin.ToString( msg ) );
    return false;
}
@@ -36,6 +36,11 @@ __forceinline void Threading::SpinWait()
    __asm pause;
}

__forceinline void Threading::StoreFence()
{
    __asm sfence;
}

__forceinline void Threading::EnableHiresScheduler()
{
    // This improves accuracy of Sleep() by some amount, and only adds a negligible amount of
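For reference, a hedged sketch of the same two primitives written with compiler intrinsics rather than MSVC inline asm (not part of this commit; shown only because the __asm blocks above are MSVC-specific):

    // Sketch only: portable equivalents of the __asm pause/sfence above.
    #include <xmmintrin.h>   // _mm_sfence
    #include <emmintrin.h>   // _mm_pause (header placement varies by compiler)

    void SpinWait()   { _mm_pause();  }   // hint to the CPU: spin loop in progress
    void StoreFence() { _mm_sfence(); }   // order (non-temporal) stores before later ones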
@@ -41,12 +41,10 @@

MEMCPY_AMD.CPP
******************************************************************************/

// Very optimized memcpy() routine for AMD Athlon and Duron family.
// This code uses any of FOUR different basic copy methods, depending
// on the transfer size.
// NOTE: Since this code uses MOVNTQ (also known as "Non-Temporal MOV" or
// "Streaming Store"), and also uses the software prefetch instructions,
// be sure you're running on Athlon/Duron or other recent CPU before calling!
// be sure you're running on P4/Core2/i7, Athlon/Phenom or newer CPUs before
// calling!

#define TINY_BLOCK_COPY 64 // upper limit for movsd type copy
// The smallest copy uses the X86 "movsd" instruction, in an optimized
@@ -68,10 +66,8 @@ MEMCPY_AMD.CPP

#if defined(_MSC_VER)

// --------------------------------------------------------------------------------------
// Fast memcpy as coded by AMD, and then improved by air.
// --------------------------------------------------------------------------------------
// Fast memcpy as coded by AMD, and then improved by air for PCSX2 needs.
__declspec(naked) void __fastcall memcpy_amd_(void *dest, const void *src, size_t n)
{
    __asm
@@ -92,6 +88,7 @@ __declspec(naked) void __fastcall memcpy_amd_(void *dest, const void *src, size_
    jbe $memcpy_do_align ; it appears to be slower
    cmp eax, 64*1024
    jbe $memcpy_align_done

$memcpy_do_align:
    mov eax, 8 ; a trick that's faster than rep movsb...
    sub eax, edi ; align destination to qword
@@ -146,7 +143,7 @@ $memcpy_ic_1: ; 64-byte block copies, in-cache copy

    add esi, 64 ; update source pointer
    add edi, 64 ; update destination pointer
    dec eax ; count down
    sub eax, 1
    jnz $memcpy_ic_1 ; last 64-byte block?

$memcpy_ic_2:
@@ -189,64 +186,15 @@ $memcpy_uc_1: ; 64-byte blocks, uncached copy
    movq mm1,[esi-8]
    movntq [edi-24], mm2
    movntq [edi-16], mm0
    dec eax
    movntq [edi-8], mm1

    sub eax, 1
    jnz $memcpy_uc_1 ; last 64-byte block?

    jmp $memcpy_ic_2 ; almost done (not needed because large copy below was removed)

// For the largest size blocks, a special technique called Block Prefetch
// can be used to accelerate the read operations. Block Prefetch reads
// one address per cache line, for a series of cache lines, in a short loop.
// This is faster than using software prefetch. The technique is great for
// getting maximum read bandwidth, especially in DDR memory systems.

// Note: Pcsx2 rarely invokes large copies, so this mode has been disabled to
// help keep the code cache footprint of memcpy_fast to a minimum.
/*
$memcpy_bp_1: ; large blocks, block prefetch copy

    cmp ecx, CACHEBLOCK ; big enough to run another prefetch loop?
    jl $memcpy_64_test ; no, back to regular uncached copy

    mov eax, CACHEBLOCK / 2 ; block prefetch loop, unrolled 2X
    add esi, CACHEBLOCK * 64 ; move to the top of the block
    align 16
$memcpy_bp_2:
    mov edx, [esi-64] ; grab one address per cache line
    mov edx, [esi-128] ; grab one address per cache line
    sub esi, 128 ; go reverse order to suppress HW prefetcher
    dec eax ; count down the cache lines
    jnz $memcpy_bp_2 ; keep grabbing more lines into cache

    mov eax, CACHEBLOCK ; now that it's in cache, do the copy
    align 16
$memcpy_bp_3:
    movq mm0, [esi ] ; read 64 bits
    movq mm1, [esi+ 8]
    movq mm2, [esi+16]
    movq mm3, [esi+24]
    movq mm4, [esi+32]
    movq mm5, [esi+40]
    movq mm6, [esi+48]
    movq mm7, [esi+56]
    add esi, 64 ; update source pointer
    movntq [edi ], mm0 ; write 64 bits, bypassing cache
    movntq [edi+ 8], mm1 ; note: movntq also prevents the CPU
    movntq [edi+16], mm2 ; from READING the destination address
    movntq [edi+24], mm3 ; into the cache, only to be over-written,
    movntq [edi+32], mm4 ; so that also helps performance
    movntq [edi+40], mm5
    movntq [edi+48], mm6
    movntq [edi+56], mm7
    add edi, 64 ; update dest pointer

    dec eax ; count down

    jnz $memcpy_bp_3 ; keep copying
    sub ecx, CACHEBLOCK ; update the 64-byte block count
    jmp $memcpy_bp_1 ; keep processing chunks
*/
// Note: Pcsx2 rarely invokes large copies, so the large copy "block prefetch" mode has been
// disabled to help keep the code cache footprint of memcpy_fast to a minimum.

// The smallest copy uses the X86 "movsd" instruction, in an optimized
// form which is an "unrolled loop". Then it handles the last few bytes.
@@ -274,17 +222,99 @@ $memcpy_last_few: ; dword aligned from before movsd's
    rep movsb ; the last 1, 2, or 3 bytes

$memcpy_final:
    pop esi
    pop edi

    emms ; clean up the MMX state
    sfence ; flush the write buffer
    //mov eax, [dest] ; ret value = destination pointer

    pop esi
    pop edi

    ret 4
}
}

// Quadword Copy! Count is in QWCs (128 bits). Neither source nor dest need to be aligned.
__forceinline void memcpy_amd_qwc(void *dest, const void *src, size_t qwc)
{
    // Optimization Analysis: This code is *nearly* optimal. Do not think that using XMM
    // registers will improve copy performance, because they won't. Use of XMMs is only
    // warranted in situations where both source and dest are guaranteed aligned to 16 bytes,
    // and even then the benefits are typically minimal (sometimes slower depending on the
    // amount of data being copied).
    //
    // Thus: MMX are alignment safe, fast, and widely available. Lets just stick with them.
    // --air

    // Linux Conversion note:
    // This code would benefit nicely from having inline-able GAS syntax, since it should
    // allow GCC to optimize the first 3 instructions out of existence in many scenarios.
    // And its called enough times to probably merit the extra effort to ensure proper
    // optimization. --air

    __asm
    {
        mov ecx, dest
        mov edx, src
        mov eax, qwc ; keep a copy of count
        shr eax, 1
        jz $memcpy_qwc_1 ; only one 16 byte block to copy?

        cmp eax, IN_CACHE_COPY/32
        jb $memcpy_qwc_loop1 ; small copies should be cached (definite speedup --air)

    $memcpy_qwc_loop2: ; 32-byte blocks, uncached copy
        prefetchnta [edx + 568] ; start reading ahead (tested: it helps! --air)

        movq mm0,[edx+0] ; read 64 bits
        movq mm1,[edx+8]
        movq mm2,[edx+16]
        movntq [ecx+0], mm0 ; write 64 bits, bypassing the cache
        movntq [ecx+8], mm1
        movq mm3,[edx+24]
        movntq [ecx+16], mm2
        movntq [ecx+24], mm3

        add edx,32 ; update source pointer
        add ecx,32 ; update destination pointer
        sub eax,1
        jnz $memcpy_qwc_loop2 ; last 64-byte block?
        sfence ; flush the write buffer
        jmp $memcpy_qwc_1

    ; 32-byte blocks, cached!
    ; This *is* important. Removing this and using exclusively non-temporal stores
    ; results in noticeable speed loss!

    $memcpy_qwc_loop1:
        prefetchnta [edx + 568] ; start reading ahead (tested: it helps! --air)

        movq mm0,[edx+0] ; read 64 bits
        movq mm1,[edx+8]
        movq mm2,[edx+16]
        movq [ecx+0], mm0 ; write 64 bits, using the cache
        movq [ecx+8], mm1
        movq mm3,[edx+24]
        movq [ecx+16], mm2
        movq [ecx+24], mm3

        add edx,32 ; update source pointer
        add ecx,32 ; update destination pointer
        sub eax,1
        jnz $memcpy_qwc_loop1 ; last 64-byte block?

    $memcpy_qwc_1:
        test qwc,1
        jz $memcpy_qwc_final
        movq mm0,[edx]
        movq mm1,[edx+8]
        movq [ecx], mm0
        movq [ecx+8], mm1

    $memcpy_qwc_final:
        emms ; clean up the MMX state
    }
}

// mmx mem-compare implementation, size has to be a multiple of 8
// returns 0 if equal, nonzero value if not equal
// ~10 times faster than standard memcmp
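A brief usage sketch of the memcmp_mmx contract described above (illustrative buffers, not from the commit):

    // Sketch only: size must be a multiple of 8 bytes.
    u8 blockA[64], blockB[64];
    // ... fill both blocks ...
    if( memcmp_mmx( blockA, blockB, 64 ) != 0 )
    {
        // Blocks differ. The comment above only promises zero vs. nonzero;
        // no memcmp-style ordering is implied.
    }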
@@ -156,3 +156,95 @@ __forceinline void memcpy_vibes(void * dest, const void * src, int size) {

#endif
#endif

// Since MemcpyVibes is already in the project, I'll just tuck the Linux version of memcpy_amd_qwc here for the moment,
// to get around compilation issues with having it in the headers.
#ifdef __LINUX__

// This can be moved later, but Linux doesn't even compile memcpyFast.cpp, so I figured I'd stick it here for now.
// Quadword Copy! Count is in QWCs (128 bits). Neither source nor dest need to be aligned.
__forceinline void memcpy_amd_qwc(void *dest, const void *src, size_t qwc)
{
    // Optimization Analysis: This code is *nearly* optimal. Do not think that using XMM
    // registers will improve copy performance, because they won't. Use of XMMs is only
    // warranted in situations where both source and dest are guaranteed aligned to 16 bytes,
    // and even then the benefits are typically minimal (sometimes slower depending on the
    // amount of data being copied).
    //
    // Thus: MMX are alignment safe, fast, and widely available. Lets just stick with them.
    // --air

    // Linux Conversion note:
    // This code would benefit nicely from having inline-able GAS syntax, since it should
    // allow GCC to optimize the first 3 instructions out of existence in many scenarios.
    // And its called enough times to probably merit the extra effort to ensure proper
    // optimization. --air

    __asm__
    (
        ".intel_syntax noprefix\n"
        "mov eax, %[qwc]\n"             // keep a copy of count for looping
        "shr eax, 1\n"
        "jz memcpy_qwc_1\n"             // only one 16 byte block to copy?

        "cmp eax, 64\n"                 // "IN_CACHE_COPY/32"
        "jb memcpy_qwc_loop1\n"         // small copies should be cached (definite speedup --air)

    "memcpy_qwc_loop2:\n"               // 32-byte blocks, uncached copy
        "prefetchnta [%[src] + 568]\n"  // start reading ahead (tested: it helps! --air)

        "movq mm0,[%[src]+0]\n"         // read 64 bits
        "movq mm1,[%[src]+8]\n"
        "movq mm2,[%[src]+16]\n"
        "movntq [%[dest]+0], mm0\n"     // write 64 bits, bypassing the cache
        "movntq [%[dest]+8], mm1\n"
        "movq mm3,[%[src]+24]\n"
        "movntq [%[dest]+16], mm2\n"
        "movntq [%[dest]+24], mm3\n"

        "add %[src],32\n"               // update source pointer
        "add %[dest],32\n"              // update destination pointer
        "sub eax,1\n"
        "jnz memcpy_qwc_loop2\n"        // last 64-byte block?
        "sfence\n"                      // flush the write buffer
        "jmp memcpy_qwc_1\n"

        // 32-byte blocks, cached!
        // This *is* important. Removing this and using exclusively non-temporal stores
        // results in noticeable speed loss!

    "memcpy_qwc_loop1:\n"
        "prefetchnta [%[src] + 568]\n"  // start reading ahead (tested: it helps! --air)

        "movq mm0,[%[src]+0]\n"         // read 64 bits
        "movq mm1,[%[src]+8]\n"
        "movq mm2,[%[src]+16]\n"
        "movq [%[dest]+0], mm0\n"       // write 64 bits, using the cache
        "movq [%[dest]+8], mm1\n"
        "movq mm3,[%[src]+24]\n"
        "movq [%[dest]+16], mm2\n"
        "movq [%[dest]+24], mm3\n"

        "add %[src],32\n"               // update source pointer
        "add %[dest],32\n"              // update destination pointer
        "sub eax,1\n"
        "jnz memcpy_qwc_loop1\n"        // last 64-byte block?

    "memcpy_qwc_1:\n"
        "test %[qwc],1\n"
        "jz memcpy_qwc_final\n"
        "movq mm0,[%[src]]\n"
        "movq mm1,[%[src]+8]\n"
        "movq [%[dest]], mm0\n"
        "movq [%[dest]+8], mm1\n"

    "memcpy_qwc_final:\n"
        "emms\n"                        // clean up the MMX state
        ".att_syntax\n"
        : "=&r"(dest), "=&r"(src), "=&r"(qwc)
        : [dest]"0"(dest), [src]"1"(src), [qwc]"2"(qwc)
        : "memory", "eax", "mm0", "mm1", "mm2", "mm3"
    );
}
#endif
@@ -395,6 +395,7 @@ struct Pcsx2Config
    // style. Useful for debugging potential bugs in the MTGS pipeline.
    bool SynchronousMTGS;
    bool DisableOutput;
    int VsyncQueueSize;

    bool FrameLimitEnable;
    bool FrameSkipEnable;

@@ -420,6 +421,8 @@ struct Pcsx2Config
    return
        OpEqu( SynchronousMTGS ) &&
        OpEqu( DisableOutput ) &&
        OpEqu( VsyncQueueSize ) &&

        OpEqu( FrameSkipEnable ) &&
        OpEqu( FrameLimitEnable ) &&
        OpEqu( VsyncEnable ) &&
@@ -164,7 +164,6 @@ void __fastcall WriteFIFO_page_5(u32 mem, const mem128_t *value)

    if(GSTransferStatus.PTH2 == STOPPED_MODE && gifRegs->stat.APATH == GIF_APATH2)
    {
        if(gifRegs->stat.DIR == 0)gifRegs->stat.OPH = false;
        gifRegs->stat.APATH = GIF_APATH_IDLE;
        if(gifRegs->stat.P1Q) gsPath1Interrupt();
    }

@@ -195,14 +194,12 @@ void __fastcall WriteFIFO_page_6(u32 mem, const mem128_t *value)
    nloop0_packet[1] = psHu32(GIF_FIFO + 4);
    nloop0_packet[2] = psHu32(GIF_FIFO + 8);
    nloop0_packet[3] = psHu32(GIF_FIFO + 12);
    GetMTGS().PrepDataPacket(GIF_PATH_3, (u8*)nloop0_packet, 1);
    u64* data = (u64*)GetMTGS().GetDataPacketPtr();
    data[0] = value[0];
    data[1] = value[1];
    GetMTGS().PrepDataPacket(GIF_PATH_3, 1);
    //u64* data = (u64*)GetMTGS().GetDataPacketPtr();
    GIFPath_CopyTag( GIF_PATH_3, (u128*)nloop0_packet, 1 );
    GetMTGS().SendDataPacket();
    if(GSTransferStatus.PTH3 == STOPPED_MODE && gifRegs->stat.APATH == GIF_APATH3 )
    {
        if(gifRegs->stat.DIR == 0)gifRegs->stat.OPH = false;
        gifRegs->stat.APATH = GIF_APATH_IDLE;
        if(gifRegs->stat.P1Q) gsPath1Interrupt();
    }
pcsx2/GS.cpp
@@ -282,14 +282,19 @@ void __fastcall gsWrite64_page_01( u32 mem, const mem64_t* value )
    {
        case 0x12001040: //busdir

            //This is probably a complete hack, however writing to BUSDIR "should" start a transfer (Bleach Blade Battlers)
            //Only problem is it kills killzone :( leaving it commented out for now.
            //This is probably a complete hack, however writing to BUSDIR "should" start a transfer
            //(Bleach Blade Battlers, Growlanser 2 and 3, Wizardry)
            //Only problem is it kills killzone :(.
            // (yes it *is* a complete hack; both lines here in fact --air)
            //=========================================================================
            //gifRegs->stat.OPH = true;
            //Console.Warning("BUSDIR write! Setting OPH and DIR to = %x",(u32)value[0]);
            if ((u32)value[0] == 1)
                gifRegs->stat.OPH = true;
            else
                gifRegs->stat.OPH = false;

            gifRegs->stat.DIR = (u32)value[0];
            //=========================================================================
            gifRegs->stat.DIR = (u32)value;

            // BUSDIR INSANITY !! MTGS FLUSH NEEDED
            //
            // Yup folks. BUSDIR is evil. The only safe way to handle it is to flush the whole MTGS
pcsx2/GS.h
@@ -229,7 +229,8 @@ enum GIF_PATH
    GIF_PATH_3,
};

extern int GIFPath_ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size);
extern void GIFPath_Initialize();
extern int GIFPath_CopyTag(GIF_PATH pathidx, const u128* pMem, u32 size);
extern int GIFPath_ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size);
extern void GIFPath_Reset();
extern void GIFPath_Clear( GIF_PATH pathidx );

@@ -248,7 +249,6 @@ enum MTGS_RingCommand
    GS_RINGTYPE_P1
,   GS_RINGTYPE_P2
,   GS_RINGTYPE_P3
,   GS_RINGTYPE_RESTART
,   GS_RINGTYPE_VSYNC
,   GS_RINGTYPE_FRAMESKIP
,   GS_RINGTYPE_FREEZE

@@ -273,19 +273,20 @@ class SysMtgsThread : public SysThreadBase
    typedef SysThreadBase _parent;

public:
    // note: when m_RingPos == m_WritePos, the fifo is empty
    uint m_RingPos; // cur pos gs is reading from
    // note: when m_ReadPos == m_WritePos, the fifo is empty
    uint m_ReadPos; // cur pos gs is reading from
    uint m_WritePos; // cur pos ee thread is writing to

    volatile bool m_RingBufferIsBusy;
    volatile u32 m_SignalRingEnable;
    volatile s32 m_SignalRingPosition;

    int m_QueuedFrameCount;
    u32 m_RingWrapSpot;
    volatile s32 m_QueuedFrameCount;
    volatile u32 m_VsyncSignalListener;

    Mutex m_lock_RingBufferBusy;
    Mutex m_mtx_RingBufferBusy;
    Semaphore m_sem_OnRingReset;
    Semaphore m_sem_Vsync;

    // used to keep multiple threads from sending packets to the ringbuffer concurrently.
    // (currently not used or implemented -- is a planned feature for a future threaded VU1)

@@ -301,8 +302,9 @@ public:
    // These vars maintain instance data for sending Data Packets.
    // Only one data packet can be constructed and uploaded at a time.

    uint m_packet_startpos; // size of the packet (data only, ie. not including the 16 byte command!)
    uint m_packet_size; // size of the packet (data only, ie. not including the 16 byte command!)
    uint m_packet_ringpos; // index of the data location in the ringbuffer.
    uint m_packet_writepos; // index of the data location in the ringbuffer.

#ifdef RINGBUF_DEBUG_STACK
    Threading::Mutex m_lock_Stack;

@@ -317,14 +319,13 @@ public:
    void WaitGS();
    void ResetGS();

    int PrepDataPacket( MTGS_RingCommand cmd, u32 size );
    int PrepDataPacket( GIF_PATH pathidx, const u8* srcdata, u32 size );
    void PrepDataPacket( MTGS_RingCommand cmd, u32 size );
    void PrepDataPacket( GIF_PATH pathidx, u32 size );
    void SendDataPacket();
    void SendGameCRC( u32 crc );
    void WaitForOpen();
    void Freeze( int mode, MTGS_FreezeData& data );

    void RestartRingbuffer( uint packsize=0 );
    void SendSimplePacket( MTGS_RingCommand type, int data0, int data1, int data2 );
    void SendPointerPacket( MTGS_RingCommand type, u32 data0, void* data1 );

@@ -346,9 +347,10 @@ protected:
    void OnResumeInThread( bool IsSuspended );
    void OnCleanupInThread();

    void GenericStall( uint size );

    // Used internally by SendSimplePacket type functions
    uint _PrepForSimplePacket();
    void _FinishSimplePacket( uint future_writepos );
    void _FinishSimplePacket();
    void ExecuteTaskInThread();
};

@@ -416,3 +418,36 @@ extern int g_nLeftGSFrames;

#endif

// Size of the ringbuffer as a power of 2 -- size is a multiple of simd128s.
// (actual size is 1<<m_RingBufferSizeFactor simd vectors [128-bit values])
// A value of 19 is an 8meg ring buffer. 18 would be 4 megs, and 20 would be 16 megs.
// Default was 2mb, but some games with lots of MTGS activity want 8mb to run fast (rama)
static const uint RingBufferSizeFactor = 19;

// size of the ringbuffer in simd128's.
static const uint RingBufferSize = 1<<RingBufferSizeFactor;

// Mask to apply to ring buffer indices to wrap the pointer from end to
// start (the wrapping is what makes it a ringbuffer, yo!)
static const uint RingBufferMask = RingBufferSize - 1;

struct MTGS_BufferedData
{
    u128 m_Ring[RingBufferSize];
    u8 Regs[Ps2MemSize::GSregs];

    MTGS_BufferedData() {}

    u128& operator[]( uint idx )
    {
        pxAssert( idx < RingBufferSize );
        return m_Ring[idx];
    }
};

extern __aligned(32) MTGS_BufferedData RingBuffer;

// FIXME: These belong in common with other memcpy tools. Will move them there later if no one
// else beats me to it. --air
extern void MemCopy_WrappedDest( const u128* src, u128* destBase, uint& destStart, uint destSize, uint len );
extern void MemCopy_WrappedSrc( const u128* srcBase, uint& srcStart, uint srcSize, u128* dest, uint len );
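The two MemCopy_Wrapped* helpers are declared but not defined in this excerpt. A plausible sketch of MemCopy_WrappedDest under the signature above, assuming len counts u128 quadwords and the copy splits in two when it crosses the end of the ring (this is not the commit's actual implementation):

    // Sketch only: split the copy at the ring's wrap point.
    void MemCopy_WrappedDest( const u128* src, u128* destBase, uint& destStart, uint destSize, uint len )
    {
        uint endpos = destStart + len;
        if( endpos < destSize )
        {
            memcpy_qwc( destBase + destStart, src, len );
            destStart += len;
        }
        else
        {
            uint firstcopylen = destSize - destStart;  // quadwords up to the wrap point
            memcpy_qwc( destBase + destStart, src, firstcopylen );
            destStart = endpos & (destSize - 1);       // assumes destSize is a power of two
            memcpy_qwc( destBase, src + firstcopylen, destStart );
        }
    }

This is the same split-at-the-wrap pattern the GS thread uses below when dispatching P1/P2/P3 packets to GSgifTransfer.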
pcsx2/Gif.cpp
@@ -36,7 +36,7 @@ static u32 gifqwc = 0;
static bool gifmfifoirq = false;

//Just some temporary bits to store Path1 transfers if another is in progress.
u8 Path1Buffer[0x1000000];
__aligned16 u8 Path1Buffer[0x1000000];
u32 Path1WritePos = 0;
u32 Path1ReadPos = 0;

@@ -57,23 +57,23 @@ void gsPath1Interrupt()
    if((gifRegs->stat.APATH <= GIF_APATH1 || (gifRegs->stat.IP3 == true && gifRegs->stat.APATH == GIF_APATH3)) && Path1WritePos > 0 && !gifRegs->stat.PSE)
    {
        gifRegs->stat.P1Q = false;
        while(Path1WritePos > 0)
        {
            u32 size = GetMTGS().PrepDataPacket(GIF_PATH_1, Path1Buffer + (Path1ReadPos * 16), (Path1WritePos - Path1ReadPos));
            u8* pDest = GetMTGS().GetDataPacketPtr();
            //DevCon.Warning("Flush Size = %x", size);

            memcpy_aligned(pDest, Path1Buffer + (Path1ReadPos * 16), size * 16);
            GetMTGS().SendDataPacket();

            Path1ReadPos += size;

            if(GSTransferStatus.PTH1 == STOPPED_MODE)
        if (uint size = (Path1WritePos - Path1ReadPos))
        {
            GetMTGS().PrepDataPacket(GIF_PATH_1, size);
            //DevCon.Warning("Flush Size = %x", size);
            while(size > 0)
            {
                gifRegs->stat.OPH = false;
                gifRegs->stat.APATH = GIF_APATH_IDLE;
                uint count = GIFPath_CopyTag(GIF_PATH_1, ((u128*)Path1Buffer) + Path1ReadPos, size);
                Path1ReadPos += count;
                size -= count;

                if(GSTransferStatus.PTH1 == STOPPED_MODE)
                {
                    gifRegs->stat.APATH = GIF_APATH_IDLE;
                }
            }
            GetMTGS().SendDataPacket();

            if(Path1ReadPos == Path1WritePos)
            {

@@ -105,7 +105,6 @@ __forceinline void gsInterrupt()

    if(GSTransferStatus.PTH3 >= PENDINGSTOP_MODE && gifRegs->stat.APATH == GIF_APATH3 )
    {
        gifRegs->stat.OPH = false;
        GSTransferStatus.PTH3 = STOPPED_MODE;
        gifRegs->stat.APATH = GIF_APATH_IDLE;
        if(gifRegs->stat.P1Q) gsPath1Interrupt();

@@ -150,11 +149,8 @@ __forceinline void gsInterrupt()

static u32 WRITERING_DMA(u32 *pMem, u32 qwc)
{
    int size = GetMTGS().PrepDataPacket(GIF_PATH_3, (u8*)pMem, qwc);
    u8* pgsmem = GetMTGS().GetDataPacketPtr();

    memcpy_aligned(pgsmem, pMem, size<<4);

    GetMTGS().PrepDataPacket(GIF_PATH_3, qwc);
    uint size = GIFPath_CopyTag(GIF_PATH_3, (u128*)pMem, qwc );
    GetMTGS().SendDataPacket();
    return size;
}

@@ -167,7 +163,6 @@ static u32 WRITERING_DMA(tDMA_TAG *pMem, u32 qwc)
int _GIFchain()
{
    tDMA_TAG *pMem;
    int qwc = 0;

    pMem = dmaGetAddr(gif->madr, false);
    if (pMem == NULL)

@@ -182,11 +177,6 @@ int _GIFchain()
        return -1;
    }

    //in Intermittent Mode if enabled, IMAGE_MODE transfers are sliced.

    ///(gifRegs->stat.IMT && GSTransferStatus.PTH3 <= IMAGE_MODE) qwc = min((int)gif->qwc, 8);
    /*else qwc = gif->qwc;*/

    return WRITERING_DMA(pMem, gif->qwc);
}

@@ -327,7 +317,7 @@ void GIFdma()

    //gifRegs->stat.OPH = true;
    //gifRegs->stat.OPH = true; // why set the GS output path flag here? (rama)
    gifRegs->stat.FQC = min((u16)0x10, gif->qwc);// FQC=31, hack ;) (for values of 31 that equal 16) [ used to be 0xE00; // APATH=3]
    //Check with Path3 masking games
    if (gif->qwc > 0) {

@@ -346,7 +336,7 @@ void GIFdma()

    }

    //gifRegs->stat.OPH = true;
    //gifRegs->stat.OPH = true; // why set the GS output path flag here? (rama)
    // Transfer Dn_QWC from Dn_MADR to GIF
    if ((gif->chcr.MOD == NORMAL_MODE) || (gif->qwc > 0)) // Normal Mode
    {

@@ -450,42 +440,44 @@ static __forceinline bool mfifoGIFrbTransfer()
    u32 mfifoqwc = min(gifqwc, (u32)gif->qwc);
    u32 *src;

    GetMTGS().PrepDataPacket(GIF_PATH_3, mfifoqwc);

    // TODO (minor optimization): The new GIFpath parser can do rather efficient wrapping of
    // its own internally now. We just need to groom a version of it that can wrap around MFIFO
    // memory similarly to how it wraps VU1 memory on PATH1.

    /* Check if the transfer should wrap around the ring buffer */
    if ((gif->madr + mfifoqwc * 16) > (dmacRegs->rbor.ADDR + dmacRegs->rbsr.RMSK + 16))
    {
        uint s1 = ((dmacRegs->rbor.ADDR + dmacRegs->rbsr.RMSK + 16) - gif->madr) >> 4;
        uint s2 = (mfifoqwc - s1);
        // fixme - I don't think these should use WRITERING_DMA, since our source
        // isn't the DmaGetAddr(gif->madr) address that WRITERING_DMA expects.

        /* it does (wrap around), so first copy 's1' bytes from 'addr' to 'data' */
        /* and second copy 's2' bytes from 'maddr' to '&data[s1]' */

        src = (u32*)PSM(gif->madr);
        if (src == NULL) return false;
        s1 = WRITERING_DMA(src, s1);
        uint copied = GIFPath_CopyTag(GIF_PATH_3, (u128*)src, s1);

        if (s1 == (mfifoqwc - s2))
        if (copied == s1) // but only copy second if first didn't abort prematurely for some reason.
        {
            /* and second copy 's2' bytes from 'maddr' to '&data[s1]' */
            src = (u32*)PSM(dmacRegs->rbor.ADDR);
            if (src == NULL) return false;
            s2 = WRITERING_DMA(src, s2);
        }
        else
        {
            s2 = 0;
            copied += GIFPath_CopyTag(GIF_PATH_3, (u128*)src, s2);
        }

        mfifoqwc = s1 + s2;
        mfifoqwc = copied;
    }
    else
    {
        /* it doesn't, so just transfer 'qwc*16' words from 'gif->madr' to GS */
        src = (u32*)PSM(gif->madr);
        if (src == NULL) return false;
        mfifoqwc = WRITERING_DMA(src, mfifoqwc);
        mfifoqwc = GIFPath_CopyTag(GIF_PATH_3, (u128*)src, mfifoqwc);
        gif->madr = dmacRegs->rbor.ADDR + (gif->madr & dmacRegs->rbsr.RMSK);
    }

    GetMTGS().SendDataPacket();
    gifqwc -= mfifoqwc;

    return true;
@@ -571,36 +563,36 @@ void mfifoGIFtransfer(int qwc)

    switch (ptag->ID)
    {
        case TAG_REFE: // Refe - Transfer Packet According to ADDR field
            gif->tadr = qwctag(gif->tadr + 16);
            gifstate = GIF_STATE_DONE; //End Transfer
            break;

        case TAG_CNT: // CNT - Transfer QWC following the tag.
            gif->madr = qwctag(gif->tadr + 16); //Set MADR to QW after Tag
            gif->tadr = qwctag(gif->madr + (gif->qwc << 4)); //Set TADR to QW following the data
            gifstate = GIF_STATE_READY;
            break;

        case TAG_NEXT: // Next - Transfer QWC following tag. TADR = ADDR
        {
            u32 temp = gif->madr; //Temporarily Store ADDR
            gif->madr = qwctag(gif->tadr + 16); //Set MADR to QW following the tag
            gif->tadr = temp; //Copy temporarily stored ADDR to Tag
            gifstate = GIF_STATE_READY;
            break;
        }

        case TAG_REF: // Ref - Transfer QWC from ADDR field
        case TAG_REFS: // Refs - Transfer QWC from ADDR field (Stall Control)
            gif->tadr = qwctag(gif->tadr + 16); //Set TADR to next tag
            gifstate = GIF_STATE_READY;
            break;

        case TAG_END: // End - Transfer QWC following the tag
            gif->madr = qwctag(gif->tadr + 16); //Set MADR to data following the tag
            gif->tadr = qwctag(gif->madr + (gif->qwc << 4)); //Set TADR to QW following the data
            gifstate = GIF_STATE_DONE; //End Transfer
            break;
    }

(The hunk above is whitespace-only churn in the original diff; the old and new lines are identical once indentation is normalized, so each pair is shown once.)

@@ -638,7 +630,6 @@ void gifMFIFOInterrupt()

    if(GSTransferStatus.PTH3 == STOPPED_MODE && gifRegs->stat.APATH == GIF_APATH3 )
    {
        gifRegs->stat.OPH = false;
        gifRegs->stat.APATH = GIF_APATH_IDLE;
        if(gifRegs->stat.P1Q) gsPath1Interrupt();
    }

pcsx2/Gif.h

@@ -290,7 +290,7 @@ extern void gifMFIFOInterrupt();

//Just some temporary bits to store Path1 transfers if another is in progress.
extern void gsPath1Interrupt();
extern u8 Path1Buffer[0x1000000];
extern __aligned16 u8 Path1Buffer[0x1000000];
extern u32 Path1WritePos;
extern u32 Path1ReadPos;
#endif
pcsx2/MTGS.cpp
@@ -29,7 +29,7 @@

using namespace Threading;

#if 0 // PCSX2_DEBUG
#if 0 //PCSX2_DEBUG
# define MTGS_LOG Console.WriteLn
#else
# define MTGS_LOG 0&&

@@ -46,34 +46,7 @@ using namespace Threading;
// MTGS Threaded Class Implementation
// =====================================================================================================

// Size of the ringbuffer as a power of 2 -- size is a multiple of simd128s.
// (actual size is 1<<m_RingBufferSizeFactor simd vectors [128-bit values])
// A value of 19 is an 8meg ring buffer. 18 would be 4 megs, and 20 would be 16 megs.
// Default was 2mb, but some games with lots of MTGS activity want 8mb to run fast (rama)
static const uint RingBufferSizeFactor = 19;

// size of the ringbuffer in simd128's.
static const uint RingBufferSize = 1<<RingBufferSizeFactor;

// Mask to apply to ring buffer indices to wrap the pointer from end to
// start (the wrapping is what makes it a ringbuffer, yo!)
static const uint RingBufferMask = RingBufferSize - 1;

struct MTGS_BufferedData
{
    u128 m_Ring[RingBufferSize];
    u8 Regs[Ps2MemSize::GSregs];

    MTGS_BufferedData() {}

    u128& operator[]( uint idx )
    {
        pxAssert( idx < RingBufferSize );
        return m_Ring[idx];
    }
};

static __aligned(32) MTGS_BufferedData RingBuffer;
__aligned(32) MTGS_BufferedData RingBuffer;
extern bool renderswitch;
@@ -97,16 +70,16 @@ void SysMtgsThread::OnStart()
{
    m_PluginOpened = false;

    m_RingPos = 0;
    m_ReadPos = 0;
    m_WritePos = 0;
    m_RingBufferIsBusy = false;
    m_packet_size = 0;
    m_packet_ringpos = 0;
    m_packet_writepos = 0;

    m_QueuedFrameCount = 0;
    m_VsyncSignalListener = false;
    m_SignalRingEnable = 0;
    m_SignalRingPosition= 0;
    m_RingWrapSpot = 0;

    m_CopyDataTally = 0;

@@ -125,12 +98,16 @@ void SysMtgsThread::OnResumeReady()

void SysMtgsThread::ResetGS()
{
    pxAssertDev( !IsOpen() || (m_ReadPos == m_WritePos), "Must close or terminate the GS thread prior to gsReset." );

    // MTGS Reset process:
    //  * clear the ringbuffer.
    //  * Signal a reset.
    //  * clear the path and byRegs structs (used by GIFtagDummy)

    m_RingPos = m_WritePos;
    m_ReadPos = m_WritePos;
    m_QueuedFrameCount = 0;
    m_VsyncSignalListener = false;

    MTGS_LOG( "MTGS: Sending Reset..." );
    SendSimplePacket( GS_RINGTYPE_RESET, 0, 0, 0 );
@@ -155,30 +132,31 @@ void SysMtgsThread::PostVsyncEnd()
    // 256-byte copy is only a few dozen cycles -- executed 60 times a second -- so probably
    // not worth the effort or overhead of trying to selectively avoid it.

    PrepDataPacket(GS_RINGTYPE_VSYNC, sizeof(RingCmdPacket_Vsync));
    RingCmdPacket_Vsync& local( *(RingCmdPacket_Vsync*)GetDataPacketPtr() );
    uint packsize = sizeof(RingCmdPacket_Vsync) / 16;
    PrepDataPacket(GS_RINGTYPE_VSYNC, packsize);
    MemCopy_WrappedDest( (u128*)PS2MEM_GS, RingBuffer.m_Ring, m_packet_writepos, RingBufferSize, 0xf );

    memcpy_fast( local.regset1, PS2MEM_GS, sizeof(local.regset1) );
    local.csr = GSCSRr;
    local.imr = GSIMR;
    local.siglblid = GSSIGLBLID;
    u32* remainder = (u32*)GetDataPacketPtr();
    remainder[0] = GSCSRr;
    remainder[1] = GSIMR;
    (GSRegSIGBLID&)remainder[2] = GSSIGLBLID;
    m_packet_writepos = (m_packet_writepos + 1) & RingBufferMask;

    SendDataPacket();

    // Alter-frame flushing! Restarts the ringbuffer (wraps) on every other frame. This is a
    // mandatory feature that prevents the MTGS from queuing more than 2 frames at any time.
    // (queued frames cause input lag and desynced audio -- bad!). Ring restarts work for this
    // because they act as sync points where the EE must stall to wait for the GS to catch-up,
    // and they also allow us to reuse the front of the ringbuffer more often, which should improve
    // L2 cache performance.
    // Vsyncs should always start the GS thread, regardless of how little has actually been queued.
    if (m_CopyDataTally != 0) SetEvent();

    if( m_QueuedFrameCount > 0 )
        RestartRingbuffer();
    else
    {
        m_QueuedFrameCount++;
        SetEvent();
    }
    // If the MTGS is allowed to queue a lot of frames in advance, it creates input lag.
    // Use the Queued FrameCount to stall the EE if another vsync (or two) are already queued
    // in the ringbuffer. The queue limit is disabled when both FrameLimiting and Vsync are
    // disabled, since the queue can have perverse effects on framerate benchmarking.

    if ((AtomicIncrement(m_QueuedFrameCount) < EmuConfig.GS.VsyncQueueSize) || (!EmuConfig.GS.VsyncEnable && !EmuConfig.GS.FrameLimitEnable)) return;

    m_VsyncSignalListener = true;
    //Console.WriteLn( Color_Blue, "(EEcore Sleep) Vsync\t\tringpos=0x%06x, writepos=0x%06x", volatize(m_ReadPos), m_WritePos );
    m_sem_Vsync.WaitNoCancel();
}

struct PacketTagType
@@ -261,12 +239,14 @@ void SysMtgsThread::OpenPlugin()

class RingBufferLock : public ScopedLock
{
    typedef ScopedLock _parent;

protected:
    SysMtgsThread& m_mtgs;

public:
    RingBufferLock( SysMtgsThread& mtgs )
        : ScopedLock( mtgs.m_lock_RingBufferBusy )
        : ScopedLock( mtgs.m_mtx_RingBufferBusy )
        , m_mtgs( mtgs )
    {
        m_mtgs.m_RingBufferIsBusy = true;

@@ -276,6 +256,18 @@ public:
    {
        m_mtgs.m_RingBufferIsBusy = false;
    }

    void Acquire()
    {
        _parent::Acquire();
        m_mtgs.m_RingBufferIsBusy = true;
    }

    void Release()
    {
        m_mtgs.m_RingBufferIsBusy = false;
        _parent::Release();
    }
};

void SysMtgsThread::ExecuteTaskInThread()
@@ -284,31 +276,33 @@ void SysMtgsThread::ExecuteTaskInThread()
    PacketTagType prevCmd;
#endif

    RingBufferLock busy( *this );

    while( true )
    {
        busy.Release();

        // Performance note: Both of these perform cancellation tests, but pthread_testcancel
        // is very optimized (only 1 instruction test in most cases), so no point in trying
        // to avoid it.

        m_sem_event.WaitWithoutYield();
        StateCheckInThread();
        busy.Acquire();

        {
            RingBufferLock busy( *this );

        // note: m_RingPos is intentionally not volatile, because it should only
        // note: m_ReadPos is intentionally not volatile, because it should only
        // ever be modified by this thread.
        while( m_RingPos != volatize(m_WritePos))
        while( m_ReadPos != volatize(m_WritePos))
        {
            if( EmuConfig.GS.DisableOutput )
            {
                m_RingPos = m_WritePos;
                m_ReadPos = m_WritePos;
                continue;
            }

            pxAssert( m_RingPos < RingBufferSize );
            pxAssert( m_ReadPos < RingBufferSize );

            const PacketTagType& tag = (PacketTagType&)RingBuffer[m_RingPos];
            const PacketTagType& tag = (PacketTagType&)RingBuffer[m_ReadPos];
            u32 ringposinc = 1;

#ifdef RINGBUF_DEBUG_STACK

@@ -316,11 +310,11 @@ void SysMtgsThread::ExecuteTaskInThread()

            m_lock_Stack.Lock();
            uptr stackpos = ringposStack.back();
            if( stackpos != m_RingPos )
            if( stackpos != m_ReadPos )
            {
                Console.Error( "MTGS Ringbuffer Critical Failure ---> %x to %x (prevCmd: %x)\n", stackpos, m_RingPos, prevCmd.command );
                Console.Error( "MTGS Ringbuffer Critical Failure ---> %x to %x (prevCmd: %x)\n", stackpos, m_ReadPos, prevCmd.command );
            }
            pxAssert( stackpos == m_RingPos );
            pxAssert( stackpos == m_ReadPos );
            prevCmd = tag;
            ringposStack.pop_back();
            m_lock_Stack.Release();
@@ -330,38 +324,75 @@ void SysMtgsThread::ExecuteTaskInThread()
            {
                case GS_RINGTYPE_P1:
                {
                    uint datapos = (m_ReadPos+1) & RingBufferMask;
                    const int qsize = tag.data[0];
                    const u128* data = &RingBuffer[m_RingPos+1];
                    const u128* data = &RingBuffer[datapos];

                    MTGS_LOG( "(MTGS Packet Read) ringtype=P1, qwc=%u", qsize );

                    // make sure that tag>>16 is the MAX size readable
                    GSgifTransfer1((u32*)(data - 0x400 + qsize), 0x4000-qsize*16);
                    //GSgifTransfer1((u32*)data, qsize);
                    uint endpos = datapos + qsize;
                    if( endpos >= RingBufferSize )
                    {
                        uint firstcopylen = RingBufferSize - datapos;
                        GSgifTransfer( (u32*)data, firstcopylen );
                        datapos = endpos & RingBufferMask;
                        GSgifTransfer( (u32*)RingBuffer.m_Ring, datapos );
                    }
                    else
                    {
                        GSgifTransfer( (u32*)data, qsize );
                    }

                    ringposinc += qsize;
                }
                break;

                case GS_RINGTYPE_P2:
                {
                    uint datapos = (m_ReadPos+1) & RingBufferMask;
                    const int qsize = tag.data[0];
                    const u128* data = &RingBuffer[m_RingPos+1];
                    const u128* data = &RingBuffer[datapos];

                    MTGS_LOG( "(MTGS Packet Read) ringtype=P2, qwc=%u", qsize );

                    GSgifTransfer2((u32*)data, qsize);
                    uint endpos = datapos + qsize;
                    if( endpos >= RingBufferSize )
                    {
                        uint firstcopylen = RingBufferSize - datapos;
                        GSgifTransfer2( (u32*)data, firstcopylen );
                        datapos = endpos & RingBufferMask;
                        GSgifTransfer2( (u32*)RingBuffer.m_Ring, datapos );
                    }
                    else
                    {
                        GSgifTransfer2( (u32*)data, qsize );
                    }

                    ringposinc += qsize;
                }
                break;

                case GS_RINGTYPE_P3:
                {
                    uint datapos = (m_ReadPos+1) & RingBufferMask;
                    const int qsize = tag.data[0];
                    const u128* data = &RingBuffer[m_RingPos+1];
                    const u128* data = &RingBuffer[datapos];

                    MTGS_LOG( "(MTGS Packet Read) ringtype=P3, qwc=%u", qsize );

                    GSgifTransfer3((u32*)data, qsize);
                    uint endpos = datapos + qsize;
                    if( endpos >= RingBufferSize )
                    {
                        uint firstcopylen = RingBufferSize - datapos;
                        GSgifTransfer3( (u32*)data, firstcopylen );
                        datapos = endpos & RingBufferMask;
                        GSgifTransfer3( (u32*)RingBuffer.m_Ring, datapos );
                    }
                    else
                    {
                        GSgifTransfer3( (u32*)data, qsize );
                    }

                    ringposinc += qsize;
                }
                break;
@@ -370,25 +401,25 @@ void SysMtgsThread::ExecuteTaskInThread()
                {
                    switch( tag.command )
                    {
                        case GS_RINGTYPE_RESTART:
                            //MTGS_LOG( "(MTGS Packet Read) ringtype=Restart" );
                            m_RingPos = 0;
                            continue;

                        case GS_RINGTYPE_VSYNC:
                        {
                            const int qsize = tag.data[0];
                            ringposinc += qsize;

                            MTGS_LOG( "(MTGS Packet Read) ringtype=Vsync, field=%u, skip=%s", tag.data[0], tag.data[1] ? "true" : "false" );
                            MTGS_LOG( "(MTGS Packet Read) ringtype=Vsync, field=%u, skip=%s", !!(((u32&)RingBuffer.Regs[0x1000]) & 0x2000) ? 0 : 1, tag.data[1] ? "true" : "false" );

                            // Mail in the important GS registers.
                            RingCmdPacket_Vsync& local((RingCmdPacket_Vsync&)RingBuffer[m_RingPos+1]);
                            memcpy_fast( RingBuffer.Regs, local.regset1, sizeof(local.regset1));
                            ((u32&)RingBuffer.Regs[0x1000]) = local.csr;
                            ((u32&)RingBuffer.Regs[0x1010]) = local.imr;
                            ((GSRegSIGBLID&)RingBuffer.Regs[0x1080]) = local.siglblid;

                            // This seemingly obtuse system is needed in order to handle cases where the vsync data wraps
                            // around the edge of the ringbuffer. If not for that I'd just use a struct. >_<

                            uint datapos = (m_ReadPos+1) & RingBufferMask;
                            MemCopy_WrappedSrc( RingBuffer.m_Ring, datapos, RingBufferSize, (u128*)RingBuffer.Regs, 0xf );

                            u32* remainder = (u32*)&RingBuffer[datapos];
                            ((u32&)RingBuffer.Regs[0x1000]) = remainder[0];
                            ((u32&)RingBuffer.Regs[0x1010]) = remainder[1];
                            ((GSRegSIGBLID&)RingBuffer.Regs[0x1080]) = (GSRegSIGBLID&)remainder[2];

                            // CSR & 0x2000; is the pageflip id.
                            GSvsync(((u32&)RingBuffer.Regs[0x1000]) & 0x2000);
                            gsFrameSkip();
@@ -398,7 +429,13 @@ void SysMtgsThread::ExecuteTaskInThread()
                            if( (GSopen2 == NULL) && (PADupdate != NULL) )
                                PADupdate(0);

                            AtomicDecrement( m_QueuedFrameCount );
                            if (!!AtomicExchange(m_VsyncSignalListener, false))
                                m_sem_Vsync.Post();

                            busy.Release();
                            StateCheckInThread();
                            busy.Acquire();
                        }
                        break;

@@ -438,9 +475,9 @@ void SysMtgsThread::ExecuteTaskInThread()

#ifdef PCSX2_DEVBUILD
                        default:
                            Console.Error("GSThreadProc, bad packet (%x) at m_RingPos: %x, m_WritePos: %x", tag.command, m_RingPos, m_WritePos);
                            Console.Error("GSThreadProc, bad packet (%x) at m_ReadPos: %x, m_WritePos: %x", tag.command, m_ReadPos, m_WritePos);
                            pxFail( "Bad packet encountered in the MTGS Ringbuffer." );
                            m_RingPos = m_WritePos;
                            m_ReadPos = m_WritePos;
                            continue;
#else
                        // Optimized performance in non-Dev builds.
@@ -450,23 +487,29 @@ void SysMtgsThread::ExecuteTaskInThread()
                }
            }

            uint newringpos = m_RingPos + ringposinc;
            pxAssert( newringpos <= RingBufferSize );
            m_RingPos = newringpos & RingBufferMask;
            uint newringpos = (m_ReadPos + ringposinc) & RingBufferMask;

            if( EmuConfig.GS.SynchronousMTGS )
            {
                pxAssert( m_WritePos == newringpos );
            }

            m_ReadPos = newringpos;

            if( m_SignalRingEnable != 0 )
            {
                // The EEcore has requested a signal after some amount of processed data.
                if( AtomicExchangeSub( m_SignalRingPosition, ringposinc ) <= 0 )
                {
                    // Make sure to post the signal after the m_RingPos has been updated...
                    // Make sure to post the signal after the m_ReadPos has been updated...
                    AtomicExchange( m_SignalRingEnable, 0 );
                    m_sem_OnRingReset.Post();
                    continue;
                }
            }
        }
    }

    busy.Release();

    // Safety valve in case standard signals fail for some reason -- this ensures the EEcore
    // won't sleep the eternity, even if SignalRingPosition didn't reach 0 for some reason.

@@ -479,7 +522,10 @@ void SysMtgsThread::ExecuteTaskInThread()
        m_sem_OnRingReset.Post();
    }

    //Console.Warning( "(MTGS Thread) Nothing to do! ringpos=0x%06x", m_RingPos );
    if (!!AtomicExchange(m_VsyncSignalListener, false))
        m_sem_Vsync.Post();

    //Console.Warning( "(MTGS Thread) Nothing to do! ringpos=0x%06x", m_ReadPos );
}
}
|
@ -519,15 +565,15 @@ void SysMtgsThread::WaitGS()
|
|||
if( m_ExecMode == ExecMode_NoThreadYet || !IsRunning() ) return;
|
||||
if( !pxAssertDev( IsOpen(), "MTGS Warning! WaitGS issued on a closed thread." ) ) return;
|
||||
|
||||
if( volatize(m_RingPos) != m_WritePos )
|
||||
if( volatize(m_ReadPos) != m_WritePos )
|
||||
{
|
||||
SetEvent();
|
||||
RethrowException();
|
||||
|
||||
do {
|
||||
m_lock_RingBufferBusy.Wait();
|
||||
m_mtx_RingBufferBusy.Wait();
|
||||
RethrowException();
|
||||
} while( volatize(m_RingPos) != m_WritePos );
|
||||
} while( volatize(m_ReadPos) != m_WritePos );
|
||||
}
|
||||
|
||||
// Completely synchronize GS and MTGS register states.
|
||||
|
@ -546,7 +592,7 @@ void SysMtgsThread::SetEvent()
|
|||
|
||||
u8* SysMtgsThread::GetDataPacketPtr() const
|
||||
{
|
||||
return (u8*)&RingBuffer[m_packet_ringpos];
|
||||
return (u8*)&RingBuffer[m_packet_writepos & RingBufferMask];
|
||||
}
|
||||
|
||||
// Closes the data packet send command, and initiates the gs thread (if needed).
|
||||
|
@@ -555,31 +601,14 @@ void SysMtgsThread::SendDataPacket()
    // make sure a previous copy block has been started somewhere.
    pxAssert( m_packet_size != 0 );

    uint temp = m_packet_ringpos + m_packet_size;
    pxAssert( temp <= RingBufferSize );
    temp &= RingBufferMask;
    uint actualSize = ((m_packet_writepos - m_packet_startpos) & RingBufferMask)-1;
    pxAssert( actualSize <= m_packet_size );
    pxAssert( m_packet_writepos < RingBufferSize );

    if( IsDebugBuild )
    {
        if( m_packet_ringpos + m_packet_size < RingBufferSize )
        {
            uint readpos = volatize(m_RingPos);
            if( readpos != m_WritePos )
            {
                // The writepos should never leapfrog the readpos
                // since that indicates a bad write.
                if( m_packet_ringpos < readpos )
                    pxAssert( temp < readpos );
            }
    PacketTagType& tag = (PacketTagType&)RingBuffer[m_packet_startpos];
    tag.data[0] = actualSize;

            // Updating the writepos should never make it equal the readpos, since
            // that would stop the buffer prematurely (and indicates bad code in the
            // ringbuffer manager)
            pxAssert( readpos != temp );
        }
    }

    m_WritePos = temp;
    m_WritePos = m_packet_writepos;

    if( EmuConfig.GS.SynchronousMTGS )
    {
@ -596,142 +625,95 @@ void SysMtgsThread::SendDataPacket()
|
|||
//m_PacketLocker.Release();
|
||||
}
|
||||
|
||||
int SysMtgsThread::PrepDataPacket( MTGS_RingCommand cmd, u32 size )
|
||||
void SysMtgsThread::GenericStall( uint size )
|
||||
{
|
||||
// Note on volatiles: m_WritePos is not modified by the GS thread, so there's no need
|
||||
// to use volatile reads here. We do cache it though, since we know it never changes,
|
||||
// except for calls to RingbufferRestert() -- handled below.
|
||||
uint writepos = m_WritePos;
|
||||
|
||||
// Checks if a previous copy was started without an accompanying call to GSRINGBUF_DONECOPY
|
||||
pxAssert( m_packet_size == 0 );
|
||||
const uint writepos = m_WritePos;
|
||||
|
||||
// Sanity checks! (within the confines of our ringbuffer please!)
|
||||
pxAssert( size < RingBufferSize );
|
||||
pxAssert( writepos < RingBufferSize );
|
||||
|
||||
// generic gs wait/stall.
|
||||
// if the writepos is past the readpos then we're safe.
|
||||
// But if not then we need to make sure the readpos is outside the scope of
|
||||
// the block about to be written (writepos + size)
|
||||
|
||||
uint readpos = volatize(m_ReadPos);
|
||||
uint freeroom;
|
||||
|
||||
if (writepos < readpos)
|
||||
freeroom = readpos - writepos;
|
||||
else
|
||||
freeroom = RingBufferSize - (writepos - readpos);
|
||||
|
||||
if (freeroom <= size)
|
||||
{
|
||||
// writepos will overlap readpos if we commit the data, so we need to wait until
|
||||
// readpos is out past the end of the future write pos, or until it wraps around
|
||||
// (in which case writepos will be >= readpos).
|
||||
|
||||
// Ideally though we want to wait longer, because if we just toss in this packet
|
||||
// the next packet will likely stall up too. So lets set a condition for the MTGS
|
||||
// thread to wake up the EE once there's a sizable chunk of the ringbuffer emptied.
|
||||
|
||||
uint somedone = (RingBufferSize - freeroom) / 4;
|
||||
if( somedone < size+1 ) somedone = size + 1;
|
||||
|
||||
// FMV Optimization: FMVs typically send *very* little data to the GS, in some cases
|
||||
// every other frame is nothing more than a page swap. Sleeping the EEcore is a
|
||||
// waste of time, and we get better results using a spinwait.
|
||||
|
||||
if( somedone > 0x80 )
|
||||
{
|
||||
pxAssertDev( m_SignalRingEnable == 0, "MTGS Thread Synchronization Error" );
|
||||
m_SignalRingPosition = somedone;
|
||||
|
||||
//Console.WriteLn( Color_Blue, "(EEcore Sleep) PrepDataPacker \tringpos=0x%06x, writepos=0x%06x, signalpos=0x%06x", readpos, writepos, m_SignalRingPosition );
|
||||
|
||||
while(true) {
|
||||
AtomicExchange( m_SignalRingEnable, 1 );
|
||||
SetEvent();
|
||||
m_sem_OnRingReset.WaitWithoutYield();
|
||||
readpos = volatize(m_ReadPos);
|
||||
//Console.WriteLn( Color_Blue, "(EEcore Awake) Report!\tringpos=0x%06x", readpos );
|
||||
|
||||
if (writepos < readpos)
|
||||
freeroom = readpos - writepos;
|
||||
else
|
||||
freeroom = RingBufferSize - (writepos - readpos);
|
||||
|
||||
if (freeroom > size) break;
|
||||
}
|
||||
|
||||
pxAssertDev( m_SignalRingPosition <= 0, "MTGS Thread Synchronization Error" );
|
||||
}
|
||||
else
|
||||
{
|
||||
//Console.WriteLn( Color_StrongGray, "(EEcore Spin) PrepDataPacket!" );
|
||||
SetEvent();
|
||||
while(true) {
|
||||
SpinWait();
|
||||
readpos = volatize(m_ReadPos);
|
||||
|
||||
if (writepos < readpos)
|
||||
freeroom = readpos - writepos;
|
||||
else
|
||||
freeroom = RingBufferSize - (writepos - readpos);
|
||||
|
||||
if (freeroom > size) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}

void SysMtgsThread::PrepDataPacket( MTGS_RingCommand cmd, u32 size )
{
	m_packet_size = size;
	++size;			// takes into account our RingCommand QWC.

	if( writepos + size < RingBufferSize )
	{
		// generic gs wait/stall.
		// if the writepos is past the readpos then we're safe.
		// But if not then we need to make sure the readpos is outside the scope of
		// the block about to be written (writepos + size)

		uint readpos = volatize(m_RingPos);
		if( (writepos < readpos) && (writepos+size >= readpos) )
		{
			// writepos is behind the readpos and will overlap it if we commit the data,
			// so we need to wait until readpos is out past the end of the future write pos,
			// or until it wraps around (in which case writepos will be >= readpos).

			// Ideally though we want to wait longer, because if we just toss in this packet
			// the next packet will likely stall up too.  So let's set a condition for the MTGS
			// thread to wake up the EE once there's a sizable chunk of the ringbuffer emptied.

			uint totalAccum	= (m_RingWrapSpot - readpos) + writepos;
			uint somedone	= totalAccum / 4;
			if( somedone < size+1 ) somedone = size + 1;

			// FMV Optimization: FMVs typically send *very* little data to the GS, in some cases
			// every other frame is nothing more than a page swap.  Sleeping the EEcore is a
			// waste of time, and we get better results using a spinwait.

			if( somedone > 0x80 )
			{
				pxAssertDev( m_SignalRingEnable == 0, "MTGS Thread Synchronization Error" );
				m_SignalRingPosition = somedone;

				//Console.WriteLn( Color_Blue, "(EEcore Sleep) GenStall \tringpos=0x%06x, writepos=0x%06x, wrapspot=0x%06x, signalpos=0x%06x", readpos, writepos, m_RingWrapSpot, m_SignalRingPosition );

				do {
					AtomicExchange( m_SignalRingEnable, 1 );
					SetEvent();
					m_sem_OnRingReset.WaitWithoutYield();
					readpos = volatize(m_RingPos);
					//Console.WriteLn( Color_Blue, "(EEcore Awake) Report!\tringpos=0x%06x", readpos );
				} while( (writepos < readpos) && (writepos+size >= readpos) );

				pxAssertDev( m_SignalRingPosition <= 0, "MTGS Thread Synchronization Error" );
			}
			else
			{
				SetEvent();
				do {
					SpinWait();
					readpos = volatize(m_RingPos);
				} while( (writepos < readpos) && (writepos+size >= readpos) );
			}
		}
	}
	else if( writepos + size > RingBufferSize )
	{
		pxAssert( writepos != 0 );

		// If the incoming packet doesn't fit, then start over from the start of the ring
		// buffer (it's a lot easier than trying to wrap the packet around the end of the
		// buffer).

		//Console.WriteLn( "MTGS > Ringbuffer Got Filled!");
		RestartRingbuffer( size );
		writepos = m_WritePos;
	}
	else	// always true - if( writepos + size == MTGS_RINGBUFFEREND )
	{
		// Yay.  Perfect fit.  What are the odds?
		// Copy is ready so long as readpos is less than writepos and *not* equal to the
		// base of the ringbuffer (otherwise the buffer will stop when the writepos is
		// wrapped around to zero later-on in SendDataPacket).

		uint readpos = volatize(m_RingPos);
		//Console.WriteLn( "MTGS > Perfect Fit!\tringpos=0x%06x, writepos=0x%06x", readpos, writepos );
		if( readpos > writepos || readpos == 0 )
		{
			uint totalAccum	= (readpos == 0) ? RingBufferSize : ((m_RingWrapSpot - readpos) + writepos);
			uint somedone	= totalAccum / 4;
			if( somedone < size+1 ) somedone = size + 1;

			// FMV Optimization: (see above) This condition of a perfect fit is so rare that optimizing
			// for it is pointless -- but it was also mindlessly simple copy-paste.  So there. :p

			if( somedone > 0x80 )
			{
				m_SignalRingPosition = somedone;

				//Console.WriteLn( Color_Blue, "(MTGS Sync) EEcore Perfect Sleep!\twrapspot=0x%06x, ringpos=0x%06x, writepos=0x%06x, signalpos=0x%06x", m_RingWrapSpot, readpos, writepos, m_SignalRingPosition );

				do {
					AtomicExchange( m_SignalRingEnable, 1 );
					SetEvent();
					m_sem_OnRingReset.WaitWithoutYield();
					readpos = volatize(m_RingPos);
					//Console.WriteLn( Color_Blue, "(MTGS Sync) EEcore Perfect Post-sleep Report!\tringpos=0x%06x", readpos );
				} while( (writepos < readpos) || (readpos==0) );

				pxAssertDev( m_SignalRingPosition <= 0, "MTGS Thread Synchronization Error" );
			}
			else
			{
				//Console.WriteLn( Color_Blue, "(MTGS Sync) EEcore Perfect Spin!" );
				SetEvent();
				do {
					SpinWait();
					readpos = volatize(m_RingPos);
				} while( (writepos < readpos) || (readpos==0) );
			}
		}

		m_QueuedFrameCount = 0;
		m_RingWrapSpot = RingBufferSize;
	}

#ifdef RINGBUF_DEBUG_STACK
	m_lock_Stack.Lock();
	ringposStack.push_front( writepos );
	m_lock_Stack.Release();
#endif

	GenericStall(size);

	// Command qword: Low word is the command, and the high word is the packet
	// length in SIMDs (128 bits).

@@ -739,9 +721,8 @@ int SysMtgsThread::PrepDataPacket( MTGS_RingCommand cmd, u32 size )

	PacketTagType& tag = (PacketTagType&)RingBuffer[m_WritePos];
	tag.command = cmd;
	tag.data[0] = m_packet_size;
	m_packet_ringpos = m_WritePos + 1;

	return m_packet_size;
	m_packet_startpos = m_WritePos;
	m_packet_writepos = (m_WritePos + 1) & RingBufferMask;
}
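The effect of the refactor is easiest to see at a call site. A minimal sketch of the new usage pattern, mirroring the XGKICK call sites later in this commit (it assumes the surrounding PCSX2 headers; names are exactly those in the diff):

	// Sketch only: PrepDataPacket now just reserves ring space, and
	// GIFPath_CopyTag parses the GIFtags *and* copies the data into the
	// ring in the same pass -- that is the "copies as it parses" speedup.
	void SendPath1Packet(const u128* vuMemPacket, u32 maxQwc)
	{
		GetMTGS().PrepDataPacket(GIF_PATH_1, maxQwc);       // reserve/stall
		GIFPath_CopyTag(GIF_PATH_1, vuMemPacket, maxQwc);   // parse + copy in one pass
		GetMTGS().SendDataPacket();                         // commit to the MTGS thread
	}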

// Returns the amount of giftag data processed (in simd128 values).

@@ -749,132 +730,17 @@ int SysMtgsThread::PrepDataPacket( MTGS_RingCommand cmd, u32 size )
// around VU memory instead of having buffer overflow...
// Parameters:
//  size - size of the packet data, in smd128's
int SysMtgsThread::PrepDataPacket( GIF_PATH pathidx, const u8* srcdata, u32 size )
void SysMtgsThread::PrepDataPacket( GIF_PATH pathidx, u32 size )
{
	//m_PacketLocker.Acquire();

	return PrepDataPacket( (MTGS_RingCommand)pathidx, GIFPath_ParseTag(pathidx, srcdata, size) );
	PrepDataPacket( (MTGS_RingCommand)pathidx, size );
}

void SysMtgsThread::RestartRingbuffer( uint packsize )
__forceinline void SysMtgsThread::_FinishSimplePacket()
{
	if( m_WritePos == 0 ) return;
	const uint thefuture = packsize;

	//Console.WriteLn( Color_Magenta, "**** Ringbuffer Restart!!" );
	// Always kick the MTGS into action for a ringbuffer restart.
	SetEvent();

	uint readpos = volatize(m_RingPos);

	if( (readpos > m_WritePos) || (readpos <= thefuture) )
	{
		// We have to be careful not to leapfrog our read-position, which would happen if
		// it's greater than the current write position (since wrapping writepos to 0 would
		// be the act of skipping PAST readpos).  Stall until it loops around to the
		// beginning of the buffer, and past the size of our packet allocation.

		uint somedone;

		if( readpos > m_WritePos )
			somedone = (m_RingWrapSpot - readpos) + packsize + 1;
		else
			somedone = (packsize + 1) - readpos;

		if( somedone > 0x80 )
		{
			m_SignalRingPosition = somedone;
			//Console.WriteLn( Color_Blue, "(EEcore Sleep) Restart!\tringpos=0x%06x, writepos=0x%06x, wrapspot=0x%06x, signalpos=0x%06x",
			//	readpos, m_WritePos, m_RingWrapSpot, m_SignalRingPosition );

			do {
				AtomicExchange( m_SignalRingEnable, 1 );
				SetEvent();
				m_sem_OnRingReset.WaitWithoutYield();
				readpos = volatize(m_RingPos);
				//Console.WriteLn( Color_Blue, "(EEcore Awake) Report!\tringpos=0x%06x", readpos );
			} while( (readpos > m_WritePos) || (readpos <= thefuture) );
		}
		else
		{
			SetEvent();
			do {
				SpinWait();
				readpos = volatize(m_RingPos);
			} while( (readpos > m_WritePos) || (readpos <= thefuture) );
		}
	}

	PacketTagType& tag = (PacketTagType&)RingBuffer[m_WritePos];

	tag.command = GS_RINGTYPE_RESTART;

	m_RingWrapSpot = m_WritePos;
	m_WritePos = 0;
	m_QueuedFrameCount = 0;

	if( EmuConfig.GS.SynchronousMTGS )
		WaitGS();
}

__forceinline uint SysMtgsThread::_PrepForSimplePacket()
{
#ifdef RINGBUF_DEBUG_STACK
	m_lock_Stack.Lock();
	ringposStack.push_front( m_WritePos );
	m_lock_Stack.Release();
#endif

	uint future_writepos = m_WritePos+1;
	pxAssert( future_writepos <= RingBufferSize );

	future_writepos &= RingBufferMask;
	if( future_writepos == 0 )
	{
		m_QueuedFrameCount = 0;
		m_RingWrapSpot = RingBufferSize;
	}

	uint readpos = volatize(m_RingPos);
	if( future_writepos == readpos )
	{
		// The ringbuffer read pos is blocking the future write position, so stall out
		// until the read position has moved.

		uint totalAccum	= (m_RingWrapSpot - readpos) + future_writepos;
		uint somedone	= totalAccum / 4;

		if( somedone > 0x80 )
		{
			m_SignalRingPosition = somedone;

			//Console.WriteLn( Color_Blue, "(MTGS Sync) EEcore Simple Sleep!\t\twrapspot=0x%06x, ringpos=0x%06x, writepos=0x%06x, signalpos=0x%06x", m_RingWrapSpot, readpos, writepos, m_SignalRingPosition );

			do {
				AtomicExchange( m_SignalRingEnable, 1 );
				SetEvent();
				m_sem_OnRingReset.WaitWithoutYield();
				readpos = volatize(m_RingPos);
				//Console.WriteLn( Color_Blue, "(MTGS Sync) EEcore Simple Post-sleep Report!\tringpos=0x%06x", readpos );
			} while( future_writepos == readpos );

			pxAssertDev( m_SignalRingPosition <= 0, "MTGS Thread Synchronization Error" );
		}
		else
		{
			SetEvent();
			do {
				SpinWait();
			} while( future_writepos == volatize(m_RingPos) );
		}
	}

	return future_writepos;
}

__forceinline void SysMtgsThread::_FinishSimplePacket( uint future_writepos )
{
	pxAssert( future_writepos != volatize(m_RingPos) );
	uint future_writepos = (m_WritePos+1) & RingBufferMask;
	pxAssert( future_writepos != volatize(m_ReadPos) );
	m_WritePos = future_writepos;

	if( EmuConfig.GS.SynchronousMTGS )
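The `& RingBufferMask` idiom above is what the commit message calls "free-flowing wrapping": positions advance modulo a power-of-two buffer size, so the RESTART packet and wrap-spot bookkeeping become unnecessary. A minimal sketch (the buffer size here is hypothetical; only RingBufferMask = size-1 matters):

	// Free-flowing wrap: advancing a ring position with a power-of-two mask.
	#include <cassert>

	int main()
	{
		const unsigned RingBufferSize = 0x1000;      // must be a power of two
		const unsigned RingBufferMask = RingBufferSize - 1;

		unsigned writepos = RingBufferSize - 1;      // last slot in the ring
		writepos = (writepos + 1) & RingBufferMask;  // wraps straight to 0
		assert(writepos == 0);
		return 0;
	}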

@@ -887,7 +753,7 @@ void SysMtgsThread::SendSimplePacket( MTGS_RingCommand type, int data0, int data
{
	//ScopedLock locker( m_PacketLocker );

	const uint thefuture = _PrepForSimplePacket();
	GenericStall(1);
	PacketTagType& tag = (PacketTagType&)RingBuffer[m_WritePos];

	tag.command = type;

@@ -895,21 +761,21 @@ void SysMtgsThread::SendSimplePacket( MTGS_RingCommand type, int data0, int data
	tag.data[1] = data1;
	tag.data[2] = data2;

	_FinishSimplePacket( thefuture );
	_FinishSimplePacket();
}

void SysMtgsThread::SendPointerPacket( MTGS_RingCommand type, u32 data0, void* data1 )
{
	//ScopedLock locker( m_PacketLocker );

	const uint thefuture = _PrepForSimplePacket();
	GenericStall(1);
	PacketTagType& tag = (PacketTagType&)RingBuffer[m_WritePos];

	tag.command = type;
	tag.data[0] = data0;
	*(uptr*)&tag.data[1] = (uptr)data1;

	_FinishSimplePacket( thefuture );
	_FinishSimplePacket();
}

void SysMtgsThread::SendGameCRC( u32 crc )
@@ -217,6 +217,7 @@ Pcsx2Config::GSOptions::GSOptions()

	SynchronousMTGS		= false;
	DisableOutput		= false;
	VsyncQueueSize		= 2;

	DefaultRegionMode	= Region_NTSC;
	FramesToDraw		= 2;

@@ -234,6 +235,7 @@ void Pcsx2Config::GSOptions::LoadSave( IniInterface& ini )

	IniEntry( SynchronousMTGS );
	IniEntry( DisableOutput );
	IniEntry( VsyncQueueSize );

	IniEntry( FrameLimitEnable );
	IniEntry( FrameSkipEnable );
@@ -144,6 +144,7 @@ static s32 CALLBACK fallback_test() { return 0; }
_GSvsync          GSvsync;
_GSopen           GSopen;
_GSopen2          GSopen2;
_GSgifTransfer    GSgifTransfer;
_GSgifTransfer1   GSgifTransfer1;
_GSgifTransfer2   GSgifTransfer2;
_GSgifTransfer3   GSgifTransfer3;

@@ -309,7 +310,8 @@ static const LegacyApi_ReqMethod s_MethMessReq_GS[] =
{
	{	"GSopen",			(vMeth**)&GSopen,			NULL	},
	{	"GSvsync",			(vMeth**)&GSvsync,			NULL	},
	{	"GSgifTransfer1",	(vMeth**)&GSgifTransfer1,	NULL	},
	{	"GSgifTransfer",	(vMeth**)&GSgifTransfer,	NULL	},
	//{	"GSgifTransfer1",	(vMeth**)&GSgifTransfer1,	NULL	},
	{	"GSgifTransfer2",	(vMeth**)&GSgifTransfer2,	NULL	},
	{	"GSgifTransfer3",	(vMeth**)&GSgifTransfer3,	NULL	},
	{	"GSreadFIFO2",		(vMeth**)&GSreadFIFO2,		NULL	},
@@ -2057,21 +2057,8 @@ void _vuXGKICK(VURegs * VU)

	u8* data = ((u8*)VU->Mem + ((VU->VI[_Is_].US[0]*16) & 0x3fff));
	u32 size;
	size = GetMTGS().PrepDataPacket( GIF_PATH_1, data, (0x4000-((VU->VI[_Is_].US[0]*16) & 0x3fff)) >> 4);
	u8* pmem = GetMTGS().GetDataPacketPtr();

	if((size << 4) > (u32)(0x4000-((VU->VI[_Is_].US[0]*16) & 0x3fff)))
	{
		//DevCon.Warning("addr + Size = 0x%x, transferring %x then doing %x", ((VU->VI[_Is_].US[0]*16) & 0x3fff) + (size << 4), (0x4000-((VU->VI[_Is_].US[0]*16) & 0x3fff)) >> 4, size - (0x4000-((VU->VI[_Is_].US[0]*16) & 0x3fff) >> 4));
		memcpy_aligned(pmem, (u8*)VU->Mem+((VU->VI[_Is_].US[0]*16) & 0x3fff), 0x4000-((VU->VI[_Is_].US[0]*16) & 0x3fff));
		size -= (0x4000-((VU->VI[_Is_].US[0]*16) & 0x3fff)) >> 4;
		//DevCon.Warning("Size left %x", size);
		pmem += 0x4000-((VU->VI[_Is_].US[0]*16) & 0x3fff);
		memcpy_aligned(pmem, (u8*)VU->Mem, size<<4);
	}
	else {
		memcpy_aligned(pmem, (u8*)VU->Mem+((VU->VI[_Is_].US[0]*16) & 0x3fff), size<<4);
	}
	GetMTGS().PrepDataPacket( GIF_PATH_1, 0x400 );
	size = GIFPath_CopyTag( GIF_PATH_1, (u128*)data, (0x400-(VU->VI[_Is_].US[0] & 0x3ff)) );
	GetMTGS().SendDataPacket();
}
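Note that the size handed to GIFPath_CopyTag is only the distance from the kick address to the end of VU1 memory (0x400 qwc total); per the CopyTag comments later in this commit, PATH1 wraps back to the start of VU memory itself when no EOP lands in that span. A worked example of the address math (sketch, illustrative register value):

	// Worked example of the PATH1 size math above -- not emulator code.
	// VU1 data memory is 0x4000 bytes = 0x400 qwords; Is holds a qword address.
	#include <cstdio>

	int main()
	{
		unsigned is_reg     = 0x3F0;                    // hypothetical VI[Is] value
		unsigned byteOffset = (is_reg * 16) & 0x3fff;   // 0x3F00: byte address in VU1 mem
		unsigned qwcToEnd   = 0x400 - (is_reg & 0x3ff); // 0x10 qwords before the wrap

		// CopyTag is given those 0x10 qwords; if no EOP terminates the path in
		// that span, the parser rewinds pMem by 0x400 qwords and keeps going.
		printf("offset=0x%x qwcToEnd=0x%x\n", byteOffset, qwcToEnd);
		return 0;
	}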

@@ -345,7 +345,6 @@ __forceinline void vif1Interrupt()

	if(GSTransferStatus.PTH2 == STOPPED_MODE && gifRegs->stat.APATH == GIF_APATH2)
	{
		gifRegs->stat.OPH = false;
		gifRegs->stat.APATH = GIF_APATH_IDLE;
		if(gifRegs->stat.P1Q) gsPath1Interrupt();
	}

@@ -440,11 +439,6 @@ __forceinline void vif1Interrupt()
	if (vif1.cmd != 0) Console.WriteLn("vif1.cmd still set %x tag size %x", vif1.cmd, vif1.tag.size);
#endif

	if((vif1ch->chcr.DIR == VIF_NORMAL_TO_MEM_MODE) && vif1.GSLastDownloadSize <= 16)
	{	// Reverse fifo has finished and nothing is left, so let's clear the outputting flag
		gifRegs->stat.OPH = false;
	}
	vif1ch->chcr.STR = false;
	vif1.vifstalled = false;
	g_vifCycles = 0;
@@ -239,7 +239,6 @@ void vifMFIFOInterrupt()
	if(GSTransferStatus.PTH2 == STOPPED_MODE && gifRegs->stat.APATH == GIF_APATH2)
	{
		GSTransferStatus.PTH2 = STOPPED_MODE;
		if(gifRegs->stat.DIR == 0) gifRegs->stat.OPH = false;
		gifRegs->stat.APATH = GIF_APATH_IDLE;
		if(gifRegs->stat.P1Q) gsPath1Interrupt();
		/*gifRegs->stat.APATH = GIF_APATH_IDLE;
@@ -167,10 +167,16 @@ template<int idx> _f int _vifCode_Direct(int pass, u8* data, bool isDirectHL) {
		return 0;
	}

	// HACK ATTACK!
	// we shouldn't be clearing the queue flag here at all.  Ideally, the queue statuses
	// should be checked, handled, and cleared from the EOP check in GIFPath only. --air
	gifRegs->stat.clear_flags(GIF_STAT_P2Q);

	// the tag size should ALWAYS be 128 bits (qwc).  If it isn't, it means there's a serious bug
	// somewhere in the VIF (likely relating to +/-'ing the tag.size during processing).
	// NOTE: ICO [PAL] exploits this during bootup.  Needs investigation. --air
	//pxAssumeMsg( (vif1.tag.size & 3) == 0, "Invalid Vif1 DIRECT packet size detected!" );

	nVifStruct& v = nVif[1];
	const int ret = aMin(vif1.vifpacketsize, vif1.tag.size);
	u32 size = ret << 2;

@@ -184,8 +190,6 @@ template<int idx> _f int _vifCode_Direct(int pass, u8* data, bool isDirectHL) {

	if(vif1.vifpacketsize < 4 && v.bSize < 16)
	{
		nVifStruct& v = nVif[idx];

		memcpy(&v.buffer[v.bPtr], data, vif1.vifpacketsize << 2);
		v.bSize += vif1.vifpacketsize << 2;
		v.bPtr  += vif1.vifpacketsize << 2;

@@ -199,7 +203,6 @@ template<int idx> _f int _vifCode_Direct(int pass, u8* data, bool isDirectHL) {
	}
	else
	{
		nVifStruct& v = nVif[idx];
		if(v.bSize)
		{
			int ret = 0;

@@ -213,8 +216,8 @@ template<int idx> _f int _vifCode_Direct(int pass, u8* data, bool isDirectHL) {
			v.bSize = 0;
			v.bPtr = 0;
		}
		const uint count = GetMTGS().PrepDataPacket(GIF_PATH_2, v.buffer, 1);
		memcpy_fast(GetMTGS().GetDataPacketPtr(), v.buffer, count << 4);
		GetMTGS().PrepDataPacket(GIF_PATH_2, 1);
		GIFPath_CopyTag(GIF_PATH_2, (u128*)v.buffer, 1);
		GetMTGS().SendDataPacket();

		if(vif1.tag.size == 0)

@@ -226,16 +229,17 @@ template<int idx> _f int _vifCode_Direct(int pass, u8* data, bool isDirectHL) {
		}
		else
		{
			const uint count = GetMTGS().PrepDataPacket(GIF_PATH_2, data, size >> 4);
			memcpy_fast(GetMTGS().GetDataPacketPtr(), data, count << 4);
			GetMTGS().PrepDataPacket(GIF_PATH_2, size/16);
			uint count = GIFPath_CopyTag(GIF_PATH_2, (u128*)data, size/16) * 4;
			GetMTGS().SendDataPacket();
			vif1.tag.size -= count << 2;

			vif1.tag.size -= count;
			if(vif1.tag.size == 0)
			{
				vif1.cmd = 0;
			}
			vif1.vifstalled = true;
			return count << 2;
			return count;
		}
	}
}
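The unit conversions in the DIRECT path above are easy to misread: VIF sizes count 32-bit words, while GIFPath_CopyTag counts 128-bit qwords. Here is the arithmetic spelled out with illustrative values (a sketch, not emulator code):

	// Unit bookkeeping for the DIRECT path above (values hypothetical).
	#include <algorithm>
	#include <cstdio>

	int main()
	{
		unsigned vifpacketsize = 0x100;   // words available in this VIF packet
		unsigned tagsize       = 0x40;    // words left in the DIRECT tag

		unsigned ret   = std::min(vifpacketsize, tagsize); // 0x40 words
		unsigned bytes = ret << 2;                         // 0x100 bytes
		unsigned qwc   = bytes / 16;                       // 0x10 qwords for CopyTag
		unsigned count = qwc * 4;                          // back to words, as returned

		printf("words=0x%x bytes=0x%x qwc=0x%x count=0x%x\n", ret, bytes, qwc, count);
		return 0;
	}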

@@ -36,16 +36,8 @@ _vifT bool analyzeIbit(u32* &data, int iBit) {
	if (iBit && !vifX.cmd && !vifXRegs->err.MII) {
		//DevCon.WriteLn("Vif I-Bit IRQ");
		vifX.irq++;
		// On i-bit, the command is run, vif stalls etc,
		// however if the vifcode is MARK, you do NOT stall, just send IRQ. - Max Payne shows this up.
		//if(((vifXRegs->code >> 24) & 0x7f) == 0x7) return 0;

		// If we have a vifcode with i-bit, the following instruction
		// should stall unless it's MARK?.. we test that case here...
		// Not 100% sure if this is the correct behavior, so printing
		// a console message to see games that use this. (cottonvibes)

		// Okay did some testing with Max Payne, it does this
		// Okay did some testing with Max Payne, it does this:
		// VifMark  value = 0x666   (i know, evil!)
		// NOP with I Bit
		// VifMark  value = 0

@@ -53,6 +45,23 @@ _vifT bool analyzeIbit(u32* &data, int iBit) {
		// If you break after the 2nd Mark has run, the game reports invalid mark 0 and the game dies.
		// So it has to occur here, testing a theory that it only doesn't stall if the command with
		// the iBit IS mark, but still sends the IRQ to let the cpu know the mark is there. (Refraction)
		//
		// --------------------------
		//
		// This is how it probably works: i-bit sets the IRQ flag, and VIF keeps running until it encounters
		// a non-MARK instruction.  This includes the *current* instruction.  ie, execution only continues
		// unimpeded if MARK[i] is specified, and keeps executing unimpeded until any non-MARK command.
		// Any other command with an I bit should stall immediately.
		// Example:
		//
		// VifMark[i] value = 0x321   (with I bit)
		// VifMark    value = 0
		// VifMark    value = 0x333
		// NOP
		//
		// ... the VIF should not stall and raise the interrupt until after the NOP is processed.
		// So the final value for MARK as the game sees it will be 0x333. --air

		return runMark<idx>(data);
	}
	return 0;
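The rule described in that comment block can be restated in a few lines of logic. The following is only a restatement of the comment's theory, not the emulator's actual control flow, and raiseVifIrq() is a hypothetical stand-in for the real IRQ plumbing:

	// Sketch of the i-bit stall rule described above (assumptions as noted).
	#include <cstdio>

	static void raiseVifIrq() { std::puts("VIF IRQ latched"); }

	static bool shouldStallOnIbit(bool iBit, bool currentCmdIsMark)
	{
		if (!iBit) return false;   // no interrupt bit: nothing happens here
		raiseVifIrq();             // the i-bit always latches the IRQ...
		// ...but VIF keeps executing as long as it sees MARK commands (the
		// current one included); the first non-MARK command then stalls.
		return !currentCmdIsMark;
	}

	int main()
	{
		bool stall1 = shouldStallOnIbit(true, true);   // MARK[i]: IRQ raised, no stall
		bool stall2 = shouldStallOnIbit(true, false);  // non-MARK with i-bit: stalls
		return (!stall1 && stall2) ? 0 : 1;
	}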

@@ -134,10 +134,10 @@ bool AppDoAssert( const DiagnosticOrigin& origin, const wxChar *msg )
	wxString trace( pxGetStackTrace(origin.function) );
	wxString dbgmsg( origin.ToString( msg ) );

	wxMessageOutputDebug().Printf( dbgmsg );
	wxMessageOutputDebug().Printf( L"%s", dbgmsg );

	Console.Error( dbgmsg );
	Console.WriteLn( trace );
	Console.Error( L"%s", dbgmsg );
	Console.WriteLn( L"%s", trace );

	wxString windowmsg( L"Assertion failed: " );
	if( msg != NULL )
@@ -189,13 +189,13 @@ void Pcsx2App::DetectCpuAndUserMode()
	x86caps.CountCores();
	x86caps.SIMD_EstablishMXCSRmask();

	if( !x86caps.hasMultimediaExtensions )
	if( !x86caps.hasMultimediaExtensions || !x86caps.hasStreamingSIMDExtensions )
	{
		// Note: due to memcpy_fast, we need minimum MMX even for interpreters.  This will
		// hopefully change later once we have a dynamically recompiled memcpy.
		// Note: Due to optimizations to GIFpath parsers, memcpy, and possibly other things, we need
		// a bare minimum of SSE supported by the CPU.
		throw Exception::HardwareDeficiency()
			.SetDiagMsg(L"Critical Failure: MMX Extensions not available.")
			.SetUserMsg(_("MMX extensions are not available. PCSX2 requires cpu with MMX extension support to run."));
			.SetDiagMsg(L"Critical Failure: SSE Extensions not available.")
			.SetUserMsg(_("SSE extensions are not available. PCSX2 requires a cpu that supports the SSE instruction set."));
	}

	ReadUserModeSettings();
@@ -19,6 +19,7 @@
#include "Gif.h"
#include "Vif_Dma.h"
#include "Vif.h"
#include <xmmintrin.h>

// --------------------------------------------------------------------------------------
//  GIFpath -- the GIFtag Parser

@@ -92,12 +93,16 @@ struct GIFPath

	void Reset();
	void PrepPackedRegs();
	void SetTag(const void* mem);
	bool StepReg();
	u8 GetReg();
	bool IsActive() const;

	int ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size);
	template< bool Aligned >
	void SetTag(const void* mem);

	template< GIF_PATH pathidx, bool Aligned >
	int CopyTag(const u128* pMem, u32 size);

	int ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size);
};

@@ -285,9 +290,11 @@ __forceinline void GIFPath::PrepPackedRegs()
	}
}

template< bool Aligned >
__forceinline void GIFPath::SetTag(const void* mem)
{
	const_cast<GIFTAG&>(tag) = *((GIFTAG*)mem);
	_mm_store_ps( (float*)&tag, Aligned ? _mm_load_ps((const float*)mem) : _mm_loadu_ps((const float*)mem) );

	nloop	= tag.NLOOP;
	curreg	= 0;
@@ -350,7 +357,8 @@ static __forceinline void gsHandler(const u8* pMem)
			// qwords, rounded down; any extra bits are lost
			// games must take care to ensure transfer rectangles are exact multiples of a qword
			vif1.GSLastDownloadSize = vif1.TRXREG.RRW * vif1.TRXREG.RRH * bpp >> 7;
			gifRegs->stat.OPH = true;
			//DevCon.Warning("GS download in progress. OPH = %x", gifRegs->stat.OPH);
			//gifRegs->stat.OPH = true; // Too early to set it here.  It should be done on a BUSDIR call (rama)
		}
	}
	if (reg >= 0x60)
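The `>> 7` in that download-size line converts a pixel count times bits-per-pixel into 128-bit qwords, rounded down as the comment warns. A worked example with hypothetical TRXREG values:

	// Worked example of the qword computation above (illustrative values only).
	#include <cstdio>

	int main()
	{
		unsigned RRW = 64, RRH = 32, bpp = 32;   // hypothetical transfer rectangle
		unsigned bits   = RRW * RRH * bpp;       // 65536 bits in the transfer
		unsigned qwords = bits >> 7;             // /128 -> 512 qwords, rounded down
		printf("%u qwords\n", qwords);
		return 0;
	}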

@@ -371,10 +379,9 @@ static __forceinline void gsHandler(const u8* pMem)
#define aMin(x, y) std::min(x, y)

// Parameters:
//   size (path1)   - difference between the end of VU memory and pMem.
//   size (path2/3) - max size of incoming data stream, in qwc (simd128)

// size - max size of incoming data stream, in qwc (simd128).  If the path is PATH1, and the
//   path does not terminate (EOP) within the specified size, it is assumed that the path must
//   loop around to the start of VU memory and continue processing.
__forceinline int GIFPath::ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size)
{
	u32 startSize = size;			// Start Size

@@ -382,7 +389,7 @@ __forceinline int GIFPath::ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 s
	while (size > 0) {
		if (!nloop) {

			SetTag(pMem);
			SetTag<false>(pMem);
			incTag(1);
		}
		else

@@ -509,6 +516,7 @@ __forceinline int GIFPath::ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 s

				Console.Warning("GIFTAG error, size exceeded VU memory size %x", startSize);
				nloop	= 0;
				const_cast<GIFTAG&>(tag).EOP = 1;
			}
		}
	}

@@ -521,15 +529,65 @@ __forceinline int GIFPath::ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 s
	return size;
}

__forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
__forceinline void MemCopy_WrappedDest( const u128* src, u128* destBase, uint& destStart, uint destSize, uint len )
{
	uint endpos = destStart + len;
	if( endpos < destSize )
	{
		memcpy_qwc(&destBase[destStart], src, len );
		destStart += len;
	}
	else
	{
		uint firstcopylen = destSize - destStart;
		memcpy_qwc(&destBase[destStart], src, firstcopylen );

		destStart = endpos % destSize;
		memcpy_qwc(destBase, src+firstcopylen, destStart );
	}
}

__forceinline void MemCopy_WrappedSrc( const u128* srcBase, uint& srcStart, uint srcSize, u128* dest, uint len )
{
	uint endpos = srcStart + len;
	if( endpos < srcSize )
	{
		memcpy_qwc(dest, &srcBase[srcStart], len );
		srcStart += len;
	}
	else
	{
		uint firstcopylen = srcSize - srcStart;
		memcpy_qwc(dest, &srcBase[srcStart], firstcopylen );

		srcStart = endpos % srcSize;
		memcpy_qwc(dest+firstcopylen, srcBase, srcStart );
	}
}
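The split-copy behavior of these helpers is easiest to see with a tiny buffer and plain memcpy. A self-contained demo of the same destination-wrapping logic, substituting ints for u128 qwords and std::memcpy for memcpy_qwc:

	// Standalone demo of MemCopy_WrappedDest's split-copy behavior.
	#include <cassert>
	#include <cstring>

	static void wrappedDestCopy(const int* src, int* destBase, unsigned& destStart,
	                            unsigned destSize, unsigned len)
	{
		unsigned endpos = destStart + len;
		if (endpos < destSize) {
			std::memcpy(&destBase[destStart], src, len * sizeof(int));
			destStart += len;
		} else {
			// Copy up to the end of the ring, then wrap to the base for the rest.
			unsigned firstcopylen = destSize - destStart;
			std::memcpy(&destBase[destStart], src, firstcopylen * sizeof(int));
			destStart = endpos % destSize;
			std::memcpy(destBase, src + firstcopylen, destStart * sizeof(int));
		}
	}

	int main()
	{
		int ring[8] = {0};
		const int src[4] = {1, 2, 3, 4};
		unsigned pos = 6;
		wrappedDestCopy(src, ring, pos, 8, 4);   // fills slots 6,7 then wraps to 0,1
		assert(pos == 2 && ring[6] == 1 && ring[7] == 2 && ring[0] == 3 && ring[1] == 4);
		return 0;
	}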

#define copyTag() do {						\
	_mm_store_ps( (float*)&RingBuffer.m_Ring[ringpos], Aligned ? _mm_load_ps((float*)pMem128) : _mm_loadu_ps((float*)pMem128));	\
	++pMem128; --size;						\
	ringpos = (ringpos+1)&RingBufferMask;	\
} while(false)

// Parameters:
// size - max size of incoming data stream, in qwc (simd128).  If the path is PATH1, and the
//   path does not terminate (EOP) within the specified size, it is assumed that the path must
//   loop around to the start of VU memory and continue processing.
template< GIF_PATH pathidx, bool Aligned >
__forceinline int GIFPath::CopyTag(const u128* pMem128, u32 size)
{
	uint& ringpos = GetMTGS().m_packet_writepos;
	const uint original_ringpos = ringpos;

	u32 startSize = size;			// Start Size

	while (size > 0) {
		if (!nloop) {

			SetTag(pMem);
			incTag(1);
			SetTag<Aligned>((u8*)pMem128);
			copyTag();

			if(nloop > 0)
			{
@@ -560,7 +618,7 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
				}
				if(GSTransferStatus.PTH3 < PENDINGSTOP_MODE || pathidx != 2)
				{
					gifRegs->stat.OPH = true;
					//gifRegs->stat.OPH = true; // why set the GS output path flag here? (rama)
					gifRegs->stat.APATH = pathidx + 1;
				}

@@ -588,7 +646,7 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
					break;
			}
			gifRegs->stat.APATH = pathidx + 1;
			gifRegs->stat.OPH = true;
			//gifRegs->stat.OPH = true; // why set the GS output path flag here? (rama)

			switch(tag.FLG) {
				case GIF_FLG_PACKED:

@@ -599,9 +657,9 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
					{
						do {
							if (GetReg() == 0xe) {
								gsHandler(pMem);
								gsHandler((u8*)pMem128);
							}
							incTag(1);
							copyTag();
						} while(StepReg() && size > 0 && SIGNAL_IMR_Pending == false);
					}
					else

@@ -644,11 +702,14 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
							curreg = 0;
							nloop = 0;
						}
						incTag(len);

						MemCopy_WrappedDest( pMem128, RingBuffer.m_Ring, ringpos, RingBufferSize, len );
						pMem128 += len;
						size -= len;
					}
				break;
				case GIF_FLG_REGLIST:
				{
				{
					GIF_LOG("Reglist Mode EOP %x", tag.EOP);

					// In reglist mode, the GIF packs 2 registers into each QWC.  The nloop however

@@ -687,8 +748,9 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
						nloop = 0;
					}

					incTag(len);

					MemCopy_WrappedDest( pMem128, RingBuffer.m_Ring, ringpos, RingBufferSize, len );
					pMem128 += len;
					size -= len;
				}
				break;
				case GIF_FLG_IMAGE:

@@ -696,13 +758,15 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
				{
					GIF_LOG("IMAGE Mode EOP %x", tag.EOP);
					int len = aMin(size, nloop);
					incTag(len);

					MemCopy_WrappedDest( pMem128, RingBuffer.m_Ring, ringpos, RingBufferSize, len );

					pMem128 += len;
					size -= len;
					nloop -= len;
				}
				break;
			}

		}

		if(pathidx == GIF_PATH_1)

@@ -713,11 +777,11 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
			{
				size = 0x3ff - startSize;
				startSize = 0x3ff;
				pMem -= 0x4000;
				pMem128 -= 0x400;
			}
			else
			{
				// Note: The BIOS does an XGKICK on the VU1 and lets yt DMA to the GS without an EOP
				// Note: The BIOS does an XGKICK on the VU1 and lets it DMA to the GS without an EOP
				// (seemingly to loop forever), only to write an EOP later on.  No other game is known to
				// do anything of the sort.
				// So let's just cap the DMA at 16k, and force it to "look" like it's terminated for now.

@@ -727,6 +791,12 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)

				Console.Warning("GIFTAG error, size exceeded VU memory size %x", startSize);
				nloop	= 0;
				const_cast<GIFTAG&>(tag).EOP = 1;

				// Don't send the packet to the GS -- it's incomplete and might cause the GS plugin
				// to get confused and die. >_<

				ringpos = original_ringpos;
			}
		}
	}

@@ -749,6 +819,9 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
					gsIrq();
				}
			}

			// [TODO] : DMAC Arbitration rights should select the next queued GIF transfer here.

			break;
		}
		if(SIGNAL_IMR_Pending == true)

@@ -793,47 +866,40 @@ __forceinline int GIFPath::ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
			gif->qwc -= size;
		}
	}

	return size;
}

// Processes a GIFtag & packet, and throws out some gsIRQs as needed.
// Used to keep interrupts in sync with the EE, while the GS itself
// runs potentially several frames behind.
// Parameters:
//   size - max size of incoming data stream, in qwc (simd128)
__forceinline int GIFPath_ParseTag(GIF_PATH pathidx, const u8* pMem, u32 size)
// size - max size of incoming data stream, in qwc (simd128).  If the path is PATH1, and the
//   path does not terminate (EOP) within the specified size, it is assumed that the path must
//   loop around to the start of VU memory and continue processing.
__forceinline int GIFPath_CopyTag(GIF_PATH pathidx, const u128* pMem, u32 size)
{
#ifdef PCSX2_GSRING_SAMPLING_STATS
	static uptr profStartPtr = 0;
	static uptr profEndPtr = 0;
	if (profStartPtr == 0) {
		__asm
		{
		__beginfunc:
			mov profStartPtr, offset __beginfunc;
			mov profEndPtr, offset __endfunc;
		}
		ProfilerRegisterSource( "GSRingBufCopy", (void*)profStartPtr, profEndPtr - profStartPtr );
	}
#endif

	int retSize = s_gifPath[pathidx].ParseTag(pathidx, pMem, size);

#ifdef PCSX2_GSRING_SAMPLING_STATS
	__asm
	switch( pathidx )
	{
	__endfunc:
		nop;
		case GIF_PATH_1:
			pxAssertMsg(!s_gifPath[GIF_PATH_2].IsActive(), "GIFpath conflict: Attempted to start PATH1 while PATH2 is already active.");
			pxAssertMsg(!s_gifPath[GIF_PATH_3].IsActive() || (GSTransferStatus.PTH3 == IMAGE_MODE), "GIFpath conflict: Attempted to start PATH1 while PATH3 is already active.");
			return s_gifPath[GIF_PATH_1].CopyTag<GIF_PATH_1,true>(pMem, size);
		case GIF_PATH_2:
			pxAssertMsg(!s_gifPath[GIF_PATH_1].IsActive(), "GIFpath conflict: Attempted to start PATH2 while PATH1 is already active.");
			pxAssertMsg(!s_gifPath[GIF_PATH_3].IsActive() || (GSTransferStatus.PTH3 == IMAGE_MODE), "GIFpath conflict: Attempted to start PATH2 while PATH3 is already active.");
			return s_gifPath[GIF_PATH_2].CopyTag<GIF_PATH_2,false>(pMem, size);
		case GIF_PATH_3:
			pxAssertMsg(!s_gifPath[GIF_PATH_1].IsActive(), "GIFpath conflict: Attempted to start PATH3 while PATH1 is already active.");
			pxAssertMsg(!s_gifPath[GIF_PATH_2].IsActive(), "GIFpath conflict: Attempted to start PATH3 while PATH2 is already active.");
			return s_gifPath[GIF_PATH_3].CopyTag<GIF_PATH_3,true>(pMem, size);

		jNO_DEFAULT;
	}
#endif
	return retSize;

	return 0;		// unreachable
}
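Each case of that switch instantiates CopyTag with a compile-time path index and alignment flag, so the hot copy loop carries no per-call branches. The Aligned=false on PATH2 presumably reflects that VIF DIRECT data is not guaranteed to be 16-byte aligned, while VU memory and the GIF DMA source are -- that rationale is an inference, not stated in the commit. A minimal illustration of the dispatch pattern itself (sketch, not emulator code):

	// Runtime switch fanning out to statically specialized template instances.
	#include <cstdio>

	template< int pathidx, bool Aligned >
	static int copyTagSketch(int qwc)
	{
		// In the real CopyTag, Aligned selects _mm_load_ps vs _mm_loadu_ps at
		// compile time; here we just report which specialization was chosen.
		std::printf("PATH%d, %s loads, %d qwc\n", pathidx + 1,
		            Aligned ? "aligned" : "unaligned", qwc);
		return qwc;
	}

	static int dispatchSketch(int pathidx, int qwc)
	{
		switch (pathidx) {
			case 0: return copyTagSketch<0, true >(qwc);  // PATH1: VU memory, aligned
			case 1: return copyTagSketch<1, false>(qwc);  // PATH2: VIF data, unaligned
			case 2: return copyTagSketch<2, true >(qwc);  // PATH3: GIF DMA, aligned
		}
		return 0;
	}

	int main() { return dispatchSketch(1, 4) == 4 ? 0 : 1; }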

//Quick version for queueing PATH1 data

// Quick version for queuing PATH1 data.
// This version calculates the real length of the packet data only.  It does not process
// IRQs or DMA status updates.
__forceinline int GIFPath_ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size)
{
	int retSize = s_gifPath[pathidx].ParseTagQuick(pathidx, pMem, size);
@@ -1258,11 +1258,11 @@ void recompileNextInstruction(int delayslot)
// Calling of this function can be enabled or disabled through the use of EmuConfig.Recompiler.PreBlockChecks
static void __fastcall PreBlockCheck( u32 blockpc )
{
	static int lastrec = 0;
	/*static int lastrec = 0;
	static int curcount = 0;
	const int skip = 0;

	/*if( blockpc != 0x81fc0 ) {//&& lastrec != g_lastpc ) {
	if( blockpc != 0x81fc0 ) {//&& lastrec != g_lastpc ) {
		curcount++;

		if( curcount > skip ) {
@@ -1097,7 +1097,6 @@ void __fastcall mVU_XGKICK_(u32 addr) {
	u8* data = microVU1.regs->Mem + (addr*16);
	u32 diff = 0x400 - addr;
	u32 size;
	u8* pDest;

	if(gifRegs->stat.APATH <= GIF_APATH1 || (gifRegs->stat.APATH == GIF_APATH3 && gifRegs->stat.IP3 == true) && SIGNAL_IMR_Pending == false)
	{

@@ -1106,23 +1105,12 @@ void __fastcall mVU_XGKICK_(u32 addr) {
		// Flush any pending transfers so things don't go up in the wrong order
		while(gifRegs->stat.P1Q == true) gsPath1Interrupt();
	}
	size = GetMTGS().PrepDataPacket(GIF_PATH_1, data, diff);
	pDest = GetMTGS().GetDataPacketPtr();

	if (size > diff) {
		//DevCon.WriteLn("XGkick Wrap!");
		memcpy_qwc(pDest, microVU1.regs->Mem + (addr*16), diff);
		size -= diff;
		pDest += diff*16;
		memcpy_qwc(pDest, microVU1.regs->Mem, size);
	}
	else {
		memcpy_qwc(pDest, microVU1.regs->Mem + (addr*16), size);
	}
	GetMTGS().PrepDataPacket(GIF_PATH_1, 0x400);
	size = GIFPath_CopyTag(GIF_PATH_1, (u128*)data, diff);
	GetMTGS().SendDataPacket();

	if(GSTransferStatus.PTH1 == STOPPED_MODE)
	{
		gifRegs->stat.OPH = false;
		gifRegs->stat.APATH = GIF_APATH_IDLE;
	}
}

@@ -1130,17 +1118,16 @@ void __fastcall mVU_XGKICK_(u32 addr) {
{
	//DevCon.Warning("GIF APATH busy %x Holding for later  W %x, R %x", gifRegs->stat.APATH, Path1WritePos, Path1ReadPos);
	size = GIFPath_ParseTagQuick(GIF_PATH_1, data, diff);
	pDest = &Path1Buffer[Path1WritePos*16];
	u8* pDest = &Path1Buffer[Path1WritePos*16];

	pxAssumeMsg((Path1WritePos+size < sizeof(Path1Buffer)), "XGKick Buffer Overflow detected on Path1Buffer!");
	Path1WritePos += size;

	pxAssumeMsg((Path1WritePos < sizeof(Path1Buffer)), "XGKick Buffer Overflow detected on Path1Buffer!");
	//DevCon.Warning("Storing size %x PATH 1", size);

	if (size > diff) {
		// fixme: one of these days the following *16's will get cleaned up when we introduce
		// a special qwc/simd16 optimized version of memcpy_aligned. :)
		//DevCon.Status("XGkick Wrap!");
		memcpy_qwc(pDest, microVU1.regs->Mem + (addr*16), diff);
		Path1WritePos += size;
		size -= diff;
		pDest += diff*16;
		memcpy_qwc(pDest, microVU1.regs->Mem, size);
@@ -1988,24 +1988,12 @@ void __fastcall VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr)
		// Flush any pending transfers so things don't go up in the wrong order
		while(gifRegs->stat.P1Q == true) gsPath1Interrupt();
	}
	size = GetMTGS().PrepDataPacket(GIF_PATH_1, data, diff);
	pDest = GetMTGS().GetDataPacketPtr();
	if (size > diff) {
		// fixme: one of these days the following *16's will get cleaned up when we introduce
		// a special qwc/simd16 optimized version of memcpy_aligned. :)

		memcpy_aligned(pDest, VU1.Mem + addr, diff*16);
		size -= diff;
		pDest += diff*16;
		memcpy_aligned(pDest, VU1.Mem, size*16);
	}
	else {
		memcpy_aligned(pDest, VU1.Mem + addr, size*16);
	}
	GetMTGS().PrepDataPacket(GIF_PATH_1, 0x400);
	size = GIFPath_CopyTag(GIF_PATH_1, (u128*)data, diff);
	GetMTGS().SendDataPacket();

	if(GSTransferStatus.PTH1 == STOPPED_MODE )
	{
		gifRegs->stat.OPH = false;
		gifRegs->stat.APATH = GIF_APATH_IDLE;
	}
}

@@ -2015,8 +2003,6 @@ void __fastcall VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr)
	size = GIFPath_ParseTagQuick(GIF_PATH_1, data, diff);
	pDest = &Path1Buffer[Path1WritePos*16];

	pxAssumeMsg((Path1WritePos+size < sizeof(Path1Buffer)), "XGKick Buffer Overflow detected on Path1Buffer!");

	//DevCon.Warning("Storing size %x PATH 1", size);

@@ -2024,14 +2010,14 @@ void __fastcall VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr)
		// fixme: one of these days the following *16's will get cleaned up when we introduce
		// a special qwc/simd16 optimized version of memcpy_aligned. :)
		//DevCon.Status("XGkick Wrap!");
		memcpy_aligned(pDest, VU1.Mem + addr, diff*16);
		memcpy_aligned(pDest, VU1.Mem + addr, diff);
		Path1WritePos += size;
		size -= diff;
		pDest += diff*16;
		memcpy_aligned(pDest, VU1.Mem, size*16);
		memcpy_aligned(pDest, VU1.Mem, size);
	}
	else {
		memcpy_aligned(pDest, VU1.Mem + addr, size*16);
		memcpy_aligned(pDest, VU1.Mem + addr, size);
		Path1WritePos += size;
	}
	//if(!gifRegs->stat.P1Q) CPU_INT(28, 128);
@@ -195,8 +195,6 @@
		<Unit filename="../spdif.h" />
		<Unit filename="../spu2freeze.cpp" />
		<Unit filename="../spu2sys.cpp" />
		<Unit filename="../utf8.cpp" />
		<Unit filename="../utf8.h" />
		<Extensions>
			<code_completion />
			<debugger />
@@ -635,7 +635,7 @@ typedef struct
	int imageTransfer;
	int imageWnew, imageHnew, imageX, imageY, imageEndX, imageEndY;

	pathInfo path[3];
	pathInfo path[4];
	GIFRegDIMX dimx;
	void setRGBA(u32 r, u32 g, u32 b, u32 a)
	{

@@ -265,8 +265,17 @@ void CALLBACK GSgifTransfer3(u32 *pMem, u32 size)
	_GSgifTransfer<2>(pMem, size);
}

void InitPath()
void CALLBACK GSgifTransfer(u32 *pMem, u32 size)
{
	gs.path[0].mode = gs.path[1].mode = gs.path[2].mode = 0;
	FUNCLOG

	//ZZLog::GS_Log("GSgifTransfer3 size = %lx (mode %d, gs.path3.tag.nloop = %d).", size, gs.path[2].mode, gs.path[2].tag.nloop);

	_GSgifTransfer<3>(pMem, size);
}

void InitPath()
{
	gs.path[0].mode = gs.path[1].mode = gs.path[2].mode = gs.path[3].mode = 0;
}