mirror of https://github.com/PCSX2/pcsx2.git
ReorderingMTGS: Templated the GIFPath parsers, to allow for SSE optimizations.
git-svn-id: http://pcsx2.googlecode.com/svn/branches/ReorderingMTGS@3474 96395faa-99c1-11dd-bbfe-3dabce05a288
parent 934578c8fe
commit 43cd559801
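The change follows a classic pattern: the hot-path parsers become templates over a CPU-extension tier (and a GIF path index), every needed instantiation is compiled in, and startup code fills a function-pointer table with the variants matching the host CPU. The sketch below is a minimal stand-alone illustration of that shape, not the PCSX2 code; all names in it are hypothetical.

#include <cstdio>

enum CpuExtType { CpuExt_Base, CpuExt_SSE };

// One template body; each instantiation folds the CpuExt/pathidx checks
// away, so the emitted code contains no per-call feature branching.
template< CpuExtType CpuExt, int pathidx >
static int CopyTag_tmpl(const void* mem, unsigned size)
{
    if (CpuExt >= CpuExt_SSE) {
        // SIMD copy path would go here.
    } else {
        // Plain integer copy path would go here.
    }
    (void)mem;
    return (int)size;
}

typedef int FnType_CopyTag(const void* mem, unsigned size);
static FnType_CopyTag* tbl_CopyTag[3];

// Run once at startup, after CPU capabilities have been detected.
static void InitDispatch(bool hasSSE)
{
    tbl_CopyTag[0] = hasSSE ? CopyTag_tmpl<CpuExt_SSE, 0> : CopyTag_tmpl<CpuExt_Base, 0>;
    tbl_CopyTag[1] = hasSSE ? CopyTag_tmpl<CpuExt_SSE, 1> : CopyTag_tmpl<CpuExt_Base, 1>;
    tbl_CopyTag[2] = hasSSE ? CopyTag_tmpl<CpuExt_SSE, 2> : CopyTag_tmpl<CpuExt_Base, 2>;
}

int main()
{
    InitDispatch(true);
    std::printf("%d\n", tbl_CopyTag[0](nullptr, 16));  // one indirect call per packet
    return 0;
}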
pcsx2/GS.cpp

@@ -49,6 +49,7 @@ void gsSetRegionMode( GS_RegionMode region )
 void gsInit()
 {
 	memzero(g_RealGSMem);
+	GIFPath_Initialize();
 }
 
 extern bool SIGNAL_IMR_Pending;
pcsx2/GS.h | 10 ++++++++++
@@ -18,6 +18,15 @@
 #include "Common.h"
 #include "System/SysThreads.h"
 
+enum CpuExtType
+{
+	CpuExt_Base,
+	CpuExt_MMX,
+	CpuExt_SSE,
+	CpuExt_SSE2,
+	CpuExt_SSE41,
+};
+
 extern __aligned16 u8 g_RealGSMem[Ps2MemSize::GSregs];
 
 enum CSR_FifoState
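A note on the enum above: the tiers are declared in increasing order of capability, so code can gate features with an ordered comparison such as CpuExt >= CpuExt_SSE. When CpuExt is a template parameter, that comparison is a compile-time constant and the untaken branch is dropped from each instantiation. A minimal hypothetical demo:

enum CpuExtType { CpuExt_Base, CpuExt_MMX, CpuExt_SSE, CpuExt_SSE2, CpuExt_SSE41 };

template< CpuExtType CpuExt >
int tier_demo()
{
    // Constant-folded per instantiation: tier_demo<CpuExt_SSE2>() compiles
    // down to "return 1;" with no runtime test.
    if (CpuExt >= CpuExt_SSE)
        return 1;
    return 0;
}

// e.g. tier_demo<CpuExt_Base>() == 0, tier_demo<CpuExt_SSE41>() == 1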
@@ -229,6 +238,7 @@ enum GIF_PATH
 	GIF_PATH_3,
 };
 
+extern void GIFPath_Initialize();
 extern int GIFPath_CopyTag(GIF_PATH pathidx, const u128* pMem, u32 size);
 extern int GIFPath_ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size);
 extern void GIFPath_Reset();
pcsx2/ps2/GIFpath.cpp
@@ -93,12 +93,16 @@ struct GIFPath
 
 	void Reset();
 	void PrepPackedRegs();
-	void SetTag(const void* mem);
 	bool StepReg();
 	u8 GetReg();
 	bool IsActive() const;
 
-	int CopyTag(GIF_PATH pathidx, const u128* pMem, u32 size);
+	template< CpuExtType CpuExt, bool Aligned >
+	void SetTag(const void* mem);
+
+	template< CpuExtType CpuExt, int pathidx >
+	int CopyTag(const u128* pMem, u32 size);
+
 	int ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size);
 };
 
@@ -286,10 +290,14 @@ __forceinline void GIFPath::PrepPackedRegs()
 	}
 }
 
+
+template< CpuExtType CpuExt, bool Aligned >
 __forceinline void GIFPath::SetTag(const void* mem)
 {
-	_mm_store_ps( (float*)&tag, _mm_loadu_ps((float*)mem) );
-	//const_cast<GIFTAG&>(tag) = *((GIFTAG*)mem);
+	if( CpuExt >= CpuExt_SSE )
+		_mm_store_ps( (float*)&tag, Aligned ? _mm_load_ps((const float*)mem) : _mm_loadu_ps((const float*)mem) );
+	else
+		const_cast<GIFTAG&>(tag) = *((GIFTAG*)mem);
 
 	nloop = tag.NLOOP;
 	curreg = 0;
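On the SetTag change above: _mm_load_ps requires a 16-byte-aligned source (movaps) while _mm_loadu_ps tolerates any alignment (movups), and since Aligned is a template parameter the ternary costs nothing at runtime. A stand-alone sketch of the same idiom, with hypothetical names:

#include <xmmintrin.h>  // SSE1 intrinsics

// Copies one 16-byte quadword. The destination is assumed 16-byte aligned;
// the Aligned flag states what we know about the source at compile time.
// Executing the Aligned=true load on a misaligned source would fault, but
// the conditional operator only evaluates the selected operand.
template< bool Aligned >
inline void copy_qwc(void* dst, const void* src)
{
    const __m128 v = Aligned ? _mm_load_ps((const float*)src)
                             : _mm_loadu_ps((const float*)src);
    _mm_store_ps((float*)dst, v);
}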
@@ -373,10 +381,9 @@ static __forceinline void gsHandler(const u8* pMem)
 #define aMin(x, y) std::min(x, y)
 
 // Parameters:
-//   size (path1) - difference between the end of VU memory and pMem.
-//   size (path2/3) - max size of incoming data stream, in qwc (simd128)
+//   size - max size of incoming data stream, in qwc (simd128).  If the path is PATH1, and the
+//     path does not terminate (EOP) within the specified size, it is assumed that the path must
+//     loop around to the start of VU memory and continue processing.
 
 __forceinline int GIFPath::ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size)
 {
 	u32 startSize = size; // Start Size
@@ -384,7 +391,7 @@ __forceinline int GIFPath::ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size)
 	while (size > 0) {
 		if (!nloop) {
 
-			SetTag(pMem);
+			SetTag<CpuExt_Base,false>(pMem);
 			incTag(1);
 		}
 		else
@@ -523,7 +530,7 @@ __forceinline int GIFPath::ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size)
 	return size;
 }
 
-void MemCopy_WrappedDest( const u128* src, u128* destBase, uint& destStart, uint destSize, uint len )
+__forceinline void MemCopy_WrappedDest( const u128* src, u128* destBase, uint& destStart, uint destSize, uint len )
 {
 	uint endpos = destStart + len;
 	if( endpos < destSize )
@@ -541,7 +548,7 @@ void MemCopy_WrappedDest( const u128* src, u128* destBase, uint& destStart, uint destSize, uint len )
 	}
 }
 
-void MemCopy_WrappedSrc( const u128* srcBase, uint& srcStart, uint srcSize, u128* dest, uint len )
+__forceinline void MemCopy_WrappedSrc( const u128* srcBase, uint& srcStart, uint srcSize, u128* dest, uint len )
 {
 	uint endpos = srcStart + len;
 	if( endpos < srcSize )
@@ -559,16 +566,21 @@ void MemCopy_WrappedSrc( const u128* srcBase, uint& srcStart, uint srcSize, u128* dest, uint len )
 	}
 }
 
-// [TODO] optimization: If later templated, we can have Paths 1 and 3 use aligned SSE movs,
-// since only PATH2 can feed us unaligned source data.
 #define copyTag() do { \
-	/*RingBuffer.m_Ring[ringpos] = *pMem128;*/ \
-	_mm_store_ps( (float*)&RingBuffer.m_Ring[ringpos], _mm_loadu_ps((float*)pMem128)); \
+	if( CpuExt >= CpuExt_SSE ) \
+		_mm_store_ps( (float*)&RingBuffer.m_Ring[ringpos], (pathidx!=GIF_PATH_2) ? _mm_load_ps((float*)pMem128) : _mm_loadu_ps((float*)pMem128)); \
+	else \
+		RingBuffer.m_Ring[ringpos] = *pMem128; \
 	++pMem128; --size; \
 	ringpos = (ringpos+1)&RingBufferMask; \
 } while(false)
 
-__forceinline int GIFPath::CopyTag(GIF_PATH pathidx, const u128* pMem128, u32 size)
+// Parameters:
+//   size - max size of incoming data stream, in qwc (simd128).  If the path is PATH1, and the
+//     path does not terminate (EOP) within the specified size, it is assumed that the path must
+//     loop around to the start of VU memory and continue processing.
+template< CpuExtType CpuExt, int pathidx >
+__forceinline int GIFPath::CopyTag(const u128* pMem128, u32 size)
 {
 	uint& ringpos = GetMTGS().m_packet_ringpos;
 	const uint original_ringpos = ringpos;
@@ -578,12 +590,7 @@ __forceinline int GIFPath::CopyTag(GIF_PATH pathidx, const u128* pMem128, u32 size)
 	while (size > 0) {
 		if (!nloop) {
 
-			// [TODO] Optimization: Use MMX intrinsics for SetTag and CopyTag, which both currently
-			// produce a series of mov eax,[src]; mov [dest],eax instructions to copy these
-			// individual qwcs. Warning: Path2 transfers are not always QWC-aligned, but they are
-			// always aligned on an 8 byte boundary; so its probably best to use MMX here.
-
-			SetTag((u8*)pMem128);
+			SetTag<CpuExt, (pathidx!=GIF_PATH_2)>((u8*)pMem128);
 			copyTag();
 
 			if(nloop > 0)
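The ringpos = (ringpos+1)&RingBufferMask step in the copyTag() macro above relies on the MTGS ring buffer having a power-of-two size, so wrap-around is a single AND rather than a compare or modulo. A hypothetical stand-alone version of that arithmetic (PCSX2's actual ring size lives in the MTGS code):

// Hypothetical sizes for illustration.
static const unsigned RingSize = 1u << 16;   // must be a power of two
static const unsigned RingMask = RingSize - 1;

inline unsigned ring_advance(unsigned pos)
{
    // Produces 0, 1, ..., RingSize-1, 0, 1, ... with no branch:
    // masking keeps only the low bits, wrapping to 0 at RingSize.
    return (pos + 1) & RingMask;
}

// e.g. ring_advance(RingSize - 1) == 0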
@@ -863,9 +870,30 @@ __forceinline int GIFPath::CopyTag(GIF_PATH pathidx, const u128* pMem128, u32 size)
 	return size;
 }
 
+typedef int __fastcall FnType_CopyTag(const u128* pMem, u32 size);
+
+static __aligned16 FnType_CopyTag* tbl_CopyTag[3];
+
+// Parameters:
+//   size - max size of incoming data stream, in qwc (simd128).  If the path is PATH1, and the
+//     path does not terminate (EOP) within the specified size, it is assumed that the path must
+//     loop around to the start of VU memory and continue processing.
+template< CpuExtType CpuExt, int pathidx >
+static int __fastcall _CopyTag_tmpl(const u128* pMem, u32 size)
+{
+	return s_gifPath[pathidx].CopyTag<CpuExt,pathidx>(pMem, size);
+}
+
+void GIFPath_Initialize()
+{
+	tbl_CopyTag[0] = x86caps.hasStreamingSIMDExtensions ? _CopyTag_tmpl<CpuExt_SSE, 0> : _CopyTag_tmpl<CpuExt_Base, 0>;
+	tbl_CopyTag[1] = x86caps.hasStreamingSIMDExtensions ? _CopyTag_tmpl<CpuExt_SSE, 1> : _CopyTag_tmpl<CpuExt_Base, 1>;
+	tbl_CopyTag[2] = x86caps.hasStreamingSIMDExtensions ? _CopyTag_tmpl<CpuExt_SSE, 2> : _CopyTag_tmpl<CpuExt_Base, 2>;
+}
+
 __forceinline int GIFPath_CopyTag(GIF_PATH pathidx, const u128* pMem, u32 size)
 {
-	return s_gifPath[pathidx].CopyTag(pathidx, pMem, size);
+	return tbl_CopyTag[pathidx](pMem, size);
 }
 
 // Quick version for queueing PATH1 data.
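x86caps.hasStreamingSIMDExtensions above is filled by PCSX2's own CPUID probe at startup. As a self-contained stand-in for that probe (an assumption: GCC or Clang, which provide __builtin_cpu_supports; MSVC would need a manual __cpuid check instead), the table initialization could be driven like this:

// Hedged sketch: query the host CPU once, then bind the table entries.
// __builtin_cpu_supports("sse") is a GCC/Clang builtin that returns
// nonzero when the running CPU supports SSE1.
typedef int FnType(const void* mem, unsigned size);

template< bool UseSSE >
static int copy_stub(const void* /*mem*/, unsigned size) { return (int)size; }

static FnType* s_copy[3];

void init_copy_table()
{
    const bool sse = __builtin_cpu_supports("sse") != 0;
    for (int path = 0; path < 3; ++path)
        s_copy[path] = sse ? copy_stub<true> : copy_stub<false>;
}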