* Minor optimization to GIFPath: use the precached numregs value in place of a convoluted test against NREG.

* Disabled the SSE store version of memzero (no performance benefits and it was messy anyway)

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3473 96395faa-99c1-11dd-bbfe-3dabce05a288
Jake.Stine 2010-07-13 04:36:39 +00:00
parent 8c6a66d955
commit 56d3982dc5
2 changed files with 25 additions and 43 deletions


@@ -73,6 +73,10 @@ static __forceinline void memzero_ptr( void *dest )
return;
}
#if 0
// SSE-based memory clear. Currently disabled to avoid unnecessary dependence on
// SSE cpu instruction sets. (memzero typically isn't used in any performance-critical
// situations anyway)
enum
{
remainder = MZFbytes & 127,
@@ -86,8 +90,6 @@ static __forceinline void memzero_ptr( void *dest )
if( (MZFbytes & 0xf) == 0 )
{
u64 _xmm_backup[2];
if( ((uptr)dest & 0xf) != 0 )
{
// UNALIGNED COPY MODE.
@@ -97,24 +99,21 @@ static __forceinline void memzero_ptr( void *dest )
{
__asm
{
movups _xmm_backup,xmm0;
mov ecx,dest
pxor xmm0,xmm0
mov eax,bytes128
align 16
_loop_6:
movups [ecx],xmm0;
movups [ecx+0x10],xmm0;
movups [ecx+0x20],xmm0;
movups [ecx+0x30],xmm0;
movups [ecx+0x40],xmm0;
movups [ecx+0x50],xmm0;
movups [ecx+0x60],xmm0;
movups [ecx+0x70],xmm0;
movups [ecx],xmm0
movups [ecx+0x10],xmm0
movups [ecx+0x20],xmm0
movups [ecx+0x30],xmm0
movups [ecx+0x40],xmm0
movups [ecx+0x50],xmm0
movups [ecx+0x60],xmm0
movups [ecx+0x70],xmm0
sub ecx,-128
dec eax;
sub eax,1
jnz _loop_6;
}
if( remainder != 0 )
@@ -130,10 +129,6 @@ static __forceinline void memzero_ptr( void *dest )
jnz _loop_5;
}
}
__asm
{
movups xmm0,[_xmm_backup];
}
return;
}
}
@@ -145,24 +140,21 @@ static __forceinline void memzero_ptr( void *dest )
__asm
{
movups _xmm_backup,xmm0;
mov ecx,dest
pxor xmm0,xmm0
mov eax,bytes128
align 16
_loop_8:
movaps [ecx],xmm0;
movaps [ecx+0x10],xmm0;
movaps [ecx+0x20],xmm0;
movaps [ecx+0x30],xmm0;
movaps [ecx+0x40],xmm0;
movaps [ecx+0x50],xmm0;
movaps [ecx+0x60],xmm0;
movaps [ecx+0x70],xmm0;
movaps [ecx],xmm0
movaps [ecx+0x10],xmm0
movaps [ecx+0x20],xmm0
movaps [ecx+0x30],xmm0
movaps [ecx+0x40],xmm0
movaps [ecx+0x50],xmm0
movaps [ecx+0x60],xmm0
movaps [ecx+0x70],xmm0
sub ecx,-128
dec eax;
sub eax,1
jnz _loop_8;
}
if( remainder != 0 )
@@ -173,18 +165,15 @@ static __forceinline void memzero_ptr( void *dest )
mov eax, remainder
_loop_10:
movaps [ecx+eax],xmm0;
movaps [ecx+eax],xmm0
sub eax,16;
jnz _loop_10;
}
}
__asm
{
movups xmm0,[_xmm_backup];
}
return;
}
}
#endif
// This function only works on 32-bit alignments.
pxAssume( (MZFbytes & 0x3) == 0 );
@@ -271,8 +260,6 @@ static __forceinline void memset_8( void *dest )
return;
}
//u64 _xmm_backup[2];
/*static const size_t remainder = MZFbytes & 127;
static const size_t bytes128 = MZFbytes / 128;
if( bytes128 > 32 )
@@ -283,7 +270,6 @@ static __forceinline void memset_8( void *dest )
__asm
{
movups _xmm_backup,xmm0;
mov eax,bytes128
mov ecx,dest
movss xmm0,data
@@ -316,10 +302,6 @@ static __forceinline void memset_8( void *dest )
jnz _loop_10;
}
}
__asm
{
movups xmm0,[_xmm_backup];
}
}*/
// This function only works on 32-bit alignments of data copied.
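
With this change, both memzero_ptr and memset_8 in this file rely solely on their plain 32-bit store paths: the SSE variant of memzero_ptr is compiled out under #if 0, and memset_8's SSE code remains commented out. A minimal portable sketch of what that plain path amounts to, for illustration only (the name memzero_ptr_plain and the loop body are hypothetical; the real functions use their own inline implementations):

// Illustration only: clears MZFbytes bytes at 'dest' one 32-bit word at a time,
// matching the "only works on 32-bit alignments" constraint noted above.
template< size_t MZFbytes >
static __forceinline void memzero_ptr_plain( void *dest )
{
	u32* dst = (u32*)dest;
	for( size_t i = 0; i < MZFbytes / 4; ++i )
		dst[i] = 0;
}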


@@ -252,7 +252,7 @@ __forceinline void GIFPath::Reset()
__forceinline bool GIFPath::StepReg()
{
if ((++curreg & 0xf) == tag.NREG) {
if (++curreg >= numregs) {
curreg = 0;
if (--nloop == 0) {
return false;
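
For context: the removed test compared directly against the GIF tag's 4-bit NREG field, where a value of 0 encodes a full set of 16 registers — hence the (++curreg & 0xf) masking to make the comparison wrap. With numregs precached when the tag is parsed, StepReg only needs the simple bounded compare shown above. A hedged sketch of how that cached value would be derived (the helper name CalcNumRegs is hypothetical; the actual tag-setup code may differ):

// Sketch: derive the register count once from the GIF tag's NREG field.
// NREG == 0 means all 16 registers; any other value is the count itself.
static __forceinline u32 CalcNumRegs( u32 nreg )
{
	return (nreg == 0) ? 16 : nreg;
}
// e.g., at tag-parse time:  numregs = CalcNumRegs(tag.NREG);  curreg = 0;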