Merge pull request #251 from xsacha/memzero_ptr

Remove redundant memzero_ptr. All compilers use SSE implementation
This commit is contained in:
Gregory Hainaut 2014-09-21 17:15:54 +02:00
commit fd7b692d00
8 changed files with 8 additions and 201 deletions

View File

@ -33,12 +33,6 @@ static __fi void memset32( T& obj )
*dest = data;
}
// Zero-fills a fixed-size region of memory.
//   size - number of bytes to clear, supplied as a compile-time template argument.
//   dest - start of the region to clear.
// The work is delegated entirely to the C library memset().
template< uint size >
static __fi void memzero_ptr( void* dest )
{
	const size_t byteCount = size;
	memset( dest, 0, byteCount );
}
template< typename T >
static __fi void memzero( T& obj )
{

View File

@ -58,193 +58,6 @@
#define MZFbytes (_bytes)
// Compile-time-sized zero fill: clears MZFbytes (== _bytes) bytes starting at dest.
//
//   _bytes - number of bytes to clear, fixed at compile time.
//   dest   - start of the region to clear.  The word-store paths below assert that
//            it is 4-byte aligned.
//
// Dispatch, in order:
//   * size 0                     -> returns immediately.
//   * size not a multiple of 4   -> forwarded to memset().
//   * 1 or 2 dwords              -> a single direct 32-bit / 64-bit store.
//   * 3 to 5 dwords              -> unrolled stosd.
//   * anything larger            -> rep stosd.
// The __asm blocks are written in `__asm { }` block form and use the 32-bit
// registers eax/ecx/edi.
template< size_t _bytes >
static __fi void memzero_ptr( void *dest )
{
// Nothing to clear.
if( MZFbytes == 0 ) return;
// The fast paths below write whole 32-bit words.  Sizes that are not a multiple
// of 4 bytes fall back on the compiler-provided implementation of memset.
if( (MZFbytes & 0x3) != 0 )
{
memset( dest, 0, MZFbytes );
return;
}
#if 0
// SSE-based memory clear. Currently disabled so as to avoid unnecessary dependence on
// SSE cpu instruction sets. (memzero typically isn't used in any performance critical
// situations anyway)
// remainder: bytes left over after the last full 128-byte block.
// bytes128:  number of full 128-byte blocks in the region.
enum
{
remainder = MZFbytes & 127,
bytes128 = MZFbytes / 128
};
// Initial check -- if the length is not a multiple of 16 then skip the SSE paths and
// fall through to the rep stosd code below. Handling these unaligned clears in a more
// efficient manner isn't necessary in pcsx2 (meaning they aren't used in speed-critical
// scenarios).
if( (MZFbytes & 0xf) == 0 )
{
if( ((uptr)dest & 0xf) != 0 )
{
// UNALIGNED CLEAR MODE (dest is not 16-byte aligned, so movups is used).
// For unaligned clears the threshold is more than 128 blocks of 128 bytes. Anything
// less and it's probably better off just falling back on the rep stosd.
if( bytes128 > 128 )
{
__asm
{
mov ecx,dest
pxor xmm0,xmm0
mov eax,bytes128
_loop_6:
movups [ecx],xmm0
movups [ecx+0x10],xmm0
movups [ecx+0x20],xmm0
movups [ecx+0x30],xmm0
movups [ecx+0x40],xmm0
movups [ecx+0x50],xmm0
movups [ecx+0x60],xmm0
movups [ecx+0x70],xmm0
sub ecx,-128
sub eax,1
jnz _loop_6;
}
// NOTE(review): the block below relies on ecx (end of the last full block) and
// xmm0 (zero) still holding their values from the previous __asm block -- confirm
// the compiler guarantees that.
// NOTE(review): as written the loop stores at offsets remainder, remainder-16, ..., 16
// from ecx; offset 0 is never written and the first store starts at `remainder`,
// i.e. past the end of the requested region. Must be fixed before re-enabling.
if( remainder != 0 )
{
// Clear the remainder in reverse (using the decrementing eax as our indexer)
__asm
{
mov eax, remainder
_loop_5:
movups [ecx+eax],xmm0;
sub eax,16;
jnz _loop_5;
}
}
return;
}
}
else if( bytes128 > 48 )
{
// ALIGNED CLEAR MODE (dest is 16-byte aligned, so movaps is used).
// Data is aligned and the size of data is large enough (more than 48 blocks of
// 128 bytes) to merit a nice fancy chunk of unrolled goodness:
__asm
{
mov ecx,dest
pxor xmm0,xmm0
mov eax,bytes128
_loop_8:
movaps [ecx],xmm0
movaps [ecx+0x10],xmm0
movaps [ecx+0x20],xmm0
movaps [ecx+0x30],xmm0
movaps [ecx+0x40],xmm0
movaps [ecx+0x50],xmm0
movaps [ecx+0x60],xmm0
movaps [ecx+0x70],xmm0
sub ecx,-128
sub eax,1
jnz _loop_8;
}
// NOTE(review): same concerns as the unaligned remainder loop above -- the stores
// cover offsets remainder..16 rather than remainder-16..0, and ecx/xmm0 are assumed
// to survive between the two __asm blocks.
if( remainder != 0 )
{
// Clear the remainder in reverse (using the decrementing eax as our indexer)
__asm
{
mov eax, remainder
_loop_10:
movaps [ecx+eax],xmm0
sub eax,16;
jnz _loop_10;
}
}
return;
}
}
#endif
// The code below only works on 32-bit sizes and alignments. The size condition is
// already guaranteed by the early memset() return above; the destination alignment
// is only checked here.
// NOTE(review): pxAssume is defined elsewhere -- presumably an assertion / optimizer
// hint; confirm its behavior in release builds.
pxAssume( (MZFbytes & 0x3) == 0 );
pxAssume( ((uptr)dest & 0x3) == 0 );
// remdat: size of the region expressed in 32-bit words.
enum
{
remdat = MZFbytes >> 2
};
// This case statement handles 5 special-case sizes (small blocks)
// in addition to the generic large block that uses rep stosd.
switch( remdat )
{
case 1:
// One dword: a single 32-bit store.
*(u32*)dest = 0;
return;
case 2:
// Two dwords: a single 64-bit store.
// NOTE(review): only 4-byte alignment of dest was asserted above -- confirm that an
// 8-byte store through u64* is acceptable on the targeted platforms.
*(u64*)dest = 0;
return;
case 3:
// Three dwords: unrolled stosd with eax = 0.
__asm
{
mov edi, dest
xor eax, eax
stosd
stosd
stosd
}
return;
case 4:
// Four dwords: unrolled stosd with eax = 0.
__asm
{
mov edi, dest
xor eax, eax
stosd
stosd
stosd
stosd
}
return;
case 5:
// Five dwords: unrolled stosd with eax = 0.
__asm
{
mov edi, dest
xor eax, eax
stosd
stosd
stosd
stosd
stosd
}
return;
default:
// Generic case: ecx holds the dword count and rep stosd writes that many zeros.
__asm
{
mov ecx, remdat
mov edi, dest
xor eax, eax
rep stosd
}
return;
}
}
// An optimized memset for 8 bit destination data.
template< u8 data, size_t _bytes >
static __fi void memset_8( void *dest )
@ -549,7 +362,7 @@ static __fi void memset_32( void *dest )
// Zero-fills an object in place.
//   object - the object to clear; exactly sizeof(T) bytes starting at its address
//            are set to zero.
// The clear is done by a single memset() call.  The previous body also invoked
// memzero_ptr<sizeof(T)>() on the same object, which cleared it a second time and
// kept this function dependent on the memzero_ptr template.
template< typename T >
static __fi void memzero( T& object )
{
	memset( &object, 0, sizeof(T) );
}
// This method clears an object with the given 8 bit value.

View File

@ -261,7 +261,7 @@ s32 cdvdReadConfig(u8* config)
((cdvd.COffset == 2) && (cdvd.CBlockIndex >= 7))
)
{
memzero_ptr<16>(config);
memset(config, 0, 16);
return 0;
}
@ -430,7 +430,7 @@ void cdvdReadKey(u8, u16, u32 arg2, u8* key)
cdvdReloadElfInfo();
// clear key values
memzero_ptr<16>(key);
memset(key, 0, 16);
if (!DiscSerial.IsEmpty())
{

View File

@ -27,7 +27,7 @@ void psxHwReset() {
/* if (Config.Sio) psxHu32(0x1070) |= 0x80;
if (Config.SpuIrq) psxHu32(0x1070) |= 0x200;*/
memzero_ptr<0x10000>(iopHw);
memset(iopHw, 0, 0x10000);
// mdecInit(); //initialize mdec decoder
cdrReset();

View File

@ -60,7 +60,7 @@ void iopMemoryReserve::Reset()
DbgCon.WriteLn("IOP resetting main memory...");
memzero_ptr<0x2000 * sizeof(uptr) * 2>( psxMemWLUT ); // clears both allocations, RLUT and WLUT
memset(psxMemWLUT, 0, 0x2000 * sizeof(uptr) * 2); // clears both allocations, RLUT and WLUT
// Trick! We're accessing RLUT here through WLUT, since it's the non-const pointer.
// So the ones with a 0x2000 prefixed are RLUT tables.

View File

@ -710,7 +710,7 @@ static void recResetRaw()
maxrecmem = 0;
memzero_ptr<RECCONSTBUF_SIZE * sizeof(recConstBuf)>(recConstBuf);
memset(recConstBuf, 0, RECCONSTBUF_SIZE * sizeof(recConstBuf));
if( s_pInstCache )
memset( s_pInstCache, 0, sizeof(EEINST)*s_nInstCacheSize );

View File

@ -165,7 +165,7 @@ __ri void mVUdeleteProg(microVU& mVU, microProgram*& prog) {
// Creates a new Micro Program
__ri microProgram* mVUcreateProg(microVU& mVU, int startPC) {
microProgram* prog = (microProgram*)_aligned_malloc(sizeof(microProgram), 64);
memzero_ptr<sizeof(microProgram)>(prog);
memset(prog, 0, sizeof(microProgram));
prog->idx = mVU.prog.total++;
prog->ranges = new std::deque<microRange>();
prog->startPC = startPC;

View File

@ -448,7 +448,7 @@ void SuperVUReset(int vuindex)
DevCon.WriteLn("SuperVU%u: Resetting recompiler cache.", vuindex);
if (!recVUStack[vuindex]) recVUStack[vuindex] = new u8[SUPERVU_STACKSIZE * 4];
memzero_ptr<SUPERVU_STACKSIZE>(recVUStack[vuindex]);
memset(recVUStack[vuindex], 0, SUPERVU_STACKSIZE);
s_recVUMem[vuindex]->Reset();
s_recVUPtr[vuindex] = *s_recVUMem[vuindex];