mirror of https://github.com/PCSX2/pcsx2.git
Merge pull request #251 from xsacha/memzero_ptr
Remove redundant memzero_ptr. All compilers use SSE implementation
This commit is contained in:
commit
fd7b692d00
|
@ -33,12 +33,6 @@ static __fi void memset32( T& obj )
|
||||||
*dest = data;
|
*dest = data;
|
||||||
}
|
}
|
||||||
|
|
||||||
template< uint size >
|
|
||||||
static __fi void memzero_ptr( void* dest )
|
|
||||||
{
|
|
||||||
memset( dest, 0, size );
|
|
||||||
}
|
|
||||||
|
|
||||||
template< typename T >
|
template< typename T >
|
||||||
static __fi void memzero( T& obj )
|
static __fi void memzero( T& obj )
|
||||||
{
|
{
|
||||||
|
|
|
@ -58,193 +58,6 @@
|
||||||
|
|
||||||
#define MZFbytes (_bytes)
|
#define MZFbytes (_bytes)
|
||||||
|
|
||||||
// This is an implementation of the memzero_ptr fast memset routine (for zero-clears only).
|
|
||||||
template< size_t _bytes >
|
|
||||||
static __fi void memzero_ptr( void *dest )
|
|
||||||
{
|
|
||||||
if( MZFbytes == 0 ) return;
|
|
||||||
|
|
||||||
// This function only works on sizes that are a multiple of 32 bits. For anything else we just fall back
|
|
||||||
// on the compiler-provided implementation of memset...
|
|
||||||
|
|
||||||
if( (MZFbytes & 0x3) != 0 )
|
|
||||||
{
|
|
||||||
memset( dest, 0, MZFbytes );
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
// SSE-based memory clear. Currently disabled so as to avoid unnecessary dependence on
|
|
||||||
// SSE cpu instruction sets. (memzero typically isn't used in any performance critical
|
|
||||||
// situations anyway)
|
|
||||||
enum
|
|
||||||
{
|
|
||||||
remainder = MZFbytes & 127,
|
|
||||||
bytes128 = MZFbytes / 128
|
|
||||||
};
|
|
||||||
|
|
||||||
// Initial check -- if the length is not a multiple of 16 then fall back on
|
|
||||||
// using rep stosd methods. Handling these unaligned clears in a more efficient
|
|
||||||
// manner isn't necessary in pcsx2 (meaning they aren't used in speed-critical
|
|
||||||
// scenarios).
|
|
||||||
|
|
||||||
if( (MZFbytes & 0xf) == 0 )
|
|
||||||
{
|
|
||||||
if( ((uptr)dest & 0xf) != 0 )
|
|
||||||
{
|
|
||||||
// UNALIGNED COPY MODE.
|
|
||||||
// For unaligned clears we require more than 128 blocks of 128 bytes. Anything
|
|
||||||
// less and it's probably better off just falling back on the rep stosd.
|
|
||||||
if( bytes128 > 128 )
|
|
||||||
{
|
|
||||||
__asm
|
|
||||||
{
|
|
||||||
mov ecx,dest
|
|
||||||
pxor xmm0,xmm0
|
|
||||||
mov eax,bytes128
|
|
||||||
|
|
||||||
_loop_6:
|
|
||||||
movups [ecx],xmm0
|
|
||||||
movups [ecx+0x10],xmm0
|
|
||||||
movups [ecx+0x20],xmm0
|
|
||||||
movups [ecx+0x30],xmm0
|
|
||||||
movups [ecx+0x40],xmm0
|
|
||||||
movups [ecx+0x50],xmm0
|
|
||||||
movups [ecx+0x60],xmm0
|
|
||||||
movups [ecx+0x70],xmm0
|
|
||||||
sub ecx,-128
|
|
||||||
sub eax,1
|
|
||||||
jnz _loop_6;
|
|
||||||
}
|
|
||||||
if( remainder != 0 )
|
|
||||||
{
|
|
||||||
// Clear the remainder in reverse (using the decrementing eax as our indexer)
|
|
||||||
__asm
|
|
||||||
{
|
|
||||||
mov eax, remainder
|
|
||||||
|
|
||||||
_loop_5:
|
|
||||||
movups [ecx+eax],xmm0;
|
|
||||||
sub eax,16;
|
|
||||||
jnz _loop_5;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if( bytes128 > 48 )
|
|
||||||
{
|
|
||||||
// ALIGNED COPY MODE
|
|
||||||
// Data is aligned and the size of data is large enough to merit a nice
|
|
||||||
// fancy chunk of unrolled goodness:
|
|
||||||
|
|
||||||
__asm
|
|
||||||
{
|
|
||||||
mov ecx,dest
|
|
||||||
pxor xmm0,xmm0
|
|
||||||
mov eax,bytes128
|
|
||||||
|
|
||||||
_loop_8:
|
|
||||||
movaps [ecx],xmm0
|
|
||||||
movaps [ecx+0x10],xmm0
|
|
||||||
movaps [ecx+0x20],xmm0
|
|
||||||
movaps [ecx+0x30],xmm0
|
|
||||||
movaps [ecx+0x40],xmm0
|
|
||||||
movaps [ecx+0x50],xmm0
|
|
||||||
movaps [ecx+0x60],xmm0
|
|
||||||
movaps [ecx+0x70],xmm0
|
|
||||||
sub ecx,-128
|
|
||||||
sub eax,1
|
|
||||||
jnz _loop_8;
|
|
||||||
}
|
|
||||||
if( remainder != 0 )
|
|
||||||
{
|
|
||||||
// Clear the remainder in reverse (using the decrementing eax as our indexer)
|
|
||||||
__asm
|
|
||||||
{
|
|
||||||
mov eax, remainder
|
|
||||||
|
|
||||||
_loop_10:
|
|
||||||
movaps [ecx+eax],xmm0
|
|
||||||
sub eax,16;
|
|
||||||
jnz _loop_10;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// This function only works on 32-bit alignments.
|
|
||||||
pxAssume( (MZFbytes & 0x3) == 0 );
|
|
||||||
pxAssume( ((uptr)dest & 0x3) == 0 );
|
|
||||||
|
|
||||||
enum
|
|
||||||
{
|
|
||||||
remdat = MZFbytes >> 2
|
|
||||||
};
|
|
||||||
|
|
||||||
// This switch statement handles 5 special-case sizes (small blocks)
|
|
||||||
// in addition to the generic large block that uses rep stosd.
|
|
||||||
|
|
||||||
switch( remdat )
|
|
||||||
{
|
|
||||||
case 1:
|
|
||||||
*(u32*)dest = 0;
|
|
||||||
return;
|
|
||||||
|
|
||||||
case 2:
|
|
||||||
*(u64*)dest = 0;
|
|
||||||
return;
|
|
||||||
|
|
||||||
case 3:
|
|
||||||
__asm
|
|
||||||
{
|
|
||||||
mov edi, dest
|
|
||||||
xor eax, eax
|
|
||||||
stosd
|
|
||||||
stosd
|
|
||||||
stosd
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
|
|
||||||
case 4:
|
|
||||||
__asm
|
|
||||||
{
|
|
||||||
mov edi, dest
|
|
||||||
xor eax, eax
|
|
||||||
stosd
|
|
||||||
stosd
|
|
||||||
stosd
|
|
||||||
stosd
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
|
|
||||||
case 5:
|
|
||||||
__asm
|
|
||||||
{
|
|
||||||
mov edi, dest
|
|
||||||
xor eax, eax
|
|
||||||
stosd
|
|
||||||
stosd
|
|
||||||
stosd
|
|
||||||
stosd
|
|
||||||
stosd
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
|
|
||||||
default:
|
|
||||||
__asm
|
|
||||||
{
|
|
||||||
mov ecx, remdat
|
|
||||||
mov edi, dest
|
|
||||||
xor eax, eax
|
|
||||||
rep stosd
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// An optimized memset for 8 bit destination data.
|
// An optimized memset for 8 bit destination data.
|
||||||
template< u8 data, size_t _bytes >
|
template< u8 data, size_t _bytes >
|
||||||
static __fi void memset_8( void *dest )
|
static __fi void memset_8( void *dest )
|
||||||
|
@ -549,7 +362,7 @@ static __fi void memset_32( void *dest )
|
||||||
template< typename T >
|
template< typename T >
|
||||||
static __fi void memzero( T& object )
|
static __fi void memzero( T& object )
|
||||||
{
|
{
|
||||||
memzero_ptr<sizeof(T)>( &object );
|
memset(&object, 0, sizeof(T));
|
||||||
}
|
}
|
||||||
|
|
||||||
// This method clears an object with the given 8 bit value.
|
// This method clears an object with the given 8 bit value.
|
||||||
|
|
|
@ -261,7 +261,7 @@ s32 cdvdReadConfig(u8* config)
|
||||||
((cdvd.COffset == 2) && (cdvd.CBlockIndex >= 7))
|
((cdvd.COffset == 2) && (cdvd.CBlockIndex >= 7))
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
memzero_ptr<16>(config);
|
memset(config, 0, 16);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -430,7 +430,7 @@ void cdvdReadKey(u8, u16, u32 arg2, u8* key)
|
||||||
cdvdReloadElfInfo();
|
cdvdReloadElfInfo();
|
||||||
|
|
||||||
// clear key values
|
// clear key values
|
||||||
memzero_ptr<16>(key);
|
memset(key, 0, 16);
|
||||||
|
|
||||||
if (!DiscSerial.IsEmpty())
|
if (!DiscSerial.IsEmpty())
|
||||||
{
|
{
|
||||||
|
|
|
@ -27,7 +27,7 @@ void psxHwReset() {
|
||||||
/* if (Config.Sio) psxHu32(0x1070) |= 0x80;
|
/* if (Config.Sio) psxHu32(0x1070) |= 0x80;
|
||||||
if (Config.SpuIrq) psxHu32(0x1070) |= 0x200;*/
|
if (Config.SpuIrq) psxHu32(0x1070) |= 0x200;*/
|
||||||
|
|
||||||
memzero_ptr<0x10000>(iopHw);
|
memset(iopHw, 0, 0x10000);
|
||||||
|
|
||||||
// mdecInit(); //initialize mdec decoder
|
// mdecInit(); //initialize mdec decoder
|
||||||
cdrReset();
|
cdrReset();
|
||||||
|
|
|
@ -60,7 +60,7 @@ void iopMemoryReserve::Reset()
|
||||||
|
|
||||||
DbgCon.WriteLn("IOP resetting main memory...");
|
DbgCon.WriteLn("IOP resetting main memory...");
|
||||||
|
|
||||||
memzero_ptr<0x2000 * sizeof(uptr) * 2>( psxMemWLUT ); // clears both allocations, RLUT and WLUT
|
memset(psxMemWLUT, 0, 0x2000 * sizeof(uptr) * 2); // clears both allocations, RLUT and WLUT
|
||||||
|
|
||||||
// Trick! We're accessing RLUT here through WLUT, since it's the non-const pointer.
|
// Trick! We're accessing RLUT here through WLUT, since it's the non-const pointer.
|
||||||
// So the ones prefixed with 0x2000 are RLUT tables.
|
// So the ones prefixed with 0x2000 are RLUT tables.
|
||||||
|
|
|
@ -710,7 +710,7 @@ static void recResetRaw()
|
||||||
|
|
||||||
maxrecmem = 0;
|
maxrecmem = 0;
|
||||||
|
|
||||||
memzero_ptr<RECCONSTBUF_SIZE * sizeof(recConstBuf)>(recConstBuf);
|
memset(recConstBuf, 0, RECCONSTBUF_SIZE * sizeof(recConstBuf));
|
||||||
|
|
||||||
if( s_pInstCache )
|
if( s_pInstCache )
|
||||||
memset( s_pInstCache, 0, sizeof(EEINST)*s_nInstCacheSize );
|
memset( s_pInstCache, 0, sizeof(EEINST)*s_nInstCacheSize );
|
||||||
|
|
|
@ -165,7 +165,7 @@ __ri void mVUdeleteProg(microVU& mVU, microProgram*& prog) {
|
||||||
// Creates a new Micro Program
|
// Creates a new Micro Program
|
||||||
__ri microProgram* mVUcreateProg(microVU& mVU, int startPC) {
|
__ri microProgram* mVUcreateProg(microVU& mVU, int startPC) {
|
||||||
microProgram* prog = (microProgram*)_aligned_malloc(sizeof(microProgram), 64);
|
microProgram* prog = (microProgram*)_aligned_malloc(sizeof(microProgram), 64);
|
||||||
memzero_ptr<sizeof(microProgram)>(prog);
|
memset(prog, 0, sizeof(microProgram));
|
||||||
prog->idx = mVU.prog.total++;
|
prog->idx = mVU.prog.total++;
|
||||||
prog->ranges = new std::deque<microRange>();
|
prog->ranges = new std::deque<microRange>();
|
||||||
prog->startPC = startPC;
|
prog->startPC = startPC;
|
||||||
|
|
|
@ -448,7 +448,7 @@ void SuperVUReset(int vuindex)
|
||||||
DevCon.WriteLn("SuperVU%u: Resetting recompiler cache.", vuindex);
|
DevCon.WriteLn("SuperVU%u: Resetting recompiler cache.", vuindex);
|
||||||
|
|
||||||
if (!recVUStack[vuindex]) recVUStack[vuindex] = new u8[SUPERVU_STACKSIZE * 4];
|
if (!recVUStack[vuindex]) recVUStack[vuindex] = new u8[SUPERVU_STACKSIZE * 4];
|
||||||
memzero_ptr<SUPERVU_STACKSIZE>(recVUStack[vuindex]);
|
memset(recVUStack[vuindex], 0, SUPERVU_STACKSIZE);
|
||||||
|
|
||||||
s_recVUMem[vuindex]->Reset();
|
s_recVUMem[vuindex]->Reset();
|
||||||
s_recVUPtr[vuindex] = *s_recVUMem[vuindex];
|
s_recVUPtr[vuindex] = *s_recVUMem[vuindex];
|
||||||
|
|
Loading…
Reference in New Issue