mirror of https://github.com/PCSX2/pcsx2.git
Made several improvements and cleanups to the memzero API - including better linux version of the header. Also changed memcpy_amd_ / memcpy_fast to use __fastcall convention.
git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@639 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
parent
c63a7dc032
commit
b2eb1cad4f
|
@ -419,7 +419,7 @@ s32 cdvdReadConfig(u8* config)
|
|||
((cdvd.COffset == 2) && (cdvd.CBlockIndex >= 7))
|
||||
)
|
||||
{
|
||||
memzero_air<16>(config);
|
||||
memzero_ptr<16>(config);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -490,7 +490,7 @@ void cdvdReadKey(u8 arg0, u16 arg1, u32 arg2, u8* key) {
|
|||
key_14 = ((numbers & 0x003E0) >> 2) | 0x04; // numbers = F8 extra = 04 unused = 03
|
||||
|
||||
// clear key values
|
||||
memzero_air<16>(key);
|
||||
memzero_ptr<16>(key);
|
||||
|
||||
// store key values
|
||||
key[ 0] = (key_0_3&0x000000FF)>> 0;
|
||||
|
|
|
@ -63,8 +63,8 @@ void CDVDFS_init(){
|
|||
cdReadMode.datapattern = CdSecS2048; //isofs driver only needs
|
||||
//2KB sectors
|
||||
|
||||
memset(fd_table, 0, sizeof(fd_table));
|
||||
memset(fd_used, 0, 16*sizeof(int));
|
||||
memzero_obj( fd_table );
|
||||
memzero_obj( fd_used );
|
||||
|
||||
inited = TRUE;
|
||||
|
||||
|
|
|
@ -520,7 +520,7 @@ void cdrReadInterrupt() {
|
|||
|
||||
if (cdr.RErr == -1) {
|
||||
CDR_LOG(" err\n");
|
||||
memzero_air<2340>(cdr.Transfer);
|
||||
memzero_ptr<2340>(cdr.Transfer);
|
||||
cdr.Stat = DiskError;
|
||||
cdr.Result[0]|= 0x01;
|
||||
ReadTrack();
|
||||
|
|
|
@ -113,7 +113,7 @@ static __forceinline void cpuRcntSet()
|
|||
void rcntInit() {
|
||||
int i;
|
||||
|
||||
memset(counters, 0, sizeof(counters));
|
||||
memzero_obj(counters);
|
||||
|
||||
for (i=0; i<4; i++) {
|
||||
counters[i].rate = 2;
|
||||
|
|
|
@ -136,7 +136,7 @@ void IPUProcessInterrupt()
|
|||
// Register accesses (run on EE thread)
|
||||
int ipuInit()
|
||||
{
|
||||
memzero_air<sizeof(IPUregisters)>(ipuRegs);
|
||||
memzero_ptr<sizeof(IPUregisters)>(ipuRegs);
|
||||
memzero_obj(g_BP);
|
||||
|
||||
//other stuff
|
||||
|
@ -154,7 +154,7 @@ int ipuInit()
|
|||
|
||||
void ipuReset()
|
||||
{
|
||||
memzero_air<sizeof(IPUregisters)>(ipuRegs);
|
||||
memzero_ptr<sizeof(IPUregisters)>(ipuRegs);
|
||||
g_nDMATransfer = 0;
|
||||
}
|
||||
|
||||
|
@ -384,7 +384,7 @@ static void ipuBCLR(u32 val) {
|
|||
g_BP.IFC = 0;
|
||||
ipuRegs->ctrl.BUSY = 0;
|
||||
ipuRegs->cmd.BUSY = 0;
|
||||
memzero_air<80>(readbits);
|
||||
memzero_ptr<80>(readbits);
|
||||
IPU_LOG("Clear IPU input FIFO. Set Bit offset=0x%X\n", g_BP.BP);
|
||||
}
|
||||
|
||||
|
|
|
@ -1054,8 +1054,8 @@ void mpeg2sliceIDEC(void* pdone)
|
|||
decoder->coded_block_pattern = 0x3F;//all 6 blocks
|
||||
//ipuRegs->ctrl.CBP = 0x3f;
|
||||
|
||||
memzero_air<sizeof(macroblock_8)>(decoder->mb8);
|
||||
memzero_air<sizeof(rgb32)>(decoder->rgb32);
|
||||
memzero_ptr<sizeof(macroblock_8)>(decoder->mb8);
|
||||
memzero_ptr<sizeof(rgb32)>(decoder->rgb32);
|
||||
|
||||
slice_intra_DCT (decoder, 0, (u8*)decoder->mb8->Y, DCT_stride);
|
||||
slice_intra_DCT (decoder, 0, (u8*)decoder->mb8->Y + 8, DCT_stride);
|
||||
|
@ -1194,8 +1194,8 @@ void mpeg2_slice(void* pdone)
|
|||
*(int*)pdone = 0;
|
||||
ipuRegs->ctrl.ECD = 0;
|
||||
|
||||
memzero_air<sizeof(macroblock_8)>(decoder->mb8);
|
||||
memzero_air<sizeof(macroblock_16)>(decoder->mb16);
|
||||
memzero_ptr<sizeof(macroblock_8)>(decoder->mb8);
|
||||
memzero_ptr<sizeof(macroblock_16)>(decoder->mb16);
|
||||
|
||||
bitstream_init (decoder);
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@ void psxMemAlloc()
|
|||
|
||||
void psxMemReset()
|
||||
{
|
||||
memzero_air<Ps2MemSize::IopRam>(psxM);
|
||||
memzero_ptr<Ps2MemSize::IopRam>(psxM);
|
||||
}
|
||||
|
||||
void psxMemShutdown()
|
||||
|
@ -404,8 +404,8 @@ void psxMemReset()
|
|||
|
||||
DbgCon::Status( "psxMemReset > Resetting core memory!" );
|
||||
|
||||
memzero_air<0x10000 * sizeof(uptr) * 2>( psxMemWLUT ); // clears both allocations, RLUT and WLUT
|
||||
memzero_air<m_psxMemSize>( m_psxAllMem );
|
||||
memzero_ptr<0x10000 * sizeof(uptr) * 2>( psxMemWLUT ); // clears both allocations, RLUT and WLUT
|
||||
memzero_ptr<m_psxMemSize>( m_psxAllMem );
|
||||
|
||||
// Trick! We're accessing RLUT here through WLUT, since it's the non-const pointer.
|
||||
// So the ones with a 1 prefixed (ala 0x18000, etc) are RLUT tables.
|
||||
|
|
|
@ -16,55 +16,55 @@
|
|||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
*/
|
||||
|
||||
#ifndef _PCSX2_MEMZERO_H_
|
||||
#define _PCSX2_MEMZERO_H_
|
||||
#ifndef _LNX_MEMZERO_H_
|
||||
#define _LNX_MEMZERO_H_
|
||||
|
||||
// This stubs out that memzero Windows specific stuff Air seems to have added
|
||||
// all over, to allow Linux to compile. I may actually try to translate the file at
|
||||
// some point, but for now, lets just use memset.
|
||||
// This header contains non-optimized implementations of memzero_ptr and memset8_obj,
|
||||
// memset16_obj, etc.
|
||||
|
||||
template< size_t bytes >
|
||||
static __forceinline void memzero_air( void *dest )
|
||||
template< u32 data, typename T >
|
||||
static __forceinline void memset32_obj( T& obj )
|
||||
{
|
||||
memset(dest, 0, bytes);
|
||||
// this function works on 32-bit aligned lengths of data only.
|
||||
// If the data length is not a factor of 32 bits, the C++ optimizing compiler will
|
||||
// probably just generate mysteriously broken code in Release builds. ;)
|
||||
|
||||
jASSUME( (sizeof(T) & 0x3) == 0 );
|
||||
|
||||
u32* dest = (u32*)&obj;
|
||||
for( int i=sizeof(T)>>2; i; --i, ++dest )
|
||||
*dest = data;
|
||||
}
|
||||
|
||||
template< u8 data, size_t bytes >
|
||||
static __forceinline void memset_8( void *dest )
|
||||
template< uint size >
|
||||
static __forceinline void memzero_ptr( void* dest )
|
||||
{
|
||||
memset(dest, data, bytes);
|
||||
memset( dest, 0, size );
|
||||
}
|
||||
|
||||
template< u16 data, size_t bytes >
|
||||
static __forceinline void memset_16( void *dest )
|
||||
{
|
||||
memset(dest, data, bytes);
|
||||
}
|
||||
|
||||
template< u32 data, size_t bytes >
|
||||
static __forceinline void memset_32( void *dest )
|
||||
{
|
||||
memset(dest, data, bytes);
|
||||
}
|
||||
|
||||
// This method can clear any object-like entity -- which is anything that is not a pointer.
|
||||
// Structures, static arrays, etc. No need to include sizeof() crap, this does it automatically
|
||||
// for you!
|
||||
template< typename T >
|
||||
static __forceinline void memzero_obj( T& object )
|
||||
static __forceinline void memzero_obj( T& obj )
|
||||
{
|
||||
memzero_air<sizeof(T)>( &object );
|
||||
memset( &obj, 0, sizeof( T ) );
|
||||
}
|
||||
|
||||
template< uint data, typename T >
|
||||
static __forceinline void memset_obj( T& object )
|
||||
template< u8 data, typename T >
|
||||
static __forceinline void memset8_obj( T& obj )
|
||||
{
|
||||
if( data <= 0xff )
|
||||
memset_8<(u8)data, sizeof(T)>( &object );
|
||||
else if( data <= 0xffff )
|
||||
memset_16<(u16)data, sizeof(T)>( &object );
|
||||
// Aligned sizes use the optimized 32 bit inline memset. Unaligned sizes use memset.
|
||||
if( (sizeof(T) & 0x3) != 0 )
|
||||
memset( &obj, data, sizeof( T ) );
|
||||
else
|
||||
memset_32<(u32)data, sizeof(T)>( &object );
|
||||
memset32_obj<data + (data<<8) + (data<<16) + (data<<24)>( obj );
|
||||
}
|
||||
|
||||
#endif
|
||||
template< u16 data, typename T >
|
||||
static __forceinline void memset16_obj( T& obj )
|
||||
{
|
||||
if( (sizeof(T) & 0x3) != 0 )
|
||||
_memset_16_unaligned( &obj, data, sizeof( T ) )
|
||||
else
|
||||
memset32_obj<data + (data<<16)>( obj );
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -68,8 +68,6 @@ BIOS
|
|||
extern u32 maxrecmem;
|
||||
extern int rdram_devices, rdram_sdevid;
|
||||
|
||||
extern void * memcpy_fast(void *dest, const void *src, size_t n);
|
||||
|
||||
//#define FULLTLB
|
||||
int MemMode = 0; // 0 is Kernel Mode, 1 is Supervisor Mode, 2 is User Mode
|
||||
|
||||
|
@ -518,7 +516,7 @@ void vm_Reset()
|
|||
{
|
||||
jASSUME( memLUT != NULL );
|
||||
|
||||
memzero_air<sizeof(PSMEMORYMAP)*0x100000>(memLUT);
|
||||
memzero_ptr<sizeof(PSMEMORYMAP)*0x100000>(memLUT);
|
||||
for (int i=0; i<0x02000; i++) memLUT[i + 0x00000] = initMemoryMap(&s_psM.aPFNs[i], &s_psM.aVFNs[i]);
|
||||
for (int i=2; i<0x00010; i++) memLUT[i + 0x10000] = initMemoryMap(&s_psHw.aPFNs[i], &s_psHw.aVFNs[i]);
|
||||
for (int i=0; i<0x00800; i++) memLUT[i + 0x1c000] = initMemoryMap(&s_psxM.aPFNs[(i & 0x1ff)], &s_psxM.aVFNs[(i & 0x1ff)]);
|
||||
|
@ -2707,8 +2705,8 @@ void memReset()
|
|||
mprotect(PS2EMEM_EROM, Ps2MemSize::ERom, PROT_READ|PROT_WRITE);
|
||||
# endif
|
||||
|
||||
memzero_air<Ps2MemSize::Base>(PS2MEM_BASE);
|
||||
memzero_air<Ps2MemSize::Scratch>(PS2MEM_SCRATCH);
|
||||
memzero_ptr<Ps2MemSize::Base>(PS2MEM_BASE);
|
||||
memzero_ptr<Ps2MemSize::Scratch>(PS2MEM_SCRATCH);
|
||||
vm_Reset();
|
||||
|
||||
#else
|
||||
|
@ -2728,7 +2726,7 @@ void memReset()
|
|||
// rest of the emu is not really set up to support a "soft" reset of that sort
|
||||
// we opt for the hard/safe version.
|
||||
|
||||
memzero_air<m_allMemSize>( m_psAllMem );
|
||||
memzero_ptr<m_allMemSize>( m_psAllMem );
|
||||
#ifdef ENABLECACHE
|
||||
memset(pCache,0,sizeof(_cacheS)*64);
|
||||
#endif
|
||||
|
|
|
@ -808,3 +808,12 @@ u64 GetCPUTicks()
|
|||
return ((u64)t.tv_sec*GetTickFrequency())+t.tv_usec;
|
||||
#endif
|
||||
}
|
||||
|
||||
void _memset16_unaligned( void* dest, u16 data, size_t size )
|
||||
{
|
||||
jASSUME( (size & 0x1) == 0 );
|
||||
|
||||
u16* dst = (u16*)dest;
|
||||
for(int i=size; i; --i, ++dst )
|
||||
*dst = data;
|
||||
}
|
||||
|
|
12
pcsx2/Misc.h
12
pcsx2/Misc.h
|
@ -227,22 +227,24 @@ extern u8 g_globalXMMSaved;
|
|||
#define FreezeXMMRegs(save) if( g_EEFreezeRegs ) { FreezeXMMRegs_(save); }
|
||||
#define FreezeMMXRegs(save) if( g_EEFreezeRegs ) { FreezeMMXRegs_(save); }
|
||||
|
||||
void _memset16_unaligned( void* dest, u16 data, size_t size );
|
||||
|
||||
#if defined(_WIN32) && !defined(__x86_64__)
|
||||
// faster memcpy
|
||||
extern void __fastcall memcpy_raz_u(void *dest, const void *src, size_t bytes);
|
||||
extern void __fastcall memcpy_raz_(void *dest, const void *src, size_t qwc);
|
||||
extern void * memcpy_amd_(void *dest, const void *src, size_t n);
|
||||
#include "windows/memzero.h"
|
||||
extern void __fastcall memcpy_amd_(void *dest, const void *src, size_t n);
|
||||
# include "windows/memzero.h"
|
||||
# define memcpy_fast memcpy_amd_
|
||||
|
||||
#define memcpy_fast memcpy_amd_
|
||||
//#define memcpy_fast memcpy //Dont use normal memcpy, it has sse in 2k5!
|
||||
#else
|
||||
// for now disable linux fast memcpy
|
||||
|
||||
// for now linux uses the GCC memcpy/memset implementations.
|
||||
#define memcpy_fast memcpy
|
||||
#define memcpy_raz_ memcpy
|
||||
#define memcpy_raz_u memcpy
|
||||
#include "Linux/memzero.h"
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
|
|
@ -86,9 +86,9 @@ void cpuReset()
|
|||
psxMemReset();
|
||||
vuMicroMemReset();
|
||||
|
||||
memset(&cpuRegs, 0, sizeof(cpuRegs));
|
||||
memset(&fpuRegs, 0, sizeof(fpuRegs));
|
||||
memset(&tlb, 0, sizeof(tlb));
|
||||
memzero_obj(cpuRegs);
|
||||
memzero_obj(fpuRegs);
|
||||
memzero_obj(tlb);
|
||||
|
||||
cpuRegs.pc = 0xbfc00000; ///set pc reg to stack
|
||||
cpuRegs.CP0.n.Config = 0x440;
|
||||
|
|
|
@ -153,7 +153,7 @@ void SIO_CommandWrite(u8 value,int way) {
|
|||
PAD_LOG("RESET MEMORY CARD\n");
|
||||
|
||||
sio.bufcount = 8;
|
||||
memset_obj<0xff>(sio.buf);
|
||||
memset8_obj<0xff>(sio.buf);
|
||||
sio.buf[3] = sio.terminator;
|
||||
sio.buf[2] = '+';
|
||||
sio.mcdst = 99;
|
||||
|
@ -161,7 +161,7 @@ void SIO_CommandWrite(u8 value,int way) {
|
|||
break;
|
||||
case 0x12: // RESET
|
||||
sio.bufcount = 8;
|
||||
memset_obj<0xff>(sio.buf);
|
||||
memset8_obj<0xff>(sio.buf);
|
||||
sio.buf[3] = sio.terminator;
|
||||
sio.buf[2] = '+';
|
||||
sio.mcdst = 99;
|
||||
|
@ -171,7 +171,7 @@ void SIO_CommandWrite(u8 value,int way) {
|
|||
break;
|
||||
case 0x81: // COMMIT
|
||||
sio.bufcount = 8;
|
||||
memset_obj<0xff>(sio.buf);
|
||||
memset8_obj<0xff>(sio.buf);
|
||||
sio.mcdst = 99;
|
||||
sio.buf[3] = sio.terminator;
|
||||
sio.buf[2] = '+';
|
||||
|
@ -187,7 +187,7 @@ void SIO_CommandWrite(u8 value,int way) {
|
|||
case 0x22:
|
||||
case 0x23: // SECTOR SET
|
||||
sio.bufcount = 8; sio.mcdst = 99; sio.sector=0; sio.k=0;
|
||||
memset_obj<0xff>(sio.buf);
|
||||
memset8_obj<0xff>(sio.buf);
|
||||
sio2.packet.recvVal3 = 0x8c;
|
||||
sio.buf[8]=sio.terminator;
|
||||
sio.buf[7]='+';
|
||||
|
@ -201,7 +201,7 @@ void SIO_CommandWrite(u8 value,int way) {
|
|||
break;
|
||||
case 0x26:
|
||||
sio.bufcount = 12; sio.mcdst = 99; sio2.packet.recvVal3 = 0x83;
|
||||
memset_obj<0xff>(sio.buf);
|
||||
memset8_obj<0xff>(sio.buf);
|
||||
memcpy(&sio.buf[2], &mc_command_0x26, sizeof(mc_command_0x26));
|
||||
sio.buf[12]=sio.terminator;
|
||||
MEMCARDS_LOG("MC(%d) command 0x%02X\n", ((sio.CtrlReg&0x2000)>>13)+1, value);
|
||||
|
@ -210,7 +210,7 @@ void SIO_CommandWrite(u8 value,int way) {
|
|||
case 0x28:
|
||||
case 0xBF:
|
||||
sio.bufcount = 4; sio.mcdst = 99; sio2.packet.recvVal3 = 0x8b;
|
||||
memset_obj<0xff>(sio.buf);
|
||||
memset8_obj<0xff>(sio.buf);
|
||||
sio.buf[4]=sio.terminator;
|
||||
sio.buf[3]='+';
|
||||
MEMCARDS_LOG("MC(%d) command 0x%02X\n", ((sio.CtrlReg&0x2000)>>13)+1, value);
|
||||
|
@ -223,7 +223,7 @@ void SIO_CommandWrite(u8 value,int way) {
|
|||
if(value==0x43) sio.lastsector = sio.sector; // Reading
|
||||
|
||||
sio.bufcount =133; sio.mcdst = 99;
|
||||
memset_obj<0xff>(sio.buf);
|
||||
memset8_obj<0xff>(sio.buf);
|
||||
sio.buf[133]=sio.terminator;
|
||||
sio.buf[132]='+';
|
||||
MEMCARDS_LOG("MC(%d) command 0x%02X\n", ((sio.CtrlReg&0x2000)>>13)+1, value);
|
||||
|
@ -237,24 +237,24 @@ void SIO_CommandWrite(u8 value,int way) {
|
|||
case 0xf3:
|
||||
case 0xf7:
|
||||
sio.bufcount = 4; sio.mcdst = 99;
|
||||
memset_obj<0xff>(sio.buf);
|
||||
memset8_obj<0xff>(sio.buf);
|
||||
sio.buf[4]=sio.terminator;
|
||||
sio.buf[3]='+';
|
||||
MEMCARDS_LOG("MC(%d) command 0x%02X\n", ((sio.CtrlReg&0x2000)>>13)+1, value);
|
||||
break;
|
||||
case 0x52:
|
||||
sio.rdwr = 1; memset_obj<0xff>(sio.buf);
|
||||
sio.rdwr = 1; memset8_obj<0xff>(sio.buf);
|
||||
sio.buf[sio.bufcount]=sio.terminator; sio.buf[sio.bufcount-1]='+';
|
||||
MEMCARDS_LOG("MC(%d) command 0x%02X\n", ((sio.CtrlReg&0x2000)>>13)+1, value);
|
||||
break;
|
||||
case 0x57:
|
||||
sio.rdwr = 2; memset_obj<0xff>(sio.buf);
|
||||
sio.rdwr = 2; memset8_obj<0xff>(sio.buf);
|
||||
sio.buf[sio.bufcount]=sio.terminator; sio.buf[sio.bufcount-1]='+';
|
||||
MEMCARDS_LOG("MC(%d) command 0x%02X\n", ((sio.CtrlReg&0x2000)>>13)+1, value);
|
||||
break;
|
||||
default:
|
||||
sio.mcdst = 0;
|
||||
memset_obj<0xff>(sio.buf);
|
||||
memset8_obj<0xff>(sio.buf);
|
||||
sio.buf[sio.bufcount]=sio.terminator; sio.buf[sio.bufcount-1]='+';
|
||||
MEMCARDS_LOG("Unknown MC(%d) command 0x%02X\n", ((sio.CtrlReg&0x2000)>>13)+1, value);
|
||||
}
|
||||
|
@ -313,7 +313,7 @@ void SIO_CommandWrite(u8 value,int way) {
|
|||
case 0x42:
|
||||
if (sio.parp==2) {
|
||||
sio.bufcount=5+value;
|
||||
memset_obj<0xff>(sio.buf);
|
||||
memset8_obj<0xff>(sio.buf);
|
||||
sio.buf[sio.bufcount-1]='+';
|
||||
sio.buf[sio.bufcount]=sio.terminator;
|
||||
MEMCARDS_LOG("MC(%d) WRITE command 0x%02X\n\n\n\n\n", ((sio.CtrlReg&0x2000)>>13)+1, value);
|
||||
|
@ -387,7 +387,7 @@ void SIO_CommandWrite(u8 value,int way) {
|
|||
case 17:
|
||||
case 19:
|
||||
sio.bufcount=13;
|
||||
memset_obj<0xff>(sio.buf);
|
||||
memset8_obj<0xff>(sio.buf);
|
||||
sio.buf[12] = 0; // Xor value of data from index 4 to 11
|
||||
sio.buf[3]='+';
|
||||
sio.buf[13] = sio.terminator;
|
||||
|
@ -396,13 +396,13 @@ void SIO_CommandWrite(u8 value,int way) {
|
|||
case 7:
|
||||
case 11:
|
||||
sio.bufcount=13;
|
||||
memset_obj<0xff>(sio.buf);
|
||||
memset8_obj<0xff>(sio.buf);
|
||||
sio.buf[12]='+';
|
||||
sio.buf[13] = sio.terminator;
|
||||
break;
|
||||
default:
|
||||
sio.bufcount=4;
|
||||
memset_obj<0xff>(sio.buf);
|
||||
memset8_obj<0xff>(sio.buf);
|
||||
sio.buf[3]='+';
|
||||
sio.buf[4] = sio.terminator;
|
||||
}
|
||||
|
@ -628,7 +628,7 @@ void SaveMcd(int mcd, const u8 *data, u32 adr, int size) {
|
|||
|
||||
void EraseMcd(int mcd, u32 adr) {
|
||||
u8 data[528*16];
|
||||
memset_obj<0xff>(data); // clears to -1's
|
||||
memset8_obj<0xff>(data); // clears to -1's
|
||||
if(mcd == 1)
|
||||
{
|
||||
SeekMcd(MemoryCard1, adr);
|
||||
|
|
|
@ -180,13 +180,6 @@ void vuMicroMemReset()
|
|||
jASSUME( VU0.Mem != NULL );
|
||||
jASSUME( VU1.Mem != NULL );
|
||||
|
||||
/*#ifdef PCSX2_VIRTUAL_MEM
|
||||
memLUT[0x11000].aPFNs = &s_psVuMem.aPFNs[0]; memLUT[0x11000].aVFNs = &s_psVuMem.aVFNs[0];
|
||||
memLUT[0x11001].aPFNs = &s_psVuMem.aPFNs[0]; memLUT[0x11001].aVFNs = &s_psVuMem.aVFNs[0];
|
||||
memLUT[0x11002].aPFNs = &s_psVuMem.aPFNs[0]; memLUT[0x11002].aVFNs = &s_psVuMem.aVFNs[0];
|
||||
memLUT[0x11003].aPFNs = &s_psVuMem.aPFNs[0]; memLUT[0x11003].aVFNs = &s_psVuMem.aVFNs[0];
|
||||
#endif*/
|
||||
|
||||
// === VU0 Initialization ===
|
||||
memzero_obj(VU0.ACC);
|
||||
memzero_obj(VU0.VF);
|
||||
|
@ -196,8 +189,8 @@ void vuMicroMemReset()
|
|||
VU0.VF[0].f.z = 0.0f;
|
||||
VU0.VF[0].f.w = 1.0f;
|
||||
VU0.VI[0].UL = 0;
|
||||
memzero_air<4*1024>(VU0.Mem);
|
||||
memzero_air<4*1024>(VU0.Micro);
|
||||
memzero_ptr<4*1024>(VU0.Mem);
|
||||
memzero_ptr<4*1024>(VU0.Micro);
|
||||
|
||||
/* this is kinda tricky, maxmem is set to 0x4400 here,
|
||||
tho it's not 100% accurate, since the mem goes from
|
||||
|
@ -219,8 +212,8 @@ void vuMicroMemReset()
|
|||
VU1.VF[0].f.z = 0.0f;
|
||||
VU1.VF[0].f.w = 1.0f;
|
||||
VU1.VI[0].UL = 0;
|
||||
memzero_air<16*1024>(VU1.Mem);
|
||||
memzero_air<16*1024>(VU1.Micro);
|
||||
memzero_ptr<16*1024>(VU1.Mem);
|
||||
memzero_ptr<16*1024>(VU1.Micro);
|
||||
|
||||
VU1.maxmem = -1;//16*1024-4;
|
||||
VU1.maxmicro = 16*1024-4;
|
||||
|
@ -247,7 +240,7 @@ void SaveState::vuMicroFreeze()
|
|||
else
|
||||
{
|
||||
// Old versions stored the VIregs as 32 bit values...
|
||||
memset( VU0.VI, 0, sizeof( VU0.VI ) );
|
||||
memzero_obj( VU0.VI );
|
||||
for(int i=0; i<32; i++ )
|
||||
Freeze( VU0.VI[i].UL );
|
||||
}
|
||||
|
@ -263,7 +256,7 @@ void SaveState::vuMicroFreeze()
|
|||
else
|
||||
{
|
||||
// Old versions stored the VIregs as 32 bit values...
|
||||
memset( VU1.VI, 0, sizeof( VU1.VI ) );
|
||||
memzero_obj( VU1.VI );
|
||||
for(int i=0; i<32; i++ )
|
||||
Freeze( VU1.VI[i].UL );
|
||||
}
|
||||
|
|
|
@ -68,8 +68,6 @@ static const unsigned int VIF1dmanum = 1;
|
|||
int g_vifCycles = 0;
|
||||
int path3hack = 0;
|
||||
|
||||
extern void * memcpy_fast(void *dest, const void *src, size_t n);
|
||||
|
||||
typedef void (*UNPACKFUNCTYPE)( u32 *dest, u32 *data, int size );
|
||||
typedef int (*UNPACKPARTFUNCTYPESSE)( u32 *dest, u32 *data, int size );
|
||||
extern void (*Vif1CMDTLB[82])();
|
||||
|
@ -856,9 +854,8 @@ static int Vif0TransSTRow(u32 *data){ // STROW
|
|||
case 3: pmem[8] = data[2]; pmem2[2] = data[2];
|
||||
case 2: pmem[4] = data[1]; pmem2[1] = data[1];
|
||||
case 1: pmem[0] = data[0]; pmem2[0] = data[0]; break;
|
||||
#ifdef _MSC_VER
|
||||
default: __assume(0);
|
||||
#endif
|
||||
|
||||
jNO_DEFAULT
|
||||
}
|
||||
vif0.tag.addr += ret;
|
||||
vif0.tag.size -= ret;
|
||||
|
@ -878,9 +875,8 @@ static int Vif0TransSTCol(u32 *data){ // STCOL
|
|||
case 3: pmem[8] = data[2]; pmem2[2] = data[2];
|
||||
case 2: pmem[4] = data[1]; pmem2[1] = data[1];
|
||||
case 1: pmem[0] = data[0]; pmem2[0] = data[0]; break;
|
||||
#ifdef _MSC_VER
|
||||
default: __assume(0);
|
||||
#endif
|
||||
|
||||
jNO_DEFAULT
|
||||
}
|
||||
vif0.tag.addr += ret;
|
||||
vif0.tag.size -= ret;
|
||||
|
|
|
@ -334,7 +334,7 @@ int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine
|
|||
textdomain(PACKAGE);
|
||||
#endif
|
||||
|
||||
memset(&g_TestRun, 0, sizeof(g_TestRun));
|
||||
memzero_obj(g_TestRun);
|
||||
|
||||
_getcwd( g_WorkingFolder, g_MaxPath );
|
||||
|
||||
|
@ -429,8 +429,8 @@ BOOL Open_File_Proc( std::string& outstr )
|
|||
char szFileTitle[ g_MaxPath ];
|
||||
char * filter = "ELF Files (*.ELF)\0*.ELF\0ALL Files (*.*)\0*.*\0";
|
||||
|
||||
memset( &szFileName, 0, sizeof( szFileName ) );
|
||||
memset( &szFileTitle, 0, sizeof( szFileTitle ) );
|
||||
memzero_obj( szFileName );
|
||||
memzero_obj( szFileTitle );
|
||||
|
||||
ofn.lStructSize = sizeof( OPENFILENAME );
|
||||
ofn.hwndOwner = gApp.hWnd;
|
||||
|
|
|
@ -543,8 +543,8 @@ void OnStates_LoadOther()
|
|||
char szFileTitle[g_MaxPath];
|
||||
char szFilter[g_MaxPath];
|
||||
|
||||
memset(&szFileName, 0, sizeof(szFileName));
|
||||
memset(&szFileTitle, 0, sizeof(szFileTitle));
|
||||
memzero_obj( szFileName );
|
||||
memzero_obj( szFileTitle );
|
||||
|
||||
strcpy(szFilter, _("PCSX2 State Format"));
|
||||
strcatz(szFilter, "*.*;*.*");
|
||||
|
@ -575,8 +575,8 @@ void OnStates_SaveOther()
|
|||
char szFileTitle[g_MaxPath];
|
||||
char szFilter[g_MaxPath];
|
||||
|
||||
memset(&szFileName, 0, sizeof(szFileName));
|
||||
memset(&szFileTitle, 0, sizeof(szFileTitle));
|
||||
memzero_obj( szFileName );
|
||||
memzero_obj( szFileTitle );
|
||||
|
||||
strcpy(szFilter, _("PCSX2 State Format"));
|
||||
strcatz(szFilter, "*.*;*.*");
|
||||
|
|
|
@ -16,44 +16,77 @@
|
|||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
*/
|
||||
|
||||
#ifndef _PCSX2_MEMZERO_H_
|
||||
#define _PCSX2_MEMZERO_H_
|
||||
#ifndef _WIN_MEMZERO_H_
|
||||
#define _WIN_MEMZERO_H_
|
||||
|
||||
// This is an implementation of the memzero_air fast memset routine (for zero-clears only).
|
||||
// It uses templates so that it generates very efficient and compact inline code for clears.
|
||||
// These functions are meant for memset operations of constant length only.
|
||||
// For dynamic length clears, use the C-compiler provided memset instead.
|
||||
|
||||
// MemZero Code Strategies:
|
||||
// I use a trick to help the MSVC compiler optimize its asm code better. The compiler
|
||||
// won't optimize local variables very well because it insists on storing them on the
|
||||
// stack and then loading them out of the stack when I use them from inline ASM, and
|
||||
// it won't allow me to use template parameters in inline asm code either. But I can
|
||||
// assign the template parameters to enums, and then use the enums from asm code.
|
||||
// Yeah, silly, but it works. :D (air)
|
||||
|
||||
// All methods defined in this header use template in combination with the aforementioned
|
||||
// enumerations to generate very efficient and compact inlined code. These optimized
|
||||
// memsets work on the theory that most uses of memset involve static arrays and
|
||||
// structures, which are constant in size, thus allowing us to generate optimal compile-
|
||||
// time code for each use of the function.
|
||||
|
||||
// Notes on XMM0's "storage" area (_xmm_backup):
|
||||
// Unfortunately there's no way to guarantee alignment for this variable. If I use the
|
||||
// __declspec(aligned(16)) decorator, MSVC fails to inline the function since stack
|
||||
// alignment requires prep work. And for the same reason it's not possible to check the
|
||||
// alignment of the stack at compile time, so I'm forced to use movups to store and
|
||||
// retrieve xmm0.
|
||||
|
||||
|
||||
// This is an implementation of the memzero_ptr fast memset routine (for zero-clears only).
|
||||
template< size_t bytes >
|
||||
static __forceinline void memzero_air( void *dest )
|
||||
static __forceinline void memzero_ptr( void *dest )
|
||||
{
|
||||
if( bytes == 0 ) return;
|
||||
|
||||
u64 _xmm_backup[2];
|
||||
// This function only works on 32-bit alignments. For anything else we just fall back
|
||||
// on the compiler-provided implementation of memset...
|
||||
|
||||
enum half_local
|
||||
if( (bytes & 0x3) != 0 )
|
||||
{
|
||||
memset( dest, 0, bytes );
|
||||
return;
|
||||
}
|
||||
|
||||
enum
|
||||
{
|
||||
remainder = bytes & 127,
|
||||
bytes128 = bytes / 128
|
||||
};
|
||||
|
||||
// Initial check -- if the length is not a multiple of 16 then fall back on
|
||||
// using rep movsd methods. Handling these unaligned writes in a more efficient
|
||||
// manner isn't necessary in pcsx2.
|
||||
// using rep movsd methods. Handling these unaligned clears in a more efficient
|
||||
// manner isn't necessary in pcsx2 (meaning they aren't used in speed-critical
|
||||
// scenarios).
|
||||
|
||||
if( (bytes & 0xf) == 0 )
|
||||
{
|
||||
u64 _xmm_backup[2];
|
||||
|
||||
if( ((uptr)dest & 0xf) != 0 )
|
||||
{
|
||||
// UNALIGNED COPY MODE.
|
||||
// For unaligned copies we have a threshold of at least 128 vectors. Anything
|
||||
// less and it's probably better off just falling back on the rep movsd.
|
||||
if( bytes128 >128 )
|
||||
if( bytes128 > 128 )
|
||||
{
|
||||
__asm
|
||||
{
|
||||
movups _xmm_backup,xmm0;
|
||||
mov eax,bytes128
|
||||
mov ecx,dest
|
||||
pxor xmm0,xmm0
|
||||
mov eax,bytes128
|
||||
|
||||
align 16
|
||||
|
||||
|
@ -99,9 +132,9 @@ static __forceinline void memzero_air( void *dest )
|
|||
__asm
|
||||
{
|
||||
movups _xmm_backup,xmm0;
|
||||
mov eax,bytes128
|
||||
mov ecx,dest
|
||||
pxor xmm0,xmm0
|
||||
mov eax,bytes128
|
||||
|
||||
align 16
|
||||
|
||||
|
@ -143,37 +176,26 @@ static __forceinline void memzero_air( void *dest )
|
|||
jASSUME( (bytes & 0x3) == 0 );
|
||||
jASSUME( ((uptr)dest & 0x3) == 0 );
|
||||
|
||||
enum __local
|
||||
enum
|
||||
{
|
||||
remdat = bytes>>2
|
||||
};
|
||||
|
||||
// This case statement handles 5 special-case sizes (small blocks)
|
||||
// in addition to the generic large block.
|
||||
// in addition to the generic large block that uses rep stosd.
|
||||
|
||||
switch( remdat )
|
||||
{
|
||||
case 1:
|
||||
__asm
|
||||
{
|
||||
mov edi, dest
|
||||
xor eax, eax
|
||||
mov edi, eax
|
||||
}
|
||||
*(u32*)dest = 0;
|
||||
return;
|
||||
|
||||
case 2:
|
||||
_asm
|
||||
{
|
||||
mov edi, dest
|
||||
xor eax, eax
|
||||
stosd
|
||||
stosd
|
||||
}
|
||||
*(u64*)dest = 0;
|
||||
return;
|
||||
|
||||
case 3:
|
||||
_asm
|
||||
__asm
|
||||
{
|
||||
mov edi, dest
|
||||
xor eax, eax
|
||||
|
@ -184,7 +206,7 @@ static __forceinline void memzero_air( void *dest )
|
|||
return;
|
||||
|
||||
case 4:
|
||||
_asm
|
||||
__asm
|
||||
{
|
||||
mov edi, dest
|
||||
xor eax, eax
|
||||
|
@ -196,7 +218,7 @@ static __forceinline void memzero_air( void *dest )
|
|||
return;
|
||||
|
||||
case 5:
|
||||
_asm
|
||||
__asm
|
||||
{
|
||||
mov edi, dest
|
||||
xor eax, eax
|
||||
|
@ -220,11 +242,21 @@ static __forceinline void memzero_air( void *dest )
|
|||
}
|
||||
}
|
||||
|
||||
// An optimized memset for 8 bit destination data.
|
||||
template< u8 data, size_t bytes >
|
||||
static __forceinline void memset_8( void *dest )
|
||||
{
|
||||
if( bytes == 0 ) return;
|
||||
|
||||
if( (bytes & 0x3) != 0 )
|
||||
{
|
||||
// unaligned data length. No point in doing an optimized inline version (too complicated!)
|
||||
// So fall back on the compiler implementation:
|
||||
|
||||
memset( dest, data, bytes );
|
||||
return;
|
||||
}
|
||||
|
||||
//u64 _xmm_backup[2];
|
||||
|
||||
/*static const size_t remainder = bytes & 127;
|
||||
|
@ -274,25 +306,74 @@ static __forceinline void memset_8( void *dest )
|
|||
{
|
||||
movups xmm0,[_xmm_backup];
|
||||
}
|
||||
}
|
||||
else*/
|
||||
}*/
|
||||
|
||||
// This function only works on 32-bit alignments of data copied.
|
||||
jASSUME( (bytes & 0x3) == 0 );
|
||||
|
||||
enum
|
||||
{
|
||||
// This function only works on 32-bit alignments of data copied.
|
||||
jASSUME( (bytes & 0x3) == 0 );
|
||||
remdat = bytes>>2,
|
||||
data32 = data + (data<<8) + (data<<16) + (data<<24)
|
||||
};
|
||||
|
||||
enum local
|
||||
{
|
||||
remdat = bytes>>2,
|
||||
data32 = data + (data<<8) + (data<<16) + (data<<24)
|
||||
};
|
||||
// macro to execute the x86/32 "stosd" copies.
|
||||
switch( remdat )
|
||||
{
|
||||
case 1:
|
||||
*(u32*)dest = data32;
|
||||
return;
|
||||
|
||||
__asm
|
||||
{
|
||||
mov eax, data32
|
||||
mov ecx, remdat
|
||||
mov edi, dest
|
||||
rep stosd
|
||||
}
|
||||
case 2:
|
||||
((u32*)dest)[0] = data32;
|
||||
((u32*)dest)[1] = data32;
|
||||
return;
|
||||
|
||||
case 3:
|
||||
__asm
|
||||
{
|
||||
mov edi, dest;
|
||||
mov eax, data32;
|
||||
stosd;
|
||||
stosd;
|
||||
stosd;
|
||||
}
|
||||
return;
|
||||
|
||||
case 4:
|
||||
__asm
|
||||
{
|
||||
mov edi, dest;
|
||||
mov eax, data32;
|
||||
stosd;
|
||||
stosd;
|
||||
stosd;
|
||||
stosd;
|
||||
}
|
||||
return;
|
||||
|
||||
case 5:
|
||||
__asm
|
||||
{
|
||||
mov edi, dest;
|
||||
mov eax, data32;
|
||||
stosd;
|
||||
stosd;
|
||||
stosd;
|
||||
stosd;
|
||||
stosd;
|
||||
}
|
||||
return;
|
||||
|
||||
default:
|
||||
__asm
|
||||
{
|
||||
mov ecx, remdat;
|
||||
mov edi, dest;
|
||||
mov eax, data32;
|
||||
rep stosd;
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -301,24 +382,86 @@ static __forceinline void memset_16( void *dest )
|
|||
{
|
||||
if( bytes == 0 ) return;
|
||||
|
||||
if( (bytes & 0x1) != 0 )
|
||||
throw Exception::LogicError( "Invalid parameter passed to memset_16 - data length is not a multiple of 16 or 32 bits." );
|
||||
|
||||
if( (bytes & 0x3) != 0 )
|
||||
{
|
||||
// Unaligned data length. No point in doing an optimized inline version (too complicated with
|
||||
// remainders and such).
|
||||
|
||||
_memset16_unaligned( dest, data, bytes );
|
||||
return;
|
||||
}
|
||||
|
||||
//u64 _xmm_backup[2];
|
||||
|
||||
{
|
||||
// This function only works on 32-bit alignments of data copied.
|
||||
jASSUME( (bytes & 0x3) == 0 );
|
||||
// This function only works on 32-bit alignments of data copied.
|
||||
jASSUME( (bytes & 0x3) == 0 );
|
||||
|
||||
enum local
|
||||
{
|
||||
remdat = bytes>>2,
|
||||
data32 = data + (data<<16)
|
||||
};
|
||||
__asm
|
||||
{
|
||||
mov eax, data32
|
||||
mov ecx, remdat
|
||||
mov edi, dest
|
||||
rep stosd
|
||||
}
|
||||
enum
|
||||
{
|
||||
remdat = bytes>>2,
|
||||
data32 = data + (data<<16)
|
||||
};
|
||||
|
||||
// macro to execute the x86/32 "stosd" copies.
|
||||
switch( remdat )
|
||||
{
|
||||
case 1:
|
||||
*(u32*)dest = data32;
|
||||
return;
|
||||
|
||||
case 2:
|
||||
((u32*)dest)[0] = data32;
|
||||
((u32*)dest)[1] = data32;
|
||||
return;
|
||||
|
||||
case 3:
|
||||
__asm
|
||||
{
|
||||
mov edi, dest;
|
||||
mov eax, data32;
|
||||
stosd;
|
||||
stosd;
|
||||
stosd;
|
||||
}
|
||||
return;
|
||||
|
||||
case 4:
|
||||
__asm
|
||||
{
|
||||
mov edi, dest;
|
||||
mov eax, data32;
|
||||
stosd;
|
||||
stosd;
|
||||
stosd;
|
||||
stosd;
|
||||
}
|
||||
return;
|
||||
|
||||
case 5:
|
||||
__asm
|
||||
{
|
||||
mov edi, dest;
|
||||
mov eax, data32;
|
||||
stosd;
|
||||
stosd;
|
||||
stosd;
|
||||
stosd;
|
||||
stosd;
|
||||
}
|
||||
return;
|
||||
|
||||
default:
|
||||
__asm
|
||||
{
|
||||
mov ecx, remdat;
|
||||
mov edi, dest;
|
||||
mov eax, data32;
|
||||
rep stosd;
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -327,24 +470,86 @@ static __forceinline void memset_32( void *dest )
|
|||
{
|
||||
if( bytes == 0 ) return;
|
||||
|
||||
if( (bytes & 0x3) != 0 )
|
||||
throw Exception::LogicError( "Invalid parameter passed to memset_32 - data length is not a multiple of 32 bits." );
|
||||
|
||||
|
||||
//u64 _xmm_backup[2];
|
||||
|
||||
{
|
||||
// This function only works on 32-bit alignments of data copied.
|
||||
jASSUME( (bytes & 0x3) == 0 );
|
||||
// This function only works on 32-bit alignments of data copied.
|
||||
// If the data length is not a factor of 32 bits, the C++ optimizing compiler will
|
||||
// probably just generate mysteriously broken code in Release builds. ;)
|
||||
|
||||
enum local
|
||||
{
|
||||
remdat = bytes>>2,
|
||||
data32 = data
|
||||
};
|
||||
__asm
|
||||
{
|
||||
mov eax, data32
|
||||
mov ecx, remdat
|
||||
mov edi, dest
|
||||
rep stosd
|
||||
}
|
||||
jASSUME( (bytes & 0x3) == 0 );
|
||||
|
||||
enum
|
||||
{
|
||||
remdat = bytes>>2,
|
||||
data32 = data
|
||||
};
|
||||
|
||||
// Dispatch on the dword count: 1 is a plain store, 2-5 unroll "stosd", anything else uses "rep stosd".
|
||||
switch( remdat )
|
||||
{
|
||||
case 1:
|
||||
*(u32*)dest = data32;
|
||||
return;
|
||||
|
||||
case 2:
|
||||
__asm
|
||||
{
|
||||
mov edi, dest;
|
||||
mov eax, data32;
|
||||
stosd;
|
||||
stosd;
|
||||
}
|
||||
return;
|
||||
|
||||
case 3:
|
||||
__asm
|
||||
{
|
||||
mov edi, dest;
|
||||
mov eax, data32;
|
||||
stosd;
|
||||
stosd;
|
||||
stosd;
|
||||
}
|
||||
return;
|
||||
|
||||
case 4:
|
||||
__asm
|
||||
{
|
||||
mov edi, dest;
|
||||
mov eax, data32;
|
||||
stosd;
|
||||
stosd;
|
||||
stosd;
|
||||
stosd;
|
||||
}
|
||||
return;
|
||||
|
||||
case 5:
|
||||
__asm
|
||||
{
|
||||
mov edi, dest;
|
||||
mov eax, data32;
|
||||
stosd;
|
||||
stosd;
|
||||
stosd;
|
||||
stosd;
|
||||
stosd;
|
||||
}
|
||||
return;
|
||||
|
||||
default:
|
||||
__asm
|
||||
{
|
||||
mov ecx, remdat;
|
||||
mov edi, dest;
|
||||
mov eax, data32;
|
||||
rep stosd;
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -354,18 +559,29 @@ static __forceinline void memset_32( void *dest )
|
|||
template< typename T >
|
||||
static __forceinline void memzero_obj( T& object )
|
||||
{
|
||||
memzero_air<sizeof(T)>( &object );
|
||||
memzero_ptr<sizeof(T)>( &object );
|
||||
}
|
||||
|
||||
template< uint data, typename T >
|
||||
static __forceinline void memset_obj( T& object )
|
||||
// This method clears an object with the given 8 bit value.
|
||||
template< u8 data, typename T >
|
||||
static __forceinline void memset8_obj( T& object )
|
||||
{
|
||||
if( data <= 0xff )
|
||||
memset_8<(u8)data, sizeof(T)>( &object );
|
||||
else if( data <= 0xffff )
|
||||
memset_16<(u16)data, sizeof(T)>( &object );
|
||||
else
|
||||
memset_32<(u32)data, sizeof(T)>( &object );
|
||||
memset_8<data, sizeof(T)>( &object );
|
||||
}
|
||||
|
||||
#endif
|
||||
// This method clears an object with the given 16 bit value.
// `data` is the compile-time 16 bit fill value; `object` is overwritten in place.
// Forwards to memset_16 with sizeof(T) as the byte count and the object's address as
// the destination.
|
||||
template< u16 data, typename T >
|
||||
static __forceinline void memset16_obj( T& object )
|
||||
{
|
||||
memset_16<data, sizeof(T)>( &object );
|
||||
}
|
||||
|
||||
// This method clears an object with the given 32 bit value.
// `data` is the compile-time 32 bit fill value; `object` is overwritten in place.
// Forwards to memset_32 with sizeof(T) as the byte count and the object's address as
// the destination.
// NOTE(review): memset_32 carries jASSUME( (bytes & 0x3) == 0 ), so sizeof(T) presumably
// has to be a multiple of 4 bytes here -- confirm against memset_32's template parameters.
|
||||
template< u32 data, typename T >
|
||||
static __forceinline void memset32_obj( T& object )
|
||||
{
|
||||
memset_32<data, sizeof(T)>( &object );
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -89,16 +89,18 @@ void checkregs()
|
|||
|
||||
|
||||
__declspec(align(16)) static u8 _xmm_backup[16*2];
|
||||
//this one checks for alligments too ...
|
||||
|
||||
// this one checks for alignments too ...
|
||||
__declspec(naked) void __fastcall memcpy_raz_u(void *dest, const void *src, size_t bytes)
|
||||
{
|
||||
// If src is aligned, use memcpy_raz instead:
|
||||
__asm
|
||||
{
|
||||
test edx,0xf;
|
||||
jz memcpy_raz_;
|
||||
}
|
||||
//THIS CODE IS COPY PASTED FROM memcpy_raz_
|
||||
|
||||
|
||||
// MOVSRC = opcode used to read. I use the same code for the aligned version, with a different define :)
|
||||
#define MOVSRC movups
|
||||
__asm
|
||||
{
|
||||
|
@ -112,7 +114,7 @@ __declspec(naked) void __fastcall memcpy_raz_u(void *dest, const void *src, size
|
|||
cmp eax,127;
|
||||
jna _loop_1;
|
||||
|
||||
//unrolled version also toiches xmm1, save it :)
|
||||
//unrolled version also touches xmm1, save it :)
|
||||
movaps [_xmm_backup+0x10],xmm1;
|
||||
|
||||
//since this is a common branch target it could be good to align it -- no idea if it has any effect :p
|
||||
|
@ -178,14 +180,17 @@ cleanup:
|
|||
}
|
||||
#undef MOVSRC
|
||||
}
|
||||
//Custom memcpy, only for 16 byte aligned stuff (used for mtgs)
|
||||
//These functions are optimised for medium-small transfer sizes (<2048, >=128).No prefetching is used since the reads are linear
|
||||
//and the cache logic can predict em :)
|
||||
//this implementation use forward copy, in 128 byte blocks, and then does the remaining in 16 byte blocks :)
|
||||
//MOVSRC = opcode used to read.I use the same code for the unaligned version, with a different define :)
|
||||
#define MOVSRC movaps
|
||||
// Custom memcpy, only for 16 byte aligned stuff (used for mtgs)
|
||||
// This function is optimized for medium-small transfer sizes (<2048, >=128). No prefetching is
|
||||
// used since the reads are linear and the cache logic can predict em :)
|
||||
|
||||
__declspec(naked) void __fastcall memcpy_raz_(void *dest, const void *src, size_t bytes)
|
||||
{
|
||||
// Code Implementation Notes:
|
||||
// Uses a forward copy, in 128 byte blocks, and then does the remaining in 16 byte blocks :)
|
||||
|
||||
// MOVSRC = opcode used to read. I use the same code for the unaligned version, with a different define :)
|
||||
#define MOVSRC movaps
|
||||
__asm
|
||||
{
|
||||
//Reads before reads, to avoid stalls
|
||||
|
@ -270,16 +275,15 @@ cleanup:
|
|||
//////////////////////////////////////////////////////////////////////////
|
||||
// Fast memcpy as coded by AMD.
|
||||
|
||||
void * memcpy_amd_(void *dest, const void *src, size_t n)
|
||||
// This function clobbers all MMX registers, and is generally not optimal for short memory
|
||||
// copies due to the amount of overhead required to test for alignments, copy length,
|
||||
// and other ABI overhead.
|
||||
void __fastcall memcpy_amd_(void *dest, const void *src, size_t n)
|
||||
{
|
||||
#ifdef _DEBUG
|
||||
__asm call checkregs
|
||||
#endif
|
||||
|
||||
__asm {
|
||||
mov edi, ecx ; destination
|
||||
mov esi, edx ; source
|
||||
mov ecx, [n] ; number of bytes to copy
|
||||
mov edi, [dest] ; destination
|
||||
mov esi, [src] ; source
|
||||
mov ebx, ecx ; keep a copy of count
|
||||
|
||||
cld
|
||||
|
@ -473,7 +477,7 @@ $memcpy_last_few: ; dword aligned from before movsd's
|
|||
$memcpy_final:
|
||||
emms ; clean up the MMX state
|
||||
sfence ; flush the write buffer
|
||||
mov eax, [dest] ; ret value = destination pointer
|
||||
//mov eax, [dest] ; ret value = destination pointer
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
#include "PrecompiledHeader.h"
|
||||
|
||||
#include "System.h"
|
||||
#include "Misc.h"
|
||||
#include "iR5900.h"
|
||||
#include "Vif.h"
|
||||
#include "VU.h"
|
||||
|
@ -83,7 +83,7 @@ using namespace std;
|
|||
static int s_xmmchecknext = 0;
|
||||
|
||||
void _initXMMregs() {
|
||||
memset(xmmregs, 0, sizeof(xmmregs));
|
||||
memzero_obj( xmmregs );
|
||||
g_xmmAllocCounter = 0;
|
||||
s_xmmchecknext = 0;
|
||||
}
|
||||
|
|
|
@ -163,7 +163,7 @@ static void iIopDumpBlock( int startpc, u8 * ptr )
|
|||
// write the instruction info
|
||||
fprintf(f, "\n\nlive0 - %x, lastuse - %x used - %x\n", EEINST_LIVE0, EEINST_LASTUSE, EEINST_USED);
|
||||
|
||||
memset(used, 0, sizeof(used));
|
||||
memzero_obj(used);
|
||||
numused = 0;
|
||||
for(i = 0; i < ARRAYSIZE(s_pInstCache->regs); ++i) {
|
||||
if( s_pInstCache->regs[i] & EEINST_USED ) {
|
||||
|
|
|
@ -384,7 +384,7 @@ void SuperVUAnalyzeOp(VURegs *VU, _vuopinfo *info, _VURegsNum* pCodeRegs)
|
|||
// check upper flags
|
||||
if (ptr[1] & 0x80000000) { // I flag
|
||||
info->cycle = vucycle;
|
||||
memzero_air<sizeof(lregs)>(lregs);
|
||||
memzero_ptr<sizeof(lregs)>(lregs);
|
||||
}
|
||||
else {
|
||||
|
||||
|
@ -1454,4 +1454,4 @@ void SetVUNanMode(int mode)
|
|||
{
|
||||
g_VuNanHandling = mode;
|
||||
if ( mode ) SysPrintf("enabling vunan mode");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -158,7 +158,7 @@ struct VuBlockHeader
|
|||
class VuInstruction
|
||||
{
|
||||
public:
|
||||
VuInstruction() { memzero_air<sizeof(VuInstruction)>(this); nParentPc = -1; vicached = -1; }
|
||||
VuInstruction() { memzero_ptr<sizeof(VuInstruction)>(this); nParentPc = -1; vicached = -1; }
|
||||
|
||||
int nParentPc; // used for syncing with flag writes, -1 for no parent
|
||||
|
||||
|
@ -419,7 +419,7 @@ void SuperVUReset(int vuindex)
|
|||
{
|
||||
DbgCon::Status( "SuperVU reset > Resetting recompiler memory and structures." );
|
||||
memset(s_recVUMem, 0xcd, VU_EXESIZE);
|
||||
memzero_air<SUPERVU_STACKSIZE>(recVUStack);
|
||||
memzero_ptr<SUPERVU_STACKSIZE>(recVUStack);
|
||||
|
||||
s_recVUPtr = s_recVUMem;
|
||||
}
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
*/
|
||||
#include "PrecompiledHeader.h"
|
||||
|
||||
#include "System.h"
|
||||
#include "Misc.h"
|
||||
#include "iR5900.h"
|
||||
#include "Vif.h"
|
||||
#include "VU.h"
|
||||
|
@ -37,7 +37,7 @@ int g_x86checknext;
|
|||
// use special x86 register allocation for ia32
|
||||
|
||||
void _initX86regs() {
|
||||
memset(x86regs, 0, sizeof(x86regs));
|
||||
memzero_obj(x86regs);
|
||||
g_x86AllocCounter = 0;
|
||||
g_x86checknext = 0;
|
||||
}
|
||||
|
@ -402,7 +402,7 @@ static int s_mmxchecknext = 0;
|
|||
|
||||
void _initMMXregs()
|
||||
{
|
||||
memset(mmxregs, 0, sizeof(mmxregs));
|
||||
memzero_obj(mmxregs);
|
||||
g_mmxAllocCounter = 0;
|
||||
s_mmxchecknext = 0;
|
||||
}
|
||||
|
|
|
@ -175,7 +175,7 @@ static void iDumpBlock( int startpc, u8 * ptr )
|
|||
fprintf(f, "\n\nlive0 - %x, live1 - %x, live2 - %x, lastuse - %x\nmmx - %x, xmm - %x, used - %x\n",
|
||||
EEINST_LIVE0, EEINST_LIVE1, EEINST_LIVE2, EEINST_LASTUSE, EEINST_MMX, EEINST_XMM, EEINST_USED);
|
||||
|
||||
memset(used, 0, sizeof(used));
|
||||
memzero_obj(used);
|
||||
numused = 0;
|
||||
for(i = 0; i < ARRAYSIZE(s_pInstCache->regs); ++i) {
|
||||
if( s_pInstCache->regs[i] & EEINST_USED ) {
|
||||
|
@ -184,7 +184,7 @@ static void iDumpBlock( int startpc, u8 * ptr )
|
|||
}
|
||||
}
|
||||
|
||||
memset(fpuused, 0, sizeof(fpuused));
|
||||
memzero_obj(fpuused);
|
||||
fpunumused = 0;
|
||||
for(i = 0; i < ARRAYSIZE(s_pInstCache->fpuregs); ++i) {
|
||||
if( s_pInstCache->fpuregs[i] & EEINST_USED ) {
|
||||
|
|
|
@ -239,7 +239,7 @@ void cpudetectInit()
|
|||
int num;
|
||||
char str[50];
|
||||
|
||||
memset( cpuinfo.x86ID, 0, sizeof( cpuinfo.x86ID ) );
|
||||
memzero_obj( cpuinfo.x86ID );
|
||||
cpuinfo.x86Family = 0;
|
||||
cpuinfo.x86Model = 0;
|
||||
cpuinfo.x86PType = 0;
|
||||
|
@ -326,7 +326,7 @@ void cpudetectInit()
|
|||
if ( cpuinfo.x86ID[ 0 ] == 'G' ){ cputype=0;}//trick lines but if you know a way better ;p
|
||||
if ( cpuinfo.x86ID[ 0 ] == 'A' ){ cputype=1;}
|
||||
|
||||
memset(cpuinfo.x86Fam, 0, sizeof(cpuinfo.x86Fam));
|
||||
memzero_obj( cpuinfo.x86Fam );
|
||||
iCpuId( 0x80000002, (u32*)cpuinfo.x86Fam);
|
||||
iCpuId( 0x80000003, (u32*)(cpuinfo.x86Fam+16));
|
||||
iCpuId( 0x80000004, (u32*)(cpuinfo.x86Fam+32));
|
||||
|
|
Loading…
Reference in New Issue