Remove some slow, redundant memcpy implementations: memcpy_const/memcpy_qwc/memcpy_aligned.

This commit is contained in:
Sacha 2014-08-13 17:55:23 +10:00
parent 9102e40f9c
commit 3f7cb991e4
15 changed files with 45 additions and 46 deletions

View File

@ -725,7 +725,7 @@ int cdvdReadSector() {
mdest[11] = 0;
// normal 2048 bytes of sector data
memcpy_const(&mdest[12], cdr.Transfer, 2048);
memcpy(&mdest[12], cdr.Transfer, 2048);
// 4 bytes of edc (not calculated at present)
mdest[2060] = 0;

View File

@ -442,27 +442,27 @@ extern __aligned(32) MTGS_BufferedData RingBuffer;
// Copies `len` u128 elements from the linear buffer `src` into a wrapping destination
// buffer `destBase` that holds `destSize` elements, beginning at element index `destStart`.
// If the copy would reach the end of the buffer it is split in two: the first part fills
// the tail of the buffer, the remainder continues at index 0.
// `destStart` is taken by reference and is advanced to the index just past the copied data.
// All positions and lengths are element counts; the memcpy calls scale them by 16 to get bytes.
// NOTE(review): assumes destStart < destSize and len <= destSize -- the wrapped remainder is
// derived with a modulo, which only equals (len - firstcopylen) under that assumption; confirm
// against callers.
// NOTE(review): this text is a diff rendering -- each memcpy_qwc line appears to be the removed
// line and the memcpy line directly below it its replacement (memcpy_qwc presumably takes its
// length in 16-byte units, memcpy takes bytes).
inline void MemCopy_WrappedDest( const u128* src, u128* destBase, uint& destStart, uint destSize, uint len ) {
// Element index one past the end of the copy if no wrapping took place.
uint endpos = destStart + len;
if ( endpos < destSize ) {
// Fits strictly before the end of the buffer: one contiguous copy.
memcpy_qwc(&destBase[destStart], src, len );
memcpy(&destBase[destStart], src, len*16);
destStart += len;
}
else {
// Reaches (or passes) the end of the buffer: first fill the space up to the end...
uint firstcopylen = destSize - destStart;
memcpy_qwc(&destBase[destStart], src, firstcopylen );
memcpy(&destBase[destStart], src, firstcopylen*16);
// ...then wrap. The new start index doubles as the element count of the second copy
// (it is 0 when the data ends exactly at the end of the buffer).
destStart = endpos % destSize;
memcpy_qwc(destBase, src+firstcopylen, destStart );
memcpy(destBase, src+firstcopylen, destStart*16);
}
}
// Copies `len` u128 elements out of a wrapping source buffer `srcBase` that holds `srcSize`
// elements, beginning at element index `srcStart`, into the linear buffer `dest`.
// If the read would reach the end of the source buffer it is split in two: the first part
// reads the tail of the buffer, the remainder is read from index 0.
// `srcStart` is taken by reference and is advanced to the index just past the data read.
// All positions and lengths are element counts; the memcpy calls scale them by 16 to get bytes.
// NOTE(review): assumes srcStart < srcSize and len <= srcSize -- the wrapped remainder is
// derived with a modulo, which only equals (len - firstcopylen) under that assumption; confirm
// against callers.
// NOTE(review): this text is a diff rendering -- each memcpy_qwc line appears to be the removed
// line and the memcpy line directly below it its replacement (memcpy_qwc presumably takes its
// length in 16-byte units, memcpy takes bytes).
inline void MemCopy_WrappedSrc( const u128* srcBase, uint& srcStart, uint srcSize, u128* dest, uint len ) {
// Element index one past the end of the read if no wrapping took place.
uint endpos = srcStart + len;
if ( endpos < srcSize ) {
// Entire range lies strictly before the end of the buffer: one contiguous copy.
memcpy_qwc(dest, &srcBase[srcStart], len );
memcpy(dest, &srcBase[srcStart], len*16);
srcStart += len;
}
else {
// Reaches (or passes) the end of the buffer: first read everything up to the end...
uint firstcopylen = srcSize - srcStart;
memcpy_qwc(dest, &srcBase[srcStart], firstcopylen );
memcpy(dest, &srcBase[srcStart], firstcopylen*16);
// ...then wrap. The new start index doubles as the element count of the second copy
// (it is 0 when the data ends exactly at the end of the buffer).
srcStart = endpos % srcSize;
memcpy_qwc(dest+firstcopylen, srcBase, srcStart );
memcpy(dest+firstcopylen, srcBase, srcStart*16);
}
}

View File

@ -241,8 +241,7 @@ struct Gif_Path {
mtgsReadWait(); // Let MTGS run to free up buffer space
}
pxAssertDev(curSize+size<=buffSize, "Gif Path Buffer Overflow!");
if (aligned) memcpy_qwc (&buffer[curSize], pMem, size/16);
else memcpy_fast(&buffer[curSize], pMem, size);
memcpy (&buffer[curSize], pMem, size);
curSize += size;
}

View File

@ -181,7 +181,7 @@ void SysMtgsThread::OpenPlugin()
{
if( m_PluginOpened ) return;
memcpy_aligned( RingBuffer.Regs, PS2MEM_GS, sizeof(PS2MEM_GS) );
memcpy( RingBuffer.Regs, PS2MEM_GS, sizeof(PS2MEM_GS) );
GSsetBaseMem( RingBuffer.Regs );
GSirqCallback( dummyIrqCallback );

View File

@ -220,7 +220,7 @@ static void CALLBACK GS_Legacy_gifTransfer( const u32* src, u32 data )
// the transfer is most likely wrapped/partial. We need to queue it into a linear buffer
// and then send it on its way on the next copy.
memcpy_qwc( path1queue, src128, data );
memcpy( path1queue, src128, data*16);
path1size = data;
}
else
@ -235,7 +235,7 @@ static void CALLBACK GS_Legacy_gifTransfer( const u32* src, u32 data )
if (src128 == RingBuffer.m_Ring)
{
pxAssert( (data+path1size) <= 0x400 );
memcpy_qwc( &path1queue[path1size], src128, data );
memcpy( &path1queue[path1size], src128, data*16);
path1size += data;
}
GSgifTransfer1( (u32*)path1queue, 0 );

View File

@ -97,7 +97,7 @@ int _SPR0chain()
//Taking an arbitrary small value for games which like to check the QWC/MADR instead of STR, so get most of
//the cycle delay out of the way before the end.
partialqwc = spr0ch.qwc;
memcpy_qwc(pMem, &psSu128(spr0ch.sadr), partialqwc);
memcpy(pMem, &psSu128(spr0ch.sadr), partialqwc*16);
// clear VU mem also!
TestClearVUs(spr0ch.madr, partialqwc, true);
@ -151,7 +151,7 @@ void _SPR0interleave()
case MFD_RESERVED:
// clear VU mem also!
TestClearVUs(spr0ch.madr, spr0ch.qwc, true);
memcpy_qwc(pMem, &psSu128(spr0ch.sadr), spr0ch.qwc);
memcpy(pMem, &psSu128(spr0ch.sadr), spr0ch.qwc*16);
break;
}
spr0ch.sadr += spr0ch.qwc * 16;
@ -322,7 +322,7 @@ __fi static void SPR1transfer(const void* data, int qwc)
TestClearVUs(spr1ch.madr, spr1ch.qwc, false);
}
memcpy_qwc(&psSu128(spr1ch.sadr), data, qwc);
memcpy(&psSu128(spr1ch.sadr), data, qwc*16);
spr1ch.sadr += qwc * 16;
}
@ -381,7 +381,7 @@ void _SPR1interleave()
spr1ch.qwc = std::min(tqwc, qwc);
qwc -= spr1ch.qwc;
pMem = SPRdmaGetAddr(spr1ch.madr, false);
memcpy_qwc(&psSu128(spr1ch.sadr), pMem, spr1ch.qwc);
memcpy(&psSu128(spr1ch.sadr), pMem, spr1ch.qwc*16);
spr1ch.sadr += spr1ch.qwc * 16;
spr1ch.madr += (sqwc + spr1ch.qwc) * 16;
}

View File

@ -126,17 +126,17 @@ namespace VU1micro
#endif
runCount++;
memcpy_const((u8*)backVUregs, (u8*)&VU1, sizeof(VURegs));
memcpy_const((u8*)backVUmem, (u8*)VU1.Mem, 0x4000);
memcpy((u8*)backVUregs, (u8*)&VU1, sizeof(VURegs));
memcpy((u8*)backVUmem, (u8*)VU1.Mem, 0x4000);
do { // while loop needed since not always will return finished
SuperVUExecuteProgram(VU1.VI[ REG_TPC ].UL & 0x3fff, 1);
} while( VU0.VI[ REG_VPU_STAT ].UL&0x100 );
memcpy_const((u8*)cmpVUregs, (u8*)&VU1, sizeof(VURegs));
memcpy_const((u8*)cmpVUmem, (u8*)VU1.Mem, 0x4000);
memcpy_const((u8*)&VU1, (u8*)backVUregs, sizeof(VURegs));
memcpy_const((u8*)VU1.Mem, (u8*)backVUmem, 0x4000);
memcpy((u8*)cmpVUregs, (u8*)&VU1, sizeof(VURegs));
memcpy((u8*)cmpVUmem, (u8*)VU1.Mem, 0x4000);
memcpy((u8*)&VU1, (u8*)backVUregs, sizeof(VURegs));
memcpy((u8*)VU1.Mem, (u8*)backVUmem, 0x4000);
//Currently breaking mVU execution is disabled. Check mVUtestCycles<vuIndex>() in microVU_Compile.inl
runVUrec(VU1.VI[REG_TPC].UL, 300000 /*0x7fffffff*/, 1);
@ -227,8 +227,8 @@ namespace VU1micro
if (mVUdebugNow) {
resetVUrec(1);
memcpy_const((u8*)&VU1, (u8*)backVUregs, sizeof(VURegs));
memcpy_const((u8*)VU1.Mem, (u8*)backVUmem, 0x4000);
memcpy((u8*)&VU1, (u8*)backVUregs, sizeof(VURegs));
memcpy((u8*)VU1.Mem, (u8*)backVUmem, 0x4000);
runVUrec(VU1.VI[REG_TPC].UL, 300000 /*0x7fffffff*/, 1);

View File

@ -1016,8 +1016,8 @@ void SaveBranchState()
s_psaveInstInfo = g_pCurInstInfo;
// save all mmx regs
memcpy_const(s_saveMMXregs, mmxregs, sizeof(mmxregs));
memcpy_const(s_saveXMMregs, xmmregs, sizeof(xmmregs));
memcpy(s_saveMMXregs, mmxregs, sizeof(mmxregs));
memcpy(s_saveXMMregs, xmmregs, sizeof(xmmregs));
}
void LoadBranchState()
@ -1031,8 +1031,8 @@ void LoadBranchState()
g_pCurInstInfo = s_psaveInstInfo;
// restore all mmx regs
memcpy_const(mmxregs, s_saveMMXregs, sizeof(mmxregs));
memcpy_const(xmmregs, s_saveXMMregs, sizeof(xmmregs));
memcpy(mmxregs, s_saveMMXregs, sizeof(mmxregs));
memcpy(xmmregs, s_saveXMMregs, sizeof(xmmregs));
}
void iFlushCall(int flushtype)

View File

@ -181,8 +181,8 @@ __ri microProgram* mVUcreateProg(microVU& mVU, int startPC) {
// Caches Micro Program
__ri void mVUcacheProg(microVU& mVU, microProgram& prog) {
if (!mVU.index) memcpy_const(prog.data, mVU.regs().Micro, 0x1000);
else memcpy_const(prog.data, mVU.regs().Micro, 0x4000);
if (!mVU.index) memcpy(prog.data, mVU.regs().Micro, 0x1000);
else memcpy(prog.data, mVU.regs().Micro, 0x4000);
mVUdumpProg(mVU, prog);
}

View File

@ -92,7 +92,7 @@ public:
blockEnd = blockList = newBlock;
}
memcpy_const(&newBlock->block, pBlock, sizeof(microBlock));
memcpy(&newBlock->block, pBlock, sizeof(microBlock));
thisBlock = &newBlock->block;
}
return thisBlock;

View File

@ -170,7 +170,7 @@ void normBranchCompile(microVU& mVU, u32 branchPC) {
}
void normJumpCompile(mV, microFlagCycles& mFC, bool isEvilJump) {
memcpy_const(&mVUpBlock->pStateEnd, &mVUregs, sizeof(microRegInfo));
memcpy(&mVUpBlock->pStateEnd, &mVUregs, sizeof(microRegInfo));
mVUsetupBranch(mVU, mFC);
mVUbackupRegs(mVU);
@ -386,7 +386,7 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc) {
s32* ajmp = xJcc32((JccComparisonType)JMPcc);
u32 bPC = iPC; // mVUcompile can modify iPC, mVUpBlock, and mVUregs so back them up
microBlock* pBlock = mVUpBlock;
memcpy_const(&pBlock->pStateEnd, &mVUregs, sizeof(microRegInfo));
memcpy(&pBlock->pStateEnd, &mVUregs, sizeof(microRegInfo));
incPC2(1); // Get PC for branch not-taken
mVUcompile(mVU, xPC, (uptr)&mVUregs);

View File

@ -427,10 +427,10 @@ __fi void mVUinitFirstPass(microVU& mVU, uptr pState, u8* thisPtr) {
mVU.p = 0; // All blocks start at p index #0
mVU.q = 0; // All blocks start at q index #0
if ((uptr)&mVUregs != pState) { // Loads up Pipeline State Info
memcpy_const((u8*)&mVUregs, (u8*)pState, sizeof(microRegInfo));
memcpy((u8*)&mVUregs, (u8*)pState, sizeof(microRegInfo));
}
if (doEarlyExit(mVU) && ((uptr)&mVU.prog.lpState != pState)) {
memcpy_const((u8*)&mVU.prog.lpState, (u8*)pState, sizeof(microRegInfo));
memcpy((u8*)&mVU.prog.lpState, (u8*)pState, sizeof(microRegInfo));
}
mVUblock.x86ptrStart = thisPtr;
mVUpBlock = mVUblocks[mVUstartPC/2]->add(&mVUblock); // Add this block to block manager

View File

@ -86,7 +86,7 @@ public:
wxsFormat(L"HashBucket Chain (bucket size=%d)", bucket.Size+1)
);
}
memcpy_const(&bucket.Chain[bucket.Size++], &dataPtr, sizeof(T));
memcpy(&bucket.Chain[bucket.Size++], &dataPtr, sizeof(T));
}
void clear() {
for (int i = 0; i < hSize; i++) {

View File

@ -117,7 +117,7 @@ _vifT int nVifUnpack(const u8* data) {
if (ret == vif.tag.size) { // Full Transfer
if (v.bSize) { // Last transfer was partial
memcpy_aligned(&v.buffer[v.bSize], data, size);
memcpy(&v.buffer[v.bSize], data, size);
v.bSize += size;
size = v.bSize;
data = v.buffer;
@ -140,7 +140,7 @@ _vifT int nVifUnpack(const u8* data) {
v.bSize = 0;
}
else { // Partial Transfer
memcpy_aligned(&v.buffer[v.bSize], data, size);
memcpy(&v.buffer[v.bSize], data, size);
v.bSize += size;
vif.tag.size -= ret;

View File

@ -131,15 +131,15 @@ void recSuperVU1::Execute(u32 cycles) {
#endif
runCount++;
memcpy_const((u8*)backVUregs, (u8*)&VU1, sizeof(VURegs));
memcpy_const((u8*)backVUmem, (u8*) VU1.Mem, 0x4000);
memcpy((u8*)backVUregs, (u8*)&VU1, sizeof(VURegs));
memcpy((u8*)backVUmem, (u8*) VU1.Mem, 0x4000);
runMVU1(cycles);
memcpy_const((u8*)cmpVUregs,(u8*)&VU1, sizeof(VURegs));
memcpy_const((u8*)cmpVUmem, (u8*)VU1.Mem, 0x4000);
memcpy_const((u8*)&VU1, (u8*)backVUregs, sizeof(VURegs));
memcpy_const((u8*)VU1.Mem, (u8*)backVUmem, 0x4000);
memcpy((u8*)cmpVUregs,(u8*)&VU1, sizeof(VURegs));
memcpy((u8*)cmpVUmem, (u8*)VU1.Mem, 0x4000);
memcpy((u8*)&VU1, (u8*)backVUregs, sizeof(VURegs));
memcpy((u8*)VU1.Mem, (u8*)backVUmem, 0x4000);
runSVU1(cycles);
if ((memcmp((u8*)cmpVUregs, (u8*)&VU1, (16*32) + (16*16))) || (memcmp((u8*)cmpVUmem, (u8*)VU1.Mem, 0x4000))) {
@ -230,8 +230,8 @@ void recSuperVU1::Execute(u32 cycles) {
resetMVU1();
memcpy_const((u8*)&VU1, (u8*)backVUregs, sizeof(VURegs));
memcpy_const((u8*)VU1.Mem, (u8*)backVUmem, 0x4000);
memcpy((u8*)&VU1, (u8*)backVUregs, sizeof(VURegs));
memcpy((u8*)VU1.Mem, (u8*)backVUmem, 0x4000);
runMVU1(cycles);