Remove some slow, redundant memcpy implementations: memcpy_const/memcpy_qwc/memcpy_aligned.

Sacha 2014-08-13 17:55:23 +10:00
parent 9102e40f9c
commit 3f7cb991e4
15 changed files with 45 additions and 46 deletions
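The removed helpers differed from plain memcpy mainly in their size units: memcpy_qwc counted 128-bit quadwords, while memcpy_const and memcpy_aligned took byte counts (specialized for compile-time-constant sizes and 16-byte-aligned pointers, respectively). That is why every memcpy_qwc call site below gains a *16 while the other call sites convert one-to-one. A minimal sketch of the mapping, reconstructed from the call sites rather than from the deleted sources:

#include <cstring>
#include <cstddef>

// Hypothetical shims illustrating the unit conversion; the real helpers
// were hand-tuned copy routines, not wrappers like these.
static inline void memcpy_qwc(void* dest, const void* src, size_t qwc) {
    std::memcpy(dest, src, qwc * 16);   // qwc counts 16-byte quadwords
}
static inline void memcpy_const(void* dest, const void* src, size_t bytes) {
    std::memcpy(dest, src, bytes);      // plain byte count
}
static inline void memcpy_aligned(void* dest, const void* src, size_t bytes) {
    std::memcpy(dest, src, bytes);      // byte count, aligned arguments
}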

View File

@@ -725,7 +725,7 @@ int cdvdReadSector() {
 	mdest[11] = 0;
 	// normal 2048 bytes of sector data
-	memcpy_const(&mdest[12], cdr.Transfer, 2048);
+	memcpy(&mdest[12], cdr.Transfer, 2048);
 	// 4 bytes of edc (not calculated at present)
 	mdest[2060] = 0;

View File

@@ -442,27 +442,27 @@ extern __aligned(32) MTGS_BufferedData RingBuffer;
 inline void MemCopy_WrappedDest( const u128* src, u128* destBase, uint& destStart, uint destSize, uint len ) {
 	uint endpos = destStart + len;
 	if ( endpos < destSize ) {
-		memcpy_qwc(&destBase[destStart], src, len );
+		memcpy(&destBase[destStart], src, len*16);
 		destStart += len;
 	}
 	else {
 		uint firstcopylen = destSize - destStart;
-		memcpy_qwc(&destBase[destStart], src, firstcopylen );
+		memcpy(&destBase[destStart], src, firstcopylen*16);
 		destStart = endpos % destSize;
-		memcpy_qwc(destBase, src+firstcopylen, destStart );
+		memcpy(destBase, src+firstcopylen, destStart*16);
 	}
 }

 inline void MemCopy_WrappedSrc( const u128* srcBase, uint& srcStart, uint srcSize, u128* dest, uint len ) {
 	uint endpos = srcStart + len;
 	if ( endpos < srcSize ) {
-		memcpy_qwc(dest, &srcBase[srcStart], len );
+		memcpy(dest, &srcBase[srcStart], len*16);
 		srcStart += len;
 	}
 	else {
 		uint firstcopylen = srcSize - srcStart;
-		memcpy_qwc(dest, &srcBase[srcStart], firstcopylen );
+		memcpy(dest, &srcBase[srcStart], firstcopylen*16);
 		srcStart = endpos % srcSize;
-		memcpy_qwc(dest+firstcopylen, srcBase, srcStart );
+		memcpy(dest+firstcopylen, srcBase, srcStart*16);
 	}
 }
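These two helpers split a single logical copy across the ring-buffer boundary; the change only rewrites their quadword counts as explicit byte counts. A small usage sketch of the wrapped-destination case, with the buffer size and cursor position chosen purely for illustration:

// Illustrative only: an 8-quadword ring with the write cursor near the end.
u128 ring[8];
u128 payload[4];
uint writePos = 6;

// endpos = 6 + 4 = 10 >= 8, so the else branch runs:
//   firstcopylen = 2  -> payload[0..1] fill ring[6..7],
//   writePos = 10 % 8 -> payload[2..3] wrap to ring[0..1], writePos == 2.
MemCopy_WrappedDest(payload, ring, writePos, 8, 4);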

View File

@@ -241,8 +241,7 @@ struct Gif_Path {
 			mtgsReadWait(); // Let MTGS run to free up buffer space
 		}
 		pxAssertDev(curSize+size<=buffSize, "Gif Path Buffer Overflow!");
-		if (aligned) memcpy_qwc (&buffer[curSize], pMem, size/16);
-		else         memcpy_fast(&buffer[curSize], pMem, size);
+		memcpy (&buffer[curSize], pMem, size);
 		curSize += size;
 	}

View File

@@ -181,7 +181,7 @@ void SysMtgsThread::OpenPlugin()
 {
 	if( m_PluginOpened ) return;

-	memcpy_aligned( RingBuffer.Regs, PS2MEM_GS, sizeof(PS2MEM_GS) );
+	memcpy( RingBuffer.Regs, PS2MEM_GS, sizeof(PS2MEM_GS) );
 	GSsetBaseMem( RingBuffer.Regs );
 	GSirqCallback( dummyIrqCallback );

View File

@@ -220,7 +220,7 @@ static void CALLBACK GS_Legacy_gifTransfer( const u32* src, u32 data )
 		// the transfer is most likely wrapped/partial. We need to queue it into a linear buffer
 		// and then send it on its way on the next copy.
-		memcpy_qwc( path1queue, src128, data );
+		memcpy( path1queue, src128, data*16);
 		path1size = data;
 	}
 	else
@@ -235,7 +235,7 @@ static void CALLBACK GS_Legacy_gifTransfer( const u32* src, u32 data )
 			if (src128 == RingBuffer.m_Ring)
 			{
 				pxAssert( (data+path1size) <= 0x400 );
-				memcpy_qwc( &path1queue[path1size], src128, data );
+				memcpy( &path1queue[path1size], src128, data*16);
 				path1size += data;
 			}
 			GSgifTransfer1( (u32*)path1queue, 0 );

View File

@@ -97,7 +97,7 @@ int _SPR0chain()
 		//Taking an arbitary small value for games which like to check the QWC/MADR instead of STR, so get most of
 		//the cycle delay out of the way before the end.
 		partialqwc = spr0ch.qwc;
-	memcpy_qwc(pMem, &psSu128(spr0ch.sadr), partialqwc);
+	memcpy(pMem, &psSu128(spr0ch.sadr), partialqwc*16);
 	// clear VU mem also!
 	TestClearVUs(spr0ch.madr, partialqwc, true);
@@ -151,7 +151,7 @@ void _SPR0interleave()
 			case MFD_RESERVED:
 				// clear VU mem also!
 				TestClearVUs(spr0ch.madr, spr0ch.qwc, true);
-				memcpy_qwc(pMem, &psSu128(spr0ch.sadr), spr0ch.qwc);
+				memcpy(pMem, &psSu128(spr0ch.sadr), spr0ch.qwc*16);
 				break;
 		}
 		spr0ch.sadr += spr0ch.qwc * 16;
@@ -322,7 +322,7 @@ __fi static void SPR1transfer(const void* data, int qwc)
 		TestClearVUs(spr1ch.madr, spr1ch.qwc, false);
 	}

-	memcpy_qwc(&psSu128(spr1ch.sadr), data, qwc);
+	memcpy(&psSu128(spr1ch.sadr), data, qwc*16);
 	spr1ch.sadr += qwc * 16;
 }
@@ -381,7 +381,7 @@ void _SPR1interleave()
 		spr1ch.qwc = std::min(tqwc, qwc);
 		qwc -= spr1ch.qwc;
 		pMem = SPRdmaGetAddr(spr1ch.madr, false);
-		memcpy_qwc(&psSu128(spr1ch.sadr), pMem, spr1ch.qwc);
+		memcpy(&psSu128(spr1ch.sadr), pMem, spr1ch.qwc*16);
 		spr1ch.sadr += spr1ch.qwc * 16;
 		spr1ch.madr += (sqwc + spr1ch.qwc) * 16;
 	}

View File

@@ -126,17 +126,17 @@ namespace VU1micro
 #endif
 		runCount++;

-		memcpy_const((u8*)backVUregs, (u8*)&VU1, sizeof(VURegs));
-		memcpy_const((u8*)backVUmem, (u8*)VU1.Mem, 0x4000);
+		memcpy((u8*)backVUregs, (u8*)&VU1, sizeof(VURegs));
+		memcpy((u8*)backVUmem, (u8*)VU1.Mem, 0x4000);

 		do { // while loop needed since not always will return finished
 			SuperVUExecuteProgram(VU1.VI[ REG_TPC ].UL & 0x3fff, 1);
 		} while( VU0.VI[ REG_VPU_STAT ].UL&0x100 );

-		memcpy_const((u8*)cmpVUregs, (u8*)&VU1, sizeof(VURegs));
-		memcpy_const((u8*)cmpVUmem, (u8*)VU1.Mem, 0x4000);
+		memcpy((u8*)cmpVUregs, (u8*)&VU1, sizeof(VURegs));
+		memcpy((u8*)cmpVUmem, (u8*)VU1.Mem, 0x4000);

-		memcpy_const((u8*)&VU1, (u8*)backVUregs, sizeof(VURegs));
-		memcpy_const((u8*)VU1.Mem, (u8*)backVUmem, 0x4000);
+		memcpy((u8*)&VU1, (u8*)backVUregs, sizeof(VURegs));
+		memcpy((u8*)VU1.Mem, (u8*)backVUmem, 0x4000);

 		//Currently breaking mVU execution is disabled. Check mVUtestCycles<vuIndex>() in microVU_Compile.inl
 		runVUrec(VU1.VI[REG_TPC].UL, 300000 /*0x7fffffff*/, 1);
@@ -227,8 +227,8 @@ namespace VU1micro
 			if (mVUdebugNow) {
 				resetVUrec(1);
-				memcpy_const((u8*)&VU1, (u8*)backVUregs, sizeof(VURegs));
-				memcpy_const((u8*)VU1.Mem, (u8*)backVUmem, 0x4000);
+				memcpy((u8*)&VU1, (u8*)backVUregs, sizeof(VURegs));
+				memcpy((u8*)VU1.Mem, (u8*)backVUmem, 0x4000);
 				runVUrec(VU1.VI[REG_TPC].UL, 300000 /*0x7fffffff*/, 1);

View File

@@ -1016,8 +1016,8 @@ void SaveBranchState()
 	s_psaveInstInfo = g_pCurInstInfo;

 	// save all mmx regs
-	memcpy_const(s_saveMMXregs, mmxregs, sizeof(mmxregs));
-	memcpy_const(s_saveXMMregs, xmmregs, sizeof(xmmregs));
+	memcpy(s_saveMMXregs, mmxregs, sizeof(mmxregs));
+	memcpy(s_saveXMMregs, xmmregs, sizeof(xmmregs));
 }

 void LoadBranchState()
@@ -1031,8 +1031,8 @@ void LoadBranchState()
 	g_pCurInstInfo = s_psaveInstInfo;

 	// restore all mmx regs
-	memcpy_const(mmxregs, s_saveMMXregs, sizeof(mmxregs));
-	memcpy_const(xmmregs, s_saveXMMregs, sizeof(xmmregs));
+	memcpy(mmxregs, s_saveMMXregs, sizeof(mmxregs));
+	memcpy(xmmregs, s_saveXMMregs, sizeof(xmmregs));
 }

 void iFlushCall(int flushtype)

View File

@@ -181,8 +181,8 @@ __ri microProgram* mVUcreateProg(microVU& mVU, int startPC) {
 // Caches Micro Program
 __ri void mVUcacheProg(microVU& mVU, microProgram& prog) {
-	if (!mVU.index) memcpy_const(prog.data, mVU.regs().Micro, 0x1000);
-	else            memcpy_const(prog.data, mVU.regs().Micro, 0x4000);
+	if (!mVU.index) memcpy(prog.data, mVU.regs().Micro, 0x1000);
+	else            memcpy(prog.data, mVU.regs().Micro, 0x4000);
 	mVUdumpProg(mVU, prog);
 }

View File

@@ -92,7 +92,7 @@ public:
 				blockEnd = blockList = newBlock;
 			}
-			memcpy_const(&newBlock->block, pBlock, sizeof(microBlock));
+			memcpy(&newBlock->block, pBlock, sizeof(microBlock));
 			thisBlock = &newBlock->block;
 		}
 		return thisBlock;

View File

@@ -170,7 +170,7 @@ void normBranchCompile(microVU& mVU, u32 branchPC) {
 }

 void normJumpCompile(mV, microFlagCycles& mFC, bool isEvilJump) {
-	memcpy_const(&mVUpBlock->pStateEnd, &mVUregs, sizeof(microRegInfo));
+	memcpy(&mVUpBlock->pStateEnd, &mVUregs, sizeof(microRegInfo));
 	mVUsetupBranch(mVU, mFC);
 	mVUbackupRegs(mVU);
@@ -386,7 +386,7 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc) {
 	s32* ajmp = xJcc32((JccComparisonType)JMPcc);
 	u32 bPC = iPC; // mVUcompile can modify iPC, mVUpBlock, and mVUregs so back them up
 	microBlock* pBlock = mVUpBlock;
-	memcpy_const(&pBlock->pStateEnd, &mVUregs, sizeof(microRegInfo));
+	memcpy(&pBlock->pStateEnd, &mVUregs, sizeof(microRegInfo));
 	incPC2(1); // Get PC for branch not-taken
 	mVUcompile(mVU, xPC, (uptr)&mVUregs);

View File

@@ -427,10 +427,10 @@ __fi void mVUinitFirstPass(microVU& mVU, uptr pState, u8* thisPtr) {
 	mVU.p = 0; // All blocks start at p index #0
 	mVU.q = 0; // All blocks start at q index #0
 	if ((uptr)&mVUregs != pState) { // Loads up Pipeline State Info
-		memcpy_const((u8*)&mVUregs, (u8*)pState, sizeof(microRegInfo));
+		memcpy((u8*)&mVUregs, (u8*)pState, sizeof(microRegInfo));
 	}
 	if (doEarlyExit(mVU) && ((uptr)&mVU.prog.lpState != pState)) {
-		memcpy_const((u8*)&mVU.prog.lpState, (u8*)pState, sizeof(microRegInfo));
+		memcpy((u8*)&mVU.prog.lpState, (u8*)pState, sizeof(microRegInfo));
 	}
 	mVUblock.x86ptrStart = thisPtr;
 	mVUpBlock = mVUblocks[mVUstartPC/2]->add(&mVUblock); // Add this block to block manager

View File

@@ -86,7 +86,7 @@ public:
 				wxsFormat(L"HashBucket Chain (bucket size=%d)", bucket.Size+1)
 			);
 		}
-		memcpy_const(&bucket.Chain[bucket.Size++], &dataPtr, sizeof(T));
+		memcpy(&bucket.Chain[bucket.Size++], &dataPtr, sizeof(T));
 	}
 	void clear() {
 		for (int i = 0; i < hSize; i++) {

View File

@@ -117,7 +117,7 @@ _vifT int nVifUnpack(const u8* data) {
 	if (ret == vif.tag.size) { // Full Transfer
 		if (v.bSize) { // Last transfer was partial
-			memcpy_aligned(&v.buffer[v.bSize], data, size);
+			memcpy(&v.buffer[v.bSize], data, size);
 			v.bSize += size;
 			size = v.bSize;
 			data = v.buffer;
@@ -140,7 +140,7 @@ _vifT int nVifUnpack(const u8* data) {
 		v.bSize = 0;
 	}
 	else { // Partial Transfer
-		memcpy_aligned(&v.buffer[v.bSize], data, size);
+		memcpy(&v.buffer[v.bSize], data, size);
 		v.bSize += size;
 		vif.tag.size -= ret;
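Both calls in this function feed one accumulate-then-flush scheme for unpacks that arrive split across transfers; the combined control flow, condensed from the two hunks above (the lines between them are elided in this view):

// Condensed sketch; v.buffer and v.bSize persist across calls.
if (ret == vif.tag.size) {        // this chunk completes the tag
    if (v.bSize) {                // earlier partial chunks were buffered
        memcpy(&v.buffer[v.bSize], data, size);
        v.bSize += size;
        size = v.bSize;           // unpack the reassembled whole...
        data = v.buffer;          // ...from the staging buffer
    }
    // ... the unpack runs here, then the staging buffer is reset:
    v.bSize = 0;
}
else {                            // still partial: keep accumulating
    memcpy(&v.buffer[v.bSize], data, size);
    v.bSize += size;
    vif.tag.size -= ret;
}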

View File

@@ -131,15 +131,15 @@ void recSuperVU1::Execute(u32 cycles) {
 #endif
 	runCount++;

-	memcpy_const((u8*)backVUregs, (u8*)&VU1, sizeof(VURegs));
-	memcpy_const((u8*)backVUmem, (u8*) VU1.Mem, 0x4000);
+	memcpy((u8*)backVUregs, (u8*)&VU1, sizeof(VURegs));
+	memcpy((u8*)backVUmem, (u8*) VU1.Mem, 0x4000);

 	runMVU1(cycles);

-	memcpy_const((u8*)cmpVUregs,(u8*)&VU1, sizeof(VURegs));
-	memcpy_const((u8*)cmpVUmem, (u8*)VU1.Mem, 0x4000);
+	memcpy((u8*)cmpVUregs,(u8*)&VU1, sizeof(VURegs));
+	memcpy((u8*)cmpVUmem, (u8*)VU1.Mem, 0x4000);

-	memcpy_const((u8*)&VU1, (u8*)backVUregs, sizeof(VURegs));
-	memcpy_const((u8*)VU1.Mem, (u8*)backVUmem, 0x4000);
+	memcpy((u8*)&VU1, (u8*)backVUregs, sizeof(VURegs));
+	memcpy((u8*)VU1.Mem, (u8*)backVUmem, 0x4000);

 	runSVU1(cycles);
 	if ((memcmp((u8*)cmpVUregs, (u8*)&VU1, (16*32) + (16*16))) || (memcmp((u8*)cmpVUmem, (u8*)VU1.Mem, 0x4000))) {
@@ -230,8 +230,8 @@ void recSuperVU1::Execute(u32 cycles) {
 	resetMVU1();
-	memcpy_const((u8*)&VU1, (u8*)backVUregs, sizeof(VURegs));
-	memcpy_const((u8*)VU1.Mem, (u8*)backVUmem, 0x4000);
+	memcpy((u8*)&VU1, (u8*)backVUregs, sizeof(VURegs));
+	memcpy((u8*)VU1.Mem, (u8*)backVUmem, 0x4000);
 	runMVU1(cycles);