From 3f7cb991e4c89018958f4d997bbaad60929c789c Mon Sep 17 00:00:00 2001 From: Sacha Date: Wed, 13 Aug 2014 17:55:23 +1000 Subject: [PATCH] Remove some slow, redundant memcpy implementations: memcpy_const/memcpy_qwc/memcpy_aligned. --- pcsx2/CDVD/CDVD.cpp | 2 +- pcsx2/GS.h | 12 ++++++------ pcsx2/Gif_Unit.h | 3 +-- pcsx2/MTGS.cpp | 2 +- pcsx2/PluginManager.cpp | 4 ++-- pcsx2/SPR.cpp | 8 ++++---- pcsx2/x86/iVU1micro.cpp | 16 ++++++++-------- pcsx2/x86/ix86-32/iR5900-32.cpp | 8 ++++---- pcsx2/x86/microVU.cpp | 4 ++-- pcsx2/x86/microVU.h | 2 +- pcsx2/x86/microVU_Branch.inl | 4 ++-- pcsx2/x86/microVU_Compile.inl | 4 ++-- pcsx2/x86/newVif_HashBucket.h | 2 +- pcsx2/x86/newVif_Unpack.cpp | 4 ++-- pcsx2/x86/sVU_Compare.h | 16 ++++++++-------- 15 files changed, 45 insertions(+), 46 deletions(-) diff --git a/pcsx2/CDVD/CDVD.cpp b/pcsx2/CDVD/CDVD.cpp index 277067c68b..db6ade7589 100644 --- a/pcsx2/CDVD/CDVD.cpp +++ b/pcsx2/CDVD/CDVD.cpp @@ -725,7 +725,7 @@ int cdvdReadSector() { mdest[11] = 0; // normal 2048 bytes of sector data - memcpy_const(&mdest[12], cdr.Transfer, 2048); + memcpy(&mdest[12], cdr.Transfer, 2048); // 4 bytes of edc (not calculated at present) mdest[2060] = 0; diff --git a/pcsx2/GS.h b/pcsx2/GS.h index dbacc47192..958c8e072d 100644 --- a/pcsx2/GS.h +++ b/pcsx2/GS.h @@ -442,27 +442,27 @@ extern __aligned(32) MTGS_BufferedData RingBuffer; inline void MemCopy_WrappedDest( const u128* src, u128* destBase, uint& destStart, uint destSize, uint len ) { uint endpos = destStart + len; if ( endpos < destSize ) { - memcpy_qwc(&destBase[destStart], src, len ); + memcpy(&destBase[destStart], src, len*16); destStart += len; } else { uint firstcopylen = destSize - destStart; - memcpy_qwc(&destBase[destStart], src, firstcopylen ); + memcpy(&destBase[destStart], src, firstcopylen*16); destStart = endpos % destSize; - memcpy_qwc(destBase, src+firstcopylen, destStart ); + memcpy(destBase, src+firstcopylen, destStart*16); } } inline void MemCopy_WrappedSrc( const u128* srcBase, uint& srcStart, uint srcSize, u128* dest, uint len ) { uint endpos = srcStart + len; if ( endpos < srcSize ) { - memcpy_qwc(dest, &srcBase[srcStart], len ); + memcpy(dest, &srcBase[srcStart], len*16); srcStart += len; } else { uint firstcopylen = srcSize - srcStart; - memcpy_qwc(dest, &srcBase[srcStart], firstcopylen ); + memcpy(dest, &srcBase[srcStart], firstcopylen*16); srcStart = endpos % srcSize; - memcpy_qwc(dest+firstcopylen, srcBase, srcStart ); + memcpy(dest+firstcopylen, srcBase, srcStart*16); } } diff --git a/pcsx2/Gif_Unit.h b/pcsx2/Gif_Unit.h index 1928fd833a..3b2d13a842 100644 --- a/pcsx2/Gif_Unit.h +++ b/pcsx2/Gif_Unit.h @@ -241,8 +241,7 @@ struct Gif_Path { mtgsReadWait(); // Let MTGS run to free up buffer space } pxAssertDev(curSize+size<=buffSize, "Gif Path Buffer Overflow!"); - if (aligned) memcpy_qwc (&buffer[curSize], pMem, size/16); - else memcpy_fast(&buffer[curSize], pMem, size); + memcpy (&buffer[curSize], pMem, size); curSize += size; } diff --git a/pcsx2/MTGS.cpp b/pcsx2/MTGS.cpp index c3887bf4eb..d9b437ea31 100644 --- a/pcsx2/MTGS.cpp +++ b/pcsx2/MTGS.cpp @@ -181,7 +181,7 @@ void SysMtgsThread::OpenPlugin() { if( m_PluginOpened ) return; - memcpy_aligned( RingBuffer.Regs, PS2MEM_GS, sizeof(PS2MEM_GS) ); + memcpy( RingBuffer.Regs, PS2MEM_GS, sizeof(PS2MEM_GS) ); GSsetBaseMem( RingBuffer.Regs ); GSirqCallback( dummyIrqCallback ); diff --git a/pcsx2/PluginManager.cpp b/pcsx2/PluginManager.cpp index 4b27665cf2..43924c089f 100644 --- a/pcsx2/PluginManager.cpp +++ b/pcsx2/PluginManager.cpp @@ -220,7 +220,7 @@ static void CALLBACK GS_Legacy_gifTransfer( const u32* src, u32 data ) // the transfer is most likely wrapped/partial. We need to queue it into a linear buffer // and then send it on its way on the next copy. - memcpy_qwc( path1queue, src128, data ); + memcpy( path1queue, src128, data*16); path1size = data; } else @@ -235,7 +235,7 @@ static void CALLBACK GS_Legacy_gifTransfer( const u32* src, u32 data ) if (src128 == RingBuffer.m_Ring) { pxAssert( (data+path1size) <= 0x400 ); - memcpy_qwc( &path1queue[path1size], src128, data ); + memcpy( &path1queue[path1size], src128, data*16); path1size += data; } GSgifTransfer1( (u32*)path1queue, 0 ); diff --git a/pcsx2/SPR.cpp b/pcsx2/SPR.cpp index 6349ea4d94..858666e762 100644 --- a/pcsx2/SPR.cpp +++ b/pcsx2/SPR.cpp @@ -97,7 +97,7 @@ int _SPR0chain() //Taking an arbitary small value for games which like to check the QWC/MADR instead of STR, so get most of //the cycle delay out of the way before the end. partialqwc = spr0ch.qwc; - memcpy_qwc(pMem, &psSu128(spr0ch.sadr), partialqwc); + memcpy(pMem, &psSu128(spr0ch.sadr), partialqwc*16); // clear VU mem also! TestClearVUs(spr0ch.madr, partialqwc, true); @@ -151,7 +151,7 @@ void _SPR0interleave() case MFD_RESERVED: // clear VU mem also! TestClearVUs(spr0ch.madr, spr0ch.qwc, true); - memcpy_qwc(pMem, &psSu128(spr0ch.sadr), spr0ch.qwc); + memcpy(pMem, &psSu128(spr0ch.sadr), spr0ch.qwc*16); break; } spr0ch.sadr += spr0ch.qwc * 16; @@ -322,7 +322,7 @@ __fi static void SPR1transfer(const void* data, int qwc) TestClearVUs(spr1ch.madr, spr1ch.qwc, false); } - memcpy_qwc(&psSu128(spr1ch.sadr), data, qwc); + memcpy(&psSu128(spr1ch.sadr), data, qwc*16); spr1ch.sadr += qwc * 16; } @@ -381,7 +381,7 @@ void _SPR1interleave() spr1ch.qwc = std::min(tqwc, qwc); qwc -= spr1ch.qwc; pMem = SPRdmaGetAddr(spr1ch.madr, false); - memcpy_qwc(&psSu128(spr1ch.sadr), pMem, spr1ch.qwc); + memcpy(&psSu128(spr1ch.sadr), pMem, spr1ch.qwc*16); spr1ch.sadr += spr1ch.qwc * 16; spr1ch.madr += (sqwc + spr1ch.qwc) * 16; } diff --git a/pcsx2/x86/iVU1micro.cpp b/pcsx2/x86/iVU1micro.cpp index bef7648059..4a878ceb67 100644 --- a/pcsx2/x86/iVU1micro.cpp +++ b/pcsx2/x86/iVU1micro.cpp @@ -126,17 +126,17 @@ namespace VU1micro #endif runCount++; - memcpy_const((u8*)backVUregs, (u8*)&VU1, sizeof(VURegs)); - memcpy_const((u8*)backVUmem, (u8*)VU1.Mem, 0x4000); + memcpy((u8*)backVUregs, (u8*)&VU1, sizeof(VURegs)); + memcpy((u8*)backVUmem, (u8*)VU1.Mem, 0x4000); do { // while loop needed since not always will return finished SuperVUExecuteProgram(VU1.VI[ REG_TPC ].UL & 0x3fff, 1); } while( VU0.VI[ REG_VPU_STAT ].UL&0x100 ); - memcpy_const((u8*)cmpVUregs, (u8*)&VU1, sizeof(VURegs)); - memcpy_const((u8*)cmpVUmem, (u8*)VU1.Mem, 0x4000); - memcpy_const((u8*)&VU1, (u8*)backVUregs, sizeof(VURegs)); - memcpy_const((u8*)VU1.Mem, (u8*)backVUmem, 0x4000); + memcpy((u8*)cmpVUregs, (u8*)&VU1, sizeof(VURegs)); + memcpy((u8*)cmpVUmem, (u8*)VU1.Mem, 0x4000); + memcpy((u8*)&VU1, (u8*)backVUregs, sizeof(VURegs)); + memcpy((u8*)VU1.Mem, (u8*)backVUmem, 0x4000); //Currently breaking mVU execution is disabled. Check mVUtestCycles() in microVU_Compile.inl runVUrec(VU1.VI[REG_TPC].UL, 300000 /*0x7fffffff*/, 1); @@ -227,8 +227,8 @@ namespace VU1micro if (mVUdebugNow) { resetVUrec(1); - memcpy_const((u8*)&VU1, (u8*)backVUregs, sizeof(VURegs)); - memcpy_const((u8*)VU1.Mem, (u8*)backVUmem, 0x4000); + memcpy((u8*)&VU1, (u8*)backVUregs, sizeof(VURegs)); + memcpy((u8*)VU1.Mem, (u8*)backVUmem, 0x4000); runVUrec(VU1.VI[REG_TPC].UL, 300000 /*0x7fffffff*/, 1); diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index 182ff5e0a3..c92fb4a2bf 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -1016,8 +1016,8 @@ void SaveBranchState() s_psaveInstInfo = g_pCurInstInfo; // save all mmx regs - memcpy_const(s_saveMMXregs, mmxregs, sizeof(mmxregs)); - memcpy_const(s_saveXMMregs, xmmregs, sizeof(xmmregs)); + memcpy(s_saveMMXregs, mmxregs, sizeof(mmxregs)); + memcpy(s_saveXMMregs, xmmregs, sizeof(xmmregs)); } void LoadBranchState() @@ -1031,8 +1031,8 @@ void LoadBranchState() g_pCurInstInfo = s_psaveInstInfo; // restore all mmx regs - memcpy_const(mmxregs, s_saveMMXregs, sizeof(mmxregs)); - memcpy_const(xmmregs, s_saveXMMregs, sizeof(xmmregs)); + memcpy(mmxregs, s_saveMMXregs, sizeof(mmxregs)); + memcpy(xmmregs, s_saveXMMregs, sizeof(xmmregs)); } void iFlushCall(int flushtype) diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index 0f5c45141a..6cc4526f71 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -181,8 +181,8 @@ __ri microProgram* mVUcreateProg(microVU& mVU, int startPC) { // Caches Micro Program __ri void mVUcacheProg(microVU& mVU, microProgram& prog) { - if (!mVU.index) memcpy_const(prog.data, mVU.regs().Micro, 0x1000); - else memcpy_const(prog.data, mVU.regs().Micro, 0x4000); + if (!mVU.index) memcpy(prog.data, mVU.regs().Micro, 0x1000); + else memcpy(prog.data, mVU.regs().Micro, 0x4000); mVUdumpProg(mVU, prog); } diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index a1bf51790a..1529bb39f1 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -92,7 +92,7 @@ public: blockEnd = blockList = newBlock; } - memcpy_const(&newBlock->block, pBlock, sizeof(microBlock)); + memcpy(&newBlock->block, pBlock, sizeof(microBlock)); thisBlock = &newBlock->block; } return thisBlock; diff --git a/pcsx2/x86/microVU_Branch.inl b/pcsx2/x86/microVU_Branch.inl index 267b858f20..c4d34672e1 100644 --- a/pcsx2/x86/microVU_Branch.inl +++ b/pcsx2/x86/microVU_Branch.inl @@ -170,7 +170,7 @@ void normBranchCompile(microVU& mVU, u32 branchPC) { } void normJumpCompile(mV, microFlagCycles& mFC, bool isEvilJump) { - memcpy_const(&mVUpBlock->pStateEnd, &mVUregs, sizeof(microRegInfo)); + memcpy(&mVUpBlock->pStateEnd, &mVUregs, sizeof(microRegInfo)); mVUsetupBranch(mVU, mFC); mVUbackupRegs(mVU); @@ -386,7 +386,7 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc) { s32* ajmp = xJcc32((JccComparisonType)JMPcc); u32 bPC = iPC; // mVUcompile can modify iPC, mVUpBlock, and mVUregs so back them up microBlock* pBlock = mVUpBlock; - memcpy_const(&pBlock->pStateEnd, &mVUregs, sizeof(microRegInfo)); + memcpy(&pBlock->pStateEnd, &mVUregs, sizeof(microRegInfo)); incPC2(1); // Get PC for branch not-taken mVUcompile(mVU, xPC, (uptr)&mVUregs); diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index d8a9ec8084..872746721d 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -427,10 +427,10 @@ __fi void mVUinitFirstPass(microVU& mVU, uptr pState, u8* thisPtr) { mVU.p = 0; // All blocks start at p index #0 mVU.q = 0; // All blocks start at q index #0 if ((uptr)&mVUregs != pState) { // Loads up Pipeline State Info - memcpy_const((u8*)&mVUregs, (u8*)pState, sizeof(microRegInfo)); + memcpy((u8*)&mVUregs, (u8*)pState, sizeof(microRegInfo)); } if (doEarlyExit(mVU) && ((uptr)&mVU.prog.lpState != pState)) { - memcpy_const((u8*)&mVU.prog.lpState, (u8*)pState, sizeof(microRegInfo)); + memcpy((u8*)&mVU.prog.lpState, (u8*)pState, sizeof(microRegInfo)); } mVUblock.x86ptrStart = thisPtr; mVUpBlock = mVUblocks[mVUstartPC/2]->add(&mVUblock); // Add this block to block manager diff --git a/pcsx2/x86/newVif_HashBucket.h b/pcsx2/x86/newVif_HashBucket.h index 90c0202689..d0f65fc129 100644 --- a/pcsx2/x86/newVif_HashBucket.h +++ b/pcsx2/x86/newVif_HashBucket.h @@ -86,7 +86,7 @@ public: wxsFormat(L"HashBucket Chain (bucket size=%d)", bucket.Size+1) ); } - memcpy_const(&bucket.Chain[bucket.Size++], &dataPtr, sizeof(T)); + memcpy(&bucket.Chain[bucket.Size++], &dataPtr, sizeof(T)); } void clear() { for (int i = 0; i < hSize; i++) { diff --git a/pcsx2/x86/newVif_Unpack.cpp b/pcsx2/x86/newVif_Unpack.cpp index 2bb5474487..86d6c1d4f3 100644 --- a/pcsx2/x86/newVif_Unpack.cpp +++ b/pcsx2/x86/newVif_Unpack.cpp @@ -117,7 +117,7 @@ _vifT int nVifUnpack(const u8* data) { if (ret == vif.tag.size) { // Full Transfer if (v.bSize) { // Last transfer was partial - memcpy_aligned(&v.buffer[v.bSize], data, size); + memcpy(&v.buffer[v.bSize], data, size); v.bSize += size; size = v.bSize; data = v.buffer; @@ -140,7 +140,7 @@ _vifT int nVifUnpack(const u8* data) { v.bSize = 0; } else { // Partial Transfer - memcpy_aligned(&v.buffer[v.bSize], data, size); + memcpy(&v.buffer[v.bSize], data, size); v.bSize += size; vif.tag.size -= ret; diff --git a/pcsx2/x86/sVU_Compare.h b/pcsx2/x86/sVU_Compare.h index 7ea4f3518e..13229111e0 100644 --- a/pcsx2/x86/sVU_Compare.h +++ b/pcsx2/x86/sVU_Compare.h @@ -131,15 +131,15 @@ void recSuperVU1::Execute(u32 cycles) { #endif runCount++; - memcpy_const((u8*)backVUregs, (u8*)&VU1, sizeof(VURegs)); - memcpy_const((u8*)backVUmem, (u8*) VU1.Mem, 0x4000); + memcpy((u8*)backVUregs, (u8*)&VU1, sizeof(VURegs)); + memcpy((u8*)backVUmem, (u8*) VU1.Mem, 0x4000); runMVU1(cycles); - memcpy_const((u8*)cmpVUregs,(u8*)&VU1, sizeof(VURegs)); - memcpy_const((u8*)cmpVUmem, (u8*)VU1.Mem, 0x4000); - memcpy_const((u8*)&VU1, (u8*)backVUregs, sizeof(VURegs)); - memcpy_const((u8*)VU1.Mem, (u8*)backVUmem, 0x4000); + memcpy((u8*)cmpVUregs,(u8*)&VU1, sizeof(VURegs)); + memcpy((u8*)cmpVUmem, (u8*)VU1.Mem, 0x4000); + memcpy((u8*)&VU1, (u8*)backVUregs, sizeof(VURegs)); + memcpy((u8*)VU1.Mem, (u8*)backVUmem, 0x4000); runSVU1(cycles); if ((memcmp((u8*)cmpVUregs, (u8*)&VU1, (16*32) + (16*16))) || (memcmp((u8*)cmpVUmem, (u8*)VU1.Mem, 0x4000))) { @@ -230,8 +230,8 @@ void recSuperVU1::Execute(u32 cycles) { resetMVU1(); - memcpy_const((u8*)&VU1, (u8*)backVUregs, sizeof(VURegs)); - memcpy_const((u8*)VU1.Mem, (u8*)backVUmem, 0x4000); + memcpy((u8*)&VU1, (u8*)backVUregs, sizeof(VURegs)); + memcpy((u8*)VU1.Mem, (u8*)backVUmem, 0x4000); runMVU1(cycles);