diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index 09d93905ad..fad13c1b8b 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -90,6 +90,7 @@ void mVUreset(microVU& mVU, bool resetReserve) mVUdispatcherAB(mVU); mVUdispatcherCD(mVU); mvuGenerateWaitMTVU(mVU); + mvuGenerateCopyPipelineState(mVU); mVUemitSearch(); mVU.regs().nextBlockCycles = 0; diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index fa859d03de..e2b779b795 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -252,6 +252,7 @@ struct microVU u8* startFunctXG; // Function Ptr to the recompiler dispatcher (xgkick resume) u8* exitFunctXG; // Function Ptr to the recompiler dispatcher (xgkick exit) u8* waitMTVU; // Ptr to function to save registers/sync VU1 thread + u8* copyPLState; // Ptr to function to copy pipeline state into microVU u8* resumePtrXG; // Ptr to recompiled code position to resume xgkick u32 code; // Contains the current Instruction u32 divFlag; // 1 instance of I/D flags diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 803f4cf53c..3ed2b73866 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -459,16 +459,6 @@ void mVUdebugPrintBlocks(microVU& mVU, bool isEndPC) } } -// Saves Pipeline State for resuming from early exits -__fi void mVUsavePipelineState(microVU& mVU) -{ - u32* lpS = (u32*)&mVU.prog.lpState; - for (size_t i = 0; i < (sizeof(microRegInfo) - 4) / 4; i++, lpS++) - { - xMOV(ptr32[lpS], lpS[0]); - } -} - // Test cycles to see if we need to exit-early... 
void mVUtestCycles(microVU& mVU, microFlagCycles& mFC) { @@ -483,12 +473,29 @@ void mVUtestCycles(microVU& mVU, microFlagCycles& mFC) xCMP(eax, 0); xForwardJGE32 skip; - mVUsavePipelineState(mVU); + u8* writeback = x86Ptr; + xLoadFarAddr(rax, x86Ptr); + xFastCall((void*)mVU.copyPLState); + if (EmuConfig.Gamefixes.VUSyncHack || EmuConfig.Gamefixes.FullVU0SyncHack) xMOV(ptr32[&mVU.regs().nextBlockCycles], mVUcycles); - mVUendProgram(mVU, &mFC, 0); + { + xAlignPtr(SSE_ALIGN_N); + + u8* curx86Ptr = x86Ptr; + x86SetPtr(writeback); + xLoadFarAddr(rax, curx86Ptr); + x86SetPtr(curx86Ptr); + + static_assert((sizeof(microRegInfo) % 4) == 0); + const u32* lpPtr = reinterpret_cast<const u32*>(&mVU.prog.lpState); + const u32* lpEnd = lpPtr + (sizeof(microRegInfo) / 4); + while (lpPtr != lpEnd) + xWrite32(*(lpPtr++)); + } + skip.SetTarget(); xSUB(ptr32[&mVU.cycles], mVUcycles); diff --git a/pcsx2/x86/microVU_Execute.inl b/pcsx2/x86/microVU_Execute.inl index 87261c50f2..4fcd69eed6 100644 --- a/pcsx2/x86/microVU_Execute.inl +++ b/pcsx2/x86/microVU_Execute.inl @@ -125,6 +125,7 @@ void mVUdispatcherCD(mV) void mvuGenerateWaitMTVU(mV) { + xAlignCallTarget(); mVU.waitMTVU = x86Ptr; int num_xmms = 0, num_gprs = 0; @@ -198,6 +199,58 @@ void mvuGenerateWaitMTVU(mV) "microVU: Dispatcher generation exceeded reserved cache area!"); } +void mvuGenerateCopyPipelineState(mV) +{ + xAlignCallTarget(); + mVU.copyPLState = x86Ptr; + + if (x86caps.hasAVX2) + { + xVMOVAPS(ymm0, ptr[rax]); + xVMOVAPS(ymm1, ptr[rax + 32u]); + xVMOVAPS(ymm2, ptr[rax + 64u]); + xVMOVAPS(ymm3, ptr[rax + 96u]); + xVMOVAPS(ymm4, ptr[rax + 128u]); + + xVMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState)], ymm0); + xVMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 32u], ymm1); + xVMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 64u], ymm2); + xVMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 96u], ymm3); + xVMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 128u], ymm4); + + xVZEROUPPER(); + } + else + { + xMOVAPS(xmm0, ptr[rax]); +
xMOVAPS(xmm1, ptr[rax + 16u]); + xMOVAPS(xmm2, ptr[rax + 32u]); + xMOVAPS(xmm3, ptr[rax + 48u]); + xMOVAPS(xmm4, ptr[rax + 64u]); + xMOVAPS(xmm5, ptr[rax + 80u]); + xMOVAPS(xmm6, ptr[rax + 96u]); + xMOVAPS(xmm7, ptr[rax + 112u]); + xMOVAPS(xmm8, ptr[rax + 128u]); + xMOVAPS(xmm9, ptr[rax + 144u]); + + xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState)], xmm0); + xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 16u], xmm1); + xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 32u], xmm2); + xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 48u], xmm3); + xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 64u], xmm4); + xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 80u], xmm5); + xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 96u], xmm6); + xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 112u], xmm7); + xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 128u], xmm8); + xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 144u], xmm9); + } + + xRET(); + + pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize), + "microVU: Dispatcher generation exceeded reserved cache area!"); +} + //------------------------------------------------------------------ // Execution Functions //------------------------------------------------------------------