diff --git a/pcsx2/x86/iCore.h b/pcsx2/x86/iCore.h
index ff6b6d1048..a6a249e5df 100644
--- a/pcsx2/x86/iCore.h
+++ b/pcsx2/x86/iCore.h
@@ -220,6 +220,7 @@ int _signExtendXMMtoM(uptr to, x86SSERegType from, int candestroy); // returns t
 #define EEINST_COP2_STATUS_FLAG 0x400
 #define EEINST_COP2_MAC_FLAG 0x800
 #define EEINST_COP2_CLIP_FLAG 0x1000
+#define EEINST_COP2_FINISH_VU0_MICRO 0x2000
 
 struct EEINST
 {
diff --git a/pcsx2/x86/iR5900Analysis.cpp b/pcsx2/x86/iR5900Analysis.cpp
index c1d23128b2..6710a04b4c 100644
--- a/pcsx2/x86/iR5900Analysis.cpp
+++ b/pcsx2/x86/iR5900Analysis.cpp
@@ -227,3 +227,39 @@ void COP2FlagHackPass::CommitAllFlags()
 	CommitMACFlag();
 	CommitClipFlag();
 }
+
+COP2MicroFinishPass::COP2MicroFinishPass() = default;
+
+COP2MicroFinishPass::~COP2MicroFinishPass() = default;
+
+void COP2MicroFinishPass::Run(u32 start, u32 end, EEINST* inst_cache)
+{
+	bool needs_vu0_finish = true;
+
+	ForEachInstruction(start, end, inst_cache, [&needs_vu0_finish](u32 apc, EEINST* inst) {
+		// Catch SQ/SB/SH/SW/SD stores, which could trigger a DMA->VIF0->VU0 exec.
+		// This is very unlikely in a COP2 chain.
+		if (_Opcode_ == 037 || _Opcode_ == 050 || _Opcode_ == 051 || _Opcode_ == 053 || _Opcode_ == 077)
+		{
+			needs_vu0_finish = true;
+			return true;
+		}
+
+		// Look for COP2 instructions.
+		if (_Opcode_ != 022)
+			return true;
+
+		// Set the flag on the current instruction, and clear it for the next.
+		if (needs_vu0_finish)
+		{
+			inst->info |= EEINST_COP2_FINISH_VU0_MICRO;
+			needs_vu0_finish = false;
+		}
+
+		// Except for VCALLMS/VCALLMSR, which can start a micro, so the next instruction needs to finish it.
+		if (_Funct_ == 070 || _Funct_ == 071)
+			needs_vu0_finish = true;
+
+		return true;
+	});
+}
diff --git a/pcsx2/x86/iR5900Analysis.h b/pcsx2/x86/iR5900Analysis.h
index 87e9d503db..b9665271a4 100644
--- a/pcsx2/x86/iR5900Analysis.h
+++ b/pcsx2/x86/iR5900Analysis.h
@@ -62,4 +62,13 @@ namespace R5900
 		u32 m_cfc2_pc = 0;
 	};
+
+	class COP2MicroFinishPass final : public AnalysisPass
+	{
+	public:
+		COP2MicroFinishPass();
+		~COP2MicroFinishPass();
+
+		void Run(u32 start, u32 end, EEINST* inst_cache) override;
+	};
 } // namespace R5900
\ No newline at end of file
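The pass above walks each recompiled block once and flags only the first COP2 instruction of every chain, re-arming whenever a store (possible DMA->VIF0->VU0 kick) or a VCALLMS/VCALLMSR intervenes. The following standalone sketch mirrors that marking logic outside the recompiler so the propagation can be checked in isolation; `MockInst`, `MarkVU0Finishes`, and the explicit opcode fields are hypothetical stand-ins for the `EEINST`/`ForEachInstruction` machinery, not part of this patch.

```cpp
// Standalone sketch of the COP2MicroFinishPass marking logic.
// MockInst and MarkVU0Finishes are illustrative only, not PCSX2 code.
#include <cstdint>
#include <cstdio>
#include <vector>

constexpr uint32_t EEINST_COP2_FINISH_VU0_MICRO = 0x2000;

struct MockInst
{
	uint32_t opcode; // primary opcode field (bits 31:26)
	uint32_t funct;  // function field (bits 5:0), only meaningful for COP2
	uint32_t info = 0;
};

void MarkVU0Finishes(std::vector<MockInst>& block)
{
	// The first COP2 op of a block can't know what ran before it,
	// so it always has to finish any in-flight VU0 micro.
	bool needs_vu0_finish = true;

	for (MockInst& inst : block)
	{
		// Stores (SQ/SB/SH/SW/SD, same octal opcodes as the pass) could
		// kick off a DMA->VIF0->VU0 exec, so the next COP2 op must sync.
		if (inst.opcode == 037 || inst.opcode == 050 || inst.opcode == 051 ||
			inst.opcode == 053 || inst.opcode == 077)
		{
			needs_vu0_finish = true;
			continue;
		}

		if (inst.opcode != 022) // not a COP2 instruction
			continue;

		// Flag the first COP2 op of the chain; later ones are elided.
		if (needs_vu0_finish)
		{
			inst.info |= EEINST_COP2_FINISH_VU0_MICRO;
			needs_vu0_finish = false;
		}

		// VCALLMS/VCALLMSR can start a new micro themselves.
		if (inst.funct == 070 || inst.funct == 071)
			needs_vu0_finish = true;
	}
}

int main()
{
	// COP2, COP2, SW, COP2: the first and last COP2 ops get flagged; the
	// second is skipped because nothing between it and the first can
	// start a VU0 program.
	std::vector<MockInst> block = {{022, 0}, {022, 0}, {053, 0}, {022, 0}};
	MarkVU0Finishes(block);
	for (const MockInst& inst : block)
		std::printf("opcode=%03o info=0x%X\n", (unsigned)inst.opcode, (unsigned)inst.info);
	return 0;
}
```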
diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp
index 793099631e..e67a8b052b 100644
--- a/pcsx2/x86/ix86-32/iR5900-32.cpp
+++ b/pcsx2/x86/ix86-32/iR5900-32.cpp
@@ -2190,10 +2190,12 @@ StartRecomp:
 	}
 
 	// eventually we'll want to have a vector of passes or something.
-	if (has_cop2_instructions && EmuConfig.Speedhacks.vuFlagHack)
+	if (has_cop2_instructions)
 	{
-		COP2FlagHackPass fhpass;
-		fhpass.Run(startpc, s_nEndBlock, s_pInstCache + 1);
+		COP2MicroFinishPass().Run(startpc, s_nEndBlock, s_pInstCache + 1);
+
+		if (EmuConfig.Speedhacks.vuFlagHack)
+			COP2FlagHackPass().Run(startpc, s_nEndBlock, s_pInstCache + 1);
 	}
 
 	// analyze instructions //
diff --git a/pcsx2/x86/microVU_Macro.inl b/pcsx2/x86/microVU_Macro.inl
index 2d077590fb..4e3966a487 100644
--- a/pcsx2/x86/microVU_Macro.inl
+++ b/pcsx2/x86/microVU_Macro.inl
@@ -323,35 +323,40 @@ void recBC2TL() { _setupBranchTest(JZ32, true); }
 
 void COP2_Interlock(bool mBitSync)
 {
-	if (cpuRegs.code & 1)
 	{
 		s_nBlockInterlocked = true;
-		_freeX86reg(eax);
-		xMOV(eax, ptr32[&cpuRegs.cycle]);
-		xADD(eax, scaleblockcycles_clear());
-		xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
 
-		xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
-		xForwardJZ32 skipvuidle;
-		_cop2BackupRegs();
-		if (mBitSync)
+		// We can safely skip the _vu0FinishMicro() call when there's nothing
+		// that can trigger a VU0 program between CFC2/CTC2/COP2 instructions.
+		if ((g_pCurInstInfo->info & EEINST_COP2_FINISH_VU0_MICRO) || mBitSync)
 		{
-			xSUB(eax, ptr32[&VU0.cycle]);
-			xSUB(eax, ptr32[&VU0.nextBlockCycles]);
-			xCMP(eax, 4);
-			xForwardJL32 skip;
-			xLoadFarAddr(arg1reg, CpuVU0);
-			xMOV(arg2reg, s_nBlockInterlocked);
-			xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg, arg2reg);
-			skip.SetTarget();
+			_freeX86reg(eax);
+			xMOV(eax, ptr32[&cpuRegs.cycle]);
+			xADD(eax, scaleblockcycles_clear());
+			xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
 
-			xFastCall((void*)_vu0WaitMicro);
+			xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
+			xForwardJZ32 skipvuidle;
+			_cop2BackupRegs();
+			if (mBitSync)
+			{
+				xSUB(eax, ptr32[&VU0.cycle]);
+				xSUB(eax, ptr32[&VU0.nextBlockCycles]);
+				xCMP(eax, 4);
+				xForwardJL32 skip;
+				xLoadFarAddr(arg1reg, CpuVU0);
+				xMOV(arg2reg, s_nBlockInterlocked);
+				xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg, arg2reg);
+				skip.SetTarget();
+
+				xFastCall((void*)_vu0WaitMicro);
+			}
+			else
+				xFastCall((void*)_vu0FinishMicro);
+			_cop2RestoreRegs();
+			skipvuidle.SetTarget();
 		}
-		else
-			xFastCall((void*)_vu0FinishMicro);
-		_cop2RestoreRegs();
-		skipvuidle.SetTarget();
 	}
 }
@@ -665,12 +670,15 @@ namespace OpcodeImpl {
 	void recCOP2_BC2() { recCOP2_BC2t[_Rt_](); }
 	void recCOP2_SPEC1()
 	{
-		xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
-		xForwardJZ32 skipvuidle;
-		_cop2BackupRegs();
-		xFastCall((void*)_vu0FinishMicro);
-		_cop2RestoreRegs();
-		skipvuidle.SetTarget();
+		if (g_pCurInstInfo->info & EEINST_COP2_FINISH_VU0_MICRO)
+		{
+			xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
+			xForwardJZ32 skipvuidle;
+			_cop2BackupRegs();
+			xFastCall((void*)_vu0FinishMicro);
+			_cop2RestoreRegs();
+			skipvuidle.SetTarget();
+		}
 		recCOP2SPECIAL1t[_Funct_]();
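On the recompiler side, `g_pCurInstInfo->info` is examined while the block is being recompiled, so an unflagged COP2 instruction emits no VU0 idle test at all, rather than branching over one at run time. A rough illustration of that recompile-time decision follows; `MockRecCOP2_SPEC1` and `EmitX` are hypothetical stand-ins for the `xTEST`/`xForwardJZ32`/`xFastCall` emitters, and this is a sketch of the idea, not PCSX2 API.

```cpp
// Illustrative-only mock of the recompile-time decision in recCOP2_SPEC1.
// EmitX stands in for the x86 emitter calls; nothing here is PCSX2 API.
#include <cstdio>

constexpr unsigned EEINST_COP2_FINISH_VU0_MICRO = 0x2000;

static void EmitX(const char* op) { std::printf("  emit: %s\n", op); }

// Called once per COP2 special-1 instruction while recompiling a block.
void MockRecCOP2_SPEC1(unsigned inst_info)
{
	// Checked at recompile time: unflagged instructions pay nothing in the
	// generated code, not even the VPU_STAT test.
	if (inst_info & EEINST_COP2_FINISH_VU0_MICRO)
	{
		EmitX("test [VU0.VI[REG_VPU_STAT]], 1");
		EmitX("jz   skipvuidle");
		EmitX("call _cop2BackupRegs");
		EmitX("call _vu0FinishMicro");
		EmitX("call _cop2RestoreRegs");
		EmitX("skipvuidle:");
	}
	EmitX("call recCOP2SPECIAL1t[funct]");
}

int main()
{
	std::printf("first COP2 of a chain (flagged):\n");
	MockRecCOP2_SPEC1(EEINST_COP2_FINISH_VU0_MICRO);
	std::printf("later COP2 in the same chain (not flagged):\n");
	MockRecCOP2_SPEC1(0);
	return 0;
}
```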