diff --git a/pcsx2/Gif_Unit.cpp b/pcsx2/Gif_Unit.cpp index f351c896c6..74bba29167 100644 --- a/pcsx2/Gif_Unit.cpp +++ b/pcsx2/Gif_Unit.cpp @@ -112,15 +112,15 @@ bool Gif_HandlerAD_MTVU(u8* pMem) if (reg == 0x60) { // SIGNAL GUNIT_WARN("GIF Handler - SIGNAL"); - if (vu1Thread.gsInterrupts.load(std::memory_order_acquire) & VU_Thread::InterruptFlagSignal) + if (vu1Thread.mtvuInterrupts.load(std::memory_order_acquire) & VU_Thread::InterruptFlagSignal) Console.Error("GIF Handler MTVU - Double SIGNAL Not Handled"); vu1Thread.gsSignal.store(((u64)data[1] << 32) | data[0], std::memory_order_relaxed); - vu1Thread.gsInterrupts.fetch_or(VU_Thread::InterruptFlagSignal, std::memory_order_release); + vu1Thread.mtvuInterrupts.fetch_or(VU_Thread::InterruptFlagSignal, std::memory_order_release); } else if (reg == 0x61) { // FINISH GUNIT_WARN("GIF Handler - FINISH"); - u32 old = vu1Thread.gsInterrupts.fetch_or(VU_Thread::InterruptFlagFinish, std::memory_order_relaxed); + u32 old = vu1Thread.mtvuInterrupts.fetch_or(VU_Thread::InterruptFlagFinish, std::memory_order_relaxed); if (old & VU_Thread::InterruptFlagFinish) Console.Error("GIF Handler MTVU - Double FINISH Not Handled"); } @@ -140,7 +140,7 @@ bool Gif_HandlerAD_MTVU(u8* pMem) u32 wantedMsk = existingMsk | labelMsk; wanted = ((u64)wantedMsk << 32) | wantedData; } - vu1Thread.gsInterrupts.fetch_or(VU_Thread::InterruptFlagLabel, std::memory_order_release); + vu1Thread.mtvuInterrupts.fetch_or(VU_Thread::InterruptFlagLabel, std::memory_order_release); } else if (reg >= 0x63 && reg != 0x7f) { diff --git a/pcsx2/MTVU.cpp b/pcsx2/MTVU.cpp index d377fd970b..2392debcab 100644 --- a/pcsx2/MTVU.cpp +++ b/pcsx2/MTVU.cpp @@ -69,9 +69,9 @@ void SaveStateBase::mtvuFreeze() Freeze(v); } - u32 gsInterrupts = vu1Thread.gsInterrupts.load(); + u32 gsInterrupts = vu1Thread.mtvuInterrupts.load(); Freeze(gsInterrupts); - vu1Thread.gsInterrupts.store(gsInterrupts); + vu1Thread.mtvuInterrupts.store(gsInterrupts); u64 gsSignal = vu1Thread.gsSignal.load(); Freeze(gsSignal); vu1Thread.gsSignal.store(gsSignal); @@ -113,7 +113,7 @@ void VU_Thread::Reset() memzero(vifRegs); for (size_t i = 0; i < 4; ++i) vu1Thread.vuCycles[i] = 0; - vu1Thread.gsInterrupts = 0; + vu1Thread.mtvuInterrupts = 0; } void VU_Thread::ExecuteTaskInThread() @@ -336,10 +336,10 @@ u32 VU_Thread::Get_vuCycles() 2; } -void VU_Thread::Get_GSChanges() +void VU_Thread::Get_MTVUChanges() { // Note: Atomic communication is with Gif_Unit.cpp Gif_HandlerAD_MTVU - u32 interrupts = gsInterrupts.load(std::memory_order_relaxed); + u32 interrupts = mtvuInterrupts.load(std::memory_order_relaxed); if (!interrupts) return; @@ -349,7 +349,7 @@ void VU_Thread::Get_GSChanges() const u64 signal = gsSignal.load(std::memory_order_relaxed); // If load of signal was moved after clearing the flag, the other thread could write a new value before we load without noticing the double signal // Prevent that with release semantics - gsInterrupts.fetch_and(~InterruptFlagSignal, std::memory_order_release); + mtvuInterrupts.fetch_and(~InterruptFlagSignal, std::memory_order_release); GUNIT_WARN("SIGNAL firing"); const u32 signalMsk = (u32)(signal >> 32); const u32 signalData = (u32)signal; @@ -372,7 +372,7 @@ void VU_Thread::Get_GSChanges() } if (interrupts & InterruptFlagFinish) { - gsInterrupts.fetch_and(~InterruptFlagFinish, std::memory_order_relaxed); + mtvuInterrupts.fetch_and(~InterruptFlagFinish, std::memory_order_relaxed); GUNIT_WARN("Finish firing"); CSRreg.FINISH = true; gifUnit.gsFINISH.gsFINISHFired = false; @@ -382,7 +382,7 @@ void VU_Thread::Get_GSChanges() } if (interrupts & InterruptFlagLabel) { - gsInterrupts.fetch_and(~InterruptFlagLabel, std::memory_order_acquire); + mtvuInterrupts.fetch_and(~InterruptFlagLabel, std::memory_order_acquire); // If other thread updates gsLabel for a second interrupt, that's okay. Worst case we think there's a label interrupt but gsLabel is 0 // We do not want the exchange of gsLabel to move ahead of clearing the flag, or the other thread could add more work before we clear the flag, resulting in an update with the flag unset // acquire semantics should supply that guarantee @@ -392,6 +392,21 @@ void VU_Thread::Get_GSChanges() const u32 labelData = (u32)label; GSSIGLBLID.LBLID = (GSSIGLBLID.LBLID & ~labelMsk) | (labelData & labelMsk); } + if (interrupts & InterruptFlagVUEBit) + { + mtvuInterrupts.fetch_and(~InterruptFlagVUEBit, std::memory_order_relaxed); + + VU0.VI[REG_VPU_STAT].UL &= ~0x0100; + //DevCon.Warning("E-Bit registered %x", VU0.VI[REG_VPU_STAT].UL); + } + if (interrupts & InterruptFlagVUTBit) + { + mtvuInterrupts.fetch_and(~InterruptFlagVUTBit, std::memory_order_relaxed); + VU0.VI[REG_VPU_STAT].UL &= ~0x0100; + VU0.VI[REG_VPU_STAT].UL |= 0x0400; + //DevCon.Warning("T-Bit registered %x", VU0.VI[REG_VPU_STAT].UL); + hwIntcIrq(7); + } } void VU_Thread::KickStart(bool forceKick) @@ -423,7 +438,7 @@ void VU_Thread::WaitVU() void VU_Thread::ExecuteVU(u32 vu_addr, u32 vif_top, u32 vif_itop) { MTVU_LOG("MTVU - ExecuteVU!"); - Get_GSChanges(); // Clear any pending interrupts + Get_MTVUChanges(); // Clear any pending interrupts ReserveSpace(4); Write(MTVU_VU_EXECUTE); Write(vu_addr); @@ -435,7 +450,7 @@ void VU_Thread::ExecuteVU(u32 vu_addr, u32 vif_top, u32 vif_itop) u32 cycles = std::min(Get_vuCycles(), 3000u); cpuRegs.cycle += cycles * EmuConfig.Speedhacks.EECycleSkip; VU0.cycle += cycles * EmuConfig.Speedhacks.EECycleSkip; - Get_GSChanges(); + Get_MTVUChanges(); } void VU_Thread::VifUnpack(vifStruct& _vif, VIFregisters& _vifRegs, u8* data, u32 size) diff --git a/pcsx2/MTVU.h b/pcsx2/MTVU.h index 380bdde2f6..a184442d5a 100644 --- a/pcsx2/MTVU.h +++ b/pcsx2/MTVU.h @@ -52,9 +52,11 @@ public: InterruptFlagFinish = 1 << 0, InterruptFlagSignal = 1 << 1, InterruptFlagLabel = 1 << 2, + InterruptFlagVUEBit = 1 << 3, + InterruptFlagVUTBit = 1 << 4, }; - std::atomic gsInterrupts; // Used for GS Signal, Finish etc + std::atomic mtvuInterrupts; // Used for GS Signal, Finish etc, plus VU End/T-Bit std::atomic gsLabel; // Used for GS Label command std::atomic gsSignal; // Used for GS Signal command @@ -72,7 +74,7 @@ public: // Waits till MTVU is done processing void WaitVU(); - void Get_GSChanges(); + void Get_MTVUChanges(); void ExecuteVU(u32 vu_addr, u32 vif_top, u32 vif_itop); diff --git a/pcsx2/VU1micro.cpp b/pcsx2/VU1micro.cpp index 741cb4ee48..10d6932317 100644 --- a/pcsx2/VU1micro.cpp +++ b/pcsx2/VU1micro.cpp @@ -37,7 +37,9 @@ void vu1ResetRegs() void vu1Finish(bool add_cycles) { if (THREAD_VU1) { - if (VU0.VI[REG_VPU_STAT].UL & 0x100) DevCon.Error("MTVU: VU0.VI[REG_VPU_STAT].UL & 0x100"); + //if (VU0.VI[REG_VPU_STAT].UL & 0x100) DevCon.Error("MTVU: VU0.VI[REG_VPU_STAT].UL & 0x100"); + vu1Thread.WaitVU(); + vu1Thread.Get_MTVUChanges(); return; } u32 vu1cycles = VU1.cycle; @@ -58,8 +60,16 @@ void vu1Finish(bool add_cycles) { void __fastcall vu1ExecMicro(u32 addr) { if (THREAD_VU1) { - vu1Thread.ExecuteVU(addr, vif1Regs.top, vif1Regs.itop); VU0.VI[REG_VPU_STAT].UL &= ~0xFF00; + + // Okay this is a little bit of a hack, but with good reason. + // Most of the time with MTVU we want to pretend the VU has finished quickly as to gain the benefit from running another thread + // however with T-Bit games when the T-Bit is enabled, it needs to wait in case a T-Bit happens, so we need to set "Busy" + // We shouldn't do this all the time as it negates the extra thread and causes games like Ratchet & Clank to be no faster. + if(VU0.VI[REG_FBRST].UL & 0x800) + VU0.VI[REG_VPU_STAT].UL |= 0x0100; + + vu1Thread.ExecuteVU(addr, vif1Regs.top, vif1Regs.itop); return; } static int count = 0; diff --git a/pcsx2/VUmicro.cpp b/pcsx2/VUmicro.cpp index 021a61f058..8f10183144 100644 --- a/pcsx2/VUmicro.cpp +++ b/pcsx2/VUmicro.cpp @@ -26,7 +26,8 @@ void BaseVUmicroCPU::ExecuteBlock(bool startUp) { if (m_Idx && THREAD_VU1) { - vu1Thread.Get_GSChanges(); + vu1Thread.Get_MTVUChanges(); + return; } if (!(stat & test)) return; diff --git a/pcsx2/Vif1_Dma.cpp b/pcsx2/Vif1_Dma.cpp index f37e033919..dfc5859833 100644 --- a/pcsx2/Vif1_Dma.cpp +++ b/pcsx2/Vif1_Dma.cpp @@ -20,6 +20,7 @@ #include "Gif_Unit.h" #include "VUmicro.h" #include "newVif.h" +#include "MTVU.h" u32 g_vif1Cycles = 0; @@ -232,6 +233,9 @@ __fi void vif1VUFinish() { if (VU0.VI[REG_VPU_STAT].UL & 0x500) { + if(THREAD_VU1) + vu1Thread.Get_MTVUChanges(); + CPU_INT(VIF_VU1_FINISH, 128); return; } diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index a98c10727b..d2d4fbf617 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -73,8 +73,10 @@ void mVUinit(microVU& mVU, uint vuIndex) { // Resets Rec Data void mVUreset(microVU& mVU, bool resetReserve) { + if (THREAD_VU1) { + DevCon.Warning("mVU Reset"); // If MTVU is toggled on during gameplay we need to flush the running VU1 program, else it gets in a mess if (VU0.VI[REG_VPU_STAT].UL & 0x100) { @@ -346,6 +348,7 @@ void recMicroVU0::Reset() { void recMicroVU1::Reset() { if(!pxAssertDev(m_Reserved, "MicroVU1 CPU Provider has not been reserved prior to reset!")) return; vu1Thread.WaitVU(); + vu1Thread.Get_MTVUChanges(); mVUreset(microVU1, true); } @@ -388,7 +391,7 @@ void recMicroVU1::Execute(u32 cycles) { VU1.VI[REG_TPC].UL <<= 3; ((mVUrecCall)microVU1.startFunct)(VU1.VI[REG_TPC].UL, cycles); VU1.VI[REG_TPC].UL >>= 3; - if(microVU1.regs().flags & 0x4) + if(microVU1.regs().flags & 0x4 && !THREAD_VU1) { microVU1.regs().flags &= ~0x4; hwIntcIrq(7); diff --git a/pcsx2/x86/microVU_Branch.inl b/pcsx2/x86/microVU_Branch.inl index d008a93da6..6965c694cf 100644 --- a/pcsx2/x86/microVU_Branch.inl +++ b/pcsx2/x86/microVU_Branch.inl @@ -114,8 +114,10 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit) { if (!mVU.index || !THREAD_VU1) { xAND(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? ~0x100 : ~0x001)); // VBS0/VBS1 flag } + else + xFastCall((void*)mVUTBit); } - + if (isEbit != 2) { // Save PC, and Jump to Exit Point xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC); xJMP(mVU.exitFunct); @@ -214,6 +216,8 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit) { if (!mVU.index || !THREAD_VU1) { xAND(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? ~0x100 : ~0x001)); // VBS0/VBS1 flag } + else + xFastCall((void*)mVUEBit); } if (isEbit != 2 && isEbit != 3) { // Save PC, and Jump to Exit Point @@ -280,8 +284,10 @@ void normBranch(mV, microFlagCycles& mFC) { u32 tempPC = iPC; xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x400 : 0x4)); xForwardJump32 eJMP(Jcc_Zero); - xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x200 : 0x2)); - xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT); + if (!mVU.index || !THREAD_VU1) { + xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x200 : 0x2)); + xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT); + } iPC = branchAddr(mVU)/4; mVUDTendProgram(mVU, &mFC, 1); eJMP.SetTarget(); @@ -292,8 +298,10 @@ void normBranch(mV, microFlagCycles& mFC) { u32 tempPC = iPC; xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x800 : 0x8)); xForwardJump32 eJMP(Jcc_Zero); - xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x400 : 0x4)); - xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT); + if (!mVU.index || !THREAD_VU1) { + xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x400 : 0x4)); + xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT); + } iPC = branchAddr(mVU)/4; mVUDTendProgram(mVU, &mFC, 1); eJMP.SetTarget(); @@ -393,8 +401,10 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc) { u32 tempPC = iPC; xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x800 : 0x8)); xForwardJump32 eJMP(Jcc_Zero); - xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x400 : 0x4)); - xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT); + if (!mVU.index || !THREAD_VU1) { + xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x400 : 0x4)); + xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT); + } mVUDTendProgram(mVU, &mFC, 2); xCMP(ptr16[&mVU.branch], 0); xForwardJump32 tJMP(xInvertCond((JccComparisonType)JMPcc)); @@ -414,8 +424,10 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc) { u32 tempPC = iPC; xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x400 : 0x4)); xForwardJump32 eJMP(Jcc_Zero); - xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x200 : 0x2)); - xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT); + if (!mVU.index || !THREAD_VU1) { + xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x200 : 0x2)); + xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT); + } mVUDTendProgram(mVU, &mFC, 2); xCMP(ptr16[&mVU.branch], 0); xForwardJump32 dJMP(xInvertCond((JccComparisonType)JMPcc)); @@ -548,8 +560,10 @@ void normJump(mV, microFlagCycles& mFC) { { xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x400 : 0x4)); xForwardJump32 eJMP(Jcc_Zero); - xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x200 : 0x2)); - xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT); + if (!mVU.index || !THREAD_VU1) { + xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x200 : 0x2)); + xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT); + } mVUDTendProgram(mVU, &mFC, 2); xMOV(gprT1, ptr32[&mVU.branch]); xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], gprT1); @@ -560,8 +574,10 @@ void normJump(mV, microFlagCycles& mFC) { { xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x800 : 0x8)); xForwardJump32 eJMP(Jcc_Zero); - xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x400 : 0x4)); - xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT); + if (!mVU.index || !THREAD_VU1) { + xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x400 : 0x4)); + xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT); + } mVUDTendProgram(mVU, &mFC, 2); xMOV(gprT1, ptr32[&mVU.branch]); xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], gprT1); diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index aec138a0c8..4a66f6607a 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -514,8 +514,10 @@ void mVUDoDBit(microVU& mVU, microFlagCycles* mFC) { xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x400 : 0x4)); xForwardJump32 eJMP(Jcc_Zero); - xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x200 : 0x2)); - xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT); + if (!isVU1 || !THREAD_VU1) { + xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x200 : 0x2)); + xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT); + } incPC(1); mVUDTendProgram(mVU, mFC, 1); incPC(-1); @@ -526,8 +528,10 @@ void mVUDoTBit(microVU& mVU, microFlagCycles* mFC) { xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x800 : 0x8)); xForwardJump32 eJMP(Jcc_Zero); - xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x400 : 0x4)); - xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT); + if (!isVU1 || !THREAD_VU1) { + xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x400 : 0x4)); + xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT); + } incPC(1); mVUDTendProgram(mVU, mFC, 1); incPC(-1); diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl index 47c4a3b5e2..c7596230b0 100644 --- a/pcsx2/x86/microVU_Misc.inl +++ b/pcsx2/x86/microVU_Misc.inl @@ -180,6 +180,16 @@ static void __fc mVUwarningRegAccess(u32 prog, u32 pc) { Console.Error("microVU0 Warning: Accessing VU1 Regs! [%04x] [%x]", pc, prog); } +static void __fc mVUTBit() { + u32 old = vu1Thread.mtvuInterrupts.fetch_or(VU_Thread::InterruptFlagVUTBit, std::memory_order_release); + if (old & VU_Thread::InterruptFlagVUTBit) + DevCon.Warning("Old TBit not registered"); +} + +static void __fc mVUEBit() { + u32 old = vu1Thread.mtvuInterrupts.fetch_or(VU_Thread::InterruptFlagVUEBit, std::memory_order_release); +} + static inline u32 branchAddrN(const mV) { pxAssumeDev(islowerOP, "MicroVU: Expected Lower OP code for valid branch addr.");