From df79a17baa283b1bca4ad05e0fff293771dd5087 Mon Sep 17 00:00:00 2001 From: kozarovv <15552250+kozarovv@users.noreply.github.com> Date: Sun, 9 Aug 2020 08:30:24 +0200 Subject: [PATCH] VU: Improve VU0/EE sync, Implement better M-Bit Handling, Fix VU program handing on VIF --- pcsx2/COP2.cpp | 4 +- pcsx2/Config.h | 4 +- pcsx2/Pcsx2Config.cpp | 12 ++- pcsx2/VU.h | 5 + pcsx2/VU0.cpp | 24 +++-- pcsx2/VU0micro.cpp | 2 +- pcsx2/VU0microInterp.cpp | 12 +-- pcsx2/VU1micro.cpp | 3 +- pcsx2/VU1microInterp.cpp | 2 +- pcsx2/VUmicro.cpp | 49 +++++---- pcsx2/VUmicro.h | 1 + pcsx2/Vif0_Dma.cpp | 17 ++- pcsx2/Vif_Codes.cpp | 5 +- pcsx2/Vif_Transfer.cpp | 2 +- pcsx2/gui/Panels/GameFixesPanel.cpp | 8 +- pcsx2/x86/iR5900.h | 1 + pcsx2/x86/ix86-32/iR5900-32.cpp | 25 +++++ pcsx2/x86/ix86-32/iR5900LoadStore.cpp | 18 +++- pcsx2/x86/microVU.cpp | 3 + pcsx2/x86/microVU_Branch.inl | 81 ++++++++++++-- pcsx2/x86/microVU_Compile.inl | 150 ++++++++++++++++++-------- pcsx2/x86/microVU_Execute.inl | 39 ++++--- pcsx2/x86/microVU_Macro.inl | 70 ++++++++++-- 23 files changed, 404 insertions(+), 133 deletions(-) diff --git a/pcsx2/COP2.cpp b/pcsx2/COP2.cpp index 05bc16743b..9a566a133e 100644 --- a/pcsx2/COP2.cpp +++ b/pcsx2/COP2.cpp @@ -29,13 +29,13 @@ using namespace R5900::Interpreter; void VCALLMS() { vu0Finish(); vu0ExecMicro(((cpuRegs.code >> 6) & 0x7FFF)); - vif0Regs.stat.VEW = false; + //vif0Regs.stat.VEW = false; } void VCALLMSR() { vu0Finish(); vu0ExecMicro(VU0.VI[REG_CMSAR0].US[0]); - vif0Regs.stat.VEW = false; + //vif0Regs.stat.VEW = false; } void BC2F() diff --git a/pcsx2/Config.h b/pcsx2/Config.h index 614947b963..41572ffcbc 100644 --- a/pcsx2/Config.h +++ b/pcsx2/Config.h @@ -59,6 +59,7 @@ enum GamefixId Fix_GoemonTlbMiss, Fix_ScarfaceIbit, Fix_CrashTagTeamIbit, + Fix_VU0Kickstart, GamefixId_COUNT }; @@ -361,7 +362,8 @@ struct Pcsx2Config FMVinSoftwareHack : 1, // Toggle in and out of software rendering when an FMV runs. GoemonTlbHack : 1, // Gomeon tlb miss hack. The game need to access unmapped virtual address. Instead to handle it as exception, tlb are preloaded at startup ScarfaceIbit : 1, // Scarface I bit hack. Needed to stop constant VU recompilation - CrashTagTeamRacingIbit : 1; // Crash Tag Team Racing I bit hack. Needed to stop constant VU recompilation + CrashTagTeamRacingIbit : 1, // Crash Tag Team Racing I bit hack. Needed to stop constant VU recompilation + VU0KickstartHack : 1; // Speed up VU0 at start of program to avoid some VU1 sync issues BITFIELD_END GamefixOptions(); diff --git a/pcsx2/Pcsx2Config.cpp b/pcsx2/Pcsx2Config.cpp index a30941aaf4..4b41eeeb06 100644 --- a/pcsx2/Pcsx2Config.cpp +++ b/pcsx2/Pcsx2Config.cpp @@ -267,7 +267,8 @@ const wxChar *const tbl_GamefixNames[] = L"FMVinSoftware", L"GoemonTlb", L"ScarfaceIbit", - L"CrashTagTeamRacingIbit" + L"CrashTagTeamRacingIbit", + L"VU0Kickstart" }; const __fi wxChar* EnumToString( GamefixId id ) @@ -330,7 +331,8 @@ void Pcsx2Config::GamefixOptions::Set( GamefixId id, bool enabled ) case Fix_FMVinSoftware: FMVinSoftwareHack = enabled; break; case Fix_GoemonTlbMiss: GoemonTlbHack = enabled; break; case Fix_ScarfaceIbit: ScarfaceIbit = enabled; break; - case Fix_CrashTagTeamIbit: CrashTagTeamRacingIbit = enabled; break; + case Fix_CrashTagTeamIbit: CrashTagTeamRacingIbit = enabled; break; + case Fix_VU0Kickstart: VU0KickstartHack = enabled; break; jNO_DEFAULT; } } @@ -356,7 +358,8 @@ bool Pcsx2Config::GamefixOptions::Get( GamefixId id ) const case Fix_FMVinSoftware: return FMVinSoftwareHack; case Fix_GoemonTlbMiss: return GoemonTlbHack; case Fix_ScarfaceIbit: return ScarfaceIbit; - case Fix_CrashTagTeamIbit: return CrashTagTeamRacingIbit; + case Fix_CrashTagTeamIbit: return CrashTagTeamRacingIbit; + case Fix_VU0Kickstart: return VU0KickstartHack; jNO_DEFAULT; } return false; // unreachable, but we still need to suppress warnings >_< @@ -382,7 +385,8 @@ void Pcsx2Config::GamefixOptions::LoadSave( IniInterface& ini ) IniBitBool( FMVinSoftwareHack ); IniBitBool( GoemonTlbHack ); IniBitBool( ScarfaceIbit ); - IniBitBool( CrashTagTeamRacingIbit ); + IniBitBool( CrashTagTeamRacingIbit ); + IniBitBool( VU0KickstartHack ); } diff --git a/pcsx2/VU.h b/pcsx2/VU.h index f668064516..117089e75e 100644 --- a/pcsx2/VU.h +++ b/pcsx2/VU.h @@ -141,7 +141,12 @@ struct __aligned16 VURegs { u32 branchpc; u32 delaybranchpc; bool takedelaybranch; + u32 pending_q; + u32 pending_p; + __aligned16 u32 micro_macflags[4]; + __aligned16 u32 micro_clipflags[4]; + __aligned16 u32 micro_statusflags[4]; // MAC/Status flags -- these are used by interpreters but are kind of hacky // and shouldn't be relied on for any useful/valid info. Would like to move them out of // this struct eventually. diff --git a/pcsx2/VU0.cpp b/pcsx2/VU0.cpp index e471e56e4d..8b0cbd585a 100644 --- a/pcsx2/VU0.cpp +++ b/pcsx2/VU0.cpp @@ -58,17 +58,27 @@ __fi void _vu0run(bool breakOnMbit, bool addCycles) { if (!(VU0.VI[REG_VPU_STAT].UL & 1)) return; - int startcycle = VU0.cycle; - u32 runCycles = breakOnMbit ? vu0RunCycles : 0x7fffffff; - VU0.flags &= ~VUFLAG_MFLAGSET; + //VU0 is ahead of the EE and M-Bit is already encountered, so no need to wait for it, just catch up the EE + if ((VU0.flags & VUFLAG_MFLAGSET) && breakOnMbit && VU0.cycle >= cpuRegs.cycle) + { + cpuRegs.cycle = VU0.cycle; + return; + } + + u32 startcycle = VU0.cycle; + u32 runCycles = 0x7fffffff; do { // Run VU until it finishes or M-Bit CpuVU0->Execute(runCycles); } while ((VU0.VI[REG_VPU_STAT].UL & 1) // E-bit Termination - && (!breakOnMbit || !(VU0.flags & VUFLAG_MFLAGSET))); // M-bit Break + && (!breakOnMbit || !(VU0.flags & VUFLAG_MFLAGSET) || VU0.cycle < cpuRegs.cycle)); // M-bit Break // Add cycles if called from EE's COP2 - if (addCycles) cpuRegs.cycle += (VU0.cycle-startcycle)*2; + if (addCycles) + { + cpuRegs.cycle += (VU0.cycle - startcycle); + VU0.cycle = cpuRegs.cycle; + } } void _vu0WaitMicro() { _vu0run(1, 1); } // Runs VU0 Micro Until E-bit or M-Bit End @@ -101,7 +111,7 @@ namespace OpcodeImpl void QMFC2() { if (cpuRegs.code & 1) { - _vu0WaitMicro(); + _vu0FinishMicro(); } if (_Rt_ == 0) return; cpuRegs.GPR.r[_Rt_].UD[0] = VU0.VF[_Fs_].UD[0]; @@ -119,7 +129,7 @@ void QMTC2() { void CFC2() { if (cpuRegs.code & 1) { - _vu0WaitMicro(); + _vu0FinishMicro(); } if (_Rt_ == 0) return; diff --git a/pcsx2/VU0micro.cpp b/pcsx2/VU0micro.cpp index 259db16c73..651ed8102d 100644 --- a/pcsx2/VU0micro.cpp +++ b/pcsx2/VU0micro.cpp @@ -44,7 +44,7 @@ void __fastcall vu0ExecMicro(u32 addr) { VU0.VI[REG_VPU_STAT].UL &= ~0xFF; VU0.VI[REG_VPU_STAT].UL |= 0x01; - + VU0.cycle = cpuRegs.cycle; if ((s32)addr != -1) VU0.VI[REG_TPC].UL = addr; _vuExecMicroDebug(VU0); CpuVU0->ExecuteBlock(1); diff --git a/pcsx2/VU0microInterp.cpp b/pcsx2/VU0microInterp.cpp index e29171ba15..acd0084adf 100644 --- a/pcsx2/VU0microInterp.cpp +++ b/pcsx2/VU0microInterp.cpp @@ -157,12 +157,12 @@ static void _vu0Exec(VURegs* VU) if(VU->takedelaybranch) { - VU->branch = 2; - DevCon.Warning("VU0 - Branch/Jump in Delay Slot"); + VU->branch = 1; + DevCon.Warning("VU0 - Branch/Jump in Delay Slot"); VU->branchpc = VU->delaybranchpc; VU->delaybranchpc = 0; VU->takedelaybranch = false; - } + } } } @@ -206,8 +206,9 @@ void InterpVU0::Step() void InterpVU0::Execute(u32 cycles) { VU0.VI[REG_TPC].UL <<= 3; - for (int i = (int)cycles; i > 0 ; i--) { - if (!(VU0.VI[REG_VPU_STAT].UL & 0x1)) { + VU0.flags &= ~VUFLAG_MFLAGSET; + for (int i = (int)cycles; i > 0; i--) { + if (!(VU0.VI[REG_VPU_STAT].UL & 0x1) || (VU0.flags & VUFLAG_MFLAGSET)) { if (VU0.branch || VU0.ebit) { vu0Exec(&VU0); // run branch delay slot? } @@ -217,4 +218,3 @@ void InterpVU0::Execute(u32 cycles) } VU0.VI[REG_TPC].UL >>= 3; } - diff --git a/pcsx2/VU1micro.cpp b/pcsx2/VU1micro.cpp index 6dfb72a4dd..9a67898a75 100644 --- a/pcsx2/VU1micro.cpp +++ b/pcsx2/VU1micro.cpp @@ -57,10 +57,9 @@ void __fastcall vu1ExecMicro(u32 addr) vu1Finish(); VUM_LOG("vu1ExecMicro %x (count=%d)", addr, count++); - + VU1.cycle = cpuRegs.cycle; VU0.VI[REG_VPU_STAT].UL &= ~0xFF00; VU0.VI[REG_VPU_STAT].UL |= 0x0100; - if ((s32)addr != -1) VU1.VI[REG_TPC].UL = addr; _vuExecMicroDebug(VU1); diff --git a/pcsx2/VU1microInterp.cpp b/pcsx2/VU1microInterp.cpp index 0527796e13..9f158e6af4 100644 --- a/pcsx2/VU1microInterp.cpp +++ b/pcsx2/VU1microInterp.cpp @@ -157,7 +157,7 @@ static void _vu1Exec(VURegs* VU) if(VU->takedelaybranch) { - VU->branch = 2; + VU->branch = 1; //DevCon.Warning("VU1 - Branch/Jump in Delay Slot"); VU->branchpc = VU->delaybranchpc; VU->delaybranchpc = 0; diff --git a/pcsx2/VUmicro.cpp b/pcsx2/VUmicro.cpp index 82bbe8bca8..2f89429e88 100644 --- a/pcsx2/VUmicro.cpp +++ b/pcsx2/VUmicro.cpp @@ -25,27 +25,29 @@ void BaseVUmicroCPU::ExecuteBlock(bool startUp) { const u32& stat = VU0.VI[REG_VPU_STAT].UL; const int test = m_Idx ? 0x100 : 1; - const int s = 1024*8; // Kick Start Cycles (Silver Surfer needs this amount) - const int c = 1024*1; // Continue Cycles + const int s = EmuConfig.Gamefixes.VU0KickstartHack ? 2048 : 0; // Kick Start Cycles (Silver Surfer, POP:SOT, Lotus needs this amount) + if (!(stat & test)) return; - if (startUp) { // Start Executing a microprogram + + if (startUp && s) { // Start Executing a microprogram Execute(s); // Kick start VU - // Let VUs run behind EE instead of ahead if (stat & test) { - cpuSetNextEventDelta((s+c)*2); - m_lastEEcycles = cpuRegs.cycle + (s*2); + cpuSetNextEventDelta(s); + + if (m_Idx) + VU1.cycle = cpuRegs.cycle; + else + VU0.cycle = cpuRegs.cycle; } } - else { // Continue Executing (VU roughly half the mhz of EE) - s32 delta = (s32)(u32)(cpuRegs.cycle - m_lastEEcycles) & ~1; - if (delta > 0) { // Enough time has passed - delta >>= 1; // Divide by 2 (unsigned) + else { // Continue Executing + u32 cycle = m_Idx ? VU1.cycle : VU0.cycle; + s32 delta = (s32)(u32)(cpuRegs.cycle - cycle); + if (delta > 0) { // Enough time has passed Execute(delta); // Execute the time since the last call - if (stat & test) { - cpuSetNextEventDelta(c*2); - m_lastEEcycles = cpuRegs.cycle; - } + if (stat & test) + cpuSetNextEventDelta(delta); } else cpuSetNextEventDelta(-delta); // Haven't caught-up from kick start } @@ -55,10 +57,10 @@ void BaseVUmicroCPU::ExecuteBlock(bool startUp) { // EE data to VU0's registers. We want to run VU0 Micro right after this // to ensure that the register is used at the correct time. // This fixes spinning/hanging in some games like Ratchet and Clank's Intro. -void __fastcall BaseVUmicroCPU::ExecuteBlockJIT(BaseVUmicroCPU* cpu) { +void BaseVUmicroCPU::ExecuteBlockJIT(BaseVUmicroCPU* cpu) { const u32& stat = VU0.VI[REG_VPU_STAT].UL; const int test = cpu->m_Idx ? 0x100 : 1; - const int c = 128; // VU Execution Cycles + if (stat & test) { // VU is running #ifdef PCSX2_DEVBUILD static int warn = 5; @@ -67,10 +69,17 @@ void __fastcall BaseVUmicroCPU::ExecuteBlockJIT(BaseVUmicroCPU* cpu) { warn--; } #endif - cpu->Execute(c); // Execute VU - if (stat & test) { - cpu->m_lastEEcycles+=(c*2); - cpuSetNextEventDelta(c*2); + + u32 cycle = cpu->m_Idx ? VU1.cycle : VU0.cycle; + s32 delta = (s32)(u32)(cpuRegs.cycle - cycle); + if (delta > 0) { // Enough time has passed + cpu->Execute(delta); // Execute the time since the last call + if (stat & test) { + cpuSetNextEventDelta(delta); + } + } + else { + cpuSetNextEventDelta(-delta); // Haven't caught-up from kick start } } } diff --git a/pcsx2/VUmicro.h b/pcsx2/VUmicro.h index 8c6104ae91..f0b85fa053 100644 --- a/pcsx2/VUmicro.h +++ b/pcsx2/VUmicro.h @@ -262,6 +262,7 @@ extern BaseVUmicroCPU* CpuVU1; extern void vu0ResetRegs(); extern void __fastcall vu0ExecMicro(u32 addr); extern void vu0Exec(VURegs* VU); +extern void _vu0FinishMicro(); extern void vu0Finish(); extern void iDumpVU0Registers(); diff --git a/pcsx2/Vif0_Dma.cpp b/pcsx2/Vif0_Dma.cpp index ee04469518..1d5b542390 100644 --- a/pcsx2/Vif0_Dma.cpp +++ b/pcsx2/Vif0_Dma.cpp @@ -174,10 +174,16 @@ __fi void vif0Interrupt() if (!(vif0ch.chcr.STR)) Console.WriteLn("vif0 running when CHCR == %x", vif0ch.chcr._u32); + if(vif0.waitforvu) + { + //CPU_INT(DMAC_VIF0, 16); + return; + } + if (vif0.irq && vif0.vifstalled.enabled && vif0.vifstalled.value == VIF_IRQ_STALL) { vif0Regs.stat.INT = true; - + //Yakuza watches VIF_STAT so lets do this here. if (((vif0Regs.code >> 24) & 0x7f) != 0x7) { vif0Regs.stat.VIS = true; @@ -193,7 +199,7 @@ __fi void vif0Interrupt() // One game doesn't like vif stalling at end, can't remember what. Spiderman isn't keen on it tho //vif0ch.chcr.STR = false; vif0Regs.stat.FQC = std::min((u16)0x8, vif0ch.qwc); - if(vif0ch.qwc > 0 || !vif0.done) + if (vif0ch.qwc > 0 || !vif0.done) { VIF_LOG("VIF0 Stalled"); return; @@ -201,13 +207,6 @@ __fi void vif0Interrupt() } } - if(vif0.waitforvu) - { - //DevCon.Warning("Waiting on VU0"); - //CPU_INT(DMAC_VIF0, 16); - return; - } - vif0.vifstalled.enabled = false; //Must go after the Stall, incase it's still in progress, GTC africa likes to see it still transferring. diff --git a/pcsx2/Vif_Codes.cpp b/pcsx2/Vif_Codes.cpp index 1937795e49..1548bed3d8 100644 --- a/pcsx2/Vif_Codes.cpp +++ b/pcsx2/Vif_Codes.cpp @@ -36,7 +36,7 @@ vifOp(vifCode_Null); __ri void vifExecQueue(int idx) { - if (!GetVifX.queued_program) + if (!GetVifX.queued_program || (VU0.VI[REG_VPU_STAT].UL & 1 << (idx * 8))) return; GetVifX.queued_program = false; @@ -59,6 +59,8 @@ __ri void vifExecQueue(int idx) } static __fi void vifFlush(int idx) { + vifExecQueue(idx); + if (!idx) vif0FLUSH(); else vif1FLUSH(); @@ -119,6 +121,7 @@ void ExecuteVU(int idx) vifX.cmd = 0; vifX.pass = 0; } + vifExecQueue(idx); } //------------------------------------------------------------------ diff --git a/pcsx2/Vif_Transfer.cpp b/pcsx2/Vif_Transfer.cpp index bf26d5c6fd..031a2e43cf 100644 --- a/pcsx2/Vif_Transfer.cpp +++ b/pcsx2/Vif_Transfer.cpp @@ -49,7 +49,7 @@ _vifT void vifTransferLoop(u32* &data) { vifX.cmd = data[0] >> 24; - //VIF_LOG("New VifCMD %x tagsize %x", vifX.cmd, vifX.tag.size); + VIF_LOG("New VifCMD %x tagsize %x irq %d", vifX.cmd, vifX.tag.size, vifX.irq); if (IsDevBuild && SysTrace.EE.VIFcode.IsActive()) { // Pass 2 means "log it" vifCmdHandler[idx][vifX.cmd & 0x7f](2, data); diff --git a/pcsx2/gui/Panels/GameFixesPanel.cpp b/pcsx2/gui/Panels/GameFixesPanel.cpp index 3368c43b7d..f740c9bac5 100644 --- a/pcsx2/gui/Panels/GameFixesPanel.cpp +++ b/pcsx2/gui/Panels/GameFixesPanel.cpp @@ -104,9 +104,13 @@ Panels::GameFixesPanel::GameFixesPanel( wxWindow* parent ) _("VU I bit Hack avoid constant recompilation (Scarface The World Is Yours)"), wxEmptyString }, - { + { _("VU I bit Hack avoid constant recompilation (Crash Tag Team Racing)"), - wxEmptyString + wxEmptyString + }, + { + _("VU0 Kickstart to avoid sync problems with VU1"), + wxEmptyString } }; diff --git a/pcsx2/x86/iR5900.h b/pcsx2/x86/iR5900.h index 7f068763fb..21c69497f3 100644 --- a/pcsx2/x86/iR5900.h +++ b/pcsx2/x86/iR5900.h @@ -74,6 +74,7 @@ void SetBranchImm( u32 imm ); void iFlushCall(int flushtype); void recBranchCall( void (*func)() ); void recCall( void (*func)() ); +u32 scaleblockcycles_clear(); namespace R5900{ namespace Dynarec { diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index f88802ddbc..df2a6e1e86 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -1027,6 +1027,31 @@ static u32 scaleblockcycles() return scaled; } +u32 scaleblockcycles_clear() +{ + u32 scaled = scaleblockcycles_calculation(); + +#if 0 // Enable this to get some runtime statistics about the scaling result in practice + static u32 scaled_overall = 0, unscaled_overall = 0; + if (g_resetEeScalingStats) + { + scaled_overall = unscaled_overall = 0; + g_resetEeScalingStats = false; + } + u32 unscaled = DEFAULT_SCALED_BLOCKS(); + if (!unscaled) unscaled = 1; + + scaled_overall += scaled; + unscaled_overall += unscaled; + float ratio = static_cast(unscaled_overall) / scaled_overall; + + DevCon.WriteLn(L"Unscaled overall: %d, scaled overall: %d, relative EE clock speed: %d %%", + unscaled_overall, scaled_overall, static_cast(100 * ratio)); +#endif + s_nBlockCycles &= 0x7; + + return scaled; +} // Generates dynarec code for Event tests followed by a block dispatch (branch). // Parameters: diff --git a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp index 3e4ec4c558..1cc8d150ed 100644 --- a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp +++ b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp @@ -573,6 +573,14 @@ void recSWC1() void recLQC2() { + iFlushCall(FLUSH_EVERYTHING); + xMOV(eax, ptr[&cpuRegs.cycle]); + xADD(eax, scaleblockcycles_clear()); + xMOV(ptr[&cpuRegs.cycle], eax); // update cycles + xLoadFarAddr(arg1reg, CpuVU0); + xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg); + iFlushCall(FLUSH_EVERYTHING); + if (_Rt_) xLEA(arg2reg, ptr[&VU0.VF[_Ft_].UD[0]]); else @@ -602,6 +610,14 @@ void recLQC2() void recSQC2() { + iFlushCall(FLUSH_EVERYTHING); + xMOV(eax, ptr[&cpuRegs.cycle]); + xADD(eax, scaleblockcycles_clear()); + xMOV(ptr[&cpuRegs.cycle], eax); // update cycles + xLoadFarAddr(arg1reg, CpuVU0); + xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg); + iFlushCall(FLUSH_EVERYTHING); + xLEA(arg2reg, ptr[&VU0.VF[_Ft_].UD[0]]); if (GPR_IS_CONST1(_Rs_)) @@ -628,4 +644,4 @@ void recSQC2() } } } // end namespace R5900::Dynarec::OpcodeImpl using namespace R5900::Dynarec; -using namespace R5900::Dynarec::OpcodeImpl; +using namespace R5900::Dynarec::OpcodeImpl; \ No newline at end of file diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index 23f39cfd84..5d81ece017 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -351,8 +351,11 @@ void recMicroVU1::Reset() { void recMicroVU0::Execute(u32 cycles) { pxAssert(m_Reserved); // please allocate me first! :| + VU0.flags &= ~VUFLAG_MFLAGSET; + if(!(VU0.VI[REG_VPU_STAT].UL & 1)) return; VU0.VI[REG_TPC].UL <<= 3; + // Sometimes games spin on vu0, so be careful with this value // woody hangs if too high on sVU (untested on mVU) // Edit: Need to test this again, if anyone ever has a "Woody" game :p diff --git a/pcsx2/x86/microVU_Branch.inl b/pcsx2/x86/microVU_Branch.inl index 19080a1456..ceab0e3eaf 100644 --- a/pcsx2/x86/microVU_Branch.inl +++ b/pcsx2/x86/microVU_Branch.inl @@ -70,12 +70,36 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit) { xMOVSS(ptr32[&mVU.regs().VI[REG_P].UL], xmmPQ); } - // Save Flag Instances - xMOV(ptr32[&mVU.regs().VI[REG_STATUS_FLAG].UL], getFlagReg(fStatus)); + // Save MAC, Status and CLIP Flag Instances + xMOV(ptr32[&mVU.regs().VI[REG_STATUS_FLAG].UL], getFlagReg(fStatus)); mVUallocMFLAGa(mVU, gprT1, fMac); mVUallocCFLAGa(mVU, gprT2, fClip); - xMOV(ptr32[&mVU.regs().VI[REG_MAC_FLAG].UL], gprT1); - xMOV(ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL], gprT2); + xMOV(ptr32[&mVU.regs().VI[REG_MAC_FLAG].UL], gprT1); + xMOV(ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL], gprT2); + + if (!isEbit) { // Backup flag instances + xMOVAPS(xmmT1, ptr128[mVU.macFlag]); + xMOVAPS(ptr128[&mVU.regs().micro_macflags], xmmT1); + xMOVAPS(xmmT1, ptr128[mVU.clipFlag]); + xMOVAPS(ptr128[&mVU.regs().micro_clipflags], xmmT1); + + xMOV(ptr32[&mVU.regs().micro_statusflags[0]], gprF0); + xMOV(ptr32[&mVU.regs().micro_statusflags[1]], gprF1); + xMOV(ptr32[&mVU.regs().micro_statusflags[2]], gprF2); + xMOV(ptr32[&mVU.regs().micro_statusflags[3]], gprF3); + } else { // Flush flag instances + xMOVDZX(xmmT1, ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL]); + xSHUF.PS(xmmT1, xmmT1, 0); + xMOVAPS(ptr128[&mVU.regs().micro_clipflags], xmmT1); + + xMOVDZX(xmmT1, ptr32[&mVU.regs().VI[REG_MAC_FLAG].UL]); + xSHUF.PS(xmmT1, xmmT1, 0); + xMOVAPS(ptr128[&mVU.regs().micro_macflags], xmmT1); + + xMOVDZX(xmmT1, ptr32[&mVU.regs().VI[REG_STATUS_FLAG].UL]); + xSHUF.PS(xmmT1, xmmT1, 0); + xMOVAPS(ptr128[&mVU.regs().micro_statusflags], xmmT1); + } if (isEbit || isVU1) { // Clear 'is busy' Flags if (!mVU.index || !THREAD_VU1) { @@ -98,7 +122,12 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit) { int fClip = getLastFlagInst(mVUpBlock->pState, mFC->xClip, 2, isEbit); int qInst = 0; int pInst = 0; - mVU.regAlloc->flushAll(); + microBlock stateBackup; + memcpy(&stateBackup, &mVUregs, sizeof(mVUregs)); //backup the state, it's about to get screwed with. + if(!isEbit) + mVU.regAlloc->TDwritebackAll(); //Writing back ok, invalidating early kills the rec, so don't do it :P + else + mVU.regAlloc->flushAll(); if (isEbit) { memzero(mVUinfo); @@ -124,20 +153,55 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit) { } // Save P/Q Regs - if (qInst) { xPSHUF.D(xmmPQ, xmmPQ, 0xe5); } + if (qInst) { xPSHUF.D(xmmPQ, xmmPQ, 0xe1); } xMOVSS(ptr32[&mVU.regs().VI[REG_Q].UL], xmmPQ); + xPSHUF.D(xmmPQ, xmmPQ, 0xe1); + xMOVSS(ptr32[&mVU.regs().pending_q], xmmPQ); + xPSHUF.D(xmmPQ, xmmPQ, 0xe1); + if (isVU1) { - xPSHUF.D(xmmPQ, xmmPQ, pInst ? 3 : 2); + xPSHUF.D(xmmPQ, xmmPQ, pInst ? 0x1b : 0x1e); xMOVSS(ptr32[&mVU.regs().VI[REG_P].UL], xmmPQ); + xPSHUF.D(xmmPQ, xmmPQ, pInst ? 0x1b : 0x4b); + + xPSHUF.D(xmmPQ, xmmPQ, 0xe1); + xMOVSS(ptr32[&mVU.regs().pending_p], xmmPQ); + xPSHUF.D(xmmPQ, xmmPQ, 0x1b); } - // Save Flag Instances + // Save MAC, Status and CLIP Flag Instances xMOV(ptr32[&mVU.regs().VI[REG_STATUS_FLAG].UL], getFlagReg(fStatus)); mVUallocMFLAGa(mVU, gprT1, fMac); mVUallocCFLAGa(mVU, gprT2, fClip); xMOV(ptr32[&mVU.regs().VI[REG_MAC_FLAG].UL], gprT1); xMOV(ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL], gprT2); + if (!isEbit) { // Backup flag instances + xMOVAPS(xmmT1, ptr128[mVU.macFlag]); + xMOVAPS(ptr128[&mVU.regs().micro_macflags], xmmT1); + xMOVAPS(xmmT1, ptr128[mVU.clipFlag]); + xMOVAPS(ptr128[&mVU.regs().micro_clipflags], xmmT1); + + xMOV(ptr32[&mVU.regs().micro_statusflags[0]], gprF0); + xMOV(ptr32[&mVU.regs().micro_statusflags[1]], gprF1); + xMOV(ptr32[&mVU.regs().micro_statusflags[2]], gprF2); + xMOV(ptr32[&mVU.regs().micro_statusflags[3]], gprF3); + } + else { // Flush flag instances + xMOVDZX(xmmT1, ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL]); + xSHUF.PS(xmmT1, xmmT1, 0); + xMOVAPS(ptr128[&mVU.regs().micro_clipflags], xmmT1); + + xMOVDZX(xmmT1, ptr32[&mVU.regs().VI[REG_MAC_FLAG].UL]); + xSHUF.PS(xmmT1, xmmT1, 0); + xMOVAPS(ptr128[&mVU.regs().micro_macflags], xmmT1); + + xMOVDZX(xmmT1, ptr32[&mVU.regs().VI[REG_STATUS_FLAG].UL]); + xSHUF.PS(xmmT1, xmmT1, 0); + xMOVAPS(ptr128[&mVU.regs().micro_statusflags], xmmT1); + } + + if (isEbit || isVU1) { // Clear 'is busy' Flags if (!mVU.index || !THREAD_VU1) { xAND(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? ~0x100 : ~0x001)); // VBS0/VBS1 flag @@ -149,6 +213,7 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit) { xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC); xJMP(mVU.exitFunct); } + memcpy(&mVUregs, &stateBackup, sizeof(mVUregs)); //Restore the state for the rest of the recompile } // Recompiles Code for Proper Flags and Q/P regs on Block Linkings diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 196051e9e3..f7747eb60f 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -356,7 +356,7 @@ void mVUdebugPrintBlocks(microVU& mVU, bool isEndPC) { // vu0 is allowed to exit early, so are dev builds (for inf loops) __fi bool doEarlyExit(microVU& mVU) { - return IsDevBuild || !isVU1; + return true;// IsDevBuild || !isVU1; } // Saves Pipeline State for resuming from early exits @@ -368,27 +368,32 @@ __fi void mVUsavePipelineState(microVU& mVU) { } // Test cycles to see if we need to exit-early... -void mVUtestCycles(microVU& mVU) { +void mVUtestCycles(microVU& mVU, microFlagCycles& mFC) { iPC = mVUstartPC; if (doEarlyExit(mVU)) { - xCMP(ptr32[&mVU.cycles], 0); - xForwardJG32 skip; + xMOV(eax, ptr32[&mVU.cycles]); + if (!EmuConfig.Gamefixes.VU0KickstartHack) + xSUB(eax, mVUcycles); // Running behind, make sure we have time to run the block + else + xSUB(eax, 1); // Running ahead, make sure cycles left are above 0 + xCMP(eax, 0); + xForwardJGE32 skip; + mVUsavePipelineState(mVU); if (isVU0) { // TEST32ItoM((uptr)&mVU.regs().flags, VUFLAG_MFLAGSET); // xFowardJZ32 vu0jmp; // mVUbackupRegs(mVU, true); // xFastCall(mVUwarning0, mVU.prog.cur->idx, xPC); // VU0 is allowed early exit for COP2 Interlock Simulation // mVUrestoreRegs(mVU, true); - mVUsavePipelineState(mVU); - mVUendProgram(mVU, NULL, 0); + mVUendProgram(mVU, &mFC, 0); // vu0jmp.SetTarget(); } else { - mVUbackupRegs(mVU, true); + /*mVUbackupRegs(mVU, true); xFastCall(mVUwarning1, mVU.prog.cur->idx, xPC); mVUrestoreRegs(mVU, true); - mVUsavePipelineState(mVU); - mVUendProgram(mVU, NULL, 0); + mVUsavePipelineState(mVU);*/ + mVUendProgram(mVU, &mFC, 0); } skip.SetTarget(); } @@ -401,7 +406,7 @@ void mVUtestCycles(microVU& mVU) { // This gets run at the start of every loop of mVU's first pass __fi void startLoop(mV) { - if (curI & _Mbit_) { DevCon.WriteLn (Color_Green, "microVU%d: M-bit set! PC = %x", getIndex, xPC); } + if (curI & _Mbit_ && isVU0) { DevCon.WriteLn (Color_Green, "microVU%d: M-bit set! PC = %x", getIndex, xPC); } if (curI & _Dbit_) { DevCon.WriteLn (Color_Green, "microVU%d: D-bit set! PC = %x", getIndex, xPC); } if (curI & _Tbit_) { DevCon.WriteLn (Color_Green, "microVU%d: T-bit set! PC = %x", getIndex, xPC); } memzero(mVUinfo); @@ -475,8 +480,8 @@ void* mVUcompileSingleInstruction(microVU& mVU, u32 startPC, uptr pState, microF mVUsetCycles(mVU); mVUinfo.readQ = mVU.q; mVUinfo.writeQ = !mVU.q; - mVUinfo.readP = mVU.p; - mVUinfo.writeP = !mVU.p; + mVUinfo.readP = mVU.p && isVU1; + mVUinfo.writeP = !mVU.p && isVU1; mVUcount++; mVUsetFlagInfo(mVU); incPC(1); @@ -485,7 +490,8 @@ void* mVUcompileSingleInstruction(microVU& mVU, u32 startPC, uptr pState, microF mVUsetFlags(mVU, mFC); // Sets Up Flag instances mVUoptimizePipeState(mVU); // Optimize the End Pipeline State for nicer Block Linking mVUdebugPrintBlocks(mVU, false); // Prints Start/End PC of blocks executed, for debugging... - mVUtestCycles(mVU); // Update VU Cycles and Exit Early if Necessary + + mVUtestCycles(mVU, mFC); // Update VU Cycles and Exit Early if Necessary // Second Pass iPC = startPC / 4; @@ -534,37 +540,52 @@ void mVUSaveFlags(microVU& mVU,microFlagCycles &mFC, microFlagCycles &mFCBackup) memcpy(&mFCBackup, &mFC, sizeof(microFlagCycles)); mVUsetFlags(mVU, mFCBackup); // Sets Up Flag instances } -void* mVUcompile(microVU& mVU, u32 startPC, uptr pState) { - +void* mVUcompile(microVU& mVU, u32 startPC, uptr pState) +{ microFlagCycles mFC; - u8* thisPtr = x86Ptr; - const u32 endCount = (((microRegInfo*)pState)->blockType) ? 1 : (mVU.microMemSize / 8); + u8* thisPtr = x86Ptr; + const u32 endCount = (((microRegInfo*)pState)->blockType) ? 1 : (mVU.microMemSize / 8); // First Pass iPC = startPC / 4; mVUsetupRange(mVU, startPC, 1); // Setup Program Bounds/Range - mVU.regAlloc->reset(); // Reset regAlloc + mVU.regAlloc->reset(); // Reset regAlloc mVUinitFirstPass(mVU, pState, thisPtr); mVUbranch = 0; - for(int branch = 0; mVUcount < endCount;) { + for (int branch = 0; mVUcount < endCount;) { incPC(1); startLoop(mVU); mVUincCycles(mVU, 1); mVUopU(mVU, 0); mVUcheckBadOp(mVU); - if (curI & _Ebit_) { eBitPass1(mVU, branch); } - - if (curI & _Mbit_) { mVUup.mBit = true; } - - if (curI & _Ibit_) { mVUlow.isNOP = true; mVUup.iBit = true; } - else { incPC(-1); mVUopL(mVU, 0); incPC(1); } - if (curI & _Dbit_) { mVUup.dBit = true; } - if (curI & _Tbit_) { mVUup.tBit = true; } + if (curI & _Ebit_) { + eBitPass1(mVU, branch); + } + + if ((curI & _Mbit_) && isVU0) { + mVUup.mBit = true; + } + + if (curI & _Ibit_) { + mVUlow.isNOP = true; + mVUup.iBit = true; + } + else { + incPC(-1); + mVUopL(mVU, 0); + incPC(1); + } + if (curI & _Dbit_) { + mVUup.dBit = true; + } + if (curI & _Tbit_) { + mVUup.tBit = true; + } mVUsetCycles(mVU); - mVUinfo.readQ = mVU.q; + mVUinfo.readQ = mVU.q; mVUinfo.writeQ = !mVU.q; - mVUinfo.readP = mVU.p; - mVUinfo.writeP = !mVU.p; + mVUinfo.readP = mVU.p && isVU1; + mVUinfo.writeP = !mVU.p && isVU1; mVUcount++; if (branch >= 2) { @@ -588,6 +609,9 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState) { mVUbranch = 0; } + if (mVUup.mBit && !branch && !mVUup.eBit) + break; + if (mVUinfo.isEOB) break; @@ -595,24 +619,30 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState) { } // Fix up vi15 const info for propagation through blocks - mVUregs.vi15 = (doConstProp && mVUconstReg[15].isValid) ? (u16)mVUconstReg[15].regValue : 0; + mVUregs.vi15 = (doConstProp && mVUconstReg[15].isValid) ? (u16)mVUconstReg[15].regValue : 0; mVUregs.vi15v = (doConstProp && mVUconstReg[15].isValid) ? 1 : 0; mVUsetFlags(mVU, mFC); // Sets Up Flag instances mVUoptimizePipeState(mVU); // Optimize the End Pipeline State for nicer Block Linking mVUdebugPrintBlocks(mVU, false); // Prints Start/End PC of blocks executed, for debugging... - mVUtestCycles(mVU); // Update VU Cycles and Exit Early if Necessary + mVUtestCycles(mVU, mFC); // Update VU Cycles and Exit Early if Necessary // Second Pass iPC = mVUstartPC; setCode(); mVUbranch = 0; u32 x = 0; - for( ; x < endCount; x++) { - if (mVUinfo.isEOB) { handleBadOp(mVU, x); x = 0xffff; } // handleBadOp currently just prints a warning - if (mVUup.mBit) { xOR(ptr32[&mVU.regs().flags], VUFLAG_MFLAGSET); } + + for (; x < endCount; x++) { + if (mVUinfo.isEOB) { + handleBadOp(mVU, x); + x = 0xffff; + } // handleBadOp currently just prints a warning + if (mVUup.mBit) { + xOR(ptr32[&mVU.regs().flags], VUFLAG_MFLAGSET); + } mVUexecuteInstruction(mVU); - if(!mVUinfo.isBdelay && !mVUlow.branch) //T/D Bit on branch is handled after the branch, branch delay slots are executed. + if (!mVUinfo.isBdelay && !mVUlow.branch) //T/D Bit on branch is handled after the branch, branch delay slots are executed. { if (mVUup.tBit) { mVUDoTBit(mVU, &mFC); @@ -620,6 +650,13 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState) { else if (mVUup.dBit && doDBitHandling) { mVUDoDBit(mVU, &mFC); } + else if (mVUup.mBit && !mVUup.eBit && !mVUinfo.isEOB) { + mVUsetupRange(mVU, xPC, false); + incPC(2); + mVUendProgram(mVU, &mFC, 0); + incPC(-2); + goto perf_and_return; + } } if (mVUinfo.doXGKICK) { @@ -640,22 +677,41 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState) { incPC(-3); // Go back to branch opcode switch (mVUlow.branch) { - case 1: case 2: normBranch(mVU, mFC); goto perf_and_return; // B/BAL - case 9: case 10: normJump (mVU, mFC); goto perf_and_return; // JR/JALR - case 3: condBranch(mVU, mFC, Jcc_Equal); goto perf_and_return; // IBEQ - case 4: condBranch(mVU, mFC, Jcc_GreaterOrEqual); goto perf_and_return; // IBGEZ - case 5: condBranch(mVU, mFC, Jcc_Greater); goto perf_and_return; // IBGTZ - case 6: condBranch(mVU, mFC, Jcc_LessOrEqual); goto perf_and_return; // IBLEQ - case 7: condBranch(mVU, mFC, Jcc_Less); goto perf_and_return; // IBLTZ - case 8: condBranch(mVU, mFC, Jcc_NotEqual); goto perf_and_return; // IBNEQ + case 1: // B/BAL + case 2: + normBranch(mVU, mFC); + goto perf_and_return; + case 9: // JR/JALR + case 10: + normJump(mVU, mFC); + goto perf_and_return; + case 3: // IBEQ + condBranch(mVU, mFC, Jcc_Equal); + goto perf_and_return; + case 4: // IBGEZ + condBranch(mVU, mFC, Jcc_GreaterOrEqual); + goto perf_and_return; + case 5: // IBGTZ + condBranch(mVU, mFC, Jcc_Greater); + goto perf_and_return; + case 6: // IBLEQ + condBranch(mVU, mFC, Jcc_LessOrEqual); + goto perf_and_return; + case 7: // IBLTZ + condBranch(mVU, mFC, Jcc_Less); + goto perf_and_return; + case 8: // IBNEQ + condBranch(mVU, mFC, Jcc_NotEqual); + goto perf_and_return; } - } } - if ((x == endCount) && (x!=1)) { Console.Error("microVU%d: Possible infinite compiling loop!", mVU.index); } + if ((x == endCount) && (x != 1)) { + Console.Error("microVU%d: Possible infinite compiling loop!", mVU.index); + } // E-bit End - mVUsetupRange(mVU, xPC-8, false); + mVUsetupRange(mVU, xPC - 8, false); mVUendProgram(mVU, &mFC, 1); perf_and_return: diff --git a/pcsx2/x86/microVU_Execute.inl b/pcsx2/x86/microVU_Execute.inl index 4865b693ce..b4d0d56139 100644 --- a/pcsx2/x86/microVU_Execute.inl +++ b/pcsx2/x86/microVU_Execute.inl @@ -34,22 +34,35 @@ void mVUdispatcherAB(mV) { xLDMXCSR(g_sseVUMXCSR); // Load Regs - xMOV(gprF0, ptr32[&mVU.regs().VI[REG_STATUS_FLAG].UL]); - xMOV(gprF1, gprF0); - xMOV(gprF2, gprF0); - xMOV(gprF3, gprF0); - - xMOVAPS (xmmT1, ptr128[&mVU.regs().VI[REG_MAC_FLAG].UL]); - xSHUF.PS(xmmT1, xmmT1, 0); - xMOVAPS (ptr128[mVU.macFlag], xmmT1); - - xMOVAPS (xmmT1, ptr128[&mVU.regs().VI[REG_CLIP_FLAG].UL]); - xSHUF.PS(xmmT1, xmmT1, 0); - xMOVAPS (ptr128[mVU.clipFlag], xmmT1); - xMOVAPS (xmmT1, ptr128[&mVU.regs().VI[REG_P].UL]); xMOVAPS (xmmPQ, ptr128[&mVU.regs().VI[REG_Q].UL]); + xMOVDZX (xmmT2, ptr32[&mVU.regs().pending_q]); xSHUF.PS(xmmPQ, xmmT1, 0); // wzyx = PPQQ + //Load in other Q instance + xPSHUF.D(xmmPQ, xmmPQ, 0xe1); + xMOVSS(xmmPQ, xmmT2); + xPSHUF.D(xmmPQ, xmmPQ, 0xe1); + + if (isVU1) + { + //Load in other P instance + xMOVDZX(xmmT2, ptr32[&mVU.regs().pending_p]); + xPSHUF.D(xmmPQ, xmmPQ, 0x1B); + xMOVSS(xmmPQ, xmmT2); + xPSHUF.D(xmmPQ, xmmPQ, 0x1B); + } + + xMOVAPS(xmmT1, ptr128[&mVU.regs().micro_macflags]); + xMOVAPS(ptr128[mVU.macFlag], xmmT1); + + + xMOVAPS(xmmT1, ptr128[&mVU.regs().micro_clipflags]); + xMOVAPS(ptr128[mVU.clipFlag], xmmT1); + + xMOV(gprF0, ptr32[&mVU.regs().micro_statusflags[0]]); + xMOV(gprF1, ptr32[&mVU.regs().micro_statusflags[1]]); + xMOV(gprF2, ptr32[&mVU.regs().micro_statusflags[2]]); + xMOV(gprF3, ptr32[&mVU.regs().micro_statusflags[3]]); // Jump to Recompiled Code Block xJMP(rax); diff --git a/pcsx2/x86/microVU_Macro.inl b/pcsx2/x86/microVU_Macro.inl index c59bb486f3..4bbac85447 100644 --- a/pcsx2/x86/microVU_Macro.inl +++ b/pcsx2/x86/microVU_Macro.inl @@ -247,8 +247,14 @@ void recBC2TL() { _setupBranchTest(JZ32, true); } //------------------------------------------------------------------ void COP2_Interlock(bool mBitSync) { + if (cpuRegs.code & 1) { - iFlushCall(FLUSH_EVERYTHING | FLUSH_PC); + iFlushCall(FLUSH_EVERYTHING); + xMOV(eax, ptr[&cpuRegs.cycle]); + xADD(eax, scaleblockcycles_clear()); + xMOV(ptr[&cpuRegs.cycle], eax); // update cycles + xLoadFarAddr(arg1reg, CpuVU0); + xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg); if (mBitSync) xFastCall((void*)_vu0WaitMicro); else xFastCall((void*)_vu0FinishMicro); } @@ -268,6 +274,14 @@ static void recCFC2() { COP2_Interlock(false); if (!_Rt_) return; + if (!(cpuRegs.code & 1) && !EmuConfig.Gamefixes.VU0KickstartHack) { + iFlushCall(FLUSH_EVERYTHING); + xMOV(eax, ptr[&cpuRegs.cycle]); + xADD(eax, scaleblockcycles_clear()); + xMOV(ptr[&cpuRegs.cycle], eax); // update cycles + xLoadFarAddr(arg1reg, CpuVU0); + xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg); + } iFlushCall(FLUSH_EVERYTHING); if (_Rd_ == REG_STATUS_FLAG) { // Normalize Status Flag @@ -331,6 +345,14 @@ static void recCTC2() { printCOP2("CTC2"); COP2_Interlock(1); if (!_Rd_) return; + if (!(cpuRegs.code & 1) && !EmuConfig.Gamefixes.VU0KickstartHack) { + iFlushCall(FLUSH_EVERYTHING); + xMOV(eax, ptr[&cpuRegs.cycle]); + xADD(eax, scaleblockcycles_clear()); + xMOV(ptr[&cpuRegs.cycle], eax); // update cycles + xLoadFarAddr(arg1reg, CpuVU0); + xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg); + } iFlushCall(FLUSH_EVERYTHING); switch(_Rd_) { @@ -342,12 +364,25 @@ static void recCTC2() { xMOV(ptr32[&vu0Regs.VI[REG_R].UL], eax); break; case REG_STATUS_FLAG: + { if (_Rt_) { // Denormalizes flag into eax (gprT1) mVUallocSFLAGd(&cpuRegs.GPR.r[_Rt_].UL[0]); xMOV(ptr32[&vu0Regs.VI[_Rd_].UL], eax); } else xMOV(ptr32[&vu0Regs.VI[_Rd_].UL], 0); + __aligned16 u32 sticky_flags[4] = { 0xFC0,0xFC0,0xFC0,0xFC0 }; + __aligned16 u32 status_flags[4] = { 0x3F,0x3F,0x3F,0x3F }; + + //Need to update the sticky flags for microVU + xMOVDZX(xmmT1, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); + xSHUF.PS(xmmT1, xmmT1, 0); + xAND.PS(xmmT1, ptr128[&sticky_flags]); + xMOVAPS(xmmT2, ptr128[&VU0.micro_statusflags]); + xAND.PS(xmmT1, ptr128[&status_flags]); + xOR.PS(xmmT1, xmmT2); + xMOVAPS(ptr128[&VU0.micro_statusflags], xmmT1); break; + } case REG_CMSAR1: // Execute VU1 Micro SubRoutine if (_Rt_) { xMOV(ecx, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); @@ -357,8 +392,8 @@ static void recCTC2() { xFastCall((void*)vif1VUFinish); break; case REG_FBRST: - if (!_Rt_) { - xMOV(ptr32[&vu0Regs.VI[REG_FBRST].UL], 0); + if (!_Rt_) { + xMOV(ptr32[&vu0Regs.VI[REG_FBRST].UL], 0); return; } else xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); @@ -373,8 +408,6 @@ static void recCTC2() { // Executing vu0 block here fixes the intro of Ratchet and Clank // sVU's COP2 has a comment that "Donald Duck" needs this too... if (_Rd_) _eeMoveGPRtoM((uptr)&vu0Regs.VI[_Rd_].UL, _Rt_); - xLoadFarAddr(arg1reg, CpuVU0); - xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg); break; } } @@ -384,6 +417,15 @@ static void recQMFC2() { printCOP2("QMFC2"); COP2_Interlock(false); if (!_Rt_) return; + + if (!(cpuRegs.code & 1) && !EmuConfig.Gamefixes.VU0KickstartHack) { + iFlushCall(FLUSH_EVERYTHING); + xMOV(eax, ptr[&cpuRegs.cycle]); + xADD(eax, scaleblockcycles_clear()); + xMOV(ptr[&cpuRegs.cycle], eax); // update cycles + xLoadFarAddr(arg1reg, CpuVU0); + xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg); + } iFlushCall(FLUSH_EVERYTHING); // FixMe: For some reason this line is needed or else games break: @@ -398,6 +440,14 @@ static void recQMTC2() { printCOP2("QMTC2"); COP2_Interlock(true); if (!_Rd_) return; + if (!(cpuRegs.code & 1) && !EmuConfig.Gamefixes.VU0KickstartHack) { + iFlushCall(FLUSH_EVERYTHING); + xMOV(eax, ptr[&cpuRegs.cycle]); + xADD(eax, scaleblockcycles_clear()); + xMOV(ptr[&cpuRegs.cycle], eax); // update cycles + xLoadFarAddr(arg1reg, CpuVU0); + xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg); + } iFlushCall(FLUSH_EVERYTHING); xMOVAPS(xmmT1, ptr128[&cpuRegs.GPR.r[_Rt_]]); @@ -468,5 +518,11 @@ namespace R5900 { namespace Dynarec { namespace OpcodeImpl { void recCOP2() { recCOP2t[_Rs_](); }}}} void recCOP2_BC2 () { recCOP2_BC2t[_Rt_](); } -void recCOP2_SPEC1() { recCOP2SPECIAL1t[_Funct_](); } -void recCOP2_SPEC2() { recCOP2SPECIAL2t[(cpuRegs.code&3)|((cpuRegs.code>>4)&0x7c)](); } +void recCOP2_SPEC1() { + iFlushCall(FLUSH_EVERYTHING); + xMOV(eax, ptr[&cpuRegs.cycle]); + xADD(eax, scaleblockcycles_clear()); + xMOV(ptr[&cpuRegs.cycle], eax); // update cycles + xFastCall((void*)_vu0FinishMicro); recCOP2SPECIAL1t[_Funct_](); +} +void recCOP2_SPEC2() { recCOP2SPECIAL2t[(cpuRegs.code&3)|((cpuRegs.code>>4)&0x7c)](); } \ No newline at end of file