From b4eaf3722f32937bd0ce4812bf107c66f6ffbcb4 Mon Sep 17 00:00:00 2001
From: refractionpcsx2
Date: Tue, 7 Sep 2021 13:40:01 +0100
Subject: [PATCH] VU: Adjust timings of VU calls

---
 bin/GameIndex.yaml                    |  2 --
 pcsx2/VU.h                            |  1 +
 pcsx2/VU0.cpp                         |  4 +--
 pcsx2/VU0microInterp.cpp              | 11 +++++--
 pcsx2/VU1microInterp.cpp              |  2 +-
 pcsx2/VUmicro.cpp                     | 28 +++++++++++++-----
 pcsx2/x86/ix86-32/iR5900LoadStore.cpp |  8 ++---
 pcsx2/x86/microVU_Branch.inl          | 22 ++++----------
 pcsx2/x86/microVU_Compile.inl         |  6 +++-
 pcsx2/x86/microVU_IR.h                |  3 +-
 pcsx2/x86/microVU_Macro.inl           | 42 ++++++++++++++++++---------
 11 files changed, 79 insertions(+), 50 deletions(-)

diff --git a/bin/GameIndex.yaml b/bin/GameIndex.yaml
index 3b249e166f..931e3e24e1 100644
--- a/bin/GameIndex.yaml
+++ b/bin/GameIndex.yaml
@@ -11696,7 +11696,6 @@ SLES-52568:
   compat: 5
   gameFixes:
     - XGKickHack # Fixes bad Geometry.
-    - VUKickstartHack # Fixes loading hang.
 SLES-52569:
   name: "Spyro - A Hero's Tail"
   region: "PAL-M6"
@@ -35789,7 +35788,6 @@ SLUS-20909:
   compat: 5
   gameFixes:
     - XGKickHack # Fixes bad geometry.
-    - VUKickstartHack # Fixes loading hang.
 SLUS-20910:
   name: "Test Drive - Eve of Destruction"
   region: "NTSC-U"
diff --git a/pcsx2/VU.h b/pcsx2/VU.h
index a52c6a62f0..5e286bad8a 100644
--- a/pcsx2/VU.h
+++ b/pcsx2/VU.h
@@ -157,6 +157,7 @@ struct __aligned16 VURegs
     u32 ebit;
     u32 pending_q;
     u32 pending_p;
+    u32 blockhasmbit;
 
     __aligned16 u32 micro_macflags[4];
     __aligned16 u32 micro_clipflags[4];
diff --git a/pcsx2/VU0.cpp b/pcsx2/VU0.cpp
index c3656ef701..c9c40a34c4 100644
--- a/pcsx2/VU0.cpp
+++ b/pcsx2/VU0.cpp
@@ -59,7 +59,7 @@ __fi void _vu0run(bool breakOnMbit, bool addCycles) {
     if (!(VU0.VI[REG_VPU_STAT].UL & 1)) return;
 
     //VU0 is ahead of the EE and M-Bit is already encountered, so no need to wait for it, just catch up the EE
-    if ((VU0.flags & VUFLAG_MFLAGSET) && breakOnMbit && VU0.cycle >= cpuRegs.cycle)
+    if ((VU0.flags & VUFLAG_MFLAGSET) && breakOnMbit && (s32)(cpuRegs.cycle - VU0.cycle) < 0)
     {
         cpuRegs.cycle = VU0.cycle;
         return;
     }
@@ -71,7 +71,7 @@ __fi void _vu0run(bool breakOnMbit, bool addCycles) {
     do { // Run VU until it finishes or M-Bit
         CpuVU0->Execute(runCycles);
     } while ((VU0.VI[REG_VPU_STAT].UL & 1) // E-bit Termination
-        && (!breakOnMbit || !(VU0.flags & VUFLAG_MFLAGSET))); // M-bit Break
+        && (!breakOnMbit || !(VU0.flags & VUFLAG_MFLAGSET) || (s32)(cpuRegs.cycle - VU0.cycle) > 0)); // M-bit Break
 
     // Add cycles if called from EE's COP2
     if (addCycles)
diff --git a/pcsx2/VU0microInterp.cpp b/pcsx2/VU0microInterp.cpp
index aa963c2e36..c2489b0ea0 100644
--- a/pcsx2/VU0microInterp.cpp
+++ b/pcsx2/VU0microInterp.cpp
@@ -49,9 +49,10 @@ static void _vu0Exec(VURegs* VU)
         {
             VU->ebit = 2;
         }
-        if (ptr[1] & 0x20000000) // M flag
+        if (ptr[1] & 0x20000000 && VU == &VU0) // M flag
         {
             VU->flags|= VUFLAG_MFLAGSET;
+            VU0.blockhasmbit = true;
             // Console.WriteLn("fixme: M flag set");
         }
         if (ptr[1] & 0x10000000) // D flag
@@ -182,6 +183,9 @@ static void _vu0Exec(VURegs* VU)
     {
         VU->VI[REG_TPC].UL = VU->branchpc;
 
+        if (VU->blockhasmbit)
+            VU->blockhasmbit = false;
+
         if(VU->takedelaybranch)
         {
             DevCon.Warning("VU0 - Branch/Jump in Delay Slot");
@@ -200,6 +204,9 @@ static void _vu0Exec(VURegs* VU)
             _vuFlushAll(VU);
             VU0.VI[REG_VPU_STAT].UL&= ~0x1; /* E flag */
             vif0Regs.stat.VEW = false;
+
+            if (VU->blockhasmbit)
+                VU->blockhasmbit = false;
         }
     }
 
@@ -269,6 +276,6 @@ void InterpVU0::Execute(u32 cycles)
         vu0Exec(&VU0);
     }
     VU0.VI[REG_TPC].UL >>= 3;
-
+    VU0.nextBlockCycles = (VU0.cycle - cpuRegs.cycle) + 1;
     fesetround(originalRounding);
 }
diff --git a/pcsx2/VU1microInterp.cpp b/pcsx2/VU1microInterp.cpp
index efe8caef19..34806c2aae 100644
--- a/pcsx2/VU1microInterp.cpp
+++ b/pcsx2/VU1microInterp.cpp
@@ -328,6 +328,6 @@ void InterpVU1::Execute(u32 cycles)
         Step();
     }
     VU1.VI[REG_TPC].UL >>= 3;
-
+    VU1.nextBlockCycles = (VU1.cycle - cpuRegs.cycle) + 1;
     fesetround(originalRounding);
 }
diff --git a/pcsx2/VUmicro.cpp b/pcsx2/VUmicro.cpp
index 61e64598a5..e81834bdb7 100644
--- a/pcsx2/VUmicro.cpp
+++ b/pcsx2/VUmicro.cpp
@@ -82,11 +82,15 @@ void BaseVUmicroCPU::ExecuteBlock(bool startUp)
         return;
     }
 
-    if (startUp && s) // Start Executing a microprogram (When kickstarted)
+    // You might be looking at this and thinking, what the hell is going on? What's with all these conditions?
+    // Well, basically M-Bit timed games are REALLY picky, so we need some extra checks in to make sure the VU
+    // doesn't go too long without updating/syncing as games will wait for an M-Bit then transfer a bunch of stuff
+    // since they will know what the timing is going to be on them, so we need to keep it somewhat tight.
+    // For everything else (Especially stuff that needs kickstart), they can do what they like.
+    if (startUp) // Start Executing a microprogram (When kickstarted)
     {
         Execute(s); // Kick start VU
 
-        // I don't like doing this, but Crash Twinsanity seems to be upset without it
         if (stat & test)
         {
             if (m_Idx)
@@ -94,7 +98,12 @@ void BaseVUmicroCPU::ExecuteBlock(bool startUp)
             else
                 cpuRegs.cycle = VU0.cycle;
 
-            cpuSetNextEventDelta(s);
+            u32 nextblockcycles = m_Idx ? VU1.nextBlockCycles : VU0.nextBlockCycles;
+
+            if((VU0.flags & VUFLAG_MFLAGSET) || VU0.blockhasmbit)
+                cpuSetNextEventDelta(nextblockcycles);
+            else if(s)
+                cpuSetNextEventDelta(s);
         }
     }
     else // Continue Executing
@@ -110,19 +119,21 @@ void BaseVUmicroCPU::ExecuteBlock(bool startUp)
         }
         else
         {
-            if (delta >= nextblockcycles) // When running behind, make sure we have enough cycles passed for the block to run
+            if (delta >= nextblockcycles && delta > 0) // When running behind, make sure we have enough cycles passed for the block to run
                 Execute(delta);
         }
 
-        if (stat & test)
+        if ((stat & test) && !EmuConfig.Gamefixes.VUKickstartHack)
         { // Queue up next required time to run a block
             nextblockcycles = m_Idx ? VU1.nextBlockCycles : VU0.nextBlockCycles;
             cycle = m_Idx ? VU1.cycle : VU0.cycle;
-            nextblockcycles = EmuConfig.Gamefixes.VUKickstartHack ? (cycle - cpuRegs.cycle) : nextblockcycles;
+            nextblockcycles = nextblockcycles - (cycle - cpuRegs.cycle);
 
-            if(nextblockcycles)
+            if (nextblockcycles > 0 || (VU0.flags & VUFLAG_MFLAGSET) || VU0.blockhasmbit)
+            {
                 cpuSetNextEventDelta(nextblockcycles);
+            }
         }
     }
 }
 
@@ -136,12 +147,13 @@ void BaseVUmicroCPU::ExecuteBlockJIT(BaseVUmicroCPU* cpu)
     const u32& stat = VU0.VI[REG_VPU_STAT].UL;
     const int test = 1;
 
+    //DevCon.Warning("Was set %d cycles ago", cpuRegs.cycle - setcycle);
     if (stat & test)
     { // VU is running
         s32 delta = (s32)(u32)(cpuRegs.cycle - VU0.cycle);
         if (delta > 0)
-        { // Enough time has passed
+        {
             cpu->Execute(delta); // Execute the time since the last call
         }
     }
diff --git a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp
index 27ac059790..99f15409da 100644
--- a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp
+++ b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp
@@ -580,11 +580,11 @@ void recLQC2()
 {
     iFlushCall(FLUSH_EVERYTHING);
 
-    xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
-    xForwardJZ32 skipvuidle;
     xMOV(eax, ptr32[&cpuRegs.cycle]);
     xADD(eax, scaleblockcycles_clear());
     xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
+    xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
+    xForwardJZ32 skipvuidle;
     xSUB(eax, ptr32[&VU0.cycle]);
     xSUB(eax, ptr32[&VU0.nextBlockCycles]);
     xCMP(eax, 8);
@@ -625,11 +625,11 @@ void recSQC2()
 {
     iFlushCall(FLUSH_EVERYTHING);
 
-    xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
-    xForwardJZ32 skipvuidle;
     xMOV(eax, ptr32[&cpuRegs.cycle]);
     xADD(eax, scaleblockcycles_clear());
    xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
+    xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
+    xForwardJZ32 skipvuidle;
     xSUB(eax, ptr32[&VU0.cycle]);
     xSUB(eax, ptr32[&VU0.nextBlockCycles]);
     xCMP(eax, 8);
diff --git a/pcsx2/x86/microVU_Branch.inl b/pcsx2/x86/microVU_Branch.inl
index dec371bc7e..50bedba9d2 100644
--- a/pcsx2/x86/microVU_Branch.inl
+++ b/pcsx2/x86/microVU_Branch.inl
@@ -120,9 +120,10 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit)
         xMOVAPS(ptr128[&mVU.regs().micro_statusflags], xmmT1);
     }
 
+    xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
+
     if (isEbit) // Clear 'is busy' Flags
     {
-        xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
         if (!mVU.index || !THREAD_VU1)
         {
             xAND(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? ~0x100 : ~0x001)); // VBS0/VBS1 flag
@@ -130,8 +131,6 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit)
         }
         else
             xFastCall((void*)mVUTBit);
     }
-    else
-        xMOV(ptr32[&mVU.regs().nextBlockCycles], mVUcycles);
 
     if (isEbit != 2) // Save PC, and Jump to Exit Point
     {
@@ -245,8 +244,10 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit)
         else
             xFastCall((void*)mVUEBit);
     }
-    else
-        xMOV(ptr32[&mVU.regs().nextBlockCycles], mVUcycles);
+    else if(isEbit)
+    {
+        xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
+    }
 
     if (isEbit != 2 && isEbit != 3) // Save PC, and Jump to Exit Point
     {
@@ -305,7 +306,6 @@ void normJumpCompile(mV, microFlagCycles& mFC, bool isEvilJump)
         //So if it is taken, you need to end the program, else you get infinite loops.
         mVUendProgram(mVU, &mFC, 2);
         xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], arg1regd);
-        xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
         xJMP(mVU.exitFunct);
     }
@@ -366,7 +366,6 @@ void normBranch(mV, microFlagCycles& mFC)
         mVUendProgram(mVU, &mFC, 3);
         iPC = branchAddr(mVU) / 4;
         xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
-        xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
         xJMP(mVU.exitFunct);
         iPC = tempPC;
     }
@@ -464,7 +463,6 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc)
             incPC(-4); // Go Back to Branch Opcode to get branchAddr
             iPC = branchAddr(mVU) / 4;
             xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
-            xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
             xJMP(mVU.exitFunct);
             eJMP.SetTarget();
             iPC = tempPC;
@@ -484,13 +482,11 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc)
             xForwardJump32 dJMP(xInvertCond((JccComparisonType)JMPcc));
             incPC(4); // Set PC to First instruction of Non-Taken Side
             xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
-            xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
             xJMP(mVU.exitFunct);
             dJMP.SetTarget();
             incPC(-4); // Go Back to Branch Opcode to get branchAddr
             iPC = branchAddr(mVU) / 4;
             xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
-            xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
             xJMP(mVU.exitFunct);
             eJMP.SetTarget();
             iPC = tempPC;
@@ -514,7 +510,6 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc)
             incPC(-4); // Go Back to Branch Opcode to get branchAddr
             iPC = branchAddr(mVU) / 4;
             xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
-            xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
             xJMP(mVU.exitFunct);
             iPC = tempPC;
         }
@@ -530,14 +525,12 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc)
             xForwardJump32 eJMP(((JccComparisonType)JMPcc));
             incPC(1); // Set PC to First instruction of Non-Taken Side
             xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
-            xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
             xJMP(mVU.exitFunct);
             eJMP.SetTarget();
             incPC(-4); // Go Back to Branch Opcode to get branchAddr
             iPC = branchAddr(mVU) / 4;
             xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
-            xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
             xJMP(mVU.exitFunct);
             return;
         }
@@ -630,7 +623,6 @@ void normJump(mV, microFlagCycles& mFC)
             mVUDTendProgram(mVU, &mFC, 2);
             xMOV(gprT1, ptr32[&mVU.branch]);
             xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], gprT1);
-            xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
             xJMP(mVU.exitFunct);
             eJMP.SetTarget();
         }
@@ -646,7 +638,6 @@ void normJump(mV, microFlagCycles& mFC)
             mVUDTendProgram(mVU, &mFC, 2);
             xMOV(gprT1, ptr32[&mVU.branch]);
             xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], gprT1);
-            xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
             xJMP(mVU.exitFunct);
             eJMP.SetTarget();
         }
@@ -655,7 +646,6 @@ void normJump(mV, microFlagCycles& mFC)
         mVUendProgram(mVU, &mFC, 2);
         xMOV(gprT1, ptr32[&mVU.branch]);
         xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], gprT1);
-        xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
         xJMP(mVU.exitFunct);
     }
     else
diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl
index 4ca94ac9eb..d938bdba29 100644
--- a/pcsx2/x86/microVU_Compile.inl
+++ b/pcsx2/x86/microVU_Compile.inl
@@ -478,6 +478,7 @@ void mVUtestCycles(microVU& mVU, microFlagCycles& mFC)
         xForwardJGE32 skip;
         mVUsavePipelineState(mVU);
+        xMOV(ptr32[&mVU.regs().nextBlockCycles], mVUcycles);
         mVUendProgram(mVU, &mFC, 0);
 
         skip.SetTarget();
@@ -537,6 +538,7 @@ __fi void mVUinitFirstPass(microVU& mVU, uptr pState, u8* thisPtr)
     mVUregs.blockType = 0;
     mVUregs.viBackUp = 0;
     mVUregs.flagInfo = 0;
+    mVUregs.mbitinblock = false;
     mVUsFlagHack = CHECK_VU_FLAGHACK;
     mVUinitConstValues(mVU);
 }
@@ -693,6 +695,7 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState)
 
         if ((curI & _Mbit_) && isVU0)
         {
+            mVUregs.mbitinblock = true;
             if (xPC > 0)
             {
                 incPC(-2);
@@ -780,7 +783,7 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState)
     // Fix up vi15 const info for propagation through blocks
     mVUregs.vi15 = (doConstProp && mVUconstReg[15].isValid) ? (u16)mVUconstReg[15].regValue : 0;
     mVUregs.vi15v = (doConstProp && mVUconstReg[15].isValid) ? 1 : 0;
-
+    xMOV(ptr32[&mVU.regs().blockhasmbit], mVUregs.mbitinblock);
     mVUsetFlags(mVU, mFC);           // Sets Up Flag instances
     mVUoptimizePipeState(mVU);       // Optimize the End Pipeline State for nicer Block Linking
     mVUdebugPrintBlocks(mVU, false); // Prints Start/End PC of blocks executed, for debugging...
@@ -829,6 +832,7 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState)
             }
             incPC(2);
             mVUsetupRange(mVU, xPC, false);
+            xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
             mVUendProgram(mVU, &mFC, 0);
             normBranchCompile(mVU, xPC);
             incPC(-2);
diff --git a/pcsx2/x86/microVU_IR.h b/pcsx2/x86/microVU_IR.h
index 033a1db95f..609f4d93dd 100644
--- a/pcsx2/x86/microVU_IR.h
+++ b/pcsx2/x86/microVU_IR.h
@@ -49,8 +49,9 @@ union __aligned16 microRegInfo
             u8 viBackUp;  // VI reg number that was written to on branch-delay slot
             u8 blockType; // 0 = Normal; 1,2 = Compile one instruction (E-bit/Branch Ending)
             u8 r;
+            u8 mbitinblock;
         };
-        u32 quick32[2];
+        u32 quick32[3];
     };
 
     u8 vi15v; // 'vi15' constant is valid
diff --git a/pcsx2/x86/microVU_Macro.inl b/pcsx2/x86/microVU_Macro.inl
index 2f49a6544e..690d730f39 100644
--- a/pcsx2/x86/microVU_Macro.inl
+++ b/pcsx2/x86/microVU_Macro.inl
@@ -112,6 +112,9 @@ void endMacroOp(int mode)
 #define INTERPRETATE_COP2_FUNC(f) \
     void recV##f() \
     { \
+        xMOV(eax, ptr32[&cpuRegs.cycle]); \
+        xADD(eax, scaleblockcycles_clear()); \
+        xMOV(ptr32[&cpuRegs.cycle], eax); \
         recCall(V##f); \
         _freeX86regs(); \
     }
@@ -283,15 +286,24 @@ void COP2_Interlock(bool mBitSync)
     if (cpuRegs.code & 1)
     {
         iFlushCall(FLUSH_EVERYTHING);
+        xMOV(eax, ptr32[&cpuRegs.cycle]);
+        xADD(eax, scaleblockcycles_clear());
+        xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
+
         xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
         xForwardJZ32 skipvuidle;
-        xMOV(eax, ptr[&cpuRegs.cycle]);
-        xADD(eax, scaleblockcycles_clear());
-        xMOV(ptr[&cpuRegs.cycle], eax); // update cycles
-        xLoadFarAddr(arg1reg, CpuVU0);
-        xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg);
         if (mBitSync)
+        {
+            xSUB(eax, ptr32[&vu0Regs.cycle]);
+            xSUB(eax, ptr32[&vu0Regs.nextBlockCycles]);
+            xCMP(eax, 8);
+            xForwardJL32 skip;
+            xLoadFarAddr(arg1reg, CpuVU0);
+            xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg);
+            skip.SetTarget();
+
             xFastCall((void*)_vu0WaitMicro);
+        }
         else
             xFastCall((void*)_vu0FinishMicro);
         skipvuidle.SetTarget();
@@ -320,11 +332,11 @@ static void recCFC2()
 
     if (!(cpuRegs.code & 1))
     {
-        xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
-        xForwardJZ32 skipvuidle;
         xMOV(eax, ptr32[&cpuRegs.cycle]);
         xADD(eax, scaleblockcycles_clear());
         xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
+        xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
+        xForwardJZ32 skipvuidle;
         xSUB(eax, ptr32[&vu0Regs.cycle]);
         xSUB(eax, ptr32[&vu0Regs.nextBlockCycles]);
         xCMP(eax, 8);
@@ -405,11 +417,12 @@ static void recCTC2()
 
     if (!(cpuRegs.code & 1))
     {
-        xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
-        xForwardJZ32 skipvuidle;
         xMOV(eax, ptr32[&cpuRegs.cycle]);
         xADD(eax, scaleblockcycles_clear());
         xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
+
+        xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
+        xForwardJZ32 skipvuidle;
         xSUB(eax, ptr32[&vu0Regs.cycle]);
         xSUB(eax, ptr32[&vu0Regs.nextBlockCycles]);
         xCMP(eax, 8);
@@ -498,11 +511,12 @@ static void recQMFC2()
 
     if (!(cpuRegs.code & 1))
    {
-        xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
-        xForwardJZ32 skipvuidle;
         xMOV(eax, ptr32[&cpuRegs.cycle]);
         xADD(eax, scaleblockcycles_clear());
         xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
+
+        xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
+        xForwardJZ32 skipvuidle;
         xSUB(eax, ptr32[&vu0Regs.cycle]);
         xSUB(eax, ptr32[&vu0Regs.nextBlockCycles]);
         xCMP(eax, 8);
@@ -532,12 +546,14 @@ static void recQMTC2()
 
     if (!(cpuRegs.code & 1))
     {
-        xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
-        xForwardJZ32 skipvuidle;
         xMOV(eax, ptr32[&cpuRegs.cycle]);
         xADD(eax, scaleblockcycles_clear());
         xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
+
+        xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
+        xForwardJZ32 skipvuidle;
         xSUB(eax, ptr32[&vu0Regs.cycle]);
+        xSUB(eax, ptr32[&vu0Regs.nextBlockCycles]);
         xCMP(eax, 8);
         xForwardJL32 skip;
         xLoadFarAddr(arg1reg, CpuVU0);
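
The VU0.cpp hunks above replace a direct ">=" comparison of cycle counters with a signed difference. Both cpuRegs.cycle and VU0.cycle are wrapping u32 counters, so the signed-delta form keeps ordering correct across wrap-around; a minimal standalone sketch (the helper name is illustrative, not from the patch):

#include <cstdint>

// Illustrative helper, not part of the patch.
static bool vuIsAheadOfEE(uint32_t eeCycle, uint32_t vuCycle)
{
    // A negative signed delta means the EE has not yet caught up to the VU.
    // This stays correct even after either counter wraps, as long as the two
    // counters remain within 2^31 cycles of each other.
    return (int32_t)(eeCycle - vuCycle) < 0;
}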
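The new scheduling branch in BaseVUmicroCPU::ExecuteBlock boils down to: if an M-bit is pending (VUFLAG_MFLAGSET or blockhasmbit), wake the EE at the VU's next block boundary; otherwise the usual kickstart slice is fine. A condensed, self-contained sketch of that decision under simplified names (not the real interface):

#include <cstdint>

struct VUSyncState
{
    bool     mbitSet;         // VUFLAG_MFLAGSET is pending on VU0
    bool     blockHasMbit;    // the block being executed was compiled with an M-bit in it
    uint32_t nextBlockCycles; // cycles until the VU's next block boundary
};

// Returns the delta (in EE cycles) for the next forced VU sync event,
// or 0 when no event needs to be scheduled.
static uint32_t nextVUEventDelta(const VUSyncState& vu, uint32_t kickstartSlice)
{
    // M-bit timed games are picky: resync at the next block boundary so the EE
    // observes the M-bit as soon as the VU reaches it.
    if (vu.mbitSet || vu.blockHasMbit)
        return vu.nextBlockCycles;

    // Everything else can run a full kickstart slice before syncing again.
    return kickstartSlice;
}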
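The recompiler hunks (recCFC2, recCTC2, recLQC2, recSQC2, recQMFC2, recQMTC2, COP2_Interlock) all emit the same pattern after this patch: update cpuRegs.cycle first, skip if VU0 is idle, then only fall back to BaseVUmicroCPU::ExecuteBlockJIT once the EE is at least 8 cycles past VU0's next block boundary. A plain-C++ rendering of the check the emitted x86 performs (illustrative helper, not code from the tree; pass in the already-updated EE cycle count):

#include <cstdint>

// Illustrative helper, not code from the tree.
static bool shouldResyncVU0(uint32_t eeCycle, uint32_t vu0Cycle,
                            uint32_t vu0NextBlockCycles, bool vu0Running)
{
    if (!vu0Running)                 // xTEST VPU_STAT, 0x1 / xForwardJZ32 skipvuidle
        return false;

    // xSUB eax, VU0.cycle / xSUB eax, VU0.nextBlockCycles
    int32_t delta = (int32_t)(eeCycle - vu0Cycle - vu0NextBlockCycles);

    // xCMP eax, 8 / xForwardJL32 skip: only resync once the EE is 8+ cycles ahead.
    return delta >= 8;
}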