VU: Adjust timings of VU calls

refractionpcsx2 2021-09-07 13:40:01 +01:00
parent ddf305fce5
commit b4eaf3722f
11 changed files with 79 additions and 50 deletions

View File

@ -11696,7 +11696,6 @@ SLES-52568:
compat: 5
gameFixes:
- XGKickHack # Fixes bad Geometry.
- VUKickstartHack # Fixes loading hang.
SLES-52569:
name: "Spyro - A Hero's Tail"
region: "PAL-M6"
@ -35789,7 +35788,6 @@ SLUS-20909:
compat: 5
gameFixes:
- XGKickHack # Fixes bad geometry.
- VUKickstartHack # Fixes loading hang.
SLUS-20910:
name: "Test Drive - Eve of Destruction"
region: "NTSC-U"

View File

@ -157,6 +157,7 @@ struct __aligned16 VURegs
u32 ebit;
u32 pending_q;
u32 pending_p;
u32 blockhasmbit;
__aligned16 u32 micro_macflags[4];
__aligned16 u32 micro_clipflags[4];

View File

@ -59,7 +59,7 @@ __fi void _vu0run(bool breakOnMbit, bool addCycles) {
if (!(VU0.VI[REG_VPU_STAT].UL & 1)) return;
//VU0 is ahead of the EE and the M-Bit has already been encountered, so there's no need to wait for it; just catch the EE up
if ((VU0.flags & VUFLAG_MFLAGSET) && breakOnMbit && VU0.cycle >= cpuRegs.cycle)
if ((VU0.flags & VUFLAG_MFLAGSET) && breakOnMbit && (s32)(cpuRegs.cycle - VU0.cycle) < 0)
{
cpuRegs.cycle = VU0.cycle;
return;
@ -71,7 +71,7 @@ __fi void _vu0run(bool breakOnMbit, bool addCycles) {
do { // Run VU until it finishes or M-Bit
CpuVU0->Execute(runCycles);
} while ((VU0.VI[REG_VPU_STAT].UL & 1) // E-bit Termination
&& (!breakOnMbit || !(VU0.flags & VUFLAG_MFLAGSET))); // M-bit Break
&& (!breakOnMbit || !(VU0.flags & VUFLAG_MFLAGSET) || (s32)(cpuRegs.cycle - VU0.cycle) > 0)); // M-bit Break
// Add cycles if called from EE's COP2
if (addCycles)

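A note on the comparison change in _vu0run above: cpuRegs.cycle and VU0.cycle are 32-bit counters that eventually wrap, so the old "VU0.cycle >= cpuRegs.cycle" test can give the wrong answer across a wrap, while the signed difference stays correct as long as the two counters are within 2^31 cycles of each other. A minimal sketch of the idiom (the helper name is illustrative, not part of the commit):

#include <cstdint>

// True when the VU's cycle counter is strictly ahead of the EE's, even if either
// 32-bit counter has wrapped. The subtraction is done in unsigned arithmetic and
// the result reinterpreted as signed, so a "negative" delta means the EE is behind.
static bool vuAheadOfEE(uint32_t eeCycle, uint32_t vuCycle)
{
    return static_cast<int32_t>(eeCycle - vuCycle) < 0;
}

// Example: the EE counter has wrapped past zero while the VU's has not.
// eeCycle = 0x00000010, vuCycle = 0xFFFFFFF0 -> delta is +0x20, so the EE is
// correctly treated as ahead despite its smaller raw value.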
View File

@ -49,9 +49,10 @@ static void _vu0Exec(VURegs* VU)
{
VU->ebit = 2;
}
if (ptr[1] & 0x20000000) // M flag
if (ptr[1] & 0x20000000 && VU == &VU0) // M flag
{
VU->flags|= VUFLAG_MFLAGSET;
VU0.blockhasmbit = true;
// Console.WriteLn("fixme: M flag set");
}
if (ptr[1] & 0x10000000) // D flag
@ -182,6 +183,9 @@ static void _vu0Exec(VURegs* VU)
{
VU->VI[REG_TPC].UL = VU->branchpc;
if (VU->blockhasmbit)
VU->blockhasmbit = false;
if(VU->takedelaybranch)
{
DevCon.Warning("VU0 - Branch/Jump in Delay Slot");
@ -200,6 +204,9 @@ static void _vu0Exec(VURegs* VU)
_vuFlushAll(VU);
VU0.VI[REG_VPU_STAT].UL&= ~0x1; /* E flag */
vif0Regs.stat.VEW = false;
if (VU->blockhasmbit)
VU->blockhasmbit = false;
}
}
@ -269,6 +276,6 @@ void InterpVU0::Execute(u32 cycles)
vu0Exec(&VU0);
}
VU0.VI[REG_TPC].UL >>= 3;
VU0.nextBlockCycles = (VU0.cycle - cpuRegs.cycle) + 1;
fesetround(originalRounding);
}

View File

@ -328,6 +328,6 @@ void InterpVU1::Execute(u32 cycles)
Step();
}
VU1.VI[REG_TPC].UL >>= 3;
VU1.nextBlockCycles = (VU1.cycle - cpuRegs.cycle) + 1;
fesetround(originalRounding);
}

View File

@ -82,11 +82,15 @@ void BaseVUmicroCPU::ExecuteBlock(bool startUp)
return;
}
if (startUp && s) // Start Executing a microprogram (When kickstarted)
// You might be looking at this and wondering what on earth all these conditions are for.
// M-Bit timed games are REALLY picky: they wait for an M-Bit and then transfer a bunch of data,
// relying on knowing exactly when that M-Bit will land, so the VU can't go too long without
// updating/syncing and the timing has to stay fairly tight.
// Everything else (especially stuff that needs the kickstart) can do what it likes.
if (startUp) // Start Executing a microprogram (When kickstarted)
{
Execute(s); // Kick start VU
// I don't like doing this, but Crash Twinsanity seems to be upset without it
if (stat & test)
{
if (m_Idx)
@ -94,7 +98,12 @@ void BaseVUmicroCPU::ExecuteBlock(bool startUp)
else
cpuRegs.cycle = VU0.cycle;
cpuSetNextEventDelta(s);
u32 nextblockcycles = m_Idx ? VU1.nextBlockCycles : VU0.nextBlockCycles;
if((VU0.flags & VUFLAG_MFLAGSET) || VU0.blockhasmbit)
cpuSetNextEventDelta(nextblockcycles);
else if(s)
cpuSetNextEventDelta(s);
}
}
else // Continue Executing
@ -110,19 +119,21 @@ void BaseVUmicroCPU::ExecuteBlock(bool startUp)
}
else
{
if (delta >= nextblockcycles) // When running behind, make sure we have enough cycles passed for the block to run
if (delta >= nextblockcycles && delta > 0) // When running behind, make sure we have enough cycles passed for the block to run
Execute(delta);
}
if (stat & test)
if ((stat & test) && !EmuConfig.Gamefixes.VUKickstartHack)
{
// Queue up next required time to run a block
nextblockcycles = m_Idx ? VU1.nextBlockCycles : VU0.nextBlockCycles;
cycle = m_Idx ? VU1.cycle : VU0.cycle;
nextblockcycles = EmuConfig.Gamefixes.VUKickstartHack ? (cycle - cpuRegs.cycle) : nextblockcycles;
nextblockcycles = nextblockcycles - (cycle - cpuRegs.cycle);
if(nextblockcycles)
if (nextblockcycles > 0 || (VU0.flags & VUFLAG_MFLAGSET) || VU0.blockhasmbit)
{
cpuSetNextEventDelta(nextblockcycles);
}
}
}
}
@ -136,12 +147,13 @@ void BaseVUmicroCPU::ExecuteBlockJIT(BaseVUmicroCPU* cpu)
const u32& stat = VU0.VI[REG_VPU_STAT].UL;
const int test = 1;
//DevCon.Warning("Was set %d cycles ago", cpuRegs.cycle - setcycle);
if (stat & test)
{ // VU is running
s32 delta = (s32)(u32)(cpuRegs.cycle - VU0.cycle);
if (delta > 0)
{ // Enough time has passed
{
cpu->Execute(delta); // Execute the time since the last call
}
}

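Restating the comment in ExecuteBlock above in plainer terms: when the VU0 program that was just kickstarted contains an M-Bit (or has already raised one), the EE is rescheduled using the VU's own nextBlockCycles estimate so it comes back exactly when the VU expects it; everything else just gets the loose kickstart slice. A rough sketch of that decision, with illustrative names rather than the real PCSX2 signatures:

// The post-kickstart scheduling choice, reduced to a helper. 'mbitPending' stands in for
// (VU0.flags & VUFLAG_MFLAGSET) || VU0.blockhasmbit, and the other parameters mirror
// VU0.nextBlockCycles and the kickstart slice 's'.
static int chooseNextEventDelta(bool mbitPending, int nextBlockCycles, int kickstartCycles)
{
    if (mbitPending)
        return nextBlockCycles; // M-Bit timed program: bring the EE back exactly on time
    return kickstartCycles;     // anything else: the kickstart slice is good enough
                                // (the real code queues no event at all when this is 0)
}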
View File

@ -580,11 +580,11 @@ void recLQC2()
{
iFlushCall(FLUSH_EVERYTHING);
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
xForwardJZ32 skipvuidle;
xMOV(eax, ptr32[&cpuRegs.cycle]);
xADD(eax, scaleblockcycles_clear());
xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
xForwardJZ32 skipvuidle;
xSUB(eax, ptr32[&VU0.cycle]);
xSUB(eax, ptr32[&VU0.nextBlockCycles]);
xCMP(eax, 8);
@ -625,11 +625,11 @@ void recSQC2()
{
iFlushCall(FLUSH_EVERYTHING);
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
xForwardJZ32 skipvuidle;
xMOV(eax, ptr32[&cpuRegs.cycle]);
xADD(eax, scaleblockcycles_clear());
xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
xForwardJZ32 skipvuidle;
xSUB(eax, ptr32[&VU0.cycle]);
xSUB(eax, ptr32[&VU0.nextBlockCycles]);
xCMP(eax, 8);

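Both recLQC2 and recSQC2 above now update the EE cycle counter before the VU0-idle test instead of after it, and keep the same gate on actually running VU0: execute only when the EE has advanced at least 8 cycles past the point where VU0's next block is due. The conditional jump that consumes the xCMP is truncated out of these hunks, but it is the same xForwardJL32 pattern visible in the COP2 handlers further down. In plain C++ the emitted sequence computes roughly this (a paraphrase of the generated code, not project source):

#include <cstdint>

// What the generated x86 works out before deciding to run a VU0 block from LQC2/SQC2.
// eeCycle is cpuRegs.cycle after the pending EE block cycles have been added in.
static bool shouldRunVU0Block(uint32_t eeCycle, uint32_t vu0Cycle, uint32_t nextBlockCycles)
{
    // eax = cpuRegs.cycle; eax -= VU0.cycle; eax -= VU0.nextBlockCycles; cmp eax, 8; jl skip
    int32_t delta = static_cast<int32_t>(eeCycle - vu0Cycle - nextBlockCycles);
    return delta >= 8; // below the threshold the block is skipped and VU0 is left alone
}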
View File

@ -120,9 +120,10 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit)
xMOVAPS(ptr128[&mVU.regs().micro_statusflags], xmmT1);
}
xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
if (isEbit) // Clear 'is busy' Flags
{
xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
if (!mVU.index || !THREAD_VU1)
{
xAND(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? ~0x100 : ~0x001)); // VBS0/VBS1 flag
@ -130,8 +131,6 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit)
else
xFastCall((void*)mVUTBit);
}
else
xMOV(ptr32[&mVU.regs().nextBlockCycles], mVUcycles);
if (isEbit != 2) // Save PC, and Jump to Exit Point
{
@ -245,8 +244,10 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit)
else
xFastCall((void*)mVUEBit);
}
else
xMOV(ptr32[&mVU.regs().nextBlockCycles], mVUcycles);
else if(isEbit)
{
xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
}
if (isEbit != 2 && isEbit != 3) // Save PC, and Jump to Exit Point
{
@ -305,7 +306,6 @@ void normJumpCompile(mV, microFlagCycles& mFC, bool isEvilJump)
//So if it is taken, you need to end the program, else you get infinite loops.
mVUendProgram(mVU, &mFC, 2);
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], arg1regd);
xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
xJMP(mVU.exitFunct);
}
@ -366,7 +366,6 @@ void normBranch(mV, microFlagCycles& mFC)
mVUendProgram(mVU, &mFC, 3);
iPC = branchAddr(mVU) / 4;
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
xJMP(mVU.exitFunct);
iPC = tempPC;
}
@ -464,7 +463,6 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc)
incPC(-4); // Go Back to Branch Opcode to get branchAddr
iPC = branchAddr(mVU) / 4;
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
xJMP(mVU.exitFunct);
eJMP.SetTarget();
iPC = tempPC;
@ -484,13 +482,11 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc)
xForwardJump32 dJMP(xInvertCond((JccComparisonType)JMPcc));
incPC(4); // Set PC to First instruction of Non-Taken Side
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
xJMP(mVU.exitFunct);
dJMP.SetTarget();
incPC(-4); // Go Back to Branch Opcode to get branchAddr
iPC = branchAddr(mVU) / 4;
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
xJMP(mVU.exitFunct);
eJMP.SetTarget();
iPC = tempPC;
@ -514,7 +510,6 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc)
incPC(-4); // Go Back to Branch Opcode to get branchAddr
iPC = branchAddr(mVU) / 4;
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
xJMP(mVU.exitFunct);
iPC = tempPC;
}
@ -530,14 +525,12 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc)
xForwardJump32 eJMP(((JccComparisonType)JMPcc));
incPC(1); // Set PC to First instruction of Non-Taken Side
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
xJMP(mVU.exitFunct);
eJMP.SetTarget();
incPC(-4); // Go Back to Branch Opcode to get branchAddr
iPC = branchAddr(mVU) / 4;
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
xJMP(mVU.exitFunct);
return;
}
@ -630,7 +623,6 @@ void normJump(mV, microFlagCycles& mFC)
mVUDTendProgram(mVU, &mFC, 2);
xMOV(gprT1, ptr32[&mVU.branch]);
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], gprT1);
xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
xJMP(mVU.exitFunct);
eJMP.SetTarget();
}
@ -646,7 +638,6 @@ void normJump(mV, microFlagCycles& mFC)
mVUDTendProgram(mVU, &mFC, 2);
xMOV(gprT1, ptr32[&mVU.branch]);
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], gprT1);
xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
xJMP(mVU.exitFunct);
eJMP.SetTarget();
}
@ -655,7 +646,6 @@ void normJump(mV, microFlagCycles& mFC)
mVUendProgram(mVU, &mFC, 2);
xMOV(gprT1, ptr32[&mVU.branch]);
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], gprT1);
xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
xJMP(mVU.exitFunct);
}
else

View File

@ -478,6 +478,7 @@ void mVUtestCycles(microVU& mVU, microFlagCycles& mFC)
xForwardJGE32 skip;
mVUsavePipelineState(mVU);
xMOV(ptr32[&mVU.regs().nextBlockCycles], mVUcycles);
mVUendProgram(mVU, &mFC, 0);
skip.SetTarget();
@ -537,6 +538,7 @@ __fi void mVUinitFirstPass(microVU& mVU, uptr pState, u8* thisPtr)
mVUregs.blockType = 0;
mVUregs.viBackUp = 0;
mVUregs.flagInfo = 0;
mVUregs.mbitinblock = false;
mVUsFlagHack = CHECK_VU_FLAGHACK;
mVUinitConstValues(mVU);
}
@ -693,6 +695,7 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState)
if ((curI & _Mbit_) && isVU0)
{
mVUregs.mbitinblock = true;
if (xPC > 0)
{
incPC(-2);
@ -780,7 +783,7 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState)
// Fix up vi15 const info for propagation through blocks
mVUregs.vi15 = (doConstProp && mVUconstReg[15].isValid) ? (u16)mVUconstReg[15].regValue : 0;
mVUregs.vi15v = (doConstProp && mVUconstReg[15].isValid) ? 1 : 0;
xMOV(ptr32[&mVU.regs().blockhasmbit], mVUregs.mbitinblock);
mVUsetFlags(mVU, mFC); // Sets Up Flag instances
mVUoptimizePipeState(mVU); // Optimize the End Pipeline State for nicer Block Linking
mVUdebugPrintBlocks(mVU, false); // Prints Start/End PC of blocks executed, for debugging...
@ -829,6 +832,7 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState)
}
incPC(2);
mVUsetupRange(mVU, xPC, false);
xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
mVUendProgram(mVU, &mFC, 0);
normBranchCompile(mVU, xPC);
incPC(-2);

View File

@ -49,8 +49,9 @@ union __aligned16 microRegInfo
u8 viBackUp; // VI reg number that was written to on branch-delay slot
u8 blockType; // 0 = Normal; 1,2 = Compile one instruction (E-bit/Branch Ending)
u8 r;
u8 mbitinblock;
};
u32 quick32[2];
u32 quick32[3];
};
u8 vi15v; // 'vi15' constant is valid

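The quick32 bump above goes hand in hand with the new mbitinblock byte: quick32 is the word-sized view the recompiler uses to copy and compare this slice of pipeline state, so it must be at least as large as the byte fields it aliases; presumably those fields now total nine bytes, which no longer fits in two u32s. A sketch of the general idiom with illustrative field names (not the full microRegInfo layout):

#include <cstdint>

// Trimmed-down stand-in for the microRegInfo pattern: byte-sized state fields aliased
// by a u32 array so block state can be compared and copied a word at a time.
union PipelineStateView
{
    struct Fields
    {
        uint8_t viBackUp;
        uint8_t blockType;
        uint8_t r;
        uint8_t mbitinblock;   // the newly added byte
        uint8_t otherState[5]; // stand-in for the remaining byte fields not shown in the hunk
    } fields;                  // nine bytes of per-block state
    uint32_t quick32[3];       // word view for fast compare/copy; must cover 'fields'
};

// Catches exactly the mistake this hunk guards against: growing the byte fields
// without growing the aliasing array.
static_assert(sizeof(PipelineStateView::Fields) <= 3 * sizeof(uint32_t),
              "quick32 must span every byte of the state it aliases");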
View File

@ -112,6 +112,9 @@ void endMacroOp(int mode)
#define INTERPRETATE_COP2_FUNC(f) \
void recV##f() \
{ \
xMOV(eax, ptr32[&cpuRegs.cycle]); \
xADD(eax, scaleblockcycles_clear()); \
xMOV(ptr32[&cpuRegs.cycle], eax); \
recCall(V##f); \
_freeX86regs(); \
}
@ -283,15 +286,24 @@ void COP2_Interlock(bool mBitSync)
if (cpuRegs.code & 1)
{
iFlushCall(FLUSH_EVERYTHING);
xMOV(eax, ptr32[&cpuRegs.cycle]);
xADD(eax, scaleblockcycles_clear());
xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
xForwardJZ32 skipvuidle;
xMOV(eax, ptr[&cpuRegs.cycle]);
xADD(eax, scaleblockcycles_clear());
xMOV(ptr[&cpuRegs.cycle], eax); // update cycles
xLoadFarAddr(arg1reg, CpuVU0);
xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg);
if (mBitSync)
{
xSUB(eax, ptr32[&vu0Regs.cycle]);
xSUB(eax, ptr32[&vu0Regs.nextBlockCycles]);
xCMP(eax, 8);
xForwardJL32 skip;
xLoadFarAddr(arg1reg, CpuVU0);
xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg);
skip.SetTarget();
xFastCall((void*)_vu0WaitMicro);
}
else
xFastCall((void*)_vu0FinishMicro);
skipvuidle.SetTarget();
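For the interlocked (mBitSync) path in COP2_Interlock above, the emitted code now brings cpuRegs.cycle up to date first and only calls ExecuteBlockJIT when the EE has overrun the point where VU0's next block is due, before dropping into _vu0WaitMicro as before. Roughly, ignoring register allocation (a paraphrase of the emitted sequence; the callables stand in for the real calls so the sketch is self-contained):

#include <cstdint>

// Approximate control flow of the interlocked COP2 path. runVU0Block stands in for
// BaseVUmicroCPU::ExecuteBlockJIT(CpuVU0) and waitForMicro for _vu0WaitMicro().
static void cop2InterlockSync(uint32_t eeCycle, uint32_t vu0Cycle, uint32_t nextBlockCycles,
                              void (*runVU0Block)(), void (*waitForMicro)())
{
    // Only kick VU0 if the EE has run at least 8 cycles past where VU0's next block is due.
    if (static_cast<int32_t>(eeCycle - vu0Cycle - nextBlockCycles) >= 8)
        runVU0Block();
    waitForMicro(); // then stall on the M-Bit / end of the microprogram, same as before
}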
@ -320,11 +332,11 @@ static void recCFC2()
if (!(cpuRegs.code & 1))
{
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
xForwardJZ32 skipvuidle;
xMOV(eax, ptr32[&cpuRegs.cycle]);
xADD(eax, scaleblockcycles_clear());
xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
xForwardJZ32 skipvuidle;
xSUB(eax, ptr32[&vu0Regs.cycle]);
xSUB(eax, ptr32[&vu0Regs.nextBlockCycles]);
xCMP(eax, 8);
@ -405,11 +417,12 @@ static void recCTC2()
if (!(cpuRegs.code & 1))
{
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
xForwardJZ32 skipvuidle;
xMOV(eax, ptr32[&cpuRegs.cycle]);
xADD(eax, scaleblockcycles_clear());
xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
xForwardJZ32 skipvuidle;
xSUB(eax, ptr32[&vu0Regs.cycle]);
xSUB(eax, ptr32[&vu0Regs.nextBlockCycles]);
xCMP(eax, 8);
@ -498,11 +511,12 @@ static void recQMFC2()
if (!(cpuRegs.code & 1))
{
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
xForwardJZ32 skipvuidle;
xMOV(eax, ptr32[&cpuRegs.cycle]);
xADD(eax, scaleblockcycles_clear());
xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
xForwardJZ32 skipvuidle;
xSUB(eax, ptr32[&vu0Regs.cycle]);
xSUB(eax, ptr32[&vu0Regs.nextBlockCycles]);
xCMP(eax, 8);
@ -532,12 +546,14 @@ static void recQMTC2()
if (!(cpuRegs.code & 1))
{
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
xForwardJZ32 skipvuidle;
xMOV(eax, ptr32[&cpuRegs.cycle]);
xADD(eax, scaleblockcycles_clear());
xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
xForwardJZ32 skipvuidle;
xSUB(eax, ptr32[&vu0Regs.cycle]);
xSUB(eax, ptr32[&vu0Regs.nextBlockCycles]);
xCMP(eax, 8);
xForwardJL32 skip;
xLoadFarAddr(arg1reg, CpuVU0);