VU: optimise entering VU JITs

Keeps track of how many cycles the next VU block needs, so that the EE JIT is not exited and the microVU JIT entered needlessly before enough cycles have elapsed to run that block.
This commit is contained in:
refractionpcsx2 2021-08-28 06:45:15 +01:00
parent ef9c8ce877
commit d8dfe0a1e9
6 changed files with 23 additions and 17 deletions

View File

@ -155,6 +155,8 @@ struct __aligned16 VURegs {
u32 statusflag;
u32 clipflag;
s32 nextBlockCycles;
u8 *Mem;
u8 *Micro;

View File

@ -34,25 +34,17 @@ void BaseVUmicroCPU::ExecuteBlock(bool startUp) {
if (startUp && s) { // Start Executing a microprogram
Execute(s); // Kick start VU
if (stat & test) {
cpuSetNextEventDelta(s);
if (m_Idx)
VU1.cycle = cpuRegs.cycle;
else
VU0.cycle = cpuRegs.cycle;
}
}
else { // Continue Executing
u32 cycle = m_Idx ? VU1.cycle : VU0.cycle;
s32 delta = (s32)(u32)(cpuRegs.cycle - cycle);
if (delta > 0) { // Enough time has passed
s32 nextblockcycles = m_Idx ? VU1.nextBlockCycles : VU0.nextBlockCycles;
if (delta < nextblockcycles)
return;
if (delta > 0) // Enough time has passed
Execute(delta); // Execute the time since the last call
if (stat & test)
cpuSetNextEventDelta(delta);
}
else cpuSetNextEventDelta(-delta); // Haven't caught-up from kick start
}
}
@ -62,11 +54,11 @@ void BaseVUmicroCPU::ExecuteBlock(bool startUp) {
// This fixes spinning/hanging in some games like Ratchet and Clank's Intro.
void BaseVUmicroCPU::ExecuteBlockJIT(BaseVUmicroCPU* cpu) {
const u32& stat = VU0.VI[REG_VPU_STAT].UL;
const int test = cpu->m_Idx ? 0x100 : 1;
const int test = 1;
if (stat & test) { // VU is running
u32 cycle = cpu->m_Idx ? VU1.cycle : VU0.cycle;
s32 delta = (s32)(u32)(cpuRegs.cycle - cycle);
s32 delta = (s32)(u32)(cpuRegs.cycle - VU0.cycle);
if (delta > 0) { // Enough time has passed
cpu->Execute(delta); // Execute the time since the last call
}

View File

@ -581,6 +581,7 @@ void recLQC2()
xADD(eax, scaleblockcycles_clear());
xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
xSUB(eax, ptr32[&VU0.cycle]);
xSUB(eax, ptr32[&VU0.nextBlockCycles]);
xCMP(eax, 8);
xForwardJL32 skip;
xLoadFarAddr(arg1reg, CpuVU0);
@ -625,6 +626,7 @@ void recSQC2()
xADD(eax, scaleblockcycles_clear());
xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
xSUB(eax, ptr32[&VU0.cycle]);
xSUB(eax, ptr32[&VU0.nextBlockCycles]);
xCMP(eax, 8);
xForwardJL32 skip;
xLoadFarAddr(arg1reg, CpuVU0);

View File

@ -95,6 +95,7 @@ void mVUreset(microVU& mVU, bool resetReserve) {
mVUdispatcherCD(mVU);
mVUemitSearch();
mVU.regs().nextBlockCycles = 0;
// Clear All Program Data
//memset(&mVU.prog, 0, sizeof(mVU.prog));
memset(&mVU.prog.lpState, 0, sizeof(mVU.prog.lpState));

View File

@ -108,12 +108,15 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit) {
}
if (isEbit) { // Clear 'is busy' Flags
xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
if (!mVU.index || !THREAD_VU1) {
xAND(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? ~0x100 : ~0x001)); // VBS0/VBS1 flag
}
else
xFastCall((void*)mVUTBit);
}
else
xMOV(ptr32[&mVU.regs().nextBlockCycles], mVUcycles);
if (isEbit != 2) { // Save PC, and Jump to Exit Point
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
@ -208,12 +211,15 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit) {
if ((isEbit && isEbit != 3)) { // Clear 'is busy' Flags
xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
if (!mVU.index || !THREAD_VU1) {
xAND(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? ~0x100 : ~0x001)); // VBS0/VBS1 flag
}
else
xFastCall((void*)mVUEBit);
}
else
xMOV(ptr32[&mVU.regs().nextBlockCycles], mVUcycles);
if (isEbit != 2 && isEbit != 3) { // Save PC, and Jump to Exit Point
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);

View File

@ -302,6 +302,7 @@ static void recCFC2() {
xADD(eax, scaleblockcycles_clear());
xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
xSUB(eax, ptr32[&vu0Regs.cycle]);
xSUB(eax, ptr32[&vu0Regs.nextBlockCycles]);
xCMP(eax, 8);
xForwardJL32 skip;
xLoadFarAddr(arg1reg, CpuVU0);
@ -380,6 +381,7 @@ static void recCTC2() {
xADD(eax, scaleblockcycles_clear());
xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
xSUB(eax, ptr32[&vu0Regs.cycle]);
xSUB(eax, ptr32[&vu0Regs.nextBlockCycles]);
xCMP(eax, 8);
xForwardJL32 skip;
xLoadFarAddr(arg1reg, CpuVU0);
@ -460,6 +462,7 @@ static void recQMFC2() {
xADD(eax, scaleblockcycles_clear());
xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
xSUB(eax, ptr32[&vu0Regs.cycle]);
xSUB(eax, ptr32[&vu0Regs.nextBlockCycles]);
xCMP(eax, 8);
xForwardJL32 skip;
xLoadFarAddr(arg1reg, CpuVU0);