MTVU: Try to make T-Bit more reliable.

Add MTVUSpeedHack option to GameDB so it can be forcefully disabled
This commit is contained in:
refractionpcsx2 2021-10-23 20:36:02 +01:00
parent 05a7a61257
commit fd4a5acc40
13 changed files with 114 additions and 55 deletions

View File

@ -58,6 +58,7 @@ enum SpeedhackId
Speedhack_mvuFlag = SpeedhackId_FIRST,
Speedhack_InstantVU1,
Speedhack_MTVU,
SpeedhackId_COUNT
};

View File

@ -146,7 +146,7 @@ void VU_Thread::ExecuteRingBuffer()
s32 addr = Read();
vifRegs.top = Read();
vifRegs.itop = Read();
vuFBRST = Read();
if (addr != -1)
vuRegs.VI[REG_TPC].UL = addr & 0x7FF;
vuCPU->SetStartPC(vuRegs.VI[REG_TPC].UL << 3);
@ -406,13 +406,13 @@ void VU_Thread::Get_MTVUChanges()
{
mtvuInterrupts.fetch_and(~InterruptFlagVUEBit, std::memory_order_relaxed);
VU0.VI[REG_VPU_STAT].UL &= ~0x0100;
VU0.VI[REG_VPU_STAT].UL &= ~0xFF00;
//DevCon.Warning("E-Bit registered %x", VU0.VI[REG_VPU_STAT].UL);
}
if (interrupts & InterruptFlagVUTBit)
{
mtvuInterrupts.fetch_and(~InterruptFlagVUTBit, std::memory_order_relaxed);
VU0.VI[REG_VPU_STAT].UL &= ~0x0100;
VU0.VI[REG_VPU_STAT].UL &= ~0xFF00;
VU0.VI[REG_VPU_STAT].UL |= 0x0400;
//DevCon.Warning("T-Bit registered %x", VU0.VI[REG_VPU_STAT].UL);
hwIntcIrq(7);
@ -445,15 +445,16 @@ void VU_Thread::WaitVU()
}
}
void VU_Thread::ExecuteVU(u32 vu_addr, u32 vif_top, u32 vif_itop)
void VU_Thread::ExecuteVU(u32 vu_addr, u32 vif_top, u32 vif_itop, u32 fbrst)
{
MTVU_LOG("MTVU - ExecuteVU!");
Get_MTVUChanges(); // Clear any pending interrupts
ReserveSpace(4);
ReserveSpace(5);
Write(MTVU_VU_EXECUTE);
Write(vu_addr);
Write(vif_top);
Write(vif_itop);
Write(fbrst);
CommitWritePos();
gifUnit.TransferGSPacketData(GIF_TRANS_MTVU, NULL, 0);
KickStart();

View File

@ -47,6 +47,7 @@ public:
Semaphore semaXGkick;
std::atomic<unsigned int> vuCycles[4]; // Used for VU cycle stealing hack
u32 vuCycleIdx; // Used for VU cycle stealing hack
u32 vuFBRST;
enum InterruptFlag {
InterruptFlagFinish = 1 << 0,
@ -76,7 +77,7 @@ public:
void Get_MTVUChanges();
void ExecuteVU(u32 vu_addr, u32 vif_top, u32 vif_itop);
void ExecuteVU(u32 vu_addr, u32 vif_top, u32 vif_itop, u32 fbrst);
void VifUnpack(vifStruct& _vif, VIFregisters& _vifRegs, u8* data, u32 size);

View File

@ -65,9 +65,11 @@ void TraceLogFilters::LoadSave(SettingsWrapper& wrap)
}
const char* const tbl_SpeedhackNames[] =
{
{
"mvuFlag",
"InstantVU1"};
"InstantVU1",
"MTVU"
};
const char* EnumToString(SpeedhackId id)
{
@ -85,6 +87,9 @@ void Pcsx2Config::SpeedhackOptions::Set(SpeedhackId id, bool enabled)
case Speedhack_InstantVU1:
vu1Instant = enabled;
break;
case Speedhack_MTVU:
vuThread = enabled;
break;
jNO_DEFAULT;
}
}

View File

@ -37,8 +37,11 @@ static void TestClearVUs(u32 madr, u32 qwc, bool isWrite)
//Catch up VU1 too
CpuVU1->ExecuteBlock(0);
}
if ((madr >= 0x11008000) && (VU0.VI[REG_VPU_STAT].UL & 0x100) && !THREAD_VU1)
if ((madr >= 0x11008000) && (VU0.VI[REG_VPU_STAT].UL & 0x100) && (!THREAD_VU1 || !isWrite))
{
if (THREAD_VU1)
vu1Thread.WaitVU();
else
CpuVU1->Execute(vu1RunCycles);
cpuRegs.cycle = VU1.cycle;
//Catch up VU0 too

View File

@ -30,6 +30,7 @@
#include "R5900OpcodeTables.h"
#include "VUmicro.h"
#include "Vif_Dma.h"
#include "MTVU.h"
#define _Ft_ _Rt_
#define _Fs_ _Rd_

View File

@ -61,15 +61,16 @@ void __fastcall vu1ExecMicro(u32 addr)
{
if (THREAD_VU1) {
VU0.VI[REG_VPU_STAT].UL &= ~0xFF00;
// Okay, this is a little bit of a hack, but with good reason.
// Most of the time with MTVU we want to pretend the VU has finished quickly, so as to gain the benefit from running another thread.
// However, with T-Bit games, when the T-Bit is enabled it needs to wait in case a T-Bit happens, so we need to set "Busy".
// We shouldn't do this all the time, as it negates the extra thread and causes games like Ratchet & Clank to be no faster.
if(VU0.VI[REG_FBRST].UL & 0x800)
if (VU0.VI[REG_FBRST].UL & 0x800)
{
VU0.VI[REG_VPU_STAT].UL |= 0x0100;
}
vu1Thread.ExecuteVU(addr, vif1Regs.top, vif1Regs.itop);
vu1Thread.ExecuteVU(addr, vif1Regs.top, vif1Regs.itop, VU0.VI[REG_FBRST].UL);
return;
}
static int count = 0;

View File

@ -305,6 +305,7 @@ _vifT __fi u32 vifRead32(u32 mem)
{
vifStruct& vif = MTVU_VifX;
bool wait = idx && THREAD_VU1;
switch (mem)
{
case caseVif(ROW0):
@ -380,43 +381,35 @@ _vifT __fi bool vifWrite32(u32 mem, u32 value)
case caseVif(ROW0):
vif.MaskRow._u32[0] = value;
if (idx && THREAD_VU1)
vu1Thread.WriteRow(vif);
return false;
case caseVif(ROW1):
vif.MaskRow._u32[1] = value;
if (idx && THREAD_VU1)
vu1Thread.WriteRow(vif);
return false;
case caseVif(ROW2):
vif.MaskRow._u32[2] = value;
if (idx && THREAD_VU1)
vu1Thread.WriteRow(vif);
return false;
case caseVif(ROW3):
vif.MaskRow._u32[3] = value;
if (idx && THREAD_VU1)
vu1Thread.WriteRow(vif);
return false;
case caseVif(COL0):
vif.MaskCol._u32[0] = value;
if (idx && THREAD_VU1)
vu1Thread.WriteCol(vif);
return false;
case caseVif(COL1):
vif.MaskCol._u32[1] = value;
if (idx && THREAD_VU1)
vu1Thread.WriteCol(vif);
return false;
case caseVif(COL2):
vif.MaskCol._u32[2] = value;
if (idx && THREAD_VU1)
vu1Thread.WriteCol(vif);
return false;
case caseVif(COL3):
vif.MaskCol._u32[3] = value;
if (idx && THREAD_VU1)
vu1Thread.WriteCol(vif);
return false;
}

View File

@ -173,9 +173,6 @@ __fi void vif1SetupTransfer()
}
}
if (vif1ch.chcr.TTE)
{
// Transfer dma tag if tte is set
@ -233,7 +230,6 @@ __fi void vif1VUFinish()
{
if (VU0.VI[REG_VPU_STAT].UL & 0x500)
{
if(THREAD_VU1)
vu1Thread.Get_MTVUChanges();
CPU_INT(VIF_VU1_FINISH, 128);

View File

@ -327,19 +327,28 @@ __fi bool dmacWrite32( u32 mem, mem32_t& value )
{
if ((psHu32(mem & ~0xff) & 0x100) && dmacRegs.ctrl.DMAE && !psHu8(DMAC_ENABLER + 2))
{
DevCon.Warning("Gamefix: Write to DMA addr %x while STR is busy!", mem);
//DevCon.Warning("Gamefix: Write to DMA addr %x while STR is busy!", mem);
while (psHu32(mem & ~0xff) & 0x100)
{
switch ((mem >> 8) & 0xFF)
{
case 0x80: // VIF0
vif0Interrupt();
cpuRegs.interrupt &= ~(1 << DMAC_VIF0);
break;
case 0x90: // VIF1
if (vif1Regs.stat.VEW)
{
vu1Finish(false);
vif1VUFinish();
}
else
vif1Interrupt();
cpuRegs.interrupt &= ~(1 << DMAC_VIF1);
break;
case 0xA0: // GIF
gifInterrupt();
cpuRegs.interrupt &= ~(1 << DMAC_GIF);
break;
case 0xB0: // IPUFROM
[[fallthrough]];
@ -351,9 +360,11 @@ __fi bool dmacWrite32( u32 mem, mem32_t& value )
break;
case 0xD0: // SPRFROM
SPRFROMinterrupt();
cpuRegs.interrupt &= ~(1 << DMAC_FROM_SPR);
break;
case 0xD4: // SPRTO
SPRTOinterrupt();
cpuRegs.interrupt &= ~(1 << DMAC_TO_SPR);
break;
default:
return false;

View File

@ -127,21 +127,24 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit)
xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
if (isEbit) // Clear 'is busy' Flags
{
if (!mVU.index || !THREAD_VU1)
{
xAND(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? ~0x100 : ~0x001)); // VBS0/VBS1 flag
}
else
xFastCall((void*)mVUTBit);
}
if (isEbit != 2) // Save PC, and Jump to Exit Point
{
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
if (mVU.index && THREAD_VU1)
xFastCall((void*)mVUTBit);
xJMP(mVU.exitFunct);
}
memcpy(&mVUregs, &stateBackup, sizeof(mVUregs)); //Restore the state for the rest of the recompile
}
@ -244,6 +247,7 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit)
xMOVAPS(ptr128[&mVU.regs().micro_statusflags], xmmT1);
}
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
if ((isEbit && isEbit != 3)) // Clear 'is busy' Flags
{
@ -252,8 +256,6 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit)
{
xAND(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? ~0x100 : ~0x001)); // VBS0/VBS1 flag
}
else
xFastCall((void*)mVUEBit);
}
else if(isEbit)
{
@ -262,7 +264,8 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit)
if (isEbit != 2 && isEbit != 3) // Save PC, and Jump to Exit Point
{
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
if (mVU.index && THREAD_VU1)
xFastCall((void*)mVUEBit);
xJMP(mVU.exitFunct);
}
memcpy(&mVUregs, &stateBackup, sizeof(mVUregs)); //Restore the state for the rest of the recompile
@ -321,6 +324,8 @@ void normJumpCompile(mV, microFlagCycles& mFC, bool isEvilJump)
//So if it is taken, you need to end the program, else you get infinite loops.
mVUendProgram(mVU, &mFC, 2);
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], arg1regd);
if (mVU.index && THREAD_VU1)
xFastCall((void*)mVUEBit);
xJMP(mVU.exitFunct);
}
@ -340,6 +345,9 @@ void normBranch(mV, microFlagCycles& mFC)
if (mVUup.dBit && doDBitHandling)
{
u32 tempPC = iPC;
if (mVU.index && THREAD_VU1)
xTEST(ptr32[&vu1Thread.vuFBRST], (isVU1 ? 0x400 : 0x4));
else
xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x400 : 0x4));
xForwardJump32 eJMP(Jcc_Zero);
if (!mVU.index || !THREAD_VU1)
@ -355,6 +363,9 @@ void normBranch(mV, microFlagCycles& mFC)
if (mVUup.tBit)
{
u32 tempPC = iPC;
if (mVU.index && THREAD_VU1)
xTEST(ptr32[&vu1Thread.vuFBRST], (isVU1 ? 0x800 : 0x8));
else
xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x800 : 0x8));
xForwardJump32 eJMP(Jcc_Zero);
if (!mVU.index || !THREAD_VU1)
@ -381,6 +392,8 @@ void normBranch(mV, microFlagCycles& mFC)
mVUendProgram(mVU, &mFC, 3);
iPC = branchAddr(mVU) / 4;
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
if (mVU.index && THREAD_VU1)
xFastCall((void*)mVUEBit);
xJMP(mVU.exitFunct);
iPC = tempPC;
}
@ -407,6 +420,9 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc)
{
DevCon.Warning("T-Bit on branch, please report if broken");
u32 tempPC = iPC;
if (mVU.index && THREAD_VU1)
xTEST(ptr32[&vu1Thread.vuFBRST], (isVU1 ? 0x800 : 0x8));
else
xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x800 : 0x8));
xForwardJump32 eJMP(Jcc_Zero);
if (!mVU.index || !THREAD_VU1)
@ -419,11 +435,15 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc)
xForwardJump32 tJMP(xInvertCond((JccComparisonType)JMPcc));
incPC(4); // Set PC to First instruction of Non-Taken Side
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
if (mVU.index && THREAD_VU1)
xFastCall((void*)mVUTBit);
xJMP(mVU.exitFunct);
tJMP.SetTarget();
incPC(-4); // Go Back to Branch Opcode to get branchAddr
iPC = branchAddr(mVU) / 4;
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
if (mVU.index && THREAD_VU1)
xFastCall((void*)mVUTBit);
xJMP(mVU.exitFunct);
eJMP.SetTarget();
iPC = tempPC;
@ -431,6 +451,9 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc)
if (mVUup.dBit && doDBitHandling)
{
u32 tempPC = iPC;
if (mVU.index && THREAD_VU1)
xTEST(ptr32[&vu1Thread.vuFBRST], (isVU1 ? 0x400 : 0x4));
else
xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x400 : 0x4));
xForwardJump32 eJMP(Jcc_Zero);
if (!mVU.index || !THREAD_VU1)
@ -466,11 +489,15 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc)
xForwardJump32 dJMP((JccComparisonType)JMPcc);
incPC(4); // Set PC to First instruction of Non-Taken Side
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
if (mVU.index && THREAD_VU1)
xFastCall((void*)mVUEBit);
xJMP(mVU.exitFunct);
dJMP.SetTarget();
incPC(-4); // Go Back to Branch Opcode to get branchAddr
iPC = branchAddr(mVU) / 4;
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
if (mVU.index && THREAD_VU1)
xFastCall((void*)mVUEBit);
xJMP(mVU.exitFunct);
iPC = tempPC;
}
@ -486,12 +513,16 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc)
xForwardJump32 eJMP(((JccComparisonType)JMPcc));
incPC(1); // Set PC to First instruction of Non-Taken Side
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
if (mVU.index && THREAD_VU1)
xFastCall((void*)mVUEBit);
xJMP(mVU.exitFunct);
eJMP.SetTarget();
incPC(-4); // Go Back to Branch Opcode to get branchAddr
iPC = branchAddr(mVU) / 4;
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
if (mVU.index && THREAD_VU1)
xFastCall((void*)mVUEBit);
xJMP(mVU.exitFunct);
return;
}
@ -550,6 +581,9 @@ void normJump(mV, microFlagCycles& mFC)
}
if (mVUup.dBit && doDBitHandling)
{
// D-bit enable test. vu1Thread.vuFBRST is the FBRST value captured when VU1
// was kicked through ExecuteVU, so it is only meaningful for VU1 running on
// the MTVU thread. Gate on mVU.index as well (matching the T-bit test in this
// function and the D/T-bit tests in normBranch/condBranch/mVUDoDBit), so that
// a VU0 program compiled while MTVU is enabled tests the live FBRST register
// with its own 0x4 mask rather than VU1's snapshot.
if (mVU.index && THREAD_VU1)
	xTEST(ptr32[&vu1Thread.vuFBRST], (isVU1 ? 0x400 : 0x4));
else
	xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x400 : 0x4));
xForwardJump32 eJMP(Jcc_Zero);
if (!mVU.index || !THREAD_VU1)
@ -565,6 +599,9 @@ void normJump(mV, microFlagCycles& mFC)
}
if (mVUup.tBit)
{
if (mVU.index && THREAD_VU1)
xTEST(ptr32[&vu1Thread.vuFBRST], (isVU1 ? 0x800 : 0x8));
else
xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x800 : 0x8));
xForwardJump32 eJMP(Jcc_Zero);
if (!mVU.index || !THREAD_VU1)
@ -575,6 +612,8 @@ void normJump(mV, microFlagCycles& mFC)
mVUDTendProgram(mVU, &mFC, 2);
xMOV(gprT1, ptr32[&mVU.branch]);
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], gprT1);
if (mVU.index && THREAD_VU1)
xFastCall((void*)mVUTBit);
xJMP(mVU.exitFunct);
eJMP.SetTarget();
}
@ -583,6 +622,8 @@ void normJump(mV, microFlagCycles& mFC)
mVUendProgram(mVU, &mFC, 2);
xMOV(gprT1, ptr32[&mVU.branch]);
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], gprT1);
if (mVU.index && THREAD_VU1)
xFastCall((void*)mVUEBit);
xJMP(mVU.exitFunct);
}
else

View File

@ -549,6 +549,9 @@ __fi void mVUinitFirstPass(microVU& mVU, uptr pState, u8* thisPtr)
void mVUDoDBit(microVU& mVU, microFlagCycles* mFC)
{
if (mVU.index && THREAD_VU1)
xTEST(ptr32[&vu1Thread.vuFBRST], (isVU1 ? 0x400 : 0x4));
else
xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x400 : 0x4));
xForwardJump32 eJMP(Jcc_Zero);
if (!isVU1 || !THREAD_VU1)
@ -564,6 +567,9 @@ void mVUDoDBit(microVU& mVU, microFlagCycles* mFC)
void mVUDoTBit(microVU& mVU, microFlagCycles* mFC)
{
if (mVU.index && THREAD_VU1)
xTEST(ptr32[&vu1Thread.vuFBRST], (isVU1 ? 0x800 : 0x8));
else
xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x800 : 0x8));
xForwardJump32 eJMP(Jcc_Zero);
if (!isVU1 || !THREAD_VU1)

View File

@ -14,7 +14,6 @@
*/
#pragma once
extern void _vu0WaitMicro();
extern void _vu0FinishMicro();