microVU: Enable T-Bit to work with MTVU

This commit is contained in:
refractionpcsx2 2021-06-27 04:55:16 +01:00
parent f925c88753
commit c77e0a3a56
10 changed files with 102 additions and 37 deletions

View File

@ -112,15 +112,15 @@ bool Gif_HandlerAD_MTVU(u8* pMem)
if (reg == 0x60)
{ // SIGNAL
GUNIT_WARN("GIF Handler - SIGNAL");
if (vu1Thread.gsInterrupts.load(std::memory_order_acquire) & VU_Thread::InterruptFlagSignal)
if (vu1Thread.mtvuInterrupts.load(std::memory_order_acquire) & VU_Thread::InterruptFlagSignal)
Console.Error("GIF Handler MTVU - Double SIGNAL Not Handled");
vu1Thread.gsSignal.store(((u64)data[1] << 32) | data[0], std::memory_order_relaxed);
vu1Thread.gsInterrupts.fetch_or(VU_Thread::InterruptFlagSignal, std::memory_order_release);
vu1Thread.mtvuInterrupts.fetch_or(VU_Thread::InterruptFlagSignal, std::memory_order_release);
}
else if (reg == 0x61)
{ // FINISH
GUNIT_WARN("GIF Handler - FINISH");
u32 old = vu1Thread.gsInterrupts.fetch_or(VU_Thread::InterruptFlagFinish, std::memory_order_relaxed);
u32 old = vu1Thread.mtvuInterrupts.fetch_or(VU_Thread::InterruptFlagFinish, std::memory_order_relaxed);
if (old & VU_Thread::InterruptFlagFinish)
Console.Error("GIF Handler MTVU - Double FINISH Not Handled");
}
@ -140,7 +140,7 @@ bool Gif_HandlerAD_MTVU(u8* pMem)
u32 wantedMsk = existingMsk | labelMsk;
wanted = ((u64)wantedMsk << 32) | wantedData;
}
vu1Thread.gsInterrupts.fetch_or(VU_Thread::InterruptFlagLabel, std::memory_order_release);
vu1Thread.mtvuInterrupts.fetch_or(VU_Thread::InterruptFlagLabel, std::memory_order_release);
}
else if (reg >= 0x63 && reg != 0x7f)
{

View File

@ -69,9 +69,9 @@ void SaveStateBase::mtvuFreeze()
Freeze(v);
}
u32 gsInterrupts = vu1Thread.gsInterrupts.load();
u32 gsInterrupts = vu1Thread.mtvuInterrupts.load();
Freeze(gsInterrupts);
vu1Thread.gsInterrupts.store(gsInterrupts);
vu1Thread.mtvuInterrupts.store(gsInterrupts);
u64 gsSignal = vu1Thread.gsSignal.load();
Freeze(gsSignal);
vu1Thread.gsSignal.store(gsSignal);
@ -113,7 +113,7 @@ void VU_Thread::Reset()
memzero(vifRegs);
for (size_t i = 0; i < 4; ++i)
vu1Thread.vuCycles[i] = 0;
vu1Thread.gsInterrupts = 0;
vu1Thread.mtvuInterrupts = 0;
}
void VU_Thread::ExecuteTaskInThread()
@ -336,10 +336,10 @@ u32 VU_Thread::Get_vuCycles()
2;
}
void VU_Thread::Get_GSChanges()
void VU_Thread::Get_MTVUChanges()
{
// Note: Atomic communication is with Gif_Unit.cpp Gif_HandlerAD_MTVU
u32 interrupts = gsInterrupts.load(std::memory_order_relaxed);
u32 interrupts = mtvuInterrupts.load(std::memory_order_relaxed);
if (!interrupts)
return;
@ -349,7 +349,7 @@ void VU_Thread::Get_GSChanges()
const u64 signal = gsSignal.load(std::memory_order_relaxed);
// If load of signal was moved after clearing the flag, the other thread could write a new value before we load without noticing the double signal
// Prevent that with release semantics
gsInterrupts.fetch_and(~InterruptFlagSignal, std::memory_order_release);
mtvuInterrupts.fetch_and(~InterruptFlagSignal, std::memory_order_release);
GUNIT_WARN("SIGNAL firing");
const u32 signalMsk = (u32)(signal >> 32);
const u32 signalData = (u32)signal;
@ -372,7 +372,7 @@ void VU_Thread::Get_GSChanges()
}
if (interrupts & InterruptFlagFinish)
{
gsInterrupts.fetch_and(~InterruptFlagFinish, std::memory_order_relaxed);
mtvuInterrupts.fetch_and(~InterruptFlagFinish, std::memory_order_relaxed);
GUNIT_WARN("Finish firing");
CSRreg.FINISH = true;
gifUnit.gsFINISH.gsFINISHFired = false;
@ -382,7 +382,7 @@ void VU_Thread::Get_GSChanges()
}
if (interrupts & InterruptFlagLabel)
{
gsInterrupts.fetch_and(~InterruptFlagLabel, std::memory_order_acquire);
mtvuInterrupts.fetch_and(~InterruptFlagLabel, std::memory_order_acquire);
// If other thread updates gsLabel for a second interrupt, that's okay. Worst case we think there's a label interrupt but gsLabel is 0
// We do not want the exchange of gsLabel to move ahead of clearing the flag, or the other thread could add more work before we clear the flag, resulting in an update with the flag unset
// acquire semantics should supply that guarantee
@ -392,6 +392,21 @@ void VU_Thread::Get_GSChanges()
const u32 labelData = (u32)label;
GSSIGLBLID.LBLID = (GSSIGLBLID.LBLID & ~labelMsk) | (labelData & labelMsk);
}
if (interrupts & InterruptFlagVUEBit)
{
mtvuInterrupts.fetch_and(~InterruptFlagVUEBit, std::memory_order_relaxed);
VU0.VI[REG_VPU_STAT].UL &= ~0x0100;
//DevCon.Warning("E-Bit registered %x", VU0.VI[REG_VPU_STAT].UL);
}
if (interrupts & InterruptFlagVUTBit)
{
mtvuInterrupts.fetch_and(~InterruptFlagVUTBit, std::memory_order_relaxed);
VU0.VI[REG_VPU_STAT].UL &= ~0x0100;
VU0.VI[REG_VPU_STAT].UL |= 0x0400;
//DevCon.Warning("T-Bit registered %x", VU0.VI[REG_VPU_STAT].UL);
hwIntcIrq(7);
}
}
void VU_Thread::KickStart(bool forceKick)
@ -423,7 +438,7 @@ void VU_Thread::WaitVU()
void VU_Thread::ExecuteVU(u32 vu_addr, u32 vif_top, u32 vif_itop)
{
MTVU_LOG("MTVU - ExecuteVU!");
Get_GSChanges(); // Clear any pending interrupts
Get_MTVUChanges(); // Clear any pending interrupts
ReserveSpace(4);
Write(MTVU_VU_EXECUTE);
Write(vu_addr);
@ -435,7 +450,7 @@ void VU_Thread::ExecuteVU(u32 vu_addr, u32 vif_top, u32 vif_itop)
u32 cycles = std::min(Get_vuCycles(), 3000u);
cpuRegs.cycle += cycles * EmuConfig.Speedhacks.EECycleSkip;
VU0.cycle += cycles * EmuConfig.Speedhacks.EECycleSkip;
Get_GSChanges();
Get_MTVUChanges();
}
void VU_Thread::VifUnpack(vifStruct& _vif, VIFregisters& _vifRegs, u8* data, u32 size)

View File

@ -52,9 +52,11 @@ public:
InterruptFlagFinish = 1 << 0,
InterruptFlagSignal = 1 << 1,
InterruptFlagLabel = 1 << 2,
InterruptFlagVUEBit = 1 << 3,
InterruptFlagVUTBit = 1 << 4,
};
std::atomic<u32> gsInterrupts; // Used for GS Signal, Finish etc
std::atomic<u32> mtvuInterrupts; // Used for GS Signal, Finish etc, plus VU End/T-Bit
std::atomic<u64> gsLabel; // Used for GS Label command
std::atomic<u64> gsSignal; // Used for GS Signal command
@ -72,7 +74,7 @@ public:
// Waits till MTVU is done processing
void WaitVU();
void Get_GSChanges();
void Get_MTVUChanges();
void ExecuteVU(u32 vu_addr, u32 vif_top, u32 vif_itop);

View File

@ -37,7 +37,9 @@ void vu1ResetRegs()
void vu1Finish(bool add_cycles) {
if (THREAD_VU1) {
if (VU0.VI[REG_VPU_STAT].UL & 0x100) DevCon.Error("MTVU: VU0.VI[REG_VPU_STAT].UL & 0x100");
//if (VU0.VI[REG_VPU_STAT].UL & 0x100) DevCon.Error("MTVU: VU0.VI[REG_VPU_STAT].UL & 0x100");
vu1Thread.WaitVU();
vu1Thread.Get_MTVUChanges();
return;
}
u32 vu1cycles = VU1.cycle;
@ -58,8 +60,16 @@ void vu1Finish(bool add_cycles) {
void __fastcall vu1ExecMicro(u32 addr)
{
if (THREAD_VU1) {
vu1Thread.ExecuteVU(addr, vif1Regs.top, vif1Regs.itop);
VU0.VI[REG_VPU_STAT].UL &= ~0xFF00;
// This is a bit of a hack, but with good reason.
// Most of the time with MTVU we want to pretend the VU has finished quickly, so as to gain the benefit of running it on another thread.
// However, in T-Bit games, when the T-Bit is enabled we need to wait in case a T-Bit happens, so we need to set "Busy".
// We shouldn't do this all the time, as it negates the benefit of the extra thread and causes games like Ratchet & Clank to be no faster.
if(VU0.VI[REG_FBRST].UL & 0x800)
VU0.VI[REG_VPU_STAT].UL |= 0x0100;
vu1Thread.ExecuteVU(addr, vif1Regs.top, vif1Regs.itop);
return;
}
static int count = 0;

View File

@ -26,7 +26,8 @@ void BaseVUmicroCPU::ExecuteBlock(bool startUp) {
if (m_Idx && THREAD_VU1)
{
vu1Thread.Get_GSChanges();
vu1Thread.Get_MTVUChanges();
return;
}
if (!(stat & test)) return;

View File

@ -20,6 +20,7 @@
#include "Gif_Unit.h"
#include "VUmicro.h"
#include "newVif.h"
#include "MTVU.h"
u32 g_vif1Cycles = 0;
@ -232,6 +233,9 @@ __fi void vif1VUFinish()
{
if (VU0.VI[REG_VPU_STAT].UL & 0x500)
{
if(THREAD_VU1)
vu1Thread.Get_MTVUChanges();
CPU_INT(VIF_VU1_FINISH, 128);
return;
}

View File

@ -73,8 +73,10 @@ void mVUinit(microVU& mVU, uint vuIndex) {
// Resets Rec Data
void mVUreset(microVU& mVU, bool resetReserve) {
if (THREAD_VU1)
{
DevCon.Warning("mVU Reset");
// If MTVU is toggled on during gameplay we need to flush the running VU1 program, else it gets in a mess
if (VU0.VI[REG_VPU_STAT].UL & 0x100)
{
@ -346,6 +348,7 @@ void recMicroVU0::Reset() {
// Resets the microVU1 recompiler state.
// Does nothing if the m_Reserved assertion check fails.
void recMicroVU1::Reset() {
if(!pxAssertDev(m_Reserved, "MicroVU1 CPU Provider has not been reserved prior to reset!")) return;
// Wait until the MTVU thread is done processing before touching recompiler state.
vu1Thread.WaitVU();
// Consume any interrupt flags still pending in mtvuInterrupts before the reset.
vu1Thread.Get_MTVUChanges();
mVUreset(microVU1, true);
}
@ -388,7 +391,7 @@ void recMicroVU1::Execute(u32 cycles) {
VU1.VI[REG_TPC].UL <<= 3;
((mVUrecCall)microVU1.startFunct)(VU1.VI[REG_TPC].UL, cycles);
VU1.VI[REG_TPC].UL >>= 3;
if(microVU1.regs().flags & 0x4)
if(microVU1.regs().flags & 0x4 && !THREAD_VU1)
{
microVU1.regs().flags &= ~0x4;
hwIntcIrq(7);

View File

@ -114,8 +114,10 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit) {
if (!mVU.index || !THREAD_VU1) {
xAND(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? ~0x100 : ~0x001)); // VBS0/VBS1 flag
}
else
xFastCall((void*)mVUTBit);
}
if (isEbit != 2) { // Save PC, and Jump to Exit Point
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
xJMP(mVU.exitFunct);
@ -214,6 +216,8 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit) {
if (!mVU.index || !THREAD_VU1) {
xAND(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? ~0x100 : ~0x001)); // VBS0/VBS1 flag
}
else
xFastCall((void*)mVUEBit);
}
if (isEbit != 2 && isEbit != 3) { // Save PC, and Jump to Exit Point
@ -280,8 +284,10 @@ void normBranch(mV, microFlagCycles& mFC) {
u32 tempPC = iPC;
xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x400 : 0x4));
xForwardJump32 eJMP(Jcc_Zero);
xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x200 : 0x2));
xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT);
if (!mVU.index || !THREAD_VU1) {
xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x200 : 0x2));
xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT);
}
iPC = branchAddr(mVU)/4;
mVUDTendProgram(mVU, &mFC, 1);
eJMP.SetTarget();
@ -292,8 +298,10 @@ void normBranch(mV, microFlagCycles& mFC) {
u32 tempPC = iPC;
xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x800 : 0x8));
xForwardJump32 eJMP(Jcc_Zero);
xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x400 : 0x4));
xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT);
if (!mVU.index || !THREAD_VU1) {
xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x400 : 0x4));
xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT);
}
iPC = branchAddr(mVU)/4;
mVUDTendProgram(mVU, &mFC, 1);
eJMP.SetTarget();
@ -393,8 +401,10 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc) {
u32 tempPC = iPC;
xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x800 : 0x8));
xForwardJump32 eJMP(Jcc_Zero);
xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x400 : 0x4));
xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT);
if (!mVU.index || !THREAD_VU1) {
xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x400 : 0x4));
xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT);
}
mVUDTendProgram(mVU, &mFC, 2);
xCMP(ptr16[&mVU.branch], 0);
xForwardJump32 tJMP(xInvertCond((JccComparisonType)JMPcc));
@ -414,8 +424,10 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc) {
u32 tempPC = iPC;
xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x400 : 0x4));
xForwardJump32 eJMP(Jcc_Zero);
xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x200 : 0x2));
xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT);
if (!mVU.index || !THREAD_VU1) {
xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x200 : 0x2));
xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT);
}
mVUDTendProgram(mVU, &mFC, 2);
xCMP(ptr16[&mVU.branch], 0);
xForwardJump32 dJMP(xInvertCond((JccComparisonType)JMPcc));
@ -548,8 +560,10 @@ void normJump(mV, microFlagCycles& mFC) {
{
xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x400 : 0x4));
xForwardJump32 eJMP(Jcc_Zero);
xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x200 : 0x2));
xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT);
if (!mVU.index || !THREAD_VU1) {
xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x200 : 0x2));
xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT);
}
mVUDTendProgram(mVU, &mFC, 2);
xMOV(gprT1, ptr32[&mVU.branch]);
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], gprT1);
@ -560,8 +574,10 @@ void normJump(mV, microFlagCycles& mFC) {
{
xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x800 : 0x8));
xForwardJump32 eJMP(Jcc_Zero);
xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x400 : 0x4));
xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT);
if (!mVU.index || !THREAD_VU1) {
xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x400 : 0x4));
xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT);
}
mVUDTendProgram(mVU, &mFC, 2);
xMOV(gprT1, ptr32[&mVU.branch]);
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], gprT1);

View File

@ -514,8 +514,10 @@ void mVUDoDBit(microVU& mVU, microFlagCycles* mFC)
{
xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x400 : 0x4));
xForwardJump32 eJMP(Jcc_Zero);
xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x200 : 0x2));
xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT);
if (!isVU1 || !THREAD_VU1) {
xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x200 : 0x2));
xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT);
}
incPC(1);
mVUDTendProgram(mVU, mFC, 1);
incPC(-1);
@ -526,8 +528,10 @@ void mVUDoTBit(microVU& mVU, microFlagCycles* mFC)
{
xTEST(ptr32[&VU0.VI[REG_FBRST].UL], (isVU1 ? 0x800 : 0x8));
xForwardJump32 eJMP(Jcc_Zero);
xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x400 : 0x4));
xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT);
if (!isVU1 || !THREAD_VU1) {
xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? 0x400 : 0x4));
xOR(ptr32[&mVU.regs().flags], VUFLAG_INTCINTERRUPT);
}
incPC(1);
mVUDTendProgram(mVU, mFC, 1);
incPC(-1);

View File

@ -180,6 +180,16 @@ static void __fc mVUwarningRegAccess(u32 prog, u32 pc) {
Console.Error("microVU0 Warning: Accessing VU1 Regs! [%04x] [%x]", pc, prog);
}
// Flags a VU1 T-Bit event for the MTVU interrupt handling.
// mVUDTendProgram emits a call to this instead of clearing VPU_STAT directly
// when compiling for VU1 with THREAD_VU1 set. The flag is later consumed by
// VU_Thread::Get_MTVUChanges, which updates VPU_STAT and raises the interrupt.
static void __fc mVUTBit() {
	const u32 previousFlags = vu1Thread.mtvuInterrupts.fetch_or(VU_Thread::InterruptFlagVUTBit, std::memory_order_release);
	const bool tbitStillPending = (previousFlags & VU_Thread::InterruptFlagVUTBit) != 0;

	// The flag was already set, so the earlier T-Bit has not been picked up yet.
	if (tbitStillPending) {
		DevCon.Warning("Old TBit not registered");
	}
}
// Flags a VU1 E-Bit (end of program) event for the MTVU interrupt handling.
// mVUendProgram emits a call to this instead of clearing VPU_STAT directly
// when compiling for VU1 with THREAD_VU1 set. The flag is later consumed by
// VU_Thread::Get_MTVUChanges, which clears bit 0x0100 of VPU_STAT.
static void __fc mVUEBit() {
	// The previous flag value is intentionally ignored: unlike the T-Bit path,
	// nothing is reported when the flag was already set.
	vu1Thread.mtvuInterrupts.fetch_or(VU_Thread::InterruptFlagVUEBit, std::memory_order_release);
}
static inline u32 branchAddrN(const mV)
{
pxAssumeDev(islowerOP, "MicroVU: Expected Lower OP code for valid branch addr.");