From 83143bd42e8c174e1872402c76a6bd20b024d2e7 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Mon, 6 Sep 2021 14:13:06 +0100 Subject: [PATCH] VU Int: Rewrote most of the FMAC/IALU handling, now with 2x performance --- pcsx2/VU.h | 16 +- pcsx2/VU0microInterp.cpp | 34 ++- pcsx2/VU1microInterp.cpp | 24 +- pcsx2/VUmicro.h | 2 +- pcsx2/VUops.cpp | 572 ++++++++++++++++++--------------------- pcsx2/VUops.h | 2 +- 6 files changed, 327 insertions(+), 323 deletions(-) diff --git a/pcsx2/VU.h b/pcsx2/VU.h index 1c3e592196..9b65185ca3 100644 --- a/pcsx2/VU.h +++ b/pcsx2/VU.h @@ -109,9 +109,11 @@ struct efuPipe struct fmacPipe { int enable; - int reg; + u32 regupper; + u32 reglower; int flagreg; - int xyzw; + u32 xyzwupper; + u32 xyzwlower; u32 sCycle; u32 Cycle; u32 macflag; @@ -184,10 +186,16 @@ struct __aligned16 VURegs u32 VIOldValue; u32 VIRegNumber; - fmacPipe fmac[8]; + fmacPipe fmac[4]; + u32 fmacreadpos; + u32 fmacwritepos; + u32 fmaccount; fdivPipe fdiv; efuPipe efu; - ialuPipe ialu[8]; + ialuPipe ialu[4]; + u32 ialureadpos; + u32 ialuwritepos; + u32 ialucount; VURegs() { diff --git a/pcsx2/VU0microInterp.cpp b/pcsx2/VU0microInterp.cpp index 3bf15f5ac9..92b39eacbc 100644 --- a/pcsx2/VU0microInterp.cpp +++ b/pcsx2/VU0microInterp.cpp @@ -81,27 +81,28 @@ static void _vu0Exec(VURegs* VU) lregs.cycles = 0; u32 cyclesBeforeOp = VU0.cycle-1; -#ifndef INT_VUSTALLHACK _vuTestUpperStalls(VU, &uregs); -#endif /* check upper flags */ - if (ptr[1] & 0x80000000) { /* I flag */ + if (ptr[1] & 0x80000000) // I flag + { _vuTestPipes(VU); + if (VU->VIBackupCycles > 0) VU->VIBackupCycles -= std::min((u8)(VU0.cycle - cyclesBeforeOp), VU->VIBackupCycles); + _vu0ExecUpper(VU, ptr); VU->VI[REG_I].UL = ptr[0]; memset(&lregs, 0, sizeof(lregs)); - } else { + } + else + { VU->code = ptr[0]; - VU0regs_LOWER_OPCODE[VU->code >> 25](&lregs); -#ifndef INT_VUSTALLHACK _vuTestLowerStalls(VU, &lregs); -#endif + _vuTestPipes(VU); if (VU->VIBackupCycles > 0) VU->VIBackupCycles -= 
std::min((u8)(VU0.cycle - cyclesBeforeOp), VU->VIBackupCycles); @@ -153,6 +154,10 @@ static void _vu0Exec(VURegs* VU) } } } + + if (uregs.pipe == VUPIPE_FMAC || lregs.pipe == VUPIPE_FMAC) + _vuClearFMAC(VU); + _vuAddUpperStalls(VU, &uregs); _vuAddLowerStalls(VU, &lregs); @@ -179,6 +184,10 @@ static void _vu0Exec(VURegs* VU) vif0Regs.stat.VEW = false; } } + + // Progress the write position of the FMAC pipeline by one place + if (uregs.pipe == VUPIPE_FMAC || lregs.pipe == VUPIPE_FMAC) + VU->fmacwritepos = ++VU->fmacwritepos & 3; } void vu0Exec(VURegs* VU) @@ -203,6 +212,17 @@ InterpVU0::InterpVU0() IsInterpreter = true; } +void InterpVU0::Reset() +{ + DevCon.Warning("VU0 Int Reset"); + VU0.fmacwritepos = 0; + VU0.fmacreadpos = 0; + VU0.fmaccount = 0; + VU0.ialuwritepos = 0; + VU0.ialureadpos = 0; + VU0.ialucount = 0; + +} void InterpVU0::SetStartPC(u32 startPC) { VU0.start_pc = startPC; diff --git a/pcsx2/VU1microInterp.cpp b/pcsx2/VU1microInterp.cpp index 521467928d..243f290f5a 100644 --- a/pcsx2/VU1microInterp.cpp +++ b/pcsx2/VU1microInterp.cpp @@ -92,11 +92,13 @@ static void _vu1Exec(VURegs* VU) _vuTestUpperStalls(VU, &uregs); /* check upper flags */ - if (ptr[1] & 0x80000000) - { /* I flag */ + if (ptr[1] & 0x80000000) // I Flag (Lower op is a float) + { _vuTestPipes(VU); + if (VU->VIBackupCycles > 0) VU->VIBackupCycles -= std::min((u8)(VU1.cycle - cyclesBeforeOp), VU->VIBackupCycles); + _vu1ExecUpper(VU, ptr); VU->VI[REG_I].UL = ptr[0]; @@ -174,10 +176,11 @@ static void _vu1Exec(VURegs* VU) } } } - _vuAddUpperStalls(VU, &uregs); + if (uregs.pipe == VUPIPE_FMAC || lregs.pipe == VUPIPE_FMAC) + _vuClearFMAC(VU); - //if (!(ptr[1] & 0x80000000)) - _vuAddLowerStalls(VU, &lregs); + _vuAddUpperStalls(VU, &uregs); + _vuAddLowerStalls(VU, &lregs); if (VU->branch > 0) { @@ -245,6 +248,10 @@ static void _vu1Exec(VURegs* VU) } } } + + // Progress the write position of the FMAC pipeline by one place + if (uregs.pipe == VUPIPE_FMAC || lregs.pipe == VUPIPE_FMAC) + 
VU->fmacwritepos = ++VU->fmacwritepos & 3; } void vu1Exec(VURegs* VU) @@ -272,6 +279,13 @@ InterpVU1::InterpVU1() void InterpVU1::Reset() { + DevCon.Warning("VU1 Int Reset"); + VU1.fmacwritepos = 0; + VU1.fmacreadpos = 0; + VU1.fmaccount = 0; + VU1.ialuwritepos = 0; + VU1.ialureadpos = 0; + VU1.ialucount = 0; vu1Thread.WaitVU(); } diff --git a/pcsx2/VUmicro.h b/pcsx2/VUmicro.h index 2753925c34..4195816ca7 100644 --- a/pcsx2/VUmicro.h +++ b/pcsx2/VUmicro.h @@ -170,7 +170,7 @@ public: void Reserve() { } void Shutdown() noexcept { } - void Reset() { } + void Reset(); void Step(); void SetStartPC(u32 startPC); diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index 4bdcaac46e..bec6bec97e 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -47,101 +47,66 @@ u32 laststall = 0; static __aligned16 VECTOR RDzero; -static __ri bool _vuFMACflush(VURegs * VU) { +static __ri bool _vuFMACflush(VURegs * VU) +{ bool didflush = false; - int startpos = 0; - u32 cycle = 9999; - for (int startpipe = 0; startpipe < 8; startpipe++) + VUM_LOG("Flushing FMACs"); + + for (int i = VU->fmacreadpos; VU->fmaccount > 0; i = (i + 1) & 3) { - if (VU->fmac[startpipe].enable == 0) continue; - - if ((VU->fmac[startpipe].sCycle + VU->fmac[startpipe].Cycle) <= VU->cycle) + if ((VU->cycle - VU->fmac[i].sCycle) < VU->fmac[i].Cycle) { - if ((VU->cycle - (VU->fmac[startpipe].sCycle + VU->fmac[startpipe].Cycle)) < cycle) - { - VUM_LOG("Setting start pos to %d", startpipe); - cycle = (s32)(VU->cycle - (VU->fmac[startpipe].sCycle + VU->fmac[startpipe].Cycle)); - startpos = startpipe; - } - else - VUM_LOG("Nope for pipe %d, cycle %d greater than %d", startpipe, (s32)(VU->cycle - (VU->fmac[startpipe].sCycle + VU->fmac[startpipe].Cycle)), cycle); + VUM_LOG("Not flushing FMAC pipe[%d] (macflag=%x clipflag=%x statusflag=%x) r %d w %d", i, VU->fmac[i].macflag, VU->fmac[i].clipflag, VU->fmac[i].statusflag, VU->fmacreadpos, VU->fmacwritepos); + return didflush; } + + VUM_LOG("flushing FMAC pipe[%d] (macflag=%x 
clipflag=%x statusflag=%x) r %d w %d", i, VU->fmac[i].macflag, VU->fmac[i].clipflag, VU->fmac[i].statusflag, VU->fmacreadpos, VU->fmacwritepos); + + // Clip flags (Affected by CLIP instruction) + if (VU->fmac[i].flagreg & (1 << REG_CLIP_FLAG)) + VU->VI[REG_CLIP_FLAG].UL = VU->fmac[i].clipflag; + + // Normal FMAC instructions only affect Z/S/I/O, D/I are modified only by FDIV instructions + // Sticky flags (Affected by FSSET) + if (VU->fmac[i].flagreg & (1 << REG_STATUS_FLAG)) + VU->VI[REG_STATUS_FLAG].UL = (VU->VI[REG_STATUS_FLAG].UL & 0x30) | (VU->fmac[i].statusflag & 0xFC0) | (VU->fmac[i].statusflag & 0xF); + else + VU->VI[REG_STATUS_FLAG].UL = (VU->VI[REG_STATUS_FLAG].UL & 0xFF0) | (VU->fmac[i].statusflag & 0xF) | ((VU->fmac[i].statusflag & 0xF) << 6); + VU->VI[REG_MAC_FLAG].UL = VU->fmac[i].macflag; + + VU->fmacreadpos = ++VU->fmacreadpos & 3; + VU->fmaccount--; + + didflush = true; } - VUM_LOG("Flushing FMAC starting at %d", startpos); - u32 lastmac, lastclip, laststatus; - lastmac = lastclip = laststatus = 9999; - - for (int i=0; i<8; i++) { - int currentpipe = (i + startpos) % 8; - - if (VU->fmac[currentpipe].enable == 0) continue; - - if ((VU->cycle - VU->fmac[currentpipe].sCycle) >= VU->fmac[currentpipe].Cycle) { - - - VU->fmac[currentpipe].enable = 0; - - VUM_LOG("Writing back flags"); - if (VU->fmac[currentpipe].flagreg & (1 << REG_STATUS_FLAG)) - { - if ((VU->cycle - VU->fmac[currentpipe].sCycle) < laststatus) - { - VUM_LOG("flushing FMAC Status Write pipe[%d] (status=%x) Cur Cycle %d Expected cycle %d", currentpipe, (VU->VI[REG_STATUS_FLAG].UL & 0xF30) | (VU->fmac[currentpipe].statusflag & 0x3CF), VU->cycle, VU->fmac[currentpipe].sCycle + VU->fmac[currentpipe].Cycle); - VU->VI[REG_STATUS_FLAG].UL = (VU->VI[REG_STATUS_FLAG].UL & 0x3F) | (VU->fmac[currentpipe].statusflag & 0xFC0); - laststatus = (VU->cycle - VU->fmac[currentpipe].sCycle); - } - } - else if (VU->fmac[currentpipe].flagreg & (1 << REG_CLIP_FLAG)) - { - if ((VU->cycle - 
VU->fmac[currentpipe].sCycle) < lastclip) - { - VUM_LOG("flushing FMAC Clip Write pipe[%d] (clip=%x) Cur Cycle %d Expected cycle %d", currentpipe, VU->fmac[currentpipe].clipflag, VU->cycle, VU->fmac[currentpipe].sCycle + VU->fmac[currentpipe].Cycle); - VU->VI[REG_CLIP_FLAG].UL = VU->fmac[currentpipe].clipflag; - lastclip = (VU->cycle - VU->fmac[currentpipe].sCycle); - } - } - else - { - if ((VU->cycle - VU->fmac[currentpipe].sCycle) < lastmac) - { - VUM_LOG("flushing FMAC pipe[%d] (macflag=%x status=%x) Cur Cycle %d Expected cycle %d", currentpipe, VU->fmac[currentpipe].macflag, (VU->VI[REG_STATUS_FLAG].UL & 0xF30) | (VU->fmac[currentpipe].statusflag & 0x3CF), VU->cycle, VU->fmac[currentpipe].sCycle + VU->fmac[currentpipe].Cycle); - // FMAC only affectx Z/S/I/O - VU->VI[REG_STATUS_FLAG].UL = (VU->VI[REG_STATUS_FLAG].UL & 0xFF0) | ((VU->fmac[currentpipe].statusflag & 0xF) | ((VU->fmac[currentpipe].statusflag & 0xF) << 6)); - VU->VI[REG_MAC_FLAG].UL = VU->fmac[currentpipe].macflag; - lastmac = (VU->cycle - VU->fmac[currentpipe].sCycle); - } - - } - didflush = true; - } - } return didflush; } -static __ri bool _vuIALUflush(VURegs* VU) { +static __ri bool _vuIALUflush(VURegs* VU) +{ bool didflush = false; VUM_LOG("Flushing ALU stalls"); - for (int i = 0; i < 8; i++) { - int currentpipe = i; + for (int i = VU->ialureadpos; VU->ialucount > 0; i = (i + 1) & 3) + { + if ((VU->cycle - VU->ialu[i].sCycle) < VU->ialu[i].Cycle) return didflush; - if (VU->ialu[currentpipe].enable == 0) continue; - - if ((VU->cycle - VU->ialu[currentpipe].sCycle) >= VU->ialu[currentpipe].Cycle) { - VU->ialu[currentpipe].enable = 0; - didflush = true; - } + VU->ialureadpos = ++VU->ialureadpos & 3; + VU->ialucount--; + didflush = true; } return didflush; } -static __ri bool _vuFDIVflush(VURegs * VU) { +static __ri bool _vuFDIVflush(VURegs * VU) +{ if (VU->fdiv.enable == 0) return false; - if ((VU->cycle - VU->fdiv.sCycle) >= VU->fdiv.Cycle) { + if ((VU->cycle - VU->fdiv.sCycle) >= VU->fdiv.Cycle) 
+ { VUM_LOG("flushing FDIV pipe"); VU->fdiv.enable = 0; @@ -154,11 +119,13 @@ static __ri bool _vuFDIVflush(VURegs * VU) { return false; } -static __ri bool _vuEFUflush(VURegs * VU) { +static __ri bool _vuEFUflush(VURegs * VU) +{ if (VU->efu.enable == 0) return false; - if ((VU->cycle - VU->efu.sCycle) >= VU->efu.Cycle) { -// VUM_LOG("flushing EFU pipe"); + if ((VU->cycle - VU->efu.sCycle) >= VU->efu.Cycle) + { + VUM_LOG("flushing EFU pipe"); VU->efu.enable = 0; VU->VI[REG_P].UL = VU->efu.reg.UL; @@ -172,99 +139,68 @@ static __ri bool _vuEFUflush(VURegs * VU) { // called at end of program void _vuFlushAll(VURegs* VU) { - int nRepeat = 1, i; + int i = 0; - u32 startpos = 0; - s32 cycle = 9999; - - // Calculate lowest active FMAC pipe - for (int startpipe = 0; startpipe < 8; startpipe++) + if (VU->fdiv.enable) { - if (VU->fmac[startpipe].enable == 0) continue; + VU->fdiv.enable = 0; + VU->VI[REG_Q].UL = VU->fdiv.reg.UL; + VU->VI[REG_STATUS_FLAG].UL = (VU->VI[REG_STATUS_FLAG].UL & 0xFCF) | (VU->fdiv.statusflag & 0xC30); - if ((VU->fmac[startpipe].sCycle + VU->fmac[startpipe].Cycle) <= VU->cycle) - { - if ((VU->cycle - (VU->fmac[startpipe].sCycle + VU->fmac[startpipe].Cycle)) < cycle) - { - VUM_LOG("Setting start pos to %d", startpipe); - cycle = (s32)(VU->cycle - (VU->fmac[startpipe].sCycle + VU->fmac[startpipe].Cycle)); - startpos = startpipe; - } - else - VUM_LOG("Nope for pipe %d, cycle %d greater than %d", startpipe, (s32)(VU->cycle - (VU->fmac[startpipe].sCycle + VU->fmac[startpipe].Cycle)), cycle); - } + if ((VU->cycle - VU->fdiv.sCycle) < VU->fdiv.Cycle) + VU->cycle = VU->fdiv.sCycle + VU->fdiv.Cycle; } - do { - nRepeat = 0; + if (VU->efu.enable) + { + VU->efu.enable = 0; + VU->VI[REG_P].UL = VU->efu.reg.UL; - for (i=0; i<8; i++) { - int currentpipe = (i + startpos) % 8; + if ((VU->cycle - VU->efu.sCycle) < VU->efu.Cycle) + VU->cycle = VU->efu.sCycle + VU->efu.Cycle; + } - if (VU->fmac[currentpipe].enable == 0) continue; + for (i=VU->fmacreadpos; VU->fmaccount 
> 0; i = (i + 1) & 3) + { + VUM_LOG("flushing FMAC pipe[%d] (macflag=%x)", i, VU->fmac[i].macflag); - nRepeat = 1; + // Clip flags (Affected by CLIP instruction) + if (VU->fmac[i].flagreg & (1 << REG_CLIP_FLAG)) + VU->VI[REG_CLIP_FLAG].UL = VU->fmac[i].clipflag; - if ((VU->cycle - VU->fmac[currentpipe].sCycle) >= VU->fmac[currentpipe].Cycle) { - VUM_LOG("flushing FMAC pipe[%d] (macflag=%x)", i, VU->fmac[currentpipe].macflag); + // Normal FMAC instructions only affect Z/S/I/O, D/I are modified only by FDIV instructions + // Sticky flags (Affected by FSSET) + if (VU->fmac[i].flagreg & (1 << REG_STATUS_FLAG)) + VU->VI[REG_STATUS_FLAG].UL = (VU->VI[REG_STATUS_FLAG].UL & 0x30) | (VU->fmac[i].statusflag & 0xFC0) | (VU->fmac[i].statusflag & 0xF); + else + VU->VI[REG_STATUS_FLAG].UL = (VU->VI[REG_STATUS_FLAG].UL & 0xFF0) | (VU->fmac[i].statusflag & 0xF) | ((VU->fmac[i].statusflag & 0xF) << 6); + VU->VI[REG_MAC_FLAG].UL = VU->fmac[i].macflag; - VU->fmac[currentpipe].enable = 0; - if (VU->fmac[currentpipe].flagreg & (1 << REG_STATUS_FLAG)) - VU->VI[REG_STATUS_FLAG].UL = (VU->VI[REG_STATUS_FLAG].UL & 0x3F) | (VU->fmac[currentpipe].statusflag & 0xFC0); - else if (VU->fmac[currentpipe].flagreg & (1 << REG_CLIP_FLAG)) - VU->VI[REG_CLIP_FLAG].UL = VU->fmac[currentpipe].clipflag; - else - { - // FMAC only affectx Z/S/I/O - VU->VI[REG_STATUS_FLAG].UL = (VU->VI[REG_STATUS_FLAG].UL & 0xFF0) | ((VU->fmac[currentpipe].statusflag & 0xF) | ((VU->fmac[currentpipe].statusflag & 0xF) << 6)); - VU->VI[REG_MAC_FLAG].UL = VU->fmac[currentpipe].macflag; - } - } - } + VU->fmacreadpos = ++VU->fmacreadpos & 3; - if (VU->fdiv.enable ) { + if((VU->cycle - VU->fmac[i].sCycle) < VU->fmac[i].Cycle) + VU->cycle = VU->fmac[i].sCycle + VU->fmac[i].Cycle; - nRepeat = 1; + VU->fmaccount--; + } - if ((VU->cycle - VU->fdiv.sCycle) >= VU->fdiv.Cycle) { - VUM_LOG("flushing FDIV pipe"); + for (i = VU->ialureadpos; VU->ialucount > 0; i = (i + 1) & 3) + { + VU->ialureadpos = ++VU->ialureadpos & 3; - 
VU->fdiv.enable = 0; - VU->VI[REG_Q].UL = VU->fdiv.reg.UL; - VU->VI[REG_STATUS_FLAG].UL = (VU->VI[REG_STATUS_FLAG].UL & 0xFCF) | (VU->fdiv.statusflag & 0xC30); - } - } + if ((VU->cycle - VU->ialu[i].sCycle) < VU->ialu[i].Cycle) + VU->cycle = VU->ialu[i].sCycle + VU->ialu[i].Cycle; - if (VU->efu.enable) { - - nRepeat = 1; - - if ((VU->cycle - VU->efu.sCycle) >= VU->efu.Cycle) { - // VUM_LOG("flushing EFU pipe"); - - VU->efu.enable = 0; - VU->VI[REG_P].UL = VU->efu.reg.UL; - } - } - - for (i = 0; i < 8; i++) { - int currentpipe = i; - - if (VU->ialu[currentpipe].enable == 0) continue; - nRepeat = 1; - if ((VU->cycle - VU->ialu[currentpipe].sCycle) >= VU->ialu[currentpipe].Cycle) { - VU->ialu[currentpipe].enable = 0; - } - } - - VU->cycle++; - } while(nRepeat); + VU->ialucount--; + } } -__fi void _vuTestPipes(VURegs * VU) { +__fi void _vuTestPipes(VURegs * VU) +{ bool flushed; - do { + do + { flushed = false; flushed |= _vuFMACflush(VU); flushed |= _vuFDIVflush(VU); @@ -273,72 +209,45 @@ __fi void _vuTestPipes(VURegs * VU) { } while (flushed == true); } -static void __fastcall _vuFMACTestStall(VURegs * VU, int reg, int xyzw) { - int i; +static void __fastcall _vuFMACTestStall(VURegs* VU, int reg, int xyzw) +{ + u32 i = 0; - for (i=0; i<8; i++) { - if (VU->fmac[i].enable == 0) continue; - if ((VU->cycle - VU->fmac[i].sCycle) >= VU->fmac[i].Cycle) continue; - if (VU->fmac[i].reg == reg && - VU->fmac[i].xyzw & xyzw) break; - } + for (int currentpipe = VU->fmacreadpos; i < VU->fmaccount; currentpipe = (currentpipe + 1) & 3, i++) + { + //Check if enough cycles have passed for this fmac position + if ((VU->cycle - VU->fmac[currentpipe].sCycle) >= VU->fmac[currentpipe].Cycle) continue; - if (i == 8) return; + // Check if the regs match + if ((VU->fmac[currentpipe].regupper == reg && + VU->fmac[currentpipe].xyzwupper & xyzw) + || (VU->fmac[currentpipe].reglower == reg && + VU->fmac[currentpipe].xyzwlower & xyzw)) + { + u32 newCycle = VU->fmac[currentpipe].Cycle + 
VU->fmac[currentpipe].sCycle; - u32 newCycle = VU->fmac[i].Cycle + VU->fmac[i].sCycle; - - VUM_LOG("FMAC[%d] stall %d", i, newCycle - VU->cycle); - if(newCycle > VU->cycle) - VU->cycle = newCycle; - -} -u32 regcycle = 0; -static __ri void __fastcall _vuFMACAdd(VURegs * VU, _VURegsNum* VUregsn) { - int i; - - //If it's an FMAC which doesn't modify FMAC flags, just exit, no need to queue - /* find a free fmac pipe */ - for (i=0; i<8; i++) { - if (VU->fmac[i].enable == 1) continue; - break; - } - - if (i < 8) { - VUM_LOG("adding FMAC pipe[%d]; reg=%x xyzw=%x flagreg=%x target=%x current %x", i, VUregsn->VFwrite, VUregsn->VFwxyzw, VUregsn->VIwrite, VU->cycle + 4, VU->cycle); - VU->fmac[i].enable = 1; - VU->fmac[i].sCycle = VU->cycle; - VU->fmac[i].Cycle = 4; - VU->fmac[i].reg = VUregsn->VFwrite; - VU->fmac[i].xyzw = VUregsn->VFwxyzw; - VU->fmac[i].macflag = VU->macflag; - VU->fmac[i].statusflag = VU->statusflag; - VU->fmac[i].clipflag = VU->clipflag; - VU->fmac[i].flagreg = VUregsn->VIwrite; - } else { - Console.Error("*PCSX2*: error , out of fmacs %d", VU->cycle); + VUM_LOG("FMAC[%d] stall %d", currentpipe, newCycle - VU->cycle); + if (newCycle > VU->cycle) + VU->cycle = newCycle; + } } } -static __ri void __fastcall _vuFDIVAdd(VURegs * VU, int cycles) { - VUM_LOG("adding FDIV pipe"); - - VU->fdiv.enable = 1; - VU->fdiv.sCycle = VU->cycle; - VU->fdiv.Cycle = cycles; - VU->fdiv.reg.F = VU->q.F; - VU->fdiv.statusflag = VU->statusflag; +static __fi void _vuTestFMACStalls(VURegs* VU, _VURegsNum* VUregsn) +{ + if (VUregsn->VFread0) + { + _vuFMACTestStall(VU, VUregsn->VFread0, VUregsn->VFr0xyzw); + } + if (VUregsn->VFread1) + { + _vuFMACTestStall(VU, VUregsn->VFread1, VUregsn->VFr1xyzw); + } } -static __ri void __fastcall _vuEFUAdd(VURegs * VU, int cycles) { - VUM_LOG("adding EFU pipe for %d cycles\n", cycles); - - VU->efu.enable = 1; - VU->efu.sCycle = VU->cycle; - VU->efu.Cycle = cycles; - VU->efu.reg.F = VU->p.F; -} - -static __ri void __fastcall _vuFlushFDIV(VURegs * 
VU) { +static __fi void _vuTestFDIVStalls(VURegs * VU, _VURegsNum *VUregsn) +{ + _vuTestFMACStalls(VU, VUregsn); if (VU->fdiv.enable != 0) { @@ -349,13 +258,19 @@ static __ri void __fastcall _vuFlushFDIV(VURegs * VU) { } } -static __ri void __fastcall _vuFlushEFU(VURegs * VU, bool isWait = false) { +static __fi void _vuTestEFUStalls(VURegs* VU, _VURegsNum* VUregsn) +{ + _vuTestFMACStalls(VU, VUregsn); + if (VU->efu.enable == 0) return; - if (isWait) + if (VUregsn->cycles == 0) // WAITP { - VU->cycle = VU->efu.Cycle - 1; + u32 newCycle = VU->efu.sCycle + VU->efu.Cycle - 1; + VUM_LOG("waiting EFU pipe %d", newCycle - VU->cycle); + if (newCycle > VU->cycle) + VU->cycle = newCycle; VU->efu.sCycle = VU->cycle; VU->efu.Cycle = 1; } @@ -366,101 +281,39 @@ static __ri void __fastcall _vuFlushEFU(VURegs * VU, bool isWait = false) { if (newCycle > VU->cycle) VU->cycle = newCycle; } - } -static __fi void _vuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn) { - if (VUregsn->VFread0) { - _vuFMACTestStall(VU, VUregsn->VFread0, VUregsn->VFr0xyzw); - } - if (VUregsn->VFread1) { - _vuFMACTestStall(VU, VUregsn->VFread1, VUregsn->VFr1xyzw); +static __fi void _vuTestALUStalls(VURegs* VU, _VURegsNum* VUregsn) +{ + u32 i = 0; + + for (int currentpipe = VU->ialureadpos; i < VU->ialucount; currentpipe = (currentpipe + 1) & 3, i++) + { + if ((VU->cycle - VU->ialu[currentpipe].sCycle) >= VU->ialu[currentpipe].Cycle) continue; + + if (VU->ialu[currentpipe].reg & VUregsn->VIread) // Read and written VI regs share the same register + { + u32 newCycle = VU->ialu[currentpipe].Cycle + VU->ialu[currentpipe].sCycle; + + VUM_LOG("ALU[%d] stall %d", currentpipe, newCycle - VU->cycle); + if (newCycle > VU->cycle) + VU->cycle = newCycle; + } } } -static __fi void _vuAddFMACStalls(VURegs * VU, _VURegsNum *VUregsn) { - _vuFMACAdd(VU, VUregsn); -} - -static __ri void __fastcall _vuIALUAdd(VURegs* VU, _VURegsNum* VUregsn) { - int i; - - if (VUregsn->cycles == 0) - return; - //If it's an FMAC which doesn't 
modify FMAC flags, just exit, no need to queue - /* find a free fmac pipe */ - for (i = 0; i < 8; i++) { - if (VU->ialu[i].enable == 1) continue; - break; - } - - if (i < 8) { - VUM_LOG("adding IALU pipe[%d]; reg=%x xyzw=%x flagreg=%x target=%x current %x", i, VUregsn->VFwrite, VUregsn->VFwxyzw, VUregsn->VIwrite, VU->cycle + 4, VU->cycle); - VU->ialu[i].enable = 1; - VU->ialu[i].sCycle = VU->cycle; - VU->ialu[i].Cycle = VUregsn->cycles; - VU->ialu[i].reg = VUregsn->VIwrite; - } - else { - Console.Error("*PCSX2*: error , out of ALU's %d", VU->cycle); - } -} - -static __fi void _vuAddIALUStalls(VURegs* VU, _VURegsNum* VUregsn) { - _vuIALUAdd(VU, VUregsn); -} - -static __fi void _vuTestFDIVStalls(VURegs * VU, _VURegsNum *VUregsn) { -// _vuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn); - _vuTestFMACStalls(VU, VUregsn); - _vuFlushFDIV(VU); -} - -static __fi void _vuAddFDIVStalls(VURegs * VU, _VURegsNum *VUregsn) { - if (VUregsn->VIwrite & (1 << REG_Q)) { - _vuFDIVAdd(VU, VUregsn->cycles); - } -} - - -static __fi void _vuTestEFUStalls(VURegs * VU, _VURegsNum *VUregsn) { -// _vuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn); - _vuTestFMACStalls(VU, VUregsn); - _vuFlushEFU(VU, VUregsn->cycles == 0); -} - -static __fi void _vuAddEFUStalls(VURegs * VU, _VURegsNum *VUregsn) { - if (VUregsn->VIwrite & (1 << REG_P)) { - _vuEFUAdd(VU, VUregsn->cycles); - } -} - -__fi void _vuTestUpperStalls(VURegs * VU, _VURegsNum *VUregsn) { +__fi void _vuTestUpperStalls(VURegs* VU, _VURegsNum* VUregsn) +{ switch (VUregsn->pipe) { - case VUPIPE_FMAC: _vuTestFMACStalls(VU, VUregsn); break; + case VUPIPE_FMAC: _vuTestFMACStalls(VU, VUregsn); break; } } -static __fi void _vuTestALUStalls(VURegs* VU, _VURegsNum* VUregsn) { - int i; - for (i = 0; i < 8; i++) { - if (VU->ialu[i].enable == 0) continue; - if ((VU->cycle - VU->ialu[i].sCycle) >= VU->ialu[i].Cycle) continue; - if (VU->ialu[i].reg & VUregsn->VIread) // Read and written VI regs share the same register - break; - } - if (i == 8) 
return; - - u32 newCycle = VU->ialu[i].Cycle + VU->ialu[i].sCycle; - - VUM_LOG("ALU[%d] stall %d", i, newCycle - VU->cycle); - if (newCycle > VU->cycle) - VU->cycle = newCycle; -} - -__fi void _vuTestLowerStalls(VURegs * VU, _VURegsNum *VUregsn) { - switch (VUregsn->pipe) { +__fi void _vuTestLowerStalls(VURegs* VU, _VURegsNum* VUregsn) +{ + switch (VUregsn->pipe) + { case VUPIPE_FMAC: _vuTestFMACStalls(VU, VUregsn); break; case VUPIPE_FDIV: _vuTestFDIVStalls(VU, VUregsn); break; case VUPIPE_EFU: _vuTestEFUStalls(VU, VUregsn); break; @@ -468,18 +321,118 @@ __fi void _vuTestLowerStalls(VURegs * VU, _VURegsNum *VUregsn) { } } -__fi void _vuAddUpperStalls(VURegs * VU, _VURegsNum *VUregsn) { +__fi void _vuClearFMAC(VURegs* VU) +{ + int i = VU->fmacwritepos; + VU->fmac[i].regupper =0; + VU->fmac[i].xyzwupper = 0; + VU->fmac[i].flagreg = 0; + VU->fmac[i].reglower = 0; + VU->fmac[i].xyzwlower = 0; + VU->fmac[i].macflag = 0; + VU->fmac[i].statusflag = 0; + VU->fmac[i].clipflag = 0; + + VU->fmaccount++; +} + +static __ri void __fastcall _vuAddFMACStalls(VURegs* VU, _VURegsNum* VUregsn, bool isUpper) +{ + int i = VU->fmacwritepos; + + VUM_LOG("adding FMAC %s pipe[%d]; reg=%x xyzw=%x flagreg=%x target=%x current %x", isUpper ? 
"Upper" : "Lower", i, VUregsn->VFwrite, VUregsn->VFwxyzw, VUregsn->VIwrite, VU->cycle + 4, VU->cycle); + VU->fmac[i].sCycle = VU->cycle; + VU->fmac[i].Cycle = 4; + + if (isUpper) + { + VU->fmac[i].regupper = VUregsn->VFwrite; + VU->fmac[i].xyzwupper = VUregsn->VFwxyzw; + VU->fmac[i].flagreg = VUregsn->VIwrite; + } + else + { + VU->fmac[i].reglower = VUregsn->VFwrite; + VU->fmac[i].xyzwlower = VUregsn->VFwxyzw; + VU->fmac[i].flagreg |= VUregsn->VIwrite; + } + + VU->fmac[i].macflag = VU->macflag; + VU->fmac[i].statusflag = VU->statusflag; + VU->fmac[i].clipflag = VU->clipflag; +} + + +static __ri void __fastcall _vuFDIVAdd(VURegs* VU, int cycles) +{ + VUM_LOG("adding FDIV pipe"); + + VU->fdiv.enable = 1; + VU->fdiv.sCycle = VU->cycle; + VU->fdiv.Cycle = cycles; + VU->fdiv.reg.F = VU->q.F; + VU->fdiv.statusflag = VU->statusflag; +} + +static __ri void __fastcall _vuEFUAdd(VURegs* VU, int cycles) +{ + VUM_LOG("adding EFU pipe for %d cycles\n", cycles); + + VU->efu.enable = 1; + VU->efu.sCycle = VU->cycle; + VU->efu.Cycle = cycles; + VU->efu.reg.F = VU->p.F; +} + +static __ri void __fastcall _vuIALUAdd(VURegs* VU, _VURegsNum* VUregsn) +{ + + if (VUregsn->cycles == 0) + return; + + int i = VU->ialuwritepos; + + VUM_LOG("adding IALU pipe[%d]; reg=%x target=%x current %x", i, VUregsn->VIwrite, VU->cycle + VUregsn->cycles, VU->cycle); + VU->ialu[i].sCycle = VU->cycle; + VU->ialu[i].Cycle = VUregsn->cycles; + VU->ialu[i].reg = VUregsn->VIwrite; + + VU->ialuwritepos = ++VU->ialuwritepos & 3; + VU->ialucount++; +} + +static __fi void _vuAddIALUStalls(VURegs* VU, _VURegsNum* VUregsn) +{ + _vuIALUAdd(VU, VUregsn); +} + +static __fi void _vuAddFDIVStalls(VURegs * VU, _VURegsNum *VUregsn) +{ + if (VUregsn->VIwrite & (1 << REG_Q)) + _vuFDIVAdd(VU, VUregsn->cycles); +} + +static __fi void _vuAddEFUStalls(VURegs * VU, _VURegsNum *VUregsn) +{ + if (VUregsn->VIwrite & (1 << REG_P)) + _vuEFUAdd(VU, VUregsn->cycles); +} + +__fi void _vuAddUpperStalls(VURegs* VU, _VURegsNum* VUregsn) +{ 
switch (VUregsn->pipe) { - case VUPIPE_FMAC: _vuAddFMACStalls(VU, VUregsn); break; + case VUPIPE_FMAC: _vuAddFMACStalls(VU, VUregsn, true); break; } } -__fi void _vuAddLowerStalls(VURegs * VU, _VURegsNum *VUregsn) { - switch (VUregsn->pipe) { - case VUPIPE_FMAC: _vuAddFMACStalls(VU, VUregsn); break; +__fi void _vuAddLowerStalls(VURegs* VU, _VURegsNum* VUregsn) +{ + switch (VUregsn->pipe) + { + case VUPIPE_FMAC: _vuAddFMACStalls(VU, VUregsn, false); break; case VUPIPE_FDIV: _vuAddFDIVStalls(VU, VUregsn); break; case VUPIPE_EFU: _vuAddEFUStalls(VU, VUregsn); break; - case VUPIPE_IALU: _vuAddIALUStalls(VU, VUregsn); break; + case VUPIPE_IALU: _vuAddIALUStalls(VU, VUregsn); break; } } @@ -2843,8 +2796,17 @@ VUREGS_IDISIT(IOR); VUREGS_IDISIT(ISUB); VUREGS_ITIS(ISUBIU); -VUREGS_FTFS(MOVE); - +static __ri void _vuRegsMOVE(const VURegs* VU, _VURegsNum* VUregsn) { + VUregsn->pipe = _Ft_ == 0 ? VUPIPE_NONE : VUPIPE_FMAC; + VUregsn->VFwrite = _Ft_; + VUregsn->VFwxyzw = _XYZW; + VUregsn->VFread0 = _Fs_; + VUregsn->VFr0xyzw = _XYZW; + VUregsn->VFread1 = 0; + VUregsn->VFr1xyzw = 0; + VUregsn->VIwrite = 0; + VUregsn->VIread = (_Ft_ ? GET_VF0_FLAG(_Fs_) : 0); +} static __ri void _vuRegsMFIR(const VURegs* VU, _VURegsNum *VUregsn) { VUregsn->pipe = VUPIPE_FMAC; VUregsn->VFwrite = _Ft_; diff --git a/pcsx2/VUops.h b/pcsx2/VUops.h index bf6216dd5a..ebb41c313d 100644 --- a/pcsx2/VUops.h +++ b/pcsx2/VUops.h @@ -53,7 +53,7 @@ extern __aligned16 const Fnptr_Void VU1_LOWER_OPCODE[128]; extern __aligned16 const Fnptr_Void VU1_UPPER_OPCODE[64]; extern __aligned16 const Fnptr_VuRegsN VU1regs_LOWER_OPCODE[128]; extern __aligned16 const Fnptr_VuRegsN VU1regs_UPPER_OPCODE[64]; - +extern void _vuClearFMAC(VURegs * VU); extern void _vuTestPipes(VURegs * VU); extern void _vuTestUpperStalls(VURegs * VU, _VURegsNum *VUregsn); extern void _vuTestLowerStalls(VURegs * VU, _VURegsNum *VUregsn);