VU Int: Rewrote most of the FMAC/IALU handling, now with 2x performance

This commit is contained in:
refractionpcsx2 2021-09-06 14:13:06 +01:00
parent e19b0bce57
commit 83143bd42e
6 changed files with 327 additions and 323 deletions

View File

@ -109,9 +109,11 @@ struct efuPipe
struct fmacPipe
{
int enable;
int reg;
u32 regupper;
u32 reglower;
int flagreg;
int xyzw;
u32 xyzwupper;
u32 xyzwlower;
u32 sCycle;
u32 Cycle;
u32 macflag;
@ -184,10 +186,16 @@ struct __aligned16 VURegs
u32 VIOldValue;
u32 VIRegNumber;
fmacPipe fmac[8];
fmacPipe fmac[4];
u32 fmacreadpos;
u32 fmacwritepos;
u32 fmaccount;
fdivPipe fdiv;
efuPipe efu;
ialuPipe ialu[8];
ialuPipe ialu[4];
u32 ialureadpos;
u32 ialuwritepos;
u32 ialucount;
VURegs()
{

View File

@ -81,27 +81,28 @@ static void _vu0Exec(VURegs* VU)
lregs.cycles = 0;
u32 cyclesBeforeOp = VU0.cycle-1;
#ifndef INT_VUSTALLHACK
_vuTestUpperStalls(VU, &uregs);
#endif
/* check upper flags */
if (ptr[1] & 0x80000000) { /* I flag */
if (ptr[1] & 0x80000000) // I flag
{
_vuTestPipes(VU);
if (VU->VIBackupCycles > 0)
VU->VIBackupCycles -= std::min((u8)(VU0.cycle - cyclesBeforeOp), VU->VIBackupCycles);
_vu0ExecUpper(VU, ptr);
VU->VI[REG_I].UL = ptr[0];
memset(&lregs, 0, sizeof(lregs));
} else {
}
else
{
VU->code = ptr[0];
VU0regs_LOWER_OPCODE[VU->code >> 25](&lregs);
#ifndef INT_VUSTALLHACK
_vuTestLowerStalls(VU, &lregs);
#endif
_vuTestPipes(VU);
if (VU->VIBackupCycles > 0)
VU->VIBackupCycles -= std::min((u8)(VU0.cycle - cyclesBeforeOp), VU->VIBackupCycles);
@ -153,6 +154,10 @@ static void _vu0Exec(VURegs* VU)
}
}
}
if (uregs.pipe == VUPIPE_FMAC || lregs.pipe == VUPIPE_FMAC)
_vuClearFMAC(VU);
_vuAddUpperStalls(VU, &uregs);
_vuAddLowerStalls(VU, &lregs);
@ -179,6 +184,10 @@ static void _vu0Exec(VURegs* VU)
vif0Regs.stat.VEW = false;
}
}
// Progress the write position of the FMAC pipeline by one place
if (uregs.pipe == VUPIPE_FMAC || lregs.pipe == VUPIPE_FMAC)
VU->fmacwritepos = ++VU->fmacwritepos & 3;
}
void vu0Exec(VURegs* VU)
@ -203,6 +212,17 @@ InterpVU0::InterpVU0()
IsInterpreter = true;
}
// Resets the VU0 interpreter's FMAC and IALU pipeline tracking state:
// the read position, write position and queued-entry count of each are zeroed.
void InterpVU0::Reset()
{
	DevCon.Warning("VU0 Int Reset");

	// FMAC pipeline bookkeeping
	VU0.fmaccount = 0;
	VU0.fmacreadpos = 0;
	VU0.fmacwritepos = 0;

	// IALU pipeline bookkeeping
	VU0.ialucount = 0;
	VU0.ialureadpos = 0;
	VU0.ialuwritepos = 0;
}
void InterpVU0::SetStartPC(u32 startPC)
{
VU0.start_pc = startPC;

View File

@ -92,11 +92,13 @@ static void _vu1Exec(VURegs* VU)
_vuTestUpperStalls(VU, &uregs);
/* check upper flags */
if (ptr[1] & 0x80000000)
{ /* I flag */
if (ptr[1] & 0x80000000) // I Flag (Lower op is a float)
{
_vuTestPipes(VU);
if (VU->VIBackupCycles > 0)
VU->VIBackupCycles -= std::min((u8)(VU1.cycle - cyclesBeforeOp), VU->VIBackupCycles);
_vu1ExecUpper(VU, ptr);
VU->VI[REG_I].UL = ptr[0];
@ -174,10 +176,11 @@ static void _vu1Exec(VURegs* VU)
}
}
}
_vuAddUpperStalls(VU, &uregs);
if (uregs.pipe == VUPIPE_FMAC || lregs.pipe == VUPIPE_FMAC)
_vuClearFMAC(VU);
//if (!(ptr[1] & 0x80000000))
_vuAddLowerStalls(VU, &lregs);
_vuAddUpperStalls(VU, &uregs);
_vuAddLowerStalls(VU, &lregs);
if (VU->branch > 0)
{
@ -245,6 +248,10 @@ static void _vu1Exec(VURegs* VU)
}
}
}
// Progress the write position of the FMAC pipeline by one place
if (uregs.pipe == VUPIPE_FMAC || lregs.pipe == VUPIPE_FMAC)
VU->fmacwritepos = ++VU->fmacwritepos & 3;
}
void vu1Exec(VURegs* VU)
@ -272,6 +279,13 @@ InterpVU1::InterpVU1()
// Resets the VU1 interpreter's FMAC and IALU pipeline tracking state
// (read position, write position, queued-entry count), then waits on the
// VU1 thread via vu1Thread.WaitVU().
void InterpVU1::Reset()
{
	DevCon.Warning("VU1 Int Reset");

	// FMAC pipeline bookkeeping
	VU1.fmaccount = 0;
	VU1.fmacreadpos = 0;
	VU1.fmacwritepos = 0;

	// IALU pipeline bookkeeping
	VU1.ialucount = 0;
	VU1.ialureadpos = 0;
	VU1.ialuwritepos = 0;

	vu1Thread.WaitVU();
}

View File

@ -170,7 +170,7 @@ public:
void Reserve() { }
void Shutdown() noexcept { }
void Reset() { }
void Reset();
void Step();
void SetStartPC(u32 startPC);

View File

@ -47,101 +47,66 @@ u32 laststall = 0;
static __aligned16 VECTOR RDzero;
static __ri bool _vuFMACflush(VURegs * VU) {
static __ri bool _vuFMACflush(VURegs * VU)
{
bool didflush = false;
int startpos = 0;
u32 cycle = 9999;
for (int startpipe = 0; startpipe < 8; startpipe++)
VUM_LOG("Flushing FMACs");
for (int i = VU->fmacreadpos; VU->fmaccount > 0; i = (i + 1) & 3)
{
if (VU->fmac[startpipe].enable == 0) continue;
if ((VU->fmac[startpipe].sCycle + VU->fmac[startpipe].Cycle) <= VU->cycle)
if ((VU->cycle - VU->fmac[i].sCycle) < VU->fmac[i].Cycle)
{
if ((VU->cycle - (VU->fmac[startpipe].sCycle + VU->fmac[startpipe].Cycle)) < cycle)
{
VUM_LOG("Setting start pos to %d", startpipe);
cycle = (s32)(VU->cycle - (VU->fmac[startpipe].sCycle + VU->fmac[startpipe].Cycle));
startpos = startpipe;
}
else
VUM_LOG("Nope for pipe %d, cycle %d greater than %d", startpipe, (s32)(VU->cycle - (VU->fmac[startpipe].sCycle + VU->fmac[startpipe].Cycle)), cycle);
VUM_LOG("Not flushing FMAC pipe[%d] (macflag=%x clipflag=%x statusflag=%x) r %d w %d", i, VU->fmac[i].macflag, VU->fmac[i].clipflag, VU->fmac[i].statusflag, VU->fmacreadpos, VU->fmacwritepos);
return didflush;
}
VUM_LOG("flushing FMAC pipe[%d] (macflag=%x clipflag=%x statusflag=%x) r %d w %d", i, VU->fmac[i].macflag, VU->fmac[i].clipflag, VU->fmac[i].statusflag, VU->fmacreadpos, VU->fmacwritepos);
// Clip flags (Affected by CLIP instruction)
if (VU->fmac[i].flagreg & (1 << REG_CLIP_FLAG))
VU->VI[REG_CLIP_FLAG].UL = VU->fmac[i].clipflag;
// Normal FMAC instructions only affect Z/S/I/O, D/I are modified only by FDIV instructions
// Sticky flags (Affected by FSSET)
if (VU->fmac[i].flagreg & (1 << REG_STATUS_FLAG))
VU->VI[REG_STATUS_FLAG].UL = (VU->VI[REG_STATUS_FLAG].UL & 0x30) | (VU->fmac[i].statusflag & 0xFC0) | (VU->fmac[i].statusflag & 0xF);
else
VU->VI[REG_STATUS_FLAG].UL = (VU->VI[REG_STATUS_FLAG].UL & 0xFF0) | (VU->fmac[i].statusflag & 0xF) | ((VU->fmac[i].statusflag & 0xF) << 6);
VU->VI[REG_MAC_FLAG].UL = VU->fmac[i].macflag;
VU->fmacreadpos = ++VU->fmacreadpos & 3;
VU->fmaccount--;
didflush = true;
}
VUM_LOG("Flushing FMAC starting at %d", startpos);
u32 lastmac, lastclip, laststatus;
lastmac = lastclip = laststatus = 9999;
for (int i=0; i<8; i++) {
int currentpipe = (i + startpos) % 8;
if (VU->fmac[currentpipe].enable == 0) continue;
if ((VU->cycle - VU->fmac[currentpipe].sCycle) >= VU->fmac[currentpipe].Cycle) {
VU->fmac[currentpipe].enable = 0;
VUM_LOG("Writing back flags");
if (VU->fmac[currentpipe].flagreg & (1 << REG_STATUS_FLAG))
{
if ((VU->cycle - VU->fmac[currentpipe].sCycle) < laststatus)
{
VUM_LOG("flushing FMAC Status Write pipe[%d] (status=%x) Cur Cycle %d Expected cycle %d", currentpipe, (VU->VI[REG_STATUS_FLAG].UL & 0xF30) | (VU->fmac[currentpipe].statusflag & 0x3CF), VU->cycle, VU->fmac[currentpipe].sCycle + VU->fmac[currentpipe].Cycle);
VU->VI[REG_STATUS_FLAG].UL = (VU->VI[REG_STATUS_FLAG].UL & 0x3F) | (VU->fmac[currentpipe].statusflag & 0xFC0);
laststatus = (VU->cycle - VU->fmac[currentpipe].sCycle);
}
}
else if (VU->fmac[currentpipe].flagreg & (1 << REG_CLIP_FLAG))
{
if ((VU->cycle - VU->fmac[currentpipe].sCycle) < lastclip)
{
VUM_LOG("flushing FMAC Clip Write pipe[%d] (clip=%x) Cur Cycle %d Expected cycle %d", currentpipe, VU->fmac[currentpipe].clipflag, VU->cycle, VU->fmac[currentpipe].sCycle + VU->fmac[currentpipe].Cycle);
VU->VI[REG_CLIP_FLAG].UL = VU->fmac[currentpipe].clipflag;
lastclip = (VU->cycle - VU->fmac[currentpipe].sCycle);
}
}
else
{
if ((VU->cycle - VU->fmac[currentpipe].sCycle) < lastmac)
{
VUM_LOG("flushing FMAC pipe[%d] (macflag=%x status=%x) Cur Cycle %d Expected cycle %d", currentpipe, VU->fmac[currentpipe].macflag, (VU->VI[REG_STATUS_FLAG].UL & 0xF30) | (VU->fmac[currentpipe].statusflag & 0x3CF), VU->cycle, VU->fmac[currentpipe].sCycle + VU->fmac[currentpipe].Cycle);
// FMAC only affectx Z/S/I/O
VU->VI[REG_STATUS_FLAG].UL = (VU->VI[REG_STATUS_FLAG].UL & 0xFF0) | ((VU->fmac[currentpipe].statusflag & 0xF) | ((VU->fmac[currentpipe].statusflag & 0xF) << 6));
VU->VI[REG_MAC_FLAG].UL = VU->fmac[currentpipe].macflag;
lastmac = (VU->cycle - VU->fmac[currentpipe].sCycle);
}
}
didflush = true;
}
}
return didflush;
}
static __ri bool _vuIALUflush(VURegs* VU) {
static __ri bool _vuIALUflush(VURegs* VU)
{
bool didflush = false;
VUM_LOG("Flushing ALU stalls");
for (int i = 0; i < 8; i++) {
int currentpipe = i;
for (int i = VU->ialureadpos; VU->ialucount > 0; i = (i + 1) & 3)
{
if ((VU->cycle - VU->ialu[i].sCycle) < VU->ialu[i].Cycle) return didflush;
if (VU->ialu[currentpipe].enable == 0) continue;
if ((VU->cycle - VU->ialu[currentpipe].sCycle) >= VU->ialu[currentpipe].Cycle) {
VU->ialu[currentpipe].enable = 0;
didflush = true;
}
VU->ialureadpos = ++VU->ialureadpos & 3;
VU->ialucount--;
didflush = true;
}
return didflush;
}
static __ri bool _vuFDIVflush(VURegs * VU) {
static __ri bool _vuFDIVflush(VURegs * VU)
{
if (VU->fdiv.enable == 0) return false;
if ((VU->cycle - VU->fdiv.sCycle) >= VU->fdiv.Cycle) {
if ((VU->cycle - VU->fdiv.sCycle) >= VU->fdiv.Cycle)
{
VUM_LOG("flushing FDIV pipe");
VU->fdiv.enable = 0;
@ -154,11 +119,13 @@ static __ri bool _vuFDIVflush(VURegs * VU) {
return false;
}
static __ri bool _vuEFUflush(VURegs * VU) {
static __ri bool _vuEFUflush(VURegs * VU)
{
if (VU->efu.enable == 0) return false;
if ((VU->cycle - VU->efu.sCycle) >= VU->efu.Cycle) {
// VUM_LOG("flushing EFU pipe");
if ((VU->cycle - VU->efu.sCycle) >= VU->efu.Cycle)
{
VUM_LOG("flushing EFU pipe");
VU->efu.enable = 0;
VU->VI[REG_P].UL = VU->efu.reg.UL;
@ -172,99 +139,68 @@ static __ri bool _vuEFUflush(VURegs * VU) {
// called at end of program
void _vuFlushAll(VURegs* VU)
{
int nRepeat = 1, i;
int i = 0;
u32 startpos = 0;
s32 cycle = 9999;
// Calculate lowest active FMAC pipe
for (int startpipe = 0; startpipe < 8; startpipe++)
if (VU->fdiv.enable)
{
if (VU->fmac[startpipe].enable == 0) continue;
VU->fdiv.enable = 0;
VU->VI[REG_Q].UL = VU->fdiv.reg.UL;
VU->VI[REG_STATUS_FLAG].UL = (VU->VI[REG_STATUS_FLAG].UL & 0xFCF) | (VU->fdiv.statusflag & 0xC30);
if ((VU->fmac[startpipe].sCycle + VU->fmac[startpipe].Cycle) <= VU->cycle)
{
if ((VU->cycle - (VU->fmac[startpipe].sCycle + VU->fmac[startpipe].Cycle)) < cycle)
{
VUM_LOG("Setting start pos to %d", startpipe);
cycle = (s32)(VU->cycle - (VU->fmac[startpipe].sCycle + VU->fmac[startpipe].Cycle));
startpos = startpipe;
}
else
VUM_LOG("Nope for pipe %d, cycle %d greater than %d", startpipe, (s32)(VU->cycle - (VU->fmac[startpipe].sCycle + VU->fmac[startpipe].Cycle)), cycle);
}
if ((VU->cycle - VU->fdiv.sCycle) < VU->fdiv.Cycle)
VU->cycle = VU->fdiv.sCycle + VU->fdiv.Cycle;
}
do {
nRepeat = 0;
if (VU->efu.enable)
{
VU->efu.enable = 0;
VU->VI[REG_P].UL = VU->efu.reg.UL;
for (i=0; i<8; i++) {
int currentpipe = (i + startpos) % 8;
if ((VU->cycle - VU->efu.sCycle) < VU->efu.Cycle)
VU->cycle = VU->efu.sCycle + VU->efu.Cycle;
}
if (VU->fmac[currentpipe].enable == 0) continue;
for (i=VU->fmacreadpos; VU->fmaccount > 0; i = (i + 1) & 3)
{
VUM_LOG("flushing FMAC pipe[%d] (macflag=%x)", i, VU->fmac[i].macflag);
nRepeat = 1;
// Clip flags (Affected by CLIP instruction)
if (VU->fmac[i].flagreg & (1 << REG_CLIP_FLAG))
VU->VI[REG_CLIP_FLAG].UL = VU->fmac[i].clipflag;
if ((VU->cycle - VU->fmac[currentpipe].sCycle) >= VU->fmac[currentpipe].Cycle) {
VUM_LOG("flushing FMAC pipe[%d] (macflag=%x)", i, VU->fmac[currentpipe].macflag);
// Normal FMAC instructions only affect Z/S/I/O, D/I are modified only by FDIV instructions
// Sticky flags (Affected by FSSET)
if (VU->fmac[i].flagreg & (1 << REG_STATUS_FLAG))
VU->VI[REG_STATUS_FLAG].UL = (VU->VI[REG_STATUS_FLAG].UL & 0x30) | (VU->fmac[i].statusflag & 0xFC0) | (VU->fmac[i].statusflag & 0xF);
else
VU->VI[REG_STATUS_FLAG].UL = (VU->VI[REG_STATUS_FLAG].UL & 0xFF0) | (VU->fmac[i].statusflag & 0xF) | ((VU->fmac[i].statusflag & 0xF) << 6);
VU->VI[REG_MAC_FLAG].UL = VU->fmac[i].macflag;
VU->fmac[currentpipe].enable = 0;
if (VU->fmac[currentpipe].flagreg & (1 << REG_STATUS_FLAG))
VU->VI[REG_STATUS_FLAG].UL = (VU->VI[REG_STATUS_FLAG].UL & 0x3F) | (VU->fmac[currentpipe].statusflag & 0xFC0);
else if (VU->fmac[currentpipe].flagreg & (1 << REG_CLIP_FLAG))
VU->VI[REG_CLIP_FLAG].UL = VU->fmac[currentpipe].clipflag;
else
{
// FMAC only affectx Z/S/I/O
VU->VI[REG_STATUS_FLAG].UL = (VU->VI[REG_STATUS_FLAG].UL & 0xFF0) | ((VU->fmac[currentpipe].statusflag & 0xF) | ((VU->fmac[currentpipe].statusflag & 0xF) << 6));
VU->VI[REG_MAC_FLAG].UL = VU->fmac[currentpipe].macflag;
}
}
}
VU->fmacreadpos = ++VU->fmacreadpos & 3;
if (VU->fdiv.enable ) {
if((VU->cycle - VU->fmac[i].sCycle) < VU->fmac[i].Cycle)
VU->cycle = VU->fmac[i].sCycle + VU->fmac[i].Cycle;
nRepeat = 1;
VU->fmaccount--;
}
if ((VU->cycle - VU->fdiv.sCycle) >= VU->fdiv.Cycle) {
VUM_LOG("flushing FDIV pipe");
for (i = VU->ialureadpos; VU->ialucount > 0; i = (i + 1) & 3)
{
VU->ialureadpos = ++VU->ialureadpos & 3;
VU->fdiv.enable = 0;
VU->VI[REG_Q].UL = VU->fdiv.reg.UL;
VU->VI[REG_STATUS_FLAG].UL = (VU->VI[REG_STATUS_FLAG].UL & 0xFCF) | (VU->fdiv.statusflag & 0xC30);
}
}
if ((VU->cycle - VU->ialu[i].sCycle) < VU->ialu[i].Cycle)
VU->cycle = VU->ialu[i].sCycle + VU->ialu[i].Cycle;
if (VU->efu.enable) {
nRepeat = 1;
if ((VU->cycle - VU->efu.sCycle) >= VU->efu.Cycle) {
// VUM_LOG("flushing EFU pipe");
VU->efu.enable = 0;
VU->VI[REG_P].UL = VU->efu.reg.UL;
}
}
for (i = 0; i < 8; i++) {
int currentpipe = i;
if (VU->ialu[currentpipe].enable == 0) continue;
nRepeat = 1;
if ((VU->cycle - VU->ialu[currentpipe].sCycle) >= VU->ialu[currentpipe].Cycle) {
VU->ialu[currentpipe].enable = 0;
}
}
VU->cycle++;
} while(nRepeat);
VU->ialucount--;
}
}
__fi void _vuTestPipes(VURegs * VU) {
__fi void _vuTestPipes(VURegs * VU)
{
bool flushed;
do {
do
{
flushed = false;
flushed |= _vuFMACflush(VU);
flushed |= _vuFDIVflush(VU);
@ -273,72 +209,45 @@ __fi void _vuTestPipes(VURegs * VU) {
} while (flushed == true);
}
static void __fastcall _vuFMACTestStall(VURegs * VU, int reg, int xyzw) {
int i;
static void __fastcall _vuFMACTestStall(VURegs* VU, int reg, int xyzw)
{
u32 i = 0;
for (i=0; i<8; i++) {
if (VU->fmac[i].enable == 0) continue;
if ((VU->cycle - VU->fmac[i].sCycle) >= VU->fmac[i].Cycle) continue;
if (VU->fmac[i].reg == reg &&
VU->fmac[i].xyzw & xyzw) break;
}
for (int currentpipe = VU->fmacreadpos; i < VU->fmaccount; currentpipe = (currentpipe + 1) & 3, i++)
{
//Check if enough cycles have passed for this fmac position
if ((VU->cycle - VU->fmac[currentpipe].sCycle) >= VU->fmac[currentpipe].Cycle) continue;
if (i == 8) return;
// Check if the regs match
if ((VU->fmac[currentpipe].regupper == reg &&
VU->fmac[currentpipe].xyzwupper & xyzw)
|| (VU->fmac[currentpipe].reglower == reg &&
VU->fmac[currentpipe].xyzwlower & xyzw))
{
u32 newCycle = VU->fmac[currentpipe].Cycle + VU->fmac[currentpipe].sCycle;
u32 newCycle = VU->fmac[i].Cycle + VU->fmac[i].sCycle;
VUM_LOG("FMAC[%d] stall %d", i, newCycle - VU->cycle);
if(newCycle > VU->cycle)
VU->cycle = newCycle;
}
u32 regcycle = 0;
static __ri void __fastcall _vuFMACAdd(VURegs * VU, _VURegsNum* VUregsn) {
int i;
//If it's an FMAC which doesn't modify FMAC flags, just exit, no need to queue
/* find a free fmac pipe */
for (i=0; i<8; i++) {
if (VU->fmac[i].enable == 1) continue;
break;
}
if (i < 8) {
VUM_LOG("adding FMAC pipe[%d]; reg=%x xyzw=%x flagreg=%x target=%x current %x", i, VUregsn->VFwrite, VUregsn->VFwxyzw, VUregsn->VIwrite, VU->cycle + 4, VU->cycle);
VU->fmac[i].enable = 1;
VU->fmac[i].sCycle = VU->cycle;
VU->fmac[i].Cycle = 4;
VU->fmac[i].reg = VUregsn->VFwrite;
VU->fmac[i].xyzw = VUregsn->VFwxyzw;
VU->fmac[i].macflag = VU->macflag;
VU->fmac[i].statusflag = VU->statusflag;
VU->fmac[i].clipflag = VU->clipflag;
VU->fmac[i].flagreg = VUregsn->VIwrite;
} else {
Console.Error("*PCSX2*: error , out of fmacs %d", VU->cycle);
VUM_LOG("FMAC[%d] stall %d", currentpipe, newCycle - VU->cycle);
if (newCycle > VU->cycle)
VU->cycle = newCycle;
}
}
}
static __ri void __fastcall _vuFDIVAdd(VURegs * VU, int cycles) {
VUM_LOG("adding FDIV pipe");
VU->fdiv.enable = 1;
VU->fdiv.sCycle = VU->cycle;
VU->fdiv.Cycle = cycles;
VU->fdiv.reg.F = VU->q.F;
VU->fdiv.statusflag = VU->statusflag;
// Runs the FMAC stall check for each VF source register the instruction reads.
// A source register index of 0 is skipped, so no stall test is issued for it.
static __fi void _vuTestFMACStalls(VURegs* VU, _VURegsNum* VUregsn)
{
	// First source operand
	if (VUregsn->VFread0 != 0)
		_vuFMACTestStall(VU, VUregsn->VFread0, VUregsn->VFr0xyzw);

	// Second source operand
	if (VUregsn->VFread1 != 0)
		_vuFMACTestStall(VU, VUregsn->VFread1, VUregsn->VFr1xyzw);
}
static __ri void __fastcall _vuEFUAdd(VURegs * VU, int cycles) {
VUM_LOG("adding EFU pipe for %d cycles\n", cycles);
VU->efu.enable = 1;
VU->efu.sCycle = VU->cycle;
VU->efu.Cycle = cycles;
VU->efu.reg.F = VU->p.F;
}
static __ri void __fastcall _vuFlushFDIV(VURegs * VU) {
static __fi void _vuTestFDIVStalls(VURegs * VU, _VURegsNum *VUregsn)
{
_vuTestFMACStalls(VU, VUregsn);
if (VU->fdiv.enable != 0)
{
@ -349,13 +258,19 @@ static __ri void __fastcall _vuFlushFDIV(VURegs * VU) {
}
}
static __ri void __fastcall _vuFlushEFU(VURegs * VU, bool isWait = false) {
static __fi void _vuTestEFUStalls(VURegs* VU, _VURegsNum* VUregsn)
{
_vuTestFMACStalls(VU, VUregsn);
if (VU->efu.enable == 0)
return;
if (isWait)
if (VUregsn->cycles == 0) // WAITP
{
VU->cycle = VU->efu.Cycle - 1;
u32 newCycle = VU->efu.sCycle + VU->efu.Cycle - 1;
VUM_LOG("waiting EFU pipe %d", newCycle - VU->cycle);
if (newCycle > VU->cycle)
VU->cycle = newCycle;
VU->efu.sCycle = VU->cycle;
VU->efu.Cycle = 1;
}
@ -366,101 +281,39 @@ static __ri void __fastcall _vuFlushEFU(VURegs * VU, bool isWait = false) {
if (newCycle > VU->cycle)
VU->cycle = newCycle;
}
}
static __fi void _vuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn) {
if (VUregsn->VFread0) {
_vuFMACTestStall(VU, VUregsn->VFread0, VUregsn->VFr0xyzw);
}
if (VUregsn->VFread1) {
_vuFMACTestStall(VU, VUregsn->VFread1, VUregsn->VFr1xyzw);
// Walks the queued IALU entries, starting at the read position, and stalls
// (advances VU->cycle) for every entry that has not completed yet and whose
// written VI register mask overlaps the registers this instruction reads.
// Entries are visited in queue order because VU->cycle may move forward
// while iterating, which affects the completion test of later entries.
static __fi void _vuTestALUStalls(VURegs* VU, _VURegsNum* VUregsn)
{
	int slot = VU->ialureadpos;

	for (u32 visited = 0; visited < VU->ialucount; visited++, slot = (slot + 1) & 3)
	{
		const auto& entry = VU->ialu[slot];

		// Only entries still in flight can cause a stall
		const bool inFlight = (VU->cycle - entry.sCycle) < entry.Cycle;
		if (!inFlight)
			continue;

		// Read and written VI regs share the same register
		if (entry.reg & VUregsn->VIread)
		{
			const u32 newCycle = entry.Cycle + entry.sCycle;

			VUM_LOG("ALU[%d] stall %d", slot, newCycle - VU->cycle);
			if (VU->cycle < newCycle)
				VU->cycle = newCycle;
		}
	}
}
static __fi void _vuAddFMACStalls(VURegs * VU, _VURegsNum *VUregsn) {
_vuFMACAdd(VU, VUregsn);
}
static __ri void __fastcall _vuIALUAdd(VURegs* VU, _VURegsNum* VUregsn) {
int i;
if (VUregsn->cycles == 0)
return;
//If it's an FMAC which doesn't modify FMAC flags, just exit, no need to queue
/* find a free fmac pipe */
for (i = 0; i < 8; i++) {
if (VU->ialu[i].enable == 1) continue;
break;
}
if (i < 8) {
VUM_LOG("adding IALU pipe[%d]; reg=%x xyzw=%x flagreg=%x target=%x current %x", i, VUregsn->VFwrite, VUregsn->VFwxyzw, VUregsn->VIwrite, VU->cycle + 4, VU->cycle);
VU->ialu[i].enable = 1;
VU->ialu[i].sCycle = VU->cycle;
VU->ialu[i].Cycle = VUregsn->cycles;
VU->ialu[i].reg = VUregsn->VIwrite;
}
else {
Console.Error("*PCSX2*: error , out of ALU's %d", VU->cycle);
}
}
static __fi void _vuAddIALUStalls(VURegs* VU, _VURegsNum* VUregsn) {
_vuIALUAdd(VU, VUregsn);
}
static __fi void _vuTestFDIVStalls(VURegs * VU, _VURegsNum *VUregsn) {
// _vuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn);
_vuTestFMACStalls(VU, VUregsn);
_vuFlushFDIV(VU);
}
static __fi void _vuAddFDIVStalls(VURegs * VU, _VURegsNum *VUregsn) {
if (VUregsn->VIwrite & (1 << REG_Q)) {
_vuFDIVAdd(VU, VUregsn->cycles);
}
}
static __fi void _vuTestEFUStalls(VURegs * VU, _VURegsNum *VUregsn) {
// _vuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn);
_vuTestFMACStalls(VU, VUregsn);
_vuFlushEFU(VU, VUregsn->cycles == 0);
}
static __fi void _vuAddEFUStalls(VURegs * VU, _VURegsNum *VUregsn) {
if (VUregsn->VIwrite & (1 << REG_P)) {
_vuEFUAdd(VU, VUregsn->cycles);
}
}
__fi void _vuTestUpperStalls(VURegs * VU, _VURegsNum *VUregsn) {
__fi void _vuTestUpperStalls(VURegs* VU, _VURegsNum* VUregsn)
{
switch (VUregsn->pipe) {
case VUPIPE_FMAC: _vuTestFMACStalls(VU, VUregsn); break;
case VUPIPE_FMAC: _vuTestFMACStalls(VU, VUregsn); break;
}
}
static __fi void _vuTestALUStalls(VURegs* VU, _VURegsNum* VUregsn) {
int i;
for (i = 0; i < 8; i++) {
if (VU->ialu[i].enable == 0) continue;
if ((VU->cycle - VU->ialu[i].sCycle) >= VU->ialu[i].Cycle) continue;
if (VU->ialu[i].reg & VUregsn->VIread) // Read and written VI regs share the same register
break;
}
if (i == 8) return;
u32 newCycle = VU->ialu[i].Cycle + VU->ialu[i].sCycle;
VUM_LOG("ALU[%d] stall %d", i, newCycle - VU->cycle);
if (newCycle > VU->cycle)
VU->cycle = newCycle;
}
__fi void _vuTestLowerStalls(VURegs * VU, _VURegsNum *VUregsn) {
switch (VUregsn->pipe) {
__fi void _vuTestLowerStalls(VURegs* VU, _VURegsNum* VUregsn)
{
switch (VUregsn->pipe)
{
case VUPIPE_FMAC: _vuTestFMACStalls(VU, VUregsn); break;
case VUPIPE_FDIV: _vuTestFDIVStalls(VU, VUregsn); break;
case VUPIPE_EFU: _vuTestEFUStalls(VU, VUregsn); break;
@ -468,18 +321,118 @@ __fi void _vuTestLowerStalls(VURegs * VU, _VURegsNum *VUregsn) {
}
}
__fi void _vuAddUpperStalls(VURegs * VU, _VURegsNum *VUregsn) {
// Prepares the FMAC slot at the current write position for a new entry:
// zeroes its upper/lower register and field masks, its flag-register mask and
// its stored mac/status/clip flags, then counts the slot as occupied by
// incrementing fmaccount. The write position itself is not advanced here.
__fi void _vuClearFMAC(VURegs* VU)
{
	auto& slot = VU->fmac[VU->fmacwritepos];

	// Upper instruction destination
	slot.regupper = 0;
	slot.xyzwupper = 0;

	// Lower instruction destination
	slot.reglower = 0;
	slot.xyzwlower = 0;

	// Flag tracking
	slot.flagreg = 0;
	slot.macflag = 0;
	slot.statusflag = 0;
	slot.clipflag = 0;

	VU->fmaccount++;
}
// Records an FMAC instruction in the slot at the current FMAC write position.
//   VU      - VU register state holding the FMAC queue.
//   VUregsn - register usage of the instruction being added.
//   isUpper - true for the upper instruction, false for the lower one.
// The upper instruction overwrites the slot's flag-register mask, while the
// lower instruction ORs its mask into whatever is already stored there.
// The start cycle, the 4-cycle latency and the current mac/status/clip flags
// are stored in both cases.
static __ri void __fastcall _vuAddFMACStalls(VURegs* VU, _VURegsNum* VUregsn, bool isUpper)
{
	const int slot = VU->fmacwritepos;
	auto& entry = VU->fmac[slot];

	VUM_LOG("adding FMAC %s pipe[%d]; reg=%x xyzw=%x flagreg=%x target=%x current %x", isUpper ? "Upper" : "Lower", slot, VUregsn->VFwrite, VUregsn->VFwxyzw, VUregsn->VIwrite, VU->cycle + 4, VU->cycle);

	entry.sCycle = VU->cycle;
	entry.Cycle = 4;

	if (!isUpper)
	{
		entry.reglower = VUregsn->VFwrite;
		entry.xyzwlower = VUregsn->VFwxyzw;
		entry.flagreg |= VUregsn->VIwrite;
	}
	else
	{
		entry.regupper = VUregsn->VFwrite;
		entry.xyzwupper = VUregsn->VFwxyzw;
		entry.flagreg = VUregsn->VIwrite;
	}

	// Snapshot the flags as they stand when the instruction is queued
	entry.macflag = VU->macflag;
	entry.statusflag = VU->statusflag;
	entry.clipflag = VU->clipflag;
}
// Marks the FDIV pipe as busy starting at the current cycle for `cycles`
// cycles, and stores the pending Q value and status flag in it.
static __ri void __fastcall _vuFDIVAdd(VURegs* VU, int cycles)
{
	VUM_LOG("adding FDIV pipe");

	auto& pipe = VU->fdiv;

	// Pending result and flags
	pipe.reg.F = VU->q.F;
	pipe.statusflag = VU->statusflag;

	// Timing
	pipe.sCycle = VU->cycle;
	pipe.Cycle = cycles;
	pipe.enable = 1;
}
// Marks the EFU pipe as busy starting at the current cycle for `cycles`
// cycles, and stores the pending P value in it.
static __ri void __fastcall _vuEFUAdd(VURegs* VU, int cycles)
{
	VUM_LOG("adding EFU pipe for %d cycles\n", cycles);

	auto& pipe = VU->efu;

	// Pending result
	pipe.reg.F = VU->p.F;

	// Timing
	pipe.sCycle = VU->cycle;
	pipe.Cycle = cycles;
	pipe.enable = 1;
}
// Queues an integer-ALU write in the IALU slot at the current write position.
//   VU      - VU register state holding the IALU queue.
//   VUregsn - register usage of the instruction; `cycles` is its latency and
//             `VIwrite` the VI register mask it writes.
// Instructions reporting zero cycles are not queued at all.
// NOTE(review): there is no check that a free slot exists; this assumes
// callers never have more entries in flight than the `& 3` wrap allows.
static __ri void __fastcall _vuIALUAdd(VURegs* VU, _VURegsNum* VUregsn)
{
	if (VUregsn->cycles == 0)
		return;

	const int i = VU->ialuwritepos;

	VUM_LOG("adding IALU pipe[%d]; reg=%x target=%x current %x", i, VUregsn->VIwrite, VU->cycle + VUregsn->cycles, VU->cycle);

	VU->ialu[i].sCycle = VU->cycle;
	VU->ialu[i].Cycle = VUregsn->cycles;
	VU->ialu[i].reg = VUregsn->VIwrite;

	// Advance the write position with wrap-around. Written as (pos + 1) & 3
	// rather than `pos = ++pos & 3` so the variable is only modified once
	// per expression.
	VU->ialuwritepos = (VU->ialuwritepos + 1) & 3;
	VU->ialucount++;
}
// Adds the instruction described by VUregsn to the IALU queue.
// Thin wrapper: all of the work is done by _vuIALUAdd.
static __fi void _vuAddIALUStalls(VURegs* VU, _VURegsNum* VUregsn)
{
_vuIALUAdd(VU, VUregsn);
}
// Starts the FDIV pipe for this instruction, but only when its VI write mask
// includes the Q register; otherwise nothing is done.
static __fi void _vuAddFDIVStalls(VURegs * VU, _VURegsNum *VUregsn)
{
	const bool writesQ = (VUregsn->VIwrite & (1 << REG_Q)) != 0;
	if (!writesQ)
		return;

	_vuFDIVAdd(VU, VUregsn->cycles);
}
// Starts the EFU pipe for this instruction, but only when its VI write mask
// includes the P register; otherwise nothing is done.
static __fi void _vuAddEFUStalls(VURegs * VU, _VURegsNum *VUregsn)
{
	const bool writesP = (VUregsn->VIwrite & (1 << REG_P)) != 0;
	if (!writesP)
		return;

	_vuEFUAdd(VU, VUregsn->cycles);
}
__fi void _vuAddUpperStalls(VURegs* VU, _VURegsNum* VUregsn)
{
switch (VUregsn->pipe) {
case VUPIPE_FMAC: _vuAddFMACStalls(VU, VUregsn); break;
case VUPIPE_FMAC: _vuAddFMACStalls(VU, VUregsn, true); break;
}
}
__fi void _vuAddLowerStalls(VURegs * VU, _VURegsNum *VUregsn) {
switch (VUregsn->pipe) {
case VUPIPE_FMAC: _vuAddFMACStalls(VU, VUregsn); break;
__fi void _vuAddLowerStalls(VURegs* VU, _VURegsNum* VUregsn)
{
switch (VUregsn->pipe)
{
case VUPIPE_FMAC: _vuAddFMACStalls(VU, VUregsn, false); break;
case VUPIPE_FDIV: _vuAddFDIVStalls(VU, VUregsn); break;
case VUPIPE_EFU: _vuAddEFUStalls(VU, VUregsn); break;
case VUPIPE_IALU: _vuAddIALUStalls(VU, VUregsn); break;
case VUPIPE_IALU: _vuAddIALUStalls(VU, VUregsn); break;
}
}
@ -2843,8 +2796,17 @@ VUREGS_IDISIT(IOR);
VUREGS_IDISIT(ISUB);
VUREGS_ITIS(ISUBIU);
VUREGS_FTFS(MOVE);
// Fills in the register-usage description for the MOVE instruction.
// When the destination index (_Ft_) is 0 the instruction is assigned to no
// pipe and reports no VI reads; otherwise it uses the FMAC pipe and reports
// GET_VF0_FLAG(_Fs_) as its VI read mask. The source (_Fs_) and destination
// (_Ft_) both use the instruction's _XYZW field mask; there is no second
// source and no VI register is written.
// NOTE(review): _Ft_, _Fs_ and _XYZW are presumably opcode-field macros
// reading from VU - confirm against their definitions.
static __ri void _vuRegsMOVE(const VURegs* VU, _VURegsNum* VUregsn)
{
	const bool hasDest = (_Ft_ != 0);

	VUregsn->pipe = hasDest ? VUPIPE_FMAC : VUPIPE_NONE;

	// Destination register
	VUregsn->VFwrite = _Ft_;
	VUregsn->VFwxyzw = _XYZW;

	// Single source register
	VUregsn->VFread0 = _Fs_;
	VUregsn->VFr0xyzw = _XYZW;

	// No second source
	VUregsn->VFread1 = 0;
	VUregsn->VFr1xyzw = 0;

	// Integer register usage
	VUregsn->VIwrite = 0;
	VUregsn->VIread = (hasDest ? GET_VF0_FLAG(_Fs_) : 0);
}
static __ri void _vuRegsMFIR(const VURegs* VU, _VURegsNum *VUregsn) {
VUregsn->pipe = VUPIPE_FMAC;
VUregsn->VFwrite = _Ft_;

View File

@ -53,7 +53,7 @@ extern __aligned16 const Fnptr_Void VU1_LOWER_OPCODE[128];
extern __aligned16 const Fnptr_Void VU1_UPPER_OPCODE[64];
extern __aligned16 const Fnptr_VuRegsN VU1regs_LOWER_OPCODE[128];
extern __aligned16 const Fnptr_VuRegsN VU1regs_UPPER_OPCODE[64];
extern void _vuClearFMAC(VURegs * VU);
extern void _vuTestPipes(VURegs * VU);
extern void _vuTestUpperStalls(VURegs * VU, _VURegsNum *VUregsn);
extern void _vuTestLowerStalls(VURegs * VU, _VURegsNum *VUregsn);