VU: Initial work to overhaul VU interpreter

Still a bit janky in some games and subject to change.
This commit is contained in:
refractionpcsx2 2021-09-03 00:04:55 +01:00
parent 253571fd35
commit 067faccdd2
8 changed files with 534 additions and 302 deletions

View File

@ -568,10 +568,16 @@ struct Gif_Unit
}
if (curSize >= size)
return size;
if(!EmuConfig.Cpu.Recompiler.EnableVU1 && pathIdx == GIF_PATH_1)
{
return curSize | ((u32)gifTag.tag.EOP << 31);
}
if (gifTag.tag.EOP )
{
return curSize;
}
}
}
// Specify the transfer type you are initiating
// The return value is the amount of data (in bytes) that was processed

View File

@ -110,6 +110,7 @@ struct fmacPipe
{
int enable;
int reg;
int flagreg;
int xyzw;
u32 sCycle;
u32 Cycle;
@ -171,6 +172,12 @@ struct __aligned16 VURegs
u8* Mem;
u8* Micro;
u32 xgkickenable;
u32 xgkickaddr;
u32 xgkickdiff;
u32 xgkicksizeremaining;
u32 xgkicklastcycle;
u32 xgkickendpacket;
u32 ebit;
u8 VIBackupCycles;

View File

@ -44,6 +44,8 @@ void __fastcall vu0ExecMicro(u32 addr) {
// Need to copy the clip flag back to the interpreter in case COP2 has edited it
VU0.clipflag = VU0.VI[REG_CLIP_FLAG].UL;
VU0.macflag = VU0.VI[REG_MAC_FLAG].UL;
VU0.statusflag = VU0.VI[REG_STATUS_FLAG].UL;
VU0.VI[REG_VPU_STAT].UL &= ~0xFF;
VU0.VI[REG_VPU_STAT].UL |= 0x01;
VU0.cycle = cpuRegs.cycle;

View File

@ -19,6 +19,8 @@
#include "VUmicro.h"
#include <cfenv>
extern void _vuFlushAll(VURegs* VU);
static void _vu0ExecUpper(VURegs* VU, u32 *ptr) {
@ -82,6 +84,8 @@ static void _vu0Exec(VURegs* VU)
/* check upper flags */
if (ptr[1] & 0x80000000) { /* I flag */
_vuTestPipes(VU);
_vu0ExecUpper(VU, ptr);
VU->VI[REG_I].UL = ptr[0];
@ -92,7 +96,7 @@ static void _vu0Exec(VURegs* VU)
#ifndef INT_VUSTALLHACK
_vuTestLowerStalls(VU, &lregs);
#endif
_vuTestPipes(VU);
vu0branch = lregs.pipe == VUPIPE_BRANCH;
vfreg = 0; vireg = 0;
@ -146,8 +150,6 @@ static void _vu0Exec(VURegs* VU)
if (!(ptr[1] & 0x80000000))
_vuAddLowerStalls(VU, &lregs);
_vuTestPipes(VU);
if(VU->VIBackupCycles > 0)
VU->VIBackupCycles--;
@ -179,8 +181,8 @@ static void _vu0Exec(VURegs* VU)
void vu0Exec(VURegs* VU)
{
VU0.VI[REG_TPC].UL &= VU0_PROGMASK;
_vu0Exec(VU);
VU->cycle++;
_vu0Exec(VU);
if (VU->VI[0].UL != 0) DbgCon.Error("VI[0] != 0!!!!\n");
if (VU->VF[0].f.x != 0.0f) DbgCon.Error("VF[0].x != 0.0!!!!\n");
@ -210,6 +212,9 @@ void InterpVU0::Step()
void InterpVU0::Execute(u32 cycles)
{
const int originalRounding = fegetround();
fesetround(g_sseVUMXCSR.RoundingControl << 8);
VU0.VI[REG_TPC].UL <<= 3;
VU0.flags &= ~VUFLAG_MFLAGSET;
for (int i = (int)cycles; i > 0; i--) {
@ -222,4 +227,6 @@ void InterpVU0::Execute(u32 cycles)
vu0Exec(&VU0);
}
VU0.VI[REG_TPC].UL >>= 3;
fesetround(originalRounding);
}

View File

@ -18,17 +18,24 @@
#include "Common.h"
#include "VUmicro.h"
#include "GS.h"
#include "Gif_Unit.h"
#include "MTVU.h"
extern void _vuFlushAll(VURegs* VU);
#include <cfenv>
void _vu1ExecUpper(VURegs* VU, u32 *ptr) {
extern void _vuFlushAll(VURegs* VU);
extern void _vuXGKICKFlush(VURegs* VU);
void _vu1ExecUpper(VURegs* VU, u32* ptr)
{
VU->code = ptr[1];
//IdebugUPPER(VU1);
IdebugUPPER(VU1);
VU1_UPPER_OPCODE[VU->code & 0x3f]();
}
void _vu1ExecLower(VURegs* VU, u32 *ptr) {
void _vu1ExecLower(VURegs* VU, u32* ptr)
{
VU->code = ptr[0];
IdebugLOWER(VU1);
VU1_LOWER_OPCODE[VU->code >> 25]();
@ -52,70 +59,81 @@ static void _vu1Exec(VURegs* VU)
ptr = (u32*)&VU->Micro[VU->VI[REG_TPC].UL];
VU->VI[REG_TPC].UL += 8;
if (ptr[1] & 0x40000000) { /* E flag */
if (ptr[1] & 0x40000000)
{ /* E flag */
VU->ebit = 2;
}
if (ptr[1] & 0x10000000) { /* D flag */
if (VU0.VI[REG_FBRST].UL & 0x400) {
if (ptr[1] & 0x10000000)
{ /* D flag */
if (VU0.VI[REG_FBRST].UL & 0x400)
{
VU0.VI[REG_VPU_STAT].UL |= 0x200;
hwIntcIrq(INTC_VU1);
VU->ebit = 1;
}
}
if (ptr[1] & 0x08000000) { /* T flag */
if (VU0.VI[REG_FBRST].UL & 0x800) {
if (ptr[1] & 0x08000000)
{ /* T flag */
if (VU0.VI[REG_FBRST].UL & 0x800)
{
VU0.VI[REG_VPU_STAT].UL |= 0x400;
hwIntcIrq(INTC_VU1);
VU->ebit = 1;
}
}
//VUM_LOG("VU->cycle = %d (flags st=%x;mac=%x;clip=%x,q=%f)", VU->cycle, VU->statusflag, VU->macflag, VU->clipflag, VU->q.F);
VU->code = ptr[1];
VU1regs_UPPER_OPCODE[VU->code & 0x3f](&uregs);
#ifndef INT_VUSTALLHACK
_vuTestUpperStalls(VU, &uregs);
#endif
/* check upper flags */
if (ptr[1] & 0x80000000) { /* I flag */
if (ptr[1] & 0x80000000)
{ /* I flag */
_vuTestPipes(VU);
_vu1ExecUpper(VU, ptr);
VU->VI[REG_I].UL = ptr[0];
//Lower not used, set to 0 to fill in the FMAC stall gap
//Could probably get away with just running upper stalls, but let's not tempt fate.
memset(&lregs, 0, sizeof(lregs));
} else {
}
else
{
VU->code = ptr[0];
VU1regs_LOWER_OPCODE[VU->code >> 25](&lregs);
#ifndef INT_VUSTALLHACK
_vuTestLowerStalls(VU, &lregs);
#endif
_vuTestPipes(VU);
vu1branch = lregs.pipe == VUPIPE_BRANCH;
vfreg = 0; vireg = 0;
if (uregs.VFwrite) {
if (lregs.VFwrite == uregs.VFwrite) {
vfreg = 0;
vireg = 0;
if (uregs.VFwrite)
{
if (lregs.VFwrite == uregs.VFwrite)
{
//Console.Warning("*PCSX2*: Warning, VF write to the same reg in both lower/upper cycle");
discard = 1;
}
if (lregs.VFread0 == uregs.VFwrite ||
lregs.VFread1 == uregs.VFwrite) {
lregs.VFread1 == uregs.VFwrite)
{
//Console.WriteLn("saving reg %d at pc=%x", i, VU->VI[REG_TPC].UL);
_VF = VU->VF[uregs.VFwrite];
vfreg = uregs.VFwrite;
}
}
if (uregs.VIread & (1 << REG_CLIP_FLAG)) {
if (lregs.VIwrite & (1 << REG_CLIP_FLAG)) {
if (uregs.VIwrite & (1 << REG_CLIP_FLAG))
{
if (lregs.VIwrite & (1 << REG_CLIP_FLAG))
{
Console.Warning("*PCSX2*: Warning, VI write to the same reg in both lower/upper cycle");
discard = 1;
}
if (lregs.VIread & (1 << REG_CLIP_FLAG)) {
if (lregs.VIread & (1 << REG_CLIP_FLAG))
{
_VI = VU->VI[REG_CLIP_FLAG];
vireg = REG_CLIP_FLAG;
}
@ -123,36 +141,43 @@ static void _vu1Exec(VURegs* VU)
_vu1ExecUpper(VU, ptr);
if (discard == 0) {
if (vfreg) {
if (discard == 0)
{
if (vfreg)
{
_VFc = VU->VF[vfreg];
VU->VF[vfreg] = _VF;
}
if (vireg) {
if (vireg)
{
_VIc = VU->VI[vireg];
VU->VI[vireg] = _VI;
}
_vu1ExecLower(VU, ptr);
if (vfreg) {
if (vfreg)
{
VU->VF[vfreg] = _VFc;
}
if (vireg) {
if (vireg)
{
VU->VI[vireg] = _VIc;
}
}
}
_vuAddUpperStalls(VU, &uregs);
_vuAddLowerStalls(VU, &lregs);
_vuTestPipes(VU);
//if (!(ptr[1] & 0x80000000))
_vuAddLowerStalls(VU, &lregs);
if (VU->VIBackupCycles > 0)
VU->VIBackupCycles--;
if (VU->branch > 0) {
if (VU->branch-- == 1) {
if (VU->branch > 0)
{
if (VU->branch-- == 1)
{
VU->VI[REG_TPC].UL = VU->branchpc;
if (VU->takedelaybranch)
@ -166,26 +191,70 @@ static void _vu1Exec(VURegs* VU)
}
}
if( VU->ebit > 0 ) {
if( VU->ebit-- == 1 ) {
if (VU->ebit > 0)
{
if (VU->ebit-- == 1)
{
VU->VIBackupCycles = 0;
_vuFlushAll(VU);
VU0.VI[REG_VPU_STAT].UL &= ~0x100;
vif1Regs.stat.VEW = false;
}
}
if (VU->xgkickenable && (VU1.cycle - VU->xgkicklastcycle) >= 2)
{
if (VU->xgkicksizeremaining == 0)
{
IPU_LOG("Banana Reading next packet from %x", VU->xgkickaddr);
u32 size = gifUnit.GetGSPacketSize(GIF_PATH_1, VU->Mem, VU->xgkickaddr);
VU->xgkicksizeremaining = size & 0xFFFF;
VU->xgkickendpacket = size >> 31;
if (VU->xgkicksizeremaining == 0)
VU->xgkickenable = 0;
IPU_LOG("Banana New packet size %x", VU->xgkicksizeremaining);
}
u32 transfersize = std::min(VU->xgkicksizeremaining / 0x10, (VU1.cycle - VU->xgkicklastcycle) / 2);
if (transfersize)
{
IPU_LOG("Banana Transferring %x bytes from %x size left %x", transfersize * 0x10, VU->xgkickaddr, VU->xgkicksizeremaining);
if ((transfersize * 0x10) > VU->xgkicksizeremaining)
gifUnit.gifPath[GIF_PATH_1].CopyGSPacketData(&VU->Mem[VU->xgkickaddr], transfersize * 0x10, true);
else
gifUnit.TransferGSPacketData(GIF_TRANS_XGKICK, &VU->Mem[VU->xgkickaddr], transfersize * 0x10, true);
VU->xgkickaddr = (VU->xgkickaddr + (transfersize * 0x10)) & 0x3FFF;
VU->xgkicksizeremaining -= (transfersize * 0x10);
VU->xgkickdiff = 0x4000 - VU->xgkickaddr;
IPU_LOG("Banana next addr %x left size %x EOP %d", VU->xgkickaddr, VU->xgkicksizeremaining, VU->xgkickendpacket);
VU->xgkicklastcycle += std::max(transfersize * 2, 2U);
if (VU->xgkicksizeremaining || !VU->xgkickendpacket)
VU->xgkickenable = 1;
else
{
VU->xgkickenable = 0;
IPU_LOG("Banana transfer finished");
}
}
}
}
void vu1Exec(VURegs* VU)
{
_vu1Exec(VU);
VU->cycle++;
_vu1Exec(VU);
if (VU->VI[0].UL != 0) DbgCon.Error("VI[0] != 0!!!!\n");
if (VU->VF[0].f.x != 0.0f) DbgCon.Error("VF[0].x != 0.0!!!!\n");
if (VU->VF[0].f.y != 0.0f) DbgCon.Error("VF[0].y != 0.0!!!!\n");
if (VU->VF[0].f.z != 0.0f) DbgCon.Error("VF[0].z != 0.0!!!!\n");
if (VU->VF[0].f.w != 1.0f) DbgCon.Error("VF[0].w != 1.0!!!!\n");
if (VU->VI[0].UL != 0)
DbgCon.Error("VI[0] != 0!!!!\n");
if (VU->VF[0].f.x != 0.0f)
DbgCon.Error("VF[0].x != 0.0!!!!\n");
if (VU->VF[0].f.y != 0.0f)
DbgCon.Error("VF[0].y != 0.0!!!!\n");
if (VU->VF[0].f.z != 0.0f)
DbgCon.Error("VF[0].z != 0.0!!!!\n");
if (VU->VF[0].f.w != 1.0f)
DbgCon.Error("VF[0].w != 1.0!!!!\n");
}
InterpVU1::InterpVU1()
@ -194,11 +263,13 @@ InterpVU1::InterpVU1()
IsInterpreter = true;
}
void InterpVU1::Reset() {
void InterpVU1::Reset()
{
vu1Thread.WaitVU();
}
void InterpVU1::Shutdown() noexcept {
void InterpVU1::Shutdown() noexcept
{
vu1Thread.WaitVU();
}
@ -215,10 +286,16 @@ void InterpVU1::Step()
void InterpVU1::Execute(u32 cycles)
{
const int originalRounding = fegetround();
fesetround(g_sseVUMXCSR.RoundingControl << 8);
VU1.VI[REG_TPC].UL <<= 3;
for (int i = (int)cycles; i > 0; i--) {
if (!(VU0.VI[REG_VPU_STAT].UL & 0x100)) {
if (VU1.branch || VU1.ebit) {
for (int i = (int)cycles; i > 0; i--)
{
if (!(VU0.VI[REG_VPU_STAT].UL & 0x100))
{
if (VU1.branch || VU1.ebit)
{
Step(); // run branch delay slot?
}
break;
@ -226,5 +303,6 @@ void InterpVU1::Execute(u32 cycles)
Step();
}
VU1.VI[REG_TPC].UL >>= 3;
}
fesetround(originalRounding);
}

View File

@ -25,22 +25,6 @@
/* NEW FLAGS */ //By asadr. Thnkx F|RES :p
/*****************************************/
// Currently a no-op: every statement in the body is commented out, so calling
// this function leaves VU untouched.
// The disabled code extracted bits 4 and 5 of VU->statusflag (named Flag_I and
// Flag_D there), OR'd each with the correspondingly shifted VU0 REG_STATUS_FLAG
// value, and merged the results into VU->statusflag shifted left by 10 and 11.
void vuUpdateDI(VURegs * VU) {
// u32 Flag_S = 0;
// u32 Flag_I = 0;
// u32 Flag_D = 0;
//
// /*
// FLAG D - I
// */
// Flag_I = (VU->statusflag >> 4) & 0x1;
// Flag_D = (VU->statusflag >> 5) & 0x1;
//
// VU->statusflag|= (Flag_I | (VU0.VI[REG_STATUS_FLAG].US[0] >> 4)) << 10;
// VU->statusflag|= (Flag_D | (VU0.VI[REG_STATUS_FLAG].US[0] >> 5)) << 11;
}
static __ri u32 VU_MAC_UPDATE( int shift, VURegs * VU, float f )
{
u32 v = *(u32*)&f;
@ -64,7 +48,7 @@ static __ri u32 VU_MAC_UPDATE( int shift, VURegs * VU, float f )
VU->macflag = (VU->macflag&~(0x1000<<shift)) | (0x0101<<shift);
return s;
case 255:
VU->macflag = (VU->macflag&~(0x0100<<shift)) | (0x1000<<shift);
VU->macflag = (VU->macflag&~(0x0101<<shift)) | (0x1000<<shift);
return s|0x7f7fffff; /* max allowed */
default:
VU->macflag = (VU->macflag & ~(0x1101<<shift));
@ -118,5 +102,6 @@ __ri void VU_STAT_UPDATE(VURegs * VU) {
if (VU->macflag & 0x00F0) newflag |= 0x2;
if (VU->macflag & 0x0F00) newflag |= 0x4;
if (VU->macflag & 0xF000) newflag |= 0x8;
VU->statusflag = (VU->statusflag&0xc30)|newflag|((VU->statusflag&0xf)<<6);
// Save old sticky flags and D/I settings; everything else is the new flags only
VU->statusflag = (VU->statusflag&0xff0)| newflag | (newflag<<6);
}

View File

@ -17,6 +17,8 @@
#include "Common.h"
#include "VUmicro.h"
#include "MTVU.h"
#include "GS.h"
#include "Gif_Unit.h"
// Executes a Block based on EE delta time
void BaseVUmicroCPU::ExecuteBlock(bool startUp)
@ -32,7 +34,49 @@ void BaseVUmicroCPU::ExecuteBlock(bool startUp)
}
if (!(stat & test))
{
if (m_Idx == 1)
{
if (VU1.xgkickenable && (cpuRegs.cycle - VU1.xgkicklastcycle) >= 2)
{
if (VU1.xgkicksizeremaining == 0)
{
IPU_LOG("Banana Reading next packet from %x", VU1.xgkickaddr);
u32 size = gifUnit.GetGSPacketSize(GIF_PATH_1, VU1.Mem, VU1.xgkickaddr);
VU1.xgkicksizeremaining = size & 0xFFFF;
VU1.xgkickendpacket = size >> 31;
IPU_LOG("Banana New packet size %x", VU1.xgkicksizeremaining);
}
u32 transfersize = std::min(VU1.xgkicksizeremaining / 0x10, (cpuRegs.cycle - VU1.xgkicklastcycle) / 2);
if (transfersize)
{
IPU_LOG("Banana Transferring %x bytes from %x size left %x", transfersize * 0x10, VU1.xgkickaddr, VU1.xgkicksizeremaining);
if ((transfersize * 0x10) > VU1.xgkicksizeremaining)
gifUnit.gifPath[GIF_PATH_1].CopyGSPacketData(&VU1.Mem[VU1.xgkickaddr], transfersize * 0x10, true);
else
gifUnit.TransferGSPacketData(GIF_TRANS_XGKICK, &VU1.Mem[VU1.xgkickaddr], transfersize * 0x10, true);
VU1.xgkickaddr = (VU1.xgkickaddr + (transfersize * 0x10)) & 0x3FFF;
VU1.xgkicksizeremaining -= (transfersize * 0x10);
VU1.xgkickdiff = 0x4000 - VU1.xgkickaddr;
IPU_LOG("Banana next addr %x left size %x EOP %d", VU1.xgkickaddr, VU1.xgkicksizeremaining, VU1.xgkickendpacket);
VU1.xgkicklastcycle += std::max(transfersize * 2, 2U);
if (VU1.xgkicksizeremaining || !VU1.xgkickendpacket)
VU1.xgkickenable = 1;
else
{
VU1.xgkickenable = 0;
IPU_LOG("Banana transfer finished");
}
}
else
VU1.xgkickenable = 1;
}
}
return;
}
if (startUp && s) // Start Executing a microprogram (When kickstarted)
{

File diff suppressed because it is too large Load Diff