Int/VU0: Improve VU0 sync, mainly for interpreter.

Also correctly doubles the opcode time when double issue is disabled (interpreter).
This commit is contained in:
refractionpcsx2 2023-11-02 14:33:24 +00:00
parent 8b9af2c21b
commit ed1791ab02
3 changed files with 40 additions and 7 deletions

View File

@ -39,6 +39,16 @@ static fastjmp_buf intJmpBuf;
static void intEventTest();
// Returns the current value of cpuBlockCycles, the counter that execI()
// adds each executed opcode's cycle cost to.
u32 intGetCycles()
{
return cpuBlockCycles;
}
// Overwrites cpuBlockCycles with `cycles`, replacing whatever the
// interpreter had accumulated so far.
void intSetCycles(u32 cycles)
{
cpuBlockCycles = cycles;
}
// These macros are used to assemble the repassembler functions
void intBreakpoint(bool memcheck)
@ -178,7 +188,7 @@ static void execI()
#endif
cpuBlockCycles += opcode.cycles;
cpuBlockCycles += opcode.cycles * (2 - ((cpuRegs.CP0.n.Config >> 18) & 0x1));
opcode.interpret();
}

View File

@ -220,6 +220,8 @@ alignas(16) extern tlbs tlb[48];
extern bool eeEventTestIsActive;
u32 intGetCycles();
void intSetCycles(u32 cycles);
void intSetBranch();
// This is a special form of the interpreter's doBranch that is run from various

View File

@ -55,7 +55,7 @@ void COP2_Unknown()
//****************************************************************************
__fi void _vu0run(bool breakOnMbit, bool addCycles) {
__fi void _vu0run(bool breakOnMbit, bool addCycles, bool sync_only) {
if (!(VU0.VI[REG_VPU_STAT].UL & 1)) return;
@ -67,12 +67,22 @@ __fi void _vu0run(bool breakOnMbit, bool addCycles) {
}
u32 startcycle = cpuRegs.cycle;
u32 runCycles = 0x7fffffff;
s32 runCycles = 0x7fffffff;
if (sync_only)
{
cpuRegs.cycle += intGetCycles() >> 3;
intSetCycles(intGetCycles() & (1 << 3) - 1);
runCycles = (s32)(cpuRegs.cycle - VU0.cycle);
if (runCycles < 0)
return;
}
do { // Run VU until it finishes or M-Bit
CpuVU0->Execute(runCycles);
} while ((VU0.VI[REG_VPU_STAT].UL & 1) // E-bit Termination
&& (!breakOnMbit || !(VU0.flags & VUFLAG_MFLAGSET) || (s32)(cpuRegs.cycle - VU0.cycle) > 0)); // M-bit Break
&& !sync_only && (!breakOnMbit || (!(VU0.flags & VUFLAG_MFLAGSET) && (s32)(cpuRegs.cycle - VU0.cycle) > 0))); // M-bit Break
// Add cycles if called from EE's COP2
if (addCycles)
@ -85,15 +95,17 @@ __fi void _vu0run(bool breakOnMbit, bool addCycles) {
}
}
void _vu0WaitMicro() { _vu0run(1, 1); } // Runs VU0 Micro Until E-bit or M-Bit End
void _vu0FinishMicro() { _vu0run(0, 1); } // Runs VU0 Micro Until E-Bit End
void vu0Finish() { _vu0run(0, 0); } // Runs VU0 Micro Until E-Bit End (doesn't stall EE)
// Thin wrappers that pick a mode of _vu0run(breakOnMbit, addCycles, sync_only).
// Only vu0Sync sets sync_only; in that mode _vu0run passes the EE/VU0 cycle
// difference to Execute, returns early if that difference is negative, and
// leaves its run loop after a single Execute call.
void _vu0WaitMicro() { _vu0run(1, 1, 0); } // Runs VU0 Micro Until E-bit or M-Bit End
void _vu0FinishMicro() { _vu0run(0, 1, 0); } // Runs VU0 Micro Until E-Bit End
void vu0Finish() { _vu0run(0, 0, 0); } // Runs VU0 Micro Until E-Bit End (doesn't stall EE)
void vu0Sync() { _vu0run(0, 0, 1); } // Runs VU0 until it catches up
namespace R5900 {
namespace Interpreter{
namespace OpcodeImpl
{
void LQC2() {
vu0Sync();
u32 addr = cpuRegs.GPR.r[_Rs_].UL[0] + (s16)cpuRegs.code;
if (_Ft_) {
memRead128(addr, VU0.VF[_Ft_].UQ);
@ -107,6 +119,7 @@ namespace OpcodeImpl
//TODO: check this
// HUH why ? doesn't make any sense ...
void SQC2() {
vu0Sync();
u32 addr = _Imm_ + cpuRegs.GPR.r[_Rs_].UL[0];
memWrite128(addr, VU0.VF[_Ft_].UQ);
}
@ -117,6 +130,8 @@ void QMFC2() {
if (cpuRegs.code & 1) {
_vu0FinishMicro();
}
else
vu0Sync();
if (_Rt_ == 0) return;
cpuRegs.GPR.r[_Rt_].UD[0] = VU0.VF[_Fs_].UD[0];
cpuRegs.GPR.r[_Rt_].UD[1] = VU0.VF[_Fs_].UD[1];
@ -126,6 +141,8 @@ void QMTC2() {
if (cpuRegs.code & 1) {
_vu0WaitMicro();
}
else
vu0Sync();
if (_Fs_ == 0) return;
VU0.VF[_Fs_].UD[0] = cpuRegs.GPR.r[_Rt_].UD[0];
VU0.VF[_Fs_].UD[1] = cpuRegs.GPR.r[_Rt_].UD[1];
@ -135,6 +152,8 @@ void CFC2() {
if (cpuRegs.code & 1) {
_vu0FinishMicro();
}
else
vu0Sync();
if (_Rt_ == 0) return;
if (_Fs_ == REG_R)
@ -155,6 +174,8 @@ void CTC2() {
if (cpuRegs.code & 1) {
_vu0WaitMicro();
}
else
vu0Sync();
if (_Fs_ == 0) return;
switch(_Fs_) {