From fba9c6c04d227ffc6a603efec88e1759f9261263 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Mon, 20 Sep 2021 21:03:11 +0100 Subject: [PATCH] COP2: never flush EE regs but back them up conditionally --- pcsx2/x86/iCore.cpp | 72 ++++++++++++++++++ pcsx2/x86/iCore.h | 2 + pcsx2/x86/ix86-32/iR5900LoadStore.cpp | 13 ++-- pcsx2/x86/microVU_Macro.inl | 103 ++++++++++++-------------- 4 files changed, 129 insertions(+), 61 deletions(-) diff --git a/pcsx2/x86/iCore.cpp b/pcsx2/x86/iCore.cpp index e7e9a26b85..9c18c03862 100644 --- a/pcsx2/x86/iCore.cpp +++ b/pcsx2/x86/iCore.cpp @@ -45,8 +45,80 @@ _x86regs x86regs[iREGCNT_GPR], s_saveX86regs[iREGCNT_GPR]; #define VU_VFx_ADDR(x) (uptr)&VU->VF[x].UL[0] #define VU_ACCx_ADDR (uptr)&VU->ACC.UL[0] + +__aligned16 u32 xmmBackup[iREGCNT_XMM][4]; + +#ifdef __M_X86_64 +__aligned16 u64 gprBackup[iREGCNT_GPR]; +#else +__aligned16 u32 gprBackup[iREGCNT_GPR]; +#endif + static int s_xmmchecknext = 0; +void _backupNeededXMM() +{ + for (int i = 0; i < iREGCNT_XMM; i++) + { + if (xmmregs[i].inuse) + { + xMOVAPS(ptr128[&xmmBackup[i][0]], xRegisterSSE(i)); + } + } +} + +void _restoreNeededXMM() +{ + for (int i = 0; i < iREGCNT_XMM; i++) + { + if (xmmregs[i].inuse) + { + xMOVAPS(xRegisterSSE(i), ptr128[&xmmBackup[i][0]]); + } + } +} + +void _backupNeededx86() +{ + for (int i = 0; i < iREGCNT_GPR; i++) + { + if (x86regs[i].inuse) + { +#ifdef __M_X86_64 + xMOV(ptr64[&gprBackup[i]], xRegister64(i)); +#else + xMOV(ptr32[&gprBackup[i]], xRegister32(i)); +#endif + } + } +} + +void _restoreNeededx86() +{ + for (int i = 0; i < iREGCNT_GPR; i++) + { + if (x86regs[i].inuse) + { +#ifdef __M_X86_64 + xMOV(xRegister64(i), ptr64[&gprBackup[i]]); +#else + xMOV(xRegister32(i), ptr32[&gprBackup[i]]); +#endif + } + } +} + +void _cop2BackupRegs() +{ + _backupNeededx86(); + _backupNeededXMM(); +} + +void _cop2RestoreRegs() +{ + _restoreNeededx86(); + _restoreNeededXMM(); +} // Clear current register mapping structure // Clear allocation counter void 
_initXMMregs() diff --git a/pcsx2/x86/iCore.h b/pcsx2/x86/iCore.h index ad80294a7f..dbde1f086c 100644 --- a/pcsx2/x86/iCore.h +++ b/pcsx2/x86/iCore.h @@ -159,6 +159,8 @@ struct _xmmregs u16 counter; }; +void _cop2BackupRegs(); +void _cop2RestoreRegs(); void _initXMMregs(); int _getFreeXMMreg(); int _allocTempXMMreg(XMMSSEType type, int xmmreg); diff --git a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp index 528ca014d7..a6c1e1d46c 100644 --- a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp +++ b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp @@ -920,19 +920,21 @@ void recSWC1() void recLQC2() { - iFlushCall(FLUSH_EVERYTHING); - + _freeX86reg(eax); xMOV(eax, ptr32[&cpuRegs.cycle]); xADD(eax, scaleblockcycles_clear()); xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles + xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1); xForwardJZ32 skipvuidle; xSUB(eax, ptr32[&VU0.cycle]); xSUB(eax, ptr32[&VU0.nextBlockCycles]); xCMP(eax, EmuConfig.Gamefixes.VUKickstartHack ? 8 : 0); xForwardJL32 skip; + _cop2BackupRegs(); xLoadFarAddr(arg1reg, CpuVU0); xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg); + _cop2RestoreRegs(); skip.SetTarget(); skipvuidle.SetTarget(); @@ -965,20 +967,21 @@ void recLQC2() void recSQC2() { - iFlushCall(FLUSH_EVERYTHING); - - + _freeX86reg(eax); xMOV(eax, ptr32[&cpuRegs.cycle]); xADD(eax, scaleblockcycles_clear()); xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles + xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1); xForwardJZ32 skipvuidle; xSUB(eax, ptr32[&VU0.cycle]); xSUB(eax, ptr32[&VU0.nextBlockCycles]); xCMP(eax, EmuConfig.Gamefixes.VUKickstartHack ? 
8 : 0); xForwardJL32 skip; + _cop2BackupRegs(); xLoadFarAddr(arg1reg, CpuVU0); xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg); + _cop2RestoreRegs(); skip.SetTarget(); skipvuidle.SetTarget(); diff --git a/pcsx2/x86/microVU_Macro.inl b/pcsx2/x86/microVU_Macro.inl index 2fde488c73..4ed4f9acdd 100644 --- a/pcsx2/x86/microVU_Macro.inl +++ b/pcsx2/x86/microVU_Macro.inl @@ -36,7 +36,7 @@ void setupMacroOp(int mode, const char* opName) microVU0.prog.IRinfo.curPC = 0; microVU0.code = cpuRegs.code; memset(&microVU0.prog.IRinfo.info[0], 0, sizeof(microVU0.prog.IRinfo.info[0])); - iFlushCall(FLUSH_EVERYTHING); + microVU0.regAlloc->reset(); if (mode & 0x01) // Q-Reg will be Read { @@ -285,13 +285,13 @@ void COP2_Interlock(bool mBitSync) if (cpuRegs.code & 1) { - iFlushCall(FLUSH_EVERYTHING); xMOV(eax, ptr32[&cpuRegs.cycle]); xADD(eax, scaleblockcycles_clear()); xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1); xForwardJZ32 skipvuidle; + _cop2BackupRegs(); if (mBitSync) { xSUB(eax, ptr32[&VU0.cycle]); @@ -306,6 +306,7 @@ void COP2_Interlock(bool mBitSync) } else xFastCall((void*)_vu0FinishMicro); + _cop2RestoreRegs(); skipvuidle.SetTarget(); } } @@ -321,80 +322,47 @@ void TEST_FBRST_RESET(FnType_Void* resetFunct, int vuIndex) static void recCFC2() { - printCOP2("CFC2"); + _freeX86reg(eax); COP2_Interlock(false); + if (!_Rt_) return; - iFlushCall(FLUSH_EVERYTHING); - if (!(cpuRegs.code & 1)) { xMOV(eax, ptr32[&cpuRegs.cycle]); xADD(eax, scaleblockcycles_clear()); xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles + xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1); xForwardJZ32 skipvuidle; xSUB(eax, ptr32[&VU0.cycle]); xSUB(eax, ptr32[&VU0.nextBlockCycles]); xCMP(eax, EmuConfig.Gamefixes.VUKickstartHack ? 
8 : 0); xForwardJL32 skip; + _cop2BackupRegs(); xLoadFarAddr(arg1reg, CpuVU0); xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg); + _cop2RestoreRegs(); skip.SetTarget(); skipvuidle.SetTarget(); } + _flushEEreg(_Rt_); + if (_Rd_ == REG_STATUS_FLAG) // Normalize Status Flag xMOV(eax, ptr32[&vu0Regs.VI[REG_STATUS_FLAG].UL]); else xMOV(eax, ptr32[&vu0Regs.VI[_Rd_].UL]); - if (_Rd_ == REG_TPC) // Divide TPC register value by 8 during copying - { - // Ok, this deserves an explanation. - // Accoring to the official PS2 VU0 coding manual there are 3 ways to execute a micro subroutine on VU0 - // one of which is using the VCALLMSR intruction. - // The manual requires putting the address of the micro subroutine - // into the CMSAR0 register divided by 8 using the CTC2 command before executing VCALLMSR. - // Many games (for instance, 24: The Game, GTA LCS, GTA VCS and FFXII) do in fact use this way, - // they diligently put the address of the micro subroutine into a separate register (v0, v1 etc), divide it by 8 - // and move it to CMSAR0 by calling the CTC2 command. - - // However, there are also at least 2 confirmed games (R Racing Evolution, Street Fighter EX3) - // that execute a piece of code to run a micro subroutine on VU0 like this: - // - // ... - // cfc2 t4, TPC - // ctc2 t4, CMSAR0 - // callmsr - // ... - // - // Interestingly enough there is no division by 8 but it works fine in these 2 mentioned games. - // It means the division operation is implicit. - // Homebrew tests for the real PS2 have shown that in fact the instruction "cfc2 t4, TPC" ends up with values that are not always divisible by 8. - - // There are 2 possibilities: either the CFC2 instruction divides the value of the TPC (which is the Program Counter register - // for micro subroutines) by 8 itself during copying or the TPC register always works with addresses already divided by 8. 
- // The latter seems less possible because the Program Counter register by definition holds the memory address of the instruction. - // In addition, PCSX2 already implements TPC as an instruction pointer so we'll assume that division by 8 - // is done by CFC2 while working with the TPC register. - // (fixes R Racing Evolution and Street Fighter EX3) - - //xSHR(eax, 3); - - //Update Refraction - Don't need to do this anymore as addresses are fed in divided by 8 always. - //Games such at The Incredible Hulk will read VU1's TPC from VU0 (which will already be multiplied by 8) then try to use CMSAR1 (which will also multiply by 8) - //So everything is now fed in without multiplication - } - // FixMe: Should R-Reg have upper 9 bits 0? xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], eax); if (_Rd_ >= 16) { + _freeX86reg(edx); xCDQ(); // Sign Extend xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], edx); } @@ -402,19 +370,21 @@ static void recCFC2() xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], 0); // FixMe: I think this is needed, but not sure how it works - _eeOnWriteReg(_Rt_, 1); + // Update Refraction 20/09/2021: This is needed because Const Prop is broken + // the Flushed flag isn't being cleared when it's not flushed. TODO I guess + _eeOnWriteReg(_Rt_, 0); } static void recCTC2() { - printCOP2("CTC2"); + _freeX86reg(eax); + COP2_Interlock(1); + if (!_Rd_) return; - iFlushCall(FLUSH_EVERYTHING); - if (!(cpuRegs.code & 1)) { xMOV(eax, ptr32[&cpuRegs.cycle]); @@ -427,12 +397,16 @@ static void recCTC2() xSUB(eax, ptr32[&VU0.nextBlockCycles]); xCMP(eax, EmuConfig.Gamefixes.VUKickstartHack ? 
8 : 0); xForwardJL32 skip; + _cop2BackupRegs(); xLoadFarAddr(arg1reg, CpuVU0); xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg); + _cop2RestoreRegs(); skip.SetTarget(); skipvuidle.SetTarget(); } + _flushEEreg(_Rt_); + switch (_Rd_) { case REG_MAC_FLAG: @@ -456,6 +430,7 @@ static void recCTC2() else xAND(ptr32[&vu0Regs.VI[REG_STATUS_FLAG].UL], 0x3F); + _freeXMMreg(xmmT1.Id); //Need to update the sticky flags for microVU mVUallocSFLAGd(&vu0Regs.VI[REG_STATUS_FLAG].UL); xMOVDZX(xmmT1, eax); @@ -465,6 +440,7 @@ static void recCTC2() break; } case REG_CMSAR1: // Execute VU1 Micro SubRoutine + _cop2BackupRegs(); xMOV(ecx, 1); xFastCall((void*)vu1Finish, ecx); if (_Rt_) @@ -474,6 +450,7 @@ static void recCTC2() else xXOR(ecx, ecx); xFastCall((void*)vu1ExecMicro, ecx); + _cop2RestoreRegs(); break; case REG_FBRST: if (!_Rt_) @@ -483,10 +460,10 @@ static void recCTC2() } else xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); - + _cop2BackupRegs(); TEST_FBRST_RESET(vu0ResetRegs, 0); TEST_FBRST_RESET(vu1ResetRegs, 1); - + _cop2RestoreRegs(); xAND(eax, 0x0C0C); xMOV(ptr32[&vu0Regs.VI[REG_FBRST].UL], eax); break; @@ -503,12 +480,13 @@ static void recQMFC2() { printCOP2("QMFC2"); + _freeX86reg(eax); + COP2_Interlock(false); + if (!_Rt_) return; - iFlushCall(FLUSH_EVERYTHING); - if (!(cpuRegs.code & 1)) { xMOV(eax, ptr32[&cpuRegs.cycle]); @@ -521,14 +499,19 @@ static void recQMFC2() xSUB(eax, ptr32[&VU0.nextBlockCycles]); xCMP(eax, EmuConfig.Gamefixes.VUKickstartHack ? 8 : 0); xForwardJL32 skip; + _cop2BackupRegs(); xLoadFarAddr(arg1reg, CpuVU0); xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg); + _cop2RestoreRegs(); skip.SetTarget(); skipvuidle.SetTarget(); } - // FixMe: For some reason this line is needed or else games break: - _eeOnWriteReg(_Rt_, 0); + _flushEEreg(_Rt_); + _freeXMMreg(xmmT1.Id); + // Update Refraction 20/09/2021: This is needed because Const Prop is broken + // the Flushed flag isn't being cleared when it's not flushed. 
TODO I guess + _eeOnWriteReg(_Rt_, 0); // This is needed because Const Prop is broken xMOVAPS(xmmT1, ptr128[&vu0Regs.VF[_Rd_]]); xMOVAPS(ptr128[&cpuRegs.GPR.r[_Rt_]], xmmT1); @@ -538,12 +521,13 @@ static void recQMTC2() { printCOP2("QMTC2"); + _freeX86reg(eax); + COP2_Interlock(true); + if (!_Rd_) return; - iFlushCall(FLUSH_EVERYTHING); - if (!(cpuRegs.code & 1)) { xMOV(eax, ptr32[&cpuRegs.cycle]); @@ -556,12 +540,17 @@ static void recQMTC2() xSUB(eax, ptr32[&VU0.nextBlockCycles]); xCMP(eax, EmuConfig.Gamefixes.VUKickstartHack ? 8 : 0); xForwardJL32 skip; + _cop2BackupRegs(); xLoadFarAddr(arg1reg, CpuVU0); xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg); + _cop2RestoreRegs(); skip.SetTarget(); skipvuidle.SetTarget(); } + _flushEEreg(_Rt_); + _freeXMMreg(xmmT1.Id); + xMOVAPS(xmmT1, ptr128[&cpuRegs.GPR.r[_Rt_]]); xMOVAPS(ptr128[&vu0Regs.VF[_Rd_]], xmmT1); } @@ -637,12 +626,14 @@ namespace OpcodeImpl { void recCOP2_BC2() { recCOP2_BC2t[_Rt_](); } void recCOP2_SPEC1() { - iFlushCall(FLUSH_EVERYTHING); + _cop2BackupRegs(); xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1); xForwardJZ32 skipvuidle; xFastCall((void*)_vu0FinishMicro); skipvuidle.SetTarget(); recCOP2SPECIAL1t[_Funct_](); + + _cop2RestoreRegs(); } void recCOP2_SPEC2() { recCOP2SPECIAL2t[(cpuRegs.code & 3) | ((cpuRegs.code >> 4) & 0x7c)](); }