diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt
index 51d5702034..cc4b7c9088 100644
--- a/pcsx2/CMakeLists.txt
+++ b/pcsx2/CMakeLists.txt
@@ -1340,6 +1340,7 @@ set(pcsx2x86Sources
x86/iMMI.cpp
x86/iR3000A.cpp
x86/iR3000Atables.cpp
+ x86/iR5900Analysis.cpp
x86/iR5900Misc.cpp
x86/ir5900tables.cpp
x86/ix86-32/iCore-32.cpp
@@ -1371,6 +1372,7 @@ set(pcsx2x86Headers
x86/iR5900AritImm.h
x86/iR5900Branch.h
x86/iR5900.h
+ x86/iR5900Analysis.h
x86/iR5900Jump.h
x86/iR5900LoadStore.h
x86/iR5900Move.h
diff --git a/pcsx2/pcsx2.vcxproj b/pcsx2/pcsx2.vcxproj
index 9606096ee4..d693a3932b 100644
--- a/pcsx2/pcsx2.vcxproj
+++ b/pcsx2/pcsx2.vcxproj
@@ -546,6 +546,7 @@
+
@@ -991,6 +992,7 @@
+
diff --git a/pcsx2/pcsx2.vcxproj.filters b/pcsx2/pcsx2.vcxproj.filters
index 7cc03f15bf..3675419aab 100644
--- a/pcsx2/pcsx2.vcxproj.filters
+++ b/pcsx2/pcsx2.vcxproj.filters
@@ -1706,6 +1706,9 @@
System\Ps2\GS\Renderers\Hardware
+
+ System\Ps2\EmotionEngine\EE\Dynarec
+
@@ -2834,6 +2837,9 @@
System\Ps2\GS\Renderers\Hardware
+
+ System\Ps2\EmotionEngine\EE\Dynarec
+
diff --git a/pcsx2/pcsx2core.vcxproj b/pcsx2/pcsx2core.vcxproj
index 2e50c834c1..a2a247d973 100644
--- a/pcsx2/pcsx2core.vcxproj
+++ b/pcsx2/pcsx2core.vcxproj
@@ -320,6 +320,7 @@
+
@@ -620,6 +621,7 @@
+
diff --git a/pcsx2/pcsx2core.vcxproj.filters b/pcsx2/pcsx2core.vcxproj.filters
index de2b928ec0..c93397b16a 100644
--- a/pcsx2/pcsx2core.vcxproj.filters
+++ b/pcsx2/pcsx2core.vcxproj.filters
@@ -1199,6 +1199,9 @@
System\Ps2\GS\Renderers\Hardware
+
+ System\Ps2\EmotionEngine\EE\Dynarec
+
@@ -1979,6 +1982,9 @@
System\Ps2\GS\Renderers\Hardware
+
+ System\Ps2\EmotionEngine\EE\Dynarec
+
diff --git a/pcsx2/x86/iCore.h b/pcsx2/x86/iCore.h
index 4d8632dd50..8e453ea410 100644
--- a/pcsx2/x86/iCore.h
+++ b/pcsx2/x86/iCore.h
@@ -215,11 +215,17 @@ int _signExtendXMMtoM(uptr to, x86SSERegType from, int candestroy); // returns t
#define EEINSTINFO_COP1 1
#define EEINSTINFO_COP2 2
+#define EEINST_COP2_DENORMALIZE_STATUS_FLAG 0x100 // setupMacroOp denormalizes the VU0 status flag before this op (flag hack on)
+#define EEINST_COP2_NORMALIZE_STATUS_FLAG 0x200 // endMacroOp normalizes the status flag and stores it back after this op
+#define EEINST_COP2_STATUS_FLAG 0x400 // this op has to perform its status flag update
+#define EEINST_COP2_MAC_FLAG 0x800 // this op has to perform its MAC flag update
+#define EEINST_COP2_CLIP_FLAG 0x1000 // this op has to perform its clip flag update
+
struct EEINST
{
+ u16 info; // extra info, if 1 inst is COP1, 2 inst is COP2. Also uses EEINST_XMM
u8 regs[34]; // includes HI/LO (HI=32, LO=33)
u8 fpuregs[33]; // ACC=32
- u8 info; // extra info, if 1 inst is COP1, 2 inst is COP2. Also uses EEINST_XMM
// uses XMMTYPE_ flags; if type == XMMTYPE_TEMP, not used
u8 writeType[3], writeReg[3]; // reg written in this inst, 0 if no reg
diff --git a/pcsx2/x86/iR5900Analysis.cpp b/pcsx2/x86/iR5900Analysis.cpp
new file mode 100644
index 0000000000..c1d23128b2
--- /dev/null
+++ b/pcsx2/x86/iR5900Analysis.cpp
@@ -0,0 +1,229 @@
+/* PCSX2 - PS2 Emulator for PCs
+ * Copyright (C) 2002-2022 PCSX2 Dev Team
+ *
+ * PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU Lesser General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ *
+ * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with PCSX2.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "PrecompiledHeader.h"
+
+#include "iR5900Analysis.h"
+#include "Memory.h"
+#include "DebugTools/Debug.h"
+
+using namespace R5900;
+
+// This should be moved to analysis...
+extern int cop2flags(u32 code);
+
+AnalysisPass::AnalysisPass() = default;
+
+AnalysisPass::~AnalysisPass() = default;
+
+void AnalysisPass::Run(u32 start, u32 end, EEINST* inst_cache) // base implementation is a no-op; derived passes override it
+{
+}
+
+/// Visits the instruction words in [start, end) in 4-byte steps, handing each address and the
+/// matching inst_cache entry (advanced in lockstep) to func. Iteration stops early as soon as
+/// func returns false.
+/// Side effect: cpuRegs.code is overwritten with every word that is fetched, and is left
+/// holding the last fetched word when this function returns.
+template <class F>
+void __fi AnalysisPass::ForEachInstruction(u32 start, u32 end, EEINST* inst_cache, const F& func)
+{
+	EEINST* eeinst = inst_cache;
+	for (u32 apc = start; apc < end; apc += 4, eeinst++)
+	{
+		// Callbacks read the current instruction from cpuRegs.code, so publish it before the call.
+		cpuRegs.code = memRead32(apc);
+		if (!func(apc, eeinst))
+			break;
+	}
+}
+
+/// Writes one console line per instruction in [start, end): the address followed by the
+/// disassembly text. Before a line is printed, func(pc, EEINST*, std::string&) may append
+/// annotations to the disassembly string.
+template <class F>
+void __fi AnalysisPass::DumpAnnotatedBlock(u32 start, u32 end, EEINST* inst_cache, const F& func)
+{
+	std::string d;
+	EEINST* eeinst = inst_cache;
+	for (u32 apc = start; apc < end; apc += 4, eeinst++)
+	{
+		// Reuse one buffer for every line instead of constructing a new string each time.
+		d.clear();
+		disR5900Fasm(d, memRead32(apc), apc, false);
+		func(apc, eeinst, d);
+		Console.WriteLn(" %08X %s", apc, d.c_str());
+	}
+}
+
+// The base class is default-constructed implicitly and the members are initialized in the
+// class definition, so the compiler-generated constructor is sufficient.
+COP2FlagHackPass::COP2FlagHackPass() = default;
+
+COP2FlagHackPass::~COP2FlagHackPass() = default;
+
+/// Scans the block [start, end) and tags the EEINST::info of its COP2 instructions with the
+/// EEINST_COP2_* bits that tell the macro-mode recompiler which flag work can be skipped.
+///  - Status/MAC: the most recent writer is remembered and only tagged ("committed") when the
+///    flag may be observed: CFC2/CTC2 on that register, CTC2 to FBRST, VCALLMS, an SB/SH/SW
+///    store, or the end of the block.
+///  - Status additionally gets a DENORMALIZE tag on the first writer after a commit, and a
+///    NORMALIZE tag on the committed writer.
+///  - Clip writers are always tagged, since the clip flag is not tracked.
+/// inst_cache must hold one EEINST per instruction, starting with the one at `start`.
+void COP2FlagHackPass::Run(u32 start, u32 end, EEINST* inst_cache)
+{
+	m_status_denormalized = false;
+	m_last_status_write = nullptr;
+	m_last_mac_write = nullptr;
+	m_last_clip_write = nullptr;
+	m_cfc2_pc = start;
+
+	ForEachInstruction(start, end, inst_cache, [this, end](u32 apc, EEINST* inst) {
+		// catch SB/SH/SW to potential DMA->VIF0->VU0 exec.
+		// this is very unlikely in a cop2 chain.
+		if (_Opcode_ == 050 || _Opcode_ == 051 || _Opcode_ == 053)
+		{
+			CommitAllFlags();
+			return true;
+		}
+		else if (_Opcode_ != 022)
+		{
+			// not COP2
+			return true;
+		}
+
+		// Detect ctc2 Status, zero, ..., cfc2 v0, Status pattern where we need accurate sticky bits.
+		// Test case: Tekken Tag Tournament.
+		if (_Rs_ == 6 && _Rd_ == REG_STATUS_FLAG)
+		{
+			// The read-ahead runs through ForEachInstruction, which overwrites cpuRegs.code with
+			// every word it visits. Remember the current instruction so the decoding further
+			// down still looks at this CTC2 rather than whatever the scan stopped on.
+			const u32 current_code = cpuRegs.code;
+
+			// Read ahead, looking for cfc2.
+			m_cfc2_pc = apc;
+			ForEachInstruction(apc, end, inst, [this](u32 capc, EEINST*) {
+				if (_Opcode_ == 022 && _Rs_ == 2 && _Rd_ == REG_STATUS_FLAG)
+				{
+					m_cfc2_pc = capc;
+					return false;
+				}
+				return true;
+			});
+			cpuRegs.code = current_code;
+#ifdef PCSX2_DEVBUILD
+			if (m_cfc2_pc != apc)
+				DevCon.WriteLn("CTC2 at %08X paired with CFC2 %08X", apc, m_cfc2_pc);
+#endif
+		}
+
+		// CFC2/CTC2
+		if (_Rs_ == 6 || _Rs_ == 2)
+		{
+			switch (_Rd_)
+			{
+				case REG_STATUS_FLAG:
+					CommitStatusFlag();
+					break;
+				case REG_MAC_FLAG:
+					CommitMACFlag();
+					break;
+				case REG_CLIP_FLAG:
+					CommitClipFlag();
+					break;
+				case REG_FBRST:
+				{
+					// only apply to CTC2 (rs == 6, same encoding as the ctc2 check above), is FBRST readable?
+					if (_Rs_ == 6)
+						CommitAllFlags();
+				}
+				break;
+			}
+		}
+
+		if (((cpuRegs.code >> 25 & 1) == 1) && ((cpuRegs.code >> 2 & 15) == 14))
+		{
+			// VCALLMS, everything needs to be up to date
+			CommitAllFlags();
+		}
+
+		// bit mask: 1 - status, 2 - mac, 4 - clip
+		const int flags = cop2flags(cpuRegs.code);
+		if (flags == 0)
+			return true;
+
+		// STATUS
+		if (flags & 1)
+		{
+			if (!m_status_denormalized)
+			{
+				inst->info |= EEINST_COP2_DENORMALIZE_STATUS_FLAG;
+				m_status_denormalized = true;
+			}
+
+			// if we're still behind the next CFC2 after the sticky bits got cleared, we need to update flags
+			if (apc < m_cfc2_pc)
+				inst->info |= EEINST_COP2_STATUS_FLAG;
+
+			m_last_status_write = inst;
+		}
+
+		// MAC
+		if (flags & 2)
+		{
+			m_last_mac_write = inst;
+		}
+
+		// CLIP
+		if (flags & 4)
+		{
+			// we don't track the clip flag yet..
+			// but it's unlikely that we'll have more than 4 clip flags in a row, because that would be pointless?
+			inst->info |= EEINST_COP2_CLIP_FLAG;
+			m_last_clip_write = inst;
+		}
+
+		return true;
+	});
+
+	// Anything still pending at the end of the block may be observed by the next block.
+	CommitAllFlags();
+
+#if 0
+	if (m_cfc2_pc != start)
+		DumpAnnotatedBlock(start, end, inst_cache);
+#endif
+}
+
+// Dumps the block to the console, appending the name of every EEINST_COP2_* bit that is set
+// on an instruction to that instruction's disassembly line.
+void COP2FlagHackPass::DumpAnnotatedBlock(u32 start, u32 end, EEINST* inst_cache)
+{
+	AnalysisPass::DumpAnnotatedBlock(start, end, inst_cache, [](u32, EEINST* eeinst, std::string& d) {
+		struct FlagLabel
+		{
+			u32 mask;
+			const char* text;
+		};
+
+		// Listed in the order the annotations are appended.
+		static constexpr FlagLabel labels[] = {
+			{EEINST_COP2_DENORMALIZE_STATUS_FLAG, " COP2_DENORMALIZE_STATUS_FLAG"},
+			{EEINST_COP2_NORMALIZE_STATUS_FLAG, " COP2_NORMALIZE_STATUS_FLAG"},
+			{EEINST_COP2_STATUS_FLAG, " COP2_STATUS_FLAG"},
+			{EEINST_COP2_MAC_FLAG, " COP2_MAC_FLAG"},
+			{EEINST_COP2_CLIP_FLAG, " COP2_CLIP_FLAG"},
+		};
+
+		for (const FlagLabel& label : labels)
+		{
+			if (eeinst->info & label.mask)
+				d.append(label.text);
+		}
+	});
+}
+
+// Tags the last recorded status-flag writer so it updates the status flag and normalizes it
+// afterwards, then notes that the flag is back in normalized form.
+void COP2FlagHackPass::CommitStatusFlag()
+{
+	// No status writer recorded so far: nothing to commit.
+	if (m_last_status_write == nullptr)
+		return;
+
+	m_last_status_write->info |= (EEINST_COP2_STATUS_FLAG | EEINST_COP2_NORMALIZE_STATUS_FLAG);
+	m_status_denormalized = false;
+}
+
+// Tags the last recorded MAC-flag writer so it performs its MAC flag update.
+void COP2FlagHackPass::CommitMACFlag()
+{
+	// No MAC writer recorded so far: nothing to commit.
+	if (m_last_mac_write == nullptr)
+		return;
+
+	m_last_mac_write->info |= EEINST_COP2_MAC_FLAG;
+}
+
+// Tags the last recorded clip-flag writer so it performs its clip flag update.
+void COP2FlagHackPass::CommitClipFlag()
+{
+	// No clip writer recorded so far: nothing to commit.
+	if (m_last_clip_write == nullptr)
+		return;
+
+	m_last_clip_write->info |= EEINST_COP2_CLIP_FLAG;
+}
+
+void COP2FlagHackPass::CommitAllFlags() // commits the pending status, MAC and clip writers in one go
+{
+ CommitStatusFlag();
+ CommitMACFlag();
+ CommitClipFlag();
+}
diff --git a/pcsx2/x86/iR5900Analysis.h b/pcsx2/x86/iR5900Analysis.h
new file mode 100644
index 0000000000..87e9d503db
--- /dev/null
+++ b/pcsx2/x86/iR5900Analysis.h
@@ -0,0 +1,65 @@
+/* PCSX2 - PS2 Emulator for PCs
+ * Copyright (C) 2002-2022 PCSX2 Dev Team
+ *
+ * PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU Lesser General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ *
+ * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with PCSX2.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "iR5900.h"
+#include "iCore.h"
+
+namespace R5900
+{
+	/// Base class for passes that walk the instructions of a block and annotate the
+	/// per-instruction EEINST entries.
+	class AnalysisPass
+	{
+	public:
+		AnalysisPass();
+		virtual ~AnalysisPass();
+
+		/// Runs the actual pass over [start, end); inst_cache holds one EEINST per instruction.
+		virtual void Run(u32 start, u32 end, EEINST* inst_cache);
+
+	protected:
+		// NOTE: both member templates below are defined in iR5900Analysis.cpp, so they can only
+		// be instantiated from that translation unit.
+
+		/// Takes a functor of bool(pc, EEINST*), returning false if iteration should stop.
+		template <class F>
+		void ForEachInstruction(u32 start, u32 end, EEINST* inst_cache, const F& func);
+
+		/// Dumps the block to the console, calling the functor void(pc, EEINST*, std::string&) for each instruction.
+		template <class F>
+		void DumpAnnotatedBlock(u32 start, u32 end, EEINST* inst_cache, const F& func);
+	};
+
+	/// Pass that tags COP2 instructions with EEINST_COP2_* bits, so that flag updates nobody
+	/// reads can be skipped when the VU flag hack is enabled.
+	class COP2FlagHackPass final : public AnalysisPass
+	{
+	public:
+		COP2FlagHackPass();
+		~COP2FlagHackPass() override;
+
+		void Run(u32 start, u32 end, EEINST* inst_cache) override;
+
+	private:
+		/// Debug helper: dumps the block with the EEINST_COP2_* bits of each instruction.
+		void DumpAnnotatedBlock(u32 start, u32 end, EEINST* inst_cache);
+
+		/// Each Commit* tags the last recorded writer of that flag (if there is one).
+		void CommitStatusFlag();
+		void CommitMACFlag();
+		void CommitClipFlag();
+		void CommitAllFlags();
+
+		/// True once a status writer has been tagged to denormalize, until the next status commit.
+		bool m_status_denormalized = false;
+
+		/// Most recent instruction seen writing each flag; null until one is encountered.
+		EEINST* m_last_status_write = nullptr;
+		EEINST* m_last_mac_write = nullptr;
+		EEINST* m_last_clip_write = nullptr;
+
+		/// PC of the CFC2 Status paired with the last CTC2 Status (or of that CTC2 if none was found).
+		u32 m_cfc2_pc = 0;
+	};
+} // namespace R5900
\ No newline at end of file
diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp
index e612eb482c..f4ceef2e7e 100644
--- a/pcsx2/x86/ix86-32/iR5900-32.cpp
+++ b/pcsx2/x86/ix86-32/iR5900-32.cpp
@@ -21,6 +21,7 @@
#include "R5900Exceptions.h"
#include "R5900OpcodeTables.h"
#include "iR5900.h"
+#include "iR5900Analysis.h"
#include "BaseblockEx.h"
#include "System/RecTypes.h"
@@ -2171,6 +2172,7 @@ StartRecomp:
}
// rec info //
+ bool has_cop2_instructions = false;
{
EEINST* pcur;
@@ -2191,9 +2193,18 @@ StartRecomp:
cpuRegs.code = *(int*)PSM(i - 4);
pcur[-1] = pcur[0];
pcur--;
+
+ has_cop2_instructions |= (_Opcode_ == 022);
}
}
+ // eventually we'll want to have a vector of passes or something.
+ if (has_cop2_instructions && EmuConfig.Speedhacks.vuFlagHack)
+ {
+ COP2FlagHackPass fhpass;
+ fhpass.Run(startpc, s_nEndBlock, s_pInstCache + 1);
+ }
+
// analyze instructions //
{
usecop2 = 0;
diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl
index 9ce1e4ea12..a908efbda5 100644
--- a/pcsx2/x86/microVU_Alloc.inl
+++ b/pcsx2/x86/microVU_Alloc.inl
@@ -70,22 +70,22 @@ __ri void mVUallocSFLAGc(const x32& reg, const x32& regT, int fInstance)
xOR(reg, regT);
}
-// Denormalizes Status Flag
-__ri void mVUallocSFLAGd(u32* memAddr)
+// Denormalizes the Status Flag read from memAddr, leaving the result in reg; destroys tmp1/tmp2 (reg, tmp1, tmp2 must be distinct)
+__ri void mVUallocSFLAGd(u32* memAddr, const x32& reg = eax, const x32& tmp1 = ecx, const x32& tmp2 = edx)
{
- xMOV(edx, ptr32[memAddr]);
- xMOV(eax, edx);
- xSHR(eax, 3);
- xAND(eax, 0x18);
+ xMOV(tmp2, ptr32[memAddr]);
+ xMOV(reg, tmp2);
+ xSHR(reg, 3);
+ xAND(reg, 0x18);
- xMOV(ecx, edx);
- xSHL(ecx, 11);
- xAND(ecx, 0x1800);
- xOR(eax, ecx);
+ xMOV(tmp1, tmp2);
+ xSHL(tmp1, 11);
+ xAND(tmp1, 0x1800);
+ xOR(reg, tmp1);
- xSHL(edx, 14);
- xAND(edx, 0x3cf0000);
- xOR(eax, edx);
+ xSHL(tmp2, 14);
+ xAND(tmp2, 0x3cf0000);
+ xOR(reg, tmp2);
}
__fi void mVUallocMFLAGa(mV, const x32& reg, int fInstance)
diff --git a/pcsx2/x86/microVU_Macro.inl b/pcsx2/x86/microVU_Macro.inl
index 29cbc5d43d..bc38bb113a 100644
--- a/pcsx2/x86/microVU_Macro.inl
+++ b/pcsx2/x86/microVU_Macro.inl
@@ -55,24 +55,38 @@ void setupMacroOp(int mode, const char* opName)
{
xMOVSSZX(xmmPQ, ptr32[&vu0Regs.VI[REG_Q].UL]);
}
- if (mode & 0x08) // Clip Instruction
+ if (mode & 0x08 && (!CHECK_VU_FLAGHACK || g_pCurInstInfo->info & EEINST_COP2_CLIP_FLAG)) // Clip Instruction
{
microVU0.prog.IRinfo.info[0].cFlag.write = 0xff;
microVU0.prog.IRinfo.info[0].cFlag.lastWrite = 0xff;
}
- if (mode & 0x10) // Update Status/Mac Flags
+ if (mode & 0x10 && (!CHECK_VU_FLAGHACK || g_pCurInstInfo->info & EEINST_COP2_STATUS_FLAG)) // Update Status Flag
{
microVU0.prog.IRinfo.info[0].sFlag.doFlag = true;
microVU0.prog.IRinfo.info[0].sFlag.doNonSticky = true;
microVU0.prog.IRinfo.info[0].sFlag.write = 0;
microVU0.prog.IRinfo.info[0].sFlag.lastWrite = 0;
+ }
+ if (mode & 0x10 && (!CHECK_VU_FLAGHACK || g_pCurInstInfo->info & EEINST_COP2_MAC_FLAG)) // Update Mac Flags
+ {
microVU0.prog.IRinfo.info[0].mFlag.doFlag = true;
microVU0.prog.IRinfo.info[0].mFlag.write = 0xff;
- _freeX86reg(ebx);
- //Denormalize
- mVUallocSFLAGd(&vu0Regs.VI[REG_STATUS_FLAG].UL);
-
- xMOV(gprF0, eax);
+ }
+ if (mode & 0x10)
+ {
+ _freeX86reg(gprF0);
+
+ if (!CHECK_VU_FLAGHACK || (g_pCurInstInfo->info & EEINST_COP2_DENORMALIZE_STATUS_FLAG))
+ {
+ // flags are normalized, so denormalize before running the first instruction
+ mVUallocSFLAGd(&vu0Regs.VI[REG_STATUS_FLAG].UL, gprF0, eax, ecx);
+ }
+ else
+ {
+ // load denormalized status flag
+ // ideally we'd keep this in a register, but 32-bit...
+ xMOV(gprF0, ptr32[&vuRegs->VI[REG_STATUS_FLAG].UL]);
+ }
}
}
@@ -82,29 +96,45 @@ void endMacroOp(int mode)
{
xMOVSS(ptr32[&vu0Regs.VI[REG_Q].UL], xmmPQ);
}
- if (mode & 0x10) // Status/Mac Flags were Updated
- {
- // Normalize
- mVUallocSFLAGc(eax, gprF0, 0);
- xMOV(ptr32[&vu0Regs.VI[REG_STATUS_FLAG].UL], eax);
- }
microVU0.regAlloc->flushAll();
_clearNeededCOP2Regs();
- if (mode & 0x10) // Update VU0 Status/Mac instances after flush to avoid corrupting anything
+ if (mode & 0x10)
{
- int t0reg = _allocTempXMMreg(XMMT_INT, -1);
- mVUallocSFLAGd(&vu0Regs.VI[REG_STATUS_FLAG].UL);
- xMOVDZX(xRegisterSSE(t0reg), eax);
- xSHUF.PS(xRegisterSSE(t0reg), xRegisterSSE(t0reg), 0);
- xMOVAPS(ptr128[µVU0.regs().micro_statusflags], xRegisterSSE(t0reg));
+ if (!CHECK_VU_FLAGHACK || (g_pCurInstInfo->info & EEINST_COP2_STATUS_FLAG)) // Status/Mac Flags were Updated
+ {
+ // update micro_statusflags
+ const int t0reg = _allocTempXMMreg(XMMT_INT, -1);
+ xMOVDZX(xRegisterSSE(t0reg), gprF0);
+ xSHUF.PS(xRegisterSSE(t0reg), xRegisterSSE(t0reg), 0);
+ xMOVAPS(ptr128[µVU0.regs().micro_statusflags], xRegisterSSE(t0reg));
+ _freeXMMreg(t0reg);
+ }
- xMOVDZX(xRegisterSSE(t0reg), ptr32[&vu0Regs.VI[REG_MAC_FLAG].UL]);
- xSHUF.PS(xRegisterSSE(t0reg), xRegisterSSE(t0reg), 0);
- xMOVAPS(ptr128[µVU0.regs().micro_macflags], xRegisterSSE(t0reg));
- _freeXMMreg(t0reg);
+ if (!CHECK_VU_FLAGHACK || g_pCurInstInfo->info & EEINST_COP2_NORMALIZE_STATUS_FLAG)
+ {
+ // Normalize
+ mVUallocSFLAGc(eax, gprF0, 0);
+ xMOV(ptr32[&vu0Regs.VI[REG_STATUS_FLAG].UL], eax);
+ }
+ else
+ {
+ // backup denormalized flags for the next instruction
+ // this is fine, because we'll normalize them again before this reg is accessed
+ xMOV(ptr32[&vuRegs->VI[REG_STATUS_FLAG].UL], gprF0);
+ }
+
+ if (!CHECK_VU_FLAGHACK || (g_pCurInstInfo->info & EEINST_COP2_MAC_FLAG))
+ {
+ const int t0reg = _allocTempXMMreg(XMMT_INT, -1);
+ xMOVDZX(xRegisterSSE(t0reg), ptr32[&vu0Regs.VI[REG_MAC_FLAG].UL]);
+ xSHUF.PS(xRegisterSSE(t0reg), xRegisterSSE(t0reg), 0);
+ xMOVAPS(ptr128[µVU0.regs().micro_macflags], xRegisterSSE(t0reg));
+ _freeXMMreg(t0reg);
+ }
}
+
microVU0.cop2 = 0;
microVU0.regAlloc->reset(false);
}