From d20bfa240d38b1c5f06f4440e5e78b5e92f7704c Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Sat, 26 Feb 2022 16:06:04 +1000 Subject: [PATCH] EE: Add COP2 flag hack --- pcsx2/CMakeLists.txt | 2 + pcsx2/pcsx2.vcxproj | 2 + pcsx2/pcsx2.vcxproj.filters | 6 + pcsx2/pcsx2core.vcxproj | 2 + pcsx2/pcsx2core.vcxproj.filters | 6 + pcsx2/x86/iCore.h | 8 +- pcsx2/x86/iR5900Analysis.cpp | 229 ++++++++++++++++++++++++++++++++ pcsx2/x86/iR5900Analysis.h | 65 +++++++++ pcsx2/x86/ix86-32/iR5900-32.cpp | 11 ++ pcsx2/x86/microVU_Alloc.inl | 26 ++-- pcsx2/x86/microVU_Macro.inl | 76 +++++++---- 11 files changed, 396 insertions(+), 37 deletions(-) create mode 100644 pcsx2/x86/iR5900Analysis.cpp create mode 100644 pcsx2/x86/iR5900Analysis.h diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt index 51d5702034..cc4b7c9088 100644 --- a/pcsx2/CMakeLists.txt +++ b/pcsx2/CMakeLists.txt @@ -1340,6 +1340,7 @@ set(pcsx2x86Sources x86/iMMI.cpp x86/iR3000A.cpp x86/iR3000Atables.cpp + x86/iR5900Analysis.cpp x86/iR5900Misc.cpp x86/ir5900tables.cpp x86/ix86-32/iCore-32.cpp @@ -1371,6 +1372,7 @@ set(pcsx2x86Headers x86/iR5900AritImm.h x86/iR5900Branch.h x86/iR5900.h + x86/iR5900Analysis.h x86/iR5900Jump.h x86/iR5900LoadStore.h x86/iR5900Move.h diff --git a/pcsx2/pcsx2.vcxproj b/pcsx2/pcsx2.vcxproj index 9606096ee4..d693a3932b 100644 --- a/pcsx2/pcsx2.vcxproj +++ b/pcsx2/pcsx2.vcxproj @@ -546,6 +546,7 @@ + @@ -991,6 +992,7 @@ + diff --git a/pcsx2/pcsx2.vcxproj.filters b/pcsx2/pcsx2.vcxproj.filters index 7cc03f15bf..3675419aab 100644 --- a/pcsx2/pcsx2.vcxproj.filters +++ b/pcsx2/pcsx2.vcxproj.filters @@ -1706,6 +1706,9 @@ System\Ps2\GS\Renderers\Hardware + + System\Ps2\EmotionEngine\EE\Dynarec + @@ -2834,6 +2837,9 @@ System\Ps2\GS\Renderers\Hardware + + System\Ps2\EmotionEngine\EE\Dynarec + diff --git a/pcsx2/pcsx2core.vcxproj b/pcsx2/pcsx2core.vcxproj index 2e50c834c1..a2a247d973 100644 --- a/pcsx2/pcsx2core.vcxproj +++ b/pcsx2/pcsx2core.vcxproj @@ -320,6 +320,7 @@ + @@ -620,6 +621,7 @@ + 
diff --git a/pcsx2/pcsx2core.vcxproj.filters b/pcsx2/pcsx2core.vcxproj.filters index de2b928ec0..c93397b16a 100644 --- a/pcsx2/pcsx2core.vcxproj.filters +++ b/pcsx2/pcsx2core.vcxproj.filters @@ -1199,6 +1199,9 @@ System\Ps2\GS\Renderers\Hardware + + System\Ps2\EmotionEngine\EE\Dynarec + @@ -1979,6 +1982,9 @@ System\Ps2\GS\Renderers\Hardware + + System\Ps2\EmotionEngine\EE\Dynarec + diff --git a/pcsx2/x86/iCore.h b/pcsx2/x86/iCore.h index 4d8632dd50..8e453ea410 100644 --- a/pcsx2/x86/iCore.h +++ b/pcsx2/x86/iCore.h @@ -215,11 +215,17 @@ int _signExtendXMMtoM(uptr to, x86SSERegType from, int candestroy); // returns t #define EEINSTINFO_COP1 1 #define EEINSTINFO_COP2 2 +#define EEINST_COP2_DENORMALIZE_STATUS_FLAG 0x100 +#define EEINST_COP2_NORMALIZE_STATUS_FLAG 0x200 +#define EEINST_COP2_STATUS_FLAG 0x400 +#define EEINST_COP2_MAC_FLAG 0x800 +#define EEINST_COP2_CLIP_FLAG 0x1000 + struct EEINST { + u16 info; // extra info, if 1 inst is COP1, 2 inst is COP2. Also uses EEINST_XMM u8 regs[34]; // includes HI/LO (HI=32, LO=33) u8 fpuregs[33]; // ACC=32 - u8 info; // extra info, if 1 inst is COP1, 2 inst is COP2. Also uses EEINST_XMM // uses XMMTYPE_ flags; if type == XMMTYPE_TEMP, not used u8 writeType[3], writeReg[3]; // reg written in this inst, 0 if no reg diff --git a/pcsx2/x86/iR5900Analysis.cpp b/pcsx2/x86/iR5900Analysis.cpp new file mode 100644 index 0000000000..c1d23128b2 --- /dev/null +++ b/pcsx2/x86/iR5900Analysis.cpp @@ -0,0 +1,229 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2022 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. 
See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see <http://www.gnu.org/licenses/>. + */ + +#include "PrecompiledHeader.h" + +#include "iR5900Analysis.h" +#include "Memory.h" +#include "DebugTools/Debug.h" + +using namespace R5900; + +// This should be moved to analysis... +extern int cop2flags(u32 code); + +AnalysisPass::AnalysisPass() = default; + +AnalysisPass::~AnalysisPass() = default; + +void AnalysisPass::Run(u32 start, u32 end, EEINST* inst_cache) +{ +} + +template <class F> +void __fi AnalysisPass::ForEachInstruction(u32 start, u32 end, EEINST* inst_cache, const F& func) +{ + EEINST* eeinst = inst_cache; + for (u32 apc = start; apc < end; apc += 4, eeinst++) + { + cpuRegs.code = memRead32(apc); + if (!func(apc, eeinst)) + break; + } +} + +template <class F> +void __fi R5900::AnalysisPass::DumpAnnotatedBlock(u32 start, u32 end, EEINST* inst_cache, const F& func) +{ + std::string d; + EEINST* eeinst = inst_cache; + for (u32 apc = start; apc < end; apc += 4, eeinst++) + { + d.clear(); + disR5900Fasm(d, memRead32(apc), apc, false); + func(apc, eeinst, d); + Console.WriteLn(" %08X %s", apc, d.c_str()); + } +} + +COP2FlagHackPass::COP2FlagHackPass() + : AnalysisPass() +{ +} + +COP2FlagHackPass::~COP2FlagHackPass() = default; + +void COP2FlagHackPass::Run(u32 start, u32 end, EEINST* inst_cache) +{ + m_status_denormalized = false; + m_last_status_write = nullptr; + m_last_mac_write = nullptr; + m_last_clip_write = nullptr; + m_cfc2_pc = start; + + ForEachInstruction(start, end, inst_cache, [this, end](u32 apc, EEINST* inst) { + // catch SB/SH/SW to potential DMA->VIF0->VU0 exec. + // this is very unlikely in a cop2 chain. + if (_Opcode_ == 050 || _Opcode_ == 051 || _Opcode_ == 053) + { + CommitAllFlags(); + return true; + } + else if (_Opcode_ != 022) + { + // not COP2 + return true; + } + + // Detect ctc2 Status, zero, ..., cfc2 v0, Status pattern where we need accurate sticky bits. 
+ // Test case: Tekken Tag Tournament. + if (_Rs_ == 6 && _Rd_ == REG_STATUS_FLAG) + { + // Read ahead, looking for cfc2. + m_cfc2_pc = apc; + ForEachInstruction(apc, end, inst, [this](u32 capc, EEINST*) { + if (_Opcode_ == 022 && _Rs_ == 2 && _Rd_ == REG_STATUS_FLAG) + { + m_cfc2_pc = capc; + return false; + } + return true; + }); +#ifdef PCSX2_DEVBUILD + if (m_cfc2_pc != apc) + DevCon.WriteLn("CTC2 at %08X paired with CFC2 %08X", apc, m_cfc2_pc); +#endif + } + + // CFC2/CTC2 + if (_Rs_ == 6 || _Rs_ == 2) + { + switch (_Rd_) + { + case REG_STATUS_FLAG: + CommitStatusFlag(); + break; + case REG_MAC_FLAG: + CommitMACFlag(); + break; + case REG_CLIP_FLAG: + CommitClipFlag(); + break; + case REG_FBRST: + { + // only apply to CTC2, is FBRST readable? + if (_Rs_ == 2) + CommitAllFlags(); + } + break; + } + } + + if (((cpuRegs.code >> 25 & 1) == 1) && ((cpuRegs.code >> 2 & 15) == 14)) + { + // VCALLMS, everything needs to be up to date + CommitAllFlags(); + } + + // 1 - status, 2 - mac, 3 - clip + const int flags = cop2flags(cpuRegs.code); + if (flags == 0) + return true; + + // STATUS + if (flags & 1) + { + if (!m_status_denormalized) + { + inst->info |= EEINST_COP2_DENORMALIZE_STATUS_FLAG; + m_status_denormalized = true; + } + + // if we're still behind the next CFC2 after the sticky bits got cleared, we need to update flags + if (apc < m_cfc2_pc) + inst->info |= EEINST_COP2_STATUS_FLAG; + + m_last_status_write = inst; + } + + // MAC + if (flags & 2) + { + m_last_mac_write = inst; + } + + // CLIP + if (flags & 4) + { + // we don't track the clip flag yet.. + // but it's unlikely that we'll have more than 4 clip flags in a row, because that would be pointless? 
+ inst->info |= EEINST_COP2_CLIP_FLAG; + m_last_clip_write = inst; + } + + return true; + }); + + CommitAllFlags(); + +#if 0 + if (m_cfc2_pc != start) + DumpAnnotatedBlock(start, end, inst_cache); +#endif +} + +void COP2FlagHackPass::DumpAnnotatedBlock(u32 start, u32 end, EEINST* inst_cache) +{ + AnalysisPass::DumpAnnotatedBlock(start, end, inst_cache, [](u32, EEINST* eeinst, std::string& d) { + if (eeinst->info & EEINST_COP2_DENORMALIZE_STATUS_FLAG) + d.append(" COP2_DENORMALIZE_STATUS_FLAG"); + if (eeinst->info & EEINST_COP2_NORMALIZE_STATUS_FLAG) + d.append(" COP2_NORMALIZE_STATUS_FLAG"); + if (eeinst->info & EEINST_COP2_STATUS_FLAG) + d.append(" COP2_STATUS_FLAG"); + if (eeinst->info & EEINST_COP2_MAC_FLAG) + d.append(" COP2_MAC_FLAG"); + if (eeinst->info & EEINST_COP2_CLIP_FLAG) + d.append(" COP2_CLIP_FLAG"); + }); +} + +void COP2FlagHackPass::CommitStatusFlag() +{ + if (m_last_status_write) + { + m_last_status_write->info |= EEINST_COP2_STATUS_FLAG | EEINST_COP2_NORMALIZE_STATUS_FLAG; + m_status_denormalized = false; + } +} + +void COP2FlagHackPass::CommitMACFlag() +{ + if (m_last_mac_write) + m_last_mac_write->info |= EEINST_COP2_MAC_FLAG; +} + +void COP2FlagHackPass::CommitClipFlag() +{ + if (m_last_clip_write) + m_last_clip_write->info |= EEINST_COP2_CLIP_FLAG; +} + +void COP2FlagHackPass::CommitAllFlags() +{ + CommitStatusFlag(); + CommitMACFlag(); + CommitClipFlag(); +} diff --git a/pcsx2/x86/iR5900Analysis.h b/pcsx2/x86/iR5900Analysis.h new file mode 100644 index 0000000000..87e9d503db --- /dev/null +++ b/pcsx2/x86/iR5900Analysis.h @@ -0,0 +1,65 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2022 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. 
+ * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see <http://www.gnu.org/licenses/>. + */ + +#pragma once + +#include "iR5900.h" +#include "iCore.h" + +namespace R5900 +{ + class AnalysisPass + { + public: + AnalysisPass(); + virtual ~AnalysisPass(); + + /// Runs the actual pass. + virtual void Run(u32 start, u32 end, EEINST* inst_cache); + + protected: + /// Takes a functor of bool(pc, EEINST*), returning false if iteration should stop. + template <class F> + void ForEachInstruction(u32 start, u32 end, EEINST* inst_cache, const F& func); + + /// Dumps the block to the console, calling the functor void(pc, EEINST*, std::string&) for each instruction. + template <class F> + void DumpAnnotatedBlock(u32 start, u32 end, EEINST* inst_cache, const F& func); + }; + + class COP2FlagHackPass final : public AnalysisPass + { + public: + COP2FlagHackPass(); + ~COP2FlagHackPass(); + + void Run(u32 start, u32 end, EEINST* inst_cache) override; + + private: + void DumpAnnotatedBlock(u32 start, u32 end, EEINST* inst_cache); + + void CommitStatusFlag(); + void CommitMACFlag(); + void CommitClipFlag(); + void CommitAllFlags(); + + bool m_status_denormalized = false; + EEINST* m_last_status_write = nullptr; + EEINST* m_last_mac_write = nullptr; + EEINST* m_last_clip_write = nullptr; + + u32 m_cfc2_pc = 0; + }; +} // namespace R5900 \ No newline at end of file diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index e612eb482c..f4ceef2e7e 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -21,6 +21,7 @@ #include "R5900Exceptions.h" #include "R5900OpcodeTables.h" #include "iR5900.h" +#include "iR5900Analysis.h" #include "BaseblockEx.h" #include "System/RecTypes.h" @@ -2171,6 
+2172,7 @@ StartRecomp: } // rec info // + bool has_cop2_instructions = false; { EEINST* pcur; @@ -2191,9 +2193,18 @@ StartRecomp: cpuRegs.code = *(int*)PSM(i - 4); pcur[-1] = pcur[0]; pcur--; + + has_cop2_instructions |= (_Opcode_ == 022); } } + // eventually we'll want to have a vector of passes or something. + if (has_cop2_instructions && EmuConfig.Speedhacks.vuFlagHack) + { + COP2FlagHackPass fhpass; + fhpass.Run(startpc, s_nEndBlock, s_pInstCache + 1); + } + // analyze instructions // { usecop2 = 0; diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index 9ce1e4ea12..a908efbda5 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -70,22 +70,22 @@ __ri void mVUallocSFLAGc(const x32& reg, const x32& regT, int fInstance) xOR(reg, regT); } -// Denormalizes Status Flag -__ri void mVUallocSFLAGd(u32* memAddr) +// Denormalizes Status Flag; destroys tmp1/tmp2 +__ri void mVUallocSFLAGd(u32* memAddr, const x32& reg = eax, const x32& tmp1 = ecx, const x32& tmp2 = edx) { - xMOV(edx, ptr32[memAddr]); - xMOV(eax, edx); - xSHR(eax, 3); - xAND(eax, 0x18); + xMOV(tmp2, ptr32[memAddr]); + xMOV(reg, tmp2); + xSHR(reg, 3); + xAND(reg, 0x18); - xMOV(ecx, edx); - xSHL(ecx, 11); - xAND(ecx, 0x1800); - xOR(eax, ecx); + xMOV(tmp1, tmp2); + xSHL(tmp1, 11); + xAND(tmp1, 0x1800); + xOR(reg, tmp1); - xSHL(edx, 14); - xAND(edx, 0x3cf0000); - xOR(eax, edx); + xSHL(tmp2, 14); + xAND(tmp2, 0x3cf0000); + xOR(reg, tmp2); } __fi void mVUallocMFLAGa(mV, const x32& reg, int fInstance) diff --git a/pcsx2/x86/microVU_Macro.inl b/pcsx2/x86/microVU_Macro.inl index 29cbc5d43d..bc38bb113a 100644 --- a/pcsx2/x86/microVU_Macro.inl +++ b/pcsx2/x86/microVU_Macro.inl @@ -55,24 +55,38 @@ void setupMacroOp(int mode, const char* opName) { xMOVSSZX(xmmPQ, ptr32[&vu0Regs.VI[REG_Q].UL]); } - if (mode & 0x08) // Clip Instruction + if (mode & 0x08 && (!CHECK_VU_FLAGHACK || g_pCurInstInfo->info & EEINST_COP2_CLIP_FLAG)) // Clip Instruction { 
microVU0.prog.IRinfo.info[0].cFlag.write = 0xff; microVU0.prog.IRinfo.info[0].cFlag.lastWrite = 0xff; } - if (mode & 0x10) // Update Status/Mac Flags + if (mode & 0x10 && (!CHECK_VU_FLAGHACK || g_pCurInstInfo->info & EEINST_COP2_STATUS_FLAG)) // Update Status Flag { microVU0.prog.IRinfo.info[0].sFlag.doFlag = true; microVU0.prog.IRinfo.info[0].sFlag.doNonSticky = true; microVU0.prog.IRinfo.info[0].sFlag.write = 0; microVU0.prog.IRinfo.info[0].sFlag.lastWrite = 0; + } + if (mode & 0x10 && (!CHECK_VU_FLAGHACK || g_pCurInstInfo->info & EEINST_COP2_MAC_FLAG)) // Update Mac Flags + { microVU0.prog.IRinfo.info[0].mFlag.doFlag = true; microVU0.prog.IRinfo.info[0].mFlag.write = 0xff; - _freeX86reg(ebx); - //Denormalize - mVUallocSFLAGd(&vu0Regs.VI[REG_STATUS_FLAG].UL); - - xMOV(gprF0, eax); + } + if (mode & 0x10) + { + _freeX86reg(gprF0); + + if (!CHECK_VU_FLAGHACK || (g_pCurInstInfo->info & EEINST_COP2_DENORMALIZE_STATUS_FLAG)) + { + // flags are normalized, so denormalize before running the first instruction + mVUallocSFLAGd(&vu0Regs.VI[REG_STATUS_FLAG].UL, gprF0, eax, ecx); + } + else + { + // load denormalized status flag + // ideally we'd keep this in a register, but 32-bit... 
+ xMOV(gprF0, ptr32[&vuRegs->VI[REG_STATUS_FLAG].UL]); + } } } @@ -82,29 +96,45 @@ void endMacroOp(int mode) { xMOVSS(ptr32[&vu0Regs.VI[REG_Q].UL], xmmPQ); } - if (mode & 0x10) // Status/Mac Flags were Updated - { - // Normalize - mVUallocSFLAGc(eax, gprF0, 0); - xMOV(ptr32[&vu0Regs.VI[REG_STATUS_FLAG].UL], eax); - } microVU0.regAlloc->flushAll(); _clearNeededCOP2Regs(); - if (mode & 0x10) // Update VU0 Status/Mac instances after flush to avoid corrupting anything + if (mode & 0x10) { - int t0reg = _allocTempXMMreg(XMMT_INT, -1); - mVUallocSFLAGd(&vu0Regs.VI[REG_STATUS_FLAG].UL); - xMOVDZX(xRegisterSSE(t0reg), eax); - xSHUF.PS(xRegisterSSE(t0reg), xRegisterSSE(t0reg), 0); - xMOVAPS(ptr128[&microVU0.regs().micro_statusflags], xRegisterSSE(t0reg)); + if (!CHECK_VU_FLAGHACK || (g_pCurInstInfo->info & EEINST_COP2_STATUS_FLAG)) // Status/Mac Flags were Updated + { + // update micro_statusflags + const int t0reg = _allocTempXMMreg(XMMT_INT, -1); + xMOVDZX(xRegisterSSE(t0reg), gprF0); + xSHUF.PS(xRegisterSSE(t0reg), xRegisterSSE(t0reg), 0); + xMOVAPS(ptr128[&microVU0.regs().micro_statusflags], xRegisterSSE(t0reg)); + _freeXMMreg(t0reg); + } - xMOVDZX(xRegisterSSE(t0reg), ptr32[&vu0Regs.VI[REG_MAC_FLAG].UL]); - xSHUF.PS(xRegisterSSE(t0reg), xRegisterSSE(t0reg), 0); - xMOVAPS(ptr128[&microVU0.regs().micro_macflags], xRegisterSSE(t0reg)); - _freeXMMreg(t0reg); + if (!CHECK_VU_FLAGHACK || g_pCurInstInfo->info & EEINST_COP2_NORMALIZE_STATUS_FLAG) + { + // Normalize + mVUallocSFLAGc(eax, gprF0, 0); + xMOV(ptr32[&vu0Regs.VI[REG_STATUS_FLAG].UL], eax); + } + else + { + // backup denormalized flags for the next instruction + // this is fine, because we'll normalize them again before this reg is accessed + xMOV(ptr32[&vuRegs->VI[REG_STATUS_FLAG].UL], gprF0); + } + + if (!CHECK_VU_FLAGHACK || (g_pCurInstInfo->info & EEINST_COP2_MAC_FLAG)) + { + const int t0reg = _allocTempXMMreg(XMMT_INT, -1); + xMOVDZX(xRegisterSSE(t0reg), ptr32[&vu0Regs.VI[REG_MAC_FLAG].UL]); + xSHUF.PS(xRegisterSSE(t0reg), 
xRegisterSSE(t0reg), 0); + xMOVAPS(ptr128[&microVU0.regs().micro_macflags], xRegisterSSE(t0reg)); + _freeXMMreg(t0reg); + } } + microVU0.cop2 = 0; microVU0.regAlloc->reset(false); }