EE: Add COP2 flag hack

This commit is contained in:
Connor McLaughlin 2022-02-26 16:06:04 +10:00 committed by refractionpcsx2
parent ba27a46ac6
commit d20bfa240d
11 changed files with 396 additions and 37 deletions

View File

@ -1340,6 +1340,7 @@ set(pcsx2x86Sources
x86/iMMI.cpp
x86/iR3000A.cpp
x86/iR3000Atables.cpp
x86/iR5900Analysis.cpp
x86/iR5900Misc.cpp
x86/ir5900tables.cpp
x86/ix86-32/iCore-32.cpp
@ -1371,6 +1372,7 @@ set(pcsx2x86Headers
x86/iR5900AritImm.h
x86/iR5900Branch.h
x86/iR5900.h
x86/iR5900Analysis.h
x86/iR5900Jump.h
x86/iR5900LoadStore.h
x86/iR5900Move.h

View File

@ -546,6 +546,7 @@
<ClCompile Include="HwWrite.cpp" />
<ClCompile Include="Cache.cpp" />
<ClCompile Include="Memory.cpp" />
<ClCompile Include="x86\iR5900Analysis.cpp" />
<ClCompile Include="x86\ix86-32\recVTLB.cpp" />
<ClCompile Include="vtlb.cpp" />
<ClCompile Include="MTVU.cpp" />
@ -991,6 +992,7 @@
<ClInclude Include="MTVU.h" />
<ClInclude Include="VU.h" />
<ClInclude Include="VUmicro.h" />
<ClInclude Include="x86\iR5900Analysis.h" />
<ClInclude Include="x86\microVU.h" />
<ClInclude Include="x86\microVU_IR.h" />
<ClInclude Include="x86\microVU_Misc.h" />

View File

@ -1706,6 +1706,9 @@
<ClCompile Include="GS\Renderers\HW\GSTextureReplacementLoaders.cpp">
<Filter>System\Ps2\GS\Renderers\Hardware</Filter>
</ClCompile>
<ClCompile Include="x86\iR5900Analysis.cpp">
<Filter>System\Ps2\EmotionEngine\EE\Dynarec</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Patch.h">
@ -2834,6 +2837,9 @@
<ClInclude Include="GS\Renderers\HW\GSTextureReplacements.h">
<Filter>System\Ps2\GS\Renderers\Hardware</Filter>
</ClInclude>
<ClInclude Include="x86\iR5900Analysis.h">
<Filter>System\Ps2\EmotionEngine\EE\Dynarec</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="windows\wxResources.rc">

View File

@ -320,6 +320,7 @@
<ClCompile Include="HwWrite.cpp" />
<ClCompile Include="Cache.cpp" />
<ClCompile Include="Memory.cpp" />
<ClCompile Include="x86\iR5900Analysis.cpp" />
<ClCompile Include="x86\ix86-32\recVTLB.cpp" />
<ClCompile Include="vtlb.cpp" />
<ClCompile Include="MTVU.cpp" />
@ -620,6 +621,7 @@
<ClInclude Include="MTVU.h" />
<ClInclude Include="VU.h" />
<ClInclude Include="VUmicro.h" />
<ClInclude Include="x86\iR5900Analysis.h" />
<ClInclude Include="x86\microVU.h" />
<ClInclude Include="x86\microVU_IR.h" />
<ClInclude Include="x86\microVU_Misc.h" />

View File

@ -1199,6 +1199,9 @@
<ClCompile Include="GS\Renderers\HW\GSTextureReplacementLoaders.cpp">
<Filter>System\Ps2\GS\Renderers\Hardware</Filter>
</ClCompile>
<ClCompile Include="x86\iR5900Analysis.cpp">
<Filter>System\Ps2\EmotionEngine\EE\Dynarec</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Patch.h">
@ -1979,6 +1982,9 @@
<ClInclude Include="GS\Renderers\HW\GSTextureReplacements.h">
<Filter>System\Ps2\GS\Renderers\Hardware</Filter>
</ClInclude>
<ClInclude Include="x86\iR5900Analysis.h">
<Filter>System\Ps2\EmotionEngine\EE\Dynarec</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="GS\GS.rc">

View File

@ -215,11 +215,17 @@ int _signExtendXMMtoM(uptr to, x86SSERegType from, int candestroy); // returns t
#define EEINSTINFO_COP1 1
#define EEINSTINFO_COP2 2
#define EEINST_COP2_DENORMALIZE_STATUS_FLAG 0x100
#define EEINST_COP2_NORMALIZE_STATUS_FLAG 0x200
#define EEINST_COP2_STATUS_FLAG 0x400
#define EEINST_COP2_MAC_FLAG 0x800
#define EEINST_COP2_CLIP_FLAG 0x1000
struct EEINST
{
u16 info; // extra info, if 1 inst is COP1, 2 inst is COP2. Also uses EEINST_XMM
u8 regs[34]; // includes HI/LO (HI=32, LO=33)
u8 fpuregs[33]; // ACC=32
u8 info; // extra info, if 1 inst is COP1, 2 inst is COP2. Also uses EEINST_XMM
// uses XMMTYPE_ flags; if type == XMMTYPE_TEMP, not used
u8 writeType[3], writeReg[3]; // reg written in this inst, 0 if no reg

View File

@ -0,0 +1,229 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2022 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include "PrecompiledHeader.h"
#include "iR5900Analysis.h"
#include "Memory.h"
#include "DebugTools/Debug.h"
using namespace R5900;
// This should be moved to analysis...
extern int cop2flags(u32 code);
// The base pass holds no state of its own, so construction and destruction are defaulted.
AnalysisPass::AnalysisPass() = default;
AnalysisPass::~AnalysisPass() = default;
// Base implementation intentionally does nothing; concrete passes override Run()
// to walk the block [start, end) and annotate the matching inst_cache entries.
void AnalysisPass::Run(u32 start, u32 end, EEINST* inst_cache)
{
}
/// Walks the instructions in [start, end) one 4-byte word at a time.
/// For every address the opcode is fetched with memRead32() and stored into
/// cpuRegs.code *before* the functor runs, then func(pc, entry) is invoked with the
/// EEINST entry that corresponds to that address (inst_cache advances in lockstep).
/// Iteration stops early as soon as the functor returns false.
template <class F>
void __fi AnalysisPass::ForEachInstruction(u32 start, u32 end, EEINST* inst_cache, const F& func)
{
	u32 pc = start;
	EEINST* entry = inst_cache;
	while (pc < end)
	{
		// Publish the opcode first so the functor can decode the current instruction.
		cpuRegs.code = memRead32(pc);
		if (!func(pc, entry))
			return;

		pc += 4;
		++entry;
	}
}
/// Prints a disassembly of [start, end) to the console, one line per instruction.
/// After each instruction is disassembled, func(pc, entry, text) is called so the
/// caller can append its own annotations to the text before the line is written.
template <class F>
void __fi R5900::AnalysisPass::DumpAnnotatedBlock(u32 start, u32 end, EEINST* inst_cache, const F& func)
{
	// A single buffer is reused for every line; it is emptied at the top of each iteration.
	std::string text;
	EEINST* entry = inst_cache;
	u32 pc = start;
	while (pc < end)
	{
		text.clear();
		disR5900Fasm(text, memRead32(pc), pc, false);
		func(pc, entry, text);
		Console.WriteLn(" %08X %s", pc, text.c_str());

		pc += 4;
		++entry;
	}
}
// Only delegates to the base constructor; the pass's tracking members are
// (re)initialised at the start of every Run() call.
COP2FlagHackPass::COP2FlagHackPass()
: AnalysisPass()
{
}
COP2FlagHackPass::~COP2FlagHackPass() = default;
/// Scans the block [start, end) and tags each COP2 instruction's EEINST::info with
/// the EEINST_COP2_* bits describing which VU0 flag updates must really be emitted.
///
/// The pass remembers the most recent instruction that wrote the status, MAC and clip
/// flags. Whenever something is found that may observe those flags (CFC2/CTC2 on a flag
/// register, a CTC2 to FBRST, VCALLMS, an SB/SH/SW store, or the end of the block) the
/// remembered writers are "committed", i.e. marked as having to produce real flag values.
///
/// @param start      address of the first instruction of the block
/// @param end        address one past the last instruction of the block
/// @param inst_cache EEINST entry for the instruction at `start`; following entries
///                   correspond to following instructions
void COP2FlagHackPass::Run(u32 start, u32 end, EEINST* inst_cache)
{
	// Reset all tracking state so the pass object can be reused across blocks.
	m_status_denormalized = false;
	m_last_status_write = nullptr;
	m_last_mac_write = nullptr;
	m_last_clip_write = nullptr;
	m_cfc2_pc = start;

	ForEachInstruction(start, end, inst_cache, [this, end](u32 apc, EEINST* inst) {
		// catch SB/SH/SW to potential DMA->VIF0->VU0 exec.
		// this is very unlikely in a cop2 chain.
		if (_Opcode_ == 050 || _Opcode_ == 051 || _Opcode_ == 053)
		{
			CommitAllFlags();
			return true;
		}
		else if (_Opcode_ != 022)
		{
			// not COP2
			return true;
		}

		// Detect ctc2 Status, zero, ..., cfc2 v0, Status pattern where we need accurate sticky bits.
		// Test case: Tekken Tag Tournament.
		if (_Rs_ == 6 && _Rd_ == REG_STATUS_FLAG)
		{
			// The look-ahead below overwrites cpuRegs.code (ForEachInstruction stores every
			// opcode it fetches there). Remember the CTC2 and put it back afterwards,
			// otherwise the _Rs_/_Rd_/cop2flags() checks further down would decode whichever
			// instruction the scan stopped on instead of the one at apc.
			const u32 ctc2_code = cpuRegs.code;

			// Read ahead, looking for cfc2.
			m_cfc2_pc = apc;
			ForEachInstruction(apc, end, inst, [this](u32 capc, EEINST*) {
				if (_Opcode_ == 022 && _Rs_ == 2 && _Rd_ == REG_STATUS_FLAG)
				{
					m_cfc2_pc = capc;
					return false;
				}
				return true;
			});

			cpuRegs.code = ctc2_code;

#ifdef PCSX2_DEVBUILD
			if (m_cfc2_pc != apc)
				DevCon.WriteLn("CTC2 at %08X paired with CFC2 %08X", apc, m_cfc2_pc);
#endif
		}

		// CFC2 (rs == 2) / CTC2 (rs == 6)
		if (_Rs_ == 6 || _Rs_ == 2)
		{
			switch (_Rd_)
			{
				case REG_STATUS_FLAG:
					CommitStatusFlag();
					break;
				case REG_MAC_FLAG:
					CommitMACFlag();
					break;
				case REG_CLIP_FLAG:
					CommitClipFlag();
					break;
				case REG_FBRST:
				{
					// only apply to CTC2, is FBRST readable?
					// (rs == 6 is the CTC2 encoding; rs == 2 would be CFC2.)
					if (_Rs_ == 6)
						CommitAllFlags();
				}
				break;
			}
		}

		if (((cpuRegs.code >> 25 & 1) == 1) && ((cpuRegs.code >> 2 & 15) == 14))
		{
			// VCALLMS, everything needs to be up to date
			CommitAllFlags();
		}

		// bitmask: 1 - status, 2 - mac, 4 - clip
		const int flags = cop2flags(cpuRegs.code);
		if (flags == 0)
			return true;

		// STATUS
		if (flags & 1)
		{
			// First status writer since the last commit: it has to denormalize the flag first.
			if (!m_status_denormalized)
			{
				inst->info |= EEINST_COP2_DENORMALIZE_STATUS_FLAG;
				m_status_denormalized = true;
			}

			// if we're still behind the next CFC2 after the sticky bits got cleared, we need to update flags
			if (apc < m_cfc2_pc)
				inst->info |= EEINST_COP2_STATUS_FLAG;

			m_last_status_write = inst;
		}

		// MAC
		if (flags & 2)
		{
			m_last_mac_write = inst;
		}

		// CLIP
		if (flags & 4)
		{
			// we don't track the clip flag yet..
			// but it's unlikely that we'll have more than 4 clip flags in a row, because that would be pointless?
			inst->info |= EEINST_COP2_CLIP_FLAG;
			m_last_clip_write = inst;
		}

		return true;
	});

	// Whatever runs after this block may read the flags, so the final writers must be exact.
	CommitAllFlags();

#if 0
	if (m_cfc2_pc != start)
		DumpAnnotatedBlock(start, end, inst_cache);
#endif
}
/// Debug helper: prints the block's disassembly and appends, to each line, the name of
/// every EEINST_COP2_* bit that is set in that instruction's info field.
void COP2FlagHackPass::DumpAnnotatedBlock(u32 start, u32 end, EEINST* inst_cache)
{
	AnalysisPass::DumpAnnotatedBlock(start, end, inst_cache, [](u32, EEINST* eeinst, std::string& text) {
		struct FlagLabel
		{
			int mask;
			const char* suffix;
		};

		// Listed in the order the annotations are appended to the line.
		static const FlagLabel labels[] = {
			{EEINST_COP2_DENORMALIZE_STATUS_FLAG, " COP2_DENORMALIZE_STATUS_FLAG"},
			{EEINST_COP2_NORMALIZE_STATUS_FLAG, " COP2_NORMALIZE_STATUS_FLAG"},
			{EEINST_COP2_STATUS_FLAG, " COP2_STATUS_FLAG"},
			{EEINST_COP2_MAC_FLAG, " COP2_MAC_FLAG"},
			{EEINST_COP2_CLIP_FLAG, " COP2_CLIP_FLAG"},
		};

		for (const FlagLabel& label : labels)
		{
			if (eeinst->info & label.mask)
				text.append(label.suffix);
		}
	});
}
/// Marks the most recent status-flag writer (if there is one) as having to update and
/// normalize the status flag, and records that the flag is no longer denormalized.
void COP2FlagHackPass::CommitStatusFlag()
{
	// Nothing has written the status flag yet in this block.
	if (!m_last_status_write)
		return;

	m_status_denormalized = false;
	m_last_status_write->info |= (EEINST_COP2_STATUS_FLAG | EEINST_COP2_NORMALIZE_STATUS_FLAG);
}
/// Marks the most recent MAC-flag writer (if there is one) as having to update the MAC flag.
void COP2FlagHackPass::CommitMACFlag()
{
	// No MAC flag writer recorded so far.
	if (!m_last_mac_write)
		return;

	m_last_mac_write->info |= EEINST_COP2_MAC_FLAG;
}
/// Marks the most recent clip-flag writer (if there is one) as having to update the clip flag.
void COP2FlagHackPass::CommitClipFlag()
{
	// No clip flag writer recorded so far.
	if (!m_last_clip_write)
		return;

	m_last_clip_write->info |= EEINST_COP2_CLIP_FLAG;
}
// Commits the last recorded writer of each tracked flag (status, MAC and clip).
// Run() calls this when all flags may be observed at once, and at the end of the block.
void COP2FlagHackPass::CommitAllFlags()
{
CommitStatusFlag();
CommitMACFlag();
CommitClipFlag();
}

View File

@ -0,0 +1,65 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2022 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "iR5900.h"
#include "iCore.h"
namespace R5900
{
/// Base class for passes that inspect a block of EE instructions and annotate the
/// per-instruction EEINST entries. Provides instruction-iteration and dump helpers.
class AnalysisPass
{
public:
AnalysisPass();
virtual ~AnalysisPass();
/// Runs the actual pass.
/// The base implementation is empty; derived passes override it.
virtual void Run(u32 start, u32 end, EEINST* inst_cache);
protected:
/// Takes a functor of bool(pc, EEINST*), returning false if iteration should stop.
/// The current opcode is stored in cpuRegs.code before each functor call.
template <class F>
void ForEachInstruction(u32 start, u32 end, EEINST* inst_cache, const F& func);
/// Dumps the block to the console, calling the functor void(pc, EEINST*, std::string&) for each instruction.
template <class F>
void DumpAnnotatedBlock(u32 start, u32 end, EEINST* inst_cache, const F& func);
};
/// Pass that sets the EEINST_COP2_* bits on the COP2 instructions of a block, based on
/// which instruction last wrote each flag before a point where the flag is committed.
class COP2FlagHackPass final : public AnalysisPass
{
public:
COP2FlagHackPass();
~COP2FlagHackPass();
/// Resets the tracking state, annotates the block, then commits all pending flags.
void Run(u32 start, u32 end, EEINST* inst_cache) override;
private:
/// Dumps the block with the names of the EEINST_COP2_* bits set on each instruction.
void DumpAnnotatedBlock(u32 start, u32 end, EEINST* inst_cache);
/// Each Commit* helper tags the last recorded writer of that flag, if there is one.
void CommitStatusFlag();
void CommitMACFlag();
void CommitClipFlag();
void CommitAllFlags();
/// True once a status writer has been tagged to denormalize; cleared by CommitStatusFlag().
bool m_status_denormalized = false;
/// Most recent instruction seen writing the status / MAC / clip flag (nullptr if none yet).
EEINST* m_last_status_write = nullptr;
EEINST* m_last_mac_write = nullptr;
EEINST* m_last_clip_write = nullptr;
/// Address of the CFC2 Status found by the look-ahead in Run(); set to the block start
/// when Run() begins and to the CTC2's own address when no later CFC2 is found.
u32 m_cfc2_pc = 0;
};
} // namespace R5900

View File

@ -21,6 +21,7 @@
#include "R5900Exceptions.h"
#include "R5900OpcodeTables.h"
#include "iR5900.h"
#include "iR5900Analysis.h"
#include "BaseblockEx.h"
#include "System/RecTypes.h"
@ -2171,6 +2172,7 @@ StartRecomp:
}
// rec info //
bool has_cop2_instructions = false;
{
EEINST* pcur;
@ -2191,9 +2193,18 @@ StartRecomp:
cpuRegs.code = *(int*)PSM(i - 4);
pcur[-1] = pcur[0];
pcur--;
has_cop2_instructions |= (_Opcode_ == 022);
}
}
// eventually we'll want to have a vector of passes or something.
if (has_cop2_instructions && EmuConfig.Speedhacks.vuFlagHack)
{
COP2FlagHackPass fhpass;
fhpass.Run(startpc, s_nEndBlock, s_pInstCache + 1);
}
// analyze instructions //
{
usecop2 = 0;

View File

@ -70,22 +70,22 @@ __ri void mVUallocSFLAGc(const x32& reg, const x32& regT, int fInstance)
xOR(reg, regT);
}
// Denormalizes Status Flag
__ri void mVUallocSFLAGd(u32* memAddr)
// Denormalizes Status Flag; destroys tmp1/tmp2
__ri void mVUallocSFLAGd(u32* memAddr, const x32& reg = eax, const x32& tmp1 = ecx, const x32& tmp2 = edx)
{
xMOV(edx, ptr32[memAddr]);
xMOV(eax, edx);
xSHR(eax, 3);
xAND(eax, 0x18);
xMOV(tmp2, ptr32[memAddr]);
xMOV(reg, tmp2);
xSHR(reg, 3);
xAND(reg, 0x18);
xMOV(ecx, edx);
xSHL(ecx, 11);
xAND(ecx, 0x1800);
xOR(eax, ecx);
xMOV(tmp1, tmp2);
xSHL(tmp1, 11);
xAND(tmp1, 0x1800);
xOR(reg, tmp1);
xSHL(edx, 14);
xAND(edx, 0x3cf0000);
xOR(eax, edx);
xSHL(tmp2, 14);
xAND(tmp2, 0x3cf0000);
xOR(reg, tmp2);
}
__fi void mVUallocMFLAGa(mV, const x32& reg, int fInstance)

View File

@ -55,24 +55,38 @@ void setupMacroOp(int mode, const char* opName)
{
xMOVSSZX(xmmPQ, ptr32[&vu0Regs.VI[REG_Q].UL]);
}
if (mode & 0x08) // Clip Instruction
if (mode & 0x08 && (!CHECK_VU_FLAGHACK || g_pCurInstInfo->info & EEINST_COP2_CLIP_FLAG)) // Clip Instruction
{
microVU0.prog.IRinfo.info[0].cFlag.write = 0xff;
microVU0.prog.IRinfo.info[0].cFlag.lastWrite = 0xff;
}
if (mode & 0x10) // Update Status/Mac Flags
if (mode & 0x10 && (!CHECK_VU_FLAGHACK || g_pCurInstInfo->info & EEINST_COP2_STATUS_FLAG)) // Update Status Flag
{
microVU0.prog.IRinfo.info[0].sFlag.doFlag = true;
microVU0.prog.IRinfo.info[0].sFlag.doNonSticky = true;
microVU0.prog.IRinfo.info[0].sFlag.write = 0;
microVU0.prog.IRinfo.info[0].sFlag.lastWrite = 0;
}
if (mode & 0x10 && (!CHECK_VU_FLAGHACK || g_pCurInstInfo->info & EEINST_COP2_MAC_FLAG)) // Update Mac Flags
{
microVU0.prog.IRinfo.info[0].mFlag.doFlag = true;
microVU0.prog.IRinfo.info[0].mFlag.write = 0xff;
_freeX86reg(ebx);
//Denormalize
mVUallocSFLAGd(&vu0Regs.VI[REG_STATUS_FLAG].UL);
xMOV(gprF0, eax);
}
if (mode & 0x10)
{
_freeX86reg(gprF0);
if (!CHECK_VU_FLAGHACK || (g_pCurInstInfo->info & EEINST_COP2_DENORMALIZE_STATUS_FLAG))
{
// flags are normalized, so denormalize before running the first instruction
mVUallocSFLAGd(&vu0Regs.VI[REG_STATUS_FLAG].UL, gprF0, eax, ecx);
}
else
{
// load denormalized status flag
// ideally we'd keep this in a register, but 32-bit...
xMOV(gprF0, ptr32[&vuRegs->VI[REG_STATUS_FLAG].UL]);
}
}
}
@ -82,29 +96,45 @@ void endMacroOp(int mode)
{
xMOVSS(ptr32[&vu0Regs.VI[REG_Q].UL], xmmPQ);
}
if (mode & 0x10) // Status/Mac Flags were Updated
{
// Normalize
mVUallocSFLAGc(eax, gprF0, 0);
xMOV(ptr32[&vu0Regs.VI[REG_STATUS_FLAG].UL], eax);
}
microVU0.regAlloc->flushAll();
_clearNeededCOP2Regs();
if (mode & 0x10) // Update VU0 Status/Mac instances after flush to avoid corrupting anything
if (mode & 0x10)
{
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
mVUallocSFLAGd(&vu0Regs.VI[REG_STATUS_FLAG].UL);
xMOVDZX(xRegisterSSE(t0reg), eax);
xSHUF.PS(xRegisterSSE(t0reg), xRegisterSSE(t0reg), 0);
xMOVAPS(ptr128[&microVU0.regs().micro_statusflags], xRegisterSSE(t0reg));
if (!CHECK_VU_FLAGHACK || (g_pCurInstInfo->info & EEINST_COP2_STATUS_FLAG)) // Status/Mac Flags were Updated
{
// update micro_statusflags
const int t0reg = _allocTempXMMreg(XMMT_INT, -1);
xMOVDZX(xRegisterSSE(t0reg), gprF0);
xSHUF.PS(xRegisterSSE(t0reg), xRegisterSSE(t0reg), 0);
xMOVAPS(ptr128[&microVU0.regs().micro_statusflags], xRegisterSSE(t0reg));
_freeXMMreg(t0reg);
}
xMOVDZX(xRegisterSSE(t0reg), ptr32[&vu0Regs.VI[REG_MAC_FLAG].UL]);
xSHUF.PS(xRegisterSSE(t0reg), xRegisterSSE(t0reg), 0);
xMOVAPS(ptr128[&microVU0.regs().micro_macflags], xRegisterSSE(t0reg));
_freeXMMreg(t0reg);
if (!CHECK_VU_FLAGHACK || g_pCurInstInfo->info & EEINST_COP2_NORMALIZE_STATUS_FLAG)
{
// Normalize
mVUallocSFLAGc(eax, gprF0, 0);
xMOV(ptr32[&vu0Regs.VI[REG_STATUS_FLAG].UL], eax);
}
else
{
// backup denormalized flags for the next instruction
// this is fine, because we'll normalize them again before this reg is accessed
xMOV(ptr32[&vuRegs->VI[REG_STATUS_FLAG].UL], gprF0);
}
if (!CHECK_VU_FLAGHACK || (g_pCurInstInfo->info & EEINST_COP2_MAC_FLAG))
{
const int t0reg = _allocTempXMMreg(XMMT_INT, -1);
xMOVDZX(xRegisterSSE(t0reg), ptr32[&vu0Regs.VI[REG_MAC_FLAG].UL]);
xSHUF.PS(xRegisterSSE(t0reg), xRegisterSSE(t0reg), 0);
xMOVAPS(ptr128[&microVU0.regs().micro_macflags], xRegisterSSE(t0reg));
_freeXMMreg(t0reg);
}
}
microVU0.cop2 = 0;
microVU0.regAlloc->reset(false);
}