mirror of https://github.com/PCSX2/pcsx2.git
COP2: never flush EE regs but back them up conditionally
This commit is contained in:
parent
05b8e80ac8
commit
fba9c6c04d
|
@ -45,8 +45,80 @@ _x86regs x86regs[iREGCNT_GPR], s_saveX86regs[iREGCNT_GPR];
|
|||
#define VU_VFx_ADDR(x) (uptr)&VU->VF[x].UL[0]
|
||||
#define VU_ACCx_ADDR (uptr)&VU->ACC.UL[0]
|
||||
|
||||
|
||||
// Scratch storage for the _backupNeeded*/_restoreNeeded* helpers below.
// xmmBackup holds one 16-byte slot (4 x u32) per XMM register index; it is
// declared 16-byte aligned and accessed with xMOVAPS/ptr128 by those helpers.
__aligned16 u32 xmmBackup[iREGCNT_XMM][4];
|
||||
|
||||
// gprBackup holds one slot per general-purpose register index. The slot width
// follows the build target: 64-bit when __M_X86_64 is defined, 32-bit otherwise.
#ifdef __M_X86_64
|
||||
__aligned16 u64 gprBackup[iREGCNT_GPR];
|
||||
#else
|
||||
__aligned16 u32 gprBackup[iREGCNT_GPR];
|
||||
#endif
|
||||
|
||||
static int s_xmmchecknext = 0;
|
||||
|
||||
void _backupNeededXMM()
|
||||
{
|
||||
for (int i = 0; i < iREGCNT_XMM; i++)
|
||||
{
|
||||
if (xmmregs[i].inuse)
|
||||
{
|
||||
xMOVAPS(ptr128[&xmmBackup[i][0]], xRegisterSSE(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void _restoreNeededXMM()
|
||||
{
|
||||
for (int i = 0; i < iREGCNT_XMM; i++)
|
||||
{
|
||||
if (xmmregs[i].inuse)
|
||||
{
|
||||
xMOVAPS(xRegisterSSE(i), ptr128[&xmmBackup[i][0]]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void _backupNeededx86()
|
||||
{
|
||||
for (int i = 0; i < iREGCNT_GPR; i++)
|
||||
{
|
||||
if (x86regs[i].inuse)
|
||||
{
|
||||
#ifdef __M_X86_64
|
||||
xMOV(ptr64[&gprBackup[i]], xRegister64(i));
|
||||
#else
|
||||
xMOV(ptr32[&gprBackup[i]], xRegister32(i));
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void _restoreNeededx86()
|
||||
{
|
||||
for (int i = 0; i < iREGCNT_GPR; i++)
|
||||
{
|
||||
if (x86regs[i].inuse)
|
||||
{
|
||||
#ifdef __M_X86_64
|
||||
xMOV(xRegister64(i), ptr64[&gprBackup[i]]);
|
||||
#else
|
||||
xMOV(xRegister32(i), ptr32[&gprBackup[i]]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void _cop2BackupRegs()
|
||||
{
|
||||
_backupNeededx86();
|
||||
_backupNeededXMM();
|
||||
}
|
||||
|
||||
void _cop2RestoreRegs()
|
||||
{
|
||||
_restoreNeededx86();
|
||||
_restoreNeededXMM();
|
||||
}
|
||||
// Clear current register mapping structure
|
||||
// Clear allocation counter
|
||||
void _initXMMregs()
|
||||
|
|
|
@ -159,6 +159,8 @@ struct _xmmregs
|
|||
u16 counter;
|
||||
};
|
||||
|
||||
void _cop2BackupRegs();
|
||||
void _cop2RestoreRegs();
|
||||
void _initXMMregs();
|
||||
int _getFreeXMMreg();
|
||||
int _allocTempXMMreg(XMMSSEType type, int xmmreg);
|
||||
|
|
|
@ -920,19 +920,21 @@ void recSWC1()
|
|||
|
||||
void recLQC2()
|
||||
{
|
||||
iFlushCall(FLUSH_EVERYTHING);
|
||||
|
||||
_freeX86reg(eax);
|
||||
xMOV(eax, ptr32[&cpuRegs.cycle]);
|
||||
xADD(eax, scaleblockcycles_clear());
|
||||
xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
|
||||
|
||||
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
|
||||
xForwardJZ32 skipvuidle;
|
||||
xSUB(eax, ptr32[&VU0.cycle]);
|
||||
xSUB(eax, ptr32[&VU0.nextBlockCycles]);
|
||||
xCMP(eax, EmuConfig.Gamefixes.VUKickstartHack ? 8 : 0);
|
||||
xForwardJL32 skip;
|
||||
_cop2BackupRegs();
|
||||
xLoadFarAddr(arg1reg, CpuVU0);
|
||||
xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg);
|
||||
_cop2RestoreRegs();
|
||||
skip.SetTarget();
|
||||
skipvuidle.SetTarget();
|
||||
|
||||
|
@ -965,20 +967,21 @@ void recLQC2()
|
|||
|
||||
void recSQC2()
|
||||
{
|
||||
iFlushCall(FLUSH_EVERYTHING);
|
||||
|
||||
|
||||
_freeX86reg(eax);
|
||||
xMOV(eax, ptr32[&cpuRegs.cycle]);
|
||||
xADD(eax, scaleblockcycles_clear());
|
||||
xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
|
||||
|
||||
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
|
||||
xForwardJZ32 skipvuidle;
|
||||
xSUB(eax, ptr32[&VU0.cycle]);
|
||||
xSUB(eax, ptr32[&VU0.nextBlockCycles]);
|
||||
xCMP(eax, EmuConfig.Gamefixes.VUKickstartHack ? 8 : 0);
|
||||
xForwardJL32 skip;
|
||||
_cop2BackupRegs();
|
||||
xLoadFarAddr(arg1reg, CpuVU0);
|
||||
xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg);
|
||||
_cop2RestoreRegs();
|
||||
skip.SetTarget();
|
||||
skipvuidle.SetTarget();
|
||||
|
||||
|
|
|
@ -36,7 +36,7 @@ void setupMacroOp(int mode, const char* opName)
|
|||
microVU0.prog.IRinfo.curPC = 0;
|
||||
microVU0.code = cpuRegs.code;
|
||||
memset(&microVU0.prog.IRinfo.info[0], 0, sizeof(microVU0.prog.IRinfo.info[0]));
|
||||
iFlushCall(FLUSH_EVERYTHING);
|
||||
|
||||
microVU0.regAlloc->reset();
|
||||
if (mode & 0x01) // Q-Reg will be Read
|
||||
{
|
||||
|
@ -285,13 +285,13 @@ void COP2_Interlock(bool mBitSync)
|
|||
|
||||
if (cpuRegs.code & 1)
|
||||
{
|
||||
iFlushCall(FLUSH_EVERYTHING);
|
||||
xMOV(eax, ptr32[&cpuRegs.cycle]);
|
||||
xADD(eax, scaleblockcycles_clear());
|
||||
xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
|
||||
|
||||
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
|
||||
xForwardJZ32 skipvuidle;
|
||||
_cop2BackupRegs();
|
||||
if (mBitSync)
|
||||
{
|
||||
xSUB(eax, ptr32[&VU0.cycle]);
|
||||
|
@ -306,6 +306,7 @@ void COP2_Interlock(bool mBitSync)
|
|||
}
|
||||
else
|
||||
xFastCall((void*)_vu0FinishMicro);
|
||||
_cop2RestoreRegs();
|
||||
skipvuidle.SetTarget();
|
||||
}
|
||||
}
|
||||
|
@ -321,80 +322,47 @@ void TEST_FBRST_RESET(FnType_Void* resetFunct, int vuIndex)
|
|||
|
||||
static void recCFC2()
|
||||
{
|
||||
|
||||
printCOP2("CFC2");
|
||||
_freeX86reg(eax);
|
||||
|
||||
COP2_Interlock(false);
|
||||
|
||||
if (!_Rt_)
|
||||
return;
|
||||
|
||||
iFlushCall(FLUSH_EVERYTHING);
|
||||
|
||||
if (!(cpuRegs.code & 1))
|
||||
{
|
||||
xMOV(eax, ptr32[&cpuRegs.cycle]);
|
||||
xADD(eax, scaleblockcycles_clear());
|
||||
xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
|
||||
|
||||
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
|
||||
xForwardJZ32 skipvuidle;
|
||||
xSUB(eax, ptr32[&VU0.cycle]);
|
||||
xSUB(eax, ptr32[&VU0.nextBlockCycles]);
|
||||
xCMP(eax, EmuConfig.Gamefixes.VUKickstartHack ? 8 : 0);
|
||||
xForwardJL32 skip;
|
||||
_cop2BackupRegs();
|
||||
xLoadFarAddr(arg1reg, CpuVU0);
|
||||
xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg);
|
||||
_cop2RestoreRegs();
|
||||
skip.SetTarget();
|
||||
skipvuidle.SetTarget();
|
||||
}
|
||||
|
||||
_flushEEreg(_Rt_);
|
||||
|
||||
if (_Rd_ == REG_STATUS_FLAG) // Normalize Status Flag
|
||||
xMOV(eax, ptr32[&vu0Regs.VI[REG_STATUS_FLAG].UL]);
|
||||
else
|
||||
xMOV(eax, ptr32[&vu0Regs.VI[_Rd_].UL]);
|
||||
|
||||
if (_Rd_ == REG_TPC) // Divide TPC register value by 8 during copying
|
||||
{
|
||||
// Ok, this deserves an explanation.
|
||||
// According to the official PS2 VU0 coding manual there are 3 ways to execute a micro subroutine on VU0
|
||||
// one of which is using the VCALLMSR instruction.
|
||||
// The manual requires putting the address of the micro subroutine
|
||||
// into the CMSAR0 register divided by 8 using the CTC2 command before executing VCALLMSR.
|
||||
// Many games (for instance, 24: The Game, GTA LCS, GTA VCS and FFXII) do in fact use this way,
|
||||
// they diligently put the address of the micro subroutine into a separate register (v0, v1 etc), divide it by 8
|
||||
// and move it to CMSAR0 by calling the CTC2 command.
|
||||
|
||||
// However, there are also at least 2 confirmed games (R Racing Evolution, Street Fighter EX3)
|
||||
// that execute a piece of code to run a micro subroutine on VU0 like this:
|
||||
//
|
||||
// ...
|
||||
// cfc2 t4, TPC
|
||||
// ctc2 t4, CMSAR0
|
||||
// callmsr
|
||||
// ...
|
||||
//
|
||||
// Interestingly enough there is no division by 8 but it works fine in these 2 mentioned games.
|
||||
// It means the division operation is implicit.
|
||||
// Homebrew tests for the real PS2 have shown that in fact the instruction "cfc2 t4, TPC" ends up with values that are not always divisible by 8.
|
||||
|
||||
// There are 2 possibilities: either the CFC2 instruction divides the value of the TPC (which is the Program Counter register
|
||||
// for micro subroutines) by 8 itself during copying or the TPC register always works with addresses already divided by 8.
|
||||
// The latter seems less possible because the Program Counter register by definition holds the memory address of the instruction.
|
||||
// In addition, PCSX2 already implements TPC as an instruction pointer so we'll assume that division by 8
|
||||
// is done by CFC2 while working with the TPC register.
|
||||
// (fixes R Racing Evolution and Street Fighter EX3)
|
||||
|
||||
//xSHR(eax, 3);
|
||||
|
||||
//Update Refraction - Don't need to do this anymore as addresses are fed in divided by 8 always.
|
||||
//Games such as The Incredible Hulk will read VU1's TPC from VU0 (which will already be multiplied by 8) then try to use CMSAR1 (which will also multiply by 8)
|
||||
//So everything is now fed in without multiplication
|
||||
}
|
||||
|
||||
// FixMe: Should R-Reg have upper 9 bits 0?
|
||||
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
|
||||
|
||||
if (_Rd_ >= 16)
|
||||
{
|
||||
_freeX86reg(edx);
|
||||
xCDQ(); // Sign Extend
|
||||
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], edx);
|
||||
}
|
||||
|
@ -402,19 +370,21 @@ static void recCFC2()
|
|||
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], 0);
|
||||
|
||||
// FixMe: I think this is needed, but not sure how it works
|
||||
_eeOnWriteReg(_Rt_, 1);
|
||||
// Update Refraction 20/09/2021: This is needed because Const Prop is broken
|
||||
// the Flushed flag isn't being cleared when it's not flushed. TODO I guess
|
||||
_eeOnWriteReg(_Rt_, 0);
|
||||
}
|
||||
|
||||
static void recCTC2()
|
||||
{
|
||||
|
||||
printCOP2("CTC2");
|
||||
_freeX86reg(eax);
|
||||
|
||||
COP2_Interlock(1);
|
||||
|
||||
if (!_Rd_)
|
||||
return;
|
||||
|
||||
iFlushCall(FLUSH_EVERYTHING);
|
||||
|
||||
if (!(cpuRegs.code & 1))
|
||||
{
|
||||
xMOV(eax, ptr32[&cpuRegs.cycle]);
|
||||
|
@ -427,12 +397,16 @@ static void recCTC2()
|
|||
xSUB(eax, ptr32[&VU0.nextBlockCycles]);
|
||||
xCMP(eax, EmuConfig.Gamefixes.VUKickstartHack ? 8 : 0);
|
||||
xForwardJL32 skip;
|
||||
_cop2BackupRegs();
|
||||
xLoadFarAddr(arg1reg, CpuVU0);
|
||||
xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg);
|
||||
_cop2RestoreRegs();
|
||||
skip.SetTarget();
|
||||
skipvuidle.SetTarget();
|
||||
}
|
||||
|
||||
_flushEEreg(_Rt_);
|
||||
|
||||
switch (_Rd_)
|
||||
{
|
||||
case REG_MAC_FLAG:
|
||||
|
@ -456,6 +430,7 @@ static void recCTC2()
|
|||
else
|
||||
xAND(ptr32[&vu0Regs.VI[REG_STATUS_FLAG].UL], 0x3F);
|
||||
|
||||
_freeXMMreg(xmmT1.Id);
|
||||
//Need to update the sticky flags for microVU
|
||||
mVUallocSFLAGd(&vu0Regs.VI[REG_STATUS_FLAG].UL);
|
||||
xMOVDZX(xmmT1, eax);
|
||||
|
@ -465,6 +440,7 @@ static void recCTC2()
|
|||
break;
|
||||
}
|
||||
case REG_CMSAR1: // Execute VU1 Micro SubRoutine
|
||||
_cop2BackupRegs();
|
||||
xMOV(ecx, 1);
|
||||
xFastCall((void*)vu1Finish, ecx);
|
||||
if (_Rt_)
|
||||
|
@ -474,6 +450,7 @@ static void recCTC2()
|
|||
else
|
||||
xXOR(ecx, ecx);
|
||||
xFastCall((void*)vu1ExecMicro, ecx);
|
||||
_cop2RestoreRegs();
|
||||
break;
|
||||
case REG_FBRST:
|
||||
if (!_Rt_)
|
||||
|
@ -483,10 +460,10 @@ static void recCTC2()
|
|||
}
|
||||
else
|
||||
xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
|
||||
|
||||
_cop2BackupRegs();
|
||||
TEST_FBRST_RESET(vu0ResetRegs, 0);
|
||||
TEST_FBRST_RESET(vu1ResetRegs, 1);
|
||||
|
||||
_cop2RestoreRegs();
|
||||
xAND(eax, 0x0C0C);
|
||||
xMOV(ptr32[&vu0Regs.VI[REG_FBRST].UL], eax);
|
||||
break;
|
||||
|
@ -503,12 +480,13 @@ static void recQMFC2()
|
|||
{
|
||||
|
||||
printCOP2("QMFC2");
|
||||
_freeX86reg(eax);
|
||||
|
||||
COP2_Interlock(false);
|
||||
|
||||
if (!_Rt_)
|
||||
return;
|
||||
|
||||
iFlushCall(FLUSH_EVERYTHING);
|
||||
|
||||
if (!(cpuRegs.code & 1))
|
||||
{
|
||||
xMOV(eax, ptr32[&cpuRegs.cycle]);
|
||||
|
@ -521,14 +499,19 @@ static void recQMFC2()
|
|||
xSUB(eax, ptr32[&VU0.nextBlockCycles]);
|
||||
xCMP(eax, EmuConfig.Gamefixes.VUKickstartHack ? 8 : 0);
|
||||
xForwardJL32 skip;
|
||||
_cop2BackupRegs();
|
||||
xLoadFarAddr(arg1reg, CpuVU0);
|
||||
xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg);
|
||||
_cop2RestoreRegs();
|
||||
skip.SetTarget();
|
||||
skipvuidle.SetTarget();
|
||||
}
|
||||
|
||||
// FixMe: For some reason this line is needed or else games break:
|
||||
_eeOnWriteReg(_Rt_, 0);
|
||||
_flushEEreg(_Rt_);
|
||||
_freeXMMreg(xmmT1.Id);
|
||||
// Update Refraction 20/09/2021: This is needed because Const Prop is broken
|
||||
// the Flushed flag isn't being cleared when it's not flushed. TODO I guess
|
||||
_eeOnWriteReg(_Rt_, 0); // This is needed because Const Prop is broken
|
||||
|
||||
xMOVAPS(xmmT1, ptr128[&vu0Regs.VF[_Rd_]]);
|
||||
xMOVAPS(ptr128[&cpuRegs.GPR.r[_Rt_]], xmmT1);
|
||||
|
@ -538,12 +521,13 @@ static void recQMTC2()
|
|||
{
|
||||
|
||||
printCOP2("QMTC2");
|
||||
_freeX86reg(eax);
|
||||
|
||||
COP2_Interlock(true);
|
||||
|
||||
if (!_Rd_)
|
||||
return;
|
||||
|
||||
iFlushCall(FLUSH_EVERYTHING);
|
||||
|
||||
if (!(cpuRegs.code & 1))
|
||||
{
|
||||
xMOV(eax, ptr32[&cpuRegs.cycle]);
|
||||
|
@ -556,12 +540,17 @@ static void recQMTC2()
|
|||
xSUB(eax, ptr32[&VU0.nextBlockCycles]);
|
||||
xCMP(eax, EmuConfig.Gamefixes.VUKickstartHack ? 8 : 0);
|
||||
xForwardJL32 skip;
|
||||
_cop2BackupRegs();
|
||||
xLoadFarAddr(arg1reg, CpuVU0);
|
||||
xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg);
|
||||
_cop2RestoreRegs();
|
||||
skip.SetTarget();
|
||||
skipvuidle.SetTarget();
|
||||
}
|
||||
|
||||
_flushEEreg(_Rt_);
|
||||
_freeXMMreg(xmmT1.Id);
|
||||
|
||||
xMOVAPS(xmmT1, ptr128[&cpuRegs.GPR.r[_Rt_]]);
|
||||
xMOVAPS(ptr128[&vu0Regs.VF[_Rd_]], xmmT1);
|
||||
}
|
||||
|
@ -637,12 +626,14 @@ namespace OpcodeImpl {
|
|||
// Dispatches to the handler stored in recCOP2_BC2t at index _Rt_.
void recCOP2_BC2()
{
	recCOP2_BC2t[_Rt_]();
}
|
||||
// Handles a COP2 SPECIAL1 opcode: calls _vu0FinishMicro when bit 0 of VU0's
// VPU_STAT register is set, then dispatches to the recCOP2SPECIAL1t handler
// selected by _Funct_. The whole sequence is bracketed by
// _cop2BackupRegs()/_cop2RestoreRegs().
void recCOP2_SPEC1()
|
||||
{
|
||||
// NOTE(review): this listing is a diff rendering without +/- markers; this
// flush may be a line the commit removed in favour of the backup/restore
// pair below - confirm against the actual file.
iFlushCall(FLUSH_EVERYTHING);
|
||||
// Save in-use host registers before anything that may call out.
_cop2BackupRegs();
|
||||
// Test bit 0 of VPU_STAT; when it is clear, jump past the finish call.
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
|
||||
xForwardJZ32 skipvuidle;
|
||||
xFastCall((void*)_vu0FinishMicro);
|
||||
skipvuidle.SetTarget();
|
||||
|
||||
// Run the per-opcode handler selected by _Funct_.
recCOP2SPECIAL1t[_Funct_]();
|
||||
|
||||
// Reload the registers saved at the top of the function.
_cop2RestoreRegs();
|
||||
}
|
||||
void recCOP2_SPEC2() { recCOP2SPECIAL2t[(cpuRegs.code & 3) | ((cpuRegs.code >> 4) & 0x7c)](); }
|
||||
|
|
Loading…
Reference in New Issue