x86/iR5900: Don't swap loadstore delay slots for BC0/BC2 conditions

Swapping a load/store in the delay slot ahead of a BC0x/BC2x branch could
affect the condition that the branch tests, leading to incorrect code execution.

Fixes a lockup in Oni after the intro FMVs.
This commit is contained in:
Connor McLaughlin 2022-11-20 13:37:42 +10:00 committed by refractionpcsx2
parent 599e291824
commit ef8e35032e
7 changed files with 25 additions and 18 deletions

View File

@ -65,7 +65,7 @@ static void _setupBranchTest()
void recBC0F()
{
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
const bool swap = TrySwapDelaySlot(0, 0, 0);
const bool swap = TrySwapDelaySlot(0, 0, 0, false);
_setupBranchTest();
recDoBranchImm(branchTo, JE32(0), false, swap);
}
@ -73,7 +73,7 @@ void recBC0F()
void recBC0T()
{
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
const bool swap = TrySwapDelaySlot(0, 0, 0);
const bool swap = TrySwapDelaySlot(0, 0, 0, false);
_setupBranchTest();
recDoBranchImm(branchTo, JNE32(0), false, swap);
}

View File

@ -704,7 +704,7 @@ void recBC1F()
{
EE::Profiler.EmitOp(eeOpcode::BC1F);
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
const bool swap = TrySwapDelaySlot(0, 0, 0);
const bool swap = TrySwapDelaySlot(0, 0, 0, true);
_setupBranchTest();
recDoBranchImm(branchTo, JNZ32(0), false, swap);
}
@ -713,7 +713,7 @@ void recBC1T()
{
EE::Profiler.EmitOp(eeOpcode::BC1T);
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
const bool swap = TrySwapDelaySlot(0, 0, 0);
const bool swap = TrySwapDelaySlot(0, 0, 0, true);
_setupBranchTest();
recDoBranchImm(branchTo, JZ32(0), false, swap);
}

View File

@ -69,7 +69,7 @@ u8* recBeginThunk();
u8* recEndThunk();
// used when processing branches
bool TrySwapDelaySlot(u32 rs, u32 rt, u32 rd);
bool TrySwapDelaySlot(u32 rs, u32 rt, u32 rd, bool allow_loadstore);
void SaveBranchState();
void LoadBranchState();

View File

@ -913,7 +913,7 @@ void SetBranchReg(u32 reg)
// xMOV(ptr[&cpuRegs.pc], eax);
// }
// }
const bool swap = EmuConfig.Gamefixes.GoemonTlbHack ? false : TrySwapDelaySlot(reg, 0, 0);
const bool swap = EmuConfig.Gamefixes.GoemonTlbHack ? false : TrySwapDelaySlot(reg, 0, 0, true);
if (!swap)
{
const int wbreg = _allocX86reg(X86TYPE_PCWRITEBACK, 0, MODE_WRITE | MODE_CALLEESAVED);
@ -998,7 +998,7 @@ u8* recEndThunk()
return block_end;
}
bool TrySwapDelaySlot(u32 rs, u32 rt, u32 rd)
bool TrySwapDelaySlot(u32 rs, u32 rt, u32 rd, bool allow_loadstore)
{
#if 1
if (g_recompilingDelaySlot)
@ -1029,6 +1029,12 @@ bool TrySwapDelaySlot(u32 rs, u32 rt, u32 rd)
case 14: // XORI
case 24: // DADDI
case 25: // DADDIU
{
if ((rs != 0 && rs == opcode_rt) || (rt != 0 && rt == opcode_rt) || (rd != 0 && (rd == opcode_rs || rd == opcode_rt)))
goto is_unsafe;
}
break;
case 26: // LDL
case 27: // LDR
case 30: // LQ
@ -1051,7 +1057,8 @@ bool TrySwapDelaySlot(u32 rs, u32 rt, u32 rd)
case 55: // LD
case 63: // SD
{
if ((rs != 0 && rs == opcode_rt) || (rt != 0 && rt == opcode_rt) || (rd != 0 && (rd == opcode_rs || rd == opcode_rt)))
// We can't allow loadstore swaps for BC0x/BC2x, since they could affect the condition.
if (!allow_loadstore || (rs != 0 && rs == opcode_rt) || (rt != 0 && rt == opcode_rt) || (rd != 0 && (rd == opcode_rs || rd == opcode_rt)))
goto is_unsafe;
}
break;

View File

@ -155,7 +155,7 @@ static void recBEQ_process(int process)
}
else
{
const bool swap = TrySwapDelaySlot(_Rs_, _Rt_, 0);
const bool swap = TrySwapDelaySlot(_Rs_, _Rt_, 0, true);
recSetBranchEQ(0, process);
@ -219,7 +219,7 @@ static void recBNE_process(int process)
return;
}
const bool swap = TrySwapDelaySlot(_Rs_, _Rt_, 0);
const bool swap = TrySwapDelaySlot(_Rs_, _Rt_, 0, true);
recSetBranchEQ(1, process);
@ -383,7 +383,7 @@ void recBLTZAL()
return;
}
const bool swap = TrySwapDelaySlot(_Rs_, 0, 0);
const bool swap = TrySwapDelaySlot(_Rs_, 0, 0, true);
recSetBranchL(1);
@ -432,7 +432,7 @@ void recBGEZAL()
return;
}
const bool swap = TrySwapDelaySlot(_Rs_, 0, 0);
const bool swap = TrySwapDelaySlot(_Rs_, 0, 0, true);
recSetBranchL(0);
@ -551,7 +551,7 @@ void recBLEZ()
return;
}
const bool swap = TrySwapDelaySlot(_Rs_, 0, 0);
const bool swap = TrySwapDelaySlot(_Rs_, 0, 0, true);
const int regs = _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
_eeFlushAllDirty();
@ -600,7 +600,7 @@ void recBGTZ()
return;
}
const bool swap = TrySwapDelaySlot(_Rs_, 0, 0);
const bool swap = TrySwapDelaySlot(_Rs_, 0, 0, true);
const int regs = _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
_eeFlushAllDirty();
@ -649,7 +649,7 @@ void recBLTZ()
return;
}
const bool swap = TrySwapDelaySlot(_Rs_, 0, 0);
const bool swap = TrySwapDelaySlot(_Rs_, 0, 0, true);
_eeFlushAllDirty();
recSetBranchL(1);
@ -691,7 +691,7 @@ void recBGEZ()
return;
}
const bool swap = TrySwapDelaySlot(_Rs_, 0, 0);
const bool swap = TrySwapDelaySlot(_Rs_, 0, 0, true);
_eeFlushAllDirty();
recSetBranchL(0);

View File

@ -99,7 +99,7 @@ void recJALR()
EE::Profiler.EmitOp(eeOpcode::JALR);
const u32 newpc = pc + 4;
const bool swap = (EmuConfig.Gamefixes.GoemonTlbHack || _Rd_ == _Rs_) ? false : TrySwapDelaySlot(_Rs_, 0, _Rd_);
const bool swap = (EmuConfig.Gamefixes.GoemonTlbHack || _Rd_ == _Rs_) ? false : TrySwapDelaySlot(_Rs_, 0, _Rd_, true);
// uncomment when there are NO instructions that need to call interpreter
// int mmreg;

View File

@ -311,7 +311,7 @@ static void _setupBranchTest(u32*(jmpType)(u32), bool isLikely)
{
printCOP2("COP2 Branch");
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
const bool swap = isLikely ? false : TrySwapDelaySlot(0, 0, 0);
const bool swap = isLikely ? false : TrySwapDelaySlot(0, 0, 0, false);
_eeFlushAllDirty();
//xTEST(ptr32[&vif1Regs.stat._u32], 0x4);
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x100);