diff --git a/pcsx2/x86/iCOP0.cpp b/pcsx2/x86/iCOP0.cpp index af9022b4cb..51707e4e5a 100644 --- a/pcsx2/x86/iCOP0.cpp +++ b/pcsx2/x86/iCOP0.cpp @@ -65,7 +65,7 @@ static void _setupBranchTest() void recBC0F() { const u32 branchTo = ((s32)_Imm_ * 4) + pc; - const bool swap = TrySwapDelaySlot(0, 0, 0); + const bool swap = TrySwapDelaySlot(0, 0, 0, false); _setupBranchTest(); recDoBranchImm(branchTo, JE32(0), false, swap); } @@ -73,7 +73,7 @@ void recBC0F() void recBC0T() { const u32 branchTo = ((s32)_Imm_ * 4) + pc; - const bool swap = TrySwapDelaySlot(0, 0, 0); + const bool swap = TrySwapDelaySlot(0, 0, 0, false); _setupBranchTest(); recDoBranchImm(branchTo, JNE32(0), false, swap); } diff --git a/pcsx2/x86/iFPU.cpp b/pcsx2/x86/iFPU.cpp index 5ef1ee0441..11255acf07 100644 --- a/pcsx2/x86/iFPU.cpp +++ b/pcsx2/x86/iFPU.cpp @@ -704,7 +704,7 @@ void recBC1F() { EE::Profiler.EmitOp(eeOpcode::BC1F); const u32 branchTo = ((s32)_Imm_ * 4) + pc; - const bool swap = TrySwapDelaySlot(0, 0, 0); + const bool swap = TrySwapDelaySlot(0, 0, 0, true); _setupBranchTest(); recDoBranchImm(branchTo, JNZ32(0), false, swap); } @@ -713,7 +713,7 @@ void recBC1T() { EE::Profiler.EmitOp(eeOpcode::BC1T); const u32 branchTo = ((s32)_Imm_ * 4) + pc; - const bool swap = TrySwapDelaySlot(0, 0, 0); + const bool swap = TrySwapDelaySlot(0, 0, 0, true); _setupBranchTest(); recDoBranchImm(branchTo, JZ32(0), false, swap); } diff --git a/pcsx2/x86/iR5900.h b/pcsx2/x86/iR5900.h index aa76fd3d6d..ef869b0941 100644 --- a/pcsx2/x86/iR5900.h +++ b/pcsx2/x86/iR5900.h @@ -69,7 +69,7 @@ u8* recBeginThunk(); u8* recEndThunk(); // used when processing branches -bool TrySwapDelaySlot(u32 rs, u32 rt, u32 rd); +bool TrySwapDelaySlot(u32 rs, u32 rt, u32 rd, bool allow_loadstore); void SaveBranchState(); void LoadBranchState(); diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index f4e321a0d1..f109a895cb 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -913,7 +913,7 @@ void SetBranchReg(u32 reg) // xMOV(ptr[&cpuRegs.pc], eax); // } // } - const bool swap = EmuConfig.Gamefixes.GoemonTlbHack ? false : TrySwapDelaySlot(reg, 0, 0); + const bool swap = EmuConfig.Gamefixes.GoemonTlbHack ? false : TrySwapDelaySlot(reg, 0, 0, true); if (!swap) { const int wbreg = _allocX86reg(X86TYPE_PCWRITEBACK, 0, MODE_WRITE | MODE_CALLEESAVED); @@ -998,7 +998,7 @@ u8* recEndThunk() return block_end; } -bool TrySwapDelaySlot(u32 rs, u32 rt, u32 rd) +bool TrySwapDelaySlot(u32 rs, u32 rt, u32 rd, bool allow_loadstore) { #if 1 if (g_recompilingDelaySlot) @@ -1029,6 +1029,12 @@ bool TrySwapDelaySlot(u32 rs, u32 rt, u32 rd) case 14: // XORI case 24: // DADDI case 25: // DADDIU + { + if ((rs != 0 && rs == opcode_rt) || (rt != 0 && rt == opcode_rt) || (rd != 0 && (rd == opcode_rs || rd == opcode_rt))) + goto is_unsafe; + } + break; + case 26: // LDL case 27: // LDR case 30: // LQ @@ -1051,7 +1057,8 @@ bool TrySwapDelaySlot(u32 rs, u32 rt, u32 rd) case 55: // LD case 63: // SD { - if ((rs != 0 && rs == opcode_rt) || (rt != 0 && rt == opcode_rt) || (rd != 0 && (rd == opcode_rs || rd == opcode_rt))) + // We can't allow loadstore swaps for BC0x/BC2x, since they could affect the condition. + if (!allow_loadstore || (rs != 0 && rs == opcode_rt) || (rt != 0 && rt == opcode_rt) || (rd != 0 && (rd == opcode_rs || rd == opcode_rt))) goto is_unsafe; } break; diff --git a/pcsx2/x86/ix86-32/iR5900Branch.cpp b/pcsx2/x86/ix86-32/iR5900Branch.cpp index f5ce1d34db..0d0211e344 100644 --- a/pcsx2/x86/ix86-32/iR5900Branch.cpp +++ b/pcsx2/x86/ix86-32/iR5900Branch.cpp @@ -155,7 +155,7 @@ static void recBEQ_process(int process) } else { - const bool swap = TrySwapDelaySlot(_Rs_, _Rt_, 0); + const bool swap = TrySwapDelaySlot(_Rs_, _Rt_, 0, true); recSetBranchEQ(0, process); @@ -219,7 +219,7 @@ static void recBNE_process(int process) return; } - const bool swap = TrySwapDelaySlot(_Rs_, _Rt_, 0); + const bool swap = TrySwapDelaySlot(_Rs_, _Rt_, 0, true); recSetBranchEQ(1, process); @@ -383,7 +383,7 @@ void recBLTZAL() return; } - const bool swap = TrySwapDelaySlot(_Rs_, 0, 0); + const bool swap = TrySwapDelaySlot(_Rs_, 0, 0, true); recSetBranchL(1); @@ -432,7 +432,7 @@ void recBGEZAL() return; } - const bool swap = TrySwapDelaySlot(_Rs_, 0, 0); + const bool swap = TrySwapDelaySlot(_Rs_, 0, 0, true); recSetBranchL(0); @@ -551,7 +551,7 @@ void recBLEZ() return; } - const bool swap = TrySwapDelaySlot(_Rs_, 0, 0); + const bool swap = TrySwapDelaySlot(_Rs_, 0, 0, true); const int regs = _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ); _eeFlushAllDirty(); @@ -600,7 +600,7 @@ void recBGTZ() return; } - const bool swap = TrySwapDelaySlot(_Rs_, 0, 0); + const bool swap = TrySwapDelaySlot(_Rs_, 0, 0, true); const int regs = _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ); _eeFlushAllDirty(); @@ -649,7 +649,7 @@ void recBLTZ() return; } - const bool swap = TrySwapDelaySlot(_Rs_, 0, 0); + const bool swap = TrySwapDelaySlot(_Rs_, 0, 0, true); _eeFlushAllDirty(); recSetBranchL(1); @@ -691,7 +691,7 @@ void recBGEZ() return; } - const bool swap = TrySwapDelaySlot(_Rs_, 0, 0); + const bool swap = TrySwapDelaySlot(_Rs_, 0, 0, true); _eeFlushAllDirty(); recSetBranchL(0); diff --git a/pcsx2/x86/ix86-32/iR5900Jump.cpp b/pcsx2/x86/ix86-32/iR5900Jump.cpp index 5893d06e39..bdf2771a35 100644 --- a/pcsx2/x86/ix86-32/iR5900Jump.cpp +++ b/pcsx2/x86/ix86-32/iR5900Jump.cpp @@ -99,7 +99,7 @@ void recJALR() EE::Profiler.EmitOp(eeOpcode::JALR); const u32 newpc = pc + 4; - const bool swap = (EmuConfig.Gamefixes.GoemonTlbHack || _Rd_ == _Rs_) ? false : TrySwapDelaySlot(_Rs_, 0, _Rd_); + const bool swap = (EmuConfig.Gamefixes.GoemonTlbHack || _Rd_ == _Rs_) ? false : TrySwapDelaySlot(_Rs_, 0, _Rd_, true); // uncomment when there are NO instructions that need to call interpreter // int mmreg; diff --git a/pcsx2/x86/microVU_Macro.inl b/pcsx2/x86/microVU_Macro.inl index fde8eff246..a10ef6c848 100644 --- a/pcsx2/x86/microVU_Macro.inl +++ b/pcsx2/x86/microVU_Macro.inl @@ -311,7 +311,7 @@ static void _setupBranchTest(u32*(jmpType)(u32), bool isLikely) { printCOP2("COP2 Branch"); const u32 branchTo = ((s32)_Imm_ * 4) + pc; - const bool swap = isLikely ? false : TrySwapDelaySlot(0, 0, 0); + const bool swap = isLikely ? false : TrySwapDelaySlot(0, 0, 0, false); _eeFlushAllDirty(); //xTEST(ptr32[&vif1Regs.stat._u32], 0x4); xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x100);