CPU: Fix more load delay slot issues
Fixes Spyro again. b{ltz,gez}(al)? disabled in the recompiler until issues are fixed.
This commit is contained in:
parent
897e75fa8e
commit
8c5fcc8f48
|
@ -55,22 +55,23 @@ void CodeCache::Execute()
|
|||
break;
|
||||
}
|
||||
|
||||
reexecute_block:
|
||||
|
||||
#if 0
|
||||
const u32 tick = m_system->GetGlobalTickCounter() + m_core->GetPendingTicks();
|
||||
if (tick == 58672386)
|
||||
if (tick == 61033207)
|
||||
__debugbreak();
|
||||
#endif
|
||||
|
||||
reexecute_block:
|
||||
if (m_use_recompiler)
|
||||
block->host_code(m_core);
|
||||
else
|
||||
InterpretCachedBlock(*block);
|
||||
|
||||
#if 0
|
||||
LogCurrentState();
|
||||
#endif
|
||||
|
||||
if (m_use_recompiler)
|
||||
block->host_code(m_core);
|
||||
else
|
||||
InterpretCachedBlock(*block);
|
||||
|
||||
if (m_core->m_downcount < 0)
|
||||
break;
|
||||
else if (m_core->HasPendingInterrupt() || !USE_BLOCK_LINKING)
|
||||
|
@ -142,11 +143,14 @@ void CodeCache::LogCurrentState()
|
|||
WriteToExecutionLog(
|
||||
"tick=%u pc=%08X npc=%08X zero=%08X at=%08X v0=%08X v1=%08X a0=%08X a1=%08X a2=%08X a3=%08X t0=%08X "
|
||||
"t1=%08X t2=%08X t3=%08X t4=%08X t5=%08X t6=%08X t7=%08X s0=%08X s1=%08X s2=%08X s3=%08X s4=%08X "
|
||||
"s5=%08X s6=%08X s7=%08X t8=%08X t9=%08X k0=%08X k1=%08X gp=%08X sp=%08X fp=%08X ra=%08X\n",
|
||||
"s5=%08X s6=%08X s7=%08X t8=%08X t9=%08X k0=%08X k1=%08X gp=%08X sp=%08X fp=%08X ra=%08X npc=%08X ldr=%s "
|
||||
"ldv=%08X\n",
|
||||
m_system->GetGlobalTickCounter() + m_core->GetPendingTicks(), regs.pc, regs.npc, regs.zero, regs.at, regs.v0,
|
||||
regs.v1, regs.a0, regs.a1, regs.a2, regs.a3, regs.t0, regs.t1, regs.t2, regs.t3, regs.t4, regs.t5, regs.t6, regs.t7,
|
||||
regs.s0, regs.s1, regs.s2, regs.s3, regs.s4, regs.s5, regs.s6, regs.s7, regs.t8, regs.t9, regs.k0, regs.k1, regs.gp,
|
||||
regs.sp, regs.fp, regs.ra);
|
||||
regs.sp, regs.fp, regs.ra, regs.npc,
|
||||
(m_core->m_next_load_delay_reg == Reg::count) ? "NONE" : GetRegName(m_core->m_next_load_delay_reg),
|
||||
(m_core->m_next_load_delay_reg == Reg::count) ? 0 : m_core->m_next_load_delay_value);
|
||||
}
|
||||
|
||||
CodeBlockKey CodeCache::GetNextBlockKey() const
|
||||
|
|
|
@ -94,10 +94,8 @@ bool Core::DoState(StateWrapper& sw)
|
|||
sw.Do(&m_branch_was_taken);
|
||||
sw.Do(&m_load_delay_reg);
|
||||
sw.Do(&m_load_delay_value);
|
||||
sw.Do(&m_load_delay_old_value);
|
||||
sw.Do(&m_next_load_delay_reg);
|
||||
sw.Do(&m_next_load_delay_value);
|
||||
sw.Do(&m_next_load_delay_old_value);
|
||||
sw.Do(&m_cache_control);
|
||||
sw.DoBytes(m_dcache.data(), m_dcache.size());
|
||||
|
||||
|
@ -389,6 +387,7 @@ u32 Core::ReadReg(Reg rs)
|
|||
void Core::WriteReg(Reg rd, u32 value)
|
||||
{
|
||||
m_regs.r[static_cast<u8>(rd)] = value;
|
||||
m_load_delay_reg = (rd == m_load_delay_reg) ? Reg::count : m_load_delay_reg;
|
||||
|
||||
// prevent writes to $zero from going through - better than branching/cmov
|
||||
m_regs.zero = 0;
|
||||
|
@ -407,7 +406,6 @@ void Core::WriteRegDelayed(Reg rd, u32 value)
|
|||
// save the old value, if something else overwrites this reg we want to preserve it
|
||||
m_next_load_delay_reg = rd;
|
||||
m_next_load_delay_value = value;
|
||||
m_next_load_delay_old_value = m_regs.r[static_cast<u8>(rd)];
|
||||
}
|
||||
|
||||
std::optional<u32> Core::ReadCop0Reg(Cop0Reg reg)
|
||||
|
@ -1120,7 +1118,7 @@ void Core::ExecuteInstruction()
|
|||
|
||||
case InstructionOp::jal:
|
||||
{
|
||||
m_regs.ra = m_regs.npc;
|
||||
WriteReg(Reg::ra, m_regs.npc);
|
||||
m_next_instruction_is_branch_delay_slot = true;
|
||||
Branch((m_regs.pc & UINT32_C(0xF0000000)) | (inst.j.target << 2));
|
||||
}
|
||||
|
@ -1175,7 +1173,7 @@ void Core::ExecuteInstruction()
|
|||
// register is still linked even if the branch isn't taken
|
||||
const bool link = (rt & u8(0x1E)) == u8(0x10);
|
||||
if (link)
|
||||
m_regs.ra = m_regs.npc;
|
||||
WriteReg(Reg::ra, m_regs.npc);
|
||||
|
||||
if (branch)
|
||||
Branch(m_regs.pc + (inst.i.imm_sext32() << 2));
|
||||
|
|
|
@ -106,12 +106,11 @@ private:
|
|||
ALWAYS_INLINE void UpdateLoadDelay()
|
||||
{
|
||||
// the old value is needed in case the delay slot instruction overwrites the same register
|
||||
if (m_load_delay_reg != Reg::count && m_regs.r[static_cast<u8>(m_load_delay_reg)] == m_load_delay_old_value)
|
||||
if (m_load_delay_reg != Reg::count)
|
||||
m_regs.r[static_cast<u8>(m_load_delay_reg)] = m_load_delay_value;
|
||||
|
||||
m_load_delay_reg = m_next_load_delay_reg;
|
||||
m_load_delay_value = m_next_load_delay_value;
|
||||
m_load_delay_old_value = m_next_load_delay_old_value;
|
||||
m_next_load_delay_reg = Reg::count;
|
||||
}
|
||||
|
||||
|
@ -168,10 +167,8 @@ private:
|
|||
// load delays
|
||||
Reg m_load_delay_reg = Reg::count;
|
||||
u32 m_load_delay_value = 0;
|
||||
u32 m_load_delay_old_value = 0;
|
||||
Reg m_next_load_delay_reg = Reg::count;
|
||||
u32 m_next_load_delay_value = 0;
|
||||
u32 m_next_load_delay_old_value = 0;
|
||||
|
||||
u32 m_cache_control = 0;
|
||||
|
||||
|
|
|
@ -93,7 +93,7 @@ bool CodeGenerator::CompileInstruction(const CodeBlockInstruction& cbi)
|
|||
|
||||
case InstructionOp::j:
|
||||
case InstructionOp::jal:
|
||||
case InstructionOp::b:
|
||||
//case InstructionOp::b:
|
||||
case InstructionOp::beq:
|
||||
case InstructionOp::bne:
|
||||
case InstructionOp::bgtz:
|
||||
|
@ -812,20 +812,23 @@ void CodeGenerator::InstructionEpilogue(const CodeBlockInstruction& cbi)
|
|||
{
|
||||
m_register_cache.UpdateLoadDelay();
|
||||
|
||||
if (m_load_delay_dirty)
|
||||
{
|
||||
// we have to invalidate the register cache, since the load delayed register might've been cached
|
||||
Log_DebugPrint("Emitting delay slot flush");
|
||||
EmitFlushInterpreterLoadDelay();
|
||||
m_register_cache.InvalidateAllNonDirtyGuestRegisters();
|
||||
m_load_delay_dirty = false;
|
||||
}
|
||||
|
||||
// copy if the previous instruction was a load, reset the current value on the next instruction
|
||||
if (m_next_load_delay_dirty)
|
||||
{
|
||||
Log_DebugPrint("Emitting delay slot flush (with move next)");
|
||||
EmitDelaySlotUpdate(false, false, true);
|
||||
EmitMoveNextInterpreterLoadDelay();
|
||||
m_next_load_delay_dirty = false;
|
||||
m_load_delay_dirty = true;
|
||||
}
|
||||
else if (m_load_delay_dirty)
|
||||
{
|
||||
Log_DebugPrint("Emitting delay slot flush");
|
||||
EmitDelaySlotUpdate(true, false, false);
|
||||
m_load_delay_dirty = false;
|
||||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::SyncCurrentInstructionPC()
|
||||
|
|
|
@ -65,7 +65,10 @@ public:
|
|||
|
||||
void EmitLoadGuestRegister(HostReg host_reg, Reg guest_reg);
|
||||
void EmitStoreGuestRegister(Reg guest_reg, const Value& value);
|
||||
void EmitStoreLoadDelay(Reg reg, const Value& value);
|
||||
void EmitStoreInterpreterLoadDelay(Reg reg, const Value& value);
|
||||
void EmitFlushInterpreterLoadDelay();
|
||||
void EmitMoveNextInterpreterLoadDelay();
|
||||
void EmitCancelInterpreterLoadDelayForReg(Reg reg);
|
||||
void EmitLoadCPUStructField(HostReg host_reg, RegSize size, u32 offset);
|
||||
void EmitStoreCPUStructField(u32 offset, const Value& value);
|
||||
void EmitAddCPUStructField(u32 offset, const Value& value);
|
||||
|
@ -168,7 +171,6 @@ private:
|
|||
void SyncCurrentInstructionPC();
|
||||
void SyncPC();
|
||||
void AddPendingCycles();
|
||||
void EmitDelaySlotUpdate(bool skip_check_for_delay, bool skip_check_old_value, bool move_next);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Instruction Code Generators
|
||||
|
|
|
@ -17,25 +17,11 @@ void CodeGenerator::EmitStoreGuestRegister(Reg guest_reg, const Value& value)
|
|||
EmitStoreCPUStructField(CalculateRegisterOffset(guest_reg), value);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitStoreLoadDelay(Reg reg, const Value& value)
|
||||
void CodeGenerator::EmitStoreInterpreterLoadDelay(Reg reg, const Value& value)
|
||||
{
|
||||
DebugAssert(value.size == RegSize_32 && value.IsInHostRegister());
|
||||
EmitStoreCPUStructField(offsetof(Core, m_load_delay_reg), Value::FromConstantU8(static_cast<u8>(reg)));
|
||||
EmitStoreCPUStructField(offsetof(Core, m_load_delay_value), value);
|
||||
|
||||
// We don't want to allocate a register since this could be in a block exit, so re-use the value.
|
||||
if (m_register_cache.IsGuestRegisterCached(reg))
|
||||
{
|
||||
EmitStoreCPUStructField(offsetof(Core, m_load_delay_old_value), m_register_cache.ReadGuestRegister(reg));
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitPushHostReg(value.host_reg);
|
||||
EmitLoadCPUStructField(value.host_reg, RegSize_32, CalculateRegisterOffset(reg));
|
||||
EmitStoreCPUStructField(offsetof(Core, m_load_delay_old_value), value);
|
||||
EmitPopHostReg(value.host_reg);
|
||||
}
|
||||
|
||||
m_load_delay_dirty = true;
|
||||
}
|
||||
|
||||
|
|
|
@ -1614,60 +1614,65 @@ void CodeGenerator::EmitStoreGuestMemory(const Value& address, const Value& valu
|
|||
SwitchToNearCode();
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitDelaySlotUpdate(bool skip_check_for_delay, bool skip_check_old_value, bool move_next)
|
||||
void CodeGenerator::EmitFlushInterpreterLoadDelay()
|
||||
{
|
||||
Value reg = m_register_cache.AllocateScratch(RegSize_8);
|
||||
Value value = m_register_cache.AllocateScratch(RegSize_32);
|
||||
|
||||
Xbyak::Label skip_flush;
|
||||
|
||||
auto load_delay_reg = m_emit->byte[GetCPUPtrReg() + offsetof(Core, m_load_delay_reg)];
|
||||
auto load_delay_old_value = m_emit->dword[GetCPUPtrReg() + offsetof(Core, m_load_delay_old_value)];
|
||||
auto load_delay_value = m_emit->dword[GetCPUPtrReg() + offsetof(Core, m_load_delay_value)];
|
||||
auto reg_ptr = m_emit->dword[GetCPUPtrReg() + offsetof(Core, m_regs.r[0]) + GetHostReg64(reg.host_reg) * 4];
|
||||
|
||||
Xbyak::Label skip_flush;
|
||||
|
||||
// reg = load_delay_reg
|
||||
m_emit->movzx(GetHostReg32(reg.host_reg), load_delay_reg);
|
||||
if (!skip_check_old_value)
|
||||
m_emit->mov(GetHostReg32(value), load_delay_old_value);
|
||||
|
||||
if (!skip_check_for_delay)
|
||||
{
|
||||
// if load_delay_reg == Reg::count goto skip_flush
|
||||
m_emit->cmp(GetHostReg32(reg.host_reg), static_cast<u8>(Reg::count));
|
||||
m_emit->je(skip_flush);
|
||||
}
|
||||
|
||||
if (!skip_check_old_value)
|
||||
{
|
||||
// if r[reg] != load_delay_old_value goto skip_flush
|
||||
m_emit->cmp(GetHostReg32(value), reg_ptr);
|
||||
m_emit->jne(skip_flush);
|
||||
}
|
||||
// if load_delay_reg == Reg::count goto skip_flush
|
||||
m_emit->cmp(GetHostReg32(reg.host_reg), static_cast<u8>(Reg::count));
|
||||
m_emit->je(skip_flush);
|
||||
|
||||
// r[reg] = load_delay_value
|
||||
m_emit->mov(GetHostReg32(value), load_delay_value);
|
||||
m_emit->mov(reg_ptr, GetHostReg32(value));
|
||||
|
||||
// if !move_next load_delay_reg = Reg::count
|
||||
if (!move_next)
|
||||
m_emit->mov(load_delay_reg, static_cast<u8>(Reg::count));
|
||||
// load_delay_reg = Reg::count
|
||||
m_emit->mov(load_delay_reg, static_cast<u8>(Reg::count));
|
||||
|
||||
m_emit->L(skip_flush);
|
||||
}
|
||||
|
||||
if (move_next)
|
||||
{
|
||||
auto next_load_delay_reg = m_emit->byte[GetCPUPtrReg() + offsetof(Core, m_next_load_delay_reg)];
|
||||
auto next_load_delay_old_value = m_emit->dword[GetCPUPtrReg() + offsetof(Core, m_next_load_delay_old_value)];
|
||||
auto next_load_delay_value = m_emit->dword[GetCPUPtrReg() + offsetof(Core, m_next_load_delay_value)];
|
||||
m_emit->mov(GetHostReg32(value), next_load_delay_value);
|
||||
m_emit->mov(GetHostReg8(reg), next_load_delay_reg);
|
||||
m_emit->mov(load_delay_value, GetHostReg32(value));
|
||||
m_emit->mov(GetHostReg32(value), next_load_delay_old_value);
|
||||
m_emit->mov(load_delay_reg, GetHostReg8(reg));
|
||||
m_emit->mov(load_delay_old_value, GetHostReg32(value));
|
||||
m_emit->mov(next_load_delay_reg, static_cast<u8>(Reg::count));
|
||||
}
|
||||
void CodeGenerator::EmitMoveNextInterpreterLoadDelay()
|
||||
{
|
||||
Value reg = m_register_cache.AllocateScratch(RegSize_8);
|
||||
Value value = m_register_cache.AllocateScratch(RegSize_32);
|
||||
|
||||
auto load_delay_reg = m_emit->byte[GetCPUPtrReg() + offsetof(Core, m_load_delay_reg)];
|
||||
auto load_delay_value = m_emit->dword[GetCPUPtrReg() + offsetof(Core, m_load_delay_value)];
|
||||
auto next_load_delay_reg = m_emit->byte[GetCPUPtrReg() + offsetof(Core, m_next_load_delay_reg)];
|
||||
auto next_load_delay_value = m_emit->dword[GetCPUPtrReg() + offsetof(Core, m_next_load_delay_value)];
|
||||
|
||||
m_emit->mov(GetHostReg32(value), next_load_delay_value);
|
||||
m_emit->mov(GetHostReg8(reg), next_load_delay_reg);
|
||||
m_emit->mov(load_delay_value, GetHostReg32(value));
|
||||
m_emit->mov(load_delay_reg, GetHostReg8(reg));
|
||||
m_emit->mov(next_load_delay_reg, static_cast<u8>(Reg::count));
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg)
|
||||
{
|
||||
auto load_delay_reg = m_emit->byte[GetCPUPtrReg() + offsetof(Core, m_load_delay_reg)];
|
||||
|
||||
Xbyak::Label skip_cancel;
|
||||
|
||||
// if load_delay_reg != reg goto skip_cancel
|
||||
m_emit->cmp(load_delay_reg, static_cast<u8>(reg));
|
||||
m_emit->jne(skip_cancel);
|
||||
|
||||
// load_delay_reg = Reg::count
|
||||
m_emit->mov(load_delay_reg, static_cast<u8>(Reg::count));
|
||||
|
||||
m_emit->L(skip_cancel);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
|
@ -1746,7 +1751,9 @@ void CodeGenerator::EmitBranch(Condition condition, Reg lr_reg, bool always_link
|
|||
old_npc = m_register_cache.ReadGuestRegister(Reg::npc, false, true);
|
||||
if (always_link)
|
||||
{
|
||||
// can't cache because we have two branches
|
||||
// Can't cache because we have two branches. Load delay cancel is due to the immediate flush afterwards,
|
||||
// if we don't cancel it, at the end of the instruction the value we write can be overridden.
|
||||
EmitCancelInterpreterLoadDelayForReg(lr_reg);
|
||||
m_register_cache.WriteGuestRegister(lr_reg, std::move(old_npc));
|
||||
m_register_cache.FlushGuestRegister(lr_reg, true, true);
|
||||
}
|
||||
|
@ -1760,7 +1767,9 @@ void CodeGenerator::EmitBranch(Condition condition, Reg lr_reg, bool always_link
|
|||
// save the old PC if we want to
|
||||
if (lr_reg != Reg::count && !always_link)
|
||||
{
|
||||
// can't cache because we have two branches
|
||||
// Can't cache because we have two branches. Load delay cancel is due to the immediate flush afterwards,
|
||||
// if we don't cancel it, at the end of the instruction the value we write can be overridden.
|
||||
EmitCancelInterpreterLoadDelayForReg(lr_reg);
|
||||
m_register_cache.WriteGuestRegister(lr_reg, std::move(old_npc));
|
||||
m_register_cache.FlushGuestRegister(lr_reg, true, true);
|
||||
}
|
||||
|
@ -1807,6 +1816,10 @@ void CodeGenerator::EmitRaiseException(Exception excode, Condition condition /*
|
|||
Value::FromConstantU8(static_cast<u8>(excode)));
|
||||
m_register_cache.FlushAllGuestRegisters(true, true);
|
||||
m_register_cache.FlushLoadDelay(true);
|
||||
|
||||
// PC should be synced at this point. If we leave the 4 on here for this instruction, we mess up npc.
|
||||
Assert(m_delayed_pc_add == 4);
|
||||
m_delayed_pc_add = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -509,6 +509,9 @@ void RegisterCache::WriteGuestRegisterDelayed(Reg guest_reg, Value&& value)
|
|||
m_load_delay_value.ReleaseAndClear();
|
||||
}
|
||||
|
||||
// two load delay case with interpreter load delay
|
||||
m_code_generator.EmitCancelInterpreterLoadDelayForReg(guest_reg);
|
||||
|
||||
// set up the load delay at the end of this instruction
|
||||
Value& cache_value = m_next_load_delay_value;
|
||||
Assert(m_next_load_delay_register == Reg::count);
|
||||
|
@ -561,7 +564,7 @@ void RegisterCache::WriteLoadDelayToCPU(bool clear)
|
|||
if (m_load_delay_register != Reg::count)
|
||||
{
|
||||
Log_DebugPrintf("Flushing pending load delay of %s", GetRegName(m_load_delay_register));
|
||||
m_code_generator.EmitStoreLoadDelay(m_load_delay_register, m_load_delay_value);
|
||||
m_code_generator.EmitStoreInterpreterLoadDelay(m_load_delay_register, m_load_delay_value);
|
||||
if (clear)
|
||||
{
|
||||
m_load_delay_register = Reg::count;
|
||||
|
@ -573,17 +576,18 @@ void RegisterCache::WriteLoadDelayToCPU(bool clear)
|
|||
void RegisterCache::FlushLoadDelay(bool clear)
|
||||
{
|
||||
Assert(m_next_load_delay_register == Reg::count);
|
||||
if (m_load_delay_register == Reg::count)
|
||||
return;
|
||||
|
||||
// if this is an exception exit, write the new value to the CPU register file, but keep it tracked for the next
|
||||
// non-exception-raised path. TODO: push/pop whole state would avoid this issue
|
||||
m_code_generator.EmitStoreGuestRegister(m_load_delay_register, m_load_delay_value);
|
||||
|
||||
if (clear)
|
||||
if (m_load_delay_register != Reg::count)
|
||||
{
|
||||
m_load_delay_register = Reg::count;
|
||||
m_load_delay_value.ReleaseAndClear();
|
||||
// if this is an exception exit, write the new value to the CPU register file, but keep it tracked for the next
|
||||
// non-exception-raised path. TODO: push/pop whole state would avoid this issue
|
||||
m_code_generator.EmitStoreGuestRegister(m_load_delay_register, m_load_delay_value);
|
||||
|
||||
if (clear)
|
||||
{
|
||||
m_load_delay_register = Reg::count;
|
||||
m_load_delay_value.ReleaseAndClear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -627,6 +631,16 @@ void RegisterCache::InvalidateGuestRegister(Reg guest_reg)
|
|||
cache_value.Clear();
|
||||
}
|
||||
|
||||
void RegisterCache::InvalidateAllNonDirtyGuestRegisters()
|
||||
{
|
||||
for (u8 reg = 0; reg < static_cast<u8>(Reg::count); reg++)
|
||||
{
|
||||
Value& cache_value = m_guest_reg_cache[reg];
|
||||
if (cache_value.IsValid() && !cache_value.IsDirty())
|
||||
InvalidateGuestRegister(static_cast<Reg>(reg));
|
||||
}
|
||||
}
|
||||
|
||||
void RegisterCache::FlushAllGuestRegisters(bool invalidate, bool clear_dirty)
|
||||
{
|
||||
for (u8 reg = 0; reg < static_cast<u8>(Reg::count); reg++)
|
||||
|
@ -694,4 +708,5 @@ void RegisterCache::AppendRegisterToOrder(Reg reg)
|
|||
m_guest_register_order[0] = reg;
|
||||
m_guest_register_order_count++;
|
||||
}
|
||||
|
||||
} // namespace CPU::Recompiler
|
||||
|
|
|
@ -257,6 +257,7 @@ public:
|
|||
void FlushGuestRegister(Reg guest_reg, bool invalidate, bool clear_dirty);
|
||||
void InvalidateGuestRegister(Reg guest_reg);
|
||||
|
||||
void InvalidateAllNonDirtyGuestRegisters();
|
||||
void FlushAllGuestRegisters(bool invalidate, bool clear_dirty);
|
||||
bool EvictOneGuestRegister();
|
||||
|
||||
|
|
Loading…
Reference in New Issue