CPU: Fix more load delay slot issues
Fixes Spyro again. b{ltz,gez}(al)? disabled in the recompiler until issues are fixed.
This commit is contained in:
parent
897e75fa8e
commit
8c5fcc8f48
|
@ -55,22 +55,23 @@ void CodeCache::Execute()
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
reexecute_block:
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
const u32 tick = m_system->GetGlobalTickCounter() + m_core->GetPendingTicks();
|
const u32 tick = m_system->GetGlobalTickCounter() + m_core->GetPendingTicks();
|
||||||
if (tick == 58672386)
|
if (tick == 61033207)
|
||||||
__debugbreak();
|
__debugbreak();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
reexecute_block:
|
|
||||||
if (m_use_recompiler)
|
|
||||||
block->host_code(m_core);
|
|
||||||
else
|
|
||||||
InterpretCachedBlock(*block);
|
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
LogCurrentState();
|
LogCurrentState();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
if (m_use_recompiler)
|
||||||
|
block->host_code(m_core);
|
||||||
|
else
|
||||||
|
InterpretCachedBlock(*block);
|
||||||
|
|
||||||
if (m_core->m_downcount < 0)
|
if (m_core->m_downcount < 0)
|
||||||
break;
|
break;
|
||||||
else if (m_core->HasPendingInterrupt() || !USE_BLOCK_LINKING)
|
else if (m_core->HasPendingInterrupt() || !USE_BLOCK_LINKING)
|
||||||
|
@ -142,11 +143,14 @@ void CodeCache::LogCurrentState()
|
||||||
WriteToExecutionLog(
|
WriteToExecutionLog(
|
||||||
"tick=%u pc=%08X npc=%08X zero=%08X at=%08X v0=%08X v1=%08X a0=%08X a1=%08X a2=%08X a3=%08X t0=%08X "
|
"tick=%u pc=%08X npc=%08X zero=%08X at=%08X v0=%08X v1=%08X a0=%08X a1=%08X a2=%08X a3=%08X t0=%08X "
|
||||||
"t1=%08X t2=%08X t3=%08X t4=%08X t5=%08X t6=%08X t7=%08X s0=%08X s1=%08X s2=%08X s3=%08X s4=%08X "
|
"t1=%08X t2=%08X t3=%08X t4=%08X t5=%08X t6=%08X t7=%08X s0=%08X s1=%08X s2=%08X s3=%08X s4=%08X "
|
||||||
"s5=%08X s6=%08X s7=%08X t8=%08X t9=%08X k0=%08X k1=%08X gp=%08X sp=%08X fp=%08X ra=%08X\n",
|
"s5=%08X s6=%08X s7=%08X t8=%08X t9=%08X k0=%08X k1=%08X gp=%08X sp=%08X fp=%08X ra=%08X npc=%08X ldr=%s "
|
||||||
|
"ldv=%08X\n",
|
||||||
m_system->GetGlobalTickCounter() + m_core->GetPendingTicks(), regs.pc, regs.npc, regs.zero, regs.at, regs.v0,
|
m_system->GetGlobalTickCounter() + m_core->GetPendingTicks(), regs.pc, regs.npc, regs.zero, regs.at, regs.v0,
|
||||||
regs.v1, regs.a0, regs.a1, regs.a2, regs.a3, regs.t0, regs.t1, regs.t2, regs.t3, regs.t4, regs.t5, regs.t6, regs.t7,
|
regs.v1, regs.a0, regs.a1, regs.a2, regs.a3, regs.t0, regs.t1, regs.t2, regs.t3, regs.t4, regs.t5, regs.t6, regs.t7,
|
||||||
regs.s0, regs.s1, regs.s2, regs.s3, regs.s4, regs.s5, regs.s6, regs.s7, regs.t8, regs.t9, regs.k0, regs.k1, regs.gp,
|
regs.s0, regs.s1, regs.s2, regs.s3, regs.s4, regs.s5, regs.s6, regs.s7, regs.t8, regs.t9, regs.k0, regs.k1, regs.gp,
|
||||||
regs.sp, regs.fp, regs.ra);
|
regs.sp, regs.fp, regs.ra, regs.npc,
|
||||||
|
(m_core->m_next_load_delay_reg == Reg::count) ? "NONE" : GetRegName(m_core->m_next_load_delay_reg),
|
||||||
|
(m_core->m_next_load_delay_reg == Reg::count) ? 0 : m_core->m_next_load_delay_value);
|
||||||
}
|
}
|
||||||
|
|
||||||
CodeBlockKey CodeCache::GetNextBlockKey() const
|
CodeBlockKey CodeCache::GetNextBlockKey() const
|
||||||
|
|
|
@ -94,10 +94,8 @@ bool Core::DoState(StateWrapper& sw)
|
||||||
sw.Do(&m_branch_was_taken);
|
sw.Do(&m_branch_was_taken);
|
||||||
sw.Do(&m_load_delay_reg);
|
sw.Do(&m_load_delay_reg);
|
||||||
sw.Do(&m_load_delay_value);
|
sw.Do(&m_load_delay_value);
|
||||||
sw.Do(&m_load_delay_old_value);
|
|
||||||
sw.Do(&m_next_load_delay_reg);
|
sw.Do(&m_next_load_delay_reg);
|
||||||
sw.Do(&m_next_load_delay_value);
|
sw.Do(&m_next_load_delay_value);
|
||||||
sw.Do(&m_next_load_delay_old_value);
|
|
||||||
sw.Do(&m_cache_control);
|
sw.Do(&m_cache_control);
|
||||||
sw.DoBytes(m_dcache.data(), m_dcache.size());
|
sw.DoBytes(m_dcache.data(), m_dcache.size());
|
||||||
|
|
||||||
|
@ -389,6 +387,7 @@ u32 Core::ReadReg(Reg rs)
|
||||||
void Core::WriteReg(Reg rd, u32 value)
|
void Core::WriteReg(Reg rd, u32 value)
|
||||||
{
|
{
|
||||||
m_regs.r[static_cast<u8>(rd)] = value;
|
m_regs.r[static_cast<u8>(rd)] = value;
|
||||||
|
m_load_delay_reg = (rd == m_load_delay_reg) ? Reg::count : m_load_delay_reg;
|
||||||
|
|
||||||
// prevent writes to $zero from going through - better than branching/cmov
|
// prevent writes to $zero from going through - better than branching/cmov
|
||||||
m_regs.zero = 0;
|
m_regs.zero = 0;
|
||||||
|
@ -407,7 +406,6 @@ void Core::WriteRegDelayed(Reg rd, u32 value)
|
||||||
// save the old value, if something else overwrites this reg we want to preserve it
|
// save the old value, if something else overwrites this reg we want to preserve it
|
||||||
m_next_load_delay_reg = rd;
|
m_next_load_delay_reg = rd;
|
||||||
m_next_load_delay_value = value;
|
m_next_load_delay_value = value;
|
||||||
m_next_load_delay_old_value = m_regs.r[static_cast<u8>(rd)];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<u32> Core::ReadCop0Reg(Cop0Reg reg)
|
std::optional<u32> Core::ReadCop0Reg(Cop0Reg reg)
|
||||||
|
@ -1120,7 +1118,7 @@ void Core::ExecuteInstruction()
|
||||||
|
|
||||||
case InstructionOp::jal:
|
case InstructionOp::jal:
|
||||||
{
|
{
|
||||||
m_regs.ra = m_regs.npc;
|
WriteReg(Reg::ra, m_regs.npc);
|
||||||
m_next_instruction_is_branch_delay_slot = true;
|
m_next_instruction_is_branch_delay_slot = true;
|
||||||
Branch((m_regs.pc & UINT32_C(0xF0000000)) | (inst.j.target << 2));
|
Branch((m_regs.pc & UINT32_C(0xF0000000)) | (inst.j.target << 2));
|
||||||
}
|
}
|
||||||
|
@ -1175,7 +1173,7 @@ void Core::ExecuteInstruction()
|
||||||
// register is still linked even if the branch isn't taken
|
// register is still linked even if the branch isn't taken
|
||||||
const bool link = (rt & u8(0x1E)) == u8(0x10);
|
const bool link = (rt & u8(0x1E)) == u8(0x10);
|
||||||
if (link)
|
if (link)
|
||||||
m_regs.ra = m_regs.npc;
|
WriteReg(Reg::ra, m_regs.npc);
|
||||||
|
|
||||||
if (branch)
|
if (branch)
|
||||||
Branch(m_regs.pc + (inst.i.imm_sext32() << 2));
|
Branch(m_regs.pc + (inst.i.imm_sext32() << 2));
|
||||||
|
|
|
@ -106,12 +106,11 @@ private:
|
||||||
ALWAYS_INLINE void UpdateLoadDelay()
|
ALWAYS_INLINE void UpdateLoadDelay()
|
||||||
{
|
{
|
||||||
// the old value is needed in case the delay slot instruction overwrites the same register
|
// the old value is needed in case the delay slot instruction overwrites the same register
|
||||||
if (m_load_delay_reg != Reg::count && m_regs.r[static_cast<u8>(m_load_delay_reg)] == m_load_delay_old_value)
|
if (m_load_delay_reg != Reg::count)
|
||||||
m_regs.r[static_cast<u8>(m_load_delay_reg)] = m_load_delay_value;
|
m_regs.r[static_cast<u8>(m_load_delay_reg)] = m_load_delay_value;
|
||||||
|
|
||||||
m_load_delay_reg = m_next_load_delay_reg;
|
m_load_delay_reg = m_next_load_delay_reg;
|
||||||
m_load_delay_value = m_next_load_delay_value;
|
m_load_delay_value = m_next_load_delay_value;
|
||||||
m_load_delay_old_value = m_next_load_delay_old_value;
|
|
||||||
m_next_load_delay_reg = Reg::count;
|
m_next_load_delay_reg = Reg::count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -168,10 +167,8 @@ private:
|
||||||
// load delays
|
// load delays
|
||||||
Reg m_load_delay_reg = Reg::count;
|
Reg m_load_delay_reg = Reg::count;
|
||||||
u32 m_load_delay_value = 0;
|
u32 m_load_delay_value = 0;
|
||||||
u32 m_load_delay_old_value = 0;
|
|
||||||
Reg m_next_load_delay_reg = Reg::count;
|
Reg m_next_load_delay_reg = Reg::count;
|
||||||
u32 m_next_load_delay_value = 0;
|
u32 m_next_load_delay_value = 0;
|
||||||
u32 m_next_load_delay_old_value = 0;
|
|
||||||
|
|
||||||
u32 m_cache_control = 0;
|
u32 m_cache_control = 0;
|
||||||
|
|
||||||
|
|
|
@ -93,7 +93,7 @@ bool CodeGenerator::CompileInstruction(const CodeBlockInstruction& cbi)
|
||||||
|
|
||||||
case InstructionOp::j:
|
case InstructionOp::j:
|
||||||
case InstructionOp::jal:
|
case InstructionOp::jal:
|
||||||
case InstructionOp::b:
|
//case InstructionOp::b:
|
||||||
case InstructionOp::beq:
|
case InstructionOp::beq:
|
||||||
case InstructionOp::bne:
|
case InstructionOp::bne:
|
||||||
case InstructionOp::bgtz:
|
case InstructionOp::bgtz:
|
||||||
|
@ -812,20 +812,23 @@ void CodeGenerator::InstructionEpilogue(const CodeBlockInstruction& cbi)
|
||||||
{
|
{
|
||||||
m_register_cache.UpdateLoadDelay();
|
m_register_cache.UpdateLoadDelay();
|
||||||
|
|
||||||
|
if (m_load_delay_dirty)
|
||||||
|
{
|
||||||
|
// we have to invalidate the register cache, since the load delayed register might've been cached
|
||||||
|
Log_DebugPrint("Emitting delay slot flush");
|
||||||
|
EmitFlushInterpreterLoadDelay();
|
||||||
|
m_register_cache.InvalidateAllNonDirtyGuestRegisters();
|
||||||
|
m_load_delay_dirty = false;
|
||||||
|
}
|
||||||
|
|
||||||
// copy if the previous instruction was a load, reset the current value on the next instruction
|
// copy if the previous instruction was a load, reset the current value on the next instruction
|
||||||
if (m_next_load_delay_dirty)
|
if (m_next_load_delay_dirty)
|
||||||
{
|
{
|
||||||
Log_DebugPrint("Emitting delay slot flush (with move next)");
|
Log_DebugPrint("Emitting delay slot flush (with move next)");
|
||||||
EmitDelaySlotUpdate(false, false, true);
|
EmitMoveNextInterpreterLoadDelay();
|
||||||
m_next_load_delay_dirty = false;
|
m_next_load_delay_dirty = false;
|
||||||
m_load_delay_dirty = true;
|
m_load_delay_dirty = true;
|
||||||
}
|
}
|
||||||
else if (m_load_delay_dirty)
|
|
||||||
{
|
|
||||||
Log_DebugPrint("Emitting delay slot flush");
|
|
||||||
EmitDelaySlotUpdate(true, false, false);
|
|
||||||
m_load_delay_dirty = false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void CodeGenerator::SyncCurrentInstructionPC()
|
void CodeGenerator::SyncCurrentInstructionPC()
|
||||||
|
|
|
@ -65,7 +65,10 @@ public:
|
||||||
|
|
||||||
void EmitLoadGuestRegister(HostReg host_reg, Reg guest_reg);
|
void EmitLoadGuestRegister(HostReg host_reg, Reg guest_reg);
|
||||||
void EmitStoreGuestRegister(Reg guest_reg, const Value& value);
|
void EmitStoreGuestRegister(Reg guest_reg, const Value& value);
|
||||||
void EmitStoreLoadDelay(Reg reg, const Value& value);
|
void EmitStoreInterpreterLoadDelay(Reg reg, const Value& value);
|
||||||
|
void EmitFlushInterpreterLoadDelay();
|
||||||
|
void EmitMoveNextInterpreterLoadDelay();
|
||||||
|
void EmitCancelInterpreterLoadDelayForReg(Reg reg);
|
||||||
void EmitLoadCPUStructField(HostReg host_reg, RegSize size, u32 offset);
|
void EmitLoadCPUStructField(HostReg host_reg, RegSize size, u32 offset);
|
||||||
void EmitStoreCPUStructField(u32 offset, const Value& value);
|
void EmitStoreCPUStructField(u32 offset, const Value& value);
|
||||||
void EmitAddCPUStructField(u32 offset, const Value& value);
|
void EmitAddCPUStructField(u32 offset, const Value& value);
|
||||||
|
@ -168,7 +171,6 @@ private:
|
||||||
void SyncCurrentInstructionPC();
|
void SyncCurrentInstructionPC();
|
||||||
void SyncPC();
|
void SyncPC();
|
||||||
void AddPendingCycles();
|
void AddPendingCycles();
|
||||||
void EmitDelaySlotUpdate(bool skip_check_for_delay, bool skip_check_old_value, bool move_next);
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
// Instruction Code Generators
|
// Instruction Code Generators
|
||||||
|
|
|
@ -17,25 +17,11 @@ void CodeGenerator::EmitStoreGuestRegister(Reg guest_reg, const Value& value)
|
||||||
EmitStoreCPUStructField(CalculateRegisterOffset(guest_reg), value);
|
EmitStoreCPUStructField(CalculateRegisterOffset(guest_reg), value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CodeGenerator::EmitStoreLoadDelay(Reg reg, const Value& value)
|
void CodeGenerator::EmitStoreInterpreterLoadDelay(Reg reg, const Value& value)
|
||||||
{
|
{
|
||||||
DebugAssert(value.size == RegSize_32 && value.IsInHostRegister());
|
DebugAssert(value.size == RegSize_32 && value.IsInHostRegister());
|
||||||
EmitStoreCPUStructField(offsetof(Core, m_load_delay_reg), Value::FromConstantU8(static_cast<u8>(reg)));
|
EmitStoreCPUStructField(offsetof(Core, m_load_delay_reg), Value::FromConstantU8(static_cast<u8>(reg)));
|
||||||
EmitStoreCPUStructField(offsetof(Core, m_load_delay_value), value);
|
EmitStoreCPUStructField(offsetof(Core, m_load_delay_value), value);
|
||||||
|
|
||||||
// We don't want to allocate a register since this could be in a block exit, so re-use the value.
|
|
||||||
if (m_register_cache.IsGuestRegisterCached(reg))
|
|
||||||
{
|
|
||||||
EmitStoreCPUStructField(offsetof(Core, m_load_delay_old_value), m_register_cache.ReadGuestRegister(reg));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
EmitPushHostReg(value.host_reg);
|
|
||||||
EmitLoadCPUStructField(value.host_reg, RegSize_32, CalculateRegisterOffset(reg));
|
|
||||||
EmitStoreCPUStructField(offsetof(Core, m_load_delay_old_value), value);
|
|
||||||
EmitPopHostReg(value.host_reg);
|
|
||||||
}
|
|
||||||
|
|
||||||
m_load_delay_dirty = true;
|
m_load_delay_dirty = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1614,60 +1614,65 @@ void CodeGenerator::EmitStoreGuestMemory(const Value& address, const Value& valu
|
||||||
SwitchToNearCode();
|
SwitchToNearCode();
|
||||||
}
|
}
|
||||||
|
|
||||||
void CodeGenerator::EmitDelaySlotUpdate(bool skip_check_for_delay, bool skip_check_old_value, bool move_next)
|
void CodeGenerator::EmitFlushInterpreterLoadDelay()
|
||||||
{
|
{
|
||||||
Value reg = m_register_cache.AllocateScratch(RegSize_8);
|
Value reg = m_register_cache.AllocateScratch(RegSize_8);
|
||||||
Value value = m_register_cache.AllocateScratch(RegSize_32);
|
Value value = m_register_cache.AllocateScratch(RegSize_32);
|
||||||
|
|
||||||
Xbyak::Label skip_flush;
|
|
||||||
|
|
||||||
auto load_delay_reg = m_emit->byte[GetCPUPtrReg() + offsetof(Core, m_load_delay_reg)];
|
auto load_delay_reg = m_emit->byte[GetCPUPtrReg() + offsetof(Core, m_load_delay_reg)];
|
||||||
auto load_delay_old_value = m_emit->dword[GetCPUPtrReg() + offsetof(Core, m_load_delay_old_value)];
|
|
||||||
auto load_delay_value = m_emit->dword[GetCPUPtrReg() + offsetof(Core, m_load_delay_value)];
|
auto load_delay_value = m_emit->dword[GetCPUPtrReg() + offsetof(Core, m_load_delay_value)];
|
||||||
auto reg_ptr = m_emit->dword[GetCPUPtrReg() + offsetof(Core, m_regs.r[0]) + GetHostReg64(reg.host_reg) * 4];
|
auto reg_ptr = m_emit->dword[GetCPUPtrReg() + offsetof(Core, m_regs.r[0]) + GetHostReg64(reg.host_reg) * 4];
|
||||||
|
|
||||||
|
Xbyak::Label skip_flush;
|
||||||
|
|
||||||
// reg = load_delay_reg
|
// reg = load_delay_reg
|
||||||
m_emit->movzx(GetHostReg32(reg.host_reg), load_delay_reg);
|
m_emit->movzx(GetHostReg32(reg.host_reg), load_delay_reg);
|
||||||
if (!skip_check_old_value)
|
|
||||||
m_emit->mov(GetHostReg32(value), load_delay_old_value);
|
|
||||||
|
|
||||||
if (!skip_check_for_delay)
|
// if load_delay_reg == Reg::count goto skip_flush
|
||||||
{
|
m_emit->cmp(GetHostReg32(reg.host_reg), static_cast<u8>(Reg::count));
|
||||||
// if load_delay_reg == Reg::count goto skip_flush
|
m_emit->je(skip_flush);
|
||||||
m_emit->cmp(GetHostReg32(reg.host_reg), static_cast<u8>(Reg::count));
|
|
||||||
m_emit->je(skip_flush);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!skip_check_old_value)
|
|
||||||
{
|
|
||||||
// if r[reg] != load_delay_old_value goto skip_flush
|
|
||||||
m_emit->cmp(GetHostReg32(value), reg_ptr);
|
|
||||||
m_emit->jne(skip_flush);
|
|
||||||
}
|
|
||||||
|
|
||||||
// r[reg] = load_delay_value
|
// r[reg] = load_delay_value
|
||||||
m_emit->mov(GetHostReg32(value), load_delay_value);
|
m_emit->mov(GetHostReg32(value), load_delay_value);
|
||||||
m_emit->mov(reg_ptr, GetHostReg32(value));
|
m_emit->mov(reg_ptr, GetHostReg32(value));
|
||||||
|
|
||||||
// if !move_next load_delay_reg = Reg::count
|
// load_delay_reg = Reg::count
|
||||||
if (!move_next)
|
m_emit->mov(load_delay_reg, static_cast<u8>(Reg::count));
|
||||||
m_emit->mov(load_delay_reg, static_cast<u8>(Reg::count));
|
|
||||||
|
|
||||||
m_emit->L(skip_flush);
|
m_emit->L(skip_flush);
|
||||||
|
}
|
||||||
|
|
||||||
if (move_next)
|
void CodeGenerator::EmitMoveNextInterpreterLoadDelay()
|
||||||
{
|
{
|
||||||
auto next_load_delay_reg = m_emit->byte[GetCPUPtrReg() + offsetof(Core, m_next_load_delay_reg)];
|
Value reg = m_register_cache.AllocateScratch(RegSize_8);
|
||||||
auto next_load_delay_old_value = m_emit->dword[GetCPUPtrReg() + offsetof(Core, m_next_load_delay_old_value)];
|
Value value = m_register_cache.AllocateScratch(RegSize_32);
|
||||||
auto next_load_delay_value = m_emit->dword[GetCPUPtrReg() + offsetof(Core, m_next_load_delay_value)];
|
|
||||||
m_emit->mov(GetHostReg32(value), next_load_delay_value);
|
auto load_delay_reg = m_emit->byte[GetCPUPtrReg() + offsetof(Core, m_load_delay_reg)];
|
||||||
m_emit->mov(GetHostReg8(reg), next_load_delay_reg);
|
auto load_delay_value = m_emit->dword[GetCPUPtrReg() + offsetof(Core, m_load_delay_value)];
|
||||||
m_emit->mov(load_delay_value, GetHostReg32(value));
|
auto next_load_delay_reg = m_emit->byte[GetCPUPtrReg() + offsetof(Core, m_next_load_delay_reg)];
|
||||||
m_emit->mov(GetHostReg32(value), next_load_delay_old_value);
|
auto next_load_delay_value = m_emit->dword[GetCPUPtrReg() + offsetof(Core, m_next_load_delay_value)];
|
||||||
m_emit->mov(load_delay_reg, GetHostReg8(reg));
|
|
||||||
m_emit->mov(load_delay_old_value, GetHostReg32(value));
|
m_emit->mov(GetHostReg32(value), next_load_delay_value);
|
||||||
m_emit->mov(next_load_delay_reg, static_cast<u8>(Reg::count));
|
m_emit->mov(GetHostReg8(reg), next_load_delay_reg);
|
||||||
}
|
m_emit->mov(load_delay_value, GetHostReg32(value));
|
||||||
|
m_emit->mov(load_delay_reg, GetHostReg8(reg));
|
||||||
|
m_emit->mov(next_load_delay_reg, static_cast<u8>(Reg::count));
|
||||||
|
}
|
||||||
|
|
||||||
|
void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg)
|
||||||
|
{
|
||||||
|
auto load_delay_reg = m_emit->byte[GetCPUPtrReg() + offsetof(Core, m_load_delay_reg)];
|
||||||
|
|
||||||
|
Xbyak::Label skip_cancel;
|
||||||
|
|
||||||
|
// if load_delay_reg != reg goto skip_cancel
|
||||||
|
m_emit->cmp(load_delay_reg, static_cast<u8>(reg));
|
||||||
|
m_emit->jne(skip_cancel);
|
||||||
|
|
||||||
|
// load_delay_reg = Reg::count
|
||||||
|
m_emit->mov(load_delay_reg, static_cast<u8>(Reg::count));
|
||||||
|
|
||||||
|
m_emit->L(skip_cancel);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
|
@ -1746,7 +1751,9 @@ void CodeGenerator::EmitBranch(Condition condition, Reg lr_reg, bool always_link
|
||||||
old_npc = m_register_cache.ReadGuestRegister(Reg::npc, false, true);
|
old_npc = m_register_cache.ReadGuestRegister(Reg::npc, false, true);
|
||||||
if (always_link)
|
if (always_link)
|
||||||
{
|
{
|
||||||
// can't cache because we have two branches
|
// Can't cache because we have two branches. Load delay cancel is due to the immediate flush afterwards,
|
||||||
|
// if we don't cancel it, at the end of the instruction the value we write can be overridden.
|
||||||
|
EmitCancelInterpreterLoadDelayForReg(lr_reg);
|
||||||
m_register_cache.WriteGuestRegister(lr_reg, std::move(old_npc));
|
m_register_cache.WriteGuestRegister(lr_reg, std::move(old_npc));
|
||||||
m_register_cache.FlushGuestRegister(lr_reg, true, true);
|
m_register_cache.FlushGuestRegister(lr_reg, true, true);
|
||||||
}
|
}
|
||||||
|
@ -1760,7 +1767,9 @@ void CodeGenerator::EmitBranch(Condition condition, Reg lr_reg, bool always_link
|
||||||
// save the old PC if we want to
|
// save the old PC if we want to
|
||||||
if (lr_reg != Reg::count && !always_link)
|
if (lr_reg != Reg::count && !always_link)
|
||||||
{
|
{
|
||||||
// can't cache because we have two branches
|
// Can't cache because we have two branches. Load delay cancel is due to the immediate flush afterwards,
|
||||||
|
// if we don't cancel it, at the end of the instruction the value we write can be overridden.
|
||||||
|
EmitCancelInterpreterLoadDelayForReg(lr_reg);
|
||||||
m_register_cache.WriteGuestRegister(lr_reg, std::move(old_npc));
|
m_register_cache.WriteGuestRegister(lr_reg, std::move(old_npc));
|
||||||
m_register_cache.FlushGuestRegister(lr_reg, true, true);
|
m_register_cache.FlushGuestRegister(lr_reg, true, true);
|
||||||
}
|
}
|
||||||
|
@ -1807,6 +1816,10 @@ void CodeGenerator::EmitRaiseException(Exception excode, Condition condition /*
|
||||||
Value::FromConstantU8(static_cast<u8>(excode)));
|
Value::FromConstantU8(static_cast<u8>(excode)));
|
||||||
m_register_cache.FlushAllGuestRegisters(true, true);
|
m_register_cache.FlushAllGuestRegisters(true, true);
|
||||||
m_register_cache.FlushLoadDelay(true);
|
m_register_cache.FlushLoadDelay(true);
|
||||||
|
|
||||||
|
// PC should be synced at this point. If we leave the 4 on here for this instruction, we mess up npc.
|
||||||
|
Assert(m_delayed_pc_add == 4);
|
||||||
|
m_delayed_pc_add = 0;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -509,6 +509,9 @@ void RegisterCache::WriteGuestRegisterDelayed(Reg guest_reg, Value&& value)
|
||||||
m_load_delay_value.ReleaseAndClear();
|
m_load_delay_value.ReleaseAndClear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// two load delay case with interpreter load delay
|
||||||
|
m_code_generator.EmitCancelInterpreterLoadDelayForReg(guest_reg);
|
||||||
|
|
||||||
// set up the load delay at the end of this instruction
|
// set up the load delay at the end of this instruction
|
||||||
Value& cache_value = m_next_load_delay_value;
|
Value& cache_value = m_next_load_delay_value;
|
||||||
Assert(m_next_load_delay_register == Reg::count);
|
Assert(m_next_load_delay_register == Reg::count);
|
||||||
|
@ -561,7 +564,7 @@ void RegisterCache::WriteLoadDelayToCPU(bool clear)
|
||||||
if (m_load_delay_register != Reg::count)
|
if (m_load_delay_register != Reg::count)
|
||||||
{
|
{
|
||||||
Log_DebugPrintf("Flushing pending load delay of %s", GetRegName(m_load_delay_register));
|
Log_DebugPrintf("Flushing pending load delay of %s", GetRegName(m_load_delay_register));
|
||||||
m_code_generator.EmitStoreLoadDelay(m_load_delay_register, m_load_delay_value);
|
m_code_generator.EmitStoreInterpreterLoadDelay(m_load_delay_register, m_load_delay_value);
|
||||||
if (clear)
|
if (clear)
|
||||||
{
|
{
|
||||||
m_load_delay_register = Reg::count;
|
m_load_delay_register = Reg::count;
|
||||||
|
@ -573,17 +576,18 @@ void RegisterCache::WriteLoadDelayToCPU(bool clear)
|
||||||
void RegisterCache::FlushLoadDelay(bool clear)
|
void RegisterCache::FlushLoadDelay(bool clear)
|
||||||
{
|
{
|
||||||
Assert(m_next_load_delay_register == Reg::count);
|
Assert(m_next_load_delay_register == Reg::count);
|
||||||
if (m_load_delay_register == Reg::count)
|
|
||||||
return;
|
|
||||||
|
|
||||||
// if this is an exception exit, write the new value to the CPU register file, but keep it tracked for the next
|
if (m_load_delay_register != Reg::count)
|
||||||
// non-exception-raised path. TODO: push/pop whole state would avoid this issue
|
|
||||||
m_code_generator.EmitStoreGuestRegister(m_load_delay_register, m_load_delay_value);
|
|
||||||
|
|
||||||
if (clear)
|
|
||||||
{
|
{
|
||||||
m_load_delay_register = Reg::count;
|
// if this is an exception exit, write the new value to the CPU register file, but keep it tracked for the next
|
||||||
m_load_delay_value.ReleaseAndClear();
|
// non-exception-raised path. TODO: push/pop whole state would avoid this issue
|
||||||
|
m_code_generator.EmitStoreGuestRegister(m_load_delay_register, m_load_delay_value);
|
||||||
|
|
||||||
|
if (clear)
|
||||||
|
{
|
||||||
|
m_load_delay_register = Reg::count;
|
||||||
|
m_load_delay_value.ReleaseAndClear();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -627,6 +631,16 @@ void RegisterCache::InvalidateGuestRegister(Reg guest_reg)
|
||||||
cache_value.Clear();
|
cache_value.Clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void RegisterCache::InvalidateAllNonDirtyGuestRegisters()
|
||||||
|
{
|
||||||
|
for (u8 reg = 0; reg < static_cast<u8>(Reg::count); reg++)
|
||||||
|
{
|
||||||
|
Value& cache_value = m_guest_reg_cache[reg];
|
||||||
|
if (cache_value.IsValid() && !cache_value.IsDirty())
|
||||||
|
InvalidateGuestRegister(static_cast<Reg>(reg));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void RegisterCache::FlushAllGuestRegisters(bool invalidate, bool clear_dirty)
|
void RegisterCache::FlushAllGuestRegisters(bool invalidate, bool clear_dirty)
|
||||||
{
|
{
|
||||||
for (u8 reg = 0; reg < static_cast<u8>(Reg::count); reg++)
|
for (u8 reg = 0; reg < static_cast<u8>(Reg::count); reg++)
|
||||||
|
@ -694,4 +708,5 @@ void RegisterCache::AppendRegisterToOrder(Reg reg)
|
||||||
m_guest_register_order[0] = reg;
|
m_guest_register_order[0] = reg;
|
||||||
m_guest_register_order_count++;
|
m_guest_register_order_count++;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace CPU::Recompiler
|
} // namespace CPU::Recompiler
|
||||||
|
|
|
@ -257,6 +257,7 @@ public:
|
||||||
void FlushGuestRegister(Reg guest_reg, bool invalidate, bool clear_dirty);
|
void FlushGuestRegister(Reg guest_reg, bool invalidate, bool clear_dirty);
|
||||||
void InvalidateGuestRegister(Reg guest_reg);
|
void InvalidateGuestRegister(Reg guest_reg);
|
||||||
|
|
||||||
|
void InvalidateAllNonDirtyGuestRegisters();
|
||||||
void FlushAllGuestRegisters(bool invalidate, bool clear_dirty);
|
void FlushAllGuestRegisters(bool invalidate, bool clear_dirty);
|
||||||
bool EvictOneGuestRegister();
|
bool EvictOneGuestRegister();
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue