CPU/Recompiler: Use register cache for managing pc
Reduces the number of loads/stores after each instruction.
Parent: 002d1cd4fd
Commit: aa52dbfeb8
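What the commit does, in brief: previously the generated code re-synced pc and npc through the CPU struct around every instruction (SyncCurrentInstructionPC()/SyncPC() plus the deferred m_delayed_pc_add), costing a load/store pair per instruction. Since each instruction's address is a compile-time constant (cbi.pc), the register cache can instead track Reg::pc as a constant value and emit a real store only where pc is actually observable: before potentially-trapping instructions, around branches, and at block exit. A minimal standalone C++ sketch of that pattern (hypothetical types, illustrating the concept rather than DuckStation's actual RegisterCache API):

    #include <cstdint>
    #include <cstdio>
    #include <optional>

    struct CpuState { uint32_t pc = 0; };

    // Hypothetical cache for a single guest register (pc).
    class PcCache {
    public:
      // The recompiler knows each instruction's pc at compile time, so
      // "writing" pc just records a constant; no store is emitted.
      void WriteConstant(uint32_t pc) { m_value = pc; }

      // A store to the CPU struct is emitted only when pc must be observable:
      // before a potentially-trapping instruction, or at block exit.
      void Flush(CpuState& state)
      {
        if (m_value) {
          state.pc = *m_value; // the single emitted store
          m_value.reset();
        }
      }

    private:
      std::optional<uint32_t> m_value;
    };

    int main()
    {
      CpuState cpu;
      PcCache cache;
      // Straight-line, non-faulting instructions: constant updates only.
      for (uint32_t pc = 0x1000; pc < 0x1010; pc += 4)
        cache.WriteConstant(pc + 4);
      cache.Flush(cpu); // one store at block exit instead of one per instruction
      std::printf("pc=%08X\n", static_cast<unsigned>(cpu.pc));
      return 0;
    }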
@@ -120,6 +120,9 @@ void CodeCache::Execute()
       }
     }
   }
+
+  // in case we switch to interpreter...
+  m_core->m_regs.npc = m_core->m_regs.pc;
 }
 
 void CodeCache::SetUseRecompiler(bool enable)

@@ -148,16 +151,16 @@ void CodeCache::Flush()
 void CodeCache::LogCurrentState()
 {
   const auto& regs = m_core->m_regs;
-  WriteToExecutionLog(
-    "tick=%u pc=%08X npc=%08X zero=%08X at=%08X v0=%08X v1=%08X a0=%08X a1=%08X a2=%08X a3=%08X t0=%08X "
-    "t1=%08X t2=%08X t3=%08X t4=%08X t5=%08X t6=%08X t7=%08X s0=%08X s1=%08X s2=%08X s3=%08X s4=%08X "
-    "s5=%08X s6=%08X s7=%08X t8=%08X t9=%08X k0=%08X k1=%08X gp=%08X sp=%08X fp=%08X ra=%08X npc=%08X ldr=%s "
-    "ldv=%08X\n",
-    m_system->GetGlobalTickCounter() + m_core->GetPendingTicks(), regs.pc, regs.npc, regs.zero, regs.at, regs.v0,
-    regs.v1, regs.a0, regs.a1, regs.a2, regs.a3, regs.t0, regs.t1, regs.t2, regs.t3, regs.t4, regs.t5, regs.t6, regs.t7,
-    regs.s0, regs.s1, regs.s2, regs.s3, regs.s4, regs.s5, regs.s6, regs.s7, regs.t8, regs.t9, regs.k0, regs.k1, regs.gp,
-    regs.sp, regs.fp, regs.ra, regs.npc,
-    (m_core->m_next_load_delay_reg == Reg::count) ? "NONE" : GetRegName(m_core->m_next_load_delay_reg),
+  WriteToExecutionLog("tick=%u pc=%08X zero=%08X at=%08X v0=%08X v1=%08X a0=%08X a1=%08X a2=%08X a3=%08X t0=%08X "
+                      "t1=%08X t2=%08X t3=%08X t4=%08X t5=%08X t6=%08X t7=%08X s0=%08X s1=%08X s2=%08X s3=%08X s4=%08X "
+                      "s5=%08X s6=%08X s7=%08X t8=%08X t9=%08X k0=%08X k1=%08X gp=%08X sp=%08X fp=%08X ra=%08X ldr=%s "
+                      "ldv=%08X\n",
+                      m_system->GetGlobalTickCounter() + m_core->GetPendingTicks(), regs.pc, regs.zero, regs.at,
+                      regs.v0, regs.v1, regs.a0, regs.a1, regs.a2, regs.a3, regs.t0, regs.t1, regs.t2, regs.t3, regs.t4,
+                      regs.t5, regs.t6, regs.t7, regs.s0, regs.s1, regs.s2, regs.s3, regs.s4, regs.s5, regs.s6, regs.s7,
+                      regs.t8, regs.t9, regs.k0, regs.k1, regs.gp, regs.sp, regs.fp, regs.ra,
+                      (m_core->m_next_load_delay_reg == Reg::count) ? "NONE" :
+                                                                      GetRegName(m_core->m_next_load_delay_reg),
                       (m_core->m_next_load_delay_reg == Reg::count) ? 0 : m_core->m_next_load_delay_value);
 }
 

@@ -202,7 +205,8 @@ bool CodeCache::RevalidateBlock(CodeBlock* block)
   for (const CodeBlockInstruction& cbi : block->instructions)
   {
     u32 new_code = 0;
-    m_bus->DispatchAccess<MemoryAccessType::Read, MemoryAccessSize::Word>(cbi.pc, new_code);
+    m_bus->DispatchAccess<MemoryAccessType::Read, MemoryAccessSize::Word>(cbi.pc & PHYSICAL_MEMORY_ADDRESS_MASK,
+                                                                          new_code);
     if (cbi.instruction.bits != new_code)
     {
       Log_DebugPrintf("Block 0x%08X changed at PC 0x%08X - %08X to %08X - recompiling.", block->GetPC(), cbi.pc,

@@ -419,7 +423,9 @@ void CodeCache::UnlinkBlock(CodeBlock* block)
 void CodeCache::InterpretCachedBlock(const CodeBlock& block)
 {
   // set up the state so we've already fetched the instruction
-  DebugAssert((m_core->m_regs.pc & PHYSICAL_MEMORY_ADDRESS_MASK) == block.GetPC());
+  DebugAssert(m_core->m_regs.pc == block.GetPC());
 
+  m_core->m_regs.npc = block.GetPC() + 4;
+
   for (const CodeBlockInstruction& cbi : block.instructions)
   {

@@ -427,14 +433,13 @@ void CodeCache::InterpretCachedBlock(const CodeBlock& block)
 
     // now executing the instruction we previously fetched
     m_core->m_current_instruction.bits = cbi.instruction.bits;
-    m_core->m_current_instruction_pc = m_core->m_regs.pc;
+    m_core->m_current_instruction_pc = cbi.pc;
     m_core->m_current_instruction_in_branch_delay_slot = cbi.is_branch_delay_slot;
     m_core->m_current_instruction_was_branch_taken = m_core->m_branch_was_taken;
     m_core->m_branch_was_taken = false;
     m_core->m_exception_raised = false;
 
     // update pc
-    DebugAssert((m_core->m_regs.pc & PHYSICAL_MEMORY_ADDRESS_MASK) == cbi.pc);
     m_core->m_regs.pc = m_core->m_regs.npc;
     m_core->m_regs.npc += 4;
 

@@ -454,6 +459,8 @@ void CodeCache::InterpretCachedBlock(const CodeBlock& block)
 
 void CodeCache::InterpretUncachedBlock()
 {
+  Panic("Fixme with regards to re-fetching PC");
+
   // At this point, pc contains the last address executed (in the previous block). The instruction has not been fetched
   // yet. pc shouldn't be updated until the fetch occurs, that way the exception occurs in the delay slot.
   bool in_branch_delay_slot = false;

@@ -28,6 +28,8 @@ union CodeBlockKey
   ALWAYS_INLINE u32 GetPC() const { return aligned_pc << 2; }
   ALWAYS_INLINE void SetPC(u32 pc) { aligned_pc = pc >> 2; }
 
+  ALWAYS_INLINE u32 GetPCPhysicalAddress() const { return (aligned_pc << 2) & PHYSICAL_MEMORY_ADDRESS_MASK; }
+
   ALWAYS_INLINE CodeBlockKey& operator=(const CodeBlockKey& rhs)
   {
     bits = rhs.bits;

@@ -72,12 +74,15 @@ struct CodeBlock
 
   const u32 GetPC() const { return key.GetPC(); }
   const u32 GetSizeInBytes() const { return static_cast<u32>(instructions.size()) * sizeof(Instruction); }
-  const u32 GetStartPageIndex() const { return (key.GetPC() / CPU_CODE_CACHE_PAGE_SIZE); }
-  const u32 GetEndPageIndex() const { return ((key.GetPC() + GetSizeInBytes()) / CPU_CODE_CACHE_PAGE_SIZE); }
+  const u32 GetStartPageIndex() const { return (key.GetPCPhysicalAddress() / CPU_CODE_CACHE_PAGE_SIZE); }
+  const u32 GetEndPageIndex() const
+  {
+    return ((key.GetPCPhysicalAddress() + GetSizeInBytes()) / CPU_CODE_CACHE_PAGE_SIZE);
+  }
   bool IsInRAM() const
   {
     // TODO: Constant
-    return key.GetPC() < 0x200000;
+    return key.GetPCPhysicalAddress() < 0x200000;
   }
 };
 

@@ -50,6 +50,8 @@ bool CodeGenerator::CompileBlock(const CodeBlock* block, CodeBlock::HostCodePoin
   EmitEndBlock();
 
   FinalizeBlock(out_host_code, out_host_code_size);
+  Log_ProfilePrintf("JIT block 0x%08X: %zu instructions (%u bytes), %u host bytes", block->GetPC(),
+                    block->instructions.size(), block->GetSizeInBytes(), *out_host_code_size);
 
   DebugAssert(m_register_cache.GetUsedHostRegisters() == 0);
 

@@ -709,14 +711,6 @@ void CodeGenerator::BlockPrologue()
   m_branch_was_taken_dirty = true;
   m_current_instruction_was_branch_taken_dirty = false;
   m_load_delay_dirty = true;
-
-  // sync m_current_instruction_pc so we can simply add to it
-  SyncCurrentInstructionPC();
-
-  // and the same for m_regs.pc
-  SyncPC();
-
-  EmitAddCPUStructField(offsetof(Core, m_regs.npc), Value::FromConstantU32(4));
 }
 
 void CodeGenerator::BlockEpilogue()

@@ -729,17 +723,7 @@ void CodeGenerator::BlockEpilogue()
   if (m_register_cache.HasLoadDelay())
     m_register_cache.WriteLoadDelayToCPU(true);
 
-  // if the last instruction wasn't a fallback, we need to add its fetch
-  if (m_delayed_pc_add > 0)
-  {
-    EmitAddCPUStructField(offsetof(Core, m_regs.npc), Value::FromConstantU32(m_delayed_pc_add));
-    m_delayed_pc_add = 0;
-  }
-
   AddPendingCycles();
-
-  // TODO: correct value for is_branch_delay_slot - branches in branch delay slot.
-  EmitStoreCPUStructField(offsetof(Core, m_next_instruction_is_branch_delay_slot), Value::FromConstantU8(0));
 }
 
 void CodeGenerator::InstructionPrologue(const CodeBlockInstruction& cbi, TickCount cycles,

@@ -771,42 +755,29 @@ void CodeGenerator::InstructionPrologue(const CodeBlockInstruction& cbi, TickCou
     m_current_instruction_in_branch_delay_slot_dirty = false;
   }
 
-  if (cbi.is_branch_delay_slot)
+  // increment PC, except if we're in the branch delay slot where it was just changed
+  if (!cbi.is_branch_delay_slot)
   {
-    // m_regs.pc should be synced for the next block, as the branch wrote to npc
-    SyncCurrentInstructionPC();
-    SyncPC();
-
-    // m_current_instruction_in_branch_delay_slot = true
-    EmitStoreCPUStructField(offsetof(Core, m_current_instruction_in_branch_delay_slot), Value::FromConstantU8(1));
-    m_current_instruction_in_branch_delay_slot_dirty = true;
+    Assert(!m_register_cache.IsGuestRegisterInHostRegister(Reg::pc));
+    m_register_cache.WriteGuestRegister(Reg::pc, Value::FromConstantU32(cbi.pc + 4));
   }
 
   if (!CanInstructionTrap(cbi.instruction, m_block->key.user_mode) && !force_sync)
   {
     // Defer updates for non-faulting instructions.
-    m_delayed_pc_add += INSTRUCTION_SIZE;
     m_delayed_cycles_add += cycles;
     return;
   }
 
-  if (m_delayed_pc_add > 0)
+  if (cbi.is_branch_delay_slot)
   {
-    // m_current_instruction_pc += m_delayed_pc_add
-    EmitAddCPUStructField(offsetof(Core, m_current_instruction_pc), Value::FromConstantU32(m_delayed_pc_add));
-
-    // m_regs.pc += m_delayed_pc_add
-    EmitAddCPUStructField(offsetof(Core, m_regs.pc), Value::FromConstantU32(m_delayed_pc_add));
-
-    // m_regs.npc += m_delayed_pc_add
-    // TODO: This can go once we recompile branch instructions and unconditionally set npc
-    EmitAddCPUStructField(offsetof(Core, m_regs.npc), Value::FromConstantU32(m_delayed_pc_add));
-
-    m_delayed_pc_add = 0;
+    // m_current_instruction_in_branch_delay_slot = true
+    EmitStoreCPUStructField(offsetof(Core, m_current_instruction_in_branch_delay_slot), Value::FromConstantU8(1));
+    m_current_instruction_in_branch_delay_slot_dirty = true;
   }
 
-  if (!cbi.is_branch_instruction)
-    m_delayed_pc_add = INSTRUCTION_SIZE;
+  // Sync current instruction PC
+  EmitStoreCPUStructField(offsetof(Core, m_current_instruction_pc), Value::FromConstantU32(cbi.pc));
 
   m_delayed_cycles_add += cycles;
   AddPendingCycles();

@@ -835,22 +806,6 @@ void CodeGenerator::InstructionEpilogue(const CodeBlockInstruction& cbi)
   }
 }
 
-void CodeGenerator::SyncCurrentInstructionPC()
-{
-  // m_current_instruction_pc = m_regs.pc
-  Value pc_value = m_register_cache.AllocateScratch(RegSize_32);
-  EmitLoadCPUStructField(pc_value.host_reg, RegSize_32, offsetof(Core, m_regs.pc));
-  EmitStoreCPUStructField(offsetof(Core, m_current_instruction_pc), pc_value);
-}
-
-void CodeGenerator::SyncPC()
-{
-  // m_regs.pc = m_regs.npc
-  Value npc_value = m_register_cache.AllocateScratch(RegSize_32);
-  EmitLoadCPUStructField(npc_value.host_reg, RegSize_32, offsetof(Core, m_regs.npc));
-  EmitStoreCPUStructField(offsetof(Core, m_regs.pc), npc_value);
-}
-
 void CodeGenerator::AddPendingCycles()
 {
   if (m_delayed_cycles_add == 0)

@@ -1246,8 +1201,7 @@ bool CodeGenerator::Compile_SetLess(const CodeBlockInstruction& cbi)
 
 bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
 {
-  // Force sync since we branches are PC-relative.
-  InstructionPrologue(cbi, 1, true);
+  InstructionPrologue(cbi, 1);
 
   // Compute the branch target.
   // This depends on the form of the instruction.

@@ -1258,7 +1212,7 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
     {
       // npc = (pc & 0xF0000000) | (target << 2)
       Value branch_target =
-        OrValues(AndValues(m_register_cache.ReadGuestRegister(Reg::pc, false), Value::FromConstantU32(0xF0000000)),
+        OrValues(AndValues(m_register_cache.ReadGuestRegister(Reg::pc), Value::FromConstantU32(0xF0000000)),
                  Value::FromConstantU32(cbi.instruction.j.target << 2));
 
       EmitBranch(Condition::Always, (cbi.instruction.op == InstructionOp::jal) ? Reg::ra : Reg::count,
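A quick worked example of the j-type target arithmetic recompiled in the hunk above (npc = (pc & 0xF0000000) | (target << 2)); note that the cached Reg::pc already holds cbi.pc + 4, the delay-slot address, by the time this runs. The addresses below are made up for illustration:

    #include <cstdint>

    // J-type branch target computation, as in Compile_Branch above.
    constexpr uint32_t JumpTarget(uint32_t pc_of_delay_slot, uint32_t target_field)
    {
      return (pc_of_delay_slot & 0xF0000000u) | (target_field << 2);
    }

    // j with a 26-bit target field of 0x0012345, issued at 0x80012344: the
    // cached pc is 0x80012348, and the result stays in the same segment.
    static_assert(JumpTarget(0x80012348u, 0x0012345u) == 0x80048D14u, "example");

    int main() { return 0; }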
@@ -1294,7 +1248,7 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
     case InstructionOp::bne:
     {
       // npc = pc + (sext(imm) << 2)
-      Value branch_target = AddValues(m_register_cache.ReadGuestRegister(Reg::pc, false),
+      Value branch_target = AddValues(m_register_cache.ReadGuestRegister(Reg::pc),
                                       Value::FromConstantU32(cbi.instruction.i.imm_sext32() << 2), false);
 
       // branch <- rs op rt

@@ -1311,7 +1265,7 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
     case InstructionOp::blez:
     {
       // npc = pc + (sext(imm) << 2)
-      Value branch_target = AddValues(m_register_cache.ReadGuestRegister(Reg::pc, false),
+      Value branch_target = AddValues(m_register_cache.ReadGuestRegister(Reg::pc),
                                       Value::FromConstantU32(cbi.instruction.i.imm_sext32() << 2), false);
 
       // branch <- rs op 0

@@ -1327,7 +1281,7 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
     case InstructionOp::b:
    {
       // npc = pc + (sext(imm) << 2)
-      Value branch_target = AddValues(m_register_cache.ReadGuestRegister(Reg::pc, false),
+      Value branch_target = AddValues(m_register_cache.ReadGuestRegister(Reg::pc),
                                       Value::FromConstantU32(cbi.instruction.i.imm_sext32() << 2), false);
 
       const u8 rt = static_cast<u8>(cbi.instruction.i.rt.GetValue());

@@ -1344,7 +1298,8 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
       if (link)
       {
         EmitCancelInterpreterLoadDelayForReg(Reg::ra);
-        m_register_cache.WriteGuestRegister(Reg::ra, m_register_cache.ReadGuestRegister(Reg::npc, false));
+        m_register_cache.WriteGuestRegister(
+          Reg::ra, AddValues(m_register_cache.ReadGuestRegister(Reg::pc), Value::FromConstantU32(4), false));
       }
 
       EmitTest(lhs.host_reg, lhs);

@@ -165,8 +165,6 @@ private:
   void BlockEpilogue();
   void InstructionPrologue(const CodeBlockInstruction& cbi, TickCount cycles, bool force_sync = false);
   void InstructionEpilogue(const CodeBlockInstruction& cbi);
-  void SyncCurrentInstructionPC();
-  void SyncPC();
   void AddPendingCycles();
 
   Value DoGTERegisterRead(u32 index);

@@ -202,7 +200,6 @@ private:
   CodeEmitter m_far_emitter;
   CodeEmitter* m_emit;
 
-  u32 m_delayed_pc_add = 0;
   TickCount m_delayed_cycles_add = 0;
 
   // whether various flags need to be reset.

@@ -201,6 +201,9 @@ void CodeGenerator::EmitEndBlock()
 
 void CodeGenerator::EmitExceptionExit()
 {
+  // toss away our PC value since we're jumping to the exception handler
+  m_register_cache.InvalidateGuestRegister(Reg::pc);
+
   // ensure all unflushed registers are written back
   m_register_cache.FlushAllGuestRegisters(false, false);
 

@@ -1762,28 +1765,33 @@ static void EmitConditionalJump(Condition condition, bool invert, Xbyak::CodeGen
 
 void CodeGenerator::EmitBranch(Condition condition, Reg lr_reg, Value&& branch_target)
 {
-  // allocate scratch register for reading npc - we return to the main path, so this could cause a reg flush
-  Value old_npc = m_register_cache.AllocateScratch(RegSize_32);
-
-  // npc gets modified by the branch, so we can't trust it on returning. same for lr_reg, which might contain a dirty
-  // value
-  m_register_cache.FlushGuestRegister(Reg::npc, true, true);
-  if (lr_reg != Reg::count)
+  // ensure the lr register is flushed, since we want it's correct value after the branch
+  if (lr_reg != Reg::count && lr_reg != Reg::zero)
     m_register_cache.FlushGuestRegister(lr_reg, true, true);
 
-  // condition is inverted because we want the case for skipping it
+  // compute return address, which is also set as the new pc when the branch isn't taken
+  Value new_pc;
+  if (condition != Condition::Always || lr_reg != Reg::count)
+  {
+    new_pc = AddValues(m_register_cache.ReadGuestRegister(Reg::pc), Value::FromConstantU32(4), false);
+    if (!new_pc.IsInHostRegister())
+      new_pc = GetValueInHostRegister(new_pc);
+  }
+
   Xbyak::Label skip_branch;
   if (condition != Condition::Always)
+  {
+    // condition is inverted because we want the case for skipping it
     EmitConditionalJump(condition, true, m_emit, skip_branch);
+  }
 
   // save the old PC if we want to
-  if (lr_reg != Reg::count)
+  if (lr_reg != Reg::count && lr_reg != Reg::zero)
   {
     // Can't cache because we have two branches. Load delay cancel is due to the immediate flush afterwards,
     // if we don't cancel it, at the end of the instruction the value we write can be overridden.
     EmitCancelInterpreterLoadDelayForReg(lr_reg);
-    EmitLoadGuestRegister(old_npc.host_reg, Reg::npc);
-    EmitStoreGuestRegister(lr_reg, old_npc);
+    EmitStoreGuestRegister(lr_reg, new_pc);
   }
 
   // we don't need to test the address of constant branches unless they're definitely misaligned, which would be
@@ -1814,12 +1822,18 @@ void CodeGenerator::EmitBranch(Condition condition, Reg lr_reg, Value&& branch_t
     m_register_cache.PopState();
   }
 
-  // branch taken path - write new PC and flush it, since two branches
-  EmitStoreGuestRegister(Reg::npc, branch_target);
-  EmitStoreCPUStructField(offsetof(Core, m_current_instruction_was_branch_taken), Value::FromConstantU8(1));
+  // branch taken path - change the return address/new pc
+  if (condition != Condition::Always)
+    EmitCopyValue(new_pc.GetHostRegister(), branch_target);
 
   // converge point
   m_emit->L(skip_branch);
 
+  // update pc
+  if (condition != Condition::Always)
+    m_register_cache.WriteGuestRegister(Reg::pc, std::move(new_pc));
+  else
+    m_register_cache.WriteGuestRegister(Reg::pc, std::move(branch_target));
 }
 
 void CodeGenerator::EmitRaiseException(Exception excode, Condition condition /* = Condition::Always */)
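The EmitBranch rewrite in the two hunks above keeps the next pc in a single host register across both paths: new_pc starts as pc + 4 (the not-taken/link value), the taken path overwrites it with branch_target, and after the converge point the register cache takes ownership of the merged value as Reg::pc, so no npc store is needed. The emitted control flow, modeled as a standalone C++ function (a sketch of the data flow, not DuckStation code):

    #include <cstdint>

    // One register carries the not-taken pc; the taken path overwrites it;
    // the converged value becomes the cached Reg::pc.
    uint32_t MergeBranchPc(uint32_t pc, uint32_t branch_target, bool taken, bool always)
    {
      if (always)
        return branch_target; // unconditional: pc becomes the target directly

      uint32_t new_pc = pc + 4; // not-taken path (also the link value)
      if (taken)
        new_pc = branch_target; // taken path: overwrite the same host register
      return new_pc;            // converge point: cache records this as pc
    }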
@@ -1827,14 +1841,12 @@ void CodeGenerator::EmitRaiseException(Exception excode, Condition condition /*
   if (condition == Condition::Always)
   {
     // no need to use far code if we're always raising the exception
-    EmitFunctionCall(nullptr, &Thunks::RaiseException, m_register_cache.GetCPUPtr(),
-                     Value::FromConstantU8(static_cast<u8>(excode)));
+    m_register_cache.InvalidateGuestRegister(Reg::pc);
     m_register_cache.FlushAllGuestRegisters(true, true);
     m_register_cache.FlushLoadDelay(true);
 
-    // PC should be synced at this point. If we leave the 4 on here for this instruction, we mess up npc.
-    Assert(m_delayed_pc_add == 4);
-    m_delayed_pc_add = 0;
+    EmitFunctionCall(nullptr, &Thunks::RaiseException, m_register_cache.GetCPUPtr(),
+                     Value::FromConstantU8(static_cast<u8>(excode)));
     return;
   }
 

@@ -252,6 +252,13 @@ public:
     return cache_value.IsConstant() || cache_value.IsInHostRegister();
   }
 
+  /// Returns true if the specified guest register is cached and in a host register.
+  bool IsGuestRegisterInHostRegister(Reg guest_reg) const
+  {
+    const Value& cache_value = m_state.guest_reg_state[static_cast<u8>(guest_reg)];
+    return cache_value.IsInHostRegister();
+  }
+
   /// Returns the host register if the guest register is cached.
   std::optional<HostReg> GetHostRegisterForGuestRegister(Reg guest_reg) const
   {
@@ -189,7 +189,7 @@ bool CanInstructionTrap(const Instruction& instruction, bool in_user_mode)
     case InstructionOp::bgtz:
     case InstructionOp::blez:
     case InstructionOp::bne:
-      return true;
+      return false;
 
     case InstructionOp::funct:
     {
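On the CanInstructionTrap change above: branch opcodes previously answered true, which forced the pc-sync path in InstructionPrologue before every branch; presumably they can return false now because Compile_Branch/EmitBranch manage pc through the register cache and test target alignment themselves. A reduced sketch of what the classifier expresses after this commit (hypothetical opcode set, not the full function):

    // Hypothetical reduced opcode set for illustration.
    enum class Op { lw, sw, syscall_, break_, beq, bne, blez, bgtz, addiu };

    bool CanInstructionTrapSketch(Op op)
    {
      switch (op) {
        case Op::lw:
        case Op::sw:
        case Op::syscall_:
        case Op::break_:
          return true; // still need the generic exception path
        case Op::beq:
        case Op::bne:
        case Op::blez:
        case Op::bgtz:
          return false; // was true before this commit
        default:
          return false; // simple ALU ops such as addiu cannot fault
      }
    }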