This commit is contained in:
Connor McLaughlin 2020-08-27 23:39:08 +10:00
parent 537f833658
commit e0598822b9
9 changed files with 244 additions and 45 deletions

View File

@ -321,8 +321,12 @@ void ExecuteRecompiler()
{
const u32 pc = g_state.regs.pc;
g_state.current_instruction_pc = pc;
#if 0
if (pc == 0xbfc0d444)
__debugbreak();
#endif
const u32 fast_map_index = GetFastMapIndex(pc);
s_single_block_asm_dispatcher[fast_map_index]();
s_single_block_asm_dispatcher(s_fast_map[fast_map_index]);
}
TimingEvents::RunEvents();
@ -520,6 +524,23 @@ bool CompileBlock(CodeBlock* block)
cbi.is_store_instruction = IsMemoryStoreInstruction(cbi.instruction);
cbi.has_load_delay = InstructionHasLoadDelay(cbi.instruction);
cbi.can_trap = CanInstructionTrap(cbi.instruction, InUserMode());
cbi.is_direct_branch_instruction = IsDirectBranchInstruction(cbi.instruction);
if (cbi.is_direct_branch_instruction && true)
{
// backwards branch?
VirtualMemoryAddress branch_pc = GetDirectBranchTarget(cbi.instruction, cbi.pc);
for (CodeBlockInstruction& other_cbi : block->instructions)
{
if (other_cbi.pc == branch_pc)
{
other_cbi.is_direct_branch_target = true;
cbi.is_direct_branch_in_block = true;
block->has_in_block_branches = true;
Log_InfoPrintf("Found reverse branch from %08X to %08X", cbi.pc, branch_pc);
break;
}
}
}
if (g_settings.cpu_recompiler_icache)
{
@ -552,7 +573,7 @@ bool CompileBlock(CodeBlock* block)
// change the pc for the second branch's delay slot, it comes from the first branch
const CodeBlockInstruction& prev_cbi = block->instructions.back();
pc = GetBranchInstructionTarget(prev_cbi.instruction, prev_cbi.pc);
pc = GetDirectBranchTarget(prev_cbi.instruction, prev_cbi.pc);
Log_DevPrintf("Double branch at %08X, using delay slot from %08X -> %08X", cbi.pc, prev_cbi.pc, pc);
}
@ -590,6 +611,17 @@ bool CompileBlock(CodeBlock* block)
cbi.is_load_delay_slot ? "LD" : " ", cbi.pc, cbi.instruction.bits, disasm.GetCharArray());
}
#endif
if (block->instructions.size() >= 2)
{
Log_InfoPrintf("%08X -> %08X", block->instructions.front().pc, block->instructions.back().pc);
const auto& cbi = block->instructions[block->instructions.size() - 2];
SmallString disasm;
CPU::DisassembleInstruction(&disasm, cbi.pc, cbi.instruction.bits);
Log_InfoPrintf("[%s %s 0x%08X] %08X %s", cbi.is_branch_delay_slot ? "BD" : " ",
cbi.is_load_delay_slot ? "LD" : " ", cbi.pc, cbi.instruction.bits, disasm.GetCharArray());
}
}
else
{
@ -899,3 +931,8 @@ Common::PageFaultHandler::HandlerResult LUTPageFaultHandler(void* exception_pc,
#endif // WITH_RECOMPILER
} // namespace CPU::CodeCache
// Temporary debug thunk that generated code can call via EmitFunctionCall;
// the actual state dump is currently compiled out.
// NOTE(review): scaffolding for debugging in-block branches — remove before release.
void CPU::Recompiler::Thunks::templog()
{
// Intentionally empty: uncomment to dump the current CPU state when invoked.
// CPU::CodeCache::LogCurrentState();
}

View File

@ -54,6 +54,9 @@ struct CodeBlockInstruction
bool is_branch_instruction : 1;
bool is_unconditional_branch_instruction : 1;
bool is_branch_delay_slot : 1;
bool is_direct_branch_instruction : 1;
bool is_direct_branch_target : 1;
bool is_direct_branch_in_block : 1;
bool is_load_instruction : 1;
bool is_store_instruction : 1;
bool is_load_delay_slot : 1;
@ -86,6 +89,7 @@ struct CodeBlock
bool contains_loadstore_instructions = false;
bool contains_double_branches = false;
bool invalidated = false;
bool has_in_block_branches = false;
const u32 GetPC() const { return key.GetPC(); }
const u32 GetSizeInBytes() const { return static_cast<u32>(instructions.size()) * sizeof(Instruction); }

View File

@ -30,17 +30,10 @@ bool CodeGenerator::CompileBlock(CodeBlock* block, CodeBlock::HostCodePointer* o
EmitBeginBlock();
BlockPrologue();
const CodeBlockInstruction* cbi = m_block_start;
while (cbi != m_block_end)
m_current_instruction = m_block_start;
while (m_current_instruction != m_block_end)
{
#ifdef _DEBUG
SmallString disasm;
DisassembleInstruction(&disasm, cbi->pc, cbi->instruction.bits);
Log_DebugPrintf("Compiling instruction '%s'", disasm.GetCharArray());
#endif
m_current_instruction = cbi;
if (!CompileInstruction(*cbi))
if (!CompileInstruction(*m_current_instruction))
{
m_current_instruction = nullptr;
m_block_end = nullptr;
@ -49,7 +42,7 @@ bool CodeGenerator::CompileBlock(CodeBlock* block, CodeBlock::HostCodePointer* o
return false;
}
cbi++;
m_current_instruction++;
}
BlockEpilogue();
@ -70,6 +63,12 @@ bool CodeGenerator::CompileBlock(CodeBlock* block, CodeBlock::HostCodePointer* o
bool CodeGenerator::CompileInstruction(const CodeBlockInstruction& cbi)
{
#ifdef _DEBUG
SmallString disasm;
DisassembleInstruction(&disasm, cbi.pc, cbi.instruction.bits);
Log_DebugPrintf("Compiling instruction '%s'", disasm.GetCharArray());
#endif
bool result;
switch (cbi.instruction.op)
{
@ -864,6 +863,17 @@ Value CodeGenerator::NotValue(const Value& val)
return res;
}
// Returns the label previously bound for an in-block branch target at the
// given guest PC, or nullptr when no instruction at that address was
// registered in m_branch_targets (see InstructionPrologue).
LabelType* CodeGenerator::GetBranchTargetLabel(VirtualMemoryAddress pc)
{
for (size_t i = 0; i < m_branch_targets.size(); i++)
{
if (m_branch_targets[i].first == pc)
return &m_branch_targets[i].second;
}
return nullptr;
}
void CodeGenerator::GenerateExceptionExit(const CodeBlockInstruction& cbi, Exception excode,
Condition condition /* = Condition::Always */)
{
@ -903,6 +913,8 @@ void CodeGenerator::BlockPrologue()
{
InitSpeculativeRegs();
// EmitFunctionCall(nullptr, &CPU::Recompiler::Thunks::templog);
EmitStoreCPUStructField(offsetof(State, exception_raised), Value::FromConstantU8(0));
if (m_block->uncached_fetch_ticks > 0)
@ -940,6 +952,23 @@ void CodeGenerator::InstructionPrologue(const CodeBlockInstruction& cbi, TickCou
m_emit->nop();
#endif
// flush and reload registers on branch targets since we'll be coming back here
if (cbi.is_direct_branch_target)
{
if (&cbi != m_block_start)
{
m_register_cache.FlushAllGuestRegisters(true, true);
if (m_register_cache.HasLoadDelay())
m_register_cache.WriteLoadDelayToCPU(true);
AddPendingCycles(true);
SyncPC();
}
LabelType label;
EmitBindLabel(&label);
m_branch_targets.emplace_back(cbi.pc, std::move(label));
m_load_delay_dirty = true;
}
// move instruction offsets forward
m_current_instruction_pc_offset = m_pc_offset;
m_pc_offset = m_next_pc_offset;
@ -1063,6 +1092,17 @@ void CodeGenerator::WriteNewPC(const Value& value, bool commit)
m_next_pc_offset = 0;
}
// Flushes the accumulated PC delta into State::regs.pc so the stored guest PC
// is exact (needed before jumping back to an in-block branch target). After a
// sync the next instruction's PC offset restarts at +4; a no-op when there is
// nothing pending.
void CodeGenerator::SyncPC()
{
if (m_pc_offset != 0)
{
EmitAddCPUStructField(offsetof(State, regs.pc), Value::FromConstantU32(m_pc_offset));
m_pc_offset = 0;
m_next_pc_offset = 4;
}
}
bool CodeGenerator::Compile_Fallback(const CodeBlockInstruction& cbi)
{
InstructionPrologue(cbi, 1, true);
@ -1956,7 +1996,8 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
{
InstructionPrologue(cbi, 1);
auto DoBranch = [this](Condition condition, const Value& lhs, const Value& rhs, Reg lr_reg, Value&& branch_target) {
auto DoBranch = [this, &cbi](Condition condition, const Value& lhs, const Value& rhs, Reg lr_reg,
Value&& branch_target) {
// ensure the lr register is flushed, since we want its correct value after the branch
// we don't want to invalidate it yet because of "jalr r0, r0", branch_target could be the lr_reg.
if (lr_reg != Reg::count && lr_reg != Reg::zero)
@ -1967,8 +2008,15 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
if (condition != Condition::Always || lr_reg != Reg::count)
next_pc = CalculatePC(4);
LabelType branch_not_taken;
LabelType* in_block_target = nullptr;
if (cbi.is_direct_branch_in_block)
in_block_target = GetBranchTargetLabel(GetDirectBranchTarget(cbi.instruction, cbi.pc));
Value take_branch;
LabelType branch_taken, branch_not_taken;
if (condition != Condition::Always)
{
if (!in_block_target)
{
// condition is inverted because we want the case for skipping it
if (lhs.IsValid() && rhs.IsValid())
@ -1978,6 +2026,45 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
else
EmitConditionalBranch(condition, true, &branch_not_taken);
}
else
{
take_branch = m_register_cache.AllocateScratch(RegSize_32);
switch (condition)
{
case Condition::NotEqual:
case Condition::Equal:
case Condition::Overflow:
case Condition::Greater:
case Condition::GreaterEqual:
case Condition::LessEqual:
case Condition::Less:
case Condition::Above:
case Condition::AboveEqual:
case Condition::Below:
case Condition::BelowEqual:
{
EmitCmp(lhs.GetHostRegister(), rhs);
EmitSetConditionResult(take_branch.GetHostRegister(), take_branch.size, condition);
}
break;
case Condition::Negative:
case Condition::PositiveOrZero:
case Condition::NotZero:
case Condition::Zero:
{
Assert(!rhs.IsValid() || (rhs.IsConstant() && rhs.GetS64ConstantValue() == 0));
EmitTest(lhs.GetHostRegister(), lhs);
EmitSetConditionResult(take_branch.GetHostRegister(), take_branch.size, condition);
}
break;
default:
UnreachableCode();
break;
}
}
}
// save the old PC if we want to
if (lr_reg != Reg::count && lr_reg != Reg::zero)
@ -2024,9 +2111,54 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
m_register_cache.PopState();
}
if (in_block_target)
{
// if it's an in-block branch, compile the delay slot now
Assert((m_current_instruction + 1) != m_block_end);
InstructionEpilogue(cbi);
m_current_instruction++;
if (!CompileInstruction(*m_current_instruction))
return false;
// flush all regs since we're at the end of the block now
m_register_cache.FlushAllGuestRegisters(false, true);
if (m_register_cache.HasLoadDelay())
m_register_cache.WriteLoadDelayToCPU(true);
AddPendingCycles(true);
// branch not taken?
EmitConditionalBranch(Condition::NotZero, true, take_branch.GetHostRegister(), take_branch.size,
&branch_not_taken);
m_register_cache.PushState();
{
// check downcount
{
Value pending_ticks = m_register_cache.AllocateScratch(RegSize_32);
Value downcount = m_register_cache.AllocateScratch(RegSize_32);
EmitLoadCPUStructField(pending_ticks.GetHostRegister(), RegSize_32, offsetof(State, pending_ticks));
EmitLoadCPUStructField(downcount.GetHostRegister(), RegSize_32, offsetof(State, downcount));
// pending < downcount
EmitConditionalBranch(Condition::GreaterEqual, false, pending_ticks.GetHostRegister(), downcount,
&branch_taken);
}
// EmitFunctionCall(nullptr, &CPU::Recompiler::Thunks::templog);
// now, we can jump back in the block
// if it's an in-branch block, we can skip writing the PC since it's synced anyway
EmitBranch(in_block_target);
}
// restore back
m_register_cache.PopState();
}
if (condition != Condition::Always)
{
// branch taken path - modify the next pc
EmitBindLabel(&branch_taken);
EmitCopyValue(next_pc.GetHostRegister(), branch_target);
// converge point
@ -2042,6 +2174,11 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
// now invalidate lr because it was possibly written in the branch
if (lr_reg != Reg::count && lr_reg != Reg::zero)
m_register_cache.InvalidateGuestRegister(lr_reg);
if (!in_block_target)
InstructionEpilogue(cbi);
return true;
};
// Compute the branch target.
@ -2055,10 +2192,9 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
Value branch_target = OrValues(AndValues(CalculatePC(), Value::FromConstantU32(0xF0000000)),
Value::FromConstantU32(cbi.instruction.j.target << 2));
DoBranch(Condition::Always, Value(), Value(), (cbi.instruction.op == InstructionOp::jal) ? Reg::ra : Reg::count,
std::move(branch_target));
return DoBranch(Condition::Always, Value(), Value(),
(cbi.instruction.op == InstructionOp::jal) ? Reg::ra : Reg::count, std::move(branch_target));
}
break;
case InstructionOp::funct:
{
@ -2066,7 +2202,7 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
{
// npc = rs, link to rt
Value branch_target = m_register_cache.ReadGuestRegister(cbi.instruction.r.rs);
DoBranch(Condition::Always, Value(), Value(),
return DoBranch(Condition::Always, Value(), Value(),
(cbi.instruction.r.funct == InstructionFunct::jalr) ? cbi.instruction.r.rd : Reg::count,
std::move(branch_target));
}
@ -2076,13 +2212,15 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
const Exception excode =
(cbi.instruction.r.funct == InstructionFunct::syscall) ? Exception::Syscall : Exception::BP;
GenerateExceptionExit(cbi, excode);
InstructionEpilogue(cbi);
return true;
}
else
{
UnreachableCode();
return false;
}
}
break;
case InstructionOp::beq:
case InstructionOp::bne:
@ -2094,7 +2232,7 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
if (cbi.instruction.op == InstructionOp::beq && cbi.instruction.i.rs == Reg::zero &&
cbi.instruction.i.rt == Reg::zero)
{
DoBranch(Condition::Always, Value(), Value(), Reg::count, std::move(branch_target));
return DoBranch(Condition::Always, Value(), Value(), Reg::count, std::move(branch_target));
}
else
{
@ -2102,10 +2240,9 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
Value lhs = m_register_cache.ReadGuestRegister(cbi.instruction.i.rs, true, true);
Value rhs = m_register_cache.ReadGuestRegister(cbi.instruction.i.rt);
const Condition condition = (cbi.instruction.op == InstructionOp::beq) ? Condition::Equal : Condition::NotEqual;
DoBranch(condition, lhs, rhs, Reg::count, std::move(branch_target));
return DoBranch(condition, lhs, rhs, Reg::count, std::move(branch_target));
}
}
break;
case InstructionOp::bgtz:
case InstructionOp::blez:
@ -2118,9 +2255,8 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
const Condition condition =
(cbi.instruction.op == InstructionOp::bgtz) ? Condition::Greater : Condition::LessEqual;
DoBranch(condition, lhs, Value::FromConstantU32(0), Reg::count, std::move(branch_target));
return DoBranch(condition, lhs, Value::FromConstantU32(0), Reg::count, std::move(branch_target));
}
break;
case InstructionOp::b:
{
@ -2142,17 +2278,13 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
m_register_cache.WriteGuestRegister(Reg::ra, CalculatePC(4));
}
DoBranch(condition, lhs, Value(), Reg::count, std::move(branch_target));
return DoBranch(condition, lhs, Value(), Reg::count, std::move(branch_target));
}
break;
default:
UnreachableCode();
break;
return false;
}
InstructionEpilogue(cbi);
return true;
}
bool CodeGenerator::Compile_lui(const CodeBlockInstruction& cbi)

View File

@ -2,6 +2,7 @@
#include <array>
#include <initializer_list>
#include <utility>
#include <vector>
#include "common/jit_code_buffer.h"
@ -188,6 +189,8 @@ private:
void* GetCurrentNearCodePointer() const;
void* GetCurrentFarCodePointer() const;
LabelType* GetBranchTargetLabel(VirtualMemoryAddress pc);
//////////////////////////////////////////////////////////////////////////
// Code Generation Helpers
//////////////////////////////////////////////////////////////////////////
@ -202,6 +205,7 @@ private:
Value GetCurrentInstructionPC(u32 offset = 0);
void UpdateCurrentInstructionPC(bool commit);
void WriteNewPC(const Value& value, bool commit);
void SyncPC();
Value DoGTERegisterRead(u32 index);
void DoGTERegisterWrite(u32 index, const Value& value);
@ -239,6 +243,8 @@ private:
CodeEmitter m_far_emitter;
CodeEmitter* m_emit;
std::vector<std::pair<VirtualMemoryAddress, LabelType>> m_branch_targets;
TickCount m_delayed_cycles_add = 0;
TickCount m_pc_offset = 0;
TickCount m_current_instruction_pc_offset = 0;

View File

@ -280,6 +280,21 @@ Value RegisterCache::AllocateScratch(RegSize size, HostReg reg /* = HostReg_Inva
return Value::FromScratch(this, reg, size);
}
void RegisterCache::ReserveCallerSavedRegisters()
{
for (u32 reg = 0; reg < HostReg_Count; reg++)
{
if ((m_state.host_reg_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) ==
HostRegState::CalleeSaved)
{
DebugAssert(m_state.callee_saved_order_count < HostReg_Count);
m_code_generator.EmitPushHostReg(static_cast<HostReg>(reg), GetActiveCalleeSavedRegisterCount());
m_state.callee_saved_order[m_state.callee_saved_order_count++] = static_cast<HostReg>(reg);
m_state.host_reg_state[reg] |= HostRegState::CalleeSavedAllocated;
}
}
}
u32 RegisterCache::PushCallerSavedRegisters() const
{
u32 position = GetActiveCalleeSavedRegisterCount();

View File

@ -241,6 +241,9 @@ public:
/// Ensures a host register is free, removing any value cached.
void EnsureHostRegFree(HostReg reg);
/// Preallocates caller saved registers, enabling later use without stack pushes.
void ReserveCallerSavedRegisters();
/// Push/pop volatile host registers. Returns the number of registers pushed/popped.
u32 PushCallerSavedRegisters() const;
u32 PopCallerSavedRegisters() const;

View File

@ -33,6 +33,7 @@ void UncheckedWriteMemoryHalfWord(u32 address, u16 value);
void UncheckedWriteMemoryWord(u32 address, u32 value);
void UpdateFastmemMapping();
void templog();
} // namespace Recompiler::Thunks

View File

@ -98,24 +98,25 @@ bool IsDirectBranchInstruction(const Instruction& instruction)
}
}
// NOTE(review): this span is rendered diff output — the first signature line
// (GetBranchInstructionTarget) and the duplicated return lines below appear to
// be the pre-rename versions interleaved with the new ones by the diff viewer;
// only one of each pair exists in the real file.
u32 GetBranchInstructionTarget(const Instruction& instruction, u32 instruction_pc)
// Computes the static target address of a direct branch or jump instruction.
// instruction_pc is the address of the branch itself; all targets are computed
// relative to the delay-slot PC (instruction_pc + 4).
VirtualMemoryAddress GetDirectBranchTarget(const Instruction& instruction, VirtualMemoryAddress instruction_pc)
{
const VirtualMemoryAddress pc = instruction_pc + 4;
switch (instruction.op)
{
case InstructionOp::j:
case InstructionOp::jal:
// j/jal: upper 4 bits come from the delay-slot PC, low 28 bits from the
// 26-bit target field shifted left by 2.
return ((instruction_pc + 4) & UINT32_C(0xF0000000)) | (instruction.j.target << 2);
return (pc & UINT32_C(0xF0000000)) | (instruction.j.target << 2);
case InstructionOp::b:
case InstructionOp::beq:
case InstructionOp::bgtz:
case InstructionOp::blez:
case InstructionOp::bne:
// Conditional branches: sign-extended 16-bit immediate, shifted left by 2,
// added to the delay-slot PC.
return instruction_pc + 4 + (instruction.i.imm_sext32() << 2);
return (pc + (instruction.i.imm_sext32() << 2));
default:
// Indirect (register) or invalid branches have no static target.
Panic("Trying to get branch target of indirect or invalid branch");
return instruction_pc;
return pc;
}
}

View File

@ -223,7 +223,7 @@ union Instruction
bool IsBranchInstruction(const Instruction& instruction);
bool IsUnconditionalBranchInstruction(const Instruction& instruction);
bool IsDirectBranchInstruction(const Instruction& instruction);
u32 GetBranchInstructionTarget(const Instruction& instruction, u32 instruction_pc);
VirtualMemoryAddress GetDirectBranchTarget(const Instruction& instruction, VirtualMemoryAddress instruction_pc);
bool IsCallInstruction(const Instruction& instruction);
bool IsReturnInstruction(const Instruction& instruction);
bool IsMemoryLoadInstruction(const Instruction& instruction);