CPU: Simulate stalls from GTE instructions
This commit is contained in:
parent
3d4cdb6d92
commit
30db081a64
|
@ -181,7 +181,10 @@ bool DoState(StateWrapper& sw)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sw.IsReading())
|
if (sw.IsReading())
|
||||||
|
{
|
||||||
UpdateFastmemBase();
|
UpdateFastmemBase();
|
||||||
|
g_state.gte_completion_tick = 0;
|
||||||
|
}
|
||||||
|
|
||||||
return !sw.HasError();
|
return !sw.HasError();
|
||||||
}
|
}
|
||||||
|
@ -1462,6 +1465,8 @@ restart_instruction:
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
StallUntilGTEComplete();
|
||||||
|
|
||||||
if (inst.cop.IsCommonInstruction())
|
if (inst.cop.IsCommonInstruction())
|
||||||
{
|
{
|
||||||
// TODO: Combine with cop0.
|
// TODO: Combine with cop0.
|
||||||
|
@ -1533,6 +1538,7 @@ restart_instruction:
|
||||||
if (!ReadMemoryWord(addr, &value))
|
if (!ReadMemoryWord(addr, &value))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
StallUntilGTEComplete();
|
||||||
GTE::WriteRegister(ZeroExtend32(static_cast<u8>(inst.i.rt.GetValue())), value);
|
GTE::WriteRegister(ZeroExtend32(static_cast<u8>(inst.i.rt.GetValue())), value);
|
||||||
|
|
||||||
if constexpr (pgxp_mode >= PGXPMode::Memory)
|
if constexpr (pgxp_mode >= PGXPMode::Memory)
|
||||||
|
@ -1549,6 +1555,8 @@ restart_instruction:
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
StallUntilGTEComplete();
|
||||||
|
|
||||||
const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32();
|
const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32();
|
||||||
const u32 value = GTE::ReadRegister(ZeroExtend32(static_cast<u8>(inst.i.rt.GetValue())));
|
const u32 value = GTE::ReadRegister(ZeroExtend32(static_cast<u8>(inst.i.rt.GetValue())));
|
||||||
WriteMemoryWord(addr, value);
|
WriteMemoryWord(addr, value);
|
||||||
|
@ -1596,7 +1604,10 @@ void DispatchInterrupt()
|
||||||
// instruction. For some reason, if we don't do this, we end up with incorrectly sorted polygons and flickering..
|
// instruction. For some reason, if we don't do this, we end up with incorrectly sorted polygons and flickering..
|
||||||
SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits);
|
SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits);
|
||||||
if (g_state.next_instruction.op == InstructionOp::cop2 && !g_state.next_instruction.cop.IsCommonInstruction())
|
if (g_state.next_instruction.op == InstructionOp::cop2 && !g_state.next_instruction.cop.IsCommonInstruction())
|
||||||
|
{
|
||||||
|
StallUntilGTEComplete();
|
||||||
GTE::ExecuteInstruction(g_state.next_instruction.bits);
|
GTE::ExecuteInstruction(g_state.next_instruction.bits);
|
||||||
|
}
|
||||||
|
|
||||||
// Interrupt raising occurs before the start of the instruction.
|
// Interrupt raising occurs before the start of the instruction.
|
||||||
RaiseException(
|
RaiseException(
|
||||||
|
|
|
@ -46,8 +46,9 @@ union CacheControl
|
||||||
struct State
|
struct State
|
||||||
{
|
{
|
||||||
// ticks the CPU has executed
|
// ticks the CPU has executed
|
||||||
TickCount pending_ticks = 0;
|
|
||||||
TickCount downcount = 0;
|
TickCount downcount = 0;
|
||||||
|
TickCount pending_ticks = 0;
|
||||||
|
TickCount gte_completion_tick = 0;
|
||||||
|
|
||||||
Registers regs = {};
|
Registers regs = {};
|
||||||
Cop0Registers cop0_regs = {};
|
Cop0Registers cop0_regs = {};
|
||||||
|
@ -118,6 +119,8 @@ ALWAYS_INLINE TickCount GetPendingTicks()
|
||||||
}
|
}
|
||||||
ALWAYS_INLINE void ResetPendingTicks()
|
ALWAYS_INLINE void ResetPendingTicks()
|
||||||
{
|
{
|
||||||
|
g_state.gte_completion_tick =
|
||||||
|
(g_state.pending_ticks < g_state.gte_completion_tick) ? (g_state.gte_completion_tick - g_state.pending_ticks) : 0;
|
||||||
g_state.pending_ticks = 0;
|
g_state.pending_ticks = 0;
|
||||||
}
|
}
|
||||||
ALWAYS_INLINE void AddPendingTicks(TickCount ticks)
|
ALWAYS_INLINE void AddPendingTicks(TickCount ticks)
|
||||||
|
|
|
@ -111,4 +111,15 @@ bool WriteMemoryWord(VirtualMemoryAddress addr, u32 value);
|
||||||
void* GetDirectReadMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size, TickCount* read_ticks);
|
void* GetDirectReadMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size, TickCount* read_ticks);
|
||||||
void* GetDirectWriteMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size);
|
void* GetDirectWriteMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size);
|
||||||
|
|
||||||
|
ALWAYS_INLINE void AddGTETicks(TickCount ticks)
|
||||||
|
{
|
||||||
|
g_state.gte_completion_tick = g_state.pending_ticks + ticks + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
ALWAYS_INLINE void StallUntilGTEComplete()
|
||||||
|
{
|
||||||
|
g_state.pending_ticks =
|
||||||
|
(g_state.gte_completion_tick > g_state.pending_ticks) ? g_state.gte_completion_tick : g_state.pending_ticks;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace CPU
|
} // namespace CPU
|
|
@ -964,6 +964,7 @@ void CodeGenerator::BlockPrologue()
|
||||||
m_branch_was_taken_dirty = g_settings.cpu_recompiler_memory_exceptions;
|
m_branch_was_taken_dirty = g_settings.cpu_recompiler_memory_exceptions;
|
||||||
m_current_instruction_was_branch_taken_dirty = false;
|
m_current_instruction_was_branch_taken_dirty = false;
|
||||||
m_load_delay_dirty = true;
|
m_load_delay_dirty = true;
|
||||||
|
m_gte_busy_cycles_dirty = true;
|
||||||
|
|
||||||
m_pc_offset = 0;
|
m_pc_offset = 0;
|
||||||
m_current_instruction_pc_offset = 0;
|
m_current_instruction_pc_offset = 0;
|
||||||
|
@ -1067,13 +1068,63 @@ void CodeGenerator::TruncateBlockAtCurrentInstruction()
|
||||||
|
|
||||||
void CodeGenerator::AddPendingCycles(bool commit)
|
void CodeGenerator::AddPendingCycles(bool commit)
|
||||||
{
|
{
|
||||||
if (m_delayed_cycles_add == 0)
|
if (m_delayed_cycles_add == 0 && m_gte_done_cycle <= m_delayed_cycles_add)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(m_delayed_cycles_add));
|
if (m_gte_done_cycle > m_delayed_cycles_add)
|
||||||
|
{
|
||||||
|
Value temp = m_register_cache.AllocateScratch(RegSize_32);
|
||||||
|
EmitLoadCPUStructField(temp.GetHostRegister(), RegSize_32, offsetof(State, pending_ticks));
|
||||||
|
if (m_delayed_cycles_add > 0)
|
||||||
|
{
|
||||||
|
EmitAdd(temp.GetHostRegister(), temp.GetHostRegister(), Value::FromConstantU32(m_delayed_cycles_add), false);
|
||||||
|
EmitStoreCPUStructField(offsetof(State, pending_ticks), temp);
|
||||||
|
EmitAdd(temp.GetHostRegister(), temp.GetHostRegister(),
|
||||||
|
Value::FromConstantU32(m_gte_done_cycle - m_delayed_cycles_add), false);
|
||||||
|
EmitStoreCPUStructField(offsetof(State, gte_completion_tick), temp);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
EmitAdd(temp.GetHostRegister(), temp.GetHostRegister(), Value::FromConstantU32(m_gte_done_cycle), false);
|
||||||
|
EmitStoreCPUStructField(offsetof(State, gte_completion_tick), temp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(m_delayed_cycles_add));
|
||||||
|
}
|
||||||
|
|
||||||
if (commit)
|
if (commit)
|
||||||
|
{
|
||||||
|
m_gte_done_cycle = std::max<TickCount>(m_gte_done_cycle - m_delayed_cycles_add, 0);
|
||||||
m_delayed_cycles_add = 0;
|
m_delayed_cycles_add = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void CodeGenerator::AddGTETicks(TickCount ticks)
|
||||||
|
{
|
||||||
|
m_gte_done_cycle = m_delayed_cycles_add + ticks;
|
||||||
|
Log_DebugPrintf("Adding %d GTE ticks", ticks);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CodeGenerator::StallUntilGTEComplete()
|
||||||
|
{
|
||||||
|
if (!m_gte_busy_cycles_dirty)
|
||||||
|
{
|
||||||
|
// simple case - in block scheduling
|
||||||
|
if (m_gte_done_cycle > m_delayed_cycles_add)
|
||||||
|
{
|
||||||
|
Log_DebugPrintf("Stalling for %d ticks from GTE", m_gte_done_cycle - m_delayed_cycles_add);
|
||||||
|
m_delayed_cycles_add += (m_gte_done_cycle - m_delayed_cycles_add);
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// switch to in block scheduling
|
||||||
|
EmitStallUntilGTEComplete();
|
||||||
|
m_gte_done_cycle = 0;
|
||||||
|
m_gte_busy_cycles_dirty = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
Value CodeGenerator::CalculatePC(u32 offset /* = 0 */)
|
Value CodeGenerator::CalculatePC(u32 offset /* = 0 */)
|
||||||
|
@ -2740,6 +2791,7 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi)
|
||||||
{
|
{
|
||||||
if (cbi.instruction.op == InstructionOp::lwc2 || cbi.instruction.op == InstructionOp::swc2)
|
if (cbi.instruction.op == InstructionOp::lwc2 || cbi.instruction.op == InstructionOp::swc2)
|
||||||
{
|
{
|
||||||
|
StallUntilGTEComplete();
|
||||||
InstructionPrologue(cbi, 1);
|
InstructionPrologue(cbi, 1);
|
||||||
|
|
||||||
const u32 reg = static_cast<u32>(cbi.instruction.i.rt.GetValue());
|
const u32 reg = static_cast<u32>(cbi.instruction.i.rt.GetValue());
|
||||||
|
@ -2786,6 +2838,7 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi)
|
||||||
const u32 reg = static_cast<u32>(cbi.instruction.r.rd.GetValue()) +
|
const u32 reg = static_cast<u32>(cbi.instruction.r.rd.GetValue()) +
|
||||||
((cbi.instruction.cop.CommonOp() == CopCommonInstruction::cfcn) ? 32 : 0);
|
((cbi.instruction.cop.CommonOp() == CopCommonInstruction::cfcn) ? 32 : 0);
|
||||||
|
|
||||||
|
StallUntilGTEComplete();
|
||||||
InstructionPrologue(cbi, 1);
|
InstructionPrologue(cbi, 1);
|
||||||
|
|
||||||
Value value = DoGTERegisterRead(reg);
|
Value value = DoGTERegisterRead(reg);
|
||||||
|
@ -2811,6 +2864,7 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi)
|
||||||
const u32 reg = static_cast<u32>(cbi.instruction.r.rd.GetValue()) +
|
const u32 reg = static_cast<u32>(cbi.instruction.r.rd.GetValue()) +
|
||||||
((cbi.instruction.cop.CommonOp() == CopCommonInstruction::ctcn) ? 32 : 0);
|
((cbi.instruction.cop.CommonOp() == CopCommonInstruction::ctcn) ? 32 : 0);
|
||||||
|
|
||||||
|
StallUntilGTEComplete();
|
||||||
InstructionPrologue(cbi, 1);
|
InstructionPrologue(cbi, 1);
|
||||||
|
|
||||||
Value value = m_register_cache.ReadGuestRegister(cbi.instruction.r.rt);
|
Value value = m_register_cache.ReadGuestRegister(cbi.instruction.r.rt);
|
||||||
|
@ -2833,11 +2887,16 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
TickCount func_ticks;
|
||||||
|
GTE::InstructionImpl func = GTE::GetInstructionImpl(cbi.instruction.bits, &func_ticks);
|
||||||
|
|
||||||
// forward everything to the GTE.
|
// forward everything to the GTE.
|
||||||
|
StallUntilGTEComplete();
|
||||||
InstructionPrologue(cbi, 1);
|
InstructionPrologue(cbi, 1);
|
||||||
|
|
||||||
Value instruction_bits = Value::FromConstantU32(cbi.instruction.bits & GTE::Instruction::REQUIRED_BITS_MASK);
|
Value instruction_bits = Value::FromConstantU32(cbi.instruction.bits & GTE::Instruction::REQUIRED_BITS_MASK);
|
||||||
EmitFunctionCall(nullptr, GTE::GetInstructionImpl(cbi.instruction.bits), instruction_bits);
|
EmitFunctionCall(nullptr, func, instruction_bits);
|
||||||
|
AddGTETicks(func_ticks);
|
||||||
|
|
||||||
InstructionEpilogue(cbi);
|
InstructionEpilogue(cbi);
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -71,6 +71,7 @@ public:
|
||||||
void EmitMoveNextInterpreterLoadDelay();
|
void EmitMoveNextInterpreterLoadDelay();
|
||||||
void EmitCancelInterpreterLoadDelayForReg(Reg reg);
|
void EmitCancelInterpreterLoadDelayForReg(Reg reg);
|
||||||
void EmitICacheCheckAndUpdate();
|
void EmitICacheCheckAndUpdate();
|
||||||
|
void EmitStallUntilGTEComplete();
|
||||||
void EmitLoadCPUStructField(HostReg host_reg, RegSize size, u32 offset);
|
void EmitLoadCPUStructField(HostReg host_reg, RegSize size, u32 offset);
|
||||||
void EmitStoreCPUStructField(u32 offset, const Value& value);
|
void EmitStoreCPUStructField(u32 offset, const Value& value);
|
||||||
void EmitAddCPUStructField(u32 offset, const Value& value);
|
void EmitAddCPUStructField(u32 offset, const Value& value);
|
||||||
|
@ -200,6 +201,8 @@ private:
|
||||||
void InstructionEpilogue(const CodeBlockInstruction& cbi);
|
void InstructionEpilogue(const CodeBlockInstruction& cbi);
|
||||||
void TruncateBlockAtCurrentInstruction();
|
void TruncateBlockAtCurrentInstruction();
|
||||||
void AddPendingCycles(bool commit);
|
void AddPendingCycles(bool commit);
|
||||||
|
void AddGTETicks(TickCount ticks);
|
||||||
|
void StallUntilGTEComplete();
|
||||||
|
|
||||||
Value CalculatePC(u32 offset = 0);
|
Value CalculatePC(u32 offset = 0);
|
||||||
Value GetCurrentInstructionPC(u32 offset = 0);
|
Value GetCurrentInstructionPC(u32 offset = 0);
|
||||||
|
@ -244,6 +247,7 @@ private:
|
||||||
CodeEmitter* m_emit;
|
CodeEmitter* m_emit;
|
||||||
|
|
||||||
TickCount m_delayed_cycles_add = 0;
|
TickCount m_delayed_cycles_add = 0;
|
||||||
|
TickCount m_gte_done_cycle = 0;
|
||||||
TickCount m_pc_offset = 0;
|
TickCount m_pc_offset = 0;
|
||||||
TickCount m_current_instruction_pc_offset = 0;
|
TickCount m_current_instruction_pc_offset = 0;
|
||||||
TickCount m_next_pc_offset = 0;
|
TickCount m_next_pc_offset = 0;
|
||||||
|
@ -254,6 +258,7 @@ private:
|
||||||
bool m_current_instruction_was_branch_taken_dirty = false;
|
bool m_current_instruction_was_branch_taken_dirty = false;
|
||||||
bool m_load_delay_dirty = false;
|
bool m_load_delay_dirty = false;
|
||||||
bool m_next_load_delay_dirty = false;
|
bool m_next_load_delay_dirty = false;
|
||||||
|
bool m_gte_busy_cycles_dirty = false;
|
||||||
|
|
||||||
bool m_fastmem_load_base_in_register = false;
|
bool m_fastmem_load_base_in_register = false;
|
||||||
bool m_fastmem_store_base_in_register = false;
|
bool m_fastmem_store_base_in_register = false;
|
||||||
|
|
|
@ -1695,6 +1695,24 @@ void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg)
|
||||||
m_emit->Bind(&skip_cancel);
|
m_emit->Bind(&skip_cancel);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CodeGenerator::EmitStallUntilGTEComplete()
|
||||||
|
{
|
||||||
|
static_assert(offsetof(State, pending_ticks) + sizeof(u32) == offsetof(State, gte_completion_tick));
|
||||||
|
|
||||||
|
m_emit->ldr(GetHostReg32(RARG1), a32::MemOperand(GetCPUPtrReg(), offsetof(State, pending_ticks)));
|
||||||
|
m_emit->ldr(GetHostReg32(RARG2), a32::MemOperand(GetCPUPtrReg(), offsetof(State, gte_completion_tick)));
|
||||||
|
|
||||||
|
if (m_delayed_cycles_add > 0)
|
||||||
|
{
|
||||||
|
m_emit->Add(GetHostReg32(RARG1), GetHostReg32(RARG1), static_cast<u32>(m_delayed_cycles_add));
|
||||||
|
m_delayed_cycles_add = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
m_emit->cmp(GetHostReg32(RARG2), GetHostReg32(RARG1));
|
||||||
|
m_emit->mov(a32::hi, GetHostReg32(RARG1), GetHostReg32(RARG2));
|
||||||
|
m_emit->str(GetHostReg32(RARG1), a32::MemOperand(GetCPUPtrReg(), offsetof(State, pending_ticks)));
|
||||||
|
}
|
||||||
|
|
||||||
void CodeGenerator::EmitBranch(const void* address, bool allow_scratch)
|
void CodeGenerator::EmitBranch(const void* address, bool allow_scratch)
|
||||||
{
|
{
|
||||||
const s32 displacement = GetPCDisplacement(GetCurrentCodePointer(), address);
|
const s32 displacement = GetPCDisplacement(GetCurrentCodePointer(), address);
|
||||||
|
|
|
@ -1890,6 +1890,23 @@ void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg)
|
||||||
m_emit->Bind(&skip_cancel);
|
m_emit->Bind(&skip_cancel);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CodeGenerator::EmitStallUntilGTEComplete()
|
||||||
|
{
|
||||||
|
static_assert(offsetof(State, pending_ticks) + sizeof(u32) == offsetof(State, gte_completion_tick));
|
||||||
|
m_emit->ldp(GetHostReg32(RARG1), GetHostReg32(RARG2),
|
||||||
|
a64::MemOperand(GetCPUPtrReg(), offsetof(State, pending_ticks)));
|
||||||
|
|
||||||
|
if (m_delayed_cycles_add > 0)
|
||||||
|
{
|
||||||
|
m_emit->Add(GetHostReg32(RARG1), GetHostReg32(RARG1), static_cast<u32>(m_delayed_cycles_add));
|
||||||
|
m_delayed_cycles_add = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
m_emit->cmp(GetHostReg32(RARG2), GetHostReg32(RARG1));
|
||||||
|
m_emit->csel(GetHostReg32(RARG1), GetHostReg32(RARG2), GetHostReg32(RARG1), a64::Condition::hi);
|
||||||
|
m_emit->str(GetHostReg32(RARG1), a64::MemOperand(GetCPUPtrReg(), offsetof(State, pending_ticks)));
|
||||||
|
}
|
||||||
|
|
||||||
void CodeGenerator::EmitBranch(const void* address, bool allow_scratch)
|
void CodeGenerator::EmitBranch(const void* address, bool allow_scratch)
|
||||||
{
|
{
|
||||||
const s64 jump_distance =
|
const s64 jump_distance =
|
||||||
|
|
|
@ -207,4 +207,35 @@ void CodeGenerator::EmitICacheCheckAndUpdate()
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if 0 // Not Used
|
||||||
|
|
||||||
|
void CodeGenerator::EmitStallUntilGTEComplete()
|
||||||
|
{
|
||||||
|
Value pending_ticks = m_register_cache.AllocateScratch(RegSize_32);
|
||||||
|
Value gte_completion_tick = m_register_cache.AllocateScratch(RegSize_32);
|
||||||
|
EmitLoadCPUStructField(pending_ticks.GetHostRegister(), RegSize_32, offsetof(State, pending_ticks));
|
||||||
|
EmitLoadCPUStructField(gte_completion_tick.GetHostRegister(), RegSize_32, offsetof(State, gte_completion_tick));
|
||||||
|
|
||||||
|
// commit cycles here, should always be nonzero
|
||||||
|
if (m_delayed_cycles_add > 0)
|
||||||
|
{
|
||||||
|
EmitAdd(pending_ticks.GetHostRegister(), pending_ticks.GetHostRegister(),
|
||||||
|
Value::FromConstantU32(m_delayed_cycles_add), false);
|
||||||
|
m_delayed_cycles_add = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
LabelType gte_done;
|
||||||
|
EmitSub(gte_completion_tick.GetHostRegister(), gte_completion_tick.GetHostRegister(), pending_ticks, true);
|
||||||
|
EmitConditionalBranch(Condition::Below, false, &gte_done);
|
||||||
|
|
||||||
|
// add stall ticks
|
||||||
|
EmitAdd(pending_ticks.GetHostRegister(), pending_ticks.GetHostRegister(), gte_completion_tick, false);
|
||||||
|
|
||||||
|
// store new ticks
|
||||||
|
EmitBindLabel(&gte_done);
|
||||||
|
EmitStoreCPUStructField(offsetof(State, pending_ticks), pending_ticks);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
} // namespace CPU::Recompiler
|
} // namespace CPU::Recompiler
|
||||||
|
|
|
@ -2656,6 +2656,22 @@ void CodeGenerator::EmitICacheCheckAndUpdate()
|
||||||
m_register_cache.UninhibitAllocation();
|
m_register_cache.UninhibitAllocation();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CodeGenerator::EmitStallUntilGTEComplete()
|
||||||
|
{
|
||||||
|
m_emit->mov(GetHostReg32(RRETURN), m_emit->dword[GetCPUPtrReg() + offsetof(State, pending_ticks)]);
|
||||||
|
m_emit->mov(GetHostReg32(RARG1), m_emit->dword[GetCPUPtrReg() + offsetof(State, gte_completion_tick)]);
|
||||||
|
|
||||||
|
if (m_delayed_cycles_add > 0)
|
||||||
|
{
|
||||||
|
m_emit->add(GetHostReg32(RRETURN), static_cast<u32>(m_delayed_cycles_add));
|
||||||
|
m_delayed_cycles_add = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
m_emit->cmp(GetHostReg32(RARG1), GetHostReg32(RRETURN));
|
||||||
|
m_emit->cmova(GetHostReg32(RRETURN), GetHostReg32(RARG1));
|
||||||
|
m_emit->mov(m_emit->dword[GetCPUPtrReg() + offsetof(State, pending_ticks)], GetHostReg32(RRETURN));
|
||||||
|
}
|
||||||
|
|
||||||
void CodeGenerator::EmitBranch(const void* address, bool allow_scratch)
|
void CodeGenerator::EmitBranch(const void* address, bool allow_scratch)
|
||||||
{
|
{
|
||||||
const s64 jump_distance =
|
const s64 jump_distance =
|
||||||
|
|
|
@ -3,10 +3,12 @@
|
||||||
#include "common/bitutils.h"
|
#include "common/bitutils.h"
|
||||||
#include "common/state_wrapper.h"
|
#include "common/state_wrapper.h"
|
||||||
#include "cpu_core.h"
|
#include "cpu_core.h"
|
||||||
|
#include "cpu_core_private.h"
|
||||||
#include "host_display.h"
|
#include "host_display.h"
|
||||||
#include "host_interface.h"
|
#include "host_interface.h"
|
||||||
#include "pgxp.h"
|
#include "pgxp.h"
|
||||||
#include "settings.h"
|
#include "settings.h"
|
||||||
|
#include "timing_event.h"
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
|
@ -1157,11 +1159,13 @@ void ExecuteInstruction(u32 inst_bits)
|
||||||
switch (inst.command)
|
switch (inst.command)
|
||||||
{
|
{
|
||||||
case 0x01:
|
case 0x01:
|
||||||
|
CPU::AddGTETicks(15);
|
||||||
Execute_RTPS(inst);
|
Execute_RTPS(inst);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x06:
|
case 0x06:
|
||||||
{
|
{
|
||||||
|
CPU::AddGTETicks(8);
|
||||||
if (g_settings.gpu_pgxp_enable && g_settings.gpu_pgxp_culling)
|
if (g_settings.gpu_pgxp_enable && g_settings.gpu_pgxp_culling)
|
||||||
Execute_NCLIP_PGXP(inst);
|
Execute_NCLIP_PGXP(inst);
|
||||||
else
|
else
|
||||||
|
@ -1170,82 +1174,102 @@ void ExecuteInstruction(u32 inst_bits)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x0C:
|
case 0x0C:
|
||||||
|
CPU::AddGTETicks(6);
|
||||||
Execute_OP(inst);
|
Execute_OP(inst);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x10:
|
case 0x10:
|
||||||
|
CPU::AddGTETicks(8);
|
||||||
Execute_DPCS(inst);
|
Execute_DPCS(inst);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x11:
|
case 0x11:
|
||||||
|
CPU::AddGTETicks(7);
|
||||||
Execute_INTPL(inst);
|
Execute_INTPL(inst);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x12:
|
case 0x12:
|
||||||
|
CPU::AddGTETicks(8);
|
||||||
Execute_MVMVA(inst);
|
Execute_MVMVA(inst);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x13:
|
case 0x13:
|
||||||
|
CPU::AddGTETicks(19);
|
||||||
Execute_NCDS(inst);
|
Execute_NCDS(inst);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x14:
|
case 0x14:
|
||||||
|
CPU::AddGTETicks(13);
|
||||||
Execute_CDP(inst);
|
Execute_CDP(inst);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x16:
|
case 0x16:
|
||||||
|
CPU::AddGTETicks(44);
|
||||||
Execute_NCDT(inst);
|
Execute_NCDT(inst);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x1B:
|
case 0x1B:
|
||||||
|
CPU::AddGTETicks(17);
|
||||||
Execute_NCCS(inst);
|
Execute_NCCS(inst);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x1C:
|
case 0x1C:
|
||||||
|
CPU::AddGTETicks(11);
|
||||||
Execute_CC(inst);
|
Execute_CC(inst);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x1E:
|
case 0x1E:
|
||||||
|
CPU::AddGTETicks(14);
|
||||||
Execute_NCS(inst);
|
Execute_NCS(inst);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x20:
|
case 0x20:
|
||||||
|
CPU::AddGTETicks(30);
|
||||||
Execute_NCT(inst);
|
Execute_NCT(inst);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x28:
|
case 0x28:
|
||||||
|
CPU::AddGTETicks(5);
|
||||||
Execute_SQR(inst);
|
Execute_SQR(inst);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x29:
|
case 0x29:
|
||||||
|
CPU::AddGTETicks(8);
|
||||||
Execute_DCPL(inst);
|
Execute_DCPL(inst);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x2A:
|
case 0x2A:
|
||||||
|
CPU::AddGTETicks(17);
|
||||||
Execute_DPCT(inst);
|
Execute_DPCT(inst);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x2D:
|
case 0x2D:
|
||||||
|
CPU::AddGTETicks(5);
|
||||||
Execute_AVSZ3(inst);
|
Execute_AVSZ3(inst);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x2E:
|
case 0x2E:
|
||||||
|
CPU::AddGTETicks(6);
|
||||||
Execute_AVSZ4(inst);
|
Execute_AVSZ4(inst);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x30:
|
case 0x30:
|
||||||
|
CPU::AddGTETicks(23);
|
||||||
Execute_RTPT(inst);
|
Execute_RTPT(inst);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x3D:
|
case 0x3D:
|
||||||
|
CPU::AddGTETicks(5);
|
||||||
Execute_GPF(inst);
|
Execute_GPF(inst);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x3E:
|
case 0x3E:
|
||||||
|
CPU::AddGTETicks(5);
|
||||||
Execute_GPL(inst);
|
Execute_GPL(inst);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x3F:
|
case 0x3F:
|
||||||
|
CPU::AddGTETicks(39);
|
||||||
Execute_NCCT(inst);
|
Execute_NCCT(inst);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -1255,16 +1279,18 @@ void ExecuteInstruction(u32 inst_bits)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
InstructionImpl GetInstructionImpl(u32 inst_bits)
|
InstructionImpl GetInstructionImpl(u32 inst_bits, TickCount* ticks)
|
||||||
{
|
{
|
||||||
const Instruction inst{inst_bits};
|
const Instruction inst{inst_bits};
|
||||||
switch (inst.command)
|
switch (inst.command)
|
||||||
{
|
{
|
||||||
case 0x01:
|
case 0x01:
|
||||||
|
*ticks = 15;
|
||||||
return &Execute_RTPS;
|
return &Execute_RTPS;
|
||||||
|
|
||||||
case 0x06:
|
case 0x06:
|
||||||
{
|
{
|
||||||
|
*ticks = 8;
|
||||||
if (g_settings.gpu_pgxp_enable && g_settings.gpu_pgxp_culling)
|
if (g_settings.gpu_pgxp_enable && g_settings.gpu_pgxp_culling)
|
||||||
return &Execute_NCLIP_PGXP;
|
return &Execute_NCLIP_PGXP;
|
||||||
else
|
else
|
||||||
|
@ -1272,63 +1298,83 @@ InstructionImpl GetInstructionImpl(u32 inst_bits)
|
||||||
}
|
}
|
||||||
|
|
||||||
case 0x0C:
|
case 0x0C:
|
||||||
|
*ticks = 6;
|
||||||
return &Execute_OP;
|
return &Execute_OP;
|
||||||
|
|
||||||
case 0x10:
|
case 0x10:
|
||||||
|
*ticks = 8;
|
||||||
return &Execute_DPCS;
|
return &Execute_DPCS;
|
||||||
|
|
||||||
case 0x11:
|
case 0x11:
|
||||||
|
*ticks = 7;
|
||||||
return &Execute_INTPL;
|
return &Execute_INTPL;
|
||||||
|
|
||||||
case 0x12:
|
case 0x12:
|
||||||
|
*ticks = 8;
|
||||||
return &Execute_MVMVA;
|
return &Execute_MVMVA;
|
||||||
|
|
||||||
case 0x13:
|
case 0x13:
|
||||||
|
*ticks = 19;
|
||||||
return &Execute_NCDS;
|
return &Execute_NCDS;
|
||||||
|
|
||||||
case 0x14:
|
case 0x14:
|
||||||
|
*ticks = 13;
|
||||||
return &Execute_CDP;
|
return &Execute_CDP;
|
||||||
|
|
||||||
case 0x16:
|
case 0x16:
|
||||||
|
*ticks = 44;
|
||||||
return &Execute_NCDT;
|
return &Execute_NCDT;
|
||||||
|
|
||||||
case 0x1B:
|
case 0x1B:
|
||||||
|
*ticks = 17;
|
||||||
return &Execute_NCCS;
|
return &Execute_NCCS;
|
||||||
|
|
||||||
case 0x1C:
|
case 0x1C:
|
||||||
|
*ticks = 11;
|
||||||
return &Execute_CC;
|
return &Execute_CC;
|
||||||
|
|
||||||
case 0x1E:
|
case 0x1E:
|
||||||
|
*ticks = 14;
|
||||||
return &Execute_NCS;
|
return &Execute_NCS;
|
||||||
|
|
||||||
case 0x20:
|
case 0x20:
|
||||||
|
*ticks = 30;
|
||||||
return &Execute_NCT;
|
return &Execute_NCT;
|
||||||
|
|
||||||
case 0x28:
|
case 0x28:
|
||||||
|
*ticks = 5;
|
||||||
return &Execute_SQR;
|
return &Execute_SQR;
|
||||||
|
|
||||||
case 0x29:
|
case 0x29:
|
||||||
|
*ticks = 8;
|
||||||
return &Execute_DCPL;
|
return &Execute_DCPL;
|
||||||
|
|
||||||
case 0x2A:
|
case 0x2A:
|
||||||
|
*ticks = 17;
|
||||||
return &Execute_DPCT;
|
return &Execute_DPCT;
|
||||||
|
|
||||||
case 0x2D:
|
case 0x2D:
|
||||||
|
*ticks = 5;
|
||||||
return &Execute_AVSZ3;
|
return &Execute_AVSZ3;
|
||||||
|
|
||||||
case 0x2E:
|
case 0x2E:
|
||||||
|
*ticks = 6;
|
||||||
return &Execute_AVSZ4;
|
return &Execute_AVSZ4;
|
||||||
|
|
||||||
case 0x30:
|
case 0x30:
|
||||||
|
*ticks = 23;
|
||||||
return &Execute_RTPT;
|
return &Execute_RTPT;
|
||||||
|
|
||||||
case 0x3D:
|
case 0x3D:
|
||||||
|
*ticks = 5;
|
||||||
return &Execute_GPF;
|
return &Execute_GPF;
|
||||||
|
|
||||||
case 0x3E:
|
case 0x3E:
|
||||||
|
*ticks = 5;
|
||||||
return &Execute_GPL;
|
return &Execute_GPL;
|
||||||
|
|
||||||
case 0x3F:
|
case 0x3F:
|
||||||
|
*ticks = 39;
|
||||||
return &Execute_NCCT;
|
return &Execute_NCCT;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -20,6 +20,6 @@ u32* GetRegisterPtr(u32 index);
|
||||||
void ExecuteInstruction(u32 inst_bits);
|
void ExecuteInstruction(u32 inst_bits);
|
||||||
|
|
||||||
using InstructionImpl = void (*)(Instruction);
|
using InstructionImpl = void (*)(Instruction);
|
||||||
InstructionImpl GetInstructionImpl(u32 inst_bits);
|
InstructionImpl GetInstructionImpl(u32 inst_bits, TickCount* ticks);
|
||||||
|
|
||||||
} // namespace GTE
|
} // namespace GTE
|
||||||
|
|
Loading…
Reference in New Issue