From 872a48d616291e49bcfd9a5868708a2649dac05f Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 2 Mar 2025 15:17:09 +1000 Subject: [PATCH] testing --- src/core/cpu_code_cache.cpp | 225 ++- src/core/cpu_code_cache_private.h | 10 +- src/core/cpu_core.cpp | 2242 +++++++++++++++++------------ src/core/cpu_core_private.h | 73 + 4 files changed, 1539 insertions(+), 1011 deletions(-) diff --git a/src/core/cpu_code_cache.cpp b/src/core/cpu_code_cache.cpp index 40985ecd8..a65816933 100644 --- a/src/core/cpu_code_cache.cpp +++ b/src/core/cpu_code_cache.cpp @@ -73,10 +73,10 @@ static void SetRegAccess(InstructionInfo* inst, Reg reg, bool write); static void AddBlockToPageList(Block* block); static void RemoveBlockFromPageList(Block* block); -static Block* CreateCachedInterpreterBlock(u32 pc); +static void SetCachedInterpreterHandlers(); +static void CompileCachedInterpreterBlock(const u32); +static void ExecuteCachedInterpreterBlock(const CachedInterpreterInstruction* cinst); [[noreturn]] static void ExecuteCachedInterpreter(); -template -[[noreturn]] static void ExecuteCachedInterpreterImpl(); // Fast map provides lookup from PC to function // Function pointers are offset so that you don't need to subtract @@ -216,6 +216,12 @@ void CPU::CodeCache::Reset() CompileASMFunctions(); ResetCodeLUT(); } + else + { + SetCachedInterpreterHandlers(); + ResetCodeBuffer(); + ResetCodeLUT(); + } } void CPU::CodeCache::Shutdown() @@ -708,15 +714,136 @@ PageFaultHandler::HandlerResult PageFaultHandler::HandlePageFault(void* exceptio // MARK: - Cached Interpreter //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -CPU::CodeCache::Block* CPU::CodeCache::CreateCachedInterpreterBlock(u32 pc) +void CPU::CodeCache::SetCachedInterpreterHandlers() { - BlockMetadata metadata = {}; - ReadBlockInstructions(pc, &s_block_instructions, &metadata); - return CreateBlock(pc, s_block_instructions, metadata); + static constexpr const CachedInterpreterInstruction compile_or_revalidate_block[] = { + {&CompileCachedInterpreterBlock, 0u}, + {nullptr, 0u}, + }; + + g_compile_or_revalidate_block = compile_or_revalidate_block; } -template -[[noreturn]] void CPU::CodeCache::ExecuteCachedInterpreterImpl() +void CPU::CodeCache::CompileCachedInterpreterBlock(const u32) +{ + const u32 start_pc = g_state.pc; + MemMap::BeginCodeWrite(); + + // Revalidation + Block* block = LookupBlock(start_pc); + if (block) + { + // we should only be here if the block got invalidated + DebugAssert(block->state != BlockState::Valid); + if (RevalidateBlock(block)) + { + DebugAssert(block->host_code); + SetCodeLUT(start_pc, block->host_code); + // BacklinkBlocks(start_pc, block->host_code); + MemMap::EndCodeWrite(); + return; + } + + // remove outward links from this block, since we're recompiling it + // UnlinkBlockExits(block); + } + + BlockMetadata metadata = {}; + if (!ReadBlockInstructions(start_pc, &s_block_instructions, &metadata)) + { + ERROR_LOG("Failed to read block at 0x{:08X}, falling back to uncached interpreter", start_pc); + Panic("Fixme"); + } + + const u32 required_space = sizeof(CachedInterpreterInstruction) * (static_cast(s_block_instructions.size()) + 3); + if (GetFreeCodeSpace() < required_space) + { + ERROR_LOG("Out of code space while compiling {:08X}. Resetting code cache.", start_pc); + CodeCache::Reset(); + } + + block = CreateBlock(start_pc, s_block_instructions, metadata); + if (!block) + { + Panic("Fixme"); + return; + } + + const CPU::Instruction* mips_insns = block->Instructions(); + CachedInterpreterInstruction* cstart = reinterpret_cast(GetFreeCodePointer()); + CachedInterpreterInstruction* cinst = cstart; + + if (false) + { + cinst->handler = [](u32) { LogCurrentState(); }; + cinst->arg = 0; + cinst++; + } + + if (block->HasFlag(BlockFlags::IsUsingICache)) + { + cinst->handler = &CheckAndUpdateICacheTags; + cinst->arg = block->icache_line_count; + cinst++; + } + else if (block->HasFlag(BlockFlags::NeedsDynamicFetchTicks)) + { + static const auto dynamic_fetch_handler = [](u32 size) { + AddPendingTicks( + static_cast(size * static_cast(*Bus::GetMemoryAccessTimePtr( + g_state.pc & PHYSICAL_MEMORY_ADDRESS_MASK, MemoryAccessSize::Word)))); + }; + + cinst->handler = dynamic_fetch_handler; + cinst->arg = block->size; + cinst++; + } + else if (block->uncached_fetch_ticks > 0) + { + cinst->handler = reinterpret_cast(&CPU::AddPendingTicks); + cinst->arg = static_cast(block->uncached_fetch_ticks); + cinst++; + } + + for (u32 i = 0; i < block->size; i++) + { + const Instruction insn = *(mips_insns++); + cinst->handler = GetCachedInterpreterHandler(insn); + cinst->arg = insn.bits; + if (!cinst->handler) + Panic("Fixme"); + + cinst++; + } + + // end + cinst->handler = nullptr; + cinst->arg = 0; + cinst++; + + block->host_code = cstart; + block->host_code_size = static_cast(cinst - cstart) * sizeof(CachedInterpreterInstruction); + + SetCodeLUT(start_pc, cstart); + CommitCode(required_space); + MemMap::EndCodeWrite(); + + // execute it + ExecuteCachedInterpreterBlock(cstart); + + // TODO: Block linking! +} + +ALWAYS_INLINE_RELEASE void CPU::CodeCache::ExecuteCachedInterpreterBlock(const CachedInterpreterInstruction* cinst) +{ + do + { + cinst->handler(cinst->arg); + cinst++; + } while (cinst->handler); +} + +[[noreturn]] void CPU::CodeCache::ExecuteCachedInterpreter() { #define CHECK_DOWNCOUNT() \ if (g_state.pending_ticks >= g_state.downcount) \ @@ -733,94 +860,32 @@ template LogCurrentState(); #endif #if 0 - if ((g_state.pending_ticks + TimingEvents::GetGlobalTickCounter()) == 3301006214) + if ((g_state.pending_ticks + TimingEvents::GetGlobalTickCounter()) == 108345628) __debugbreak(); #endif + // Manually done because we don't want to compile blocks without a LUT. const u32 pc = g_state.pc; const u32 table = pc >> LUT_TABLE_SHIFT; - Block* block; - if (s_block_lut[table]) - { - const u32 idx = (pc & 0xFFFF) >> 2; - block = s_block_lut[table][idx]; - } - else - { - // Likely invalid code... - goto interpret_block; - } + const u32 idx = (pc & 0xFFFF) >> 2; + const CachedInterpreterInstruction* cinst = + reinterpret_cast(g_code_lut[table][idx]); reexecute_block: - if (!block) - { - if ((block = CreateCachedInterpreterBlock(pc))->size == 0) [[unlikely]] - goto interpret_block; - } - else - { - if (block->state == BlockState::FallbackToInterpreter) [[unlikely]] - goto interpret_block; - - if ((block->state != BlockState::Valid && !RevalidateBlock(block)) || - (block->protection == PageProtectionMode::ManualCheck && !IsBlockCodeCurrent(block))) - { - if ((block = CreateCachedInterpreterBlock(pc))->size == 0) [[unlikely]] - goto interpret_block; - } - } - + // Execute block. DebugAssert(!(HasPendingInterrupt())); - if (block->HasFlag(BlockFlags::IsUsingICache)) - { - CheckAndUpdateICacheTags(block->icache_line_count); - } - else if (block->HasFlag(BlockFlags::NeedsDynamicFetchTicks)) - { - AddPendingTicks( - static_cast(block->size * static_cast(*Bus::GetMemoryAccessTimePtr( - block->pc & PHYSICAL_MEMORY_ADDRESS_MASK, MemoryAccessSize::Word)))); - } - else - { - AddPendingTicks(block->uncached_fetch_ticks); - } - - InterpretCachedBlock(block); - + ExecuteCachedInterpreterBlock(cinst); CHECK_DOWNCOUNT(); // Handle self-looping blocks - if (g_state.pc == block->pc) - goto reexecute_block; - else - continue; - - interpret_block: - InterpretUncachedBlock(); - CHECK_DOWNCOUNT(); - continue; + // if (g_state.pc == pc) + // goto reexecute_block; } TimingEvents::RunEvents(); } } -[[noreturn]] void CPU::CodeCache::ExecuteCachedInterpreter() -{ - if (g_settings.gpu_pgxp_enable) - { - if (g_settings.gpu_pgxp_cpu) - ExecuteCachedInterpreterImpl(); - else - ExecuteCachedInterpreterImpl(); - } - else - { - ExecuteCachedInterpreterImpl(); - } -} - void CPU::CodeCache::LogCurrentState() { #if 0 diff --git a/src/core/cpu_code_cache_private.h b/src/core/cpu_code_cache_private.h index 0b6697c24..c74140862 100644 --- a/src/core/cpu_code_cache_private.h +++ b/src/core/cpu_code_cache_private.h @@ -205,8 +205,14 @@ struct PageProtectionInfo }; static_assert(sizeof(PageProtectionInfo) == (sizeof(Block*) * 2 + 8)); -template -void InterpretCachedBlock(const Block* block); +using CachedInterpreterHandler = void(*)(u32 arg); +CachedInterpreterHandler GetCachedInterpreterHandler(const Instruction inst); + +struct CachedInterpreterInstruction +{ + CachedInterpreterHandler handler; + u32 arg; +}; template void InterpretUncachedBlock(); diff --git a/src/core/cpu_core.cpp b/src/core/cpu_core.cpp index 26c86e3b4..8c08a3900 100644 --- a/src/core/cpu_core.cpp +++ b/src/core/cpu_core.cpp @@ -98,6 +98,12 @@ static bool WriteMemoryByte(VirtualMemoryAddress addr, u32 value); static bool WriteMemoryHalfWord(VirtualMemoryAddress addr, u32 value); static bool WriteMemoryWord(VirtualMemoryAddress addr, u32 value); +#define DECLARE_INSTRUCTION(insn) \ + template \ + static void Execute_##insn(const Instruction inst); +CPU_FOR_EACH_INSTRUCTION(DECLARE_INSTRUCTION) +#undef DECLARE_INSTRUCTION + constinit State g_state; bool TRACE_EXECUTION = false; @@ -845,6 +851,989 @@ void CPU::DisassembleAndPrint(u32 addr, u32 instructions_before /* = 0 */, u32 i } } +#define DEFINE_INSTRUCTION(insn) \ + template \ + ALWAYS_INLINE_RELEASE void CPU::Execute_##insn(const Instruction inst) + +DEFINE_INSTRUCTION(sll) +{ + const u32 rtVal = ReadReg(inst.r.rt); + const u32 rdVal = rtVal << inst.r.shamt; + WriteReg(inst.r.rd, rdVal); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SLL(inst, rtVal); +} + +DEFINE_INSTRUCTION(srl) +{ + const u32 rtVal = ReadReg(inst.r.rt); + const u32 rdVal = rtVal >> inst.r.shamt; + WriteReg(inst.r.rd, rdVal); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SRL(inst, rtVal); +} + +DEFINE_INSTRUCTION(sra) +{ + const u32 rtVal = ReadReg(inst.r.rt); + const u32 rdVal = static_cast(static_cast(rtVal) >> inst.r.shamt); + WriteReg(inst.r.rd, rdVal); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SRA(inst, rtVal); +} + +DEFINE_INSTRUCTION(sllv) +{ + const u32 rtVal = ReadReg(inst.r.rt); + const u32 shamt = ReadReg(inst.r.rs) & UINT32_C(0x1F); + const u32 rdVal = rtVal << shamt; + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SLLV(inst, rtVal, shamt); + + WriteReg(inst.r.rd, rdVal); +} + +DEFINE_INSTRUCTION(srlv) +{ + const u32 rtVal = ReadReg(inst.r.rt); + const u32 shamt = ReadReg(inst.r.rs) & UINT32_C(0x1F); + const u32 rdVal = rtVal >> shamt; + WriteReg(inst.r.rd, rdVal); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SRLV(inst, rtVal, shamt); +} + +DEFINE_INSTRUCTION(srav) +{ + const u32 rtVal = ReadReg(inst.r.rt); + const u32 shamt = ReadReg(inst.r.rs) & UINT32_C(0x1F); + const u32 rdVal = static_cast(static_cast(rtVal) >> shamt); + WriteReg(inst.r.rd, rdVal); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SRAV(inst, rtVal, shamt); +} + +DEFINE_INSTRUCTION(and) +{ + const u32 rsVal = ReadReg(inst.r.rs); + const u32 rtVal = ReadReg(inst.r.rt); + const u32 new_value = rsVal & rtVal; + WriteReg(inst.r.rd, new_value); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_AND_(inst, rsVal, rtVal); +} + +DEFINE_INSTRUCTION(or) +{ + const u32 rsVal = ReadReg(inst.r.rs); + const u32 rtVal = ReadReg(inst.r.rt); + const u32 new_value = rsVal | rtVal; + WriteReg(inst.r.rd, new_value); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_OR_(inst, rsVal, rtVal); + else if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::TryMove(inst.r.rd, inst.r.rs, inst.r.rt); +} + +DEFINE_INSTRUCTION(xor) +{ + const u32 rsVal = ReadReg(inst.r.rs); + const u32 rtVal = ReadReg(inst.r.rt); + const u32 new_value = rsVal ^ rtVal; + WriteReg(inst.r.rd, new_value); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_XOR_(inst, rsVal, rtVal); + else if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::TryMove(inst.r.rd, inst.r.rs, inst.r.rt); +} + +DEFINE_INSTRUCTION(nor) +{ + const u32 rsVal = ReadReg(inst.r.rs); + const u32 rtVal = ReadReg(inst.r.rt); + const u32 new_value = ~(rsVal | rtVal); + WriteReg(inst.r.rd, new_value); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_NOR(inst, rsVal, rtVal); +} + +DEFINE_INSTRUCTION(add) +{ + const u32 rsVal = ReadReg(inst.r.rs); + const u32 rtVal = ReadReg(inst.r.rt); + const u32 rdVal = rsVal + rtVal; + if (AddOverflow(rsVal, rtVal, rdVal)) + { + RaiseException(Exception::Ov); + return; + } + + WriteReg(inst.r.rd, rdVal); + + if constexpr (pgxp_mode == PGXPMode::CPU) + PGXP::CPU_ADD(inst, rsVal, rtVal); + else if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::TryMove(inst.r.rd, inst.r.rs, inst.r.rt); +} + +DEFINE_INSTRUCTION(addu) +{ + const u32 rsVal = ReadReg(inst.r.rs); + const u32 rtVal = ReadReg(inst.r.rt); + const u32 rdVal = rsVal + rtVal; + WriteReg(inst.r.rd, rdVal); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_ADD(inst, rsVal, rtVal); + else if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::TryMove(inst.r.rd, inst.r.rs, inst.r.rt); +} + +DEFINE_INSTRUCTION(sub) +{ + const u32 rsVal = ReadReg(inst.r.rs); + const u32 rtVal = ReadReg(inst.r.rt); + const u32 rdVal = rsVal - rtVal; + if (SubOverflow(rsVal, rtVal, rdVal)) + { + RaiseException(Exception::Ov); + return; + } + + WriteReg(inst.r.rd, rdVal); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SUB(inst, rsVal, rtVal); +} + +DEFINE_INSTRUCTION(subu) +{ + const u32 rsVal = ReadReg(inst.r.rs); + const u32 rtVal = ReadReg(inst.r.rt); + const u32 rdVal = rsVal - rtVal; + WriteReg(inst.r.rd, rdVal); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SUB(inst, rsVal, rtVal); +} + +DEFINE_INSTRUCTION(mult) +{ + const u32 lhs = ReadReg(inst.r.rs); + const u32 rhs = ReadReg(inst.r.rt); + const u64 result = static_cast(static_cast(SignExtend64(lhs)) * static_cast(SignExtend64(rhs))); + + g_state.regs.hi = Truncate32(result >> 32); + g_state.regs.lo = Truncate32(result); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_MULT(inst, lhs, rhs); +} + +DEFINE_INSTRUCTION(multu) +{ + const u32 lhs = ReadReg(inst.r.rs); + const u32 rhs = ReadReg(inst.r.rt); + const u64 result = ZeroExtend64(lhs) * ZeroExtend64(rhs); + + g_state.regs.hi = Truncate32(result >> 32); + g_state.regs.lo = Truncate32(result); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_MULTU(inst, lhs, rhs); +} + +DEFINE_INSTRUCTION(div) +{ + const s32 num = static_cast(ReadReg(inst.r.rs)); + const s32 denom = static_cast(ReadReg(inst.r.rt)); + + if (denom == 0) + { + // divide by zero + g_state.regs.lo = (num >= 0) ? UINT32_C(0xFFFFFFFF) : UINT32_C(1); + g_state.regs.hi = static_cast(num); + } + else if (static_cast(num) == UINT32_C(0x80000000) && denom == -1) + { + // unrepresentable + g_state.regs.lo = UINT32_C(0x80000000); + g_state.regs.hi = 0; + } + else + { + g_state.regs.lo = static_cast(num / denom); + g_state.regs.hi = static_cast(num % denom); + } + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_DIV(inst, num, denom); +} + +DEFINE_INSTRUCTION(divu) +{ + const u32 num = ReadReg(inst.r.rs); + const u32 denom = ReadReg(inst.r.rt); + + if (denom == 0) + { + // divide by zero + g_state.regs.lo = UINT32_C(0xFFFFFFFF); + g_state.regs.hi = static_cast(num); + } + else + { + g_state.regs.lo = num / denom; + g_state.regs.hi = num % denom; + } + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_DIVU(inst, num, denom); +} + +DEFINE_INSTRUCTION(mfhi) +{ + const u32 value = g_state.regs.hi; + WriteReg(inst.r.rd, value); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_MOVE(static_cast(inst.r.rd.GetValue()), static_cast(Reg::hi), value); +} + +DEFINE_INSTRUCTION(mthi) +{ + const u32 value = ReadReg(inst.r.rs); + g_state.regs.hi = value; + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_MOVE(static_cast(Reg::hi), static_cast(inst.r.rs.GetValue()), value); +} + +DEFINE_INSTRUCTION(mflo) +{ + const u32 value = g_state.regs.lo; + WriteReg(inst.r.rd, value); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_MOVE(static_cast(inst.r.rd.GetValue()), static_cast(Reg::lo), value); +} + +DEFINE_INSTRUCTION(mtlo) +{ + const u32 value = ReadReg(inst.r.rs); + g_state.regs.lo = value; + + if constexpr (pgxp_mode == PGXPMode::CPU) + PGXP::CPU_MOVE(static_cast(Reg::lo), static_cast(inst.r.rs.GetValue()), value); +} + +DEFINE_INSTRUCTION(slt) +{ + const u32 rsVal = ReadReg(inst.r.rs); + const u32 rtVal = ReadReg(inst.r.rt); + const u32 result = BoolToUInt32(static_cast(rsVal) < static_cast(rtVal)); + WriteReg(inst.r.rd, result); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SLT(inst, rsVal, rtVal); +} + +DEFINE_INSTRUCTION(sltu) +{ + const u32 rsVal = ReadReg(inst.r.rs); + const u32 rtVal = ReadReg(inst.r.rt); + const u32 result = BoolToUInt32(rsVal < rtVal); + WriteReg(inst.r.rd, result); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SLTU(inst, rsVal, rtVal); +} + +DEFINE_INSTRUCTION(jr) +{ + g_state.next_instruction_is_branch_delay_slot = true; + const u32 target = ReadReg(inst.r.rs); + Branch(target); +} + +DEFINE_INSTRUCTION(jalr) +{ + g_state.next_instruction_is_branch_delay_slot = true; + const u32 target = ReadReg(inst.r.rs); + WriteReg(inst.r.rd, g_state.npc); + Branch(target); +} + +DEFINE_INSTRUCTION(syscall) +{ + RaiseException(Exception::Syscall); +} + +DEFINE_INSTRUCTION(break) +{ + RaiseBreakException(Cop0Registers::CAUSE::MakeValueForException( + Exception::BP, g_state.current_instruction_in_branch_delay_slot, + g_state.current_instruction_was_branch_taken, g_state.current_instruction.cop.cop_n), + g_state.current_instruction_pc, g_state.current_instruction.bits); +} + +DEFINE_INSTRUCTION(lui) +{ + const u32 value = inst.i.imm_zext32() << 16; + WriteReg(inst.i.rt, value); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_LUI(inst); +} + +DEFINE_INSTRUCTION(andi) +{ + const u32 rsVal = ReadReg(inst.i.rs); + const u32 new_value = rsVal & inst.i.imm_zext32(); + WriteReg(inst.i.rt, new_value); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_ANDI(inst, rsVal); +} + +DEFINE_INSTRUCTION(ori) +{ + const u32 rsVal = ReadReg(inst.i.rs); + const u32 imm = inst.i.imm_zext32(); + const u32 rtVal = rsVal | imm; + WriteReg(inst.i.rt, rtVal); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_ORI(inst, rsVal); + else if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::TryMoveImm(inst.r.rd, inst.r.rs, imm); +} + +DEFINE_INSTRUCTION(xori) +{ + const u32 rsVal = ReadReg(inst.i.rs); + const u32 imm = inst.i.imm_zext32(); + const u32 new_value = ReadReg(inst.i.rs) ^ imm; + WriteReg(inst.i.rt, new_value); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_XORI(inst, rsVal); + else if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::TryMoveImm(inst.r.rd, inst.r.rs, imm); +} + +DEFINE_INSTRUCTION(addi) +{ + const u32 rsVal = ReadReg(inst.i.rs); + const u32 imm = inst.i.imm_sext32(); + const u32 rtVal = rsVal + imm; + if (AddOverflow(rsVal, imm, rtVal)) + { + RaiseException(Exception::Ov); + return; + } + + WriteReg(inst.i.rt, rtVal); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_ADDI(inst, rsVal); + else if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::TryMoveImm(inst.r.rd, inst.r.rs, imm); +} + +DEFINE_INSTRUCTION(addiu) +{ + const u32 rsVal = ReadReg(inst.i.rs); + const u32 imm = inst.i.imm_sext32(); + const u32 rtVal = rsVal + imm; + WriteReg(inst.i.rt, rtVal); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_ADDI(inst, rsVal); + else if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::TryMoveImm(inst.r.rd, inst.r.rs, imm); +} + +DEFINE_INSTRUCTION(slti) +{ + const u32 rsVal = ReadReg(inst.i.rs); + const u32 result = BoolToUInt32(static_cast(rsVal) < static_cast(inst.i.imm_sext32())); + WriteReg(inst.i.rt, result); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SLTI(inst, rsVal); +} + +DEFINE_INSTRUCTION(sltiu) +{ + const u32 result = BoolToUInt32(ReadReg(inst.i.rs) < inst.i.imm_sext32()); + WriteReg(inst.i.rt, result); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SLTIU(inst, ReadReg(inst.i.rs)); +} + +DEFINE_INSTRUCTION(lb) +{ + const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); + if constexpr (debug) + { + Cop0DataBreakpointCheck(addr); + MemoryBreakpointCheck(addr); + } + + u8 value; + if (!ReadMemoryByte(addr, &value)) + return; + + const u32 sxvalue = SignExtend32(value); + + WriteRegDelayed(inst.i.rt, sxvalue); + + if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::CPU_LBx(inst, addr, sxvalue); +} + +DEFINE_INSTRUCTION(lbu) +{ + const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); + if constexpr (debug) + { + Cop0DataBreakpointCheck(addr); + MemoryBreakpointCheck(addr); + } + + u8 value; + if (!ReadMemoryByte(addr, &value)) + return; + + const u32 zxvalue = ZeroExtend32(value); + WriteRegDelayed(inst.i.rt, zxvalue); + + if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::CPU_LBx(inst, addr, zxvalue); +} + +DEFINE_INSTRUCTION(lh) +{ + const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); + if constexpr (debug) + { + Cop0DataBreakpointCheck(addr); + MemoryBreakpointCheck(addr); + } + + u16 value; + if (!ReadMemoryHalfWord(addr, &value)) + return; + + const u32 sxvalue = SignExtend32(value); + WriteRegDelayed(inst.i.rt, sxvalue); + + if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::CPU_LH(inst, addr, sxvalue); +} + +DEFINE_INSTRUCTION(lhu) +{ + const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); + if constexpr (debug) + { + Cop0DataBreakpointCheck(addr); + MemoryBreakpointCheck(addr); + } + + u16 value; + if (!ReadMemoryHalfWord(addr, &value)) + return; + + const u32 zxvalue = ZeroExtend32(value); + WriteRegDelayed(inst.i.rt, zxvalue); + + if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::CPU_LHU(inst, addr, zxvalue); +} + +DEFINE_INSTRUCTION(lw) +{ + const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); + if constexpr (debug) + { + Cop0DataBreakpointCheck(addr); + MemoryBreakpointCheck(addr); + } + + u32 value; + if (!ReadMemoryWord(addr, &value)) + return; + + WriteRegDelayed(inst.i.rt, value); + + if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::CPU_LW(inst, addr, value); +} + +DEFINE_INSTRUCTION(lwl) +{ + const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); + const VirtualMemoryAddress aligned_addr = addr & ~UINT32_C(3); + if constexpr (debug) + { + Cop0DataBreakpointCheck(addr); + MemoryBreakpointCheck(addr); + } + + u32 aligned_value; + if (!ReadMemoryWord(aligned_addr, &aligned_value)) + return; + + // Bypasses load delay. No need to check the old value since this is the delay slot or it's not relevant. + const u32 existing_value = (inst.i.rt == g_state.load_delay_reg) ? g_state.load_delay_value : ReadReg(inst.i.rt); + const u8 shift = (Truncate8(addr) & u8(3)) * u8(8); + const u32 mask = UINT32_C(0x00FFFFFF) >> shift; + const u32 new_value = (existing_value & mask) | (aligned_value << (24 - shift)); + + WriteRegDelayed(inst.i.rt, new_value); + + if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::CPU_LW(inst, addr, new_value); +} + +DEFINE_INSTRUCTION(lwr) +{ + const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); + const VirtualMemoryAddress aligned_addr = addr & ~UINT32_C(3); + if constexpr (debug) + { + Cop0DataBreakpointCheck(addr); + MemoryBreakpointCheck(addr); + } + + u32 aligned_value; + if (!ReadMemoryWord(aligned_addr, &aligned_value)) + return; + + // Bypasses load delay. No need to check the old value since this is the delay slot or it's not relevant. + const u32 existing_value = (inst.i.rt == g_state.load_delay_reg) ? g_state.load_delay_value : ReadReg(inst.i.rt); + const u8 shift = (Truncate8(addr) & u8(3)) * u8(8); + const u32 mask = UINT32_C(0xFFFFFF00) << (24 - shift); + const u32 new_value = (existing_value & mask) | (aligned_value >> shift); + + WriteRegDelayed(inst.i.rt, new_value); + + if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::CPU_LW(inst, addr, new_value); +} + +DEFINE_INSTRUCTION(sb) +{ + const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); + if constexpr (debug) + { + Cop0DataBreakpointCheck(addr); + MemoryBreakpointCheck(addr); + } + + const u32 value = ReadReg(inst.i.rt); + WriteMemoryByte(addr, value); + + if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::CPU_SB(inst, addr, value); +} + +DEFINE_INSTRUCTION(sh) +{ + const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); + if constexpr (debug) + { + Cop0DataBreakpointCheck(addr); + MemoryBreakpointCheck(addr); + } + + const u32 value = ReadReg(inst.i.rt); + WriteMemoryHalfWord(addr, value); + + if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::CPU_SH(inst, addr, value); +} + +DEFINE_INSTRUCTION(sw) +{ + const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); + if constexpr (debug) + { + Cop0DataBreakpointCheck(addr); + MemoryBreakpointCheck(addr); + } + + const u32 value = ReadReg(inst.i.rt); + WriteMemoryWord(addr, value); + + if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::CPU_SW(inst, addr, value); +} + +DEFINE_INSTRUCTION(swl) +{ + const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); + const VirtualMemoryAddress aligned_addr = addr & ~UINT32_C(3); + if constexpr (debug) + { + Cop0DataBreakpointCheck(aligned_addr); + MemoryBreakpointCheck(aligned_addr); + } + + const u32 reg_value = ReadReg(inst.i.rt); + const u8 shift = (Truncate8(addr) & u8(3)) * u8(8); + u32 mem_value; + if (!ReadMemoryWord(aligned_addr, &mem_value)) + return; + + const u32 mem_mask = UINT32_C(0xFFFFFF00) << shift; + const u32 new_value = (mem_value & mem_mask) | (reg_value >> (24 - shift)); + + WriteMemoryWord(aligned_addr, new_value); + + if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::CPU_SW(inst, aligned_addr, new_value); +} + +DEFINE_INSTRUCTION(swr) +{ + const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); + const VirtualMemoryAddress aligned_addr = addr & ~UINT32_C(3); + if constexpr (debug) + { + Cop0DataBreakpointCheck(aligned_addr); + MemoryBreakpointCheck(aligned_addr); + } + + const u32 reg_value = ReadReg(inst.i.rt); + const u8 shift = (Truncate8(addr) & u8(3)) * u8(8); + u32 mem_value; + if (!ReadMemoryWord(aligned_addr, &mem_value)) + return; + + const u32 mem_mask = UINT32_C(0x00FFFFFF) >> (24 - shift); + const u32 new_value = (mem_value & mem_mask) | (reg_value << shift); + + WriteMemoryWord(aligned_addr, new_value); + + if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::CPU_SW(inst, aligned_addr, new_value); +} + +DEFINE_INSTRUCTION(j) +{ + g_state.next_instruction_is_branch_delay_slot = true; + Branch((g_state.pc & UINT32_C(0xF0000000)) | (inst.j.target << 2)); +} + +DEFINE_INSTRUCTION(jal) +{ + WriteReg(Reg::ra, g_state.npc); + g_state.next_instruction_is_branch_delay_slot = true; + Branch((g_state.pc & UINT32_C(0xF0000000)) | (inst.j.target << 2)); +} + +DEFINE_INSTRUCTION(beq) +{ + // We're still flagged as a branch delay slot even if the branch isn't taken. + g_state.next_instruction_is_branch_delay_slot = true; + const bool branch = (ReadReg(inst.i.rs) == ReadReg(inst.i.rt)); + if (branch) + Branch(g_state.pc + (inst.i.imm_sext32() << 2)); +} + +DEFINE_INSTRUCTION(bne) +{ + g_state.next_instruction_is_branch_delay_slot = true; + const bool branch = (ReadReg(inst.i.rs) != ReadReg(inst.i.rt)); + if (branch) + Branch(g_state.pc + (inst.i.imm_sext32() << 2)); +} + +DEFINE_INSTRUCTION(bgtz) +{ + g_state.next_instruction_is_branch_delay_slot = true; + const bool branch = (static_cast(ReadReg(inst.i.rs)) > 0); + if (branch) + Branch(g_state.pc + (inst.i.imm_sext32() << 2)); +} + +DEFINE_INSTRUCTION(blez) +{ + g_state.next_instruction_is_branch_delay_slot = true; + const bool branch = (static_cast(ReadReg(inst.i.rs)) <= 0); + if (branch) + Branch(g_state.pc + (inst.i.imm_sext32() << 2)); +} + +DEFINE_INSTRUCTION(b) +{ + g_state.next_instruction_is_branch_delay_slot = true; + const u8 rt = static_cast(inst.i.rt.GetValue()); + + // bgez is the inverse of bltz, so simply do ltz and xor the result + const bool bgez = ConvertToBoolUnchecked(rt & u8(1)); + const bool branch = (static_cast(ReadReg(inst.i.rs)) < 0) ^ bgez; + + // register is still linked even if the branch isn't taken + const bool link = (rt & u8(0x1E)) == u8(0x10); + if (link) + WriteReg(Reg::ra, g_state.npc); + + if (branch) + Branch(g_state.pc + (inst.i.imm_sext32() << 2)); +} + +DEFINE_INSTRUCTION(mfc0) +{ + u32 value; + + switch (static_cast(inst.r.rd.GetValue())) + { + case Cop0Reg::BPC: + value = g_state.cop0_regs.BPC; + break; + + case Cop0Reg::BPCM: + value = g_state.cop0_regs.BPCM; + break; + + case Cop0Reg::BDA: + value = g_state.cop0_regs.BDA; + break; + + case Cop0Reg::BDAM: + value = g_state.cop0_regs.BDAM; + break; + + case Cop0Reg::DCIC: + value = g_state.cop0_regs.dcic.bits; + break; + + case Cop0Reg::JUMPDEST: + value = g_state.cop0_regs.TAR; + break; + + case Cop0Reg::BadVaddr: + value = g_state.cop0_regs.BadVaddr; + break; + + case Cop0Reg::SR: + value = g_state.cop0_regs.sr.bits; + break; + + case Cop0Reg::CAUSE: + value = g_state.cop0_regs.cause.bits; + break; + + case Cop0Reg::EPC: + value = g_state.cop0_regs.EPC; + break; + + case Cop0Reg::PRID: + value = g_state.cop0_regs.PRID; + break; + + default: + RaiseException(Exception::RI); + return; + } + + WriteRegDelayed(inst.r.rt, value); + + if constexpr (pgxp_mode == PGXPMode::CPU) + PGXP::CPU_MFC0(inst, value); +} + +DEFINE_INSTRUCTION(mtc0) +{ + u32 value = ReadReg(inst.r.rt); + [[maybe_unused]] const u32 orig_value = value; + + switch (static_cast(inst.r.rd.GetValue())) + { + case Cop0Reg::BPC: + { + g_state.cop0_regs.BPC = value; + DEV_LOG("COP0 BPC <- {:08X}", value); + } + break; + + case Cop0Reg::BPCM: + { + g_state.cop0_regs.BPCM = value; + DEV_LOG("COP0 BPCM <- {:08X}", value); + if (UpdateDebugDispatcherFlag()) + ExitExecution(); + } + break; + + case Cop0Reg::BDA: + { + g_state.cop0_regs.BDA = value; + DEV_LOG("COP0 BDA <- {:08X}", value); + } + break; + + case Cop0Reg::BDAM: + { + g_state.cop0_regs.BDAM = value; + DEV_LOG("COP0 BDAM <- {:08X}", value); + } + break; + + case Cop0Reg::JUMPDEST: + { + WARNING_LOG("Ignoring write to Cop0 JUMPDEST"); + } + break; + + case Cop0Reg::DCIC: + { + g_state.cop0_regs.dcic.bits = + (g_state.cop0_regs.dcic.bits & ~Cop0Registers::DCIC::WRITE_MASK) | (value & Cop0Registers::DCIC::WRITE_MASK); + DEV_LOG("COP0 DCIC <- {:08X} (now {:08X})", value, g_state.cop0_regs.dcic.bits); + value = g_state.cop0_regs.dcic.bits; + if (UpdateDebugDispatcherFlag()) + ExitExecution(); + } + break; + + case Cop0Reg::SR: + { + g_state.cop0_regs.sr.bits = + (g_state.cop0_regs.sr.bits & ~Cop0Registers::SR::WRITE_MASK) | (value & Cop0Registers::SR::WRITE_MASK); + DEBUG_LOG("COP0 SR <- {:08X} (now {:08X})", value, g_state.cop0_regs.sr.bits); + value = g_state.cop0_regs.sr.bits; + UpdateMemoryPointers(); + CheckForPendingInterrupt(); + } + break; + + case Cop0Reg::CAUSE: + { + g_state.cop0_regs.cause.bits = + (g_state.cop0_regs.cause.bits & ~Cop0Registers::CAUSE::WRITE_MASK) | (value & Cop0Registers::CAUSE::WRITE_MASK); + DEBUG_LOG("COP0 CAUSE <- {:08X} (now {:08X})", value, g_state.cop0_regs.cause.bits); + value = g_state.cop0_regs.cause.bits; + CheckForPendingInterrupt(); + } + break; + + [[unlikely]] default: + RaiseException(Exception::RI); + return; + } + + if constexpr (pgxp_mode == PGXPMode::CPU) + PGXP::CPU_MTC0(inst, value, orig_value); +} + +DEFINE_INSTRUCTION(rfe) +{ + // restore mode + g_state.cop0_regs.sr.mode_bits = + (g_state.cop0_regs.sr.mode_bits & UINT32_C(0b110000)) | (g_state.cop0_regs.sr.mode_bits >> 2); + CheckForPendingInterrupt(); +} + +DEFINE_INSTRUCTION(cfc2) +{ + StallUntilGTEComplete(); + + const u32 value = GTE::ReadRegister(static_cast(inst.r.rd.GetValue()) + 32); + WriteRegDelayed(inst.r.rt, value); + + if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::CPU_MFC2(inst, value); +} + +DEFINE_INSTRUCTION(ctc2) +{ + const u32 value = ReadReg(inst.r.rt); + GTE::WriteRegister(static_cast(inst.r.rd.GetValue()) + 32, value); + + if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::CPU_MTC2(inst, value); +} + +DEFINE_INSTRUCTION(mfc2) +{ + StallUntilGTEComplete(); + + const u32 value = GTE::ReadRegister(static_cast(inst.r.rd.GetValue())); + WriteRegDelayed(inst.r.rt, value); + + if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::CPU_MFC2(inst, value); +} + +DEFINE_INSTRUCTION(mtc2) +{ + const u32 value = ReadReg(inst.r.rt); + GTE::WriteRegister(static_cast(inst.r.rd.GetValue()), value); + + if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::CPU_MTC2(inst, value); +} + +DEFINE_INSTRUCTION(cop2) +{ + StallUntilGTEComplete(); + GTE::ExecuteInstruction(inst.bits); +} + +DEFINE_INSTRUCTION(lwc2) +{ + if (!g_state.cop0_regs.sr.CE2) + { + WARNING_LOG("Coprocessor 2 not enabled"); + RaiseException(Exception::CpU); + return; + } + + const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); + u32 value; + if (!ReadMemoryWord(addr, &value)) + return; + + GTE::WriteRegister(ZeroExtend32(static_cast(inst.i.rt.GetValue())), value); + + if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::CPU_LWC2(inst, addr, value); +} + +DEFINE_INSTRUCTION(swc2) +{ + if (!g_state.cop0_regs.sr.CE2) + { + WARNING_LOG("Coprocessor 2 not enabled"); + RaiseException(Exception::CpU); + return; + } + + StallUntilGTEComplete(); + + const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); + const u32 value = GTE::ReadRegister(ZeroExtend32(static_cast(inst.i.rt.GetValue()))); + WriteMemoryWord(addr, value); + + if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::CPU_SWC2(inst, addr, value); +} + template ALWAYS_INLINE_RELEASE void CPU::ExecuteInstruction() { @@ -875,777 +1864,231 @@ restart_instruction: switch (inst.r.funct) { case InstructionFunct::sll: - { - const u32 rtVal = ReadReg(inst.r.rt); - const u32 rdVal = rtVal << inst.r.shamt; - WriteReg(inst.r.rd, rdVal); - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_SLL(inst, rtVal); - } - break; + Execute_sll(inst); + break; case InstructionFunct::srl: - { - const u32 rtVal = ReadReg(inst.r.rt); - const u32 rdVal = rtVal >> inst.r.shamt; - WriteReg(inst.r.rd, rdVal); - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_SRL(inst, rtVal); - } - break; + Execute_srl(inst); + break; case InstructionFunct::sra: - { - const u32 rtVal = ReadReg(inst.r.rt); - const u32 rdVal = static_cast(static_cast(rtVal) >> inst.r.shamt); - WriteReg(inst.r.rd, rdVal); - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_SRA(inst, rtVal); - } - break; + Execute_sra(inst); + break; case InstructionFunct::sllv: - { - const u32 rtVal = ReadReg(inst.r.rt); - const u32 shamt = ReadReg(inst.r.rs) & UINT32_C(0x1F); - const u32 rdVal = rtVal << shamt; - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_SLLV(inst, rtVal, shamt); - - WriteReg(inst.r.rd, rdVal); - } - break; + Execute_sllv(inst); + break; case InstructionFunct::srlv: - { - const u32 rtVal = ReadReg(inst.r.rt); - const u32 shamt = ReadReg(inst.r.rs) & UINT32_C(0x1F); - const u32 rdVal = rtVal >> shamt; - WriteReg(inst.r.rd, rdVal); - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_SRLV(inst, rtVal, shamt); - } - break; + Execute_srlv(inst); + break; case InstructionFunct::srav: - { - const u32 rtVal = ReadReg(inst.r.rt); - const u32 shamt = ReadReg(inst.r.rs) & UINT32_C(0x1F); - const u32 rdVal = static_cast(static_cast(rtVal) >> shamt); - WriteReg(inst.r.rd, rdVal); - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_SRAV(inst, rtVal, shamt); - } - break; + Execute_srav(inst); + break; case InstructionFunct::and_: - { - const u32 rsVal = ReadReg(inst.r.rs); - const u32 rtVal = ReadReg(inst.r.rt); - const u32 new_value = rsVal & rtVal; - WriteReg(inst.r.rd, new_value); - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_AND_(inst, rsVal, rtVal); - } - break; + Execute_and(inst); + break; case InstructionFunct::or_: - { - const u32 rsVal = ReadReg(inst.r.rs); - const u32 rtVal = ReadReg(inst.r.rt); - const u32 new_value = rsVal | rtVal; - WriteReg(inst.r.rd, new_value); - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_OR_(inst, rsVal, rtVal); - else if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::TryMove(inst.r.rd, inst.r.rs, inst.r.rt); - } - break; + Execute_or(inst); + break; case InstructionFunct::xor_: - { - const u32 rsVal = ReadReg(inst.r.rs); - const u32 rtVal = ReadReg(inst.r.rt); - const u32 new_value = rsVal ^ rtVal; - WriteReg(inst.r.rd, new_value); - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_XOR_(inst, rsVal, rtVal); - else if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::TryMove(inst.r.rd, inst.r.rs, inst.r.rt); - } - break; + Execute_xor(inst); + break; case InstructionFunct::nor: - { - const u32 rsVal = ReadReg(inst.r.rs); - const u32 rtVal = ReadReg(inst.r.rt); - const u32 new_value = ~(rsVal | rtVal); - WriteReg(inst.r.rd, new_value); - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_NOR(inst, rsVal, rtVal); - } - break; + Execute_nor(inst); + break; case InstructionFunct::add: - { - const u32 rsVal = ReadReg(inst.r.rs); - const u32 rtVal = ReadReg(inst.r.rt); - const u32 rdVal = rsVal + rtVal; - if (AddOverflow(rsVal, rtVal, rdVal)) - { - RaiseException(Exception::Ov); - return; - } - - WriteReg(inst.r.rd, rdVal); - - if constexpr (pgxp_mode == PGXPMode::CPU) - PGXP::CPU_ADD(inst, rsVal, rtVal); - else if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::TryMove(inst.r.rd, inst.r.rs, inst.r.rt); - } - break; + Execute_add(inst); + break; case InstructionFunct::addu: - { - const u32 rsVal = ReadReg(inst.r.rs); - const u32 rtVal = ReadReg(inst.r.rt); - const u32 rdVal = rsVal + rtVal; - WriteReg(inst.r.rd, rdVal); - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_ADD(inst, rsVal, rtVal); - else if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::TryMove(inst.r.rd, inst.r.rs, inst.r.rt); - } - break; + Execute_addu(inst); + break; case InstructionFunct::sub: - { - const u32 rsVal = ReadReg(inst.r.rs); - const u32 rtVal = ReadReg(inst.r.rt); - const u32 rdVal = rsVal - rtVal; - if (SubOverflow(rsVal, rtVal, rdVal)) - { - RaiseException(Exception::Ov); - return; - } - - WriteReg(inst.r.rd, rdVal); - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_SUB(inst, rsVal, rtVal); - } - break; + Execute_sub(inst); + break; case InstructionFunct::subu: - { - const u32 rsVal = ReadReg(inst.r.rs); - const u32 rtVal = ReadReg(inst.r.rt); - const u32 rdVal = rsVal - rtVal; - WriteReg(inst.r.rd, rdVal); - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_SUB(inst, rsVal, rtVal); - } - break; + Execute_subu(inst); + break; case InstructionFunct::slt: - { - const u32 rsVal = ReadReg(inst.r.rs); - const u32 rtVal = ReadReg(inst.r.rt); - const u32 result = BoolToUInt32(static_cast(rsVal) < static_cast(rtVal)); - WriteReg(inst.r.rd, result); - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_SLT(inst, rsVal, rtVal); - } - break; + Execute_slt(inst); + break; case InstructionFunct::sltu: - { - const u32 rsVal = ReadReg(inst.r.rs); - const u32 rtVal = ReadReg(inst.r.rt); - const u32 result = BoolToUInt32(rsVal < rtVal); - WriteReg(inst.r.rd, result); - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_SLTU(inst, rsVal, rtVal); - } - break; + Execute_sltu(inst); + break; case InstructionFunct::mfhi: - { - const u32 value = g_state.regs.hi; - WriteReg(inst.r.rd, value); - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_MOVE(static_cast(inst.r.rd.GetValue()), static_cast(Reg::hi), value); - } - break; + Execute_mfhi(inst); + break; case InstructionFunct::mthi: - { - const u32 value = ReadReg(inst.r.rs); - g_state.regs.hi = value; - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_MOVE(static_cast(Reg::hi), static_cast(inst.r.rs.GetValue()), value); - } - break; + Execute_mthi(inst); + break; case InstructionFunct::mflo: - { - const u32 value = g_state.regs.lo; - WriteReg(inst.r.rd, value); - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_MOVE(static_cast(inst.r.rd.GetValue()), static_cast(Reg::lo), value); - } - break; + Execute_mflo(inst); + break; case InstructionFunct::mtlo: - { - const u32 value = ReadReg(inst.r.rs); - g_state.regs.lo = value; - - if constexpr (pgxp_mode == PGXPMode::CPU) - PGXP::CPU_MOVE(static_cast(Reg::lo), static_cast(inst.r.rs.GetValue()), value); - } - break; + Execute_mtlo(inst); + break; case InstructionFunct::mult: - { - const u32 lhs = ReadReg(inst.r.rs); - const u32 rhs = ReadReg(inst.r.rt); - const u64 result = - static_cast(static_cast(SignExtend64(lhs)) * static_cast(SignExtend64(rhs))); - - g_state.regs.hi = Truncate32(result >> 32); - g_state.regs.lo = Truncate32(result); - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_MULT(inst, lhs, rhs); - } - break; + Execute_mult(inst); + break; case InstructionFunct::multu: - { - const u32 lhs = ReadReg(inst.r.rs); - const u32 rhs = ReadReg(inst.r.rt); - const u64 result = ZeroExtend64(lhs) * ZeroExtend64(rhs); - - g_state.regs.hi = Truncate32(result >> 32); - g_state.regs.lo = Truncate32(result); - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_MULTU(inst, lhs, rhs); - } - break; + Execute_multu(inst); + break; case InstructionFunct::div: - { - const s32 num = static_cast(ReadReg(inst.r.rs)); - const s32 denom = static_cast(ReadReg(inst.r.rt)); - - if (denom == 0) - { - // divide by zero - g_state.regs.lo = (num >= 0) ? UINT32_C(0xFFFFFFFF) : UINT32_C(1); - g_state.regs.hi = static_cast(num); - } - else if (static_cast(num) == UINT32_C(0x80000000) && denom == -1) - { - // unrepresentable - g_state.regs.lo = UINT32_C(0x80000000); - g_state.regs.hi = 0; - } - else - { - g_state.regs.lo = static_cast(num / denom); - g_state.regs.hi = static_cast(num % denom); - } - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_DIV(inst, num, denom); - } - break; + Execute_div(inst); + break; case InstructionFunct::divu: - { - const u32 num = ReadReg(inst.r.rs); - const u32 denom = ReadReg(inst.r.rt); - - if (denom == 0) - { - // divide by zero - g_state.regs.lo = UINT32_C(0xFFFFFFFF); - g_state.regs.hi = static_cast(num); - } - else - { - g_state.regs.lo = num / denom; - g_state.regs.hi = num % denom; - } - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_DIVU(inst, num, denom); - } - break; + Execute_divu(inst); + break; case InstructionFunct::jr: - { - g_state.next_instruction_is_branch_delay_slot = true; - const u32 target = ReadReg(inst.r.rs); - Branch(target); - } - break; + Execute_jr(inst); + break; case InstructionFunct::jalr: - { - g_state.next_instruction_is_branch_delay_slot = true; - const u32 target = ReadReg(inst.r.rs); - WriteReg(inst.r.rd, g_state.npc); - Branch(target); - } - break; + Execute_jalr(inst); + break; case InstructionFunct::syscall: - { - RaiseException(Exception::Syscall); - } - break; + Execute_syscall(inst); + break; case InstructionFunct::break_: - { - RaiseBreakException(Cop0Registers::CAUSE::MakeValueForException( - Exception::BP, g_state.current_instruction_in_branch_delay_slot, - g_state.current_instruction_was_branch_taken, g_state.current_instruction.cop.cop_n), - g_state.current_instruction_pc, g_state.current_instruction.bits); - } - break; + Execute_break(inst); + break; default: - { RaiseException(Exception::RI); break; - } } } break; case InstructionOp::lui: - { - const u32 value = inst.i.imm_zext32() << 16; - WriteReg(inst.i.rt, value); - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_LUI(inst); - } - break; + Execute_lui(inst); + break; case InstructionOp::andi: - { - const u32 rsVal = ReadReg(inst.i.rs); - const u32 new_value = rsVal & inst.i.imm_zext32(); - WriteReg(inst.i.rt, new_value); - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_ANDI(inst, rsVal); - } - break; + Execute_andi(inst); + break; case InstructionOp::ori: - { - const u32 rsVal = ReadReg(inst.i.rs); - const u32 imm = inst.i.imm_zext32(); - const u32 rtVal = rsVal | imm; - WriteReg(inst.i.rt, rtVal); - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_ORI(inst, rsVal); - else if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::TryMoveImm(inst.r.rd, inst.r.rs, imm); - } - break; + Execute_ori(inst); + break; case InstructionOp::xori: - { - const u32 rsVal = ReadReg(inst.i.rs); - const u32 imm = inst.i.imm_zext32(); - const u32 new_value = ReadReg(inst.i.rs) ^ imm; - WriteReg(inst.i.rt, new_value); - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_XORI(inst, rsVal); - else if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::TryMoveImm(inst.r.rd, inst.r.rs, imm); - } - break; + Execute_xori(inst); + break; case InstructionOp::addi: - { - const u32 rsVal = ReadReg(inst.i.rs); - const u32 imm = inst.i.imm_sext32(); - const u32 rtVal = rsVal + imm; - if (AddOverflow(rsVal, imm, rtVal)) - { - RaiseException(Exception::Ov); - return; - } - - WriteReg(inst.i.rt, rtVal); - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_ADDI(inst, rsVal); - else if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::TryMoveImm(inst.r.rd, inst.r.rs, imm); - } - break; + Execute_addi(inst); + break; case InstructionOp::addiu: - { - const u32 rsVal = ReadReg(inst.i.rs); - const u32 imm = inst.i.imm_sext32(); - const u32 rtVal = rsVal + imm; - WriteReg(inst.i.rt, rtVal); - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_ADDI(inst, rsVal); - else if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::TryMoveImm(inst.r.rd, inst.r.rs, imm); - } - break; + Execute_addiu(inst); + break; case InstructionOp::slti: - { - const u32 rsVal = ReadReg(inst.i.rs); - const u32 result = BoolToUInt32(static_cast(rsVal) < static_cast(inst.i.imm_sext32())); - WriteReg(inst.i.rt, result); - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_SLTI(inst, rsVal); - } - break; + Execute_slti(inst); + break; case InstructionOp::sltiu: - { - const u32 result = BoolToUInt32(ReadReg(inst.i.rs) < inst.i.imm_sext32()); - WriteReg(inst.i.rt, result); - - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_SLTIU(inst, ReadReg(inst.i.rs)); - } - break; + Execute_sltiu(inst); + break; case InstructionOp::lb: - { - const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); - if constexpr (debug) - { - Cop0DataBreakpointCheck(addr); - MemoryBreakpointCheck(addr); - } - - u8 value; - if (!ReadMemoryByte(addr, &value)) - return; - - const u32 sxvalue = SignExtend32(value); - - WriteRegDelayed(inst.i.rt, sxvalue); - - if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_LBx(inst, addr, sxvalue); - } - break; + Execute_lb(inst); + break; case InstructionOp::lh: - { - const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); - if constexpr (debug) - { - Cop0DataBreakpointCheck(addr); - MemoryBreakpointCheck(addr); - } - - u16 value; - if (!ReadMemoryHalfWord(addr, &value)) - return; - - const u32 sxvalue = SignExtend32(value); - WriteRegDelayed(inst.i.rt, sxvalue); - - if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_LH(inst, addr, sxvalue); - } - break; + Execute_lh(inst); + break; case InstructionOp::lw: - { - const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); - if constexpr (debug) - { - Cop0DataBreakpointCheck(addr); - MemoryBreakpointCheck(addr); - } - - u32 value; - if (!ReadMemoryWord(addr, &value)) - return; - - WriteRegDelayed(inst.i.rt, value); - - if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_LW(inst, addr, value); - } - break; + Execute_lw(inst); + break; case InstructionOp::lbu: - { - const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); - if constexpr (debug) - { - Cop0DataBreakpointCheck(addr); - MemoryBreakpointCheck(addr); - } - - u8 value; - if (!ReadMemoryByte(addr, &value)) - return; - - const u32 zxvalue = ZeroExtend32(value); - WriteRegDelayed(inst.i.rt, zxvalue); - - if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_LBx(inst, addr, zxvalue); - } - break; + Execute_lbu(inst); + break; case InstructionOp::lhu: - { - const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); - if constexpr (debug) - { - Cop0DataBreakpointCheck(addr); - MemoryBreakpointCheck(addr); - } - - u16 value; - if (!ReadMemoryHalfWord(addr, &value)) - return; - - const u32 zxvalue = ZeroExtend32(value); - WriteRegDelayed(inst.i.rt, zxvalue); - - if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_LHU(inst, addr, zxvalue); - } - break; + Execute_lhu(inst); + break; case InstructionOp::lwl: + Execute_lwl(inst); + break; + case InstructionOp::lwr: - { - const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); - const VirtualMemoryAddress aligned_addr = addr & ~UINT32_C(3); - if constexpr (debug) - { - Cop0DataBreakpointCheck(addr); - MemoryBreakpointCheck(addr); - } - - u32 aligned_value; - if (!ReadMemoryWord(aligned_addr, &aligned_value)) - return; - - // Bypasses load delay. No need to check the old value since this is the delay slot or it's not relevant. - const u32 existing_value = (inst.i.rt == g_state.load_delay_reg) ? g_state.load_delay_value : ReadReg(inst.i.rt); - const u8 shift = (Truncate8(addr) & u8(3)) * u8(8); - u32 new_value; - if (inst.op == InstructionOp::lwl) - { - const u32 mask = UINT32_C(0x00FFFFFF) >> shift; - new_value = (existing_value & mask) | (aligned_value << (24 - shift)); - } - else - { - const u32 mask = UINT32_C(0xFFFFFF00) << (24 - shift); - new_value = (existing_value & mask) | (aligned_value >> shift); - } - - WriteRegDelayed(inst.i.rt, new_value); - - if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_LW(inst, addr, new_value); - } - break; + Execute_lwr(inst); + break; case InstructionOp::sb: - { - const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); - if constexpr (debug) - { - Cop0DataBreakpointCheck(addr); - MemoryBreakpointCheck(addr); - } - - const u32 value = ReadReg(inst.i.rt); - WriteMemoryByte(addr, value); - - if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_SB(inst, addr, value); - } - break; + Execute_sb(inst); + break; case InstructionOp::sh: - { - const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); - if constexpr (debug) - { - Cop0DataBreakpointCheck(addr); - MemoryBreakpointCheck(addr); - } - - const u32 value = ReadReg(inst.i.rt); - WriteMemoryHalfWord(addr, value); - - if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_SH(inst, addr, value); - } - break; + Execute_sh(inst); + break; case InstructionOp::sw: - { - const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); - if constexpr (debug) - { - Cop0DataBreakpointCheck(addr); - MemoryBreakpointCheck(addr); - } - - const u32 value = ReadReg(inst.i.rt); - WriteMemoryWord(addr, value); - - if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_SW(inst, addr, value); - } - break; + Execute_sw(inst); + break; case InstructionOp::swl: + Execute_swl(inst); + break; + case InstructionOp::swr: - { - const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); - const VirtualMemoryAddress aligned_addr = addr & ~UINT32_C(3); - if constexpr (debug) - { - Cop0DataBreakpointCheck(aligned_addr); - MemoryBreakpointCheck(aligned_addr); - } - - const u32 reg_value = ReadReg(inst.i.rt); - const u8 shift = (Truncate8(addr) & u8(3)) * u8(8); - u32 mem_value; - if (!ReadMemoryWord(aligned_addr, &mem_value)) - return; - - u32 new_value; - if (inst.op == InstructionOp::swl) - { - const u32 mem_mask = UINT32_C(0xFFFFFF00) << shift; - new_value = (mem_value & mem_mask) | (reg_value >> (24 - shift)); - } - else - { - const u32 mem_mask = UINT32_C(0x00FFFFFF) >> (24 - shift); - new_value = (mem_value & mem_mask) | (reg_value << shift); - } - - WriteMemoryWord(aligned_addr, new_value); - - if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_SW(inst, aligned_addr, new_value); - } - break; + Execute_swr(inst); + break; case InstructionOp::j: - { - g_state.next_instruction_is_branch_delay_slot = true; - Branch((g_state.pc & UINT32_C(0xF0000000)) | (inst.j.target << 2)); - } - break; + Execute_j(inst); + break; case InstructionOp::jal: - { - WriteReg(Reg::ra, g_state.npc); - g_state.next_instruction_is_branch_delay_slot = true; - Branch((g_state.pc & UINT32_C(0xF0000000)) | (inst.j.target << 2)); - } - break; + Execute_jal(inst); + break; case InstructionOp::beq: - { - // We're still flagged as a branch delay slot even if the branch isn't taken. - g_state.next_instruction_is_branch_delay_slot = true; - const bool branch = (ReadReg(inst.i.rs) == ReadReg(inst.i.rt)); - if (branch) - Branch(g_state.pc + (inst.i.imm_sext32() << 2)); - } - break; + Execute_beq(inst); + break; case InstructionOp::bne: - { - g_state.next_instruction_is_branch_delay_slot = true; - const bool branch = (ReadReg(inst.i.rs) != ReadReg(inst.i.rt)); - if (branch) - Branch(g_state.pc + (inst.i.imm_sext32() << 2)); - } - break; + Execute_bne(inst); + break; case InstructionOp::bgtz: - { - g_state.next_instruction_is_branch_delay_slot = true; - const bool branch = (static_cast(ReadReg(inst.i.rs)) > 0); - if (branch) - Branch(g_state.pc + (inst.i.imm_sext32() << 2)); - } - break; + Execute_bgtz(inst); + break; case InstructionOp::blez: - { - g_state.next_instruction_is_branch_delay_slot = true; - const bool branch = (static_cast(ReadReg(inst.i.rs)) <= 0); - if (branch) - Branch(g_state.pc + (inst.i.imm_sext32() << 2)); - } - break; + Execute_blez(inst); + break; case InstructionOp::b: - { - g_state.next_instruction_is_branch_delay_slot = true; - const u8 rt = static_cast(inst.i.rt.GetValue()); - - // bgez is the inverse of bltz, so simply do ltz and xor the result - const bool bgez = ConvertToBoolUnchecked(rt & u8(1)); - const bool branch = (static_cast(ReadReg(inst.i.rs)) < 0) ^ bgez; - - // register is still linked even if the branch isn't taken - const bool link = (rt & u8(0x1E)) == u8(0x10); - if (link) - WriteReg(Reg::ra, g_state.npc); - - if (branch) - Branch(g_state.pc + (inst.i.imm_sext32() << 2)); - } - break; + Execute_b(inst); + break; case InstructionOp::cop0: { @@ -1661,151 +2104,12 @@ restart_instruction: switch (inst.cop.CommonOp()) { case CopCommonInstruction::mfcn: - { - u32 value; - - switch (static_cast(inst.r.rd.GetValue())) - { - case Cop0Reg::BPC: - value = g_state.cop0_regs.BPC; - break; - - case Cop0Reg::BPCM: - value = g_state.cop0_regs.BPCM; - break; - - case Cop0Reg::BDA: - value = g_state.cop0_regs.BDA; - break; - - case Cop0Reg::BDAM: - value = g_state.cop0_regs.BDAM; - break; - - case Cop0Reg::DCIC: - value = g_state.cop0_regs.dcic.bits; - break; - - case Cop0Reg::JUMPDEST: - value = g_state.cop0_regs.TAR; - break; - - case Cop0Reg::BadVaddr: - value = g_state.cop0_regs.BadVaddr; - break; - - case Cop0Reg::SR: - value = g_state.cop0_regs.sr.bits; - break; - - case Cop0Reg::CAUSE: - value = g_state.cop0_regs.cause.bits; - break; - - case Cop0Reg::EPC: - value = g_state.cop0_regs.EPC; - break; - - case Cop0Reg::PRID: - value = g_state.cop0_regs.PRID; - break; - - default: - RaiseException(Exception::RI); - return; - } - - WriteRegDelayed(inst.r.rt, value); - - if constexpr (pgxp_mode == PGXPMode::CPU) - PGXP::CPU_MFC0(inst, value); - } - break; + Execute_mfc0(inst); + break; case CopCommonInstruction::mtcn: - { - u32 value = ReadReg(inst.r.rt); - [[maybe_unused]] const u32 orig_value = value; - - switch (static_cast(inst.r.rd.GetValue())) - { - case Cop0Reg::BPC: - { - g_state.cop0_regs.BPC = value; - DEV_LOG("COP0 BPC <- {:08X}", value); - } - break; - - case Cop0Reg::BPCM: - { - g_state.cop0_regs.BPCM = value; - DEV_LOG("COP0 BPCM <- {:08X}", value); - if (UpdateDebugDispatcherFlag()) - ExitExecution(); - } - break; - - case Cop0Reg::BDA: - { - g_state.cop0_regs.BDA = value; - DEV_LOG("COP0 BDA <- {:08X}", value); - } - break; - - case Cop0Reg::BDAM: - { - g_state.cop0_regs.BDAM = value; - DEV_LOG("COP0 BDAM <- {:08X}", value); - } - break; - - case Cop0Reg::JUMPDEST: - { - WARNING_LOG("Ignoring write to Cop0 JUMPDEST"); - } - break; - - case Cop0Reg::DCIC: - { - g_state.cop0_regs.dcic.bits = (g_state.cop0_regs.dcic.bits & ~Cop0Registers::DCIC::WRITE_MASK) | - (value & Cop0Registers::DCIC::WRITE_MASK); - DEV_LOG("COP0 DCIC <- {:08X} (now {:08X})", value, g_state.cop0_regs.dcic.bits); - value = g_state.cop0_regs.dcic.bits; - if (UpdateDebugDispatcherFlag()) - ExitExecution(); - } - break; - - case Cop0Reg::SR: - { - g_state.cop0_regs.sr.bits = (g_state.cop0_regs.sr.bits & ~Cop0Registers::SR::WRITE_MASK) | - (value & Cop0Registers::SR::WRITE_MASK); - DEBUG_LOG("COP0 SR <- {:08X} (now {:08X})", value, g_state.cop0_regs.sr.bits); - value = g_state.cop0_regs.sr.bits; - UpdateMemoryPointers(); - CheckForPendingInterrupt(); - } - break; - - case Cop0Reg::CAUSE: - { - g_state.cop0_regs.cause.bits = (g_state.cop0_regs.cause.bits & ~Cop0Registers::CAUSE::WRITE_MASK) | - (value & Cop0Registers::CAUSE::WRITE_MASK); - DEBUG_LOG("COP0 CAUSE <- {:08X} (now {:08X})", value, g_state.cop0_regs.cause.bits); - value = g_state.cop0_regs.cause.bits; - CheckForPendingInterrupt(); - } - break; - - [[unlikely]] default: - RaiseException(Exception::RI); - return; - } - - if constexpr (pgxp_mode == PGXPMode::CPU) - PGXP::CPU_MTC0(inst, value, orig_value); - } - break; + Execute_mtc0(inst); + break; default: [[unlikely]] ERROR_LOG("Unhandled instruction at {:08X}: {:08X}", g_state.current_instruction_pc, @@ -1818,13 +2122,8 @@ restart_instruction: switch (inst.cop.Cop0Op()) { case Cop0Instruction::rfe: - { - // restore mode - g_state.cop0_regs.sr.mode_bits = - (g_state.cop0_regs.sr.mode_bits & UINT32_C(0b110000)) | (g_state.cop0_regs.sr.mode_bits >> 2); - CheckForPendingInterrupt(); - } - break; + Execute_rfe(inst); + break; case Cop0Instruction::tlbr: case Cop0Instruction::tlbwi: @@ -1857,48 +2156,20 @@ restart_instruction: switch (inst.cop.CommonOp()) { case CopCommonInstruction::cfcn: - { - StallUntilGTEComplete(); - - const u32 value = GTE::ReadRegister(static_cast(inst.r.rd.GetValue()) + 32); - WriteRegDelayed(inst.r.rt, value); - - if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_MFC2(inst, value); - } - break; + Execute_cfc2(inst); + break; case CopCommonInstruction::ctcn: - { - const u32 value = ReadReg(inst.r.rt); - GTE::WriteRegister(static_cast(inst.r.rd.GetValue()) + 32, value); - - if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_MTC2(inst, value); - } - break; + Execute_ctc2(inst); + break; case CopCommonInstruction::mfcn: - { - StallUntilGTEComplete(); - - const u32 value = GTE::ReadRegister(static_cast(inst.r.rd.GetValue())); - WriteRegDelayed(inst.r.rt, value); - - if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_MFC2(inst, value); - } - break; + Execute_mfc2(inst); + break; case CopCommonInstruction::mtcn: - { - const u32 value = ReadReg(inst.r.rt); - GTE::WriteRegister(static_cast(inst.r.rd.GetValue()), value); - - if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_MTC2(inst, value); - } - break; + Execute_mtc2(inst); + break; default: [[unlikely]] ERROR_LOG("Unhandled instruction at {:08X}: {:08X}", g_state.current_instruction_pc, @@ -1915,45 +2186,12 @@ restart_instruction: break; case InstructionOp::lwc2: - { - if (!g_state.cop0_regs.sr.CE2) - { - WARNING_LOG("Coprocessor 2 not enabled"); - RaiseException(Exception::CpU); - return; - } - - const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); - u32 value; - if (!ReadMemoryWord(addr, &value)) - return; - - GTE::WriteRegister(ZeroExtend32(static_cast(inst.i.rt.GetValue())), value); - - if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_LWC2(inst, addr, value); - } - break; + Execute_lwc2(inst); + break; case InstructionOp::swc2: - { - if (!g_state.cop0_regs.sr.CE2) - { - WARNING_LOG("Coprocessor 2 not enabled"); - RaiseException(Exception::CpU); - return; - } - - StallUntilGTEComplete(); - - const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); - const u32 value = GTE::ReadRegister(ZeroExtend32(static_cast(inst.i.rt.GetValue()))); - WriteMemoryWord(addr, value); - - if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_SWC2(inst, addr, value); - } - break; + Execute_swc2(inst); + break; // swc0/lwc0/cop1/cop3 are essentially no-ops case InstructionOp::cop1: @@ -2534,53 +2772,203 @@ void CPU::SetSingleStepFlag() System::InterruptExecution(); } -template -void CPU::CodeCache::InterpretCachedBlock(const Block* block) +namespace CPU { +#define MAKE_CACHED_INSTRUCTION_HANDLER(insn) \ + template \ + static void CachedInstructionHandler_##insn(u32 arg) \ + { \ + g_state.pending_ticks++; \ + g_state.current_instruction.bits = arg; \ + g_state.current_instruction_pc = g_state.pc; \ + g_state.current_instruction_was_branch_taken = g_state.branch_was_taken; \ + g_state.branch_was_taken = false; \ + g_state.exception_raised = false; \ + g_state.pc = g_state.npc; \ + g_state.npc += 4; \ + Execute_##insn(Instruction{arg}); \ + UpdateLoadDelay(); /* TODO: For non-load instructions, we don't need to update next_load_delay_reg */ \ + g_state.next_instruction_is_branch_delay_slot = false; /* FIXME */ \ + } + +CPU_FOR_EACH_INSTRUCTION(MAKE_CACHED_INSTRUCTION_HANDLER); + +// TODO: inline gte ops + +static void CachedInstructionHandler_gte(u32 arg) { - // set up the state so we've already fetched the instruction - DebugAssert(g_state.pc == block->pc); - g_state.npc = block->pc + 4; - - const Instruction* instruction = block->Instructions(); - const Instruction* end_instruction = instruction + block->size; - const CodeCache::InstructionInfo* info = block->InstructionsInfo(); - - do - { - g_state.pending_ticks++; - - // now executing the instruction we previously fetched - g_state.current_instruction.bits = instruction->bits; - g_state.current_instruction_pc = g_state.pc; - g_state.current_instruction_in_branch_delay_slot = info->is_branch_delay_slot; // TODO: let int set it instead - g_state.current_instruction_was_branch_taken = g_state.branch_was_taken; - g_state.branch_was_taken = false; - g_state.exception_raised = false; - - // update pc - g_state.pc = g_state.npc; - g_state.npc += 4; - - // execute the instruction we previously fetched - ExecuteInstruction(); - - // next load delay - UpdateLoadDelay(); - - if (g_state.exception_raised) - break; - - instruction++; - info++; - } while (instruction != end_instruction); - - // cleanup so the interpreter can kick in if needed - g_state.next_instruction_is_branch_delay_slot = false; + g_state.pending_ticks++; + g_state.current_instruction.bits = arg; + g_state.current_instruction_pc = g_state.pc; + g_state.current_instruction_was_branch_taken = g_state.branch_was_taken; + g_state.branch_was_taken = false; + g_state.exception_raised = false; + g_state.pc = g_state.npc; + g_state.npc += 4; + StallUntilGTEComplete(); + GTE::ExecuteInstruction(arg); + UpdateLoadDelay(); + g_state.next_instruction_is_branch_delay_slot = false; /* FIXME */ } -template void CPU::CodeCache::InterpretCachedBlock(const Block* block); -template void CPU::CodeCache::InterpretCachedBlock(const Block* block); -template void CPU::CodeCache::InterpretCachedBlock(const Block* block); +} // namespace CPU + +CPU::CodeCache::CachedInterpreterHandler CPU::CodeCache::GetCachedInterpreterHandler(const Instruction inst) +{ + static constexpr PGXPMode pgxp_mode = PGXPMode::Disabled; +#define FPTR(insn) &CachedInstructionHandler_##insn + + switch (inst.op) + { +#define STANDARD_OP(op, insn) \ + case InstructionOp::op: \ + return FPTR(insn); + + STANDARD_OP(b, b); + STANDARD_OP(j, j); + STANDARD_OP(jal, jal); + STANDARD_OP(beq, beq); + STANDARD_OP(bne, bne); + STANDARD_OP(blez, blez); + STANDARD_OP(bgtz, bgtz); + STANDARD_OP(addi, addi); + STANDARD_OP(addiu, addiu); + STANDARD_OP(slti, slti); + STANDARD_OP(sltiu, sltiu); + STANDARD_OP(andi, andi); + STANDARD_OP(ori, ori); + STANDARD_OP(xori, xori); + STANDARD_OP(lui, lui); + STANDARD_OP(lb, lb); + STANDARD_OP(lbu, lbu); + STANDARD_OP(lh, lh); + STANDARD_OP(lhu, lhu); + STANDARD_OP(lw, lw); + STANDARD_OP(lwl, lwl); + STANDARD_OP(lwr, lwr); + STANDARD_OP(sb, sb); + STANDARD_OP(sh, sh); + STANDARD_OP(sw, sw); + STANDARD_OP(swl, swl); + STANDARD_OP(swr, swr); + + STANDARD_OP(lwc2, lwc2); + STANDARD_OP(swc2, swc2); + +#undef STANDARD_OP + + case InstructionOp::funct: + { + switch (inst.r.funct) + { +#define FUNCT_OP(op, insn) \ + case InstructionFunct::op: \ + return FPTR(insn); + + FUNCT_OP(sll, sll); + FUNCT_OP(srl, srl); + FUNCT_OP(sra, sra); + FUNCT_OP(sllv, sllv); + FUNCT_OP(srlv, srlv); + FUNCT_OP(srav, srav); + FUNCT_OP(jr, jr); + FUNCT_OP(jalr, jalr); + FUNCT_OP(syscall, syscall); + FUNCT_OP(break_, break); + FUNCT_OP(mfhi, mfhi); + FUNCT_OP(mthi, mthi); + FUNCT_OP(mflo, mflo); + FUNCT_OP(mtlo, mtlo); + FUNCT_OP(mult, mult); + FUNCT_OP(multu, multu); + FUNCT_OP(div, div); + FUNCT_OP(divu, divu); + FUNCT_OP(add, add); + FUNCT_OP(addu, addu); + FUNCT_OP(sub, sub); + FUNCT_OP(subu, subu); + FUNCT_OP(and_, and); + FUNCT_OP(or_, or); + FUNCT_OP(xor_, xor); + FUNCT_OP(nor, nor); + FUNCT_OP(slt, slt); + FUNCT_OP(sltu, sltu); + + default: + Panic("Unhandled funct"); + break; + } + } + +#undef FUNCT_OP + + case InstructionOp::cop0: + { + if (inst.cop.IsCommonInstruction()) + { + switch (inst.cop.CommonOp()) + { + case CopCommonInstruction::mfcn: + return FPTR(mfc0); + + case CopCommonInstruction::mtcn: + return FPTR(mtc0); + + default: + Panic("Unhandled cop0"); + return nullptr; + } + } + else if (inst.cop.Cop0Op() == Cop0Instruction::rfe) + { + return FPTR(rfe); + } + else + { + Panic("Unhandled cop0"); + return nullptr; + } + } + break; + + case InstructionOp::cop2: + { + if (inst.cop.IsCommonInstruction()) + { + switch (inst.cop.CommonOp()) + { + case CopCommonInstruction::cfcn: + return FPTR(cfc2); + + case CopCommonInstruction::ctcn: + return FPTR(ctc2); + + case CopCommonInstruction::mfcn: + return FPTR(mfc2); + + case CopCommonInstruction::mtcn: + return FPTR(mtc2); + + default: + Panic("Unhandled cop2"); + return nullptr; + } + } + else + { + return &CachedInstructionHandler_gte; + } + } + break; + + default: + Panic("Unhandled op"); + break; + } +} + +template void CPU::CodeCache::InterpretUncachedBlock(); +template void CPU::CodeCache::InterpretUncachedBlock(); +template void CPU::CodeCache::InterpretUncachedBlock(); template void CPU::CodeCache::InterpretUncachedBlock() @@ -2639,10 +3027,6 @@ void CPU::CodeCache::InterpretUncachedBlock() } } -template void CPU::CodeCache::InterpretUncachedBlock(); -template void CPU::CodeCache::InterpretUncachedBlock(); -template void CPU::CodeCache::InterpretUncachedBlock(); - bool CPU::RecompilerThunks::InterpretInstruction() { ExecuteInstruction(); diff --git a/src/core/cpu_core_private.h b/src/core/cpu_core_private.h index 479c2f58f..6472587ad 100644 --- a/src/core/cpu_core_private.h +++ b/src/core/cpu_core_private.h @@ -168,4 +168,77 @@ void UncheckedWriteMemoryWord(u32 address, u32 value); #endif +#define CPU_FOR_EACH_INSTRUCTION(X) \ + X(b) \ + X(j) \ + X(jal) \ + X(beq) \ + X(bne) \ + X(blez) \ + X(bgtz) \ + X(addi) \ + X(addiu) \ + X(slti) \ + X(sltiu) \ + X(andi) \ + X(ori) \ + X(xori) \ + X(lui) \ + X(lb) \ + X(lh) \ + X(lwl) \ + X(lw) \ + X(lbu) \ + X(lhu) \ + X(lwr) \ + X(sb) \ + X(sh) \ + X(swl) \ + X(sw) \ + X(swr) \ + X(mfc0) \ + X(mtc0) \ + X(rfe) \ + X(mfc2) \ + X(mtc2) \ + X(cfc2) \ + X(ctc2) \ + X(cop2) \ + X(lwc0) \ + X(lwc1) \ + X(lwc2) \ + X(lwc3) \ + X(swc0) \ + X(swc1) \ + X(swc2) \ + X(swc3) \ + X(sll) \ + X(srl) \ + X(sra) \ + X(sllv) \ + X(srlv) \ + X(srav) \ + X(jr) \ + X(jalr) \ + X(syscall) \ + X(break) \ + X(mfhi) \ + X(mthi) \ + X(mflo) \ + X(mtlo) \ + X(mult) \ + X(multu) \ + X(div) \ + X(divu) \ + X(add) \ + X(addu) \ + X(sub) \ + X(subu) \ + X(and) \ + X(or) \ + X(xor) \ + X(nor) \ + X(slt) \ + X(sltu) + } // namespace CPU \ No newline at end of file