From 2e9f656546caba6796b2f6ef1030f70b732a153f Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Wed, 19 Aug 2020 23:26:57 +1000 Subject: [PATCH] CPU: Implement PGXP CPU Mode This is *very* slow. You don't want to enable it if you don't need it. It is also incompatible with the recompiler and will disable it if the option is enabled. --- README.md | 1 + src/common/types.h | 7 + src/core/cpu_code_cache.cpp | 20 +- src/core/cpu_code_cache.h | 1 + src/core/cpu_core.cpp | 497 +++++--- src/core/cpu_recompiler_code_generator.cpp | 6 +- src/core/cpu_recompiler_thunks.h | 1 + src/core/cpu_types.h | 1 - src/core/host_interface.cpp | 20 + src/core/host_interface.h | 3 + src/core/pgxp.cpp | 1039 +++++++++++++++++ src/core/pgxp.h | 52 + src/core/settings.cpp | 2 + src/core/settings.h | 6 + src/core/types.h | 7 + .../libretro_host_interface.cpp | 10 +- src/duckstation-qt/gpusettingswidget.cpp | 5 + src/duckstation-qt/gpusettingswidget.ui | 7 + src/duckstation-sdl/sdl_host_interface.cpp | 3 + 19 files changed, 1490 insertions(+), 198 deletions(-) diff --git a/README.md b/README.md index 6ff2f0495..1d76ce271 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ A "BIOS" ROM image is required to to start the emulator and to play games. You c ## Latest News +- 2020/08/19: CPU PGXP mode added. It is very slow and incompatible with the recompiler, only use for games which need it. - 2020/08/15: Playlist support/single memcard for multi-disc games in Qt frontend added. - 2020/08/07: Automatic updater for standalone Windows builds. - 2020/08/01: Initial PGXP (geometry/perspective correction) support. 
diff --git a/src/common/types.h b/src/common/types.h index 02657135d..6b94fd87f 100644 --- a/src/common/types.h +++ b/src/common/types.h @@ -15,6 +15,13 @@ #endif #endif +// Force inline in non-debug helper +#ifdef _DEBUG +#define ALWAYS_INLINE_RELEASE +#else +#define ALWAYS_INLINE_RELEASE ALWAYS_INLINE +#endif + // unreferenced parameter macro #ifndef UNREFERENCED_VARIABLE #if defined(_MSC_VER) diff --git a/src/core/cpu_code_cache.cpp b/src/core/cpu_code_cache.cpp index 1bd8bda9c..eb21533f4 100644 --- a/src/core/cpu_code_cache.cpp +++ b/src/core/cpu_code_cache.cpp @@ -110,7 +110,8 @@ void Shutdown() #endif } -void Execute() +template +static void ExecuteImpl() { CodeBlockKey next_block_key; @@ -157,7 +158,7 @@ void Execute() } else { - InterpretCachedBlock(*block); + InterpretCachedBlock(*block); } if (g_state.pending_ticks >= g_state.downcount) @@ -212,6 +213,21 @@ void Execute() g_state.regs.npc = g_state.regs.pc; } +void Execute() +{ + if (g_settings.gpu_pgxp_enable) + { + if (g_settings.gpu_pgxp_cpu) + ExecuteImpl(); + else + ExecuteImpl(); + } + else + { + ExecuteImpl(); + } +} + #ifdef WITH_RECOMPILER void ExecuteRecompiler() diff --git a/src/core/cpu_code_cache.h b/src/core/cpu_code_cache.h index 28401bea6..eec01ac3b 100644 --- a/src/core/cpu_code_cache.h +++ b/src/core/cpu_code_cache.h @@ -96,6 +96,7 @@ void SetUseRecompiler(bool enable); /// Invalidates all blocks which are in the range of the specified code page. void InvalidateBlocksWithPageIndex(u32 page_index); +template void InterpretCachedBlock(const CodeBlock& block); void InterpretUncachedBlock(); diff --git a/src/core/cpu_core.cpp b/src/core/cpu_core.cpp index 6ad58d962..6e2b8cb34 100644 --- a/src/core/cpu_core.cpp +++ b/src/core/cpu_core.cpp @@ -15,19 +15,9 @@ Log_SetChannel(CPU::Core); namespace CPU { -/// Sets the PC and flushes the pipeline. 
static void SetPC(u32 new_pc); - -// Updates load delays - call after each instruction static void UpdateLoadDelay(); - -// Fetches the instruction at m_regs.npc -static void ExecuteInstruction(); -static void ExecuteCop0Instruction(); -static void ExecuteCop2Instruction(); static void Branch(u32 target); - -// clears pipeline of load/branch delays static void FlushPipeline(); State g_state; @@ -139,14 +129,14 @@ bool DoState(StateWrapper& sw) return !sw.HasError(); } -void SetPC(u32 new_pc) +ALWAYS_INLINE_RELEASE void SetPC(u32 new_pc) { DebugAssert(Common::IsAlignedPow2(new_pc, 4)); g_state.regs.npc = new_pc; FlushPipeline(); } -void Branch(u32 target) +ALWAYS_INLINE_RELEASE void Branch(u32 target) { if (!Common::IsAlignedPow2(target, 4)) { @@ -240,7 +230,7 @@ void ClearExternalInterrupt(u8 bit) g_state.cop0_regs.cause.Ip &= static_cast(~(1u << bit)); } -void UpdateLoadDelay() +ALWAYS_INLINE_RELEASE static void UpdateLoadDelay() { // the old value is needed in case the delay slot instruction overwrites the same register if (g_state.load_delay_reg != Reg::count) @@ -251,7 +241,7 @@ void UpdateLoadDelay() g_state.next_load_delay_reg = Reg::count; } -void FlushPipeline() +ALWAYS_INLINE_RELEASE static void FlushPipeline() { // loads are flushed g_state.next_load_delay_reg = Reg::count; @@ -275,12 +265,12 @@ void FlushPipeline() g_state.current_instruction_was_branch_taken = false; } -ALWAYS_INLINE u32 ReadReg(Reg rs) +ALWAYS_INLINE static u32 ReadReg(Reg rs) { return g_state.regs.r[static_cast(rs)]; } -ALWAYS_INLINE void WriteReg(Reg rd, u32 value) +ALWAYS_INLINE static void WriteReg(Reg rd, u32 value) { g_state.regs.r[static_cast(rd)] = value; g_state.load_delay_reg = (rd == g_state.load_delay_reg) ? 
Reg::count : g_state.load_delay_reg; @@ -289,7 +279,7 @@ ALWAYS_INLINE void WriteReg(Reg rd, u32 value) g_state.regs.zero = 0; } -static void WriteRegDelayed(Reg rd, u32 value) +ALWAYS_INLINE_RELEASE static void WriteRegDelayed(Reg rd, u32 value) { Assert(g_state.next_load_delay_reg == Reg::count); if (rd == Reg::zero) @@ -304,7 +294,7 @@ static void WriteRegDelayed(Reg rd, u32 value) g_state.next_load_delay_value = value; } -static std::optional ReadCop0Reg(Cop0Reg reg) +ALWAYS_INLINE_RELEASE static std::optional ReadCop0Reg(Cop0Reg reg) { switch (reg) { @@ -347,7 +337,7 @@ static std::optional ReadCop0Reg(Cop0Reg reg) } } -static void WriteCop0Reg(Cop0Reg reg, u32 value) +ALWAYS_INLINE_RELEASE static void WriteCop0Reg(Cop0Reg reg, u32 value) { switch (reg) { @@ -431,12 +421,12 @@ static void LogInstruction(u32 bits, u32 pc, Registers* regs) WriteToExecutionLog("%08x: %08x %s\n", pc, bits, instr.GetCharArray()); } -static constexpr bool AddOverflow(u32 old_value, u32 add_value, u32 new_value) +ALWAYS_INLINE static constexpr bool AddOverflow(u32 old_value, u32 add_value, u32 new_value) { return (((new_value ^ old_value) & (new_value ^ add_value)) & UINT32_C(0x80000000)) != 0; } -static constexpr bool SubOverflow(u32 old_value, u32 sub_value, u32 new_value) +ALWAYS_INLINE static constexpr bool SubOverflow(u32 old_value, u32 sub_value, u32 new_value) { return (((new_value ^ old_value) & (old_value ^ sub_value)) & UINT32_C(0x80000000)) != 0; } @@ -467,53 +457,8 @@ void DisassembleAndPrint(u32 addr, u32 instructions_before /* = 0 */, u32 instru } } -void Execute() -{ - g_state.frame_done = false; - while (!g_state.frame_done) - { - TimingEvents::UpdateCPUDowncount(); - - while (g_state.pending_ticks <= g_state.downcount) - { - if (HasPendingInterrupt()) - DispatchInterrupt(); - - g_state.pending_ticks++; - - // now executing the instruction we previously fetched - g_state.current_instruction.bits = g_state.next_instruction.bits; - g_state.current_instruction_pc = 
g_state.regs.pc; - g_state.current_instruction_in_branch_delay_slot = g_state.next_instruction_is_branch_delay_slot; - g_state.current_instruction_was_branch_taken = g_state.branch_was_taken; - g_state.next_instruction_is_branch_delay_slot = false; - g_state.branch_was_taken = false; - g_state.exception_raised = false; - - // fetch the next instruction - if (!FetchInstruction()) - continue; - -#if 0 // GTE flag test debugging - if (g_state.m_current_instruction_pc == 0x8002cdf4) - { - if (g_state.m_regs.v1 != g_state.m_regs.v0) - printf("Got %08X Expected? %08X\n", g_state.m_regs.v1, g_state.m_regs.v0); - } -#endif - - // execute the instruction we previously fetched - ExecuteInstruction(); - - // next load delay - UpdateLoadDelay(); - } - - TimingEvents::RunEvents(); - } -} - -void ExecuteInstruction() +template +ALWAYS_INLINE_RELEASE static void ExecuteInstruction() { const Instruction inst = g_state.current_instruction; @@ -525,14 +470,6 @@ void ExecuteInstruction() } #endif -#if 0 - if (g_state.m_current_instruction_pc == 0x8002bf50) - { - TRACE_EXECUTION = true; - __debugbreak(); - } -#endif - #ifdef _DEBUG if (TRACE_EXECUTION) PrintInstruction(inst.bits, g_state.current_instruction_pc, &g_state.regs); @@ -549,6 +486,9 @@ void ExecuteInstruction() case InstructionFunct::sll: { const u32 new_value = ReadReg(inst.r.rt) << inst.r.shamt; + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SLL(inst.bits, new_value, ReadReg(inst.r.rt)); + WriteReg(inst.r.rd, new_value); } break; @@ -556,6 +496,9 @@ void ExecuteInstruction() case InstructionFunct::srl: { const u32 new_value = ReadReg(inst.r.rt) >> inst.r.shamt; + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SRL(inst.bits, new_value, ReadReg(inst.r.rt)); + WriteReg(inst.r.rd, new_value); } break; @@ -563,6 +506,9 @@ void ExecuteInstruction() case InstructionFunct::sra: { const u32 new_value = static_cast(static_cast(ReadReg(inst.r.rt)) >> inst.r.shamt); + if constexpr (pgxp_mode >= PGXPMode::CPU) + 
PGXP::CPU_SRA(inst.bits, new_value, ReadReg(inst.r.rt)); + WriteReg(inst.r.rd, new_value); } break; @@ -571,6 +517,9 @@ void ExecuteInstruction() { const u32 shift_amount = ReadReg(inst.r.rs) & UINT32_C(0x1F); const u32 new_value = ReadReg(inst.r.rt) << shift_amount; + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SLLV(inst.bits, new_value, ReadReg(inst.r.rt), shift_amount); + WriteReg(inst.r.rd, new_value); } break; @@ -579,6 +528,9 @@ void ExecuteInstruction() { const u32 shift_amount = ReadReg(inst.r.rs) & UINT32_C(0x1F); const u32 new_value = ReadReg(inst.r.rt) >> shift_amount; + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SRLV(inst.bits, new_value, ReadReg(inst.r.rt), shift_amount); + WriteReg(inst.r.rd, new_value); } break; @@ -587,6 +539,9 @@ void ExecuteInstruction() { const u32 shift_amount = ReadReg(inst.r.rs) & UINT32_C(0x1F); const u32 new_value = static_cast(static_cast(ReadReg(inst.r.rt)) >> shift_amount); + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SRAV(inst.bits, new_value, ReadReg(inst.r.rt), shift_amount); + WriteReg(inst.r.rd, new_value); } break; @@ -594,6 +549,9 @@ void ExecuteInstruction() case InstructionFunct::and_: { const u32 new_value = ReadReg(inst.r.rs) & ReadReg(inst.r.rt); + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_AND(inst.bits, new_value, ReadReg(inst.r.rs), ReadReg(inst.r.rt)); + WriteReg(inst.r.rd, new_value); } break; @@ -601,6 +559,9 @@ void ExecuteInstruction() case InstructionFunct::or_: { const u32 new_value = ReadReg(inst.r.rs) | ReadReg(inst.r.rt); + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_OR(inst.bits, new_value, ReadReg(inst.r.rs), ReadReg(inst.r.rt)); + WriteReg(inst.r.rd, new_value); } break; @@ -608,6 +569,9 @@ void ExecuteInstruction() case InstructionFunct::xor_: { const u32 new_value = ReadReg(inst.r.rs) ^ ReadReg(inst.r.rt); + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_XOR(inst.bits, new_value, ReadReg(inst.r.rs), ReadReg(inst.r.rt)); + 
WriteReg(inst.r.rd, new_value); } break; @@ -615,6 +579,9 @@ void ExecuteInstruction() case InstructionFunct::nor: { const u32 new_value = ~(ReadReg(inst.r.rs) | ReadReg(inst.r.rt)); + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_NOR(inst.bits, new_value, ReadReg(inst.r.rs), ReadReg(inst.r.rt)); + WriteReg(inst.r.rd, new_value); } break; @@ -630,6 +597,9 @@ void ExecuteInstruction() return; } + if constexpr (pgxp_mode == PGXPMode::CPU) + PGXP::CPU_ADD(inst.bits, new_value, ReadReg(inst.r.rs), ReadReg(inst.r.rt)); + WriteReg(inst.r.rd, new_value); } break; @@ -637,6 +607,9 @@ void ExecuteInstruction() case InstructionFunct::addu: { const u32 new_value = ReadReg(inst.r.rs) + ReadReg(inst.r.rt); + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_ADDU(inst.bits, new_value, ReadReg(inst.r.rs), ReadReg(inst.r.rt)); + WriteReg(inst.r.rd, new_value); } break; @@ -652,6 +625,9 @@ void ExecuteInstruction() return; } + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SUB(inst.bits, new_value, ReadReg(inst.r.rs), ReadReg(inst.r.rt)); + WriteReg(inst.r.rd, new_value); } break; @@ -659,6 +635,9 @@ void ExecuteInstruction() case InstructionFunct::subu: { const u32 new_value = ReadReg(inst.r.rs) - ReadReg(inst.r.rt); + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SUBU(inst.bits, new_value, ReadReg(inst.r.rs), ReadReg(inst.r.rt)); + WriteReg(inst.r.rd, new_value); } break; @@ -666,6 +645,9 @@ void ExecuteInstruction() case InstructionFunct::slt: { const u32 result = BoolToUInt32(static_cast(ReadReg(inst.r.rs)) < static_cast(ReadReg(inst.r.rt))); + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SLT(inst.bits, result, ReadReg(inst.r.rs), ReadReg(inst.r.rt)); + WriteReg(inst.r.rd, result); } break; @@ -673,12 +655,18 @@ void ExecuteInstruction() case InstructionFunct::sltu: { const u32 result = BoolToUInt32(ReadReg(inst.r.rs) < ReadReg(inst.r.rt)); + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SLTU(inst.bits, result, ReadReg(inst.r.rs), 
ReadReg(inst.r.rt)); + WriteReg(inst.r.rd, result); } break; case InstructionFunct::mfhi: { + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_MFHI(inst.bits, ReadReg(inst.r.rd), g_state.regs.hi); + WriteReg(inst.r.rd, g_state.regs.hi); } break; @@ -686,12 +674,18 @@ void ExecuteInstruction() case InstructionFunct::mthi: { const u32 value = ReadReg(inst.r.rs); + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_MTHI(inst.bits, g_state.regs.hi, value); + g_state.regs.hi = value; } break; case InstructionFunct::mflo: { + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_MFLO(inst.bits, ReadReg(inst.r.rd), g_state.regs.lo); + WriteReg(inst.r.rd, g_state.regs.lo); } break; @@ -699,6 +693,9 @@ void ExecuteInstruction() case InstructionFunct::mtlo: { const u32 value = ReadReg(inst.r.rs); + if constexpr (pgxp_mode == PGXPMode::CPU) + PGXP::CPU_MTLO(inst.bits, g_state.regs.lo, value); + g_state.regs.lo = value; } break; @@ -709,8 +706,12 @@ void ExecuteInstruction() const u32 rhs = ReadReg(inst.r.rt); const u64 result = static_cast(static_cast(SignExtend64(lhs)) * static_cast(SignExtend64(rhs))); + g_state.regs.hi = Truncate32(result >> 32); g_state.regs.lo = Truncate32(result); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_MULT(inst.bits, g_state.regs.hi, g_state.regs.lo, lhs, rhs); } break; @@ -719,6 +720,10 @@ void ExecuteInstruction() const u32 lhs = ReadReg(inst.r.rs); const u32 rhs = ReadReg(inst.r.rt); const u64 result = ZeroExtend64(lhs) * ZeroExtend64(rhs); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_MULTU(inst.bits, g_state.regs.hi, g_state.regs.lo, lhs, rhs); + g_state.regs.hi = Truncate32(result >> 32); g_state.regs.lo = Truncate32(result); } @@ -746,6 +751,9 @@ void ExecuteInstruction() g_state.regs.lo = static_cast(num / denom); g_state.regs.hi = static_cast(num % denom); } + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_DIV(inst.bits, g_state.regs.hi, g_state.regs.lo, num, denom); } break; @@ -765,6 +773,9 @@ 
void ExecuteInstruction() g_state.regs.lo = num / denom; g_state.regs.hi = num % denom; } + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_DIVU(inst.bits, g_state.regs.hi, g_state.regs.lo, num, denom); } break; @@ -808,25 +819,44 @@ void ExecuteInstruction() case InstructionOp::lui: { - WriteReg(inst.i.rt, inst.i.imm_zext32() << 16); + const u32 value = inst.i.imm_zext32() << 16; + WriteReg(inst.i.rt, value); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_LUI(inst.bits, value); } break; case InstructionOp::andi: { - WriteReg(inst.i.rt, ReadReg(inst.i.rs) & inst.i.imm_zext32()); + const u32 new_value = ReadReg(inst.i.rs) & inst.i.imm_zext32(); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_ANDI(inst.bits, new_value, ReadReg(inst.i.rs)); + + WriteReg(inst.i.rt, new_value); } break; case InstructionOp::ori: { - WriteReg(inst.i.rt, ReadReg(inst.i.rs) | inst.i.imm_zext32()); + const u32 new_value = ReadReg(inst.i.rs) | inst.i.imm_zext32(); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_ORI(inst.bits, new_value, ReadReg(inst.i.rs)); + + WriteReg(inst.i.rt, new_value); } break; case InstructionOp::xori: { - WriteReg(inst.i.rt, ReadReg(inst.i.rs) ^ inst.i.imm_zext32()); + const u32 new_value = ReadReg(inst.i.rs) ^ inst.i.imm_zext32(); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_XORI(inst.bits, new_value, ReadReg(inst.i.rs)); + + WriteReg(inst.i.rt, new_value); } break; @@ -841,19 +871,31 @@ void ExecuteInstruction() return; } + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_ADDI(inst.bits, new_value, ReadReg(inst.i.rs)); + WriteReg(inst.i.rt, new_value); } break; case InstructionOp::addiu: { - WriteReg(inst.i.rt, ReadReg(inst.i.rs) + inst.i.imm_sext32()); + const u32 new_value = ReadReg(inst.i.rs) + inst.i.imm_sext32(); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_ADDIU(inst.bits, new_value, ReadReg(inst.i.rs)); + + WriteReg(inst.i.rt, new_value); } break; case InstructionOp::slti: { const u32
result = BoolToUInt32(static_cast(ReadReg(inst.i.rs)) < static_cast(inst.i.imm_sext32())); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SLTI(inst.bits, result, ReadReg(inst.i.rs)); + WriteReg(inst.i.rt, result); } break; @@ -861,6 +903,10 @@ void ExecuteInstruction() case InstructionOp::sltiu: { const u32 result = BoolToUInt32(ReadReg(inst.i.rs) < inst.i.imm_sext32()); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SLTIU(inst.bits, result, ReadReg(inst.i.rs)); + WriteReg(inst.i.rt, result); } break; @@ -876,7 +922,7 @@ void ExecuteInstruction() WriteRegDelayed(inst.i.rt, sxvalue); - if (g_settings.gpu_pgxp_enable) + if constexpr (pgxp_mode >= PGXPMode::Memory) PGXP::CPU_LBx(inst.bits, sxvalue, addr); } break; @@ -891,7 +937,7 @@ void ExecuteInstruction() const u32 sxvalue = SignExtend32(value); WriteRegDelayed(inst.i.rt, sxvalue); - if (g_settings.gpu_pgxp_enable) + if constexpr (pgxp_mode >= PGXPMode::Memory) PGXP::CPU_LHx(inst.bits, sxvalue, addr); } break; @@ -905,7 +951,7 @@ void ExecuteInstruction() WriteRegDelayed(inst.i.rt, value); - if (g_settings.gpu_pgxp_enable) + if constexpr (pgxp_mode >= PGXPMode::Memory) PGXP::CPU_LW(inst.bits, value, addr); } break; @@ -920,7 +966,7 @@ void ExecuteInstruction() const u32 zxvalue = ZeroExtend32(value); WriteRegDelayed(inst.i.rt, zxvalue); - if (g_settings.gpu_pgxp_enable) + if constexpr (pgxp_mode >= PGXPMode::Memory) PGXP::CPU_LBx(inst.bits, zxvalue, addr); } break; @@ -935,7 +981,7 @@ void ExecuteInstruction() const u32 zxvalue = ZeroExtend32(value); WriteRegDelayed(inst.i.rt, zxvalue); - if (g_settings.gpu_pgxp_enable) + if constexpr (pgxp_mode >= PGXPMode::Memory) PGXP::CPU_LHx(inst.bits, zxvalue, addr); } break; @@ -966,7 +1012,7 @@ void ExecuteInstruction() WriteRegDelayed(inst.i.rt, new_value); - if (g_settings.gpu_pgxp_enable) + if constexpr (pgxp_mode >= PGXPMode::Memory) PGXP::CPU_LW(inst.bits, new_value, addr); } break; @@ -977,7 +1023,7 @@ void ExecuteInstruction() const u8 value = 
Truncate8(ReadReg(inst.i.rt)); WriteMemoryByte(addr, value); - if (g_settings.gpu_pgxp_enable) + if constexpr (pgxp_mode >= PGXPMode::Memory) PGXP::CPU_SB(inst.bits, value, addr); } break; @@ -988,7 +1034,7 @@ void ExecuteInstruction() const u16 value = Truncate16(ReadReg(inst.i.rt)); WriteMemoryHalfWord(addr, value); - if (g_settings.gpu_pgxp_enable) + if constexpr (pgxp_mode >= PGXPMode::Memory) PGXP::CPU_SH(inst.bits, value, addr); } break; @@ -999,7 +1045,7 @@ void ExecuteInstruction() const u32 value = ReadReg(inst.i.rt); WriteMemoryWord(addr, value); - if (g_settings.gpu_pgxp_enable) + if constexpr (pgxp_mode >= PGXPMode::Memory) PGXP::CPU_SW(inst.bits, value, addr); } break; @@ -1029,7 +1075,7 @@ void ExecuteInstruction() WriteMemoryWord(aligned_addr, new_value); - if (g_settings.gpu_pgxp_enable) + if constexpr (pgxp_mode >= PGXPMode::Memory) PGXP::CPU_SW(inst.bits, new_value, addr); } break; @@ -1114,7 +1160,58 @@ void ExecuteInstruction() return; } - ExecuteCop0Instruction(); + if (inst.cop.IsCommonInstruction()) + { + switch (inst.cop.CommonOp()) + { + case CopCommonInstruction::mfcn: + { + const std::optional value = ReadCop0Reg(static_cast(inst.r.rd.GetValue())); + + if constexpr (pgxp_mode == PGXPMode::CPU) + PGXP::CPU_MFC0(inst.bits, value.value_or(0), ReadReg(inst.i.rs)); + + if (value) + WriteRegDelayed(inst.r.rt, value.value()); + else + RaiseException(Exception::RI); + } + break; + + case CopCommonInstruction::mtcn: + { + WriteCop0Reg(static_cast(inst.r.rd.GetValue()), ReadReg(inst.r.rt)); + + if constexpr (pgxp_mode == PGXPMode::CPU) + { + PGXP::CPU_MTC0(inst.bits, ReadCop0Reg(static_cast(inst.r.rd.GetValue())).value_or(0), + ReadReg(inst.i.rs)); + } + } + break; + + default: + Panic("Missing implementation"); + break; + } + } + else + { + switch (inst.cop.Cop0Op()) + { + case Cop0Instruction::rfe: + { + // restore mode + g_state.cop0_regs.sr.mode_bits = + (g_state.cop0_regs.sr.mode_bits & UINT32_C(0b110000)) | (g_state.cop0_regs.sr.mode_bits >> 
2); + } + break; + + default: + Panic("Missing implementation"); + break; + } + } } break; @@ -1127,7 +1224,61 @@ void ExecuteInstruction() return; } - ExecuteCop2Instruction(); + if (inst.cop.IsCommonInstruction()) + { + // TODO: Combine with cop0. + switch (inst.cop.CommonOp()) + { + case CopCommonInstruction::cfcn: + { + const u32 value = GTE::ReadRegister(static_cast(inst.r.rd.GetValue()) + 32); + WriteRegDelayed(inst.r.rt, value); + + if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::CPU_CFC2(inst.bits, value, value); + } + break; + + case CopCommonInstruction::ctcn: + { + const u32 value = ReadReg(inst.r.rt); + GTE::WriteRegister(static_cast(inst.r.rd.GetValue()) + 32, value); + + if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::CPU_CTC2(inst.bits, value, value); + } + break; + + case CopCommonInstruction::mfcn: + { + const u32 value = GTE::ReadRegister(static_cast(inst.r.rd.GetValue())); + WriteRegDelayed(inst.r.rt, value); + + if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::CPU_MFC2(inst.bits, value, value); + } + break; + + case CopCommonInstruction::mtcn: + { + const u32 value = ReadReg(inst.r.rt); + GTE::WriteRegister(static_cast(inst.r.rd.GetValue()), value); + + if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::CPU_MTC2(inst.bits, value, value); + } + break; + + case CopCommonInstruction::bcnc: + default: + Panic("Missing implementation"); + break; + } + } + else + { + GTE::ExecuteInstruction(inst.bits); + } } break; @@ -1147,7 +1298,7 @@ void ExecuteInstruction() GTE::WriteRegister(ZeroExtend32(static_cast(inst.i.rt.GetValue())), value); - if (g_settings.gpu_pgxp_enable) + if constexpr (pgxp_mode >= PGXPMode::Memory) PGXP::CPU_LWC2(inst.bits, value, addr); } break; @@ -1165,12 +1316,12 @@ void ExecuteInstruction() const u32 value = GTE::ReadRegister(ZeroExtend32(static_cast(inst.i.rt.GetValue()))); WriteMemoryWord(addr, value); - if (g_settings.gpu_pgxp_enable) + if constexpr (pgxp_mode >= PGXPMode::Memory) PGXP::CPU_SWC2(inst.bits, 
value, addr); } break; - // swc0/lwc0/cop1/cop3 are essentially no-ops + // swc0/lwc0/cop1/cop3 are essentially no-ops case InstructionOp::cop1: case InstructionOp::cop3: case InstructionOp::lwc0: @@ -1183,7 +1334,7 @@ void ExecuteInstruction() } break; - // everything else is reserved/invalid + // everything else is reserved/invalid default: { RaiseException(Exception::RI); @@ -1192,117 +1343,71 @@ void ExecuteInstruction() } } -void ExecuteCop0Instruction() +template +static void ExecuteImpl() { - const Instruction inst = g_state.current_instruction; - - if (inst.cop.IsCommonInstruction()) + g_state.frame_done = false; + while (!g_state.frame_done) { - switch (inst.cop.CommonOp()) + TimingEvents::UpdateCPUDowncount(); + + while (g_state.pending_ticks <= g_state.downcount) { - case CopCommonInstruction::mfcn: - { - const std::optional value = ReadCop0Reg(static_cast(inst.r.rd.GetValue())); - if (value) - WriteRegDelayed(inst.r.rt, value.value()); - else - RaiseException(Exception::RI); - } - break; + if (HasPendingInterrupt()) + DispatchInterrupt(); - case CopCommonInstruction::mtcn: - { - WriteCop0Reg(static_cast(inst.r.rd.GetValue()), ReadReg(inst.r.rt)); - } - break; + g_state.pending_ticks++; - default: - Panic("Missing implementation"); - break; + // now executing the instruction we previously fetched + g_state.current_instruction.bits = g_state.next_instruction.bits; + g_state.current_instruction_pc = g_state.regs.pc; + g_state.current_instruction_in_branch_delay_slot = g_state.next_instruction_is_branch_delay_slot; + g_state.current_instruction_was_branch_taken = g_state.branch_was_taken; + g_state.next_instruction_is_branch_delay_slot = false; + g_state.branch_was_taken = false; + g_state.exception_raised = false; + + // fetch the next instruction + if (!FetchInstruction()) + continue; + +#if 0 // GTE flag test debugging + if (g_state.m_current_instruction_pc == 0x8002cdf4) + { + if (g_state.m_regs.v1 != g_state.m_regs.v0) + printf("Got %08X Expected? 
%08X\n", g_state.m_regs.v1, g_state.m_regs.v0); + } +#endif + + // execute the instruction we previously fetched + ExecuteInstruction(); + + // next load delay + UpdateLoadDelay(); } - } - else - { - switch (inst.cop.Cop0Op()) - { - case Cop0Instruction::rfe: - { - // restore mode - g_state.cop0_regs.sr.mode_bits = - (g_state.cop0_regs.sr.mode_bits & UINT32_C(0b110000)) | (g_state.cop0_regs.sr.mode_bits >> 2); - } - break; - default: - Panic("Missing implementation"); - break; - } + TimingEvents::RunEvents(); } } -void ExecuteCop2Instruction() +void Execute() { - const Instruction inst = g_state.current_instruction; - - if (inst.cop.IsCommonInstruction()) + if (g_settings.gpu_pgxp_enable) { - // TODO: Combine with cop0. - switch (inst.cop.CommonOp()) - { - case CopCommonInstruction::cfcn: - { - const u32 value = GTE::ReadRegister(static_cast(inst.r.rd.GetValue()) + 32); - WriteRegDelayed(inst.r.rt, value); - - if (g_settings.gpu_pgxp_enable) - PGXP::CPU_CFC2(inst.bits, value, value); - } - break; - - case CopCommonInstruction::ctcn: - { - const u32 value = ReadReg(inst.r.rt); - GTE::WriteRegister(static_cast(inst.r.rd.GetValue()) + 32, value); - - if (g_settings.gpu_pgxp_enable) - PGXP::CPU_CTC2(inst.bits, value, value); - } - break; - - case CopCommonInstruction::mfcn: - { - const u32 value = GTE::ReadRegister(static_cast(inst.r.rd.GetValue())); - WriteRegDelayed(inst.r.rt, value); - - if (g_settings.gpu_pgxp_enable) - PGXP::CPU_MFC2(inst.bits, value, value); - } - break; - - case CopCommonInstruction::mtcn: - { - const u32 value = ReadReg(inst.r.rt); - GTE::WriteRegister(static_cast(inst.r.rd.GetValue()), value); - - if (g_settings.gpu_pgxp_enable) - PGXP::CPU_MTC2(inst.bits, value, value); - } - break; - - case CopCommonInstruction::bcnc: - default: - Panic("Missing implementation"); - break; - } + if (g_settings.gpu_pgxp_cpu) + ExecuteImpl(); + else + ExecuteImpl(); } else { - GTE::ExecuteInstruction(inst.bits); + ExecuteImpl(); } } namespace CodeCache { 
+template void InterpretCachedBlock(const CodeBlock& block) { // set up the state so we've already fetched the instruction @@ -1327,7 +1432,7 @@ void InterpretCachedBlock(const CodeBlock& block) g_state.regs.npc += 4; // execute the instruction we previously fetched - ExecuteInstruction(); + ExecuteInstruction(); // next load delay UpdateLoadDelay(); @@ -1340,6 +1445,10 @@ void InterpretCachedBlock(const CodeBlock& block) g_state.next_instruction_is_branch_delay_slot = false; } +template void InterpretCachedBlock(const CodeBlock& block); +template void InterpretCachedBlock(const CodeBlock& block); +template void InterpretCachedBlock(const CodeBlock& block); + void InterpretUncachedBlock() { Panic("Fixme with regards to re-fetching PC"); @@ -1365,7 +1474,7 @@ void InterpretUncachedBlock() break; // execute the instruction we previously fetched - ExecuteInstruction(); + ExecuteInstruction(); // next load delay UpdateLoadDelay(); @@ -1387,7 +1496,13 @@ namespace Recompiler::Thunks { bool InterpretInstruction() { - ExecuteInstruction(); + ExecuteInstruction(); + return g_state.exception_raised; +} + +bool InterpretInstructionPGXP() +{ + ExecuteInstruction(); return g_state.exception_raised; } diff --git a/src/core/cpu_recompiler_code_generator.cpp b/src/core/cpu_recompiler_code_generator.cpp index 2e8802c17..1c7ae0db4 100644 --- a/src/core/cpu_recompiler_code_generator.cpp +++ b/src/core/cpu_recompiler_code_generator.cpp @@ -1014,12 +1014,14 @@ bool CodeGenerator::Compile_Fallback(const CodeBlockInstruction& cbi) { // TODO: Use carry flag or something here too Value return_value = m_register_cache.AllocateScratch(RegSize_8); - EmitFunctionCall(&return_value, &Thunks::InterpretInstruction); + EmitFunctionCall(&return_value, + g_settings.gpu_pgxp_enable ? 
&Thunks::InterpretInstructionPGXP : &Thunks::InterpretInstruction); EmitExceptionExitOnBool(return_value); } else { - EmitFunctionCall(nullptr, &Thunks::InterpretInstruction); + EmitFunctionCall(nullptr, + g_settings.gpu_pgxp_enable ? &Thunks::InterpretInstructionPGXP : &Thunks::InterpretInstruction); } m_current_instruction_in_branch_delay_slot_dirty = cbi.is_branch_instruction; diff --git a/src/core/cpu_recompiler_thunks.h b/src/core/cpu_recompiler_thunks.h index 9b9316dde..602f522af 100644 --- a/src/core/cpu_recompiler_thunks.h +++ b/src/core/cpu_recompiler_thunks.h @@ -13,6 +13,7 @@ namespace Recompiler::Thunks { // TODO: Abuse carry flag or something else for exception ////////////////////////////////////////////////////////////////////////// bool InterpretInstruction(); +bool InterpretInstructionPGXP(); // Memory access functions for the JIT - MSB is set on exception. u64 ReadMemoryByte(u32 address); diff --git a/src/core/cpu_types.h b/src/core/cpu_types.h index 48021af6d..785c746fb 100644 --- a/src/core/cpu_types.h +++ b/src/core/cpu_types.h @@ -135,7 +135,6 @@ enum class InstructionFunct : u8 or_ = 37, xor_ = 38, nor = 39, - sh = 41, slt = 42, sltu = 43 }; diff --git a/src/core/host_interface.cpp b/src/core/host_interface.cpp index e72cb1fa1..26a8634bc 100644 --- a/src/core/host_interface.cpp +++ b/src/core/host_interface.cpp @@ -375,6 +375,7 @@ void HostInterface::SetDefaultSettings(SettingsInterface& si) si.SetBoolValue("GPU", "PGXPCulling", true); si.SetBoolValue("GPU", "PGXPTextureCorrection", true); si.SetBoolValue("GPU", "PGXPVertexCache", false); + si.SetBoolValue("GPU", "PGXPCPU", false); si.SetStringValue("Display", "CropMode", Settings::GetDisplayCropModeName(Settings::DEFAULT_DISPLAY_CROP_MODE)); si.SetStringValue("Display", "AspectRatio", @@ -438,6 +439,25 @@ void HostInterface::SetDefaultSettings(SettingsInterface& si) void HostInterface::LoadSettings(SettingsInterface& si) { g_settings.Load(si); + + FixIncompatibleSettings(); +} + +void 
HostInterface::FixIncompatibleSettings() +{ + if (g_settings.gpu_pgxp_enable) + { + if (g_settings.gpu_renderer == GPURenderer::Software) + { + Log_WarningPrintf("PGXP enabled with software renderer, disabling"); + g_settings.gpu_pgxp_enable = false; + } + else if (g_settings.gpu_pgxp_cpu && g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler) + { + Log_WarningPrintf("Recompiler selected with PGXP CPU mode, falling back to cached interpreter"); + g_settings.cpu_execution_mode = CPUExecutionMode::CachedInterpreter; + } + } } void HostInterface::SaveSettings(SettingsInterface& si) diff --git a/src/core/host_interface.h b/src/core/host_interface.h index 2f92313a2..3999cf355 100644 --- a/src/core/host_interface.h +++ b/src/core/host_interface.h @@ -134,6 +134,9 @@ protected: /// Saves current settings variables to ini. virtual void SaveSettings(SettingsInterface& si); + /// Checks and fixes up any incompatible settings. + virtual void FixIncompatibleSettings(); + /// Checks for settings changes, std::move() the old settings away for comparing beforehand. 
virtual void CheckForSettingsChanges(const Settings& old_settings); diff --git a/src/core/pgxp.cpp b/src/core/pgxp.cpp index 7ca7c4f94..0c7de1894 100644 --- a/src/core/pgxp.cpp +++ b/src/core/pgxp.cpp @@ -77,6 +77,11 @@ typedef enum INVALID_8BIT_STORE = 6 } PGXP_error_states; +typedef enum +{ + VALID_HALF = (1 << 0) +} PGXP_half_flags; + #define NONE 0 #define ALL 0xFFFFFFFF #define VALID 1 @@ -92,9 +97,15 @@ typedef enum static const PGXP_value PGXP_value_invalid_address = {0.f, 0.f, 0.f, {0}, 0, 0, INVALID_ADDRESS, 0, 0}; static const PGXP_value PGXP_value_zero = {0.f, 0.f, 0.f, {0}, 0, VALID_ALL, 0, 0, 0}; +static void MakeValid(PGXP_value* pV, u32 psxV); static void Validate(PGXP_value* pV, u32 psxV); static void MaskValidate(PGXP_value* pV, u32 psxV, u32 mask, u32 validMask); +static double f16Sign(double in); +static double f16Unsign(double in); +static double fu16Trunc(double in); +static double f16Overflow(double in); + typedef union { struct @@ -138,6 +149,20 @@ static PGXP_value* CPU_reg = CPU_reg_mem; static PGXP_value* CP0_reg = CP0_reg_mem; // pgxp_value.c +void MakeValid(PGXP_value* pV, u32 psxV) +{ + psx_value psx; + psx.d = psxV; + if (VALID_01 != (pV->flags & VALID_01)) + { + pV->x = psx.sw.l; + pV->y = psx.sw.h; + pV->z = 0.f; + pV->flags |= VALID_01; + pV->value = psx.d; + } +} + void Validate(PGXP_value* pV, u32 psxV) { // assume pV is not NULL @@ -150,6 +175,23 @@ void MaskValidate(PGXP_value* pV, u32 psxV, u32 mask, u32 validMask) pV->flags &= ((pV->value & mask) == (psxV & mask)) ? ALL : (ALL ^ (validMask)); } +double f16Sign(double in) +{ + u32 s = (u32)(in * (double)((u32)1 << 16)); + return ((double)*((s32*)&s)) / (double)((s32)1 << 16); +} +double f16Unsign(double in) +{ + return (in >= 0) ? 
in : ((double)in + (double)USHRT_MAX + 1); +} +double f16Overflow(double in) +{ + double out = 0; + s64 v = ((s64)in) >> 16; + out = (double)v; + return out; +} + // pgxp_mem.c static void PGXP_InitMem(); static PGXP_value Mem[3 * 2048 * 1024 / 4]; // mirror 2MB in 32-bit words * 3 @@ -797,4 +839,1001 @@ void CPU_SW(u32 instr, u32 rtVal, u32 addr) WriteMem(&CPU_reg[rt(instr)], addr); } +void CPU_ADDI(u32 instr, u32 rtVal, u32 rsVal) +{ + // Rt = Rs + Imm (signed) + psx_value tempImm; + PGXP_value ret; + + Validate(&CPU_reg[rs(instr)], rsVal); + ret = CPU_reg[rs(instr)]; + tempImm.d = imm(instr); + tempImm.sd = (tempImm.sd << 16) >> 16; // sign extend + + ret.x = (float)f16Unsign(ret.x); + ret.x += (float)tempImm.w.l; + + // carry on over/underflow + float of = (ret.x > USHRT_MAX) ? 1.f : (ret.x < 0) ? -1.f : 0.f; + ret.x = (float)f16Sign(ret.x); + // ret.x -= of * (USHRT_MAX + 1); + ret.y += tempImm.sw.h + of; + + // truncate on overflow/underflow + ret.y += (ret.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (ret.y < SHRT_MIN) ? USHRT_MAX + 1 : 0.f; + + CPU_reg[rt(instr)] = ret; + CPU_reg[rt(instr)].value = rtVal; +} + +void CPU_ADDIU(u32 instr, u32 rtVal, u32 rsVal) +{ + // Rt = Rs + Imm (signed) (unsafe?) 
+ CPU_ADDI(instr, rtVal, rsVal); +} + +void CPU_ANDI(u32 instr, u32 rtVal, u32 rsVal) +{ + // Rt = Rs & Imm + psx_value vRt; + PGXP_value ret; + + Validate(&CPU_reg[rs(instr)], rsVal); + ret = CPU_reg[rs(instr)]; + + vRt.d = rtVal; + + ret.y = 0.f; // remove upper 16-bits + + switch (imm(instr)) + { + case 0: + // if 0 then x == 0 + ret.x = 0.f; + break; + case 0xFFFF: + // if saturated then x == x + break; + default: + // otherwise x is low precision value + ret.x = vRt.sw.l; + ret.flags |= VALID_0; + } + + ret.flags |= VALID_1; + + CPU_reg[rt(instr)] = ret; + CPU_reg[rt(instr)].value = rtVal; +} + +void CPU_ORI(u32 instr, u32 rtVal, u32 rsVal) +{ + // Rt = Rs | Imm + psx_value vRt; + PGXP_value ret; + + Validate(&CPU_reg[rs(instr)], rsVal); + ret = CPU_reg[rs(instr)]; + + vRt.d = rtVal; + + switch (imm(instr)) + { + case 0: + // if 0 then x == x + break; + default: + // otherwise x is low precision value + ret.x = vRt.sw.l; + ret.flags |= VALID_0; + } + + ret.value = rtVal; + CPU_reg[rt(instr)] = ret; +} + +void CPU_XORI(u32 instr, u32 rtVal, u32 rsVal) +{ + // Rt = Rs ^ Imm + psx_value vRt; + PGXP_value ret; + + Validate(&CPU_reg[rs(instr)], rsVal); + ret = CPU_reg[rs(instr)]; + + vRt.d = rtVal; + + switch (imm(instr)) + { + case 0: + // if 0 then x == x + break; + default: + // otherwise x is low precision value + ret.x = vRt.sw.l; + ret.flags |= VALID_0; + } + + ret.value = rtVal; + CPU_reg[rt(instr)] = ret; +} + +void CPU_SLTI(u32 instr, u32 rtVal, u32 rsVal) +{ + // Rt = Rs < Imm (signed) + psx_value tempImm; + PGXP_value ret; + + Validate(&CPU_reg[rs(instr)], rsVal); + ret = CPU_reg[rs(instr)]; + + tempImm.w.h = imm(instr); + ret.y = 0.f; + ret.x = (CPU_reg[rs(instr)].x < tempImm.sw.h) ? 
1.f : 0.f; + ret.flags |= VALID_1; + ret.value = rtVal; + + CPU_reg[rt(instr)] = ret; +} + +void CPU_SLTIU(u32 instr, u32 rtVal, u32 rsVal) +{ + // Rt = Rs < Imm (Unsigned) + psx_value tempImm; + PGXP_value ret; + + Validate(&CPU_reg[rs(instr)], rsVal); + ret = CPU_reg[rs(instr)]; + + tempImm.w.h = imm(instr); + ret.y = 0.f; + ret.x = (f16Unsign(CPU_reg[rs(instr)].x) < tempImm.w.h) ? 1.f : 0.f; + ret.flags |= VALID_1; + ret.value = rtVal; + + CPU_reg[rt(instr)] = ret; +} + +//////////////////////////////////// +// Load Upper +//////////////////////////////////// +void CPU_LUI(u32 instr, u32 rtVal) +{ + // Rt = Imm << 16 + CPU_reg[rt(instr)] = PGXP_value_zero; + CPU_reg[rt(instr)].y = (float)(s16)imm(instr); + CPU_reg[rt(instr)].hFlags = VALID_HALF; + CPU_reg[rt(instr)].value = rtVal; + CPU_reg[rt(instr)].flags = VALID_01; +} + +//////////////////////////////////// +// Register Arithmetic +//////////////////////////////////// + +void CPU_ADD(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal) +{ + // Rd = Rs + Rt (signed) + PGXP_value ret; + Validate(&CPU_reg[rs(instr)], rsVal); + Validate(&CPU_reg[rt(instr)], rtVal); + + // iCB: Only require one valid input + if (((CPU_reg[rt(instr)].flags & VALID_01) != VALID_01) != ((CPU_reg[rs(instr)].flags & VALID_01) != VALID_01)) + { + MakeValid(&CPU_reg[rs(instr)], rsVal); + MakeValid(&CPU_reg[rt(instr)], rtVal); + } + + ret = CPU_reg[rs(instr)]; + + ret.x = (float)f16Unsign(ret.x); + ret.x += (float)f16Unsign(CPU_reg[rt(instr)].x); + + // carry on over/underflow + float of = (ret.x > USHRT_MAX) ? 1.f : (ret.x < 0) ? -1.f : 0.f; + ret.x = (float)f16Sign(ret.x); + // ret.x -= of * (USHRT_MAX + 1); + ret.y += CPU_reg[rt(instr)].y + of; + + // truncate on overflow/underflow + ret.y += (ret.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (ret.y < SHRT_MIN) ? 
USHRT_MAX + 1 : 0.f; + + // TODO: decide which "z/w" component to use + + ret.halfFlags[0] &= CPU_reg[rt(instr)].halfFlags[0]; + ret.gFlags |= CPU_reg[rt(instr)].gFlags; + ret.lFlags |= CPU_reg[rt(instr)].lFlags; + ret.hFlags |= CPU_reg[rt(instr)].hFlags; + + ret.value = rdVal; + + CPU_reg[rd(instr)] = ret; +} + +void CPU_ADDU(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal) +{ + // Rd = Rs + Rt (signed) (unsafe?) + CPU_ADD(instr, rdVal, rsVal, rtVal); +} + +void CPU_SUB(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal) +{ + // Rd = Rs - Rt (signed) + PGXP_value ret; + Validate(&CPU_reg[rs(instr)], rsVal); + Validate(&CPU_reg[rt(instr)], rtVal); + + // iCB: Only require one valid input + if (((CPU_reg[rt(instr)].flags & VALID_01) != VALID_01) != ((CPU_reg[rs(instr)].flags & VALID_01) != VALID_01)) + { + MakeValid(&CPU_reg[rs(instr)], rsVal); + MakeValid(&CPU_reg[rt(instr)], rtVal); + } + + ret = CPU_reg[rs(instr)]; + + ret.x = (float)f16Unsign(ret.x); + ret.x -= (float)f16Unsign(CPU_reg[rt(instr)].x); + + // carry on over/underflow + float of = (ret.x > USHRT_MAX) ? 1.f : (ret.x < 0) ? -1.f : 0.f; + ret.x = (float)f16Sign(ret.x); + // ret.x -= of * (USHRT_MAX + 1); + ret.y -= CPU_reg[rt(instr)].y - of; + + // truncate on overflow/underflow + ret.y += (ret.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (ret.y < SHRT_MIN) ? USHRT_MAX + 1 : 0.f; + + ret.halfFlags[0] &= CPU_reg[rt(instr)].halfFlags[0]; + ret.gFlags |= CPU_reg[rt(instr)].gFlags; + ret.lFlags |= CPU_reg[rt(instr)].lFlags; + ret.hFlags |= CPU_reg[rt(instr)].hFlags; + + ret.value = rdVal; + + CPU_reg[rd(instr)] = ret; +} + +void CPU_SUBU(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal) +{ + // Rd = Rs - Rt (signed) (unsafe?) 
+ CPU_SUB(instr, rdVal, rsVal, rtVal); +} + +void CPU_AND(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal) +{ + // Rd = Rs & Rt + psx_value vald, vals, valt; + PGXP_value ret; + + Validate(&CPU_reg[rs(instr)], rsVal); + Validate(&CPU_reg[rt(instr)], rtVal); + + // iCB: Only require one valid input + if (((CPU_reg[rt(instr)].flags & VALID_01) != VALID_01) != ((CPU_reg[rs(instr)].flags & VALID_01) != VALID_01)) + { + MakeValid(&CPU_reg[rs(instr)], rsVal); + MakeValid(&CPU_reg[rt(instr)], rtVal); + } + + vald.d = rdVal; + vals.d = rsVal; + valt.d = rtVal; + + // CPU_reg[rd(instr)].valid = CPU_reg[rs(instr)].valid && CPU_reg[rt(instr)].valid; + ret.flags = VALID_01; + + if (vald.w.l == 0) + { + ret.x = 0.f; + ret.lFlags = VALID_HALF; + } + else if (vald.w.l == vals.w.l) + { + ret.x = CPU_reg[rs(instr)].x; + ret.lFlags = CPU_reg[rs(instr)].lFlags; + ret.compFlags[0] = CPU_reg[rs(instr)].compFlags[0]; + } + else if (vald.w.l == valt.w.l) + { + ret.x = CPU_reg[rt(instr)].x; + ret.lFlags = CPU_reg[rt(instr)].lFlags; + ret.compFlags[0] = CPU_reg[rt(instr)].compFlags[0]; + } + else + { + ret.x = (float)vald.sw.l; + ret.compFlags[0] = VALID; + ret.lFlags = 0; + } + + if (vald.w.h == 0) + { + ret.y = 0.f; + ret.hFlags = VALID_HALF; + } + else if (vald.w.h == vals.w.h) + { + ret.y = CPU_reg[rs(instr)].y; + ret.hFlags = CPU_reg[rs(instr)].hFlags; + ret.compFlags[1] &= CPU_reg[rs(instr)].compFlags[1]; + } + else if (vald.w.h == valt.w.h) + { + ret.y = CPU_reg[rt(instr)].y; + ret.hFlags = CPU_reg[rt(instr)].hFlags; + ret.compFlags[1] &= CPU_reg[rt(instr)].compFlags[1]; + } + else + { + ret.y = (float)vald.sw.h; + ret.compFlags[1] = VALID; + ret.hFlags = 0; + } + + // iCB Hack: Force validity if even one half is valid + // if ((ret.hFlags & VALID_HALF) || (ret.lFlags & VALID_HALF)) + // ret.valid = 1; + // /iCB Hack + + // Get a valid W + if ((CPU_reg[rs(instr)].flags & VALID_2) == VALID_2) + { + ret.z = CPU_reg[rs(instr)].z; + ret.compFlags[2] = CPU_reg[rs(instr)].compFlags[2]; + } + 
else if ((CPU_reg[rt(instr)].flags & VALID_2) == VALID_2) + { + ret.z = CPU_reg[rt(instr)].z; + ret.compFlags[2] = CPU_reg[rt(instr)].compFlags[2]; + } + + ret.value = rdVal; + CPU_reg[rd(instr)] = ret; +} + +void CPU_OR(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal) +{ + // Rd = Rs | Rt + CPU_AND(instr, rdVal, rsVal, rtVal); +} + +void CPU_XOR(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal) +{ + // Rd = Rs ^ Rt + CPU_AND(instr, rdVal, rsVal, rtVal); +} + +void CPU_NOR(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal) +{ + // Rd = Rs NOR Rt + CPU_AND(instr, rdVal, rsVal, rtVal); +} + +void CPU_SLT(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal) +{ + // Rd = Rs < Rt (signed) + PGXP_value ret; + Validate(&CPU_reg[rs(instr)], rsVal); + Validate(&CPU_reg[rt(instr)], rtVal); + + // iCB: Only require one valid input + if (((CPU_reg[rt(instr)].flags & VALID_01) != VALID_01) != ((CPU_reg[rs(instr)].flags & VALID_01) != VALID_01)) + { + MakeValid(&CPU_reg[rs(instr)], rsVal); + MakeValid(&CPU_reg[rt(instr)], rtVal); + } + + ret = CPU_reg[rs(instr)]; + ret.y = 0.f; + ret.compFlags[1] = VALID; + + ret.x = (CPU_reg[rs(instr)].y < CPU_reg[rt(instr)].y) ? + 1.f : + (f16Unsign(CPU_reg[rs(instr)].x) < f16Unsign(CPU_reg[rt(instr)].x)) ? 1.f : 0.f; + + ret.value = rdVal; + CPU_reg[rd(instr)] = ret; +} + +void CPU_SLTU(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal) +{ + // Rd = Rs < Rt (unsigned) + PGXP_value ret; + Validate(&CPU_reg[rs(instr)], rsVal); + Validate(&CPU_reg[rt(instr)], rtVal); + + // iCB: Only require one valid input + if (((CPU_reg[rt(instr)].flags & VALID_01) != VALID_01) != ((CPU_reg[rs(instr)].flags & VALID_01) != VALID_01)) + { + MakeValid(&CPU_reg[rs(instr)], rsVal); + MakeValid(&CPU_reg[rt(instr)], rtVal); + } + + ret = CPU_reg[rs(instr)]; + ret.y = 0.f; + ret.compFlags[1] = VALID; + + ret.x = (f16Unsign(CPU_reg[rs(instr)].y) < f16Unsign(CPU_reg[rt(instr)].y)) ? + 1.f : + (f16Unsign(CPU_reg[rs(instr)].x) < f16Unsign(CPU_reg[rt(instr)].x)) ? 
1.f : 0.f; + + ret.value = rdVal; + CPU_reg[rd(instr)] = ret; +} + +//////////////////////////////////// +// Register mult/div +//////////////////////////////////// + +void CPU_MULT(u32 instr, u32 hiVal, u32 loVal, u32 rsVal, u32 rtVal) +{ + // Hi/Lo = Rs * Rt (signed) + Validate(&CPU_reg[rs(instr)], rsVal); + Validate(&CPU_reg[rt(instr)], rtVal); + + // iCB: Only require one valid input + if (((CPU_reg[rt(instr)].flags & VALID_01) != VALID_01) != ((CPU_reg[rs(instr)].flags & VALID_01) != VALID_01)) + { + MakeValid(&CPU_reg[rs(instr)], rsVal); + MakeValid(&CPU_reg[rt(instr)], rtVal); + } + + CPU_Lo = CPU_Hi = CPU_reg[rs(instr)]; + + CPU_Lo.halfFlags[0] = CPU_Hi.halfFlags[0] = (CPU_reg[rs(instr)].halfFlags[0] & CPU_reg[rt(instr)].halfFlags[0]); + + double xx, xy, yx, yy; + double lx = 0, ly = 0, hx = 0, hy = 0; + + // Multiply out components + xx = f16Unsign(CPU_reg[rs(instr)].x) * f16Unsign(CPU_reg[rt(instr)].x); + xy = f16Unsign(CPU_reg[rs(instr)].x) * (CPU_reg[rt(instr)].y); + yx = (CPU_reg[rs(instr)].y) * f16Unsign(CPU_reg[rt(instr)].x); + yy = (CPU_reg[rs(instr)].y) * (CPU_reg[rt(instr)].y); + + // Split values into outputs + lx = xx; + + ly = f16Overflow(xx); + ly += xy + yx; + + hx = f16Overflow(ly); + hx += yy; + + hy = f16Overflow(hx); + + CPU_Lo.x = (float)f16Sign(lx); + CPU_Lo.y = (float)f16Sign(ly); + CPU_Hi.x = (float)f16Sign(hx); + CPU_Hi.y = (float)f16Sign(hy); + + CPU_Lo.value = loVal; + CPU_Hi.value = hiVal; +} + +void CPU_MULTU(u32 instr, u32 hiVal, u32 loVal, u32 rsVal, u32 rtVal) +{ + // Hi/Lo = Rs * Rt (unsigned) + Validate(&CPU_reg[rs(instr)], rsVal); + Validate(&CPU_reg[rt(instr)], rtVal); + + // iCB: Only require one valid input + if (((CPU_reg[rt(instr)].flags & VALID_01) != VALID_01) != ((CPU_reg[rs(instr)].flags & VALID_01) != VALID_01)) + { + MakeValid(&CPU_reg[rs(instr)], rsVal); + MakeValid(&CPU_reg[rt(instr)], rtVal); + } + + CPU_Lo = CPU_Hi = CPU_reg[rs(instr)]; + + CPU_Lo.halfFlags[0] = CPU_Hi.halfFlags[0] = 
(CPU_reg[rs(instr)].halfFlags[0] & CPU_reg[rt(instr)].halfFlags[0]); + + double xx, xy, yx, yy; + double lx = 0, ly = 0, hx = 0, hy = 0; + + // Multiply out components + xx = f16Unsign(CPU_reg[rs(instr)].x) * f16Unsign(CPU_reg[rt(instr)].x); + xy = f16Unsign(CPU_reg[rs(instr)].x) * f16Unsign(CPU_reg[rt(instr)].y); + yx = f16Unsign(CPU_reg[rs(instr)].y) * f16Unsign(CPU_reg[rt(instr)].x); + yy = f16Unsign(CPU_reg[rs(instr)].y) * f16Unsign(CPU_reg[rt(instr)].y); + + // Split values into outputs + lx = xx; + + ly = f16Overflow(xx); + ly += xy + yx; + + hx = f16Overflow(ly); + hx += yy; + + hy = f16Overflow(hx); + + CPU_Lo.x = (float)f16Sign(lx); + CPU_Lo.y = (float)f16Sign(ly); + CPU_Hi.x = (float)f16Sign(hx); + CPU_Hi.y = (float)f16Sign(hy); + + CPU_Lo.value = loVal; + CPU_Hi.value = hiVal; +} + +void CPU_DIV(u32 instr, u32 hiVal, u32 loVal, u32 rsVal, u32 rtVal) +{ + // Lo = Rs / Rt (signed) + // Hi = Rs % Rt (signed) + Validate(&CPU_reg[rs(instr)], rsVal); + Validate(&CPU_reg[rt(instr)], rtVal); + + //// iCB: Only require one valid input + if (((CPU_reg[rt(instr)].flags & VALID_01) != VALID_01) != ((CPU_reg[rs(instr)].flags & VALID_01) != VALID_01)) + { + MakeValid(&CPU_reg[rs(instr)], rsVal); + MakeValid(&CPU_reg[rt(instr)], rtVal); + } + + CPU_Lo = CPU_Hi = CPU_reg[rs(instr)]; + + CPU_Lo.halfFlags[0] = CPU_Hi.halfFlags[0] = (CPU_reg[rs(instr)].halfFlags[0] & CPU_reg[rt(instr)].halfFlags[0]); + + double vs = f16Unsign(CPU_reg[rs(instr)].x) + (CPU_reg[rs(instr)].y) * (double)(1 << 16); + double vt = f16Unsign(CPU_reg[rt(instr)].x) + (CPU_reg[rt(instr)].y) * (double)(1 << 16); + + double lo = vs / vt; + CPU_Lo.y = (float)f16Sign(f16Overflow(lo)); + CPU_Lo.x = (float)f16Sign(lo); + + double hi = fmod(vs, vt); + CPU_Hi.y = (float)f16Sign(f16Overflow(hi)); + CPU_Hi.x = (float)f16Sign(hi); + + CPU_Lo.value = loVal; + CPU_Hi.value = hiVal; +} + +void CPU_DIVU(u32 instr, u32 hiVal, u32 loVal, u32 rsVal, u32 rtVal) +{ + // Lo = Rs / Rt (unsigned) + // Hi = Rs % Rt 
(unsigned) + Validate(&CPU_reg[rs(instr)], rsVal); + Validate(&CPU_reg[rt(instr)], rtVal); + + //// iCB: Only require one valid input + if (((CPU_reg[rt(instr)].flags & VALID_01) != VALID_01) != ((CPU_reg[rs(instr)].flags & VALID_01) != VALID_01)) + { + MakeValid(&CPU_reg[rs(instr)], rsVal); + MakeValid(&CPU_reg[rt(instr)], rtVal); + } + + CPU_Lo = CPU_Hi = CPU_reg[rs(instr)]; + + CPU_Lo.halfFlags[0] = CPU_Hi.halfFlags[0] = (CPU_reg[rs(instr)].halfFlags[0] & CPU_reg[rt(instr)].halfFlags[0]); + + double vs = f16Unsign(CPU_reg[rs(instr)].x) + f16Unsign(CPU_reg[rs(instr)].y) * (double)(1 << 16); + double vt = f16Unsign(CPU_reg[rt(instr)].x) + f16Unsign(CPU_reg[rt(instr)].y) * (double)(1 << 16); + + double lo = vs / vt; + CPU_Lo.y = (float)f16Sign(f16Overflow(lo)); + CPU_Lo.x = (float)f16Sign(lo); + + double hi = fmod(vs, vt); + CPU_Hi.y = (float)f16Sign(f16Overflow(hi)); + CPU_Hi.x = (float)f16Sign(hi); + + CPU_Lo.value = loVal; + CPU_Hi.value = hiVal; +} + +//////////////////////////////////// +// Shift operations (sa) +//////////////////////////////////// +void CPU_SLL(u32 instr, u32 rdVal, u32 rtVal) +{ + // Rd = Rt << Sa + PGXP_value ret; + u32 sh = sa(instr); + Validate(&CPU_reg[rt(instr)], rtVal); + + ret = CPU_reg[rt(instr)]; + + // TODO: Shift flags + double x = f16Unsign(CPU_reg[rt(instr)].x); + double y = f16Unsign(CPU_reg[rt(instr)].y); + if (sh >= 32) + { + x = 0.f; + y = 0.f; + } + else if (sh == 16) + { + y = f16Sign(x); + x = 0.f; + } + else if (sh >= 16) + { + y = x * (1 << (sh - 16)); + y = f16Sign(y); + x = 0.f; + } + else + { + x = x * (1 << sh); + y = y * (1 << sh); + y += f16Overflow(x); + x = f16Sign(x); + y = f16Sign(y); + } + + ret.x = (float)x; + ret.y = (float)y; + + ret.value = rdVal; + CPU_reg[rd(instr)] = ret; +} + +void CPU_SRL(u32 instr, u32 rdVal, u32 rtVal) +{ + // Rd = Rt >> Sa + PGXP_value ret; + u32 sh = sa(instr); + Validate(&CPU_reg[rt(instr)], rtVal); + + ret = CPU_reg[rt(instr)]; + + double x = CPU_reg[rt(instr)].x, y = 
f16Unsign(CPU_reg[rt(instr)].y); + + psx_value iX; + iX.d = rtVal; + psx_value iY; + iY.d = rtVal; + + iX.sd = (iX.sd << 16) >> 16; // remove Y + iY.sw.l = iX.sw.h; // overwrite x with sign(x) + + // Shift test values + psx_value dX; + dX.sd = iX.sd >> sh; + psx_value dY; + dY.d = iY.d >> sh; + + if (dX.sw.l != iX.sw.h) + x = x / (1 << sh); + else + x = dX.sw.l; // only sign bits left + + if (dY.sw.l != iX.sw.h) + { + if (sh == 16) + { + x = y; + } + else if (sh < 16) + { + x += y * (1 << (16 - sh)); + if (CPU_reg[rt(instr)].x < 0) + x += 1 << (16 - sh); + } + else + { + x += y / (1 << (sh - 16)); + } + } + + if ((dY.sw.h == 0) || (dY.sw.h == -1)) + y = dY.sw.h; + else + y = y / (1 << sh); + + x = f16Sign(x); + y = f16Sign(y); + + ret.x = (float)x; + ret.y = (float)y; + + ret.value = rdVal; + CPU_reg[rd(instr)] = ret; +} + +void CPU_SRA(u32 instr, u32 rdVal, u32 rtVal) +{ + // Rd = Rt >> Sa + PGXP_value ret; + u32 sh = sa(instr); + Validate(&CPU_reg[rt(instr)], rtVal); + ret = CPU_reg[rt(instr)]; + + double x = CPU_reg[rt(instr)].x, y = CPU_reg[rt(instr)].y; + + psx_value iX; + iX.d = rtVal; + psx_value iY; + iY.d = rtVal; + + iX.sd = (iX.sd << 16) >> 16; // remove Y + iY.sw.l = iX.sw.h; // overwrite x with sign(x) + + // Shift test values + psx_value dX; + dX.sd = iX.sd >> sh; + psx_value dY; + dY.sd = iY.sd >> sh; + + if (dX.sw.l != iX.sw.h) + x = x / (1 << sh); + else + x = dX.sw.l; // only sign bits left + + if (dY.sw.l != iX.sw.h) + { + if (sh == 16) + { + x = y; + } + else if (sh < 16) + { + x += y * (1 << (16 - sh)); + if (CPU_reg[rt(instr)].x < 0) + x += 1 << (16 - sh); + } + else + { + x += y / (1 << (sh - 16)); + } + } + + if ((dY.sw.h == 0) || (dY.sw.h == -1)) + y = dY.sw.h; + else + y = y / (1 << sh); + + x = f16Sign(x); + y = f16Sign(y); + + ret.x = (float)x; + ret.y = (float)y; + + ret.value = rdVal; + CPU_reg[rd(instr)] = ret; +} + +//////////////////////////////////// +// Shift operations variable +//////////////////////////////////// +void 
CPU_SLLV(u32 instr, u32 rdVal, u32 rtVal, u32 rsVal) +{ + // Rd = Rt << Rs + PGXP_value ret; + u32 sh = rsVal & 0x1F; + Validate(&CPU_reg[rt(instr)], rtVal); + Validate(&CPU_reg[rs(instr)], rsVal); + + ret = CPU_reg[rt(instr)]; + + double x = f16Unsign(CPU_reg[rt(instr)].x); + double y = f16Unsign(CPU_reg[rt(instr)].y); + if (sh >= 32) + { + x = 0.f; + y = 0.f; + } + else if (sh == 16) + { + y = f16Sign(x); + x = 0.f; + } + else if (sh >= 16) + { + y = x * (1 << (sh - 16)); + y = f16Sign(y); + x = 0.f; + } + else + { + x = x * (1 << sh); + y = y * (1 << sh); + y += f16Overflow(x); + x = f16Sign(x); + y = f16Sign(y); + } + + ret.x = (float)x; + ret.y = (float)y; + + ret.value = rdVal; + CPU_reg[rd(instr)] = ret; +} + +void CPU_SRLV(u32 instr, u32 rdVal, u32 rtVal, u32 rsVal) +{ + // Rd = Rt >> Rs + PGXP_value ret; + u32 sh = rsVal & 0x1F; + Validate(&CPU_reg[rt(instr)], rtVal); + Validate(&CPU_reg[rs(instr)], rsVal); + + ret = CPU_reg[rt(instr)]; + + double x = CPU_reg[rt(instr)].x, y = f16Unsign(CPU_reg[rt(instr)].y); + + psx_value iX; + iX.d = rtVal; + psx_value iY; + iY.d = rtVal; + + iX.sd = (iX.sd << 16) >> 16; // remove Y + iY.sw.l = iX.sw.h; // overwrite x with sign(x) + + // Shift test values + psx_value dX; + dX.sd = iX.sd >> sh; + psx_value dY; + dY.d = iY.d >> sh; + + if (dX.sw.l != iX.sw.h) + x = x / (1 << sh); + else + x = dX.sw.l; // only sign bits left + + if (dY.sw.l != iX.sw.h) + { + if (sh == 16) + { + x = y; + } + else if (sh < 16) + { + x += y * (1 << (16 - sh)); + if (CPU_reg[rt(instr)].x < 0) + x += 1 << (16 - sh); + } + else + { + x += y / (1 << (sh - 16)); + } + } + + if ((dY.sw.h == 0) || (dY.sw.h == -1)) + y = dY.sw.h; + else + y = y / (1 << sh); + + x = f16Sign(x); + y = f16Sign(y); + + ret.x = (float)x; + ret.y = (float)y; + + ret.value = rdVal; + CPU_reg[rd(instr)] = ret; +} + +void CPU_SRAV(u32 instr, u32 rdVal, u32 rtVal, u32 rsVal) +{ + // Rd = Rt >> Rs + PGXP_value ret; + u32 sh = rsVal & 0x1F; + Validate(&CPU_reg[rt(instr)], rtVal); 
+ Validate(&CPU_reg[rs(instr)], rsVal); + + ret = CPU_reg[rt(instr)]; + + double x = CPU_reg[rt(instr)].x, y = CPU_reg[rt(instr)].y; + + psx_value iX; + iX.d = rtVal; + psx_value iY; + iY.d = rtVal; + + iX.sd = (iX.sd << 16) >> 16; // remove Y + iY.sw.l = iX.sw.h; // overwrite x with sign(x) + + // Shift test values + psx_value dX; + dX.sd = iX.sd >> sh; + psx_value dY; + dY.sd = iY.sd >> sh; + + if (dX.sw.l != iX.sw.h) + x = x / (1 << sh); + else + x = dX.sw.l; // only sign bits left + + if (dY.sw.l != iX.sw.h) + { + if (sh == 16) + { + x = y; + } + else if (sh < 16) + { + x += y * (1 << (16 - sh)); + if (CPU_reg[rt(instr)].x < 0) + x += 1 << (16 - sh); + } + else + { + x += y / (1 << (sh - 16)); + } + } + + if ((dY.sw.h == 0) || (dY.sw.h == -1)) + y = dY.sw.h; + else + y = y / (1 << sh); + + x = f16Sign(x); + y = f16Sign(y); + + ret.x = (float)x; + ret.y = (float)y; + + ret.value = rdVal; + CPU_reg[rd(instr)] = ret; +} + +void CPU_MFHI(u32 instr, u32 rdVal, u32 hiVal) +{ + // Rd = Hi + Validate(&CPU_Hi, hiVal); + + CPU_reg[rd(instr)] = CPU_Hi; +} + +void CPU_MTHI(u32 instr, u32 hiVal, u32 rdVal) +{ + // Hi = Rd + Validate(&CPU_reg[rd(instr)], rdVal); + + CPU_Hi = CPU_reg[rd(instr)]; +} + +void CPU_MFLO(u32 instr, u32 rdVal, u32 loVal) +{ + // Rd = Lo + Validate(&CPU_Lo, loVal); + + CPU_reg[rd(instr)] = CPU_Lo; +} + +void CPU_MTLO(u32 instr, u32 loVal, u32 rdVal) +{ + // Lo = Rd + Validate(&CPU_reg[rd(instr)], rdVal); + + CPU_Lo = CPU_reg[rd(instr)]; +} + +void CPU_MFC0(u32 instr, u32 rtVal, u32 rdVal) +{ + // CPU[Rt] = CP0[Rd] + Validate(&CP0_reg[rd(instr)], rdVal); + CPU_reg[rt(instr)] = CP0_reg[rd(instr)]; + CPU_reg[rt(instr)].value = rtVal; +} + +void CPU_MTC0(u32 instr, u32 rdVal, u32 rtVal) +{ + // CP0[Rd] = CPU[Rt] + Validate(&CPU_reg[rt(instr)], rtVal); + CP0_reg[rd(instr)] = CPU_reg[rt(instr)]; + CP0_reg[rd(instr)].value = rdVal; +} + +void CPU_CFC0(u32 instr, u32 rtVal, u32 rdVal) +{ + // CPU[Rt] = CP0[Rd] + Validate(&CP0_reg[rd(instr)], rdVal); + 
CPU_reg[rt(instr)] = CP0_reg[rd(instr)]; + CPU_reg[rt(instr)].value = rtVal; +} + +void CPU_CTC0(u32 instr, u32 rdVal, u32 rtVal) +{ + // CP0[Rd] = CPU[Rt] + Validate(&CPU_reg[rt(instr)], rtVal); + CP0_reg[rd(instr)] = CPU_reg[rt(instr)]; + CP0_reg[rd(instr)].value = rdVal; +} + } // namespace PGXP \ No newline at end of file diff --git a/src/core/pgxp.h b/src/core/pgxp.h index 02b996615..94fb0fb49 100644 --- a/src/core/pgxp.h +++ b/src/core/pgxp.h @@ -51,4 +51,56 @@ void CPU_SB(u32 instr, u8 rtVal, u32 addr); void CPU_SH(u32 instr, u16 rtVal, u32 addr); void CPU_SW(u32 instr, u32 rtVal, u32 addr); +// Arithmetic with immediate value +void CPU_ADDI(u32 instr, u32 rtVal, u32 rsVal); +void CPU_ADDIU(u32 instr, u32 rtVal, u32 rsVal); +void CPU_ANDI(u32 instr, u32 rtVal, u32 rsVal); +void CPU_ORI(u32 instr, u32 rtVal, u32 rsVal); +void CPU_XORI(u32 instr, u32 rtVal, u32 rsVal); +void CPU_SLTI(u32 instr, u32 rtVal, u32 rsVal); +void CPU_SLTIU(u32 instr, u32 rtVal, u32 rsVal); + +// Load Upper +void CPU_LUI(u32 instr, u32 rtVal); + +// Register Arithmetic +void CPU_ADD(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal); +void CPU_ADDU(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal); +void CPU_SUB(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal); +void CPU_SUBU(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal); +void CPU_AND(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal); +void CPU_OR(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal); +void CPU_XOR(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal); +void CPU_NOR(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal); +void CPU_SLT(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal); +void CPU_SLTU(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal); + +// Register mult/div +void CPU_MULT(u32 instr, u32 hiVal, u32 loVal, u32 rsVal, u32 rtVal); +void CPU_MULTU(u32 instr, u32 hiVal, u32 loVal, u32 rsVal, u32 rtVal); +void CPU_DIV(u32 instr, u32 hiVal, u32 loVal, u32 rsVal, u32 rtVal); +void CPU_DIVU(u32 instr, u32 hiVal, u32 loVal, u32 rsVal, u32 rtVal); + +// Shift operations (sa) +void 
CPU_SLL(u32 instr, u32 rdVal, u32 rtVal); +void CPU_SRL(u32 instr, u32 rdVal, u32 rtVal); +void CPU_SRA(u32 instr, u32 rdVal, u32 rtVal); + +// Shift operations variable +void CPU_SLLV(u32 instr, u32 rdVal, u32 rtVal, u32 rsVal); +void CPU_SRLV(u32 instr, u32 rdVal, u32 rtVal, u32 rsVal); +void CPU_SRAV(u32 instr, u32 rdVal, u32 rtVal, u32 rsVal); + +// Move registers +void CPU_MFHI(u32 instr, u32 rdVal, u32 hiVal); +void CPU_MTHI(u32 instr, u32 hiVal, u32 rdVal); +void CPU_MFLO(u32 instr, u32 rdVal, u32 loVal); +void CPU_MTLO(u32 instr, u32 loVal, u32 rdVal); + +// CP0 Data transfer tracking +void CPU_MFC0(u32 instr, u32 rtVal, u32 rdVal); +void CPU_MTC0(u32 instr, u32 rdVal, u32 rtVal); +void CPU_CFC0(u32 instr, u32 rtVal, u32 rdVal); +void CPU_CTC0(u32 instr, u32 rdVal, u32 rtVal); + } // namespace PGXP \ No newline at end of file diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 9458112fb..e99dc3c59 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -106,6 +106,7 @@ void Settings::Load(SettingsInterface& si) gpu_pgxp_culling = si.GetBoolValue("GPU", "PGXPCulling", true); gpu_pgxp_texture_correction = si.GetBoolValue("GPU", "PGXPTextureCorrection", true); gpu_pgxp_vertex_cache = si.GetBoolValue("GPU", "PGXPVertexCache", false); + gpu_pgxp_cpu = si.GetBoolValue("GPU", "PGXPCPU", false); display_crop_mode = ParseDisplayCropMode( @@ -215,6 +216,7 @@ void Settings::Save(SettingsInterface& si) const si.SetBoolValue("GPU", "PGXPCulling", gpu_pgxp_culling); si.SetBoolValue("GPU", "PGXPTextureCorrection", gpu_pgxp_texture_correction); si.SetBoolValue("GPU", "PGXPVertexCache", gpu_pgxp_vertex_cache); + si.SetBoolValue("GPU", "PGXPCPU", gpu_pgxp_cpu); si.SetStringValue("Display", "CropMode", GetDisplayCropModeName(display_crop_mode)); si.SetStringValue("Display", "AspectRatio", GetDisplayAspectRatioName(display_aspect_ratio)); diff --git a/src/core/settings.h b/src/core/settings.h index ba8ead79a..1aa2f93fa 100644 --- a/src/core/settings.h 
+++ b/src/core/settings.h @@ -93,6 +93,7 @@ struct Settings bool gpu_pgxp_culling = true; bool gpu_pgxp_texture_correction = true; bool gpu_pgxp_vertex_cache = false; + bool gpu_pgxp_cpu = false; DisplayCropMode display_crop_mode = DisplayCropMode::None; DisplayAspectRatio display_aspect_ratio = DisplayAspectRatio::R4_3; bool display_linear_filtering = true; @@ -157,6 +158,11 @@ struct Settings ALWAYS_INLINE bool IsUsingRecompiler() const { return (cpu_execution_mode == CPUExecutionMode::Recompiler); } ALWAYS_INLINE bool IsUsingSoftwareRenderer() const { return (gpu_renderer == GPURenderer::Software); } + ALWAYS_INLINE PGXPMode GetPGXPMode() const + { + return gpu_pgxp_enable ? (gpu_pgxp_cpu ? PGXPMode::CPU : PGXPMode::Memory) : PGXPMode::Disabled; + } + bool HasAnyPerGameMemoryCards() const; enum : u32 diff --git a/src/core/types.h b/src/core/types.h index 3c234cd90..46482d951 100644 --- a/src/core/types.h +++ b/src/core/types.h @@ -48,6 +48,13 @@ enum class CPUExecutionMode : u8 Count }; +enum class PGXPMode : u8 +{ + Disabled, + Memory, + CPU +}; + enum class GPURenderer : u8 { #ifdef WIN32 diff --git a/src/duckstation-libretro/libretro_host_interface.cpp b/src/duckstation-libretro/libretro_host_interface.cpp index 986709fb1..e193770e8 100644 --- a/src/duckstation-libretro/libretro_host_interface.cpp +++ b/src/duckstation-libretro/libretro_host_interface.cpp @@ -369,7 +369,7 @@ void LibretroHostInterface::OnSystemDestroyed() m_using_hardware_renderer = false; } -static std::array s_option_definitions = {{ +static std::array s_option_definitions = {{ {"duckstation_Console.Region", "Console Region", "Determines which region/hardware to emulate. Auto-Detect will use the region of the disc inserted.", @@ -500,6 +500,12 @@ static std::array s_option_definitions = {{ "Uses screen coordinates as a fallback when tracking vertices through memory fails. 
May improve PGXP compatibility.", {{"true", "Enabled"}, {"false", "Disabled"}}, "false"}, + {"duckstation_GPU.PGXPCPU", + "PGXP CPU Mode", + "Tries to track vertex manipulation through the CPU. Some games require this option for PGXP to be effective. " + "Very slow, and incompatible with the recompiler.", + {{"true", "Enabled"}, {"false", "Disabled"}}, + "false"}, {"duckstation_Display.CropMode", "Crop Mode", "Changes how much of the image is cropped. Some games display garbage in the overscan area which is typically " @@ -607,7 +613,7 @@ bool LibretroHostInterface::HasCoreVariablesChanged() void LibretroHostInterface::LoadSettings() { LibretroSettingsInterface si; - g_settings.Load(si); + HostInterface::LoadSettings(si); // Assume BIOS files are located in system directory. const char* system_directory = nullptr; diff --git a/src/duckstation-qt/gpusettingswidget.cpp b/src/duckstation-qt/gpusettingswidget.cpp index 15707b271..eae0af7fa 100644 --- a/src/duckstation-qt/gpusettingswidget.cpp +++ b/src/duckstation-qt/gpusettingswidget.cpp @@ -44,6 +44,7 @@ GPUSettingsWidget::GPUSettingsWidget(QtHostInterface* host_interface, QWidget* p SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.pgxpTextureCorrection, "GPU", "PGXPTextureCorrection", true); SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.pgxpVertexCache, "GPU", "PGXPVertexCache", false); + SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.pgxpCPUMode, "GPU", "PGXPCPU", false); connect(m_ui.resolutionScale, QOverload::of(&QComboBox::currentIndexChanged), this, &GPUSettingsWidget::updateScaledDitheringEnabled); @@ -142,6 +143,9 @@ GPUSettingsWidget::GPUSettingsWidget(QtHostInterface* host_interface, QWidget* p dialog->registerWidgetHelp(m_ui.pgxpVertexCache, tr("Vertex Cache"), tr("Unchecked"), tr("Uses screen coordinates as a fallback when tracking vertices through memory fails. 
" "May improve PGXP compatibility.")); + dialog->registerWidgetHelp(m_ui.pgxpCPUMode, tr("CPU Mode"), tr("Unchecked"), + tr("Tries to track vertex manipulation through the CPU. Some games require this option for PGXP to be effective. " + "Very slow, and incompatible with the recompiler.")); } GPUSettingsWidget::~GPUSettingsWidget() = default; @@ -255,4 +259,5 @@ void GPUSettingsWidget::updatePGXPSettingsEnabled() m_ui.pgxpCulling->setEnabled(enabled); m_ui.pgxpTextureCorrection->setEnabled(enabled); m_ui.pgxpVertexCache->setEnabled(enabled); + m_ui.pgxpCPUMode->setEnabled(enabled); } diff --git a/src/duckstation-qt/gpusettingswidget.ui b/src/duckstation-qt/gpusettingswidget.ui index 33dcae1c2..0d858dde2 100644 --- a/src/duckstation-qt/gpusettingswidget.ui +++ b/src/duckstation-qt/gpusettingswidget.ui @@ -215,6 +215,13 @@ + + + + CPU Mode + + + diff --git a/src/duckstation-sdl/sdl_host_interface.cpp b/src/duckstation-sdl/sdl_host_interface.cpp index 06dcb5bd2..762692a3a 100644 --- a/src/duckstation-sdl/sdl_host_interface.cpp +++ b/src/duckstation-sdl/sdl_host_interface.cpp @@ -874,6 +874,8 @@ void SDLHostInterface::DrawQuickSettingsMenu() &m_settings_copy.gpu_pgxp_texture_correction, m_settings_copy.gpu_pgxp_enable); settings_changed |= ImGui::MenuItem("PGXP Vertex Cache", nullptr, &m_settings_copy.gpu_pgxp_vertex_cache, m_settings_copy.gpu_pgxp_enable); + settings_changed |= + ImGui::MenuItem("PGXP CPU Instructions", nullptr, &m_settings_copy.gpu_pgxp_cpu, m_settings_copy.gpu_pgxp_enable); ImGui::EndMenu(); } @@ -1347,6 +1349,7 @@ void SDLHostInterface::DrawSettingsWindow() settings_changed |= ImGui::Checkbox("PGXP Culling", &m_settings_copy.gpu_pgxp_culling); settings_changed |= ImGui::Checkbox("PGXP Texture Correction", &m_settings_copy.gpu_pgxp_texture_correction); settings_changed |= ImGui::Checkbox("PGXP Vertex Cache", &m_settings_copy.gpu_pgxp_vertex_cache); + settings_changed |= ImGui::Checkbox("PGXP CPU", &m_settings_copy.gpu_pgxp_cpu); } 
ImGui::EndTabItem();