From 6a0ce1850a5bc8e735f02865ca25c9170f9f4a8a Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Mon, 16 Mar 2015 21:44:49 +0300 Subject: [PATCH 01/23] Initial commit --- rpcs3/Emu/ARMv7/ARMv7Thread.cpp | 11 ++++++----- rpcs3/Emu/Cell/PPUThread.cpp | 21 ++++++++++++++------- rpcs3/Gui/MainFrame.cpp | 5 +++-- rpcs3/Ini.h | 2 +- 4 files changed, 24 insertions(+), 15 deletions(-) diff --git a/rpcs3/Emu/ARMv7/ARMv7Thread.cpp b/rpcs3/Emu/ARMv7/ARMv7Thread.cpp index e35e9c8f8d..876824b9c4 100644 --- a/rpcs3/Emu/ARMv7/ARMv7Thread.cpp +++ b/rpcs3/Emu/ARMv7/ARMv7Thread.cpp @@ -174,16 +174,17 @@ void ARMv7Thread::DoReset() void ARMv7Thread::DoRun() { + m_dec = nullptr; + switch(Ini.CPUDecoderMode.GetValue()) { case 0: - //m_dec = new ARMv7Decoder(*new ARMv7DisAsm()); - break; - case 1: - case 2: m_dec = new ARMv7Decoder(context); - break; + break; + default: + LOG_ERROR(PPU, "Invalid CPU decoder mode: %d", Ini.CPUDecoderMode.GetValue()); + Emu.Pause(); } } diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 412937419e..5572e65abf 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -94,18 +94,21 @@ void PPUThread::CloseStack() void PPUThread::DoRun() { + m_dec = nullptr; + switch(Ini.CPUDecoderMode.GetValue()) { - case 0: - //m_dec = new PPUDecoder(*new PPUDisAsm()); - break; - - case 1: + case 0: // original interpreter { auto ppui = new PPUInterpreter(*this); m_dec = new PPUDecoder(ppui); + break; + } + + case 1: // alternative interpreter + { + break; } - break; case 2: #ifdef PPU_LLVM_RECOMPILER @@ -217,10 +220,14 @@ void PPUThread::Task() { custom_task(*this); } - else + else if (m_dec) { CPUThread::Task(); } + else + { + + } } ppu_thread::ppu_thread(u32 entry, const std::string& name, u32 stack_size, u32 prio) diff --git a/rpcs3/Gui/MainFrame.cpp b/rpcs3/Gui/MainFrame.cpp index 48c7cdc97f..d42f3a7886 100644 --- a/rpcs3/Gui/MainFrame.cpp +++ b/rpcs3/Gui/MainFrame.cpp @@ -437,6 +437,7 @@ void 
MainFrame::Config(wxCommandEvent& WXUNUSED(event)) wxCheckBox* chbox_dbg_ap_functioncall = new wxCheckBox(p_hle, wxID_ANY, "Auto Pause at Function Call"); cbox_cpu_decoder->Append("PPU Interpreter"); + cbox_cpu_decoder->Append("PPU Interpreter 2"); cbox_cpu_decoder->Append("PPU JIT (LLVM)"); cbox_spu_decoder->Append("SPU Interpreter"); @@ -531,7 +532,7 @@ void MainFrame::Config(wxCommandEvent& WXUNUSED(event)) chbox_dbg_ap_systemcall ->SetValue(Ini.DBGAutoPauseSystemCall.GetValue()); chbox_dbg_ap_functioncall->SetValue(Ini.DBGAutoPauseFunctionCall.GetValue()); - cbox_cpu_decoder ->SetSelection(Ini.CPUDecoderMode.GetValue() ? Ini.CPUDecoderMode.GetValue() - 1 : 0); + cbox_cpu_decoder ->SetSelection(Ini.CPUDecoderMode.GetValue() ? Ini.CPUDecoderMode.GetValue() : 0); cbox_spu_decoder ->SetSelection(Ini.SPUDecoderMode.GetValue() ? Ini.SPUDecoderMode.GetValue() - 1 : 0); cbox_gs_render ->SetSelection(Ini.GSRenderMode.GetValue()); cbox_gs_resolution ->SetSelection(ResolutionIdToNum(Ini.GSResolution.GetValue()) - 1); @@ -632,7 +633,7 @@ void MainFrame::Config(wxCommandEvent& WXUNUSED(event)) if(diag.ShowModal() == wxID_OK) { - Ini.CPUDecoderMode.SetValue(cbox_cpu_decoder->GetSelection() + 1); + Ini.CPUDecoderMode.SetValue(cbox_cpu_decoder->GetSelection()); Ini.SPUDecoderMode.SetValue(cbox_spu_decoder->GetSelection() + 1); Ini.GSRenderMode.SetValue(cbox_gs_render->GetSelection()); Ini.GSResolution.SetValue(ResolutionNumToId(cbox_gs_resolution->GetSelection() + 1)); diff --git a/rpcs3/Ini.h b/rpcs3/Ini.h index 6f2b9a7a11..d36f80c028 100644 --- a/rpcs3/Ini.h +++ b/rpcs3/Ini.h @@ -247,7 +247,7 @@ public: void Load() { // Core - CPUDecoderMode.Load(1); + CPUDecoderMode.Load(0); SPUDecoderMode.Load(1); // Graphics From a71cb5f52106119c8ddda966ba3de8148130caeb Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Tue, 17 Mar 2015 00:38:21 +0300 Subject: [PATCH 02/23] Basic concept --- Utilities/rFile.cpp | 2 +- rpcs3/Emu/ARMv7/ARMv7Thread.cpp | 1 + rpcs3/Emu/CPU/CPUThread.cpp | 2 + 
rpcs3/Emu/CPU/CPUThread.h | 10 + rpcs3/Emu/Cell/PPUInterpreter.cpp | 2001 +++++++++++++++++++++++++++++ rpcs3/Emu/Cell/PPUInterpreter.h | 2 +- rpcs3/Emu/Cell/PPUInterpreter2.h | 826 ++++++++++++ rpcs3/Emu/Cell/PPUThread.cpp | 528 +++++++- rpcs3/Emu/Cell/SPUThread.cpp | 1 + rpcs3/Emu/System.cpp | 43 +- rpcs3/Loader/ELF64.cpp | 13 + rpcs3/emucore.vcxproj | 2 + rpcs3/emucore.vcxproj.filters | 6 + 13 files changed, 3413 insertions(+), 24 deletions(-) create mode 100644 rpcs3/Emu/Cell/PPUInterpreter.cpp create mode 100644 rpcs3/Emu/Cell/PPUInterpreter2.h diff --git a/Utilities/rFile.cpp b/Utilities/rFile.cpp index 88db8ad28e..657a143f28 100644 --- a/Utilities/rFile.cpp +++ b/Utilities/rFile.cpp @@ -127,7 +127,7 @@ bool rRename(const std::string &from, const std::string &to) #ifdef _WIN32 if (!MoveFile(ConvertUTF8ToWString(from).c_str(), ConvertUTF8ToWString(to).c_str())) #else - if (rename(from.c_str(), to.c_str())) + if (int err = rename(from.c_str(), to.c_str())) #endif { LOG_ERROR(GENERAL, "Error renaming '%s' to '%s': 0x%llx", from.c_str(), to.c_str(), (u64)GET_API_ERROR); diff --git a/rpcs3/Emu/ARMv7/ARMv7Thread.cpp b/rpcs3/Emu/ARMv7/ARMv7Thread.cpp index 876824b9c4..564dd59e02 100644 --- a/rpcs3/Emu/ARMv7/ARMv7Thread.cpp +++ b/rpcs3/Emu/ARMv7/ARMv7Thread.cpp @@ -229,6 +229,7 @@ void ARMv7Thread::FastCall(u32 addr) void ARMv7Thread::FastStop() { m_status = Stopped; + m_events |= CPU_EVENT_STOP; } armv7_thread::armv7_thread(u32 entry, const std::string& name, u32 stack_size, s32 prio) diff --git a/rpcs3/Emu/CPU/CPUThread.cpp b/rpcs3/Emu/CPU/CPUThread.cpp index 9710b85c5d..a490d187d4 100644 --- a/rpcs3/Emu/CPU/CPUThread.cpp +++ b/rpcs3/Emu/CPU/CPUThread.cpp @@ -17,6 +17,7 @@ CPUThread* GetCurrentCPUThread() CPUThread::CPUThread(CPUThreadType type) : ThreadBase("CPUThread") + , m_events(0) , m_type(type) , m_stack_size(0) , m_stack_addr(0) @@ -242,6 +243,7 @@ void CPUThread::Stop() SendDbgCommand(DID_STOP_THREAD, this); m_status = Stopped; + m_events |= 
CPU_EVENT_STOP; if(static_cast(this) != GetCurrentNamedThread()) { diff --git a/rpcs3/Emu/CPU/CPUThread.h b/rpcs3/Emu/CPU/CPUThread.h index 13e8e7ebbf..e81d79216e 100644 --- a/rpcs3/Emu/CPU/CPUThread.h +++ b/rpcs3/Emu/CPU/CPUThread.h @@ -20,11 +20,19 @@ enum CPUThreadStatus CPUThread_Step, }; +// CPU Thread Events +enum : u64 +{ + CPU_EVENT_STOP = (1ull << 0), +}; + class CPUDecoder; class CPUThread : public ThreadBase { protected: + std::atomic m_events; // flags + u32 m_status; u32 m_id; u64 m_prio; @@ -45,6 +53,8 @@ protected: virtual void DumpInformation() override; public: + void AddEvent(const u64 event) { m_events |= event; } + virtual void InitRegs() = 0; virtual void InitStack() = 0; diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp new file mode 100644 index 0000000000..3e0a1b7a65 --- /dev/null +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -0,0 +1,2001 @@ +#include "stdafx.h" +#include "Utilities/Log.h" +#include "Emu/Memory/Memory.h" +#include "Emu/System.h" +#include "Emu/Cell/PPUThread.h" +#include "Emu/SysCalls/SysCalls.h" +#include "Emu/SysCalls/Modules.h" +#include "Emu/Cell/PPUDecoder.h" +#include "PPUInstrTable.h" +#include "PPUInterpreter.h" +#include "PPUInterpreter2.h" +#include "Emu/CPU/CPUThreadManager.h" + +void ppu_interpreter::NULL_OP(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::NOP(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + + +void ppu_interpreter::TDI(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::TWI(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + + +void ppu_interpreter::MFVSCR(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + 
+void ppu_interpreter::MTVSCR(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VADDCUW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VADDFP(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VADDSBS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VADDSHS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VADDSWS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VADDUBM(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VADDUBS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VADDUHM(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VADDUHS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VADDUWM(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VADDUWS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VAND(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VANDC(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); 
(*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VAVGSB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VAVGSH(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VAVGSW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VAVGUB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VAVGUH(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VAVGUW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCFSX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCFUX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCMPBFP(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCMPBFP_(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCMPEQFP(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCMPEQFP_(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCMPEQUB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCMPEQUB_(PPUThread& CPU, ppu_opcode_t op) +{ + 
PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCMPEQUH(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCMPEQUH_(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCMPEQUW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCMPEQUW_(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCMPGEFP(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCMPGEFP_(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCMPGTFP(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCMPGTFP_(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCMPGTSB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCMPGTSB_(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCMPGTSH(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCMPGTSH_(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCMPGTSW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void 
ppu_interpreter::VCMPGTSW_(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCMPGTUB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCMPGTUB_(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCMPGTUH(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCMPGTUH_(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCMPGTUW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCMPGTUW_(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCTSXS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VCTUXS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VEXPTEFP(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VLOGEFP(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMADDFP(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMAXFP(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMAXSB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); 
(*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMAXSH(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMAXSW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMAXUB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMAXUH(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMAXUW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMHADDSHS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMHRADDSHS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMINFP(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMINSB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMINSH(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMINSW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMINUB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMINUH(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMINUW(PPUThread& CPU, ppu_opcode_t op) +{ + 
PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMLADDUHM(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMRGHB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMRGHH(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMRGHW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMRGLB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMRGLH(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMRGLW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMSUMMBM(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMSUMSHM(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMSUMSHS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMSUMUBM(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMSUMUHM(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMSUMUHS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void 
ppu_interpreter::VMULESB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMULESH(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMULEUB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMULEUH(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMULOSB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMULOSH(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMULOUB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VMULOUH(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VNMSUBFP(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VNOR(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VOR(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VPERM(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VPKPX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VPKSHSS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, 
op.opcode); +} + +void ppu_interpreter::VPKSHUS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VPKSWSS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VPKSWUS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VPKUHUM(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VPKUHUS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VPKUWUM(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VPKUWUS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VREFP(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VRFIM(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VRFIN(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VRFIP(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VRFIZ(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VRLB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VRLH(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); 
(*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VRLW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VRSQRTEFP(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSEL(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSL(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSLB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSLDOI(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSLH(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSLO(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSLW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSPLTB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSPLTH(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSPLTISB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSPLTISH(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSPLTISW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter 
inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSPLTW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSR(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSRAB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSRAH(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSRAW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSRB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSRH(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSRO(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSRW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSUBCUW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSUBFP(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSUBSBS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSUBSHS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSUBSWS(PPUThread& CPU, ppu_opcode_t op) +{ + 
PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSUBUBM(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSUBUBS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSUBUHM(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSUBUHS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSUBUWM(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSUBUWS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSUMSWS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSUM2SWS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSUM4SBS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSUM4SHS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VSUM4UBS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VUPKHPX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VUPKHSB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void 
ppu_interpreter::VUPKHSH(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VUPKLPX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VUPKLSB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VUPKLSH(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::VXOR(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::MULLI(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::SUBFIC(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::CMPLI(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::CMPI(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::ADDIC(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::ADDIC_(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::ADDI(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::ADDIS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::BC(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + 
+void ppu_interpreter::HACK(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::SC(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::B(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::MCRF(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::BCLR(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::CRNOR(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::CRANDC(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::ISYNC(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::CRXOR(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::CRNAND(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::CRAND(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::CREQV(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::CRORC(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::CROR(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void 
ppu_interpreter::BCCTR(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::RLWIMI(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::RLWINM(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::RLWNM(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::ORI(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::ORIS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::XORI(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::XORIS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::ANDI_(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::ANDIS_(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::RLDICL(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::RLDICR(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::RLDIC(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::RLDIMI(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + 
+void ppu_interpreter::RLDC_LR(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::CMP(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::TW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LVSL(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LVEBX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::SUBFC(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::MULHDU(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::ADDC(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::MULHWU(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::MFOCRF(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LWARX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LDX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LWZX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::SLW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + 
+void ppu_interpreter::CNTLZW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::SLD(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::AND(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::CMPL(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LVSR(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LVEHX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::SUBF(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LDUX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::DCBST(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LWZUX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::CNTLZD(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::ANDC(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::TD(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LVEWX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void 
ppu_interpreter::MULHD(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::MULHW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LDARX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::DCBF(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LBZX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LVX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::NEG(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LBZUX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::NOR(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STVEBX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::SUBFE(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::ADDE(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::MTOCRF(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STDX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void 
ppu_interpreter::STWCX_(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STWX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STVEHX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STDUX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STWUX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STVEWX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::SUBFZE(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::ADDZE(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STDCX_(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STBX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STVX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::MULLD(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::SUBFME(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::ADDME(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + 
+void ppu_interpreter::MULLW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::DCBTST(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STBUX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::ADD(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::DCBT(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LHZX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::EQV(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::ECIWX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LHZUX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::XOR(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::MFSPR(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LWAX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::DST(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LHAX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void 
ppu_interpreter::LVXL(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::MFTB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LWAUX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::DSTST(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LHAUX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STHX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::ORC(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::ECOWX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STHUX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::OR(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::DIVDU(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::DIVWU(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::MTSPR(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::DCBI(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void 
ppu_interpreter::NAND(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STVXL(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::DIVD(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::DIVW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LVLX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LDBRX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LSWX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LWBRX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LFSX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::SRW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::SRD(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LVRX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LSWI(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LFSUX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void 
ppu_interpreter::SYNC(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LFDX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LFDUX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STVLX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STDBRX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STSWX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STWBRX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STFSX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STVRX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STFSUX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STSWI(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STFDX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STFDUX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LVLXL(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + 
+void ppu_interpreter::LHBRX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::SRAW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::SRAD(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LVRXL(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::DSS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::SRAWI(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::SRADI1(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::SRADI2(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::EIEIO(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STVLXL(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STHBRX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::EXTSH(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STVRXL(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::EXTSB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); 
+} + +void ppu_interpreter::STFIWX(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::EXTSW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::ICBI(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::DCBZ(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LWZ(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LWZU(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LBZ(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LBZU(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STWU(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STBU(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LHZ(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LHZU(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void 
ppu_interpreter::LHA(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LHAU(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STH(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STHU(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LMW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STMW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LFS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LFSU(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LFD(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LFDU(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STFS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STFSU(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STFD(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STFDU(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void 
ppu_interpreter::LD(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LDU(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::LWA(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FDIVS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FSUBS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FADDS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FSQRTS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FRES(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FMULS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FMADDS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FMSUBS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FNMSUBS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FNMADDS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::STD(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void 
ppu_interpreter::STDU(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::MTFSB1(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::MCRFS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::MTFSB0(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::MTFSFI(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::MFFS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::MTFSF(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + + +void ppu_interpreter::FCMPU(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FRSP(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FCTIW(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FCTIWZ(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FDIV(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FSUB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FADD(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + 
+void ppu_interpreter::FSQRT(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FSEL(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FMUL(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FRSQRTE(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FMSUB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FMADD(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FNMSUB(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FNMADD(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FCMPO(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FNEG(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FMR(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FNABS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FABS(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FCTID(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + 
+void ppu_interpreter::FCTIDZ(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::FCFID(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + + +void ppu_interpreter::UNK(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index 7e53616f8b..f6dd80453a 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -18,6 +18,7 @@ #include extern u64 rotate_mask[64][64]; // defined in PPUThread.cpp, static didn't work correctly in GCC 4.9 for some reason + inline void InitRotateMask() { static bool inited = false; @@ -91,7 +92,6 @@ private: public: PPUInterpreter(PPUThread& cpu) : CPU(cpu) { - InitRotateMask(); } private: diff --git a/rpcs3/Emu/Cell/PPUInterpreter2.h b/rpcs3/Emu/Cell/PPUInterpreter2.h new file mode 100644 index 0000000000..6bd9373790 --- /dev/null +++ b/rpcs3/Emu/Cell/PPUInterpreter2.h @@ -0,0 +1,826 @@ +#pragma once +#include "PPUOpcodes.h" + +class PPUThread; + +union ppu_opcode_t +{ + u32 opcode; +}; + +using ppu_inter_func_t = void(*)(PPUThread& CPU, ppu_opcode_t opcode); + +namespace ppu_interpreter +{ + void NULL_OP(PPUThread& CPU, ppu_opcode_t op); + void NOP(PPUThread& CPU, ppu_opcode_t op); + + void TDI(PPUThread& CPU, ppu_opcode_t op); + void TWI(PPUThread& CPU, ppu_opcode_t op); + + void MFVSCR(PPUThread& CPU, ppu_opcode_t op); + void MTVSCR(PPUThread& CPU, ppu_opcode_t op); + void VADDCUW(PPUThread& CPU, ppu_opcode_t op); + void VADDFP(PPUThread& CPU, ppu_opcode_t op); + void VADDSBS(PPUThread& CPU, ppu_opcode_t op); + void VADDSHS(PPUThread& CPU, ppu_opcode_t op); + void VADDSWS(PPUThread& CPU, ppu_opcode_t op); + void VADDUBM(PPUThread& CPU, ppu_opcode_t op); + void VADDUBS(PPUThread& CPU, ppu_opcode_t op); + void VADDUHM(PPUThread& 
CPU, ppu_opcode_t op); + void VADDUHS(PPUThread& CPU, ppu_opcode_t op); + void VADDUWM(PPUThread& CPU, ppu_opcode_t op); + void VADDUWS(PPUThread& CPU, ppu_opcode_t op); + void VAND(PPUThread& CPU, ppu_opcode_t op); + void VANDC(PPUThread& CPU, ppu_opcode_t op); + void VAVGSB(PPUThread& CPU, ppu_opcode_t op); + void VAVGSH(PPUThread& CPU, ppu_opcode_t op); + void VAVGSW(PPUThread& CPU, ppu_opcode_t op); + void VAVGUB(PPUThread& CPU, ppu_opcode_t op); + void VAVGUH(PPUThread& CPU, ppu_opcode_t op); + void VAVGUW(PPUThread& CPU, ppu_opcode_t op); + void VCFSX(PPUThread& CPU, ppu_opcode_t op); + void VCFUX(PPUThread& CPU, ppu_opcode_t op); + void VCMPBFP(PPUThread& CPU, ppu_opcode_t op); + void VCMPBFP_(PPUThread& CPU, ppu_opcode_t op); + void VCMPEQFP(PPUThread& CPU, ppu_opcode_t op); + void VCMPEQFP_(PPUThread& CPU, ppu_opcode_t op); + void VCMPEQUB(PPUThread& CPU, ppu_opcode_t op); + void VCMPEQUB_(PPUThread& CPU, ppu_opcode_t op); + void VCMPEQUH(PPUThread& CPU, ppu_opcode_t op); + void VCMPEQUH_(PPUThread& CPU, ppu_opcode_t op); + void VCMPEQUW(PPUThread& CPU, ppu_opcode_t op); + void VCMPEQUW_(PPUThread& CPU, ppu_opcode_t op); + void VCMPGEFP(PPUThread& CPU, ppu_opcode_t op); + void VCMPGEFP_(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTFP(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTFP_(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTSB(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTSB_(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTSH(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTSH_(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTSW(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTSW_(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTUB(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTUB_(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTUH(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTUH_(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTUW(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTUW_(PPUThread& CPU, ppu_opcode_t op); + void VCTSXS(PPUThread& CPU, ppu_opcode_t 
op); + void VCTUXS(PPUThread& CPU, ppu_opcode_t op); + void VEXPTEFP(PPUThread& CPU, ppu_opcode_t op); + void VLOGEFP(PPUThread& CPU, ppu_opcode_t op); + void VMADDFP(PPUThread& CPU, ppu_opcode_t op); + void VMAXFP(PPUThread& CPU, ppu_opcode_t op); + void VMAXSB(PPUThread& CPU, ppu_opcode_t op); + void VMAXSH(PPUThread& CPU, ppu_opcode_t op); + void VMAXSW(PPUThread& CPU, ppu_opcode_t op); + void VMAXUB(PPUThread& CPU, ppu_opcode_t op); + void VMAXUH(PPUThread& CPU, ppu_opcode_t op); + void VMAXUW(PPUThread& CPU, ppu_opcode_t op); + void VMHADDSHS(PPUThread& CPU, ppu_opcode_t op); + void VMHRADDSHS(PPUThread& CPU, ppu_opcode_t op); + void VMINFP(PPUThread& CPU, ppu_opcode_t op); + void VMINSB(PPUThread& CPU, ppu_opcode_t op); + void VMINSH(PPUThread& CPU, ppu_opcode_t op); + void VMINSW(PPUThread& CPU, ppu_opcode_t op); + void VMINUB(PPUThread& CPU, ppu_opcode_t op); + void VMINUH(PPUThread& CPU, ppu_opcode_t op); + void VMINUW(PPUThread& CPU, ppu_opcode_t op); + void VMLADDUHM(PPUThread& CPU, ppu_opcode_t op); + void VMRGHB(PPUThread& CPU, ppu_opcode_t op); + void VMRGHH(PPUThread& CPU, ppu_opcode_t op); + void VMRGHW(PPUThread& CPU, ppu_opcode_t op); + void VMRGLB(PPUThread& CPU, ppu_opcode_t op); + void VMRGLH(PPUThread& CPU, ppu_opcode_t op); + void VMRGLW(PPUThread& CPU, ppu_opcode_t op); + void VMSUMMBM(PPUThread& CPU, ppu_opcode_t op); + void VMSUMSHM(PPUThread& CPU, ppu_opcode_t op); + void VMSUMSHS(PPUThread& CPU, ppu_opcode_t op); + void VMSUMUBM(PPUThread& CPU, ppu_opcode_t op); + void VMSUMUHM(PPUThread& CPU, ppu_opcode_t op); + void VMSUMUHS(PPUThread& CPU, ppu_opcode_t op); + void VMULESB(PPUThread& CPU, ppu_opcode_t op); + void VMULESH(PPUThread& CPU, ppu_opcode_t op); + void VMULEUB(PPUThread& CPU, ppu_opcode_t op); + void VMULEUH(PPUThread& CPU, ppu_opcode_t op); + void VMULOSB(PPUThread& CPU, ppu_opcode_t op); + void VMULOSH(PPUThread& CPU, ppu_opcode_t op); + void VMULOUB(PPUThread& CPU, ppu_opcode_t op); + void VMULOUH(PPUThread& CPU, 
ppu_opcode_t op); + void VNMSUBFP(PPUThread& CPU, ppu_opcode_t op); + void VNOR(PPUThread& CPU, ppu_opcode_t op); + void VOR(PPUThread& CPU, ppu_opcode_t op); + void VPERM(PPUThread& CPU, ppu_opcode_t op); + void VPKPX(PPUThread& CPU, ppu_opcode_t op); + void VPKSHSS(PPUThread& CPU, ppu_opcode_t op); + void VPKSHUS(PPUThread& CPU, ppu_opcode_t op); + void VPKSWSS(PPUThread& CPU, ppu_opcode_t op); + void VPKSWUS(PPUThread& CPU, ppu_opcode_t op); + void VPKUHUM(PPUThread& CPU, ppu_opcode_t op); + void VPKUHUS(PPUThread& CPU, ppu_opcode_t op); + void VPKUWUM(PPUThread& CPU, ppu_opcode_t op); + void VPKUWUS(PPUThread& CPU, ppu_opcode_t op); + void VREFP(PPUThread& CPU, ppu_opcode_t op); + void VRFIM(PPUThread& CPU, ppu_opcode_t op); + void VRFIN(PPUThread& CPU, ppu_opcode_t op); + void VRFIP(PPUThread& CPU, ppu_opcode_t op); + void VRFIZ(PPUThread& CPU, ppu_opcode_t op); + void VRLB(PPUThread& CPU, ppu_opcode_t op); + void VRLH(PPUThread& CPU, ppu_opcode_t op); + void VRLW(PPUThread& CPU, ppu_opcode_t op); + void VRSQRTEFP(PPUThread& CPU, ppu_opcode_t op); + void VSEL(PPUThread& CPU, ppu_opcode_t op); + void VSL(PPUThread& CPU, ppu_opcode_t op); + void VSLB(PPUThread& CPU, ppu_opcode_t op); + void VSLDOI(PPUThread& CPU, ppu_opcode_t op); + void VSLH(PPUThread& CPU, ppu_opcode_t op); + void VSLO(PPUThread& CPU, ppu_opcode_t op); + void VSLW(PPUThread& CPU, ppu_opcode_t op); + void VSPLTB(PPUThread& CPU, ppu_opcode_t op); + void VSPLTH(PPUThread& CPU, ppu_opcode_t op); + void VSPLTISB(PPUThread& CPU, ppu_opcode_t op); + void VSPLTISH(PPUThread& CPU, ppu_opcode_t op); + void VSPLTISW(PPUThread& CPU, ppu_opcode_t op); + void VSPLTW(PPUThread& CPU, ppu_opcode_t op); + void VSR(PPUThread& CPU, ppu_opcode_t op); + void VSRAB(PPUThread& CPU, ppu_opcode_t op); + void VSRAH(PPUThread& CPU, ppu_opcode_t op); + void VSRAW(PPUThread& CPU, ppu_opcode_t op); + void VSRB(PPUThread& CPU, ppu_opcode_t op); + void VSRH(PPUThread& CPU, ppu_opcode_t op); + void VSRO(PPUThread& CPU, 
ppu_opcode_t op); + void VSRW(PPUThread& CPU, ppu_opcode_t op); + void VSUBCUW(PPUThread& CPU, ppu_opcode_t op); + void VSUBFP(PPUThread& CPU, ppu_opcode_t op); + void VSUBSBS(PPUThread& CPU, ppu_opcode_t op); + void VSUBSHS(PPUThread& CPU, ppu_opcode_t op); + void VSUBSWS(PPUThread& CPU, ppu_opcode_t op); + void VSUBUBM(PPUThread& CPU, ppu_opcode_t op); + void VSUBUBS(PPUThread& CPU, ppu_opcode_t op); + void VSUBUHM(PPUThread& CPU, ppu_opcode_t op); + void VSUBUHS(PPUThread& CPU, ppu_opcode_t op); + void VSUBUWM(PPUThread& CPU, ppu_opcode_t op); + void VSUBUWS(PPUThread& CPU, ppu_opcode_t op); + void VSUMSWS(PPUThread& CPU, ppu_opcode_t op); + void VSUM2SWS(PPUThread& CPU, ppu_opcode_t op); + void VSUM4SBS(PPUThread& CPU, ppu_opcode_t op); + void VSUM4SHS(PPUThread& CPU, ppu_opcode_t op); + void VSUM4UBS(PPUThread& CPU, ppu_opcode_t op); + void VUPKHPX(PPUThread& CPU, ppu_opcode_t op); + void VUPKHSB(PPUThread& CPU, ppu_opcode_t op); + void VUPKHSH(PPUThread& CPU, ppu_opcode_t op); + void VUPKLPX(PPUThread& CPU, ppu_opcode_t op); + void VUPKLSB(PPUThread& CPU, ppu_opcode_t op); + void VUPKLSH(PPUThread& CPU, ppu_opcode_t op); + void VXOR(PPUThread& CPU, ppu_opcode_t op); + void MULLI(PPUThread& CPU, ppu_opcode_t op); + void SUBFIC(PPUThread& CPU, ppu_opcode_t op); + void CMPLI(PPUThread& CPU, ppu_opcode_t op); + void CMPI(PPUThread& CPU, ppu_opcode_t op); + void ADDIC(PPUThread& CPU, ppu_opcode_t op); + void ADDIC_(PPUThread& CPU, ppu_opcode_t op); + void ADDI(PPUThread& CPU, ppu_opcode_t op); + void ADDIS(PPUThread& CPU, ppu_opcode_t op); + void BC(PPUThread& CPU, ppu_opcode_t op); + void HACK(PPUThread& CPU, ppu_opcode_t op); + void SC(PPUThread& CPU, ppu_opcode_t op); + void B(PPUThread& CPU, ppu_opcode_t op); + void MCRF(PPUThread& CPU, ppu_opcode_t op); + void BCLR(PPUThread& CPU, ppu_opcode_t op); + void CRNOR(PPUThread& CPU, ppu_opcode_t op); + void CRANDC(PPUThread& CPU, ppu_opcode_t op); + void ISYNC(PPUThread& CPU, ppu_opcode_t op); + void 
CRXOR(PPUThread& CPU, ppu_opcode_t op); + void CRNAND(PPUThread& CPU, ppu_opcode_t op); + void CRAND(PPUThread& CPU, ppu_opcode_t op); + void CREQV(PPUThread& CPU, ppu_opcode_t op); + void CRORC(PPUThread& CPU, ppu_opcode_t op); + void CROR(PPUThread& CPU, ppu_opcode_t op); + void BCCTR(PPUThread& CPU, ppu_opcode_t op); + void RLWIMI(PPUThread& CPU, ppu_opcode_t op); + void RLWINM(PPUThread& CPU, ppu_opcode_t op); + void RLWNM(PPUThread& CPU, ppu_opcode_t op); + void ORI(PPUThread& CPU, ppu_opcode_t op); + void ORIS(PPUThread& CPU, ppu_opcode_t op); + void XORI(PPUThread& CPU, ppu_opcode_t op); + void XORIS(PPUThread& CPU, ppu_opcode_t op); + void ANDI_(PPUThread& CPU, ppu_opcode_t op); + void ANDIS_(PPUThread& CPU, ppu_opcode_t op); + void RLDICL(PPUThread& CPU, ppu_opcode_t op); + void RLDICR(PPUThread& CPU, ppu_opcode_t op); + void RLDIC(PPUThread& CPU, ppu_opcode_t op); + void RLDIMI(PPUThread& CPU, ppu_opcode_t op); + void RLDC_LR(PPUThread& CPU, ppu_opcode_t op); + void CMP(PPUThread& CPU, ppu_opcode_t op); + void TW(PPUThread& CPU, ppu_opcode_t op); + void LVSL(PPUThread& CPU, ppu_opcode_t op); + void LVEBX(PPUThread& CPU, ppu_opcode_t op); + void SUBFC(PPUThread& CPU, ppu_opcode_t op); + void MULHDU(PPUThread& CPU, ppu_opcode_t op); + void ADDC(PPUThread& CPU, ppu_opcode_t op); + void MULHWU(PPUThread& CPU, ppu_opcode_t op); + void MFOCRF(PPUThread& CPU, ppu_opcode_t op); + void LWARX(PPUThread& CPU, ppu_opcode_t op); + void LDX(PPUThread& CPU, ppu_opcode_t op); + void LWZX(PPUThread& CPU, ppu_opcode_t op); + void SLW(PPUThread& CPU, ppu_opcode_t op); + void CNTLZW(PPUThread& CPU, ppu_opcode_t op); + void SLD(PPUThread& CPU, ppu_opcode_t op); + void AND(PPUThread& CPU, ppu_opcode_t op); + void CMPL(PPUThread& CPU, ppu_opcode_t op); + void LVSR(PPUThread& CPU, ppu_opcode_t op); + void LVEHX(PPUThread& CPU, ppu_opcode_t op); + void SUBF(PPUThread& CPU, ppu_opcode_t op); + void LDUX(PPUThread& CPU, ppu_opcode_t op); + void DCBST(PPUThread& CPU, ppu_opcode_t 
op); + void LWZUX(PPUThread& CPU, ppu_opcode_t op); + void CNTLZD(PPUThread& CPU, ppu_opcode_t op); + void ANDC(PPUThread& CPU, ppu_opcode_t op); + void TD(PPUThread& CPU, ppu_opcode_t op); + void LVEWX(PPUThread& CPU, ppu_opcode_t op); + void MULHD(PPUThread& CPU, ppu_opcode_t op); + void MULHW(PPUThread& CPU, ppu_opcode_t op); + void LDARX(PPUThread& CPU, ppu_opcode_t op); + void DCBF(PPUThread& CPU, ppu_opcode_t op); + void LBZX(PPUThread& CPU, ppu_opcode_t op); + void LVX(PPUThread& CPU, ppu_opcode_t op); + void NEG(PPUThread& CPU, ppu_opcode_t op); + void LBZUX(PPUThread& CPU, ppu_opcode_t op); + void NOR(PPUThread& CPU, ppu_opcode_t op); + void STVEBX(PPUThread& CPU, ppu_opcode_t op); + void SUBFE(PPUThread& CPU, ppu_opcode_t op); + void ADDE(PPUThread& CPU, ppu_opcode_t op); + void MTOCRF(PPUThread& CPU, ppu_opcode_t op); + void STDX(PPUThread& CPU, ppu_opcode_t op); + void STWCX_(PPUThread& CPU, ppu_opcode_t op); + void STWX(PPUThread& CPU, ppu_opcode_t op); + void STVEHX(PPUThread& CPU, ppu_opcode_t op); + void STDUX(PPUThread& CPU, ppu_opcode_t op); + void STWUX(PPUThread& CPU, ppu_opcode_t op); + void STVEWX(PPUThread& CPU, ppu_opcode_t op); + void SUBFZE(PPUThread& CPU, ppu_opcode_t op); + void ADDZE(PPUThread& CPU, ppu_opcode_t op); + void STDCX_(PPUThread& CPU, ppu_opcode_t op); + void STBX(PPUThread& CPU, ppu_opcode_t op); + void STVX(PPUThread& CPU, ppu_opcode_t op); + void MULLD(PPUThread& CPU, ppu_opcode_t op); + void SUBFME(PPUThread& CPU, ppu_opcode_t op); + void ADDME(PPUThread& CPU, ppu_opcode_t op); + void MULLW(PPUThread& CPU, ppu_opcode_t op); + void DCBTST(PPUThread& CPU, ppu_opcode_t op); + void STBUX(PPUThread& CPU, ppu_opcode_t op); + void ADD(PPUThread& CPU, ppu_opcode_t op); + void DCBT(PPUThread& CPU, ppu_opcode_t op); + void LHZX(PPUThread& CPU, ppu_opcode_t op); + void EQV(PPUThread& CPU, ppu_opcode_t op); + void ECIWX(PPUThread& CPU, ppu_opcode_t op); + void LHZUX(PPUThread& CPU, ppu_opcode_t op); + void XOR(PPUThread& CPU, 
ppu_opcode_t op); + void MFSPR(PPUThread& CPU, ppu_opcode_t op); + void LWAX(PPUThread& CPU, ppu_opcode_t op); + void DST(PPUThread& CPU, ppu_opcode_t op); + void LHAX(PPUThread& CPU, ppu_opcode_t op); + void LVXL(PPUThread& CPU, ppu_opcode_t op); + void MFTB(PPUThread& CPU, ppu_opcode_t op); + void LWAUX(PPUThread& CPU, ppu_opcode_t op); + void DSTST(PPUThread& CPU, ppu_opcode_t op); + void LHAUX(PPUThread& CPU, ppu_opcode_t op); + void STHX(PPUThread& CPU, ppu_opcode_t op); + void ORC(PPUThread& CPU, ppu_opcode_t op); + void ECOWX(PPUThread& CPU, ppu_opcode_t op); + void STHUX(PPUThread& CPU, ppu_opcode_t op); + void OR(PPUThread& CPU, ppu_opcode_t op); + void DIVDU(PPUThread& CPU, ppu_opcode_t op); + void DIVWU(PPUThread& CPU, ppu_opcode_t op); + void MTSPR(PPUThread& CPU, ppu_opcode_t op); + void DCBI(PPUThread& CPU, ppu_opcode_t op); + void NAND(PPUThread& CPU, ppu_opcode_t op); + void STVXL(PPUThread& CPU, ppu_opcode_t op); + void DIVD(PPUThread& CPU, ppu_opcode_t op); + void DIVW(PPUThread& CPU, ppu_opcode_t op); + void LVLX(PPUThread& CPU, ppu_opcode_t op); + void LDBRX(PPUThread& CPU, ppu_opcode_t op); + void LSWX(PPUThread& CPU, ppu_opcode_t op); + void LWBRX(PPUThread& CPU, ppu_opcode_t op); + void LFSX(PPUThread& CPU, ppu_opcode_t op); + void SRW(PPUThread& CPU, ppu_opcode_t op); + void SRD(PPUThread& CPU, ppu_opcode_t op); + void LVRX(PPUThread& CPU, ppu_opcode_t op); + void LSWI(PPUThread& CPU, ppu_opcode_t op); + void LFSUX(PPUThread& CPU, ppu_opcode_t op); + void SYNC(PPUThread& CPU, ppu_opcode_t op); + void LFDX(PPUThread& CPU, ppu_opcode_t op); + void LFDUX(PPUThread& CPU, ppu_opcode_t op); + void STVLX(PPUThread& CPU, ppu_opcode_t op); + void STDBRX(PPUThread& CPU, ppu_opcode_t op); + void STSWX(PPUThread& CPU, ppu_opcode_t op); + void STWBRX(PPUThread& CPU, ppu_opcode_t op); + void STFSX(PPUThread& CPU, ppu_opcode_t op); + void STVRX(PPUThread& CPU, ppu_opcode_t op); + void STFSUX(PPUThread& CPU, ppu_opcode_t op); + void STSWI(PPUThread& CPU, 
ppu_opcode_t op); + void STFDX(PPUThread& CPU, ppu_opcode_t op); + void STFDUX(PPUThread& CPU, ppu_opcode_t op); + void LVLXL(PPUThread& CPU, ppu_opcode_t op); + void LHBRX(PPUThread& CPU, ppu_opcode_t op); + void SRAW(PPUThread& CPU, ppu_opcode_t op); + void SRAD(PPUThread& CPU, ppu_opcode_t op); + void LVRXL(PPUThread& CPU, ppu_opcode_t op); + void DSS(PPUThread& CPU, ppu_opcode_t op); + void SRAWI(PPUThread& CPU, ppu_opcode_t op); + void SRADI1(PPUThread& CPU, ppu_opcode_t op); + void SRADI2(PPUThread& CPU, ppu_opcode_t op); + void EIEIO(PPUThread& CPU, ppu_opcode_t op); + void STVLXL(PPUThread& CPU, ppu_opcode_t op); + void STHBRX(PPUThread& CPU, ppu_opcode_t op); + void EXTSH(PPUThread& CPU, ppu_opcode_t op); + void STVRXL(PPUThread& CPU, ppu_opcode_t op); + void EXTSB(PPUThread& CPU, ppu_opcode_t op); + void STFIWX(PPUThread& CPU, ppu_opcode_t op); + void EXTSW(PPUThread& CPU, ppu_opcode_t op); + void ICBI(PPUThread& CPU, ppu_opcode_t op); + void DCBZ(PPUThread& CPU, ppu_opcode_t op); + void LWZ(PPUThread& CPU, ppu_opcode_t op); + void LWZU(PPUThread& CPU, ppu_opcode_t op); + void LBZ(PPUThread& CPU, ppu_opcode_t op); + void LBZU(PPUThread& CPU, ppu_opcode_t op); + void STW(PPUThread& CPU, ppu_opcode_t op); + void STWU(PPUThread& CPU, ppu_opcode_t op); + void STB(PPUThread& CPU, ppu_opcode_t op); + void STBU(PPUThread& CPU, ppu_opcode_t op); + void LHZ(PPUThread& CPU, ppu_opcode_t op); + void LHZU(PPUThread& CPU, ppu_opcode_t op); + void LHA(PPUThread& CPU, ppu_opcode_t op); + void LHAU(PPUThread& CPU, ppu_opcode_t op); + void STH(PPUThread& CPU, ppu_opcode_t op); + void STHU(PPUThread& CPU, ppu_opcode_t op); + void LMW(PPUThread& CPU, ppu_opcode_t op); + void STMW(PPUThread& CPU, ppu_opcode_t op); + void LFS(PPUThread& CPU, ppu_opcode_t op); + void LFSU(PPUThread& CPU, ppu_opcode_t op); + void LFD(PPUThread& CPU, ppu_opcode_t op); + void LFDU(PPUThread& CPU, ppu_opcode_t op); + void STFS(PPUThread& CPU, ppu_opcode_t op); + void STFSU(PPUThread& CPU, 
ppu_opcode_t op); + void STFD(PPUThread& CPU, ppu_opcode_t op); + void STFDU(PPUThread& CPU, ppu_opcode_t op); + void LD(PPUThread& CPU, ppu_opcode_t op); + void LDU(PPUThread& CPU, ppu_opcode_t op); + void LWA(PPUThread& CPU, ppu_opcode_t op); + void FDIVS(PPUThread& CPU, ppu_opcode_t op); + void FSUBS(PPUThread& CPU, ppu_opcode_t op); + void FADDS(PPUThread& CPU, ppu_opcode_t op); + void FSQRTS(PPUThread& CPU, ppu_opcode_t op); + void FRES(PPUThread& CPU, ppu_opcode_t op); + void FMULS(PPUThread& CPU, ppu_opcode_t op); + void FMADDS(PPUThread& CPU, ppu_opcode_t op); + void FMSUBS(PPUThread& CPU, ppu_opcode_t op); + void FNMSUBS(PPUThread& CPU, ppu_opcode_t op); + void FNMADDS(PPUThread& CPU, ppu_opcode_t op); + void STD(PPUThread& CPU, ppu_opcode_t op); + void STDU(PPUThread& CPU, ppu_opcode_t op); + void MTFSB1(PPUThread& CPU, ppu_opcode_t op); + void MCRFS(PPUThread& CPU, ppu_opcode_t op); + void MTFSB0(PPUThread& CPU, ppu_opcode_t op); + void MTFSFI(PPUThread& CPU, ppu_opcode_t op); + void MFFS(PPUThread& CPU, ppu_opcode_t op); + void MTFSF(PPUThread& CPU, ppu_opcode_t op); + + void FCMPU(PPUThread& CPU, ppu_opcode_t op); + void FRSP(PPUThread& CPU, ppu_opcode_t op); + void FCTIW(PPUThread& CPU, ppu_opcode_t op); + void FCTIWZ(PPUThread& CPU, ppu_opcode_t op); + void FDIV(PPUThread& CPU, ppu_opcode_t op); + void FSUB(PPUThread& CPU, ppu_opcode_t op); + void FADD(PPUThread& CPU, ppu_opcode_t op); + void FSQRT(PPUThread& CPU, ppu_opcode_t op); + void FSEL(PPUThread& CPU, ppu_opcode_t op); + void FMUL(PPUThread& CPU, ppu_opcode_t op); + void FRSQRTE(PPUThread& CPU, ppu_opcode_t op); + void FMSUB(PPUThread& CPU, ppu_opcode_t op); + void FMADD(PPUThread& CPU, ppu_opcode_t op); + void FNMSUB(PPUThread& CPU, ppu_opcode_t op); + void FNMADD(PPUThread& CPU, ppu_opcode_t op); + void FCMPO(PPUThread& CPU, ppu_opcode_t op); + void FNEG(PPUThread& CPU, ppu_opcode_t op); + void FMR(PPUThread& CPU, ppu_opcode_t op); + void FNABS(PPUThread& CPU, ppu_opcode_t op); + void 
FABS(PPUThread& CPU, ppu_opcode_t op); + void FCTID(PPUThread& CPU, ppu_opcode_t op); + void FCTIDZ(PPUThread& CPU, ppu_opcode_t op); + void FCFID(PPUThread& CPU, ppu_opcode_t op); + + void UNK(PPUThread& CPU, ppu_opcode_t op); +} + +class PPUInterpreter2 : public PPUOpcodes +{ +public: + virtual ~PPUInterpreter2() {} + + ppu_inter_func_t func; + + virtual void NULL_OP() { func = ppu_interpreter::NULL_OP; } + virtual void NOP() { func = ppu_interpreter::NOP; } + + virtual void TDI(u32 to, u32 ra, s32 simm16) { func = ppu_interpreter::TDI; } + virtual void TWI(u32 to, u32 ra, s32 simm16) { func = ppu_interpreter::TWI; } + + virtual void MFVSCR(u32 vd) { func = ppu_interpreter::MFVSCR; } + virtual void MTVSCR(u32 vb) { func = ppu_interpreter::MTVSCR; } + virtual void VADDCUW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VADDCUW; } + virtual void VADDFP(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VADDFP; } + virtual void VADDSBS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VADDSBS; } + virtual void VADDSHS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VADDSHS; } + virtual void VADDSWS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VADDSWS; } + virtual void VADDUBM(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VADDUBM; } + virtual void VADDUBS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VADDUBS; } + virtual void VADDUHM(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VADDUHM; } + virtual void VADDUHS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VADDUHS; } + virtual void VADDUWM(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VADDUWM; } + virtual void VADDUWS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VADDUWS; } + virtual void VAND(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VAND; } + virtual void VANDC(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VANDC; } + virtual void VAVGSB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VAVGSB; } + virtual void VAVGSH(u32 vd, u32 va, u32 vb) { func = 
ppu_interpreter::VAVGSH; } + virtual void VAVGSW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VAVGSW; } + virtual void VAVGUB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VAVGUB; } + virtual void VAVGUH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VAVGUH; } + virtual void VAVGUW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VAVGUW; } + virtual void VCFSX(u32 vd, u32 uimm5, u32 vb) { func = ppu_interpreter::VCFSX; } + virtual void VCFUX(u32 vd, u32 uimm5, u32 vb) { func = ppu_interpreter::VCFUX; } + virtual void VCMPBFP(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPBFP; } + virtual void VCMPBFP_(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPBFP_; } + virtual void VCMPEQFP(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPEQFP; } + virtual void VCMPEQFP_(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPEQFP_; } + virtual void VCMPEQUB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPEQUB; } + virtual void VCMPEQUB_(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPEQUB_; } + virtual void VCMPEQUH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPEQUH; } + virtual void VCMPEQUH_(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPEQUH_; } + virtual void VCMPEQUW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPEQUW; } + virtual void VCMPEQUW_(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPEQUW_; } + virtual void VCMPGEFP(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGEFP; } + virtual void VCMPGEFP_(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGEFP_; } + virtual void VCMPGTFP(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTFP; } + virtual void VCMPGTFP_(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTFP_; } + virtual void VCMPGTSB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTSB; } + virtual void VCMPGTSB_(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTSB_; } + virtual void VCMPGTSH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTSH; } + 
virtual void VCMPGTSH_(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTSH_; } + virtual void VCMPGTSW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTSW; } + virtual void VCMPGTSW_(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTSW_; } + virtual void VCMPGTUB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTUB; } + virtual void VCMPGTUB_(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTUB_; } + virtual void VCMPGTUH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTUH; } + virtual void VCMPGTUH_(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTUH_; } + virtual void VCMPGTUW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTUW; } + virtual void VCMPGTUW_(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTUW_; } + virtual void VCTSXS(u32 vd, u32 uimm5, u32 vb) { func = ppu_interpreter::VCTSXS; } + virtual void VCTUXS(u32 vd, u32 uimm5, u32 vb) { func = ppu_interpreter::VCTUXS; } + virtual void VEXPTEFP(u32 vd, u32 vb) { func = ppu_interpreter::VEXPTEFP; } + virtual void VLOGEFP(u32 vd, u32 vb) { func = ppu_interpreter::VLOGEFP; } + virtual void VMADDFP(u32 vd, u32 va, u32 vc, u32 vb) { func = ppu_interpreter::VMADDFP; } + virtual void VMAXFP(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMAXFP; } + virtual void VMAXSB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMAXSB; } + virtual void VMAXSH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMAXSH; } + virtual void VMAXSW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMAXSW; } + virtual void VMAXUB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMAXUB; } + virtual void VMAXUH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMAXUH; } + virtual void VMAXUW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMAXUW; } + virtual void VMHADDSHS(u32 vd, u32 va, u32 vb, u32 vc) { func = ppu_interpreter::VMHADDSHS; } + virtual void VMHRADDSHS(u32 vd, u32 va, u32 vb, u32 vc) { func = ppu_interpreter::VMHRADDSHS; } + virtual void VMINFP(u32 vd, u32 
va, u32 vb) { func = ppu_interpreter::VMINFP; } + virtual void VMINSB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMINSB; } + virtual void VMINSH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMINSH; } + virtual void VMINSW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMINSW; } + virtual void VMINUB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMINUB; } + virtual void VMINUH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMINUH; } + virtual void VMINUW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMINUW; } + virtual void VMLADDUHM(u32 vd, u32 va, u32 vb, u32 vc) { func = ppu_interpreter::VMLADDUHM; } + virtual void VMRGHB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMRGHB; } + virtual void VMRGHH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMRGHH; } + virtual void VMRGHW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMRGHW; } + virtual void VMRGLB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMRGLB; } + virtual void VMRGLH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMRGLH; } + virtual void VMRGLW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMRGLW; } + virtual void VMSUMMBM(u32 vd, u32 va, u32 vb, u32 vc) { func = ppu_interpreter::VMSUMMBM; } + virtual void VMSUMSHM(u32 vd, u32 va, u32 vb, u32 vc) { func = ppu_interpreter::VMSUMSHM; } + virtual void VMSUMSHS(u32 vd, u32 va, u32 vb, u32 vc) { func = ppu_interpreter::VMSUMSHS; } + virtual void VMSUMUBM(u32 vd, u32 va, u32 vb, u32 vc) { func = ppu_interpreter::VMSUMUBM; } + virtual void VMSUMUHM(u32 vd, u32 va, u32 vb, u32 vc) { func = ppu_interpreter::VMSUMUHM; } + virtual void VMSUMUHS(u32 vd, u32 va, u32 vb, u32 vc) { func = ppu_interpreter::VMSUMUHS; } + virtual void VMULESB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMULESB; } + virtual void VMULESH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMULESH; } + virtual void VMULEUB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMULEUB; } + virtual void VMULEUH(u32 vd, u32 va, u32 vb) { func = 
ppu_interpreter::VMULEUH; } + virtual void VMULOSB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMULOSB; } + virtual void VMULOSH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMULOSH; } + virtual void VMULOUB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMULOUB; } + virtual void VMULOUH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMULOUH; } + virtual void VNMSUBFP(u32 vd, u32 va, u32 vc, u32 vb) { func = ppu_interpreter::VNMSUBFP; } + virtual void VNOR(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VNOR; } + virtual void VOR(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VOR; } + virtual void VPERM(u32 vd, u32 va, u32 vb, u32 vc) { func = ppu_interpreter::VPERM; } + virtual void VPKPX(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VPKPX; } + virtual void VPKSHSS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VPKSHSS; } + virtual void VPKSHUS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VPKSHUS; } + virtual void VPKSWSS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VPKSWSS; } + virtual void VPKSWUS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VPKSWUS; } + virtual void VPKUHUM(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VPKUHUM; } + virtual void VPKUHUS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VPKUHUS; } + virtual void VPKUWUM(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VPKUWUM; } + virtual void VPKUWUS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VPKUWUS; } + virtual void VREFP(u32 vd, u32 vb) { func = ppu_interpreter::VREFP; } + virtual void VRFIM(u32 vd, u32 vb) { func = ppu_interpreter::VRFIM; } + virtual void VRFIN(u32 vd, u32 vb) { func = ppu_interpreter::VRFIN; } + virtual void VRFIP(u32 vd, u32 vb) { func = ppu_interpreter::VRFIP; } + virtual void VRFIZ(u32 vd, u32 vb) { func = ppu_interpreter::VRFIZ; } + virtual void VRLB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VRLB; } + virtual void VRLH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VRLH; } + virtual void VRLW(u32 vd, u32 
va, u32 vb) { func = ppu_interpreter::VRLW; } + virtual void VRSQRTEFP(u32 vd, u32 vb) { func = ppu_interpreter::VRSQRTEFP; } + virtual void VSEL(u32 vd, u32 va, u32 vb, u32 vc) { func = ppu_interpreter::VSEL; } + virtual void VSL(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSL; } + virtual void VSLB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSLB; } + virtual void VSLDOI(u32 vd, u32 va, u32 vb, u32 sh) { func = ppu_interpreter::VSLDOI; } + virtual void VSLH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSLH; } + virtual void VSLO(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSLO; } + virtual void VSLW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSLW; } + virtual void VSPLTB(u32 vd, u32 uimm5, u32 vb) { func = ppu_interpreter::VSPLTB; } + virtual void VSPLTH(u32 vd, u32 uimm5, u32 vb) { func = ppu_interpreter::VSPLTH; } + virtual void VSPLTISB(u32 vd, s32 simm5) { func = ppu_interpreter::VSPLTISB; } + virtual void VSPLTISH(u32 vd, s32 simm5) { func = ppu_interpreter::VSPLTISH; } + virtual void VSPLTISW(u32 vd, s32 simm5) { func = ppu_interpreter::VSPLTISW; } + virtual void VSPLTW(u32 vd, u32 uimm5, u32 vb) { func = ppu_interpreter::VSPLTW; } + virtual void VSR(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSR; } + virtual void VSRAB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSRAB; } + virtual void VSRAH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSRAH; } + virtual void VSRAW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSRAW; } + virtual void VSRB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSRB; } + virtual void VSRH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSRH; } + virtual void VSRO(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSRO; } + virtual void VSRW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSRW; } + virtual void VSUBCUW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUBCUW; } + virtual void VSUBFP(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUBFP; } + virtual void 
VSUBSBS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUBSBS; } + virtual void VSUBSHS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUBSHS; } + virtual void VSUBSWS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUBSWS; } + virtual void VSUBUBM(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUBUBM; } + virtual void VSUBUBS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUBUBS; } + virtual void VSUBUHM(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUBUHM; } + virtual void VSUBUHS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUBUHS; } + virtual void VSUBUWM(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUBUWM; } + virtual void VSUBUWS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUBUWS; } + virtual void VSUMSWS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUMSWS; } + virtual void VSUM2SWS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUM2SWS; } + virtual void VSUM4SBS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUM4SBS; } + virtual void VSUM4SHS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUM4SHS; } + virtual void VSUM4UBS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUM4UBS; } + virtual void VUPKHPX(u32 vd, u32 vb) { func = ppu_interpreter::VUPKHPX; } + virtual void VUPKHSB(u32 vd, u32 vb) { func = ppu_interpreter::VUPKHSB; } + virtual void VUPKHSH(u32 vd, u32 vb) { func = ppu_interpreter::VUPKHSH; } + virtual void VUPKLPX(u32 vd, u32 vb) { func = ppu_interpreter::VUPKLPX; } + virtual void VUPKLSB(u32 vd, u32 vb) { func = ppu_interpreter::VUPKLSB; } + virtual void VUPKLSH(u32 vd, u32 vb) { func = ppu_interpreter::VUPKLSH; } + virtual void VXOR(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VXOR; } + virtual void MULLI(u32 rd, u32 ra, s32 simm16) { func = ppu_interpreter::MULLI; } + virtual void SUBFIC(u32 rd, u32 ra, s32 simm16) { func = ppu_interpreter::SUBFIC; } + virtual void CMPLI(u32 bf, u32 l, u32 ra, u32 uimm16) { func = ppu_interpreter::CMPLI; } + virtual void CMPI(u32 bf, u32 l, 
u32 ra, s32 simm16) { func = ppu_interpreter::CMPI; } + virtual void ADDIC(u32 rd, u32 ra, s32 simm16) { func = ppu_interpreter::ADDIC; } + virtual void ADDIC_(u32 rd, u32 ra, s32 simm16) { func = ppu_interpreter::ADDIC_; } + virtual void ADDI(u32 rd, u32 ra, s32 simm16) { func = ppu_interpreter::ADDI; } + virtual void ADDIS(u32 rd, u32 ra, s32 simm16) { func = ppu_interpreter::ADDIS; } + virtual void BC(u32 bo, u32 bi, s32 bd, u32 aa, u32 lk) { func = ppu_interpreter::BC; } + virtual void HACK(u32 index) { func = ppu_interpreter::HACK; } + virtual void SC(u32 lev) { func = ppu_interpreter::SC; } + virtual void B(s32 ll, u32 aa, u32 lk) { func = ppu_interpreter::B; } + virtual void MCRF(u32 crfd, u32 crfs) { func = ppu_interpreter::MCRF; } + virtual void BCLR(u32 bo, u32 bi, u32 bh, u32 lk) { func = ppu_interpreter::BCLR; } + virtual void CRNOR(u32 bt, u32 ba, u32 bb) { func = ppu_interpreter::CRNOR; } + virtual void CRANDC(u32 bt, u32 ba, u32 bb) { func = ppu_interpreter::CRANDC; } + virtual void ISYNC() { func = ppu_interpreter::ISYNC; } + virtual void CRXOR(u32 bt, u32 ba, u32 bb) { func = ppu_interpreter::CRXOR; } + virtual void CRNAND(u32 bt, u32 ba, u32 bb) { func = ppu_interpreter::CRNAND; } + virtual void CRAND(u32 bt, u32 ba, u32 bb) { func = ppu_interpreter::CRAND; } + virtual void CREQV(u32 bt, u32 ba, u32 bb) { func = ppu_interpreter::CREQV; } + virtual void CRORC(u32 bt, u32 ba, u32 bb) { func = ppu_interpreter::CRORC; } + virtual void CROR(u32 bt, u32 ba, u32 bb) { func = ppu_interpreter::CROR; } + virtual void BCCTR(u32 bo, u32 bi, u32 bh, u32 lk) { func = ppu_interpreter::BCCTR; } + virtual void RLWIMI(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) { func = ppu_interpreter::RLWIMI; } + virtual void RLWINM(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) { func = ppu_interpreter::RLWINM; } + virtual void RLWNM(u32 ra, u32 rs, u32 rb, u32 MB, u32 ME, bool rc) { func = ppu_interpreter::RLWNM; } + virtual void ORI(u32 rs, u32 ra, u32 uimm16) { func = 
ppu_interpreter::ORI; } + virtual void ORIS(u32 rs, u32 ra, u32 uimm16) { func = ppu_interpreter::ORIS; } + virtual void XORI(u32 ra, u32 rs, u32 uimm16) { func = ppu_interpreter::XORI; } + virtual void XORIS(u32 ra, u32 rs, u32 uimm16) { func = ppu_interpreter::XORIS; } + virtual void ANDI_(u32 ra, u32 rs, u32 uimm16) { func = ppu_interpreter::ANDI_; } + virtual void ANDIS_(u32 ra, u32 rs, u32 uimm16) { func = ppu_interpreter::ANDIS_; } + virtual void RLDICL(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { func = ppu_interpreter::RLDICL; } + virtual void RLDICR(u32 ra, u32 rs, u32 sh, u32 me, bool rc) { func = ppu_interpreter::RLDICR; } + virtual void RLDIC(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { func = ppu_interpreter::RLDIC; } + virtual void RLDIMI(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { func = ppu_interpreter::RLDIMI; } + virtual void RLDC_LR(u32 ra, u32 rs, u32 rb, u32 m_eb, bool is_r, bool rc) { func = ppu_interpreter::RLDC_LR; } + virtual void CMP(u32 crfd, u32 l, u32 ra, u32 rb) { func = ppu_interpreter::CMP; } + virtual void TW(u32 to, u32 ra, u32 rb) { func = ppu_interpreter::TW; } + virtual void LVSL(u32 vd, u32 ra, u32 rb) { func = ppu_interpreter::LVSL; } + virtual void LVEBX(u32 vd, u32 ra, u32 rb) { func = ppu_interpreter::LVEBX; } + virtual void SUBFC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { func = ppu_interpreter::SUBFC; } + virtual void MULHDU(u32 rd, u32 ra, u32 rb, bool rc) { func = ppu_interpreter::MULHDU; } + virtual void ADDC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { func = ppu_interpreter::ADDC; } + virtual void MULHWU(u32 rd, u32 ra, u32 rb, bool rc) { func = ppu_interpreter::MULHWU; } + virtual void MFOCRF(u32 a, u32 rd, u32 crm) { func = ppu_interpreter::MFOCRF; } + virtual void LWARX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LWARX; } + virtual void LDX(u32 ra, u32 rs, u32 rb) { func = ppu_interpreter::LDX; } + virtual void LWZX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LWZX; } + virtual void SLW(u32 ra, u32 rs, u32 rb, 
bool rc) { func = ppu_interpreter::SLW; } + virtual void CNTLZW(u32 ra, u32 rs, bool rc) { func = ppu_interpreter::CNTLZW; } + virtual void SLD(u32 ra, u32 rs, u32 rb, bool rc) { func = ppu_interpreter::SLD; } + virtual void AND(u32 ra, u32 rs, u32 rb, bool rc) { func = ppu_interpreter::AND; } + virtual void CMPL(u32 bf, u32 l, u32 ra, u32 rb) { func = ppu_interpreter::CMPL; } + virtual void LVSR(u32 vd, u32 ra, u32 rb) { func = ppu_interpreter::LVSR; } + virtual void LVEHX(u32 vd, u32 ra, u32 rb) { func = ppu_interpreter::LVEHX; } + virtual void SUBF(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { func = ppu_interpreter::SUBF; } + virtual void LDUX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LDUX; } + virtual void DCBST(u32 ra, u32 rb) { func = ppu_interpreter::DCBST; } + virtual void LWZUX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LWZUX; } + virtual void CNTLZD(u32 ra, u32 rs, bool rc) { func = ppu_interpreter::CNTLZD; } + virtual void ANDC(u32 ra, u32 rs, u32 rb, bool rc) { func = ppu_interpreter::ANDC; } + virtual void TD(u32 to, u32 ra, u32 rb) { func = ppu_interpreter::TD; } + virtual void LVEWX(u32 vd, u32 ra, u32 rb) { func = ppu_interpreter::LVEWX; } + virtual void MULHD(u32 rd, u32 ra, u32 rb, bool rc) { func = ppu_interpreter::MULHD; } + virtual void MULHW(u32 rd, u32 ra, u32 rb, bool rc) { func = ppu_interpreter::MULHW; } + virtual void LDARX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LDARX; } + virtual void DCBF(u32 ra, u32 rb) { func = ppu_interpreter::DCBF; } + virtual void LBZX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LBZX; } + virtual void LVX(u32 vd, u32 ra, u32 rb) { func = ppu_interpreter::LVX; } + virtual void NEG(u32 rd, u32 ra, u32 oe, bool rc) { func = ppu_interpreter::NEG; } + virtual void LBZUX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LBZUX; } + virtual void NOR(u32 ra, u32 rs, u32 rb, bool rc) { func = ppu_interpreter::NOR; } + virtual void STVEBX(u32 vs, u32 ra, u32 rb) { func = ppu_interpreter::STVEBX; 
} + virtual void SUBFE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { func = ppu_interpreter::SUBFE; } + virtual void ADDE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { func = ppu_interpreter::ADDE; } + virtual void MTOCRF(u32 l, u32 crm, u32 rs) { func = ppu_interpreter::MTOCRF; } + virtual void STDX(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STDX; } + virtual void STWCX_(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STWCX_; } + virtual void STWX(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STWX; } + virtual void STVEHX(u32 vs, u32 ra, u32 rb) { func = ppu_interpreter::STVEHX; } + virtual void STDUX(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STDUX; } + virtual void STWUX(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STWUX; } + virtual void STVEWX(u32 vs, u32 ra, u32 rb) { func = ppu_interpreter::STVEWX; } + virtual void SUBFZE(u32 rd, u32 ra, u32 oe, bool rc) { func = ppu_interpreter::SUBFZE; } + virtual void ADDZE(u32 rd, u32 ra, u32 oe, bool rc) { func = ppu_interpreter::ADDZE; } + virtual void STDCX_(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STDCX_; } + virtual void STBX(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STBX; } + virtual void STVX(u32 vs, u32 ra, u32 rb) { func = ppu_interpreter::STVX; } + virtual void MULLD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { func = ppu_interpreter::MULLD; } + virtual void SUBFME(u32 rd, u32 ra, u32 oe, bool rc) { func = ppu_interpreter::SUBFME; } + virtual void ADDME(u32 rd, u32 ra, u32 oe, bool rc) { func = ppu_interpreter::ADDME; } + virtual void MULLW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { func = ppu_interpreter::MULLW; } + virtual void DCBTST(u32 ra, u32 rb, u32 th) { func = ppu_interpreter::DCBTST; } + virtual void STBUX(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STBUX; } + virtual void ADD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { func = ppu_interpreter::ADD; } + virtual void DCBT(u32 ra, u32 rb, u32 th) { func = ppu_interpreter::DCBT; } + virtual void LHZX(u32 rd, u32 ra, 
u32 rb) { func = ppu_interpreter::LHZX; } + virtual void EQV(u32 ra, u32 rs, u32 rb, bool rc) { func = ppu_interpreter::EQV; } + virtual void ECIWX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::ECIWX; } + virtual void LHZUX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LHZUX; } + virtual void XOR(u32 rs, u32 ra, u32 rb, bool rc) { func = ppu_interpreter::XOR; } + virtual void MFSPR(u32 rd, u32 spr) { func = ppu_interpreter::MFSPR; } + virtual void LWAX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LWAX; } + virtual void DST(u32 ra, u32 rb, u32 strm, u32 t) { func = ppu_interpreter::DST; } + virtual void LHAX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LHAX; } + virtual void LVXL(u32 vd, u32 ra, u32 rb) { func = ppu_interpreter::LVXL; } + virtual void MFTB(u32 rd, u32 spr) { func = ppu_interpreter::MFTB; } + virtual void LWAUX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LWAUX; } + virtual void DSTST(u32 ra, u32 rb, u32 strm, u32 t) { func = ppu_interpreter::DSTST; } + virtual void LHAUX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LHAUX; } + virtual void STHX(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STHX; } + virtual void ORC(u32 rs, u32 ra, u32 rb, bool rc) { func = ppu_interpreter::ORC; } + virtual void ECOWX(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::ECOWX; } + virtual void STHUX(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STHUX; } + virtual void OR(u32 ra, u32 rs, u32 rb, bool rc) { func = ppu_interpreter::OR; } + virtual void DIVDU(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { func = ppu_interpreter::DIVDU; } + virtual void DIVWU(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { func = ppu_interpreter::DIVWU; } + virtual void MTSPR(u32 spr, u32 rs) { func = ppu_interpreter::MTSPR; } + virtual void DCBI(u32 ra, u32 rb) { func = ppu_interpreter::DCBI; } + virtual void NAND(u32 ra, u32 rs, u32 rb, bool rc) { func = ppu_interpreter::NAND; } + virtual void STVXL(u32 vs, u32 ra, u32 rb) { func = ppu_interpreter::STVXL; } + 
virtual void DIVD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { func = ppu_interpreter::DIVD; } + virtual void DIVW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { func = ppu_interpreter::DIVW; } + virtual void LVLX(u32 vd, u32 ra, u32 rb) { func = ppu_interpreter::LVLX; } + virtual void LDBRX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LDBRX; } + virtual void LSWX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LSWX; } + virtual void LWBRX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LWBRX; } + virtual void LFSX(u32 frd, u32 ra, u32 rb) { func = ppu_interpreter::LFSX; } + virtual void SRW(u32 ra, u32 rs, u32 rb, bool rc) { func = ppu_interpreter::SRW; } + virtual void SRD(u32 ra, u32 rs, u32 rb, bool rc) { func = ppu_interpreter::SRD; } + virtual void LVRX(u32 vd, u32 ra, u32 rb) { func = ppu_interpreter::LVRX; } + virtual void LSWI(u32 rd, u32 ra, u32 nb) { func = ppu_interpreter::LSWI; } + virtual void LFSUX(u32 frd, u32 ra, u32 rb) { func = ppu_interpreter::LFSUX; } + virtual void SYNC(u32 l) { func = ppu_interpreter::SYNC; } + virtual void LFDX(u32 frd, u32 ra, u32 rb) { func = ppu_interpreter::LFDX; } + virtual void LFDUX(u32 frd, u32 ra, u32 rb) { func = ppu_interpreter::LFDUX; } + virtual void STVLX(u32 vs, u32 ra, u32 rb) { func = ppu_interpreter::STVLX; } + virtual void STDBRX(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STDBRX; } + virtual void STSWX(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STSWX; } + virtual void STWBRX(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STWBRX; } + virtual void STFSX(u32 frs, u32 ra, u32 rb) { func = ppu_interpreter::STFSX; } + virtual void STVRX(u32 vs, u32 ra, u32 rb) { func = ppu_interpreter::STVRX; } + virtual void STFSUX(u32 frs, u32 ra, u32 rb) { func = ppu_interpreter::STFSUX; } + virtual void STSWI(u32 rd, u32 ra, u32 nb) { func = ppu_interpreter::STSWI; } + virtual void STFDX(u32 frs, u32 ra, u32 rb) { func = ppu_interpreter::STFDX; } + virtual void STFDUX(u32 frs, u32 ra, u32 rb) { func = 
ppu_interpreter::STFDUX; } + virtual void LVLXL(u32 vd, u32 ra, u32 rb) { func = ppu_interpreter::LVLXL; } + virtual void LHBRX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LHBRX; } + virtual void SRAW(u32 ra, u32 rs, u32 rb, bool rc) { func = ppu_interpreter::SRAW; } + virtual void SRAD(u32 ra, u32 rs, u32 rb, bool rc) { func = ppu_interpreter::SRAD; } + virtual void LVRXL(u32 vd, u32 ra, u32 rb) { func = ppu_interpreter::LVRXL; } + virtual void DSS(u32 strm, u32 a) { func = ppu_interpreter::DSS; } + virtual void SRAWI(u32 ra, u32 rs, u32 sh, bool rc) { func = ppu_interpreter::SRAWI; } + virtual void SRADI1(u32 ra, u32 rs, u32 sh, bool rc) { func = ppu_interpreter::SRADI1; } + virtual void SRADI2(u32 ra, u32 rs, u32 sh, bool rc) { func = ppu_interpreter::SRADI2; } + virtual void EIEIO() { func = ppu_interpreter::EIEIO; } + virtual void STVLXL(u32 vs, u32 ra, u32 rb) { func = ppu_interpreter::STVLXL; } + virtual void STHBRX(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STHBRX; } + virtual void EXTSH(u32 ra, u32 rs, bool rc) { func = ppu_interpreter::EXTSH; } + virtual void STVRXL(u32 sd, u32 ra, u32 rb) { func = ppu_interpreter::STVRXL; } + virtual void EXTSB(u32 ra, u32 rs, bool rc) { func = ppu_interpreter::EXTSB; } + virtual void STFIWX(u32 frs, u32 ra, u32 rb) { func = ppu_interpreter::STFIWX; } + virtual void EXTSW(u32 ra, u32 rs, bool rc) { func = ppu_interpreter::EXTSW; } + virtual void ICBI(u32 ra, u32 rb) { func = ppu_interpreter::ICBI; } + virtual void DCBZ(u32 ra, u32 rb) { func = ppu_interpreter::DCBZ; } + virtual void LWZ(u32 rd, u32 ra, s32 d) { func = ppu_interpreter::LWZ; } + virtual void LWZU(u32 rd, u32 ra, s32 d) { func = ppu_interpreter::LWZU; } + virtual void LBZ(u32 rd, u32 ra, s32 d) { func = ppu_interpreter::LBZ; } + virtual void LBZU(u32 rd, u32 ra, s32 d) { func = ppu_interpreter::LBZU; } + virtual void STW(u32 rs, u32 ra, s32 d) { func = ppu_interpreter::STW; } + virtual void STWU(u32 rs, u32 ra, s32 d) { func = 
ppu_interpreter::STWU; } + virtual void STB(u32 rs, u32 ra, s32 d) { func = ppu_interpreter::STB; } + virtual void STBU(u32 rs, u32 ra, s32 d) { func = ppu_interpreter::STBU; } + virtual void LHZ(u32 rd, u32 ra, s32 d) { func = ppu_interpreter::LHZ; } + virtual void LHZU(u32 rd, u32 ra, s32 d) { func = ppu_interpreter::LHZU; } + virtual void LHA(u32 rs, u32 ra, s32 d) { func = ppu_interpreter::LHA; } + virtual void LHAU(u32 rs, u32 ra, s32 d) { func = ppu_interpreter::LHAU; } + virtual void STH(u32 rs, u32 ra, s32 d) { func = ppu_interpreter::STH; } + virtual void STHU(u32 rs, u32 ra, s32 d) { func = ppu_interpreter::STHU; } + virtual void LMW(u32 rd, u32 ra, s32 d) { func = ppu_interpreter::LMW; } + virtual void STMW(u32 rs, u32 ra, s32 d) { func = ppu_interpreter::STMW; } + virtual void LFS(u32 frd, u32 ra, s32 d) { func = ppu_interpreter::LFS; } + virtual void LFSU(u32 frd, u32 ra, s32 d) { func = ppu_interpreter::LFSU; } + virtual void LFD(u32 frd, u32 ra, s32 d) { func = ppu_interpreter::LFD; } + virtual void LFDU(u32 frd, u32 ra, s32 d) { func = ppu_interpreter::LFDU; } + virtual void STFS(u32 frs, u32 ra, s32 d) { func = ppu_interpreter::STFS; } + virtual void STFSU(u32 frs, u32 ra, s32 d) { func = ppu_interpreter::STFSU; } + virtual void STFD(u32 frs, u32 ra, s32 d) { func = ppu_interpreter::STFD; } + virtual void STFDU(u32 frs, u32 ra, s32 d) { func = ppu_interpreter::STFDU; } + virtual void LD(u32 rd, u32 ra, s32 ds) { func = ppu_interpreter::LD; } + virtual void LDU(u32 rd, u32 ra, s32 ds) { func = ppu_interpreter::LDU; } + virtual void LWA(u32 rd, u32 ra, s32 ds) { func = ppu_interpreter::LWA; } + virtual void FDIVS(u32 frd, u32 fra, u32 frb, bool rc) { func = ppu_interpreter::FDIVS; } + virtual void FSUBS(u32 frd, u32 fra, u32 frb, bool rc) { func = ppu_interpreter::FSUBS; } + virtual void FADDS(u32 frd, u32 fra, u32 frb, bool rc) { func = ppu_interpreter::FADDS; } + virtual void FSQRTS(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FSQRTS; } + 
virtual void FRES(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FRES; } + virtual void FMULS(u32 frd, u32 fra, u32 frc, bool rc) { func = ppu_interpreter::FMULS; } + virtual void FMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { func = ppu_interpreter::FMADDS; } + virtual void FMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { func = ppu_interpreter::FMSUBS; } + virtual void FNMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { func = ppu_interpreter::FNMSUBS; } + virtual void FNMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { func = ppu_interpreter::FNMADDS; } + virtual void STD(u32 rs, u32 ra, s32 ds) { func = ppu_interpreter::STD; } + virtual void STDU(u32 rs, u32 ra, s32 ds) { func = ppu_interpreter::STDU; } + virtual void MTFSB1(u32 bt, bool rc) { func = ppu_interpreter::MTFSB1; } + virtual void MCRFS(u32 bf, u32 bfa) { func = ppu_interpreter::MCRFS; } + virtual void MTFSB0(u32 bt, bool rc) { func = ppu_interpreter::MTFSB0; } + virtual void MTFSFI(u32 crfd, u32 i, bool rc) { func = ppu_interpreter::MTFSFI; } + virtual void MFFS(u32 frd, bool rc) { func = ppu_interpreter::MFFS; } + virtual void MTFSF(u32 flm, u32 frb, bool rc) { func = ppu_interpreter::MTFSF; } + + virtual void FCMPU(u32 bf, u32 fra, u32 frb) { func = ppu_interpreter::FCMPU; } + virtual void FRSP(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FRSP; } + virtual void FCTIW(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FCTIW; } + virtual void FCTIWZ(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FCTIWZ; } + virtual void FDIV(u32 frd, u32 fra, u32 frb, bool rc) { func = ppu_interpreter::FDIV; } + virtual void FSUB(u32 frd, u32 fra, u32 frb, bool rc) { func = ppu_interpreter::FSUB; } + virtual void FADD(u32 frd, u32 fra, u32 frb, bool rc) { func = ppu_interpreter::FADD; } + virtual void FSQRT(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FSQRT; } + virtual void FSEL(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { func = ppu_interpreter::FSEL; } + virtual 
void FMUL(u32 frd, u32 fra, u32 frc, bool rc) { func = ppu_interpreter::FMUL; } + virtual void FRSQRTE(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FRSQRTE; } + virtual void FMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { func = ppu_interpreter::FMSUB; } + virtual void FMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { func = ppu_interpreter::FMADD; } + virtual void FNMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { func = ppu_interpreter::FNMSUB; } + virtual void FNMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { func = ppu_interpreter::FNMADD; } + virtual void FCMPO(u32 crfd, u32 fra, u32 frb) { func = ppu_interpreter::FCMPO; } + virtual void FNEG(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FNEG; } + virtual void FMR(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FMR; } + virtual void FNABS(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FNABS; } + virtual void FABS(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FABS; } + virtual void FCTID(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FCTID; } + virtual void FCTIDZ(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FCTIDZ; } + virtual void FCFID(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FCFID; } + + virtual void UNK(const u32 code, const u32 opcode, const u32 gcode) { func = ppu_interpreter::UNK; } +}; \ No newline at end of file diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 5572e65abf..23f0102285 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -8,15 +8,494 @@ #include "Emu/SysCalls/Modules.h" #include "Emu/Cell/PPUDecoder.h" #include "Emu/Cell/PPUInterpreter.h" +#include "Emu/Cell/PPUInterpreter2.h" #include "Emu/Cell/PPULLVMRecompiler.h" //#include "Emu/Cell/PPURecompiler.h" #include "Emu/CPU/CPUThreadManager.h" +#ifdef _WIN32 +#include +#else +#include +#include +#endif + u64 rotate_mask[64][64]; +const ppu_inter_func_t g_ppu_inter_func_list[] = +{ + nullptr, + + 
ppu_interpreter::NULL_OP, + ppu_interpreter::NOP, + + ppu_interpreter::TDI, + ppu_interpreter::TWI, + + ppu_interpreter::MFVSCR, + ppu_interpreter::MTVSCR, + ppu_interpreter::VADDCUW, + ppu_interpreter::VADDFP, + ppu_interpreter::VADDSBS, + ppu_interpreter::VADDSHS, + ppu_interpreter::VADDSWS, + ppu_interpreter::VADDUBM, + ppu_interpreter::VADDUBS, + ppu_interpreter::VADDUHM, + ppu_interpreter::VADDUHS, + ppu_interpreter::VADDUWM, + ppu_interpreter::VADDUWS, + ppu_interpreter::VAND, + ppu_interpreter::VANDC, + ppu_interpreter::VAVGSB, + ppu_interpreter::VAVGSH, + ppu_interpreter::VAVGSW, + ppu_interpreter::VAVGUB, + ppu_interpreter::VAVGUH, + ppu_interpreter::VAVGUW, + ppu_interpreter::VCFSX, + ppu_interpreter::VCFUX, + ppu_interpreter::VCMPBFP, + ppu_interpreter::VCMPBFP_, + ppu_interpreter::VCMPEQFP, + ppu_interpreter::VCMPEQFP_, + ppu_interpreter::VCMPEQUB, + ppu_interpreter::VCMPEQUB_, + ppu_interpreter::VCMPEQUH, + ppu_interpreter::VCMPEQUH_, + ppu_interpreter::VCMPEQUW, + ppu_interpreter::VCMPEQUW_, + ppu_interpreter::VCMPGEFP, + ppu_interpreter::VCMPGEFP_, + ppu_interpreter::VCMPGTFP, + ppu_interpreter::VCMPGTFP_, + ppu_interpreter::VCMPGTSB, + ppu_interpreter::VCMPGTSB_, + ppu_interpreter::VCMPGTSH, + ppu_interpreter::VCMPGTSH_, + ppu_interpreter::VCMPGTSW, + ppu_interpreter::VCMPGTSW_, + ppu_interpreter::VCMPGTUB, + ppu_interpreter::VCMPGTUB_, + ppu_interpreter::VCMPGTUH, + ppu_interpreter::VCMPGTUH_, + ppu_interpreter::VCMPGTUW, + ppu_interpreter::VCMPGTUW_, + ppu_interpreter::VCTSXS, + ppu_interpreter::VCTUXS, + ppu_interpreter::VEXPTEFP, + ppu_interpreter::VLOGEFP, + ppu_interpreter::VMADDFP, + ppu_interpreter::VMAXFP, + ppu_interpreter::VMAXSB, + ppu_interpreter::VMAXSH, + ppu_interpreter::VMAXSW, + ppu_interpreter::VMAXUB, + ppu_interpreter::VMAXUH, + ppu_interpreter::VMAXUW, + ppu_interpreter::VMHADDSHS, + ppu_interpreter::VMHRADDSHS, + ppu_interpreter::VMINFP, + ppu_interpreter::VMINSB, + ppu_interpreter::VMINSH, + ppu_interpreter::VMINSW, + 
ppu_interpreter::VMINUB, + ppu_interpreter::VMINUH, + ppu_interpreter::VMINUW, + ppu_interpreter::VMLADDUHM, + ppu_interpreter::VMRGHB, + ppu_interpreter::VMRGHH, + ppu_interpreter::VMRGHW, + ppu_interpreter::VMRGLB, + ppu_interpreter::VMRGLH, + ppu_interpreter::VMRGLW, + ppu_interpreter::VMSUMMBM, + ppu_interpreter::VMSUMSHM, + ppu_interpreter::VMSUMSHS, + ppu_interpreter::VMSUMUBM, + ppu_interpreter::VMSUMUHM, + ppu_interpreter::VMSUMUHS, + ppu_interpreter::VMULESB, + ppu_interpreter::VMULESH, + ppu_interpreter::VMULEUB, + ppu_interpreter::VMULEUH, + ppu_interpreter::VMULOSB, + ppu_interpreter::VMULOSH, + ppu_interpreter::VMULOUB, + ppu_interpreter::VMULOUH, + ppu_interpreter::VNMSUBFP, + ppu_interpreter::VNOR, + ppu_interpreter::VOR, + ppu_interpreter::VPERM, + ppu_interpreter::VPKPX, + ppu_interpreter::VPKSHSS, + ppu_interpreter::VPKSHUS, + ppu_interpreter::VPKSWSS, + ppu_interpreter::VPKSWUS, + ppu_interpreter::VPKUHUM, + ppu_interpreter::VPKUHUS, + ppu_interpreter::VPKUWUM, + ppu_interpreter::VPKUWUS, + ppu_interpreter::VREFP, + ppu_interpreter::VRFIM, + ppu_interpreter::VRFIN, + ppu_interpreter::VRFIP, + ppu_interpreter::VRFIZ, + ppu_interpreter::VRLB, + ppu_interpreter::VRLH, + ppu_interpreter::VRLW, + ppu_interpreter::VRSQRTEFP, + ppu_interpreter::VSEL, + ppu_interpreter::VSL, + ppu_interpreter::VSLB, + ppu_interpreter::VSLDOI, + ppu_interpreter::VSLH, + ppu_interpreter::VSLO, + ppu_interpreter::VSLW, + ppu_interpreter::VSPLTB, + ppu_interpreter::VSPLTH, + ppu_interpreter::VSPLTISB, + ppu_interpreter::VSPLTISH, + ppu_interpreter::VSPLTISW, + ppu_interpreter::VSPLTW, + ppu_interpreter::VSR, + ppu_interpreter::VSRAB, + ppu_interpreter::VSRAH, + ppu_interpreter::VSRAW, + ppu_interpreter::VSRB, + ppu_interpreter::VSRH, + ppu_interpreter::VSRO, + ppu_interpreter::VSRW, + ppu_interpreter::VSUBCUW, + ppu_interpreter::VSUBFP, + ppu_interpreter::VSUBSBS, + ppu_interpreter::VSUBSHS, + ppu_interpreter::VSUBSWS, + ppu_interpreter::VSUBUBM, + ppu_interpreter::VSUBUBS, 
+ ppu_interpreter::VSUBUHM, + ppu_interpreter::VSUBUHS, + ppu_interpreter::VSUBUWM, + ppu_interpreter::VSUBUWS, + ppu_interpreter::VSUMSWS, + ppu_interpreter::VSUM2SWS, + ppu_interpreter::VSUM4SBS, + ppu_interpreter::VSUM4SHS, + ppu_interpreter::VSUM4UBS, + ppu_interpreter::VUPKHPX, + ppu_interpreter::VUPKHSB, + ppu_interpreter::VUPKHSH, + ppu_interpreter::VUPKLPX, + ppu_interpreter::VUPKLSB, + ppu_interpreter::VUPKLSH, + ppu_interpreter::VXOR, + ppu_interpreter::MULLI, + ppu_interpreter::SUBFIC, + ppu_interpreter::CMPLI, + ppu_interpreter::CMPI, + ppu_interpreter::ADDIC, + ppu_interpreter::ADDIC_, + ppu_interpreter::ADDI, + ppu_interpreter::ADDIS, + ppu_interpreter::BC, + ppu_interpreter::HACK, + ppu_interpreter::SC, + ppu_interpreter::B, + ppu_interpreter::MCRF, + ppu_interpreter::BCLR, + ppu_interpreter::CRNOR, + ppu_interpreter::CRANDC, + ppu_interpreter::ISYNC, + ppu_interpreter::CRXOR, + ppu_interpreter::CRNAND, + ppu_interpreter::CRAND, + ppu_interpreter::CREQV, + ppu_interpreter::CRORC, + ppu_interpreter::CROR, + ppu_interpreter::BCCTR, + ppu_interpreter::RLWIMI, + ppu_interpreter::RLWINM, + ppu_interpreter::RLWNM, + ppu_interpreter::ORI, + ppu_interpreter::ORIS, + ppu_interpreter::XORI, + ppu_interpreter::XORIS, + ppu_interpreter::ANDI_, + ppu_interpreter::ANDIS_, + ppu_interpreter::RLDICL, + ppu_interpreter::RLDICR, + ppu_interpreter::RLDIC, + ppu_interpreter::RLDIMI, + ppu_interpreter::RLDC_LR, + ppu_interpreter::CMP, + ppu_interpreter::TW, + ppu_interpreter::LVSL, + ppu_interpreter::LVEBX, + ppu_interpreter::SUBFC, + ppu_interpreter::MULHDU, + ppu_interpreter::ADDC, + ppu_interpreter::MULHWU, + ppu_interpreter::MFOCRF, + ppu_interpreter::LWARX, + ppu_interpreter::LDX, + ppu_interpreter::LWZX, + ppu_interpreter::SLW, + ppu_interpreter::CNTLZW, + ppu_interpreter::SLD, + ppu_interpreter::AND, + ppu_interpreter::CMPL, + ppu_interpreter::LVSR, + ppu_interpreter::LVEHX, + ppu_interpreter::SUBF, + ppu_interpreter::LDUX, + ppu_interpreter::DCBST, + 
ppu_interpreter::LWZUX, + ppu_interpreter::CNTLZD, + ppu_interpreter::ANDC, + ppu_interpreter::TD, + ppu_interpreter::LVEWX, + ppu_interpreter::MULHD, + ppu_interpreter::MULHW, + ppu_interpreter::LDARX, + ppu_interpreter::DCBF, + ppu_interpreter::LBZX, + ppu_interpreter::LVX, + ppu_interpreter::NEG, + ppu_interpreter::LBZUX, + ppu_interpreter::NOR, + ppu_interpreter::STVEBX, + ppu_interpreter::SUBFE, + ppu_interpreter::ADDE, + ppu_interpreter::MTOCRF, + ppu_interpreter::STDX, + ppu_interpreter::STWCX_, + ppu_interpreter::STWX, + ppu_interpreter::STVEHX, + ppu_interpreter::STDUX, + ppu_interpreter::STWUX, + ppu_interpreter::STVEWX, + ppu_interpreter::SUBFZE, + ppu_interpreter::ADDZE, + ppu_interpreter::STDCX_, + ppu_interpreter::STBX, + ppu_interpreter::STVX, + ppu_interpreter::MULLD, + ppu_interpreter::SUBFME, + ppu_interpreter::ADDME, + ppu_interpreter::MULLW, + ppu_interpreter::DCBTST, + ppu_interpreter::STBUX, + ppu_interpreter::ADD, + ppu_interpreter::DCBT, + ppu_interpreter::LHZX, + ppu_interpreter::EQV, + ppu_interpreter::ECIWX, + ppu_interpreter::LHZUX, + ppu_interpreter::XOR, + ppu_interpreter::MFSPR, + ppu_interpreter::LWAX, + ppu_interpreter::DST, + ppu_interpreter::LHAX, + ppu_interpreter::LVXL, + ppu_interpreter::MFTB, + ppu_interpreter::LWAUX, + ppu_interpreter::DSTST, + ppu_interpreter::LHAUX, + ppu_interpreter::STHX, + ppu_interpreter::ORC, + ppu_interpreter::ECOWX, + ppu_interpreter::STHUX, + ppu_interpreter::OR, + ppu_interpreter::DIVDU, + ppu_interpreter::DIVWU, + ppu_interpreter::MTSPR, + ppu_interpreter::DCBI, + ppu_interpreter::NAND, + ppu_interpreter::STVXL, + ppu_interpreter::DIVD, + ppu_interpreter::DIVW, + ppu_interpreter::LVLX, + ppu_interpreter::LDBRX, + ppu_interpreter::LSWX, + ppu_interpreter::LWBRX, + ppu_interpreter::LFSX, + ppu_interpreter::SRW, + ppu_interpreter::SRD, + ppu_interpreter::LVRX, + ppu_interpreter::LSWI, + ppu_interpreter::LFSUX, + ppu_interpreter::SYNC, + ppu_interpreter::LFDX, + ppu_interpreter::LFDUX, + 
ppu_interpreter::STVLX, + ppu_interpreter::STDBRX, + ppu_interpreter::STSWX, + ppu_interpreter::STWBRX, + ppu_interpreter::STFSX, + ppu_interpreter::STVRX, + ppu_interpreter::STFSUX, + ppu_interpreter::STSWI, + ppu_interpreter::STFDX, + ppu_interpreter::STFDUX, + ppu_interpreter::LVLXL, + ppu_interpreter::LHBRX, + ppu_interpreter::SRAW, + ppu_interpreter::SRAD, + ppu_interpreter::LVRXL, + ppu_interpreter::DSS, + ppu_interpreter::SRAWI, + ppu_interpreter::SRADI1, + ppu_interpreter::SRADI2, + ppu_interpreter::EIEIO, + ppu_interpreter::STVLXL, + ppu_interpreter::STHBRX, + ppu_interpreter::EXTSH, + ppu_interpreter::STVRXL, + ppu_interpreter::EXTSB, + ppu_interpreter::STFIWX, + ppu_interpreter::EXTSW, + ppu_interpreter::ICBI, + ppu_interpreter::DCBZ, + ppu_interpreter::LWZ, + ppu_interpreter::LWZU, + ppu_interpreter::LBZ, + ppu_interpreter::LBZU, + ppu_interpreter::STW, + ppu_interpreter::STWU, + ppu_interpreter::STB, + ppu_interpreter::STBU, + ppu_interpreter::LHZ, + ppu_interpreter::LHZU, + ppu_interpreter::LHA, + ppu_interpreter::LHAU, + ppu_interpreter::STH, + ppu_interpreter::STHU, + ppu_interpreter::LMW, + ppu_interpreter::STMW, + ppu_interpreter::LFS, + ppu_interpreter::LFSU, + ppu_interpreter::LFD, + ppu_interpreter::LFDU, + ppu_interpreter::STFS, + ppu_interpreter::STFSU, + ppu_interpreter::STFD, + ppu_interpreter::STFDU, + ppu_interpreter::LD, + ppu_interpreter::LDU, + ppu_interpreter::LWA, + ppu_interpreter::FDIVS, + ppu_interpreter::FSUBS, + ppu_interpreter::FADDS, + ppu_interpreter::FSQRTS, + ppu_interpreter::FRES, + ppu_interpreter::FMULS, + ppu_interpreter::FMADDS, + ppu_interpreter::FMSUBS, + ppu_interpreter::FNMSUBS, + ppu_interpreter::FNMADDS, + ppu_interpreter::STD, + ppu_interpreter::STDU, + ppu_interpreter::MTFSB1, + ppu_interpreter::MCRFS, + ppu_interpreter::MTFSB0, + ppu_interpreter::MTFSFI, + ppu_interpreter::MFFS, + ppu_interpreter::MTFSF, + + ppu_interpreter::FCMPU, + ppu_interpreter::FRSP, + ppu_interpreter::FCTIW, + ppu_interpreter::FCTIWZ, + 
ppu_interpreter::FDIV, + ppu_interpreter::FSUB, + ppu_interpreter::FADD, + ppu_interpreter::FSQRT, + ppu_interpreter::FSEL, + ppu_interpreter::FMUL, + ppu_interpreter::FRSQRTE, + ppu_interpreter::FMSUB, + ppu_interpreter::FMADD, + ppu_interpreter::FNMSUB, + ppu_interpreter::FNMADD, + ppu_interpreter::FCMPO, + ppu_interpreter::FNEG, + ppu_interpreter::FMR, + ppu_interpreter::FNABS, + ppu_interpreter::FABS, + ppu_interpreter::FCTID, + ppu_interpreter::FCTIDZ, + ppu_interpreter::FCFID, + + ppu_interpreter::UNK, +}; /* end of g_ppu_inter_func_list: entry 0 is the nullptr sentinel; fill_ppu_exec_map stores indices into this table and PPUThread::Task calls through them */ + extern u32 ppu_get_tls(u32 thread); extern void ppu_free_tls(u32 thread); +void* g_ppu_exec_map = nullptr; /* base of the exec map; a u32 table index is stored at base plus guest address */ + +void finalize_ppu_exec_map() /* Releases the whole exec-map reservation, if one exists, and resets the pointer to nullptr. */ +{ + if (g_ppu_exec_map) + { +#ifdef _WIN32 + VirtualFree(g_ppu_exec_map, 0, MEM_RELEASE); +#else + munmap(g_ppu_exec_map, 0x100000000); +#endif + g_ppu_exec_map = nullptr; + } +} + +void initialize_ppu_exec_map() /* Drops any previous map, then reserves 0x100000000 bytes of address space with no access rights. NOTE(review): the result is stored unchecked; mmap reports failure as MAP_FAILED, not nullptr, so the null test in finalize_ppu_exec_map would not catch it. */ +{ + finalize_ppu_exec_map(); + +#ifdef _WIN32 + g_ppu_exec_map = VirtualAlloc(NULL, 0x100000000, MEM_RESERVE, PAGE_NOACCESS); +#else + g_ppu_exec_map = mmap(nullptr, 0x100000000, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); +#endif +} + +void fill_ppu_exec_map(u32 addr, u32 size) /* Makes the size bytes of the map starting at addr readable and writable, decodes every 32-bit guest word in that range and stores the table index of the handler the decoder selected. Return values of VirtualAlloc and mprotect are not checked. */ +{ +#ifdef _WIN32 + VirtualAlloc((u8*)g_ppu_exec_map + addr, size, MEM_COMMIT, PAGE_READWRITE); +#else + mprotect((u8*)g_ppu_exec_map + addr, size, PROT_READ | PROT_WRITE); +#endif + + PPUInterpreter2* inter; + PPUDecoder dec(inter = new PPUInterpreter2); /* NOTE(review): raw new with no delete in this function; presumably PPUDecoder takes ownership, confirm */ + + for (u32 pos = addr; pos < addr + size; pos += 4) /* NOTE(review): the end bound is computed in u32 and wraps if the range reaches 4 GiB */ + { + inter->func = nullptr; + + // decode PPU opcode + dec.Decode(vm::read32(pos)); + + u32 index = 0; + + // find function index + for (const auto& func : g_ppu_inter_func_list) /* linear scan of the whole table for every decoded word */ + { + if (inter->func == func) + { + index = &func - g_ppu_inter_func_list; + break; + } + } + + // zero function is nullptr, it shouldn't happen + assert(index); /* compiled out under NDEBUG, in which case index 0 would be stored */ + + // write index in memory + *(u32*)((u8*)g_ppu_exec_map + pos) = index; + } +} + PPUThread& GetCurrentPPUThread() { CPUThread* thread = GetCurrentCPUThread(); @@ -29,6 +29,7 @@ PPUThread& 
GetCurrentPPUThread() PPUThread::PPUThread() : CPUThread(CPU_THREAD_PPU) { Reset(); + InitRotateMask(); } PPUThread::~PPUThread() @@ -192,14 +672,16 @@ void PPUThread::FastCall2(u32 addr, u32 rtoc) auto old_rtoc = GPR[2]; auto old_LR = LR; auto old_thread = GetCurrentNamedThread(); + auto old_task = custom_task; /* saved and cleared so the nested Task() call below runs guest code instead of re-entering custom_task */ m_status = Running; PC = addr; GPR[2] = rtoc; LR = Emu.GetCPUThreadStop(); SetCurrentNamedThread(this); + custom_task = nullptr; - CPUThread::Task(); + Task(); /* goes through Task() so the exec-map loop is used when m_dec is null */ m_status = old_status; PC = old_PC; @@ -207,26 +689,56 @@ void PPUThread::FastCall2(u32 addr, u32 rtoc) GPR[2] = old_rtoc; LR = old_LR; SetCurrentNamedThread(old_thread); + custom_task = old_task; } void PPUThread::FastStop() { m_status = Stopped; + m_events |= CPU_EVENT_STOP; /* lets the event check in the Task() loop observe the stop request */ } void PPUThread::Task() { if (custom_task) { - custom_task(*this); + return custom_task(*this); } - else if (m_dec) - { - CPUThread::Task(); - } - else - { + if (m_dec) + { + return CPUThread::Task(); + } + + while (true) /* fallback interpreter loop for the case with no decoder object: dispatch through the exec map until a stop event is handled */ + { + //if (Emu.IsStopped()) + //{ + // return; + //} + + if (m_events) + { + // process events + if (m_events & CPU_EVENT_STOP && (Emu.IsStopped() || IsStopped() || IsPaused())) + { + m_events &= ~CPU_EVENT_STOP; + return; + } + } + + // read opcode + const ppu_opcode_t opcode = { vm::read32(PC) }; + + // read interpreter function index + const u32 index = *(u32*)((u8*)g_ppu_exec_map + PC); /* NOTE(review): no check that PC lies in a range filled by fill_ppu_exec_map; presumably guaranteed by the loader, confirm */ + + // call interpreter function + g_ppu_inter_func_list[index](*this, opcode); /* NOTE(review): index is not range-checked and table entry 0 is nullptr */ + + // next instruction + //PC += 4; + NextPc(4); } } diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 854567c362..ac68abb7f4 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -178,6 +178,7 @@ void SPUThread::FastCall(u32 ls_addr) void SPUThread::FastStop() { m_status = Stopped; + m_events |= CPU_EVENT_STOP; } void SPUThread::FastRun() diff --git a/rpcs3/Emu/System.cpp b/rpcs3/Emu/System.cpp index 82d200e275..e5d6bf05b3 100644 --- a/rpcs3/Emu/System.cpp +++ b/rpcs3/Emu/System.cpp @@ 
-39,6 +39,8 @@ static const std::string& BreakPointsDBName = "BreakPoints.dat"; static const u16 bpdb_version = 0x1000; extern std::atomic g_thread_count; +extern void finalize_ppu_exec_map(); /* defined in rpcs3/Emu/Cell/PPUThread.cpp */ + Emulator::Emulator() : m_status(Stopped) , m_mode(DisAsm) @@ -98,40 +100,41 @@ void Emulator::SetTitle(const std::string& title) void Emulator::CheckStatus() { - //auto& threads = GetCPU().GetThreads(); + //auto threads = GetCPU().GetThreads(); + //if (!threads.size()) //{ // Stop(); // return; //} - //bool IsAllPaused = true; - //for (u32 i = 0; i < threads.size(); ++i) + //bool AllPaused = true; + + //for (auto& t : threads) //{ - // if (threads[i]->IsPaused()) continue; - // IsAllPaused = false; + // if (t->IsPaused()) continue; + // AllPaused = false; // break; //} - //if(IsAllPaused) + //if (AllPaused) //{ - // //ConLog.Warning("all paused!"); // Pause(); // return; //} - //bool IsAllStoped = true; - //for (u32 i = 0; i < threads.size(); ++i) + //bool AllStopped = true; + + //for (auto& t : threads) //{ - // if (threads[i]->IsStopped()) continue; - // IsAllStoped = false; + // if (t->IsStopped()) continue; + // AllStopped = false; // break; //} - //if (IsAllStoped) + //if (AllStopped) //{ - // //LOG_WARNING(GENERAL, "all stoped!"); - // Pause(); //Stop(); + // Pause(); //} } @@ -327,8 +330,18 @@ void Emulator::Stop() if(IsStopped()) return; SendDbgCommand(DID_STOP_EMU); + m_status = Stopped; + { + auto threads = GetCPU().GetThreads(); + + for (auto& t : threads) + { + t->AddEvent(CPU_EVENT_STOP); /* posted before the wait on g_thread_count below so that threads looping in Task() can return */ + } + } + while (g_thread_count) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); @@ -370,6 +383,8 @@ void Emulator::Stop() CurGameInfo.Reset(); Memory.Close(); + + finalize_ppu_exec_map(); /* the exec map is released right after guest memory is closed */ SendDbgCommand(DID_STOPPED_EMU); } diff --git a/rpcs3/Loader/ELF64.cpp b/rpcs3/Loader/ELF64.cpp index 6c89d7cf04..56999154e1 100644 --- a/rpcs3/Loader/ELF64.cpp +++ b/rpcs3/Loader/ELF64.cpp @@ -16,6 +16,9 @@ using namespace PPU_instr; +extern void initialize_ppu_exec_map(); /* defined in rpcs3/Emu/Cell/PPUThread.cpp */ 
+extern void fill_ppu_exec_map(u32 addr, u32 size); /* defined in rpcs3/Emu/Cell/PPUThread.cpp */ + namespace loader { namespace handlers @@ -547,6 +550,16 @@ namespace loader main_thread.args({ Emu.GetPath()/*, "-emu"*/ }).run(); main_thread.gpr(11, OPD.addr()).gpr(12, Emu.GetMallocPageSize()); + initialize_ppu_exec_map(); /* a fresh reservation per load; any previous map is released inside */ + + for (u32 page = 0; page < 0x20000000; page += 4096) /* pre-decodes, one 4096-byte page at a time, every page below 0x20000000 that vm::check_addr accepts; NOTE(review): pages that become valid or are rewritten later are not decoded here */ + { + if (vm::check_addr(page, 4096)) + { + fill_ppu_exec_map(page, 4096); + } + } + return ok; } diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index f7f0b774a9..45870f8502 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -37,6 +37,7 @@ + @@ -366,6 +367,7 @@ + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 9d50d29f54..a404d9b903 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -860,6 +860,9 @@ Emu\SysCalls\Modules + + Emu\CPU\Cell + @@ -1543,5 +1546,8 @@ Emu\SysCalls\Modules + + Emu\CPU\Cell + \ No newline at end of file From 0ca4c189a51cd56e3f357fdd1ed63a322841f24d Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Tue, 17 Mar 2015 02:18:13 +0300 Subject: [PATCH 03/23] Small update --- rpcs3/Emu/Cell/PPUInterpreter.cpp | 74 ++++++++++++++++++++----------- rpcs3/Emu/Cell/PPUInterpreter2.h | 34 ++++++++++++++ 2 files changed, 82 insertions(+), 26 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index 3e0a1b7a65..d38adcf020 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -13,44 +13,48 @@ void ppu_interpreter::NULL_OP(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + throw __FUNCTION__; /* throws the function name, which decays to const char*, instead of executing anything; a handler must catch const char* */ } void ppu_interpreter::NOP(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); } void ppu_interpreter::TDI(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + throw __FUNCTION__; /* same convention as NULL_OP: the thrown value is the function name */ } void 
ppu_interpreter::TWI(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + throw __FUNCTION__; } void ppu_interpreter::MFVSCR(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + throw __FUNCTION__; } void ppu_interpreter::MTVSCR(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); } void ppu_interpreter::VADDCUW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._u32[w] = ~CPU.VPR[op.va]._u32[w] < CPU.VPR[op.vb]._u32[w]; + } } void ppu_interpreter::VADDFP(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._f[w] = CPU.VPR[op.va]._f[w] + CPU.VPR[op.vb]._f[w]; + } } void ppu_interpreter::VADDSBS(PPUThread& CPU, ppu_opcode_t op) @@ -820,12 +824,16 @@ void ppu_interpreter::VXOR(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::MULLI(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.GPR[op.rd] = (s64)CPU.GPR[op.ra] * op.simm16; } void ppu_interpreter::SUBFIC(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 RA = CPU.GPR[op.ra]; + const u64 IMM = (s64)op.simm16; + CPU.GPR[op.rd] = ~RA + IMM + 1; + + CPU.XER.CA = CPU.IsCarry(~RA, IMM, 1); } void ppu_interpreter::CMPLI(PPUThread& CPU, ppu_opcode_t op) @@ -840,22 +848,27 @@ void ppu_interpreter::CMPI(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::ADDIC(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 RA = CPU.GPR[op.ra]; + CPU.GPR[op.rd] = RA + op.simm16; + CPU.XER.CA = CPU.IsCarry(RA, op.simm16); } void 
ppu_interpreter::ADDIC_(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 RA = CPU.GPR[op.ra]; + CPU.GPR[op.rd] = RA + op.simm16; + CPU.XER.CA = CPU.IsCarry(RA, op.simm16); + CPU.UpdateCR0(CPU.GPR[op.rd]); } void ppu_interpreter::ADDI(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.GPR[op.rd] = op.ra ? ((s64)CPU.GPR[op.ra] + op.simm16) : op.simm16; } void ppu_interpreter::ADDIS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.GPR[op.rd] = op.ra ? ((s64)CPU.GPR[op.ra] + (op.simm16 << 16)) : (op.simm16 << 16); } void ppu_interpreter::BC(PPUThread& CPU, ppu_opcode_t op) @@ -865,7 +878,7 @@ void ppu_interpreter::BC(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::HACK(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + execute_ppu_func_by_index(CPU, op.opcode & 0x3ffffff); } void ppu_interpreter::SC(PPUThread& CPU, ppu_opcode_t op) @@ -900,7 +913,7 @@ void ppu_interpreter::CRANDC(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::ISYNC(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + _mm_mfence(); } void ppu_interpreter::CRXOR(PPUThread& CPU, ppu_opcode_t op) @@ -955,32 +968,34 @@ void ppu_interpreter::RLWNM(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::ORI(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.GPR[op.ra] = CPU.GPR[op.rs] | op.uimm16; } void ppu_interpreter::ORIS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.GPR[op.ra] = CPU.GPR[op.rs] | ((u64)op.uimm16 << 16); } void ppu_interpreter::XORI(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); 
+ CPU.GPR[op.ra] = CPU.GPR[op.rs] ^ op.uimm16; } void ppu_interpreter::XORIS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.GPR[op.ra] = CPU.GPR[op.rs] ^ ((u64)op.uimm16 << 16); } void ppu_interpreter::ANDI_(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.GPR[op.ra] = CPU.GPR[op.rs] & op.uimm16; + CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::ANDIS_(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.GPR[op.ra] = CPU.GPR[op.rs] & ((u64)op.uimm16 << 16); + CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::RLDICL(PPUThread& CPU, ppu_opcode_t op) @@ -1015,7 +1030,7 @@ void ppu_interpreter::CMP(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::TW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + throw __FUNCTION__; } void ppu_interpreter::LVSL(PPUThread& CPU, ppu_opcode_t op) @@ -1055,17 +1070,24 @@ void ppu_interpreter::MFOCRF(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::LWARX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + + be_t value; + vm::reservation_acquire(&value, vm::cast(addr), sizeof(value)); + + CPU.GPR[op.rd] = value; } void ppu_interpreter::LDX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.GPR[op.rd] = vm::read64(vm::cast(addr)); } void ppu_interpreter::LWZX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.GPR[op.rd] = vm::read32(vm::cast(addr)); } void ppu_interpreter::SLW(PPUThread& CPU, ppu_opcode_t op) diff --git a/rpcs3/Emu/Cell/PPUInterpreter2.h b/rpcs3/Emu/Cell/PPUInterpreter2.h index 6bd9373790..d99396d756 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter2.h +++ b/rpcs3/Emu/Cell/PPUInterpreter2.h @@ -6,6 +6,40 @@ class PPUThread; union ppu_opcode_t { u32 opcode; + + struct + { + u32 : 6; // 26..31 + u32 vc : 5; // 21..25 + u32 vb : 5; // 16..20 + u32 va : 5; // 11..15 + u32 vd : 5; // 6..10 + u32 : 6; // 0..5 + }; + + struct + { + u32 : 6; // 26..31 + u32 : 5; // 21..25 + u32 rb : 5; // 16..20 + u32 ra : 5; // 11..15 + u32 rd : 5; // 6..10 + u32 : 6; // 0..5 + }; + + struct + { + u32 uimm16 : 16; // 16..31 + u32 : 5; // 11..15 + u32 rs : 5; // 6..10 + u32 : 6; // 0..5 + }; + + struct + { + s32 simm16 : 16; // 16..31 + s32 : 16; + }; }; using ppu_inter_func_t = void(*)(PPUThread& CPU, ppu_opcode_t opcode); From 573f112b37f9e9623b47ae4654be5eeae78c0e4d Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Tue, 17 Mar 2015 03:44:35 +0300 Subject: [PATCH 04/23] Small update --- rpcs3/Emu/Cell/PPUInterpreter.cpp | 134 ++++++++++++++++++++++++++---- rpcs3/Emu/Cell/PPUInterpreter2.h | 50 ++++++++++- 2 files changed, 166 insertions(+), 18 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index d38adcf020..2188f160a1 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -13,7 +13,7 @@ void ppu_interpreter::NULL_OP(PPUThread& CPU, ppu_opcode_t op) { - throw __FUNCTION__; + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); } void ppu_interpreter::NOP(PPUThread& CPU, ppu_opcode_t op) @@ -23,12 +23,30 @@ void ppu_interpreter::NOP(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::TDI(PPUThread& CPU, ppu_opcode_t op) { - throw __FUNCTION__; + s64 a = CPU.GPR[op.ra]; + + if ((a < (s64)op.simm16 && (op.bo & 0x10)) || + (a 
>(s64)op.simm16 && (op.bo & 0x8)) || + (a == (s64)op.simm16 && (op.bo & 0x4)) || + ((u64)a < (u64)op.simm16 && (op.bo & 0x2)) || + ((u64)a >(u64)op.simm16 && (op.bo & 0x1))) + { + throw fmt::format("Trap! (tdi 0x%x, r%d, 0x%x)", op.bo, op.ra, op.simm16); + } } void ppu_interpreter::TWI(PPUThread& CPU, ppu_opcode_t op) { - throw __FUNCTION__; + s32 a = (s32)CPU.GPR[op.ra]; + + if ((a < op.simm16 && (op.bo & 0x10)) || + (a > op.simm16 && (op.bo & 0x8)) || + (a == op.simm16 && (op.bo & 0x4)) || + ((u32)a < (u32)op.simm16 && (op.bo & 0x2)) || + ((u32)a >(u32)op.simm16 && (op.bo & 0x1))) + { + throw fmt::Format("Trap! (twi 0x%x, r%d, 0x%x)", op.bo, op.ra, op.simm16); + } } @@ -39,6 +57,7 @@ void ppu_interpreter::MFVSCR(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::MTVSCR(PPUThread& CPU, ppu_opcode_t op) { + // ignored (MFVSCR disabled) } void ppu_interpreter::VADDCUW(PPUThread& CPU, ppu_opcode_t op) @@ -59,57 +78,144 @@ void ppu_interpreter::VADDFP(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VADDSBS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (u32 b = 0; b < 16; ++b) + { + s16 result = (s16)CPU.VPR[op.va]._s8[b] + (s16)CPU.VPR[op.vb]._s8[b]; + + if (result > 0x7f) + { + CPU.VPR[op.vd]._s8[b] = 0x7f; + } + else if (result < -0x80) + { + CPU.VPR[op.vd]._s8[b] = -0x80; + } + else + CPU.VPR[op.vd]._s8[b] = (s8)result; + } } void ppu_interpreter::VADDSHS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint h = 0; h < 8; h++) + { + s32 result = (s32)CPU.VPR[op.va]._s16[h] + (s32)CPU.VPR[op.vb]._s16[h]; + + if (result > 0x7fff) + { + CPU.VPR[op.vd]._s16[h] = 0x7fff; + } + else if (result < -0x8000) + { + CPU.VPR[op.vd]._s16[h] = -0x8000; + } + else + CPU.VPR[op.vd]._s16[h] = result; + } } void ppu_interpreter::VADDSWS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, 
op.opcode); + for (uint w = 0; w < 4; w++) + { + s64 result = (s64)CPU.VPR[op.va]._s32[w] + (s64)CPU.VPR[op.vb]._s32[w]; + + if (result > 0x7fffffff) + { + CPU.VPR[op.vd]._s32[w] = 0x7fffffff; + } + else if (result < (s32)0x80000000) + { + CPU.VPR[op.vd]._s32[w] = 0x80000000; + } + else + CPU.VPR[op.vd]._s32[w] = (s32)result; + } } void ppu_interpreter::VADDUBM(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint b = 0; b < 16; b++) + { + CPU.VPR[op.vd]._u8[b] = CPU.VPR[op.va]._u8[b] + CPU.VPR[op.vb]._u8[b]; + } } void ppu_interpreter::VADDUBS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint b = 0; b < 16; b++) + { + u16 result = (u16)CPU.VPR[op.va]._u8[b] + (u16)CPU.VPR[op.vb]._u8[b]; + + if (result > 0xff) + { + CPU.VPR[op.vd]._u8[b] = 0xff; + } + else + CPU.VPR[op.vd]._u8[b] = (u8)result; + } } void ppu_interpreter::VADDUHM(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._u16[h] = CPU.VPR[op.va]._u16[h] + CPU.VPR[op.vb]._u16[h]; + } } void ppu_interpreter::VADDUHS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint h = 0; h < 8; h++) + { + u32 result = (u32)CPU.VPR[op.va]._u16[h] + (u32)CPU.VPR[op.vb]._u16[h]; + + if (result > 0xffff) + { + CPU.VPR[op.vd]._u16[h] = 0xffff; + } + else + CPU.VPR[op.vd]._u16[h] = result; + } } void ppu_interpreter::VADDUWM(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._u32[w] = CPU.VPR[op.va]._u32[w] + CPU.VPR[op.vb]._u32[w]; + } } void ppu_interpreter::VADDUWS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + u64 
result = (u64)CPU.VPR[op.va]._u32[w] + (u64)CPU.VPR[op.vb]._u32[w]; + + if (result > 0xffffffff) + { + CPU.VPR[op.vd]._u32[w] = 0xffffffff; + } + else + CPU.VPR[op.vd]._u32[w] = (u32)result; + } } void ppu_interpreter::VAND(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._u32[w] = CPU.VPR[op.va]._u32[w] & CPU.VPR[op.vb]._u32[w]; + } } void ppu_interpreter::VANDC(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._u32[w] = CPU.VPR[op.va]._u32[w] & (~CPU.VPR[op.vb]._u32[w]); + } } void ppu_interpreter::VAVGSB(PPUThread& CPU, ppu_opcode_t op) diff --git a/rpcs3/Emu/Cell/PPUInterpreter2.h b/rpcs3/Emu/Cell/PPUInterpreter2.h index d99396d756..f6399b394b 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter2.h +++ b/rpcs3/Emu/Cell/PPUInterpreter2.h @@ -7,6 +7,27 @@ union ppu_opcode_t { u32 opcode; + struct + { + u32 rc : 1; // 31 + u32 shh : 1; // 30 + u32 : 3; // 27..29 + u32 mbmeh : 1; // 26 + u32 mbmel : 5; // 21..25 + u32 shl : 5; // 16..20 + u32 vuimm : 5; // 11..15 + u32 vs : 5; // 6..10 + u32 : 6; + }; + + struct + { + u32 : 6; // 26..31 + u32 vsh : 4; // 22..25 + u32 : 1; // 21 + u32 spr : 10; // 11..20 + }; + struct { u32 : 6; // 26..31 @@ -14,7 +35,7 @@ union ppu_opcode_t u32 vb : 5; // 16..20 u32 va : 5; // 11..15 u32 vd : 5; // 6..10 - u32 : 6; // 0..5 + u32 : 6; }; struct @@ -24,7 +45,7 @@ union ppu_opcode_t u32 rb : 5; // 16..20 u32 ra : 5; // 11..15 u32 rd : 5; // 6..10 - u32 : 6; // 0..5 + u32 : 6; }; struct @@ -32,13 +53,34 @@ union ppu_opcode_t u32 uimm16 : 16; // 16..31 u32 : 5; // 11..15 u32 rs : 5; // 6..10 - u32 : 6; // 0..5 + u32 : 6; }; struct { s32 simm16 : 16; // 16..31 - s32 : 16; + s32 vsimm : 5; // 11..15 + s32 : 11; + }; + + struct + { + u32 : 18; // 14..31 + u32 crfs : 3; // 11..13 + u32 : 2; // 9..10 + u32 crfd : 3; // 6..8 + u32 : 6; 
+ }; + + struct + { + u32 rc : 1; // 31 + u32 me : 5; // 26..30 + u32 mb : 5; // 21..25 + u32 sh : 5; // 16..20 + u32 bi : 5; // 11..15 + u32 bo : 5; // 6..10 + u32 : 6; }; }; From 620e937473e9d46c8d550fd9013528c7976c652d Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Tue, 17 Mar 2015 23:03:24 +0300 Subject: [PATCH 05/23] Basic load --- rpcs3/Emu/Cell/PPUInstrTable.h | 4 +- rpcs3/Emu/Cell/PPUInterpreter.cpp | 2743 +++++++++++++++++++++++++---- rpcs3/Emu/Cell/PPUInterpreter.h | 48 +- rpcs3/Emu/Cell/PPUInterpreter2.h | 77 +- rpcs3/Emu/Cell/PPUThread.cpp | 28 +- 5 files changed, 2472 insertions(+), 428 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInstrTable.h b/rpcs3/Emu/Cell/PPUInstrTable.h index 4dbc1eb4cc..fbad298fbb 100644 --- a/rpcs3/Emu/Cell/PPUInstrTable.h +++ b/rpcs3/Emu/Cell/PPUInstrTable.h @@ -115,8 +115,6 @@ namespace PPU_instr */ static CodeField<30> AA; - static CodeFieldSignedOffset<6, 29, 2> LI(FIELD_BRANCH); - // static CodeFieldSignedOffset<6, 29, 2> LL(FIELD_BRANCH); /* @@ -245,7 +243,7 @@ namespace PPU_instr bind_instr(main_list, BC, BO, BI, BD, AA, LK); bind_instr(main_list, HACK, uimm26); bind_instr(main_list, SC, LEV); - bind_instr(main_list, B, LI, AA, LK); + bind_instr(main_list, B, LL, AA, LK); bind_instr(main_list, RLWIMI, RA, RS, SH, MB, ME, RC); bind_instr(main_list, RLWINM, RA, RS, SH, MB, ME, RC); bind_instr(main_list, RLWNM, RA, RS, RB, MB, ME, RC); diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index 2188f160a1..a2602977eb 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -31,7 +31,7 @@ void ppu_interpreter::TDI(PPUThread& CPU, ppu_opcode_t op) ((u64)a < (u64)op.simm16 && (op.bo & 0x2)) || ((u64)a >(u64)op.simm16 && (op.bo & 0x1))) { - throw fmt::format("Trap! 
(tdi 0x%x, r%d, 0x%x)", op.bo, op.ra, op.simm16); + throw __FUNCTION__; } } @@ -45,7 +45,7 @@ void ppu_interpreter::TWI(PPUThread& CPU, ppu_opcode_t op) ((u32)a < (u32)op.simm16 && (op.bo & 0x2)) || ((u32)a >(u32)op.simm16 && (op.bo & 0x1))) { - throw fmt::Format("Trap! (twi 0x%x, r%d, 0x%x)", op.bo, op.ra, op.simm16); + throw __FUNCTION__; } } @@ -220,712 +220,1996 @@ void ppu_interpreter::VANDC(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VAVGSB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint b = 0; b < 16; b++) + { + CPU.VPR[op.vd]._s8[b] = (CPU.VPR[op.va]._s8[b] + CPU.VPR[op.vb]._s8[b] + 1) >> 1; + } } void ppu_interpreter::VAVGSH(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._s16[h] = (CPU.VPR[op.va]._s16[h] + CPU.VPR[op.vb]._s16[h] + 1) >> 1; + } } void ppu_interpreter::VAVGSW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._s32[w] = ((s64)CPU.VPR[op.va]._s32[w] + (s64)CPU.VPR[op.vb]._s32[w] + 1) >> 1; + } } void ppu_interpreter::VAVGUB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint b = 0; b < 16; b++) + CPU.VPR[op.vd]._u8[b] = (CPU.VPR[op.va]._u8[b] + CPU.VPR[op.vb]._u8[b] + 1) >> 1; } void ppu_interpreter::VAVGUH(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._u16[h] = (CPU.VPR[op.va]._u16[h] + CPU.VPR[op.vb]._u16[h] + 1) >> 1; + } } void ppu_interpreter::VAVGUW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._u32[w] = ((u64)CPU.VPR[op.va]._u32[w] + 
(u64)CPU.VPR[op.vb]._u32[w] + 1) >> 1; + } } void ppu_interpreter::VCFSX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u32 scale = 1 << op.vuimm; + + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._f[w] = ((float)CPU.VPR[op.vb]._s32[w]) / scale; + } } void ppu_interpreter::VCFUX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u32 scale = 1 << op.vuimm; + + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._f[w] = ((float)CPU.VPR[op.vb]._u32[w]) / scale; + } } void ppu_interpreter::VCMPBFP(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + u32 mask = 1 << 31 | 1 << 30; + + const float a = CPU.VPR[op.va]._f[w]; + const float b = CPU.VPR[op.vb]._f[w]; + + if (a <= b) mask &= ~(1 << 31); + if (a >= -b) mask &= ~(1 << 30); + + CPU.VPR[op.vd]._u32[w] = mask; + } } void ppu_interpreter::VCMPBFP_(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + bool allInBounds = true; + + for (uint w = 0; w < 4; w++) + { + u32 mask = 1 << 31 | 1 << 30; + + const float a = CPU.VPR[op.va]._f[w]; + const float b = CPU.VPR[op.vb]._f[w]; + + if (a <= b) mask &= ~(1 << 31); + if (a >= -b) mask &= ~(1 << 30); + + CPU.VPR[op.vd]._u32[w] = mask; + + if (mask) + allInBounds = false; + } + + // Bit n°2 of CR6 + CPU.SetCR(6, 0); + CPU.SetCRBit(6, 0x2, allInBounds); } void ppu_interpreter::VCMPEQFP(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + int all_equal = 0x8; + int none_equal = 0x2; + + for (uint w = 0; w < 4; w++) + { + if (CPU.VPR[op.va]._f[w] == CPU.VPR[op.vb]._f[w]) + { + CPU.VPR[op.vd]._u32[w] = 0xffffffff; + none_equal = 0; + } + else + { + CPU.VPR[op.vd]._u32[w] = 0; + all_equal = 0; + } + } } void ppu_interpreter::VCMPEQFP_(PPUThread& CPU, 
ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + int all_equal = 0x8; + int none_equal = 0x2; + + for (uint w = 0; w < 4; w++) + { + if (CPU.VPR[op.va]._f[w] == CPU.VPR[op.vb]._f[w]) + { + CPU.VPR[op.vd]._u32[w] = 0xffffffff; + none_equal = 0; + } + else + { + CPU.VPR[op.vd]._u32[w] = 0; + all_equal = 0; + } + } + + CPU.CR.cr6 = all_equal | none_equal; } void ppu_interpreter::VCMPEQUB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + int all_equal = 0x8; + int none_equal = 0x2; + + for (uint b = 0; b < 16; b++) + { + if (CPU.VPR[op.va]._u8[b] == CPU.VPR[op.vb]._u8[b]) + { + CPU.VPR[op.vd]._u8[b] = 0xff; + none_equal = 0; + } + else + { + CPU.VPR[op.vd]._u8[b] = 0; + all_equal = 0; + } + } } void ppu_interpreter::VCMPEQUB_(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + int all_equal = 0x8; + int none_equal = 0x2; + + for (uint b = 0; b < 16; b++) + { + if (CPU.VPR[op.va]._u8[b] == CPU.VPR[op.vb]._u8[b]) + { + CPU.VPR[op.vd]._u8[b] = 0xff; + none_equal = 0; + } + else + { + CPU.VPR[op.vd]._u8[b] = 0; + all_equal = 0; + } + } + + CPU.CR.cr6 = all_equal | none_equal; } void ppu_interpreter::VCMPEQUH(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + int all_equal = 0x8; + int none_equal = 0x2; + + for (uint h = 0; h < 8; h++) + { + if (CPU.VPR[op.va]._u16[h] == CPU.VPR[op.vb]._u16[h]) + { + CPU.VPR[op.vd]._u16[h] = 0xffff; + none_equal = 0; + } + else + { + CPU.VPR[op.vd]._u16[h] = 0; + all_equal = 0; + } + } } void ppu_interpreter::VCMPEQUH_(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + int all_equal = 0x8; + int none_equal = 0x2; + + for (uint h = 0; h < 8; h++) + { + if (CPU.VPR[op.va]._u16[h] == CPU.VPR[op.vb]._u16[h]) + { + CPU.VPR[op.vd]._u16[h] = 0xffff; + none_equal = 0; + } 
+ else + { + CPU.VPR[op.vd]._u16[h] = 0; + all_equal = 0; + } + } + + CPU.CR.cr6 = all_equal | none_equal; } void ppu_interpreter::VCMPEQUW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + int all_equal = 0x8; + int none_equal = 0x2; + + for (uint w = 0; w < 4; w++) + { + if (CPU.VPR[op.va]._u32[w] == CPU.VPR[op.vb]._u32[w]) + { + CPU.VPR[op.vd]._u32[w] = 0xffffffff; + none_equal = 0; + } + else + { + CPU.VPR[op.vd]._u32[w] = 0; + all_equal = 0; + } + } } void ppu_interpreter::VCMPEQUW_(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + int all_equal = 0x8; + int none_equal = 0x2; + + for (uint w = 0; w < 4; w++) + { + if (CPU.VPR[op.va]._u32[w] == CPU.VPR[op.vb]._u32[w]) + { + CPU.VPR[op.vd]._u32[w] = 0xffffffff; + none_equal = 0; + } + else + { + CPU.VPR[op.vd]._u32[w] = 0; + all_equal = 0; + } + } + + CPU.CR.cr6 = all_equal | none_equal; } void ppu_interpreter::VCMPGEFP(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + int all_ge = 0x8; + int none_ge = 0x2; + + for (uint w = 0; w < 4; w++) + { + if (CPU.VPR[op.va]._f[w] >= CPU.VPR[op.vb]._f[w]) + { + CPU.VPR[op.vd]._u32[w] = 0xffffffff; + none_ge = 0; + } + else + { + CPU.VPR[op.vd]._u32[w] = 0; + all_ge = 0; + } + } } void ppu_interpreter::VCMPGEFP_(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + int all_ge = 0x8; + int none_ge = 0x2; + + for (uint w = 0; w < 4; w++) + { + if (CPU.VPR[op.va]._f[w] >= CPU.VPR[op.vb]._f[w]) + { + CPU.VPR[op.vd]._u32[w] = 0xffffffff; + none_ge = 0; + } + else + { + CPU.VPR[op.vd]._u32[w] = 0; + all_ge = 0; + } + } + + CPU.CR.cr6 = all_ge | none_ge; } void ppu_interpreter::VCMPGTFP(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + int all_ge = 0x8; + int none_ge = 0x2; + + for (uint w 
= 0; w < 4; w++) + { + if (CPU.VPR[op.va]._f[w] > CPU.VPR[op.vb]._f[w]) + { + CPU.VPR[op.vd]._u32[w] = 0xffffffff; + none_ge = 0; + } + else + { + CPU.VPR[op.vd]._u32[w] = 0; + all_ge = 0; + } + } } void ppu_interpreter::VCMPGTFP_(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + int all_ge = 0x8; + int none_ge = 0x2; + + for (uint w = 0; w < 4; w++) + { + if (CPU.VPR[op.va]._f[w] > CPU.VPR[op.vb]._f[w]) + { + CPU.VPR[op.vd]._u32[w] = 0xffffffff; + none_ge = 0; + } + else + { + CPU.VPR[op.vd]._u32[w] = 0; + all_ge = 0; + } + } + + CPU.CR.cr6 = all_ge | none_ge; } void ppu_interpreter::VCMPGTSB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + int all_gt = 0x8; + int none_gt = 0x2; + + for (uint b = 0; b < 16; b++) + { + if (CPU.VPR[op.va]._s8[b] > CPU.VPR[op.vb]._s8[b]) + { + CPU.VPR[op.vd]._u8[b] = 0xff; + none_gt = 0; + } + else + { + CPU.VPR[op.vd]._u8[b] = 0; + all_gt = 0; + } + } } void ppu_interpreter::VCMPGTSB_(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + int all_gt = 0x8; + int none_gt = 0x2; + + for (uint b = 0; b < 16; b++) + { + if (CPU.VPR[op.va]._s8[b] > CPU.VPR[op.vb]._s8[b]) + { + CPU.VPR[op.vd]._u8[b] = 0xff; + none_gt = 0; + } + else + { + CPU.VPR[op.vd]._u8[b] = 0; + all_gt = 0; + } + } + + CPU.CR.cr6 = all_gt | none_gt; } void ppu_interpreter::VCMPGTSH(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + int all_gt = 0x8; + int none_gt = 0x2; + + for (uint h = 0; h < 8; h++) + { + if (CPU.VPR[op.va]._s16[h] > CPU.VPR[op.vb]._s16[h]) + { + CPU.VPR[op.vd]._u16[h] = 0xffff; + none_gt = 0; + } + else + { + CPU.VPR[op.vd]._u16[h] = 0; + all_gt = 0; + } + } } void ppu_interpreter::VCMPGTSH_(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + int all_gt = 
0x8; + int none_gt = 0x2; + + for (uint h = 0; h < 8; h++) + { + if (CPU.VPR[op.va]._s16[h] > CPU.VPR[op.vb]._s16[h]) + { + CPU.VPR[op.vd]._u16[h] = 0xffff; + none_gt = 0; + } + else + { + CPU.VPR[op.vd]._u16[h] = 0; + all_gt = 0; + } + } + + CPU.CR.cr6 = all_gt | none_gt; } void ppu_interpreter::VCMPGTSW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + int all_gt = 0x8; + int none_gt = 0x2; + + for (uint w = 0; w < 4; w++) + { + if (CPU.VPR[op.va]._s32[w] > CPU.VPR[op.vb]._s32[w]) + { + CPU.VPR[op.vd]._u32[w] = 0xffffffff; + none_gt = 0; + } + else + { + CPU.VPR[op.vd]._u32[w] = 0; + all_gt = 0; + } + } } void ppu_interpreter::VCMPGTSW_(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + int all_gt = 0x8; + int none_gt = 0x2; + + for (uint w = 0; w < 4; w++) + { + if (CPU.VPR[op.va]._s32[w] > CPU.VPR[op.vb]._s32[w]) + { + CPU.VPR[op.vd]._u32[w] = 0xffffffff; + none_gt = 0; + } + else + { + CPU.VPR[op.vd]._u32[w] = 0; + all_gt = 0; + } + } + + CPU.CR.cr6 = all_gt | none_gt; } void ppu_interpreter::VCMPGTUB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + int all_gt = 0x8; + int none_gt = 0x2; + + for (uint b = 0; b < 16; b++) + { + if (CPU.VPR[op.va]._u8[b] > CPU.VPR[op.vb]._u8[b]) + { + CPU.VPR[op.vd]._u8[b] = 0xff; + none_gt = 0; + } + else + { + CPU.VPR[op.vd]._u8[b] = 0; + all_gt = 0; + } + } } void ppu_interpreter::VCMPGTUB_(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + int all_gt = 0x8; + int none_gt = 0x2; + + for (uint b = 0; b < 16; b++) + { + if (CPU.VPR[op.va]._u8[b] > CPU.VPR[op.vb]._u8[b]) + { + CPU.VPR[op.vd]._u8[b] = 0xff; + none_gt = 0; + } + else + { + CPU.VPR[op.vd]._u8[b] = 0; + all_gt = 0; + } + } + + CPU.CR.cr6 = all_gt | none_gt; } void ppu_interpreter::VCMPGTUH(PPUThread& CPU, ppu_opcode_t op) { - 
PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + int all_gt = 0x8; + int none_gt = 0x2; + + for (uint h = 0; h < 8; h++) + { + if (CPU.VPR[op.va]._u16[h] > CPU.VPR[op.vb]._u16[h]) + { + CPU.VPR[op.vd]._u16[h] = 0xffff; + none_gt = 0; + } + else + { + CPU.VPR[op.vd]._u16[h] = 0; + all_gt = 0; + } + } } void ppu_interpreter::VCMPGTUH_(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + int all_gt = 0x8; + int none_gt = 0x2; + + for (uint h = 0; h < 8; h++) + { + if (CPU.VPR[op.va]._u16[h] > CPU.VPR[op.vb]._u16[h]) + { + CPU.VPR[op.vd]._u16[h] = 0xffff; + none_gt = 0; + } + else + { + CPU.VPR[op.vd]._u16[h] = 0; + all_gt = 0; + } + } + + CPU.CR.cr6 = all_gt | none_gt; } void ppu_interpreter::VCMPGTUW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + int all_gt = 0x8; + int none_gt = 0x2; + + for (uint w = 0; w < 4; w++) + { + if (CPU.VPR[op.va]._u32[w] > CPU.VPR[op.vb]._u32[w]) + { + CPU.VPR[op.vd]._u32[w] = 0xffffffff; + none_gt = 0; + } + else + { + CPU.VPR[op.vd]._u32[w] = 0; + all_gt = 0; + } + } } void ppu_interpreter::VCMPGTUW_(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + int all_gt = 0x8; + int none_gt = 0x2; + + for (uint w = 0; w < 4; w++) + { + if (CPU.VPR[op.va]._u32[w] > CPU.VPR[op.vb]._u32[w]) + { + CPU.VPR[op.vd]._u32[w] = 0xffffffff; + none_gt = 0; + } + else + { + CPU.VPR[op.vd]._u32[w] = 0; + all_gt = 0; + } + } + + CPU.CR.cr6 = all_gt | none_gt; } void ppu_interpreter::VCTSXS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u32 nScale = 1 << op.vuimm; + + for (uint w = 0; w < 4; w++) + { + const float b = CPU.VPR[op.vb]._f[w]; + if (std::isnan(b)) + { + CPU.VPR[op.vd]._s32[w] = 0; + } + else + { + double result = (double)b * nScale; + if (result > 0x7fffffff) + { + CPU.VPR[op.vd]._s32[w] 
= (int)0x7fffffff; + } + else if (result < -pow(2, 31)) + { + CPU.VPR[op.vd]._s32[w] = (int)0x80000000; + } + else + CPU.VPR[op.vd]._s32[w] = (int)trunc(result); + } + } } void ppu_interpreter::VCTUXS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u32 nScale = 1 << op.vuimm; + + for (uint w = 0; w < 4; w++) + { + const float b = CPU.VPR[op.vb]._f[w]; + if (std::isnan(b)) + { + CPU.VPR[op.vd]._s32[w] = 0; + } + else + { + double result = (double)b * nScale; + if (result > 0xffffffffu) + { + CPU.VPR[op.vd]._u32[w] = 0xffffffffu; + } + else if (result < 0) + { + CPU.VPR[op.vd]._u32[w] = 0; + } + else + CPU.VPR[op.vd]._u32[w] = (u32)trunc(result); + } + } } void ppu_interpreter::VEXPTEFP(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + const float b = CPU.VPR[op.vb]._f[w]; + CPU.VPR[op.vd]._f[w] = powf(2.0f, b); + } } void ppu_interpreter::VLOGEFP(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + const float b = CPU.VPR[op.vb]._f[w]; + CPU.VPR[op.vd]._f[w] = log2f(b); + } } void ppu_interpreter::VMADDFP(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + const float a = CPU.VPR[op.va]._f[w]; + const float b = CPU.VPR[op.vb]._f[w]; + const float c = CPU.VPR[op.vc]._f[w]; + const float result = fmaf(a, c, b); + CPU.VPR[op.vd]._f[w] = result; + } } void ppu_interpreter::VMAXFP(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + const float a = CPU.VPR[op.va]._f[w]; + const float b = CPU.VPR[op.vb]._f[w]; + if (a > b) + CPU.VPR[op.vd]._f[w] = a; + else if (b > a) + CPU.VPR[op.vd]._f[w] = b; + else if (CPU.VPR[op.vb]._u32[w] == 0x80000000) + 
CPU.VPR[op.vd]._f[w] = a; // max(+0,-0) = +0 + else + CPU.VPR[op.vd]._f[w] = b; + } } void ppu_interpreter::VMAXSB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint b = 0; b < 16; b++) + CPU.VPR[op.vd]._s8[b] = std::max(CPU.VPR[op.va]._s8[b], CPU.VPR[op.vb]._s8[b]); } void ppu_interpreter::VMAXSH(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._s16[h] = std::max(CPU.VPR[op.va]._s16[h], CPU.VPR[op.vb]._s16[h]); + } } void ppu_interpreter::VMAXSW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._s32[w] = std::max(CPU.VPR[op.va]._s32[w], CPU.VPR[op.vb]._s32[w]); + } } void ppu_interpreter::VMAXUB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint b = 0; b < 16; b++) + CPU.VPR[op.vd]._u8[b] = std::max(CPU.VPR[op.va]._u8[b], CPU.VPR[op.vb]._u8[b]); } void ppu_interpreter::VMAXUH(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._u16[h] = std::max(CPU.VPR[op.va]._u16[h], CPU.VPR[op.vb]._u16[h]); + } } void ppu_interpreter::VMAXUW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._u32[w] = std::max(CPU.VPR[op.va]._u32[w], CPU.VPR[op.vb]._u32[w]); + } } void ppu_interpreter::VMHADDSHS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint h = 0; h < 8; h++) + { + s32 result = (s32)CPU.VPR[op.va]._s16[h] * (s32)CPU.VPR[op.vb]._s16[h]; + result = (result >> 15) + (s32)CPU.VPR[op.vc]._s16[h]; + + if (result > INT16_MAX) + { + 
CPU.VPR[op.vd]._s16[h] = (s16)INT16_MAX; + } + else if (result < INT16_MIN) + { + CPU.VPR[op.vd]._s16[h] = (s16)INT16_MIN; + } + else + CPU.VPR[op.vd]._s16[h] = (s16)result; + } } void ppu_interpreter::VMHRADDSHS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint h = 0; h < 8; h++) + { + s32 result = ((s32)CPU.VPR[op.va]._s16[h] * (s32)CPU.VPR[op.vb]._s16[h]) + 0x4000; + result = (result >> 15) + (s32)CPU.VPR[op.vc]._s16[h]; + + if (result > INT16_MAX) + { + CPU.VPR[op.vd]._s16[h] = (s16)INT16_MAX; + } + else if (result < INT16_MIN) + { + CPU.VPR[op.vd]._s16[h] = (s16)INT16_MIN; + } + else + CPU.VPR[op.vd]._s16[h] = (s16)result; + } } void ppu_interpreter::VMINFP(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + const float a = CPU.VPR[op.va]._f[w]; + const float b = CPU.VPR[op.vb]._f[w]; + if (a < b) + CPU.VPR[op.vd]._f[w] = a; + else if (b < a) + CPU.VPR[op.vd]._f[w] = b; + else if (CPU.VPR[op.vb]._u32[w] == 0x00000000) + CPU.VPR[op.vd]._f[w] = a; // min(-0,+0) = -0 + else + CPU.VPR[op.vd]._f[w] = b; + } } void ppu_interpreter::VMINSB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint b = 0; b < 16; b++) + { + CPU.VPR[op.vd]._s8[b] = std::min(CPU.VPR[op.va]._s8[b], CPU.VPR[op.vb]._s8[b]); + } } void ppu_interpreter::VMINSH(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._s16[h] = std::min(CPU.VPR[op.va]._s16[h], CPU.VPR[op.vb]._s16[h]); + } } void ppu_interpreter::VMINSW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._s32[w] = std::min(CPU.VPR[op.va]._s32[w], CPU.VPR[op.vb]._s32[w]); + } } void 
ppu_interpreter::VMINUB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint b = 0; b < 16; b++) + { + CPU.VPR[op.vd]._u8[b] = std::min(CPU.VPR[op.va]._u8[b], CPU.VPR[op.vb]._u8[b]); + } } void ppu_interpreter::VMINUH(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._u16[h] = std::min(CPU.VPR[op.va]._u16[h], CPU.VPR[op.vb]._u16[h]); + } } void ppu_interpreter::VMINUW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._u32[w] = std::min(CPU.VPR[op.va]._u32[w], CPU.VPR[op.vb]._u32[w]); + } } void ppu_interpreter::VMLADDUHM(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._u16[h] = CPU.VPR[op.va]._u16[h] * CPU.VPR[op.vb]._u16[h] + CPU.VPR[op.vc]._u16[h]; + } } void ppu_interpreter::VMRGHB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u128 VA = CPU.VPR[op.va]; + u128 VB = CPU.VPR[op.vb]; + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._u8[15 - h * 2] = VA._u8[15 - h]; + CPU.VPR[op.vd]._u8[15 - h * 2 - 1] = VB._u8[15 - h]; + } } void ppu_interpreter::VMRGHH(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u128 VA = CPU.VPR[op.va]; + u128 VB = CPU.VPR[op.vb]; + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._u16[7 - w * 2] = VA._u16[7 - w]; + CPU.VPR[op.vd]._u16[7 - w * 2 - 1] = VB._u16[7 - w]; + } } void ppu_interpreter::VMRGHW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u128 VA = CPU.VPR[op.va]; + u128 VB = CPU.VPR[op.vb]; + for (uint d = 0; d < 2; d++) + { + CPU.VPR[op.vd]._u32[3 - d 
* 2] = VA._u32[3 - d]; + CPU.VPR[op.vd]._u32[3 - d * 2 - 1] = VB._u32[3 - d]; + } } void ppu_interpreter::VMRGLB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u128 VA = CPU.VPR[op.va]; + u128 VB = CPU.VPR[op.vb]; + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._u8[15 - h * 2] = VA._u8[7 - h]; + CPU.VPR[op.vd]._u8[15 - h * 2 - 1] = VB._u8[7 - h]; + } } void ppu_interpreter::VMRGLH(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u128 VA = CPU.VPR[op.va]; + u128 VB = CPU.VPR[op.vb]; + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._u16[7 - w * 2] = VA._u16[3 - w]; + CPU.VPR[op.vd]._u16[7 - w * 2 - 1] = VB._u16[3 - w]; + } } void ppu_interpreter::VMRGLW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u128 VA = CPU.VPR[op.va]; + u128 VB = CPU.VPR[op.vb]; + for (uint d = 0; d < 2; d++) + { + CPU.VPR[op.vd]._u32[3 - d * 2] = VA._u32[1 - d]; + CPU.VPR[op.vd]._u32[3 - d * 2 - 1] = VB._u32[1 - d]; + } } void ppu_interpreter::VMSUMMBM(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + s32 result = 0; + + for (uint b = 0; b < 4; b++) + { + result += CPU.VPR[op.va]._s8[w * 4 + b] * CPU.VPR[op.vb]._u8[w * 4 + b]; + } + + result += CPU.VPR[op.vc]._s32[w]; + CPU.VPR[op.vd]._s32[w] = result; + } } void ppu_interpreter::VMSUMSHM(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + s32 result = 0; + + for (uint h = 0; h < 2; h++) + { + result += CPU.VPR[op.va]._s16[w * 2 + h] * CPU.VPR[op.vb]._s16[w * 2 + h]; + } + + result += CPU.VPR[op.vc]._s32[w]; + CPU.VPR[op.vd]._s32[w] = result; + } } void ppu_interpreter::VMSUMSHS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); 
(*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + s64 result = 0; + s32 saturated = 0; + + for (uint h = 0; h < 2; h++) + { + result += CPU.VPR[op.va]._s16[w * 2 + h] * CPU.VPR[op.vb]._s16[w * 2 + h]; + } + + result += CPU.VPR[op.vc]._s32[w]; + + if (result > 0x7fffffff) + { + saturated = 0x7fffffff; + } + else if (result < (s64)(s32)0x80000000) + { + saturated = 0x80000000; + } + else + saturated = (s32)result; + + CPU.VPR[op.vd]._s32[w] = saturated; + } } void ppu_interpreter::VMSUMUBM(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + u32 result = 0; + + for (uint b = 0; b < 4; b++) + { + result += (u32)CPU.VPR[op.va]._u8[w * 4 + b] * (u32)CPU.VPR[op.vb]._u8[w * 4 + b]; + } + + result += CPU.VPR[op.vc]._u32[w]; + CPU.VPR[op.vd]._u32[w] = result; + } } void ppu_interpreter::VMSUMUHM(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + u32 result = 0; + + for (uint h = 0; h < 2; h++) + { + result += (u32)CPU.VPR[op.va]._u16[w * 2 + h] * (u32)CPU.VPR[op.vb]._u16[w * 2 + h]; + } + + result += CPU.VPR[op.vc]._u32[w]; + CPU.VPR[op.vd]._u32[w] = result; + } } void ppu_interpreter::VMSUMUHS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + u64 result = 0; + u32 saturated = 0; + + for (uint h = 0; h < 2; h++) + { + result += (u64)CPU.VPR[op.va]._u16[w * 2 + h] * (u64)CPU.VPR[op.vb]._u16[w * 2 + h]; + } + + result += CPU.VPR[op.vc]._u32[w]; + + if (result > 0xffffffffu) + { + saturated = 0xffffffff; + } + else + saturated = (u32)result; + + CPU.VPR[op.vd]._u32[w] = saturated; + } } void ppu_interpreter::VMULESB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint h = 0; h < 8; h++) + { + 
CPU.VPR[op.vd]._s16[h] = (s16)CPU.VPR[op.va]._s8[h * 2 + 1] * (s16)CPU.VPR[op.vb]._s8[h * 2 + 1]; + } } void ppu_interpreter::VMULESH(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._s32[w] = (s32)CPU.VPR[op.va]._s16[w * 2 + 1] * (s32)CPU.VPR[op.vb]._s16[w * 2 + 1]; + } } void ppu_interpreter::VMULEUB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._u16[h] = (u16)CPU.VPR[op.va]._u8[h * 2 + 1] * (u16)CPU.VPR[op.vb]._u8[h * 2 + 1]; + } } void ppu_interpreter::VMULEUH(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._u32[w] = (u32)CPU.VPR[op.va]._u16[w * 2 + 1] * (u32)CPU.VPR[op.vb]._u16[w * 2 + 1]; + } } void ppu_interpreter::VMULOSB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._s16[h] = (s16)CPU.VPR[op.va]._s8[h * 2] * (s16)CPU.VPR[op.vb]._s8[h * 2]; + } } void ppu_interpreter::VMULOSH(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._s32[w] = (s32)CPU.VPR[op.va]._s16[w * 2] * (s32)CPU.VPR[op.vb]._s16[w * 2]; + } } void ppu_interpreter::VMULOUB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._u16[h] = (u16)CPU.VPR[op.va]._u8[h * 2] * (u16)CPU.VPR[op.vb]._u8[h * 2]; + } } void ppu_interpreter::VMULOUH(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._u32[w] = (u32)CPU.VPR[op.va]._u16[w * 2] * 
(u32)CPU.VPR[op.vb]._u16[w * 2]; + } } void ppu_interpreter::VNMSUBFP(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + const float a = CPU.VPR[op.va]._f[w]; + const float b = CPU.VPR[op.vb]._f[w]; + const float c = CPU.VPR[op.vc]._f[w]; + const float result = -fmaf(a, c, -b); + CPU.VPR[op.vd]._f[w] = result; + } } void ppu_interpreter::VNOR(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._u32[w] = ~(CPU.VPR[op.va]._u32[w] | CPU.VPR[op.vb]._u32[w]); + } } void ppu_interpreter::VOR(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._u32[w] = CPU.VPR[op.va]._u32[w] | CPU.VPR[op.vb]._u32[w]; + } } void ppu_interpreter::VPERM(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u8 tmpSRC[32]; + memcpy(tmpSRC, CPU.VPR[op.vb]._u8, 16); + memcpy(tmpSRC + 16, CPU.VPR[op.va]._u8, 16); + + for (uint b = 0; b < 16; b++) + { + u8 index = CPU.VPR[op.vc]._u8[b] & 0x1f; + + CPU.VPR[op.vd]._u8[b] = tmpSRC[0x1f - index]; + } } void ppu_interpreter::VPKPX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u128 VA = CPU.VPR[op.va]; + u128 VB = CPU.VPR[op.vb]; + for (uint h = 0; h < 4; h++) + { + u16 bb7 = VB._u8[15 - (h * 4 + 0)] & 0x1; + u16 bb8 = VB._u8[15 - (h * 4 + 1)] >> 3; + u16 bb16 = VB._u8[15 - (h * 4 + 2)] >> 3; + u16 bb24 = VB._u8[15 - (h * 4 + 3)] >> 3; + u16 ab7 = VA._u8[15 - (h * 4 + 0)] & 0x1; + u16 ab8 = VA._u8[15 - (h * 4 + 1)] >> 3; + u16 ab16 = VA._u8[15 - (h * 4 + 2)] >> 3; + u16 ab24 = VA._u8[15 - (h * 4 + 3)] >> 3; + + CPU.VPR[op.vd]._u16[3 - h] = (bb7 << 15) | (bb8 << 10) | (bb16 << 5) | bb24; + CPU.VPR[op.vd]._u16[4 + (3 - h)] = (ab7 << 
15) | (ab8 << 10) | (ab16 << 5) | ab24; + } } void ppu_interpreter::VPKSHSS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u128 VA = CPU.VPR[op.va]; + u128 VB = CPU.VPR[op.vb]; + for (uint b = 0; b < 8; b++) + { + s16 result = VA._s16[b]; + + if (result > INT8_MAX) + { + result = INT8_MAX; + } + else if (result < INT8_MIN) + { + result = INT8_MIN; + } + + CPU.VPR[op.vd]._s8[b + 8] = (s8)result; + + result = VB._s16[b]; + + if (result > INT8_MAX) + { + result = INT8_MAX; + } + else if (result < INT8_MIN) + { + result = INT8_MIN; + } + + CPU.VPR[op.vd]._s8[b] = (s8)result; + } } void ppu_interpreter::VPKSHUS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u128 VA = CPU.VPR[op.va]; + u128 VB = CPU.VPR[op.vb]; + for (uint b = 0; b < 8; b++) + { + s16 result = VA._s16[b]; + + if (result > UINT8_MAX) + { + result = UINT8_MAX; + } + else if (result < 0) + { + result = 0; + } + + CPU.VPR[op.vd]._u8[b + 8] = (u8)result; + + result = VB._s16[b]; + + if (result > UINT8_MAX) + { + result = UINT8_MAX; + } + else if (result < 0) + { + result = 0; + } + + CPU.VPR[op.vd]._u8[b] = (u8)result; + } } void ppu_interpreter::VPKSWSS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u128 VA = CPU.VPR[op.va]; + u128 VB = CPU.VPR[op.vb]; + for (uint h = 0; h < 4; h++) + { + s32 result = VA._s32[h]; + + if (result > INT16_MAX) + { + result = INT16_MAX; + } + else if (result < INT16_MIN) + { + result = INT16_MIN; + } + + CPU.VPR[op.vd]._s16[h + 4] = result; + + result = VB._s32[h]; + + if (result > INT16_MAX) + { + result = INT16_MAX; + } + else if (result < INT16_MIN) + { + result = INT16_MIN; + } + + CPU.VPR[op.vd]._s16[h] = result; + } } void ppu_interpreter::VPKSWUS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u128 VA = CPU.VPR[op.va]; + u128 
VB = CPU.VPR[op.vb]; + for (uint h = 0; h < 4; h++) + { + s32 result = VA._s32[h]; + + if (result > UINT16_MAX) + { + result = UINT16_MAX; + } + else if (result < 0) + { + result = 0; + } + + CPU.VPR[op.vd]._u16[h + 4] = result; + + result = VB._s32[h]; + + if (result > UINT16_MAX) + { + result = UINT16_MAX; + } + else if (result < 0) + { + result = 0; + } + + CPU.VPR[op.vd]._u16[h] = result; + } } void ppu_interpreter::VPKUHUM(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u128 VA = CPU.VPR[op.va]; + u128 VB = CPU.VPR[op.vb]; + for (uint b = 0; b < 8; b++) + { + CPU.VPR[op.vd]._u8[b + 8] = VA._u8[b * 2]; + CPU.VPR[op.vd]._u8[b] = VB._u8[b * 2]; + } } void ppu_interpreter::VPKUHUS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u128 VA = CPU.VPR[op.va]; + u128 VB = CPU.VPR[op.vb]; + for (uint b = 0; b < 8; b++) + { + u16 result = VA._u16[b]; + + if (result > UINT8_MAX) + { + result = UINT8_MAX; + } + + CPU.VPR[op.vd]._u8[b + 8] = (u8)result; + + result = VB._u16[b]; + + if (result > UINT8_MAX) + { + result = UINT8_MAX; + } + + CPU.VPR[op.vd]._u8[b] = (u8)result; + } } void ppu_interpreter::VPKUWUM(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u128 VA = CPU.VPR[op.va]; + u128 VB = CPU.VPR[op.vb]; + for (uint h = 0; h < 4; h++) + { + CPU.VPR[op.vd]._u16[h + 4] = VA._u16[h * 2]; + CPU.VPR[op.vd]._u16[h] = VB._u16[h * 2]; + } } void ppu_interpreter::VPKUWUS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u128 VA = CPU.VPR[op.va]; + u128 VB = CPU.VPR[op.vb]; + for (uint h = 0; h < 4; h++) + { + u32 result = VA._u32[h]; + + if (result > UINT16_MAX) + { + result = UINT16_MAX; + } + + CPU.VPR[op.vd]._u16[h + 4] = result; + + result = VB._u32[h]; + + if (result > UINT16_MAX) + { + result = UINT16_MAX; + } + + 
CPU.VPR[op.vd]._u16[h] = result; + } } void ppu_interpreter::VREFP(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + const float b = CPU.VPR[op.vb]._f[w]; + CPU.VPR[op.vd]._f[w] = 1.0f / b; + } } void ppu_interpreter::VRFIM(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + const float b = CPU.VPR[op.vb]._f[w]; + CPU.VPR[op.vd]._f[w] = floorf(CPU.VPR[op.vb]._f[w]); + } } void ppu_interpreter::VRFIN(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + const float b = CPU.VPR[op.vb]._f[w]; + CPU.VPR[op.vd]._f[w] = nearbyintf(CPU.VPR[op.vb]._f[w]); + } } void ppu_interpreter::VRFIP(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + const float b = CPU.VPR[op.vb]._f[w]; + CPU.VPR[op.vd]._f[w] = ceilf(CPU.VPR[op.vb]._f[w]); + } } void ppu_interpreter::VRFIZ(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + const float b = CPU.VPR[op.vb]._f[w]; + CPU.VPR[op.vd]._f[w] = truncf(CPU.VPR[op.vb]._f[w]); + } } void ppu_interpreter::VRLB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint b = 0; b < 16; b++) + { + int nRot = CPU.VPR[op.vb]._u8[b] & 0x7; + + CPU.VPR[op.vd]._u8[b] = (CPU.VPR[op.va]._u8[b] << nRot) | (CPU.VPR[op.va]._u8[b] >> (8 - nRot)); + } } void ppu_interpreter::VRLH(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._u16[h] = rotl16(CPU.VPR[op.va]._u16[h], CPU.VPR[op.vb]._u8[h * 2] & 0xf); + } } void 
ppu_interpreter::VRLW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._u32[w] = (u32)rotl32(CPU.VPR[op.va]._u32[w], CPU.VPR[op.vb]._u8[w * 4] & 0x1f); + } } void ppu_interpreter::VRSQRTEFP(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + const float b = CPU.VPR[op.vb]._f[w]; + CPU.VPR[op.vd]._f[w] = 1.0f / sqrtf(b); + } } void ppu_interpreter::VSEL(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint b = 0; b < 16; b++) + { + CPU.VPR[op.vd]._u8[b] = (CPU.VPR[op.vb]._u8[b] & CPU.VPR[op.vc]._u8[b]) | (CPU.VPR[op.va]._u8[b] & (~CPU.VPR[op.vc]._u8[b])); + } } void ppu_interpreter::VSL(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u128 VA = CPU.VPR[op.va]; + u8 sh = CPU.VPR[op.vb]._u8[0] & 0x7; + + CPU.VPR[op.vd]._u8[0] = VA._u8[0] << sh; + for (uint b = 1; b < 16; b++) + { + CPU.VPR[op.vd]._u8[b] = (VA._u8[b] << sh) | (VA._u8[b - 1] >> (8 - sh)); + } } void ppu_interpreter::VSLB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint b = 0; b < 16; b++) + { + CPU.VPR[op.vd]._u8[b] = CPU.VPR[op.va]._u8[b] << (CPU.VPR[op.vb]._u8[b] & 0x7); + } } void ppu_interpreter::VSLDOI(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u8 tmpSRC[32]; + memcpy(tmpSRC, CPU.VPR[op.vb]._u8, 16); + memcpy(tmpSRC + 16, CPU.VPR[op.va]._u8, 16); + + for (uint b = 0; b<16; b++) + { + CPU.VPR[op.vd]._u8[15 - b] = tmpSRC[31 - (b + op.vsh)]; + } } void ppu_interpreter::VSLH(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._u16[h] = 
CPU.VPR[op.va]._u16[h] << (CPU.VPR[op.vb]._u16[h] & 0xf); + } } void ppu_interpreter::VSLO(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u128 VA = CPU.VPR[op.va]; + u8 nShift = (CPU.VPR[op.vb]._u8[0] >> 3) & 0xf; + + CPU.VPR[op.vd].clear(); + + for (u8 b = 0; b < 16 - nShift; b++) + { + CPU.VPR[op.vd]._u8[15 - b] = VA._u8[15 - (b + nShift)]; + } } void ppu_interpreter::VSLW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._u32[w] = CPU.VPR[op.va]._u32[w] << (CPU.VPR[op.vb]._u32[w] & 0x1f); + } } void ppu_interpreter::VSPLTB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u8 byte = CPU.VPR[op.vb]._u8[15 - op.vuimm]; + + for (uint b = 0; b < 16; b++) + { + CPU.VPR[op.vd]._u8[b] = byte; + } } void ppu_interpreter::VSPLTH(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + assert(op.vuimm < 8); + + u16 hword = CPU.VPR[op.vb]._u16[7 - op.vuimm]; + + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._u16[h] = hword; + } } void ppu_interpreter::VSPLTISB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint b = 0; b < 16; b++) + { + CPU.VPR[op.vd]._u8[b] = op.vsimm; + } } void ppu_interpreter::VSPLTISH(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._u16[h] = (s16)op.vsimm; + } } void ppu_interpreter::VSPLTISW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._u32[w] = (s32)op.vsimm; + } } void ppu_interpreter::VSPLTW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); 
(*PPU_instr::main_list)(&inter, op.opcode); + assert(op.vuimm < 4); + + u32 word = CPU.VPR[op.vb]._u32[3 - op.vuimm]; + + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._u32[w] = word; + } } void ppu_interpreter::VSR(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u128 VA = CPU.VPR[op.va]; + u8 sh = CPU.VPR[op.vb]._u8[0] & 0x7; + + CPU.VPR[op.vd]._u8[15] = VA._u8[15] >> sh; + for (uint b = 14; ~b; b--) + { + CPU.VPR[op.vd]._u8[b] = (VA._u8[b] >> sh) | (VA._u8[b + 1] << (8 - sh)); + } } void ppu_interpreter::VSRAB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint b = 0; b < 16; b++) + { + CPU.VPR[op.vd]._s8[b] = CPU.VPR[op.va]._s8[b] >> (CPU.VPR[op.vb]._u8[b] & 0x7); + } } void ppu_interpreter::VSRAH(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._s16[h] = CPU.VPR[op.va]._s16[h] >> (CPU.VPR[op.vb]._u16[h] & 0xf); + } } void ppu_interpreter::VSRAW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._s32[w] = CPU.VPR[op.va]._s32[w] >> (CPU.VPR[op.vb]._u32[w] & 0x1f); + } } void ppu_interpreter::VSRB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint b = 0; b < 16; b++) + { + CPU.VPR[op.vd]._u8[b] = CPU.VPR[op.va]._u8[b] >> (CPU.VPR[op.vb]._u8[b] & 0x7); + } } void ppu_interpreter::VSRH(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._u16[h] = CPU.VPR[op.va]._u16[h] >> (CPU.VPR[op.vb]._u16[h] & 0xf); + } } void ppu_interpreter::VSRO(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + 
u128 VA = CPU.VPR[op.va]; + u8 nShift = (CPU.VPR[op.vb]._u8[0] >> 3) & 0xf; + + CPU.VPR[op.vd].clear(); + + for (u8 b = 0; b < 16 - nShift; b++) + { + CPU.VPR[op.vd]._u8[b] = VA._u8[b + nShift]; + } } void ppu_interpreter::VSRW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._u32[w] = CPU.VPR[op.va]._u32[w] >> (CPU.VPR[op.vb]._u32[w] & 0x1f); + } } void ppu_interpreter::VSUBCUW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._u32[w] = CPU.VPR[op.va]._u32[w] < CPU.VPR[op.vb]._u32[w] ? 0 : 1; + } } void ppu_interpreter::VSUBFP(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + const float a = CPU.VPR[op.va]._f[w]; + const float b = CPU.VPR[op.vb]._f[w]; + CPU.VPR[op.vd]._f[w] = a - b; + } } void ppu_interpreter::VSUBSBS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint b = 0; b < 16; b++) + { + s16 result = (s16)CPU.VPR[op.va]._s8[b] - (s16)CPU.VPR[op.vb]._s8[b]; + + if (result < INT8_MIN) + { + CPU.VPR[op.vd]._s8[b] = INT8_MIN; + } + else if (result > INT8_MAX) + { + CPU.VPR[op.vd]._s8[b] = INT8_MAX; + } + else + CPU.VPR[op.vd]._s8[b] = (s8)result; + } } void ppu_interpreter::VSUBSHS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint h = 0; h < 8; h++) + { + s32 result = (s32)CPU.VPR[op.va]._s16[h] - (s32)CPU.VPR[op.vb]._s16[h]; + + if (result < INT16_MIN) + { + CPU.VPR[op.vd]._s16[h] = (s16)INT16_MIN; + } + else if (result > INT16_MAX) + { + CPU.VPR[op.vd]._s16[h] = (s16)INT16_MAX; + } + else + CPU.VPR[op.vd]._s16[h] = (s16)result; + } } void ppu_interpreter::VSUBSWS(PPUThread& CPU, ppu_opcode_t op) { - 
PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + s64 result = (s64)CPU.VPR[op.va]._s32[w] - (s64)CPU.VPR[op.vb]._s32[w]; + + if (result < INT32_MIN) + { + CPU.VPR[op.vd]._s32[w] = (s32)INT32_MIN; + } + else if (result > INT32_MAX) + { + CPU.VPR[op.vd]._s32[w] = (s32)INT32_MAX; + } + else + CPU.VPR[op.vd]._s32[w] = (s32)result; + } } void ppu_interpreter::VSUBUBM(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint b = 0; b < 16; b++) + { + CPU.VPR[op.vd]._u8[b] = (u8)((CPU.VPR[op.va]._u8[b] - CPU.VPR[op.vb]._u8[b]) & 0xff); + } } void ppu_interpreter::VSUBUBS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint b = 0; b < 16; b++) + { + s16 result = (s16)CPU.VPR[op.va]._u8[b] - (s16)CPU.VPR[op.vb]._u8[b]; + + if (result < 0) + { + CPU.VPR[op.vd]._u8[b] = 0; + } + else + CPU.VPR[op.vd]._u8[b] = (u8)result; + } } void ppu_interpreter::VSUBUHM(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._u16[h] = CPU.VPR[op.va]._u16[h] - CPU.VPR[op.vb]._u16[h]; + } } void ppu_interpreter::VSUBUHS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint h = 0; h < 8; h++) + { + s32 result = (s32)CPU.VPR[op.va]._u16[h] - (s32)CPU.VPR[op.vb]._u16[h]; + + if (result < 0) + { + CPU.VPR[op.vd]._u16[h] = 0; + } + else + CPU.VPR[op.vd]._u16[h] = (u16)result; + } } void ppu_interpreter::VSUBUWM(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._u32[w] = CPU.VPR[op.va]._u32[w] - CPU.VPR[op.vb]._u32[w]; + } } void ppu_interpreter::VSUBUWS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); 
(*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + s64 result = (s64)CPU.VPR[op.va]._u32[w] - (s64)CPU.VPR[op.vb]._u32[w]; + + if (result < 0) + { + CPU.VPR[op.vd]._u32[w] = 0; + } + else + CPU.VPR[op.vd]._u32[w] = (u32)result; + } } void ppu_interpreter::VSUMSWS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + s64 sum = CPU.VPR[op.vb]._s32[0]; + + for (uint w = 0; w < 4; w++) + { + sum += CPU.VPR[op.va]._s32[w]; + } + + CPU.VPR[op.vd].clear(); + if (sum > INT32_MAX) + { + CPU.VPR[op.vd]._s32[0] = (s32)INT32_MAX; + } + else if (sum < INT32_MIN) + { + CPU.VPR[op.vd]._s32[0] = (s32)INT32_MIN; + } + else + CPU.VPR[op.vd]._s32[0] = (s32)sum; } void ppu_interpreter::VSUM2SWS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint n = 0; n < 2; n++) + { + s64 sum = (s64)CPU.VPR[op.va]._s32[n * 2] + CPU.VPR[op.va]._s32[n * 2 + 1] + CPU.VPR[op.vb]._s32[n * 2]; + + if (sum > INT32_MAX) + { + CPU.VPR[op.vd]._s32[n * 2] = (s32)INT32_MAX; + } + else if (sum < INT32_MIN) + { + CPU.VPR[op.vd]._s32[n * 2] = (s32)INT32_MIN; + } + else + CPU.VPR[op.vd]._s32[n * 2] = (s32)sum; + } + CPU.VPR[op.vd]._s32[1] = 0; + CPU.VPR[op.vd]._s32[3] = 0; } void ppu_interpreter::VSUM4SBS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + s64 sum = CPU.VPR[op.vb]._s32[w]; + + for (uint b = 0; b < 4; b++) + { + sum += CPU.VPR[op.va]._s8[w * 4 + b]; + } + + if (sum > INT32_MAX) + { + CPU.VPR[op.vd]._s32[w] = (s32)INT32_MAX; + } + else if (sum < INT32_MIN) + { + CPU.VPR[op.vd]._s32[w] = (s32)INT32_MIN; + } + else + CPU.VPR[op.vd]._s32[w] = (s32)sum; + } } void ppu_interpreter::VSUM4SHS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + s64 sum = 
CPU.VPR[op.vb]._s32[w]; + + for (uint h = 0; h < 2; h++) + { + sum += CPU.VPR[op.va]._s16[w * 2 + h]; + } + + if (sum > INT32_MAX) + { + CPU.VPR[op.vd]._s32[w] = (s32)INT32_MAX; + } + else if (sum < INT32_MIN) + { + CPU.VPR[op.vd]._s32[w] = (s32)INT32_MIN; + } + else + CPU.VPR[op.vd]._s32[w] = (s32)sum; + } } void ppu_interpreter::VSUM4UBS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + for (uint w = 0; w < 4; w++) + { + u64 sum = CPU.VPR[op.vb]._u32[w]; + + for (uint b = 0; b < 4; b++) + { + sum += CPU.VPR[op.va]._u8[w * 4 + b]; + } + + if (sum > UINT32_MAX) + { + CPU.VPR[op.vd]._u32[w] = (u32)UINT32_MAX; + } + else + CPU.VPR[op.vd]._u32[w] = (u32)sum; + } } void ppu_interpreter::VUPKHPX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u128 VB = CPU.VPR[op.vb]; + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._s8[w * 4 + 3] = VB._s8[8 + w * 2 + 1] >> 7; // signed shift sign extends + CPU.VPR[op.vd]._u8[w * 4 + 2] = (VB._u8[8 + w * 2 + 1] >> 2) & 0x1f; + CPU.VPR[op.vd]._u8[w * 4 + 1] = ((VB._u8[8 + w * 2 + 1] & 0x3) << 3) | ((VB._u8[8 + w * 2 + 0] >> 5) & 0x7); + CPU.VPR[op.vd]._u8[w * 4 + 0] = VB._u8[8 + w * 2 + 0] & 0x1f; + } } void ppu_interpreter::VUPKHSB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u128 VB = CPU.VPR[op.vb]; + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._s16[h] = VB._s8[8 + h]; + } } void ppu_interpreter::VUPKHSH(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u128 VB = CPU.VPR[op.vb]; + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._s32[w] = VB._s16[4 + w]; + } } void ppu_interpreter::VUPKLPX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u128 VB = CPU.VPR[op.vb]; + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._s8[w * 
4 + 3] = VB._s8[w * 2 + 1] >> 7; // signed shift sign extends + CPU.VPR[op.vd]._u8[w * 4 + 2] = (VB._u8[w * 2 + 1] >> 2) & 0x1f; + CPU.VPR[op.vd]._u8[w * 4 + 1] = ((VB._u8[w * 2 + 1] & 0x3) << 3) | ((VB._u8[w * 2 + 0] >> 5) & 0x7); + CPU.VPR[op.vd]._u8[w * 4 + 0] = VB._u8[w * 2 + 0] & 0x1f; + } } void ppu_interpreter::VUPKLSB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u128 VB = CPU.VPR[op.vb]; + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._s16[h] = VB._s8[h]; + } } void ppu_interpreter::VUPKLSH(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u128 VB = CPU.VPR[op.vb]; + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._s32[w] = VB._s16[w]; + } } void ppu_interpreter::VXOR(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.VPR[op.vd]._u32[0] = CPU.VPR[op.va]._u32[0] ^ CPU.VPR[op.vb]._u32[0]; + CPU.VPR[op.vd]._u32[1] = CPU.VPR[op.va]._u32[1] ^ CPU.VPR[op.vb]._u32[1]; + CPU.VPR[op.vd]._u32[2] = CPU.VPR[op.va]._u32[2] ^ CPU.VPR[op.vb]._u32[2]; + CPU.VPR[op.vd]._u32[3] = CPU.VPR[op.va]._u32[3] ^ CPU.VPR[op.vb]._u32[3]; } void ppu_interpreter::MULLI(PPUThread& CPU, ppu_opcode_t op) @@ -944,12 +2228,12 @@ void ppu_interpreter::SUBFIC(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::CMPLI(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.UpdateCRnU(op.l10, op.crfd, CPU.GPR[op.ra], op.uimm16); } void ppu_interpreter::CMPI(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.UpdateCRnS(op.l10, op.crfd, CPU.GPR[op.ra], op.simm16); } void ppu_interpreter::ADDIC(PPUThread& CPU, ppu_opcode_t op) @@ -979,7 +2263,22 @@ void ppu_interpreter::ADDIS(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::BC(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter 
inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u8 bo0 = (op.bo & 0x10) ? 1 : 0; + const u8 bo1 = (op.bo & 0x08) ? 1 : 0; + const u8 bo2 = (op.bo & 0x04) ? 1 : 0; + const u8 bo3 = (op.bo & 0x02) ? 1 : 0; + + if (!bo2) --CPU.CTR; + + const u8 ctr_ok = bo2 | ((CPU.CTR != 0) ^ bo3); + const u8 cond_ok = bo0 | (CPU.IsCR(op.bi) ^ (~bo1 & 0x1)); + + if (ctr_ok && cond_ok) + { + const u32 nextLR = CPU.PC + 4; + CPU.SetBranch(PPUOpcodes::branchTarget((op.aa ? 0 : CPU.PC), op.simm16), op.lk); + if (op.lk) CPU.LR = nextLR; + } } void ppu_interpreter::HACK(PPUThread& CPU, ppu_opcode_t op) @@ -989,32 +2288,56 @@ void ppu_interpreter::HACK(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::SC(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + switch (op.lev) + { + case 0x0: SysCalls::DoSyscall(CPU, CPU.GPR[11]); break; + case 0x3: CPU.FastStop(); break; + default: throw __FUNCTION__; + } } void ppu_interpreter::B(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u32 nextLR = CPU.PC + 4; + CPU.SetBranch(PPUOpcodes::branchTarget(op.aa ? 0 : CPU.PC, op.ll), op.lk); + if (op.lk) CPU.LR = nextLR; } void ppu_interpreter::MCRF(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.SetCR(op.crfd, CPU.GetCR(op.crfs)); } void ppu_interpreter::BCLR(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u8 bo0 = (op.bo & 0x10) ? 1 : 0; + const u8 bo1 = (op.bo & 0x08) ? 1 : 0; + const u8 bo2 = (op.bo & 0x04) ? 1 : 0; + const u8 bo3 = (op.bo & 0x02) ? 
1 : 0; + + if (!bo2) --CPU.CTR; + + const u8 ctr_ok = bo2 | ((CPU.CTR != 0) ^ bo3); + const u8 cond_ok = bo0 | (CPU.IsCR(op.bi) ^ (~bo1 & 0x1)); + + if (ctr_ok && cond_ok) + { + const u32 nextLR = CPU.PC + 4; + CPU.SetBranch(PPUOpcodes::branchTarget(0, (u32)CPU.LR), true); + if (op.lk) CPU.LR = nextLR; + } } void ppu_interpreter::CRNOR(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u8 v = 1 ^ (CPU.IsCR(op.crba) | CPU.IsCR(op.crbb)); + CPU.SetCRBit2(op.crbd, v & 0x1); } void ppu_interpreter::CRANDC(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u8 v = CPU.IsCR(op.crba) & (1 ^ CPU.IsCR(op.crbb)); + CPU.SetCRBit2(op.crbd, v & 0x1); } void ppu_interpreter::ISYNC(PPUThread& CPU, ppu_opcode_t op) @@ -1024,52 +2347,67 @@ void ppu_interpreter::ISYNC(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::CRXOR(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u8 v = CPU.IsCR(op.crba) ^ CPU.IsCR(op.crbb); + CPU.SetCRBit2(op.crbd, v & 0x1); } void ppu_interpreter::CRNAND(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u8 v = 1 ^ (CPU.IsCR(op.crba) & CPU.IsCR(op.crbb)); + CPU.SetCRBit2(op.crbd, v & 0x1); } void ppu_interpreter::CRAND(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u8 v = CPU.IsCR(op.crba) & CPU.IsCR(op.crbb); + CPU.SetCRBit2(op.crbd, v & 0x1); } void ppu_interpreter::CREQV(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u8 v = 1 ^ (CPU.IsCR(op.crba) ^ CPU.IsCR(op.crbb)); + CPU.SetCRBit2(op.crbd, v & 0x1); } void ppu_interpreter::CRORC(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u8 v = 
CPU.IsCR(op.crba) | (1 ^ CPU.IsCR(op.crbb)); + CPU.SetCRBit2(op.crbd, v & 0x1); } void ppu_interpreter::CROR(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u8 v = CPU.IsCR(op.crba) | CPU.IsCR(op.crbb); + CPU.SetCRBit2(op.crbd, v & 0x1); } void ppu_interpreter::BCCTR(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + if (op.bo & 0x10 || CPU.IsCR(op.bi) == ((op.bo & 0x8) != 0)) + { + const u32 nextLR = CPU.PC + 4; + CPU.SetBranch(PPUOpcodes::branchTarget(0, (u32)CPU.CTR), true); + if (op.lk) CPU.LR = nextLR; + } } void ppu_interpreter::RLWIMI(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 mask = rotate_mask[32 + op.mb][32 + op.me]; + CPU.GPR[op.ra] = (CPU.GPR[op.ra] & ~mask) | (rotl32(CPU.GPR[op.rs], op.sh) & mask); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::RLWINM(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.GPR[op.ra] = rotl32(CPU.GPR[op.rs], op.sh) & rotate_mask[32 + op.mb][32 + op.me]; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::RLWNM(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.GPR[op.ra] = rotl32(CPU.GPR[op.rs], CPU.GPR[op.rb] & 0x1f) & rotate_mask[32 + op.mb][32 + op.me]; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::ORI(PPUThread& CPU, ppu_opcode_t op) @@ -1106,72 +2444,149 @@ void ppu_interpreter::ANDIS_(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::RLDICL(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + auto sh = (op.shh << 5) | op.shl; + auto mb = (op.mbmeh << 5) | op.mbmel; + + CPU.GPR[op.ra] = rotl64(CPU.GPR[op.rs], sh) & rotate_mask[mb][63]; + if (op.rc) 
CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::RLDICR(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + auto sh = (op.shh << 5) | op.shl; + auto me = (op.mbmeh << 5) | op.mbmel; + + CPU.GPR[op.ra] = rotl64(CPU.GPR[op.rs], sh) & rotate_mask[0][me]; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::RLDIC(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + auto sh = (op.shh << 5) | op.shl; + auto mb = (op.mbmeh << 5) | op.mbmel; + + CPU.GPR[op.ra] = rotl64(CPU.GPR[op.rs], sh) & rotate_mask[mb][63 - sh]; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::RLDIMI(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + auto sh = (op.shh << 5) | op.shl; + auto mb = (op.mbmeh << 5) | op.mbmel; + + const u64 mask = rotate_mask[mb][63 - sh]; + CPU.GPR[op.ra] = (CPU.GPR[op.ra] & ~mask) | (rotl64(CPU.GPR[op.rs], sh) & mask); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::RLDC_LR(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + auto sh = (u32)(CPU.GPR[op.rb] & 0x3F); + auto mbme = (op.mbmeh << 5) | op.mbmel; + + if (op.aa) // rldcr + { + CPU.GPR[op.ra] = rotl64(CPU.GPR[op.rs], sh) & rotate_mask[0][mbme]; + } + else // rldcl + { + CPU.GPR[op.ra] = rotl64(CPU.GPR[op.rs], sh) & rotate_mask[mbme][63]; + } + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::CMP(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.UpdateCRnS(op.l10, op.crfd, CPU.GPR[op.ra], CPU.GPR[op.rb]); } void ppu_interpreter::TW(PPUThread& CPU, ppu_opcode_t op) { - throw __FUNCTION__; + s32 a = (s32)CPU.GPR[op.ra]; + s32 b = (s32)CPU.GPR[op.rb]; + + if ((a < b && (op.bo & 0x10)) || + (a > b && (op.bo & 0x8)) || + (a == b && (op.bo & 0x4)) || + 
((u32)a < (u32)b && (op.bo & 0x2)) || + ((u32)a >(u32)b && (op.bo & 0x1))) + { + throw __FUNCTION__; + } } void ppu_interpreter::LVSL(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + + static const u64 lvsl_values[0x10][2] = + { + { 0x08090A0B0C0D0E0F, 0x0001020304050607 }, + { 0x090A0B0C0D0E0F10, 0x0102030405060708 }, + { 0x0A0B0C0D0E0F1011, 0x0203040506070809 }, + { 0x0B0C0D0E0F101112, 0x030405060708090A }, + { 0x0C0D0E0F10111213, 0x0405060708090A0B }, + { 0x0D0E0F1011121314, 0x05060708090A0B0C }, + { 0x0E0F101112131415, 0x060708090A0B0C0D }, + { 0x0F10111213141516, 0x0708090A0B0C0D0E }, + { 0x1011121314151617, 0x08090A0B0C0D0E0F }, + { 0x1112131415161718, 0x090A0B0C0D0E0F10 }, + { 0x1213141516171819, 0x0A0B0C0D0E0F1011 }, + { 0x131415161718191A, 0x0B0C0D0E0F101112 }, + { 0x1415161718191A1B, 0x0C0D0E0F10111213 }, + { 0x15161718191A1B1C, 0x0D0E0F1011121314 }, + { 0x161718191A1B1C1D, 0x0E0F101112131415 }, + { 0x1718191A1B1C1D1E, 0x0F10111213141516 }, + }; + + CPU.VPR[op.vd]._u64[0] = lvsl_values[addr & 0xf][0]; + CPU.VPR[op.vd]._u64[1] = lvsl_values[addr & 0xf][1]; } void ppu_interpreter::LVEBX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.VPR[op.vd]._u8[15 - (addr & 0xf)] = vm::read8(vm::cast(addr)); } void ppu_interpreter::SUBFC(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 RA = CPU.GPR[op.ra]; + const u64 RB = CPU.GPR[op.rb]; + CPU.GPR[op.rd] = ~RA + RB + 1; + CPU.XER.CA = CPU.IsCarry(~RA, RB, 1); + if (op.oe) CPU.SetOV((~RA >> 63 == RB >> 63) && (~RA >> 63 != CPU.GPR[op.rd] >> 63)); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); } void ppu_interpreter::MULHDU(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.GPR[op.rd] = __umulh(CPU.GPR[op.ra], CPU.GPR[op.rb]); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); } void ppu_interpreter::ADDC(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 RA = CPU.GPR[op.ra]; + const u64 RB = CPU.GPR[op.rb]; + CPU.GPR[op.rd] = RA + RB; + CPU.XER.CA = CPU.IsCarry(RA, RB); + if (op.oe) CPU.SetOV((RA >> 63 == RB >> 63) && (RA >> 63 != CPU.GPR[op.rd] >> 63)); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); } void ppu_interpreter::MULHWU(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u32 a = (u32)CPU.GPR[op.ra]; + u32 b = (u32)CPU.GPR[op.rb]; + CPU.GPR[op.rd] = ((u64)a * (u64)b) >> 32; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); } void ppu_interpreter::MFOCRF(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.GPR[op.rd] = CPU.CR.CR; } void ppu_interpreter::LWARX(PPUThread& CPU, ppu_opcode_t op) @@ -1198,932 +2613,1526 @@ void ppu_interpreter::LWZX(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::SLW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u32 n = CPU.GPR[op.rb] & 0x1f; + u32 r = (u32)rotl32((u32)CPU.GPR[op.rs], n); + u32 m = 
((u32)CPU.GPR[op.rb] & 0x20) ? 0 : (u32)rotate_mask[32][63 - n]; + + CPU.GPR[op.ra] = r & m; + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::CNTLZW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u32 i; + for (i = 0; i < 32; i++) + { + if (CPU.GPR[op.rs] & (1ULL << (31 - i))) break; + } + + CPU.GPR[op.ra] = i; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::SLD(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u32 n = CPU.GPR[op.rb] & 0x3f; + u64 r = rotl64(CPU.GPR[op.rs], n); + u64 m = (CPU.GPR[op.rb] & 0x40) ? 0 : rotate_mask[0][63 - n]; + + CPU.GPR[op.ra] = r & m; + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::AND(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.GPR[op.ra] = CPU.GPR[op.rs] & CPU.GPR[op.rb]; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::CMPL(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.UpdateCRnU(op.l10, op.crfd, CPU.GPR[op.ra], CPU.GPR[op.rb]); } void ppu_interpreter::LVSR(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + + static const u64 lvsr_values[0x10][2] = + { + { 0x18191A1B1C1D1E1F, 0x1011121314151617 }, + { 0x1718191A1B1C1D1E, 0x0F10111213141516 }, + { 0x161718191A1B1C1D, 0x0E0F101112131415 }, + { 0x15161718191A1B1C, 0x0D0E0F1011121314 }, + { 0x1415161718191A1B, 0x0C0D0E0F10111213 }, + { 0x131415161718191A, 0x0B0C0D0E0F101112 }, + { 0x1213141516171819, 0x0A0B0C0D0E0F1011 }, + { 0x1112131415161718, 0x090A0B0C0D0E0F10 }, + { 0x1011121314151617, 0x08090A0B0C0D0E0F }, + { 0x0F10111213141516, 0x0708090A0B0C0D0E }, + { 0x0E0F101112131415, 0x060708090A0B0C0D }, + { 0x0D0E0F1011121314, 0x05060708090A0B0C }, + { 0x0C0D0E0F10111213, 0x0405060708090A0B }, + { 0x0B0C0D0E0F101112, 0x030405060708090A }, + { 0x0A0B0C0D0E0F1011, 0x0203040506070809 }, + { 0x090A0B0C0D0E0F10, 0x0102030405060708 }, + }; + + CPU.VPR[op.vd]._u64[0] = lvsr_values[addr & 0xf][0]; + CPU.VPR[op.vd]._u64[1] = lvsr_values[addr & 0xf][1]; } void ppu_interpreter::LVEHX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = (op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]) & ~1ULL; + CPU.VPR[op.vd]._u16[7 - ((addr >> 1) & 0x7)] = vm::read16(vm::cast(addr)); } void ppu_interpreter::SUBF(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 RA = CPU.GPR[op.ra]; + const u64 RB = CPU.GPR[op.rb]; + CPU.GPR[op.rd] = RB - RA; + if (op.oe) CPU.SetOV((~RA >> 63 == RB >> 63) && (~RA >> 63 != CPU.GPR[op.rd] >> 63)); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); } void ppu_interpreter::LDUX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = CPU.GPR[op.ra] + CPU.GPR[op.rb]; + CPU.GPR[op.rd] = vm::read64(vm::cast(addr)); + CPU.GPR[op.ra] = addr; } void ppu_interpreter::DCBST(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); } void ppu_interpreter::LWZUX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = CPU.GPR[op.ra] + CPU.GPR[op.rb]; + CPU.GPR[op.rd] = vm::read32(vm::cast(addr)); + CPU.GPR[op.ra] = addr; } void ppu_interpreter::CNTLZD(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u32 i; + for (i = 0; i < 64; i++) + { + if (CPU.GPR[op.rs] & (1ULL << (63 - i))) break; + } + + CPU.GPR[op.ra] = i; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::ANDC(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.GPR[op.ra] = CPU.GPR[op.rs] & ~CPU.GPR[op.rb]; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::TD(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + throw __FUNCTION__; } void ppu_interpreter::LVEWX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const 
u64 addr = (op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]) & ~3ULL; + CPU.VPR[op.vd]._u32[3 - ((addr >> 2) & 0x3)] = vm::read32(vm::cast(addr)); } void ppu_interpreter::MULHD(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.GPR[op.rd] = __mulh(CPU.GPR[op.ra], CPU.GPR[op.rb]); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); } void ppu_interpreter::MULHW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + s32 a = (s32)CPU.GPR[op.ra]; + s32 b = (s32)CPU.GPR[op.rb]; + CPU.GPR[op.rd] = ((s64)a * (s64)b) >> 32; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); } void ppu_interpreter::LDARX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + + be_t value; + vm::reservation_acquire(&value, vm::cast(addr), sizeof(value)); + + CPU.GPR[op.rd] = value; } void ppu_interpreter::DCBF(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); } void ppu_interpreter::LBZX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.GPR[op.rd] = vm::read8(vm::cast(addr)); } void ppu_interpreter::LVX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.VPR[op.vd] = vm::read128((u64)((op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]) & ~0xfULL)); } void ppu_interpreter::NEG(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 RA = CPU.GPR[op.ra]; + CPU.GPR[op.rd] = 0 - RA; + if (op.oe) CPU.SetOV((~RA >> 63 == 0) && (~RA >> 63 != CPU.GPR[op.rd] >> 63)); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); } void ppu_interpreter::LBZUX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = CPU.GPR[op.ra] + CPU.GPR[op.rb]; + CPU.GPR[op.rd] = vm::read8(vm::cast(addr)); + CPU.GPR[op.ra] = addr; } void ppu_interpreter::NOR(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.GPR[op.ra] = ~(CPU.GPR[op.rs] | CPU.GPR[op.rb]); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::STVEBX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + const u8 eb = addr & 0xf; + vm::write8(vm::cast(addr), CPU.VPR[op.vs]._u8[15 - eb]); } void ppu_interpreter::SUBFE(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 RA = CPU.GPR[op.ra]; + const u64 RB = CPU.GPR[op.rb]; + CPU.GPR[op.rd] = ~RA + RB + CPU.XER.CA; + CPU.XER.CA = CPU.IsCarry(~RA, RB, CPU.XER.CA); + if (op.oe) CPU.SetOV((~RA >> 63 == RB >> 63) && (~RA >> 63 != CPU.GPR[op.rd] >> 63)); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); } void ppu_interpreter::ADDE(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 RA = CPU.GPR[op.ra]; + const u64 RB = CPU.GPR[op.rb]; + if (CPU.XER.CA) + { + if (RA == ~0ULL) //-1 + { + CPU.GPR[op.rd] = RB; + CPU.XER.CA = 1; + } + else + { + CPU.GPR[op.rd] = RA + 1 + RB; + CPU.XER.CA = CPU.IsCarry(RA + 1, RB); + } + } + else + { + CPU.GPR[op.rd] = RA + RB; + CPU.XER.CA = CPU.IsCarry(RA, RB); + } + if (op.oe) CPU.SetOV((RA >> 63 == RB >> 63) && (RA >> 63 != CPU.GPR[op.rd] >> 63)); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); } void ppu_interpreter::MTOCRF(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + if (op.l11) + { + u32 n = 0, count = 0; + for (u32 i = 0; i<8; ++i) + { + if (op.crm & (1 << i)) + { + n = i; + count++; + } + } + + if (count == 1) + { + //CR[4*n : 4*n+3] = RS[32+4*n : 32+4*n+3]; + CPU.SetCR(7 - n, (CPU.GPR[op.rs] >> (4 * n)) & 0xf); + } + else + CPU.CR.CR = 0; + } + else + { + for (u32 i = 0; i<8; ++i) + { + if (op.crm & (1 << i)) + { + CPU.SetCR(7 - i, (CPU.GPR[op.rs] >> (i * 4)) & 0xf); + } + } + } } void ppu_interpreter::STDX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + vm::write64(vm::cast(addr), CPU.GPR[op.rs]); } void ppu_interpreter::STWCX_(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + + const be_t value = be_t::make((u32)CPU.GPR[op.rs]); + CPU.SetCR_EQ(0, vm::reservation_update(vm::cast(addr), &value, sizeof(value))); } void ppu_interpreter::STWX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + vm::write32(vm::cast(addr), (u32)CPU.GPR[op.rs]); } void ppu_interpreter::STVEHX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = (op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]) & ~1ULL; + const u8 eb = (addr & 0xf) >> 1; + vm::write16(vm::cast(addr), CPU.VPR[op.vs]._u16[7 - eb]); } void ppu_interpreter::STDUX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = CPU.GPR[op.ra] + CPU.GPR[op.rb]; + vm::write64(vm::cast(addr), CPU.GPR[op.rs]); + CPU.GPR[op.ra] = addr; } void ppu_interpreter::STWUX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = CPU.GPR[op.ra] + CPU.GPR[op.rb]; + vm::write32(vm::cast(addr), (u32)CPU.GPR[op.rs]); + CPU.GPR[op.ra] = addr; } void ppu_interpreter::STVEWX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = (op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]) & ~3ULL; + const u8 eb = (addr & 0xf) >> 2; + vm::write32(vm::cast(addr), CPU.VPR[op.vs]._u32[3 - eb]); } void ppu_interpreter::SUBFZE(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 RA = CPU.GPR[op.ra]; + CPU.GPR[op.rd] = ~RA + CPU.XER.CA; + CPU.XER.CA = CPU.IsCarry(~RA, CPU.XER.CA); + if (op.oe) CPU.SetOV((~RA >> 63 == 0) && (~RA >> 63 != CPU.GPR[op.rd] >> 63)); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); } void ppu_interpreter::ADDZE(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 RA = CPU.GPR[op.ra]; + CPU.GPR[op.rd] = RA + CPU.XER.CA; + CPU.XER.CA = CPU.IsCarry(RA, CPU.XER.CA); + if (op.oe) CPU.SetOV((RA >> 63 == 0) && (RA >> 63 != CPU.GPR[op.rd] >> 63)); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); } void ppu_interpreter::STDCX_(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + + const be_t value = be_t::make(CPU.GPR[op.rs]); + CPU.SetCR_EQ(0, vm::reservation_update(vm::cast(addr), &value, sizeof(value))); } void ppu_interpreter::STBX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + vm::write8(vm::cast(addr), (u8)CPU.GPR[op.rs]); } void ppu_interpreter::STVX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + vm::write128((u64)((op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]) & ~0xfULL), CPU.VPR[op.vs]); } void ppu_interpreter::MULLD(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const s64 RA = CPU.GPR[op.ra]; + const s64 RB = CPU.GPR[op.rb]; + CPU.GPR[op.rd] = (s64)(RA * RB); + if (op.oe) + { + const s64 high = __mulh(RA, RB); + CPU.SetOV(high != s64(CPU.GPR[op.rd]) >> 63); + } + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); } void ppu_interpreter::SUBFME(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 RA = CPU.GPR[op.ra]; + CPU.GPR[op.rd] = ~RA + CPU.XER.CA + ~0ULL; + CPU.XER.CA = CPU.IsCarry(~RA, CPU.XER.CA, ~0ULL); + if (op.oe) CPU.SetOV((~RA >> 63 == 1) && (~RA >> 63 != CPU.GPR[op.rd] >> 63)); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); } void ppu_interpreter::ADDME(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const s64 RA = CPU.GPR[op.ra]; + CPU.GPR[op.rd] = RA + CPU.XER.CA - 1; + CPU.XER.CA |= RA != 0; + + if (op.oe) CPU.SetOV((u64(RA) >> 63 == 1) && (u64(RA) >> 63 != CPU.GPR[op.rd] >> 63)); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); } void ppu_interpreter::MULLW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.GPR[op.rd] = (s64)((s64)(s32)CPU.GPR[op.ra] * (s64)(s32)CPU.GPR[op.rb]); + if (op.oe) CPU.SetOV(s64(CPU.GPR[op.rd]) < s64(-1) << 31 || s64(CPU.GPR[op.rd]) >= s64(1) << 31); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); } void ppu_interpreter::DCBTST(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); } void ppu_interpreter::STBUX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = CPU.GPR[op.ra] + CPU.GPR[op.rb]; + vm::write8(vm::cast(addr), (u8)CPU.GPR[op.rs]); + CPU.GPR[op.ra] = addr; } 
void ppu_interpreter::ADD(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 RA = CPU.GPR[op.ra]; + const u64 RB = CPU.GPR[op.rb]; + CPU.GPR[op.rd] = RA + RB; + if (op.oe) CPU.SetOV((RA >> 63 == RB >> 63) && (RA >> 63 != CPU.GPR[op.rd] >> 63)); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); } void ppu_interpreter::DCBT(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); } void ppu_interpreter::LHZX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.GPR[op.rd] = vm::read16(vm::cast(addr)); } void ppu_interpreter::EQV(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.GPR[op.ra] = ~(CPU.GPR[op.rs] ^ CPU.GPR[op.rb]); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::ECIWX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + throw __FUNCTION__; } void ppu_interpreter::LHZUX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.GPR[op.rd] = vm::read16(vm::cast(addr)); + CPU.GPR[op.ra] = addr; } void ppu_interpreter::XOR(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.GPR[op.ra] = CPU.GPR[op.rs] ^ CPU.GPR[op.rb]; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::MFSPR(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u32 n = (op.spr >> 5) | ((op.spr & 0x1f) << 5); + + switch (n) + { + case 0x001: CPU.GPR[op.rd] = CPU.XER.XER; return; + case 0x008: CPU.GPR[op.rd] = CPU.LR; return; + case 0x009: CPU.GPR[op.rd] = CPU.CTR; return; + case 0x100: CPU.GPR[op.rd] = CPU.VRSAVE; return; + case 0x103: CPU.GPR[op.rd] = CPU.SPRG[3]; return; + + case 0x10C: CPU.TB = get_time(); CPU.GPR[op.rd] = CPU.TB; return; + case 0x10D: CPU.TB = get_time(); CPU.GPR[op.rd] = CPU.TB >> 32; return; + + case 0x110: + case 0x111: + case 0x112: + case 0x113: + case 0x114: + case 0x115: + case 0x116: + case 0x117: CPU.GPR[op.rd] = CPU.SPRG[n - 0x110]; return; + } + + throw __FUNCTION__; } void ppu_interpreter::LWAX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.GPR[op.rd] = (s64)(s32)vm::read32(vm::cast(addr)); } void ppu_interpreter::DST(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); } void ppu_interpreter::LHAX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.GPR[op.rd] = (s64)(s16)vm::read16(vm::cast(addr)); } void ppu_interpreter::LVXL(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.VPR[op.vd] = vm::read128((u64)((op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]) & ~0xfULL)); } void ppu_interpreter::MFTB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u32 n = (op.spr >> 5) | ((op.spr & 0x1f) << 5); + + CPU.TB = get_time(); + switch (n) + { + case 0x10C: CPU.GPR[op.rd] = CPU.TB; break; + case 0x10D: CPU.GPR[op.rd] = CPU.TB >> 32; break; + default: throw __FUNCTION__; + } } void ppu_interpreter::LWAUX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.GPR[op.rd] = (s64)(s32)vm::read32(vm::cast(addr)); + CPU.GPR[op.ra] = addr; } void ppu_interpreter::DSTST(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); } void ppu_interpreter::LHAUX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.GPR[op.rd] = (s64)(s16)vm::read16(vm::cast(addr)); + CPU.GPR[op.ra] = addr; } void ppu_interpreter::STHX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + vm::write16(vm::cast(addr), (u16)CPU.GPR[op.rs]); } void ppu_interpreter::ORC(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.GPR[op.ra] = CPU.GPR[op.rs] | ~CPU.GPR[op.rb]; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::ECOWX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + throw __FUNCTION__; } void ppu_interpreter::STHUX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = CPU.GPR[op.ra] + CPU.GPR[op.rb]; + vm::write16(vm::cast(addr), (u16)CPU.GPR[op.rs]); + CPU.GPR[op.ra] = addr; } void ppu_interpreter::OR(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.GPR[op.ra] = CPU.GPR[op.rs] | CPU.GPR[op.rb]; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::DIVDU(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 RA = CPU.GPR[op.ra]; + const u64 RB = CPU.GPR[op.rb]; + + if (RB == 0) + { + if (op.oe) CPU.SetOV(true); + CPU.GPR[op.rd] = 0; + } + else + { + if (op.oe) CPU.SetOV(false); + CPU.GPR[op.rd] = RA / RB; + } + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); } void ppu_interpreter::DIVWU(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u32 RA = (u32)CPU.GPR[op.ra]; + const u32 RB = (u32)CPU.GPR[op.rb]; + + if (RB == 0) + { + if (op.oe) CPU.SetOV(true); + CPU.GPR[op.rd] = 0; + } + else + { + if (op.oe) CPU.SetOV(false); + CPU.GPR[op.rd] = RA / RB; + } + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); } void ppu_interpreter::MTSPR(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u32 n = (op.spr >> 5) | ((op.spr & 0x1f) << 5); 
+ + switch (n) + { + case 0x001: CPU.XER.XER = CPU.GPR[op.rs]; return; + case 0x008: CPU.LR = CPU.GPR[op.rs]; return; + case 0x009: CPU.CTR = CPU.GPR[op.rs]; return; + case 0x100: CPU.VRSAVE = (u32)CPU.GPR[op.rs]; return; + + case 0x110: + case 0x111: + case 0x112: + case 0x113: + case 0x114: + case 0x115: + case 0x116: + case 0x117: CPU.SPRG[n - 0x110] = CPU.GPR[op.rs]; return; + } + + throw __FUNCTION__; } void ppu_interpreter::DCBI(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); } void ppu_interpreter::NAND(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.GPR[op.ra] = ~(CPU.GPR[op.rs] & CPU.GPR[op.rb]); + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::STVXL(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + vm::write128((u64)((op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]) & ~0xfULL), CPU.VPR[op.vs]); } void ppu_interpreter::DIVD(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const s64 RA = CPU.GPR[op.ra]; + const s64 RB = CPU.GPR[op.rb]; + + if (RB == 0 || ((u64)RA == (1ULL << 63) && RB == -1)) + { + if (op.oe) CPU.SetOV(true); + CPU.GPR[op.rd] = /*(((u64)RA & (1ULL << 63)) && RB == 0) ? -1 :*/ 0; + } + else + { + if (op.oe) CPU.SetOV(false); + CPU.GPR[op.rd] = RA / RB; + } + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); } void ppu_interpreter::DIVW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const s32 RA = (s32)CPU.GPR[op.ra]; + const s32 RB = (s32)CPU.GPR[op.rb]; + + if (RB == 0 || ((u32)RA == (1 << 31) && RB == -1)) + { + if (op.oe) CPU.SetOV(true); + CPU.GPR[op.rd] = /*(((u32)RA & (1 << 31)) && RB == 0) ? 
-1 :*/ 0; + } + else + { + if (op.oe) CPU.SetOV(false); + CPU.GPR[op.rd] = (u32)(RA / RB); + } + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); } void ppu_interpreter::LVLX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + const u32 eb = addr & 0xf; + + CPU.VPR[op.vd].clear(); + for (u32 i = 0; i < 16u - eb; ++i) CPU.VPR[op.vd]._u8[15 - i] = vm::read8(vm::cast(addr + i)); } void ppu_interpreter::LDBRX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.GPR[op.rd] = vm::get_ref(vm::cast(addr)); } void ppu_interpreter::LSWX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + u32 count = CPU.XER.XER & 0x7F; + for (; count >= 4; count -= 4, addr += 4, op.rd = (op.rd + 1) & 31) + { + CPU.GPR[op.rd] = vm::get_ref>(vm::cast(addr)); + } + if (count) + { + u32 value = 0; + for (u32 byte = 0; byte < count; byte++) + { + u32 byte_value = vm::get_ref(vm::cast(addr + byte)); + value |= byte_value << ((3 ^ byte) * 8); + } + CPU.GPR[op.rd] = value; + } } void ppu_interpreter::LWBRX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.GPR[op.rd] = vm::get_ref(vm::cast(addr)); } void ppu_interpreter::LFSX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.FPR[op.frd]._double = vm::get_ref>(vm::cast(addr)).value(); } void ppu_interpreter::SRW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u32 n = CPU.GPR[op.rb] & 0x1f; + u32 r = (u32)rotl32((u32)CPU.GPR[op.rs], 64 - n); + u32 m = ((u32)CPU.GPR[op.rb] & 0x20) ? 0 : (u32)rotate_mask[32 + n][63]; + CPU.GPR[op.ra] = r & m; + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::SRD(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u32 n = CPU.GPR[op.rb] & 0x3f; + u64 r = rotl64(CPU.GPR[op.rs], 64 - n); + u64 m = (CPU.GPR[op.rb] & 0x40) ? 0 : rotate_mask[n][63]; + CPU.GPR[op.ra] = r & m; + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::LVRX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + const u8 eb = addr & 0xf; + + CPU.VPR[op.vd].clear(); + for (u32 i = 16 - eb; i < 16; ++i) CPU.VPR[op.vd]._u8[15 - i] = vm::read8(vm::cast(addr + i - 16)); } void ppu_interpreter::LSWI(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u64 addr = op.ra ? CPU.GPR[op.ra] : 0; + u64 N = op.rb ? 
op.rb : 32; + u8 reg = op.rd; + + while (N > 0) + { + if (N > 3) + { + CPU.GPR[reg] = vm::read32(vm::cast(addr)); + addr += 4; + N -= 4; + } + else + { + u32 buf = 0; + u32 i = 3; + while (N > 0) + { + N = N - 1; + buf |= vm::read8(vm::cast(addr)) << (i * 8); + addr++; + i--; + } + CPU.GPR[reg] = buf; + } + reg = (reg + 1) % 32; + } } void ppu_interpreter::LFSUX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = CPU.GPR[op.ra] + CPU.GPR[op.rb]; + CPU.FPR[op.frd]._double = vm::get_ref>(vm::cast(addr)).value(); + CPU.GPR[op.ra] = addr; } void ppu_interpreter::SYNC(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + _mm_mfence(); } void ppu_interpreter::LFDX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.FPR[op.frd]._double = vm::get_ref>(vm::cast(addr)).value(); } void ppu_interpreter::LFDUX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = CPU.GPR[op.ra] + CPU.GPR[op.rb]; + CPU.FPR[op.frd]._double = vm::get_ref>(vm::cast(addr)).value(); + CPU.GPR[op.ra] = addr; } void ppu_interpreter::STVLX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + const u32 eb = addr & 0xf; + + for (u32 i = 0; i < 16u - eb; ++i) vm::write8(vm::cast(addr + i), CPU.VPR[op.vs]._u8[15 - i]); } void ppu_interpreter::STDBRX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + vm::get_ref(vm::cast(addr)) = CPU.GPR[op.rs]; } void ppu_interpreter::STSWX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + u32 count = CPU.XER.XER & 0x7F; + for (; count >= 4; count -= 4, addr += 4, op.rs = (op.rs + 1) & 31) + { + vm::write32(vm::cast(addr), (u32)CPU.GPR[op.rs]); + } + if (count) + { + u32 value = (u32)CPU.GPR[op.rs]; + for (u32 byte = 0; byte < count; byte++) + { + u32 byte_value = (u8)(value >> ((3 ^ byte) * 8)); + vm::write8(vm::cast(addr + byte), byte_value); + } + } } void ppu_interpreter::STWBRX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + vm::get_ref(vm::cast(addr)) = (u32)CPU.GPR[op.rs]; } void ppu_interpreter::STFSX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + vm::get_ref>(vm::cast(addr)) = static_cast(CPU.FPR[op.frs]); } void ppu_interpreter::STVRX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + const u8 eb = addr & 0xf; + + for (u32 i = 16 - eb; i < 16; ++i) vm::write8(vm::cast(addr + i - 16), CPU.VPR[op.vs]._u8[15 - i]); } void ppu_interpreter::STFSUX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = CPU.GPR[op.ra] + CPU.GPR[op.rb]; + vm::get_ref>(vm::cast(addr)) = static_cast(CPU.FPR[op.frs]); + CPU.GPR[op.ra] = addr; } void ppu_interpreter::STSWI(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u64 addr = op.ra ? CPU.GPR[op.ra] : 0; + u64 N = op.rb ? op.rb : 32; + u8 reg = op.rd; + + while (N > 0) + { + if (N > 3) + { + vm::write32(vm::cast(addr), (u32)CPU.GPR[reg]); + addr += 4; + N -= 4; + } + else + { + u32 buf = (u32)CPU.GPR[reg]; + while (N > 0) + { + N = N - 1; + vm::write8(vm::cast(addr), (0xFF000000 & buf) >> 24); + buf <<= 8; + addr++; + } + } + reg = (reg + 1) % 32; + } } void ppu_interpreter::STFDX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + vm::get_ref>(vm::cast(addr)) = CPU.FPR[op.frs]; } void ppu_interpreter::STFDUX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = CPU.GPR[op.ra] + CPU.GPR[op.rb]; + vm::get_ref>(vm::cast(addr)) = CPU.FPR[op.frs]; + CPU.GPR[op.ra] = addr; } void ppu_interpreter::LVLXL(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + const u32 eb = addr & 0xf; + + CPU.VPR[op.vd].clear(); + for (u32 i = 0; i < 16u - eb; ++i) CPU.VPR[op.vd]._u8[15 - i] = vm::read8(vm::cast(addr + i)); } void ppu_interpreter::LHBRX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.GPR[op.rd] = vm::get_ref(vm::cast(addr)); } void ppu_interpreter::SRAW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + s32 RS = (s32)CPU.GPR[op.rs]; + u8 shift = CPU.GPR[op.rb] & 63; + if (shift > 31) + { + CPU.GPR[op.ra] = 0 - (RS < 0); + CPU.XER.CA = (RS < 0); + } + else + { + CPU.GPR[op.ra] = RS >> shift; + CPU.XER.CA = (RS < 0) & ((CPU.GPR[op.ra] << shift) != RS); + } + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::SRAD(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + s64 RS = CPU.GPR[op.rs]; + u8 shift = CPU.GPR[op.rb] & 127; + if (shift > 63) + { + CPU.GPR[op.ra] = 0 - (RS < 0); + CPU.XER.CA = (RS < 0); + } + else + { + CPU.GPR[op.ra] = RS >> shift; + CPU.XER.CA = (RS < 0) & ((CPU.GPR[op.ra] << shift) != RS); + } + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::LVRXL(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + const u8 eb = addr & 0xf; + + CPU.VPR[op.vd].clear(); + for (u32 i = 16 - eb; i < 16; ++i) CPU.VPR[op.vd]._u8[15 - i] = vm::read8(vm::cast(addr + i - 16)); } void ppu_interpreter::DSS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); } void ppu_interpreter::SRAWI(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + s32 RS = (u32)CPU.GPR[op.rs]; + CPU.GPR[op.ra] = RS >> op.sh; + CPU.XER.CA = (RS < 0) & ((u32)(CPU.GPR[op.ra] << op.sh) != RS); + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } -void ppu_interpreter::SRADI1(PPUThread& CPU, ppu_opcode_t op) +void ppu_interpreter::SRADI(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); -} + auto sh = (op.shh << 5) | op.shl; + s64 RS = CPU.GPR[op.rs]; + CPU.GPR[op.ra] = RS >> sh; + CPU.XER.CA = (RS < 0) & ((CPU.GPR[op.ra] << sh) != RS); -void ppu_interpreter::SRADI2(PPUThread& CPU, ppu_opcode_t op) -{ - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::EIEIO(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + _mm_mfence(); } void ppu_interpreter::STVLXL(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + const u32 eb = addr & 0xf; + + for (u32 i = 0; i < 16u - eb; ++i) vm::write8(vm::cast(addr + i), CPU.VPR[op.vs]._u8[15 - i]); } void ppu_interpreter::STHBRX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + vm::get_ref(vm::cast(addr)) = (u16)CPU.GPR[op.rs]; } void ppu_interpreter::EXTSH(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.GPR[op.ra] = (s64)(s16)CPU.GPR[op.rs]; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::STVRXL(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + const u8 eb = addr & 0xf; + + for (u32 i = 16 - eb; i < 16; ++i) vm::write8(vm::cast(addr + i - 16), CPU.VPR[op.vs]._u8[15 - i]); } void ppu_interpreter::EXTSB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.GPR[op.ra] = (s64)(s8)CPU.GPR[op.rs]; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::STFIWX(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + vm::write32(vm::cast(addr), (u32&)CPU.FPR[op.frs]); } void ppu_interpreter::EXTSW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.GPR[op.ra] = (s64)(s32)CPU.GPR[op.rs]; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); } void ppu_interpreter::ICBI(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); } void ppu_interpreter::DCBZ(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + + memset(vm::get_ptr(vm::cast(addr) & ~127), 0, 128); } void ppu_interpreter::LWZ(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + op.simm16 : op.simm16; + CPU.GPR[op.rd] = vm::read32(vm::cast(addr)); } void ppu_interpreter::LWZU(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = CPU.GPR[op.ra] + op.simm16; + CPU.GPR[op.rd] = vm::read32(vm::cast(addr)); + CPU.GPR[op.ra] = addr; } void ppu_interpreter::LBZ(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + op.simm16 : op.simm16; + CPU.GPR[op.rd] = vm::read8(vm::cast(addr)); } void ppu_interpreter::LBZU(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = CPU.GPR[op.ra] + op.simm16; + CPU.GPR[op.rd] = vm::read8(vm::cast(addr)); + CPU.GPR[op.ra] = addr; } void ppu_interpreter::STW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + op.simm16 : op.simm16; + vm::write32(vm::cast(addr), (u32)CPU.GPR[op.rs]); } void ppu_interpreter::STWU(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = CPU.GPR[op.ra] + op.simm16; + vm::write32(vm::cast(addr), (u32)CPU.GPR[op.rs]); + CPU.GPR[op.ra] = addr; } void ppu_interpreter::STB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + op.simm16 : op.simm16; + vm::write8(vm::cast(addr), (u8)CPU.GPR[op.rs]); } void ppu_interpreter::STBU(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = CPU.GPR[op.ra] + op.simm16; + vm::write8(vm::cast(addr), (u8)CPU.GPR[op.rs]); + CPU.GPR[op.ra] = addr; } void ppu_interpreter::LHZ(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + op.simm16 : op.simm16; + CPU.GPR[op.rd] = vm::read16(vm::cast(addr)); } void ppu_interpreter::LHZU(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = CPU.GPR[op.ra] + op.simm16; + CPU.GPR[op.rd] = vm::read16(vm::cast(addr)); + CPU.GPR[op.ra] = addr; } void ppu_interpreter::LHA(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + op.simm16 : op.simm16; + CPU.GPR[op.rd] = (s64)(s16)vm::read16(vm::cast(addr)); } void ppu_interpreter::LHAU(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = CPU.GPR[op.ra] + op.simm16; + CPU.GPR[op.rd] = (s64)(s16)vm::read16(vm::cast(addr)); + CPU.GPR[op.ra] = addr; } void ppu_interpreter::STH(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + op.simm16 : op.simm16; + vm::write16(vm::cast(addr), (u16)CPU.GPR[op.rs]); } void ppu_interpreter::STHU(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = CPU.GPR[op.ra] + op.simm16; + vm::write16(vm::cast(addr), (u16)CPU.GPR[op.rs]); + CPU.GPR[op.ra] = addr; } void ppu_interpreter::LMW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u64 addr = op.ra ? CPU.GPR[op.ra] + op.simm16 : op.simm16; + for (u32 i = op.rd; i<32; ++i, addr += 4) + { + CPU.GPR[i] = vm::read32(vm::cast(addr)); + } } void ppu_interpreter::STMW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u64 addr = op.ra ? CPU.GPR[op.ra] + op.simm16 : op.simm16; + for (u32 i = op.rs; i<32; ++i, addr += 4) + { + vm::write32(vm::cast(addr), (u32)CPU.GPR[i]); + } } void ppu_interpreter::LFS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + op.simm16 : op.simm16; + CPU.FPR[op.frd]._double = vm::get_ref>(vm::cast(addr)).value(); } void ppu_interpreter::LFSU(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = CPU.GPR[op.ra] + op.simm16; + CPU.FPR[op.frd]._double = vm::get_ref>(vm::cast(addr)).value(); + CPU.GPR[op.ra] = addr; } void ppu_interpreter::LFD(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + op.simm16 : op.simm16; + CPU.FPR[op.frd]._double = vm::get_ref>(vm::cast(addr)).value(); } void ppu_interpreter::LFDU(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = CPU.GPR[op.ra] + op.simm16; + CPU.FPR[op.frd]._double = vm::get_ref>(vm::cast(addr)).value(); + CPU.GPR[op.ra] = addr; } void ppu_interpreter::STFS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + op.simm16 : op.simm16; + vm::get_ref>(vm::cast(addr)) = static_cast(CPU.FPR[op.frs]); } void ppu_interpreter::STFSU(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = CPU.GPR[op.ra] + op.simm16; + vm::get_ref>(vm::cast(addr)) = static_cast(CPU.FPR[op.frs]); + CPU.GPR[op.ra] = addr; } void ppu_interpreter::STFD(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = op.ra ? CPU.GPR[op.ra] + op.simm16 : op.simm16; + vm::get_ref>(vm::cast(addr)) = CPU.FPR[op.frs]; } void ppu_interpreter::STFDU(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = CPU.GPR[op.ra] + op.simm16; + vm::get_ref>(vm::cast(addr)) = CPU.FPR[op.frs]; + CPU.GPR[op.ra] = addr; } void ppu_interpreter::LD(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = (op.simm16 & ~3) + (op.ra ? 
CPU.GPR[op.ra] : 0); + CPU.GPR[op.rd] = vm::read64(vm::cast(addr)); } void ppu_interpreter::LDU(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = CPU.GPR[op.ra] + (op.simm16 & ~3); + CPU.GPR[op.rd] = vm::read64(vm::cast(addr)); + CPU.GPR[op.ra] = addr; } void ppu_interpreter::LWA(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = (op.simm16 & ~3) + (op.ra ? CPU.GPR[op.ra] : 0); + CPU.GPR[op.rd] = (s64)(s32)vm::read32(vm::cast(addr)); } void ppu_interpreter::FDIVS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = CPU.FPR[op.fra] / CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FSUBS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = CPU.FPR[op.fra] - CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FADDS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = CPU.FPR[op.fra] + CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FSQRTS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = sqrt(CPU.FPR[op.frb]); + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FRES(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = 1.0 / CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FMULS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = CPU.FPR[op.fra] * CPU.FPR[op.frc]; + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FMADDS(PPUThread& CPU, 
ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = CPU.FPR[op.fra] * CPU.FPR[op.frc] + CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FMSUBS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = CPU.FPR[op.fra] * CPU.FPR[op.frc] - CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FNMSUBS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = -(CPU.FPR[op.fra] * CPU.FPR[op.frc]) + CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FNMADDS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = -(CPU.FPR[op.fra] * CPU.FPR[op.frc]) - CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::STD(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = (op.simm16 & ~3) + (op.ra ? 
CPU.GPR[op.ra] : 0); + vm::write64(vm::cast(addr), CPU.GPR[op.rs]); } void ppu_interpreter::STDU(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + const u64 addr = CPU.GPR[op.ra] + (op.simm16 & ~3); + vm::write64(vm::cast(addr), CPU.GPR[op.rs]); + CPU.GPR[op.ra] = addr; } void ppu_interpreter::MTFSB1(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u64 mask = (1ULL << (31 - op.crbd)); + if ((op.crbd >= 3 && op.crbd <= 6) && !(CPU.FPSCR.FPSCR & mask)) mask |= 1ULL << 31; //FPSCR.FX + if ((op.crbd == 29) && !CPU.FPSCR.NI) LOG_WARNING(PPU, "Non-IEEE mode enabled"); + CPU.SetFPSCR(CPU.FPSCR.FPSCR | mask); + + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::MCRFS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.SetCR(op.crfd, (CPU.FPSCR.FPSCR >> ((7 - op.crfs) * 4)) & 0xf); + const u32 exceptions_mask = 0x9FF80700; + CPU.SetFPSCR(CPU.FPSCR.FPSCR & ~(exceptions_mask & 0xf << ((7 - op.crfs) * 4))); } void ppu_interpreter::MTFSB0(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u64 mask = (1ULL << (31 - op.crbd)); + if ((op.crbd == 29) && !CPU.FPSCR.NI) LOG_WARNING(PPU, "Non-IEEE mode disabled"); + CPU.SetFPSCR(CPU.FPSCR.FPSCR & ~mask); + + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::MTFSFI(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u32 mask = 0xF0000000 >> (op.crfd * 4); + u32 val = (op.i & 0xF) << ((7 - op.crfd) * 4); + + const u32 oldNI = CPU.FPSCR.NI; + CPU.SetFPSCR((CPU.FPSCR.FPSCR & ~mask) | val); + if (CPU.FPSCR.NI != oldNI) + { + if (oldNI) + LOG_WARNING(PPU, "Non-IEEE mode disabled"); + else + LOG_WARNING(PPU, "Non-IEEE mode enabled"); + } + + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::MFFS(PPUThread& CPU, ppu_opcode_t op) { - 
PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + (u64&)CPU.FPR[op.frd]._double = CPU.FPSCR.FPSCR; + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::MTFSF(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + u32 mask = 0; + for (u32 i = 0; i<8; ++i) + { + if (op.flm & (1 << i)) mask |= 0xf << (i * 4); + } + mask &= ~0x60000000; + + const u32 oldNI = CPU.FPSCR.NI; + CPU.SetFPSCR((CPU.FPSCR.FPSCR & ~mask) | ((u32&)CPU.FPR[op.frb] & mask)); + if (CPU.FPSCR.NI != oldNI) + { + if (oldNI) + LOG_WARNING(PPU, "Non-IEEE mode disabled"); + else + LOG_WARNING(PPU, "Non-IEEE mode enabled"); + } + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FCMPU(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + s32 cmp_res = FPRdouble::Cmp(CPU.FPR[op.fra], CPU.FPR[op.frb]); + //CPU.FPSCR.FPRF = cmp_res; + CPU.SetCR(op.crfd, cmp_res); } void ppu_interpreter::FRSP(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = static_cast(CPU.FPR[op.frb]); + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FCTIW(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + (s32&)CPU.FPR[op.frd]._double = lrint(CPU.FPR[op.frb]); + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FCTIWZ(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + (s32&)CPU.FPR[op.frd]._double = static_cast(CPU.FPR[op.frb]); + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FDIV(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = CPU.FPR[op.fra] / CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FSUB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); 
(*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = CPU.FPR[op.fra] - CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FADD(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = CPU.FPR[op.fra] + CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FSQRT(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = sqrt(CPU.FPR[op.frb]); + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FSEL(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = CPU.FPR[op.fra] >= 0.0 ? CPU.FPR[op.frc] : CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FMUL(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = CPU.FPR[op.fra] * CPU.FPR[op.frc]; + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FRSQRTE(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = 1.0 / sqrt(CPU.FPR[op.frb]); + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FMSUB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = CPU.FPR[op.fra] * CPU.FPR[op.frc] - CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FMADD(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = CPU.FPR[op.fra] * CPU.FPR[op.frc] + CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FNMSUB(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = -(CPU.FPR[op.fra] * CPU.FPR[op.frc]) 
+ CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FNMADD(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = -(CPU.FPR[op.fra] * CPU.FPR[op.frc]) - CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FCMPO(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + s32 cmp_res = FPRdouble::Cmp(CPU.FPR[op.fra], CPU.FPR[op.frb]); + //CPU.FPSCR.FPRF = cmp_res; + CPU.SetCR(op.crfd, cmp_res); } void ppu_interpreter::FNEG(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = -CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FMR(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FNABS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = -fabs(CPU.FPR[op.frb]); + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FABS(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = fabs(CPU.FPR[op.frb]); + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FCTID(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + (s64&)CPU.FPR[op.frd]._double = llrint(CPU.FPR[op.frb]); + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FCTIDZ(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + (s64&)CPU.FPR[op.frd]._double = static_cast(CPU.FPR[op.frb]); + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::FCFID(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); 
(*PPU_instr::main_list)(&inter, op.opcode); + CPU.FPR[op.frd]._double = static_cast((s64&)CPU.FPR[op.frb]); + if (op.rc) CPU.UpdateCR1(); } void ppu_interpreter::UNK(PPUThread& CPU, ppu_opcode_t op) { - PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); + throw __FUNCTION__; } diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index f6dd80453a..9358019869 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -2445,6 +2445,11 @@ private: if(oe) CPU.SetOV((~RA>>63 == RB>>63) && (~RA>>63 != CPU.GPR[rd]>>63)); if(rc) CPU.UpdateCR0(CPU.GPR[rd]); } + void MULHDU(u32 rd, u32 ra, u32 rb, bool rc) + { + CPU.GPR[rd] = __umulh(CPU.GPR[ra], CPU.GPR[rb]); + if(rc) CPU.UpdateCR0(CPU.GPR[rd]); + } void ADDC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { const u64 RA = CPU.GPR[ra]; @@ -2454,11 +2459,6 @@ private: if(oe) CPU.SetOV((RA>>63 == RB>>63) && (RA>>63 != CPU.GPR[rd]>>63)); if(rc) CPU.UpdateCR0(CPU.GPR[rd]); } - void MULHDU(u32 rd, u32 ra, u32 rb, bool rc) - { - CPU.GPR[rd] = __umulh(CPU.GPR[ra], CPU.GPR[rb]); - if(rc) CPU.UpdateCR0(CPU.GPR[rd]); - } void MULHWU(u32 rd, u32 ra, u32 rb, bool rc) { u32 a = (u32)CPU.GPR[ra]; @@ -2780,14 +2780,6 @@ private: const u8 eb = (addr & 0xf) >> 2; vm::write32(vm::cast(addr), CPU.VPR[vs]._u32[3 - eb]); } - void ADDZE(u32 rd, u32 ra, u32 oe, bool rc) - { - const u64 RA = CPU.GPR[ra]; - CPU.GPR[rd] = RA + CPU.XER.CA; - CPU.XER.CA = CPU.IsCarry(RA, CPU.XER.CA); - if(oe) CPU.SetOV((RA>>63 == 0) && (RA>>63 != CPU.GPR[rd]>>63)); - if(rc) CPU.UpdateCR0(CPU.GPR[rd]); - } void SUBFZE(u32 rd, u32 ra, u32 oe, bool rc) { const u64 RA = CPU.GPR[ra]; @@ -2796,6 +2788,14 @@ private: if(oe) CPU.SetOV((~RA>>63 == 0) && (~RA>>63 != CPU.GPR[rd]>>63)); if(rc) CPU.UpdateCR0(CPU.GPR[rd]); } + void ADDZE(u32 rd, u32 ra, u32 oe, bool rc) + { + const u64 RA = CPU.GPR[ra]; + CPU.GPR[rd] = RA + CPU.XER.CA; + CPU.XER.CA = CPU.IsCarry(RA, CPU.XER.CA); + if(oe) CPU.SetOV((RA>>63 == 0) && 
(RA>>63 != CPU.GPR[rd]>>63)); + if(rc) CPU.UpdateCR0(CPU.GPR[rd]); + } void STDCX_(u32 rs, u32 ra, u32 rb) { const u64 addr = ra ? CPU.GPR[ra] + CPU.GPR[rb] : CPU.GPR[rb]; @@ -2812,14 +2812,6 @@ private: { vm::write128((u64)((ra ? CPU.GPR[ra] + CPU.GPR[rb] : CPU.GPR[rb]) & ~0xfULL), CPU.VPR[vs]); } - void SUBFME(u32 rd, u32 ra, u32 oe, bool rc) - { - const u64 RA = CPU.GPR[ra]; - CPU.GPR[rd] = ~RA + CPU.XER.CA + ~0ULL; - CPU.XER.CA = CPU.IsCarry(~RA, CPU.XER.CA, ~0ULL); - if(oe) CPU.SetOV((~RA>>63 == 1) && (~RA>>63 != CPU.GPR[rd]>>63)); - if(rc) CPU.UpdateCR0(CPU.GPR[rd]); - } void MULLD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { const s64 RA = CPU.GPR[ra]; @@ -2832,6 +2824,14 @@ private: } if(rc) CPU.UpdateCR0(CPU.GPR[rd]); } + void SUBFME(u32 rd, u32 ra, u32 oe, bool rc) + { + const u64 RA = CPU.GPR[ra]; + CPU.GPR[rd] = ~RA + CPU.XER.CA + ~0ULL; + CPU.XER.CA = CPU.IsCarry(~RA, CPU.XER.CA, ~0ULL); + if(oe) CPU.SetOV((~RA>>63 == 1) && (~RA>>63 != CPU.GPR[rd]>>63)); + if(rc) CPU.UpdateCR0(CPU.GPR[rd]); + } void ADDME(u32 rd, u32 ra, u32 oe, bool rc) { const s64 RA = CPU.GPR[ra]; @@ -3432,9 +3432,7 @@ private: { const u64 addr = ra ? 
CPU.GPR[ra] + CPU.GPR[rb] : CPU.GPR[rb]; - auto const cache_line = vm::get_ptr(vm::cast(addr) & ~127); - if (cache_line) - memset(cache_line, 0, 128); + memset(vm::get_ptr(vm::cast(addr) & ~127), 0, 128); } void LWZ(u32 rd, u32 ra, s32 d) { @@ -3618,7 +3616,6 @@ private: } void LDU(u32 rd, u32 ra, s32 ds) { - //if(ra == 0 || rt == ra) return; const u64 addr = CPU.GPR[ra] + ds; CPU.GPR[rd] = vm::read64(vm::cast(addr)); CPU.GPR[ra] = addr; @@ -3684,7 +3681,6 @@ private: } void STDU(u32 rs, u32 ra, s32 ds) { - //if(ra == 0 || rs == ra) return; const u64 addr = CPU.GPR[ra] + ds; vm::write64(vm::cast(addr), CPU.GPR[rs]); CPU.GPR[ra] = addr; diff --git a/rpcs3/Emu/Cell/PPUInterpreter2.h b/rpcs3/Emu/Cell/PPUInterpreter2.h index f6399b394b..d5bb82a77a 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter2.h +++ b/rpcs3/Emu/Cell/PPUInterpreter2.h @@ -22,10 +22,11 @@ union ppu_opcode_t struct { - u32 : 6; // 26..31 - u32 vsh : 4; // 22..25 - u32 : 1; // 21 + u32 : 6; // 26..31 + u32 vsh : 4; // 22..25 + u32 oe : 1; // 21 u32 spr : 10; // 11..20 + u32 : 11; }; struct @@ -40,7 +41,9 @@ union ppu_opcode_t struct { - u32 : 6; // 26..31 + u32 lk : 1; // 31 + u32 aa : 1; // 30 + u32 : 4; // 26..29 u32 : 5; // 21..25 u32 rb : 5; // 16..20 u32 ra : 5; // 11..15 @@ -51,7 +54,8 @@ union ppu_opcode_t struct { u32 uimm16 : 16; // 16..31 - u32 : 5; // 11..15 + u32 : 4; // 12..15 + u32 l11 : 1; // 11 u32 rs : 5; // 6..10 u32 : 6; }; @@ -65,10 +69,32 @@ union ppu_opcode_t struct { - u32 : 18; // 14..31 - u32 crfs : 3; // 11..13 - u32 : 2; // 9..10 - u32 crfd : 3; // 6..8 + s32 ll : 26; // 6..31 + s32 : 6; + }; + + struct + { + u32 : 5; // 27..31 + u32 lev : 7; // 20..26 + u32 i : 4; // 16..19 + u32 : 2; // 14..15 + u32 crfs : 3; // 11..13 + u32 l10 : 1; // 10 + u32 : 1; // 9 + u32 crfd : 3; // 6..8 + u32 : 6; + }; + + struct + { + u32 : 1; // 31 + u32 : 1; // 30 + u32 : 4; // 26..29 + u32 : 5; // 21..25 + u32 crbb : 5; // 16..20 + u32 crba : 5; // 11..15 + u32 crbd : 5; // 6..10 u32 : 6; }; @@ -82,6 
+108,32 @@ union ppu_opcode_t u32 bo : 5; // 6..10 u32 : 6; }; + + struct + { + u32 : 6; // 26..31 + u32 frc : 5; // 21..25 + u32 frb : 5; // 16..20 + u32 fra : 5; // 11..15 + u32 frd : 5; // 6..10 + u32 : 6; + }; + + struct + { + u32 : 12; // 20..31 + u32 crm : 8; // 12..19 + u32 : 1; // 11 + u32 frs : 5; // 6..10 + u32 : 6; + }; + + struct + { + u32 : 17; // 15..31 + u32 flm : 8; // 7..14 + u32 : 7; + }; }; using ppu_inter_func_t = void(*)(PPUThread& CPU, ppu_opcode_t opcode); @@ -406,8 +458,7 @@ namespace ppu_interpreter void LVRXL(PPUThread& CPU, ppu_opcode_t op); void DSS(PPUThread& CPU, ppu_opcode_t op); void SRAWI(PPUThread& CPU, ppu_opcode_t op); - void SRADI1(PPUThread& CPU, ppu_opcode_t op); - void SRADI2(PPUThread& CPU, ppu_opcode_t op); + void SRADI(PPUThread& CPU, ppu_opcode_t op); void EIEIO(PPUThread& CPU, ppu_opcode_t op); void STVLXL(PPUThread& CPU, ppu_opcode_t op); void STHBRX(PPUThread& CPU, ppu_opcode_t op); @@ -816,8 +867,8 @@ public: virtual void LVRXL(u32 vd, u32 ra, u32 rb) { func = ppu_interpreter::LVRXL; } virtual void DSS(u32 strm, u32 a) { func = ppu_interpreter::DSS; } virtual void SRAWI(u32 ra, u32 rs, u32 sh, bool rc) { func = ppu_interpreter::SRAWI; } - virtual void SRADI1(u32 ra, u32 rs, u32 sh, bool rc) { func = ppu_interpreter::SRADI1; } - virtual void SRADI2(u32 ra, u32 rs, u32 sh, bool rc) { func = ppu_interpreter::SRADI2; } + virtual void SRADI1(u32 ra, u32 rs, u32 sh, bool rc) { func = ppu_interpreter::SRADI; } + virtual void SRADI2(u32 ra, u32 rs, u32 sh, bool rc) { func = ppu_interpreter::SRADI; } virtual void EIEIO() { func = ppu_interpreter::EIEIO; } virtual void STVLXL(u32 vs, u32 ra, u32 rb) { func = ppu_interpreter::STVLXL; } virtual void STHBRX(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STHBRX; } diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 23f0102285..76753e1751 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -24,8 +24,6 @@ u64 
rotate_mask[64][64]; const ppu_inter_func_t g_ppu_inter_func_list[] = { - nullptr, - ppu_interpreter::NULL_OP, ppu_interpreter::NOP, @@ -344,8 +342,7 @@ const ppu_inter_func_t g_ppu_inter_func_list[] = ppu_interpreter::LVRXL, ppu_interpreter::DSS, ppu_interpreter::SRAWI, - ppu_interpreter::SRADI1, - ppu_interpreter::SRADI2, + ppu_interpreter::SRADI, ppu_interpreter::EIEIO, ppu_interpreter::STVLXL, ppu_interpreter::STHBRX, @@ -471,7 +468,7 @@ void fill_ppu_exec_map(u32 addr, u32 size) for (u32 pos = addr; pos < addr + size; pos += 4) { - inter->func = nullptr; + inter->func = ppu_interpreter::NULL_OP; // decode PPU opcode dec.Decode(vm::read32(pos)); @@ -479,18 +476,14 @@ void fill_ppu_exec_map(u32 addr, u32 size) u32 index = 0; // find function index - for (const auto& func : g_ppu_inter_func_list) + for (; index < sizeof(g_ppu_inter_func_list) / sizeof(ppu_inter_func_t); index++) { - if (inter->func == func) + if (inter->func == g_ppu_inter_func_list[index]) { - index = &func - g_ppu_inter_func_list; break; } } - // zero function is nullptr, it shouldn't happen - assert(index); - // write index in memory *(u32*)((u8*)g_ppu_exec_map + pos) = index; } @@ -700,6 +693,8 @@ void PPUThread::FastStop() void PPUThread::Task() { + SetHostRoundingMode(FPSCR_RN_NEAR); + if (custom_task) { return custom_task(*this); @@ -712,10 +707,8 @@ void PPUThread::Task() while (true) { - //if (Emu.IsStopped()) - //{ - // return; - //} + // get interpreter function + const auto func = g_ppu_inter_func_list[*(u32*)((u8*)g_ppu_exec_map + PC)]; if (m_events) { @@ -730,11 +723,8 @@ void PPUThread::Task() // read opcode const ppu_opcode_t opcode = { vm::read32(PC) }; - // read interpreter function index - const u32 index = *(u32*)((u8*)g_ppu_exec_map + PC); - // call interpreter function - g_ppu_inter_func_list[index](*this, opcode); + func(*this, opcode); // next instruction //PC += 4; From 63276a3f84528c5e19fcd918d541f9d01de1cf1a Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Fri, 20 Mar 
2015 19:53:54 +0300 Subject: [PATCH 06/23] SPU interpreter (basic concept) --- rpcs3/Emu/Cell/PPUThread.cpp | 6 +- rpcs3/Emu/Cell/SPUInterpreter.cpp | 1022 +++++++++++++++++++++++++++++ rpcs3/Emu/Cell/SPUInterpreter2.h | 439 +++++++++++++ rpcs3/Emu/Cell/SPUThread.cpp | 90 ++- rpcs3/Gui/MainFrame.cpp | 5 +- rpcs3/emucore.vcxproj | 2 + rpcs3/emucore.vcxproj.filters | 6 + 7 files changed, 1552 insertions(+), 18 deletions(-) create mode 100644 rpcs3/Emu/Cell/SPUInterpreter.cpp create mode 100644 rpcs3/Emu/Cell/SPUInterpreter2.h diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 76753e1751..61215ad12e 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -569,7 +569,7 @@ void PPUThread::DoRun() { m_dec = nullptr; - switch(Ini.CPUDecoderMode.GetValue()) + switch (auto mode = Ini.CPUDecoderMode.GetValue()) { case 0: // original interpreter { @@ -598,9 +598,11 @@ void PPUThread::DoRun() //case 3: m_dec = new PPURecompiler(*this); break; default: - LOG_ERROR(PPU, "Invalid CPU decoder mode: %d", Ini.CPUDecoderMode.GetValue()); + { + LOG_ERROR(PPU, "Invalid CPU decoder mode: %d", mode); Emu.Pause(); } + } } void PPUThread::DoResume() diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp new file mode 100644 index 0000000000..618a06fda0 --- /dev/null +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -0,0 +1,1022 @@ +#include "stdafx.h" +#include "Utilities/Log.h" +#include "Emu/Memory/Memory.h" +#include "Emu/System.h" + +#include "SPUThread.h" +#include "SPUInstrTable.h" +#include "SPUInterpreter.h" +#include "SPUInterpreter2.h" + +void spu_interpreter::DEFAULT(SPUThread& CPU, spu_opcode_t op) +{ + SPUInterpreter inter(CPU); (*SPU_instr::rrr_list)(&inter, op.opcode); +} + + +void spu_interpreter::STOP(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::LNOP(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::SYNC(SPUThread& CPU, 
spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::DSYNC(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::MFSPR(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::RDCH(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::RCHCNT(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::SF(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::OR(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::BG(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::SFH(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::NOR(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ABSDB(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ROT(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ROTM(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ROTMA(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::SHL(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ROTH(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ROTHM(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ROTMAH(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::SHLH(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ROTI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ROTMI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ROTMAI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::SHLI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); 
+} + +void spu_interpreter::ROTHI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ROTHMI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ROTMAHI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::SHLHI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::A(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::AND(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CG(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::AH(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::NAND(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::AVGB(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::MTSPR(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::WRCH(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::BIZ(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::BINZ(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::BIHZ(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::BIHNZ(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::STOPD(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::STQX(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::BI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::BISL(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::IRET(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::BISLED(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void 
spu_interpreter::HBR(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::GB(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::GBH(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::GBB(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::FSM(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::FSMH(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::FSMB(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::FREST(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::FRSQEST(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::LQX(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ROTQBYBI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ROTQMBYBI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::SHLQBYBI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CBX(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CHX(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CWX(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CDX(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ROTQBI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ROTQMBI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::SHLQBI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ROTQBY(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ROTQMBY(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void 
spu_interpreter::SHLQBY(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ORX(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CBD(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CHD(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CWD(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CDD(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ROTQBII(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ROTQMBII(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::SHLQBII(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ROTQBYI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ROTQMBYI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::SHLQBYI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::NOP(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CGT(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::XOR(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CGTH(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::EQV(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CGTB(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::SUMB(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::HGT(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CLZ(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::XSWD(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void 
spu_interpreter::XSHW(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CNTB(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::XSBH(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CLGT(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ANDC(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::FCGT(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::DFCGT(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::FA(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::FS(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::FM(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CLGTH(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ORC(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::FCMGT(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::DFCMGT(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::DFA(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::DFS(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::DFM(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CLGTB(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::HLGT(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::DFMA(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::DFMS(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::DFNMS(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::DFNMA(SPUThread& CPU, 
spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CEQ(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::MPYHHU(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ADDX(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::SFX(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CGX(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::BGX(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::MPYHHA(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::MPYHHAU(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::FSCRRD(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::FESD(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::FRDS(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::FSCRWR(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::DFTSV(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::FCEQ(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::DFCEQ(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::MPY(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::MPYH(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::MPYHH(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::MPYS(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CEQH(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::FCMEQ(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::DFCMEQ(SPUThread& CPU, spu_opcode_t op) +{ + 
DEFAULT(CPU, op); +} + +void spu_interpreter::MPYU(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CEQB(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::FI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::HEQ(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + + +void spu_interpreter::CFLTS(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CFLTU(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CSFLT(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CUFLT(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + + +void spu_interpreter::BRZ(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::STQA(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::BRNZ(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::BRHZ(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::BRHNZ(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::STQR(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::BRA(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::LQA(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::BRASL(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::BR(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::FSMBI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::BRSL(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::LQR(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::IL(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void 
spu_interpreter::ILHU(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ILH(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::IOHL(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + + +void spu_interpreter::ORI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ORHI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ORBI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::SFI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::SFHI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ANDI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ANDHI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ANDBI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::AI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::AHI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::STQD(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::LQD(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::XORI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::XORHI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::XORBI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CGTI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CGTHI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CGTBI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::HGTI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CLGTI(SPUThread& CPU, 
spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CLGTHI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CLGTBI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::HLGTI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::MPYI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::MPYUI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CEQI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CEQHI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::CEQBI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::HEQI(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + + +void spu_interpreter::HBRA(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::HBRR(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::ILA(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + + +void spu_interpreter::SELB(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::SHUFB(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::MPYA(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::FNMS(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::FMA(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + +void spu_interpreter::FMS(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + + +void spu_interpreter::UNK(SPUThread& CPU, spu_opcode_t op) +{ + DEFAULT(CPU, op); +} + diff --git a/rpcs3/Emu/Cell/SPUInterpreter2.h b/rpcs3/Emu/Cell/SPUInterpreter2.h new file mode 100644 index 0000000000..48daf67b8e --- /dev/null +++ b/rpcs3/Emu/Cell/SPUInterpreter2.h @@ -0,0 +1,439 @@ +#pragma once + +class SPUThread; + +union 
spu_opcode_t +{ + u32 opcode; + + +}; + +using spu_inter_func_t = void(*)(SPUThread& CPU, spu_opcode_t opcode); + +namespace spu_interpreter +{ + void DEFAULT(SPUThread& CPU, spu_opcode_t op); + + void STOP(SPUThread& CPU, spu_opcode_t op); + void LNOP(SPUThread& CPU, spu_opcode_t op); + void SYNC(SPUThread& CPU, spu_opcode_t op); + void DSYNC(SPUThread& CPU, spu_opcode_t op); + void MFSPR(SPUThread& CPU, spu_opcode_t op); + void RDCH(SPUThread& CPU, spu_opcode_t op); + void RCHCNT(SPUThread& CPU, spu_opcode_t op); + void SF(SPUThread& CPU, spu_opcode_t op); + void OR(SPUThread& CPU, spu_opcode_t op); + void BG(SPUThread& CPU, spu_opcode_t op); + void SFH(SPUThread& CPU, spu_opcode_t op); + void NOR(SPUThread& CPU, spu_opcode_t op); + void ABSDB(SPUThread& CPU, spu_opcode_t op); + void ROT(SPUThread& CPU, spu_opcode_t op); + void ROTM(SPUThread& CPU, spu_opcode_t op); + void ROTMA(SPUThread& CPU, spu_opcode_t op); + void SHL(SPUThread& CPU, spu_opcode_t op); + void ROTH(SPUThread& CPU, spu_opcode_t op); + void ROTHM(SPUThread& CPU, spu_opcode_t op); + void ROTMAH(SPUThread& CPU, spu_opcode_t op); + void SHLH(SPUThread& CPU, spu_opcode_t op); + void ROTI(SPUThread& CPU, spu_opcode_t op); + void ROTMI(SPUThread& CPU, spu_opcode_t op); + void ROTMAI(SPUThread& CPU, spu_opcode_t op); + void SHLI(SPUThread& CPU, spu_opcode_t op); + void ROTHI(SPUThread& CPU, spu_opcode_t op); + void ROTHMI(SPUThread& CPU, spu_opcode_t op); + void ROTMAHI(SPUThread& CPU, spu_opcode_t op); + void SHLHI(SPUThread& CPU, spu_opcode_t op); + void A(SPUThread& CPU, spu_opcode_t op); + void AND(SPUThread& CPU, spu_opcode_t op); + void CG(SPUThread& CPU, spu_opcode_t op); + void AH(SPUThread& CPU, spu_opcode_t op); + void NAND(SPUThread& CPU, spu_opcode_t op); + void AVGB(SPUThread& CPU, spu_opcode_t op); + void MTSPR(SPUThread& CPU, spu_opcode_t op); + void WRCH(SPUThread& CPU, spu_opcode_t op); + void BIZ(SPUThread& CPU, spu_opcode_t op); + void BINZ(SPUThread& CPU, spu_opcode_t op); + void 
BIHZ(SPUThread& CPU, spu_opcode_t op); + void BIHNZ(SPUThread& CPU, spu_opcode_t op); + void STOPD(SPUThread& CPU, spu_opcode_t op); + void STQX(SPUThread& CPU, spu_opcode_t op); + void BI(SPUThread& CPU, spu_opcode_t op); + void BISL(SPUThread& CPU, spu_opcode_t op); + void IRET(SPUThread& CPU, spu_opcode_t op); + void BISLED(SPUThread& CPU, spu_opcode_t op); + void HBR(SPUThread& CPU, spu_opcode_t op); + void GB(SPUThread& CPU, spu_opcode_t op); + void GBH(SPUThread& CPU, spu_opcode_t op); + void GBB(SPUThread& CPU, spu_opcode_t op); + void FSM(SPUThread& CPU, spu_opcode_t op); + void FSMH(SPUThread& CPU, spu_opcode_t op); + void FSMB(SPUThread& CPU, spu_opcode_t op); + void FREST(SPUThread& CPU, spu_opcode_t op); + void FRSQEST(SPUThread& CPU, spu_opcode_t op); + void LQX(SPUThread& CPU, spu_opcode_t op); + void ROTQBYBI(SPUThread& CPU, spu_opcode_t op); + void ROTQMBYBI(SPUThread& CPU, spu_opcode_t op); + void SHLQBYBI(SPUThread& CPU, spu_opcode_t op); + void CBX(SPUThread& CPU, spu_opcode_t op); + void CHX(SPUThread& CPU, spu_opcode_t op); + void CWX(SPUThread& CPU, spu_opcode_t op); + void CDX(SPUThread& CPU, spu_opcode_t op); + void ROTQBI(SPUThread& CPU, spu_opcode_t op); + void ROTQMBI(SPUThread& CPU, spu_opcode_t op); + void SHLQBI(SPUThread& CPU, spu_opcode_t op); + void ROTQBY(SPUThread& CPU, spu_opcode_t op); + void ROTQMBY(SPUThread& CPU, spu_opcode_t op); + void SHLQBY(SPUThread& CPU, spu_opcode_t op); + void ORX(SPUThread& CPU, spu_opcode_t op); + void CBD(SPUThread& CPU, spu_opcode_t op); + void CHD(SPUThread& CPU, spu_opcode_t op); + void CWD(SPUThread& CPU, spu_opcode_t op); + void CDD(SPUThread& CPU, spu_opcode_t op); + void ROTQBII(SPUThread& CPU, spu_opcode_t op); + void ROTQMBII(SPUThread& CPU, spu_opcode_t op); + void SHLQBII(SPUThread& CPU, spu_opcode_t op); + void ROTQBYI(SPUThread& CPU, spu_opcode_t op); + void ROTQMBYI(SPUThread& CPU, spu_opcode_t op); + void SHLQBYI(SPUThread& CPU, spu_opcode_t op); + void NOP(SPUThread& CPU, 
spu_opcode_t op); + void CGT(SPUThread& CPU, spu_opcode_t op); + void XOR(SPUThread& CPU, spu_opcode_t op); + void CGTH(SPUThread& CPU, spu_opcode_t op); + void EQV(SPUThread& CPU, spu_opcode_t op); + void CGTB(SPUThread& CPU, spu_opcode_t op); + void SUMB(SPUThread& CPU, spu_opcode_t op); + void HGT(SPUThread& CPU, spu_opcode_t op); + void CLZ(SPUThread& CPU, spu_opcode_t op); + void XSWD(SPUThread& CPU, spu_opcode_t op); + void XSHW(SPUThread& CPU, spu_opcode_t op); + void CNTB(SPUThread& CPU, spu_opcode_t op); + void XSBH(SPUThread& CPU, spu_opcode_t op); + void CLGT(SPUThread& CPU, spu_opcode_t op); + void ANDC(SPUThread& CPU, spu_opcode_t op); + void FCGT(SPUThread& CPU, spu_opcode_t op); + void DFCGT(SPUThread& CPU, spu_opcode_t op); + void FA(SPUThread& CPU, spu_opcode_t op); + void FS(SPUThread& CPU, spu_opcode_t op); + void FM(SPUThread& CPU, spu_opcode_t op); + void CLGTH(SPUThread& CPU, spu_opcode_t op); + void ORC(SPUThread& CPU, spu_opcode_t op); + void FCMGT(SPUThread& CPU, spu_opcode_t op); + void DFCMGT(SPUThread& CPU, spu_opcode_t op); + void DFA(SPUThread& CPU, spu_opcode_t op); + void DFS(SPUThread& CPU, spu_opcode_t op); + void DFM(SPUThread& CPU, spu_opcode_t op); + void CLGTB(SPUThread& CPU, spu_opcode_t op); + void HLGT(SPUThread& CPU, spu_opcode_t op); + void DFMA(SPUThread& CPU, spu_opcode_t op); + void DFMS(SPUThread& CPU, spu_opcode_t op); + void DFNMS(SPUThread& CPU, spu_opcode_t op); + void DFNMA(SPUThread& CPU, spu_opcode_t op); + void CEQ(SPUThread& CPU, spu_opcode_t op); + void MPYHHU(SPUThread& CPU, spu_opcode_t op); + void ADDX(SPUThread& CPU, spu_opcode_t op); + void SFX(SPUThread& CPU, spu_opcode_t op); + void CGX(SPUThread& CPU, spu_opcode_t op); + void BGX(SPUThread& CPU, spu_opcode_t op); + void MPYHHA(SPUThread& CPU, spu_opcode_t op); + void MPYHHAU(SPUThread& CPU, spu_opcode_t op); + void FSCRRD(SPUThread& CPU, spu_opcode_t op); + void FESD(SPUThread& CPU, spu_opcode_t op); + void FRDS(SPUThread& CPU, spu_opcode_t op); + 
void FSCRWR(SPUThread& CPU, spu_opcode_t op); + void DFTSV(SPUThread& CPU, spu_opcode_t op); + void FCEQ(SPUThread& CPU, spu_opcode_t op); + void DFCEQ(SPUThread& CPU, spu_opcode_t op); + void MPY(SPUThread& CPU, spu_opcode_t op); + void MPYH(SPUThread& CPU, spu_opcode_t op); + void MPYHH(SPUThread& CPU, spu_opcode_t op); + void MPYS(SPUThread& CPU, spu_opcode_t op); + void CEQH(SPUThread& CPU, spu_opcode_t op); + void FCMEQ(SPUThread& CPU, spu_opcode_t op); + void DFCMEQ(SPUThread& CPU, spu_opcode_t op); + void MPYU(SPUThread& CPU, spu_opcode_t op); + void CEQB(SPUThread& CPU, spu_opcode_t op); + void FI(SPUThread& CPU, spu_opcode_t op); + void HEQ(SPUThread& CPU, spu_opcode_t op); + + void CFLTS(SPUThread& CPU, spu_opcode_t op); + void CFLTU(SPUThread& CPU, spu_opcode_t op); + void CSFLT(SPUThread& CPU, spu_opcode_t op); + void CUFLT(SPUThread& CPU, spu_opcode_t op); + + void BRZ(SPUThread& CPU, spu_opcode_t op); + void STQA(SPUThread& CPU, spu_opcode_t op); + void BRNZ(SPUThread& CPU, spu_opcode_t op); + void BRHZ(SPUThread& CPU, spu_opcode_t op); + void BRHNZ(SPUThread& CPU, spu_opcode_t op); + void STQR(SPUThread& CPU, spu_opcode_t op); + void BRA(SPUThread& CPU, spu_opcode_t op); + void LQA(SPUThread& CPU, spu_opcode_t op); + void BRASL(SPUThread& CPU, spu_opcode_t op); + void BR(SPUThread& CPU, spu_opcode_t op); + void FSMBI(SPUThread& CPU, spu_opcode_t op); + void BRSL(SPUThread& CPU, spu_opcode_t op); + void LQR(SPUThread& CPU, spu_opcode_t op); + void IL(SPUThread& CPU, spu_opcode_t op); + void ILHU(SPUThread& CPU, spu_opcode_t op); + void ILH(SPUThread& CPU, spu_opcode_t op); + void IOHL(SPUThread& CPU, spu_opcode_t op); + + void ORI(SPUThread& CPU, spu_opcode_t op); + void ORHI(SPUThread& CPU, spu_opcode_t op); + void ORBI(SPUThread& CPU, spu_opcode_t op); + void SFI(SPUThread& CPU, spu_opcode_t op); + void SFHI(SPUThread& CPU, spu_opcode_t op); + void ANDI(SPUThread& CPU, spu_opcode_t op); + void ANDHI(SPUThread& CPU, spu_opcode_t op); + void 
ANDBI(SPUThread& CPU, spu_opcode_t op); + void AI(SPUThread& CPU, spu_opcode_t op); + void AHI(SPUThread& CPU, spu_opcode_t op); + void STQD(SPUThread& CPU, spu_opcode_t op); + void LQD(SPUThread& CPU, spu_opcode_t op); + void XORI(SPUThread& CPU, spu_opcode_t op); + void XORHI(SPUThread& CPU, spu_opcode_t op); + void XORBI(SPUThread& CPU, spu_opcode_t op); + void CGTI(SPUThread& CPU, spu_opcode_t op); + void CGTHI(SPUThread& CPU, spu_opcode_t op); + void CGTBI(SPUThread& CPU, spu_opcode_t op); + void HGTI(SPUThread& CPU, spu_opcode_t op); + void CLGTI(SPUThread& CPU, spu_opcode_t op); + void CLGTHI(SPUThread& CPU, spu_opcode_t op); + void CLGTBI(SPUThread& CPU, spu_opcode_t op); + void HLGTI(SPUThread& CPU, spu_opcode_t op); + void MPYI(SPUThread& CPU, spu_opcode_t op); + void MPYUI(SPUThread& CPU, spu_opcode_t op); + void CEQI(SPUThread& CPU, spu_opcode_t op); + void CEQHI(SPUThread& CPU, spu_opcode_t op); + void CEQBI(SPUThread& CPU, spu_opcode_t op); + void HEQI(SPUThread& CPU, spu_opcode_t op); + + void HBRA(SPUThread& CPU, spu_opcode_t op); + void HBRR(SPUThread& CPU, spu_opcode_t op); + void ILA(SPUThread& CPU, spu_opcode_t op); + + void SELB(SPUThread& CPU, spu_opcode_t op); + void SHUFB(SPUThread& CPU, spu_opcode_t op); + void MPYA(SPUThread& CPU, spu_opcode_t op); + void FNMS(SPUThread& CPU, spu_opcode_t op); + void FMA(SPUThread& CPU, spu_opcode_t op); + void FMS(SPUThread& CPU, spu_opcode_t op); + + void UNK(SPUThread& CPU, spu_opcode_t op); +} + +class SPUInterpreter2 : public SPUOpcodes +{ +public: + virtual ~SPUInterpreter2() {} + + spu_inter_func_t func; + + virtual void STOP(u32 code) { func = spu_interpreter::STOP; } + virtual void LNOP() { func = spu_interpreter::LNOP; } + virtual void SYNC(u32 Cbit) { func = spu_interpreter::SYNC; } + virtual void DSYNC() { func = spu_interpreter::DSYNC; } + virtual void MFSPR(u32 rt, u32 sa) { func = spu_interpreter::MFSPR; } + virtual void RDCH(u32 rt, u32 ra) { func = spu_interpreter::RDCH; } + virtual void 
RCHCNT(u32 rt, u32 ra) { func = spu_interpreter::RCHCNT; } + virtual void SF(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::SF; } + virtual void OR(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::OR; } + virtual void BG(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::BG; } + virtual void SFH(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::SFH; } + virtual void NOR(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::NOR; } + virtual void ABSDB(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ABSDB; } + virtual void ROT(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ROT; } + virtual void ROTM(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ROTM; } + virtual void ROTMA(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ROTMA; } + virtual void SHL(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::SHL; } + virtual void ROTH(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ROTH; } + virtual void ROTHM(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ROTHM; } + virtual void ROTMAH(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ROTMAH; } + virtual void SHLH(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::SHLH; } + virtual void ROTI(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::ROTI; } + virtual void ROTMI(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::ROTMI; } + virtual void ROTMAI(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::ROTMAI; } + virtual void SHLI(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::SHLI; } + virtual void ROTHI(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::ROTHI; } + virtual void ROTHMI(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::ROTHMI; } + virtual void ROTMAHI(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::ROTMAHI; } + virtual void SHLHI(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::SHLHI; } + virtual void A(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::A; } + virtual void AND(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::AND; } + virtual void CG(u32 rt, u32 ra, u32 rb) { func = 
spu_interpreter::CG; } + virtual void AH(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::AH; } + virtual void NAND(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::NAND; } + virtual void AVGB(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::AVGB; } + virtual void MTSPR(u32 rt, u32 sa) { func = spu_interpreter::MTSPR; } + virtual void WRCH(u32 ra, u32 rt) { func = spu_interpreter::WRCH; } + virtual void BIZ(u32 intr, u32 rt, u32 ra) { func = spu_interpreter::BIZ; } + virtual void BINZ(u32 intr, u32 rt, u32 ra) { func = spu_interpreter::BINZ; } + virtual void BIHZ(u32 intr, u32 rt, u32 ra) { func = spu_interpreter::BIHZ; } + virtual void BIHNZ(u32 intr, u32 rt, u32 ra) { func = spu_interpreter::BIHNZ; } + virtual void STOPD(u32 rc, u32 ra, u32 rb) { func = spu_interpreter::STOPD; } + virtual void STQX(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::STQX; } + virtual void BI(u32 intr, u32 ra) { func = spu_interpreter::BI; } + virtual void BISL(u32 intr, u32 rt, u32 ra) { func = spu_interpreter::BISL; } + virtual void IRET(u32 ra) { func = spu_interpreter::IRET; } + virtual void BISLED(u32 intr, u32 rt, u32 ra) { func = spu_interpreter::BISLED; } + virtual void HBR(u32 p, u32 ro, u32 ra) { func = spu_interpreter::HBR; } + virtual void GB(u32 rt, u32 ra) { func = spu_interpreter::GB; } + virtual void GBH(u32 rt, u32 ra) { func = spu_interpreter::GBH; } + virtual void GBB(u32 rt, u32 ra) { func = spu_interpreter::GBB; } + virtual void FSM(u32 rt, u32 ra) { func = spu_interpreter::FSM; } + virtual void FSMH(u32 rt, u32 ra) { func = spu_interpreter::FSMH; } + virtual void FSMB(u32 rt, u32 ra) { func = spu_interpreter::FSMB; } + virtual void FREST(u32 rt, u32 ra) { func = spu_interpreter::FREST; } + virtual void FRSQEST(u32 rt, u32 ra) { func = spu_interpreter::FRSQEST; } + virtual void LQX(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::LQX; } + virtual void ROTQBYBI(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ROTQBYBI; } + virtual void ROTQMBYBI(u32 rt, u32 
ra, u32 rb) { func = spu_interpreter::ROTQMBYBI; } + virtual void SHLQBYBI(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::SHLQBYBI; } + virtual void CBX(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CBX; } + virtual void CHX(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CHX; } + virtual void CWX(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CWX; } + virtual void CDX(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CDX; } + virtual void ROTQBI(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ROTQBI; } + virtual void ROTQMBI(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ROTQMBI; } + virtual void SHLQBI(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::SHLQBI; } + virtual void ROTQBY(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ROTQBY; } + virtual void ROTQMBY(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ROTQMBY; } + virtual void SHLQBY(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::SHLQBY; } + virtual void ORX(u32 rt, u32 ra) { func = spu_interpreter::ORX; } + virtual void CBD(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::CBD; } + virtual void CHD(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::CHD; } + virtual void CWD(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::CWD; } + virtual void CDD(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::CDD; } + virtual void ROTQBII(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::ROTQBII; } + virtual void ROTQMBII(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::ROTQMBII; } + virtual void SHLQBII(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::SHLQBII; } + virtual void ROTQBYI(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::ROTQBYI; } + virtual void ROTQMBYI(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::ROTQMBYI; } + virtual void SHLQBYI(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::SHLQBYI; } + virtual void NOP(u32 rt) { func = spu_interpreter::NOP; } + virtual void CGT(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CGT; } + virtual void XOR(u32 rt, u32 ra, u32 rb) { 
func = spu_interpreter::XOR; } + virtual void CGTH(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CGTH; } + virtual void EQV(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::EQV; } + virtual void CGTB(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CGTB; } + virtual void SUMB(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::SUMB; } + virtual void HGT(u32 rt, s32 ra, s32 rb) { func = spu_interpreter::HGT; } + virtual void CLZ(u32 rt, u32 ra) { func = spu_interpreter::CLZ; } + virtual void XSWD(u32 rt, u32 ra) { func = spu_interpreter::XSWD; } + virtual void XSHW(u32 rt, u32 ra) { func = spu_interpreter::XSHW; } + virtual void CNTB(u32 rt, u32 ra) { func = spu_interpreter::CNTB; } + virtual void XSBH(u32 rt, u32 ra) { func = spu_interpreter::XSBH; } + virtual void CLGT(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CLGT; } + virtual void ANDC(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ANDC; } + virtual void FCGT(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::FCGT; } + virtual void DFCGT(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::DFCGT; } + virtual void FA(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::FA; } + virtual void FS(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::FS; } + virtual void FM(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::FM; } + virtual void CLGTH(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CLGTH; } + virtual void ORC(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ORC; } + virtual void FCMGT(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::FCMGT; } + virtual void DFCMGT(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::DFCMGT; } + virtual void DFA(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::DFA; } + virtual void DFS(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::DFS; } + virtual void DFM(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::DFM; } + virtual void CLGTB(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CLGTB; } + virtual void HLGT(u32 rt, u32 ra, u32 rb) { func = 
spu_interpreter::HLGT; } + virtual void DFMA(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::DFMA; } + virtual void DFMS(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::DFMS; } + virtual void DFNMS(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::DFNMS; } + virtual void DFNMA(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::DFNMA; } + virtual void CEQ(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CEQ; } + virtual void MPYHHU(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::MPYHHU; } + virtual void ADDX(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ADDX; } + virtual void SFX(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::SFX; } + virtual void CGX(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CGX; } + virtual void BGX(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::BGX; } + virtual void MPYHHA(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::MPYHHA; } + virtual void MPYHHAU(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::MPYHHAU; } + virtual void FSCRRD(u32 rt) { func = spu_interpreter::FSCRRD; } + virtual void FESD(u32 rt, u32 ra) { func = spu_interpreter::FESD; } + virtual void FRDS(u32 rt, u32 ra) { func = spu_interpreter::FRDS; } + virtual void FSCRWR(u32 rt, u32 ra) { func = spu_interpreter::FSCRWR; } + virtual void DFTSV(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::DFTSV; } + virtual void FCEQ(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::FCEQ; } + virtual void DFCEQ(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::DFCEQ; } + virtual void MPY(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::MPY; } + virtual void MPYH(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::MPYH; } + virtual void MPYHH(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::MPYHH; } + virtual void MPYS(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::MPYS; } + virtual void CEQH(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CEQH; } + virtual void FCMEQ(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::FCMEQ; } + virtual void DFCMEQ(u32 rt, u32 ra, 
u32 rb) { func = spu_interpreter::DFCMEQ; } + virtual void MPYU(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::MPYU; } + virtual void CEQB(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CEQB; } + virtual void FI(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::FI; } + virtual void HEQ(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::HEQ; } + + virtual void CFLTS(u32 rt, u32 ra, s32 i8) { func = spu_interpreter::CFLTS; } + virtual void CFLTU(u32 rt, u32 ra, s32 i8) { func = spu_interpreter::CFLTU; } + virtual void CSFLT(u32 rt, u32 ra, s32 i8) { func = spu_interpreter::CSFLT; } + virtual void CUFLT(u32 rt, u32 ra, s32 i8) { func = spu_interpreter::CUFLT; } + + virtual void BRZ(u32 rt, s32 i16) { func = spu_interpreter::BRZ; } + virtual void STQA(u32 rt, s32 i16) { func = spu_interpreter::STQA; } + virtual void BRNZ(u32 rt, s32 i16) { func = spu_interpreter::BRNZ; } + virtual void BRHZ(u32 rt, s32 i16) { func = spu_interpreter::BRHZ; } + virtual void BRHNZ(u32 rt, s32 i16) { func = spu_interpreter::BRHNZ; } + virtual void STQR(u32 rt, s32 i16) { func = spu_interpreter::STQR; } + virtual void BRA(s32 i16) { func = spu_interpreter::BRA; } + virtual void LQA(u32 rt, s32 i16) { func = spu_interpreter::LQA; } + virtual void BRASL(u32 rt, s32 i16) { func = spu_interpreter::BRASL; } + virtual void BR(s32 i16) { func = spu_interpreter::BR; } + virtual void FSMBI(u32 rt, s32 i16) { func = spu_interpreter::FSMBI; } + virtual void BRSL(u32 rt, s32 i16) { func = spu_interpreter::BRSL; } + virtual void LQR(u32 rt, s32 i16) { func = spu_interpreter::LQR; } + virtual void IL(u32 rt, s32 i16) { func = spu_interpreter::IL; } + virtual void ILHU(u32 rt, s32 i16) { func = spu_interpreter::ILHU; } + virtual void ILH(u32 rt, s32 i16) { func = spu_interpreter::ILH; } + virtual void IOHL(u32 rt, s32 i16) { func = spu_interpreter::IOHL; } + + virtual void ORI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::ORI; } + virtual void ORHI(u32 rt, u32 ra, s32 i10) { func = 
spu_interpreter::ORHI; } + virtual void ORBI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::ORBI; } + virtual void SFI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::SFI; } + virtual void SFHI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::SFHI; } + virtual void ANDI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::ANDI; } + virtual void ANDHI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::ANDHI; } + virtual void ANDBI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::ANDBI; } + virtual void AI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::AI; } + virtual void AHI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::AHI; } + virtual void STQD(u32 rt, s32 i10, u32 ra) { func = spu_interpreter::STQD; } + virtual void LQD(u32 rt, s32 i10, u32 ra) { func = spu_interpreter::LQD; } + virtual void XORI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::XORI; } + virtual void XORHI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::XORHI; } + virtual void XORBI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::XORBI; } + virtual void CGTI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::CGTI; } + virtual void CGTHI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::CGTHI; } + virtual void CGTBI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::CGTBI; } + virtual void HGTI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::HGTI; } + virtual void CLGTI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::CLGTI; } + virtual void CLGTHI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::CLGTHI; } + virtual void CLGTBI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::CLGTBI; } + virtual void HLGTI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::HLGTI; } + virtual void MPYI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::MPYI; } + virtual void MPYUI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::MPYUI; } + virtual void CEQI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::CEQI; } + virtual void CEQHI(u32 rt, u32 ra, s32 i10) { func = 
spu_interpreter::CEQHI; } + virtual void CEQBI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::CEQBI; } + virtual void HEQI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::HEQI; } + + virtual void HBRA(s32 ro, s32 i16) { func = spu_interpreter::HBRA; } + virtual void HBRR(s32 ro, s32 i16) { func = spu_interpreter::HBRR; } + virtual void ILA(u32 rt, u32 i18) { func = spu_interpreter::ILA; } + + virtual void SELB(u32 rc, u32 ra, u32 rb, u32 rt) { func = spu_interpreter::SELB; } + virtual void SHUFB(u32 rc, u32 ra, u32 rb, u32 rt) { func = spu_interpreter::SHUFB; } + virtual void MPYA(u32 rc, u32 ra, u32 rb, u32 rt) { func = spu_interpreter::MPYA; } + virtual void FNMS(u32 rc, u32 ra, u32 rb, u32 rt) { func = spu_interpreter::FNMS; } + virtual void FMA(u32 rc, u32 ra, u32 rb, u32 rt) { func = spu_interpreter::FMA; } + virtual void FMS(u32 rc, u32 ra, u32 rb, u32 rt) { func = spu_interpreter::FMS; } + + virtual void UNK(u32 code, u32 opcode, u32 gcode) { func = spu_interpreter::UNK; } +}; diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index ac68abb7f4..c68ead7b02 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -17,10 +17,38 @@ #include "Emu/Cell/SPUThread.h" #include "Emu/Cell/SPUDecoder.h" #include "Emu/Cell/SPUInterpreter.h" +#include "Emu/Cell/SPUInterpreter2.h" #include "Emu/Cell/SPURecompiler.h" #include +class spu_inter_func_list_t +{ + std::array funcs; + +public: + spu_inter_func_list_t() + { + auto inter = new SPUInterpreter2; + SPUDecoder dec(*inter); + + for (u32 i = 0; i < funcs.size(); i++) + { + inter->func = spu_interpreter::DEFAULT; + + dec.Decode(i << 21); + + funcs[i] = inter->func; + } + } + + __forceinline spu_inter_func_t operator [] (u32 opcode) + { + return funcs[opcode >> 21]; + } +} +g_spu_inter_func_list; + SPUThread& GetCurrentSPUThread() { CPUThread* thread = GetCurrentCPUThread(); @@ -46,23 +74,43 @@ SPUThread::~SPUThread() void SPUThread::Task() { - const int round = 
std::fegetround(); std::fesetround(FE_TOWARDZERO); if (m_custom_task) { - m_custom_task(*this); - } - else - { - CPUThread::Task(); + return m_custom_task(*this); } - if (std::fegetround() != FE_TOWARDZERO) + if (m_dec) { - LOG_ERROR(SPU, "Rounding mode has changed(%d)", std::fegetround()); + return CPUThread::Task(); + } + + while (true) + { + // read opcode + const spu_opcode_t opcode = { vm::read32(PC + offset) }; + + // get interpreter function + const auto func = g_spu_inter_func_list[opcode.opcode]; + + if (m_events) + { + // process events + if (m_events & CPU_EVENT_STOP && (Emu.IsStopped() || IsStopped() || IsPaused())) + { + m_events &= ~CPU_EVENT_STOP; + return; + } + } + + // call interpreter function + func(*this, opcode); + + // next instruction + //PC += 4; + NextPc(4); } - std::fesetround(round); } void SPUThread::DoReset() @@ -122,19 +170,33 @@ void SPUThread::CloseStack() void SPUThread::DoRun() { - switch(Ini.SPUDecoderMode.GetValue()) + m_dec = nullptr; + + switch (auto mode = Ini.SPUDecoderMode.GetValue()) + { + case 0: // original interpreter { - case 1: m_dec = new SPUDecoder(*new SPUInterpreter(*this)); - break; + break; + } + + case 1: // alternative interpreter + { + break; + } + case 2: + { m_dec = new SPURecompilerCore(*this); - break; + break; + } default: - LOG_ERROR(SPU, "Invalid SPU decoder mode: %d", Ini.SPUDecoderMode.GetValue()); + { + LOG_ERROR(SPU, "Invalid SPU decoder mode: %d", mode); Emu.Pause(); } + } } void SPUThread::DoResume() diff --git a/rpcs3/Gui/MainFrame.cpp b/rpcs3/Gui/MainFrame.cpp index d42f3a7886..dd0298777e 100644 --- a/rpcs3/Gui/MainFrame.cpp +++ b/rpcs3/Gui/MainFrame.cpp @@ -441,6 +441,7 @@ void MainFrame::Config(wxCommandEvent& WXUNUSED(event)) cbox_cpu_decoder->Append("PPU JIT (LLVM)"); cbox_spu_decoder->Append("SPU Interpreter"); + cbox_spu_decoder->Append("SPU Interpreter 2"); cbox_spu_decoder->Append("SPU JIT (ASMJIT)"); cbox_gs_render->Append("Null"); @@ -533,7 +534,7 @@ void 
MainFrame::Config(wxCommandEvent& WXUNUSED(event)) chbox_dbg_ap_functioncall->SetValue(Ini.DBGAutoPauseFunctionCall.GetValue()); cbox_cpu_decoder ->SetSelection(Ini.CPUDecoderMode.GetValue() ? Ini.CPUDecoderMode.GetValue() : 0); - cbox_spu_decoder ->SetSelection(Ini.SPUDecoderMode.GetValue() ? Ini.SPUDecoderMode.GetValue() - 1 : 0); + cbox_spu_decoder ->SetSelection(Ini.SPUDecoderMode.GetValue() ? Ini.SPUDecoderMode.GetValue() : 0); cbox_gs_render ->SetSelection(Ini.GSRenderMode.GetValue()); cbox_gs_resolution ->SetSelection(ResolutionIdToNum(Ini.GSResolution.GetValue()) - 1); cbox_gs_aspect ->SetSelection(Ini.GSAspectRatio.GetValue() - 1); @@ -634,7 +635,7 @@ void MainFrame::Config(wxCommandEvent& WXUNUSED(event)) if(diag.ShowModal() == wxID_OK) { Ini.CPUDecoderMode.SetValue(cbox_cpu_decoder->GetSelection()); - Ini.SPUDecoderMode.SetValue(cbox_spu_decoder->GetSelection() + 1); + Ini.SPUDecoderMode.SetValue(cbox_spu_decoder->GetSelection()); Ini.GSRenderMode.SetValue(cbox_gs_render->GetSelection()); Ini.GSResolution.SetValue(ResolutionNumToId(cbox_gs_resolution->GetSelection() + 1)); Ini.GSAspectRatio.SetValue(cbox_gs_aspect->GetSelection() + 1); diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 45870f8502..c3329dfd33 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -38,6 +38,7 @@ + @@ -376,6 +377,7 @@ + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index a404d9b903..6ef16d3491 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -863,6 +863,9 @@ Emu\CPU\Cell + + Emu\CPU\Cell + @@ -1549,5 +1552,8 @@ Emu\CPU\Cell + + Emu\CPU\Cell + \ No newline at end of file From c8bb83b824bd72fb3be54600406e9b6ee8a1a3f2 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Fri, 20 Mar 2015 20:30:13 +0300 Subject: [PATCH 07/23] spu_opcode_t --- rpcs3/Emu/Cell/PPUInterpreter2.h | 2 +- rpcs3/Emu/Cell/SPUInterpreter.cpp | 17 ++++---- rpcs3/Emu/Cell/SPUInterpreter2.h | 66 +++++++++++++++++++++++++++++++ 
3 files changed, 74 insertions(+), 11 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter2.h b/rpcs3/Emu/Cell/PPUInterpreter2.h index d5bb82a77a..9ad6989714 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter2.h +++ b/rpcs3/Emu/Cell/PPUInterpreter2.h @@ -9,7 +9,7 @@ union ppu_opcode_t struct { - u32 rc : 1; // 31 + u32 : 1; // 31 u32 shh : 1; // 30 u32 : 3; // 27..29 u32 mbmeh : 1; // 26 diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp index 618a06fda0..282df3963d 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -16,37 +16,36 @@ void spu_interpreter::DEFAULT(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::STOP(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.stop_and_signal(op.opcode & 0x3fff); } void spu_interpreter::LNOP(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); } void spu_interpreter::SYNC(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + _mm_mfence(); } void spu_interpreter::DSYNC(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + _mm_mfence(); } void spu_interpreter::MFSPR(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].clear(); } void spu_interpreter::RDCH(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = u128::from32r(CPU.get_ch_value(op.ra)); } void spu_interpreter::RCHCNT(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = u128::from32r(CPU.get_ch_count(op.ra)); } void spu_interpreter::SF(SPUThread& CPU, spu_opcode_t op) @@ -191,12 +190,11 @@ void spu_interpreter::AVGB(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::MTSPR(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); } void spu_interpreter::WRCH(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.set_ch_value(op.ra, CPU.GPR[op.rt]._u32[3]); } void spu_interpreter::BIZ(SPUThread& CPU, spu_opcode_t op) @@ -1019,4 +1017,3 @@ void spu_interpreter::UNK(SPUThread& CPU, spu_opcode_t op) { DEFAULT(CPU, op); 
} - diff --git a/rpcs3/Emu/Cell/SPUInterpreter2.h b/rpcs3/Emu/Cell/SPUInterpreter2.h index 48daf67b8e..68dcb860e2 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter2.h +++ b/rpcs3/Emu/Cell/SPUInterpreter2.h @@ -6,7 +6,73 @@ union spu_opcode_t { u32 opcode; + struct + { + u32 rt : 7; // 25..31 + u32 ra : 7; // 18..24 + u32 rb : 7; // 11..17 + u32 rc : 7; // 4..10 + }; + struct + { + u32 : 14; // 18..31 + u32 i7 : 7; // 11..17 + }; + + struct + { + u32 : 14; // 18..31 + u32 i8 : 8; // 10..17 + }; + + struct + { + u32 : 14; // 18..31 + u32 i10 : 10; // 8..17 + }; + + struct + { + u32 : 7; // 25..31 + u32 i16 : 16; // 9..24 + }; + + struct + { + u32 : 7; // 25..31 + u32 i18 : 18; // 7..24 + }; + + struct + { + s32 : 14; // 18..31 + s32 si7 : 7; // 11..17 + }; + + struct + { + s32 : 14; // 18..31 + s32 si8 : 8; // 10..17 + }; + + struct + { + s32 : 14; // 18..31 + s32 si10 : 10; // 8..17 + }; + + struct + { + s32 : 7; // 25..31 + s32 si16 : 16; // 9..24 + }; + + struct + { + s32 : 7; // 25..31 + s32 si18 : 18; // 7..24 + }; }; using spu_inter_func_t = void(*)(SPUThread& CPU, spu_opcode_t opcode); From ef563f038d90099c7238c46ec0b500895ec9f6e4 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Sat, 21 Mar 2015 02:36:05 +0300 Subject: [PATCH 08/23] SPU: some instructions updated --- Utilities/BEType.h | 42 +++++ rpcs3/Emu/Cell/PPUThread.cpp | 13 +- rpcs3/Emu/Cell/SPUInterpreter.cpp | 283 +++++++++++++++++++++++------- rpcs3/Emu/Cell/SPUInterpreter.h | 24 +-- rpcs3/Emu/Cell/SPUInterpreter2.h | 7 + rpcs3/Emu/Cell/SPURecompiler.h | 24 +-- rpcs3/Emu/Cell/SPUThread.cpp | 12 +- 7 files changed, 296 insertions(+), 109 deletions(-) diff --git a/Utilities/BEType.h b/Utilities/BEType.h index 8cf5e9a89c..17094fcf1e 100644 --- a/Utilities/BEType.h +++ b/Utilities/BEType.h @@ -227,16 +227,58 @@ union _CRT_ALIGN(16) u128 return ret; } + static u128 fromF(__m128 value) + { + u128 ret; + ret.vf = value; + return ret; + } + static __forceinline u128 add8(const u128& left, const u128& right) { return 
fromV(_mm_add_epi8(left.vi, right.vi)); } + static __forceinline u128 add16(const u128& left, const u128& right) + { + return fromV(_mm_add_epi16(left.vi, right.vi)); + } + + static __forceinline u128 add32(const u128& left, const u128& right) + { + return fromV(_mm_add_epi32(left.vi, right.vi)); + } + + static __forceinline u128 addfs(const u128& left, const u128& right) + { + return fromF(_mm_add_ps(left.vf, right.vf)); + } + static __forceinline u128 sub8(const u128& left, const u128& right) { return fromV(_mm_sub_epi8(left.vi, right.vi)); } + static __forceinline u128 sub16(const u128& left, const u128& right) + { + return fromV(_mm_sub_epi16(left.vi, right.vi)); + } + + static __forceinline u128 sub32(const u128& left, const u128& right) + { + return fromV(_mm_sub_epi32(left.vi, right.vi)); + } + + static __forceinline u128 subfs(const u128& left, const u128& right) + { + return fromF(_mm_sub_ps(left.vf, right.vf)); + } + + static __forceinline u128 maxu8(const u128& left, const u128& right) + { + return fromV(_mm_max_epu8(left.vi, right.vi)); + } + static __forceinline u128 minu8(const u128& left, const u128& right) { return fromV(_mm_min_epu8(left.vi, right.vi)); diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 61215ad12e..393efddff1 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -667,14 +667,14 @@ void PPUThread::FastCall2(u32 addr, u32 rtoc) auto old_rtoc = GPR[2]; auto old_LR = LR; auto old_thread = GetCurrentNamedThread(); - auto old_task = custom_task; + auto old_task = decltype(custom_task)(); m_status = Running; PC = addr; GPR[2] = rtoc; LR = Emu.GetCPUThreadStop(); SetCurrentNamedThread(this); - custom_task = nullptr; + custom_task.swap(old_task); Task(); @@ -684,7 +684,7 @@ void PPUThread::FastCall2(u32 addr, u32 rtoc) GPR[2] = old_rtoc; LR = old_LR; SetCurrentNamedThread(old_thread); - custom_task = old_task; + custom_task.swap(old_task); } void PPUThread::FastStop() @@ -715,7 +715,12 
@@ void PPUThread::Task() if (m_events) { // process events - if (m_events & CPU_EVENT_STOP && (Emu.IsStopped() || IsStopped() || IsPaused())) + if (Emu.IsStopped()) + { + return; + } + + if (m_events & CPU_EVENT_STOP && (IsStopped() || IsPaused())) { m_events &= ~CPU_EVENT_STOP; return; diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp index 282df3963d..0f2aa5382e 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -8,6 +8,16 @@ #include "SPUInterpreter.h" #include "SPUInterpreter2.h" +#ifdef _MSC_VER +#include <intrin.h> +#define rotl32 _rotl +#define rotl16 _rotl16 +#else +#include <x86intrin.h> +#define rotl16(x,r) ((u16)(((u16)(x) << ((r) & 15)) | ((u16)(x) >> ((16 - ((r) & 15)) & 15)))) +#define rotl32(x,r) (((u32)(x) << ((r) & 31)) | ((u32)(x) >> ((32 - ((r) & 31)) & 31))) +#endif + void spu_interpreter::DEFAULT(SPUThread& CPU, spu_opcode_t op) { SPUInterpreter inter(CPU); (*SPU_instr::rrr_list)(&inter, op.opcode); @@ -50,142 +60,211 @@ void spu_interpreter::RCHCNT(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::SF(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = u128::sub32(CPU.GPR[op.rb], CPU.GPR[op.ra]); } void spu_interpreter::OR(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = CPU.GPR[op.ra] | CPU.GPR[op.rb]; } void spu_interpreter::BG(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + for (u32 i = 0; i < 4; i++) + { + CPU.GPR[op.rt]._u32[i] = CPU.GPR[op.ra]._u32[i] <= CPU.GPR[op.rb]._u32[i]; + } } void spu_interpreter::SFH(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = u128::sub16(CPU.GPR[op.rb], CPU.GPR[op.ra]); } void spu_interpreter::NOR(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = ~(CPU.GPR[op.ra] | CPU.GPR[op.rb]); } void spu_interpreter::ABSDB(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + + CPU.GPR[op.rt] = u128::sub8(u128::maxu8(a, b), u128::minu8(a, b));
} void spu_interpreter::ROT(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + + for (u32 i = 0; i < 4; i++) + { + CPU.GPR[op.rt]._u32[i] = rotl32(a._u32[i], b._s32[i]); + } } void spu_interpreter::ROTM(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + + for (u32 i = 0; i < 4; i++) + { + const u64 value = a._u32[i]; + CPU.GPR[op.rt]._u32[i] = static_cast<u32>(value >> ((0 - b._u32[i]) & 0x3f)); + } } void spu_interpreter::ROTMA(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + + for (u32 i = 0; i < 4; i++) + { + const s64 value = a._s32[i]; + CPU.GPR[op.rt]._s32[i] = static_cast<s32>(value >> ((0 - b._u32[i]) & 0x3f)); + } } void spu_interpreter::SHL(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + + for (u32 i = 0; i < 4; i++) + { + const u64 value = a._u32[i]; + CPU.GPR[op.rt]._u32[i] = static_cast<u32>(value << (b._u32[i] & 0x3f)); + } } void spu_interpreter::ROTH(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + + for (u32 i = 0; i < 8; i++) + { + CPU.GPR[op.rt]._u16[i] = rotl16(a._u16[i], b._u8[i * 2]); + } } void spu_interpreter::ROTHM(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + + for (u32 i = 0; i < 8; i++) + { + const u32 value = a._u16[i]; + CPU.GPR[op.rt]._u16[i] = static_cast<u16>(value >> ((0 - b._u16[i]) & 0x1f)); + } } void spu_interpreter::ROTMAH(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + + for (u32 i = 0; i < 8; i++) + { + const s32 value = a._s16[i]; + CPU.GPR[op.rt]._s16[i] = static_cast<s16>(value >> ((0 - b._u16[i]) & 0x1f)); + } } void spu_interpreter::SHLH(SPUThread& CPU, spu_opcode_t op) { -
DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + + for (u32 i = 0; i < 8; i++) + { + const u32 value = a._u16[i]; + CPU.GPR[op.rt]._u16[i] = static_cast<u16>(value << (b._u16[i] & 0x1f)); + } } void spu_interpreter::ROTI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra].vi; + const s32 n = op.si7 & 0x1f; + + CPU.GPR[op.rt].vi = _mm_or_si128(_mm_slli_epi32(a, n), _mm_srli_epi32(a, 32 - n)); } void spu_interpreter::ROTMI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_srli_epi32(CPU.GPR[op.ra].vi, -op.si7 & 0x3f); } void spu_interpreter::ROTMAI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_srai_epi32(CPU.GPR[op.ra].vi, -op.si7 & 0x3f); } void spu_interpreter::SHLI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_slli_epi32(CPU.GPR[op.ra].vi, op.si7 & 0x3f); } void spu_interpreter::ROTHI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra].vi; + const s32 n = op.si7 & 0xf; + + CPU.GPR[op.rt].vi = _mm_or_si128(_mm_slli_epi16(a, n), _mm_srli_epi16(a, 16 - n)); } void spu_interpreter::ROTHMI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_srli_epi16(CPU.GPR[op.ra].vi, -op.si7 & 0x1f); } void spu_interpreter::ROTMAHI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_srai_epi16(CPU.GPR[op.ra].vi, -op.si7 & 0x1f); } void spu_interpreter::SHLHI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_slli_epi16(CPU.GPR[op.ra].vi, op.si7 & 0x1f); } void spu_interpreter::A(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = u128::add32(CPU.GPR[op.ra], CPU.GPR[op.rb]); } void spu_interpreter::AND(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = CPU.GPR[op.ra] & CPU.GPR[op.rb]; } void spu_interpreter::CG(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU,
op); + for (u32 i = 0; i < 4; i++) + { + CPU.GPR[op.rt]._u32[i] = ~CPU.GPR[op.ra]._u32[i] < CPU.GPR[op.rb]._u32[i]; + } } void spu_interpreter::AH(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = u128::add16(CPU.GPR[op.ra], CPU.GPR[op.rb]); } void spu_interpreter::NAND(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = ~(CPU.GPR[op.ra] & CPU.GPR[op.rb]); } void spu_interpreter::AVGB(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_avg_epu8(CPU.GPR[op.ra].vi, CPU.GPR[op.rb].vi); } void spu_interpreter::MTSPR(SPUThread& CPU, spu_opcode_t op) @@ -199,137 +278,200 @@ void spu_interpreter::WRCH(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::BIZ(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + if (op.d || op.e) + { + throw __FUNCTION__; + } + + if (CPU.GPR[op.rt]._u32[3] == 0) + { + CPU.SetBranch(SPUOpcodes::branchTarget(CPU.GPR[op.ra]._u32[3], 0)); + } } void spu_interpreter::BINZ(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + if (op.d || op.e) + { + throw __FUNCTION__; + } + + if (CPU.GPR[op.rt]._u32[3] != 0) + { + CPU.SetBranch(SPUOpcodes::branchTarget(CPU.GPR[op.ra]._u32[3], 0)); + } } void spu_interpreter::BIHZ(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + if (op.d || op.e) + { + throw __FUNCTION__; + } + + if (CPU.GPR[op.rt]._u16[6] == 0) + { + CPU.SetBranch(SPUOpcodes::branchTarget(CPU.GPR[op.ra]._u32[3], 0)); + } } void spu_interpreter::BIHNZ(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + if (op.d || op.e) + { + throw __FUNCTION__; + } + + if (CPU.GPR[op.rt]._u16[6] != 0) + { + CPU.SetBranch(SPUOpcodes::branchTarget(CPU.GPR[op.ra]._u32[3], 0)); + } } void spu_interpreter::STOPD(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + throw __FUNCTION__; } void spu_interpreter::STQX(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.write128((CPU.GPR[op.ra]._u32[3] + CPU.GPR[op.rb]._u32[3]) & 0x3fff0, CPU.GPR[op.rt]); } void 
spu_interpreter::BI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + if (op.d || op.e) + { + throw __FUNCTION__; + } + + CPU.SetBranch(SPUOpcodes::branchTarget(CPU.GPR[op.ra]._u32[3], 0)); } void spu_interpreter::BISL(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + if (op.d || op.e) + { + throw __FUNCTION__; + } + + const u32 target = SPUOpcodes::branchTarget(CPU.GPR[op.ra]._u32[3], 0); + CPU.GPR[op.rt] = u128::from32r(CPU.PC + 4); + CPU.SetBranch(target); } void spu_interpreter::IRET(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + throw __FUNCTION__; } void spu_interpreter::BISLED(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + throw __FUNCTION__; } void spu_interpreter::HBR(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); } void spu_interpreter::GB(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + u32 result = 0; + for (u32 i = 0; i < 4; i++) + { + result |= (CPU.GPR[op.ra]._u32[i] & 1) << i; + } + + CPU.GPR[op.rt] = u128::from32r(result); } void spu_interpreter::GBH(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + u32 result = 0; + for (u32 i = 0; i < 8; i++) + { + result |= (CPU.GPR[op.ra]._u16[i] & 1) << i; + } + + CPU.GPR[op.rt] = u128::from32r(result); } void spu_interpreter::GBB(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = u128::from32r(_mm_movemask_epi8(_mm_slli_epi64(CPU.GPR[op.ra].vi, 7))); } void spu_interpreter::FSM(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = g_imm_table.fsm_table[CPU.GPR[op.ra]._u32[3] & 0xf]; } void spu_interpreter::FSMH(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = g_imm_table.fsmh_table[CPU.GPR[op.ra]._u32[3] & 0xff]; } void spu_interpreter::FSMB(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = g_imm_table.fsmb_table[CPU.GPR[op.ra]._u32[3] & 0xffff]; } void spu_interpreter::FREST(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vf = 
_mm_rcp_ps(CPU.GPR[op.ra].vf); } void spu_interpreter::FRSQEST(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vf = _mm_rsqrt_ps(_mm_and_ps(CPU.GPR[op.ra].vf, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)))); } void spu_interpreter::LQX(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = CPU.read128((CPU.GPR[op.ra]._u32[3] + CPU.GPR[op.rb]._u32[3]) & 0x3fff0); } void spu_interpreter::ROTQBYBI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.rldq_pshufb[CPU.GPR[op.rb]._u32[3] >> 3 & 0xf]); } void spu_interpreter::ROTQMBYBI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.srdq_pshufb[-(CPU.GPR[op.rb]._s32[3] >> 3) & 0x1f]); } void spu_interpreter::SHLQBYBI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.sldq_pshufb[CPU.GPR[op.rb]._u32[3] >> 3 & 0x1f]); } void spu_interpreter::CBX(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const s32 t = ~(CPU.GPR[op.rb]._u32[3] + CPU.GPR[op.ra]._u32[3]) & 0xf; + CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt]._u8[t] = 0x03; } void spu_interpreter::CHX(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const s32 t = (~(CPU.GPR[op.rb]._u32[3] + CPU.GPR[op.ra]._u32[3]) & 0xe) >> 1; + CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt]._u16[t] = 0x0203; } void spu_interpreter::CWX(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const s32 t = (~(CPU.GPR[op.rb]._u32[3] + CPU.GPR[op.ra]._u32[3]) & 0xc) >> 2; + CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt]._u32[t] = 0x00010203; } void spu_interpreter::CDX(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const s32 t = (~(CPU.GPR[op.rb]._u32[3] + 
CPU.GPR[op.ra]._u32[3]) & 0x8) >> 3; + CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt]._u64[t] = 0x0001020304050607ull; } void spu_interpreter::ROTQBI(SPUThread& CPU, spu_opcode_t op) @@ -349,42 +491,50 @@ void spu_interpreter::SHLQBI(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::ROTQBY(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.rldq_pshufb[CPU.GPR[op.rb]._u32[3] & 0xf]); } void spu_interpreter::ROTQMBY(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.srdq_pshufb[-CPU.GPR[op.rb]._s32[3] & 0x1f]); } void spu_interpreter::SHLQBY(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.sldq_pshufb[CPU.GPR[op.rb]._u32[3] & 0x1f]); } void spu_interpreter::ORX(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = u128::from32r(CPU.GPR[op.ra]._u32[0] | CPU.GPR[op.ra]._u32[1] | CPU.GPR[op.ra]._u32[2] | CPU.GPR[op.ra]._u32[3]); } void spu_interpreter::CBD(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const s32 t = ~(op.i7 + CPU.GPR[op.ra]._u32[3]) & 0xf; + CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt]._u8[t] = 0x03; } void spu_interpreter::CHD(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const s32 t = (~(op.i7 + CPU.GPR[op.ra]._u32[3]) & 0xe) >> 1; + CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt]._u16[t] = 0x0203; } void spu_interpreter::CWD(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const s32 t = (~(op.i7 + CPU.GPR[op.ra]._u32[3]) & 0xc) >> 2; + CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt]._u32[t] = 0x00010203; } void spu_interpreter::CDD(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const 
s32 t = (~(op.i7 + CPU.GPR[op.ra]._u32[3]) & 0x8) >> 3; + CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt]._u64[t] = 0x0001020304050607ull; } void spu_interpreter::ROTQBII(SPUThread& CPU, spu_opcode_t op) @@ -404,22 +554,21 @@ void spu_interpreter::SHLQBII(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::ROTQBYI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.rldq_pshufb[op.i7 & 0xf]); } void spu_interpreter::ROTQMBYI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.srdq_pshufb[-op.si7 & 0x1f]); } void spu_interpreter::SHLQBYI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.sldq_pshufb[op.i7 & 0x1f]); } void spu_interpreter::NOP(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); } void spu_interpreter::CGT(SPUThread& CPU, spu_opcode_t op) diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h index 2361d0c376..97d4934b3c 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.h +++ b/rpcs3/Emu/Cell/SPUInterpreter.h @@ -315,11 +315,9 @@ private: } void BIZ(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -336,11 +334,9 @@ private: } void BINZ(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -357,11 +353,9 @@ private: } void BIHZ(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -378,11 +372,9 @@ private: } void 
BIHNZ(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -409,11 +401,9 @@ private: } void BI(u32 intr, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -423,11 +413,9 @@ private: } void BISL(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } diff --git a/rpcs3/Emu/Cell/SPUInterpreter2.h b/rpcs3/Emu/Cell/SPUInterpreter2.h index 68dcb860e2..d4de2559c9 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter2.h +++ b/rpcs3/Emu/Cell/SPUInterpreter2.h @@ -73,6 +73,13 @@ union spu_opcode_t s32 : 7; // 25..31 s32 si18 : 18; // 7..24 }; + + struct + { + u32 : 18; // 14..31 + u32 e : 1; // 13 + u32 d : 1; // 12 + }; }; using spu_inter_func_t = void(*)(SPUThread& CPU, spu_opcode_t opcode); diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index 85ba680249..0391532151 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -1073,11 +1073,9 @@ private: } void BIZ(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -1094,11 +1092,9 @@ private: } void BINZ(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -1115,11 +1111,9 @@ private: } void BIHZ(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // 
disable interrupts default: UNIMPLEMENTED(); return; } @@ -1136,11 +1130,9 @@ private: } void BIHNZ(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -1188,11 +1180,9 @@ private: } void BI(u32 intr, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -1206,11 +1196,9 @@ private: } void BISL(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index c68ead7b02..625fbd2c74 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -97,7 +97,12 @@ void SPUThread::Task() if (m_events) { // process events - if (m_events & CPU_EVENT_STOP && (Emu.IsStopped() || IsStopped() || IsPaused())) + if (Emu.IsStopped()) + { + return; + } + + if (m_events & CPU_EVENT_STOP && (IsStopped() || IsPaused())) { m_events &= ~CPU_EVENT_STOP; return; @@ -225,16 +230,19 @@ void SPUThread::FastCall(u32 ls_addr) auto old_PC = PC; auto old_LR = GPR[0]._u32[3]; auto old_stack = GPR[1]._u32[3]; // only saved and restored (may be wrong) + auto old_task = decltype(m_custom_task)(); m_status = Running; PC = ls_addr; GPR[0]._u32[3] = 0x0; + m_custom_task.swap(old_task); - CPUThread::Task(); + SPUThread::Task(); PC = old_PC; GPR[0]._u32[3] = old_LR; GPR[1]._u32[3] = old_stack; + m_custom_task.swap(old_task); } void SPUThread::FastStop() From ff1708a899aa67467e4adcc6c87c4cf2b262932b Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Sat, 21 Mar 2015 17:29:33 +0300 Subject: [PATCH 09/23] SPU: some instructions updated --- Utilities/BEType.h | 18 +++
rpcs3/Emu/Cell/SPUInterpreter.cpp | 248 ++++++++++++++++++------------ 2 files changed, 170 insertions(+), 96 deletions(-) diff --git a/Utilities/BEType.h b/Utilities/BEType.h index 17094fcf1e..a4ecdc93a1 100644 --- a/Utilities/BEType.h +++ b/Utilities/BEType.h @@ -88,6 +88,7 @@ union _CRT_ALIGN(16) u128 double _d[2]; __m128 vf; __m128i vi; + __m128d vd; class bit_array_128 { @@ -234,6 +235,13 @@ union _CRT_ALIGN(16) u128 return ret; } + static u128 fromD(__m128d value) + { + u128 ret; + ret.vd = value; + return ret; + } + static __forceinline u128 add8(const u128& left, const u128& right) { return fromV(_mm_add_epi8(left.vi, right.vi)); @@ -254,6 +262,11 @@ union _CRT_ALIGN(16) u128 return fromF(_mm_add_ps(left.vf, right.vf)); } + static __forceinline u128 addfd(const u128& left, const u128& right) + { + return fromD(_mm_add_pd(left.vd, right.vd)); + } + static __forceinline u128 sub8(const u128& left, const u128& right) { return fromV(_mm_sub_epi8(left.vi, right.vi)); @@ -274,6 +287,11 @@ union _CRT_ALIGN(16) u128 return fromF(_mm_sub_ps(left.vf, right.vf)); } + static __forceinline u128 subfd(const u128& left, const u128& right) + { + return fromD(_mm_sub_pd(left.vd, right.vd)); + } + static __forceinline u128 maxu8(const u128& left, const u128& right) { return fromV(_mm_max_epu8(left.vi, right.vi)); diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp index 0f2aa5382e..9d22bfbea0 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -90,7 +90,6 @@ void spu_interpreter::ABSDB(SPUThread& CPU, spu_opcode_t op) { const auto a = CPU.GPR[op.ra]; const auto b = CPU.GPR[op.rb]; - CPU.GPR[op.rt] = u128::sub8(u128::maxu8(a, b), u128::minu8(a, b)); } @@ -192,7 +191,6 @@ void spu_interpreter::ROTI(SPUThread& CPU, spu_opcode_t op) { const auto a = CPU.GPR[op.ra].vi; const s32 n = op.si7 & 0x1f; - CPU.GPR[op.rt].vi = _mm_or_si128(_mm_slli_epi32(a, n), _mm_srli_epi32(a, 32 - n)); } @@ -215,7 +213,6 @@ void 
spu_interpreter::ROTHI(SPUThread& CPU, spu_opcode_t op) { const auto a = CPU.GPR[op.ra].vi; const s32 n = op.si7 & 0xf; - CPU.GPR[op.rt].vi = _mm_or_si128(_mm_slli_epi16(a, n), _mm_srli_epi16(a, 16 - n)); } @@ -376,24 +373,12 @@ void spu_interpreter::HBR(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::GB(SPUThread& CPU, spu_opcode_t op) { - u32 result = 0; - for (u32 i = 0; i < 4; i++) - { - result |= (CPU.GPR[op.ra]._u32[i] & 1) << i; - } - - CPU.GPR[op.rt] = u128::from32r(result); + CPU.GPR[op.rt] = u128::from32r(_mm_movemask_epi8(_mm_slli_epi64(_mm_shuffle_epi8(CPU.GPR[op.ra].vi, _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0)), 7))); } void spu_interpreter::GBH(SPUThread& CPU, spu_opcode_t op) { - u32 result = 0; - for (u32 i = 0; i < 8; i++) - { - result |= (CPU.GPR[op.ra]._u16[i] & 1) << i; - } - - CPU.GPR[op.rt] = u128::from32r(result); + CPU.GPR[op.rt] = u128::from32r(_mm_movemask_epi8(_mm_slli_epi64(_mm_shuffle_epi8(CPU.GPR[op.ra].vi, _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 14, 12, 10, 8, 6, 4, 2, 0)), 7))); } void spu_interpreter::GBB(SPUThread& CPU, spu_opcode_t op) @@ -423,7 +408,8 @@ void spu_interpreter::FREST(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::FRSQEST(SPUThread& CPU, spu_opcode_t op) { - CPU.GPR[op.rt].vf = _mm_rsqrt_ps(_mm_and_ps(CPU.GPR[op.ra].vf, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)))); + static const auto mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); + CPU.GPR[op.rt].vf = _mm_rsqrt_ps(_mm_and_ps(CPU.GPR[op.ra].vf, mask)); } void spu_interpreter::LQX(SPUThread& CPU, spu_opcode_t op) @@ -476,17 +462,23 @@ void spu_interpreter::CDX(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::ROTQBI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra].vi; + const s32 n = CPU.GPR[op.rb]._s32[3] & 0x7; + CPU.GPR[op.rt].vi = _mm_or_si128(_mm_slli_epi64(a, n), _mm_srli_epi64(_mm_alignr_epi8(a, a, 8), 64 - n)); } void spu_interpreter::ROTQMBI(SPUThread& 
CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra].vi; + const s32 n = -CPU.GPR[op.rb]._s32[3] & 0x7; + CPU.GPR[op.rt].vi = _mm_or_si128(_mm_srli_epi64(a, n), _mm_slli_epi64(_mm_srli_si128(a, 8), 64 - n)); } void spu_interpreter::SHLQBI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra].vi; + const s32 n = CPU.GPR[op.rb]._u32[3] & 0x7; + CPU.GPR[op.rt].vi = _mm_or_si128(_mm_slli_epi64(a, n), _mm_srli_epi64(_mm_slli_si128(a, 8), 64 - n)); } void spu_interpreter::ROTQBY(SPUThread& CPU, spu_opcode_t op) @@ -539,17 +531,23 @@ void spu_interpreter::CDD(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::ROTQBII(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra].vi; + const s32 n = op.i7 & 0x7; + CPU.GPR[op.rt].vi = _mm_or_si128(_mm_slli_epi64(a, n), _mm_srli_epi64(_mm_alignr_epi8(a, a, 8), 64 - n)); } void spu_interpreter::ROTQMBII(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra].vi; + const s32 n = -op.si7 & 0x7; + CPU.GPR[op.rt].vi = _mm_or_si128(_mm_srli_epi64(a, n), _mm_slli_epi64(_mm_srli_si128(a, 8), 64 - n)); } void spu_interpreter::SHLQBII(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra].vi; + const s32 n = op.i7 & 0x7; + CPU.GPR[op.rt].vi = _mm_or_si128(_mm_slli_epi64(a, n), _mm_srli_epi64(_mm_slli_si128(a, 8), 64 - n)); } void spu_interpreter::ROTQBYI(SPUThread& CPU, spu_opcode_t op) @@ -573,52 +571,62 @@ void spu_interpreter::NOP(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::CGT(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_cmpgt_epi32(CPU.GPR[op.ra].vi, CPU.GPR[op.rb].vi); } void spu_interpreter::XOR(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = CPU.GPR[op.ra] ^ CPU.GPR[op.rb]; } void spu_interpreter::CGTH(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = 
_mm_cmpgt_epi16(CPU.GPR[op.ra].vi, CPU.GPR[op.rb].vi); } void spu_interpreter::EQV(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = ~(CPU.GPR[op.ra] ^ CPU.GPR[op.rb]); } void spu_interpreter::CGTB(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_cmpgt_epi8(CPU.GPR[op.ra].vi, CPU.GPR[op.rb].vi); } void spu_interpreter::SUMB(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto ones = _mm_set1_epi8(1); + const auto a = _mm_maddubs_epi16(CPU.GPR[op.ra].vi, ones); + const auto b = _mm_maddubs_epi16(CPU.GPR[op.rb].vi, ones); + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(_mm_hadd_epi16(a, b), _mm_set_epi8(15, 14, 7, 6, 13, 12, 5, 4, 11, 10, 3, 2, 9, 8, 1, 0)); } void spu_interpreter::HGT(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + if (CPU.GPR[op.ra]._s32[3] > CPU.GPR[op.rb]._s32[3]) + { + CPU.halt(); + } } void spu_interpreter::CLZ(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + for (u32 i = 0; i < 4; i++) + { + CPU.GPR[op.rt]._u32[i] = cntlz32(CPU.GPR[op.ra]._u32[i]); + } } void spu_interpreter::XSWD(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt]._s64[0] = CPU.GPR[op.ra]._s32[0]; + CPU.GPR[op.rt]._s64[1] = CPU.GPR[op.ra]._s32[2]; } void spu_interpreter::XSHW(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_srai_epi32(_mm_slli_epi32(CPU.GPR[op.ra].vi, 16), 16); } void spu_interpreter::CNTB(SPUThread& CPU, spu_opcode_t op) @@ -628,217 +636,244 @@ void spu_interpreter::CNTB(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::XSBH(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_srai_epi16(_mm_slli_epi16(CPU.GPR[op.ra].vi, 8), 8); } void spu_interpreter::CLGT(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto sign = _mm_set1_epi32(0x80000000); + CPU.GPR[op.rt].vi = _mm_cmpgt_epi32(_mm_xor_si128(CPU.GPR[op.ra].vi, sign), _mm_xor_si128(CPU.GPR[op.rb].vi, sign)); } void 
spu_interpreter::ANDC(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = u128::andnot(CPU.GPR[op.rb], CPU.GPR[op.ra]); } void spu_interpreter::FCGT(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vf = _mm_cmp_ps(CPU.GPR[op.rb].vf, CPU.GPR[op.ra].vf, 1); } void spu_interpreter::DFCGT(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + throw __FUNCTION__; } void spu_interpreter::FA(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = u128::addfs(CPU.GPR[op.ra], CPU.GPR[op.rb]); } void spu_interpreter::FS(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = u128::subfs(CPU.GPR[op.ra], CPU.GPR[op.rb]); } void spu_interpreter::FM(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vf = _mm_mul_ps(CPU.GPR[op.ra].vf, CPU.GPR[op.rb].vf); } void spu_interpreter::CLGTH(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto sign = _mm_set1_epi32(0x80008000); + CPU.GPR[op.rt].vi = _mm_cmpgt_epi16(_mm_xor_si128(CPU.GPR[op.ra].vi, sign), _mm_xor_si128(CPU.GPR[op.rb].vi, sign)); } void spu_interpreter::ORC(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = CPU.GPR[op.ra] | ~CPU.GPR[op.rb]; } void spu_interpreter::FCMGT(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + static const auto mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); + CPU.GPR[op.rt].vf = _mm_cmp_ps(_mm_and_ps(CPU.GPR[op.rb].vf, mask), _mm_and_ps(CPU.GPR[op.ra].vf, mask), 1); } void spu_interpreter::DFCMGT(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + throw __FUNCTION__; } void spu_interpreter::DFA(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = u128::addfd(CPU.GPR[op.ra], CPU.GPR[op.rb]); } void spu_interpreter::DFS(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = u128::subfd(CPU.GPR[op.ra], CPU.GPR[op.rb]); } void spu_interpreter::DFM(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + 
CPU.GPR[op.rt].vd = _mm_mul_pd(CPU.GPR[op.ra].vd, CPU.GPR[op.rb].vd); } void spu_interpreter::CLGTB(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto sign = _mm_set1_epi32(0x80808080); + CPU.GPR[op.rt].vi = _mm_cmpgt_epi8(_mm_xor_si128(CPU.GPR[op.ra].vi, sign), _mm_xor_si128(CPU.GPR[op.rb].vi, sign)); } void spu_interpreter::HLGT(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + if (CPU.GPR[op.ra]._u32[3] > CPU.GPR[op.rb]._u32[3]) + { + CPU.halt(); + } } void spu_interpreter::DFMA(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vd = _mm_add_pd(_mm_mul_pd(CPU.GPR[op.ra].vd, CPU.GPR[op.rb].vd), CPU.GPR[op.rt].vd); } void spu_interpreter::DFMS(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vd = _mm_sub_pd(_mm_mul_pd(CPU.GPR[op.ra].vd, CPU.GPR[op.rb].vd), CPU.GPR[op.rt].vd); } void spu_interpreter::DFNMS(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vd = _mm_sub_pd(CPU.GPR[op.rt].vd, _mm_mul_pd(CPU.GPR[op.ra].vd, CPU.GPR[op.rb].vd)); } void spu_interpreter::DFNMA(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vd = _mm_sub_pd(_mm_set1_pd(0.0), _mm_add_pd(_mm_mul_pd(CPU.GPR[op.ra].vd, CPU.GPR[op.rb].vd), CPU.GPR[op.rt].vd)); } void spu_interpreter::CEQ(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_cmpeq_epi32(CPU.GPR[op.ra].vi, CPU.GPR[op.rb].vi); } void spu_interpreter::MPYHHU(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + return DEFAULT(CPU, op); + //CPU.GPR[op.rt].vi = _mm_madd_epi16(_mm_srli_epi32(CPU.GPR[op.ra].vi, 16), _mm_srli_epi32(CPU.GPR[op.rb].vi, 16)); } void spu_interpreter::ADDX(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = u128::add32(u128::add32(CPU.GPR[op.ra], CPU.GPR[op.rb]), CPU.GPR[op.rt] & u128::from32p(1)); } void spu_interpreter::SFX(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = u128::sub32(u128::sub32(CPU.GPR[op.rb], 
CPU.GPR[op.ra]), u128::andnot(CPU.GPR[op.rt], u128::from32p(1))); } void spu_interpreter::CGX(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + for (s32 i = 0; i < 4; i++) + { + const u64 carry = CPU.GPR[op.rt]._u32[i] & 1; + CPU.GPR[op.rt]._u32[i] = (carry + CPU.GPR[op.ra]._u32[i] + CPU.GPR[op.rb]._u32[i]) >> 32; + } } void spu_interpreter::BGX(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + for (s32 i = 0; i < 4; i++) + { + const s64 result = (u64)CPU.GPR[op.rb]._u32[i] - (u64)CPU.GPR[op.ra]._u32[i] - (u64)(1 - (CPU.GPR[op.rt]._u32[i] & 1)); + CPU.GPR[op.rt]._u32[i] = result >= 0; + } } void spu_interpreter::MPYHHA(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + return DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_add_epi32(CPU.GPR[op.rt].vi, _mm_madd_epi16(_mm_srli_epi32(CPU.GPR[op.ra].vi, 16), _mm_srli_epi32(CPU.GPR[op.rb].vi, 16))); } void spu_interpreter::MPYHHAU(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + return DEFAULT(CPU, op); + const auto a = _mm_srli_epi32(CPU.GPR[op.ra].vi, 16); + const auto b = _mm_srli_epi32(CPU.GPR[op.rb].vi, 16); + CPU.GPR[op.rt].vi = _mm_add_epi32(CPU.GPR[op.rt].vi, _mm_or_si128(_mm_slli_epi32(_mm_mulhi_epu16(a, b), 16), _mm_mullo_epi16(a, b))); } void spu_interpreter::FSCRRD(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].clear(); } void spu_interpreter::FESD(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra].vf; + CPU.GPR[op.rt].vd = _mm_cvtps_pd(_mm_shuffle_ps(a, a, 0x8d)); } void spu_interpreter::FRDS(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto t = _mm_cvtpd_ps(CPU.GPR[op.ra].vd); + CPU.GPR[op.rt].vf = _mm_shuffle_ps(t, t, 0x72); } void spu_interpreter::FSCRWR(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); } void spu_interpreter::DFTSV(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + throw __FUNCTION__; } void spu_interpreter::FCEQ(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + 
CPU.GPR[op.rt].vf = _mm_cmp_ps(CPU.GPR[op.rb].vf, CPU.GPR[op.ra].vf, 0); } void spu_interpreter::DFCEQ(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + throw __FUNCTION__; } void spu_interpreter::MPY(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + return DEFAULT(CPU, op); + static const auto mask = _mm_set1_epi32(0xffff); + CPU.GPR[op.rt].vi = _mm_madd_epi16(_mm_and_si128(CPU.GPR[op.ra].vi, mask), _mm_and_si128(CPU.GPR[op.rb].vi, mask)); } void spu_interpreter::MPYH(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + return DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_slli_epi32(_mm_mullo_epi16(_mm_srli_epi32(CPU.GPR[op.ra].vi, 16), CPU.GPR[op.rb].vi), 16); } void spu_interpreter::MPYHH(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + return DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_madd_epi16(_mm_srli_epi32(CPU.GPR[op.ra].vi, 16), _mm_srli_epi32(CPU.GPR[op.rb].vi, 16)); } void spu_interpreter::MPYS(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + return DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_srai_epi32(_mm_slli_epi32(_mm_mulhi_epi16(CPU.GPR[op.ra].vi, CPU.GPR[op.rb].vi), 16), 16); } void spu_interpreter::CEQH(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_cmpeq_epi16(CPU.GPR[op.ra].vi, CPU.GPR[op.rb].vi); } void spu_interpreter::FCMEQ(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); + CPU.GPR[op.rt].vf = _mm_cmp_ps(_mm_and_ps(CPU.GPR[op.rb].vf, mask), _mm_and_ps(CPU.GPR[op.ra].vf, mask), 0); } void spu_interpreter::DFCMEQ(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + throw __FUNCTION__; } void spu_interpreter::MPYU(SPUThread& CPU, spu_opcode_t op) @@ -848,17 +883,20 @@ void spu_interpreter::MPYU(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::CEQB(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_cmpeq_epi8(CPU.GPR[op.ra].vi, CPU.GPR[op.rb].vi); } void 
spu_interpreter::FI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = CPU.GPR[op.rb]; } void spu_interpreter::HEQ(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + if (CPU.GPR[op.ra]._s32[3] == CPU.GPR[op.rb]._s32[3]) + { + CPU.halt(); + } } @@ -885,52 +923,66 @@ void spu_interpreter::CUFLT(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::BRZ(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + if (CPU.GPR[op.rt]._u32[3] == 0) + { + CPU.SetBranch(SPUOpcodes::branchTarget(CPU.PC, op.i16)); + } } void spu_interpreter::STQA(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.write128((op.i16 << 2) & 0x3fff0, CPU.GPR[op.rt]); } void spu_interpreter::BRNZ(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + if (CPU.GPR[op.rt]._u32[3] != 0) + { + CPU.SetBranch(SPUOpcodes::branchTarget(CPU.PC, op.i16)); + } } void spu_interpreter::BRHZ(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + if (CPU.GPR[op.rt]._u16[6] == 0) + { + CPU.SetBranch(SPUOpcodes::branchTarget(CPU.PC, op.i16)); + } } void spu_interpreter::BRHNZ(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + if (CPU.GPR[op.rt]._u16[6] != 0) + { + CPU.SetBranch(SPUOpcodes::branchTarget(CPU.PC, op.i16)); + } } void spu_interpreter::STQR(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.write128(SPUOpcodes::branchTarget(CPU.PC, op.i16) & 0x3fff0, CPU.GPR[op.rt]); } void spu_interpreter::BRA(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.SetBranch(SPUOpcodes::branchTarget(0, op.i16)); } void spu_interpreter::LQA(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = CPU.read128((op.i16 << 2) & 0x3fff0); } void spu_interpreter::BRASL(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const u32 target = SPUOpcodes::branchTarget(0, op.i16); + CPU.GPR[op.rt] = u128::from32r(CPU.PC + 4); + CPU.SetBranch(target); } void spu_interpreter::BR(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + 
CPU.SetBranch(SPUOpcodes::branchTarget(CPU.PC, op.i16)); } void spu_interpreter::FSMBI(SPUThread& CPU, spu_opcode_t op) @@ -940,32 +992,34 @@ void spu_interpreter::FSMBI(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::BRSL(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const u32 target = SPUOpcodes::branchTarget(CPU.PC, op.i16); + CPU.GPR[op.rt] = u128::from32r(CPU.PC + 4); + CPU.SetBranch(target); } void spu_interpreter::LQR(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt] = CPU.read128(SPUOpcodes::branchTarget(CPU.PC, op.i16) & 0x3fff0); } void spu_interpreter::IL(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_set1_epi32(op.si16); } void spu_interpreter::ILHU(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_set1_epi32(op.i16 << 16); } void spu_interpreter::ILH(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_set1_epi16(op.i16); } void spu_interpreter::IOHL(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_or_si128(CPU.GPR[op.rt].vi, _mm_set1_epi32(op.i16)); } @@ -1021,11 +1075,13 @@ void spu_interpreter::AHI(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::STQD(SPUThread& CPU, spu_opcode_t op) { + // DEFAULT(CPU, op); } void spu_interpreter::LQD(SPUThread& CPU, spu_opcode_t op) { + // DEFAULT(CPU, op); } From e477a0b8fc75650a1337c4f4eef774524a0e38b8 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Sat, 21 Mar 2015 18:36:56 +0300 Subject: [PATCH 10/23] SPU: some instructions updated --- rpcs3/Emu/Cell/SPUInterpreter.cpp | 79 ++++++++++++++++++------------- 1 file changed, 45 insertions(+), 34 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp index 9d22bfbea0..048cb5a8d4 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -1025,99 +1025,100 @@ void spu_interpreter::IOHL(SPUThread& CPU, spu_opcode_t op) void 
spu_interpreter::ORI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_or_si128(CPU.GPR[op.ra].vi, _mm_set1_epi32(op.si10)); } void spu_interpreter::ORHI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_or_si128(CPU.GPR[op.ra].vi, _mm_set1_epi16(op.si10)); } void spu_interpreter::ORBI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_or_si128(CPU.GPR[op.ra].vi, _mm_set1_epi8(op.i8)); } void spu_interpreter::SFI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_sub_epi32(_mm_set1_epi32(op.si10), CPU.GPR[op.ra].vi); } void spu_interpreter::SFHI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_sub_epi16(_mm_set1_epi16(op.si10), CPU.GPR[op.ra].vi); } void spu_interpreter::ANDI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_and_si128(CPU.GPR[op.ra].vi, _mm_set1_epi32(op.si10)); } void spu_interpreter::ANDHI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_and_si128(CPU.GPR[op.ra].vi, _mm_set1_epi16(op.si10)); } void spu_interpreter::ANDBI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_and_si128(CPU.GPR[op.ra].vi, _mm_set1_epi8(op.i8)); } void spu_interpreter::AI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_add_epi32(_mm_set1_epi32(op.si10), CPU.GPR[op.ra].vi); } void spu_interpreter::AHI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_add_epi16(_mm_set1_epi16(op.si10), CPU.GPR[op.ra].vi); } void spu_interpreter::STQD(SPUThread& CPU, spu_opcode_t op) { - // - DEFAULT(CPU, op); + CPU.write128((CPU.GPR[op.ra]._s32[3] + (op.si10 << 4)) & 0x3fff0, CPU.GPR[op.rt]); } void spu_interpreter::LQD(SPUThread& CPU, spu_opcode_t op) { - // - DEFAULT(CPU, op); + CPU.GPR[op.rt] = CPU.read128((CPU.GPR[op.ra]._s32[3] + (op.si10 << 4)) & 0x3fff0); } void 
spu_interpreter::XORI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_xor_si128(CPU.GPR[op.ra].vi, _mm_set1_epi32(op.si10)); } void spu_interpreter::XORHI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_xor_si128(CPU.GPR[op.ra].vi, _mm_set1_epi16(op.si10)); } void spu_interpreter::XORBI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_xor_si128(CPU.GPR[op.ra].vi, _mm_set1_epi8(op.i8)); } void spu_interpreter::CGTI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_cmpgt_epi32(CPU.GPR[op.ra].vi, _mm_set1_epi32(op.si10)); } void spu_interpreter::CGTHI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_cmpgt_epi16(CPU.GPR[op.ra].vi, _mm_set1_epi16(op.si10)); } void spu_interpreter::CGTBI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_cmpgt_epi8(CPU.GPR[op.ra].vi, _mm_set1_epi8(op.i8)); } void spu_interpreter::HGTI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + if (CPU.GPR[op.ra]._s32[3] > op.si10) + { + CPU.halt(); + } } void spu_interpreter::CLGTI(SPUThread& CPU, spu_opcode_t op) @@ -1137,7 +1138,10 @@ void spu_interpreter::CLGTBI(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::HLGTI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + if (CPU.GPR[op.ra]._u32[3] > static_cast(op.si10)) + { + CPU.halt(); + } } void spu_interpreter::MPYI(SPUThread& CPU, spu_opcode_t op) @@ -1152,73 +1156,80 @@ void spu_interpreter::MPYUI(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::CEQI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_cmpeq_epi32(CPU.GPR[op.ra].vi, _mm_set1_epi32(op.si10)); } void spu_interpreter::CEQHI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_cmpeq_epi16(CPU.GPR[op.ra].vi, _mm_set1_epi16(op.si10)); } void spu_interpreter::CEQBI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, 
op); + CPU.GPR[op.rt].vi = _mm_cmpeq_epi8(CPU.GPR[op.ra].vi, _mm_set1_epi8(op.i8)); } void spu_interpreter::HEQI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + if (CPU.GPR[op.ra]._s32[3] == op.si10) + { + CPU.halt(); + } } void spu_interpreter::HBRA(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); } void spu_interpreter::HBRR(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); } void spu_interpreter::ILA(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_set1_epi32(op.i18); } void spu_interpreter::SELB(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + // rt <> rc + CPU.GPR[op.rc] = (CPU.GPR[op.rt] & CPU.GPR[op.rb]) | u128::andnot(CPU.GPR[op.rt], CPU.GPR[op.ra]); } void spu_interpreter::SHUFB(SPUThread& CPU, spu_opcode_t op) { + // rt <> rc DEFAULT(CPU, op); } void spu_interpreter::MPYA(SPUThread& CPU, spu_opcode_t op) { + // rt <> rc DEFAULT(CPU, op); } void spu_interpreter::FNMS(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + // rt <> rc + CPU.GPR[op.rc].vf = _mm_sub_ps(CPU.GPR[op.rt].vf, _mm_mul_ps(CPU.GPR[op.ra].vf, CPU.GPR[op.rb].vf)); } void spu_interpreter::FMA(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + // rt <> rc + CPU.GPR[op.rc].vf = _mm_add_ps(_mm_mul_ps(CPU.GPR[op.ra].vf, CPU.GPR[op.rb].vf), CPU.GPR[op.rt].vf); } void spu_interpreter::FMS(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + // rt <> rc + CPU.GPR[op.rc].vf = _mm_sub_ps(_mm_mul_ps(CPU.GPR[op.ra].vf, CPU.GPR[op.rb].vf), CPU.GPR[op.rt].vf); } void spu_interpreter::UNK(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + throw __FUNCTION__; } From a86317ce5abd123e1b4f919e724eb3bab4a70690 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Sat, 21 Mar 2015 21:07:37 +0300 Subject: [PATCH 11/23] SPU: some instructions updated --- rpcs3/Emu/Cell/SPUInterpreter.cpp | 53 ++++++++++++++++++++++++------- rpcs3/Emu/Cell/SPUInterpreter2.h | 8 ++--- rpcs3/Emu/Cell/SPUThread.cpp | 4 +-- 3 files changed, 47 insertions(+), 
18 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp index 048cb5a8d4..faddf20138 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -408,7 +408,7 @@ void spu_interpreter::FREST(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::FRSQEST(SPUThread& CPU, spu_opcode_t op) { - static const auto mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); + const auto mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); CPU.GPR[op.rt].vf = _mm_rsqrt_ps(_mm_and_ps(CPU.GPR[op.ra].vf, mask)); } @@ -631,7 +631,10 @@ void spu_interpreter::XSHW(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::CNTB(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto counts = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0); + const auto mask = _mm_set1_epi8(0xf); + const auto a = CPU.GPR[op.ra].vi; + CPU.GPR[op.rt].vi = _mm_add_epi8(_mm_shuffle_epi8(counts, _mm_and_si128(a, mask)), _mm_shuffle_epi8(counts, _mm_and_si128(_mm_srli_epi64(a, 4), mask))); } void spu_interpreter::XSBH(SPUThread& CPU, spu_opcode_t op) @@ -688,7 +691,7 @@ void spu_interpreter::ORC(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::FCMGT(SPUThread& CPU, spu_opcode_t op) { - static const auto mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); + const auto mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); CPU.GPR[op.rt].vf = _mm_cmp_ps(_mm_and_ps(CPU.GPR[op.rb].vf, mask), _mm_and_ps(CPU.GPR[op.ra].vf, mask), 1); } @@ -838,7 +841,7 @@ void spu_interpreter::DFCEQ(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::MPY(SPUThread& CPU, spu_opcode_t op) { return DEFAULT(CPU, op); - static const auto mask = _mm_set1_epi32(0xffff); + const auto mask = _mm_set1_epi32(0xffff); CPU.GPR[op.rt].vi = _mm_madd_epi16(_mm_and_si128(CPU.GPR[op.ra].vi, mask), _mm_and_si128(CPU.GPR[op.rb].vi, mask)); } @@ -900,24 +903,50 @@ void spu_interpreter::HEQ(SPUThread& CPU, spu_opcode_t op) } +class spu_scale_table_t +{ + 
std::array<__m128, 155 + 174> m_data; + +public: + spu_scale_table_t() + { + for (s32 i = -155; i < 174; i++) + { + m_data[i + 155] = _mm_set1_ps(static_cast(pow(2, i))); + } + } + + __forceinline __m128 operator [] (s32 scale) const + { + return m_data[scale + 155]; + } +} +const g_spu_scale_table; + + void spu_interpreter::CFLTS(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto scaled = _mm_mul_ps(CPU.GPR[op.ra].vf, g_spu_scale_table[173 - op.i8]); + CPU.GPR[op.rt].vi = _mm_xor_si128(_mm_cvttps_epi32(scaled), _mm_castps_si128(_mm_cmpge_ps(scaled, _mm_set1_ps(0x80000000)))); } void spu_interpreter::CFLTU(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto scaled1 = _mm_max_ps(_mm_mul_ps(CPU.GPR[op.ra].vf, g_spu_scale_table[173 - op.i8]), _mm_set1_ps(0.0f)); + const auto scaled2 = _mm_and_ps(_mm_sub_ps(scaled1, _mm_set1_ps(0x80000000)), _mm_cmpge_ps(scaled1, _mm_set1_ps(0x80000000))); + CPU.GPR[op.rt].vi = _mm_or_si128(_mm_or_si128(_mm_cvttps_epi32(scaled1), _mm_cvttps_epi32(scaled2)), _mm_castps_si128(_mm_cmpge_ps(scaled1, _mm_set1_ps(0x100000000)))); } void spu_interpreter::CSFLT(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vf = _mm_mul_ps(_mm_cvtepi32_ps(CPU.GPR[op.ra].vi), g_spu_scale_table[op.i8 - 155]); } void spu_interpreter::CUFLT(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = CPU.GPR[op.ra].vi; + const auto fix = _mm_and_ps(_mm_castsi128_ps(_mm_srai_epi32(a, 31)), _mm_set1_ps(0x80000000)); + CPU.GPR[op.rt].vf = _mm_mul_ps(_mm_add_ps(_mm_cvtepi32_ps(_mm_and_si128(a, _mm_set1_epi32(0x7fffffff))), fix), g_spu_scale_table[op.i8 - 155]); } @@ -987,7 +1016,7 @@ void spu_interpreter::BR(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::FSMBI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = g_imm_table.fsmb_table[op.i16]; } void spu_interpreter::BRSL(SPUThread& CPU, spu_opcode_t op) @@ -1123,17 +1152,17 @@ void spu_interpreter::HGTI(SPUThread& 
CPU, spu_opcode_t op) void spu_interpreter::CLGTI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_cmpgt_epi32(_mm_xor_si128(CPU.GPR[op.ra].vi, _mm_set1_epi32(0x80000000)), _mm_set1_epi32(op.si10 ^ 0x80000000)); } void spu_interpreter::CLGTHI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_cmpgt_epi16(_mm_xor_si128(CPU.GPR[op.ra].vi, _mm_set1_epi32(0x80008000)), _mm_set1_epi16(op.si10 ^ 0x8000)); } void spu_interpreter::CLGTBI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_cmpgt_epi8(_mm_xor_si128(CPU.GPR[op.ra].vi, _mm_set1_epi32(0x80808080)), _mm_set1_epi8(op.i8 ^ 0x80)); } void spu_interpreter::HLGTI(SPUThread& CPU, spu_opcode_t op) diff --git a/rpcs3/Emu/Cell/SPUInterpreter2.h b/rpcs3/Emu/Cell/SPUInterpreter2.h index d4de2559c9..8d1afd0c6e 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter2.h +++ b/rpcs3/Emu/Cell/SPUInterpreter2.h @@ -8,10 +8,10 @@ union spu_opcode_t struct { - u32 rt : 7; // 25..31 + u32 rt : 7; // 25..31, it's actually RC in 4-op instructions u32 ra : 7; // 18..24 u32 rb : 7; // 11..17 - u32 rc : 7; // 4..10 + u32 rc : 7; // 4..10, it's actually RT in 4-op instructions }; struct @@ -77,8 +77,8 @@ union spu_opcode_t struct { u32 : 18; // 14..31 - u32 e : 1; // 13 - u32 d : 1; // 12 + u32 e : 1; // 13, "enable interrupts" bit + u32 d : 1; // 12, "disable interrupts" bit }; }; diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 625fbd2c74..c7909bee55 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -42,12 +42,12 @@ public: } } - __forceinline spu_inter_func_t operator [] (u32 opcode) + __forceinline spu_inter_func_t operator [] (u32 opcode) const { return funcs[opcode >> 21]; } } -g_spu_inter_func_list; +const g_spu_inter_func_list; SPUThread& GetCurrentSPUThread() { From 08dd95e52b75d4f6b7c7376c6e21f0533ab72371 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Sat, 21 Mar 2015 22:39:56 +0300 Subject: 
[PATCH 12/23] SPU: Floating Interpolate implemented --- rpcs3/Emu/Cell/SPUInterpreter.cpp | 51 ++++++++++++++++++------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp index faddf20138..b2bcc91d0a 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -18,6 +18,27 @@ #define rotl32(x,r) (((u32)(x) << (r)) | ((u32)(x) >> (32 - (r)))) #endif +class spu_scale_table_t +{ + std::array<__m128, 155 + 174> m_data; + +public: + spu_scale_table_t() + { + for (s32 i = -155; i < 174; i++) + { + m_data[i + 155] = _mm_set1_ps(static_cast(pow(2, i))); + } + } + + __forceinline __m128 operator [] (s32 scale) const + { + return m_data[scale + 155]; + } +} +const g_spu_scale_table; + + void spu_interpreter::DEFAULT(SPUThread& CPU, spu_opcode_t op) { SPUInterpreter inter(CPU); (*SPU_instr::rrr_list)(&inter, op.opcode); @@ -891,7 +912,14 @@ void spu_interpreter::CEQB(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::FI(SPUThread& CPU, spu_opcode_t op) { - CPU.GPR[op.rt] = CPU.GPR[op.rb]; + const auto mask_se = _mm_castsi128_ps(_mm_set1_epi32(0xff800000)); // sign and exponent mask + const auto mask_bf = _mm_castsi128_ps(_mm_set1_epi32(0x007ffc00)); // base fraction mask + const auto mask_sf = _mm_set1_epi32(0x000003ff); // step fraction mask + const auto mask_yf = _mm_set1_epi32(0x0007ffff); // Y fraction mask (bits 13..31) + const auto base = _mm_or_ps(_mm_and_ps(CPU.GPR[op.rb].vf, mask_bf), _mm_castsi128_ps(_mm_set1_epi32(0x3f800000))); + const auto step = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(CPU.GPR[op.rb].vi, mask_sf)), g_spu_scale_table[-13]); + const auto y = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(CPU.GPR[op.ra].vi, mask_yf)), g_spu_scale_table[-19]); + CPU.GPR[op.rt].vf = _mm_or_ps(_mm_and_ps(mask_se, CPU.GPR[op.rb].vf), _mm_andnot_ps(mask_se, _mm_sub_ps(base, _mm_mul_ps(step, y)))); } void spu_interpreter::HEQ(SPUThread& CPU, spu_opcode_t 
op) @@ -903,27 +931,6 @@ void spu_interpreter::HEQ(SPUThread& CPU, spu_opcode_t op) } -class spu_scale_table_t -{ - std::array<__m128, 155 + 174> m_data; - -public: - spu_scale_table_t() - { - for (s32 i = -155; i < 174; i++) - { - m_data[i + 155] = _mm_set1_ps(static_cast(pow(2, i))); - } - } - - __forceinline __m128 operator [] (s32 scale) const - { - return m_data[scale + 155]; - } -} -const g_spu_scale_table; - - void spu_interpreter::CFLTS(SPUThread& CPU, spu_opcode_t op) { const auto scaled = _mm_mul_ps(CPU.GPR[op.ra].vf, g_spu_scale_table[173 - op.i8]); From dc6c21833d8ade47a66246f92028e6d98957e09a Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Sun, 22 Mar 2015 01:03:28 +0300 Subject: [PATCH 13/23] SPU: completed --- rpcs3/Emu/Cell/SPUInterpreter.cpp | 36 ++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp index b2bcc91d0a..9d9cbdde35 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -777,8 +777,9 @@ void spu_interpreter::CEQ(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::MPYHHU(SPUThread& CPU, spu_opcode_t op) { - return DEFAULT(CPU, op); - //CPU.GPR[op.rt].vi = _mm_madd_epi16(_mm_srli_epi32(CPU.GPR[op.ra].vi, 16), _mm_srli_epi32(CPU.GPR[op.rb].vi, 16)); + const auto a = _mm_srli_epi32(CPU.GPR[op.ra].vi, 16); + const auto b = _mm_srli_epi32(CPU.GPR[op.rb].vi, 16); + CPU.GPR[op.rt].vi = _mm_or_si128(_mm_slli_epi32(_mm_mulhi_epu16(a, b), 16), _mm_mullo_epi16(a, b)); } void spu_interpreter::ADDX(SPUThread& CPU, spu_opcode_t op) @@ -811,13 +812,11 @@ void spu_interpreter::BGX(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::MPYHHA(SPUThread& CPU, spu_opcode_t op) { - return DEFAULT(CPU, op); CPU.GPR[op.rt].vi = _mm_add_epi32(CPU.GPR[op.rt].vi, _mm_madd_epi16(_mm_srli_epi32(CPU.GPR[op.ra].vi, 16), _mm_srli_epi32(CPU.GPR[op.rb].vi, 16))); } void spu_interpreter::MPYHHAU(SPUThread& CPU, spu_opcode_t 
op) { - return DEFAULT(CPU, op); const auto a = _mm_srli_epi32(CPU.GPR[op.ra].vi, 16); const auto b = _mm_srli_epi32(CPU.GPR[op.rb].vi, 16); CPU.GPR[op.rt].vi = _mm_add_epi32(CPU.GPR[op.rt].vi, _mm_or_si128(_mm_slli_epi32(_mm_mulhi_epu16(a, b), 16), _mm_mullo_epi16(a, b))); @@ -861,26 +860,22 @@ void spu_interpreter::DFCEQ(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::MPY(SPUThread& CPU, spu_opcode_t op) { - return DEFAULT(CPU, op); const auto mask = _mm_set1_epi32(0xffff); CPU.GPR[op.rt].vi = _mm_madd_epi16(_mm_and_si128(CPU.GPR[op.ra].vi, mask), _mm_and_si128(CPU.GPR[op.rb].vi, mask)); } void spu_interpreter::MPYH(SPUThread& CPU, spu_opcode_t op) { - return DEFAULT(CPU, op); CPU.GPR[op.rt].vi = _mm_slli_epi32(_mm_mullo_epi16(_mm_srli_epi32(CPU.GPR[op.ra].vi, 16), CPU.GPR[op.rb].vi), 16); } void spu_interpreter::MPYHH(SPUThread& CPU, spu_opcode_t op) { - return DEFAULT(CPU, op); CPU.GPR[op.rt].vi = _mm_madd_epi16(_mm_srli_epi32(CPU.GPR[op.ra].vi, 16), _mm_srli_epi32(CPU.GPR[op.rb].vi, 16)); } void spu_interpreter::MPYS(SPUThread& CPU, spu_opcode_t op) { - return DEFAULT(CPU, op); CPU.GPR[op.rt].vi = _mm_srai_epi32(_mm_slli_epi32(_mm_mulhi_epi16(CPU.GPR[op.ra].vi, CPU.GPR[op.rb].vi), 16), 16); } @@ -902,7 +897,9 @@ void spu_interpreter::DFCMEQ(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::MPYU(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = _mm_and_si128(CPU.GPR[op.ra].vi, _mm_set1_epi32(0xffff)); + const auto b = _mm_and_si128(CPU.GPR[op.rb].vi, _mm_set1_epi32(0xffff)); + CPU.GPR[op.rt].vi = _mm_or_si128(_mm_slli_epi32(_mm_mulhi_epu16(a, b), 16), _mm_mullo_epi16(a, b)); } void spu_interpreter::CEQB(SPUThread& CPU, spu_opcode_t op) @@ -1182,12 +1179,14 @@ void spu_interpreter::HLGTI(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::MPYI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + CPU.GPR[op.rt].vi = _mm_madd_epi16(CPU.GPR[op.ra].vi, _mm_set1_epi32(op.si10 & 0xffff)); } void 
spu_interpreter::MPYUI(SPUThread& CPU, spu_opcode_t op) { - DEFAULT(CPU, op); + const auto a = _mm_and_si128(CPU.GPR[op.ra].vi, _mm_set1_epi32(0xffff)); + const auto i = _mm_set1_epi32(op.si10 & 0xffff); + CPU.GPR[op.rt].vi = _mm_or_si128(_mm_slli_epi32(_mm_mulhi_epu16(a, i), 16), _mm_mullo_epi16(a, i)); } void spu_interpreter::CEQI(SPUThread& CPU, spu_opcode_t op) @@ -1237,13 +1236,24 @@ void spu_interpreter::SELB(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::SHUFB(SPUThread& CPU, spu_opcode_t op) { // rt <> rc - DEFAULT(CPU, op); + const auto index = _mm_xor_si128(CPU.GPR[op.rt].vi, _mm_set1_epi32(0x0f0f0f0f)); + const auto res1 = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, index); + const auto bit4 = _mm_set1_epi32(0x10101010); + const auto k1 = _mm_cmpeq_epi8(_mm_and_si128(index, bit4), bit4); + const auto res2 = _mm_or_si128(_mm_and_si128(k1, _mm_shuffle_epi8(CPU.GPR[op.rb].vi, index)), _mm_andnot_si128(k1, res1)); + const auto bit67 = _mm_set1_epi32(0xc0c0c0c0); + const auto k2 = _mm_cmpeq_epi8(_mm_and_si128(index, bit67), bit67); + const auto res3 = _mm_or_si128(res2, k2); + const auto bit567 = _mm_set1_epi32(0xe0e0e0e0); + const auto k3 = _mm_cmpeq_epi8(_mm_and_si128(index, bit567), bit567); + CPU.GPR[op.rc].vi = _mm_sub_epi8(res3, _mm_and_si128(k3, _mm_set1_epi32(0x7f7f7f7f))); } void spu_interpreter::MPYA(SPUThread& CPU, spu_opcode_t op) { // rt <> rc - DEFAULT(CPU, op); + const auto mask = _mm_set1_epi32(0xffff); + CPU.GPR[op.rc].vi = _mm_add_epi32(CPU.GPR[op.rt].vi, _mm_madd_epi16(_mm_and_si128(CPU.GPR[op.ra].vi, mask), _mm_and_si128(CPU.GPR[op.rb].vi, mask))); } void spu_interpreter::FNMS(SPUThread& CPU, spu_opcode_t op) From f095dec43dfccad71bdd8553c4be8eb25d3acbcb Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Sun, 22 Mar 2015 02:22:24 +0300 Subject: [PATCH 14/23] Compilation fix --- rpcs3/CMakeLists.txt | 2 +- rpcs3/Emu/Cell/SPUInterpreter.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/rpcs3/CMakeLists.txt 
b/rpcs3/CMakeLists.txt index 495d00f44e..02fb8c0683 100644 --- a/rpcs3/CMakeLists.txt +++ b/rpcs3/CMakeLists.txt @@ -34,7 +34,7 @@ if (NOT MSVC) set(CMAKE_C_FLAGS_MINSIZEREL "${CMAKE_C_FLAGS_MINSIZEREL} -Os -D_NDEBUG") set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O1 -D_NDEBUG") set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O1 -g -D_NDEBUG") - add_definitions(-msse2 -mcx16) + add_definitions(-msse -msse2 -mcx16 -mssse3) endif() if (APPLE) diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp index 9d9cbdde35..2d0c6f8174 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -676,7 +676,7 @@ void spu_interpreter::ANDC(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::FCGT(SPUThread& CPU, spu_opcode_t op) { - CPU.GPR[op.rt].vf = _mm_cmp_ps(CPU.GPR[op.rb].vf, CPU.GPR[op.ra].vf, 1); + CPU.GPR[op.rt].vf = _mm_cmplt_ps(CPU.GPR[op.rb].vf, CPU.GPR[op.ra].vf); } void spu_interpreter::DFCGT(SPUThread& CPU, spu_opcode_t op) @@ -713,7 +713,7 @@ void spu_interpreter::ORC(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::FCMGT(SPUThread& CPU, spu_opcode_t op) { const auto mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); - CPU.GPR[op.rt].vf = _mm_cmp_ps(_mm_and_ps(CPU.GPR[op.rb].vf, mask), _mm_and_ps(CPU.GPR[op.ra].vf, mask), 1); + CPU.GPR[op.rt].vf = _mm_cmplt_ps(_mm_and_ps(CPU.GPR[op.rb].vf, mask), _mm_and_ps(CPU.GPR[op.ra].vf, mask)); } void spu_interpreter::DFCMGT(SPUThread& CPU, spu_opcode_t op) @@ -850,7 +850,7 @@ void spu_interpreter::DFTSV(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::FCEQ(SPUThread& CPU, spu_opcode_t op) { - CPU.GPR[op.rt].vf = _mm_cmp_ps(CPU.GPR[op.rb].vf, CPU.GPR[op.ra].vf, 0); + CPU.GPR[op.rt].vf = _mm_cmpeq_ps(CPU.GPR[op.rb].vf, CPU.GPR[op.ra].vf); } void spu_interpreter::DFCEQ(SPUThread& CPU, spu_opcode_t op) @@ -887,7 +887,7 @@ void spu_interpreter::CEQH(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::FCMEQ(SPUThread& CPU, 
spu_opcode_t op) { const auto mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); - CPU.GPR[op.rt].vf = _mm_cmp_ps(_mm_and_ps(CPU.GPR[op.rb].vf, mask), _mm_and_ps(CPU.GPR[op.ra].vf, mask), 0); + CPU.GPR[op.rt].vf = _mm_cmpeq_ps(_mm_and_ps(CPU.GPR[op.rb].vf, mask), _mm_and_ps(CPU.GPR[op.ra].vf, mask)); } void spu_interpreter::DFCMEQ(SPUThread& CPU, spu_opcode_t op) From 75fa95c7ff7c33cdcd932bdc10c80b6e6fd2d27d Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Tue, 24 Mar 2015 22:54:15 +0300 Subject: [PATCH 15/23] PPU: some instruction replaced --- rpcs3/Emu/Cell/PPUInterpreter.cpp | 98 +++++++++---------------------- 1 file changed, 28 insertions(+), 70 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index a2602977eb..3dca12fcb5 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -23,13 +23,14 @@ void ppu_interpreter::NOP(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::TDI(PPUThread& CPU, ppu_opcode_t op) { - s64 a = CPU.GPR[op.ra]; + const s64 a = CPU.GPR[op.ra], b = op.simm16; + const u64 a_ = a, b_ = b; // unsigned - if ((a < (s64)op.simm16 && (op.bo & 0x10)) || - (a >(s64)op.simm16 && (op.bo & 0x8)) || - (a == (s64)op.simm16 && (op.bo & 0x4)) || - ((u64)a < (u64)op.simm16 && (op.bo & 0x2)) || - ((u64)a >(u64)op.simm16 && (op.bo & 0x1))) + if (((op.bo & 0x10) && a < b) || + ((op.bo & 0x8) && a > b) || + ((op.bo & 0x4) && a == b) || + ((op.bo & 0x2) && a_ < b_) || + ((op.bo & 0x1) && a_ > b_)) { throw __FUNCTION__; } @@ -37,13 +38,14 @@ void ppu_interpreter::TDI(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::TWI(PPUThread& CPU, ppu_opcode_t op) { - s32 a = (s32)CPU.GPR[op.ra]; + const s32 a = (s32)CPU.GPR[op.ra], b = op.simm16; + const u32 a_ = a, b_ = b; // unsigned - if ((a < op.simm16 && (op.bo & 0x10)) || - (a > op.simm16 && (op.bo & 0x8)) || - (a == op.simm16 && (op.bo & 0x4)) || - ((u32)a < (u32)op.simm16 && (op.bo & 0x2)) || - ((u32)a >(u32)op.simm16 && 
(op.bo & 0x1))) + if (((op.bo & 0x10) && a < b) || + ((op.bo & 0x8) && a > b) || + ((op.bo & 0x4) && a == b) || + ((op.bo & 0x2) && a_ < b_) || + ((op.bo & 0x1) && a_ > b_)) { throw __FUNCTION__; } @@ -70,10 +72,7 @@ void ppu_interpreter::VADDCUW(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VADDFP(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - CPU.VPR[op.vd]._f[w] = CPU.VPR[op.va]._f[w] + CPU.VPR[op.vb]._f[w]; - } + CPU.VPR[op.vd] = u128::addfs(CPU.VPR[op.va], CPU.VPR[op.vb]); } void ppu_interpreter::VADDSBS(PPUThread& CPU, ppu_opcode_t op) @@ -135,10 +134,7 @@ void ppu_interpreter::VADDSWS(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VADDUBM(PPUThread& CPU, ppu_opcode_t op) { - for (uint b = 0; b < 16; b++) - { - CPU.VPR[op.vd]._u8[b] = CPU.VPR[op.va]._u8[b] + CPU.VPR[op.vb]._u8[b]; - } + CPU.VPR[op.vd] = u128::add8(CPU.VPR[op.va], CPU.VPR[op.vb]); } void ppu_interpreter::VADDUBS(PPUThread& CPU, ppu_opcode_t op) @@ -158,10 +154,7 @@ void ppu_interpreter::VADDUBS(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VADDUHM(PPUThread& CPU, ppu_opcode_t op) { - for (uint h = 0; h < 8; h++) - { - CPU.VPR[op.vd]._u16[h] = CPU.VPR[op.va]._u16[h] + CPU.VPR[op.vb]._u16[h]; - } + CPU.VPR[op.vd] = u128::add16(CPU.VPR[op.va], CPU.VPR[op.vb]); } void ppu_interpreter::VADDUHS(PPUThread& CPU, ppu_opcode_t op) @@ -181,10 +174,7 @@ void ppu_interpreter::VADDUHS(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VADDUWM(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - CPU.VPR[op.vd]._u32[w] = CPU.VPR[op.va]._u32[w] + CPU.VPR[op.vb]._u32[w]; - } + CPU.VPR[op.vd] = u128::add32(CPU.VPR[op.va], CPU.VPR[op.vb]); } void ppu_interpreter::VADDUWS(PPUThread& CPU, ppu_opcode_t op) @@ -204,18 +194,12 @@ void ppu_interpreter::VADDUWS(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VAND(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - CPU.VPR[op.vd]._u32[w] = CPU.VPR[op.va]._u32[w] & 
CPU.VPR[op.vb]._u32[w]; - } + CPU.VPR[op.vd] = CPU.VPR[op.va] & CPU.VPR[op.vb]; } void ppu_interpreter::VANDC(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - CPU.VPR[op.vd]._u32[w] = CPU.VPR[op.va]._u32[w] & (~CPU.VPR[op.vb]._u32[w]); - } + CPU.VPR[op.vd] = CPU.VPR[op.va] & ~CPU.VPR[op.vb]; } void ppu_interpreter::VAVGSB(PPUThread& CPU, ppu_opcode_t op) @@ -1351,18 +1335,12 @@ void ppu_interpreter::VNMSUBFP(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VNOR(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - CPU.VPR[op.vd]._u32[w] = ~(CPU.VPR[op.va]._u32[w] | CPU.VPR[op.vb]._u32[w]); - } + CPU.VPR[op.vd] = ~(CPU.VPR[op.va] | CPU.VPR[op.vb]); } void ppu_interpreter::VOR(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - CPU.VPR[op.vd]._u32[w] = CPU.VPR[op.va]._u32[w] | CPU.VPR[op.vb]._u32[w]; - } + CPU.VPR[op.vd] = CPU.VPR[op.va] | CPU.VPR[op.vb]; } void ppu_interpreter::VPERM(PPUThread& CPU, ppu_opcode_t op) @@ -1691,10 +1669,7 @@ void ppu_interpreter::VRSQRTEFP(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VSEL(PPUThread& CPU, ppu_opcode_t op) { - for (uint b = 0; b < 16; b++) - { - CPU.VPR[op.vd]._u8[b] = (CPU.VPR[op.vb]._u8[b] & CPU.VPR[op.vc]._u8[b]) | (CPU.VPR[op.va]._u8[b] & (~CPU.VPR[op.vc]._u8[b])); - } + CPU.VPR[op.vd] = (CPU.VPR[op.vb] & CPU.VPR[op.vc]) | (CPU.VPR[op.va] & ~CPU.VPR[op.vc]); } void ppu_interpreter::VSL(PPUThread& CPU, ppu_opcode_t op) @@ -1899,12 +1874,7 @@ void ppu_interpreter::VSUBCUW(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VSUBFP(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - const float a = CPU.VPR[op.va]._f[w]; - const float b = CPU.VPR[op.vb]._f[w]; - CPU.VPR[op.vd]._f[w] = a - b; - } + CPU.VPR[op.vd] = u128::subfs(CPU.VPR[op.va], CPU.VPR[op.vb]); } void ppu_interpreter::VSUBSBS(PPUThread& CPU, ppu_opcode_t op) @@ -1966,10 +1936,7 @@ void ppu_interpreter::VSUBSWS(PPUThread& CPU, ppu_opcode_t op) void 
ppu_interpreter::VSUBUBM(PPUThread& CPU, ppu_opcode_t op) { - for (uint b = 0; b < 16; b++) - { - CPU.VPR[op.vd]._u8[b] = (u8)((CPU.VPR[op.va]._u8[b] - CPU.VPR[op.vb]._u8[b]) & 0xff); - } + CPU.VPR[op.vd] = u128::sub8(CPU.VPR[op.va], CPU.VPR[op.vb]); } void ppu_interpreter::VSUBUBS(PPUThread& CPU, ppu_opcode_t op) @@ -1989,10 +1956,7 @@ void ppu_interpreter::VSUBUBS(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VSUBUHM(PPUThread& CPU, ppu_opcode_t op) { - for (uint h = 0; h < 8; h++) - { - CPU.VPR[op.vd]._u16[h] = CPU.VPR[op.va]._u16[h] - CPU.VPR[op.vb]._u16[h]; - } + CPU.VPR[op.vd] = u128::sub16(CPU.VPR[op.va], CPU.VPR[op.vb]); } void ppu_interpreter::VSUBUHS(PPUThread& CPU, ppu_opcode_t op) @@ -2012,10 +1976,7 @@ void ppu_interpreter::VSUBUHS(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VSUBUWM(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - CPU.VPR[op.vd]._u32[w] = CPU.VPR[op.va]._u32[w] - CPU.VPR[op.vb]._u32[w]; - } + CPU.VPR[op.vd] = u128::sub32(CPU.VPR[op.va], CPU.VPR[op.vb]); } void ppu_interpreter::VSUBUWS(PPUThread& CPU, ppu_opcode_t op) @@ -2206,10 +2167,7 @@ void ppu_interpreter::VUPKLSH(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VXOR(PPUThread& CPU, ppu_opcode_t op) { - CPU.VPR[op.vd]._u32[0] = CPU.VPR[op.va]._u32[0] ^ CPU.VPR[op.vb]._u32[0]; - CPU.VPR[op.vd]._u32[1] = CPU.VPR[op.va]._u32[1] ^ CPU.VPR[op.vb]._u32[1]; - CPU.VPR[op.vd]._u32[2] = CPU.VPR[op.va]._u32[2] ^ CPU.VPR[op.vb]._u32[2]; - CPU.VPR[op.vd]._u32[3] = CPU.VPR[op.va]._u32[3] ^ CPU.VPR[op.vb]._u32[3]; + CPU.VPR[op.vd] = CPU.VPR[op.va] ^ CPU.VPR[op.vb]; } void ppu_interpreter::MULLI(PPUThread& CPU, ppu_opcode_t op) From d640aba903b05f2df373425d20b2996fc178f06f Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Thu, 26 Mar 2015 21:42:12 +0300 Subject: [PATCH 16/23] PPU: some instructions replaced --- Utilities/BEType.h | 10 --- Utilities/GNU.h | 18 +++-- rpcs3/Emu/Cell/PPUInterpreter.cpp | 107 ++++++------------------------ 
rpcs3/Emu/Cell/SPUInterpreter.cpp | 21 ++---- 4 files changed, 38 insertions(+), 118 deletions(-) diff --git a/Utilities/BEType.h b/Utilities/BEType.h index a4ecdc93a1..57fbadd25d 100644 --- a/Utilities/BEType.h +++ b/Utilities/BEType.h @@ -307,16 +307,6 @@ union _CRT_ALIGN(16) u128 return fromV(_mm_cmpeq_epi8(left.vi, right.vi)); } - static __forceinline u128 gtu8(const u128& left, const u128& right) - { - return fromV(_mm_cmpgt_epu8(left.vi, right.vi)); - } - - static __forceinline u128 leu8(const u128& left, const u128& right) - { - return fromV(_mm_cmple_epu8(left.vi, right.vi)); - } - bool operator == (const u128& right) const { return (_u64[0] == right._u64[0]) && (_u64[1] == right._u64[1]); diff --git a/Utilities/GNU.h b/Utilities/GNU.h index a8db7f8703..35c6a923ca 100644 --- a/Utilities/GNU.h +++ b/Utilities/GNU.h @@ -342,15 +342,21 @@ static __forceinline uint64_t cntlz64(uint64_t arg) } // compare 16 packed unsigned bytes (greater than) -static __forceinline __m128i _mm_cmpgt_epu8(__m128i A, __m128i B) +inline __m128i sse_cmpgt_epu8(__m128i A, __m128i B) { // (A xor 0x80) > (B xor 0x80) - return _mm_cmpgt_epi8(_mm_xor_si128(A, _mm_set1_epi8(-128)), _mm_xor_si128(B, _mm_set1_epi8(-128))); + const auto sign = _mm_set1_epi32(0x80808080); + return _mm_cmpgt_epi8(_mm_xor_si128(A, sign), _mm_xor_si128(B, sign)); } -// compare 16 packed unsigned bytes (less or equal) -static __forceinline __m128i _mm_cmple_epu8(__m128i A, __m128i B) +inline __m128i sse_cmpgt_epu16(__m128i A, __m128i B) { - // ((B xor 0x80) > (A xor 0x80)) || A == B - return _mm_or_si128(_mm_cmpgt_epu8(B, A), _mm_cmpeq_epi8(A, B)); + const auto sign = _mm_set1_epi32(0x80008000); + return _mm_cmpgt_epi16(_mm_xor_si128(A, sign), _mm_xor_si128(B, sign)); +} + +inline __m128i sse_cmpgt_epu32(__m128i A, __m128i B) +{ + const auto sign = _mm_set1_epi32(0x80000000); + return _mm_cmpgt_epi32(_mm_xor_si128(A, sign), _mm_xor_si128(B, sign)); } diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp 
b/rpcs3/Emu/Cell/PPUInterpreter.cpp index 3dca12fcb5..a2cdf055d3 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -64,10 +64,9 @@ void ppu_interpreter::MTVSCR(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VADDCUW(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - CPU.VPR[op.vd]._u32[w] = ~CPU.VPR[op.va]._u32[w] < CPU.VPR[op.vb]._u32[w]; - } + const auto a = CPU.VPR[op.va].vi; + const auto b = CPU.VPR[op.vb].vi; + CPU.VPR[op.vd].vi = _mm_srli_epi32(_mm_cmpgt_epi32(_mm_xor_si128(b, _mm_set1_epi32(0x80000000)), _mm_xor_si128(a, _mm_set1_epi32(0x7fffffff))), 31); } void ppu_interpreter::VADDFP(PPUThread& CPU, ppu_opcode_t op) @@ -77,59 +76,23 @@ void ppu_interpreter::VADDFP(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VADDSBS(PPUThread& CPU, ppu_opcode_t op) { - for (u32 b = 0; b < 16; ++b) - { - s16 result = (s16)CPU.VPR[op.va]._s8[b] + (s16)CPU.VPR[op.vb]._s8[b]; - - if (result > 0x7f) - { - CPU.VPR[op.vd]._s8[b] = 0x7f; - } - else if (result < -0x80) - { - CPU.VPR[op.vd]._s8[b] = -0x80; - } - else - CPU.VPR[op.vd]._s8[b] = (s8)result; - } + CPU.VPR[op.vd].vi = _mm_adds_epi8(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); } void ppu_interpreter::VADDSHS(PPUThread& CPU, ppu_opcode_t op) { - for (uint h = 0; h < 8; h++) - { - s32 result = (s32)CPU.VPR[op.va]._s16[h] + (s32)CPU.VPR[op.vb]._s16[h]; - - if (result > 0x7fff) - { - CPU.VPR[op.vd]._s16[h] = 0x7fff; - } - else if (result < -0x8000) - { - CPU.VPR[op.vd]._s16[h] = -0x8000; - } - else - CPU.VPR[op.vd]._s16[h] = result; - } + CPU.VPR[op.vd].vi = _mm_adds_epi16(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); } void ppu_interpreter::VADDSWS(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - s64 result = (s64)CPU.VPR[op.va]._s32[w] + (s64)CPU.VPR[op.vb]._s32[w]; - - if (result > 0x7fffffff) - { - CPU.VPR[op.vd]._s32[w] = 0x7fffffff; - } - else if (result < (s32)0x80000000) - { - CPU.VPR[op.vd]._s32[w] = 0x80000000; - } - else - 
CPU.VPR[op.vd]._s32[w] = (s32)result; - } + const auto a = CPU.VPR[op.va]; + const auto b = CPU.VPR[op.vb]; + const auto s = u128::add32(a, b); // a + b + const auto m = (a ^ s) & (b ^ s); // overflow bit + const auto x = _mm_srai_epi32(m.vi, 31); // saturation mask + const auto y = _mm_srai_epi32(_mm_and_si128(s.vi, m.vi), 31); // positive saturation mask + CPU.VPR[op.vd].vi = _mm_xor_si128(_mm_xor_si128(_mm_srli_epi32(x, 1), y), _mm_or_si128(s.vi, x)); } void ppu_interpreter::VADDUBM(PPUThread& CPU, ppu_opcode_t op) @@ -139,17 +102,7 @@ void ppu_interpreter::VADDUBM(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VADDUBS(PPUThread& CPU, ppu_opcode_t op) { - for (uint b = 0; b < 16; b++) - { - u16 result = (u16)CPU.VPR[op.va]._u8[b] + (u16)CPU.VPR[op.vb]._u8[b]; - - if (result > 0xff) - { - CPU.VPR[op.vd]._u8[b] = 0xff; - } - else - CPU.VPR[op.vd]._u8[b] = (u8)result; - } + CPU.VPR[op.vd].vi = _mm_adds_epu8(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); } void ppu_interpreter::VADDUHM(PPUThread& CPU, ppu_opcode_t op) @@ -159,17 +112,7 @@ void ppu_interpreter::VADDUHM(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VADDUHS(PPUThread& CPU, ppu_opcode_t op) { - for (uint h = 0; h < 8; h++) - { - u32 result = (u32)CPU.VPR[op.va]._u16[h] + (u32)CPU.VPR[op.vb]._u16[h]; - - if (result > 0xffff) - { - CPU.VPR[op.vd]._u16[h] = 0xffff; - } - else - CPU.VPR[op.vd]._u16[h] = result; - } + CPU.VPR[op.vd].vi = _mm_adds_epu16(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); } void ppu_interpreter::VADDUWM(PPUThread& CPU, ppu_opcode_t op) @@ -179,17 +122,9 @@ void ppu_interpreter::VADDUWM(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VADDUWS(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - u64 result = (u64)CPU.VPR[op.va]._u32[w] + (u64)CPU.VPR[op.vb]._u32[w]; - - if (result > 0xffffffff) - { - CPU.VPR[op.vd]._u32[w] = 0xffffffff; - } - else - CPU.VPR[op.vd]._u32[w] = (u32)result; - } + const auto a = CPU.VPR[op.va].vi; + const auto b = 
CPU.VPR[op.vb].vi; + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_add_epi32(a, b), _mm_cmpgt_epi32(_mm_xor_si128(b, _mm_set1_epi32(0x80000000)), _mm_xor_si128(a, _mm_set1_epi32(0x7fffffff)))); } void ppu_interpreter::VAND(PPUThread& CPU, ppu_opcode_t op) @@ -228,16 +163,12 @@ void ppu_interpreter::VAVGSW(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VAVGUB(PPUThread& CPU, ppu_opcode_t op) { - for (uint b = 0; b < 16; b++) - CPU.VPR[op.vd]._u8[b] = (CPU.VPR[op.va]._u8[b] + CPU.VPR[op.vb]._u8[b] + 1) >> 1; + CPU.VPR[op.vd].vi = _mm_avg_epu8(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); } void ppu_interpreter::VAVGUH(PPUThread& CPU, ppu_opcode_t op) { - for (uint h = 0; h < 8; h++) - { - CPU.VPR[op.vd]._u16[h] = (CPU.VPR[op.va]._u16[h] + CPU.VPR[op.vb]._u16[h] + 1) >> 1; - } + CPU.VPR[op.vd].vi = _mm_avg_epu16(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); } void ppu_interpreter::VAVGUW(PPUThread& CPU, ppu_opcode_t op) diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp index 2d0c6f8174..5980d60e28 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -91,10 +91,7 @@ void spu_interpreter::OR(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::BG(SPUThread& CPU, spu_opcode_t op) { - for (u32 i = 0; i < 4; i++) - { - CPU.GPR[op.rt]._u32[i] = CPU.GPR[op.ra]._u32[i] <= CPU.GPR[op.rb]._u32[i]; - } + CPU.GPR[op.rt].vi = _mm_add_epi32(sse_cmpgt_epu32(CPU.GPR[op.ra].vi, CPU.GPR[op.rb].vi), _mm_set1_epi32(1)); } void spu_interpreter::SFH(SPUThread& CPU, spu_opcode_t op) @@ -264,10 +261,9 @@ void spu_interpreter::AND(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::CG(SPUThread& CPU, spu_opcode_t op) { - for (u32 i = 0; i < 4; i++) - { - CPU.GPR[op.rt]._u32[i] = ~CPU.GPR[op.ra]._u32[i] < CPU.GPR[op.rb]._u32[i]; - } + const auto a = _mm_xor_si128(CPU.GPR[op.ra].vi, _mm_set1_epi32(0x7fffffff)); + const auto b = _mm_xor_si128(CPU.GPR[op.rb].vi, _mm_set1_epi32(0x80000000)); + CPU.GPR[op.rt].vi = 
_mm_srli_epi32(_mm_cmpgt_epi32(b, a), 31); } void spu_interpreter::AH(SPUThread& CPU, spu_opcode_t op) @@ -665,8 +661,7 @@ void spu_interpreter::XSBH(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::CLGT(SPUThread& CPU, spu_opcode_t op) { - const auto sign = _mm_set1_epi32(0x80000000); - CPU.GPR[op.rt].vi = _mm_cmpgt_epi32(_mm_xor_si128(CPU.GPR[op.ra].vi, sign), _mm_xor_si128(CPU.GPR[op.rb].vi, sign)); + CPU.GPR[op.rt].vi = sse_cmpgt_epu32(CPU.GPR[op.ra].vi, CPU.GPR[op.rb].vi); } void spu_interpreter::ANDC(SPUThread& CPU, spu_opcode_t op) @@ -701,8 +696,7 @@ void spu_interpreter::FM(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::CLGTH(SPUThread& CPU, spu_opcode_t op) { - const auto sign = _mm_set1_epi32(0x80008000); - CPU.GPR[op.rt].vi = _mm_cmpgt_epi16(_mm_xor_si128(CPU.GPR[op.ra].vi, sign), _mm_xor_si128(CPU.GPR[op.rb].vi, sign)); + CPU.GPR[op.rt].vi = sse_cmpgt_epu16(CPU.GPR[op.ra].vi, CPU.GPR[op.rb].vi); } void spu_interpreter::ORC(SPUThread& CPU, spu_opcode_t op) @@ -738,8 +732,7 @@ void spu_interpreter::DFM(SPUThread& CPU, spu_opcode_t op) void spu_interpreter::CLGTB(SPUThread& CPU, spu_opcode_t op) { - const auto sign = _mm_set1_epi32(0x80808080); - CPU.GPR[op.rt].vi = _mm_cmpgt_epi8(_mm_xor_si128(CPU.GPR[op.ra].vi, sign), _mm_xor_si128(CPU.GPR[op.rb].vi, sign)); + CPU.GPR[op.rt].vi = sse_cmpgt_epu8(CPU.GPR[op.ra].vi, CPU.GPR[op.rb].vi); } void spu_interpreter::HLGT(SPUThread& CPU, spu_opcode_t op) From 3247152ff7915dd469241d770491c56d17461799 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Sun, 29 Mar 2015 14:00:10 +0300 Subject: [PATCH 17/23] PPU: some instructions replaced --- Utilities/BEType.h | 21 +- Utilities/GNU.h | 28 +++ rpcs3/Emu/Cell/PPUInterpreter.cpp | 362 ++++++------------------------ rpcs3/Emu/Cell/SPUInterpreter.cpp | 2 +- 4 files changed, 120 insertions(+), 293 deletions(-) diff --git a/Utilities/BEType.h b/Utilities/BEType.h index 57fbadd25d..9f4c96b5de 100644 --- a/Utilities/BEType.h +++ b/Utilities/BEType.h @@ -203,14 
+203,21 @@ union _CRT_ALIGN(16) u128 static u128 from32p(u32 value) { u128 ret; - ret.vi = _mm_set1_epi32((int)value); + ret.vi = _mm_set1_epi32(static_cast(value)); + return ret; + } + + static u128 from16p(u16 value) + { + u128 ret; + ret.vi = _mm_set1_epi16(static_cast(value)); return ret; } static u128 from8p(u8 value) { u128 ret; - ret.vi = _mm_set1_epi8((char)value); + ret.vi = _mm_set1_epi8(static_cast(value)); return ret; } @@ -307,6 +314,16 @@ union _CRT_ALIGN(16) u128 return fromV(_mm_cmpeq_epi8(left.vi, right.vi)); } + static __forceinline u128 eq16(const u128& left, const u128& right) + { + return fromV(_mm_cmpeq_epi16(left.vi, right.vi)); + } + + static __forceinline u128 eq32(const u128& left, const u128& right) + { + return fromV(_mm_cmpeq_epi32(left.vi, right.vi)); + } + bool operator == (const u128& right) const { return (_u64[0] == right._u64[0]) && (_u64[1] == right._u64[1]); diff --git a/Utilities/GNU.h b/Utilities/GNU.h index 35c6a923ca..9d876d8ce8 100644 --- a/Utilities/GNU.h +++ b/Utilities/GNU.h @@ -360,3 +360,31 @@ inline __m128i sse_cmpgt_epu32(__m128i A, __m128i B) const auto sign = _mm_set1_epi32(0x80000000); return _mm_cmpgt_epi32(_mm_xor_si128(A, sign), _mm_xor_si128(B, sign)); } + +inline __m128 sse_exp2_ps(__m128 A) +{ + const auto x0 = _mm_max_ps(_mm_min_ps(A, _mm_set1_ps(127.4999961f)), _mm_set1_ps(-127.4999961f)); + const auto x1 = _mm_add_ps(x0, _mm_set1_ps(0.5f)); + const auto x2 = _mm_sub_epi32(_mm_cvtps_epi32(x1), _mm_and_si128(_mm_castps_si128(_mm_cmpnlt_ps(_mm_setzero_ps(), x1)), _mm_set1_epi32(1))); + const auto x3 = _mm_sub_ps(x0, _mm_cvtepi32_ps(x2)); + const auto x4 = _mm_mul_ps(x3, x3); + const auto x5 = _mm_mul_ps(x3, _mm_add_ps(_mm_mul_ps(_mm_add_ps(_mm_mul_ps(x4, _mm_set1_ps(0.023093347705f)), _mm_set1_ps(20.20206567f)), x4), _mm_set1_ps(1513.906801f))); + const auto x6 = _mm_mul_ps(x5, _mm_rcp_ps(_mm_sub_ps(_mm_add_ps(_mm_mul_ps(_mm_set1_ps(233.1842117f), x4), _mm_set1_ps(4368.211667f)), x5))); + return 
_mm_mul_ps(_mm_add_ps(_mm_add_ps(x6, x6), _mm_set1_ps(1.0f)), _mm_castsi128_ps(_mm_slli_epi32(_mm_add_epi32(x2, _mm_set1_epi32(127)), 23))); +} + +inline __m128 sse_log2_ps(__m128 A) +{ + const auto _1 = _mm_set1_ps(1.0f); + const auto _c = _mm_set1_ps(1.442695040f); + const auto x0 = _mm_max_ps(A, _mm_castsi128_ps(_mm_set1_epi32(0x00800000))); + const auto x1 = _mm_or_ps(_mm_and_ps(x0, _mm_castsi128_ps(_mm_set1_epi32(0x807fffff))), _1); + const auto x2 = _mm_rcp_ps(_mm_add_ps(x1, _1)); + const auto x3 = _mm_mul_ps(_mm_sub_ps(x1, _1), x2); + const auto x4 = _mm_add_ps(x3, x3); + const auto x5 = _mm_mul_ps(x4, x4); + const auto x6 = _mm_add_ps(_mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_set1_ps(-0.7895802789f), x5), _mm_set1_ps(16.38666457f)), x5), _mm_set1_ps(-64.1409953f)); + const auto x7 = _mm_rcp_ps(_mm_add_ps(_mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_set1_ps(-35.67227983f), x5), _mm_set1_ps(312.0937664f)), x5), _mm_set1_ps(-769.6919436f))); + const auto x8 = _mm_cvtepi32_ps(_mm_sub_epi32(_mm_srli_epi32(_mm_castps_si128(x0), 23), _mm_set1_epi32(127))); + return _mm_add_ps(_mm_mul_ps(_mm_mul_ps(_mm_mul_ps(_mm_mul_ps(x5, x6), x7), x4), _c), _mm_add_ps(_mm_mul_ps(x4, _c), x8)); +} diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index a2cdf055d3..ad3c83fbbc 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -11,6 +11,27 @@ #include "PPUInterpreter2.h" #include "Emu/CPU/CPUThreadManager.h" +class ppu_scale_table_t +{ + std::array<__m128, 32 + 31> m_data; + +public: + ppu_scale_table_t() + { + for (s32 i = -31; i < 32; i++) + { + m_data[i + 31] = _mm_set1_ps(static_cast(exp2(i))); + } + } + + __forceinline __m128 operator [] (s32 scale) const + { + return m_data[scale + 31]; + } +} +const g_ppu_scale_table; + + void ppu_interpreter::NULL_OP(PPUThread& CPU, ppu_opcode_t op) { PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); @@ -139,26 +160,32 @@ void ppu_interpreter::VANDC(PPUThread& 
CPU, ppu_opcode_t op) void ppu_interpreter::VAVGSB(PPUThread& CPU, ppu_opcode_t op) { - for (uint b = 0; b < 16; b++) - { - CPU.VPR[op.vd]._s8[b] = (CPU.VPR[op.va]._s8[b] + CPU.VPR[op.vb]._s8[b] + 1) >> 1; - } + const auto a = CPU.VPR[op.va]; + const auto b = u128::add8(CPU.VPR[op.vb], u128::from8p(1)); // add 1 + const auto summ = u128::add8(a, b) & u128::from8p(0xfe); + const auto sign = u128::from8p(0x80); + const auto overflow = (((a ^ summ) & (b ^ summ)) ^ summ ^ u128::eq8(b, sign)) & sign; // calculate msb + CPU.VPR[op.vd].vi = _mm_or_si128(overflow.vi, _mm_srli_epi64(summ.vi, 1)); } void ppu_interpreter::VAVGSH(PPUThread& CPU, ppu_opcode_t op) { - for (uint h = 0; h < 8; h++) - { - CPU.VPR[op.vd]._s16[h] = (CPU.VPR[op.va]._s16[h] + CPU.VPR[op.vb]._s16[h] + 1) >> 1; - } + const auto a = CPU.VPR[op.va]; + const auto b = u128::add16(CPU.VPR[op.vb], u128::from16p(1)); // add 1 + const auto summ = u128::add16(a, b); + const auto sign = u128::from16p(0x8000); + const auto overflow = (((a ^ summ) & (b ^ summ)) ^ summ ^ u128::eq16(b, sign)) & sign; // calculate msb + CPU.VPR[op.vd].vi = _mm_or_si128(overflow.vi, _mm_srli_epi16(summ.vi, 1)); } void ppu_interpreter::VAVGSW(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - CPU.VPR[op.vd]._s32[w] = ((s64)CPU.VPR[op.va]._s32[w] + (s64)CPU.VPR[op.vb]._s32[w] + 1) >> 1; - } + const auto a = CPU.VPR[op.va]; + const auto b = u128::add32(CPU.VPR[op.vb], u128::from32p(1)); // add 1 + const auto summ = u128::add32(a, b); + const auto sign = u128::from32p(0x80000000); + const auto overflow = (((a ^ summ) & (b ^ summ)) ^ summ ^ u128::eq32(b, sign)) & sign; // calculate msb + CPU.VPR[op.vd].vi = _mm_or_si128(overflow.vi, _mm_srli_epi32(summ.vi, 1)); } void ppu_interpreter::VAVGUB(PPUThread& CPU, ppu_opcode_t op) @@ -173,46 +200,32 @@ void ppu_interpreter::VAVGUH(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VAVGUW(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - 
CPU.VPR[op.vd]._u32[w] = ((u64)CPU.VPR[op.va]._u32[w] + (u64)CPU.VPR[op.vb]._u32[w] + 1) >> 1; - } + const auto a = CPU.VPR[op.va]; + const auto b = CPU.VPR[op.vb]; + const auto summ = u128::add32(u128::add32(a, b), u128::from32p(1)); + const auto carry = _mm_xor_si128(_mm_slli_epi32(sse_cmpgt_epu32(summ.vi, a.vi), 31), _mm_set1_epi32(0x80000000)); + CPU.VPR[op.vd].vi = _mm_or_si128(carry, _mm_srli_epi32(summ.vi, 1)); } void ppu_interpreter::VCFSX(PPUThread& CPU, ppu_opcode_t op) { - u32 scale = 1 << op.vuimm; - - for (uint w = 0; w < 4; w++) - { - CPU.VPR[op.vd]._f[w] = ((float)CPU.VPR[op.vb]._s32[w]) / scale; - } + CPU.VPR[op.vd].vf = _mm_mul_ps(_mm_cvtepi32_ps(CPU.VPR[op.vb].vi), g_ppu_scale_table[0 - op.vuimm]); } void ppu_interpreter::VCFUX(PPUThread& CPU, ppu_opcode_t op) { - u32 scale = 1 << op.vuimm; - - for (uint w = 0; w < 4; w++) - { - CPU.VPR[op.vd]._f[w] = ((float)CPU.VPR[op.vb]._u32[w]) / scale; - } + const auto b = CPU.VPR[op.vb].vi; + const auto fix = _mm_and_ps(_mm_castsi128_ps(_mm_srai_epi32(b, 31)), _mm_set1_ps(0x80000000)); + CPU.VPR[op.vd].vf = _mm_mul_ps(_mm_add_ps(_mm_cvtepi32_ps(_mm_and_si128(b, _mm_set1_epi32(0x7fffffff))), fix), g_ppu_scale_table[0 - op.vuimm]); } void ppu_interpreter::VCMPBFP(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - u32 mask = 1 << 31 | 1 << 30; - - const float a = CPU.VPR[op.va]._f[w]; - const float b = CPU.VPR[op.vb]._f[w]; - - if (a <= b) mask &= ~(1 << 31); - if (a >= -b) mask &= ~(1 << 30); - - CPU.VPR[op.vd]._u32[w] = mask; - } + const auto a = CPU.VPR[op.va].vf; + const auto b = CPU.VPR[op.vb].vf; + const auto sign = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); + const auto bneg = _mm_xor_ps(b, sign); + CPU.VPR[op.vd].vf = _mm_or_ps(_mm_and_ps(_mm_cmple_ps(a, b), sign), _mm_and_ps(_mm_cmpnlt_ps(a, bneg), _mm_castsi128_ps(_mm_set1_epi32(0x40000000)))); } void ppu_interpreter::VCMPBFP_(PPUThread& CPU, ppu_opcode_t op) @@ -242,22 +255,7 @@ void ppu_interpreter::VCMPBFP_(PPUThread& CPU, 
ppu_opcode_t op) void ppu_interpreter::VCMPEQFP(PPUThread& CPU, ppu_opcode_t op) { - int all_equal = 0x8; - int none_equal = 0x2; - - for (uint w = 0; w < 4; w++) - { - if (CPU.VPR[op.va]._f[w] == CPU.VPR[op.vb]._f[w]) - { - CPU.VPR[op.vd]._u32[w] = 0xffffffff; - none_equal = 0; - } - else - { - CPU.VPR[op.vd]._u32[w] = 0; - all_equal = 0; - } - } + CPU.VPR[op.vd].vf = _mm_cmpeq_ps(CPU.VPR[op.va].vf, CPU.VPR[op.vb].vf); } void ppu_interpreter::VCMPEQFP_(PPUThread& CPU, ppu_opcode_t op) @@ -284,22 +282,7 @@ void ppu_interpreter::VCMPEQFP_(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCMPEQUB(PPUThread& CPU, ppu_opcode_t op) { - int all_equal = 0x8; - int none_equal = 0x2; - - for (uint b = 0; b < 16; b++) - { - if (CPU.VPR[op.va]._u8[b] == CPU.VPR[op.vb]._u8[b]) - { - CPU.VPR[op.vd]._u8[b] = 0xff; - none_equal = 0; - } - else - { - CPU.VPR[op.vd]._u8[b] = 0; - all_equal = 0; - } - } + CPU.VPR[op.vd] = u128::eq8(CPU.VPR[op.va], CPU.VPR[op.vb]); } void ppu_interpreter::VCMPEQUB_(PPUThread& CPU, ppu_opcode_t op) @@ -326,22 +309,7 @@ void ppu_interpreter::VCMPEQUB_(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCMPEQUH(PPUThread& CPU, ppu_opcode_t op) { - int all_equal = 0x8; - int none_equal = 0x2; - - for (uint h = 0; h < 8; h++) - { - if (CPU.VPR[op.va]._u16[h] == CPU.VPR[op.vb]._u16[h]) - { - CPU.VPR[op.vd]._u16[h] = 0xffff; - none_equal = 0; - } - else - { - CPU.VPR[op.vd]._u16[h] = 0; - all_equal = 0; - } - } + CPU.VPR[op.vd] = u128::eq16(CPU.VPR[op.va], CPU.VPR[op.vb]); } void ppu_interpreter::VCMPEQUH_(PPUThread& CPU, ppu_opcode_t op) @@ -368,22 +336,7 @@ void ppu_interpreter::VCMPEQUH_(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCMPEQUW(PPUThread& CPU, ppu_opcode_t op) { - int all_equal = 0x8; - int none_equal = 0x2; - - for (uint w = 0; w < 4; w++) - { - if (CPU.VPR[op.va]._u32[w] == CPU.VPR[op.vb]._u32[w]) - { - CPU.VPR[op.vd]._u32[w] = 0xffffffff; - none_equal = 0; - } - else - { - CPU.VPR[op.vd]._u32[w] = 0; - all_equal = 0; - 
} - } + CPU.VPR[op.vd] = u128::eq32(CPU.VPR[op.va], CPU.VPR[op.vb]); } void ppu_interpreter::VCMPEQUW_(PPUThread& CPU, ppu_opcode_t op) @@ -410,22 +363,7 @@ void ppu_interpreter::VCMPEQUW_(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCMPGEFP(PPUThread& CPU, ppu_opcode_t op) { - int all_ge = 0x8; - int none_ge = 0x2; - - for (uint w = 0; w < 4; w++) - { - if (CPU.VPR[op.va]._f[w] >= CPU.VPR[op.vb]._f[w]) - { - CPU.VPR[op.vd]._u32[w] = 0xffffffff; - none_ge = 0; - } - else - { - CPU.VPR[op.vd]._u32[w] = 0; - all_ge = 0; - } - } + CPU.VPR[op.vd].vf = _mm_cmpge_ps(CPU.VPR[op.va].vf, CPU.VPR[op.vb].vf); } void ppu_interpreter::VCMPGEFP_(PPUThread& CPU, ppu_opcode_t op) @@ -452,22 +390,7 @@ void ppu_interpreter::VCMPGEFP_(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCMPGTFP(PPUThread& CPU, ppu_opcode_t op) { - int all_ge = 0x8; - int none_ge = 0x2; - - for (uint w = 0; w < 4; w++) - { - if (CPU.VPR[op.va]._f[w] > CPU.VPR[op.vb]._f[w]) - { - CPU.VPR[op.vd]._u32[w] = 0xffffffff; - none_ge = 0; - } - else - { - CPU.VPR[op.vd]._u32[w] = 0; - all_ge = 0; - } - } + CPU.VPR[op.vd].vf = _mm_cmpgt_ps(CPU.VPR[op.va].vf, CPU.VPR[op.vb].vf); } void ppu_interpreter::VCMPGTFP_(PPUThread& CPU, ppu_opcode_t op) @@ -494,22 +417,7 @@ void ppu_interpreter::VCMPGTFP_(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCMPGTSB(PPUThread& CPU, ppu_opcode_t op) { - int all_gt = 0x8; - int none_gt = 0x2; - - for (uint b = 0; b < 16; b++) - { - if (CPU.VPR[op.va]._s8[b] > CPU.VPR[op.vb]._s8[b]) - { - CPU.VPR[op.vd]._u8[b] = 0xff; - none_gt = 0; - } - else - { - CPU.VPR[op.vd]._u8[b] = 0; - all_gt = 0; - } - } + CPU.VPR[op.vd].vi = _mm_cmpgt_epi8(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); } void ppu_interpreter::VCMPGTSB_(PPUThread& CPU, ppu_opcode_t op) @@ -536,22 +444,7 @@ void ppu_interpreter::VCMPGTSB_(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCMPGTSH(PPUThread& CPU, ppu_opcode_t op) { - int all_gt = 0x8; - int none_gt = 0x2; - - for (uint h = 0; h < 8; 
h++) - { - if (CPU.VPR[op.va]._s16[h] > CPU.VPR[op.vb]._s16[h]) - { - CPU.VPR[op.vd]._u16[h] = 0xffff; - none_gt = 0; - } - else - { - CPU.VPR[op.vd]._u16[h] = 0; - all_gt = 0; - } - } + CPU.VPR[op.vd].vi = _mm_cmpgt_epi16(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); } void ppu_interpreter::VCMPGTSH_(PPUThread& CPU, ppu_opcode_t op) @@ -578,22 +471,7 @@ void ppu_interpreter::VCMPGTSH_(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCMPGTSW(PPUThread& CPU, ppu_opcode_t op) { - int all_gt = 0x8; - int none_gt = 0x2; - - for (uint w = 0; w < 4; w++) - { - if (CPU.VPR[op.va]._s32[w] > CPU.VPR[op.vb]._s32[w]) - { - CPU.VPR[op.vd]._u32[w] = 0xffffffff; - none_gt = 0; - } - else - { - CPU.VPR[op.vd]._u32[w] = 0; - all_gt = 0; - } - } + CPU.VPR[op.vd].vi = _mm_cmpgt_epi32(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); } void ppu_interpreter::VCMPGTSW_(PPUThread& CPU, ppu_opcode_t op) @@ -620,22 +498,7 @@ void ppu_interpreter::VCMPGTSW_(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCMPGTUB(PPUThread& CPU, ppu_opcode_t op) { - int all_gt = 0x8; - int none_gt = 0x2; - - for (uint b = 0; b < 16; b++) - { - if (CPU.VPR[op.va]._u8[b] > CPU.VPR[op.vb]._u8[b]) - { - CPU.VPR[op.vd]._u8[b] = 0xff; - none_gt = 0; - } - else - { - CPU.VPR[op.vd]._u8[b] = 0; - all_gt = 0; - } - } + CPU.VPR[op.vd].vi = sse_cmpgt_epu8(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); } void ppu_interpreter::VCMPGTUB_(PPUThread& CPU, ppu_opcode_t op) @@ -662,22 +525,7 @@ void ppu_interpreter::VCMPGTUB_(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCMPGTUH(PPUThread& CPU, ppu_opcode_t op) { - int all_gt = 0x8; - int none_gt = 0x2; - - for (uint h = 0; h < 8; h++) - { - if (CPU.VPR[op.va]._u16[h] > CPU.VPR[op.vb]._u16[h]) - { - CPU.VPR[op.vd]._u16[h] = 0xffff; - none_gt = 0; - } - else - { - CPU.VPR[op.vd]._u16[h] = 0; - all_gt = 0; - } - } + CPU.VPR[op.vd].vi = sse_cmpgt_epu16(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); } void ppu_interpreter::VCMPGTUH_(PPUThread& CPU, ppu_opcode_t op) @@ -704,22 +552,7 @@ 
void ppu_interpreter::VCMPGTUH_(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCMPGTUW(PPUThread& CPU, ppu_opcode_t op) { - int all_gt = 0x8; - int none_gt = 0x2; - - for (uint w = 0; w < 4; w++) - { - if (CPU.VPR[op.va]._u32[w] > CPU.VPR[op.vb]._u32[w]) - { - CPU.VPR[op.vd]._u32[w] = 0xffffffff; - none_gt = 0; - } - else - { - CPU.VPR[op.vd]._u32[w] = 0; - all_gt = 0; - } - } + CPU.VPR[op.vd].vi = sse_cmpgt_epu32(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); } void ppu_interpreter::VCMPGTUW_(PPUThread& CPU, ppu_opcode_t op) @@ -746,76 +579,25 @@ void ppu_interpreter::VCMPGTUW_(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCTSXS(PPUThread& CPU, ppu_opcode_t op) { - u32 nScale = 1 << op.vuimm; - - for (uint w = 0; w < 4; w++) - { - const float b = CPU.VPR[op.vb]._f[w]; - if (std::isnan(b)) - { - CPU.VPR[op.vd]._s32[w] = 0; - } - else - { - double result = (double)b * nScale; - if (result > 0x7fffffff) - { - CPU.VPR[op.vd]._s32[w] = (int)0x7fffffff; - } - else if (result < -pow(2, 31)) - { - CPU.VPR[op.vd]._s32[w] = (int)0x80000000; - } - else - CPU.VPR[op.vd]._s32[w] = (int)trunc(result); - } - } + const auto scaled = _mm_mul_ps(CPU.VPR[op.vb].vf, g_ppu_scale_table[op.vuimm]); + CPU.VPR[op.vd].vi = _mm_xor_si128(_mm_cvttps_epi32(scaled), _mm_castps_si128(_mm_cmpge_ps(scaled, _mm_set1_ps(0x80000000)))); } void ppu_interpreter::VCTUXS(PPUThread& CPU, ppu_opcode_t op) { - u32 nScale = 1 << op.vuimm; - - for (uint w = 0; w < 4; w++) - { - const float b = CPU.VPR[op.vb]._f[w]; - if (std::isnan(b)) - { - CPU.VPR[op.vd]._s32[w] = 0; - } - else - { - double result = (double)b * nScale; - if (result > 0xffffffffu) - { - CPU.VPR[op.vd]._u32[w] = 0xffffffffu; - } - else if (result < 0) - { - CPU.VPR[op.vd]._u32[w] = 0; - } - else - CPU.VPR[op.vd]._u32[w] = (u32)trunc(result); - } - } + const auto scaled1 = _mm_max_ps(_mm_mul_ps(CPU.VPR[op.vb].vf, g_ppu_scale_table[op.vuimm]), _mm_set1_ps(0.0f)); + const auto scaled2 = _mm_and_ps(_mm_sub_ps(scaled1, 
_mm_set1_ps(0x80000000)), _mm_cmpge_ps(scaled1, _mm_set1_ps(0x80000000))); + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_or_si128(_mm_cvttps_epi32(scaled1), _mm_cvttps_epi32(scaled2)), _mm_castps_si128(_mm_cmpge_ps(scaled1, _mm_set1_ps(0x100000000)))); } void ppu_interpreter::VEXPTEFP(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - const float b = CPU.VPR[op.vb]._f[w]; - CPU.VPR[op.vd]._f[w] = powf(2.0f, b); - } + CPU.VPR[op.vd].vf = sse_exp2_ps(CPU.VPR[op.vb].vf); } void ppu_interpreter::VLOGEFP(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - const float b = CPU.VPR[op.vb]._f[w]; - CPU.VPR[op.vd]._f[w] = log2f(b); - } + CPU.VPR[op.vd].vf = sse_log2_ps(CPU.VPR[op.vb].vf); } void ppu_interpreter::VMADDFP(PPUThread& CPU, ppu_opcode_t op) diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp index 5980d60e28..b4cee8d155 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -27,7 +27,7 @@ public: { for (s32 i = -155; i < 174; i++) { - m_data[i + 155] = _mm_set1_ps(static_cast(pow(2, i))); + m_data[i + 155] = _mm_set1_ps(static_cast(exp2(i))); } } From 72fdcf120c7d9ef6fadd6e22cfa78ffc069110b7 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Tue, 31 Mar 2015 17:54:41 +0300 Subject: [PATCH 18/23] PPU: some instructions replaced --- rpcs3/Emu/Cell/PPUInterpreter.cpp | 134 +++++++++--------------------- 1 file changed, 38 insertions(+), 96 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index ad3c83fbbc..b12a22050e 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -602,75 +602,52 @@ void ppu_interpreter::VLOGEFP(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VMADDFP(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - const float a = CPU.VPR[op.va]._f[w]; - const float b = CPU.VPR[op.vb]._f[w]; - const float c = CPU.VPR[op.vc]._f[w]; - const float result = 
fmaf(a, c, b); - CPU.VPR[op.vd]._f[w] = result; - } + CPU.VPR[op.vd].vf = _mm_add_ps(_mm_mul_ps(CPU.VPR[op.va].vf, CPU.VPR[op.vc].vf), CPU.VPR[op.vb].vf); } void ppu_interpreter::VMAXFP(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - const float a = CPU.VPR[op.va]._f[w]; - const float b = CPU.VPR[op.vb]._f[w]; - if (a > b) - CPU.VPR[op.vd]._f[w] = a; - else if (b > a) - CPU.VPR[op.vd]._f[w] = b; - else if (CPU.VPR[op.vb]._u32[w] == 0x80000000) - CPU.VPR[op.vd]._f[w] = a; // max(+0,-0) = +0 - else - CPU.VPR[op.vd]._f[w] = b; - } + CPU.VPR[op.vd].vf = _mm_max_ps(CPU.VPR[op.va].vf, CPU.VPR[op.vb].vf); } void ppu_interpreter::VMAXSB(PPUThread& CPU, ppu_opcode_t op) { - for (uint b = 0; b < 16; b++) - CPU.VPR[op.vd]._s8[b] = std::max(CPU.VPR[op.va]._s8[b], CPU.VPR[op.vb]._s8[b]); + const auto a = CPU.VPR[op.va].vi; + const auto b = CPU.VPR[op.vb].vi; + const auto m = _mm_cmpgt_epi8(a, b); + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_and_si128(a, m), _mm_andnot_si128(m, b)); } void ppu_interpreter::VMAXSH(PPUThread& CPU, ppu_opcode_t op) { - for (uint h = 0; h < 8; h++) - { - CPU.VPR[op.vd]._s16[h] = std::max(CPU.VPR[op.va]._s16[h], CPU.VPR[op.vb]._s16[h]); - } + CPU.VPR[op.vd].vi = _mm_max_epi16(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); } void ppu_interpreter::VMAXSW(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - CPU.VPR[op.vd]._s32[w] = std::max(CPU.VPR[op.va]._s32[w], CPU.VPR[op.vb]._s32[w]); - } + const auto a = CPU.VPR[op.va].vi; + const auto b = CPU.VPR[op.vb].vi; + const auto m = _mm_cmpgt_epi32(a, b); + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_and_si128(a, m), _mm_andnot_si128(m, b)); } void ppu_interpreter::VMAXUB(PPUThread& CPU, ppu_opcode_t op) { - for (uint b = 0; b < 16; b++) - CPU.VPR[op.vd]._u8[b] = std::max(CPU.VPR[op.va]._u8[b], CPU.VPR[op.vb]._u8[b]); + CPU.VPR[op.vd].vi = _mm_max_epu8(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); } void ppu_interpreter::VMAXUH(PPUThread& CPU, ppu_opcode_t op) { - for (uint h = 0; h < 8; 
h++) - { - CPU.VPR[op.vd]._u16[h] = std::max(CPU.VPR[op.va]._u16[h], CPU.VPR[op.vb]._u16[h]); - } + const auto mask = _mm_set1_epi32(0x80008000); + CPU.VPR[op.vd].vi = _mm_xor_si128(_mm_max_epi16(_mm_xor_si128(CPU.VPR[op.va].vi, mask), _mm_xor_si128(CPU.VPR[op.vb].vi, mask)), mask); } void ppu_interpreter::VMAXUW(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - CPU.VPR[op.vd]._u32[w] = std::max(CPU.VPR[op.va]._u32[w], CPU.VPR[op.vb]._u32[w]); - } + const auto a = CPU.VPR[op.va].vi; + const auto b = CPU.VPR[op.vb].vi; + const auto m = sse_cmpgt_epu32(a, b); + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_and_si128(a, m), _mm_andnot_si128(m, b)); } void ppu_interpreter::VMHADDSHS(PPUThread& CPU, ppu_opcode_t op) @@ -715,67 +692,47 @@ void ppu_interpreter::VMHRADDSHS(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VMINFP(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - const float a = CPU.VPR[op.va]._f[w]; - const float b = CPU.VPR[op.vb]._f[w]; - if (a < b) - CPU.VPR[op.vd]._f[w] = a; - else if (b < a) - CPU.VPR[op.vd]._f[w] = b; - else if (CPU.VPR[op.vb]._u32[w] == 0x00000000) - CPU.VPR[op.vd]._f[w] = a; // min(-0,+0) = -0 - else - CPU.VPR[op.vd]._f[w] = b; - } + CPU.VPR[op.vd].vf = _mm_min_ps(CPU.VPR[op.va].vf, CPU.VPR[op.vb].vf); } void ppu_interpreter::VMINSB(PPUThread& CPU, ppu_opcode_t op) { - for (uint b = 0; b < 16; b++) - { - CPU.VPR[op.vd]._s8[b] = std::min(CPU.VPR[op.va]._s8[b], CPU.VPR[op.vb]._s8[b]); - } + const auto a = CPU.VPR[op.va].vi; + const auto b = CPU.VPR[op.vb].vi; + const auto m = _mm_cmpgt_epi8(a, b); + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_andnot_si128(a, m), _mm_and_si128(m, b)); } void ppu_interpreter::VMINSH(PPUThread& CPU, ppu_opcode_t op) { - for (uint h = 0; h < 8; h++) - { - CPU.VPR[op.vd]._s16[h] = std::min(CPU.VPR[op.va]._s16[h], CPU.VPR[op.vb]._s16[h]); - } + CPU.VPR[op.vd].vi = _mm_min_epi16(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); } void ppu_interpreter::VMINSW(PPUThread& CPU, ppu_opcode_t 
op) { - for (uint w = 0; w < 4; w++) - { - CPU.VPR[op.vd]._s32[w] = std::min(CPU.VPR[op.va]._s32[w], CPU.VPR[op.vb]._s32[w]); - } + const auto a = CPU.VPR[op.va].vi; + const auto b = CPU.VPR[op.vb].vi; + const auto m = _mm_cmpgt_epi32(a, b); + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_andnot_si128(a, m), _mm_and_si128(m, b)); } void ppu_interpreter::VMINUB(PPUThread& CPU, ppu_opcode_t op) { - for (uint b = 0; b < 16; b++) - { - CPU.VPR[op.vd]._u8[b] = std::min(CPU.VPR[op.va]._u8[b], CPU.VPR[op.vb]._u8[b]); - } + CPU.VPR[op.vd].vi = _mm_min_epu8(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); } void ppu_interpreter::VMINUH(PPUThread& CPU, ppu_opcode_t op) { - for (uint h = 0; h < 8; h++) - { - CPU.VPR[op.vd]._u16[h] = std::min(CPU.VPR[op.va]._u16[h], CPU.VPR[op.vb]._u16[h]); - } + const auto mask = _mm_set1_epi32(0x80008000); + CPU.VPR[op.vd].vi = _mm_xor_si128(_mm_min_epi16(_mm_xor_si128(CPU.VPR[op.va].vi, mask), _mm_xor_si128(CPU.VPR[op.vb].vi, mask)), mask); } void ppu_interpreter::VMINUW(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - CPU.VPR[op.vd]._u32[w] = std::min(CPU.VPR[op.va]._u32[w], CPU.VPR[op.vb]._u32[w]); - } + const auto a = CPU.VPR[op.va].vi; + const auto b = CPU.VPR[op.vb].vi; + const auto m = sse_cmpgt_epu32(a, b); + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_andnot_si128(a, m), _mm_and_si128(m, b)); } void ppu_interpreter::VMLADDUHM(PPUThread& CPU, ppu_opcode_t op) @@ -1036,14 +993,7 @@ void ppu_interpreter::VMULOUH(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VNMSUBFP(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - const float a = CPU.VPR[op.va]._f[w]; - const float b = CPU.VPR[op.vb]._f[w]; - const float c = CPU.VPR[op.vc]._f[w]; - const float result = -fmaf(a, c, -b); - CPU.VPR[op.vd]._f[w] = result; - } + CPU.VPR[op.vd].vf = _mm_sub_ps(CPU.VPR[op.vb].vf, _mm_mul_ps(CPU.VPR[op.va].vf, CPU.VPR[op.vc].vf)); } void ppu_interpreter::VNOR(PPUThread& CPU, ppu_opcode_t op) @@ -1302,11 +1252,7 @@ void 
ppu_interpreter::VPKUWUS(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VREFP(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - const float b = CPU.VPR[op.vb]._f[w]; - CPU.VPR[op.vd]._f[w] = 1.0f / b; - } + CPU.VPR[op.vd].vf = _mm_rcp_ps(CPU.VPR[op.vb].vf); } void ppu_interpreter::VRFIM(PPUThread& CPU, ppu_opcode_t op) @@ -1373,11 +1319,7 @@ void ppu_interpreter::VRLW(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VRSQRTEFP(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - const float b = CPU.VPR[op.vb]._f[w]; - CPU.VPR[op.vd]._f[w] = 1.0f / sqrtf(b); - } + CPU.VPR[op.vd].vf = _mm_rsqrt_ps(CPU.VPR[op.vb].vf); } void ppu_interpreter::VSEL(PPUThread& CPU, ppu_opcode_t op) From 6b9a6c50fe55fb193f5d749f0824b597cfcc4de3 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Wed, 1 Apr 2015 17:01:04 +0300 Subject: [PATCH 19/23] PPU: some instructions replaced --- Utilities/BEType.h | 10 + rpcs3/Emu/Cell/PPUInterpreter.cpp | 293 ++++-------------------------- 2 files changed, 42 insertions(+), 261 deletions(-) diff --git a/Utilities/BEType.h b/Utilities/BEType.h index 9f4c96b5de..806efcbdb2 100644 --- a/Utilities/BEType.h +++ b/Utilities/BEType.h @@ -354,6 +354,16 @@ union _CRT_ALIGN(16) u128 return from64(~_u64[0], ~_u64[1]); } + __forceinline bool test() const + { + return _u64[0] || _u64[1]; + } + + __forceinline bool inv_test() const + { + return ~_u64[0] || ~_u64[1]; + } + // result = (~left) & (right) static __forceinline u128 andnot(const u128& left, const u128& right) { diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index b12a22050e..887266218f 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -225,32 +225,14 @@ void ppu_interpreter::VCMPBFP(PPUThread& CPU, ppu_opcode_t op) const auto b = CPU.VPR[op.vb].vf; const auto sign = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); const auto bneg = _mm_xor_ps(b, sign); - CPU.VPR[op.vd].vf = 
_mm_or_ps(_mm_and_ps(_mm_cmple_ps(a, b), sign), _mm_and_ps(_mm_cmpnlt_ps(a, bneg), _mm_castsi128_ps(_mm_set1_epi32(0x40000000)))); + CPU.VPR[op.vd].vf = _mm_or_ps(_mm_and_ps(_mm_cmple_ps(a, b), sign), _mm_and_ps(_mm_cmpge_ps(a, bneg), _mm_castsi128_ps(_mm_set1_epi32(0x40000000)))); } void ppu_interpreter::VCMPBFP_(PPUThread& CPU, ppu_opcode_t op) { - bool allInBounds = true; + VCMPBFP(CPU, op); - for (uint w = 0; w < 4; w++) - { - u32 mask = 1 << 31 | 1 << 30; - - const float a = CPU.VPR[op.va]._f[w]; - const float b = CPU.VPR[op.vb]._f[w]; - - if (a <= b) mask &= ~(1 << 31); - if (a >= -b) mask &= ~(1 << 30); - - CPU.VPR[op.vd]._u32[w] = mask; - - if (mask) - allInBounds = false; - } - - // Bit n°2 of CR6 - CPU.SetCR(6, 0); - CPU.SetCRBit(6, 0x2, allInBounds); + CPU.CR.cr6 = CPU.VPR[op.vd].test() ? 0 : 2; // set 2 if all in bounds } void ppu_interpreter::VCMPEQFP(PPUThread& CPU, ppu_opcode_t op) @@ -260,24 +242,9 @@ void ppu_interpreter::VCMPEQFP(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCMPEQFP_(PPUThread& CPU, ppu_opcode_t op) { - int all_equal = 0x8; - int none_equal = 0x2; + VCMPEQFP(CPU, op); - for (uint w = 0; w < 4; w++) - { - if (CPU.VPR[op.va]._f[w] == CPU.VPR[op.vb]._f[w]) - { - CPU.VPR[op.vd]._u32[w] = 0xffffffff; - none_equal = 0; - } - else - { - CPU.VPR[op.vd]._u32[w] = 0; - all_equal = 0; - } - } - - CPU.CR.cr6 = all_equal | none_equal; + CPU.CR.cr6 = CPU.VPR[op.vd].test() ? (CPU.VPR[op.vd].inv_test() ? 
0 : 8) : 2; // set 2 if none equal, 8 if all equal } void ppu_interpreter::VCMPEQUB(PPUThread& CPU, ppu_opcode_t op) @@ -287,24 +254,9 @@ void ppu_interpreter::VCMPEQUB(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCMPEQUB_(PPUThread& CPU, ppu_opcode_t op) { - int all_equal = 0x8; - int none_equal = 0x2; + VCMPEQUB(CPU, op); - for (uint b = 0; b < 16; b++) - { - if (CPU.VPR[op.va]._u8[b] == CPU.VPR[op.vb]._u8[b]) - { - CPU.VPR[op.vd]._u8[b] = 0xff; - none_equal = 0; - } - else - { - CPU.VPR[op.vd]._u8[b] = 0; - all_equal = 0; - } - } - - CPU.CR.cr6 = all_equal | none_equal; + CPU.CR.cr6 = CPU.VPR[op.vd].test() ? (CPU.VPR[op.vd].inv_test() ? 0 : 8) : 2; // set 2 if none equal, 8 if all equal } void ppu_interpreter::VCMPEQUH(PPUThread& CPU, ppu_opcode_t op) @@ -314,24 +266,9 @@ void ppu_interpreter::VCMPEQUH(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCMPEQUH_(PPUThread& CPU, ppu_opcode_t op) { - int all_equal = 0x8; - int none_equal = 0x2; + VCMPEQUH(CPU, op); - for (uint h = 0; h < 8; h++) - { - if (CPU.VPR[op.va]._u16[h] == CPU.VPR[op.vb]._u16[h]) - { - CPU.VPR[op.vd]._u16[h] = 0xffff; - none_equal = 0; - } - else - { - CPU.VPR[op.vd]._u16[h] = 0; - all_equal = 0; - } - } - - CPU.CR.cr6 = all_equal | none_equal; + CPU.CR.cr6 = CPU.VPR[op.vd].test() ? (CPU.VPR[op.vd].inv_test() ? 0 : 8) : 2; // set 2 if none equal, 8 if all equal } void ppu_interpreter::VCMPEQUW(PPUThread& CPU, ppu_opcode_t op) @@ -341,24 +278,9 @@ void ppu_interpreter::VCMPEQUW(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCMPEQUW_(PPUThread& CPU, ppu_opcode_t op) { - int all_equal = 0x8; - int none_equal = 0x2; + VCMPEQUW(CPU, op); - for (uint w = 0; w < 4; w++) - { - if (CPU.VPR[op.va]._u32[w] == CPU.VPR[op.vb]._u32[w]) - { - CPU.VPR[op.vd]._u32[w] = 0xffffffff; - none_equal = 0; - } - else - { - CPU.VPR[op.vd]._u32[w] = 0; - all_equal = 0; - } - } - - CPU.CR.cr6 = all_equal | none_equal; + CPU.CR.cr6 = CPU.VPR[op.vd].test() ? (CPU.VPR[op.vd].inv_test() ? 
0 : 8) : 2; // set 2 if none equal, 8 if all equal } void ppu_interpreter::VCMPGEFP(PPUThread& CPU, ppu_opcode_t op) @@ -368,24 +290,9 @@ void ppu_interpreter::VCMPGEFP(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCMPGEFP_(PPUThread& CPU, ppu_opcode_t op) { - int all_ge = 0x8; - int none_ge = 0x2; + VCMPGEFP(CPU, op); - for (uint w = 0; w < 4; w++) - { - if (CPU.VPR[op.va]._f[w] >= CPU.VPR[op.vb]._f[w]) - { - CPU.VPR[op.vd]._u32[w] = 0xffffffff; - none_ge = 0; - } - else - { - CPU.VPR[op.vd]._u32[w] = 0; - all_ge = 0; - } - } - - CPU.CR.cr6 = all_ge | none_ge; + CPU.CR.cr6 = CPU.VPR[op.vd].test() ? (CPU.VPR[op.vd].inv_test() ? 0 : 8) : 2; } void ppu_interpreter::VCMPGTFP(PPUThread& CPU, ppu_opcode_t op) @@ -395,24 +302,9 @@ void ppu_interpreter::VCMPGTFP(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCMPGTFP_(PPUThread& CPU, ppu_opcode_t op) { - int all_ge = 0x8; - int none_ge = 0x2; + VCMPGTFP(CPU, op); - for (uint w = 0; w < 4; w++) - { - if (CPU.VPR[op.va]._f[w] > CPU.VPR[op.vb]._f[w]) - { - CPU.VPR[op.vd]._u32[w] = 0xffffffff; - none_ge = 0; - } - else - { - CPU.VPR[op.vd]._u32[w] = 0; - all_ge = 0; - } - } - - CPU.CR.cr6 = all_ge | none_ge; + CPU.CR.cr6 = CPU.VPR[op.vd].test() ? (CPU.VPR[op.vd].inv_test() ? 0 : 8) : 2; } void ppu_interpreter::VCMPGTSB(PPUThread& CPU, ppu_opcode_t op) @@ -422,24 +314,9 @@ void ppu_interpreter::VCMPGTSB(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCMPGTSB_(PPUThread& CPU, ppu_opcode_t op) { - int all_gt = 0x8; - int none_gt = 0x2; + VCMPGTSB(CPU, op); - for (uint b = 0; b < 16; b++) - { - if (CPU.VPR[op.va]._s8[b] > CPU.VPR[op.vb]._s8[b]) - { - CPU.VPR[op.vd]._u8[b] = 0xff; - none_gt = 0; - } - else - { - CPU.VPR[op.vd]._u8[b] = 0; - all_gt = 0; - } - } - - CPU.CR.cr6 = all_gt | none_gt; + CPU.CR.cr6 = CPU.VPR[op.vd].test() ? (CPU.VPR[op.vd].inv_test() ? 
0 : 8) : 2; } void ppu_interpreter::VCMPGTSH(PPUThread& CPU, ppu_opcode_t op) @@ -449,24 +326,9 @@ void ppu_interpreter::VCMPGTSH(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCMPGTSH_(PPUThread& CPU, ppu_opcode_t op) { - int all_gt = 0x8; - int none_gt = 0x2; + VCMPGTSH(CPU, op); - for (uint h = 0; h < 8; h++) - { - if (CPU.VPR[op.va]._s16[h] > CPU.VPR[op.vb]._s16[h]) - { - CPU.VPR[op.vd]._u16[h] = 0xffff; - none_gt = 0; - } - else - { - CPU.VPR[op.vd]._u16[h] = 0; - all_gt = 0; - } - } - - CPU.CR.cr6 = all_gt | none_gt; + CPU.CR.cr6 = CPU.VPR[op.vd].test() ? (CPU.VPR[op.vd].inv_test() ? 0 : 8) : 2; } void ppu_interpreter::VCMPGTSW(PPUThread& CPU, ppu_opcode_t op) @@ -476,24 +338,9 @@ void ppu_interpreter::VCMPGTSW(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCMPGTSW_(PPUThread& CPU, ppu_opcode_t op) { - int all_gt = 0x8; - int none_gt = 0x2; + VCMPGTSW(CPU, op); - for (uint w = 0; w < 4; w++) - { - if (CPU.VPR[op.va]._s32[w] > CPU.VPR[op.vb]._s32[w]) - { - CPU.VPR[op.vd]._u32[w] = 0xffffffff; - none_gt = 0; - } - else - { - CPU.VPR[op.vd]._u32[w] = 0; - all_gt = 0; - } - } - - CPU.CR.cr6 = all_gt | none_gt; + CPU.CR.cr6 = CPU.VPR[op.vd].test() ? (CPU.VPR[op.vd].inv_test() ? 0 : 8) : 2; } void ppu_interpreter::VCMPGTUB(PPUThread& CPU, ppu_opcode_t op) @@ -503,24 +350,9 @@ void ppu_interpreter::VCMPGTUB(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCMPGTUB_(PPUThread& CPU, ppu_opcode_t op) { - int all_gt = 0x8; - int none_gt = 0x2; + VCMPGTUB(CPU, op); - for (uint b = 0; b < 16; b++) - { - if (CPU.VPR[op.va]._u8[b] > CPU.VPR[op.vb]._u8[b]) - { - CPU.VPR[op.vd]._u8[b] = 0xff; - none_gt = 0; - } - else - { - CPU.VPR[op.vd]._u8[b] = 0; - all_gt = 0; - } - } - - CPU.CR.cr6 = all_gt | none_gt; + CPU.CR.cr6 = CPU.VPR[op.vd].test() ? (CPU.VPR[op.vd].inv_test() ? 
0 : 8) : 2; } void ppu_interpreter::VCMPGTUH(PPUThread& CPU, ppu_opcode_t op) @@ -530,24 +362,9 @@ void ppu_interpreter::VCMPGTUH(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCMPGTUH_(PPUThread& CPU, ppu_opcode_t op) { - int all_gt = 0x8; - int none_gt = 0x2; + VCMPGTUH(CPU, op); - for (uint h = 0; h < 8; h++) - { - if (CPU.VPR[op.va]._u16[h] > CPU.VPR[op.vb]._u16[h]) - { - CPU.VPR[op.vd]._u16[h] = 0xffff; - none_gt = 0; - } - else - { - CPU.VPR[op.vd]._u16[h] = 0; - all_gt = 0; - } - } - - CPU.CR.cr6 = all_gt | none_gt; + CPU.CR.cr6 = CPU.VPR[op.vd].test() ? (CPU.VPR[op.vd].inv_test() ? 0 : 8) : 2; } void ppu_interpreter::VCMPGTUW(PPUThread& CPU, ppu_opcode_t op) @@ -557,24 +374,9 @@ void ppu_interpreter::VCMPGTUW(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VCMPGTUW_(PPUThread& CPU, ppu_opcode_t op) { - int all_gt = 0x8; - int none_gt = 0x2; + VCMPGTUW(CPU, op); - for (uint w = 0; w < 4; w++) - { - if (CPU.VPR[op.va]._u32[w] > CPU.VPR[op.vb]._u32[w]) - { - CPU.VPR[op.vd]._u32[w] = 0xffffffff; - none_gt = 0; - } - else - { - CPU.VPR[op.vd]._u32[w] = 0; - all_gt = 0; - } - } - - CPU.CR.cr6 = all_gt | none_gt; + CPU.CR.cr6 = CPU.VPR[op.vd].test() ? (CPU.VPR[op.vd].inv_test() ? 
0 : 8) : 2; } void ppu_interpreter::VCTSXS(PPUThread& CPU, ppu_opcode_t op) @@ -652,42 +454,14 @@ void ppu_interpreter::VMAXUW(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VMHADDSHS(PPUThread& CPU, ppu_opcode_t op) { - for (uint h = 0; h < 8; h++) - { - s32 result = (s32)CPU.VPR[op.va]._s16[h] * (s32)CPU.VPR[op.vb]._s16[h]; - result = (result >> 15) + (s32)CPU.VPR[op.vc]._s16[h]; - - if (result > INT16_MAX) - { - CPU.VPR[op.vd]._s16[h] = (s16)INT16_MAX; - } - else if (result < INT16_MIN) - { - CPU.VPR[op.vd]._s16[h] = (s16)INT16_MIN; - } - else - CPU.VPR[op.vd]._s16[h] = (s16)result; - } + const auto a = CPU.VPR[op.va].vi; + const auto b = CPU.VPR[op.vb].vi; + CPU.VPR[op.vd].vi = _mm_adds_epi16(_mm_or_si128(_mm_srli_epi16(_mm_mullo_epi16(a, b), 15), _mm_slli_epi16(_mm_mulhi_epi16(a, b), 1)), CPU.VPR[op.vc].vi); } void ppu_interpreter::VMHRADDSHS(PPUThread& CPU, ppu_opcode_t op) { - for (uint h = 0; h < 8; h++) - { - s32 result = ((s32)CPU.VPR[op.va]._s16[h] * (s32)CPU.VPR[op.vb]._s16[h]) + 0x4000; - result = (result >> 15) + (s32)CPU.VPR[op.vc]._s16[h]; - - if (result > INT16_MAX) - { - CPU.VPR[op.vd]._s16[h] = (s16)INT16_MAX; - } - else if (result < INT16_MIN) - { - CPU.VPR[op.vd]._s16[h] = (s16)INT16_MIN; - } - else - CPU.VPR[op.vd]._s16[h] = (s16)result; - } + CPU.VPR[op.vd].vi = _mm_adds_epi16(_mm_mulhrs_epi16(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi), CPU.VPR[op.vc].vi); } void ppu_interpreter::VMINFP(PPUThread& CPU, ppu_opcode_t op) @@ -737,10 +511,7 @@ void ppu_interpreter::VMINUW(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VMLADDUHM(PPUThread& CPU, ppu_opcode_t op) { - for (uint h = 0; h < 8; h++) - { - CPU.VPR[op.vd]._u16[h] = CPU.VPR[op.va]._u16[h] * CPU.VPR[op.vb]._u16[h] + CPU.VPR[op.vc]._u16[h]; - } + CPU.VPR[op.vd].vi = _mm_add_epi16(_mm_mullo_epi16(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi), CPU.VPR[op.vc].vi); } void ppu_interpreter::VMRGHB(PPUThread& CPU, ppu_opcode_t op) From 59effb34124ffe82d8dbed7caf8b56abfade6e43 Mon Sep 17 00:00:00 
2001 From: Nekotekina Date: Thu, 2 Apr 2015 18:22:43 +0300 Subject: [PATCH 20/23] PPU: some instructions replaced --- rpcs3/Emu/Cell/PPUInterpreter.cpp | 229 +++++++----------------------- 1 file changed, 55 insertions(+), 174 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index 887266218f..5546f95e41 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -516,100 +516,50 @@ void ppu_interpreter::VMLADDUHM(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VMRGHB(PPUThread& CPU, ppu_opcode_t op) { - u128 VA = CPU.VPR[op.va]; - u128 VB = CPU.VPR[op.vb]; - for (uint h = 0; h < 8; h++) - { - CPU.VPR[op.vd]._u8[15 - h * 2] = VA._u8[15 - h]; - CPU.VPR[op.vd]._u8[15 - h * 2 - 1] = VB._u8[15 - h]; - } + CPU.VPR[op.vd].vi = _mm_unpackhi_epi8(CPU.VPR[op.vb].vi, CPU.VPR[op.va].vi); } void ppu_interpreter::VMRGHH(PPUThread& CPU, ppu_opcode_t op) { - u128 VA = CPU.VPR[op.va]; - u128 VB = CPU.VPR[op.vb]; - for (uint w = 0; w < 4; w++) - { - CPU.VPR[op.vd]._u16[7 - w * 2] = VA._u16[7 - w]; - CPU.VPR[op.vd]._u16[7 - w * 2 - 1] = VB._u16[7 - w]; - } + CPU.VPR[op.vd].vi = _mm_unpackhi_epi16(CPU.VPR[op.vb].vi, CPU.VPR[op.va].vi); } void ppu_interpreter::VMRGHW(PPUThread& CPU, ppu_opcode_t op) { - u128 VA = CPU.VPR[op.va]; - u128 VB = CPU.VPR[op.vb]; - for (uint d = 0; d < 2; d++) - { - CPU.VPR[op.vd]._u32[3 - d * 2] = VA._u32[3 - d]; - CPU.VPR[op.vd]._u32[3 - d * 2 - 1] = VB._u32[3 - d]; - } + CPU.VPR[op.vd].vi = _mm_unpackhi_epi32(CPU.VPR[op.vb].vi, CPU.VPR[op.va].vi); } void ppu_interpreter::VMRGLB(PPUThread& CPU, ppu_opcode_t op) { - u128 VA = CPU.VPR[op.va]; - u128 VB = CPU.VPR[op.vb]; - for (uint h = 0; h < 8; h++) - { - CPU.VPR[op.vd]._u8[15 - h * 2] = VA._u8[7 - h]; - CPU.VPR[op.vd]._u8[15 - h * 2 - 1] = VB._u8[7 - h]; - } + CPU.VPR[op.vd].vi = _mm_unpacklo_epi8(CPU.VPR[op.vb].vi, CPU.VPR[op.va].vi); } void ppu_interpreter::VMRGLH(PPUThread& CPU, ppu_opcode_t op) { - u128 VA = 
CPU.VPR[op.va]; - u128 VB = CPU.VPR[op.vb]; - for (uint w = 0; w < 4; w++) - { - CPU.VPR[op.vd]._u16[7 - w * 2] = VA._u16[3 - w]; - CPU.VPR[op.vd]._u16[7 - w * 2 - 1] = VB._u16[3 - w]; - } + CPU.VPR[op.vd].vi = _mm_unpacklo_epi16(CPU.VPR[op.vb].vi, CPU.VPR[op.va].vi); } void ppu_interpreter::VMRGLW(PPUThread& CPU, ppu_opcode_t op) { - u128 VA = CPU.VPR[op.va]; - u128 VB = CPU.VPR[op.vb]; - for (uint d = 0; d < 2; d++) - { - CPU.VPR[op.vd]._u32[3 - d * 2] = VA._u32[1 - d]; - CPU.VPR[op.vd]._u32[3 - d * 2 - 1] = VB._u32[1 - d]; - } + CPU.VPR[op.vd].vi = _mm_unpacklo_epi32(CPU.VPR[op.vb].vi, CPU.VPR[op.va].vi); } void ppu_interpreter::VMSUMMBM(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - s32 result = 0; - - for (uint b = 0; b < 4; b++) - { - result += CPU.VPR[op.va]._s8[w * 4 + b] * CPU.VPR[op.vb]._u8[w * 4 + b]; - } - - result += CPU.VPR[op.vc]._s32[w]; - CPU.VPR[op.vd]._s32[w] = result; - } + const auto a = CPU.VPR[op.va].vi; // signed bytes + const auto b = CPU.VPR[op.vb].vi; // unsigned bytes + const auto ah = _mm_srai_epi16(a, 8); + const auto bh = _mm_srli_epi16(b, 8); + const auto al = _mm_srai_epi16(_mm_srli_epi16(a, 8), 8); + const auto bl = _mm_and_si128(b, _mm_set1_epi16(0x00ff)); + const auto sh = _mm_madd_epi16(ah, bh); + const auto sl = _mm_madd_epi16(al, bl); + CPU.VPR[op.vd].vi = _mm_add_epi32(_mm_add_epi32(CPU.VPR[op.vc].vi, sh), sl); } void ppu_interpreter::VMSUMSHM(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - s32 result = 0; - - for (uint h = 0; h < 2; h++) - { - result += CPU.VPR[op.va]._s16[w * 2 + h] * CPU.VPR[op.vb]._s16[w * 2 + h]; - } - - result += CPU.VPR[op.vc]._s32[w]; - CPU.VPR[op.vd]._s32[w] = result; - } + CPU.VPR[op.vd].vi = _mm_add_epi32(_mm_madd_epi16(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi), CPU.VPR[op.vc].vi); } void ppu_interpreter::VMSUMSHS(PPUThread& CPU, ppu_opcode_t op) @@ -643,34 +593,27 @@ void ppu_interpreter::VMSUMSHS(PPUThread& CPU, ppu_opcode_t op) void 
ppu_interpreter::VMSUMUBM(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - u32 result = 0; - - for (uint b = 0; b < 4; b++) - { - result += (u32)CPU.VPR[op.va]._u8[w * 4 + b] * (u32)CPU.VPR[op.vb]._u8[w * 4 + b]; - } - - result += CPU.VPR[op.vc]._u32[w]; - CPU.VPR[op.vd]._u32[w] = result; - } + const auto a = CPU.VPR[op.va].vi; + const auto b = CPU.VPR[op.vb].vi; + const auto mask = _mm_set1_epi16(0x00ff); + const auto ah = _mm_srli_epi16(a, 8); + const auto al = _mm_and_si128(a, mask); + const auto bh = _mm_srli_epi16(b, 8); + const auto bl = _mm_and_si128(b, mask); + const auto sh = _mm_madd_epi16(ah, bh); + const auto sl = _mm_madd_epi16(al, bl); + CPU.VPR[op.vd].vi = _mm_add_epi32(_mm_add_epi32(CPU.VPR[op.vc].vi, sh), sl); } void ppu_interpreter::VMSUMUHM(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - u32 result = 0; - - for (uint h = 0; h < 2; h++) - { - result += (u32)CPU.VPR[op.va]._u16[w * 2 + h] * (u32)CPU.VPR[op.vb]._u16[w * 2 + h]; - } - - result += CPU.VPR[op.vc]._u32[w]; - CPU.VPR[op.vd]._u32[w] = result; - } + const auto a = CPU.VPR[op.va].vi; + const auto b = CPU.VPR[op.vb].vi; + const auto ml = _mm_mullo_epi16(a, b); // low results + const auto mh = _mm_mulhi_epu16(a, b); // high results + const auto ls = _mm_add_epi32(_mm_srli_epi32(ml, 16), _mm_and_si128(ml, _mm_set1_epi32(0x0000ffff))); + const auto hs = _mm_add_epi32(_mm_slli_epi32(mh, 16), _mm_and_si128(mh, _mm_set1_epi32(0xffff0000))); + CPU.VPR[op.vd].vi = _mm_add_epi32(_mm_add_epi32(CPU.VPR[op.vc].vi, ls), hs); } void ppu_interpreter::VMSUMUHS(PPUThread& CPU, ppu_opcode_t op) @@ -700,66 +643,52 @@ void ppu_interpreter::VMSUMUHS(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VMULESB(PPUThread& CPU, ppu_opcode_t op) { - for (uint h = 0; h < 8; h++) - { - CPU.VPR[op.vd]._s16[h] = (s16)CPU.VPR[op.va]._s8[h * 2 + 1] * (s16)CPU.VPR[op.vb]._s8[h * 2 + 1]; - } + CPU.VPR[op.vd].vi = _mm_mullo_epi16(_mm_srai_epi16(CPU.VPR[op.va].vi, 8), 
_mm_srai_epi16(CPU.VPR[op.vb].vi, 8)); } void ppu_interpreter::VMULESH(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - CPU.VPR[op.vd]._s32[w] = (s32)CPU.VPR[op.va]._s16[w * 2 + 1] * (s32)CPU.VPR[op.vb]._s16[w * 2 + 1]; - } + CPU.VPR[op.vd].vi = _mm_madd_epi16(_mm_srli_epi16(CPU.VPR[op.va].vi, 16), _mm_srli_epi16(CPU.VPR[op.vb].vi, 16)); } void ppu_interpreter::VMULEUB(PPUThread& CPU, ppu_opcode_t op) { - for (uint h = 0; h < 8; h++) - { - CPU.VPR[op.vd]._u16[h] = (u16)CPU.VPR[op.va]._u8[h * 2 + 1] * (u16)CPU.VPR[op.vb]._u8[h * 2 + 1]; - } + CPU.VPR[op.vd].vi = _mm_mullo_epi16(_mm_srli_epi16(CPU.VPR[op.va].vi, 8), _mm_srli_epi16(CPU.VPR[op.vb].vi, 8)); } void ppu_interpreter::VMULEUH(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - CPU.VPR[op.vd]._u32[w] = (u32)CPU.VPR[op.va]._u16[w * 2 + 1] * (u32)CPU.VPR[op.vb]._u16[w * 2 + 1]; - } + const auto a = CPU.VPR[op.va].vi; + const auto b = CPU.VPR[op.vb].vi; + const auto ml = _mm_mullo_epi16(a, b); + const auto mh = _mm_mulhi_epu16(a, b); + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_srli_epi32(ml, 16), _mm_and_si128(mh, _mm_set1_epi32(0xffff0000))); } void ppu_interpreter::VMULOSB(PPUThread& CPU, ppu_opcode_t op) { - for (uint h = 0; h < 8; h++) - { - CPU.VPR[op.vd]._s16[h] = (s16)CPU.VPR[op.va]._s8[h * 2] * (s16)CPU.VPR[op.vb]._s8[h * 2]; - } + CPU.VPR[op.vd].vi = _mm_mullo_epi16(_mm_srai_epi16(_mm_slli_epi16(CPU.VPR[op.va].vi, 8), 8), _mm_srai_epi16(_mm_slli_epi16(CPU.VPR[op.vb].vi, 8), 8)); } void ppu_interpreter::VMULOSH(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - CPU.VPR[op.vd]._s32[w] = (s32)CPU.VPR[op.va]._s16[w * 2] * (s32)CPU.VPR[op.vb]._s16[w * 2]; - } + const auto mask = _mm_set1_epi32(0x0000ffff); + CPU.VPR[op.vd].vi = _mm_madd_epi16(_mm_and_si128(CPU.VPR[op.va].vi, mask), _mm_and_si128(CPU.VPR[op.vb].vi, mask)); } void ppu_interpreter::VMULOUB(PPUThread& CPU, ppu_opcode_t op) { - for (uint h = 0; h < 8; h++) - { - CPU.VPR[op.vd]._u16[h] = 
(u16)CPU.VPR[op.va]._u8[h * 2] * (u16)CPU.VPR[op.vb]._u8[h * 2]; - } + const auto mask = _mm_set1_epi16(0x00ff); + CPU.VPR[op.vd].vi = _mm_mullo_epi16(_mm_and_si128(CPU.VPR[op.va].vi, mask), _mm_and_si128(CPU.VPR[op.vb].vi, mask)); } void ppu_interpreter::VMULOUH(PPUThread& CPU, ppu_opcode_t op) { - for (uint w = 0; w < 4; w++) - { - CPU.VPR[op.vd]._u32[w] = (u32)CPU.VPR[op.va]._u16[w * 2] * (u32)CPU.VPR[op.vb]._u16[w * 2]; - } + const auto a = CPU.VPR[op.va].vi; + const auto b = CPU.VPR[op.vb].vi; + const auto ml = _mm_mullo_epi16(a, b); + const auto mh = _mm_mulhi_epu16(a, b); + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_slli_epi32(mh, 16), _mm_and_si128(ml, _mm_set1_epi32(0x0000ffff))); } void ppu_interpreter::VNMSUBFP(PPUThread& CPU, ppu_opcode_t op) @@ -1305,40 +1234,12 @@ void ppu_interpreter::VSUBFP(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VSUBSBS(PPUThread& CPU, ppu_opcode_t op) { - for (uint b = 0; b < 16; b++) - { - s16 result = (s16)CPU.VPR[op.va]._s8[b] - (s16)CPU.VPR[op.vb]._s8[b]; - - if (result < INT8_MIN) - { - CPU.VPR[op.vd]._s8[b] = INT8_MIN; - } - else if (result > INT8_MAX) - { - CPU.VPR[op.vd]._s8[b] = INT8_MAX; - } - else - CPU.VPR[op.vd]._s8[b] = (s8)result; - } + CPU.VPR[op.vd].vi = _mm_subs_epi8(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); } void ppu_interpreter::VSUBSHS(PPUThread& CPU, ppu_opcode_t op) { - for (uint h = 0; h < 8; h++) - { - s32 result = (s32)CPU.VPR[op.va]._s16[h] - (s32)CPU.VPR[op.vb]._s16[h]; - - if (result < INT16_MIN) - { - CPU.VPR[op.vd]._s16[h] = (s16)INT16_MIN; - } - else if (result > INT16_MAX) - { - CPU.VPR[op.vd]._s16[h] = (s16)INT16_MAX; - } - else - CPU.VPR[op.vd]._s16[h] = (s16)result; - } + CPU.VPR[op.vd].vi = _mm_subs_epi16(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); } void ppu_interpreter::VSUBSWS(PPUThread& CPU, ppu_opcode_t op) @@ -1367,17 +1268,7 @@ void ppu_interpreter::VSUBUBM(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VSUBUBS(PPUThread& CPU, ppu_opcode_t op) { - for (uint b = 0; b < 16; b++) 
- { - s16 result = (s16)CPU.VPR[op.va]._u8[b] - (s16)CPU.VPR[op.vb]._u8[b]; - - if (result < 0) - { - CPU.VPR[op.vd]._u8[b] = 0; - } - else - CPU.VPR[op.vd]._u8[b] = (u8)result; - } + CPU.VPR[op.vd].vi = _mm_subs_epu8(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); } void ppu_interpreter::VSUBUHM(PPUThread& CPU, ppu_opcode_t op) @@ -1387,17 +1278,7 @@ void ppu_interpreter::VSUBUHM(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VSUBUHS(PPUThread& CPU, ppu_opcode_t op) { - for (uint h = 0; h < 8; h++) - { - s32 result = (s32)CPU.VPR[op.va]._u16[h] - (s32)CPU.VPR[op.vb]._u16[h]; - - if (result < 0) - { - CPU.VPR[op.vd]._u16[h] = 0; - } - else - CPU.VPR[op.vd]._u16[h] = (u16)result; - } + CPU.VPR[op.vd].vi = _mm_subs_epu16(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); } void ppu_interpreter::VSUBUWM(PPUThread& CPU, ppu_opcode_t op) From 1c82e8b1d507bd30921b215b6dc002154445261f Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Fri, 3 Apr 2015 19:56:57 +0300 Subject: [PATCH 21/23] Bugfixes --- Utilities/BEType.h | 4 +- rpcs3/Emu/Cell/PPUInterpreter.cpp | 189 +++++++++--------------------- rpcs3/Emu/Cell/PPUInterpreter.h | 31 +++-- 3 files changed, 79 insertions(+), 145 deletions(-) diff --git a/Utilities/BEType.h b/Utilities/BEType.h index 806efcbdb2..96ef78f59d 100644 --- a/Utilities/BEType.h +++ b/Utilities/BEType.h @@ -354,12 +354,12 @@ union _CRT_ALIGN(16) u128 return from64(~_u64[0], ~_u64[1]); } - __forceinline bool test() const + __forceinline bool is_any_1() const // check if any bit is 1 { return _u64[0] || _u64[1]; } - __forceinline bool inv_test() const + __forceinline bool is_any_0() const // check if any bit is 0 { return ~_u64[0] || ~_u64[1]; } diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index 5546f95e41..9257b33760 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -232,7 +232,7 @@ void ppu_interpreter::VCMPBFP_(PPUThread& CPU, ppu_opcode_t op) { VCMPBFP(CPU, op); - CPU.CR.cr6 = 
CPU.VPR[op.vd].test() ? 0 : 2; // set 2 if all in bounds + CPU.CR.cr6 = CPU.VPR[op.vd].is_any_1() ? 0 : 2; // set 2 if all in bounds } void ppu_interpreter::VCMPEQFP(PPUThread& CPU, ppu_opcode_t op) @@ -244,7 +244,7 @@ void ppu_interpreter::VCMPEQFP_(PPUThread& CPU, ppu_opcode_t op) { VCMPEQFP(CPU, op); - CPU.CR.cr6 = CPU.VPR[op.vd].test() ? (CPU.VPR[op.vd].inv_test() ? 0 : 8) : 2; // set 2 if none equal, 8 if all equal + CPU.CR.cr6 = CPU.VPR[op.vd].is_any_1() ? (CPU.VPR[op.vd].is_any_0() ? 0 : 8) : 2; // set 2 if none equal, 8 if all equal } void ppu_interpreter::VCMPEQUB(PPUThread& CPU, ppu_opcode_t op) @@ -256,7 +256,7 @@ void ppu_interpreter::VCMPEQUB_(PPUThread& CPU, ppu_opcode_t op) { VCMPEQUB(CPU, op); - CPU.CR.cr6 = CPU.VPR[op.vd].test() ? (CPU.VPR[op.vd].inv_test() ? 0 : 8) : 2; // set 2 if none equal, 8 if all equal + CPU.CR.cr6 = CPU.VPR[op.vd].is_any_1() ? (CPU.VPR[op.vd].is_any_0() ? 0 : 8) : 2; // set 2 if none equal, 8 if all equal } void ppu_interpreter::VCMPEQUH(PPUThread& CPU, ppu_opcode_t op) @@ -268,7 +268,7 @@ void ppu_interpreter::VCMPEQUH_(PPUThread& CPU, ppu_opcode_t op) { VCMPEQUH(CPU, op); - CPU.CR.cr6 = CPU.VPR[op.vd].test() ? (CPU.VPR[op.vd].inv_test() ? 0 : 8) : 2; // set 2 if none equal, 8 if all equal + CPU.CR.cr6 = CPU.VPR[op.vd].is_any_1() ? (CPU.VPR[op.vd].is_any_0() ? 0 : 8) : 2; // set 2 if none equal, 8 if all equal } void ppu_interpreter::VCMPEQUW(PPUThread& CPU, ppu_opcode_t op) @@ -280,7 +280,7 @@ void ppu_interpreter::VCMPEQUW_(PPUThread& CPU, ppu_opcode_t op) { VCMPEQUW(CPU, op); - CPU.CR.cr6 = CPU.VPR[op.vd].test() ? (CPU.VPR[op.vd].inv_test() ? 0 : 8) : 2; // set 2 if none equal, 8 if all equal + CPU.CR.cr6 = CPU.VPR[op.vd].is_any_1() ? (CPU.VPR[op.vd].is_any_0() ? 0 : 8) : 2; // set 2 if none equal, 8 if all equal } void ppu_interpreter::VCMPGEFP(PPUThread& CPU, ppu_opcode_t op) @@ -292,7 +292,7 @@ void ppu_interpreter::VCMPGEFP_(PPUThread& CPU, ppu_opcode_t op) { VCMPGEFP(CPU, op); - CPU.CR.cr6 = CPU.VPR[op.vd].test() ? 
(CPU.VPR[op.vd].inv_test() ? 0 : 8) : 2; + CPU.CR.cr6 = CPU.VPR[op.vd].is_any_1() ? (CPU.VPR[op.vd].is_any_0() ? 0 : 8) : 2; } void ppu_interpreter::VCMPGTFP(PPUThread& CPU, ppu_opcode_t op) @@ -304,7 +304,7 @@ void ppu_interpreter::VCMPGTFP_(PPUThread& CPU, ppu_opcode_t op) { VCMPGTFP(CPU, op); - CPU.CR.cr6 = CPU.VPR[op.vd].test() ? (CPU.VPR[op.vd].inv_test() ? 0 : 8) : 2; + CPU.CR.cr6 = CPU.VPR[op.vd].is_any_1() ? (CPU.VPR[op.vd].is_any_0() ? 0 : 8) : 2; } void ppu_interpreter::VCMPGTSB(PPUThread& CPU, ppu_opcode_t op) @@ -316,7 +316,7 @@ void ppu_interpreter::VCMPGTSB_(PPUThread& CPU, ppu_opcode_t op) { VCMPGTSB(CPU, op); - CPU.CR.cr6 = CPU.VPR[op.vd].test() ? (CPU.VPR[op.vd].inv_test() ? 0 : 8) : 2; + CPU.CR.cr6 = CPU.VPR[op.vd].is_any_1() ? (CPU.VPR[op.vd].is_any_0() ? 0 : 8) : 2; } void ppu_interpreter::VCMPGTSH(PPUThread& CPU, ppu_opcode_t op) @@ -328,7 +328,7 @@ void ppu_interpreter::VCMPGTSH_(PPUThread& CPU, ppu_opcode_t op) { VCMPGTSH(CPU, op); - CPU.CR.cr6 = CPU.VPR[op.vd].test() ? (CPU.VPR[op.vd].inv_test() ? 0 : 8) : 2; + CPU.CR.cr6 = CPU.VPR[op.vd].is_any_1() ? (CPU.VPR[op.vd].is_any_0() ? 0 : 8) : 2; } void ppu_interpreter::VCMPGTSW(PPUThread& CPU, ppu_opcode_t op) @@ -340,7 +340,7 @@ void ppu_interpreter::VCMPGTSW_(PPUThread& CPU, ppu_opcode_t op) { VCMPGTSW(CPU, op); - CPU.CR.cr6 = CPU.VPR[op.vd].test() ? (CPU.VPR[op.vd].inv_test() ? 0 : 8) : 2; + CPU.CR.cr6 = CPU.VPR[op.vd].is_any_1() ? (CPU.VPR[op.vd].is_any_0() ? 0 : 8) : 2; } void ppu_interpreter::VCMPGTUB(PPUThread& CPU, ppu_opcode_t op) @@ -352,7 +352,7 @@ void ppu_interpreter::VCMPGTUB_(PPUThread& CPU, ppu_opcode_t op) { VCMPGTUB(CPU, op); - CPU.CR.cr6 = CPU.VPR[op.vd].test() ? (CPU.VPR[op.vd].inv_test() ? 0 : 8) : 2; + CPU.CR.cr6 = CPU.VPR[op.vd].is_any_1() ? (CPU.VPR[op.vd].is_any_0() ? 
0 : 8) : 2; } void ppu_interpreter::VCMPGTUH(PPUThread& CPU, ppu_opcode_t op) @@ -364,7 +364,7 @@ void ppu_interpreter::VCMPGTUH_(PPUThread& CPU, ppu_opcode_t op) { VCMPGTUH(CPU, op); - CPU.CR.cr6 = CPU.VPR[op.vd].test() ? (CPU.VPR[op.vd].inv_test() ? 0 : 8) : 2; + CPU.CR.cr6 = CPU.VPR[op.vd].is_any_1() ? (CPU.VPR[op.vd].is_any_0() ? 0 : 8) : 2; } void ppu_interpreter::VCMPGTUW(PPUThread& CPU, ppu_opcode_t op) @@ -376,7 +376,7 @@ void ppu_interpreter::VCMPGTUW_(PPUThread& CPU, ppu_opcode_t op) { VCMPGTUW(CPU, op); - CPU.CR.cr6 = CPU.VPR[op.vd].test() ? (CPU.VPR[op.vd].inv_test() ? 0 : 8) : 2; + CPU.CR.cr6 = CPU.VPR[op.vd].is_any_1() ? (CPU.VPR[op.vd].is_any_0() ? 0 : 8) : 2; } void ppu_interpreter::VCTSXS(PPUThread& CPU, ppu_opcode_t op) @@ -417,7 +417,7 @@ void ppu_interpreter::VMAXSB(PPUThread& CPU, ppu_opcode_t op) const auto a = CPU.VPR[op.va].vi; const auto b = CPU.VPR[op.vb].vi; const auto m = _mm_cmpgt_epi8(a, b); - CPU.VPR[op.vd].vi = _mm_or_si128(_mm_and_si128(a, m), _mm_andnot_si128(m, b)); + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); } void ppu_interpreter::VMAXSH(PPUThread& CPU, ppu_opcode_t op) @@ -430,7 +430,7 @@ void ppu_interpreter::VMAXSW(PPUThread& CPU, ppu_opcode_t op) const auto a = CPU.VPR[op.va].vi; const auto b = CPU.VPR[op.vb].vi; const auto m = _mm_cmpgt_epi32(a, b); - CPU.VPR[op.vd].vi = _mm_or_si128(_mm_and_si128(a, m), _mm_andnot_si128(m, b)); + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); } void ppu_interpreter::VMAXUB(PPUThread& CPU, ppu_opcode_t op) @@ -449,19 +449,27 @@ void ppu_interpreter::VMAXUW(PPUThread& CPU, ppu_opcode_t op) const auto a = CPU.VPR[op.va].vi; const auto b = CPU.VPR[op.vb].vi; const auto m = sse_cmpgt_epu32(a, b); - CPU.VPR[op.vd].vi = _mm_or_si128(_mm_and_si128(a, m), _mm_andnot_si128(m, b)); + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); } void ppu_interpreter::VMHADDSHS(PPUThread& CPU, ppu_opcode_t op) { 
const auto a = CPU.VPR[op.va].vi; const auto b = CPU.VPR[op.vb].vi; - CPU.VPR[op.vd].vi = _mm_adds_epi16(_mm_or_si128(_mm_srli_epi16(_mm_mullo_epi16(a, b), 15), _mm_slli_epi16(_mm_mulhi_epi16(a, b), 1)), CPU.VPR[op.vc].vi); + const auto c = CPU.VPR[op.vc].vi; + const auto m = _mm_or_si128(_mm_srli_epi16(_mm_mullo_epi16(a, b), 15), _mm_slli_epi16(_mm_mulhi_epi16(a, b), 1)); + const auto s = _mm_cmpeq_epi16(m, _mm_set1_epi16(-0x8000)); // detect special case (positive 0x8000) + CPU.VPR[op.vd].vi = _mm_adds_epi16(_mm_adds_epi16(_mm_xor_si128(m, s), c), _mm_srli_epi16(s, 15)); } void ppu_interpreter::VMHRADDSHS(PPUThread& CPU, ppu_opcode_t op) { - CPU.VPR[op.vd].vi = _mm_adds_epi16(_mm_mulhrs_epi16(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi), CPU.VPR[op.vc].vi); + const auto a = CPU.VPR[op.va].vi; + const auto b = CPU.VPR[op.vb].vi; + const auto c = CPU.VPR[op.vc].vi; + const auto m = _mm_mulhrs_epi16(a, b); + const auto s = _mm_cmpeq_epi16(m, _mm_set1_epi16(-0x8000)); // detect special case (positive 0x8000) + CPU.VPR[op.vd].vi = _mm_adds_epi16(_mm_adds_epi16(_mm_xor_si128(m, s), c), _mm_srli_epi16(s, 15)); } void ppu_interpreter::VMINFP(PPUThread& CPU, ppu_opcode_t op) @@ -474,7 +482,7 @@ void ppu_interpreter::VMINSB(PPUThread& CPU, ppu_opcode_t op) const auto a = CPU.VPR[op.va].vi; const auto b = CPU.VPR[op.vb].vi; const auto m = _mm_cmpgt_epi8(a, b); - CPU.VPR[op.vd].vi = _mm_or_si128(_mm_andnot_si128(a, m), _mm_and_si128(m, b)); + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_andnot_si128(m, a), _mm_and_si128(m, b)); } void ppu_interpreter::VMINSH(PPUThread& CPU, ppu_opcode_t op) @@ -487,7 +495,7 @@ void ppu_interpreter::VMINSW(PPUThread& CPU, ppu_opcode_t op) const auto a = CPU.VPR[op.va].vi; const auto b = CPU.VPR[op.vb].vi; const auto m = _mm_cmpgt_epi32(a, b); - CPU.VPR[op.vd].vi = _mm_or_si128(_mm_andnot_si128(a, m), _mm_and_si128(m, b)); + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_andnot_si128(m, a), _mm_and_si128(m, b)); } void ppu_interpreter::VMINUB(PPUThread& CPU, 
ppu_opcode_t op) @@ -506,7 +514,7 @@ void ppu_interpreter::VMINUW(PPUThread& CPU, ppu_opcode_t op) const auto a = CPU.VPR[op.va].vi; const auto b = CPU.VPR[op.vb].vi; const auto m = sse_cmpgt_epu32(a, b); - CPU.VPR[op.vd].vi = _mm_or_si128(_mm_andnot_si128(a, m), _mm_and_si128(m, b)); + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_andnot_si128(m, a), _mm_and_si128(m, b)); } void ppu_interpreter::VMLADDUHM(PPUThread& CPU, ppu_opcode_t op) @@ -548,13 +556,14 @@ void ppu_interpreter::VMSUMMBM(PPUThread& CPU, ppu_opcode_t op) { const auto a = CPU.VPR[op.va].vi; // signed bytes const auto b = CPU.VPR[op.vb].vi; // unsigned bytes + const auto c = CPU.VPR[op.vc].vi; const auto ah = _mm_srai_epi16(a, 8); const auto bh = _mm_srli_epi16(b, 8); - const auto al = _mm_srai_epi16(_mm_srli_epi16(a, 8), 8); + const auto al = _mm_srai_epi16(_mm_slli_epi16(a, 8), 8); const auto bl = _mm_and_si128(b, _mm_set1_epi16(0x00ff)); const auto sh = _mm_madd_epi16(ah, bh); const auto sl = _mm_madd_epi16(al, bl); - CPU.VPR[op.vd].vi = _mm_add_epi32(_mm_add_epi32(CPU.VPR[op.vc].vi, sh), sl); + CPU.VPR[op.vd].vi = _mm_add_epi32(_mm_add_epi32(c, sh), sl); } void ppu_interpreter::VMSUMSHM(PPUThread& CPU, ppu_opcode_t op) @@ -595,6 +604,7 @@ void ppu_interpreter::VMSUMUBM(PPUThread& CPU, ppu_opcode_t op) { const auto a = CPU.VPR[op.va].vi; const auto b = CPU.VPR[op.vb].vi; + const auto c = CPU.VPR[op.vc].vi; const auto mask = _mm_set1_epi16(0x00ff); const auto ah = _mm_srli_epi16(a, 8); const auto al = _mm_and_si128(a, mask); @@ -602,18 +612,19 @@ void ppu_interpreter::VMSUMUBM(PPUThread& CPU, ppu_opcode_t op) const auto bl = _mm_and_si128(b, mask); const auto sh = _mm_madd_epi16(ah, bh); const auto sl = _mm_madd_epi16(al, bl); - CPU.VPR[op.vd].vi = _mm_add_epi32(_mm_add_epi32(CPU.VPR[op.vc].vi, sh), sl); + CPU.VPR[op.vd].vi = _mm_add_epi32(_mm_add_epi32(c, sh), sl); } void ppu_interpreter::VMSUMUHM(PPUThread& CPU, ppu_opcode_t op) { const auto a = CPU.VPR[op.va].vi; const auto b = CPU.VPR[op.vb].vi; + 
const auto c = CPU.VPR[op.vc].vi; const auto ml = _mm_mullo_epi16(a, b); // low results const auto mh = _mm_mulhi_epu16(a, b); // high results const auto ls = _mm_add_epi32(_mm_srli_epi32(ml, 16), _mm_and_si128(ml, _mm_set1_epi32(0x0000ffff))); const auto hs = _mm_add_epi32(_mm_slli_epi32(mh, 16), _mm_and_si128(mh, _mm_set1_epi32(0xffff0000))); - CPU.VPR[op.vd].vi = _mm_add_epi32(_mm_add_epi32(CPU.VPR[op.vc].vi, ls), hs); + CPU.VPR[op.vd].vi = _mm_add_epi32(_mm_add_epi32(c, ls), hs); } void ppu_interpreter::VMSUMUHS(PPUThread& CPU, ppu_opcode_t op) @@ -648,7 +659,7 @@ void ppu_interpreter::VMULESB(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VMULESH(PPUThread& CPU, ppu_opcode_t op) { - CPU.VPR[op.vd].vi = _mm_madd_epi16(_mm_srli_epi16(CPU.VPR[op.va].vi, 16), _mm_srli_epi16(CPU.VPR[op.vb].vi, 16)); + CPU.VPR[op.vd].vi = _mm_madd_epi16(_mm_srli_epi32(CPU.VPR[op.va].vi, 16), _mm_srli_epi32(CPU.VPR[op.vb].vi, 16)); } void ppu_interpreter::VMULEUB(PPUThread& CPU, ppu_opcode_t op) @@ -708,16 +719,11 @@ void ppu_interpreter::VOR(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VPERM(PPUThread& CPU, ppu_opcode_t op) { - u8 tmpSRC[32]; - memcpy(tmpSRC, CPU.VPR[op.vb]._u8, 16); - memcpy(tmpSRC + 16, CPU.VPR[op.va]._u8, 16); - - for (uint b = 0; b < 16; b++) - { - u8 index = CPU.VPR[op.vc]._u8[b] & 0x1f; - - CPU.VPR[op.vd]._u8[b] = tmpSRC[0x1f - index]; - } + const auto index = _mm_andnot_si128(CPU.VPR[op.vc].vi, _mm_set1_epi8(0x1f)); + const auto mask = _mm_cmpgt_epi8(index, _mm_set1_epi8(0xf)); + const auto sa = _mm_shuffle_epi8(CPU.VPR[op.va].vi, index); + const auto sb = _mm_shuffle_epi8(CPU.VPR[op.vb].vi, index); + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_and_si128(mask, sa), _mm_andnot_si128(mask, sb)); } void ppu_interpreter::VPKPX(PPUThread& CPU, ppu_opcode_t op) @@ -742,108 +748,23 @@ void ppu_interpreter::VPKPX(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::VPKSHSS(PPUThread& CPU, ppu_opcode_t op) { - u128 VA = CPU.VPR[op.va]; - u128 VB = 
CPU.VPR[op.vb]; - for (uint b = 0; b < 8; b++) - { - s16 result = VA._s16[b]; - - if (result > INT8_MAX) - { - result = INT8_MAX; - } - else if (result < INT8_MIN) - { - result = INT8_MIN; - } - - CPU.VPR[op.vd]._s8[b + 8] = (s8)result; - - result = VB._s16[b]; - - if (result > INT8_MAX) - { - result = INT8_MAX; - } - else if (result < INT8_MIN) - { - result = INT8_MIN; - } - - CPU.VPR[op.vd]._s8[b] = (s8)result; - } + CPU.VPR[op.vd].vi = _mm_packs_epi16(CPU.VPR[op.vb].vi, CPU.VPR[op.va].vi); } void ppu_interpreter::VPKSHUS(PPUThread& CPU, ppu_opcode_t op) { - u128 VA = CPU.VPR[op.va]; - u128 VB = CPU.VPR[op.vb]; - for (uint b = 0; b < 8; b++) - { - s16 result = VA._s16[b]; - - if (result > UINT8_MAX) - { - result = UINT8_MAX; - } - else if (result < 0) - { - result = 0; - } - - CPU.VPR[op.vd]._u8[b + 8] = (u8)result; - - result = VB._s16[b]; - - if (result > UINT8_MAX) - { - result = UINT8_MAX; - } - else if (result < 0) - { - result = 0; - } - - CPU.VPR[op.vd]._u8[b] = (u8)result; - } + CPU.VPR[op.vd].vi = _mm_packus_epi16(CPU.VPR[op.vb].vi, CPU.VPR[op.va].vi); } void ppu_interpreter::VPKSWSS(PPUThread& CPU, ppu_opcode_t op) { - u128 VA = CPU.VPR[op.va]; - u128 VB = CPU.VPR[op.vb]; - for (uint h = 0; h < 4; h++) - { - s32 result = VA._s32[h]; - - if (result > INT16_MAX) - { - result = INT16_MAX; - } - else if (result < INT16_MIN) - { - result = INT16_MIN; - } - - CPU.VPR[op.vd]._s16[h + 4] = result; - - result = VB._s32[h]; - - if (result > INT16_MAX) - { - result = INT16_MAX; - } - else if (result < INT16_MIN) - { - result = INT16_MIN; - } - - CPU.VPR[op.vd]._s16[h] = result; - } + CPU.VPR[op.vd].vi = _mm_packs_epi32(CPU.VPR[op.vb].vi, CPU.VPR[op.va].vi); } void ppu_interpreter::VPKSWUS(PPUThread& CPU, ppu_opcode_t op) { + //CPU.VPR[op.vd].vi = _mm_packus_epi32(CPU.VPR[op.vb].vi, CPU.VPR[op.va].vi); + u128 VA = CPU.VPR[op.va]; u128 VB = CPU.VPR[op.vb]; for (uint h = 0; h < 4; h++) @@ -2047,7 +1968,8 @@ void ppu_interpreter::LBZX(PPUThread& CPU, ppu_opcode_t op) 
void ppu_interpreter::LVX(PPUThread& CPU, ppu_opcode_t op) { - CPU.VPR[op.vd] = vm::read128((u64)((op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]) & ~0xfULL)); + const u64 addr = (op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]) & ~0xfull; + CPU.VPR[op.vd] = vm::read128(vm::cast(addr)); } void ppu_interpreter::NEG(PPUThread& CPU, ppu_opcode_t op) @@ -2230,7 +2152,8 @@ void ppu_interpreter::STBX(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::STVX(PPUThread& CPU, ppu_opcode_t op) { - vm::write128((u64)((op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]) & ~0xfULL), CPU.VPR[op.vs]); + const u64 addr = (op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]) & ~0xfull; + vm::write128(vm::cast(addr), CPU.VPR[op.vs]); } void ppu_interpreter::MULLD(PPUThread& CPU, ppu_opcode_t op) @@ -2372,7 +2295,8 @@ void ppu_interpreter::LHAX(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::LVXL(PPUThread& CPU, ppu_opcode_t op) { - CPU.VPR[op.vd] = vm::read128((u64)((op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]) & ~0xfULL)); + const u64 addr = (op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]) & ~0xfull; + CPU.VPR[op.vd] = vm::read128(vm::cast(addr)); } void ppu_interpreter::MFTB(PPUThread& CPU, ppu_opcode_t op) @@ -2511,7 +2435,8 @@ void ppu_interpreter::NAND(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::STVXL(PPUThread& CPU, ppu_opcode_t op) { - vm::write128((u64)((op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]) & ~0xfULL), CPU.VPR[op.vs]); + const u64 addr = (op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]) & ~0xfull; + vm::write128(vm::cast(addr), CPU.VPR[op.vs]); } void ppu_interpreter::DIVD(PPUThread& CPU, ppu_opcode_t op) @@ -3186,7 +3111,7 @@ void ppu_interpreter::STDU(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::MTFSB1(PPUThread& CPU, ppu_opcode_t op) { - u64 mask = (1ULL << (31 - op.crbd)); + u32 mask = 1 << (31 - op.crbd); if ((op.crbd >= 3 && op.crbd <= 6) && !(CPU.FPSCR.FPSCR & mask)) mask |= 1ULL << 31; //FPSCR.FX if ((op.crbd == 29) && !CPU.FPSCR.NI) LOG_WARNING(PPU, "Non-IEEE mode enabled"); CPU.SetFPSCR(CPU.FPSCR.FPSCR | mask); @@ -3203,7 +3128,7 @@ void ppu_interpreter::MCRFS(PPUThread& CPU, ppu_opcode_t op) void ppu_interpreter::MTFSB0(PPUThread& CPU, ppu_opcode_t op) { - u64 mask = (1ULL << (31 - op.crbd)); + u32 mask = 1 << (31 - op.crbd); if ((op.crbd == 29) && !CPU.FPSCR.NI) LOG_WARNING(PPU, "Non-IEEE mode disabled"); CPU.SetFPSCR(CPU.FPSCR.FPSCR & ~mask); diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index 9358019869..90f8acd973 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -57,6 +57,11 @@ static double SilenceNaN(double x) return (double&)bits; } +static float SilenceNaN(float x) +{ + return static_cast(SilenceNaN(static_cast(x))); +} + static void SetHostRoundingMode(u32 rn) { switch (rn) @@ -2644,7 +2649,8 @@ private: } void LVX(u32 vd, u32 ra, u32 rb) { - CPU.VPR[vd] = vm::read128((u64)((ra ? CPU.GPR[ra] + CPU.GPR[rb] : CPU.GPR[rb]) & ~0xfULL)); + const u64 addr = (ra ? CPU.GPR[ra] + CPU.GPR[rb] : CPU.GPR[rb]) & ~0xfull; + CPU.VPR[vd] = vm::read128(vm::cast(addr)); } void NEG(u32 rd, u32 ra, u32 oe, bool rc) { @@ -2810,7 +2816,8 @@ private: } void STVX(u32 vs, u32 ra, u32 rb) { - vm::write128((u64)((ra ? CPU.GPR[ra] + CPU.GPR[rb] : CPU.GPR[rb]) & ~0xfULL), CPU.VPR[vs]); + const u64 addr = (ra ? 
CPU.GPR[ra] + CPU.GPR[rb] : CPU.GPR[rb]) & ~0xfull; + vm::write128(vm::cast(addr), CPU.VPR[vs]); } void MULLD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { @@ -2911,7 +2918,8 @@ private: } void LVXL(u32 vd, u32 ra, u32 rb) { - CPU.VPR[vd] = vm::read128((u64)((ra ? CPU.GPR[ra] + CPU.GPR[rb] : CPU.GPR[rb]) & ~0xfULL)); + const u64 addr = (ra ? CPU.GPR[ra] + CPU.GPR[rb] : CPU.GPR[rb]) & ~0xfull; + CPU.VPR[vd] = vm::read128(vm::cast(addr)); } void MFTB(u32 rd, u32 spr) { @@ -3016,7 +3024,8 @@ private: } void STVXL(u32 vs, u32 ra, u32 rb) { - vm::write128((u64)((ra ? CPU.GPR[ra] + CPU.GPR[rb] : CPU.GPR[rb]) & ~0xfULL), CPU.VPR[vs]); + const u64 addr = (ra ? CPU.GPR[ra] + CPU.GPR[rb] : CPU.GPR[rb]) & ~0xfull; + vm::write128(vm::cast(addr), CPU.VPR[vs]); } void DIVD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { @@ -3238,7 +3247,7 @@ private: { u64 bits = (u64&)val; u32 bits32 = (bits>>32 & 0x80000000) | (bits>>29 & 0x7fffffff); - vm::get_ref>(vm::cast(addr)) = (float)bits32; + vm::get_ref>(vm::cast(addr)) = bits32; } } void STVRX(u32 vs, u32 ra, u32 rb) @@ -3260,7 +3269,7 @@ private: { u64 bits = (u64&)val; u32 bits32 = (bits>>32 & 0x80000000) | (bits>>29 & 0x7fffffff); - vm::get_ref>(vm::cast(addr)) = (float)bits32; + vm::get_ref>(vm::cast(addr)) = bits32; } CPU.GPR[ra] = addr; } @@ -3579,7 +3588,7 @@ private: { u64 bits = (u64&)val; u32 bits32 = (bits>>32 & 0x80000000) | (bits>>29 & 0x7fffffff); - vm::get_ref>(vm::cast(addr)) = (float)bits32; + vm::get_ref>(vm::cast(addr)) = bits32; } } void STFSU(u32 frs, u32 ra, s32 d) @@ -3594,7 +3603,7 @@ private: { u64 bits = (u64&)val; u32 bits32 = (bits>>32 & 0x80000000) | (bits>>29 & 0x7fffffff); - vm::get_ref>(vm::cast(addr)) = (float)bits32; + vm::get_ref>(vm::cast(addr)) = bits32; } CPU.GPR[ra] = addr; } @@ -3687,8 +3696,8 @@ private: } void MTFSB1(u32 crbd, bool rc) { - u64 mask = (1ULL << (31 - crbd)); - if ((crbd >= 3 && crbd <= 6) && !(CPU.FPSCR.FPSCR & mask)) mask |= 1ULL << 31; //FPSCR.FX + u32 mask = 1 << (31 - crbd); + if 
((crbd >= 3 && crbd <= 6) && !(CPU.FPSCR.FPSCR & mask)) mask |= 1 << 31; //FPSCR.FX if ((crbd == 29) && !CPU.FPSCR.NI) LOG_WARNING(PPU, "Non-IEEE mode enabled"); CPU.SetFPSCR(CPU.FPSCR.FPSCR | mask); @@ -3702,7 +3711,7 @@ private: } void MTFSB0(u32 crbd, bool rc) { - u64 mask = (1ULL << (31 - crbd)); + u32 mask = 1 << (31 - crbd); if ((crbd == 29) && !CPU.FPSCR.NI) LOG_WARNING(PPU, "Non-IEEE mode disabled"); CPU.SetFPSCR(CPU.FPSCR.FPSCR & ~mask); From 26b5eebc17cdffd37708642e43a857ef452ecc28 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Sat, 4 Apr 2015 19:00:02 +0300 Subject: [PATCH 22/23] Bugfixes --- rpcs3/Emu/ARMv7/Modules/sceLibc.cpp | 36 +++++++++++++------- rpcs3/Emu/Cell/PPUInterpreter.cpp | 2 +- rpcs3/Emu/SysCalls/Modules/sysPrxForUser.cpp | 8 ++--- rpcs3/Emu/SysCalls/lv2/sys_process.cpp | 26 ++++++++++---- 4 files changed, 47 insertions(+), 25 deletions(-) diff --git a/rpcs3/Emu/ARMv7/Modules/sceLibc.cpp b/rpcs3/Emu/ARMv7/Modules/sceLibc.cpp index c724fe56a7..98fab490a0 100644 --- a/rpcs3/Emu/ARMv7/Modules/sceLibc.cpp +++ b/rpcs3/Emu/ARMv7/Modules/sceLibc.cpp @@ -13,6 +13,8 @@ typedef void(atexit_func_t)(vm::psv::ptr); std::vector> g_atexit; +std::mutex g_atexit_mutex; + std::string armv7_fmt(ARMv7Context& context, vm::psv::ptr fmt, u32 g_count, u32 f_count, u32 v_count) { std::string result; @@ -152,7 +154,7 @@ namespace sce_libc_func { sceLibc.Warning("__cxa_atexit(func=*0x%x, arg=*0x%x, dso=*0x%x)", func, arg, dso); - LV2_LOCK; + std::lock_guard lock(g_atexit_mutex); g_atexit.insert(g_atexit.begin(), [func, arg, dso](ARMv7Context& context) { @@ -164,7 +166,7 @@ namespace sce_libc_func { sceLibc.Warning("__aeabi_atexit(arg=*0x%x, func=*0x%x, dso=*0x%x)", arg, func, dso); - LV2_LOCK; + std::lock_guard lock(g_atexit_mutex); g_atexit.insert(g_atexit.begin(), [func, arg, dso](ARMv7Context& context) { @@ -176,19 +178,27 @@ namespace sce_libc_func { sceLibc.Warning("exit()"); - for (auto func : g_atexit) + std::lock_guard lock(g_atexit_mutex); + + if 
(!Emu.IsStopped()) { - func(context); + for (auto func : decltype(g_atexit)(std::move(g_atexit))) + { + func(context); + } + + sceLibc.Success("Process finished"); + + CallAfter([]() + { + Emu.Stop(); + }); + + while (!Emu.IsStopped()) + { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } } - - g_atexit.clear(); - - sceLibc.Success("Process finished"); - - CallAfter([]() - { - Emu.Stop(); - }); } void printf(ARMv7Context& context, vm::psv::ptr fmt) // va_args... diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index 9257b33760..e012641c37 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -225,7 +225,7 @@ void ppu_interpreter::VCMPBFP(PPUThread& CPU, ppu_opcode_t op) const auto b = CPU.VPR[op.vb].vf; const auto sign = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); const auto bneg = _mm_xor_ps(b, sign); - CPU.VPR[op.vd].vf = _mm_or_ps(_mm_and_ps(_mm_cmple_ps(a, b), sign), _mm_and_ps(_mm_cmpge_ps(a, bneg), _mm_castsi128_ps(_mm_set1_epi32(0x40000000)))); + CPU.VPR[op.vd].vf = _mm_or_ps(_mm_and_ps(_mm_cmpnle_ps(a, b), sign), _mm_and_ps(_mm_cmpnge_ps(a, bneg), _mm_castsi128_ps(_mm_set1_epi32(0x40000000)))); } void ppu_interpreter::VCMPBFP_(PPUThread& CPU, ppu_opcode_t op) diff --git a/rpcs3/Emu/SysCalls/Modules/sysPrxForUser.cpp b/rpcs3/Emu/SysCalls/Modules/sysPrxForUser.cpp index 34844ec5e5..f5d3426e95 100644 --- a/rpcs3/Emu/SysCalls/Modules/sysPrxForUser.cpp +++ b/rpcs3/Emu/SysCalls/Modules/sysPrxForUser.cpp @@ -230,7 +230,7 @@ s32 sys_lwmutex_lock(PPUThread& CPU, vm::ptr lwmutex, u64 timeout // locking succeeded auto old = lwmutex->owner.exchange(tid); - if (old.data() != se32(lwmutex_reserved)) + if (old.data() != se32(lwmutex_reserved) && !Emu.IsStopped()) { sysPrxForUser.Fatal("sys_lwmutex_lock(lwmutex=*0x%x): locking failed (owner=0x%x)", lwmutex, old); } @@ -301,7 +301,7 @@ s32 sys_lwmutex_trylock(PPUThread& CPU, vm::ptr lwmutex) // locking succeeded auto old = 
lwmutex->owner.exchange(tid); - if (old.data() != se32(lwmutex_reserved)) + if (old.data() != se32(lwmutex_reserved) && !Emu.IsStopped()) { sysPrxForUser.Fatal("sys_lwmutex_trylock(lwmutex=*0x%x): locking failed (owner=0x%x)", lwmutex, old); } @@ -592,7 +592,7 @@ s32 sys_lwcond_wait(PPUThread& CPU, vm::ptr lwcond, u64 timeout) const auto old = lwmutex->owner.exchange(tid); lwmutex->recursive_count = recursive_value; - if (old.data() != se32(lwmutex_reserved)) + if (old.data() != se32(lwmutex_reserved) && !Emu.IsStopped()) { sysPrxForUser.Fatal("sys_lwcond_wait(lwcond=*0x%x): locking failed (lwmutex->owner=0x%x)", lwcond, old); } @@ -621,7 +621,7 @@ s32 sys_lwcond_wait(PPUThread& CPU, vm::ptr lwcond, u64 timeout) const auto old = lwmutex->owner.exchange(tid); lwmutex->recursive_count = recursive_value; - if (old.data() != se32(lwmutex_reserved)) + if (old.data() != se32(lwmutex_reserved) && !Emu.IsStopped()) { sysPrxForUser.Fatal("sys_lwcond_wait(lwcond=*0x%x): locking failed after timeout (lwmutex->owner=0x%x)", lwcond, old); } diff --git a/rpcs3/Emu/SysCalls/lv2/sys_process.cpp b/rpcs3/Emu/SysCalls/lv2/sys_process.cpp index 45fb9d0a7b..ccafdcab30 100644 --- a/rpcs3/Emu/SysCalls/lv2/sys_process.cpp +++ b/rpcs3/Emu/SysCalls/lv2/sys_process.cpp @@ -30,15 +30,27 @@ s32 sys_process_getppid() return 0; } -s32 sys_process_exit(s32 errorcode) +s32 sys_process_exit(s32 status) { - sys_process.Warning("sys_process_exit(%d)", errorcode); - Emu.Pause(); - sys_process.Success("Process finished"); - CallAfter([]() + sys_process.Warning("sys_process_exit(status=0x%x)", status); + + LV2_LOCK; + + if (!Emu.IsStopped()) { - Emu.Stop(); - }); + sys_process.Success("Process finished"); + + CallAfter([]() + { + Emu.Stop(); + }); + + while (!Emu.IsStopped()) + { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + } + return CELL_OK; } From 9479bcf77b8d599a152948de45fb5ce0390c2790 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Sat, 4 Apr 2015 19:16:53 +0300 Subject: 
[PATCH 23/23] Small fix --- rpcs3/Ini.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Ini.h b/rpcs3/Ini.h index d36f80c028..08b0ea3b86 100644 --- a/rpcs3/Ini.h +++ b/rpcs3/Ini.h @@ -248,7 +248,7 @@ public: { // Core CPUDecoderMode.Load(0); - SPUDecoderMode.Load(1); + SPUDecoderMode.Load(0); // Graphics GSRenderMode.Load(1);