diff --git a/Utilities/BEType.h b/Utilities/BEType.h index 8cf5e9a89c..96ef78f59d 100644 --- a/Utilities/BEType.h +++ b/Utilities/BEType.h @@ -88,6 +88,7 @@ union _CRT_ALIGN(16) u128 double _d[2]; __m128 vf; __m128i vi; + __m128d vd; class bit_array_128 { @@ -202,14 +203,21 @@ union _CRT_ALIGN(16) u128 static u128 from32p(u32 value) { u128 ret; - ret.vi = _mm_set1_epi32((int)value); + ret.vi = _mm_set1_epi32(static_cast(value)); + return ret; + } + + static u128 from16p(u16 value) + { + u128 ret; + ret.vi = _mm_set1_epi16(static_cast(value)); return ret; } static u128 from8p(u8 value) { u128 ret; - ret.vi = _mm_set1_epi8((char)value); + ret.vi = _mm_set1_epi8(static_cast(value)); return ret; } @@ -227,16 +235,75 @@ union _CRT_ALIGN(16) u128 return ret; } + static u128 fromF(__m128 value) + { + u128 ret; + ret.vf = value; + return ret; + } + + static u128 fromD(__m128d value) + { + u128 ret; + ret.vd = value; + return ret; + } + static __forceinline u128 add8(const u128& left, const u128& right) { return fromV(_mm_add_epi8(left.vi, right.vi)); } + static __forceinline u128 add16(const u128& left, const u128& right) + { + return fromV(_mm_add_epi16(left.vi, right.vi)); + } + + static __forceinline u128 add32(const u128& left, const u128& right) + { + return fromV(_mm_add_epi32(left.vi, right.vi)); + } + + static __forceinline u128 addfs(const u128& left, const u128& right) + { + return fromF(_mm_add_ps(left.vf, right.vf)); + } + + static __forceinline u128 addfd(const u128& left, const u128& right) + { + return fromD(_mm_add_pd(left.vd, right.vd)); + } + static __forceinline u128 sub8(const u128& left, const u128& right) { return fromV(_mm_sub_epi8(left.vi, right.vi)); } + static __forceinline u128 sub16(const u128& left, const u128& right) + { + return fromV(_mm_sub_epi16(left.vi, right.vi)); + } + + static __forceinline u128 sub32(const u128& left, const u128& right) + { + return fromV(_mm_sub_epi32(left.vi, right.vi)); + } + + static __forceinline u128 subfs(const 
u128& left, const u128& right) + { + return fromF(_mm_sub_ps(left.vf, right.vf)); + } + + static __forceinline u128 subfd(const u128& left, const u128& right) + { + return fromD(_mm_sub_pd(left.vd, right.vd)); + } + + static __forceinline u128 maxu8(const u128& left, const u128& right) + { + return fromV(_mm_max_epu8(left.vi, right.vi)); + } + static __forceinline u128 minu8(const u128& left, const u128& right) { return fromV(_mm_min_epu8(left.vi, right.vi)); @@ -247,14 +314,14 @@ union _CRT_ALIGN(16) u128 return fromV(_mm_cmpeq_epi8(left.vi, right.vi)); } - static __forceinline u128 gtu8(const u128& left, const u128& right) + static __forceinline u128 eq16(const u128& left, const u128& right) { - return fromV(_mm_cmpgt_epu8(left.vi, right.vi)); + return fromV(_mm_cmpeq_epi16(left.vi, right.vi)); } - static __forceinline u128 leu8(const u128& left, const u128& right) + static __forceinline u128 eq32(const u128& left, const u128& right) { - return fromV(_mm_cmple_epu8(left.vi, right.vi)); + return fromV(_mm_cmpeq_epi32(left.vi, right.vi)); } bool operator == (const u128& right) const @@ -287,6 +354,16 @@ union _CRT_ALIGN(16) u128 return from64(~_u64[0], ~_u64[1]); } + __forceinline bool is_any_1() const // check if any bit is 1 + { + return _u64[0] || _u64[1]; + } + + __forceinline bool is_any_0() const // check if any bit is 0 + { + return ~_u64[0] || ~_u64[1]; + } + // result = (~left) & (right) static __forceinline u128 andnot(const u128& left, const u128& right) { diff --git a/Utilities/GNU.h b/Utilities/GNU.h index a8db7f8703..9d876d8ce8 100644 --- a/Utilities/GNU.h +++ b/Utilities/GNU.h @@ -342,15 +342,49 @@ static __forceinline uint64_t cntlz64(uint64_t arg) } // compare 16 packed unsigned bytes (greater than) -static __forceinline __m128i _mm_cmpgt_epu8(__m128i A, __m128i B) +inline __m128i sse_cmpgt_epu8(__m128i A, __m128i B) { // (A xor 0x80) > (B xor 0x80) - return _mm_cmpgt_epi8(_mm_xor_si128(A, _mm_set1_epi8(-128)), _mm_xor_si128(B, 
_mm_set1_epi8(-128))); + const auto sign = _mm_set1_epi32(0x80808080); + return _mm_cmpgt_epi8(_mm_xor_si128(A, sign), _mm_xor_si128(B, sign)); } -// compare 16 packed unsigned bytes (less or equal) -static __forceinline __m128i _mm_cmple_epu8(__m128i A, __m128i B) +inline __m128i sse_cmpgt_epu16(__m128i A, __m128i B) { - // ((B xor 0x80) > (A xor 0x80)) || A == B - return _mm_or_si128(_mm_cmpgt_epu8(B, A), _mm_cmpeq_epi8(A, B)); + const auto sign = _mm_set1_epi32(0x80008000); + return _mm_cmpgt_epi16(_mm_xor_si128(A, sign), _mm_xor_si128(B, sign)); +} + +inline __m128i sse_cmpgt_epu32(__m128i A, __m128i B) +{ + const auto sign = _mm_set1_epi32(0x80000000); + return _mm_cmpgt_epi32(_mm_xor_si128(A, sign), _mm_xor_si128(B, sign)); +} + +inline __m128 sse_exp2_ps(__m128 A) +{ + const auto x0 = _mm_max_ps(_mm_min_ps(A, _mm_set1_ps(127.4999961f)), _mm_set1_ps(-127.4999961f)); + const auto x1 = _mm_add_ps(x0, _mm_set1_ps(0.5f)); + const auto x2 = _mm_sub_epi32(_mm_cvtps_epi32(x1), _mm_and_si128(_mm_castps_si128(_mm_cmpnlt_ps(_mm_setzero_ps(), x1)), _mm_set1_epi32(1))); + const auto x3 = _mm_sub_ps(x0, _mm_cvtepi32_ps(x2)); + const auto x4 = _mm_mul_ps(x3, x3); + const auto x5 = _mm_mul_ps(x3, _mm_add_ps(_mm_mul_ps(_mm_add_ps(_mm_mul_ps(x4, _mm_set1_ps(0.023093347705f)), _mm_set1_ps(20.20206567f)), x4), _mm_set1_ps(1513.906801f))); + const auto x6 = _mm_mul_ps(x5, _mm_rcp_ps(_mm_sub_ps(_mm_add_ps(_mm_mul_ps(_mm_set1_ps(233.1842117f), x4), _mm_set1_ps(4368.211667f)), x5))); + return _mm_mul_ps(_mm_add_ps(_mm_add_ps(x6, x6), _mm_set1_ps(1.0f)), _mm_castsi128_ps(_mm_slli_epi32(_mm_add_epi32(x2, _mm_set1_epi32(127)), 23))); +} + +inline __m128 sse_log2_ps(__m128 A) +{ + const auto _1 = _mm_set1_ps(1.0f); + const auto _c = _mm_set1_ps(1.442695040f); + const auto x0 = _mm_max_ps(A, _mm_castsi128_ps(_mm_set1_epi32(0x00800000))); + const auto x1 = _mm_or_ps(_mm_and_ps(x0, _mm_castsi128_ps(_mm_set1_epi32(0x807fffff))), _1); + const auto x2 = _mm_rcp_ps(_mm_add_ps(x1, _1)); + 
const auto x3 = _mm_mul_ps(_mm_sub_ps(x1, _1), x2); + const auto x4 = _mm_add_ps(x3, x3); + const auto x5 = _mm_mul_ps(x4, x4); + const auto x6 = _mm_add_ps(_mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_set1_ps(-0.7895802789f), x5), _mm_set1_ps(16.38666457f)), x5), _mm_set1_ps(-64.1409953f)); + const auto x7 = _mm_rcp_ps(_mm_add_ps(_mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_set1_ps(-35.67227983f), x5), _mm_set1_ps(312.0937664f)), x5), _mm_set1_ps(-769.6919436f))); + const auto x8 = _mm_cvtepi32_ps(_mm_sub_epi32(_mm_srli_epi32(_mm_castps_si128(x0), 23), _mm_set1_epi32(127))); + return _mm_add_ps(_mm_mul_ps(_mm_mul_ps(_mm_mul_ps(_mm_mul_ps(x5, x6), x7), x4), _c), _mm_add_ps(_mm_mul_ps(x4, _c), x8)); } diff --git a/Utilities/rFile.cpp b/Utilities/rFile.cpp index b25310ab4d..ff41566927 100644 --- a/Utilities/rFile.cpp +++ b/Utilities/rFile.cpp @@ -128,7 +128,7 @@ bool rRename(const std::string &from, const std::string &to) #ifdef _WIN32 if (!MoveFile(ConvertUTF8ToWString(from).c_str(), ConvertUTF8ToWString(to).c_str())) #else - if (rename(from.c_str(), to.c_str())) + if (int err = rename(from.c_str(), to.c_str())) #endif { LOG_ERROR(GENERAL, "Error renaming '%s' to '%s': 0x%llx", from.c_str(), to.c_str(), (u64)GET_API_ERROR); diff --git a/rpcs3/CMakeLists.txt b/rpcs3/CMakeLists.txt index c20b911b90..11d668aebd 100644 --- a/rpcs3/CMakeLists.txt +++ b/rpcs3/CMakeLists.txt @@ -34,7 +34,7 @@ if (NOT MSVC) set(CMAKE_C_FLAGS_MINSIZEREL "${CMAKE_C_FLAGS_MINSIZEREL} -Os -D_NDEBUG") set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O1 -D_NDEBUG") set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O1 -g -D_NDEBUG") - add_definitions(-msse2 -mcx16) + add_definitions(-msse -msse2 -mcx16 -mssse3) endif() if (APPLE) diff --git a/rpcs3/Emu/ARMv7/ARMv7Thread.cpp b/rpcs3/Emu/ARMv7/ARMv7Thread.cpp index e35e9c8f8d..564dd59e02 100644 --- a/rpcs3/Emu/ARMv7/ARMv7Thread.cpp +++ b/rpcs3/Emu/ARMv7/ARMv7Thread.cpp @@ -174,16 +174,17 @@ void ARMv7Thread::DoReset() void ARMv7Thread::DoRun() 
{ + m_dec = nullptr; + switch(Ini.CPUDecoderMode.GetValue()) { case 0: - //m_dec = new ARMv7Decoder(*new ARMv7DisAsm()); - break; - case 1: - case 2: m_dec = new ARMv7Decoder(context); - break; + break; + default: + LOG_ERROR(PPU, "Invalid CPU decoder mode: %d", Ini.CPUDecoderMode.GetValue()); + Emu.Pause(); } } @@ -228,6 +229,7 @@ void ARMv7Thread::FastCall(u32 addr) void ARMv7Thread::FastStop() { m_status = Stopped; + m_events |= CPU_EVENT_STOP; } armv7_thread::armv7_thread(u32 entry, const std::string& name, u32 stack_size, s32 prio) diff --git a/rpcs3/Emu/ARMv7/Modules/sceLibc.cpp b/rpcs3/Emu/ARMv7/Modules/sceLibc.cpp index c724fe56a7..98fab490a0 100644 --- a/rpcs3/Emu/ARMv7/Modules/sceLibc.cpp +++ b/rpcs3/Emu/ARMv7/Modules/sceLibc.cpp @@ -13,6 +13,8 @@ typedef void(atexit_func_t)(vm::psv::ptr); std::vector> g_atexit; +std::mutex g_atexit_mutex; + std::string armv7_fmt(ARMv7Context& context, vm::psv::ptr fmt, u32 g_count, u32 f_count, u32 v_count) { std::string result; @@ -152,7 +154,7 @@ namespace sce_libc_func { sceLibc.Warning("__cxa_atexit(func=*0x%x, arg=*0x%x, dso=*0x%x)", func, arg, dso); - LV2_LOCK; + std::lock_guard lock(g_atexit_mutex); g_atexit.insert(g_atexit.begin(), [func, arg, dso](ARMv7Context& context) { @@ -164,7 +166,7 @@ namespace sce_libc_func { sceLibc.Warning("__aeabi_atexit(arg=*0x%x, func=*0x%x, dso=*0x%x)", arg, func, dso); - LV2_LOCK; + std::lock_guard lock(g_atexit_mutex); g_atexit.insert(g_atexit.begin(), [func, arg, dso](ARMv7Context& context) { @@ -176,19 +178,27 @@ namespace sce_libc_func { sceLibc.Warning("exit()"); - for (auto func : g_atexit) + std::lock_guard lock(g_atexit_mutex); + + if (!Emu.IsStopped()) { - func(context); + for (auto func : decltype(g_atexit)(std::move(g_atexit))) + { + func(context); + } + + sceLibc.Success("Process finished"); + + CallAfter([]() + { + Emu.Stop(); + }); + + while (!Emu.IsStopped()) + { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } } - - g_atexit.clear(); - - 
sceLibc.Success("Process finished"); - - CallAfter([]() - { - Emu.Stop(); - }); } void printf(ARMv7Context& context, vm::psv::ptr fmt) // va_args... diff --git a/rpcs3/Emu/CPU/CPUThread.cpp b/rpcs3/Emu/CPU/CPUThread.cpp index 9710b85c5d..a490d187d4 100644 --- a/rpcs3/Emu/CPU/CPUThread.cpp +++ b/rpcs3/Emu/CPU/CPUThread.cpp @@ -17,6 +17,7 @@ CPUThread* GetCurrentCPUThread() CPUThread::CPUThread(CPUThreadType type) : ThreadBase("CPUThread") + , m_events(0) , m_type(type) , m_stack_size(0) , m_stack_addr(0) @@ -242,6 +243,7 @@ void CPUThread::Stop() SendDbgCommand(DID_STOP_THREAD, this); m_status = Stopped; + m_events |= CPU_EVENT_STOP; if(static_cast(this) != GetCurrentNamedThread()) { diff --git a/rpcs3/Emu/CPU/CPUThread.h b/rpcs3/Emu/CPU/CPUThread.h index 13e8e7ebbf..e81d79216e 100644 --- a/rpcs3/Emu/CPU/CPUThread.h +++ b/rpcs3/Emu/CPU/CPUThread.h @@ -20,11 +20,19 @@ enum CPUThreadStatus CPUThread_Step, }; +// CPU Thread Events +enum : u64 +{ + CPU_EVENT_STOP = (1ull << 0), +}; + class CPUDecoder; class CPUThread : public ThreadBase { protected: + std::atomic m_events; // flags + u32 m_status; u32 m_id; u64 m_prio; @@ -45,6 +53,8 @@ protected: virtual void DumpInformation() override; public: + void AddEvent(const u64 event) { m_events |= event; } + virtual void InitRegs() = 0; virtual void InitStack() = 0; diff --git a/rpcs3/Emu/Cell/PPUInstrTable.h b/rpcs3/Emu/Cell/PPUInstrTable.h index 4dbc1eb4cc..fbad298fbb 100644 --- a/rpcs3/Emu/Cell/PPUInstrTable.h +++ b/rpcs3/Emu/Cell/PPUInstrTable.h @@ -115,8 +115,6 @@ namespace PPU_instr */ static CodeField<30> AA; - static CodeFieldSignedOffset<6, 29, 2> LI(FIELD_BRANCH); - // static CodeFieldSignedOffset<6, 29, 2> LL(FIELD_BRANCH); /* @@ -245,7 +243,7 @@ namespace PPU_instr bind_instr(main_list, BC, BO, BI, BD, AA, LK); bind_instr(main_list, HACK, uimm26); bind_instr(main_list, SC, LEV); - bind_instr(main_list, B, LI, AA, LK); + bind_instr(main_list, B, LL, AA, LK); bind_instr(main_list, RLWIMI, RA, RS, SH, MB, ME, RC); 
bind_instr(main_list, RLWINM, RA, RS, SH, MB, ME, RC); bind_instr(main_list, RLWNM, RA, RS, RB, MB, ME, RC); diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp new file mode 100644 index 0000000000..e012641c37 --- /dev/null +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -0,0 +1,3328 @@ +#include "stdafx.h" +#include "Utilities/Log.h" +#include "Emu/Memory/Memory.h" +#include "Emu/System.h" +#include "Emu/Cell/PPUThread.h" +#include "Emu/SysCalls/SysCalls.h" +#include "Emu/SysCalls/Modules.h" +#include "Emu/Cell/PPUDecoder.h" +#include "PPUInstrTable.h" +#include "PPUInterpreter.h" +#include "PPUInterpreter2.h" +#include "Emu/CPU/CPUThreadManager.h" + +class ppu_scale_table_t +{ + std::array<__m128, 32 + 31> m_data; + +public: + ppu_scale_table_t() + { + for (s32 i = -31; i < 32; i++) + { + m_data[i + 31] = _mm_set1_ps(static_cast(exp2(i))); + } + } + + __forceinline __m128 operator [] (s32 scale) const + { + return m_data[scale + 31]; + } +} +const g_ppu_scale_table; + + +void ppu_interpreter::NULL_OP(PPUThread& CPU, ppu_opcode_t op) +{ + PPUInterpreter inter(CPU); (*PPU_instr::main_list)(&inter, op.opcode); +} + +void ppu_interpreter::NOP(PPUThread& CPU, ppu_opcode_t op) +{ +} + + +void ppu_interpreter::TDI(PPUThread& CPU, ppu_opcode_t op) +{ + const s64 a = CPU.GPR[op.ra], b = op.simm16; + const u64 a_ = a, b_ = b; // unsigned + + if (((op.bo & 0x10) && a < b) || + ((op.bo & 0x8) && a > b) || + ((op.bo & 0x4) && a == b) || + ((op.bo & 0x2) && a_ < b_) || + ((op.bo & 0x1) && a_ > b_)) + { + throw __FUNCTION__; + } +} + +void ppu_interpreter::TWI(PPUThread& CPU, ppu_opcode_t op) +{ + const s32 a = (s32)CPU.GPR[op.ra], b = op.simm16; + const u32 a_ = a, b_ = b; // unsigned + + if (((op.bo & 0x10) && a < b) || + ((op.bo & 0x8) && a > b) || + ((op.bo & 0x4) && a == b) || + ((op.bo & 0x2) && a_ < b_) || + ((op.bo & 0x1) && a_ > b_)) + { + throw __FUNCTION__; + } +} + + +void ppu_interpreter::MFVSCR(PPUThread& CPU, ppu_opcode_t op) +{ + throw 
__FUNCTION__; +} + +void ppu_interpreter::MTVSCR(PPUThread& CPU, ppu_opcode_t op) +{ + // ignored (MFVSCR disabled) +} + +void ppu_interpreter::VADDCUW(PPUThread& CPU, ppu_opcode_t op) +{ + const auto a = CPU.VPR[op.va].vi; + const auto b = CPU.VPR[op.vb].vi; + CPU.VPR[op.vd].vi = _mm_srli_epi32(_mm_cmpgt_epi32(_mm_xor_si128(b, _mm_set1_epi32(0x80000000)), _mm_xor_si128(a, _mm_set1_epi32(0x7fffffff))), 31); +} + +void ppu_interpreter::VADDFP(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd] = u128::addfs(CPU.VPR[op.va], CPU.VPR[op.vb]); +} + +void ppu_interpreter::VADDSBS(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_adds_epi8(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); +} + +void ppu_interpreter::VADDSHS(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_adds_epi16(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); +} + +void ppu_interpreter::VADDSWS(PPUThread& CPU, ppu_opcode_t op) +{ + const auto a = CPU.VPR[op.va]; + const auto b = CPU.VPR[op.vb]; + const auto s = u128::add32(a, b); // a + b + const auto m = (a ^ s) & (b ^ s); // overflow bit + const auto x = _mm_srai_epi32(m.vi, 31); // saturation mask + const auto y = _mm_srai_epi32(_mm_and_si128(s.vi, m.vi), 31); // positive saturation mask + CPU.VPR[op.vd].vi = _mm_xor_si128(_mm_xor_si128(_mm_srli_epi32(x, 1), y), _mm_or_si128(s.vi, x)); +} + +void ppu_interpreter::VADDUBM(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd] = u128::add8(CPU.VPR[op.va], CPU.VPR[op.vb]); +} + +void ppu_interpreter::VADDUBS(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_adds_epu8(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); +} + +void ppu_interpreter::VADDUHM(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd] = u128::add16(CPU.VPR[op.va], CPU.VPR[op.vb]); +} + +void ppu_interpreter::VADDUHS(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_adds_epu16(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); +} + +void ppu_interpreter::VADDUWM(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd] = 
u128::add32(CPU.VPR[op.va], CPU.VPR[op.vb]); +} + +void ppu_interpreter::VADDUWS(PPUThread& CPU, ppu_opcode_t op) +{ + const auto a = CPU.VPR[op.va].vi; + const auto b = CPU.VPR[op.vb].vi; + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_add_epi32(a, b), _mm_cmpgt_epi32(_mm_xor_si128(b, _mm_set1_epi32(0x80000000)), _mm_xor_si128(a, _mm_set1_epi32(0x7fffffff)))); +} + +void ppu_interpreter::VAND(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd] = CPU.VPR[op.va] & CPU.VPR[op.vb]; +} + +void ppu_interpreter::VANDC(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd] = CPU.VPR[op.va] & ~CPU.VPR[op.vb]; +} + +void ppu_interpreter::VAVGSB(PPUThread& CPU, ppu_opcode_t op) +{ + const auto a = CPU.VPR[op.va]; + const auto b = u128::add8(CPU.VPR[op.vb], u128::from8p(1)); // add 1 + const auto summ = u128::add8(a, b) & u128::from8p(0xfe); + const auto sign = u128::from8p(0x80); + const auto overflow = (((a ^ summ) & (b ^ summ)) ^ summ ^ u128::eq8(b, sign)) & sign; // calculate msb + CPU.VPR[op.vd].vi = _mm_or_si128(overflow.vi, _mm_srli_epi64(summ.vi, 1)); +} + +void ppu_interpreter::VAVGSH(PPUThread& CPU, ppu_opcode_t op) +{ + const auto a = CPU.VPR[op.va]; + const auto b = u128::add16(CPU.VPR[op.vb], u128::from16p(1)); // add 1 + const auto summ = u128::add16(a, b); + const auto sign = u128::from16p(0x8000); + const auto overflow = (((a ^ summ) & (b ^ summ)) ^ summ ^ u128::eq16(b, sign)) & sign; // calculate msb + CPU.VPR[op.vd].vi = _mm_or_si128(overflow.vi, _mm_srli_epi16(summ.vi, 1)); +} + +void ppu_interpreter::VAVGSW(PPUThread& CPU, ppu_opcode_t op) +{ + const auto a = CPU.VPR[op.va]; + const auto b = u128::add32(CPU.VPR[op.vb], u128::from32p(1)); // add 1 + const auto summ = u128::add32(a, b); + const auto sign = u128::from32p(0x80000000); + const auto overflow = (((a ^ summ) & (b ^ summ)) ^ summ ^ u128::eq32(b, sign)) & sign; // calculate msb + CPU.VPR[op.vd].vi = _mm_or_si128(overflow.vi, _mm_srli_epi32(summ.vi, 1)); +} + +void ppu_interpreter::VAVGUB(PPUThread& CPU, 
ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_avg_epu8(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); +} + +void ppu_interpreter::VAVGUH(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_avg_epu16(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); +} + +void ppu_interpreter::VAVGUW(PPUThread& CPU, ppu_opcode_t op) +{ + const auto a = CPU.VPR[op.va]; + const auto b = CPU.VPR[op.vb]; + const auto summ = u128::add32(u128::add32(a, b), u128::from32p(1)); + const auto carry = _mm_xor_si128(_mm_slli_epi32(sse_cmpgt_epu32(summ.vi, a.vi), 31), _mm_set1_epi32(0x80000000)); + CPU.VPR[op.vd].vi = _mm_or_si128(carry, _mm_srli_epi32(summ.vi, 1)); +} + +void ppu_interpreter::VCFSX(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vf = _mm_mul_ps(_mm_cvtepi32_ps(CPU.VPR[op.vb].vi), g_ppu_scale_table[0 - op.vuimm]); +} + +void ppu_interpreter::VCFUX(PPUThread& CPU, ppu_opcode_t op) +{ + const auto b = CPU.VPR[op.vb].vi; + const auto fix = _mm_and_ps(_mm_castsi128_ps(_mm_srai_epi32(b, 31)), _mm_set1_ps(0x80000000)); + CPU.VPR[op.vd].vf = _mm_mul_ps(_mm_add_ps(_mm_cvtepi32_ps(_mm_and_si128(b, _mm_set1_epi32(0x7fffffff))), fix), g_ppu_scale_table[0 - op.vuimm]); +} + +void ppu_interpreter::VCMPBFP(PPUThread& CPU, ppu_opcode_t op) +{ + const auto a = CPU.VPR[op.va].vf; + const auto b = CPU.VPR[op.vb].vf; + const auto sign = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); + const auto bneg = _mm_xor_ps(b, sign); + CPU.VPR[op.vd].vf = _mm_or_ps(_mm_and_ps(_mm_cmpnle_ps(a, b), sign), _mm_and_ps(_mm_cmpnge_ps(a, bneg), _mm_castsi128_ps(_mm_set1_epi32(0x40000000)))); +} + +void ppu_interpreter::VCMPBFP_(PPUThread& CPU, ppu_opcode_t op) +{ + VCMPBFP(CPU, op); + + CPU.CR.cr6 = CPU.VPR[op.vd].is_any_1() ? 
0 : 2; // set 2 if all in bounds +} + +void ppu_interpreter::VCMPEQFP(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vf = _mm_cmpeq_ps(CPU.VPR[op.va].vf, CPU.VPR[op.vb].vf); +} + +void ppu_interpreter::VCMPEQFP_(PPUThread& CPU, ppu_opcode_t op) +{ + VCMPEQFP(CPU, op); + + CPU.CR.cr6 = CPU.VPR[op.vd].is_any_1() ? (CPU.VPR[op.vd].is_any_0() ? 0 : 8) : 2; // set 2 if none equal, 8 if all equal +} + +void ppu_interpreter::VCMPEQUB(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd] = u128::eq8(CPU.VPR[op.va], CPU.VPR[op.vb]); +} + +void ppu_interpreter::VCMPEQUB_(PPUThread& CPU, ppu_opcode_t op) +{ + VCMPEQUB(CPU, op); + + CPU.CR.cr6 = CPU.VPR[op.vd].is_any_1() ? (CPU.VPR[op.vd].is_any_0() ? 0 : 8) : 2; // set 2 if none equal, 8 if all equal +} + +void ppu_interpreter::VCMPEQUH(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd] = u128::eq16(CPU.VPR[op.va], CPU.VPR[op.vb]); +} + +void ppu_interpreter::VCMPEQUH_(PPUThread& CPU, ppu_opcode_t op) +{ + VCMPEQUH(CPU, op); + + CPU.CR.cr6 = CPU.VPR[op.vd].is_any_1() ? (CPU.VPR[op.vd].is_any_0() ? 0 : 8) : 2; // set 2 if none equal, 8 if all equal +} + +void ppu_interpreter::VCMPEQUW(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd] = u128::eq32(CPU.VPR[op.va], CPU.VPR[op.vb]); +} + +void ppu_interpreter::VCMPEQUW_(PPUThread& CPU, ppu_opcode_t op) +{ + VCMPEQUW(CPU, op); + + CPU.CR.cr6 = CPU.VPR[op.vd].is_any_1() ? (CPU.VPR[op.vd].is_any_0() ? 0 : 8) : 2; // set 2 if none equal, 8 if all equal +} + +void ppu_interpreter::VCMPGEFP(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vf = _mm_cmpge_ps(CPU.VPR[op.va].vf, CPU.VPR[op.vb].vf); +} + +void ppu_interpreter::VCMPGEFP_(PPUThread& CPU, ppu_opcode_t op) +{ + VCMPGEFP(CPU, op); + + CPU.CR.cr6 = CPU.VPR[op.vd].is_any_1() ? (CPU.VPR[op.vd].is_any_0() ? 
0 : 8) : 2; +} + +void ppu_interpreter::VCMPGTFP(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vf = _mm_cmpgt_ps(CPU.VPR[op.va].vf, CPU.VPR[op.vb].vf); +} + +void ppu_interpreter::VCMPGTFP_(PPUThread& CPU, ppu_opcode_t op) +{ + VCMPGTFP(CPU, op); + + CPU.CR.cr6 = CPU.VPR[op.vd].is_any_1() ? (CPU.VPR[op.vd].is_any_0() ? 0 : 8) : 2; +} + +void ppu_interpreter::VCMPGTSB(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_cmpgt_epi8(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); +} + +void ppu_interpreter::VCMPGTSB_(PPUThread& CPU, ppu_opcode_t op) +{ + VCMPGTSB(CPU, op); + + CPU.CR.cr6 = CPU.VPR[op.vd].is_any_1() ? (CPU.VPR[op.vd].is_any_0() ? 0 : 8) : 2; +} + +void ppu_interpreter::VCMPGTSH(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_cmpgt_epi16(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); +} + +void ppu_interpreter::VCMPGTSH_(PPUThread& CPU, ppu_opcode_t op) +{ + VCMPGTSH(CPU, op); + + CPU.CR.cr6 = CPU.VPR[op.vd].is_any_1() ? (CPU.VPR[op.vd].is_any_0() ? 0 : 8) : 2; +} + +void ppu_interpreter::VCMPGTSW(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_cmpgt_epi32(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); +} + +void ppu_interpreter::VCMPGTSW_(PPUThread& CPU, ppu_opcode_t op) +{ + VCMPGTSW(CPU, op); + + CPU.CR.cr6 = CPU.VPR[op.vd].is_any_1() ? (CPU.VPR[op.vd].is_any_0() ? 0 : 8) : 2; +} + +void ppu_interpreter::VCMPGTUB(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = sse_cmpgt_epu8(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); +} + +void ppu_interpreter::VCMPGTUB_(PPUThread& CPU, ppu_opcode_t op) +{ + VCMPGTUB(CPU, op); + + CPU.CR.cr6 = CPU.VPR[op.vd].is_any_1() ? (CPU.VPR[op.vd].is_any_0() ? 0 : 8) : 2; +} + +void ppu_interpreter::VCMPGTUH(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = sse_cmpgt_epu16(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); +} + +void ppu_interpreter::VCMPGTUH_(PPUThread& CPU, ppu_opcode_t op) +{ + VCMPGTUH(CPU, op); + + CPU.CR.cr6 = CPU.VPR[op.vd].is_any_1() ? (CPU.VPR[op.vd].is_any_0() ? 
0 : 8) : 2; +} + +void ppu_interpreter::VCMPGTUW(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = sse_cmpgt_epu32(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); +} + +void ppu_interpreter::VCMPGTUW_(PPUThread& CPU, ppu_opcode_t op) +{ + VCMPGTUW(CPU, op); + + CPU.CR.cr6 = CPU.VPR[op.vd].is_any_1() ? (CPU.VPR[op.vd].is_any_0() ? 0 : 8) : 2; +} + +void ppu_interpreter::VCTSXS(PPUThread& CPU, ppu_opcode_t op) +{ + const auto scaled = _mm_mul_ps(CPU.VPR[op.vb].vf, g_ppu_scale_table[op.vuimm]); + CPU.VPR[op.vd].vi = _mm_xor_si128(_mm_cvttps_epi32(scaled), _mm_castps_si128(_mm_cmpge_ps(scaled, _mm_set1_ps(0x80000000)))); +} + +void ppu_interpreter::VCTUXS(PPUThread& CPU, ppu_opcode_t op) +{ + const auto scaled1 = _mm_max_ps(_mm_mul_ps(CPU.VPR[op.vb].vf, g_ppu_scale_table[op.vuimm]), _mm_set1_ps(0.0f)); + const auto scaled2 = _mm_and_ps(_mm_sub_ps(scaled1, _mm_set1_ps(0x80000000)), _mm_cmpge_ps(scaled1, _mm_set1_ps(0x80000000))); + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_or_si128(_mm_cvttps_epi32(scaled1), _mm_cvttps_epi32(scaled2)), _mm_castps_si128(_mm_cmpge_ps(scaled1, _mm_set1_ps(0x100000000)))); +} + +void ppu_interpreter::VEXPTEFP(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vf = sse_exp2_ps(CPU.VPR[op.vb].vf); +} + +void ppu_interpreter::VLOGEFP(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vf = sse_log2_ps(CPU.VPR[op.vb].vf); +} + +void ppu_interpreter::VMADDFP(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vf = _mm_add_ps(_mm_mul_ps(CPU.VPR[op.va].vf, CPU.VPR[op.vc].vf), CPU.VPR[op.vb].vf); +} + +void ppu_interpreter::VMAXFP(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vf = _mm_max_ps(CPU.VPR[op.va].vf, CPU.VPR[op.vb].vf); +} + +void ppu_interpreter::VMAXSB(PPUThread& CPU, ppu_opcode_t op) +{ + const auto a = CPU.VPR[op.va].vi; + const auto b = CPU.VPR[op.vb].vi; + const auto m = _mm_cmpgt_epi8(a, b); + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); +} + +void ppu_interpreter::VMAXSH(PPUThread& CPU, 
ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_max_epi16(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); +} + +void ppu_interpreter::VMAXSW(PPUThread& CPU, ppu_opcode_t op) +{ + const auto a = CPU.VPR[op.va].vi; + const auto b = CPU.VPR[op.vb].vi; + const auto m = _mm_cmpgt_epi32(a, b); + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); +} + +void ppu_interpreter::VMAXUB(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_max_epu8(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); +} + +void ppu_interpreter::VMAXUH(PPUThread& CPU, ppu_opcode_t op) +{ + const auto mask = _mm_set1_epi32(0x80008000); + CPU.VPR[op.vd].vi = _mm_xor_si128(_mm_max_epi16(_mm_xor_si128(CPU.VPR[op.va].vi, mask), _mm_xor_si128(CPU.VPR[op.vb].vi, mask)), mask); +} + +void ppu_interpreter::VMAXUW(PPUThread& CPU, ppu_opcode_t op) +{ + const auto a = CPU.VPR[op.va].vi; + const auto b = CPU.VPR[op.vb].vi; + const auto m = sse_cmpgt_epu32(a, b); + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); +} + +void ppu_interpreter::VMHADDSHS(PPUThread& CPU, ppu_opcode_t op) +{ + const auto a = CPU.VPR[op.va].vi; + const auto b = CPU.VPR[op.vb].vi; + const auto c = CPU.VPR[op.vc].vi; + const auto m = _mm_or_si128(_mm_srli_epi16(_mm_mullo_epi16(a, b), 15), _mm_slli_epi16(_mm_mulhi_epi16(a, b), 1)); + const auto s = _mm_cmpeq_epi16(m, _mm_set1_epi16(-0x8000)); // detect special case (positive 0x8000) + CPU.VPR[op.vd].vi = _mm_adds_epi16(_mm_adds_epi16(_mm_xor_si128(m, s), c), _mm_srli_epi16(s, 15)); +} + +void ppu_interpreter::VMHRADDSHS(PPUThread& CPU, ppu_opcode_t op) +{ + const auto a = CPU.VPR[op.va].vi; + const auto b = CPU.VPR[op.vb].vi; + const auto c = CPU.VPR[op.vc].vi; + const auto m = _mm_mulhrs_epi16(a, b); + const auto s = _mm_cmpeq_epi16(m, _mm_set1_epi16(-0x8000)); // detect special case (positive 0x8000) + CPU.VPR[op.vd].vi = _mm_adds_epi16(_mm_adds_epi16(_mm_xor_si128(m, s), c), _mm_srli_epi16(s, 15)); +} + +void 
ppu_interpreter::VMINFP(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vf = _mm_min_ps(CPU.VPR[op.va].vf, CPU.VPR[op.vb].vf); +} + +void ppu_interpreter::VMINSB(PPUThread& CPU, ppu_opcode_t op) +{ + const auto a = CPU.VPR[op.va].vi; + const auto b = CPU.VPR[op.vb].vi; + const auto m = _mm_cmpgt_epi8(a, b); + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_andnot_si128(m, a), _mm_and_si128(m, b)); +} + +void ppu_interpreter::VMINSH(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_min_epi16(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); +} + +void ppu_interpreter::VMINSW(PPUThread& CPU, ppu_opcode_t op) +{ + const auto a = CPU.VPR[op.va].vi; + const auto b = CPU.VPR[op.vb].vi; + const auto m = _mm_cmpgt_epi32(a, b); + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_andnot_si128(m, a), _mm_and_si128(m, b)); +} + +void ppu_interpreter::VMINUB(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_min_epu8(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); +} + +void ppu_interpreter::VMINUH(PPUThread& CPU, ppu_opcode_t op) +{ + const auto mask = _mm_set1_epi32(0x80008000); + CPU.VPR[op.vd].vi = _mm_xor_si128(_mm_min_epi16(_mm_xor_si128(CPU.VPR[op.va].vi, mask), _mm_xor_si128(CPU.VPR[op.vb].vi, mask)), mask); +} + +void ppu_interpreter::VMINUW(PPUThread& CPU, ppu_opcode_t op) +{ + const auto a = CPU.VPR[op.va].vi; + const auto b = CPU.VPR[op.vb].vi; + const auto m = sse_cmpgt_epu32(a, b); + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_andnot_si128(m, a), _mm_and_si128(m, b)); +} + +void ppu_interpreter::VMLADDUHM(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_add_epi16(_mm_mullo_epi16(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi), CPU.VPR[op.vc].vi); +} + +void ppu_interpreter::VMRGHB(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_unpackhi_epi8(CPU.VPR[op.vb].vi, CPU.VPR[op.va].vi); +} + +void ppu_interpreter::VMRGHH(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_unpackhi_epi16(CPU.VPR[op.vb].vi, CPU.VPR[op.va].vi); +} + +void 
ppu_interpreter::VMRGHW(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_unpackhi_epi32(CPU.VPR[op.vb].vi, CPU.VPR[op.va].vi); +} + +void ppu_interpreter::VMRGLB(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_unpacklo_epi8(CPU.VPR[op.vb].vi, CPU.VPR[op.va].vi); +} + +void ppu_interpreter::VMRGLH(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_unpacklo_epi16(CPU.VPR[op.vb].vi, CPU.VPR[op.va].vi); +} + +void ppu_interpreter::VMRGLW(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_unpacklo_epi32(CPU.VPR[op.vb].vi, CPU.VPR[op.va].vi); +} + +void ppu_interpreter::VMSUMMBM(PPUThread& CPU, ppu_opcode_t op) +{ + const auto a = CPU.VPR[op.va].vi; // signed bytes + const auto b = CPU.VPR[op.vb].vi; // unsigned bytes + const auto c = CPU.VPR[op.vc].vi; + const auto ah = _mm_srai_epi16(a, 8); + const auto bh = _mm_srli_epi16(b, 8); + const auto al = _mm_srai_epi16(_mm_slli_epi16(a, 8), 8); + const auto bl = _mm_and_si128(b, _mm_set1_epi16(0x00ff)); + const auto sh = _mm_madd_epi16(ah, bh); + const auto sl = _mm_madd_epi16(al, bl); + CPU.VPR[op.vd].vi = _mm_add_epi32(_mm_add_epi32(c, sh), sl); +} + +void ppu_interpreter::VMSUMSHM(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_add_epi32(_mm_madd_epi16(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi), CPU.VPR[op.vc].vi); +} + +void ppu_interpreter::VMSUMSHS(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint w = 0; w < 4; w++) + { + s64 result = 0; + s32 saturated = 0; + + for (uint h = 0; h < 2; h++) + { + result += CPU.VPR[op.va]._s16[w * 2 + h] * CPU.VPR[op.vb]._s16[w * 2 + h]; + } + + result += CPU.VPR[op.vc]._s32[w]; + + if (result > 0x7fffffff) + { + saturated = 0x7fffffff; + } + else if (result < (s64)(s32)0x80000000) + { + saturated = 0x80000000; + } + else + saturated = (s32)result; + + CPU.VPR[op.vd]._s32[w] = saturated; + } +} + +void ppu_interpreter::VMSUMUBM(PPUThread& CPU, ppu_opcode_t op) +{ + const auto a = CPU.VPR[op.va].vi; + const auto b = 
CPU.VPR[op.vb].vi; + const auto c = CPU.VPR[op.vc].vi; + const auto mask = _mm_set1_epi16(0x00ff); + const auto ah = _mm_srli_epi16(a, 8); + const auto al = _mm_and_si128(a, mask); + const auto bh = _mm_srli_epi16(b, 8); + const auto bl = _mm_and_si128(b, mask); + const auto sh = _mm_madd_epi16(ah, bh); + const auto sl = _mm_madd_epi16(al, bl); + CPU.VPR[op.vd].vi = _mm_add_epi32(_mm_add_epi32(c, sh), sl); +} + +void ppu_interpreter::VMSUMUHM(PPUThread& CPU, ppu_opcode_t op) +{ + const auto a = CPU.VPR[op.va].vi; + const auto b = CPU.VPR[op.vb].vi; + const auto c = CPU.VPR[op.vc].vi; + const auto ml = _mm_mullo_epi16(a, b); // low results + const auto mh = _mm_mulhi_epu16(a, b); // high results + const auto ls = _mm_add_epi32(_mm_srli_epi32(ml, 16), _mm_and_si128(ml, _mm_set1_epi32(0x0000ffff))); + const auto hs = _mm_add_epi32(_mm_slli_epi32(mh, 16), _mm_and_si128(mh, _mm_set1_epi32(0xffff0000))); + CPU.VPR[op.vd].vi = _mm_add_epi32(_mm_add_epi32(c, ls), hs); +} + +void ppu_interpreter::VMSUMUHS(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint w = 0; w < 4; w++) + { + u64 result = 0; + u32 saturated = 0; + + for (uint h = 0; h < 2; h++) + { + result += (u64)CPU.VPR[op.va]._u16[w * 2 + h] * (u64)CPU.VPR[op.vb]._u16[w * 2 + h]; + } + + result += CPU.VPR[op.vc]._u32[w]; + + if (result > 0xffffffffu) + { + saturated = 0xffffffff; + } + else + saturated = (u32)result; + + CPU.VPR[op.vd]._u32[w] = saturated; + } +} + +void ppu_interpreter::VMULESB(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_mullo_epi16(_mm_srai_epi16(CPU.VPR[op.va].vi, 8), _mm_srai_epi16(CPU.VPR[op.vb].vi, 8)); +} + +void ppu_interpreter::VMULESH(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_madd_epi16(_mm_srli_epi32(CPU.VPR[op.va].vi, 16), _mm_srli_epi32(CPU.VPR[op.vb].vi, 16)); +} + +void ppu_interpreter::VMULEUB(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_mullo_epi16(_mm_srli_epi16(CPU.VPR[op.va].vi, 8), _mm_srli_epi16(CPU.VPR[op.vb].vi, 8)); +} + 
+void ppu_interpreter::VMULEUH(PPUThread& CPU, ppu_opcode_t op) +{ + const auto a = CPU.VPR[op.va].vi; + const auto b = CPU.VPR[op.vb].vi; + const auto ml = _mm_mullo_epi16(a, b); + const auto mh = _mm_mulhi_epu16(a, b); + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_srli_epi32(ml, 16), _mm_and_si128(mh, _mm_set1_epi32(0xffff0000))); +} + +void ppu_interpreter::VMULOSB(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_mullo_epi16(_mm_srai_epi16(_mm_slli_epi16(CPU.VPR[op.va].vi, 8), 8), _mm_srai_epi16(_mm_slli_epi16(CPU.VPR[op.vb].vi, 8), 8)); +} + +void ppu_interpreter::VMULOSH(PPUThread& CPU, ppu_opcode_t op) +{ + const auto mask = _mm_set1_epi32(0x0000ffff); + CPU.VPR[op.vd].vi = _mm_madd_epi16(_mm_and_si128(CPU.VPR[op.va].vi, mask), _mm_and_si128(CPU.VPR[op.vb].vi, mask)); +} + +void ppu_interpreter::VMULOUB(PPUThread& CPU, ppu_opcode_t op) +{ + const auto mask = _mm_set1_epi16(0x00ff); + CPU.VPR[op.vd].vi = _mm_mullo_epi16(_mm_and_si128(CPU.VPR[op.va].vi, mask), _mm_and_si128(CPU.VPR[op.vb].vi, mask)); +} + +void ppu_interpreter::VMULOUH(PPUThread& CPU, ppu_opcode_t op) +{ + const auto a = CPU.VPR[op.va].vi; + const auto b = CPU.VPR[op.vb].vi; + const auto ml = _mm_mullo_epi16(a, b); + const auto mh = _mm_mulhi_epu16(a, b); + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_slli_epi32(mh, 16), _mm_and_si128(ml, _mm_set1_epi32(0x0000ffff))); +} + +void ppu_interpreter::VNMSUBFP(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vf = _mm_sub_ps(CPU.VPR[op.vb].vf, _mm_mul_ps(CPU.VPR[op.va].vf, CPU.VPR[op.vc].vf)); +} + +void ppu_interpreter::VNOR(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd] = ~(CPU.VPR[op.va] | CPU.VPR[op.vb]); +} + +void ppu_interpreter::VOR(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd] = CPU.VPR[op.va] | CPU.VPR[op.vb]; +} + +void ppu_interpreter::VPERM(PPUThread& CPU, ppu_opcode_t op) +{ + const auto index = _mm_andnot_si128(CPU.VPR[op.vc].vi, _mm_set1_epi8(0x1f)); + const auto mask = _mm_cmpgt_epi8(index, _mm_set1_epi8(0xf)); + const 
auto sa = _mm_shuffle_epi8(CPU.VPR[op.va].vi, index); + const auto sb = _mm_shuffle_epi8(CPU.VPR[op.vb].vi, index); + CPU.VPR[op.vd].vi = _mm_or_si128(_mm_and_si128(mask, sa), _mm_andnot_si128(mask, sb)); +} + +void ppu_interpreter::VPKPX(PPUThread& CPU, ppu_opcode_t op) +{ + u128 VA = CPU.VPR[op.va]; + u128 VB = CPU.VPR[op.vb]; + for (uint h = 0; h < 4; h++) + { + u16 bb7 = VB._u8[15 - (h * 4 + 0)] & 0x1; + u16 bb8 = VB._u8[15 - (h * 4 + 1)] >> 3; + u16 bb16 = VB._u8[15 - (h * 4 + 2)] >> 3; + u16 bb24 = VB._u8[15 - (h * 4 + 3)] >> 3; + u16 ab7 = VA._u8[15 - (h * 4 + 0)] & 0x1; + u16 ab8 = VA._u8[15 - (h * 4 + 1)] >> 3; + u16 ab16 = VA._u8[15 - (h * 4 + 2)] >> 3; + u16 ab24 = VA._u8[15 - (h * 4 + 3)] >> 3; + + CPU.VPR[op.vd]._u16[3 - h] = (bb7 << 15) | (bb8 << 10) | (bb16 << 5) | bb24; + CPU.VPR[op.vd]._u16[4 + (3 - h)] = (ab7 << 15) | (ab8 << 10) | (ab16 << 5) | ab24; + } +} + +void ppu_interpreter::VPKSHSS(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_packs_epi16(CPU.VPR[op.vb].vi, CPU.VPR[op.va].vi); +} + +void ppu_interpreter::VPKSHUS(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_packus_epi16(CPU.VPR[op.vb].vi, CPU.VPR[op.va].vi); +} + +void ppu_interpreter::VPKSWSS(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_packs_epi32(CPU.VPR[op.vb].vi, CPU.VPR[op.va].vi); +} + +void ppu_interpreter::VPKSWUS(PPUThread& CPU, ppu_opcode_t op) +{ + //CPU.VPR[op.vd].vi = _mm_packus_epi32(CPU.VPR[op.vb].vi, CPU.VPR[op.va].vi); + + u128 VA = CPU.VPR[op.va]; + u128 VB = CPU.VPR[op.vb]; + for (uint h = 0; h < 4; h++) + { + s32 result = VA._s32[h]; + + if (result > UINT16_MAX) + { + result = UINT16_MAX; + } + else if (result < 0) + { + result = 0; + } + + CPU.VPR[op.vd]._u16[h + 4] = result; + + result = VB._s32[h]; + + if (result > UINT16_MAX) + { + result = UINT16_MAX; + } + else if (result < 0) + { + result = 0; + } + + CPU.VPR[op.vd]._u16[h] = result; + } +} + +void ppu_interpreter::VPKUHUM(PPUThread& CPU, ppu_opcode_t op) +{ + 
u128 VA = CPU.VPR[op.va]; + u128 VB = CPU.VPR[op.vb]; + for (uint b = 0; b < 8; b++) + { + CPU.VPR[op.vd]._u8[b + 8] = VA._u8[b * 2]; + CPU.VPR[op.vd]._u8[b] = VB._u8[b * 2]; + } +} + +void ppu_interpreter::VPKUHUS(PPUThread& CPU, ppu_opcode_t op) +{ + u128 VA = CPU.VPR[op.va]; + u128 VB = CPU.VPR[op.vb]; + for (uint b = 0; b < 8; b++) + { + u16 result = VA._u16[b]; + + if (result > UINT8_MAX) + { + result = UINT8_MAX; + } + + CPU.VPR[op.vd]._u8[b + 8] = (u8)result; + + result = VB._u16[b]; + + if (result > UINT8_MAX) + { + result = UINT8_MAX; + } + + CPU.VPR[op.vd]._u8[b] = (u8)result; + } +} + +void ppu_interpreter::VPKUWUM(PPUThread& CPU, ppu_opcode_t op) +{ + u128 VA = CPU.VPR[op.va]; + u128 VB = CPU.VPR[op.vb]; + for (uint h = 0; h < 4; h++) + { + CPU.VPR[op.vd]._u16[h + 4] = VA._u16[h * 2]; + CPU.VPR[op.vd]._u16[h] = VB._u16[h * 2]; + } +} + +void ppu_interpreter::VPKUWUS(PPUThread& CPU, ppu_opcode_t op) +{ + u128 VA = CPU.VPR[op.va]; + u128 VB = CPU.VPR[op.vb]; + for (uint h = 0; h < 4; h++) + { + u32 result = VA._u32[h]; + + if (result > UINT16_MAX) + { + result = UINT16_MAX; + } + + CPU.VPR[op.vd]._u16[h + 4] = result; + + result = VB._u32[h]; + + if (result > UINT16_MAX) + { + result = UINT16_MAX; + } + + CPU.VPR[op.vd]._u16[h] = result; + } +} + +void ppu_interpreter::VREFP(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vf = _mm_rcp_ps(CPU.VPR[op.vb].vf); +} + +void ppu_interpreter::VRFIM(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint w = 0; w < 4; w++) + { + const float b = CPU.VPR[op.vb]._f[w]; + CPU.VPR[op.vd]._f[w] = floorf(CPU.VPR[op.vb]._f[w]); + } +} + +void ppu_interpreter::VRFIN(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint w = 0; w < 4; w++) + { + const float b = CPU.VPR[op.vb]._f[w]; + CPU.VPR[op.vd]._f[w] = nearbyintf(CPU.VPR[op.vb]._f[w]); + } +} + +void ppu_interpreter::VRFIP(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint w = 0; w < 4; w++) + { + const float b = CPU.VPR[op.vb]._f[w]; + CPU.VPR[op.vd]._f[w] = 
ceilf(CPU.VPR[op.vb]._f[w]); + } +} + +void ppu_interpreter::VRFIZ(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint w = 0; w < 4; w++) + { + const float b = CPU.VPR[op.vb]._f[w]; + CPU.VPR[op.vd]._f[w] = truncf(CPU.VPR[op.vb]._f[w]); + } +} + +void ppu_interpreter::VRLB(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint b = 0; b < 16; b++) + { + int nRot = CPU.VPR[op.vb]._u8[b] & 0x7; + + CPU.VPR[op.vd]._u8[b] = (CPU.VPR[op.va]._u8[b] << nRot) | (CPU.VPR[op.va]._u8[b] >> (8 - nRot)); + } +} + +void ppu_interpreter::VRLH(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._u16[h] = rotl16(CPU.VPR[op.va]._u16[h], CPU.VPR[op.vb]._u8[h * 2] & 0xf); + } +} + +void ppu_interpreter::VRLW(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._u32[w] = (u32)rotl32(CPU.VPR[op.va]._u32[w], CPU.VPR[op.vb]._u8[w * 4] & 0x1f); + } +} + +void ppu_interpreter::VRSQRTEFP(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vf = _mm_rsqrt_ps(CPU.VPR[op.vb].vf); +} + +void ppu_interpreter::VSEL(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd] = (CPU.VPR[op.vb] & CPU.VPR[op.vc]) | (CPU.VPR[op.va] & ~CPU.VPR[op.vc]); +} + +void ppu_interpreter::VSL(PPUThread& CPU, ppu_opcode_t op) +{ + u128 VA = CPU.VPR[op.va]; + u8 sh = CPU.VPR[op.vb]._u8[0] & 0x7; + + CPU.VPR[op.vd]._u8[0] = VA._u8[0] << sh; + for (uint b = 1; b < 16; b++) + { + CPU.VPR[op.vd]._u8[b] = (VA._u8[b] << sh) | (VA._u8[b - 1] >> (8 - sh)); + } +} + +void ppu_interpreter::VSLB(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint b = 0; b < 16; b++) + { + CPU.VPR[op.vd]._u8[b] = CPU.VPR[op.va]._u8[b] << (CPU.VPR[op.vb]._u8[b] & 0x7); + } +} + +void ppu_interpreter::VSLDOI(PPUThread& CPU, ppu_opcode_t op) +{ + u8 tmpSRC[32]; + memcpy(tmpSRC, CPU.VPR[op.vb]._u8, 16); + memcpy(tmpSRC + 16, CPU.VPR[op.va]._u8, 16); + + for (uint b = 0; b<16; b++) + { + CPU.VPR[op.vd]._u8[15 - b] = tmpSRC[31 - (b + op.vsh)]; + } +} + +void ppu_interpreter::VSLH(PPUThread& CPU, 
ppu_opcode_t op) +{ + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._u16[h] = CPU.VPR[op.va]._u16[h] << (CPU.VPR[op.vb]._u16[h] & 0xf); + } +} + +void ppu_interpreter::VSLO(PPUThread& CPU, ppu_opcode_t op) +{ + u128 VA = CPU.VPR[op.va]; + u8 nShift = (CPU.VPR[op.vb]._u8[0] >> 3) & 0xf; + + CPU.VPR[op.vd].clear(); + + for (u8 b = 0; b < 16 - nShift; b++) + { + CPU.VPR[op.vd]._u8[15 - b] = VA._u8[15 - (b + nShift)]; + } +} + +void ppu_interpreter::VSLW(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._u32[w] = CPU.VPR[op.va]._u32[w] << (CPU.VPR[op.vb]._u32[w] & 0x1f); + } +} + +void ppu_interpreter::VSPLTB(PPUThread& CPU, ppu_opcode_t op) +{ + u8 byte = CPU.VPR[op.vb]._u8[15 - op.vuimm]; + + for (uint b = 0; b < 16; b++) + { + CPU.VPR[op.vd]._u8[b] = byte; + } +} + +void ppu_interpreter::VSPLTH(PPUThread& CPU, ppu_opcode_t op) +{ + assert(op.vuimm < 8); + + u16 hword = CPU.VPR[op.vb]._u16[7 - op.vuimm]; + + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._u16[h] = hword; + } +} + +void ppu_interpreter::VSPLTISB(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint b = 0; b < 16; b++) + { + CPU.VPR[op.vd]._u8[b] = op.vsimm; + } +} + +void ppu_interpreter::VSPLTISH(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._u16[h] = (s16)op.vsimm; + } +} + +void ppu_interpreter::VSPLTISW(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._u32[w] = (s32)op.vsimm; + } +} + +void ppu_interpreter::VSPLTW(PPUThread& CPU, ppu_opcode_t op) +{ + assert(op.vuimm < 4); + + u32 word = CPU.VPR[op.vb]._u32[3 - op.vuimm]; + + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._u32[w] = word; + } +} + +void ppu_interpreter::VSR(PPUThread& CPU, ppu_opcode_t op) +{ + u128 VA = CPU.VPR[op.va]; + u8 sh = CPU.VPR[op.vb]._u8[0] & 0x7; + + CPU.VPR[op.vd]._u8[15] = VA._u8[15] >> sh; + for (uint b = 14; ~b; b--) + { + CPU.VPR[op.vd]._u8[b] = (VA._u8[b] >> sh) | (VA._u8[b + 1] << (8 - sh)); 
+ } +} + +void ppu_interpreter::VSRAB(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint b = 0; b < 16; b++) + { + CPU.VPR[op.vd]._s8[b] = CPU.VPR[op.va]._s8[b] >> (CPU.VPR[op.vb]._u8[b] & 0x7); + } +} + +void ppu_interpreter::VSRAH(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._s16[h] = CPU.VPR[op.va]._s16[h] >> (CPU.VPR[op.vb]._u16[h] & 0xf); + } +} + +void ppu_interpreter::VSRAW(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._s32[w] = CPU.VPR[op.va]._s32[w] >> (CPU.VPR[op.vb]._u32[w] & 0x1f); + } +} + +void ppu_interpreter::VSRB(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint b = 0; b < 16; b++) + { + CPU.VPR[op.vd]._u8[b] = CPU.VPR[op.va]._u8[b] >> (CPU.VPR[op.vb]._u8[b] & 0x7); + } +} + +void ppu_interpreter::VSRH(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._u16[h] = CPU.VPR[op.va]._u16[h] >> (CPU.VPR[op.vb]._u16[h] & 0xf); + } +} + +void ppu_interpreter::VSRO(PPUThread& CPU, ppu_opcode_t op) +{ + u128 VA = CPU.VPR[op.va]; + u8 nShift = (CPU.VPR[op.vb]._u8[0] >> 3) & 0xf; + + CPU.VPR[op.vd].clear(); + + for (u8 b = 0; b < 16 - nShift; b++) + { + CPU.VPR[op.vd]._u8[b] = VA._u8[b + nShift]; + } +} + +void ppu_interpreter::VSRW(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._u32[w] = CPU.VPR[op.va]._u32[w] >> (CPU.VPR[op.vb]._u32[w] & 0x1f); + } +} + +void ppu_interpreter::VSUBCUW(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._u32[w] = CPU.VPR[op.va]._u32[w] < CPU.VPR[op.vb]._u32[w] ? 
0 : 1; + } +} + +void ppu_interpreter::VSUBFP(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd] = u128::subfs(CPU.VPR[op.va], CPU.VPR[op.vb]); +} + +void ppu_interpreter::VSUBSBS(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_subs_epi8(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); +} + +void ppu_interpreter::VSUBSHS(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_subs_epi16(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); +} + +void ppu_interpreter::VSUBSWS(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint w = 0; w < 4; w++) + { + s64 result = (s64)CPU.VPR[op.va]._s32[w] - (s64)CPU.VPR[op.vb]._s32[w]; + + if (result < INT32_MIN) + { + CPU.VPR[op.vd]._s32[w] = (s32)INT32_MIN; + } + else if (result > INT32_MAX) + { + CPU.VPR[op.vd]._s32[w] = (s32)INT32_MAX; + } + else + CPU.VPR[op.vd]._s32[w] = (s32)result; + } +} + +void ppu_interpreter::VSUBUBM(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd] = u128::sub8(CPU.VPR[op.va], CPU.VPR[op.vb]); +} + +void ppu_interpreter::VSUBUBS(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_subs_epu8(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); +} + +void ppu_interpreter::VSUBUHM(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd] = u128::sub16(CPU.VPR[op.va], CPU.VPR[op.vb]); +} + +void ppu_interpreter::VSUBUHS(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd].vi = _mm_subs_epu16(CPU.VPR[op.va].vi, CPU.VPR[op.vb].vi); +} + +void ppu_interpreter::VSUBUWM(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd] = u128::sub32(CPU.VPR[op.va], CPU.VPR[op.vb]); +} + +void ppu_interpreter::VSUBUWS(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint w = 0; w < 4; w++) + { + s64 result = (s64)CPU.VPR[op.va]._u32[w] - (s64)CPU.VPR[op.vb]._u32[w]; + + if (result < 0) + { + CPU.VPR[op.vd]._u32[w] = 0; + } + else + CPU.VPR[op.vd]._u32[w] = (u32)result; + } +} + +void ppu_interpreter::VSUMSWS(PPUThread& CPU, ppu_opcode_t op) +{ + s64 sum = CPU.VPR[op.vb]._s32[0]; + + for (uint w = 0; w < 4; w++) + { + sum += 
CPU.VPR[op.va]._s32[w]; + } + + CPU.VPR[op.vd].clear(); + if (sum > INT32_MAX) + { + CPU.VPR[op.vd]._s32[0] = (s32)INT32_MAX; + } + else if (sum < INT32_MIN) + { + CPU.VPR[op.vd]._s32[0] = (s32)INT32_MIN; + } + else + CPU.VPR[op.vd]._s32[0] = (s32)sum; +} + +void ppu_interpreter::VSUM2SWS(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint n = 0; n < 2; n++) + { + s64 sum = (s64)CPU.VPR[op.va]._s32[n * 2] + CPU.VPR[op.va]._s32[n * 2 + 1] + CPU.VPR[op.vb]._s32[n * 2]; + + if (sum > INT32_MAX) + { + CPU.VPR[op.vd]._s32[n * 2] = (s32)INT32_MAX; + } + else if (sum < INT32_MIN) + { + CPU.VPR[op.vd]._s32[n * 2] = (s32)INT32_MIN; + } + else + CPU.VPR[op.vd]._s32[n * 2] = (s32)sum; + } + CPU.VPR[op.vd]._s32[1] = 0; + CPU.VPR[op.vd]._s32[3] = 0; +} + +void ppu_interpreter::VSUM4SBS(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint w = 0; w < 4; w++) + { + s64 sum = CPU.VPR[op.vb]._s32[w]; + + for (uint b = 0; b < 4; b++) + { + sum += CPU.VPR[op.va]._s8[w * 4 + b]; + } + + if (sum > INT32_MAX) + { + CPU.VPR[op.vd]._s32[w] = (s32)INT32_MAX; + } + else if (sum < INT32_MIN) + { + CPU.VPR[op.vd]._s32[w] = (s32)INT32_MIN; + } + else + CPU.VPR[op.vd]._s32[w] = (s32)sum; + } +} + +void ppu_interpreter::VSUM4SHS(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint w = 0; w < 4; w++) + { + s64 sum = CPU.VPR[op.vb]._s32[w]; + + for (uint h = 0; h < 2; h++) + { + sum += CPU.VPR[op.va]._s16[w * 2 + h]; + } + + if (sum > INT32_MAX) + { + CPU.VPR[op.vd]._s32[w] = (s32)INT32_MAX; + } + else if (sum < INT32_MIN) + { + CPU.VPR[op.vd]._s32[w] = (s32)INT32_MIN; + } + else + CPU.VPR[op.vd]._s32[w] = (s32)sum; + } +} + +void ppu_interpreter::VSUM4UBS(PPUThread& CPU, ppu_opcode_t op) +{ + for (uint w = 0; w < 4; w++) + { + u64 sum = CPU.VPR[op.vb]._u32[w]; + + for (uint b = 0; b < 4; b++) + { + sum += CPU.VPR[op.va]._u8[w * 4 + b]; + } + + if (sum > UINT32_MAX) + { + CPU.VPR[op.vd]._u32[w] = (u32)UINT32_MAX; + } + else + CPU.VPR[op.vd]._u32[w] = (u32)sum; + } +} + +void ppu_interpreter::VUPKHPX(PPUThread& 
CPU, ppu_opcode_t op) +{ + u128 VB = CPU.VPR[op.vb]; + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._s8[w * 4 + 3] = VB._s8[8 + w * 2 + 1] >> 7; // signed shift sign extends + CPU.VPR[op.vd]._u8[w * 4 + 2] = (VB._u8[8 + w * 2 + 1] >> 2) & 0x1f; + CPU.VPR[op.vd]._u8[w * 4 + 1] = ((VB._u8[8 + w * 2 + 1] & 0x3) << 3) | ((VB._u8[8 + w * 2 + 0] >> 5) & 0x7); + CPU.VPR[op.vd]._u8[w * 4 + 0] = VB._u8[8 + w * 2 + 0] & 0x1f; + } +} + +void ppu_interpreter::VUPKHSB(PPUThread& CPU, ppu_opcode_t op) +{ + u128 VB = CPU.VPR[op.vb]; + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._s16[h] = VB._s8[8 + h]; + } +} + +void ppu_interpreter::VUPKHSH(PPUThread& CPU, ppu_opcode_t op) +{ + u128 VB = CPU.VPR[op.vb]; + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._s32[w] = VB._s16[4 + w]; + } +} + +void ppu_interpreter::VUPKLPX(PPUThread& CPU, ppu_opcode_t op) +{ + u128 VB = CPU.VPR[op.vb]; + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._s8[w * 4 + 3] = VB._s8[w * 2 + 1] >> 7; // signed shift sign extends + CPU.VPR[op.vd]._u8[w * 4 + 2] = (VB._u8[w * 2 + 1] >> 2) & 0x1f; + CPU.VPR[op.vd]._u8[w * 4 + 1] = ((VB._u8[w * 2 + 1] & 0x3) << 3) | ((VB._u8[w * 2 + 0] >> 5) & 0x7); + CPU.VPR[op.vd]._u8[w * 4 + 0] = VB._u8[w * 2 + 0] & 0x1f; + } +} + +void ppu_interpreter::VUPKLSB(PPUThread& CPU, ppu_opcode_t op) +{ + u128 VB = CPU.VPR[op.vb]; + for (uint h = 0; h < 8; h++) + { + CPU.VPR[op.vd]._s16[h] = VB._s8[h]; + } +} + +void ppu_interpreter::VUPKLSH(PPUThread& CPU, ppu_opcode_t op) +{ + u128 VB = CPU.VPR[op.vb]; + for (uint w = 0; w < 4; w++) + { + CPU.VPR[op.vd]._s32[w] = VB._s16[w]; + } +} + +void ppu_interpreter::VXOR(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.VPR[op.vd] = CPU.VPR[op.va] ^ CPU.VPR[op.vb]; +} + +void ppu_interpreter::MULLI(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.GPR[op.rd] = (s64)CPU.GPR[op.ra] * op.simm16; +} + +void ppu_interpreter::SUBFIC(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 RA = CPU.GPR[op.ra]; + const u64 IMM = (s64)op.simm16; + 
CPU.GPR[op.rd] = ~RA + IMM + 1; + + CPU.XER.CA = CPU.IsCarry(~RA, IMM, 1); +} + +void ppu_interpreter::CMPLI(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.UpdateCRnU(op.l10, op.crfd, CPU.GPR[op.ra], op.uimm16); +} + +void ppu_interpreter::CMPI(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.UpdateCRnS(op.l10, op.crfd, CPU.GPR[op.ra], op.simm16); +} + +void ppu_interpreter::ADDIC(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 RA = CPU.GPR[op.ra]; + CPU.GPR[op.rd] = RA + op.simm16; + CPU.XER.CA = CPU.IsCarry(RA, op.simm16); +} + +void ppu_interpreter::ADDIC_(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 RA = CPU.GPR[op.ra]; + CPU.GPR[op.rd] = RA + op.simm16; + CPU.XER.CA = CPU.IsCarry(RA, op.simm16); + CPU.UpdateCR0(CPU.GPR[op.rd]); +} + +void ppu_interpreter::ADDI(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.GPR[op.rd] = op.ra ? ((s64)CPU.GPR[op.ra] + op.simm16) : op.simm16; +} + +void ppu_interpreter::ADDIS(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.GPR[op.rd] = op.ra ? ((s64)CPU.GPR[op.ra] + (op.simm16 << 16)) : (op.simm16 << 16); +} + +void ppu_interpreter::BC(PPUThread& CPU, ppu_opcode_t op) +{ + const u8 bo0 = (op.bo & 0x10) ? 1 : 0; + const u8 bo1 = (op.bo & 0x08) ? 1 : 0; + const u8 bo2 = (op.bo & 0x04) ? 1 : 0; + const u8 bo3 = (op.bo & 0x02) ? 1 : 0; + + if (!bo2) --CPU.CTR; + + const u8 ctr_ok = bo2 | ((CPU.CTR != 0) ^ bo3); + const u8 cond_ok = bo0 | (CPU.IsCR(op.bi) ^ (~bo1 & 0x1)); + + if (ctr_ok && cond_ok) + { + const u32 nextLR = CPU.PC + 4; + CPU.SetBranch(PPUOpcodes::branchTarget((op.aa ? 
0 : CPU.PC), op.simm16), op.lk); + if (op.lk) CPU.LR = nextLR; + } +} + +void ppu_interpreter::HACK(PPUThread& CPU, ppu_opcode_t op) +{ + execute_ppu_func_by_index(CPU, op.opcode & 0x3ffffff); +} + +void ppu_interpreter::SC(PPUThread& CPU, ppu_opcode_t op) +{ + switch (op.lev) + { + case 0x0: SysCalls::DoSyscall(CPU, CPU.GPR[11]); break; + case 0x3: CPU.FastStop(); break; + default: throw __FUNCTION__; + } +} + +void ppu_interpreter::B(PPUThread& CPU, ppu_opcode_t op) +{ + const u32 nextLR = CPU.PC + 4; + CPU.SetBranch(PPUOpcodes::branchTarget(op.aa ? 0 : CPU.PC, op.ll), op.lk); + if (op.lk) CPU.LR = nextLR; +} + +void ppu_interpreter::MCRF(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.SetCR(op.crfd, CPU.GetCR(op.crfs)); +} + +void ppu_interpreter::BCLR(PPUThread& CPU, ppu_opcode_t op) +{ + const u8 bo0 = (op.bo & 0x10) ? 1 : 0; + const u8 bo1 = (op.bo & 0x08) ? 1 : 0; + const u8 bo2 = (op.bo & 0x04) ? 1 : 0; + const u8 bo3 = (op.bo & 0x02) ? 1 : 0; + + if (!bo2) --CPU.CTR; + + const u8 ctr_ok = bo2 | ((CPU.CTR != 0) ^ bo3); + const u8 cond_ok = bo0 | (CPU.IsCR(op.bi) ^ (~bo1 & 0x1)); + + if (ctr_ok && cond_ok) + { + const u32 nextLR = CPU.PC + 4; + CPU.SetBranch(PPUOpcodes::branchTarget(0, (u32)CPU.LR), true); + if (op.lk) CPU.LR = nextLR; + } +} + +void ppu_interpreter::CRNOR(PPUThread& CPU, ppu_opcode_t op) +{ + const u8 v = 1 ^ (CPU.IsCR(op.crba) | CPU.IsCR(op.crbb)); + CPU.SetCRBit2(op.crbd, v & 0x1); +} + +void ppu_interpreter::CRANDC(PPUThread& CPU, ppu_opcode_t op) +{ + const u8 v = CPU.IsCR(op.crba) & (1 ^ CPU.IsCR(op.crbb)); + CPU.SetCRBit2(op.crbd, v & 0x1); +} + +void ppu_interpreter::ISYNC(PPUThread& CPU, ppu_opcode_t op) +{ + _mm_mfence(); +} + +void ppu_interpreter::CRXOR(PPUThread& CPU, ppu_opcode_t op) +{ + const u8 v = CPU.IsCR(op.crba) ^ CPU.IsCR(op.crbb); + CPU.SetCRBit2(op.crbd, v & 0x1); +} + +void ppu_interpreter::CRNAND(PPUThread& CPU, ppu_opcode_t op) +{ + const u8 v = 1 ^ (CPU.IsCR(op.crba) & CPU.IsCR(op.crbb)); + CPU.SetCRBit2(op.crbd, v & 
0x1); +} + +void ppu_interpreter::CRAND(PPUThread& CPU, ppu_opcode_t op) +{ + const u8 v = CPU.IsCR(op.crba) & CPU.IsCR(op.crbb); + CPU.SetCRBit2(op.crbd, v & 0x1); +} + +void ppu_interpreter::CREQV(PPUThread& CPU, ppu_opcode_t op) +{ + const u8 v = 1 ^ (CPU.IsCR(op.crba) ^ CPU.IsCR(op.crbb)); + CPU.SetCRBit2(op.crbd, v & 0x1); +} + +void ppu_interpreter::CRORC(PPUThread& CPU, ppu_opcode_t op) +{ + const u8 v = CPU.IsCR(op.crba) | (1 ^ CPU.IsCR(op.crbb)); + CPU.SetCRBit2(op.crbd, v & 0x1); +} + +void ppu_interpreter::CROR(PPUThread& CPU, ppu_opcode_t op) +{ + const u8 v = CPU.IsCR(op.crba) | CPU.IsCR(op.crbb); + CPU.SetCRBit2(op.crbd, v & 0x1); +} + +void ppu_interpreter::BCCTR(PPUThread& CPU, ppu_opcode_t op) +{ + if (op.bo & 0x10 || CPU.IsCR(op.bi) == ((op.bo & 0x8) != 0)) + { + const u32 nextLR = CPU.PC + 4; + CPU.SetBranch(PPUOpcodes::branchTarget(0, (u32)CPU.CTR), true); + if (op.lk) CPU.LR = nextLR; + } +} + +void ppu_interpreter::RLWIMI(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 mask = rotate_mask[32 + op.mb][32 + op.me]; + CPU.GPR[op.ra] = (CPU.GPR[op.ra] & ~mask) | (rotl32(CPU.GPR[op.rs], op.sh) & mask); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::RLWINM(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.GPR[op.ra] = rotl32(CPU.GPR[op.rs], op.sh) & rotate_mask[32 + op.mb][32 + op.me]; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::RLWNM(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.GPR[op.ra] = rotl32(CPU.GPR[op.rs], CPU.GPR[op.rb] & 0x1f) & rotate_mask[32 + op.mb][32 + op.me]; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::ORI(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.GPR[op.ra] = CPU.GPR[op.rs] | op.uimm16; +} + +void ppu_interpreter::ORIS(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.GPR[op.ra] = CPU.GPR[op.rs] | ((u64)op.uimm16 << 16); +} + +void ppu_interpreter::XORI(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.GPR[op.ra] = CPU.GPR[op.rs] ^ op.uimm16; +} + +void 
ppu_interpreter::XORIS(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.GPR[op.ra] = CPU.GPR[op.rs] ^ ((u64)op.uimm16 << 16); +} + +void ppu_interpreter::ANDI_(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.GPR[op.ra] = CPU.GPR[op.rs] & op.uimm16; + CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::ANDIS_(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.GPR[op.ra] = CPU.GPR[op.rs] & ((u64)op.uimm16 << 16); + CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::RLDICL(PPUThread& CPU, ppu_opcode_t op) +{ + auto sh = (op.shh << 5) | op.shl; + auto mb = (op.mbmeh << 5) | op.mbmel; + + CPU.GPR[op.ra] = rotl64(CPU.GPR[op.rs], sh) & rotate_mask[mb][63]; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::RLDICR(PPUThread& CPU, ppu_opcode_t op) +{ + auto sh = (op.shh << 5) | op.shl; + auto me = (op.mbmeh << 5) | op.mbmel; + + CPU.GPR[op.ra] = rotl64(CPU.GPR[op.rs], sh) & rotate_mask[0][me]; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::RLDIC(PPUThread& CPU, ppu_opcode_t op) +{ + auto sh = (op.shh << 5) | op.shl; + auto mb = (op.mbmeh << 5) | op.mbmel; + + CPU.GPR[op.ra] = rotl64(CPU.GPR[op.rs], sh) & rotate_mask[mb][63 - sh]; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::RLDIMI(PPUThread& CPU, ppu_opcode_t op) +{ + auto sh = (op.shh << 5) | op.shl; + auto mb = (op.mbmeh << 5) | op.mbmel; + + const u64 mask = rotate_mask[mb][63 - sh]; + CPU.GPR[op.ra] = (CPU.GPR[op.ra] & ~mask) | (rotl64(CPU.GPR[op.rs], sh) & mask); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::RLDC_LR(PPUThread& CPU, ppu_opcode_t op) +{ + auto sh = (u32)(CPU.GPR[op.rb] & 0x3F); + auto mbme = (op.mbmeh << 5) | op.mbmel; + + if (op.aa) // rldcr + { + CPU.GPR[op.ra] = rotl64(CPU.GPR[op.rs], sh) & rotate_mask[0][mbme]; + } + else // rldcl + { + CPU.GPR[op.ra] = rotl64(CPU.GPR[op.rs], sh) & rotate_mask[mbme][63]; + } + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::CMP(PPUThread& CPU, 
ppu_opcode_t op) +{ + CPU.UpdateCRnS(op.l10, op.crfd, CPU.GPR[op.ra], CPU.GPR[op.rb]); +} + +void ppu_interpreter::TW(PPUThread& CPU, ppu_opcode_t op) +{ + s32 a = (s32)CPU.GPR[op.ra]; + s32 b = (s32)CPU.GPR[op.rb]; + + if ((a < b && (op.bo & 0x10)) || + (a > b && (op.bo & 0x8)) || + (a == b && (op.bo & 0x4)) || + ((u32)a < (u32)b && (op.bo & 0x2)) || + ((u32)a >(u32)b && (op.bo & 0x1))) + { + throw __FUNCTION__; + } +} + +void ppu_interpreter::LVSL(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + + static const u64 lvsl_values[0x10][2] = + { + { 0x08090A0B0C0D0E0F, 0x0001020304050607 }, + { 0x090A0B0C0D0E0F10, 0x0102030405060708 }, + { 0x0A0B0C0D0E0F1011, 0x0203040506070809 }, + { 0x0B0C0D0E0F101112, 0x030405060708090A }, + { 0x0C0D0E0F10111213, 0x0405060708090A0B }, + { 0x0D0E0F1011121314, 0x05060708090A0B0C }, + { 0x0E0F101112131415, 0x060708090A0B0C0D }, + { 0x0F10111213141516, 0x0708090A0B0C0D0E }, + { 0x1011121314151617, 0x08090A0B0C0D0E0F }, + { 0x1112131415161718, 0x090A0B0C0D0E0F10 }, + { 0x1213141516171819, 0x0A0B0C0D0E0F1011 }, + { 0x131415161718191A, 0x0B0C0D0E0F101112 }, + { 0x1415161718191A1B, 0x0C0D0E0F10111213 }, + { 0x15161718191A1B1C, 0x0D0E0F1011121314 }, + { 0x161718191A1B1C1D, 0x0E0F101112131415 }, + { 0x1718191A1B1C1D1E, 0x0F10111213141516 }, + }; + + CPU.VPR[op.vd]._u64[0] = lvsl_values[addr & 0xf][0]; + CPU.VPR[op.vd]._u64[1] = lvsl_values[addr & 0xf][1]; +} + +void ppu_interpreter::LVEBX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.VPR[op.vd]._u8[15 - (addr & 0xf)] = vm::read8(vm::cast(addr)); +} + +void ppu_interpreter::SUBFC(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 RA = CPU.GPR[op.ra]; + const u64 RB = CPU.GPR[op.rb]; + CPU.GPR[op.rd] = ~RA + RB + 1; + CPU.XER.CA = CPU.IsCarry(~RA, RB, 1); + if (op.oe) CPU.SetOV((~RA >> 63 == RB >> 63) && (~RA >> 63 != CPU.GPR[op.rd] >> 63)); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); +} + +void ppu_interpreter::MULHDU(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.GPR[op.rd] = __umulh(CPU.GPR[op.ra], CPU.GPR[op.rb]); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); +} + +void ppu_interpreter::ADDC(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 RA = CPU.GPR[op.ra]; + const u64 RB = CPU.GPR[op.rb]; + CPU.GPR[op.rd] = RA + RB; + CPU.XER.CA = CPU.IsCarry(RA, RB); + if (op.oe) CPU.SetOV((RA >> 63 == RB >> 63) && (RA >> 63 != CPU.GPR[op.rd] >> 63)); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); +} + +void ppu_interpreter::MULHWU(PPUThread& CPU, ppu_opcode_t op) +{ + u32 a = (u32)CPU.GPR[op.ra]; + u32 b = (u32)CPU.GPR[op.rb]; + CPU.GPR[op.rd] = ((u64)a * (u64)b) >> 32; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); +} + +void ppu_interpreter::MFOCRF(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.GPR[op.rd] = CPU.CR.CR; +} + +void ppu_interpreter::LWARX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + + be_t value; + vm::reservation_acquire(&value, vm::cast(addr), sizeof(value)); + + CPU.GPR[op.rd] = value; +} + +void ppu_interpreter::LDX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.GPR[op.rd] = vm::read64(vm::cast(addr)); +} + +void ppu_interpreter::LWZX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.GPR[op.rd] = vm::read32(vm::cast(addr)); +} + +void ppu_interpreter::SLW(PPUThread& CPU, ppu_opcode_t op) +{ + u32 n = CPU.GPR[op.rb] & 0x1f; + u32 r = (u32)rotl32((u32)CPU.GPR[op.rs], n); + u32 m = ((u32)CPU.GPR[op.rb] & 0x20) ? 0 : (u32)rotate_mask[32][63 - n]; + + CPU.GPR[op.ra] = r & m; + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::CNTLZW(PPUThread& CPU, ppu_opcode_t op) +{ + u32 i; + for (i = 0; i < 32; i++) + { + if (CPU.GPR[op.rs] & (1ULL << (31 - i))) break; + } + + CPU.GPR[op.ra] = i; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::SLD(PPUThread& CPU, ppu_opcode_t op) +{ + u32 n = CPU.GPR[op.rb] & 0x3f; + u64 r = rotl64(CPU.GPR[op.rs], n); + u64 m = (CPU.GPR[op.rb] & 0x40) ? 0 : rotate_mask[0][63 - n]; + + CPU.GPR[op.ra] = r & m; + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::AND(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.GPR[op.ra] = CPU.GPR[op.rs] & CPU.GPR[op.rb]; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::CMPL(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.UpdateCRnU(op.l10, op.crfd, CPU.GPR[op.ra], CPU.GPR[op.rb]); +} + +void ppu_interpreter::LVSR(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + + static const u64 lvsr_values[0x10][2] = + { + { 0x18191A1B1C1D1E1F, 0x1011121314151617 }, + { 0x1718191A1B1C1D1E, 0x0F10111213141516 }, + { 0x161718191A1B1C1D, 0x0E0F101112131415 }, + { 0x15161718191A1B1C, 0x0D0E0F1011121314 }, + { 0x1415161718191A1B, 0x0C0D0E0F10111213 }, + { 0x131415161718191A, 0x0B0C0D0E0F101112 }, + { 0x1213141516171819, 0x0A0B0C0D0E0F1011 }, + { 0x1112131415161718, 0x090A0B0C0D0E0F10 }, + { 0x1011121314151617, 0x08090A0B0C0D0E0F }, + { 0x0F10111213141516, 0x0708090A0B0C0D0E }, + { 0x0E0F101112131415, 0x060708090A0B0C0D }, + { 0x0D0E0F1011121314, 0x05060708090A0B0C }, + { 0x0C0D0E0F10111213, 0x0405060708090A0B }, + { 0x0B0C0D0E0F101112, 0x030405060708090A }, + { 0x0A0B0C0D0E0F1011, 0x0203040506070809 }, + { 0x090A0B0C0D0E0F10, 0x0102030405060708 }, + }; + + CPU.VPR[op.vd]._u64[0] = lvsr_values[addr & 0xf][0]; + CPU.VPR[op.vd]._u64[1] = lvsr_values[addr & 0xf][1]; +} + +void ppu_interpreter::LVEHX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = (op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]) & ~1ULL; + CPU.VPR[op.vd]._u16[7 - ((addr >> 1) & 0x7)] = vm::read16(vm::cast(addr)); +} + +void ppu_interpreter::SUBF(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 RA = CPU.GPR[op.ra]; + const u64 RB = CPU.GPR[op.rb]; + CPU.GPR[op.rd] = RB - RA; + if (op.oe) CPU.SetOV((~RA >> 63 == RB >> 63) && (~RA >> 63 != CPU.GPR[op.rd] >> 63)); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); +} + +void ppu_interpreter::LDUX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = CPU.GPR[op.ra] + CPU.GPR[op.rb]; + CPU.GPR[op.rd] = vm::read64(vm::cast(addr)); + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::DCBST(PPUThread& CPU, ppu_opcode_t op) +{ +} + +void ppu_interpreter::LWZUX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = CPU.GPR[op.ra] + CPU.GPR[op.rb]; + CPU.GPR[op.rd] = vm::read32(vm::cast(addr)); + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::CNTLZD(PPUThread& CPU, ppu_opcode_t op) +{ + u32 i; + for (i = 0; i < 64; i++) + { + if (CPU.GPR[op.rs] & (1ULL << (63 - i))) break; + } + + CPU.GPR[op.ra] = i; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::ANDC(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.GPR[op.ra] = CPU.GPR[op.rs] & ~CPU.GPR[op.rb]; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::TD(PPUThread& CPU, ppu_opcode_t op) +{ + throw __FUNCTION__; +} + +void ppu_interpreter::LVEWX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = (op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]) & ~3ULL; + CPU.VPR[op.vd]._u32[3 - ((addr >> 2) & 0x3)] = vm::read32(vm::cast(addr)); +} + +void ppu_interpreter::MULHD(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.GPR[op.rd] = __mulh(CPU.GPR[op.ra], CPU.GPR[op.rb]); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); +} + +void ppu_interpreter::MULHW(PPUThread& CPU, ppu_opcode_t op) +{ + s32 a = (s32)CPU.GPR[op.ra]; + s32 b = (s32)CPU.GPR[op.rb]; + CPU.GPR[op.rd] = ((s64)a * (s64)b) >> 32; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); +} + +void ppu_interpreter::LDARX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + + be_t value; + vm::reservation_acquire(&value, vm::cast(addr), sizeof(value)); + + CPU.GPR[op.rd] = value; +} + +void ppu_interpreter::DCBF(PPUThread& CPU, ppu_opcode_t op) +{ +} + +void ppu_interpreter::LBZX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.GPR[op.rd] = vm::read8(vm::cast(addr)); +} + +void ppu_interpreter::LVX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = (op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]) & ~0xfull; + CPU.VPR[op.vd] = vm::read128(vm::cast(addr)); +} + +void ppu_interpreter::NEG(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 RA = CPU.GPR[op.ra]; + CPU.GPR[op.rd] = 0 - RA; + if (op.oe) CPU.SetOV((~RA >> 63 == 0) && (~RA >> 63 != CPU.GPR[op.rd] >> 63)); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); +} + +void ppu_interpreter::LBZUX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = CPU.GPR[op.ra] + CPU.GPR[op.rb]; + CPU.GPR[op.rd] = vm::read8(vm::cast(addr)); + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::NOR(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.GPR[op.ra] = ~(CPU.GPR[op.rs] | CPU.GPR[op.rb]); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::STVEBX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + const u8 eb = addr & 0xf; + vm::write8(vm::cast(addr), CPU.VPR[op.vs]._u8[15 - eb]); +} + +void ppu_interpreter::SUBFE(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 RA = CPU.GPR[op.ra]; + const u64 RB = CPU.GPR[op.rb]; + CPU.GPR[op.rd] = ~RA + RB + CPU.XER.CA; + CPU.XER.CA = CPU.IsCarry(~RA, RB, CPU.XER.CA); + if (op.oe) CPU.SetOV((~RA >> 63 == RB >> 63) && (~RA >> 63 != CPU.GPR[op.rd] >> 63)); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); +} + +void ppu_interpreter::ADDE(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 RA = CPU.GPR[op.ra]; + const u64 RB = CPU.GPR[op.rb]; + if (CPU.XER.CA) + { + if (RA == ~0ULL) //-1 + { + CPU.GPR[op.rd] = RB; + CPU.XER.CA = 1; + } + else + { + CPU.GPR[op.rd] = RA + 1 + RB; + CPU.XER.CA = CPU.IsCarry(RA + 1, RB); + } + } + else + { + CPU.GPR[op.rd] = RA + RB; + CPU.XER.CA = CPU.IsCarry(RA, RB); + } + if (op.oe) CPU.SetOV((RA >> 63 == RB >> 63) && (RA >> 63 != CPU.GPR[op.rd] >> 63)); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); +} + +void ppu_interpreter::MTOCRF(PPUThread& CPU, ppu_opcode_t op) +{ + if (op.l11) + { + u32 n = 0, count = 0; + for (u32 i = 0; i<8; ++i) + { + if (op.crm & (1 << i)) + { + n = i; + count++; + } + } + + if (count == 1) + { + //CR[4*n : 4*n+3] = RS[32+4*n : 32+4*n+3]; + CPU.SetCR(7 - n, (CPU.GPR[op.rs] >> (4 * n)) & 0xf); + } + else + CPU.CR.CR = 0; + } + else + { + for (u32 i = 0; i<8; ++i) + { + if (op.crm & (1 << i)) + { + CPU.SetCR(7 - i, (CPU.GPR[op.rs] >> (i * 4)) & 0xf); + } + } + } +} + +void ppu_interpreter::STDX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + vm::write64(vm::cast(addr), CPU.GPR[op.rs]); +} + +void ppu_interpreter::STWCX_(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + + const be_t value = be_t::make((u32)CPU.GPR[op.rs]); + CPU.SetCR_EQ(0, vm::reservation_update(vm::cast(addr), &value, sizeof(value))); +} + +void ppu_interpreter::STWX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + vm::write32(vm::cast(addr), (u32)CPU.GPR[op.rs]); +} + +void ppu_interpreter::STVEHX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = (op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]) & ~1ULL; + const u8 eb = (addr & 0xf) >> 1; + vm::write16(vm::cast(addr), CPU.VPR[op.vs]._u16[7 - eb]); +} + +void ppu_interpreter::STDUX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = CPU.GPR[op.ra] + CPU.GPR[op.rb]; + vm::write64(vm::cast(addr), CPU.GPR[op.rs]); + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::STWUX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = CPU.GPR[op.ra] + CPU.GPR[op.rb]; + vm::write32(vm::cast(addr), (u32)CPU.GPR[op.rs]); + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::STVEWX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = (op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]) & ~3ULL; + const u8 eb = (addr & 0xf) >> 2; + vm::write32(vm::cast(addr), CPU.VPR[op.vs]._u32[3 - eb]); +} + +void ppu_interpreter::SUBFZE(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 RA = CPU.GPR[op.ra]; + CPU.GPR[op.rd] = ~RA + CPU.XER.CA; + CPU.XER.CA = CPU.IsCarry(~RA, CPU.XER.CA); + if (op.oe) CPU.SetOV((~RA >> 63 == 0) && (~RA >> 63 != CPU.GPR[op.rd] >> 63)); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); +} + +void ppu_interpreter::ADDZE(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 RA = CPU.GPR[op.ra]; + CPU.GPR[op.rd] = RA + CPU.XER.CA; + CPU.XER.CA = CPU.IsCarry(RA, CPU.XER.CA); + if (op.oe) CPU.SetOV((RA >> 63 == 0) && (RA >> 63 != CPU.GPR[op.rd] >> 63)); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); +} + +void ppu_interpreter::STDCX_(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + + const be_t value = be_t::make(CPU.GPR[op.rs]); + CPU.SetCR_EQ(0, vm::reservation_update(vm::cast(addr), &value, sizeof(value))); +} + +void ppu_interpreter::STBX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + vm::write8(vm::cast(addr), (u8)CPU.GPR[op.rs]); +} + +void ppu_interpreter::STVX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = (op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]) & ~0xfull; + vm::write128(vm::cast(addr), CPU.VPR[op.vs]); +} + +void ppu_interpreter::MULLD(PPUThread& CPU, ppu_opcode_t op) +{ + const s64 RA = CPU.GPR[op.ra]; + const s64 RB = CPU.GPR[op.rb]; + CPU.GPR[op.rd] = (s64)(RA * RB); + if (op.oe) + { + const s64 high = __mulh(RA, RB); + CPU.SetOV(high != s64(CPU.GPR[op.rd]) >> 63); + } + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); +} + +void ppu_interpreter::SUBFME(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 RA = CPU.GPR[op.ra]; + CPU.GPR[op.rd] = ~RA + CPU.XER.CA + ~0ULL; + CPU.XER.CA = CPU.IsCarry(~RA, CPU.XER.CA, ~0ULL); + if (op.oe) CPU.SetOV((~RA >> 63 == 1) && (~RA >> 63 != CPU.GPR[op.rd] >> 63)); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); +} + +void ppu_interpreter::ADDME(PPUThread& CPU, ppu_opcode_t op) +{ + const s64 RA = CPU.GPR[op.ra]; + CPU.GPR[op.rd] = RA + CPU.XER.CA - 1; + CPU.XER.CA |= RA != 0; + + if (op.oe) CPU.SetOV((u64(RA) >> 63 == 1) && (u64(RA) >> 63 != CPU.GPR[op.rd] >> 63)); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); +} + +void ppu_interpreter::MULLW(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.GPR[op.rd] = (s64)((s64)(s32)CPU.GPR[op.ra] * (s64)(s32)CPU.GPR[op.rb]); + if (op.oe) CPU.SetOV(s64(CPU.GPR[op.rd]) < s64(-1) << 31 || s64(CPU.GPR[op.rd]) >= s64(1) << 31); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); +} + +void ppu_interpreter::DCBTST(PPUThread& CPU, ppu_opcode_t op) +{ +} + +void ppu_interpreter::STBUX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = 
CPU.GPR[op.ra] + CPU.GPR[op.rb]; + vm::write8(vm::cast(addr), (u8)CPU.GPR[op.rs]); + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::ADD(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 RA = CPU.GPR[op.ra]; + const u64 RB = CPU.GPR[op.rb]; + CPU.GPR[op.rd] = RA + RB; + if (op.oe) CPU.SetOV((RA >> 63 == RB >> 63) && (RA >> 63 != CPU.GPR[op.rd] >> 63)); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); +} + +void ppu_interpreter::DCBT(PPUThread& CPU, ppu_opcode_t op) +{ +} + +void ppu_interpreter::LHZX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.GPR[op.rd] = vm::read16(vm::cast(addr)); +} + +void ppu_interpreter::EQV(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.GPR[op.ra] = ~(CPU.GPR[op.rs] ^ CPU.GPR[op.rb]); + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::ECIWX(PPUThread& CPU, ppu_opcode_t op) +{ + throw __FUNCTION__; +} + +void ppu_interpreter::LHZUX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.GPR[op.rd] = vm::read16(vm::cast(addr)); + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::XOR(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.GPR[op.ra] = CPU.GPR[op.rs] ^ CPU.GPR[op.rb]; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::MFSPR(PPUThread& CPU, ppu_opcode_t op) +{ + const u32 n = (op.spr >> 5) | ((op.spr & 0x1f) << 5); + + switch (n) + { + case 0x001: CPU.GPR[op.rd] = CPU.XER.XER; return; + case 0x008: CPU.GPR[op.rd] = CPU.LR; return; + case 0x009: CPU.GPR[op.rd] = CPU.CTR; return; + case 0x100: CPU.GPR[op.rd] = CPU.VRSAVE; return; + case 0x103: CPU.GPR[op.rd] = CPU.SPRG[3]; return; + + case 0x10C: CPU.TB = get_time(); CPU.GPR[op.rd] = CPU.TB; return; + case 0x10D: CPU.TB = get_time(); CPU.GPR[op.rd] = CPU.TB >> 32; return; + + case 0x110: + case 0x111: + case 0x112: + case 0x113: + case 0x114: + case 0x115: + case 0x116: + case 0x117: CPU.GPR[op.rd] = CPU.SPRG[n - 0x110]; return; + } + + throw __FUNCTION__; +} + +void ppu_interpreter::LWAX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.GPR[op.rd] = (s64)(s32)vm::read32(vm::cast(addr)); +} + +void ppu_interpreter::DST(PPUThread& CPU, ppu_opcode_t op) +{ +} + +void ppu_interpreter::LHAX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.GPR[op.rd] = (s64)(s16)vm::read16(vm::cast(addr)); +} + +void ppu_interpreter::LVXL(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = (op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]) & ~0xfull; + CPU.VPR[op.vd] = vm::read128(vm::cast(addr)); +} + +void ppu_interpreter::MFTB(PPUThread& CPU, ppu_opcode_t op) +{ + const u32 n = (op.spr >> 5) | ((op.spr & 0x1f) << 5); + + CPU.TB = get_time(); + switch (n) + { + case 0x10C: CPU.GPR[op.rd] = CPU.TB; break; + case 0x10D: CPU.GPR[op.rd] = CPU.TB >> 32; break; + default: throw __FUNCTION__; + } +} + +void ppu_interpreter::LWAUX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.GPR[op.rd] = (s64)(s32)vm::read32(vm::cast(addr)); + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::DSTST(PPUThread& CPU, ppu_opcode_t op) +{ +} + +void ppu_interpreter::LHAUX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.GPR[op.rd] = (s64)(s16)vm::read16(vm::cast(addr)); + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::STHX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + vm::write16(vm::cast(addr), (u16)CPU.GPR[op.rs]); +} + +void ppu_interpreter::ORC(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.GPR[op.ra] = CPU.GPR[op.rs] | ~CPU.GPR[op.rb]; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::ECOWX(PPUThread& CPU, ppu_opcode_t op) +{ + throw __FUNCTION__; +} + +void ppu_interpreter::STHUX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = CPU.GPR[op.ra] + CPU.GPR[op.rb]; + vm::write16(vm::cast(addr), (u16)CPU.GPR[op.rs]); + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::OR(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.GPR[op.ra] = CPU.GPR[op.rs] | CPU.GPR[op.rb]; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::DIVDU(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 RA = CPU.GPR[op.ra]; + const u64 RB = CPU.GPR[op.rb]; + + if (RB == 0) + { + if (op.oe) CPU.SetOV(true); + CPU.GPR[op.rd] = 0; + } + else + { + if (op.oe) CPU.SetOV(false); + CPU.GPR[op.rd] = RA / RB; + } + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); +} + +void ppu_interpreter::DIVWU(PPUThread& CPU, ppu_opcode_t op) +{ + const u32 RA = (u32)CPU.GPR[op.ra]; + const u32 RB = (u32)CPU.GPR[op.rb]; + + if (RB == 0) + { + if (op.oe) CPU.SetOV(true); + CPU.GPR[op.rd] = 0; + } + else + { + if (op.oe) CPU.SetOV(false); + CPU.GPR[op.rd] = RA / RB; + } + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); +} + +void ppu_interpreter::MTSPR(PPUThread& CPU, ppu_opcode_t op) +{ + const u32 n = (op.spr >> 5) | ((op.spr & 0x1f) << 5); + + switch (n) + { + case 0x001: CPU.XER.XER = CPU.GPR[op.rs]; return; + case 0x008: CPU.LR = CPU.GPR[op.rs]; return; + case 0x009: CPU.CTR = CPU.GPR[op.rs]; return; + case 0x100: CPU.VRSAVE = (u32)CPU.GPR[op.rs]; return; + + case 0x110: + case 0x111: + case 0x112: + case 0x113: + case 0x114: + case 0x115: + case 0x116: + case 0x117: CPU.SPRG[n - 0x110] = CPU.GPR[op.rs]; return; + } + + throw __FUNCTION__; +} + +void ppu_interpreter::DCBI(PPUThread& CPU, ppu_opcode_t op) 
+{ +} + +void ppu_interpreter::NAND(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.GPR[op.ra] = ~(CPU.GPR[op.rs] & CPU.GPR[op.rb]); + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::STVXL(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = (op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]) & ~0xfull; + vm::write128(vm::cast(addr), CPU.VPR[op.vs]); +} + +void ppu_interpreter::DIVD(PPUThread& CPU, ppu_opcode_t op) +{ + const s64 RA = CPU.GPR[op.ra]; + const s64 RB = CPU.GPR[op.rb]; + + if (RB == 0 || ((u64)RA == (1ULL << 63) && RB == -1)) + { + if (op.oe) CPU.SetOV(true); + CPU.GPR[op.rd] = /*(((u64)RA & (1ULL << 63)) && RB == 0) ? -1 :*/ 0; + } + else + { + if (op.oe) CPU.SetOV(false); + CPU.GPR[op.rd] = RA / RB; + } + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); +} + +void ppu_interpreter::DIVW(PPUThread& CPU, ppu_opcode_t op) +{ + const s32 RA = (s32)CPU.GPR[op.ra]; + const s32 RB = (s32)CPU.GPR[op.rb]; + + if (RB == 0 || ((u32)RA == (1 << 31) && RB == -1)) + { + if (op.oe) CPU.SetOV(true); + CPU.GPR[op.rd] = /*(((u32)RA & (1 << 31)) && RB == 0) ? -1 :*/ 0; + } + else + { + if (op.oe) CPU.SetOV(false); + CPU.GPR[op.rd] = (u32)(RA / RB); + } + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.rd]); +} + +void ppu_interpreter::LVLX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + const u32 eb = addr & 0xf; + + CPU.VPR[op.vd].clear(); + for (u32 i = 0; i < 16u - eb; ++i) CPU.VPR[op.vd]._u8[15 - i] = vm::read8(vm::cast(addr + i)); +} + +void ppu_interpreter::LDBRX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.GPR[op.rd] = vm::get_ref(vm::cast(addr)); +} + +void ppu_interpreter::LSWX(PPUThread& CPU, ppu_opcode_t op) +{ + u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + u32 count = CPU.XER.XER & 0x7F; + for (; count >= 4; count -= 4, addr += 4, op.rd = (op.rd + 1) & 31) + { + CPU.GPR[op.rd] = vm::get_ref>(vm::cast(addr)); + } + if (count) + { + u32 value = 0; + for (u32 byte = 0; byte < count; byte++) + { + u32 byte_value = vm::get_ref(vm::cast(addr + byte)); + value |= byte_value << ((3 ^ byte) * 8); + } + CPU.GPR[op.rd] = value; + } +} + +void ppu_interpreter::LWBRX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.GPR[op.rd] = vm::get_ref(vm::cast(addr)); +} + +void ppu_interpreter::LFSX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.FPR[op.frd]._double = vm::get_ref>(vm::cast(addr)).value(); +} + +void ppu_interpreter::SRW(PPUThread& CPU, ppu_opcode_t op) +{ + u32 n = CPU.GPR[op.rb] & 0x1f; + u32 r = (u32)rotl32((u32)CPU.GPR[op.rs], 64 - n); + u32 m = ((u32)CPU.GPR[op.rb] & 0x20) ? 0 : (u32)rotate_mask[32 + n][63]; + CPU.GPR[op.ra] = r & m; + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::SRD(PPUThread& CPU, ppu_opcode_t op) +{ + u32 n = CPU.GPR[op.rb] & 0x3f; + u64 r = rotl64(CPU.GPR[op.rs], 64 - n); + u64 m = (CPU.GPR[op.rb] & 0x40) ? 0 : rotate_mask[n][63]; + CPU.GPR[op.ra] = r & m; + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::LVRX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + const u8 eb = addr & 0xf; + + CPU.VPR[op.vd].clear(); + for (u32 i = 16 - eb; i < 16; ++i) CPU.VPR[op.vd]._u8[15 - i] = vm::read8(vm::cast(addr + i - 16)); +} + +void ppu_interpreter::LSWI(PPUThread& CPU, ppu_opcode_t op) +{ + u64 addr = op.ra ? CPU.GPR[op.ra] : 0; + u64 N = op.rb ? 
op.rb : 32; + u8 reg = op.rd; + + while (N > 0) + { + if (N > 3) + { + CPU.GPR[reg] = vm::read32(vm::cast(addr)); + addr += 4; + N -= 4; + } + else + { + u32 buf = 0; + u32 i = 3; + while (N > 0) + { + N = N - 1; + buf |= vm::read8(vm::cast(addr)) << (i * 8); + addr++; + i--; + } + CPU.GPR[reg] = buf; + } + reg = (reg + 1) % 32; + } +} + +void ppu_interpreter::LFSUX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = CPU.GPR[op.ra] + CPU.GPR[op.rb]; + CPU.FPR[op.frd]._double = vm::get_ref>(vm::cast(addr)).value(); + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::SYNC(PPUThread& CPU, ppu_opcode_t op) +{ + _mm_mfence(); +} + +void ppu_interpreter::LFDX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.FPR[op.frd]._double = vm::get_ref>(vm::cast(addr)).value(); +} + +void ppu_interpreter::LFDUX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = CPU.GPR[op.ra] + CPU.GPR[op.rb]; + CPU.FPR[op.frd]._double = vm::get_ref>(vm::cast(addr)).value(); + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::STVLX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + const u32 eb = addr & 0xf; + + for (u32 i = 0; i < 16u - eb; ++i) vm::write8(vm::cast(addr + i), CPU.VPR[op.vs]._u8[15 - i]); +} + +void ppu_interpreter::STDBRX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + vm::get_ref(vm::cast(addr)) = CPU.GPR[op.rs]; +} + +void ppu_interpreter::STSWX(PPUThread& CPU, ppu_opcode_t op) +{ + u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + u32 count = CPU.XER.XER & 0x7F; + for (; count >= 4; count -= 4, addr += 4, op.rs = (op.rs + 1) & 31) + { + vm::write32(vm::cast(addr), (u32)CPU.GPR[op.rs]); + } + if (count) + { + u32 value = (u32)CPU.GPR[op.rs]; + for (u32 byte = 0; byte < count; byte++) + { + u32 byte_value = (u8)(value >> ((3 ^ byte) * 8)); + vm::write8(vm::cast(addr + byte), byte_value); + } + } +} + +void ppu_interpreter::STWBRX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + vm::get_ref(vm::cast(addr)) = (u32)CPU.GPR[op.rs]; +} + +void ppu_interpreter::STFSX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + vm::get_ref>(vm::cast(addr)) = static_cast(CPU.FPR[op.frs]); +} + +void ppu_interpreter::STVRX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + const u8 eb = addr & 0xf; + + for (u32 i = 16 - eb; i < 16; ++i) vm::write8(vm::cast(addr + i - 16), CPU.VPR[op.vs]._u8[15 - i]); +} + +void ppu_interpreter::STFSUX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = CPU.GPR[op.ra] + CPU.GPR[op.rb]; + vm::get_ref>(vm::cast(addr)) = static_cast(CPU.FPR[op.frs]); + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::STSWI(PPUThread& CPU, ppu_opcode_t op) +{ + u64 addr = op.ra ? CPU.GPR[op.ra] : 0; + u64 N = op.rb ? op.rb : 32; + u8 reg = op.rd; + + while (N > 0) + { + if (N > 3) + { + vm::write32(vm::cast(addr), (u32)CPU.GPR[reg]); + addr += 4; + N -= 4; + } + else + { + u32 buf = (u32)CPU.GPR[reg]; + while (N > 0) + { + N = N - 1; + vm::write8(vm::cast(addr), (0xFF000000 & buf) >> 24); + buf <<= 8; + addr++; + } + } + reg = (reg + 1) % 32; + } +} + +void ppu_interpreter::STFDX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + vm::get_ref>(vm::cast(addr)) = CPU.FPR[op.frs]; +} + +void ppu_interpreter::STFDUX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = CPU.GPR[op.ra] + CPU.GPR[op.rb]; + vm::get_ref>(vm::cast(addr)) = CPU.FPR[op.frs]; + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::LVLXL(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + const u32 eb = addr & 0xf; + + CPU.VPR[op.vd].clear(); + for (u32 i = 0; i < 16u - eb; ++i) CPU.VPR[op.vd]._u8[15 - i] = vm::read8(vm::cast(addr + i)); +} + +void ppu_interpreter::LHBRX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + CPU.GPR[op.rd] = vm::get_ref(vm::cast(addr)); +} + +void ppu_interpreter::SRAW(PPUThread& CPU, ppu_opcode_t op) +{ + s32 RS = (s32)CPU.GPR[op.rs]; + u8 shift = CPU.GPR[op.rb] & 63; + if (shift > 31) + { + CPU.GPR[op.ra] = 0 - (RS < 0); + CPU.XER.CA = (RS < 0); + } + else + { + CPU.GPR[op.ra] = RS >> shift; + CPU.XER.CA = (RS < 0) & ((CPU.GPR[op.ra] << shift) != RS); + } + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::SRAD(PPUThread& CPU, ppu_opcode_t op) +{ + s64 RS = CPU.GPR[op.rs]; + u8 shift = CPU.GPR[op.rb] & 127; + if (shift > 63) + { + CPU.GPR[op.ra] = 0 - (RS < 0); + CPU.XER.CA = (RS < 0); + } + else + { + CPU.GPR[op.ra] = RS >> shift; + CPU.XER.CA = (RS < 0) & ((CPU.GPR[op.ra] << shift) != RS); + } + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::LVRXL(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + const u8 eb = addr & 0xf; + + CPU.VPR[op.vd].clear(); + for (u32 i = 16 - eb; i < 16; ++i) CPU.VPR[op.vd]._u8[15 - i] = vm::read8(vm::cast(addr + i - 16)); +} + +void ppu_interpreter::DSS(PPUThread& CPU, ppu_opcode_t op) +{ +} + +void ppu_interpreter::SRAWI(PPUThread& CPU, ppu_opcode_t op) +{ + s32 RS = (u32)CPU.GPR[op.rs]; + CPU.GPR[op.ra] = RS >> op.sh; + CPU.XER.CA = (RS < 0) & ((u32)(CPU.GPR[op.ra] << op.sh) != RS); + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::SRADI(PPUThread& CPU, ppu_opcode_t op) +{ + auto sh = (op.shh << 5) | op.shl; + s64 RS = CPU.GPR[op.rs]; + CPU.GPR[op.ra] = RS >> sh; + CPU.XER.CA = (RS < 0) & ((CPU.GPR[op.ra] << sh) != RS); + + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::EIEIO(PPUThread& CPU, ppu_opcode_t op) +{ + _mm_mfence(); +} + +void ppu_interpreter::STVLXL(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + const u32 eb = addr & 0xf; + + for (u32 i = 0; i < 16u - eb; ++i) vm::write8(vm::cast(addr + i), CPU.VPR[op.vs]._u8[15 - i]); +} + +void ppu_interpreter::STHBRX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + vm::get_ref(vm::cast(addr)) = (u16)CPU.GPR[op.rs]; +} + +void ppu_interpreter::EXTSH(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.GPR[op.ra] = (s64)(s16)CPU.GPR[op.rs]; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::STVRXL(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + const u8 eb = addr & 0xf; + + for (u32 i = 16 - eb; i < 16; ++i) vm::write8(vm::cast(addr + i - 16), CPU.VPR[op.vs]._u8[15 - i]); +} + +void ppu_interpreter::EXTSB(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.GPR[op.ra] = (s64)(s8)CPU.GPR[op.rs]; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::STFIWX(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + vm::write32(vm::cast(addr), (u32&)CPU.FPR[op.frs]); +} + +void ppu_interpreter::EXTSW(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.GPR[op.ra] = (s64)(s32)CPU.GPR[op.rs]; + if (op.rc) CPU.UpdateCR0(CPU.GPR[op.ra]); +} + +void ppu_interpreter::ICBI(PPUThread& CPU, ppu_opcode_t op) +{ +} + +void ppu_interpreter::DCBZ(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + CPU.GPR[op.rb] : CPU.GPR[op.rb]; + + memset(vm::get_ptr(vm::cast(addr) & ~127), 0, 128); +} + +void ppu_interpreter::LWZ(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + op.simm16 : op.simm16; + CPU.GPR[op.rd] = vm::read32(vm::cast(addr)); +} + +void ppu_interpreter::LWZU(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = CPU.GPR[op.ra] + op.simm16; + CPU.GPR[op.rd] = vm::read32(vm::cast(addr)); + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::LBZ(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + op.simm16 : op.simm16; + CPU.GPR[op.rd] = vm::read8(vm::cast(addr)); +} + +void ppu_interpreter::LBZU(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = CPU.GPR[op.ra] + op.simm16; + CPU.GPR[op.rd] = vm::read8(vm::cast(addr)); + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::STW(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + op.simm16 : op.simm16; + vm::write32(vm::cast(addr), (u32)CPU.GPR[op.rs]); +} + +void ppu_interpreter::STWU(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = CPU.GPR[op.ra] + op.simm16; + vm::write32(vm::cast(addr), (u32)CPU.GPR[op.rs]); + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::STB(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + op.simm16 : op.simm16; + vm::write8(vm::cast(addr), (u8)CPU.GPR[op.rs]); +} + +void ppu_interpreter::STBU(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = CPU.GPR[op.ra] + op.simm16; + vm::write8(vm::cast(addr), (u8)CPU.GPR[op.rs]); + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::LHZ(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + op.simm16 : op.simm16; + CPU.GPR[op.rd] = vm::read16(vm::cast(addr)); +} + +void ppu_interpreter::LHZU(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = CPU.GPR[op.ra] + op.simm16; + CPU.GPR[op.rd] = vm::read16(vm::cast(addr)); + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::LHA(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + op.simm16 : op.simm16; + CPU.GPR[op.rd] = (s64)(s16)vm::read16(vm::cast(addr)); +} + +void ppu_interpreter::LHAU(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = CPU.GPR[op.ra] + op.simm16; + CPU.GPR[op.rd] = (s64)(s16)vm::read16(vm::cast(addr)); + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::STH(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + op.simm16 : op.simm16; + vm::write16(vm::cast(addr), (u16)CPU.GPR[op.rs]); +} + +void ppu_interpreter::STHU(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = CPU.GPR[op.ra] + op.simm16; + vm::write16(vm::cast(addr), (u16)CPU.GPR[op.rs]); + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::LMW(PPUThread& CPU, ppu_opcode_t op) +{ + u64 addr = op.ra ? 
CPU.GPR[op.ra] + op.simm16 : op.simm16; + for (u32 i = op.rd; i<32; ++i, addr += 4) + { + CPU.GPR[i] = vm::read32(vm::cast(addr)); + } +} + +void ppu_interpreter::STMW(PPUThread& CPU, ppu_opcode_t op) +{ + u64 addr = op.ra ? CPU.GPR[op.ra] + op.simm16 : op.simm16; + for (u32 i = op.rs; i<32; ++i, addr += 4) + { + vm::write32(vm::cast(addr), (u32)CPU.GPR[i]); + } +} + +void ppu_interpreter::LFS(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + op.simm16 : op.simm16; + CPU.FPR[op.frd]._double = vm::get_ref>(vm::cast(addr)).value(); +} + +void ppu_interpreter::LFSU(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = CPU.GPR[op.ra] + op.simm16; + CPU.FPR[op.frd]._double = vm::get_ref>(vm::cast(addr)).value(); + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::LFD(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + op.simm16 : op.simm16; + CPU.FPR[op.frd]._double = vm::get_ref>(vm::cast(addr)).value(); +} + +void ppu_interpreter::LFDU(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = CPU.GPR[op.ra] + op.simm16; + CPU.FPR[op.frd]._double = vm::get_ref>(vm::cast(addr)).value(); + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::STFS(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? CPU.GPR[op.ra] + op.simm16 : op.simm16; + vm::get_ref>(vm::cast(addr)) = static_cast(CPU.FPR[op.frs]); +} + +void ppu_interpreter::STFSU(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = CPU.GPR[op.ra] + op.simm16; + vm::get_ref>(vm::cast(addr)) = static_cast(CPU.FPR[op.frs]); + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::STFD(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = op.ra ? 
CPU.GPR[op.ra] + op.simm16 : op.simm16; + vm::get_ref>(vm::cast(addr)) = CPU.FPR[op.frs]; +} + +void ppu_interpreter::STFDU(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = CPU.GPR[op.ra] + op.simm16; + vm::get_ref>(vm::cast(addr)) = CPU.FPR[op.frs]; + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::LD(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = (op.simm16 & ~3) + (op.ra ? CPU.GPR[op.ra] : 0); + CPU.GPR[op.rd] = vm::read64(vm::cast(addr)); +} + +void ppu_interpreter::LDU(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = CPU.GPR[op.ra] + (op.simm16 & ~3); + CPU.GPR[op.rd] = vm::read64(vm::cast(addr)); + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::LWA(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = (op.simm16 & ~3) + (op.ra ? CPU.GPR[op.ra] : 0); + CPU.GPR[op.rd] = (s64)(s32)vm::read32(vm::cast(addr)); +} + +void ppu_interpreter::FDIVS(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.FPR[op.frd]._double = CPU.FPR[op.fra] / CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FSUBS(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.FPR[op.frd]._double = CPU.FPR[op.fra] - CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FADDS(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.FPR[op.frd]._double = CPU.FPR[op.fra] + CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FSQRTS(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.FPR[op.frd]._double = sqrt(CPU.FPR[op.frb]); + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FRES(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.FPR[op.frd]._double = 1.0 / CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FMULS(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.FPR[op.frd]._double = CPU.FPR[op.fra] * CPU.FPR[op.frc]; + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FMADDS(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.FPR[op.frd]._double = CPU.FPR[op.fra] * CPU.FPR[op.frc] + CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); +} + +void 
ppu_interpreter::FMSUBS(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.FPR[op.frd]._double = CPU.FPR[op.fra] * CPU.FPR[op.frc] - CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FNMSUBS(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.FPR[op.frd]._double = -(CPU.FPR[op.fra] * CPU.FPR[op.frc]) + CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FNMADDS(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.FPR[op.frd]._double = -(CPU.FPR[op.fra] * CPU.FPR[op.frc]) - CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::STD(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = (op.simm16 & ~3) + (op.ra ? CPU.GPR[op.ra] : 0); + vm::write64(vm::cast(addr), CPU.GPR[op.rs]); +} + +void ppu_interpreter::STDU(PPUThread& CPU, ppu_opcode_t op) +{ + const u64 addr = CPU.GPR[op.ra] + (op.simm16 & ~3); + vm::write64(vm::cast(addr), CPU.GPR[op.rs]); + CPU.GPR[op.ra] = addr; +} + +void ppu_interpreter::MTFSB1(PPUThread& CPU, ppu_opcode_t op) +{ + u32 mask = 1 << (31 - op.crbd); + if ((op.crbd >= 3 && op.crbd <= 6) && !(CPU.FPSCR.FPSCR & mask)) mask |= 1ULL << 31; //FPSCR.FX + if ((op.crbd == 29) && !CPU.FPSCR.NI) LOG_WARNING(PPU, "Non-IEEE mode enabled"); + CPU.SetFPSCR(CPU.FPSCR.FPSCR | mask); + + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::MCRFS(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.SetCR(op.crfd, (CPU.FPSCR.FPSCR >> ((7 - op.crfs) * 4)) & 0xf); + const u32 exceptions_mask = 0x9FF80700; + CPU.SetFPSCR(CPU.FPSCR.FPSCR & ~(exceptions_mask & 0xf << ((7 - op.crfs) * 4))); +} + +void ppu_interpreter::MTFSB0(PPUThread& CPU, ppu_opcode_t op) +{ + u32 mask = 1 << (31 - op.crbd); + if ((op.crbd == 29) && !CPU.FPSCR.NI) LOG_WARNING(PPU, "Non-IEEE mode disabled"); + CPU.SetFPSCR(CPU.FPSCR.FPSCR & ~mask); + + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::MTFSFI(PPUThread& CPU, ppu_opcode_t op) +{ + u32 mask = 0xF0000000 >> (op.crfd * 4); + u32 val = (op.i & 0xF) << ((7 - op.crfd) * 4); + + const u32 oldNI = 
CPU.FPSCR.NI; + CPU.SetFPSCR((CPU.FPSCR.FPSCR & ~mask) | val); + if (CPU.FPSCR.NI != oldNI) + { + if (oldNI) + LOG_WARNING(PPU, "Non-IEEE mode disabled"); + else + LOG_WARNING(PPU, "Non-IEEE mode enabled"); + } + + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::MFFS(PPUThread& CPU, ppu_opcode_t op) +{ + (u64&)CPU.FPR[op.frd]._double = CPU.FPSCR.FPSCR; + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::MTFSF(PPUThread& CPU, ppu_opcode_t op) +{ + u32 mask = 0; + for (u32 i = 0; i<8; ++i) + { + if (op.flm & (1 << i)) mask |= 0xf << (i * 4); + } + mask &= ~0x60000000; + + const u32 oldNI = CPU.FPSCR.NI; + CPU.SetFPSCR((CPU.FPSCR.FPSCR & ~mask) | ((u32&)CPU.FPR[op.frb] & mask)); + if (CPU.FPSCR.NI != oldNI) + { + if (oldNI) + LOG_WARNING(PPU, "Non-IEEE mode disabled"); + else + LOG_WARNING(PPU, "Non-IEEE mode enabled"); + } + if (op.rc) CPU.UpdateCR1(); +} + + +void ppu_interpreter::FCMPU(PPUThread& CPU, ppu_opcode_t op) +{ + s32 cmp_res = FPRdouble::Cmp(CPU.FPR[op.fra], CPU.FPR[op.frb]); + //CPU.FPSCR.FPRF = cmp_res; + CPU.SetCR(op.crfd, cmp_res); +} + +void ppu_interpreter::FRSP(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.FPR[op.frd]._double = static_cast(CPU.FPR[op.frb]); + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FCTIW(PPUThread& CPU, ppu_opcode_t op) +{ + (s32&)CPU.FPR[op.frd]._double = lrint(CPU.FPR[op.frb]); + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FCTIWZ(PPUThread& CPU, ppu_opcode_t op) +{ + (s32&)CPU.FPR[op.frd]._double = static_cast(CPU.FPR[op.frb]); + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FDIV(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.FPR[op.frd]._double = CPU.FPR[op.fra] / CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FSUB(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.FPR[op.frd]._double = CPU.FPR[op.fra] - CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FADD(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.FPR[op.frd]._double = CPU.FPR[op.fra] + 
CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FSQRT(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.FPR[op.frd]._double = sqrt(CPU.FPR[op.frb]); + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FSEL(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.FPR[op.frd]._double = CPU.FPR[op.fra] >= 0.0 ? CPU.FPR[op.frc] : CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FMUL(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.FPR[op.frd]._double = CPU.FPR[op.fra] * CPU.FPR[op.frc]; + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FRSQRTE(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.FPR[op.frd]._double = 1.0 / sqrt(CPU.FPR[op.frb]); + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FMSUB(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.FPR[op.frd]._double = CPU.FPR[op.fra] * CPU.FPR[op.frc] - CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FMADD(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.FPR[op.frd]._double = CPU.FPR[op.fra] * CPU.FPR[op.frc] + CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FNMSUB(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.FPR[op.frd]._double = -(CPU.FPR[op.fra] * CPU.FPR[op.frc]) + CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FNMADD(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.FPR[op.frd]._double = -(CPU.FPR[op.fra] * CPU.FPR[op.frc]) - CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FCMPO(PPUThread& CPU, ppu_opcode_t op) +{ + s32 cmp_res = FPRdouble::Cmp(CPU.FPR[op.fra], CPU.FPR[op.frb]); + //CPU.FPSCR.FPRF = cmp_res; + CPU.SetCR(op.crfd, cmp_res); +} + +void ppu_interpreter::FNEG(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.FPR[op.frd]._double = -CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FMR(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.FPR[op.frd]._double = CPU.FPR[op.frb]; + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FNABS(PPUThread& CPU, ppu_opcode_t op) +{ + 
CPU.FPR[op.frd]._double = -fabs(CPU.FPR[op.frb]); + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FABS(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.FPR[op.frd]._double = fabs(CPU.FPR[op.frb]); + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FCTID(PPUThread& CPU, ppu_opcode_t op) +{ + (s64&)CPU.FPR[op.frd]._double = llrint(CPU.FPR[op.frb]); + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FCTIDZ(PPUThread& CPU, ppu_opcode_t op) +{ + (s64&)CPU.FPR[op.frd]._double = static_cast(CPU.FPR[op.frb]); + if (op.rc) CPU.UpdateCR1(); +} + +void ppu_interpreter::FCFID(PPUThread& CPU, ppu_opcode_t op) +{ + CPU.FPR[op.frd]._double = static_cast((s64&)CPU.FPR[op.frb]); + if (op.rc) CPU.UpdateCR1(); +} + + +void ppu_interpreter::UNK(PPUThread& CPU, ppu_opcode_t op) +{ + throw __FUNCTION__; +} diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index 7e53616f8b..90f8acd973 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -18,6 +18,7 @@ #include extern u64 rotate_mask[64][64]; // defined in PPUThread.cpp, static didn't work correctly in GCC 4.9 for some reason + inline void InitRotateMask() { static bool inited = false; @@ -56,6 +57,11 @@ static double SilenceNaN(double x) return (double&)bits; } +static float SilenceNaN(float x) +{ + return static_cast(SilenceNaN(static_cast(x))); +} + static void SetHostRoundingMode(u32 rn) { switch (rn) @@ -91,7 +97,6 @@ private: public: PPUInterpreter(PPUThread& cpu) : CPU(cpu) { - InitRotateMask(); } private: @@ -2445,6 +2450,11 @@ private: if(oe) CPU.SetOV((~RA>>63 == RB>>63) && (~RA>>63 != CPU.GPR[rd]>>63)); if(rc) CPU.UpdateCR0(CPU.GPR[rd]); } + void MULHDU(u32 rd, u32 ra, u32 rb, bool rc) + { + CPU.GPR[rd] = __umulh(CPU.GPR[ra], CPU.GPR[rb]); + if(rc) CPU.UpdateCR0(CPU.GPR[rd]); + } void ADDC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { const u64 RA = CPU.GPR[ra]; @@ -2454,11 +2464,6 @@ private: if(oe) CPU.SetOV((RA>>63 == RB>>63) && (RA>>63 != 
CPU.GPR[rd]>>63)); if(rc) CPU.UpdateCR0(CPU.GPR[rd]); } - void MULHDU(u32 rd, u32 ra, u32 rb, bool rc) - { - CPU.GPR[rd] = __umulh(CPU.GPR[ra], CPU.GPR[rb]); - if(rc) CPU.UpdateCR0(CPU.GPR[rd]); - } void MULHWU(u32 rd, u32 ra, u32 rb, bool rc) { u32 a = (u32)CPU.GPR[ra]; @@ -2644,7 +2649,8 @@ private: } void LVX(u32 vd, u32 ra, u32 rb) { - CPU.VPR[vd] = vm::read128((u64)((ra ? CPU.GPR[ra] + CPU.GPR[rb] : CPU.GPR[rb]) & ~0xfULL)); + const u64 addr = (ra ? CPU.GPR[ra] + CPU.GPR[rb] : CPU.GPR[rb]) & ~0xfull; + CPU.VPR[vd] = vm::read128(vm::cast(addr)); } void NEG(u32 rd, u32 ra, u32 oe, bool rc) { @@ -2780,14 +2786,6 @@ private: const u8 eb = (addr & 0xf) >> 2; vm::write32(vm::cast(addr), CPU.VPR[vs]._u32[3 - eb]); } - void ADDZE(u32 rd, u32 ra, u32 oe, bool rc) - { - const u64 RA = CPU.GPR[ra]; - CPU.GPR[rd] = RA + CPU.XER.CA; - CPU.XER.CA = CPU.IsCarry(RA, CPU.XER.CA); - if(oe) CPU.SetOV((RA>>63 == 0) && (RA>>63 != CPU.GPR[rd]>>63)); - if(rc) CPU.UpdateCR0(CPU.GPR[rd]); - } void SUBFZE(u32 rd, u32 ra, u32 oe, bool rc) { const u64 RA = CPU.GPR[ra]; @@ -2796,6 +2794,14 @@ private: if(oe) CPU.SetOV((~RA>>63 == 0) && (~RA>>63 != CPU.GPR[rd]>>63)); if(rc) CPU.UpdateCR0(CPU.GPR[rd]); } + void ADDZE(u32 rd, u32 ra, u32 oe, bool rc) + { + const u64 RA = CPU.GPR[ra]; + CPU.GPR[rd] = RA + CPU.XER.CA; + CPU.XER.CA = CPU.IsCarry(RA, CPU.XER.CA); + if(oe) CPU.SetOV((RA>>63 == 0) && (RA>>63 != CPU.GPR[rd]>>63)); + if(rc) CPU.UpdateCR0(CPU.GPR[rd]); + } void STDCX_(u32 rs, u32 ra, u32 rb) { const u64 addr = ra ? CPU.GPR[ra] + CPU.GPR[rb] : CPU.GPR[rb]; @@ -2810,15 +2816,8 @@ private: } void STVX(u32 vs, u32 ra, u32 rb) { - vm::write128((u64)((ra ? 
CPU.GPR[ra] + CPU.GPR[rb] : CPU.GPR[rb]) & ~0xfULL), CPU.VPR[vs]); - } - void SUBFME(u32 rd, u32 ra, u32 oe, bool rc) - { - const u64 RA = CPU.GPR[ra]; - CPU.GPR[rd] = ~RA + CPU.XER.CA + ~0ULL; - CPU.XER.CA = CPU.IsCarry(~RA, CPU.XER.CA, ~0ULL); - if(oe) CPU.SetOV((~RA>>63 == 1) && (~RA>>63 != CPU.GPR[rd]>>63)); - if(rc) CPU.UpdateCR0(CPU.GPR[rd]); + const u64 addr = (ra ? CPU.GPR[ra] + CPU.GPR[rb] : CPU.GPR[rb]) & ~0xfull; + vm::write128(vm::cast(addr), CPU.VPR[vs]); } void MULLD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { @@ -2832,6 +2831,14 @@ private: } if(rc) CPU.UpdateCR0(CPU.GPR[rd]); } + void SUBFME(u32 rd, u32 ra, u32 oe, bool rc) + { + const u64 RA = CPU.GPR[ra]; + CPU.GPR[rd] = ~RA + CPU.XER.CA + ~0ULL; + CPU.XER.CA = CPU.IsCarry(~RA, CPU.XER.CA, ~0ULL); + if(oe) CPU.SetOV((~RA>>63 == 1) && (~RA>>63 != CPU.GPR[rd]>>63)); + if(rc) CPU.UpdateCR0(CPU.GPR[rd]); + } void ADDME(u32 rd, u32 ra, u32 oe, bool rc) { const s64 RA = CPU.GPR[ra]; @@ -2911,7 +2918,8 @@ private: } void LVXL(u32 vd, u32 ra, u32 rb) { - CPU.VPR[vd] = vm::read128((u64)((ra ? CPU.GPR[ra] + CPU.GPR[rb] : CPU.GPR[rb]) & ~0xfULL)); + const u64 addr = (ra ? CPU.GPR[ra] + CPU.GPR[rb] : CPU.GPR[rb]) & ~0xfull; + CPU.VPR[vd] = vm::read128(vm::cast(addr)); } void MFTB(u32 rd, u32 spr) { @@ -3016,7 +3024,8 @@ private: } void STVXL(u32 vs, u32 ra, u32 rb) { - vm::write128((u64)((ra ? CPU.GPR[ra] + CPU.GPR[rb] : CPU.GPR[rb]) & ~0xfULL), CPU.VPR[vs]); + const u64 addr = (ra ? 
CPU.GPR[ra] + CPU.GPR[rb] : CPU.GPR[rb]) & ~0xfull; + vm::write128(vm::cast(addr), CPU.VPR[vs]); } void DIVD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { @@ -3238,7 +3247,7 @@ private: { u64 bits = (u64&)val; u32 bits32 = (bits>>32 & 0x80000000) | (bits>>29 & 0x7fffffff); - vm::get_ref>(vm::cast(addr)) = (float)bits32; + vm::get_ref>(vm::cast(addr)) = bits32; } } void STVRX(u32 vs, u32 ra, u32 rb) @@ -3260,7 +3269,7 @@ private: { u64 bits = (u64&)val; u32 bits32 = (bits>>32 & 0x80000000) | (bits>>29 & 0x7fffffff); - vm::get_ref>(vm::cast(addr)) = (float)bits32; + vm::get_ref>(vm::cast(addr)) = bits32; } CPU.GPR[ra] = addr; } @@ -3432,9 +3441,7 @@ private: { const u64 addr = ra ? CPU.GPR[ra] + CPU.GPR[rb] : CPU.GPR[rb]; - auto const cache_line = vm::get_ptr(vm::cast(addr) & ~127); - if (cache_line) - memset(cache_line, 0, 128); + memset(vm::get_ptr(vm::cast(addr) & ~127), 0, 128); } void LWZ(u32 rd, u32 ra, s32 d) { @@ -3581,7 +3588,7 @@ private: { u64 bits = (u64&)val; u32 bits32 = (bits>>32 & 0x80000000) | (bits>>29 & 0x7fffffff); - vm::get_ref>(vm::cast(addr)) = (float)bits32; + vm::get_ref>(vm::cast(addr)) = bits32; } } void STFSU(u32 frs, u32 ra, s32 d) @@ -3596,7 +3603,7 @@ private: { u64 bits = (u64&)val; u32 bits32 = (bits>>32 & 0x80000000) | (bits>>29 & 0x7fffffff); - vm::get_ref>(vm::cast(addr)) = (float)bits32; + vm::get_ref>(vm::cast(addr)) = bits32; } CPU.GPR[ra] = addr; } @@ -3618,7 +3625,6 @@ private: } void LDU(u32 rd, u32 ra, s32 ds) { - //if(ra == 0 || rt == ra) return; const u64 addr = CPU.GPR[ra] + ds; CPU.GPR[rd] = vm::read64(vm::cast(addr)); CPU.GPR[ra] = addr; @@ -3684,15 +3690,14 @@ private: } void STDU(u32 rs, u32 ra, s32 ds) { - //if(ra == 0 || rs == ra) return; const u64 addr = CPU.GPR[ra] + ds; vm::write64(vm::cast(addr), CPU.GPR[rs]); CPU.GPR[ra] = addr; } void MTFSB1(u32 crbd, bool rc) { - u64 mask = (1ULL << (31 - crbd)); - if ((crbd >= 3 && crbd <= 6) && !(CPU.FPSCR.FPSCR & mask)) mask |= 1ULL << 31; //FPSCR.FX + u32 mask = 1 << (31 - 
crbd); + if ((crbd >= 3 && crbd <= 6) && !(CPU.FPSCR.FPSCR & mask)) mask |= 1 << 31; //FPSCR.FX if ((crbd == 29) && !CPU.FPSCR.NI) LOG_WARNING(PPU, "Non-IEEE mode enabled"); CPU.SetFPSCR(CPU.FPSCR.FPSCR | mask); @@ -3706,7 +3711,7 @@ private: } void MTFSB0(u32 crbd, bool rc) { - u64 mask = (1ULL << (31 - crbd)); + u32 mask = 1 << (31 - crbd); if ((crbd == 29) && !CPU.FPSCR.NI) LOG_WARNING(PPU, "Non-IEEE mode disabled"); CPU.SetFPSCR(CPU.FPSCR.FPSCR & ~mask); diff --git a/rpcs3/Emu/Cell/PPUInterpreter2.h b/rpcs3/Emu/Cell/PPUInterpreter2.h new file mode 100644 index 0000000000..9ad6989714 --- /dev/null +++ b/rpcs3/Emu/Cell/PPUInterpreter2.h @@ -0,0 +1,953 @@ +#pragma once +#include "PPUOpcodes.h" + +class PPUThread; + +union ppu_opcode_t +{ + u32 opcode; + + struct + { + u32 : 1; // 31 + u32 shh : 1; // 30 + u32 : 3; // 27..29 + u32 mbmeh : 1; // 26 + u32 mbmel : 5; // 21..25 + u32 shl : 5; // 16..20 + u32 vuimm : 5; // 11..15 + u32 vs : 5; // 6..10 + u32 : 6; + }; + + struct + { + u32 : 6; // 26..31 + u32 vsh : 4; // 22..25 + u32 oe : 1; // 21 + u32 spr : 10; // 11..20 + u32 : 11; + }; + + struct + { + u32 : 6; // 26..31 + u32 vc : 5; // 21..25 + u32 vb : 5; // 16..20 + u32 va : 5; // 11..15 + u32 vd : 5; // 6..10 + u32 : 6; + }; + + struct + { + u32 lk : 1; // 31 + u32 aa : 1; // 30 + u32 : 4; // 26..29 + u32 : 5; // 21..25 + u32 rb : 5; // 16..20 + u32 ra : 5; // 11..15 + u32 rd : 5; // 6..10 + u32 : 6; + }; + + struct + { + u32 uimm16 : 16; // 16..31 + u32 : 4; // 12..15 + u32 l11 : 1; // 11 + u32 rs : 5; // 6..10 + u32 : 6; + }; + + struct + { + s32 simm16 : 16; // 16..31 + s32 vsimm : 5; // 11..15 + s32 : 11; + }; + + struct + { + s32 ll : 26; // 6..31 + s32 : 6; + }; + + struct + { + u32 : 5; // 27..31 + u32 lev : 7; // 20..26 + u32 i : 4; // 16..19 + u32 : 2; // 14..15 + u32 crfs : 3; // 11..13 + u32 l10 : 1; // 10 + u32 : 1; // 9 + u32 crfd : 3; // 6..8 + u32 : 6; + }; + + struct + { + u32 : 1; // 31 + u32 : 1; // 30 + u32 : 4; // 26..29 + u32 : 5; // 21..25 
+ u32 crbb : 5; // 16..20 + u32 crba : 5; // 11..15 + u32 crbd : 5; // 6..10 + u32 : 6; + }; + + struct + { + u32 rc : 1; // 31 + u32 me : 5; // 26..30 + u32 mb : 5; // 21..25 + u32 sh : 5; // 16..20 + u32 bi : 5; // 11..15 + u32 bo : 5; // 6..10 + u32 : 6; + }; + + struct + { + u32 : 6; // 26..31 + u32 frc : 5; // 21..25 + u32 frb : 5; // 16..20 + u32 fra : 5; // 11..15 + u32 frd : 5; // 6..10 + u32 : 6; + }; + + struct + { + u32 : 12; // 20..31 + u32 crm : 8; // 12..19 + u32 : 1; // 11 + u32 frs : 5; // 6..10 + u32 : 6; + }; + + struct + { + u32 : 17; // 15..31 + u32 flm : 8; // 7..14 + u32 : 7; + }; +}; + +using ppu_inter_func_t = void(*)(PPUThread& CPU, ppu_opcode_t opcode); + +namespace ppu_interpreter +{ + void NULL_OP(PPUThread& CPU, ppu_opcode_t op); + void NOP(PPUThread& CPU, ppu_opcode_t op); + + void TDI(PPUThread& CPU, ppu_opcode_t op); + void TWI(PPUThread& CPU, ppu_opcode_t op); + + void MFVSCR(PPUThread& CPU, ppu_opcode_t op); + void MTVSCR(PPUThread& CPU, ppu_opcode_t op); + void VADDCUW(PPUThread& CPU, ppu_opcode_t op); + void VADDFP(PPUThread& CPU, ppu_opcode_t op); + void VADDSBS(PPUThread& CPU, ppu_opcode_t op); + void VADDSHS(PPUThread& CPU, ppu_opcode_t op); + void VADDSWS(PPUThread& CPU, ppu_opcode_t op); + void VADDUBM(PPUThread& CPU, ppu_opcode_t op); + void VADDUBS(PPUThread& CPU, ppu_opcode_t op); + void VADDUHM(PPUThread& CPU, ppu_opcode_t op); + void VADDUHS(PPUThread& CPU, ppu_opcode_t op); + void VADDUWM(PPUThread& CPU, ppu_opcode_t op); + void VADDUWS(PPUThread& CPU, ppu_opcode_t op); + void VAND(PPUThread& CPU, ppu_opcode_t op); + void VANDC(PPUThread& CPU, ppu_opcode_t op); + void VAVGSB(PPUThread& CPU, ppu_opcode_t op); + void VAVGSH(PPUThread& CPU, ppu_opcode_t op); + void VAVGSW(PPUThread& CPU, ppu_opcode_t op); + void VAVGUB(PPUThread& CPU, ppu_opcode_t op); + void VAVGUH(PPUThread& CPU, ppu_opcode_t op); + void VAVGUW(PPUThread& CPU, ppu_opcode_t op); + void VCFSX(PPUThread& CPU, ppu_opcode_t op); + void VCFUX(PPUThread& CPU, 
ppu_opcode_t op); + void VCMPBFP(PPUThread& CPU, ppu_opcode_t op); + void VCMPBFP_(PPUThread& CPU, ppu_opcode_t op); + void VCMPEQFP(PPUThread& CPU, ppu_opcode_t op); + void VCMPEQFP_(PPUThread& CPU, ppu_opcode_t op); + void VCMPEQUB(PPUThread& CPU, ppu_opcode_t op); + void VCMPEQUB_(PPUThread& CPU, ppu_opcode_t op); + void VCMPEQUH(PPUThread& CPU, ppu_opcode_t op); + void VCMPEQUH_(PPUThread& CPU, ppu_opcode_t op); + void VCMPEQUW(PPUThread& CPU, ppu_opcode_t op); + void VCMPEQUW_(PPUThread& CPU, ppu_opcode_t op); + void VCMPGEFP(PPUThread& CPU, ppu_opcode_t op); + void VCMPGEFP_(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTFP(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTFP_(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTSB(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTSB_(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTSH(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTSH_(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTSW(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTSW_(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTUB(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTUB_(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTUH(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTUH_(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTUW(PPUThread& CPU, ppu_opcode_t op); + void VCMPGTUW_(PPUThread& CPU, ppu_opcode_t op); + void VCTSXS(PPUThread& CPU, ppu_opcode_t op); + void VCTUXS(PPUThread& CPU, ppu_opcode_t op); + void VEXPTEFP(PPUThread& CPU, ppu_opcode_t op); + void VLOGEFP(PPUThread& CPU, ppu_opcode_t op); + void VMADDFP(PPUThread& CPU, ppu_opcode_t op); + void VMAXFP(PPUThread& CPU, ppu_opcode_t op); + void VMAXSB(PPUThread& CPU, ppu_opcode_t op); + void VMAXSH(PPUThread& CPU, ppu_opcode_t op); + void VMAXSW(PPUThread& CPU, ppu_opcode_t op); + void VMAXUB(PPUThread& CPU, ppu_opcode_t op); + void VMAXUH(PPUThread& CPU, ppu_opcode_t op); + void VMAXUW(PPUThread& CPU, ppu_opcode_t op); + void VMHADDSHS(PPUThread& CPU, ppu_opcode_t op); + void VMHRADDSHS(PPUThread& CPU, 
ppu_opcode_t op); + void VMINFP(PPUThread& CPU, ppu_opcode_t op); + void VMINSB(PPUThread& CPU, ppu_opcode_t op); + void VMINSH(PPUThread& CPU, ppu_opcode_t op); + void VMINSW(PPUThread& CPU, ppu_opcode_t op); + void VMINUB(PPUThread& CPU, ppu_opcode_t op); + void VMINUH(PPUThread& CPU, ppu_opcode_t op); + void VMINUW(PPUThread& CPU, ppu_opcode_t op); + void VMLADDUHM(PPUThread& CPU, ppu_opcode_t op); + void VMRGHB(PPUThread& CPU, ppu_opcode_t op); + void VMRGHH(PPUThread& CPU, ppu_opcode_t op); + void VMRGHW(PPUThread& CPU, ppu_opcode_t op); + void VMRGLB(PPUThread& CPU, ppu_opcode_t op); + void VMRGLH(PPUThread& CPU, ppu_opcode_t op); + void VMRGLW(PPUThread& CPU, ppu_opcode_t op); + void VMSUMMBM(PPUThread& CPU, ppu_opcode_t op); + void VMSUMSHM(PPUThread& CPU, ppu_opcode_t op); + void VMSUMSHS(PPUThread& CPU, ppu_opcode_t op); + void VMSUMUBM(PPUThread& CPU, ppu_opcode_t op); + void VMSUMUHM(PPUThread& CPU, ppu_opcode_t op); + void VMSUMUHS(PPUThread& CPU, ppu_opcode_t op); + void VMULESB(PPUThread& CPU, ppu_opcode_t op); + void VMULESH(PPUThread& CPU, ppu_opcode_t op); + void VMULEUB(PPUThread& CPU, ppu_opcode_t op); + void VMULEUH(PPUThread& CPU, ppu_opcode_t op); + void VMULOSB(PPUThread& CPU, ppu_opcode_t op); + void VMULOSH(PPUThread& CPU, ppu_opcode_t op); + void VMULOUB(PPUThread& CPU, ppu_opcode_t op); + void VMULOUH(PPUThread& CPU, ppu_opcode_t op); + void VNMSUBFP(PPUThread& CPU, ppu_opcode_t op); + void VNOR(PPUThread& CPU, ppu_opcode_t op); + void VOR(PPUThread& CPU, ppu_opcode_t op); + void VPERM(PPUThread& CPU, ppu_opcode_t op); + void VPKPX(PPUThread& CPU, ppu_opcode_t op); + void VPKSHSS(PPUThread& CPU, ppu_opcode_t op); + void VPKSHUS(PPUThread& CPU, ppu_opcode_t op); + void VPKSWSS(PPUThread& CPU, ppu_opcode_t op); + void VPKSWUS(PPUThread& CPU, ppu_opcode_t op); + void VPKUHUM(PPUThread& CPU, ppu_opcode_t op); + void VPKUHUS(PPUThread& CPU, ppu_opcode_t op); + void VPKUWUM(PPUThread& CPU, ppu_opcode_t op); + void VPKUWUS(PPUThread& CPU, 
ppu_opcode_t op); + void VREFP(PPUThread& CPU, ppu_opcode_t op); + void VRFIM(PPUThread& CPU, ppu_opcode_t op); + void VRFIN(PPUThread& CPU, ppu_opcode_t op); + void VRFIP(PPUThread& CPU, ppu_opcode_t op); + void VRFIZ(PPUThread& CPU, ppu_opcode_t op); + void VRLB(PPUThread& CPU, ppu_opcode_t op); + void VRLH(PPUThread& CPU, ppu_opcode_t op); + void VRLW(PPUThread& CPU, ppu_opcode_t op); + void VRSQRTEFP(PPUThread& CPU, ppu_opcode_t op); + void VSEL(PPUThread& CPU, ppu_opcode_t op); + void VSL(PPUThread& CPU, ppu_opcode_t op); + void VSLB(PPUThread& CPU, ppu_opcode_t op); + void VSLDOI(PPUThread& CPU, ppu_opcode_t op); + void VSLH(PPUThread& CPU, ppu_opcode_t op); + void VSLO(PPUThread& CPU, ppu_opcode_t op); + void VSLW(PPUThread& CPU, ppu_opcode_t op); + void VSPLTB(PPUThread& CPU, ppu_opcode_t op); + void VSPLTH(PPUThread& CPU, ppu_opcode_t op); + void VSPLTISB(PPUThread& CPU, ppu_opcode_t op); + void VSPLTISH(PPUThread& CPU, ppu_opcode_t op); + void VSPLTISW(PPUThread& CPU, ppu_opcode_t op); + void VSPLTW(PPUThread& CPU, ppu_opcode_t op); + void VSR(PPUThread& CPU, ppu_opcode_t op); + void VSRAB(PPUThread& CPU, ppu_opcode_t op); + void VSRAH(PPUThread& CPU, ppu_opcode_t op); + void VSRAW(PPUThread& CPU, ppu_opcode_t op); + void VSRB(PPUThread& CPU, ppu_opcode_t op); + void VSRH(PPUThread& CPU, ppu_opcode_t op); + void VSRO(PPUThread& CPU, ppu_opcode_t op); + void VSRW(PPUThread& CPU, ppu_opcode_t op); + void VSUBCUW(PPUThread& CPU, ppu_opcode_t op); + void VSUBFP(PPUThread& CPU, ppu_opcode_t op); + void VSUBSBS(PPUThread& CPU, ppu_opcode_t op); + void VSUBSHS(PPUThread& CPU, ppu_opcode_t op); + void VSUBSWS(PPUThread& CPU, ppu_opcode_t op); + void VSUBUBM(PPUThread& CPU, ppu_opcode_t op); + void VSUBUBS(PPUThread& CPU, ppu_opcode_t op); + void VSUBUHM(PPUThread& CPU, ppu_opcode_t op); + void VSUBUHS(PPUThread& CPU, ppu_opcode_t op); + void VSUBUWM(PPUThread& CPU, ppu_opcode_t op); + void VSUBUWS(PPUThread& CPU, ppu_opcode_t op); + void VSUMSWS(PPUThread& CPU, 
ppu_opcode_t op); + void VSUM2SWS(PPUThread& CPU, ppu_opcode_t op); + void VSUM4SBS(PPUThread& CPU, ppu_opcode_t op); + void VSUM4SHS(PPUThread& CPU, ppu_opcode_t op); + void VSUM4UBS(PPUThread& CPU, ppu_opcode_t op); + void VUPKHPX(PPUThread& CPU, ppu_opcode_t op); + void VUPKHSB(PPUThread& CPU, ppu_opcode_t op); + void VUPKHSH(PPUThread& CPU, ppu_opcode_t op); + void VUPKLPX(PPUThread& CPU, ppu_opcode_t op); + void VUPKLSB(PPUThread& CPU, ppu_opcode_t op); + void VUPKLSH(PPUThread& CPU, ppu_opcode_t op); + void VXOR(PPUThread& CPU, ppu_opcode_t op); + void MULLI(PPUThread& CPU, ppu_opcode_t op); + void SUBFIC(PPUThread& CPU, ppu_opcode_t op); + void CMPLI(PPUThread& CPU, ppu_opcode_t op); + void CMPI(PPUThread& CPU, ppu_opcode_t op); + void ADDIC(PPUThread& CPU, ppu_opcode_t op); + void ADDIC_(PPUThread& CPU, ppu_opcode_t op); + void ADDI(PPUThread& CPU, ppu_opcode_t op); + void ADDIS(PPUThread& CPU, ppu_opcode_t op); + void BC(PPUThread& CPU, ppu_opcode_t op); + void HACK(PPUThread& CPU, ppu_opcode_t op); + void SC(PPUThread& CPU, ppu_opcode_t op); + void B(PPUThread& CPU, ppu_opcode_t op); + void MCRF(PPUThread& CPU, ppu_opcode_t op); + void BCLR(PPUThread& CPU, ppu_opcode_t op); + void CRNOR(PPUThread& CPU, ppu_opcode_t op); + void CRANDC(PPUThread& CPU, ppu_opcode_t op); + void ISYNC(PPUThread& CPU, ppu_opcode_t op); + void CRXOR(PPUThread& CPU, ppu_opcode_t op); + void CRNAND(PPUThread& CPU, ppu_opcode_t op); + void CRAND(PPUThread& CPU, ppu_opcode_t op); + void CREQV(PPUThread& CPU, ppu_opcode_t op); + void CRORC(PPUThread& CPU, ppu_opcode_t op); + void CROR(PPUThread& CPU, ppu_opcode_t op); + void BCCTR(PPUThread& CPU, ppu_opcode_t op); + void RLWIMI(PPUThread& CPU, ppu_opcode_t op); + void RLWINM(PPUThread& CPU, ppu_opcode_t op); + void RLWNM(PPUThread& CPU, ppu_opcode_t op); + void ORI(PPUThread& CPU, ppu_opcode_t op); + void ORIS(PPUThread& CPU, ppu_opcode_t op); + void XORI(PPUThread& CPU, ppu_opcode_t op); + void XORIS(PPUThread& CPU, ppu_opcode_t 
op); + void ANDI_(PPUThread& CPU, ppu_opcode_t op); + void ANDIS_(PPUThread& CPU, ppu_opcode_t op); + void RLDICL(PPUThread& CPU, ppu_opcode_t op); + void RLDICR(PPUThread& CPU, ppu_opcode_t op); + void RLDIC(PPUThread& CPU, ppu_opcode_t op); + void RLDIMI(PPUThread& CPU, ppu_opcode_t op); + void RLDC_LR(PPUThread& CPU, ppu_opcode_t op); + void CMP(PPUThread& CPU, ppu_opcode_t op); + void TW(PPUThread& CPU, ppu_opcode_t op); + void LVSL(PPUThread& CPU, ppu_opcode_t op); + void LVEBX(PPUThread& CPU, ppu_opcode_t op); + void SUBFC(PPUThread& CPU, ppu_opcode_t op); + void MULHDU(PPUThread& CPU, ppu_opcode_t op); + void ADDC(PPUThread& CPU, ppu_opcode_t op); + void MULHWU(PPUThread& CPU, ppu_opcode_t op); + void MFOCRF(PPUThread& CPU, ppu_opcode_t op); + void LWARX(PPUThread& CPU, ppu_opcode_t op); + void LDX(PPUThread& CPU, ppu_opcode_t op); + void LWZX(PPUThread& CPU, ppu_opcode_t op); + void SLW(PPUThread& CPU, ppu_opcode_t op); + void CNTLZW(PPUThread& CPU, ppu_opcode_t op); + void SLD(PPUThread& CPU, ppu_opcode_t op); + void AND(PPUThread& CPU, ppu_opcode_t op); + void CMPL(PPUThread& CPU, ppu_opcode_t op); + void LVSR(PPUThread& CPU, ppu_opcode_t op); + void LVEHX(PPUThread& CPU, ppu_opcode_t op); + void SUBF(PPUThread& CPU, ppu_opcode_t op); + void LDUX(PPUThread& CPU, ppu_opcode_t op); + void DCBST(PPUThread& CPU, ppu_opcode_t op); + void LWZUX(PPUThread& CPU, ppu_opcode_t op); + void CNTLZD(PPUThread& CPU, ppu_opcode_t op); + void ANDC(PPUThread& CPU, ppu_opcode_t op); + void TD(PPUThread& CPU, ppu_opcode_t op); + void LVEWX(PPUThread& CPU, ppu_opcode_t op); + void MULHD(PPUThread& CPU, ppu_opcode_t op); + void MULHW(PPUThread& CPU, ppu_opcode_t op); + void LDARX(PPUThread& CPU, ppu_opcode_t op); + void DCBF(PPUThread& CPU, ppu_opcode_t op); + void LBZX(PPUThread& CPU, ppu_opcode_t op); + void LVX(PPUThread& CPU, ppu_opcode_t op); + void NEG(PPUThread& CPU, ppu_opcode_t op); + void LBZUX(PPUThread& CPU, ppu_opcode_t op); + void NOR(PPUThread& CPU, ppu_opcode_t 
op); + void STVEBX(PPUThread& CPU, ppu_opcode_t op); + void SUBFE(PPUThread& CPU, ppu_opcode_t op); + void ADDE(PPUThread& CPU, ppu_opcode_t op); + void MTOCRF(PPUThread& CPU, ppu_opcode_t op); + void STDX(PPUThread& CPU, ppu_opcode_t op); + void STWCX_(PPUThread& CPU, ppu_opcode_t op); + void STWX(PPUThread& CPU, ppu_opcode_t op); + void STVEHX(PPUThread& CPU, ppu_opcode_t op); + void STDUX(PPUThread& CPU, ppu_opcode_t op); + void STWUX(PPUThread& CPU, ppu_opcode_t op); + void STVEWX(PPUThread& CPU, ppu_opcode_t op); + void SUBFZE(PPUThread& CPU, ppu_opcode_t op); + void ADDZE(PPUThread& CPU, ppu_opcode_t op); + void STDCX_(PPUThread& CPU, ppu_opcode_t op); + void STBX(PPUThread& CPU, ppu_opcode_t op); + void STVX(PPUThread& CPU, ppu_opcode_t op); + void MULLD(PPUThread& CPU, ppu_opcode_t op); + void SUBFME(PPUThread& CPU, ppu_opcode_t op); + void ADDME(PPUThread& CPU, ppu_opcode_t op); + void MULLW(PPUThread& CPU, ppu_opcode_t op); + void DCBTST(PPUThread& CPU, ppu_opcode_t op); + void STBUX(PPUThread& CPU, ppu_opcode_t op); + void ADD(PPUThread& CPU, ppu_opcode_t op); + void DCBT(PPUThread& CPU, ppu_opcode_t op); + void LHZX(PPUThread& CPU, ppu_opcode_t op); + void EQV(PPUThread& CPU, ppu_opcode_t op); + void ECIWX(PPUThread& CPU, ppu_opcode_t op); + void LHZUX(PPUThread& CPU, ppu_opcode_t op); + void XOR(PPUThread& CPU, ppu_opcode_t op); + void MFSPR(PPUThread& CPU, ppu_opcode_t op); + void LWAX(PPUThread& CPU, ppu_opcode_t op); + void DST(PPUThread& CPU, ppu_opcode_t op); + void LHAX(PPUThread& CPU, ppu_opcode_t op); + void LVXL(PPUThread& CPU, ppu_opcode_t op); + void MFTB(PPUThread& CPU, ppu_opcode_t op); + void LWAUX(PPUThread& CPU, ppu_opcode_t op); + void DSTST(PPUThread& CPU, ppu_opcode_t op); + void LHAUX(PPUThread& CPU, ppu_opcode_t op); + void STHX(PPUThread& CPU, ppu_opcode_t op); + void ORC(PPUThread& CPU, ppu_opcode_t op); + void ECOWX(PPUThread& CPU, ppu_opcode_t op); + void STHUX(PPUThread& CPU, ppu_opcode_t op); + void OR(PPUThread& CPU, 
ppu_opcode_t op); + void DIVDU(PPUThread& CPU, ppu_opcode_t op); + void DIVWU(PPUThread& CPU, ppu_opcode_t op); + void MTSPR(PPUThread& CPU, ppu_opcode_t op); + void DCBI(PPUThread& CPU, ppu_opcode_t op); + void NAND(PPUThread& CPU, ppu_opcode_t op); + void STVXL(PPUThread& CPU, ppu_opcode_t op); + void DIVD(PPUThread& CPU, ppu_opcode_t op); + void DIVW(PPUThread& CPU, ppu_opcode_t op); + void LVLX(PPUThread& CPU, ppu_opcode_t op); + void LDBRX(PPUThread& CPU, ppu_opcode_t op); + void LSWX(PPUThread& CPU, ppu_opcode_t op); + void LWBRX(PPUThread& CPU, ppu_opcode_t op); + void LFSX(PPUThread& CPU, ppu_opcode_t op); + void SRW(PPUThread& CPU, ppu_opcode_t op); + void SRD(PPUThread& CPU, ppu_opcode_t op); + void LVRX(PPUThread& CPU, ppu_opcode_t op); + void LSWI(PPUThread& CPU, ppu_opcode_t op); + void LFSUX(PPUThread& CPU, ppu_opcode_t op); + void SYNC(PPUThread& CPU, ppu_opcode_t op); + void LFDX(PPUThread& CPU, ppu_opcode_t op); + void LFDUX(PPUThread& CPU, ppu_opcode_t op); + void STVLX(PPUThread& CPU, ppu_opcode_t op); + void STDBRX(PPUThread& CPU, ppu_opcode_t op); + void STSWX(PPUThread& CPU, ppu_opcode_t op); + void STWBRX(PPUThread& CPU, ppu_opcode_t op); + void STFSX(PPUThread& CPU, ppu_opcode_t op); + void STVRX(PPUThread& CPU, ppu_opcode_t op); + void STFSUX(PPUThread& CPU, ppu_opcode_t op); + void STSWI(PPUThread& CPU, ppu_opcode_t op); + void STFDX(PPUThread& CPU, ppu_opcode_t op); + void STFDUX(PPUThread& CPU, ppu_opcode_t op); + void LVLXL(PPUThread& CPU, ppu_opcode_t op); + void LHBRX(PPUThread& CPU, ppu_opcode_t op); + void SRAW(PPUThread& CPU, ppu_opcode_t op); + void SRAD(PPUThread& CPU, ppu_opcode_t op); + void LVRXL(PPUThread& CPU, ppu_opcode_t op); + void DSS(PPUThread& CPU, ppu_opcode_t op); + void SRAWI(PPUThread& CPU, ppu_opcode_t op); + void SRADI(PPUThread& CPU, ppu_opcode_t op); + void EIEIO(PPUThread& CPU, ppu_opcode_t op); + void STVLXL(PPUThread& CPU, ppu_opcode_t op); + void STHBRX(PPUThread& CPU, ppu_opcode_t op); + void 
EXTSH(PPUThread& CPU, ppu_opcode_t op); + void STVRXL(PPUThread& CPU, ppu_opcode_t op); + void EXTSB(PPUThread& CPU, ppu_opcode_t op); + void STFIWX(PPUThread& CPU, ppu_opcode_t op); + void EXTSW(PPUThread& CPU, ppu_opcode_t op); + void ICBI(PPUThread& CPU, ppu_opcode_t op); + void DCBZ(PPUThread& CPU, ppu_opcode_t op); + void LWZ(PPUThread& CPU, ppu_opcode_t op); + void LWZU(PPUThread& CPU, ppu_opcode_t op); + void LBZ(PPUThread& CPU, ppu_opcode_t op); + void LBZU(PPUThread& CPU, ppu_opcode_t op); + void STW(PPUThread& CPU, ppu_opcode_t op); + void STWU(PPUThread& CPU, ppu_opcode_t op); + void STB(PPUThread& CPU, ppu_opcode_t op); + void STBU(PPUThread& CPU, ppu_opcode_t op); + void LHZ(PPUThread& CPU, ppu_opcode_t op); + void LHZU(PPUThread& CPU, ppu_opcode_t op); + void LHA(PPUThread& CPU, ppu_opcode_t op); + void LHAU(PPUThread& CPU, ppu_opcode_t op); + void STH(PPUThread& CPU, ppu_opcode_t op); + void STHU(PPUThread& CPU, ppu_opcode_t op); + void LMW(PPUThread& CPU, ppu_opcode_t op); + void STMW(PPUThread& CPU, ppu_opcode_t op); + void LFS(PPUThread& CPU, ppu_opcode_t op); + void LFSU(PPUThread& CPU, ppu_opcode_t op); + void LFD(PPUThread& CPU, ppu_opcode_t op); + void LFDU(PPUThread& CPU, ppu_opcode_t op); + void STFS(PPUThread& CPU, ppu_opcode_t op); + void STFSU(PPUThread& CPU, ppu_opcode_t op); + void STFD(PPUThread& CPU, ppu_opcode_t op); + void STFDU(PPUThread& CPU, ppu_opcode_t op); + void LD(PPUThread& CPU, ppu_opcode_t op); + void LDU(PPUThread& CPU, ppu_opcode_t op); + void LWA(PPUThread& CPU, ppu_opcode_t op); + void FDIVS(PPUThread& CPU, ppu_opcode_t op); + void FSUBS(PPUThread& CPU, ppu_opcode_t op); + void FADDS(PPUThread& CPU, ppu_opcode_t op); + void FSQRTS(PPUThread& CPU, ppu_opcode_t op); + void FRES(PPUThread& CPU, ppu_opcode_t op); + void FMULS(PPUThread& CPU, ppu_opcode_t op); + void FMADDS(PPUThread& CPU, ppu_opcode_t op); + void FMSUBS(PPUThread& CPU, ppu_opcode_t op); + void FNMSUBS(PPUThread& CPU, ppu_opcode_t op); + void 
FNMADDS(PPUThread& CPU, ppu_opcode_t op); + void STD(PPUThread& CPU, ppu_opcode_t op); + void STDU(PPUThread& CPU, ppu_opcode_t op); + void MTFSB1(PPUThread& CPU, ppu_opcode_t op); + void MCRFS(PPUThread& CPU, ppu_opcode_t op); + void MTFSB0(PPUThread& CPU, ppu_opcode_t op); + void MTFSFI(PPUThread& CPU, ppu_opcode_t op); + void MFFS(PPUThread& CPU, ppu_opcode_t op); + void MTFSF(PPUThread& CPU, ppu_opcode_t op); + + void FCMPU(PPUThread& CPU, ppu_opcode_t op); + void FRSP(PPUThread& CPU, ppu_opcode_t op); + void FCTIW(PPUThread& CPU, ppu_opcode_t op); + void FCTIWZ(PPUThread& CPU, ppu_opcode_t op); + void FDIV(PPUThread& CPU, ppu_opcode_t op); + void FSUB(PPUThread& CPU, ppu_opcode_t op); + void FADD(PPUThread& CPU, ppu_opcode_t op); + void FSQRT(PPUThread& CPU, ppu_opcode_t op); + void FSEL(PPUThread& CPU, ppu_opcode_t op); + void FMUL(PPUThread& CPU, ppu_opcode_t op); + void FRSQRTE(PPUThread& CPU, ppu_opcode_t op); + void FMSUB(PPUThread& CPU, ppu_opcode_t op); + void FMADD(PPUThread& CPU, ppu_opcode_t op); + void FNMSUB(PPUThread& CPU, ppu_opcode_t op); + void FNMADD(PPUThread& CPU, ppu_opcode_t op); + void FCMPO(PPUThread& CPU, ppu_opcode_t op); + void FNEG(PPUThread& CPU, ppu_opcode_t op); + void FMR(PPUThread& CPU, ppu_opcode_t op); + void FNABS(PPUThread& CPU, ppu_opcode_t op); + void FABS(PPUThread& CPU, ppu_opcode_t op); + void FCTID(PPUThread& CPU, ppu_opcode_t op); + void FCTIDZ(PPUThread& CPU, ppu_opcode_t op); + void FCFID(PPUThread& CPU, ppu_opcode_t op); + + void UNK(PPUThread& CPU, ppu_opcode_t op); +} + +class PPUInterpreter2 : public PPUOpcodes +{ +public: + virtual ~PPUInterpreter2() {} + + ppu_inter_func_t func; + + virtual void NULL_OP() { func = ppu_interpreter::NULL_OP; } + virtual void NOP() { func = ppu_interpreter::NOP; } + + virtual void TDI(u32 to, u32 ra, s32 simm16) { func = ppu_interpreter::TDI; } + virtual void TWI(u32 to, u32 ra, s32 simm16) { func = ppu_interpreter::TWI; } + + virtual void MFVSCR(u32 vd) { func = 
ppu_interpreter::MFVSCR; } + virtual void MTVSCR(u32 vb) { func = ppu_interpreter::MTVSCR; } + virtual void VADDCUW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VADDCUW; } + virtual void VADDFP(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VADDFP; } + virtual void VADDSBS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VADDSBS; } + virtual void VADDSHS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VADDSHS; } + virtual void VADDSWS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VADDSWS; } + virtual void VADDUBM(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VADDUBM; } + virtual void VADDUBS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VADDUBS; } + virtual void VADDUHM(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VADDUHM; } + virtual void VADDUHS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VADDUHS; } + virtual void VADDUWM(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VADDUWM; } + virtual void VADDUWS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VADDUWS; } + virtual void VAND(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VAND; } + virtual void VANDC(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VANDC; } + virtual void VAVGSB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VAVGSB; } + virtual void VAVGSH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VAVGSH; } + virtual void VAVGSW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VAVGSW; } + virtual void VAVGUB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VAVGUB; } + virtual void VAVGUH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VAVGUH; } + virtual void VAVGUW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VAVGUW; } + virtual void VCFSX(u32 vd, u32 uimm5, u32 vb) { func = ppu_interpreter::VCFSX; } + virtual void VCFUX(u32 vd, u32 uimm5, u32 vb) { func = ppu_interpreter::VCFUX; } + virtual void VCMPBFP(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPBFP; } + virtual void VCMPBFP_(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPBFP_; 
} + virtual void VCMPEQFP(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPEQFP; } + virtual void VCMPEQFP_(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPEQFP_; } + virtual void VCMPEQUB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPEQUB; } + virtual void VCMPEQUB_(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPEQUB_; } + virtual void VCMPEQUH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPEQUH; } + virtual void VCMPEQUH_(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPEQUH_; } + virtual void VCMPEQUW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPEQUW; } + virtual void VCMPEQUW_(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPEQUW_; } + virtual void VCMPGEFP(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGEFP; } + virtual void VCMPGEFP_(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGEFP_; } + virtual void VCMPGTFP(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTFP; } + virtual void VCMPGTFP_(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTFP_; } + virtual void VCMPGTSB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTSB; } + virtual void VCMPGTSB_(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTSB_; } + virtual void VCMPGTSH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTSH; } + virtual void VCMPGTSH_(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTSH_; } + virtual void VCMPGTSW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTSW; } + virtual void VCMPGTSW_(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTSW_; } + virtual void VCMPGTUB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTUB; } + virtual void VCMPGTUB_(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTUB_; } + virtual void VCMPGTUH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTUH; } + virtual void VCMPGTUH_(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTUH_; } + virtual void VCMPGTUW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTUW; 
} + virtual void VCMPGTUW_(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VCMPGTUW_; } + virtual void VCTSXS(u32 vd, u32 uimm5, u32 vb) { func = ppu_interpreter::VCTSXS; } + virtual void VCTUXS(u32 vd, u32 uimm5, u32 vb) { func = ppu_interpreter::VCTUXS; } + virtual void VEXPTEFP(u32 vd, u32 vb) { func = ppu_interpreter::VEXPTEFP; } + virtual void VLOGEFP(u32 vd, u32 vb) { func = ppu_interpreter::VLOGEFP; } + virtual void VMADDFP(u32 vd, u32 va, u32 vc, u32 vb) { func = ppu_interpreter::VMADDFP; } + virtual void VMAXFP(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMAXFP; } + virtual void VMAXSB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMAXSB; } + virtual void VMAXSH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMAXSH; } + virtual void VMAXSW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMAXSW; } + virtual void VMAXUB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMAXUB; } + virtual void VMAXUH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMAXUH; } + virtual void VMAXUW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMAXUW; } + virtual void VMHADDSHS(u32 vd, u32 va, u32 vb, u32 vc) { func = ppu_interpreter::VMHADDSHS; } + virtual void VMHRADDSHS(u32 vd, u32 va, u32 vb, u32 vc) { func = ppu_interpreter::VMHRADDSHS; } + virtual void VMINFP(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMINFP; } + virtual void VMINSB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMINSB; } + virtual void VMINSH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMINSH; } + virtual void VMINSW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMINSW; } + virtual void VMINUB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMINUB; } + virtual void VMINUH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMINUH; } + virtual void VMINUW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMINUW; } + virtual void VMLADDUHM(u32 vd, u32 va, u32 vb, u32 vc) { func = ppu_interpreter::VMLADDUHM; } + virtual void VMRGHB(u32 vd, u32 va, u32 vb) { func = 
ppu_interpreter::VMRGHB; } + virtual void VMRGHH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMRGHH; } + virtual void VMRGHW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMRGHW; } + virtual void VMRGLB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMRGLB; } + virtual void VMRGLH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMRGLH; } + virtual void VMRGLW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMRGLW; } + virtual void VMSUMMBM(u32 vd, u32 va, u32 vb, u32 vc) { func = ppu_interpreter::VMSUMMBM; } + virtual void VMSUMSHM(u32 vd, u32 va, u32 vb, u32 vc) { func = ppu_interpreter::VMSUMSHM; } + virtual void VMSUMSHS(u32 vd, u32 va, u32 vb, u32 vc) { func = ppu_interpreter::VMSUMSHS; } + virtual void VMSUMUBM(u32 vd, u32 va, u32 vb, u32 vc) { func = ppu_interpreter::VMSUMUBM; } + virtual void VMSUMUHM(u32 vd, u32 va, u32 vb, u32 vc) { func = ppu_interpreter::VMSUMUHM; } + virtual void VMSUMUHS(u32 vd, u32 va, u32 vb, u32 vc) { func = ppu_interpreter::VMSUMUHS; } + virtual void VMULESB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMULESB; } + virtual void VMULESH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMULESH; } + virtual void VMULEUB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMULEUB; } + virtual void VMULEUH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMULEUH; } + virtual void VMULOSB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMULOSB; } + virtual void VMULOSH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMULOSH; } + virtual void VMULOUB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMULOUB; } + virtual void VMULOUH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VMULOUH; } + virtual void VNMSUBFP(u32 vd, u32 va, u32 vc, u32 vb) { func = ppu_interpreter::VNMSUBFP; } + virtual void VNOR(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VNOR; } + virtual void VOR(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VOR; } + virtual void VPERM(u32 vd, u32 va, u32 vb, u32 vc) { func = 
ppu_interpreter::VPERM; } + virtual void VPKPX(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VPKPX; } + virtual void VPKSHSS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VPKSHSS; } + virtual void VPKSHUS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VPKSHUS; } + virtual void VPKSWSS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VPKSWSS; } + virtual void VPKSWUS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VPKSWUS; } + virtual void VPKUHUM(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VPKUHUM; } + virtual void VPKUHUS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VPKUHUS; } + virtual void VPKUWUM(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VPKUWUM; } + virtual void VPKUWUS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VPKUWUS; } + virtual void VREFP(u32 vd, u32 vb) { func = ppu_interpreter::VREFP; } + virtual void VRFIM(u32 vd, u32 vb) { func = ppu_interpreter::VRFIM; } + virtual void VRFIN(u32 vd, u32 vb) { func = ppu_interpreter::VRFIN; } + virtual void VRFIP(u32 vd, u32 vb) { func = ppu_interpreter::VRFIP; } + virtual void VRFIZ(u32 vd, u32 vb) { func = ppu_interpreter::VRFIZ; } + virtual void VRLB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VRLB; } + virtual void VRLH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VRLH; } + virtual void VRLW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VRLW; } + virtual void VRSQRTEFP(u32 vd, u32 vb) { func = ppu_interpreter::VRSQRTEFP; } + virtual void VSEL(u32 vd, u32 va, u32 vb, u32 vc) { func = ppu_interpreter::VSEL; } + virtual void VSL(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSL; } + virtual void VSLB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSLB; } + virtual void VSLDOI(u32 vd, u32 va, u32 vb, u32 sh) { func = ppu_interpreter::VSLDOI; } + virtual void VSLH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSLH; } + virtual void VSLO(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSLO; } + virtual void VSLW(u32 vd, u32 va, u32 vb) { func = 
ppu_interpreter::VSLW; } + virtual void VSPLTB(u32 vd, u32 uimm5, u32 vb) { func = ppu_interpreter::VSPLTB; } + virtual void VSPLTH(u32 vd, u32 uimm5, u32 vb) { func = ppu_interpreter::VSPLTH; } + virtual void VSPLTISB(u32 vd, s32 simm5) { func = ppu_interpreter::VSPLTISB; } + virtual void VSPLTISH(u32 vd, s32 simm5) { func = ppu_interpreter::VSPLTISH; } + virtual void VSPLTISW(u32 vd, s32 simm5) { func = ppu_interpreter::VSPLTISW; } + virtual void VSPLTW(u32 vd, u32 uimm5, u32 vb) { func = ppu_interpreter::VSPLTW; } + virtual void VSR(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSR; } + virtual void VSRAB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSRAB; } + virtual void VSRAH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSRAH; } + virtual void VSRAW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSRAW; } + virtual void VSRB(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSRB; } + virtual void VSRH(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSRH; } + virtual void VSRO(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSRO; } + virtual void VSRW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSRW; } + virtual void VSUBCUW(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUBCUW; } + virtual void VSUBFP(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUBFP; } + virtual void VSUBSBS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUBSBS; } + virtual void VSUBSHS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUBSHS; } + virtual void VSUBSWS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUBSWS; } + virtual void VSUBUBM(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUBUBM; } + virtual void VSUBUBS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUBUBS; } + virtual void VSUBUHM(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUBUHM; } + virtual void VSUBUHS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUBUHS; } + virtual void VSUBUWM(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUBUWM; } + virtual 
void VSUBUWS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUBUWS; } + virtual void VSUMSWS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUMSWS; } + virtual void VSUM2SWS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUM2SWS; } + virtual void VSUM4SBS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUM4SBS; } + virtual void VSUM4SHS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUM4SHS; } + virtual void VSUM4UBS(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VSUM4UBS; } + virtual void VUPKHPX(u32 vd, u32 vb) { func = ppu_interpreter::VUPKHPX; } + virtual void VUPKHSB(u32 vd, u32 vb) { func = ppu_interpreter::VUPKHSB; } + virtual void VUPKHSH(u32 vd, u32 vb) { func = ppu_interpreter::VUPKHSH; } + virtual void VUPKLPX(u32 vd, u32 vb) { func = ppu_interpreter::VUPKLPX; } + virtual void VUPKLSB(u32 vd, u32 vb) { func = ppu_interpreter::VUPKLSB; } + virtual void VUPKLSH(u32 vd, u32 vb) { func = ppu_interpreter::VUPKLSH; } + virtual void VXOR(u32 vd, u32 va, u32 vb) { func = ppu_interpreter::VXOR; } + virtual void MULLI(u32 rd, u32 ra, s32 simm16) { func = ppu_interpreter::MULLI; } + virtual void SUBFIC(u32 rd, u32 ra, s32 simm16) { func = ppu_interpreter::SUBFIC; } + virtual void CMPLI(u32 bf, u32 l, u32 ra, u32 uimm16) { func = ppu_interpreter::CMPLI; } + virtual void CMPI(u32 bf, u32 l, u32 ra, s32 simm16) { func = ppu_interpreter::CMPI; } + virtual void ADDIC(u32 rd, u32 ra, s32 simm16) { func = ppu_interpreter::ADDIC; } + virtual void ADDIC_(u32 rd, u32 ra, s32 simm16) { func = ppu_interpreter::ADDIC_; } + virtual void ADDI(u32 rd, u32 ra, s32 simm16) { func = ppu_interpreter::ADDI; } + virtual void ADDIS(u32 rd, u32 ra, s32 simm16) { func = ppu_interpreter::ADDIS; } + virtual void BC(u32 bo, u32 bi, s32 bd, u32 aa, u32 lk) { func = ppu_interpreter::BC; } + virtual void HACK(u32 index) { func = ppu_interpreter::HACK; } + virtual void SC(u32 lev) { func = ppu_interpreter::SC; } + virtual void B(s32 ll, u32 aa, u32 lk) { func = 
ppu_interpreter::B; } + virtual void MCRF(u32 crfd, u32 crfs) { func = ppu_interpreter::MCRF; } + virtual void BCLR(u32 bo, u32 bi, u32 bh, u32 lk) { func = ppu_interpreter::BCLR; } + virtual void CRNOR(u32 bt, u32 ba, u32 bb) { func = ppu_interpreter::CRNOR; } + virtual void CRANDC(u32 bt, u32 ba, u32 bb) { func = ppu_interpreter::CRANDC; } + virtual void ISYNC() { func = ppu_interpreter::ISYNC; } + virtual void CRXOR(u32 bt, u32 ba, u32 bb) { func = ppu_interpreter::CRXOR; } + virtual void CRNAND(u32 bt, u32 ba, u32 bb) { func = ppu_interpreter::CRNAND; } + virtual void CRAND(u32 bt, u32 ba, u32 bb) { func = ppu_interpreter::CRAND; } + virtual void CREQV(u32 bt, u32 ba, u32 bb) { func = ppu_interpreter::CREQV; } + virtual void CRORC(u32 bt, u32 ba, u32 bb) { func = ppu_interpreter::CRORC; } + virtual void CROR(u32 bt, u32 ba, u32 bb) { func = ppu_interpreter::CROR; } + virtual void BCCTR(u32 bo, u32 bi, u32 bh, u32 lk) { func = ppu_interpreter::BCCTR; } + virtual void RLWIMI(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) { func = ppu_interpreter::RLWIMI; } + virtual void RLWINM(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, bool rc) { func = ppu_interpreter::RLWINM; } + virtual void RLWNM(u32 ra, u32 rs, u32 rb, u32 MB, u32 ME, bool rc) { func = ppu_interpreter::RLWNM; } + virtual void ORI(u32 rs, u32 ra, u32 uimm16) { func = ppu_interpreter::ORI; } + virtual void ORIS(u32 rs, u32 ra, u32 uimm16) { func = ppu_interpreter::ORIS; } + virtual void XORI(u32 ra, u32 rs, u32 uimm16) { func = ppu_interpreter::XORI; } + virtual void XORIS(u32 ra, u32 rs, u32 uimm16) { func = ppu_interpreter::XORIS; } + virtual void ANDI_(u32 ra, u32 rs, u32 uimm16) { func = ppu_interpreter::ANDI_; } + virtual void ANDIS_(u32 ra, u32 rs, u32 uimm16) { func = ppu_interpreter::ANDIS_; } + virtual void RLDICL(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { func = ppu_interpreter::RLDICL; } + virtual void RLDICR(u32 ra, u32 rs, u32 sh, u32 me, bool rc) { func = ppu_interpreter::RLDICR; } + virtual void 
RLDIC(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { func = ppu_interpreter::RLDIC; } + virtual void RLDIMI(u32 ra, u32 rs, u32 sh, u32 mb, bool rc) { func = ppu_interpreter::RLDIMI; } + virtual void RLDC_LR(u32 ra, u32 rs, u32 rb, u32 m_eb, bool is_r, bool rc) { func = ppu_interpreter::RLDC_LR; } + virtual void CMP(u32 crfd, u32 l, u32 ra, u32 rb) { func = ppu_interpreter::CMP; } + virtual void TW(u32 to, u32 ra, u32 rb) { func = ppu_interpreter::TW; } + virtual void LVSL(u32 vd, u32 ra, u32 rb) { func = ppu_interpreter::LVSL; } + virtual void LVEBX(u32 vd, u32 ra, u32 rb) { func = ppu_interpreter::LVEBX; } + virtual void SUBFC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { func = ppu_interpreter::SUBFC; } + virtual void MULHDU(u32 rd, u32 ra, u32 rb, bool rc) { func = ppu_interpreter::MULHDU; } + virtual void ADDC(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { func = ppu_interpreter::ADDC; } + virtual void MULHWU(u32 rd, u32 ra, u32 rb, bool rc) { func = ppu_interpreter::MULHWU; } + virtual void MFOCRF(u32 a, u32 rd, u32 crm) { func = ppu_interpreter::MFOCRF; } + virtual void LWARX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LWARX; } + virtual void LDX(u32 ra, u32 rs, u32 rb) { func = ppu_interpreter::LDX; } + virtual void LWZX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LWZX; } + virtual void SLW(u32 ra, u32 rs, u32 rb, bool rc) { func = ppu_interpreter::SLW; } + virtual void CNTLZW(u32 ra, u32 rs, bool rc) { func = ppu_interpreter::CNTLZW; } + virtual void SLD(u32 ra, u32 rs, u32 rb, bool rc) { func = ppu_interpreter::SLD; } + virtual void AND(u32 ra, u32 rs, u32 rb, bool rc) { func = ppu_interpreter::AND; } + virtual void CMPL(u32 bf, u32 l, u32 ra, u32 rb) { func = ppu_interpreter::CMPL; } + virtual void LVSR(u32 vd, u32 ra, u32 rb) { func = ppu_interpreter::LVSR; } + virtual void LVEHX(u32 vd, u32 ra, u32 rb) { func = ppu_interpreter::LVEHX; } + virtual void SUBF(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { func = ppu_interpreter::SUBF; } + virtual void 
LDUX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LDUX; } + virtual void DCBST(u32 ra, u32 rb) { func = ppu_interpreter::DCBST; } + virtual void LWZUX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LWZUX; } + virtual void CNTLZD(u32 ra, u32 rs, bool rc) { func = ppu_interpreter::CNTLZD; } + virtual void ANDC(u32 ra, u32 rs, u32 rb, bool rc) { func = ppu_interpreter::ANDC; } + virtual void TD(u32 to, u32 ra, u32 rb) { func = ppu_interpreter::TD; } + virtual void LVEWX(u32 vd, u32 ra, u32 rb) { func = ppu_interpreter::LVEWX; } + virtual void MULHD(u32 rd, u32 ra, u32 rb, bool rc) { func = ppu_interpreter::MULHD; } + virtual void MULHW(u32 rd, u32 ra, u32 rb, bool rc) { func = ppu_interpreter::MULHW; } + virtual void LDARX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LDARX; } + virtual void DCBF(u32 ra, u32 rb) { func = ppu_interpreter::DCBF; } + virtual void LBZX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LBZX; } + virtual void LVX(u32 vd, u32 ra, u32 rb) { func = ppu_interpreter::LVX; } + virtual void NEG(u32 rd, u32 ra, u32 oe, bool rc) { func = ppu_interpreter::NEG; } + virtual void LBZUX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LBZUX; } + virtual void NOR(u32 ra, u32 rs, u32 rb, bool rc) { func = ppu_interpreter::NOR; } + virtual void STVEBX(u32 vs, u32 ra, u32 rb) { func = ppu_interpreter::STVEBX; } + virtual void SUBFE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { func = ppu_interpreter::SUBFE; } + virtual void ADDE(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { func = ppu_interpreter::ADDE; } + virtual void MTOCRF(u32 l, u32 crm, u32 rs) { func = ppu_interpreter::MTOCRF; } + virtual void STDX(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STDX; } + virtual void STWCX_(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STWCX_; } + virtual void STWX(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STWX; } + virtual void STVEHX(u32 vs, u32 ra, u32 rb) { func = ppu_interpreter::STVEHX; } + virtual void STDUX(u32 rs, u32 ra, u32 rb) { func 
= ppu_interpreter::STDUX; } + virtual void STWUX(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STWUX; } + virtual void STVEWX(u32 vs, u32 ra, u32 rb) { func = ppu_interpreter::STVEWX; } + virtual void SUBFZE(u32 rd, u32 ra, u32 oe, bool rc) { func = ppu_interpreter::SUBFZE; } + virtual void ADDZE(u32 rd, u32 ra, u32 oe, bool rc) { func = ppu_interpreter::ADDZE; } + virtual void STDCX_(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STDCX_; } + virtual void STBX(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STBX; } + virtual void STVX(u32 vs, u32 ra, u32 rb) { func = ppu_interpreter::STVX; } + virtual void MULLD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { func = ppu_interpreter::MULLD; } + virtual void SUBFME(u32 rd, u32 ra, u32 oe, bool rc) { func = ppu_interpreter::SUBFME; } + virtual void ADDME(u32 rd, u32 ra, u32 oe, bool rc) { func = ppu_interpreter::ADDME; } + virtual void MULLW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { func = ppu_interpreter::MULLW; } + virtual void DCBTST(u32 ra, u32 rb, u32 th) { func = ppu_interpreter::DCBTST; } + virtual void STBUX(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STBUX; } + virtual void ADD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { func = ppu_interpreter::ADD; } + virtual void DCBT(u32 ra, u32 rb, u32 th) { func = ppu_interpreter::DCBT; } + virtual void LHZX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LHZX; } + virtual void EQV(u32 ra, u32 rs, u32 rb, bool rc) { func = ppu_interpreter::EQV; } + virtual void ECIWX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::ECIWX; } + virtual void LHZUX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LHZUX; } + virtual void XOR(u32 rs, u32 ra, u32 rb, bool rc) { func = ppu_interpreter::XOR; } + virtual void MFSPR(u32 rd, u32 spr) { func = ppu_interpreter::MFSPR; } + virtual void LWAX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LWAX; } + virtual void DST(u32 ra, u32 rb, u32 strm, u32 t) { func = ppu_interpreter::DST; } + virtual void LHAX(u32 rd, u32 ra, u32 
rb) { func = ppu_interpreter::LHAX; } + virtual void LVXL(u32 vd, u32 ra, u32 rb) { func = ppu_interpreter::LVXL; } + virtual void MFTB(u32 rd, u32 spr) { func = ppu_interpreter::MFTB; } + virtual void LWAUX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LWAUX; } + virtual void DSTST(u32 ra, u32 rb, u32 strm, u32 t) { func = ppu_interpreter::DSTST; } + virtual void LHAUX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LHAUX; } + virtual void STHX(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STHX; } + virtual void ORC(u32 rs, u32 ra, u32 rb, bool rc) { func = ppu_interpreter::ORC; } + virtual void ECOWX(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::ECOWX; } + virtual void STHUX(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STHUX; } + virtual void OR(u32 ra, u32 rs, u32 rb, bool rc) { func = ppu_interpreter::OR; } + virtual void DIVDU(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { func = ppu_interpreter::DIVDU; } + virtual void DIVWU(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { func = ppu_interpreter::DIVWU; } + virtual void MTSPR(u32 spr, u32 rs) { func = ppu_interpreter::MTSPR; } + virtual void DCBI(u32 ra, u32 rb) { func = ppu_interpreter::DCBI; } + virtual void NAND(u32 ra, u32 rs, u32 rb, bool rc) { func = ppu_interpreter::NAND; } + virtual void STVXL(u32 vs, u32 ra, u32 rb) { func = ppu_interpreter::STVXL; } + virtual void DIVD(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { func = ppu_interpreter::DIVD; } + virtual void DIVW(u32 rd, u32 ra, u32 rb, u32 oe, bool rc) { func = ppu_interpreter::DIVW; } + virtual void LVLX(u32 vd, u32 ra, u32 rb) { func = ppu_interpreter::LVLX; } + virtual void LDBRX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LDBRX; } + virtual void LSWX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LSWX; } + virtual void LWBRX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LWBRX; } + virtual void LFSX(u32 frd, u32 ra, u32 rb) { func = ppu_interpreter::LFSX; } + virtual void SRW(u32 ra, u32 rs, u32 rb, bool rc) { func = 
ppu_interpreter::SRW; } + virtual void SRD(u32 ra, u32 rs, u32 rb, bool rc) { func = ppu_interpreter::SRD; } + virtual void LVRX(u32 vd, u32 ra, u32 rb) { func = ppu_interpreter::LVRX; } + virtual void LSWI(u32 rd, u32 ra, u32 nb) { func = ppu_interpreter::LSWI; } + virtual void LFSUX(u32 frd, u32 ra, u32 rb) { func = ppu_interpreter::LFSUX; } + virtual void SYNC(u32 l) { func = ppu_interpreter::SYNC; } + virtual void LFDX(u32 frd, u32 ra, u32 rb) { func = ppu_interpreter::LFDX; } + virtual void LFDUX(u32 frd, u32 ra, u32 rb) { func = ppu_interpreter::LFDUX; } + virtual void STVLX(u32 vs, u32 ra, u32 rb) { func = ppu_interpreter::STVLX; } + virtual void STDBRX(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STDBRX; } + virtual void STSWX(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STSWX; } + virtual void STWBRX(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STWBRX; } + virtual void STFSX(u32 frs, u32 ra, u32 rb) { func = ppu_interpreter::STFSX; } + virtual void STVRX(u32 vs, u32 ra, u32 rb) { func = ppu_interpreter::STVRX; } + virtual void STFSUX(u32 frs, u32 ra, u32 rb) { func = ppu_interpreter::STFSUX; } + virtual void STSWI(u32 rd, u32 ra, u32 nb) { func = ppu_interpreter::STSWI; } + virtual void STFDX(u32 frs, u32 ra, u32 rb) { func = ppu_interpreter::STFDX; } + virtual void STFDUX(u32 frs, u32 ra, u32 rb) { func = ppu_interpreter::STFDUX; } + virtual void LVLXL(u32 vd, u32 ra, u32 rb) { func = ppu_interpreter::LVLXL; } + virtual void LHBRX(u32 rd, u32 ra, u32 rb) { func = ppu_interpreter::LHBRX; } + virtual void SRAW(u32 ra, u32 rs, u32 rb, bool rc) { func = ppu_interpreter::SRAW; } + virtual void SRAD(u32 ra, u32 rs, u32 rb, bool rc) { func = ppu_interpreter::SRAD; } + virtual void LVRXL(u32 vd, u32 ra, u32 rb) { func = ppu_interpreter::LVRXL; } + virtual void DSS(u32 strm, u32 a) { func = ppu_interpreter::DSS; } + virtual void SRAWI(u32 ra, u32 rs, u32 sh, bool rc) { func = ppu_interpreter::SRAWI; } + virtual void SRADI1(u32 ra, u32 rs, u32 sh, 
bool rc) { func = ppu_interpreter::SRADI; } + virtual void SRADI2(u32 ra, u32 rs, u32 sh, bool rc) { func = ppu_interpreter::SRADI; } + virtual void EIEIO() { func = ppu_interpreter::EIEIO; } + virtual void STVLXL(u32 vs, u32 ra, u32 rb) { func = ppu_interpreter::STVLXL; } + virtual void STHBRX(u32 rs, u32 ra, u32 rb) { func = ppu_interpreter::STHBRX; } + virtual void EXTSH(u32 ra, u32 rs, bool rc) { func = ppu_interpreter::EXTSH; } + virtual void STVRXL(u32 sd, u32 ra, u32 rb) { func = ppu_interpreter::STVRXL; } + virtual void EXTSB(u32 ra, u32 rs, bool rc) { func = ppu_interpreter::EXTSB; } + virtual void STFIWX(u32 frs, u32 ra, u32 rb) { func = ppu_interpreter::STFIWX; } + virtual void EXTSW(u32 ra, u32 rs, bool rc) { func = ppu_interpreter::EXTSW; } + virtual void ICBI(u32 ra, u32 rb) { func = ppu_interpreter::ICBI; } + virtual void DCBZ(u32 ra, u32 rb) { func = ppu_interpreter::DCBZ; } + virtual void LWZ(u32 rd, u32 ra, s32 d) { func = ppu_interpreter::LWZ; } + virtual void LWZU(u32 rd, u32 ra, s32 d) { func = ppu_interpreter::LWZU; } + virtual void LBZ(u32 rd, u32 ra, s32 d) { func = ppu_interpreter::LBZ; } + virtual void LBZU(u32 rd, u32 ra, s32 d) { func = ppu_interpreter::LBZU; } + virtual void STW(u32 rs, u32 ra, s32 d) { func = ppu_interpreter::STW; } + virtual void STWU(u32 rs, u32 ra, s32 d) { func = ppu_interpreter::STWU; } + virtual void STB(u32 rs, u32 ra, s32 d) { func = ppu_interpreter::STB; } + virtual void STBU(u32 rs, u32 ra, s32 d) { func = ppu_interpreter::STBU; } + virtual void LHZ(u32 rd, u32 ra, s32 d) { func = ppu_interpreter::LHZ; } + virtual void LHZU(u32 rd, u32 ra, s32 d) { func = ppu_interpreter::LHZU; } + virtual void LHA(u32 rs, u32 ra, s32 d) { func = ppu_interpreter::LHA; } + virtual void LHAU(u32 rs, u32 ra, s32 d) { func = ppu_interpreter::LHAU; } + virtual void STH(u32 rs, u32 ra, s32 d) { func = ppu_interpreter::STH; } + virtual void STHU(u32 rs, u32 ra, s32 d) { func = ppu_interpreter::STHU; } + virtual void LMW(u32 rd, u32 
ra, s32 d) { func = ppu_interpreter::LMW; } + virtual void STMW(u32 rs, u32 ra, s32 d) { func = ppu_interpreter::STMW; } + virtual void LFS(u32 frd, u32 ra, s32 d) { func = ppu_interpreter::LFS; } + virtual void LFSU(u32 frd, u32 ra, s32 d) { func = ppu_interpreter::LFSU; } + virtual void LFD(u32 frd, u32 ra, s32 d) { func = ppu_interpreter::LFD; } + virtual void LFDU(u32 frd, u32 ra, s32 d) { func = ppu_interpreter::LFDU; } + virtual void STFS(u32 frs, u32 ra, s32 d) { func = ppu_interpreter::STFS; } + virtual void STFSU(u32 frs, u32 ra, s32 d) { func = ppu_interpreter::STFSU; } + virtual void STFD(u32 frs, u32 ra, s32 d) { func = ppu_interpreter::STFD; } + virtual void STFDU(u32 frs, u32 ra, s32 d) { func = ppu_interpreter::STFDU; } + virtual void LD(u32 rd, u32 ra, s32 ds) { func = ppu_interpreter::LD; } + virtual void LDU(u32 rd, u32 ra, s32 ds) { func = ppu_interpreter::LDU; } + virtual void LWA(u32 rd, u32 ra, s32 ds) { func = ppu_interpreter::LWA; } + virtual void FDIVS(u32 frd, u32 fra, u32 frb, bool rc) { func = ppu_interpreter::FDIVS; } + virtual void FSUBS(u32 frd, u32 fra, u32 frb, bool rc) { func = ppu_interpreter::FSUBS; } + virtual void FADDS(u32 frd, u32 fra, u32 frb, bool rc) { func = ppu_interpreter::FADDS; } + virtual void FSQRTS(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FSQRTS; } + virtual void FRES(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FRES; } + virtual void FMULS(u32 frd, u32 fra, u32 frc, bool rc) { func = ppu_interpreter::FMULS; } + virtual void FMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { func = ppu_interpreter::FMADDS; } + virtual void FMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { func = ppu_interpreter::FMSUBS; } + virtual void FNMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { func = ppu_interpreter::FNMSUBS; } + virtual void FNMADDS(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { func = ppu_interpreter::FNMADDS; } + virtual void STD(u32 rs, u32 ra, s32 ds) { func = ppu_interpreter::STD; } + 
/* Handlers STDU through FCMPO: each one leaves its arguments unused and only stores the same-named ppu_interpreter entry in `func`. */ virtual void STDU(u32 rs, u32 ra, s32 ds) { func = ppu_interpreter::STDU; } + virtual void MTFSB1(u32 bt, bool rc) { func = ppu_interpreter::MTFSB1; } + virtual void MCRFS(u32 bf, u32 bfa) { func = ppu_interpreter::MCRFS; } + virtual void MTFSB0(u32 bt, bool rc) { func = ppu_interpreter::MTFSB0; } + virtual void MTFSFI(u32 crfd, u32 i, bool rc) { func = ppu_interpreter::MTFSFI; } + virtual void MFFS(u32 frd, bool rc) { func = ppu_interpreter::MFFS; } + virtual void MTFSF(u32 flm, u32 frb, bool rc) { func = ppu_interpreter::MTFSF; } + + virtual void FCMPU(u32 bf, u32 fra, u32 frb) { func = ppu_interpreter::FCMPU; } + virtual void FRSP(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FRSP; } + virtual void FCTIW(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FCTIW; } + virtual void FCTIWZ(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FCTIWZ; } + virtual void FDIV(u32 frd, u32 fra, u32 frb, bool rc) { func = ppu_interpreter::FDIV; } + virtual void FSUB(u32 frd, u32 fra, u32 frb, bool rc) { func = ppu_interpreter::FSUB; } + virtual void FADD(u32 frd, u32 fra, u32 frb, bool rc) { func = ppu_interpreter::FADD; } + virtual void FSQRT(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FSQRT; } + virtual void FSEL(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { func = ppu_interpreter::FSEL; } + virtual void FMUL(u32 frd, u32 fra, u32 frc, bool rc) { func = ppu_interpreter::FMUL; } + virtual void FRSQRTE(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FRSQRTE; } + virtual void FMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { func = ppu_interpreter::FMSUB; } + virtual void FMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { func = ppu_interpreter::FMADD; } + virtual void FNMSUB(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { func = ppu_interpreter::FNMSUB; } + virtual void FNMADD(u32 frd, u32 fra, u32 frc, u32 frb, bool rc) { func = ppu_interpreter::FNMADD; } + virtual void FCMPO(u32 crfd, u32 fra, u32 frb) { func = ppu_interpreter::FCMPO; } + 
virtual void FNEG(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FNEG; } + virtual void FMR(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FMR; } + virtual void FNABS(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FNABS; } + virtual void FABS(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FABS; } + virtual void FCTID(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FCTID; } + virtual void FCTIDZ(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FCTIDZ; } + virtual void FCFID(u32 frd, u32 frb, bool rc) { func = ppu_interpreter::FCFID; } + + virtual void UNK(const u32 code, const u32 opcode, const u32 gcode) { func = ppu_interpreter::UNK; } +}; \ No newline at end of file diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 412937419e..393efddff1 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -8,15 +8,487 @@ #include "Emu/SysCalls/Modules.h" #include "Emu/Cell/PPUDecoder.h" #include "Emu/Cell/PPUInterpreter.h" +#include "Emu/Cell/PPUInterpreter2.h" #include "Emu/Cell/PPULLVMRecompiler.h" //#include "Emu/Cell/PPURecompiler.h" #include "Emu/CPU/CPUThreadManager.h" +#ifdef _WIN32 +#include +#else +#include +#include +#endif + u64 rotate_mask[64][64]; +const ppu_inter_func_t g_ppu_inter_func_list[] = +{ + ppu_interpreter::NULL_OP, + ppu_interpreter::NOP, + + ppu_interpreter::TDI, + ppu_interpreter::TWI, + + ppu_interpreter::MFVSCR, + ppu_interpreter::MTVSCR, + ppu_interpreter::VADDCUW, + ppu_interpreter::VADDFP, + ppu_interpreter::VADDSBS, + ppu_interpreter::VADDSHS, + ppu_interpreter::VADDSWS, + ppu_interpreter::VADDUBM, + ppu_interpreter::VADDUBS, + ppu_interpreter::VADDUHM, + ppu_interpreter::VADDUHS, + ppu_interpreter::VADDUWM, + ppu_interpreter::VADDUWS, + ppu_interpreter::VAND, + ppu_interpreter::VANDC, + ppu_interpreter::VAVGSB, + ppu_interpreter::VAVGSH, + ppu_interpreter::VAVGSW, + ppu_interpreter::VAVGUB, + ppu_interpreter::VAVGUH, + ppu_interpreter::VAVGUW, + 
ppu_interpreter::VCFSX, + ppu_interpreter::VCFUX, + ppu_interpreter::VCMPBFP, + ppu_interpreter::VCMPBFP_, + ppu_interpreter::VCMPEQFP, + ppu_interpreter::VCMPEQFP_, + ppu_interpreter::VCMPEQUB, + ppu_interpreter::VCMPEQUB_, + ppu_interpreter::VCMPEQUH, + ppu_interpreter::VCMPEQUH_, + ppu_interpreter::VCMPEQUW, + ppu_interpreter::VCMPEQUW_, + ppu_interpreter::VCMPGEFP, + ppu_interpreter::VCMPGEFP_, + ppu_interpreter::VCMPGTFP, + ppu_interpreter::VCMPGTFP_, + ppu_interpreter::VCMPGTSB, + ppu_interpreter::VCMPGTSB_, + ppu_interpreter::VCMPGTSH, + ppu_interpreter::VCMPGTSH_, + ppu_interpreter::VCMPGTSW, + ppu_interpreter::VCMPGTSW_, + ppu_interpreter::VCMPGTUB, + ppu_interpreter::VCMPGTUB_, + ppu_interpreter::VCMPGTUH, + ppu_interpreter::VCMPGTUH_, + ppu_interpreter::VCMPGTUW, + ppu_interpreter::VCMPGTUW_, + ppu_interpreter::VCTSXS, + ppu_interpreter::VCTUXS, + ppu_interpreter::VEXPTEFP, + ppu_interpreter::VLOGEFP, + ppu_interpreter::VMADDFP, + ppu_interpreter::VMAXFP, + ppu_interpreter::VMAXSB, + ppu_interpreter::VMAXSH, + ppu_interpreter::VMAXSW, + ppu_interpreter::VMAXUB, + ppu_interpreter::VMAXUH, + ppu_interpreter::VMAXUW, + ppu_interpreter::VMHADDSHS, + ppu_interpreter::VMHRADDSHS, + ppu_interpreter::VMINFP, + ppu_interpreter::VMINSB, + ppu_interpreter::VMINSH, + ppu_interpreter::VMINSW, + ppu_interpreter::VMINUB, + ppu_interpreter::VMINUH, + ppu_interpreter::VMINUW, + ppu_interpreter::VMLADDUHM, + ppu_interpreter::VMRGHB, + ppu_interpreter::VMRGHH, + ppu_interpreter::VMRGHW, + ppu_interpreter::VMRGLB, + ppu_interpreter::VMRGLH, + ppu_interpreter::VMRGLW, + ppu_interpreter::VMSUMMBM, + ppu_interpreter::VMSUMSHM, + ppu_interpreter::VMSUMSHS, + ppu_interpreter::VMSUMUBM, + ppu_interpreter::VMSUMUHM, + ppu_interpreter::VMSUMUHS, + ppu_interpreter::VMULESB, + ppu_interpreter::VMULESH, + ppu_interpreter::VMULEUB, + ppu_interpreter::VMULEUH, + ppu_interpreter::VMULOSB, + ppu_interpreter::VMULOSH, + ppu_interpreter::VMULOUB, + ppu_interpreter::VMULOUH, + 
ppu_interpreter::VNMSUBFP, + ppu_interpreter::VNOR, + ppu_interpreter::VOR, + ppu_interpreter::VPERM, + ppu_interpreter::VPKPX, + ppu_interpreter::VPKSHSS, + ppu_interpreter::VPKSHUS, + ppu_interpreter::VPKSWSS, + ppu_interpreter::VPKSWUS, + ppu_interpreter::VPKUHUM, + ppu_interpreter::VPKUHUS, + ppu_interpreter::VPKUWUM, + ppu_interpreter::VPKUWUS, + ppu_interpreter::VREFP, + ppu_interpreter::VRFIM, + ppu_interpreter::VRFIN, + ppu_interpreter::VRFIP, + ppu_interpreter::VRFIZ, + ppu_interpreter::VRLB, + ppu_interpreter::VRLH, + ppu_interpreter::VRLW, + ppu_interpreter::VRSQRTEFP, + ppu_interpreter::VSEL, + ppu_interpreter::VSL, + ppu_interpreter::VSLB, + ppu_interpreter::VSLDOI, + ppu_interpreter::VSLH, + ppu_interpreter::VSLO, + ppu_interpreter::VSLW, + ppu_interpreter::VSPLTB, + ppu_interpreter::VSPLTH, + ppu_interpreter::VSPLTISB, + ppu_interpreter::VSPLTISH, + ppu_interpreter::VSPLTISW, + ppu_interpreter::VSPLTW, + ppu_interpreter::VSR, + ppu_interpreter::VSRAB, + ppu_interpreter::VSRAH, + ppu_interpreter::VSRAW, + ppu_interpreter::VSRB, + ppu_interpreter::VSRH, + ppu_interpreter::VSRO, + ppu_interpreter::VSRW, + ppu_interpreter::VSUBCUW, + ppu_interpreter::VSUBFP, + ppu_interpreter::VSUBSBS, + ppu_interpreter::VSUBSHS, + ppu_interpreter::VSUBSWS, + ppu_interpreter::VSUBUBM, + ppu_interpreter::VSUBUBS, + ppu_interpreter::VSUBUHM, + ppu_interpreter::VSUBUHS, + ppu_interpreter::VSUBUWM, + ppu_interpreter::VSUBUWS, + ppu_interpreter::VSUMSWS, + ppu_interpreter::VSUM2SWS, + ppu_interpreter::VSUM4SBS, + ppu_interpreter::VSUM4SHS, + ppu_interpreter::VSUM4UBS, + ppu_interpreter::VUPKHPX, + ppu_interpreter::VUPKHSB, + ppu_interpreter::VUPKHSH, + ppu_interpreter::VUPKLPX, + ppu_interpreter::VUPKLSB, + ppu_interpreter::VUPKLSH, + ppu_interpreter::VXOR, + ppu_interpreter::MULLI, + ppu_interpreter::SUBFIC, + ppu_interpreter::CMPLI, + ppu_interpreter::CMPI, + ppu_interpreter::ADDIC, + ppu_interpreter::ADDIC_, + ppu_interpreter::ADDI, + ppu_interpreter::ADDIS, + 
ppu_interpreter::BC, + ppu_interpreter::HACK, + ppu_interpreter::SC, + ppu_interpreter::B, + ppu_interpreter::MCRF, + ppu_interpreter::BCLR, + ppu_interpreter::CRNOR, + ppu_interpreter::CRANDC, + ppu_interpreter::ISYNC, + ppu_interpreter::CRXOR, + ppu_interpreter::CRNAND, + ppu_interpreter::CRAND, + ppu_interpreter::CREQV, + ppu_interpreter::CRORC, + ppu_interpreter::CROR, + ppu_interpreter::BCCTR, + ppu_interpreter::RLWIMI, + ppu_interpreter::RLWINM, + ppu_interpreter::RLWNM, + ppu_interpreter::ORI, + ppu_interpreter::ORIS, + ppu_interpreter::XORI, + ppu_interpreter::XORIS, + ppu_interpreter::ANDI_, + ppu_interpreter::ANDIS_, + ppu_interpreter::RLDICL, + ppu_interpreter::RLDICR, + ppu_interpreter::RLDIC, + ppu_interpreter::RLDIMI, + ppu_interpreter::RLDC_LR, + ppu_interpreter::CMP, + ppu_interpreter::TW, + ppu_interpreter::LVSL, + ppu_interpreter::LVEBX, + ppu_interpreter::SUBFC, + ppu_interpreter::MULHDU, + ppu_interpreter::ADDC, + ppu_interpreter::MULHWU, + ppu_interpreter::MFOCRF, + ppu_interpreter::LWARX, + ppu_interpreter::LDX, + ppu_interpreter::LWZX, + ppu_interpreter::SLW, + ppu_interpreter::CNTLZW, + ppu_interpreter::SLD, + ppu_interpreter::AND, + ppu_interpreter::CMPL, + ppu_interpreter::LVSR, + ppu_interpreter::LVEHX, + ppu_interpreter::SUBF, + ppu_interpreter::LDUX, + ppu_interpreter::DCBST, + ppu_interpreter::LWZUX, + ppu_interpreter::CNTLZD, + ppu_interpreter::ANDC, + ppu_interpreter::TD, + ppu_interpreter::LVEWX, + ppu_interpreter::MULHD, + ppu_interpreter::MULHW, + ppu_interpreter::LDARX, + ppu_interpreter::DCBF, + ppu_interpreter::LBZX, + ppu_interpreter::LVX, + ppu_interpreter::NEG, + ppu_interpreter::LBZUX, + ppu_interpreter::NOR, + ppu_interpreter::STVEBX, + ppu_interpreter::SUBFE, + ppu_interpreter::ADDE, + ppu_interpreter::MTOCRF, + ppu_interpreter::STDX, + ppu_interpreter::STWCX_, + ppu_interpreter::STWX, + ppu_interpreter::STVEHX, + ppu_interpreter::STDUX, + ppu_interpreter::STWUX, + ppu_interpreter::STVEWX, + ppu_interpreter::SUBFZE, + 
ppu_interpreter::ADDZE, + ppu_interpreter::STDCX_, + ppu_interpreter::STBX, + ppu_interpreter::STVX, + ppu_interpreter::MULLD, + ppu_interpreter::SUBFME, + ppu_interpreter::ADDME, + ppu_interpreter::MULLW, + ppu_interpreter::DCBTST, + ppu_interpreter::STBUX, + ppu_interpreter::ADD, + ppu_interpreter::DCBT, + ppu_interpreter::LHZX, + ppu_interpreter::EQV, + ppu_interpreter::ECIWX, + ppu_interpreter::LHZUX, + ppu_interpreter::XOR, + ppu_interpreter::MFSPR, + ppu_interpreter::LWAX, + ppu_interpreter::DST, + ppu_interpreter::LHAX, + ppu_interpreter::LVXL, + ppu_interpreter::MFTB, + ppu_interpreter::LWAUX, + ppu_interpreter::DSTST, + ppu_interpreter::LHAUX, + ppu_interpreter::STHX, + ppu_interpreter::ORC, + ppu_interpreter::ECOWX, + ppu_interpreter::STHUX, + ppu_interpreter::OR, + ppu_interpreter::DIVDU, + ppu_interpreter::DIVWU, + ppu_interpreter::MTSPR, + ppu_interpreter::DCBI, + ppu_interpreter::NAND, + ppu_interpreter::STVXL, + ppu_interpreter::DIVD, + ppu_interpreter::DIVW, + ppu_interpreter::LVLX, + ppu_interpreter::LDBRX, + ppu_interpreter::LSWX, + ppu_interpreter::LWBRX, + ppu_interpreter::LFSX, + ppu_interpreter::SRW, + ppu_interpreter::SRD, + ppu_interpreter::LVRX, + ppu_interpreter::LSWI, + ppu_interpreter::LFSUX, + ppu_interpreter::SYNC, + ppu_interpreter::LFDX, + ppu_interpreter::LFDUX, + ppu_interpreter::STVLX, + ppu_interpreter::STDBRX, + ppu_interpreter::STSWX, + ppu_interpreter::STWBRX, + ppu_interpreter::STFSX, + ppu_interpreter::STVRX, + ppu_interpreter::STFSUX, + ppu_interpreter::STSWI, + ppu_interpreter::STFDX, + ppu_interpreter::STFDUX, + ppu_interpreter::LVLXL, + ppu_interpreter::LHBRX, + ppu_interpreter::SRAW, + ppu_interpreter::SRAD, + ppu_interpreter::LVRXL, + ppu_interpreter::DSS, + ppu_interpreter::SRAWI, + ppu_interpreter::SRADI, + ppu_interpreter::EIEIO, + ppu_interpreter::STVLXL, + ppu_interpreter::STHBRX, + ppu_interpreter::EXTSH, + ppu_interpreter::STVRXL, + ppu_interpreter::EXTSB, + ppu_interpreter::STFIWX, + ppu_interpreter::EXTSW, + 
ppu_interpreter::ICBI, + ppu_interpreter::DCBZ, + ppu_interpreter::LWZ, + ppu_interpreter::LWZU, + ppu_interpreter::LBZ, + ppu_interpreter::LBZU, + ppu_interpreter::STW, + ppu_interpreter::STWU, + ppu_interpreter::STB, + ppu_interpreter::STBU, + ppu_interpreter::LHZ, + ppu_interpreter::LHZU, + ppu_interpreter::LHA, + ppu_interpreter::LHAU, + ppu_interpreter::STH, + ppu_interpreter::STHU, + ppu_interpreter::LMW, + ppu_interpreter::STMW, + ppu_interpreter::LFS, + ppu_interpreter::LFSU, + ppu_interpreter::LFD, + ppu_interpreter::LFDU, + ppu_interpreter::STFS, + ppu_interpreter::STFSU, + ppu_interpreter::STFD, + ppu_interpreter::STFDU, + ppu_interpreter::LD, + ppu_interpreter::LDU, + ppu_interpreter::LWA, + ppu_interpreter::FDIVS, + ppu_interpreter::FSUBS, + ppu_interpreter::FADDS, + ppu_interpreter::FSQRTS, + ppu_interpreter::FRES, + ppu_interpreter::FMULS, + ppu_interpreter::FMADDS, + ppu_interpreter::FMSUBS, + ppu_interpreter::FNMSUBS, + ppu_interpreter::FNMADDS, + ppu_interpreter::STD, + ppu_interpreter::STDU, + ppu_interpreter::MTFSB1, + ppu_interpreter::MCRFS, + ppu_interpreter::MTFSB0, + ppu_interpreter::MTFSFI, + ppu_interpreter::MFFS, + ppu_interpreter::MTFSF, + + ppu_interpreter::FCMPU, + ppu_interpreter::FRSP, + ppu_interpreter::FCTIW, + ppu_interpreter::FCTIWZ, + ppu_interpreter::FDIV, + ppu_interpreter::FSUB, + ppu_interpreter::FADD, + ppu_interpreter::FSQRT, + ppu_interpreter::FSEL, + ppu_interpreter::FMUL, + ppu_interpreter::FRSQRTE, + ppu_interpreter::FMSUB, + ppu_interpreter::FMADD, + ppu_interpreter::FNMSUB, + ppu_interpreter::FNMADD, + ppu_interpreter::FCMPO, + ppu_interpreter::FNEG, + ppu_interpreter::FMR, + ppu_interpreter::FNABS, + ppu_interpreter::FABS, + ppu_interpreter::FCTID, + ppu_interpreter::FCTIDZ, + ppu_interpreter::FCFID, + + ppu_interpreter::UNK, +}; + extern u32 ppu_get_tls(u32 thread); extern void ppu_free_tls(u32 thread); +void* g_ppu_exec_map = nullptr; + +void finalize_ppu_exec_map() +{ + if (g_ppu_exec_map) + { +#ifdef _WIN32 + 
VirtualFree(g_ppu_exec_map, 0, MEM_RELEASE); +#else + munmap(g_ppu_exec_map, 0x100000000); +#endif + g_ppu_exec_map = nullptr; + } +} + +void initialize_ppu_exec_map() +{ + finalize_ppu_exec_map(); + +#ifdef _WIN32 + g_ppu_exec_map = VirtualAlloc(NULL, 0x100000000, MEM_RESERVE, PAGE_NOACCESS); +#else + g_ppu_exec_map = mmap(nullptr, 0x100000000, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); +#endif +} + +void fill_ppu_exec_map(u32 addr, u32 size) +{ +#ifdef _WIN32 + VirtualAlloc((u8*)g_ppu_exec_map + addr, size, MEM_COMMIT, PAGE_READWRITE); +#else + mprotect((u8*)g_ppu_exec_map + addr, size, PROT_READ | PROT_WRITE); +#endif + + PPUInterpreter2* inter; + PPUDecoder dec(inter = new PPUInterpreter2); + + for (u32 pos = addr; pos < addr + size; pos += 4) + { + inter->func = ppu_interpreter::NULL_OP; + + // decode PPU opcode + dec.Decode(vm::read32(pos)); + + u32 index = 0; + + // find function index + for (; index < sizeof(g_ppu_inter_func_list) / sizeof(ppu_inter_func_t); index++) + { + if (inter->func == g_ppu_inter_func_list[index]) + { + break; + } + } + + // write index in memory + *(u32*)((u8*)g_ppu_exec_map + pos) = index; + } +} + PPUThread& GetCurrentPPUThread() { CPUThread* thread = GetCurrentCPUThread(); @@ -29,6 +501,7 @@ PPUThread& GetCurrentPPUThread() PPUThread::PPUThread() : CPUThread(CPU_THREAD_PPU) { Reset(); + InitRotateMask(); } PPUThread::~PPUThread() @@ -94,18 +567,21 @@ void PPUThread::CloseStack() void PPUThread::DoRun() { - switch(Ini.CPUDecoderMode.GetValue()) - { - case 0: - //m_dec = new PPUDecoder(*new PPUDisAsm()); - break; + m_dec = nullptr; - case 1: + switch (auto mode = Ini.CPUDecoderMode.GetValue()) + { + case 0: // original interpreter { auto ppui = new PPUInterpreter(*this); m_dec = new PPUDecoder(ppui); + break; + } + + case 1: // alternative interpreter + { + break; } - break; case 2: #ifdef PPU_LLVM_RECOMPILER @@ -122,9 +598,11 @@ void PPUThread::DoRun() //case 3: m_dec = new PPURecompiler(*this); break; default: - LOG_ERROR(PPU, 
"Invalid CPU decoder mode: %d", Ini.CPUDecoderMode.GetValue()); + { + LOG_ERROR(PPU, "Invalid CPU decoder mode: %d", mode); Emu.Pause(); } + } } void PPUThread::DoResume() @@ -189,14 +667,16 @@ void PPUThread::FastCall2(u32 addr, u32 rtoc) auto old_rtoc = GPR[2]; auto old_LR = LR; auto old_thread = GetCurrentNamedThread(); + auto old_task = decltype(custom_task)(); m_status = Running; PC = addr; GPR[2] = rtoc; LR = Emu.GetCPUThreadStop(); SetCurrentNamedThread(this); + custom_task.swap(old_task); - CPUThread::Task(); + Task(); m_status = old_status; PC = old_PC; @@ -204,22 +684,58 @@ void PPUThread::FastCall2(u32 addr, u32 rtoc) GPR[2] = old_rtoc; LR = old_LR; SetCurrentNamedThread(old_thread); + custom_task.swap(old_task); } void PPUThread::FastStop() { m_status = Stopped; + m_events |= CPU_EVENT_STOP; } void PPUThread::Task() { + SetHostRoundingMode(FPSCR_RN_NEAR); + if (custom_task) { - custom_task(*this); + return custom_task(*this); } - else + + if (m_dec) { - CPUThread::Task(); + return CPUThread::Task(); + } + + while (true) + { + // get interpreter function + const auto func = g_ppu_inter_func_list[*(u32*)((u8*)g_ppu_exec_map + PC)]; + + if (m_events) + { + // process events + if (Emu.IsStopped()) + { + return; + } + + if (m_events & CPU_EVENT_STOP && (IsStopped() || IsPaused())) + { + m_events &= ~CPU_EVENT_STOP; + return; + } + } + + // read opcode + const ppu_opcode_t opcode = { vm::read32(PC) }; + + // call interpreter function + func(*this, opcode); + + // next instruction + //PC += 4; + NextPc(4); } } diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp new file mode 100644 index 0000000000..b4cee8d155 --- /dev/null +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -0,0 +1,1274 @@ +#include "stdafx.h" +#include "Utilities/Log.h" +#include "Emu/Memory/Memory.h" +#include "Emu/System.h" + +#include "SPUThread.h" +#include "SPUInstrTable.h" +#include "SPUInterpreter.h" +#include "SPUInterpreter2.h" + +#ifdef _MSC_VER +#include 
+#define rotl32 _rotl +#define rotl16 _rotl16 +#else +#include +#define rotl16(x,r) (((u16)(x) << (r)) | ((u16)(x) >> (16 - (r)))) +#define rotl32(x,r) (((u32)(x) << (r)) | ((u32)(x) >> (32 - (r)))) +#endif + +class spu_scale_table_t +{ + std::array<__m128, 155 + 174> m_data; + +public: + spu_scale_table_t() + { + for (s32 i = -155; i < 174; i++) + { + m_data[i + 155] = _mm_set1_ps(static_cast(exp2(i))); + } + } + + __forceinline __m128 operator [] (s32 scale) const + { + return m_data[scale + 155]; + } +} +const g_spu_scale_table; + + +void spu_interpreter::DEFAULT(SPUThread& CPU, spu_opcode_t op) +{ + SPUInterpreter inter(CPU); (*SPU_instr::rrr_list)(&inter, op.opcode); +} + + +void spu_interpreter::STOP(SPUThread& CPU, spu_opcode_t op) +{ + CPU.stop_and_signal(op.opcode & 0x3fff); +} + +void spu_interpreter::LNOP(SPUThread& CPU, spu_opcode_t op) +{ +} + +void spu_interpreter::SYNC(SPUThread& CPU, spu_opcode_t op) +{ + _mm_mfence(); +} + +void spu_interpreter::DSYNC(SPUThread& CPU, spu_opcode_t op) +{ + _mm_mfence(); +} + +void spu_interpreter::MFSPR(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].clear(); +} + +void spu_interpreter::RDCH(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt] = u128::from32r(CPU.get_ch_value(op.ra)); +} + +void spu_interpreter::RCHCNT(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt] = u128::from32r(CPU.get_ch_count(op.ra)); +} + +void spu_interpreter::SF(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt] = u128::sub32(CPU.GPR[op.rb], CPU.GPR[op.ra]); +} + +void spu_interpreter::OR(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt] = CPU.GPR[op.ra] | CPU.GPR[op.rb]; +} + +void spu_interpreter::BG(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_add_epi32(sse_cmpgt_epu32(CPU.GPR[op.ra].vi, CPU.GPR[op.rb].vi), _mm_set1_epi32(1)); +} + +void spu_interpreter::SFH(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt] = u128::sub16(CPU.GPR[op.rb], CPU.GPR[op.ra]); +} + +void spu_interpreter::NOR(SPUThread& CPU, 
spu_opcode_t op) +{ + CPU.GPR[op.rt] = ~(CPU.GPR[op.ra] | CPU.GPR[op.rb]); +} + +void spu_interpreter::ABSDB(SPUThread& CPU, spu_opcode_t op) +{ + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + CPU.GPR[op.rt] = u128::sub8(u128::maxu8(a, b), u128::minu8(a, b)); +} + +void spu_interpreter::ROT(SPUThread& CPU, spu_opcode_t op) +{ + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + + for (u32 i = 0; i < 4; i++) + { + CPU.GPR[op.rt]._u32[i] = rotl32(a._u32[i], b._s32[i]); + } +} + +void spu_interpreter::ROTM(SPUThread& CPU, spu_opcode_t op) +{ + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + + for (u32 i = 0; i < 4; i++) + { + const u64 value = a._u32[i]; + CPU.GPR[op.rt]._u32[i] = static_cast(value >> (0 - b._u32[i])); + } +} + +void spu_interpreter::ROTMA(SPUThread& CPU, spu_opcode_t op) +{ + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + + for (u32 i = 0; i < 4; i++) + { + const s64 value = a._s32[i]; + CPU.GPR[op.rt]._s32[i] = static_cast(value >> (0 - b._u32[i])); + } +} + +void spu_interpreter::SHL(SPUThread& CPU, spu_opcode_t op) +{ + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + + for (u32 i = 0; i < 4; i++) + { + const u64 value = a._u32[i]; + CPU.GPR[op.rt]._u32[i] = static_cast(value << b._u32[i]); + } +} + +void spu_interpreter::ROTH(SPUThread& CPU, spu_opcode_t op) +{ + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + + for (u32 i = 0; i < 8; i++) + { + CPU.GPR[op.rt]._u16[i] = rotl16(a._u16[i], b._u8[i * 2]); + } +} + +void spu_interpreter::ROTHM(SPUThread& CPU, spu_opcode_t op) +{ + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + + for (u32 i = 0; i < 8; i++) + { + const u32 value = a._u16[i]; + CPU.GPR[op.rt]._u16[i] = static_cast(value >> (0 - b._u16[i])); + } +} + +void spu_interpreter::ROTMAH(SPUThread& CPU, spu_opcode_t op) +{ + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + + for (u32 i = 0; i < 8; i++) + 
{ + const s32 value = a._s16[i]; + CPU.GPR[op.rt]._s16[i] = static_cast(value >> (0 - b._u16[i])); + } +} + +void spu_interpreter::SHLH(SPUThread& CPU, spu_opcode_t op) +{ + const auto a = CPU.GPR[op.ra]; + const auto b = CPU.GPR[op.rb]; + + for (u32 i = 0; i < 8; i++) + { + const u32 value = a._u16[i]; + CPU.GPR[op.rt]._u16[i] = static_cast(value << b._u16[i]); + } +} + +void spu_interpreter::ROTI(SPUThread& CPU, spu_opcode_t op) +{ + const auto a = CPU.GPR[op.ra].vi; + const s32 n = op.si7 & 0x1f; + CPU.GPR[op.rt].vi = _mm_or_si128(_mm_slli_epi32(a, n), _mm_srli_epi32(a, 32 - n)); +} + +void spu_interpreter::ROTMI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_srli_epi32(CPU.GPR[op.ra].vi, -op.si7 & 0x3f); +} + +void spu_interpreter::ROTMAI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_srai_epi32(CPU.GPR[op.ra].vi, -op.si7 & 0x3f); +} + +void spu_interpreter::SHLI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_slli_epi32(CPU.GPR[op.ra].vi, op.si7 & 0x3f); +} + +void spu_interpreter::ROTHI(SPUThread& CPU, spu_opcode_t op) +{ + const auto a = CPU.GPR[op.ra].vi; + const s32 n = op.si7 & 0xf; + CPU.GPR[op.rt].vi = _mm_or_si128(_mm_slli_epi16(a, n), _mm_srli_epi16(a, 16 - n)); +} + +void spu_interpreter::ROTHMI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_srli_epi16(CPU.GPR[op.ra].vi, -op.si7 & 0x1f); +} + +void spu_interpreter::ROTMAHI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_srai_epi16(CPU.GPR[op.ra].vi, -op.si7 & 0x1f); +} + +void spu_interpreter::SHLHI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_slli_epi16(CPU.GPR[op.ra].vi, op.si7 & 0x1f); +} + +void spu_interpreter::A(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt] = u128::add32(CPU.GPR[op.ra], CPU.GPR[op.rb]); +} + +void spu_interpreter::AND(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt] = CPU.GPR[op.ra] & CPU.GPR[op.rb]; +} + +void spu_interpreter::CG(SPUThread& CPU, spu_opcode_t op) +{ + const auto 
a = _mm_xor_si128(CPU.GPR[op.ra].vi, _mm_set1_epi32(0x7fffffff)); + const auto b = _mm_xor_si128(CPU.GPR[op.rb].vi, _mm_set1_epi32(0x80000000)); + CPU.GPR[op.rt].vi = _mm_srli_epi32(_mm_cmpgt_epi32(b, a), 31); +} + +void spu_interpreter::AH(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt] = u128::add16(CPU.GPR[op.ra], CPU.GPR[op.rb]); +} + +void spu_interpreter::NAND(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt] = ~(CPU.GPR[op.ra] & CPU.GPR[op.rb]); +} + +void spu_interpreter::AVGB(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_avg_epu8(CPU.GPR[op.ra].vi, CPU.GPR[op.rb].vi); +} + +void spu_interpreter::MTSPR(SPUThread& CPU, spu_opcode_t op) +{ +} + +void spu_interpreter::WRCH(SPUThread& CPU, spu_opcode_t op) +{ + CPU.set_ch_value(op.ra, CPU.GPR[op.rt]._u32[3]); +} + +void spu_interpreter::BIZ(SPUThread& CPU, spu_opcode_t op) +{ + if (op.d || op.e) + { + throw __FUNCTION__; + } + + if (CPU.GPR[op.rt]._u32[3] == 0) + { + CPU.SetBranch(SPUOpcodes::branchTarget(CPU.GPR[op.ra]._u32[3], 0)); + } +} + +void spu_interpreter::BINZ(SPUThread& CPU, spu_opcode_t op) +{ + if (op.d || op.e) + { + throw __FUNCTION__; + } + + if (CPU.GPR[op.rt]._u32[3] != 0) + { + CPU.SetBranch(SPUOpcodes::branchTarget(CPU.GPR[op.ra]._u32[3], 0)); + } +} + +void spu_interpreter::BIHZ(SPUThread& CPU, spu_opcode_t op) +{ + if (op.d || op.e) + { + throw __FUNCTION__; + } + + if (CPU.GPR[op.rt]._u16[6] == 0) + { + CPU.SetBranch(SPUOpcodes::branchTarget(CPU.GPR[op.ra]._u32[3], 0)); + } +} + +void spu_interpreter::BIHNZ(SPUThread& CPU, spu_opcode_t op) +{ + if (op.d || op.e) + { + throw __FUNCTION__; + } + + if (CPU.GPR[op.rt]._u16[6] != 0) + { + CPU.SetBranch(SPUOpcodes::branchTarget(CPU.GPR[op.ra]._u32[3], 0)); + } +} + +void spu_interpreter::STOPD(SPUThread& CPU, spu_opcode_t op) +{ + throw __FUNCTION__; +} + +void spu_interpreter::STQX(SPUThread& CPU, spu_opcode_t op) +{ + CPU.write128((CPU.GPR[op.ra]._u32[3] + CPU.GPR[op.rb]._u32[3]) & 0x3fff0, CPU.GPR[op.rt]); +} + +void 
spu_interpreter::BI(SPUThread& CPU, spu_opcode_t op) +{ + if (op.d || op.e) + { + throw __FUNCTION__; + } + + CPU.SetBranch(SPUOpcodes::branchTarget(CPU.GPR[op.ra]._u32[3], 0)); +} + +void spu_interpreter::BISL(SPUThread& CPU, spu_opcode_t op) +{ + if (op.d || op.e) + { + throw __FUNCTION__; + } + + const u32 target = SPUOpcodes::branchTarget(CPU.GPR[op.ra]._u32[3], 0); + CPU.GPR[op.rt] = u128::from32r(CPU.PC + 4); + CPU.SetBranch(target); +} + +void spu_interpreter::IRET(SPUThread& CPU, spu_opcode_t op) +{ + throw __FUNCTION__; +} + +void spu_interpreter::BISLED(SPUThread& CPU, spu_opcode_t op) +{ + throw __FUNCTION__; +} + +void spu_interpreter::HBR(SPUThread& CPU, spu_opcode_t op) +{ +} + +void spu_interpreter::GB(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt] = u128::from32r(_mm_movemask_epi8(_mm_slli_epi64(_mm_shuffle_epi8(CPU.GPR[op.ra].vi, _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0)), 7))); +} + +void spu_interpreter::GBH(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt] = u128::from32r(_mm_movemask_epi8(_mm_slli_epi64(_mm_shuffle_epi8(CPU.GPR[op.ra].vi, _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 14, 12, 10, 8, 6, 4, 2, 0)), 7))); +} + +void spu_interpreter::GBB(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt] = u128::from32r(_mm_movemask_epi8(_mm_slli_epi64(CPU.GPR[op.ra].vi, 7))); +} + +void spu_interpreter::FSM(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = g_imm_table.fsm_table[CPU.GPR[op.ra]._u32[3] & 0xf]; +} + +void spu_interpreter::FSMH(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = g_imm_table.fsmh_table[CPU.GPR[op.ra]._u32[3] & 0xff]; +} + +void spu_interpreter::FSMB(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = g_imm_table.fsmb_table[CPU.GPR[op.ra]._u32[3] & 0xffff]; +} + +void spu_interpreter::FREST(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vf = _mm_rcp_ps(CPU.GPR[op.ra].vf); +} + +void spu_interpreter::FRSQEST(SPUThread& CPU, spu_opcode_t op) +{ + const 
auto mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); + CPU.GPR[op.rt].vf = _mm_rsqrt_ps(_mm_and_ps(CPU.GPR[op.ra].vf, mask)); +} + +void spu_interpreter::LQX(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt] = CPU.read128((CPU.GPR[op.ra]._u32[3] + CPU.GPR[op.rb]._u32[3]) & 0x3fff0); +} + +void spu_interpreter::ROTQBYBI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.rldq_pshufb[CPU.GPR[op.rb]._u32[3] >> 3 & 0xf]); +} + +void spu_interpreter::ROTQMBYBI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.srdq_pshufb[-(CPU.GPR[op.rb]._s32[3] >> 3) & 0x1f]); +} + +void spu_interpreter::SHLQBYBI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.sldq_pshufb[CPU.GPR[op.rb]._u32[3] >> 3 & 0x1f]); +} + +void spu_interpreter::CBX(SPUThread& CPU, spu_opcode_t op) +{ + const s32 t = ~(CPU.GPR[op.rb]._u32[3] + CPU.GPR[op.ra]._u32[3]) & 0xf; + CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt]._u8[t] = 0x03; +} + +void spu_interpreter::CHX(SPUThread& CPU, spu_opcode_t op) +{ + const s32 t = (~(CPU.GPR[op.rb]._u32[3] + CPU.GPR[op.ra]._u32[3]) & 0xe) >> 1; + CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt]._u16[t] = 0x0203; +} + +void spu_interpreter::CWX(SPUThread& CPU, spu_opcode_t op) +{ + const s32 t = (~(CPU.GPR[op.rb]._u32[3] + CPU.GPR[op.ra]._u32[3]) & 0xc) >> 2; + CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt]._u32[t] = 0x00010203; +} + +void spu_interpreter::CDX(SPUThread& CPU, spu_opcode_t op) +{ + const s32 t = (~(CPU.GPR[op.rb]._u32[3] + CPU.GPR[op.ra]._u32[3]) & 0x8) >> 3; + CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt]._u64[t] = 0x0001020304050607ull; +} + +void spu_interpreter::ROTQBI(SPUThread& CPU, spu_opcode_t 
op) +{ + const auto a = CPU.GPR[op.ra].vi; + const s32 n = CPU.GPR[op.rb]._s32[3] & 0x7; + CPU.GPR[op.rt].vi = _mm_or_si128(_mm_slli_epi64(a, n), _mm_srli_epi64(_mm_alignr_epi8(a, a, 8), 64 - n)); +} + +void spu_interpreter::ROTQMBI(SPUThread& CPU, spu_opcode_t op) +{ + const auto a = CPU.GPR[op.ra].vi; + const s32 n = -CPU.GPR[op.rb]._s32[3] & 0x7; + CPU.GPR[op.rt].vi = _mm_or_si128(_mm_srli_epi64(a, n), _mm_slli_epi64(_mm_srli_si128(a, 8), 64 - n)); +} + +void spu_interpreter::SHLQBI(SPUThread& CPU, spu_opcode_t op) +{ + const auto a = CPU.GPR[op.ra].vi; + const s32 n = CPU.GPR[op.rb]._u32[3] & 0x7; + CPU.GPR[op.rt].vi = _mm_or_si128(_mm_slli_epi64(a, n), _mm_srli_epi64(_mm_slli_si128(a, 8), 64 - n)); +} + +void spu_interpreter::ROTQBY(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.rldq_pshufb[CPU.GPR[op.rb]._u32[3] & 0xf]); +} + +void spu_interpreter::ROTQMBY(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.srdq_pshufb[-CPU.GPR[op.rb]._s32[3] & 0x1f]); +} + +void spu_interpreter::SHLQBY(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.sldq_pshufb[CPU.GPR[op.rb]._u32[3] & 0x1f]); +} + +void spu_interpreter::ORX(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt] = u128::from32r(CPU.GPR[op.ra]._u32[0] | CPU.GPR[op.ra]._u32[1] | CPU.GPR[op.ra]._u32[2] | CPU.GPR[op.ra]._u32[3]); +} + +void spu_interpreter::CBD(SPUThread& CPU, spu_opcode_t op) +{ + const s32 t = ~(op.i7 + CPU.GPR[op.ra]._u32[3]) & 0xf; + CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt]._u8[t] = 0x03; +} + +void spu_interpreter::CHD(SPUThread& CPU, spu_opcode_t op) +{ + const s32 t = (~(op.i7 + CPU.GPR[op.ra]._u32[3]) & 0xe) >> 1; + CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt]._u16[t] = 0x0203; +} + +void spu_interpreter::CWD(SPUThread& 
CPU, spu_opcode_t op) +{ + const s32 t = (~(op.i7 + CPU.GPR[op.ra]._u32[3]) & 0xc) >> 2; + CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt]._u32[t] = 0x00010203; +} + +void spu_interpreter::CDD(SPUThread& CPU, spu_opcode_t op) +{ + const s32 t = (~(op.i7 + CPU.GPR[op.ra]._u32[3]) & 0x8) >> 3; + CPU.GPR[op.rt] = u128::from64(0x18191A1B1C1D1E1Full, 0x1011121314151617ull); + CPU.GPR[op.rt]._u64[t] = 0x0001020304050607ull; +} + +void spu_interpreter::ROTQBII(SPUThread& CPU, spu_opcode_t op) +{ + const auto a = CPU.GPR[op.ra].vi; + const s32 n = op.i7 & 0x7; + CPU.GPR[op.rt].vi = _mm_or_si128(_mm_slli_epi64(a, n), _mm_srli_epi64(_mm_alignr_epi8(a, a, 8), 64 - n)); +} + +void spu_interpreter::ROTQMBII(SPUThread& CPU, spu_opcode_t op) +{ + const auto a = CPU.GPR[op.ra].vi; + const s32 n = -op.si7 & 0x7; + CPU.GPR[op.rt].vi = _mm_or_si128(_mm_srli_epi64(a, n), _mm_slli_epi64(_mm_srli_si128(a, 8), 64 - n)); +} + +void spu_interpreter::SHLQBII(SPUThread& CPU, spu_opcode_t op) +{ + const auto a = CPU.GPR[op.ra].vi; + const s32 n = op.i7 & 0x7; + CPU.GPR[op.rt].vi = _mm_or_si128(_mm_slli_epi64(a, n), _mm_srli_epi64(_mm_slli_si128(a, 8), 64 - n)); +} + +void spu_interpreter::ROTQBYI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.rldq_pshufb[op.i7 & 0xf]); +} + +void spu_interpreter::ROTQMBYI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.srdq_pshufb[-op.si7 & 0x1f]); +} + +void spu_interpreter::SHLQBYI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, g_imm_table.sldq_pshufb[op.i7 & 0x1f]); +} + +void spu_interpreter::NOP(SPUThread& CPU, spu_opcode_t op) +{ +} + +void spu_interpreter::CGT(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_cmpgt_epi32(CPU.GPR[op.ra].vi, CPU.GPR[op.rb].vi); +} + +void spu_interpreter::XOR(SPUThread& CPU, spu_opcode_t op) +{ + 
CPU.GPR[op.rt] = CPU.GPR[op.ra] ^ CPU.GPR[op.rb]; +} + +void spu_interpreter::CGTH(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_cmpgt_epi16(CPU.GPR[op.ra].vi, CPU.GPR[op.rb].vi); +} + +void spu_interpreter::EQV(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt] = ~(CPU.GPR[op.ra] ^ CPU.GPR[op.rb]); +} + +void spu_interpreter::CGTB(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_cmpgt_epi8(CPU.GPR[op.ra].vi, CPU.GPR[op.rb].vi); +} + +void spu_interpreter::SUMB(SPUThread& CPU, spu_opcode_t op) +{ + const auto ones = _mm_set1_epi8(1); + const auto a = _mm_maddubs_epi16(CPU.GPR[op.ra].vi, ones); + const auto b = _mm_maddubs_epi16(CPU.GPR[op.rb].vi, ones); + CPU.GPR[op.rt].vi = _mm_shuffle_epi8(_mm_hadd_epi16(a, b), _mm_set_epi8(15, 14, 7, 6, 13, 12, 5, 4, 11, 10, 3, 2, 9, 8, 1, 0)); +} + +void spu_interpreter::HGT(SPUThread& CPU, spu_opcode_t op) +{ + if (CPU.GPR[op.ra]._s32[3] > CPU.GPR[op.rb]._s32[3]) + { + CPU.halt(); + } +} + +void spu_interpreter::CLZ(SPUThread& CPU, spu_opcode_t op) +{ + for (u32 i = 0; i < 4; i++) + { + CPU.GPR[op.rt]._u32[i] = cntlz32(CPU.GPR[op.ra]._u32[i]); + } +} + +void spu_interpreter::XSWD(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt]._s64[0] = CPU.GPR[op.ra]._s32[0]; + CPU.GPR[op.rt]._s64[1] = CPU.GPR[op.ra]._s32[2]; +} + +void spu_interpreter::XSHW(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_srai_epi32(_mm_slli_epi32(CPU.GPR[op.ra].vi, 16), 16); +} + +void spu_interpreter::CNTB(SPUThread& CPU, spu_opcode_t op) +{ + const auto counts = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0); + const auto mask = _mm_set1_epi8(0xf); + const auto a = CPU.GPR[op.ra].vi; + CPU.GPR[op.rt].vi = _mm_add_epi8(_mm_shuffle_epi8(counts, _mm_and_si128(a, mask)), _mm_shuffle_epi8(counts, _mm_and_si128(_mm_srli_epi64(a, 4), mask))); +} + +void spu_interpreter::XSBH(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_srai_epi16(_mm_slli_epi16(CPU.GPR[op.ra].vi, 8), 8); +} + +void 
spu_interpreter::CLGT(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = sse_cmpgt_epu32(CPU.GPR[op.ra].vi, CPU.GPR[op.rb].vi); +} + +void spu_interpreter::ANDC(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt] = u128::andnot(CPU.GPR[op.rb], CPU.GPR[op.ra]); +} + +void spu_interpreter::FCGT(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vf = _mm_cmplt_ps(CPU.GPR[op.rb].vf, CPU.GPR[op.ra].vf); +} + +void spu_interpreter::DFCGT(SPUThread& CPU, spu_opcode_t op) +{ + throw __FUNCTION__; +} + +void spu_interpreter::FA(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt] = u128::addfs(CPU.GPR[op.ra], CPU.GPR[op.rb]); +} + +void spu_interpreter::FS(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt] = u128::subfs(CPU.GPR[op.ra], CPU.GPR[op.rb]); +} + +void spu_interpreter::FM(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vf = _mm_mul_ps(CPU.GPR[op.ra].vf, CPU.GPR[op.rb].vf); +} + +void spu_interpreter::CLGTH(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = sse_cmpgt_epu16(CPU.GPR[op.ra].vi, CPU.GPR[op.rb].vi); +} + +void spu_interpreter::ORC(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt] = CPU.GPR[op.ra] | ~CPU.GPR[op.rb]; +} + +void spu_interpreter::FCMGT(SPUThread& CPU, spu_opcode_t op) +{ + const auto mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); + CPU.GPR[op.rt].vf = _mm_cmplt_ps(_mm_and_ps(CPU.GPR[op.rb].vf, mask), _mm_and_ps(CPU.GPR[op.ra].vf, mask)); +} + +void spu_interpreter::DFCMGT(SPUThread& CPU, spu_opcode_t op) +{ + throw __FUNCTION__; +} + +void spu_interpreter::DFA(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt] = u128::addfd(CPU.GPR[op.ra], CPU.GPR[op.rb]); +} + +void spu_interpreter::DFS(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt] = u128::subfd(CPU.GPR[op.ra], CPU.GPR[op.rb]); +} + +void spu_interpreter::DFM(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vd = _mm_mul_pd(CPU.GPR[op.ra].vd, CPU.GPR[op.rb].vd); +} + +void spu_interpreter::CLGTB(SPUThread& CPU, spu_opcode_t op) +{ + 
CPU.GPR[op.rt].vi = sse_cmpgt_epu8(CPU.GPR[op.ra].vi, CPU.GPR[op.rb].vi); +} + +void spu_interpreter::HLGT(SPUThread& CPU, spu_opcode_t op) +{ + if (CPU.GPR[op.ra]._u32[3] > CPU.GPR[op.rb]._u32[3]) + { + CPU.halt(); + } +} + +void spu_interpreter::DFMA(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vd = _mm_add_pd(_mm_mul_pd(CPU.GPR[op.ra].vd, CPU.GPR[op.rb].vd), CPU.GPR[op.rt].vd); +} + +void spu_interpreter::DFMS(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vd = _mm_sub_pd(_mm_mul_pd(CPU.GPR[op.ra].vd, CPU.GPR[op.rb].vd), CPU.GPR[op.rt].vd); +} + +void spu_interpreter::DFNMS(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vd = _mm_sub_pd(CPU.GPR[op.rt].vd, _mm_mul_pd(CPU.GPR[op.ra].vd, CPU.GPR[op.rb].vd)); +} + +void spu_interpreter::DFNMA(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vd = _mm_sub_pd(_mm_set1_pd(0.0), _mm_add_pd(_mm_mul_pd(CPU.GPR[op.ra].vd, CPU.GPR[op.rb].vd), CPU.GPR[op.rt].vd)); +} + +void spu_interpreter::CEQ(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_cmpeq_epi32(CPU.GPR[op.ra].vi, CPU.GPR[op.rb].vi); +} + +void spu_interpreter::MPYHHU(SPUThread& CPU, spu_opcode_t op) +{ + const auto a = _mm_srli_epi32(CPU.GPR[op.ra].vi, 16); + const auto b = _mm_srli_epi32(CPU.GPR[op.rb].vi, 16); + CPU.GPR[op.rt].vi = _mm_or_si128(_mm_slli_epi32(_mm_mulhi_epu16(a, b), 16), _mm_mullo_epi16(a, b)); +} + +void spu_interpreter::ADDX(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt] = u128::add32(u128::add32(CPU.GPR[op.ra], CPU.GPR[op.rb]), CPU.GPR[op.rt] & u128::from32p(1)); +} + +void spu_interpreter::SFX(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt] = u128::sub32(u128::sub32(CPU.GPR[op.rb], CPU.GPR[op.ra]), u128::andnot(CPU.GPR[op.rt], u128::from32p(1))); +} + +void spu_interpreter::CGX(SPUThread& CPU, spu_opcode_t op) +{ + for (s32 i = 0; i < 4; i++) + { + const u64 carry = CPU.GPR[op.rt]._u32[i] & 1; + CPU.GPR[op.rt]._u32[i] = (carry + CPU.GPR[op.ra]._u32[i] + CPU.GPR[op.rb]._u32[i]) >> 32; + } +} + 
+void spu_interpreter::BGX(SPUThread& CPU, spu_opcode_t op) +{ + for (s32 i = 0; i < 4; i++) + { + const s64 result = (u64)CPU.GPR[op.rb]._u32[i] - (u64)CPU.GPR[op.ra]._u32[i] - (u64)(1 - (CPU.GPR[op.rt]._u32[i] & 1)); + CPU.GPR[op.rt]._u32[i] = result >= 0; + } +} + +void spu_interpreter::MPYHHA(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_add_epi32(CPU.GPR[op.rt].vi, _mm_madd_epi16(_mm_srli_epi32(CPU.GPR[op.ra].vi, 16), _mm_srli_epi32(CPU.GPR[op.rb].vi, 16))); +} + +void spu_interpreter::MPYHHAU(SPUThread& CPU, spu_opcode_t op) +{ + const auto a = _mm_srli_epi32(CPU.GPR[op.ra].vi, 16); + const auto b = _mm_srli_epi32(CPU.GPR[op.rb].vi, 16); + CPU.GPR[op.rt].vi = _mm_add_epi32(CPU.GPR[op.rt].vi, _mm_or_si128(_mm_slli_epi32(_mm_mulhi_epu16(a, b), 16), _mm_mullo_epi16(a, b))); +} + +void spu_interpreter::FSCRRD(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].clear(); +} + +void spu_interpreter::FESD(SPUThread& CPU, spu_opcode_t op) +{ + const auto a = CPU.GPR[op.ra].vf; + CPU.GPR[op.rt].vd = _mm_cvtps_pd(_mm_shuffle_ps(a, a, 0x8d)); +} + +void spu_interpreter::FRDS(SPUThread& CPU, spu_opcode_t op) +{ + const auto t = _mm_cvtpd_ps(CPU.GPR[op.ra].vd); + CPU.GPR[op.rt].vf = _mm_shuffle_ps(t, t, 0x72); +} + +void spu_interpreter::FSCRWR(SPUThread& CPU, spu_opcode_t op) +{ +} + +void spu_interpreter::DFTSV(SPUThread& CPU, spu_opcode_t op) +{ + throw __FUNCTION__; +} + +void spu_interpreter::FCEQ(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vf = _mm_cmpeq_ps(CPU.GPR[op.rb].vf, CPU.GPR[op.ra].vf); +} + +void spu_interpreter::DFCEQ(SPUThread& CPU, spu_opcode_t op) +{ + throw __FUNCTION__; +} + +void spu_interpreter::MPY(SPUThread& CPU, spu_opcode_t op) +{ + const auto mask = _mm_set1_epi32(0xffff); + CPU.GPR[op.rt].vi = _mm_madd_epi16(_mm_and_si128(CPU.GPR[op.ra].vi, mask), _mm_and_si128(CPU.GPR[op.rb].vi, mask)); +} + +void spu_interpreter::MPYH(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = 
_mm_slli_epi32(_mm_mullo_epi16(_mm_srli_epi32(CPU.GPR[op.ra].vi, 16), CPU.GPR[op.rb].vi), 16); +} + +void spu_interpreter::MPYHH(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_madd_epi16(_mm_srli_epi32(CPU.GPR[op.ra].vi, 16), _mm_srli_epi32(CPU.GPR[op.rb].vi, 16)); +} + +void spu_interpreter::MPYS(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_srai_epi32(_mm_slli_epi32(_mm_mulhi_epi16(CPU.GPR[op.ra].vi, CPU.GPR[op.rb].vi), 16), 16); +} + +void spu_interpreter::CEQH(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_cmpeq_epi16(CPU.GPR[op.ra].vi, CPU.GPR[op.rb].vi); +} + +void spu_interpreter::FCMEQ(SPUThread& CPU, spu_opcode_t op) +{ + const auto mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); + CPU.GPR[op.rt].vf = _mm_cmpeq_ps(_mm_and_ps(CPU.GPR[op.rb].vf, mask), _mm_and_ps(CPU.GPR[op.ra].vf, mask)); +} + +void spu_interpreter::DFCMEQ(SPUThread& CPU, spu_opcode_t op) +{ + throw __FUNCTION__; +} + +void spu_interpreter::MPYU(SPUThread& CPU, spu_opcode_t op) +{ + const auto a = _mm_and_si128(CPU.GPR[op.ra].vi, _mm_set1_epi32(0xffff)); + const auto b = _mm_and_si128(CPU.GPR[op.rb].vi, _mm_set1_epi32(0xffff)); + CPU.GPR[op.rt].vi = _mm_or_si128(_mm_slli_epi32(_mm_mulhi_epu16(a, b), 16), _mm_mullo_epi16(a, b)); +} + +void spu_interpreter::CEQB(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_cmpeq_epi8(CPU.GPR[op.ra].vi, CPU.GPR[op.rb].vi); +} + +void spu_interpreter::FI(SPUThread& CPU, spu_opcode_t op) +{ + const auto mask_se = _mm_castsi128_ps(_mm_set1_epi32(0xff800000)); // sign and exponent mask + const auto mask_bf = _mm_castsi128_ps(_mm_set1_epi32(0x007ffc00)); // base fraction mask + const auto mask_sf = _mm_set1_epi32(0x000003ff); // step fraction mask + const auto mask_yf = _mm_set1_epi32(0x0007ffff); // Y fraction mask (bits 13..31) + const auto base = _mm_or_ps(_mm_and_ps(CPU.GPR[op.rb].vf, mask_bf), _mm_castsi128_ps(_mm_set1_epi32(0x3f800000))); + const auto step = 
_mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(CPU.GPR[op.rb].vi, mask_sf)), g_spu_scale_table[-13]); + const auto y = _mm_mul_ps(_mm_cvtepi32_ps(_mm_and_si128(CPU.GPR[op.ra].vi, mask_yf)), g_spu_scale_table[-19]); + CPU.GPR[op.rt].vf = _mm_or_ps(_mm_and_ps(mask_se, CPU.GPR[op.rb].vf), _mm_andnot_ps(mask_se, _mm_sub_ps(base, _mm_mul_ps(step, y)))); +} + +void spu_interpreter::HEQ(SPUThread& CPU, spu_opcode_t op) +{ + if (CPU.GPR[op.ra]._s32[3] == CPU.GPR[op.rb]._s32[3]) + { + CPU.halt(); + } +} + + +void spu_interpreter::CFLTS(SPUThread& CPU, spu_opcode_t op) +{ + const auto scaled = _mm_mul_ps(CPU.GPR[op.ra].vf, g_spu_scale_table[173 - op.i8]); + CPU.GPR[op.rt].vi = _mm_xor_si128(_mm_cvttps_epi32(scaled), _mm_castps_si128(_mm_cmpge_ps(scaled, _mm_set1_ps(0x80000000)))); +} + +void spu_interpreter::CFLTU(SPUThread& CPU, spu_opcode_t op) +{ + const auto scaled1 = _mm_max_ps(_mm_mul_ps(CPU.GPR[op.ra].vf, g_spu_scale_table[173 - op.i8]), _mm_set1_ps(0.0f)); + const auto scaled2 = _mm_and_ps(_mm_sub_ps(scaled1, _mm_set1_ps(0x80000000)), _mm_cmpge_ps(scaled1, _mm_set1_ps(0x80000000))); + CPU.GPR[op.rt].vi = _mm_or_si128(_mm_or_si128(_mm_cvttps_epi32(scaled1), _mm_cvttps_epi32(scaled2)), _mm_castps_si128(_mm_cmpge_ps(scaled1, _mm_set1_ps(0x100000000)))); +} + +void spu_interpreter::CSFLT(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vf = _mm_mul_ps(_mm_cvtepi32_ps(CPU.GPR[op.ra].vi), g_spu_scale_table[op.i8 - 155]); +} + +void spu_interpreter::CUFLT(SPUThread& CPU, spu_opcode_t op) +{ + const auto a = CPU.GPR[op.ra].vi; + const auto fix = _mm_and_ps(_mm_castsi128_ps(_mm_srai_epi32(a, 31)), _mm_set1_ps(0x80000000)); + CPU.GPR[op.rt].vf = _mm_mul_ps(_mm_add_ps(_mm_cvtepi32_ps(_mm_and_si128(a, _mm_set1_epi32(0x7fffffff))), fix), g_spu_scale_table[op.i8 - 155]); +} + + +void spu_interpreter::BRZ(SPUThread& CPU, spu_opcode_t op) +{ + if (CPU.GPR[op.rt]._u32[3] == 0) + { + CPU.SetBranch(SPUOpcodes::branchTarget(CPU.PC, op.i16)); + } +} + +void spu_interpreter::STQA(SPUThread& 
CPU, spu_opcode_t op) +{ + CPU.write128((op.i16 << 2) & 0x3fff0, CPU.GPR[op.rt]); +} + +void spu_interpreter::BRNZ(SPUThread& CPU, spu_opcode_t op) +{ + if (CPU.GPR[op.rt]._u32[3] != 0) + { + CPU.SetBranch(SPUOpcodes::branchTarget(CPU.PC, op.i16)); + } +} + +void spu_interpreter::BRHZ(SPUThread& CPU, spu_opcode_t op) +{ + if (CPU.GPR[op.rt]._u16[6] == 0) + { + CPU.SetBranch(SPUOpcodes::branchTarget(CPU.PC, op.i16)); + } +} + +void spu_interpreter::BRHNZ(SPUThread& CPU, spu_opcode_t op) +{ + if (CPU.GPR[op.rt]._u16[6] != 0) + { + CPU.SetBranch(SPUOpcodes::branchTarget(CPU.PC, op.i16)); + } +} + +void spu_interpreter::STQR(SPUThread& CPU, spu_opcode_t op) +{ + CPU.write128(SPUOpcodes::branchTarget(CPU.PC, op.i16) & 0x3fff0, CPU.GPR[op.rt]); +} + +void spu_interpreter::BRA(SPUThread& CPU, spu_opcode_t op) +{ + CPU.SetBranch(SPUOpcodes::branchTarget(0, op.i16)); +} + +void spu_interpreter::LQA(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt] = CPU.read128((op.i16 << 2) & 0x3fff0); +} + +void spu_interpreter::BRASL(SPUThread& CPU, spu_opcode_t op) +{ + const u32 target = SPUOpcodes::branchTarget(0, op.i16); + CPU.GPR[op.rt] = u128::from32r(CPU.PC + 4); + CPU.SetBranch(target); +} + +void spu_interpreter::BR(SPUThread& CPU, spu_opcode_t op) +{ + CPU.SetBranch(SPUOpcodes::branchTarget(CPU.PC, op.i16)); +} + +void spu_interpreter::FSMBI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = g_imm_table.fsmb_table[op.i16]; +} + +void spu_interpreter::BRSL(SPUThread& CPU, spu_opcode_t op) +{ + const u32 target = SPUOpcodes::branchTarget(CPU.PC, op.i16); + CPU.GPR[op.rt] = u128::from32r(CPU.PC + 4); + CPU.SetBranch(target); +} + +void spu_interpreter::LQR(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt] = CPU.read128(SPUOpcodes::branchTarget(CPU.PC, op.i16) & 0x3fff0); +} + +void spu_interpreter::IL(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_set1_epi32(op.si16); +} + +void spu_interpreter::ILHU(SPUThread& CPU, spu_opcode_t op) +{ + 
CPU.GPR[op.rt].vi = _mm_set1_epi32(op.i16 << 16); +} + +void spu_interpreter::ILH(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_set1_epi16(op.i16); +} + +void spu_interpreter::IOHL(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_or_si128(CPU.GPR[op.rt].vi, _mm_set1_epi32(op.i16)); +} + + +void spu_interpreter::ORI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_or_si128(CPU.GPR[op.ra].vi, _mm_set1_epi32(op.si10)); +} + +void spu_interpreter::ORHI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_or_si128(CPU.GPR[op.ra].vi, _mm_set1_epi16(op.si10)); +} + +void spu_interpreter::ORBI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_or_si128(CPU.GPR[op.ra].vi, _mm_set1_epi8(op.i8)); +} + +void spu_interpreter::SFI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_sub_epi32(_mm_set1_epi32(op.si10), CPU.GPR[op.ra].vi); +} + +void spu_interpreter::SFHI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_sub_epi16(_mm_set1_epi16(op.si10), CPU.GPR[op.ra].vi); +} + +void spu_interpreter::ANDI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_and_si128(CPU.GPR[op.ra].vi, _mm_set1_epi32(op.si10)); +} + +void spu_interpreter::ANDHI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_and_si128(CPU.GPR[op.ra].vi, _mm_set1_epi16(op.si10)); +} + +void spu_interpreter::ANDBI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_and_si128(CPU.GPR[op.ra].vi, _mm_set1_epi8(op.i8)); +} + +void spu_interpreter::AI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_add_epi32(_mm_set1_epi32(op.si10), CPU.GPR[op.ra].vi); +} + +void spu_interpreter::AHI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_add_epi16(_mm_set1_epi16(op.si10), CPU.GPR[op.ra].vi); +} + +void spu_interpreter::STQD(SPUThread& CPU, spu_opcode_t op) +{ + CPU.write128((CPU.GPR[op.ra]._s32[3] + (op.si10 << 4)) & 0x3fff0, CPU.GPR[op.rt]); +} + +void spu_interpreter::LQD(SPUThread& 
CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt] = CPU.read128((CPU.GPR[op.ra]._s32[3] + (op.si10 << 4)) & 0x3fff0); +} + +void spu_interpreter::XORI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_xor_si128(CPU.GPR[op.ra].vi, _mm_set1_epi32(op.si10)); +} + +void spu_interpreter::XORHI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_xor_si128(CPU.GPR[op.ra].vi, _mm_set1_epi16(op.si10)); +} + +void spu_interpreter::XORBI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_xor_si128(CPU.GPR[op.ra].vi, _mm_set1_epi8(op.i8)); +} + +void spu_interpreter::CGTI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_cmpgt_epi32(CPU.GPR[op.ra].vi, _mm_set1_epi32(op.si10)); +} + +void spu_interpreter::CGTHI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_cmpgt_epi16(CPU.GPR[op.ra].vi, _mm_set1_epi16(op.si10)); +} + +void spu_interpreter::CGTBI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_cmpgt_epi8(CPU.GPR[op.ra].vi, _mm_set1_epi8(op.i8)); +} + +void spu_interpreter::HGTI(SPUThread& CPU, spu_opcode_t op) +{ + if (CPU.GPR[op.ra]._s32[3] > op.si10) + { + CPU.halt(); + } +} + +void spu_interpreter::CLGTI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_cmpgt_epi32(_mm_xor_si128(CPU.GPR[op.ra].vi, _mm_set1_epi32(0x80000000)), _mm_set1_epi32(op.si10 ^ 0x80000000)); +} + +void spu_interpreter::CLGTHI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_cmpgt_epi16(_mm_xor_si128(CPU.GPR[op.ra].vi, _mm_set1_epi32(0x80008000)), _mm_set1_epi16(op.si10 ^ 0x8000)); +} + +void spu_interpreter::CLGTBI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_cmpgt_epi8(_mm_xor_si128(CPU.GPR[op.ra].vi, _mm_set1_epi32(0x80808080)), _mm_set1_epi8(op.i8 ^ 0x80)); +} + +void spu_interpreter::HLGTI(SPUThread& CPU, spu_opcode_t op) +{ + if (CPU.GPR[op.ra]._u32[3] > static_cast(op.si10)) + { + CPU.halt(); + } +} + +void spu_interpreter::MPYI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = 
_mm_madd_epi16(CPU.GPR[op.ra].vi, _mm_set1_epi32(op.si10 & 0xffff)); +} + +void spu_interpreter::MPYUI(SPUThread& CPU, spu_opcode_t op) +{ + const auto a = _mm_and_si128(CPU.GPR[op.ra].vi, _mm_set1_epi32(0xffff)); + const auto i = _mm_set1_epi32(op.si10 & 0xffff); + CPU.GPR[op.rt].vi = _mm_or_si128(_mm_slli_epi32(_mm_mulhi_epu16(a, i), 16), _mm_mullo_epi16(a, i)); +} + +void spu_interpreter::CEQI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_cmpeq_epi32(CPU.GPR[op.ra].vi, _mm_set1_epi32(op.si10)); +} + +void spu_interpreter::CEQHI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_cmpeq_epi16(CPU.GPR[op.ra].vi, _mm_set1_epi16(op.si10)); +} + +void spu_interpreter::CEQBI(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_cmpeq_epi8(CPU.GPR[op.ra].vi, _mm_set1_epi8(op.i8)); +} + +void spu_interpreter::HEQI(SPUThread& CPU, spu_opcode_t op) +{ + if (CPU.GPR[op.ra]._s32[3] == op.si10) + { + CPU.halt(); + } +} + + +void spu_interpreter::HBRA(SPUThread& CPU, spu_opcode_t op) +{ +} + +void spu_interpreter::HBRR(SPUThread& CPU, spu_opcode_t op) +{ +} + +void spu_interpreter::ILA(SPUThread& CPU, spu_opcode_t op) +{ + CPU.GPR[op.rt].vi = _mm_set1_epi32(op.i18); +} + + +void spu_interpreter::SELB(SPUThread& CPU, spu_opcode_t op) +{ + // rt <> rc + CPU.GPR[op.rc] = (CPU.GPR[op.rt] & CPU.GPR[op.rb]) | u128::andnot(CPU.GPR[op.rt], CPU.GPR[op.ra]); +} + +void spu_interpreter::SHUFB(SPUThread& CPU, spu_opcode_t op) +{ + // rt <> rc + const auto index = _mm_xor_si128(CPU.GPR[op.rt].vi, _mm_set1_epi32(0x0f0f0f0f)); + const auto res1 = _mm_shuffle_epi8(CPU.GPR[op.ra].vi, index); + const auto bit4 = _mm_set1_epi32(0x10101010); + const auto k1 = _mm_cmpeq_epi8(_mm_and_si128(index, bit4), bit4); + const auto res2 = _mm_or_si128(_mm_and_si128(k1, _mm_shuffle_epi8(CPU.GPR[op.rb].vi, index)), _mm_andnot_si128(k1, res1)); + const auto bit67 = _mm_set1_epi32(0xc0c0c0c0); + const auto k2 = _mm_cmpeq_epi8(_mm_and_si128(index, bit67), bit67); + const auto 
res3 = _mm_or_si128(res2, k2); + const auto bit567 = _mm_set1_epi32(0xe0e0e0e0); + const auto k3 = _mm_cmpeq_epi8(_mm_and_si128(index, bit567), bit567); + CPU.GPR[op.rc].vi = _mm_sub_epi8(res3, _mm_and_si128(k3, _mm_set1_epi32(0x7f7f7f7f))); +} + +void spu_interpreter::MPYA(SPUThread& CPU, spu_opcode_t op) +{ + // rt <> rc + const auto mask = _mm_set1_epi32(0xffff); + CPU.GPR[op.rc].vi = _mm_add_epi32(CPU.GPR[op.rt].vi, _mm_madd_epi16(_mm_and_si128(CPU.GPR[op.ra].vi, mask), _mm_and_si128(CPU.GPR[op.rb].vi, mask))); +} + +void spu_interpreter::FNMS(SPUThread& CPU, spu_opcode_t op) +{ + // rt <> rc + CPU.GPR[op.rc].vf = _mm_sub_ps(CPU.GPR[op.rt].vf, _mm_mul_ps(CPU.GPR[op.ra].vf, CPU.GPR[op.rb].vf)); +} + +void spu_interpreter::FMA(SPUThread& CPU, spu_opcode_t op) +{ + // rt <> rc + CPU.GPR[op.rc].vf = _mm_add_ps(_mm_mul_ps(CPU.GPR[op.ra].vf, CPU.GPR[op.rb].vf), CPU.GPR[op.rt].vf); +} + +void spu_interpreter::FMS(SPUThread& CPU, spu_opcode_t op) +{ + // rt <> rc + CPU.GPR[op.rc].vf = _mm_sub_ps(_mm_mul_ps(CPU.GPR[op.ra].vf, CPU.GPR[op.rb].vf), CPU.GPR[op.rt].vf); +} + + +void spu_interpreter::UNK(SPUThread& CPU, spu_opcode_t op) +{ + throw __FUNCTION__; +} diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h index 2361d0c376..97d4934b3c 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.h +++ b/rpcs3/Emu/Cell/SPUInterpreter.h @@ -315,11 +315,9 @@ private: } void BIZ(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -336,11 +334,9 @@ private: } void BINZ(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -357,11 +353,9 @@ private: } void BIHZ(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; 
// enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -378,11 +372,9 @@ private: } void BIHNZ(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -409,11 +401,9 @@ private: } void BI(u32 intr, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -423,11 +413,9 @@ private: } void BISL(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } diff --git a/rpcs3/Emu/Cell/SPUInterpreter2.h b/rpcs3/Emu/Cell/SPUInterpreter2.h new file mode 100644 index 0000000000..8d1afd0c6e --- /dev/null +++ b/rpcs3/Emu/Cell/SPUInterpreter2.h @@ -0,0 +1,512 @@ +#pragma once + +class SPUThread; + +union spu_opcode_t +{ + u32 opcode; + + struct + { + u32 rt : 7; // 25..31, it's actually RC in 4-op instructions + u32 ra : 7; // 18..24 + u32 rb : 7; // 11..17 + u32 rc : 7; // 4..10, it's actually RT in 4-op instructions + }; + + struct + { + u32 : 14; // 18..31 + u32 i7 : 7; // 11..17 + }; + + struct + { + u32 : 14; // 18..31 + u32 i8 : 8; // 10..17 + }; + + struct + { + u32 : 14; // 18..31 + u32 i10 : 10; // 8..17 + }; + + struct + { + u32 : 7; // 25..31 + u32 i16 : 16; // 9..24 + }; + + struct + { + u32 : 7; // 25..31 + u32 i18 : 18; // 7..24 + }; + + struct + { + s32 : 14; // 18..31 + s32 si7 : 7; // 11..17 + }; + + struct + { + s32 : 14; // 18..31 + s32 si8 : 8; // 10..17 + }; + + struct + { + s32 : 14; // 18..31 + s32 si10 : 10; // 8..17 + }; + + struct + { + s32 : 7; // 25..31 + s32 si16 : 16; // 9..24 + }; + + struct + { + s32 : 7; // 25..31 + s32 si18 : 18; // 7..24 + }; + + struct + { + u32 : 18; // 14..31 + 
u32 e : 1; // 13, "enable interrupts" bit + u32 d : 1; // 12, "disable interrupts" bit + }; +}; + +using spu_inter_func_t = void(*)(SPUThread& CPU, spu_opcode_t opcode); + +namespace spu_interpreter +{ + void DEFAULT(SPUThread& CPU, spu_opcode_t op); + + void STOP(SPUThread& CPU, spu_opcode_t op); + void LNOP(SPUThread& CPU, spu_opcode_t op); + void SYNC(SPUThread& CPU, spu_opcode_t op); + void DSYNC(SPUThread& CPU, spu_opcode_t op); + void MFSPR(SPUThread& CPU, spu_opcode_t op); + void RDCH(SPUThread& CPU, spu_opcode_t op); + void RCHCNT(SPUThread& CPU, spu_opcode_t op); + void SF(SPUThread& CPU, spu_opcode_t op); + void OR(SPUThread& CPU, spu_opcode_t op); + void BG(SPUThread& CPU, spu_opcode_t op); + void SFH(SPUThread& CPU, spu_opcode_t op); + void NOR(SPUThread& CPU, spu_opcode_t op); + void ABSDB(SPUThread& CPU, spu_opcode_t op); + void ROT(SPUThread& CPU, spu_opcode_t op); + void ROTM(SPUThread& CPU, spu_opcode_t op); + void ROTMA(SPUThread& CPU, spu_opcode_t op); + void SHL(SPUThread& CPU, spu_opcode_t op); + void ROTH(SPUThread& CPU, spu_opcode_t op); + void ROTHM(SPUThread& CPU, spu_opcode_t op); + void ROTMAH(SPUThread& CPU, spu_opcode_t op); + void SHLH(SPUThread& CPU, spu_opcode_t op); + void ROTI(SPUThread& CPU, spu_opcode_t op); + void ROTMI(SPUThread& CPU, spu_opcode_t op); + void ROTMAI(SPUThread& CPU, spu_opcode_t op); + void SHLI(SPUThread& CPU, spu_opcode_t op); + void ROTHI(SPUThread& CPU, spu_opcode_t op); + void ROTHMI(SPUThread& CPU, spu_opcode_t op); + void ROTMAHI(SPUThread& CPU, spu_opcode_t op); + void SHLHI(SPUThread& CPU, spu_opcode_t op); + void A(SPUThread& CPU, spu_opcode_t op); + void AND(SPUThread& CPU, spu_opcode_t op); + void CG(SPUThread& CPU, spu_opcode_t op); + void AH(SPUThread& CPU, spu_opcode_t op); + void NAND(SPUThread& CPU, spu_opcode_t op); + void AVGB(SPUThread& CPU, spu_opcode_t op); + void MTSPR(SPUThread& CPU, spu_opcode_t op); + void WRCH(SPUThread& CPU, spu_opcode_t op); + void BIZ(SPUThread& CPU, spu_opcode_t 
op); + void BINZ(SPUThread& CPU, spu_opcode_t op); + void BIHZ(SPUThread& CPU, spu_opcode_t op); + void BIHNZ(SPUThread& CPU, spu_opcode_t op); + void STOPD(SPUThread& CPU, spu_opcode_t op); + void STQX(SPUThread& CPU, spu_opcode_t op); + void BI(SPUThread& CPU, spu_opcode_t op); + void BISL(SPUThread& CPU, spu_opcode_t op); + void IRET(SPUThread& CPU, spu_opcode_t op); + void BISLED(SPUThread& CPU, spu_opcode_t op); + void HBR(SPUThread& CPU, spu_opcode_t op); + void GB(SPUThread& CPU, spu_opcode_t op); + void GBH(SPUThread& CPU, spu_opcode_t op); + void GBB(SPUThread& CPU, spu_opcode_t op); + void FSM(SPUThread& CPU, spu_opcode_t op); + void FSMH(SPUThread& CPU, spu_opcode_t op); + void FSMB(SPUThread& CPU, spu_opcode_t op); + void FREST(SPUThread& CPU, spu_opcode_t op); + void FRSQEST(SPUThread& CPU, spu_opcode_t op); + void LQX(SPUThread& CPU, spu_opcode_t op); + void ROTQBYBI(SPUThread& CPU, spu_opcode_t op); + void ROTQMBYBI(SPUThread& CPU, spu_opcode_t op); + void SHLQBYBI(SPUThread& CPU, spu_opcode_t op); + void CBX(SPUThread& CPU, spu_opcode_t op); + void CHX(SPUThread& CPU, spu_opcode_t op); + void CWX(SPUThread& CPU, spu_opcode_t op); + void CDX(SPUThread& CPU, spu_opcode_t op); + void ROTQBI(SPUThread& CPU, spu_opcode_t op); + void ROTQMBI(SPUThread& CPU, spu_opcode_t op); + void SHLQBI(SPUThread& CPU, spu_opcode_t op); + void ROTQBY(SPUThread& CPU, spu_opcode_t op); + void ROTQMBY(SPUThread& CPU, spu_opcode_t op); + void SHLQBY(SPUThread& CPU, spu_opcode_t op); + void ORX(SPUThread& CPU, spu_opcode_t op); + void CBD(SPUThread& CPU, spu_opcode_t op); + void CHD(SPUThread& CPU, spu_opcode_t op); + void CWD(SPUThread& CPU, spu_opcode_t op); + void CDD(SPUThread& CPU, spu_opcode_t op); + void ROTQBII(SPUThread& CPU, spu_opcode_t op); + void ROTQMBII(SPUThread& CPU, spu_opcode_t op); + void SHLQBII(SPUThread& CPU, spu_opcode_t op); + void ROTQBYI(SPUThread& CPU, spu_opcode_t op); + void ROTQMBYI(SPUThread& CPU, spu_opcode_t op); + void SHLQBYI(SPUThread& 
CPU, spu_opcode_t op); + void NOP(SPUThread& CPU, spu_opcode_t op); + void CGT(SPUThread& CPU, spu_opcode_t op); + void XOR(SPUThread& CPU, spu_opcode_t op); + void CGTH(SPUThread& CPU, spu_opcode_t op); + void EQV(SPUThread& CPU, spu_opcode_t op); + void CGTB(SPUThread& CPU, spu_opcode_t op); + void SUMB(SPUThread& CPU, spu_opcode_t op); + void HGT(SPUThread& CPU, spu_opcode_t op); + void CLZ(SPUThread& CPU, spu_opcode_t op); + void XSWD(SPUThread& CPU, spu_opcode_t op); + void XSHW(SPUThread& CPU, spu_opcode_t op); + void CNTB(SPUThread& CPU, spu_opcode_t op); + void XSBH(SPUThread& CPU, spu_opcode_t op); + void CLGT(SPUThread& CPU, spu_opcode_t op); + void ANDC(SPUThread& CPU, spu_opcode_t op); + void FCGT(SPUThread& CPU, spu_opcode_t op); + void DFCGT(SPUThread& CPU, spu_opcode_t op); + void FA(SPUThread& CPU, spu_opcode_t op); + void FS(SPUThread& CPU, spu_opcode_t op); + void FM(SPUThread& CPU, spu_opcode_t op); + void CLGTH(SPUThread& CPU, spu_opcode_t op); + void ORC(SPUThread& CPU, spu_opcode_t op); + void FCMGT(SPUThread& CPU, spu_opcode_t op); + void DFCMGT(SPUThread& CPU, spu_opcode_t op); + void DFA(SPUThread& CPU, spu_opcode_t op); + void DFS(SPUThread& CPU, spu_opcode_t op); + void DFM(SPUThread& CPU, spu_opcode_t op); + void CLGTB(SPUThread& CPU, spu_opcode_t op); + void HLGT(SPUThread& CPU, spu_opcode_t op); + void DFMA(SPUThread& CPU, spu_opcode_t op); + void DFMS(SPUThread& CPU, spu_opcode_t op); + void DFNMS(SPUThread& CPU, spu_opcode_t op); + void DFNMA(SPUThread& CPU, spu_opcode_t op); + void CEQ(SPUThread& CPU, spu_opcode_t op); + void MPYHHU(SPUThread& CPU, spu_opcode_t op); + void ADDX(SPUThread& CPU, spu_opcode_t op); + void SFX(SPUThread& CPU, spu_opcode_t op); + void CGX(SPUThread& CPU, spu_opcode_t op); + void BGX(SPUThread& CPU, spu_opcode_t op); + void MPYHHA(SPUThread& CPU, spu_opcode_t op); + void MPYHHAU(SPUThread& CPU, spu_opcode_t op); + void FSCRRD(SPUThread& CPU, spu_opcode_t op); + void FESD(SPUThread& CPU, spu_opcode_t op); + 
void FRDS(SPUThread& CPU, spu_opcode_t op); + void FSCRWR(SPUThread& CPU, spu_opcode_t op); + void DFTSV(SPUThread& CPU, spu_opcode_t op); + void FCEQ(SPUThread& CPU, spu_opcode_t op); + void DFCEQ(SPUThread& CPU, spu_opcode_t op); + void MPY(SPUThread& CPU, spu_opcode_t op); + void MPYH(SPUThread& CPU, spu_opcode_t op); + void MPYHH(SPUThread& CPU, spu_opcode_t op); + void MPYS(SPUThread& CPU, spu_opcode_t op); + void CEQH(SPUThread& CPU, spu_opcode_t op); + void FCMEQ(SPUThread& CPU, spu_opcode_t op); + void DFCMEQ(SPUThread& CPU, spu_opcode_t op); + void MPYU(SPUThread& CPU, spu_opcode_t op); + void CEQB(SPUThread& CPU, spu_opcode_t op); + void FI(SPUThread& CPU, spu_opcode_t op); + void HEQ(SPUThread& CPU, spu_opcode_t op); + + void CFLTS(SPUThread& CPU, spu_opcode_t op); + void CFLTU(SPUThread& CPU, spu_opcode_t op); + void CSFLT(SPUThread& CPU, spu_opcode_t op); + void CUFLT(SPUThread& CPU, spu_opcode_t op); + + void BRZ(SPUThread& CPU, spu_opcode_t op); + void STQA(SPUThread& CPU, spu_opcode_t op); + void BRNZ(SPUThread& CPU, spu_opcode_t op); + void BRHZ(SPUThread& CPU, spu_opcode_t op); + void BRHNZ(SPUThread& CPU, spu_opcode_t op); + void STQR(SPUThread& CPU, spu_opcode_t op); + void BRA(SPUThread& CPU, spu_opcode_t op); + void LQA(SPUThread& CPU, spu_opcode_t op); + void BRASL(SPUThread& CPU, spu_opcode_t op); + void BR(SPUThread& CPU, spu_opcode_t op); + void FSMBI(SPUThread& CPU, spu_opcode_t op); + void BRSL(SPUThread& CPU, spu_opcode_t op); + void LQR(SPUThread& CPU, spu_opcode_t op); + void IL(SPUThread& CPU, spu_opcode_t op); + void ILHU(SPUThread& CPU, spu_opcode_t op); + void ILH(SPUThread& CPU, spu_opcode_t op); + void IOHL(SPUThread& CPU, spu_opcode_t op); + + void ORI(SPUThread& CPU, spu_opcode_t op); + void ORHI(SPUThread& CPU, spu_opcode_t op); + void ORBI(SPUThread& CPU, spu_opcode_t op); + void SFI(SPUThread& CPU, spu_opcode_t op); + void SFHI(SPUThread& CPU, spu_opcode_t op); + void ANDI(SPUThread& CPU, spu_opcode_t op); + void 
ANDHI(SPUThread& CPU, spu_opcode_t op); + void ANDBI(SPUThread& CPU, spu_opcode_t op); + void AI(SPUThread& CPU, spu_opcode_t op); + void AHI(SPUThread& CPU, spu_opcode_t op); + void STQD(SPUThread& CPU, spu_opcode_t op); + void LQD(SPUThread& CPU, spu_opcode_t op); + void XORI(SPUThread& CPU, spu_opcode_t op); + void XORHI(SPUThread& CPU, spu_opcode_t op); + void XORBI(SPUThread& CPU, spu_opcode_t op); + void CGTI(SPUThread& CPU, spu_opcode_t op); + void CGTHI(SPUThread& CPU, spu_opcode_t op); + void CGTBI(SPUThread& CPU, spu_opcode_t op); + void HGTI(SPUThread& CPU, spu_opcode_t op); + void CLGTI(SPUThread& CPU, spu_opcode_t op); + void CLGTHI(SPUThread& CPU, spu_opcode_t op); + void CLGTBI(SPUThread& CPU, spu_opcode_t op); + void HLGTI(SPUThread& CPU, spu_opcode_t op); + void MPYI(SPUThread& CPU, spu_opcode_t op); + void MPYUI(SPUThread& CPU, spu_opcode_t op); + void CEQI(SPUThread& CPU, spu_opcode_t op); + void CEQHI(SPUThread& CPU, spu_opcode_t op); + void CEQBI(SPUThread& CPU, spu_opcode_t op); + void HEQI(SPUThread& CPU, spu_opcode_t op); + + void HBRA(SPUThread& CPU, spu_opcode_t op); + void HBRR(SPUThread& CPU, spu_opcode_t op); + void ILA(SPUThread& CPU, spu_opcode_t op); + + void SELB(SPUThread& CPU, spu_opcode_t op); + void SHUFB(SPUThread& CPU, spu_opcode_t op); + void MPYA(SPUThread& CPU, spu_opcode_t op); + void FNMS(SPUThread& CPU, spu_opcode_t op); + void FMA(SPUThread& CPU, spu_opcode_t op); + void FMS(SPUThread& CPU, spu_opcode_t op); + + void UNK(SPUThread& CPU, spu_opcode_t op); +} + +class SPUInterpreter2 : public SPUOpcodes +{ +public: + virtual ~SPUInterpreter2() {} + + spu_inter_func_t func; + + virtual void STOP(u32 code) { func = spu_interpreter::STOP; } + virtual void LNOP() { func = spu_interpreter::LNOP; } + virtual void SYNC(u32 Cbit) { func = spu_interpreter::SYNC; } + virtual void DSYNC() { func = spu_interpreter::DSYNC; } + virtual void MFSPR(u32 rt, u32 sa) { func = spu_interpreter::MFSPR; } + virtual void RDCH(u32 rt, u32 ra) { 
func = spu_interpreter::RDCH; } + virtual void RCHCNT(u32 rt, u32 ra) { func = spu_interpreter::RCHCNT; } + virtual void SF(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::SF; } + virtual void OR(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::OR; } + virtual void BG(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::BG; } + virtual void SFH(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::SFH; } + virtual void NOR(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::NOR; } + virtual void ABSDB(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ABSDB; } + virtual void ROT(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ROT; } + virtual void ROTM(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ROTM; } + virtual void ROTMA(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ROTMA; } + virtual void SHL(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::SHL; } + virtual void ROTH(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ROTH; } + virtual void ROTHM(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ROTHM; } + virtual void ROTMAH(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ROTMAH; } + virtual void SHLH(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::SHLH; } + virtual void ROTI(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::ROTI; } + virtual void ROTMI(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::ROTMI; } + virtual void ROTMAI(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::ROTMAI; } + virtual void SHLI(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::SHLI; } + virtual void ROTHI(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::ROTHI; } + virtual void ROTHMI(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::ROTHMI; } + virtual void ROTMAHI(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::ROTMAHI; } + virtual void SHLHI(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::SHLHI; } + virtual void A(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::A; } + virtual void AND(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::AND; } + virtual void 
CG(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CG; } + virtual void AH(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::AH; } + virtual void NAND(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::NAND; } + virtual void AVGB(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::AVGB; } + virtual void MTSPR(u32 rt, u32 sa) { func = spu_interpreter::MTSPR; } + virtual void WRCH(u32 ra, u32 rt) { func = spu_interpreter::WRCH; } + virtual void BIZ(u32 intr, u32 rt, u32 ra) { func = spu_interpreter::BIZ; } + virtual void BINZ(u32 intr, u32 rt, u32 ra) { func = spu_interpreter::BINZ; } + virtual void BIHZ(u32 intr, u32 rt, u32 ra) { func = spu_interpreter::BIHZ; } + virtual void BIHNZ(u32 intr, u32 rt, u32 ra) { func = spu_interpreter::BIHNZ; } + virtual void STOPD(u32 rc, u32 ra, u32 rb) { func = spu_interpreter::STOPD; } + virtual void STQX(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::STQX; } + virtual void BI(u32 intr, u32 ra) { func = spu_interpreter::BI; } + virtual void BISL(u32 intr, u32 rt, u32 ra) { func = spu_interpreter::BISL; } + virtual void IRET(u32 ra) { func = spu_interpreter::IRET; } + virtual void BISLED(u32 intr, u32 rt, u32 ra) { func = spu_interpreter::BISLED; } + virtual void HBR(u32 p, u32 ro, u32 ra) { func = spu_interpreter::HBR; } + virtual void GB(u32 rt, u32 ra) { func = spu_interpreter::GB; } + virtual void GBH(u32 rt, u32 ra) { func = spu_interpreter::GBH; } + virtual void GBB(u32 rt, u32 ra) { func = spu_interpreter::GBB; } + virtual void FSM(u32 rt, u32 ra) { func = spu_interpreter::FSM; } + virtual void FSMH(u32 rt, u32 ra) { func = spu_interpreter::FSMH; } + virtual void FSMB(u32 rt, u32 ra) { func = spu_interpreter::FSMB; } + virtual void FREST(u32 rt, u32 ra) { func = spu_interpreter::FREST; } + virtual void FRSQEST(u32 rt, u32 ra) { func = spu_interpreter::FRSQEST; } + virtual void LQX(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::LQX; } + virtual void ROTQBYBI(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ROTQBYBI; } + 
virtual void ROTQMBYBI(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ROTQMBYBI; } + virtual void SHLQBYBI(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::SHLQBYBI; } + virtual void CBX(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CBX; } + virtual void CHX(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CHX; } + virtual void CWX(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CWX; } + virtual void CDX(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CDX; } + virtual void ROTQBI(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ROTQBI; } + virtual void ROTQMBI(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ROTQMBI; } + virtual void SHLQBI(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::SHLQBI; } + virtual void ROTQBY(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ROTQBY; } + virtual void ROTQMBY(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ROTQMBY; } + virtual void SHLQBY(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::SHLQBY; } + virtual void ORX(u32 rt, u32 ra) { func = spu_interpreter::ORX; } + virtual void CBD(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::CBD; } + virtual void CHD(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::CHD; } + virtual void CWD(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::CWD; } + virtual void CDD(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::CDD; } + virtual void ROTQBII(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::ROTQBII; } + virtual void ROTQMBII(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::ROTQMBII; } + virtual void SHLQBII(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::SHLQBII; } + virtual void ROTQBYI(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::ROTQBYI; } + virtual void ROTQMBYI(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::ROTQMBYI; } + virtual void SHLQBYI(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::SHLQBYI; } + virtual void NOP(u32 rt) { func = spu_interpreter::NOP; } + virtual void CGT(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CGT; } + virtual 
void XOR(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::XOR; } + virtual void CGTH(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CGTH; } + virtual void EQV(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::EQV; } + virtual void CGTB(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CGTB; } + virtual void SUMB(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::SUMB; } + virtual void HGT(u32 rt, s32 ra, s32 rb) { func = spu_interpreter::HGT; } + virtual void CLZ(u32 rt, u32 ra) { func = spu_interpreter::CLZ; } + virtual void XSWD(u32 rt, u32 ra) { func = spu_interpreter::XSWD; } + virtual void XSHW(u32 rt, u32 ra) { func = spu_interpreter::XSHW; } + virtual void CNTB(u32 rt, u32 ra) { func = spu_interpreter::CNTB; } + virtual void XSBH(u32 rt, u32 ra) { func = spu_interpreter::XSBH; } + virtual void CLGT(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CLGT; } + virtual void ANDC(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ANDC; } + virtual void FCGT(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::FCGT; } + virtual void DFCGT(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::DFCGT; } + virtual void FA(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::FA; } + virtual void FS(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::FS; } + virtual void FM(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::FM; } + virtual void CLGTH(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CLGTH; } + virtual void ORC(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ORC; } + virtual void FCMGT(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::FCMGT; } + virtual void DFCMGT(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::DFCMGT; } + virtual void DFA(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::DFA; } + virtual void DFS(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::DFS; } + virtual void DFM(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::DFM; } + virtual void CLGTB(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CLGTB; } + virtual void HLGT(u32 rt, u32 ra, 
u32 rb) { func = spu_interpreter::HLGT; } + virtual void DFMA(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::DFMA; } + virtual void DFMS(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::DFMS; } + virtual void DFNMS(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::DFNMS; } + virtual void DFNMA(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::DFNMA; } + virtual void CEQ(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CEQ; } + virtual void MPYHHU(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::MPYHHU; } + virtual void ADDX(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::ADDX; } + virtual void SFX(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::SFX; } + virtual void CGX(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CGX; } + virtual void BGX(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::BGX; } + virtual void MPYHHA(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::MPYHHA; } + virtual void MPYHHAU(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::MPYHHAU; } + virtual void FSCRRD(u32 rt) { func = spu_interpreter::FSCRRD; } + virtual void FESD(u32 rt, u32 ra) { func = spu_interpreter::FESD; } + virtual void FRDS(u32 rt, u32 ra) { func = spu_interpreter::FRDS; } + virtual void FSCRWR(u32 rt, u32 ra) { func = spu_interpreter::FSCRWR; } + virtual void DFTSV(u32 rt, u32 ra, s32 i7) { func = spu_interpreter::DFTSV; } + virtual void FCEQ(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::FCEQ; } + virtual void DFCEQ(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::DFCEQ; } + virtual void MPY(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::MPY; } + virtual void MPYH(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::MPYH; } + virtual void MPYHH(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::MPYHH; } + virtual void MPYS(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::MPYS; } + virtual void CEQH(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CEQH; } + virtual void FCMEQ(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::FCMEQ; } + virtual void 
DFCMEQ(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::DFCMEQ; } + virtual void MPYU(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::MPYU; } + virtual void CEQB(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::CEQB; } + virtual void FI(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::FI; } + virtual void HEQ(u32 rt, u32 ra, u32 rb) { func = spu_interpreter::HEQ; } + + virtual void CFLTS(u32 rt, u32 ra, s32 i8) { func = spu_interpreter::CFLTS; } + virtual void CFLTU(u32 rt, u32 ra, s32 i8) { func = spu_interpreter::CFLTU; } + virtual void CSFLT(u32 rt, u32 ra, s32 i8) { func = spu_interpreter::CSFLT; } + virtual void CUFLT(u32 rt, u32 ra, s32 i8) { func = spu_interpreter::CUFLT; } + + virtual void BRZ(u32 rt, s32 i16) { func = spu_interpreter::BRZ; } + virtual void STQA(u32 rt, s32 i16) { func = spu_interpreter::STQA; } + virtual void BRNZ(u32 rt, s32 i16) { func = spu_interpreter::BRNZ; } + virtual void BRHZ(u32 rt, s32 i16) { func = spu_interpreter::BRHZ; } + virtual void BRHNZ(u32 rt, s32 i16) { func = spu_interpreter::BRHNZ; } + virtual void STQR(u32 rt, s32 i16) { func = spu_interpreter::STQR; } + virtual void BRA(s32 i16) { func = spu_interpreter::BRA; } + virtual void LQA(u32 rt, s32 i16) { func = spu_interpreter::LQA; } + virtual void BRASL(u32 rt, s32 i16) { func = spu_interpreter::BRASL; } + virtual void BR(s32 i16) { func = spu_interpreter::BR; } + virtual void FSMBI(u32 rt, s32 i16) { func = spu_interpreter::FSMBI; } + virtual void BRSL(u32 rt, s32 i16) { func = spu_interpreter::BRSL; } + virtual void LQR(u32 rt, s32 i16) { func = spu_interpreter::LQR; } + virtual void IL(u32 rt, s32 i16) { func = spu_interpreter::IL; } + virtual void ILHU(u32 rt, s32 i16) { func = spu_interpreter::ILHU; } + virtual void ILH(u32 rt, s32 i16) { func = spu_interpreter::ILH; } + virtual void IOHL(u32 rt, s32 i16) { func = spu_interpreter::IOHL; } + + virtual void ORI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::ORI; } + virtual void ORHI(u32 rt, u32 ra, s32 i10) 
{ func = spu_interpreter::ORHI; } + virtual void ORBI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::ORBI; } + virtual void SFI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::SFI; } + virtual void SFHI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::SFHI; } + virtual void ANDI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::ANDI; } + virtual void ANDHI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::ANDHI; } + virtual void ANDBI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::ANDBI; } + virtual void AI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::AI; } + virtual void AHI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::AHI; } + virtual void STQD(u32 rt, s32 i10, u32 ra) { func = spu_interpreter::STQD; } + virtual void LQD(u32 rt, s32 i10, u32 ra) { func = spu_interpreter::LQD; } + virtual void XORI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::XORI; } + virtual void XORHI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::XORHI; } + virtual void XORBI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::XORBI; } + virtual void CGTI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::CGTI; } + virtual void CGTHI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::CGTHI; } + virtual void CGTBI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::CGTBI; } + virtual void HGTI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::HGTI; } + virtual void CLGTI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::CLGTI; } + virtual void CLGTHI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::CLGTHI; } + virtual void CLGTBI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::CLGTBI; } + virtual void HLGTI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::HLGTI; } + virtual void MPYI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::MPYI; } + virtual void MPYUI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::MPYUI; } + virtual void CEQI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::CEQI; } + virtual void CEQHI(u32 rt, u32 ra, s32 i10) { func 
= spu_interpreter::CEQHI; } + virtual void CEQBI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::CEQBI; } + virtual void HEQI(u32 rt, u32 ra, s32 i10) { func = spu_interpreter::HEQI; } + + virtual void HBRA(s32 ro, s32 i16) { func = spu_interpreter::HBRA; } + virtual void HBRR(s32 ro, s32 i16) { func = spu_interpreter::HBRR; } + virtual void ILA(u32 rt, u32 i18) { func = spu_interpreter::ILA; } + + virtual void SELB(u32 rc, u32 ra, u32 rb, u32 rt) { func = spu_interpreter::SELB; } + virtual void SHUFB(u32 rc, u32 ra, u32 rb, u32 rt) { func = spu_interpreter::SHUFB; } + virtual void MPYA(u32 rc, u32 ra, u32 rb, u32 rt) { func = spu_interpreter::MPYA; } + virtual void FNMS(u32 rc, u32 ra, u32 rb, u32 rt) { func = spu_interpreter::FNMS; } + virtual void FMA(u32 rc, u32 ra, u32 rb, u32 rt) { func = spu_interpreter::FMA; } + virtual void FMS(u32 rc, u32 ra, u32 rb, u32 rt) { func = spu_interpreter::FMS; } + + virtual void UNK(u32 code, u32 opcode, u32 gcode) { func = spu_interpreter::UNK; } +}; diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index 85ba680249..0391532151 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -1073,11 +1073,9 @@ private: } void BIZ(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -1094,11 +1092,9 @@ private: } void BINZ(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -1115,11 +1111,9 @@ private: } void BIHZ(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -1136,11 +1130,9 @@ private: } void BIHNZ(u32 intr, u32 rt, 
u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -1188,11 +1180,9 @@ private: } void BI(u32 intr, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } @@ -1206,11 +1196,9 @@ private: } void BISL(u32 intr, u32 rt, u32 ra) { - switch (intr) + switch (intr & 0x30) { case 0: break; - case 0x10: break; // enable interrupts - case 0x20: break; // disable interrupts default: UNIMPLEMENTED(); return; } diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 854567c362..c7909bee55 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -17,10 +17,38 @@ #include "Emu/Cell/SPUThread.h" #include "Emu/Cell/SPUDecoder.h" #include "Emu/Cell/SPUInterpreter.h" +#include "Emu/Cell/SPUInterpreter2.h" #include "Emu/Cell/SPURecompiler.h" #include +class spu_inter_func_list_t +{ + std::array funcs; + +public: + spu_inter_func_list_t() + { + auto inter = new SPUInterpreter2; + SPUDecoder dec(*inter); + + for (u32 i = 0; i < funcs.size(); i++) + { + inter->func = spu_interpreter::DEFAULT; + + dec.Decode(i << 21); + + funcs[i] = inter->func; + } + } + + __forceinline spu_inter_func_t operator [] (u32 opcode) const + { + return funcs[opcode >> 21]; + } +} +const g_spu_inter_func_list; + SPUThread& GetCurrentSPUThread() { CPUThread* thread = GetCurrentCPUThread(); @@ -46,23 +74,48 @@ SPUThread::~SPUThread() void SPUThread::Task() { - const int round = std::fegetround(); std::fesetround(FE_TOWARDZERO); if (m_custom_task) { - m_custom_task(*this); - } - else - { - CPUThread::Task(); + return m_custom_task(*this); } - if (std::fegetround() != FE_TOWARDZERO) + if (m_dec) { - LOG_ERROR(SPU, "Rounding mode has changed(%d)", std::fegetround()); + return CPUThread::Task(); + } + + while 
(true) + { + // read opcode + const spu_opcode_t opcode = { vm::read32(PC + offset) }; + + // get interpreter function + const auto func = g_spu_inter_func_list[opcode.opcode]; + + if (m_events) + { + // process events + if (Emu.IsStopped()) + { + return; + } + + if (m_events & CPU_EVENT_STOP && (IsStopped() || IsPaused())) + { + m_events &= ~CPU_EVENT_STOP; + return; + } + } + + // call interpreter function + func(*this, opcode); + + // next instruction + //PC += 4; + NextPc(4); } - std::fesetround(round); } void SPUThread::DoReset() @@ -122,19 +175,33 @@ void SPUThread::CloseStack() void SPUThread::DoRun() { - switch(Ini.SPUDecoderMode.GetValue()) + m_dec = nullptr; + + switch (auto mode = Ini.SPUDecoderMode.GetValue()) + { + case 0: // original interpreter { - case 1: m_dec = new SPUDecoder(*new SPUInterpreter(*this)); - break; + break; + } + + case 1: // alternative interpreter + { + break; + } + case 2: + { m_dec = new SPURecompilerCore(*this); - break; + break; + } default: - LOG_ERROR(SPU, "Invalid SPU decoder mode: %d", Ini.SPUDecoderMode.GetValue()); + { + LOG_ERROR(SPU, "Invalid SPU decoder mode: %d", mode); Emu.Pause(); } + } } void SPUThread::DoResume() @@ -163,21 +230,25 @@ void SPUThread::FastCall(u32 ls_addr) auto old_PC = PC; auto old_LR = GPR[0]._u32[3]; auto old_stack = GPR[1]._u32[3]; // only saved and restored (may be wrong) + auto old_task = decltype(m_custom_task)(); m_status = Running; PC = ls_addr; GPR[0]._u32[3] = 0x0; + m_custom_task.swap(m_custom_task); - CPUThread::Task(); + SPUThread::Task(); PC = old_PC; GPR[0]._u32[3] = old_LR; GPR[1]._u32[3] = old_stack; + m_custom_task.swap(m_custom_task); } void SPUThread::FastStop() { m_status = Stopped; + m_events |= CPU_EVENT_STOP; } void SPUThread::FastRun() diff --git a/rpcs3/Emu/SysCalls/Modules/sysPrxForUser.cpp b/rpcs3/Emu/SysCalls/Modules/sysPrxForUser.cpp index 34844ec5e5..f5d3426e95 100644 --- a/rpcs3/Emu/SysCalls/Modules/sysPrxForUser.cpp +++ 
b/rpcs3/Emu/SysCalls/Modules/sysPrxForUser.cpp @@ -230,7 +230,7 @@ s32 sys_lwmutex_lock(PPUThread& CPU, vm::ptr lwmutex, u64 timeout // locking succeeded auto old = lwmutex->owner.exchange(tid); - if (old.data() != se32(lwmutex_reserved)) + if (old.data() != se32(lwmutex_reserved) && !Emu.IsStopped()) { sysPrxForUser.Fatal("sys_lwmutex_lock(lwmutex=*0x%x): locking failed (owner=0x%x)", lwmutex, old); } @@ -301,7 +301,7 @@ s32 sys_lwmutex_trylock(PPUThread& CPU, vm::ptr lwmutex) // locking succeeded auto old = lwmutex->owner.exchange(tid); - if (old.data() != se32(lwmutex_reserved)) + if (old.data() != se32(lwmutex_reserved) && !Emu.IsStopped()) { sysPrxForUser.Fatal("sys_lwmutex_trylock(lwmutex=*0x%x): locking failed (owner=0x%x)", lwmutex, old); } @@ -592,7 +592,7 @@ s32 sys_lwcond_wait(PPUThread& CPU, vm::ptr lwcond, u64 timeout) const auto old = lwmutex->owner.exchange(tid); lwmutex->recursive_count = recursive_value; - if (old.data() != se32(lwmutex_reserved)) + if (old.data() != se32(lwmutex_reserved) && !Emu.IsStopped()) { sysPrxForUser.Fatal("sys_lwcond_wait(lwcond=*0x%x): locking failed (lwmutex->owner=0x%x)", lwcond, old); } @@ -621,7 +621,7 @@ s32 sys_lwcond_wait(PPUThread& CPU, vm::ptr lwcond, u64 timeout) const auto old = lwmutex->owner.exchange(tid); lwmutex->recursive_count = recursive_value; - if (old.data() != se32(lwmutex_reserved)) + if (old.data() != se32(lwmutex_reserved) && !Emu.IsStopped()) { sysPrxForUser.Fatal("sys_lwcond_wait(lwcond=*0x%x): locking failed after timeout (lwmutex->owner=0x%x)", lwcond, old); } diff --git a/rpcs3/Emu/SysCalls/lv2/sys_process.cpp b/rpcs3/Emu/SysCalls/lv2/sys_process.cpp index 45fb9d0a7b..ccafdcab30 100644 --- a/rpcs3/Emu/SysCalls/lv2/sys_process.cpp +++ b/rpcs3/Emu/SysCalls/lv2/sys_process.cpp @@ -30,15 +30,27 @@ s32 sys_process_getppid() return 0; } -s32 sys_process_exit(s32 errorcode) +s32 sys_process_exit(s32 status) { - sys_process.Warning("sys_process_exit(%d)", errorcode); - Emu.Pause(); - 
sys_process.Success("Process finished"); - CallAfter([]() + sys_process.Warning("sys_process_exit(status=0x%x)", status); + + LV2_LOCK; + + if (!Emu.IsStopped()) { - Emu.Stop(); - }); + sys_process.Success("Process finished"); + + CallAfter([]() + { + Emu.Stop(); + }); + + while (!Emu.IsStopped()) + { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + } + return CELL_OK; } diff --git a/rpcs3/Emu/System.cpp b/rpcs3/Emu/System.cpp index 82d200e275..e5d6bf05b3 100644 --- a/rpcs3/Emu/System.cpp +++ b/rpcs3/Emu/System.cpp @@ -39,6 +39,8 @@ static const std::string& BreakPointsDBName = "BreakPoints.dat"; static const u16 bpdb_version = 0x1000; extern std::atomic g_thread_count; +extern void finalize_ppu_exec_map(); + Emulator::Emulator() : m_status(Stopped) , m_mode(DisAsm) @@ -98,40 +100,41 @@ void Emulator::SetTitle(const std::string& title) void Emulator::CheckStatus() { - //auto& threads = GetCPU().GetThreads(); + //auto threads = GetCPU().GetThreads(); + //if (!threads.size()) //{ // Stop(); // return; //} - //bool IsAllPaused = true; - //for (u32 i = 0; i < threads.size(); ++i) + //bool AllPaused = true; + + //for (auto& t : threads) //{ - // if (threads[i]->IsPaused()) continue; - // IsAllPaused = false; + // if (t->IsPaused()) continue; + // AllPaused = false; // break; //} - //if(IsAllPaused) + //if (AllPaused) //{ - // //ConLog.Warning("all paused!"); // Pause(); // return; //} - //bool IsAllStoped = true; - //for (u32 i = 0; i < threads.size(); ++i) + //bool AllStopped = true; + + //for (auto& t : threads) //{ - // if (threads[i]->IsStopped()) continue; - // IsAllStoped = false; + // if (t->IsStopped()) continue; + // AllStopped = false; // break; //} - //if (IsAllStoped) + //if (AllStopped) //{ - // //LOG_WARNING(GENERAL, "all stoped!"); - // Pause(); //Stop(); + // Pause(); //} } @@ -327,8 +330,18 @@ void Emulator::Stop() if(IsStopped()) return; SendDbgCommand(DID_STOP_EMU); + m_status = Stopped; + { + auto threads = GetCPU().GetThreads(); + 
+ for (auto& t : threads) + { + t->AddEvent(CPU_EVENT_STOP); + } + } + while (g_thread_count) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); @@ -370,6 +383,8 @@ void Emulator::Stop() CurGameInfo.Reset(); Memory.Close(); + + finalize_ppu_exec_map(); SendDbgCommand(DID_STOPPED_EMU); } diff --git a/rpcs3/Gui/MainFrame.cpp b/rpcs3/Gui/MainFrame.cpp index 48c7cdc97f..dd0298777e 100644 --- a/rpcs3/Gui/MainFrame.cpp +++ b/rpcs3/Gui/MainFrame.cpp @@ -437,9 +437,11 @@ void MainFrame::Config(wxCommandEvent& WXUNUSED(event)) wxCheckBox* chbox_dbg_ap_functioncall = new wxCheckBox(p_hle, wxID_ANY, "Auto Pause at Function Call"); cbox_cpu_decoder->Append("PPU Interpreter"); + cbox_cpu_decoder->Append("PPU Interpreter 2"); cbox_cpu_decoder->Append("PPU JIT (LLVM)"); cbox_spu_decoder->Append("SPU Interpreter"); + cbox_spu_decoder->Append("SPU Interpreter 2"); cbox_spu_decoder->Append("SPU JIT (ASMJIT)"); cbox_gs_render->Append("Null"); @@ -531,8 +533,8 @@ void MainFrame::Config(wxCommandEvent& WXUNUSED(event)) chbox_dbg_ap_systemcall ->SetValue(Ini.DBGAutoPauseSystemCall.GetValue()); chbox_dbg_ap_functioncall->SetValue(Ini.DBGAutoPauseFunctionCall.GetValue()); - cbox_cpu_decoder ->SetSelection(Ini.CPUDecoderMode.GetValue() ? Ini.CPUDecoderMode.GetValue() - 1 : 0); - cbox_spu_decoder ->SetSelection(Ini.SPUDecoderMode.GetValue() ? Ini.SPUDecoderMode.GetValue() - 1 : 0); + cbox_cpu_decoder ->SetSelection(Ini.CPUDecoderMode.GetValue() ? Ini.CPUDecoderMode.GetValue() : 0); + cbox_spu_decoder ->SetSelection(Ini.SPUDecoderMode.GetValue() ? 
Ini.SPUDecoderMode.GetValue() : 0); cbox_gs_render ->SetSelection(Ini.GSRenderMode.GetValue()); cbox_gs_resolution ->SetSelection(ResolutionIdToNum(Ini.GSResolution.GetValue()) - 1); cbox_gs_aspect ->SetSelection(Ini.GSAspectRatio.GetValue() - 1); @@ -632,8 +634,8 @@ void MainFrame::Config(wxCommandEvent& WXUNUSED(event)) if(diag.ShowModal() == wxID_OK) { - Ini.CPUDecoderMode.SetValue(cbox_cpu_decoder->GetSelection() + 1); - Ini.SPUDecoderMode.SetValue(cbox_spu_decoder->GetSelection() + 1); + Ini.CPUDecoderMode.SetValue(cbox_cpu_decoder->GetSelection()); + Ini.SPUDecoderMode.SetValue(cbox_spu_decoder->GetSelection()); Ini.GSRenderMode.SetValue(cbox_gs_render->GetSelection()); Ini.GSResolution.SetValue(ResolutionNumToId(cbox_gs_resolution->GetSelection() + 1)); Ini.GSAspectRatio.SetValue(cbox_gs_aspect->GetSelection() + 1); diff --git a/rpcs3/Ini.h b/rpcs3/Ini.h index 6f2b9a7a11..08b0ea3b86 100644 --- a/rpcs3/Ini.h +++ b/rpcs3/Ini.h @@ -247,8 +247,8 @@ public: void Load() { // Core - CPUDecoderMode.Load(1); - SPUDecoderMode.Load(1); + CPUDecoderMode.Load(0); + SPUDecoderMode.Load(0); // Graphics GSRenderMode.Load(1); diff --git a/rpcs3/Loader/ELF64.cpp b/rpcs3/Loader/ELF64.cpp index 6c89d7cf04..56999154e1 100644 --- a/rpcs3/Loader/ELF64.cpp +++ b/rpcs3/Loader/ELF64.cpp @@ -16,6 +16,9 @@ using namespace PPU_instr; +extern void initialize_ppu_exec_map(); +extern void fill_ppu_exec_map(u32 addr, u32 size); + namespace loader { namespace handlers @@ -547,6 +550,16 @@ namespace loader main_thread.args({ Emu.GetPath()/*, "-emu"*/ }).run(); main_thread.gpr(11, OPD.addr()).gpr(12, Emu.GetMallocPageSize()); + initialize_ppu_exec_map(); + + for (u32 page = 0; page < 0x20000000; page += 4096) + { + if (vm::check_addr(page, 4096)) + { + fill_ppu_exec_map(page, 4096); + } + } + return ok; } diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index f7f0b774a9..c3329dfd33 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -37,6 +37,8 @@ + + @@ -366,6 +368,7 
@@ + @@ -374,6 +377,7 @@ + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 9d50d29f54..6ef16d3491 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -860,6 +860,12 @@ Emu\SysCalls\Modules + + Emu\CPU\Cell + + + Emu\CPU\Cell + @@ -1543,5 +1549,11 @@ Emu\SysCalls\Modules + + Emu\CPU\Cell + + + Emu\CPU\Cell + \ No newline at end of file