diff --git a/asmjit.vcxproj b/asmjit.vcxproj index 80ffab87f4..98dbfb40b4 100644 --- a/asmjit.vcxproj +++ b/asmjit.vcxproj @@ -92,12 +92,12 @@ - .\libs\$(Configuration)\ + .\libs\$(Configuration)_x86\ - .\libs\$(Configuration)\ + .\libs\$(Configuration)_x86\ @@ -115,7 +115,7 @@ Level3 Disabled - true + false ASMJIT_STATIC;_MBCS;%(PreprocessorDefinitions) @@ -139,7 +139,7 @@ MaxSpeed true true - true + false ASMJIT_STATIC;_UNICODE;UNICODE;%(PreprocessorDefinitions) @@ -154,7 +154,7 @@ MaxSpeed true true - true + false ASMJIT_STATIC;_UNICODE;UNICODE;%(PreprocessorDefinitions) diff --git a/rpcs3/Emu/Cell/PPCDecoder.h b/rpcs3/Emu/Cell/PPCDecoder.h index 3185a441a9..fa54d4112c 100644 --- a/rpcs3/Emu/Cell/PPCDecoder.h +++ b/rpcs3/Emu/Cell/PPCDecoder.h @@ -12,19 +12,19 @@ public: template -static InstrList<1 << CodeField::size, TO>* new_list(const CodeField& func, InstrCaller* error_func = nullptr) +static InstrList<(1 << (CodeField::size)), TO>* new_list(const CodeField& func, InstrCaller* error_func = nullptr) { - return new InstrList<1 << CodeField::size, TO>(func, error_func); + return new InstrList<(1 << (CodeField::size)), TO>(func, error_func); } template -static InstrList<1 << CodeField::size, TO>* new_list(InstrList* parent, int opcode, const CodeField& func, InstrCaller* error_func = nullptr) +static InstrList<(1 << (CodeField::size)), TO>* new_list(InstrList* parent, int opcode, const CodeField& func, InstrCaller* error_func = nullptr) { - return connect_list(parent, new InstrList<1 << CodeField::size, TO>(func, error_func), opcode); + return connect_list(parent, new InstrList<(1 << (CodeField::size)), TO>(func, error_func), opcode); } template -static InstrList<1 << CodeField::size, TO>* new_list(InstrList* parent, const CodeField& func, InstrCaller* error_func = nullptr) +static InstrList<(1 << (CodeField::size)), TO>* new_list(InstrList* parent, const CodeField& func, InstrCaller* error_func = nullptr) { - return connect_list(parent, new InstrList<1 << CodeField::size, TO>(func, error_func)); + return connect_list(parent, new InstrList<(1 << (CodeField::size)), TO>(func, error_func)); } \ No newline at end of file diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h index dfca767c9c..fe55ed219e 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.h +++ b/rpcs3/Emu/Cell/SPUInterpreter.h @@ -32,121 +32,19 @@ private: //0 - 10 void STOP(u32 code) { - CPU.SetExitStatus(code); // exit code (not status) - - switch (code) - { - case 0x110: /* ===== sys_spu_thread_receive_event ===== */ - { - u32 spuq = 0; - if (!CPU.SPU.Out_MBox.Pop(spuq)) - { - ConLog.Error("sys_spu_thread_receive_event: cannot read Out_MBox"); - CPU.SPU.In_MBox.PushUncond(CELL_EINVAL); // ??? - return; - } - - if (CPU.SPU.In_MBox.GetCount()) - { - ConLog.Error("sys_spu_thread_receive_event(spuq=0x%x): In_MBox is not empty", spuq); - CPU.SPU.In_MBox.PushUncond(CELL_EBUSY); // ??? - return; - } - - if (Ini.HLELogging.GetValue()) - { - ConLog.Write("sys_spu_thread_receive_event(spuq=0x%x)", spuq); - } - - EventQueue* eq; - if (!CPU.SPUQs.GetEventQueue(FIX_SPUQ(spuq), eq)) - { - CPU.SPU.In_MBox.PushUncond(CELL_EINVAL); // TODO: check error value - return; - } - - u32 tid = GetCurrentSPUThread().GetId(); - - eq->sq.push(tid); // add thread to sleep queue - - while (true) - { - switch (eq->owner.trylock(tid)) - { - case SMR_OK: - if (!eq->events.count()) - { - eq->owner.unlock(tid); - break; - } - else - { - u32 next = (eq->protocol == SYS_SYNC_FIFO) ? 
eq->sq.pop() : eq->sq.pop_prio(); - if (next != tid) - { - eq->owner.unlock(tid, next); - break; - } - } - case SMR_SIGNAL: - { - sys_event_data event; - eq->events.pop(event); - eq->owner.unlock(tid); - CPU.SPU.In_MBox.PushUncond(CELL_OK); - CPU.SPU.In_MBox.PushUncond(event.data1); - CPU.SPU.In_MBox.PushUncond(event.data2); - CPU.SPU.In_MBox.PushUncond(event.data3); - return; - } - case SMR_FAILED: break; - default: eq->sq.invalidate(tid); CPU.SPU.In_MBox.PushUncond(CELL_ECANCELED); return; - } - - Sleep(1); - if (Emu.IsStopped()) - { - ConLog.Warning("sys_spu_thread_receive_event(spuq=0x%x) aborted", spuq); - eq->sq.invalidate(tid); - return; - } - } - } - break; - case 0x102: - if (!CPU.SPU.Out_MBox.GetCount()) - { - ConLog.Error("sys_spu_thread_exit (no status, code 0x102)"); - } - else if (Ini.HLELogging.GetValue()) - { - // the real exit status - ConLog.Write("sys_spu_thread_exit (status=0x%x)", CPU.SPU.Out_MBox.GetValue()); - } - CPU.Stop(); - break; - default: - if (!CPU.SPU.Out_MBox.GetCount()) - { - ConLog.Error("Unknown STOP code: 0x%x (no message)", code); - } - else - { - ConLog.Error("Unknown STOP code: 0x%x (message=0x%x)", code, CPU.SPU.Out_MBox.GetValue()); - } - CPU.Stop(); - break; - } + CPU.DoStop(code); } void LNOP() { } void SYNC(u32 Cbit) { + // This instruction must be used following a store instruction that modifies the instruction stream. _mm_mfence(); } void DSYNC() { + // This instruction forces all earlier load, store, and channel instructions to complete before proceeding. _mm_mfence(); } void MFSPR(u32 rt, u32 sa) @@ -389,6 +287,7 @@ private: } void STOPD(u32 rc, u32 ra, u32 rb) { + UNIMPLEMENTED(); Emu.Pause(); } void STQX(u32 rt, u32 ra, u32 rb) diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index f78cdf8339..478d5d3d16 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -16,8 +16,12 @@ using namespace asmjit::host; struct SPUImmTable { - __m128i s19_to_s32[1 << 18]; + __m128i s19_to_s32[1 << 19]; __m128i fsmbi_mask[1 << 16]; + __m128 scale_to_float[256]; + __m128 scale_to_int[256]; + __m128i min_int; + __m128i max_int; SPUImmTable() { @@ -38,6 +42,34 @@ struct SPUImmTable fsmbi_mask[i].m128i_i8[j] = ((i >> j) & 0x1) ? 
0xff : 0; } } + // scale table for (u)int -> float conversion + for (s32 i = 0; i < sizeof(scale_to_float) / sizeof(__m128); i++) + { + const float v = pow(2, i - 155); + scale_to_float[i].m128_f32[0] = v; + scale_to_float[i].m128_f32[1] = v; + scale_to_float[i].m128_f32[2] = v; + scale_to_float[i].m128_f32[3] = v; + } + // scale table for float -> (u)int conversion + for (s32 i = 0; i < sizeof(scale_to_int) / sizeof(__m128); i++) + { + const float v = pow(2, 173 - i); + scale_to_int[i].m128_f32[0] = v; + scale_to_int[i].m128_f32[1] = v; + scale_to_int[i].m128_f32[2] = v; + scale_to_int[i].m128_f32[3] = v; + } + // sign bit + min_int.m128i_u32[0] = 0x80000000; + min_int.m128i_u32[1] = 0x80000000; + min_int.m128i_u32[2] = 0x80000000; + min_int.m128i_u32[3] = 0x80000000; + // + max_int.m128i_u32[0] = 0x7fffffff; + max_int.m128i_u32[1] = 0x7fffffff; + max_int.m128i_u32[2] = 0x7fffffff; + max_int.m128i_u32[3] = 0x7fffffff; } }; @@ -46,10 +78,10 @@ class SPURecompiler; class SPURecompilerCore : public CPUDecoder { SPURecompiler* m_enc; - SPUInterpreter* m_inter; SPUThread& CPU; public: + SPUInterpreter* inter; JitRuntime runtime; Compiler compiler; @@ -74,14 +106,29 @@ public: virtual u8 DecodeMemory(const u64 address); }; -#define cpu_xmm(x) oword_ptr(*cpu_var, offsetof(SPUThread, x)) -#define cpu_qword(x) qword_ptr(*cpu_var, offsetof(SPUThread, x)) -#define cpu_dword(x,...) dword_ptr(*cpu_var, __VA_ARGS__, offsetof(SPUThread, x)) -#define cpu_word(x) word_ptr(*cpu_var, offsetof(SPUThread, x)) -#define cpu_byte(x) byte_ptr(*cpu_var, offsetof(SPUThread, x)) +#define cpu_xmm(x) oword_ptr(*cpu_var, (sizeof((*(SPUThread*)nullptr).x) == 16) ? offsetof(SPUThread, x) : throw "sizeof("#x") != 16") +#define cpu_qword(x) qword_ptr(*cpu_var, (sizeof((*(SPUThread*)nullptr).x) == 8) ? offsetof(SPUThread, x) : throw "sizeof("#x") != 8") +#define cpu_dword(x) dword_ptr(*cpu_var, (sizeof((*(SPUThread*)nullptr).x) == 4) ? offsetof(SPUThread, x) : throw "sizeof("#x") != 4") +#define cpu_word(x) word_ptr(*cpu_var, (sizeof((*(SPUThread*)nullptr).x) == 2) ? offsetof(SPUThread, x) : throw "sizeof("#x") != 2") +#define cpu_byte(x) byte_ptr(*cpu_var, (sizeof((*(SPUThread*)nullptr).x) == 1) ? offsetof(SPUThread, x) : throw "sizeof("#x") != 1") #define imm_xmm(x) oword_ptr(*imm_var, offsetof(SPUImmTable, x)) +#define WRAPPER_BEGIN(a0, a1, a2, a3) struct opcode_wrapper \ +{ \ + static void opcode(u32 a0, u32 a1, u32 a2, u32 a3) \ +{ \ + SPUThread& CPU = *(SPUThread*)GetCurrentCPUThread(); + +#define WRAPPER_END(a0, a1, a2, a3) } \ +}; \ + X86X64CallNode* call = c.call(imm_ptr(&opcode_wrapper::opcode), kFuncConvHost, FuncBuilder4()); \ + call->setArg(0, imm_u(a0)); \ + call->setArg(1, imm_u(a1)); \ + call->setArg(2, imm_u(a2)); \ + call->setArg(3, imm_u(a3)); + + class SPURecompiler : public SPUOpcodes { private: @@ -94,6 +141,7 @@ public: GpVar* cpu_var; GpVar* ls_var; GpVar* imm_var; + GpVar* pos_var; SPURecompiler(SPUThread& cpu, SPURecompilerCore& rec) : CPU(cpu), rec(rec), c(rec.compiler) { @@ -103,19 +151,31 @@ private: //0 - 10 void STOP(u32 code) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(code, xx, yy, zz); + CPU.DoStop(code); + WRAPPER_END(code, 0, 0, 0); + c.mov(*pos_var, (CPU.PC >> 2) + 1); + do_finalize = true; + ConLog.Write("STOP(code=%d)", code); } void LNOP() { - UNIMPLEMENTED(); + /*c.mov(*pos_var, (CPU.PC >> 2) + 1); + do_finalize = true; + ConLog.Write("LNOP()");*/ } void SYNC(u32 Cbit) { - UNIMPLEMENTED(); + // This instruction must be used following a store instruction that modifies the instruction stream. 
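+ // An x86 MFENCE is emitted below as a conservative approximation of SYNC; the block is
+ // then finalized so execution resumes at the following SPU instruction.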
+ c.mfence(); + c.mov(*pos_var, (CPU.PC >> 2) + 1); + do_finalize = true; + ConLog.Write("SYNC()"); } void DSYNC() { - UNIMPLEMENTED(); + // This instruction forces all earlier load, store, and channel instructions to complete before proceeding. + c.mfence(); } void MFSPR(u32 rt, u32 sa) { @@ -134,233 +194,326 @@ private: } void RDCH(u32 rt, u32 ra) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, yy, zz); CPU.ReadChannel(CPU.GPR[rt], ra); + WRAPPER_END(rt, ra, 0, 0); + // TODO } void RCHCNT(u32 rt, u32 ra) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, yy, zz); CPU.GPR[rt].Reset(); CPU.GPR[rt]._u32[3] = CPU.GetChannelCount(ra); + WRAPPER_END(rt, ra, 0, 0); + // TODO } void SF(u32 rt, u32 ra, u32 rb) { - XmmVar v0(c); + WRAPPER_BEGIN(rt, ra, rb, zz); + CPU.GPR[rt]._u32[0] = CPU.GPR[rb]._u32[0] - CPU.GPR[ra]._u32[0]; + CPU.GPR[rt]._u32[1] = CPU.GPR[rb]._u32[1] - CPU.GPR[ra]._u32[1]; + CPU.GPR[rt]._u32[2] = CPU.GPR[rb]._u32[2] - CPU.GPR[ra]._u32[2]; + CPU.GPR[rt]._u32[3] = CPU.GPR[rb]._u32[3] - CPU.GPR[ra]._u32[3]; + WRAPPER_END(rt, ra, rb, 0); + /*XmmVar v0(c); if (ra == rb) { // zero c.xorps(v0, v0); c.movaps(cpu_xmm(GPR[rt]), v0); } + else { // sub from c.movdqa(v0, cpu_xmm(GPR[rb])); c.psubd(v0, cpu_xmm(GPR[ra])); c.movdqa(cpu_xmm(GPR[rt]), v0); - } + }*/ } void OR(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._u32[0] = CPU.GPR[ra]._u32[0] | CPU.GPR[rb]._u32[0]; CPU.GPR[rt]._u32[1] = CPU.GPR[ra]._u32[1] | CPU.GPR[rb]._u32[1]; CPU.GPR[rt]._u32[2] = CPU.GPR[ra]._u32[2] | CPU.GPR[rb]._u32[2]; CPU.GPR[rt]._u32[3] = CPU.GPR[ra]._u32[3] | CPU.GPR[rb]._u32[3]; + WRAPPER_END(rt, ra, rb, 0); + // TODO } void BG(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._u32[0] = CPU.GPR[ra]._u32[0] > CPU.GPR[rb]._u32[0] ? 0 : 1; CPU.GPR[rt]._u32[1] = CPU.GPR[ra]._u32[1] > CPU.GPR[rb]._u32[1] ? 0 : 1; CPU.GPR[rt]._u32[2] = CPU.GPR[ra]._u32[2] > CPU.GPR[rb]._u32[2] ? 0 : 1; CPU.GPR[rt]._u32[3] = CPU.GPR[ra]._u32[3] > CPU.GPR[rb]._u32[3] ? 0 : 1; + WRAPPER_END(rt, ra, rb, 0); + /*XmmVar v0(c); + if (ra == rb) + { + // load {1,1,1,1} + c.movaps(v0, imm_xmm(s19_to_s32[1])); + c.movaps(cpu_xmm(GPR[rt]), v0); + } + else + { + // compare if-greater-then + c.movdqa(v0, cpu_xmm(GPR[rb])); + c.psubd(v0, cpu_xmm(GPR[ra])); + c.psrad(v0, 32); + c.paddd(v0, imm_xmm(s19_to_s32[1])); + c.movdqa(cpu_xmm(GPR[rt]), v0); + }*/ } void SFH(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int h = 0; h < 8; h++) CPU.GPR[rt]._u16[h] = CPU.GPR[rb]._u16[h] - CPU.GPR[ra]._u16[h]; + WRAPPER_END(rt, ra, rb, 0); } void NOR(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._u32[0] = ~(CPU.GPR[ra]._u32[0] | CPU.GPR[rb]._u32[0]); CPU.GPR[rt]._u32[1] = ~(CPU.GPR[ra]._u32[1] | CPU.GPR[rb]._u32[1]); CPU.GPR[rt]._u32[2] = ~(CPU.GPR[ra]._u32[2] | CPU.GPR[rb]._u32[2]); CPU.GPR[rt]._u32[3] = ~(CPU.GPR[ra]._u32[3] | CPU.GPR[rb]._u32[3]); + WRAPPER_END(rt, ra, rb, 0); + // TODO } void ABSDB(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int b = 0; b < 16; b++) CPU.GPR[rt]._u8[b] = CPU.GPR[rb]._u8[b] > CPU.GPR[ra]._u8[b] ? 
CPU.GPR[rb]._u8[b] - CPU.GPR[ra]._u8[b] : CPU.GPR[ra]._u8[b] - CPU.GPR[rb]._u8[b]; + WRAPPER_END(rt, ra, rb, 0); } void ROT(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._u32[0] = (CPU.GPR[ra]._u32[0] << (CPU.GPR[rb]._u32[0] & 0x1f)) | (CPU.GPR[ra]._u32[0] >> (32 - (CPU.GPR[rb]._u32[0] & 0x1f))); CPU.GPR[rt]._u32[1] = (CPU.GPR[ra]._u32[1] << (CPU.GPR[rb]._u32[1] & 0x1f)) | (CPU.GPR[ra]._u32[1] >> (32 - (CPU.GPR[rb]._u32[1] & 0x1f))); CPU.GPR[rt]._u32[2] = (CPU.GPR[ra]._u32[2] << (CPU.GPR[rb]._u32[2] & 0x1f)) | (CPU.GPR[ra]._u32[2] >> (32 - (CPU.GPR[rb]._u32[2] & 0x1f))); CPU.GPR[rt]._u32[3] = (CPU.GPR[ra]._u32[3] << (CPU.GPR[rb]._u32[3] & 0x1f)) | (CPU.GPR[ra]._u32[3] >> (32 - (CPU.GPR[rb]._u32[3] & 0x1f))); + WRAPPER_END(rt, ra, rb, 0); } void ROTM(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._u32[0] = ((0 - CPU.GPR[rb]._u32[0]) % 64) < 32 ? CPU.GPR[ra]._u32[0] >> ((0 - CPU.GPR[rb]._u32[0]) % 64) : 0; CPU.GPR[rt]._u32[1] = ((0 - CPU.GPR[rb]._u32[1]) % 64) < 32 ? CPU.GPR[ra]._u32[1] >> ((0 - CPU.GPR[rb]._u32[1]) % 64) : 0; CPU.GPR[rt]._u32[2] = ((0 - CPU.GPR[rb]._u32[2]) % 64) < 32 ? CPU.GPR[ra]._u32[2] >> ((0 - CPU.GPR[rb]._u32[2]) % 64) : 0; CPU.GPR[rt]._u32[3] = ((0 - CPU.GPR[rb]._u32[3]) % 64) < 32 ? CPU.GPR[ra]._u32[3] >> ((0 - CPU.GPR[rb]._u32[3]) % 64) : 0; + WRAPPER_END(rt, ra, rb, 0); } void ROTMA(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._i32[0] = ((0 - CPU.GPR[rb]._i32[0]) % 64) < 32 ? CPU.GPR[ra]._i32[0] >> ((0 - CPU.GPR[rb]._i32[0]) % 64) : CPU.GPR[ra]._i32[0] >> 31; CPU.GPR[rt]._i32[1] = ((0 - CPU.GPR[rb]._i32[1]) % 64) < 32 ? CPU.GPR[ra]._i32[1] >> ((0 - CPU.GPR[rb]._i32[1]) % 64) : CPU.GPR[ra]._i32[1] >> 31; CPU.GPR[rt]._i32[2] = ((0 - CPU.GPR[rb]._i32[2]) % 64) < 32 ? CPU.GPR[ra]._i32[2] >> ((0 - CPU.GPR[rb]._i32[2]) % 64) : CPU.GPR[ra]._i32[2] >> 31; CPU.GPR[rt]._i32[3] = ((0 - CPU.GPR[rb]._i32[3]) % 64) < 32 ? CPU.GPR[ra]._i32[3] >> ((0 - CPU.GPR[rb]._i32[3]) % 64) : CPU.GPR[ra]._i32[3] >> 31; + WRAPPER_END(rt, ra, rb, 0); } void SHL(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._u32[0] = (CPU.GPR[rb]._u32[0] & 0x3f) > 31 ? 0 : CPU.GPR[ra]._u32[0] << (CPU.GPR[rb]._u32[0] & 0x3f); CPU.GPR[rt]._u32[1] = (CPU.GPR[rb]._u32[1] & 0x3f) > 31 ? 0 : CPU.GPR[ra]._u32[1] << (CPU.GPR[rb]._u32[1] & 0x3f); CPU.GPR[rt]._u32[2] = (CPU.GPR[rb]._u32[2] & 0x3f) > 31 ? 0 : CPU.GPR[ra]._u32[2] << (CPU.GPR[rb]._u32[2] & 0x3f); CPU.GPR[rt]._u32[3] = (CPU.GPR[rb]._u32[3] & 0x3f) > 31 ? 0 : CPU.GPR[ra]._u32[3] << (CPU.GPR[rb]._u32[3] & 0x3f); + WRAPPER_END(rt, ra, rb, 0); + // AVX2: masking with 0x3f + VPSLLVD may be better + /*for (u32 i = 0; i < 4; i++) + { + GpVar v0(c, kVarTypeUInt32); + c.mov(v0, cpu_dword(GPR[ra]._u32[i])); + GpVar shift(c, kVarTypeUInt32); + c.mov(shift, cpu_dword(GPR[rb]._u32[i])); + GpVar z(c); + c.xor_(z, z); + c.test(shift, 0x20); + c.cmovnz(v0, z); + c.shl(v0, shift); + c.mov(cpu_dword(GPR[rt]._u32[i]), v0); + }*/ } void ROTH(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int h = 0; h < 8; h++) CPU.GPR[rt]._u16[h] = (CPU.GPR[ra]._u16[h] << (CPU.GPR[rb]._u16[h] & 0xf)) | (CPU.GPR[ra]._u16[h] >> (16 - (CPU.GPR[rb]._u16[h] & 0xf))); + WRAPPER_END(rt, ra, rb, 0); } void ROTHM(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int h = 0; h < 8; h++) CPU.GPR[rt]._u16[h] = ((0 - CPU.GPR[rb]._u16[h]) % 32) < 16 ? 
CPU.GPR[ra]._u16[h] >> ((0 - CPU.GPR[rb]._u16[h]) % 32) : 0; + WRAPPER_END(rt, ra, rb, 0); } void ROTMAH(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int h = 0; h < 8; h++) CPU.GPR[rt]._i16[h] = ((0 - CPU.GPR[rb]._i16[h]) % 32) < 16 ? CPU.GPR[ra]._i16[h] >> ((0 - CPU.GPR[rb]._i16[h]) % 32) : CPU.GPR[ra]._i16[h] >> 15; + WRAPPER_END(rt, ra, rb, 0); } void SHLH(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int h = 0; h < 8; h++) CPU.GPR[rt]._u16[h] = (CPU.GPR[rb]._u16[h] & 0x1f) > 15 ? 0 : CPU.GPR[ra]._u16[h] << (CPU.GPR[rb]._u16[h] & 0x1f); + WRAPPER_END(rt, ra, rb, 0); } void ROTI(u32 rt, u32 ra, s32 i7) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i7, zz); const int nRot = i7 & 0x1f; CPU.GPR[rt]._u32[0] = (CPU.GPR[ra]._u32[0] << nRot) | (CPU.GPR[ra]._u32[0] >> (32 - nRot)); CPU.GPR[rt]._u32[1] = (CPU.GPR[ra]._u32[1] << nRot) | (CPU.GPR[ra]._u32[1] >> (32 - nRot)); CPU.GPR[rt]._u32[2] = (CPU.GPR[ra]._u32[2] << nRot) | (CPU.GPR[ra]._u32[2] >> (32 - nRot)); CPU.GPR[rt]._u32[3] = (CPU.GPR[ra]._u32[3] << nRot) | (CPU.GPR[ra]._u32[3] >> (32 - nRot)); + WRAPPER_END(rt, ra, i7, 0); } void ROTMI(u32 rt, u32 ra, s32 i7) { - UNIMPLEMENTED(); - const int nRot = (0 - i7) % 64; + WRAPPER_BEGIN(rt, ra, i7, zz); + const int nRot = (0 - (s32)i7) % 64; CPU.GPR[rt]._u32[0] = nRot < 32 ? CPU.GPR[ra]._u32[0] >> nRot : 0; CPU.GPR[rt]._u32[1] = nRot < 32 ? CPU.GPR[ra]._u32[1] >> nRot : 0; CPU.GPR[rt]._u32[2] = nRot < 32 ? CPU.GPR[ra]._u32[2] >> nRot : 0; CPU.GPR[rt]._u32[3] = nRot < 32 ? CPU.GPR[ra]._u32[3] >> nRot : 0; + WRAPPER_END(rt, ra, i7, 0); + // TODO } void ROTMAI(u32 rt, u32 ra, s32 i7) { - UNIMPLEMENTED(); - const int nRot = (0 - i7) % 64; + WRAPPER_BEGIN(rt, ra, i7, zz); + const int nRot = (0 - (s32)i7) % 64; CPU.GPR[rt]._i32[0] = nRot < 32 ? CPU.GPR[ra]._i32[0] >> nRot : CPU.GPR[ra]._i32[0] >> 31; CPU.GPR[rt]._i32[1] = nRot < 32 ? CPU.GPR[ra]._i32[1] >> nRot : CPU.GPR[ra]._i32[1] >> 31; CPU.GPR[rt]._i32[2] = nRot < 32 ? CPU.GPR[ra]._i32[2] >> nRot : CPU.GPR[ra]._i32[2] >> 31; CPU.GPR[rt]._i32[3] = nRot < 32 ? CPU.GPR[ra]._i32[3] >> nRot : CPU.GPR[ra]._i32[3] >> 31; + WRAPPER_END(rt, ra, i7, 0); } void SHLI(u32 rt, u32 ra, s32 i7) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i7, zz); const u32 s = i7 & 0x3f; - for (u32 j = 0; j < 4; ++j) CPU.GPR[rt]._u32[j] = CPU.GPR[ra]._u32[j] << s; + WRAPPER_END(rt, ra, i7, 0); + // TODO } void ROTHI(u32 rt, u32 ra, s32 i7) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i7, zz); const int nRot = i7 & 0xf; - for (int h = 0; h < 8; h++) CPU.GPR[rt]._u16[h] = (CPU.GPR[ra]._u16[h] << nRot) | (CPU.GPR[ra]._u16[h] >> (16 - nRot)); + WRAPPER_END(rt, ra, i7, 0); } void ROTHMI(u32 rt, u32 ra, s32 i7) { - UNIMPLEMENTED(); - const int nRot = (0 - i7) % 32; - + WRAPPER_BEGIN(rt, ra, i7, zz); + const int nRot = (0 - (s32)i7) % 32; for (int h = 0; h < 8; h++) CPU.GPR[rt]._u16[h] = nRot < 16 ? CPU.GPR[ra]._u16[h] >> nRot : 0; + WRAPPER_END(rt, ra, i7, 0); } void ROTMAHI(u32 rt, u32 ra, s32 i7) { - UNIMPLEMENTED(); - const int nRot = (0 - i7) % 32; - + WRAPPER_BEGIN(rt, ra, i7, zz); + const int nRot = (0 - (s32)i7) % 32; for (int h = 0; h < 8; h++) CPU.GPR[rt]._i16[h] = nRot < 16 ? CPU.GPR[ra]._i16[h] >> nRot : CPU.GPR[ra]._i16[h] >> 15; + WRAPPER_END(rt, ra, i7, 0); } void SHLHI(u32 rt, u32 ra, s32 i7) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i7, zz); const int nRot = i7 & 0x1f; - for (int h = 0; h < 8; h++) CPU.GPR[rt]._u16[0] = nRot > 15 ? 
0 : CPU.GPR[ra]._u16[0] << nRot; + WRAPPER_END(rt, ra, i7, 0); } void A(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._u32[0] = CPU.GPR[ra]._u32[0] + CPU.GPR[rb]._u32[0]; CPU.GPR[rt]._u32[1] = CPU.GPR[ra]._u32[1] + CPU.GPR[rb]._u32[1]; CPU.GPR[rt]._u32[2] = CPU.GPR[ra]._u32[2] + CPU.GPR[rb]._u32[2]; CPU.GPR[rt]._u32[3] = CPU.GPR[ra]._u32[3] + CPU.GPR[rb]._u32[3]; + WRAPPER_END(rt, ra, rb, 0); + /*XmmVar v0(c); + c.movdqa(v0, cpu_xmm(GPR[ra])); + c.paddd(v0, cpu_xmm(GPR[rb])); + c.movdqa(cpu_xmm(GPR[rt]), v0);*/ } void AND(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._u32[0] = CPU.GPR[ra]._u32[0] & CPU.GPR[rb]._u32[0]; CPU.GPR[rt]._u32[1] = CPU.GPR[ra]._u32[1] & CPU.GPR[rb]._u32[1]; CPU.GPR[rt]._u32[2] = CPU.GPR[ra]._u32[2] & CPU.GPR[rb]._u32[2]; CPU.GPR[rt]._u32[3] = CPU.GPR[ra]._u32[3] & CPU.GPR[rb]._u32[3]; + WRAPPER_END(rt, ra, rb, 0); + /*XmmVar v0(c); + if (ra == rb) + { + if (rt == ra) + { + // nop + } + else + { + // mov + c.movaps(v0, cpu_xmm(GPR[ra])); + c.movaps(cpu_xmm(GPR[rt]), v0); + } + } + else + { + // and + c.movaps(v0, cpu_xmm(GPR[ra])); + c.andps(v0, cpu_xmm(GPR[rb])); + c.movaps(cpu_xmm(GPR[rt]), v0); + }*/ } void CG(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._u32[0] = ((CPU.GPR[ra]._u32[0] + CPU.GPR[rb]._u32[0]) < CPU.GPR[ra]._u32[0]) ? 1 : 0; CPU.GPR[rt]._u32[1] = ((CPU.GPR[ra]._u32[1] + CPU.GPR[rb]._u32[1]) < CPU.GPR[ra]._u32[1]) ? 1 : 0; CPU.GPR[rt]._u32[2] = ((CPU.GPR[ra]._u32[2] + CPU.GPR[rb]._u32[2]) < CPU.GPR[ra]._u32[2]) ? 1 : 0; CPU.GPR[rt]._u32[3] = ((CPU.GPR[ra]._u32[3] + CPU.GPR[rb]._u32[3]) < CPU.GPR[ra]._u32[3]) ? 1 : 0; + WRAPPER_END(rt, ra, rb, 0); + // TODO } void AH(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int h = 0; h < 8; h++) CPU.GPR[rt]._u16[h] = CPU.GPR[ra]._u16[h] + CPU.GPR[rb]._u16[h]; + WRAPPER_END(rt, ra, rb, 0); } void NAND(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._u32[0] = ~(CPU.GPR[ra]._u32[0] & CPU.GPR[rb]._u32[0]); CPU.GPR[rt]._u32[1] = ~(CPU.GPR[ra]._u32[1] & CPU.GPR[rb]._u32[1]); CPU.GPR[rt]._u32[2] = ~(CPU.GPR[ra]._u32[2] & CPU.GPR[rb]._u32[2]); CPU.GPR[rt]._u32[3] = ~(CPU.GPR[ra]._u32[3] & CPU.GPR[rb]._u32[3]); + WRAPPER_END(rt, ra, rb, 0); } void AVGB(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int b = 0; b < 16; b++) CPU.GPR[rt]._u8[b] = (CPU.GPR[ra]._u8[b] + CPU.GPR[rb]._u8[b] + 1) >> 1; + WRAPPER_END(rt, ra, rb, 0); } void MTSPR(u32 rt, u32 sa) { @@ -373,8 +526,40 @@ private: } void WRCH(u32 ra, u32 rt) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(ra, rt, yy, zz); CPU.WriteChannel(ra, CPU.GPR[rt]); + WRAPPER_END(ra, rt, 0, 0); + /*GpVar v(c, kVarTypeUInt32); + c.mov(v, cpu_dword(GPR[rt]._u32[3])); + switch (ra) + { + case MFC_LSA: + c.mov(cpu_dword(MFC1.LSA.m_value[0]), v); + break; + + case MFC_EAH: + c.mov(cpu_dword(MFC1.EAH.m_value[0]), v); + break; + + case MFC_EAL: + c.mov(cpu_dword(MFC1.EAL.m_value[0]), v); + break; + + case MFC_Size: + c.mov(cpu_word(MFC1.Size_Tag.m_val16[1]), v); + break; + + case MFC_TagID: + c.mov(cpu_word(MFC1.Size_Tag.m_val16[0]), v); + break; + + default: + { + X86X64CallNode* call = c.call(imm_ptr(&WRCH_wrapper::WRCH), kFuncConvHost, FuncBuilder2()); + call->setArg(0, imm_u(ra)); + call->setArg(1, v); + } + }*/ } void BIZ(u32 rt, u32 ra) { @@ -407,7 +592,7 @@ private: } void STQX(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, 
ra, rb, zz); u32 lsa = (CPU.GPR[ra]._u32[3] + CPU.GPR[rb]._u32[3]) & 0x3fff0; if(!CPU.IsGoodLSA(lsa)) { @@ -417,19 +602,27 @@ private: } CPU.WriteLS128(lsa, CPU.GPR[rt]._u128); + WRAPPER_END(rt, ra, rb, 0); } void BI(u32 ra) { - UNIMPLEMENTED(); - CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); + do_finalize = true; + c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3])); + c.shr(*pos_var, 2); + //ConLog.Write("BI(ra=%d)", ra); } void BISL(u32 rt, u32 ra) { - UNIMPLEMENTED(); - const u32 NewPC = CPU.GPR[ra]._u32[3]; - CPU.GPR[rt].Reset(); - CPU.GPR[rt]._u32[3] = CPU.PC + 4; - CPU.SetBranch(branchTarget(NewPC, 0)); + do_finalize = true; + c.int3(); + c.xor_(*pos_var, *pos_var); + c.mov(cpu_dword(GPR[rt]._u32[0]), *pos_var); + c.mov(cpu_dword(GPR[rt]._u32[1]), *pos_var); + c.mov(cpu_dword(GPR[rt]._u32[2]), *pos_var); + c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3])); + c.mov(cpu_dword(GPR[rt]._u32[3]), (CPU.PC >> 2) + 1); + c.shr(*pos_var, 2); + ConLog.Write("BISL(rt=%d,ra=%d)", rt, ra); } void IRET(u32 ra) { @@ -442,77 +635,90 @@ private: } void HBR(u32 p, u32 ro, u32 ra) { - UNIMPLEMENTED(); } void GB(u32 rt, u32 ra) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, yy, zz); CPU.GPR[rt]._u32[3] = (CPU.GPR[ra]._u32[0] & 1) | ((CPU.GPR[ra]._u32[1] & 1) << 1) | ((CPU.GPR[ra]._u32[2] & 1) << 2) | ((CPU.GPR[ra]._u32[3] & 1) << 3); CPU.GPR[rt]._u32[2] = 0; CPU.GPR[rt]._u64[0] = 0; + WRAPPER_END(rt, ra, 0, 0); + // TODO } void GBH(u32 rt, u32 ra) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, yy, zz); u32 temp = 0; for (int h = 0; h < 8; h++) temp |= (CPU.GPR[ra]._u16[h] & 1) << h; CPU.GPR[rt]._u32[3] = temp; CPU.GPR[rt]._u32[2] = 0; CPU.GPR[rt]._u64[0] = 0; + WRAPPER_END(rt, ra, 0, 0); } void GBB(u32 rt, u32 ra) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, yy, zz); u32 temp = 0; for (int b = 0; b < 16; b++) temp |= (CPU.GPR[ra]._u8[b] & 1) << b; CPU.GPR[rt]._u32[3] = temp; CPU.GPR[rt]._u32[2] = 0; CPU.GPR[rt]._u64[0] = 0; + WRAPPER_END(rt, ra, 0, 0); } void FSM(u32 rt, u32 ra) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, yy, zz); const u32 pref = CPU.GPR[ra]._u32[3]; for (int w = 0; w < 4; w++) CPU.GPR[rt]._u32[w] = (pref & (1 << w)) ? ~0 : 0; + WRAPPER_END(rt, ra, 0, 0); } void FSMH(u32 rt, u32 ra) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, yy, zz); const u32 pref = CPU.GPR[ra]._u32[3]; for (int h = 0; h < 8; h++) CPU.GPR[rt]._u16[h] = (pref & (1 << h)) ? ~0 : 0; + WRAPPER_END(rt, ra, 0, 0); } void FSMB(u32 rt, u32 ra) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, yy, zz); const u32 pref = CPU.GPR[ra]._u32[3]; for (int b = 0; b < 16; b++) CPU.GPR[rt]._u8[b] = (pref & (1 << b)) ? 
~0 : 0; + WRAPPER_END(rt, ra, 0, 0); } void FREST(u32 rt, u32 ra) { - UNIMPLEMENTED(); - //CPU.GPR[rt]._m128 = _mm_rcp_ps(CPU.GPR[ra]._m128); + WRAPPER_BEGIN(rt, ra, yy, zz); for (int i = 0; i < 4; i++) CPU.GPR[rt]._f[i] = 1 / CPU.GPR[ra]._f[i]; + WRAPPER_END(rt, ra, 0, 0); + /*XmmVar v0(c); + c.rcpps(v0, cpu_xmm(GPR[ra])); + c.movaps(cpu_xmm(GPR[rt]), v0);*/ } void FRSQEST(u32 rt, u32 ra) { - UNIMPLEMENTED(); - //const __u32x4 FloatAbsMask = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}; - //CPU.GPR[rt]._m128 = _mm_rsqrt_ps(_mm_and_ps(CPU.GPR[ra]._m128, FloatAbsMask.m128)); + WRAPPER_BEGIN(rt, ra, yy, zz); for (int i = 0; i < 4; i++) CPU.GPR[rt]._f[i] = 1 / sqrt(abs(CPU.GPR[ra]._f[i])); + WRAPPER_END(rt, ra, 0, 0); + /*XmmVar v0(c); + c.movaps(v0, cpu_xmm(GPR[ra])); + c.andps(v0, imm_xmm(max_int)); + c.rsqrtps(v0, v0); + c.movaps(cpu_xmm(GPR[rt]), v0);*/ } void LQX(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); u32 a = CPU.GPR[ra]._u32[3], b = CPU.GPR[rb]._u32[3]; u32 lsa = (a + b) & 0x3fff0; @@ -525,218 +731,290 @@ private: } CPU.GPR[rt]._u128 = CPU.ReadLS128(lsa); + WRAPPER_END(rt, ra, rb, 0); } void ROTQBYBI(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); const int s = (CPU.GPR[rb]._u32[3] >> 3) & 0xf; const SPU_GPR_hdr temp = CPU.GPR[ra]; for (int b = 0; b < 16; b++) CPU.GPR[rt]._u8[b] = temp._u8[(b - s) & 0xf]; + WRAPPER_END(rt, ra, rb, 0); } void ROTQMBYBI(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); const int s = (0 - (CPU.GPR[rb]._u32[3] >> 3)) & 0x1f; const SPU_GPR_hdr temp = CPU.GPR[ra]; CPU.GPR[rt].Reset(); for (int b = 0; b < 16 - s; b++) CPU.GPR[rt]._u8[b] = temp._u8[b + s]; + WRAPPER_END(rt, ra, rb, 0); } void SHLQBYBI(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); const int s = (CPU.GPR[rb]._u32[3] >> 3) & 0x1f; const SPU_GPR_hdr temp = CPU.GPR[ra]; CPU.GPR[rt].Reset(); for (int b = s; b < 16; b++) CPU.GPR[rt]._u8[b] = temp._u8[b - s]; + WRAPPER_END(rt, ra, rb, 0); } void CBX(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); const u32 t = (CPU.GPR[rb]._u32[3] + CPU.GPR[ra]._u32[3]) & 0xF; CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F; CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617; CPU.GPR[rt]._u8[15 - t] = 0x03; + WRAPPER_END(rt, ra, rb, 0); } void CHX(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); const u32 t = (CPU.GPR[rb]._u32[3] + CPU.GPR[ra]._u32[3]) & 0xE; CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F; CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617; CPU.GPR[rt]._u16[7 - (t >> 1)] = 0x0203; + WRAPPER_END(rt, ra, rb, 0); } void CWX(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); const u32 t = (CPU.GPR[ra]._u32[3] + CPU.GPR[rb]._u32[3]) & 0xC; CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F; CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617; CPU.GPR[rt]._u32[3 - (t >> 2)] = 0x00010203; + WRAPPER_END(rt, ra, rb, 0); } void CDX(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); const u32 t = (CPU.GPR[rb]._u32[3] + CPU.GPR[ra]._u32[3]) & 0x8; CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F; CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617; CPU.GPR[rt]._u64[1 - (t >> 3)] = (u64)0x0001020304050607; + WRAPPER_END(rt, ra, rb, 0); } void ROTQBI(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); const int t = CPU.GPR[rb]._u32[3] & 0x7; const SPU_GPR_hdr temp = CPU.GPR[ra]; CPU.GPR[rt]._u32[0] = (temp._u32[0] << t) | 
(temp._u32[3] >> (32 - t)); CPU.GPR[rt]._u32[1] = (temp._u32[1] << t) | (temp._u32[0] >> (32 - t)); CPU.GPR[rt]._u32[2] = (temp._u32[2] << t) | (temp._u32[1] >> (32 - t)); CPU.GPR[rt]._u32[3] = (temp._u32[3] << t) | (temp._u32[2] >> (32 - t)); + WRAPPER_END(rt, ra, rb, 0); } void ROTQMBI(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); const int t = (0 - CPU.GPR[rb]._u32[3]) & 0x7; const SPU_GPR_hdr temp = CPU.GPR[ra]; CPU.GPR[rt]._u32[0] = (temp._u32[0] >> t) | (temp._u32[1] << (32 - t)); CPU.GPR[rt]._u32[1] = (temp._u32[1] >> t) | (temp._u32[2] << (32 - t)); CPU.GPR[rt]._u32[2] = (temp._u32[2] >> t) | (temp._u32[3] << (32 - t)); CPU.GPR[rt]._u32[3] = (temp._u32[3] >> t); + WRAPPER_END(rt, ra, rb, 0); } void SHLQBI(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); const int t = CPU.GPR[rb]._u32[3] & 0x7; const SPU_GPR_hdr temp = CPU.GPR[ra]; CPU.GPR[rt]._u32[0] = (temp._u32[0] << t); CPU.GPR[rt]._u32[1] = (temp._u32[1] << t) | (temp._u32[0] >> (32 - t)); CPU.GPR[rt]._u32[2] = (temp._u32[2] << t) | (temp._u32[1] >> (32 - t)); CPU.GPR[rt]._u32[3] = (temp._u32[3] << t) | (temp._u32[2] >> (32 - t)); + WRAPPER_END(rt, ra, rb, 0); } void ROTQBY(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); const int s = CPU.GPR[rb]._u32[3] & 0xf; const SPU_GPR_hdr temp = CPU.GPR[ra]; for (int b = 0; b < 16; ++b) CPU.GPR[rt]._u8[b] = temp._u8[(b - s) & 0xf]; + WRAPPER_END(rt, ra, rb, 0); } void ROTQMBY(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); const int s = (0 - CPU.GPR[rb]._u32[3]) & 0x1f; const SPU_GPR_hdr temp = CPU.GPR[ra]; CPU.GPR[rt].Reset(); for (int b = 0; b < 16 - s; b++) CPU.GPR[rt]._u8[b] = temp._u8[b + s]; + WRAPPER_END(rt, ra, rb, 0); } void SHLQBY(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); const int s = CPU.GPR[rb]._u32[3] & 0x1f; const SPU_GPR_hdr temp = CPU.GPR[ra]; CPU.GPR[rt].Reset(); for (int b = s; b < 16; b++) CPU.GPR[rt]._u8[b] = temp._u8[b - s]; + WRAPPER_END(rt, ra, rb, 0); } void ORX(u32 rt, u32 ra) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, yy, zz); CPU.GPR[rt]._u32[3] = CPU.GPR[ra]._u32[0] | CPU.GPR[ra]._u32[1] | CPU.GPR[ra]._u32[2] | CPU.GPR[ra]._u32[3]; CPU.GPR[rt]._u32[2] = 0; CPU.GPR[rt]._u64[0] = 0; + WRAPPER_END(rt, ra, 0, 0); } void CBD(u32 rt, u32 ra, s32 i7) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i7, zz); const int t = (CPU.GPR[ra]._u32[3] + i7) & 0xF; CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F; CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617; CPU.GPR[rt]._u8[15 - t] = 0x03; + WRAPPER_END(rt, ra, i7, 0); } void CHD(u32 rt, u32 ra, s32 i7) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i7, zz); const int t = (CPU.GPR[ra]._u32[3] + i7) & 0xE; CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F; CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617; CPU.GPR[rt]._u16[7 - (t >> 1)] = 0x0203; + WRAPPER_END(rt, ra, i7, 0); } void CWD(u32 rt, u32 ra, s32 i7) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i7, zz); const int t = (CPU.GPR[ra]._u32[3] + i7) & 0xC; CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F; CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617; CPU.GPR[rt]._u32[3 - (t >> 2)] = 0x00010203; + WRAPPER_END(rt, ra, i7, 0); + // TODO } void CDD(u32 rt, u32 ra, s32 i7) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i7, zz); const int t = (CPU.GPR[ra]._u32[3] + i7) & 0x8; CPU.GPR[rt]._u64[0] = (u64)0x18191A1B1C1D1E1F; CPU.GPR[rt]._u64[1] = (u64)0x1011121314151617; CPU.GPR[rt]._u64[1 - (t >> 3)] = (u64)0x0001020304050607; + WRAPPER_END(rt, 
ra, i7, 0); } void ROTQBII(u32 rt, u32 ra, s32 i7) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i7, zz); const int s = i7 & 0x7; const SPU_GPR_hdr temp = CPU.GPR[ra]; CPU.GPR[rt]._u32[0] = (temp._u32[0] << s) | (temp._u32[3] >> (32 - s)); CPU.GPR[rt]._u32[1] = (temp._u32[1] << s) | (temp._u32[0] >> (32 - s)); CPU.GPR[rt]._u32[2] = (temp._u32[2] << s) | (temp._u32[1] >> (32 - s)); CPU.GPR[rt]._u32[3] = (temp._u32[3] << s) | (temp._u32[2] >> (32 - s)); + WRAPPER_END(rt, ra, i7, 0); } void ROTQMBII(u32 rt, u32 ra, s32 i7) { - UNIMPLEMENTED(); - const int s = (0 - i7) & 0x7; + WRAPPER_BEGIN(rt, ra, i7, zz); + const int s = (0 - (s32)i7) & 0x7; const SPU_GPR_hdr temp = CPU.GPR[ra]; CPU.GPR[rt]._u32[0] = (temp._u32[0] >> s) | (temp._u32[1] << (32 - s)); CPU.GPR[rt]._u32[1] = (temp._u32[1] >> s) | (temp._u32[2] << (32 - s)); CPU.GPR[rt]._u32[2] = (temp._u32[2] >> s) | (temp._u32[3] << (32 - s)); CPU.GPR[rt]._u32[3] = (temp._u32[3] >> s); + WRAPPER_END(rt, ra, i7, 0); } void SHLQBII(u32 rt, u32 ra, s32 i7) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i7, zz); const int s = i7 & 0x7; const SPU_GPR_hdr temp = CPU.GPR[ra]; CPU.GPR[rt]._u32[0] = (temp._u32[0] << s); CPU.GPR[rt]._u32[1] = (temp._u32[1] << s) | (temp._u32[0] >> (32 - s)); CPU.GPR[rt]._u32[2] = (temp._u32[2] << s) | (temp._u32[1] >> (32 - s)); CPU.GPR[rt]._u32[3] = (temp._u32[3] << s) | (temp._u32[2] >> (32 - s)); + WRAPPER_END(rt, ra, i7, 0); } void ROTQBYI(u32 rt, u32 ra, s32 i7) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i7, zz); const int s = i7 & 0xf; const SPU_GPR_hdr temp = CPU.GPR[ra]; for (int b = 0; b < 16; b++) CPU.GPR[rt]._u8[b] = temp._u8[(b - s) & 0xf]; + WRAPPER_END(rt, ra, i7, 0); + /*const int s = i7 & 0xf; + + XmmVar v0(c); + XmmVar v1(c); + c.movdqa(v0, cpu_xmm(GPR[ra])); + c.movdqa(v1, v0); + c.pslldq(v0, s); + c.psrldq(v1, 0xf - s); + c.por(v0, v1); + c.movdqa(cpu_xmm(GPR[rt]), v0);*/ } void ROTQMBYI(u32 rt, u32 ra, s32 i7) { - UNIMPLEMENTED(); - const int s = (0 - i7) & 0x1f; + WRAPPER_BEGIN(rt, ra, i7, zz); + const int s = (0 - (s32)i7) & 0x1f; const SPU_GPR_hdr temp = CPU.GPR[ra]; CPU.GPR[rt].Reset(); for (int b = 0; b < 16 - s; b++) CPU.GPR[rt]._u8[b] = temp._u8[b + s]; + WRAPPER_END(rt, ra, i7, 0); + /*const int s = (0 - i7) & 0x1f; + + XmmVar v0(c); + if (s == 0) + { + if (ra == rt) + { + // nop + } + else + { + // mov + c.movaps(v0, cpu_xmm(GPR[ra])); + c.movaps(cpu_xmm(GPR[rt]), v0); + } + } + else if (s > 15) + { + // zero + c.xorps(v0, v0); + c.movaps(cpu_xmm(GPR[rt]), v0); + } + else + { + // shift right + c.movdqa(v0, cpu_xmm(GPR[ra])); + c.psrldq(v0, s); + c.movdqa(cpu_xmm(GPR[rt]), v0); + }*/ } void SHLQBYI(u32 rt, u32 ra, s32 i7) { + WRAPPER_BEGIN(rt, ra, i7, zz); const int s = i7 & 0x1f; + const SPU_GPR_hdr temp = CPU.GPR[ra]; + CPU.GPR[rt].Reset(); + for (int b = s; b < 16; b++) + CPU.GPR[rt]._u8[b] = temp._u8[b - s]; + WRAPPER_END(rt, ra, i7, 0); + /*const int s = i7 & 0x1f; + XmmVar v0(c); if (s == 0) { @@ -763,45 +1041,49 @@ private: c.movdqa(v0, cpu_xmm(GPR[ra])); c.pslldq(v0, s); c.movdqa(cpu_xmm(GPR[rt]), v0); - } + }*/ } void NOP(u32 rt) { - UNIMPLEMENTED(); } void CGT(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int w = 0; w < 4; w++) CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._i32[w] > CPU.GPR[rb]._i32[w] ? 
0xffffffff : 0; + WRAPPER_END(rt, ra, rb, 0); } void XOR(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int w = 0; w < 4; w++) CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._u32[w] ^ CPU.GPR[rb]._u32[w]; + WRAPPER_END(rt, ra, rb, 0); } void CGTH(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int h = 0; h < 8; h++) CPU.GPR[rt]._u16[h] = CPU.GPR[ra]._i16[h] > CPU.GPR[rb]._i16[h] ? 0xffff : 0; + WRAPPER_END(rt, ra, rb, 0); } void EQV(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int w = 0; w < 4; w++) CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._u32[w] ^ (~CPU.GPR[rb]._u32[w]); + WRAPPER_END(rt, ra, rb, 0); } void CGTB(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int b = 0; b < 16; b++) CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._i8[b] > CPU.GPR[rb]._i8[b] ? 0xff : 0; + WRAPPER_END(rt, ra, rb, 0); } void SUMB(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); const SPU_GPR_hdr _a = CPU.GPR[ra]; const SPU_GPR_hdr _b = CPU.GPR[rb]; for (int w = 0; w < 4; w++) @@ -809,6 +1091,7 @@ private: CPU.GPR[rt]._u16[w*2] = _a._u8[w*4] + _a._u8[w*4 + 1] + _a._u8[w*4 + 2] + _a._u8[w*4 + 3]; CPU.GPR[rt]._u16[w*2 + 1] = _b._u8[w*4] + _b._u8[w*4 + 1] + _b._u8[w*4 + 2] + _b._u8[w*4 + 3]; } + WRAPPER_END(rt, ra, rb, 0); } //HGT uses signed values. HLGT uses unsigned values void HGT(u32 rt, s32 ra, s32 rb) @@ -818,7 +1101,7 @@ private: } void CLZ(u32 rt, u32 ra) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, yy, zz); for (int w = 0; w < 4; w++) { int nPos; @@ -829,135 +1112,178 @@ private: CPU.GPR[rt]._u32[w] = nPos; } + WRAPPER_END(rt, ra, 0, 0); } void XSWD(u32 rt, u32 ra) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, yy, zz); CPU.GPR[rt]._i64[0] = (s64)CPU.GPR[ra]._i32[0]; CPU.GPR[rt]._i64[1] = (s64)CPU.GPR[ra]._i32[2]; + WRAPPER_END(rt, ra, 0, 0); } void XSHW(u32 rt, u32 ra) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, yy, zz); for (int w = 0; w < 4; w++) CPU.GPR[rt]._i32[w] = (s32)CPU.GPR[ra]._i16[w*2]; + WRAPPER_END(rt, ra, 0, 0); } void CNTB(u32 rt, u32 ra) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, yy, zz); const SPU_GPR_hdr temp = CPU.GPR[ra]; CPU.GPR[rt].Reset(); for (int b = 0; b < 16; b++) for (int i = 0; i < 8; i++) CPU.GPR[rt]._u8[b] += (temp._u8[b] & (1 << i)) ? 1 : 0; + WRAPPER_END(rt, ra, 0, 0); } void XSBH(u32 rt, u32 ra) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, yy, zz); for (int h = 0; h < 8; h++) CPU.GPR[rt]._i16[h] = (s16)CPU.GPR[ra]._i8[h*2]; + WRAPPER_END(rt, ra, 0, 0); } void CLGT(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); - for(u32 i = 0; i < 4; ++i) + WRAPPER_BEGIN(rt, ra, rb, zz); + for (u32 i = 0; i < 4; ++i) { CPU.GPR[rt]._u32[i] = (CPU.GPR[ra]._u32[i] > CPU.GPR[rb]._u32[i]) ? 0xffffffff : 0x00000000; } + WRAPPER_END(rt, ra, rb, 0); + /*XmmVar v0(c); + if (ra == rb) + { + // zero + c.xorps(v0, v0); + c.movaps(cpu_xmm(GPR[rt]), v0); + } + else + { + // compare if-greater-then + c.movdqa(v0, cpu_xmm(GPR[rb])); + c.psubd(v0, cpu_xmm(GPR[ra])); + c.psrad(v0, 32); + c.movdqa(cpu_xmm(GPR[rt]), v0); + }*/ } void ANDC(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int w = 0; w < 4; w++) CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._u32[w] & (~CPU.GPR[rb]._u32[w]); + WRAPPER_END(rt, ra, rb, 0); + // TODO } void FCGT(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._u32[0] = CPU.GPR[ra]._f[0] > CPU.GPR[rb]._f[0] ? 
0xffffffff : 0; CPU.GPR[rt]._u32[1] = CPU.GPR[ra]._f[1] > CPU.GPR[rb]._f[1] ? 0xffffffff : 0; CPU.GPR[rt]._u32[2] = CPU.GPR[ra]._f[2] > CPU.GPR[rb]._f[2] ? 0xffffffff : 0; CPU.GPR[rt]._u32[3] = CPU.GPR[ra]._f[3] > CPU.GPR[rb]._f[3] ? 0xffffffff : 0; + WRAPPER_END(rt, ra, rb, 0); + // TODO } void DFCGT(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._u64[0] = CPU.GPR[ra]._d[0] > CPU.GPR[rb]._d[0] ? 0xffffffffffffffff : 0; CPU.GPR[rt]._u64[1] = CPU.GPR[ra]._d[1] > CPU.GPR[rb]._d[1] ? 0xffffffffffffffff : 0; + WRAPPER_END(rt, ra, rb, 0);; } void FA(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._f[0] = CPU.GPR[ra]._f[0] + CPU.GPR[rb]._f[0]; CPU.GPR[rt]._f[1] = CPU.GPR[ra]._f[1] + CPU.GPR[rb]._f[1]; CPU.GPR[rt]._f[2] = CPU.GPR[ra]._f[2] + CPU.GPR[rb]._f[2]; CPU.GPR[rt]._f[3] = CPU.GPR[ra]._f[3] + CPU.GPR[rb]._f[3]; + WRAPPER_END(rt, ra, rb, 0); + // TODO } void FS(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._f[0] = CPU.GPR[ra]._f[0] - CPU.GPR[rb]._f[0]; CPU.GPR[rt]._f[1] = CPU.GPR[ra]._f[1] - CPU.GPR[rb]._f[1]; CPU.GPR[rt]._f[2] = CPU.GPR[ra]._f[2] - CPU.GPR[rb]._f[2]; CPU.GPR[rt]._f[3] = CPU.GPR[ra]._f[3] - CPU.GPR[rb]._f[3]; + WRAPPER_END(rt, ra, rb, 0); + // TODO } void FM(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._f[0] = CPU.GPR[ra]._f[0] * CPU.GPR[rb]._f[0]; CPU.GPR[rt]._f[1] = CPU.GPR[ra]._f[1] * CPU.GPR[rb]._f[1]; CPU.GPR[rt]._f[2] = CPU.GPR[ra]._f[2] * CPU.GPR[rb]._f[2]; CPU.GPR[rt]._f[3] = CPU.GPR[ra]._f[3] * CPU.GPR[rb]._f[3]; + WRAPPER_END(rt, ra, rb, 0); + /*XmmVar v0(c); + c.movaps(v0, cpu_xmm(GPR[ra])); + c.mulps(v0, cpu_xmm(GPR[rb])); + c.movaps(cpu_xmm(GPR[rt]), v0);*/ } void CLGTH(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int h = 0; h < 8; h++) CPU.GPR[rt]._u16[h] = CPU.GPR[ra]._u16[h] > CPU.GPR[rb]._u16[h] ? 0xffff : 0; + WRAPPER_END(rt, ra, rb, 0); } void ORC(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int w = 0; w < 4; w++) CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._u32[w] | (~CPU.GPR[rb]._u32[w]); + WRAPPER_END(rt, ra, rb, 0); } void FCMGT(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._u32[0] = fabs(CPU.GPR[ra]._f[0]) > fabs(CPU.GPR[rb]._f[0]) ? 0xffffffff : 0; CPU.GPR[rt]._u32[1] = fabs(CPU.GPR[ra]._f[1]) > fabs(CPU.GPR[rb]._f[1]) ? 0xffffffff : 0; CPU.GPR[rt]._u32[2] = fabs(CPU.GPR[ra]._f[2]) > fabs(CPU.GPR[rb]._f[2]) ? 0xffffffff : 0; CPU.GPR[rt]._u32[3] = fabs(CPU.GPR[ra]._f[3]) > fabs(CPU.GPR[rb]._f[3]) ? 0xffffffff : 0; + WRAPPER_END(rt, ra, rb, 0); } void DFCMGT(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._u64[0] = fabs(CPU.GPR[ra]._d[0]) > fabs(CPU.GPR[rb]._d[0]) ? 0xffffffffffffffff : 0; CPU.GPR[rt]._u64[1] = fabs(CPU.GPR[ra]._d[1]) > fabs(CPU.GPR[rb]._d[1]) ? 
0xffffffffffffffff : 0; + WRAPPER_END(rt, ra, rb, 0); } void DFA(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._d[0] = CPU.GPR[ra]._d[0] + CPU.GPR[rb]._d[0]; CPU.GPR[rt]._d[1] = CPU.GPR[ra]._d[1] + CPU.GPR[rb]._d[1]; + WRAPPER_END(rt, ra, rb, 0); } void DFS(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._d[0] = CPU.GPR[ra]._d[0] - CPU.GPR[rb]._d[0]; CPU.GPR[rt]._d[1] = CPU.GPR[ra]._d[1] - CPU.GPR[rb]._d[1]; + WRAPPER_END(rt, ra, rb, 0); } void DFM(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._d[0] = CPU.GPR[ra]._d[0] * CPU.GPR[rb]._d[0]; CPU.GPR[rt]._d[1] = CPU.GPR[ra]._d[1] * CPU.GPR[rb]._d[1]; + WRAPPER_END(rt, ra, rb, 0); } void CLGTB(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int b = 0; b < 16; b++) CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b] > CPU.GPR[rb]._u8[b] ? 0xff : 0; + WRAPPER_END(rt, ra, rb, 0); } void HLGT(u32 rt, u32 ra, u32 rb) { @@ -966,61 +1292,80 @@ private: } void DFMA(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._d[0] += CPU.GPR[ra]._d[0] * CPU.GPR[rb]._d[0]; CPU.GPR[rt]._d[1] += CPU.GPR[ra]._d[1] * CPU.GPR[rb]._d[1]; + WRAPPER_END(rt, ra, rb, 0); } void DFMS(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._d[0] = CPU.GPR[ra]._d[0] * CPU.GPR[rb]._d[0] - CPU.GPR[rt]._d[0]; CPU.GPR[rt]._d[1] = CPU.GPR[ra]._d[1] * CPU.GPR[rb]._d[1] - CPU.GPR[rt]._d[1]; + WRAPPER_END(rt, ra, rb, 0); } void DFNMS(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._d[0] -= CPU.GPR[ra]._d[0] * CPU.GPR[rb]._d[0]; CPU.GPR[rt]._d[1] -= CPU.GPR[ra]._d[1] * CPU.GPR[rb]._d[1]; + WRAPPER_END(rt, ra, rb, 0); } void DFNMA(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._d[0] = -(CPU.GPR[ra]._d[0] * CPU.GPR[rb]._d[0] + CPU.GPR[rt]._d[0]); CPU.GPR[rt]._d[1] = -(CPU.GPR[ra]._d[1] * CPU.GPR[rb]._d[1] + CPU.GPR[rt]._d[1]); + WRAPPER_END(rt, ra, rb, 0); } void CEQ(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int w = 0; w < 4; w++) CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._i32[w] == CPU.GPR[rb]._i32[w] ? 
0xffffffff : 0; + WRAPPER_END(rt, ra, rb, 0); } void MPYHHU(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int w = 0; w < 4; w++) CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._u16[w*2+1] * CPU.GPR[rb]._u16[w*2+1]; + WRAPPER_END(rt, ra, rb, 0); } void ADDX(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int w = 0; w < 4; w++) CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._u32[w] + CPU.GPR[rb]._u32[w] + (CPU.GPR[rt]._u32[w] & 1); + WRAPPER_END(rt, ra, rb, 0); + // TODO } void SFX(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int w = 0; w < 4; w++) CPU.GPR[rt]._u32[w] = CPU.GPR[rb]._u32[w] - CPU.GPR[ra]._u32[w] - (1 - (CPU.GPR[rt]._u32[w] & 1)); + WRAPPER_END(rt, ra, rb, 0); + /*XmmVar v0(c), v1(c), v2(c); + c.movdqa(v1, imm_xmm(s19_to_s32[1])); + c.movdqa(v0, cpu_xmm(GPR[rb])); + c.movdqa(v2, cpu_xmm(GPR[rt])); + c.psubd(v0, cpu_xmm(GPR[ra])); + c.pand(v2, v1); + c.paddd(v0, v2); + c.psubd(v0, v1); + c.movdqa(cpu_xmm(GPR[rt]), v0);*/ } void CGX(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int w = 0; w < 4; w++) CPU.GPR[rt]._u32[w] = ((u64)CPU.GPR[ra]._u32[w] + (u64)CPU.GPR[rb]._u32[w] + (u64)(CPU.GPR[rt]._u32[w] & 1)) >> 32; + WRAPPER_END(rt, ra, rb, 0); } void BGX(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); s64 nResult; for (int w = 0; w < 4; w++) @@ -1028,18 +1373,21 @@ private: nResult = (u64)CPU.GPR[rb]._u32[w] - (u64)CPU.GPR[ra]._u32[w] - (u64)(1 - (CPU.GPR[rt]._u32[w] & 1)); CPU.GPR[rt]._u32[w] = nResult < 0 ? 0 : 1; } + WRAPPER_END(rt, ra, rb, 0); } void MPYHHA(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int w = 0; w < 4; w++) CPU.GPR[rt]._i32[w] += CPU.GPR[ra]._i16[w*2+1] * CPU.GPR[rb]._i16[w*2+1]; + WRAPPER_END(rt, ra, rb, 0); } void MPYHHAU(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int w = 0; w < 4; w++) CPU.GPR[rt]._u32[w] += CPU.GPR[ra]._u16[w*2+1] * CPU.GPR[rb]._u16[w*2+1]; + WRAPPER_END(rt, ra, rb, 0); } //Forced bits to 0, hence the shift: @@ -1052,17 +1400,19 @@ private: } void FESD(u32 rt, u32 ra) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, yy, zz); CPU.GPR[rt]._d[0] = (double)CPU.GPR[ra]._f[1]; CPU.GPR[rt]._d[1] = (double)CPU.GPR[ra]._f[3]; + WRAPPER_END(rt, ra, 0, 0); } void FRDS(u32 rt, u32 ra) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, yy, zz); CPU.GPR[rt]._f[1] = (float)CPU.GPR[ra]._d[0]; CPU.GPR[rt]._u32[0] = 0x00000000; CPU.GPR[rt]._f[3] = (float)CPU.GPR[ra]._d[1]; CPU.GPR[rt]._u32[2] = 0x00000000; + WRAPPER_END(rt, ra, 0, 0); } void FSCRWR(u32 rt, u32 ra) { @@ -1070,7 +1420,7 @@ private: } void DFTSV(u32 rt, u32 ra, s32 i7) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i7, zz); const u64 DoubleExpMask = 0x7ff0000000000000; const u64 DoubleFracMask = 0x000fffffffffffff; const u64 DoubleSignMask = 0x8000000000000000; @@ -1121,83 +1471,99 @@ private: if ((temp._u64[i] & DoubleExpMask) == DoubleExpMask) CPU.GPR[rt]._u64[i] = 0xffffffffffffffff; } + WRAPPER_END(rt, ra, i7, 0); } void FCEQ(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._u32[0] = CPU.GPR[ra]._f[0] == CPU.GPR[rb]._f[0] ? 0xffffffff : 0; CPU.GPR[rt]._u32[1] = CPU.GPR[ra]._f[1] == CPU.GPR[rb]._f[1] ? 0xffffffff : 0; CPU.GPR[rt]._u32[2] = CPU.GPR[ra]._f[2] == CPU.GPR[rb]._f[2] ? 0xffffffff : 0; CPU.GPR[rt]._u32[3] = CPU.GPR[ra]._f[3] == CPU.GPR[rb]._f[3] ? 
0xffffffff : 0; + WRAPPER_END(rt, ra, rb, 0); } void DFCEQ(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._u64[0] = CPU.GPR[ra]._d[0] == CPU.GPR[rb]._d[0] ? 0xffffffffffffffff : 0; CPU.GPR[rt]._u64[1] = CPU.GPR[ra]._d[1] == CPU.GPR[rb]._d[1] ? 0xffffffffffffffff : 0; + WRAPPER_END(rt, ra, rb, 0); } void MPY(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int w = 0; w < 4; w++) CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i16[w*2] * CPU.GPR[rb]._i16[w*2]; + WRAPPER_END(rt, ra, rb, 0); } void MPYH(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int w = 0; w < 4; w++) CPU.GPR[rt]._i32[w] = (CPU.GPR[ra]._i16[w*2+1] * CPU.GPR[rb]._i16[w*2]) << 16; + WRAPPER_END(rt, ra, rb, 0); + // TODO } void MPYHH(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int w = 0; w < 4; w++) CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i16[w*2+1] * CPU.GPR[rb]._i16[w*2+1]; + WRAPPER_END(rt, ra, rb, 0); } void MPYS(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int w = 0; w < 4; w++) CPU.GPR[rt]._i32[w] = (CPU.GPR[ra]._i16[w*2] * CPU.GPR[rb]._i16[w*2]) >> 16; + WRAPPER_END(rt, ra, rb, 0); } void CEQH(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int h = 0; h < 8; h++) CPU.GPR[rt]._u16[h] = CPU.GPR[ra]._u16[h] == CPU.GPR[rb]._u16[h] ? 0xffff : 0; + WRAPPER_END(rt, ra, rb, 0); } void FCMEQ(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._u32[0] = fabs(CPU.GPR[ra]._f[0]) == fabs(CPU.GPR[rb]._f[0]) ? 0xffffffff : 0; CPU.GPR[rt]._u32[1] = fabs(CPU.GPR[ra]._f[1]) == fabs(CPU.GPR[rb]._f[1]) ? 0xffffffff : 0; CPU.GPR[rt]._u32[2] = fabs(CPU.GPR[ra]._f[2]) == fabs(CPU.GPR[rb]._f[2]) ? 0xffffffff : 0; CPU.GPR[rt]._u32[3] = fabs(CPU.GPR[ra]._f[3]) == fabs(CPU.GPR[rb]._f[3]) ? 0xffffffff : 0; + WRAPPER_END(rt, ra, rb, 0); } void DFCMEQ(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt]._u64[0] = fabs(CPU.GPR[ra]._d[0]) == fabs(CPU.GPR[rb]._d[0]) ? 0xffffffffffffffff : 0; CPU.GPR[rt]._u64[1] = fabs(CPU.GPR[ra]._d[1]) == fabs(CPU.GPR[rb]._d[1]) ? 0xffffffffffffffff : 0; + WRAPPER_END(rt, ra, rb, 0); } void MPYU(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int w = 0; w < 4; w++) CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._u16[w*2] * CPU.GPR[rb]._u16[w*2]; + WRAPPER_END(rt, ra, rb, 0); + // TODO } void CEQB(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, zz); for (int b = 0; b < 16; b++) CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b] == CPU.GPR[rb]._u8[b] ? 0xff : 0; + WRAPPER_END(rt, ra, rb, 0); } void FI(u32 rt, u32 ra, u32 rb) { - UNIMPLEMENTED(); - //Floating Interpolation: ra will be ignored. 
- //It should work correctly if result of preceding FREST or FRSQEST is sufficiently exact + WRAPPER_BEGIN(rt, ra, rb, zz); CPU.GPR[rt] = CPU.GPR[rb]; + WRAPPER_END(rt, ra, rb, 0); + /*XmmVar v0(c); + c.movaps(v0, cpu_xmm(GPR[rb])); + c.movaps(cpu_xmm(GPR[rt]), v0);*/ } void HEQ(u32 rt, u32 ra, u32 rb) { @@ -1208,30 +1574,38 @@ private: //0 - 9 void CFLTS(u32 rt, u32 ra, s32 i8) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i8, zz); const u32 scale = 173 - (i8 & 0xff); //unsigned immediate for (int i = 0; i < 4; i++) { u32 exp = ((CPU.GPR[ra]._u32[i] >> 23) & 0xff) + scale; - if (exp > 255) + if (exp > 255) exp = 255; CPU.GPR[rt]._u32[i] = (CPU.GPR[ra]._u32[i] & 0x807fffff) | (exp << 23); CPU.GPR[rt]._u32[i] = (u32)CPU.GPR[rt]._f[i]; //trunc } - //CPU.GPR[rt]._m128i = _mm_cvttps_epi32(CPU.GPR[rt]._m128); + WRAPPER_END(rt, ra, i8, 0); + /*XmmVar v0(c); + c.movaps(v0, cpu_xmm(GPR[ra])); + if (i8 != 173) + { + c.mulps(v0, imm_xmm(scale_to_int[i8 & 0xff])); // scale + } + c.cvtps2dq(v0, v0); // convert to ints + c.movdqa(cpu_xmm(GPR[rt]), v0);*/ } void CFLTU(u32 rt, u32 ra, s32 i8) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i8, zz); const u32 scale = 173 - (i8 & 0xff); //unsigned immediate for (int i = 0; i < 4; i++) { u32 exp = ((CPU.GPR[ra]._u32[i] >> 23) & 0xff) + scale; - if (exp > 255) + if (exp > 255) exp = 255; if (CPU.GPR[ra]._u32[i] & 0x80000000) //if negative, result = 0 @@ -1246,11 +1620,21 @@ private: CPU.GPR[rt]._u32[i] = floor(CPU.GPR[rt]._f[i]); } } + WRAPPER_END(rt, ra, i8, 0); + /*XmmVar v0(c); + c.movaps(v0, cpu_xmm(GPR[ra])); + if (i8 != 173) + { + c.mulps(v0, imm_xmm(scale_to_int[i8 & 0xff])); // scale + } + // TODO: handle negative values and convert to unsigned value + // c.int3(); + c.cvtps2dq(v0, v0); // convert to signed ints + c.movdqa(cpu_xmm(GPR[rt]), v0);*/ } void CSFLT(u32 rt, u32 ra, s32 i8) { - UNIMPLEMENTED(); - //CPU.GPR[rt]._m128 = _mm_cvtepi32_ps(CPU.GPR[ra]._m128i); + WRAPPER_BEGIN(rt, ra, i8, zz); const u32 scale = 155 - (i8 & 0xff); //unsigned immediate for (int i = 0; i < 4; i++) { @@ -1263,10 +1647,19 @@ private: CPU.GPR[rt]._u32[i] = (CPU.GPR[rt]._u32[i] & 0x807fffff) | (exp << 23); } + WRAPPER_END(rt, ra, i8, 0); + /*XmmVar v0(c); + c.movdqa(v0, cpu_xmm(GPR[ra])); + c.cvtdq2ps(v0, v0); // convert to floats + if (i8 != 155) + { + c.mulps(v0, imm_xmm(scale_to_float[i8 & 0xff])); // scale + } + c.movaps(cpu_xmm(GPR[rt]), v0);*/ } void CUFLT(u32 rt, u32 ra, s32 i8) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i8, zz); const u32 scale = 155 - (i8 & 0xff); //unsigned immediate for (int i = 0; i < 4; i++) { @@ -1278,18 +1671,33 @@ private: CPU.GPR[rt]._u32[i] = (CPU.GPR[rt]._u32[i] & 0x807fffff) | (exp << 23); } + WRAPPER_END(rt, ra, i8, 0); + /*XmmVar v0(c); + c.movdqa(v0, cpu_xmm(GPR[ra])); + // TODO: convert from unsigned value + // c.int3(); + c.cvtdq2ps(v0, v0); // convert to floats as signed + if (i8 != 155) + { + c.mulps(v0, imm_xmm(scale_to_float[i8 & 0xff])); // scale + } + c.movaps(cpu_xmm(GPR[rt]), v0);*/ } //0 - 8 void BRZ(u32 rt, s32 i16) { - UNIMPLEMENTED(); - if (CPU.GPR[rt]._u32[3] == 0) - CPU.SetBranch(branchTarget(CPU.PC, i16)); + do_finalize = true; + GpVar pos_next(c, kVarTypeUInt32); + c.mov(pos_next, (CPU.PC >> 2) + 1); + c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2); + c.cmp(cpu_dword(GPR[rt]._u32[3]), 0); + c.cmovnz(*pos_var, pos_next); + //ConLog.Write("BRZ(rt=%d,i16=%d)", rt, i16); } void STQA(u32 rt, s32 i16) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, i16, yy, zz); u32 lsa = (i16 << 2) & 0x3fff0; if(!CPU.IsGoodLSA(lsa)) { @@ 
-1299,30 +1707,43 @@ private: } CPU.WriteLS128(lsa, CPU.GPR[rt]._u128); + WRAPPER_END(rt, i16, 0, 0); } void BRNZ(u32 rt, s32 i16) { - UNIMPLEMENTED(); - if (CPU.GPR[rt]._u32[3] != 0) - CPU.SetBranch(branchTarget(CPU.PC, i16)); + do_finalize = true; + GpVar pos_next(c, kVarTypeUInt32); + c.mov(pos_next, (CPU.PC >> 2) + 1); + c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2); + c.cmp(cpu_dword(GPR[rt]._u32[3]), 0); + c.cmovz(*pos_var, pos_next); + //ConLog.Write("BRNZ(rt=%d,i16=%d)", rt, i16); } void BRHZ(u32 rt, s32 i16) { - UNIMPLEMENTED(); - if (CPU.GPR[rt]._u16[6] == 0) - CPU.SetBranch(branchTarget(CPU.PC, i16)); + do_finalize = true; + GpVar pos_next(c, kVarTypeUInt32); + c.mov(pos_next, (CPU.PC >> 2) + 1); + c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2); + c.cmp(cpu_word(GPR[rt]._u16[6]), 0); + c.cmovnz(*pos_var, pos_next); + ConLog.Write("BRHZ(rt=%d,i16=%d)", rt, i16); } void BRHNZ(u32 rt, s32 i16) { - UNIMPLEMENTED(); - if (CPU.GPR[rt]._u16[6] != 0) - CPU.SetBranch(branchTarget(CPU.PC, i16)); + do_finalize = true; + GpVar pos_next(c, kVarTypeUInt32); + c.mov(pos_next, (CPU.PC >> 2) + 1); + c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2); + c.cmp(cpu_word(GPR[rt]._u16[6]), 0); + c.cmovz(*pos_var, pos_next); + ConLog.Write("BRHNZ(rt=%d,i16=%d)", rt, i16); } void STQR(u32 rt, s32 i16) { - UNIMPLEMENTED(); - u32 lsa = branchTarget(CPU.PC, i16) & 0x3fff0; - if(!CPU.IsGoodLSA(lsa)) + WRAPPER_BEGIN(rt, i16, PC, zz); + u32 lsa = branchTarget(PC, i16) & 0x3fff0; + if (!CPU.IsGoodLSA(lsa)) { ConLog.Error("STQR: bad lsa (0x%x)", lsa); Emu.Pause(); @@ -1330,6 +1751,17 @@ private: } CPU.WriteLS128(lsa, CPU.GPR[rt]._u128); + WRAPPER_END(rt, i16, CPU.PC, 0); + /*u32 lsa = branchTarget(CPU.PC, i16) & 0x3fff0; + + GpVar v0(c, kVarTypeUInt64); + GpVar v1(c, kVarTypeUInt64); + c.mov(v0, cpu_qword(GPR[rt]._u64[0])); + c.mov(v1, cpu_qword(GPR[rt]._u64[1])); + c.bswap(v0); + c.bswap(v1); + c.mov(qword_ptr(*ls_var, lsa), v1); + c.mov(qword_ptr(*ls_var, lsa + 8), v0);*/ } void BRA(s32 i16) { @@ -1338,7 +1770,7 @@ private: } void LQA(u32 rt, s32 i16) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, i16, yy, zz); u32 lsa = (i16 << 2) & 0x3fff0; if(!CPU.IsGoodLSA(lsa)) { @@ -1348,6 +1780,7 @@ private: } CPU.GPR[rt]._u128 = CPU.ReadLS128(lsa); + WRAPPER_END(rt, i16, 0, 0); } void BRASL(u32 rt, s32 i16) { @@ -1358,25 +1791,57 @@ private: } void BR(s32 i16) { - UNIMPLEMENTED(); - CPU.SetBranch(branchTarget(CPU.PC, i16)); + do_finalize = true; + c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2); + //ConLog.Write("BR(i16=%d)", i16); } void FSMBI(u32 rt, s32 i16) { - XmmVar v0(c); + WRAPPER_BEGIN(rt, i16, yy, zz); + const u32 s = i16; + + for (u32 j = 0; j < 16; ++j) + { + if ((s >> j) & 0x1) + { + CPU.GPR[rt]._u8[j] = 0xFF; + } + else + { + CPU.GPR[rt]._u8[j] = 0x00; + } + } + WRAPPER_END(rt, i16, 0, 0); + /*XmmVar v0(c); c.movaps(v0, imm_xmm(fsmbi_mask[i16 & 0xffff])); - c.movaps(cpu_xmm(GPR[rt]), v0); + c.movaps(cpu_xmm(GPR[rt]), v0);*/ } void BRSL(u32 rt, s32 i16) { - UNIMPLEMENTED(); - CPU.GPR[rt].Reset(); - CPU.GPR[rt]._u32[3] = CPU.PC + 4; - CPU.SetBranch(branchTarget(CPU.PC, i16)); + GpVar v0(c, kVarTypeUInt64); + c.xor_(v0, v0); + c.mov(cpu_qword(GPR[rt]._u64[1]), v0); + c.mov(cpu_qword(GPR[rt]._u64[0]), v0); + c.mov(cpu_dword(GPR[rt]._u32[3]), CPU.PC + 4); + + do_finalize = true; + c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2); + //ConLog.Write("BRSL(rt=%d,i16=%d)", rt, i16); } void LQR(u32 rt, s32 i16) { - u32 lsa = branchTarget(CPU.PC, i16) & 0x3fff0; + WRAPPER_BEGIN(rt, i16, PC, zz); + u32 lsa = 
branchTarget(PC, i16) & 0x3fff0; + if (!CPU.IsGoodLSA(lsa)) + { + ConLog.Error("LQR: bad lsa (0x%x)", lsa); + Emu.Pause(); + return; + } + + CPU.GPR[rt]._u128 = CPU.ReadLS128(lsa); + WRAPPER_END(rt, i16, CPU.PC, 0); + /*u32 lsa = branchTarget(CPU.PC, i16) & 0x3fff0; GpVar v0(c, kVarTypeUInt64); GpVar v1(c, kVarTypeUInt64); @@ -1385,47 +1850,96 @@ private: c.bswap(v0); c.bswap(v1); c.mov(cpu_qword(GPR[rt]._u64[0]), v1); - c.mov(cpu_qword(GPR[rt]._u64[1]), v0); + c.mov(cpu_qword(GPR[rt]._u64[1]), v0);*/ } void IL(u32 rt, s32 i16) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, i16, yy, zz); CPU.GPR[rt]._i32[0] = CPU.GPR[rt]._i32[1] = CPU.GPR[rt]._i32[2] = CPU.GPR[rt]._i32[3] = i16; + WRAPPER_END(rt, i16, 0, 0); + /*XmmVar v0(c); + if (i16 == 0) + { + c.xorps(v0, v0); + } + else if (i16 == -1) + { + c.cmpps(v0, v0, 0); + } + else + { + c.movaps(v0, imm_xmm(s19_to_s32[i16 & 0x7ffff])); + } + c.movaps(cpu_xmm(GPR[rt]), v0);*/ } void ILHU(u32 rt, s32 i16) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, i16, yy, zz); for (int w = 0; w < 4; w++) CPU.GPR[rt]._i32[w] = i16 << 16; + WRAPPER_END(rt, i16, 0, 0); + /*XmmVar v0(c); + if (i16 == 0) + { + c.xorps(v0, v0); + } + else if (i16 == -1) + { + c.cmpps(v0, v0, 0); + c.pslld(v0, 16); + } + else + { + c.movaps(v0, imm_xmm(s19_to_s32[i16 & 0x7ffff])); + c.pslld(v0, 16); + } + c.movaps(cpu_xmm(GPR[rt]), v0);*/ } void ILH(u32 rt, s32 i16) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, i16, yy, zz); for (int h = 0; h < 8; h++) - CPU.GPR[rt]._i16[h] = i16; + CPU.GPR[rt]._i16[h] = (s32)i16; + WRAPPER_END(rt, i16, 0, 0); } void IOHL(u32 rt, s32 i16) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, i16, yy, zz); for (int w = 0; w < 4; w++) CPU.GPR[rt]._i32[w] |= (i16 & 0xFFFF); + WRAPPER_END(rt, i16, 0, 0); + /*XmmVar v0(c); + if (i16 == 0) + { + // nop + } + else + { + c.movaps(v0, cpu_xmm(GPR[rt])); + c.orps(v0, imm_xmm(s19_to_s32[i16 & 0xffff])); + c.movaps(cpu_xmm(GPR[rt]), v0); + }*/ } //0 - 7 void ORI(u32 rt, u32 ra, s32 i10) { - XmmVar v0(c); - if (i10 == 0) + WRAPPER_BEGIN(rt, ra, i10, zz); + for (u32 i = 0; i < 4; ++i) + CPU.GPR[rt]._i32[i] = CPU.GPR[ra]._i32[i] | (s32)i10; + WRAPPER_END(rt, ra, i10, 0); + /*XmmVar v0(c); + if (i10 == -1) { - // zero - c.xorps(v0, v0); + // fill with 1 + c.cmpps(v0, v0, 0); c.movaps(cpu_xmm(GPR[rt]), v0); } - else if (i10 == -1) + else if (i10 == 0) { if (rt == ra) { @@ -1443,53 +1957,68 @@ private: c.movaps(v0, cpu_xmm(GPR[ra])); c.orps(v0, imm_xmm(s19_to_s32[i10 & 0x7ffff])); c.movaps(cpu_xmm(GPR[rt]), v0); - } + }*/ } void ORHI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i10, zz); for (int h = 0; h < 8; h++) - CPU.GPR[rt]._i16[h] = CPU.GPR[ra]._i16[h] | i10; + CPU.GPR[rt]._i16[h] = CPU.GPR[ra]._i16[h] | (s32)i10; + WRAPPER_END(rt, ra, i10, 0); } void ORBI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i10, zz); for (int b = 0; b < 16; b++) - CPU.GPR[rt]._i8[b] = CPU.GPR[ra]._i8[b] | i10; + CPU.GPR[rt]._i8[b] = CPU.GPR[ra]._i8[b] | (s32)i10; + WRAPPER_END(rt, ra, i10, 0); } void SFI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i10, zz); for (int w = 0; w < 4; w++) - CPU.GPR[rt]._i32[w] = i10 - CPU.GPR[ra]._i32[w]; + CPU.GPR[rt]._i32[w] = (s32)i10 - CPU.GPR[ra]._i32[w]; + WRAPPER_END(rt, ra, i10, 0); + // TODO } void SFHI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i10, zz); for (int h = 0; h < 8; h++) - CPU.GPR[rt]._i16[h] = i10 - CPU.GPR[ra]._i16[h]; + CPU.GPR[rt]._i16[h] = (s32)i10 - CPU.GPR[ra]._i16[h]; + WRAPPER_END(rt, ra, i10, 0); } 
void ANDI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i10, zz); for (int w = 0; w < 4; w++) - CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i32[w] & i10; + CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i32[w] & (s32)i10; + WRAPPER_END(rt, ra, i10, 0); + // TODO } void ANDHI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i10, zz); for (int h = 0; h < 8; h++) - CPU.GPR[rt]._i16[h] = CPU.GPR[ra]._i16[h] & i10; + CPU.GPR[rt]._i16[h] = CPU.GPR[ra]._i16[h] & (s32)i10; + WRAPPER_END(rt, ra, i10, 0); } void ANDBI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i10, zz); for (int b = 0; b < 16; b++) - CPU.GPR[rt]._i8[b] = CPU.GPR[ra]._i8[b] & i10; + CPU.GPR[rt]._i8[b] = CPU.GPR[ra]._i8[b] & (s32)i10; + WRAPPER_END(rt, ra, i10, 0); } void AI(u32 rt, u32 ra, s32 i10) { - XmmVar v0(c); + WRAPPER_BEGIN(rt, ra, i10, zz); + CPU.GPR[rt]._i32[0] = CPU.GPR[ra]._i32[0] + i10; + CPU.GPR[rt]._i32[1] = CPU.GPR[ra]._i32[1] + i10; + CPU.GPR[rt]._i32[2] = CPU.GPR[ra]._i32[2] + i10; + CPU.GPR[rt]._i32[3] = CPU.GPR[ra]._i32[3] + i10; + WRAPPER_END(rt, ra, i10, 0); + /*XmmVar v0(c); if (i10 == 0) { if (rt == ra) @@ -1509,17 +2038,28 @@ private: c.movdqa(v0, cpu_xmm(GPR[ra])); c.paddd(v0, imm_xmm(s19_to_s32[i10 & 0x7ffff])); c.movdqa(cpu_xmm(GPR[rt]), v0); - } + }*/ } void AHI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i10, zz); for(u32 h = 0; h < 8; ++h) - CPU.GPR[rt]._i16[h] = CPU.GPR[ra]._i16[h] + i10; + CPU.GPR[rt]._i16[h] = CPU.GPR[ra]._i16[h] + (s32)i10; + WRAPPER_END(rt, ra, i10, 0); } void STQD(u32 rt, s32 i10, u32 ra) //i10 is shifted left by 4 while decoding { - GpVar lsa(c, kVarTypeUInt32); + WRAPPER_BEGIN(rt, i10, ra, zz); + const u32 lsa = (CPU.GPR[ra]._i32[3] + i10) & 0x3fff0; + if (!CPU.IsGoodLSA(lsa)) + { + ConLog.Error("STQD: bad lsa (0x%x)", lsa); + Emu.Pause(); + return; + } + CPU.WriteLS128(lsa, CPU.GPR[rt]._u128); + WRAPPER_END(rt, i10, ra, 0); + /*GpVar lsa(c, kVarTypeUInt32); GpVar v0(c, kVarTypeUInt64); GpVar v1(c, kVarTypeUInt64); @@ -1531,13 +2071,13 @@ private: c.bswap(v0); c.bswap(v1); c.mov(qword_ptr(*ls_var, lsa, 0, 0), v1); - c.mov(qword_ptr(*ls_var, lsa, 0, 8), v0); + c.mov(qword_ptr(*ls_var, lsa, 0, 8), v0);*/ } void LQD(u32 rt, s32 i10, u32 ra) //i10 is shifted left by 4 while decoding { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, i10, ra, zz); const u32 lsa = (CPU.GPR[ra]._i32[3] + i10) & 0x3fff0; - if(!CPU.IsGoodLSA(lsa)) + if (!CPU.IsGoodLSA(lsa)) { ConLog.Error("LQD: bad lsa (0x%x)", lsa); Emu.Pause(); @@ -1545,42 +2085,63 @@ private: } CPU.GPR[rt]._u128 = CPU.ReadLS128(lsa); + WRAPPER_END(rt, i10, ra, 0); + /*GpVar lsa(c, kVarTypeUInt32); + GpVar v0(c, kVarTypeUInt64); + GpVar v1(c, kVarTypeUInt64); + + c.mov(lsa, cpu_dword(GPR[ra]._u32[3])); + if (i10) c.add(lsa, i10); + c.and_(lsa, 0x3fff0); + c.mov(v0, qword_ptr(*ls_var, lsa, 0, 0)); + c.mov(v1, qword_ptr(*ls_var, lsa, 0, 8)); + c.bswap(v0); + c.bswap(v1); + c.mov(cpu_qword(GPR[rt]._u64[0]), v1); + c.mov(cpu_qword(GPR[rt]._u64[1]), v0);*/ } void XORI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i10, zz); for (int w = 0; w < 4; w++) - CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i32[w] ^ i10; + CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i32[w] ^ (s32)i10; + WRAPPER_END(rt, ra, i10, 0); } void XORHI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i10, zz); for (int h = 0; h < 8; h++) - CPU.GPR[rt]._i16[h] = CPU.GPR[ra]._i16[h] ^ i10; + CPU.GPR[rt]._i16[h] = CPU.GPR[ra]._i16[h] ^ (s32)i10; + WRAPPER_END(rt, ra, i10, 0); } void 
XORBI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i10, zz); for (int b = 0; b < 16; b++) - CPU.GPR[rt]._i8[b] = CPU.GPR[ra]._i8[b] ^ i10; + CPU.GPR[rt]._i8[b] = CPU.GPR[ra]._i8[b] ^ (s32)i10; + WRAPPER_END(rt, ra, i10, 0); } void CGTI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i10, zz); for (int w = 0; w < 4; w++) - CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._i32[w] > i10 ? 0xffffffff : 0; + CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._i32[w] > (s32)i10 ? 0xffffffff : 0; + WRAPPER_END(rt, ra, i10, 0); + // TODO } void CGTHI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i10, zz); for (int h = 0; h < 8; h++) - CPU.GPR[rt]._u16[h] = CPU.GPR[ra]._i16[h] > i10 ? 0xffff : 0; + CPU.GPR[rt]._u16[h] = CPU.GPR[ra]._i16[h] > (s32)i10 ? 0xffff : 0; + WRAPPER_END(rt, ra, i10, 0); } void CGTBI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i10, zz); for (int b = 0; b < 16; b++) CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._i8[b] > (s8)(i10 & 0xff) ? 0xff : 0; + WRAPPER_END(rt, ra, i10, 0); } void HGTI(u32 rt, u32 ra, s32 i10) { @@ -1589,25 +2150,48 @@ private: } void CLGTI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); - for(u32 i = 0; i < 4; ++i) + WRAPPER_BEGIN(rt, ra, i10, zz); + for (int w = 0; w < 4; w++) + CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._i32[w] > (s32)i10 ? 0xffffffff : 0; + WRAPPER_END(rt, ra, i10, 0); + /*XmmVar v0(c); + if (i10 == -1) { - CPU.GPR[rt]._u32[i] = (CPU.GPR[ra]._u32[i] > (u32)i10) ? 0xffffffff : 0x00000000; + // zero result + c.xorps(v0, v0); + c.movaps(cpu_xmm(GPR[rt]), v0); } + else + { + if (i10 == 0) + { + // load zero + c.pxor(v0, v0); + } + else + { + c.movdqa(v0, imm_xmm(s19_to_s32[i10 & 0x7ffff])); + } + c.psubd(v0, cpu_xmm(GPR[ra])); + c.psrad(v0, 32); + c.movdqa(cpu_xmm(GPR[rt]), v0); + }*/ } void CLGTHI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i10, zz); for(u32 i = 0; i < 8; ++i) { - CPU.GPR[rt]._u16[i] = (CPU.GPR[ra]._u16[i] > (u16)i10) ? 0xffff : 0x0000; + CPU.GPR[rt]._u16[i] = (CPU.GPR[ra]._u16[i] > (u16)(s32)i10) ? 0xffff : 0x0000; } + WRAPPER_END(rt, ra, i10, 0); } void CLGTBI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i10, zz); for (int b = 0; b < 16; b++) CPU.GPR[rt]._u8[b] = CPU.GPR[ra]._u8[b] > (u8)(i10 & 0xff) ? 0xff : 0; + WRAPPER_END(rt, ra, i10, 0); } void HLGTI(u32 rt, u32 ra, s32 i10) { @@ -1616,36 +2200,43 @@ private: } void MPYI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i10, zz); for (int w = 0; w < 4; w++) - CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i16[w*2] * i10; + CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i16[w*2] * (s32)i10; + WRAPPER_END(rt, ra, i10, 0); } void MPYUI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i10, zz); for (int w = 0; w < 4; w++) CPU.GPR[rt]._u32[w] = CPU.GPR[ra]._u16[w*2] * (u16)(i10 & 0xffff); + WRAPPER_END(rt, ra, i10, 0); } void CEQI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i10, zz); for(u32 i = 0; i < 4; ++i) - CPU.GPR[rt]._u32[i] = (CPU.GPR[ra]._i32[i] == i10) ? 0xffffffff : 0x00000000; + CPU.GPR[rt]._u32[i] = (CPU.GPR[ra]._i32[i] == (s32)i10) ? 0xffffffff : 0x00000000; + WRAPPER_END(rt, ra, i10, 0); + // TODO } void CEQHI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i10, zz); for (int h = 0; h < 8; h++) - CPU.GPR[rt]._u16[h] = (CPU.GPR[ra]._i16[h] == (s16)i10) ? 0xffff : 0; + CPU.GPR[rt]._u16[h] = (CPU.GPR[ra]._i16[h] == (s16)(s32)i10) ? 
0xffff : 0; + WRAPPER_END(rt, ra, i10, 0); } void CEQBI(u32 rt, u32 ra, s32 i10) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, i10, zz); for (int b = 0; b < 16; b++) CPU.GPR[rt]._i8[b] = (CPU.GPR[ra]._i8[b] == (s8)(i10 & 0xff)) ? 0xff : 0; + WRAPPER_END(rt, ra, i10, 0); } void HEQI(u32 rt, u32 ra, s32 i10) { + // TODO UNIMPLEMENTED(); if(CPU.GPR[ra]._i32[3] == i10) CPU.Stop(); } @@ -1662,35 +2253,57 @@ private: } void ILA(u32 rt, u32 i18) { - XmmVar v0(c); - c.movaps(v0, imm_xmm(s19_to_s32[i18 & 0x3ffff])); - c.movaps(cpu_xmm(GPR[rt]), v0); + WRAPPER_BEGIN(rt, i18, yy, zz); + CPU.GPR[rt]._u32[0] = + CPU.GPR[rt]._u32[1] = + CPU.GPR[rt]._u32[2] = + CPU.GPR[rt]._u32[3] = i18 & 0x3FFFF; + WRAPPER_END(rt, i18, 0, 0); + /*XmmVar v0(c); + if (i18 == 0) + { + c.xorps(v0, v0); + } + else + { + c.movaps(v0, imm_xmm(s19_to_s32[i18 & 0x3ffff])); + } + c.movaps(cpu_xmm(GPR[rt]), v0);*/ } //0 - 3 void SELB(u32 rt, u32 ra, u32 rb, u32 rc) { - UNIMPLEMENTED(); - for(u64 i = 0; i < 2; ++i) + WRAPPER_BEGIN(rt, ra, rb, rc); + for (u64 i = 0; i < 2; ++i) { CPU.GPR[rt]._u64[i] = - ( CPU.GPR[rc]._u64[i] & CPU.GPR[rb]._u64[i]) | + (CPU.GPR[rc]._u64[i] & CPU.GPR[rb]._u64[i]) | (~CPU.GPR[rc]._u64[i] & CPU.GPR[ra]._u64[i]); } + WRAPPER_END(rt, ra, rb, rc); + /*XmmVar v0(c); + XmmVar v1(c); + c.movaps(v0, cpu_xmm(GPR[ra])); + c.movaps(v1, cpu_xmm(GPR[rc])); + c.andnps(v0, v1); + c.andps(v1, cpu_xmm(GPR[rb])); + c.orps(v0, v1); + c.movaps(cpu_xmm(GPR[rt]), v0);*/ } void SHUFB(u32 rt, u32 ra, u32 rb, u32 rc) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, rc); const SPU_GPR_hdr _a = CPU.GPR[ra]; const SPU_GPR_hdr _b = CPU.GPR[rb]; for (int i = 0; i < 16; i++) { u8 b = CPU.GPR[rc]._u8[i]; - if(b & 0x80) + if (b & 0x80) { - if(b & 0x40) + if (b & 0x40) { - if(b & 0x20) + if (b & 0x20) CPU.GPR[rt]._u8[i] = 0x80; else CPU.GPR[rt]._u8[i] = 0xFF; @@ -1700,42 +2313,59 @@ private: } else { - if(b & 0x10) + if (b & 0x10) CPU.GPR[rt]._u8[i] = _b._u8[15 - (b & 0x0F)]; else CPU.GPR[rt]._u8[i] = _a._u8[15 - (b & 0x0F)]; } } + WRAPPER_END(rt, ra, rb, rc); + // TODO } void MPYA(u32 rt, u32 ra, u32 rb, u32 rc) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, rc); for (int w = 0; w < 4; w++) CPU.GPR[rt]._i32[w] = CPU.GPR[ra]._i16[w*2] * CPU.GPR[rb]._i16[w*2] + CPU.GPR[rc]._i32[w]; + WRAPPER_END(rt, ra, rb, rc); } void FNMS(u32 rt, u32 ra, u32 rb, u32 rc) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, rc); CPU.GPR[rt]._f[0] = CPU.GPR[rc]._f[0] - CPU.GPR[ra]._f[0] * CPU.GPR[rb]._f[0]; CPU.GPR[rt]._f[1] = CPU.GPR[rc]._f[1] - CPU.GPR[ra]._f[1] * CPU.GPR[rb]._f[1]; CPU.GPR[rt]._f[2] = CPU.GPR[rc]._f[2] - CPU.GPR[ra]._f[2] * CPU.GPR[rb]._f[2]; CPU.GPR[rt]._f[3] = CPU.GPR[rc]._f[3] - CPU.GPR[ra]._f[3] * CPU.GPR[rb]._f[3]; + WRAPPER_END(rt, ra, rb, rc); + /*XmmVar v0(c), v1(c); + c.movaps(v0, cpu_xmm(GPR[ra])); + c.mulps(v0, cpu_xmm(GPR[rb])); + c.movaps(v1, cpu_xmm(GPR[rc])); + c.subps(v1, v0); + c.movaps(cpu_xmm(GPR[rt]), v1);*/ } void FMA(u32 rt, u32 ra, u32 rb, u32 rc) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, rc); CPU.GPR[rt]._f[0] = CPU.GPR[ra]._f[0] * CPU.GPR[rb]._f[0] + CPU.GPR[rc]._f[0]; CPU.GPR[rt]._f[1] = CPU.GPR[ra]._f[1] * CPU.GPR[rb]._f[1] + CPU.GPR[rc]._f[1]; CPU.GPR[rt]._f[2] = CPU.GPR[ra]._f[2] * CPU.GPR[rb]._f[2] + CPU.GPR[rc]._f[2]; CPU.GPR[rt]._f[3] = CPU.GPR[ra]._f[3] * CPU.GPR[rb]._f[3] + CPU.GPR[rc]._f[3]; + WRAPPER_END(rt, ra, rb, rc); + /*XmmVar v0(c); + c.movaps(v0, cpu_xmm(GPR[ra])); + c.mulps(v0, cpu_xmm(GPR[rb])); + c.addps(v0, cpu_xmm(GPR[rc])); + c.movaps(cpu_xmm(GPR[rt]), v0);*/ } void FMS(u32 rt, u32 
ra, u32 rb, u32 rc) { - UNIMPLEMENTED(); + WRAPPER_BEGIN(rt, ra, rb, rc); CPU.GPR[rt]._f[0] = CPU.GPR[ra]._f[0] * CPU.GPR[rb]._f[0] - CPU.GPR[rc]._f[0]; CPU.GPR[rt]._f[1] = CPU.GPR[ra]._f[1] * CPU.GPR[rb]._f[1] - CPU.GPR[rc]._f[1]; CPU.GPR[rt]._f[2] = CPU.GPR[ra]._f[2] * CPU.GPR[rb]._f[2] - CPU.GPR[rc]._f[2]; CPU.GPR[rt]._f[3] = CPU.GPR[ra]._f[3] * CPU.GPR[rb]._f[3] - CPU.GPR[rc]._f[3]; + WRAPPER_END(rt, ra, rb, rc); } void UNK(u32 code, u32 opcode, u32 gcode) diff --git a/rpcs3/Emu/Cell/SPURecompilerCore.cpp b/rpcs3/Emu/Cell/SPURecompilerCore.cpp index bf3662c399..940ef44a4a 100644 --- a/rpcs3/Emu/Cell/SPURecompilerCore.cpp +++ b/rpcs3/Emu/Cell/SPURecompilerCore.cpp @@ -7,7 +7,7 @@ static const SPUImmTable g_spu_imm; SPURecompilerCore::SPURecompilerCore(SPUThread& cpu) : m_enc(new SPURecompiler(cpu, *this)) -, m_inter(new SPUInterpreter(cpu)) +, inter(new SPUInterpreter(cpu)) , CPU(cpu) , compiler(&runtime) { @@ -17,17 +17,17 @@ SPURecompilerCore::SPURecompilerCore(SPUThread& cpu) SPURecompilerCore::~SPURecompilerCore() { delete m_enc; - delete m_inter; + delete inter; } void SPURecompilerCore::Decode(const u32 code) // decode instruction and run with interpreter { - (*SPU_instr::rrr_list)(m_inter, code); + (*SPU_instr::rrr_list)(inter, code); } void SPURecompilerCore::Compile(u16 pos) { - compiler.addFunc(kFuncConvHost, FuncBuilder4()); + compiler.addFunc(kFuncConvHost, FuncBuilder4()); entry[pos].host = pos; GpVar cpu_var(compiler, kVarTypeIntPtr, "cpu"); @@ -45,15 +45,26 @@ void SPURecompilerCore::Compile(u16 pos) compiler.alloc(imm_var); m_enc->imm_var = &imm_var; - GpVar pos_var(compiler, kVarTypeUInt16, "pos"); + GpVar pos_var(compiler, kVarTypeUInt32, "pos"); compiler.setArg(3, pos_var); compiler.alloc(pos_var); + m_enc->pos_var = &pos_var; + + compiler.xor_(pos_var, pos_var); + while (true) { const u32 opcode = Memory.Read32(CPU.dmac.ls_offset + pos * 4); m_enc->do_finalize = false; - (*SPU_instr::rrr_list)(m_enc, opcode); // compile single opcode + if (opcode) + { + (*SPU_instr::rrr_list)(m_enc, opcode); // compile single opcode + } + else + { + m_enc->do_finalize = true; + } bool fin = m_enc->do_finalize; entry[pos].valid = opcode; @@ -63,7 +74,6 @@ void SPURecompilerCore::Compile(u16 pos) entry[pos].host = entry[pos - 1].host; } - compiler.xor_(pos_var, pos_var); compiler.ret(pos_var); compiler.endFunc(); entry[entry[pos].host].pointer = compiler.make(); @@ -74,6 +84,7 @@ u8 SPURecompilerCore::DecodeMemory(const u64 address) const u64 m_offset = address - CPU.PC; const u16 pos = (CPU.PC >> 2); + //ConLog.Write("DecodeMemory: pos=%d", pos); u32* ls = (u32*)Memory.VirtualToRealAddr(m_offset); if (!pos) @@ -115,16 +126,16 @@ u8 SPURecompilerCore::DecodeMemory(const u64 address) return 0; } // jump - typedef u16(*Func)(void* _cpu, void* _ls, const SPUImmTable* _imm, u16 _pos); + typedef u32(*Func)(void* _cpu, void* _ls, const SPUImmTable* _imm, u32 _pos); Func func = asmjit_cast(entry[entry[pos].host].pointer); void* cpu = (u8*)&CPU.GPR[0] - offsetof(SPUThread, GPR[0]); // ugly cpu base offset detection - u16 res = pos == entry[pos].host ? 0 : pos; - res = func(cpu, ls, &g_spu_imm, res); + u16 res = (pos == entry[pos].host) ? 
0 : pos; + res = (u16)func(cpu, ls, &g_spu_imm, res); - ConLog.Write("func -> %d", res); + CPU.SetBranch((u64)res << 2); return 0; /*Decode(Memory.Read32(address)); diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index e3cc2b8db0..f93e39f4d5 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -3,6 +3,7 @@ #include "Emu/Cell/SPUDecoder.h" #include "Emu/Cell/SPUInterpreter.h" #include "Emu/Cell/SPUDisAsm.h" +#include "Emu/Cell/SPURecompiler.h" SPUThread& GetCurrentSPUThread() { @@ -75,6 +76,8 @@ void SPUThread::DoRun() break; case 1: + m_dec = new SPURecompilerCore(*this); + break; case 2: m_dec = new SPUDecoder(*new SPUInterpreter(*this)); break; diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index 5c947664c6..a581130473 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -213,20 +213,21 @@ public: union SPU_GPR_hdr { + u32 _u32[4]; + float _f[4]; u128 _u128; s128 _i128; __m128 _m128; __m128i _m128i; u64 _u64[2]; s64 _i64[2]; - u32 _u32[4]; s32 _i32[4]; u16 _u16[8]; s16 _i16[8]; u8 _u8[16]; s8 _i8[16]; double _d[2]; - float _f[4]; + SPU_GPR_hdr() {} @@ -243,9 +244,9 @@ union SPU_GPR_hdr union SPU_SPR_hdr { + u32 _u32[4]; u128 _u128; s128 _i128; - u32 _u32[4]; SPU_SPR_hdr() {} @@ -299,19 +300,19 @@ public: #else static const bool x86 = true; #endif - - private: union _CRT_ALIGN(8) { struct { volatile u32 m_index; u32 m_value[max_count]; }; + struct { + volatile u32 m_index2; + u16 m_val16[max_count * 2]; + }; volatile u64 m_indval; }; std::mutex m_lock; - public: - Channel() { Init(); @@ -586,7 +587,7 @@ public: } } - Sleep(1); // hack + //Sleep(1); // hack switch(cmd & ~(MFC_BARRIER_MASK | MFC_FENCE_MASK | MFC_LIST_MASK | MFC_RESULT_MASK)) { @@ -1125,6 +1126,115 @@ public: if (Emu.IsStopped()) ConLog.Warning("%s(%s) aborted", __FUNCTION__, spu_ch_name[ch]); } + void DoStop(u32 code) + { + SetExitStatus(code); // exit code (not status) + + switch (code) + { + case 0x110: /* ===== sys_spu_thread_receive_event ===== */ + { + u32 spuq = 0; + if (!SPU.Out_MBox.Pop(spuq)) + { + ConLog.Error("sys_spu_thread_receive_event: cannot read Out_MBox"); + SPU.In_MBox.PushUncond(CELL_EINVAL); // ??? + return; + } + + if (SPU.In_MBox.GetCount()) + { + ConLog.Error("sys_spu_thread_receive_event(spuq=0x%x): In_MBox is not empty", spuq); + SPU.In_MBox.PushUncond(CELL_EBUSY); // ??? + return; + } + + if (Ini.HLELogging.GetValue()) + { + ConLog.Write("sys_spu_thread_receive_event(spuq=0x%x)", spuq); + } + + EventQueue* eq; + if (!SPUQs.GetEventQueue(FIX_SPUQ(spuq), eq)) + { + SPU.In_MBox.PushUncond(CELL_EINVAL); // TODO: check error value + return; + } + + u32 tid = GetId(); + + eq->sq.push(tid); // add thread to sleep queue + + while (true) + { + switch (eq->owner.trylock(tid)) + { + case SMR_OK: + if (!eq->events.count()) + { + eq->owner.unlock(tid); + break; + } + else + { + u32 next = (eq->protocol == SYS_SYNC_FIFO) ? 
eq->sq.pop() : eq->sq.pop_prio(); + if (next != tid) + { + eq->owner.unlock(tid, next); + break; + } + } + case SMR_SIGNAL: + { + sys_event_data event; + eq->events.pop(event); + eq->owner.unlock(tid); + SPU.In_MBox.PushUncond(CELL_OK); + SPU.In_MBox.PushUncond(event.data1); + SPU.In_MBox.PushUncond(event.data2); + SPU.In_MBox.PushUncond(event.data3); + return; + } + case SMR_FAILED: break; + default: eq->sq.invalidate(tid); SPU.In_MBox.PushUncond(CELL_ECANCELED); return; + } + + Sleep(1); + if (Emu.IsStopped()) + { + ConLog.Warning("sys_spu_thread_receive_event(spuq=0x%x) aborted", spuq); + eq->sq.invalidate(tid); + return; + } + } + } + break; + case 0x102: + if (!SPU.Out_MBox.GetCount()) + { + ConLog.Error("sys_spu_thread_exit (no status, code 0x102)"); + } + else if (Ini.HLELogging.GetValue()) + { + // the real exit status + ConLog.Write("sys_spu_thread_exit (status=0x%x)", SPU.Out_MBox.GetValue()); + } + Stop(); + break; + default: + if (!SPU.Out_MBox.GetCount()) + { + ConLog.Error("Unknown STOP code: 0x%x (no message)", code); + } + else + { + ConLog.Error("Unknown STOP code: 0x%x (message=0x%x)", code, SPU.Out_MBox.GetValue()); + } + Stop(); + break; + } + } + bool IsGoodLSA(const u32 lsa) const { return Memory.IsGoodAddr(lsa + m_offset) && lsa < 0x40000; } virtual u8 ReadLS8 (const u32 lsa) const { return Memory.Read8 (lsa + m_offset); } // m_offset & 0x3fffc ????? virtual u16 ReadLS16 (const u32 lsa) const { return Memory.Read16 (lsa + m_offset); } diff --git a/rpcs3/rpcs3.vcxproj b/rpcs3/rpcs3.vcxproj index dc4dfcfc0b..cf6ff47c3d 100644 --- a/rpcs3/rpcs3.vcxproj +++ b/rpcs3/rpcs3.vcxproj @@ -393,6 +393,7 @@ + diff --git a/rpcs3/rpcs3.vcxproj.filters b/rpcs3/rpcs3.vcxproj.filters index 1e00a296fe..3589bf81ee 100644 --- a/rpcs3/rpcs3.vcxproj.filters +++ b/rpcs3/rpcs3.vcxproj.filters @@ -702,5 +702,8 @@ Utilities + + Include + \ No newline at end of file
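
A note on the dispatch change in SPURecompilerCore.cpp above: the position argument and return value of a compiled block are widened from u16 to u32, and DecodeMemory now feeds the result back into CPU.SetBranch instead of logging it. In other words, a compiled block returns the next SPU program counter expressed in 4-byte words, and the caller converts it back to a byte address. A minimal sketch of that contract, using the Func typedef from the diff (the run_block helper and the simplified setup are hypothetical, added only for illustration):

    #include <cstdint>
    typedef uint32_t u32;
    typedef uint64_t u64;
    struct SPUImmTable; // defined in SPURecompiler.h; only a pointer is needed here

    // Signature of a block produced by SPURecompilerCore::Compile (as in the diff).
    typedef u32 (*Func)(void* _cpu, void* _ls, const SPUImmTable* _imm, u32 _pos);

    // Hypothetical helper showing how DecodeMemory uses the result: the block
    // returns the next position in 4-byte words, which becomes the byte address
    // passed to CPU.SetBranch((u64)res << 2).
    static u64 run_block(Func func, void* cpu, void* ls, const SPUImmTable* imm, u32 pos_words)
    {
        const u32 res = func(cpu, ls, imm, pos_words); // next position in words
        return (u64)res << 2;                          // byte address for SetBranch
    }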
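
The conditional branches (BRZ, BRNZ, BRHZ, BRHNZ) are the only opcodes in this hunk compiled directly rather than wrapped around the interpreter body: each sets do_finalize, loads the fall-through position into a temporary, loads the branch target into pos_var, then uses cmp plus cmovnz/cmovz so that pos_var ends up holding whichever successor applies (the halfword forms test GPR[rt]._u16[6] instead of the word element). Below is a plain C++ model of the value the emitted cmp/cmov sequence leaves in pos_var; the helper name and standalone form are assumptions, and the branch target is taken as an already-computed word address:

    #include <cstdint>
    typedef uint32_t u32;

    // Models the result left in pos_var by the BRZ/BRNZ emitters above:
    // either the branch target or the sequential successor, both in 4-byte words.
    static inline u32 next_pos_word(u32 pc, u32 target_word, u32 reg_value, bool branch_if_zero)
    {
        const u32 fallthrough = (pc >> 2) + 1;                 // (CPU.PC >> 2) + 1 in the diff
        const bool taken = branch_if_zero ? (reg_value == 0)   // BRZ / BRHZ
                                          : (reg_value != 0);  // BRNZ / BRHNZ
        return taken ? target_word : fallthrough;
    }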