From 8094037104abf0b788bf76bbf1d507691064cfd7 Mon Sep 17 00:00:00 2001
From: Ryan Houdek
Date: Mon, 19 Aug 2013 18:08:05 +0000
Subject: [PATCH] [ARM] Add ps_sum0 and a disabled ps_madd.

---
 Source/Core/Core/Src/PowerPC/JitArm32/Jit.h   |  2 +
 .../Src/PowerPC/JitArm32/JitArm_Paired.cpp    | 80 +++++++++++++++++--
 .../Src/PowerPC/JitArm32/JitArm_Tables.cpp    |  4 +-
 3 files changed, 78 insertions(+), 8 deletions(-)

diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h
index 43fdbfa1df..be203868e5 100644
--- a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h
+++ b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h
@@ -210,6 +210,8 @@ public:
 
 	// Paired Singles
 	void ps_add(UGeckoInstruction _inst);
+	void ps_sum0(UGeckoInstruction _inst);
+	void ps_madd(UGeckoInstruction _inst);
 	void ps_sub(UGeckoInstruction _inst);
 	void ps_mul(UGeckoInstruction _inst);
 };
diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Paired.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Paired.cpp
index 39a8320389..279ed10a48 100644
--- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Paired.cpp
+++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Paired.cpp
@@ -43,12 +43,80 @@ void JitArm::ps_add(UGeckoInstruction inst)
 	ARMReg vA1 = fpr.R1(a);
 	ARMReg vB0 = fpr.R0(b);
 	ARMReg vB1 = fpr.R1(b);
-	ARMReg vD0 = fpr.R0(d, false);
-	ARMReg vD1 = fpr.R1(d, false);
+	ARMReg vD0 = fpr.R0(d);
+	ARMReg vD1 = fpr.R1(d);
 
 	VADD(vD0, vA0, vB0);
 	VADD(vD1, vA1, vB1);
 }
+
+// ps_madd: frD = (frA * frC) + frB for both paired-single slots.
+// Wrong, THP videos like SMS and Ikaruga show artifacts, so the JIT path stays
+// disabled: the unconditional Default(inst) below returns before any of the
+// code after it is emitted. The accumulate used to compute frA * frB + frC;
+// the operands are corrected here, but re-test before removing the early return.
+void JitArm::ps_madd(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(Paired)
+
+	// Disabled: always hand the instruction to Default().
+	Default(inst); return;
+
+	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
+
+	if (inst.Rc) {
+		Default(inst); return;
+	}
+	ARMReg vA0 = fpr.R0(a);
+	ARMReg vA1 = fpr.R1(a);
+	ARMReg vB0 = fpr.R0(b);
+	ARMReg vB1 = fpr.R1(b);
+	ARMReg vC0 = fpr.R0(c);
+	ARMReg vC1 = fpr.R1(c);
+	ARMReg vD0 = fpr.R0(d);
+	ARMReg vD1 = fpr.R1(d);
+
+	ARMReg V0 = fpr.GetReg();
+	ARMReg V1 = fpr.GetReg();
+
+	// VMLA accumulates (Vd += Vn * Vm), so seed the scratch registers with frB.
+	VMOV(V0, vB0);
+	VMOV(V1, vB1);
+
+	VMLA(V0, vA0, vC0);
+	VMLA(V1, vA1, vC1);
+
+	// Results are staged in scratch registers and copied to frD last.
+	VMOV(vD0, V0);
+	VMOV(vD1, V1);
+
+	fpr.Unlock(V0);
+	fpr.Unlock(V1);
+}
+
+// ps_sum0: frD(ps0) = frA(ps0) + frB(ps1); frD(ps1) = frC(ps1).
+// Record-form (Rc set) encodings are handed to Default().
+void JitArm::ps_sum0(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(Paired)
+
+	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
+
+	if (inst.Rc) {
+		Default(inst); return;
+	}
+	ARMReg vA0 = fpr.R0(a);
+	ARMReg vB1 = fpr.R1(b);
+	ARMReg vC1 = fpr.R1(c);
+	ARMReg vD0 = fpr.R0(d);
+	ARMReg vD1 = fpr.R1(d);
+
+	VADD(vD0, vA0, vB1);
+	VMOV(vD1, vC1);
+}
+
 void JitArm::ps_sub(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
@@ -62,8 +130,8 @@ void JitArm::ps_sub(UGeckoInstruction inst)
 	ARMReg vA1 = fpr.R1(a);
 	ARMReg vB0 = fpr.R0(b);
 	ARMReg vB1 = fpr.R1(b);
-	ARMReg vD0 = fpr.R0(d, false);
-	ARMReg vD1 = fpr.R1(d, false);
+	ARMReg vD0 = fpr.R0(d);
+	ARMReg vD1 = fpr.R1(d);
 
 	VSUB(vD0, vA0, vB0);
 	VSUB(vD1, vA1, vB1);
@@ -81,8 +149,8 @@ void JitArm::ps_mul(UGeckoInstruction inst)
 	ARMReg vA1 = fpr.R1(a);
 	ARMReg vC0 = fpr.R0(c);
 	ARMReg vC1 = fpr.R1(c);
-	ARMReg vD0 = fpr.R0(d, false);
-	ARMReg vD1 = fpr.R1(d, false);
+	ARMReg vD0 = fpr.R0(d);
+	ARMReg vD1 = fpr.R1(d);
 
 	VMUL(vD0, vA0, vC0);
 	VMUL(vD1, vA1, vC1);
diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp
index fcfde2aae5..fe822179ed 100644
--- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp
+++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp
@@ -143,7 +143,7 @@ static GekkoOPTemplate table4[] =
 
 static GekkoOPTemplate table4_2[] =
 {
-	{10, &JitArm::Default}, //"ps_sum0", OPTYPE_PS, 0}},
+	{10, &JitArm::ps_sum0}, //"ps_sum0", OPTYPE_PS, 0}},
 	{11, &JitArm::Default}, //"ps_sum1", OPTYPE_PS, 0}},
 	{12, &JitArm::Default}, //"ps_muls0", OPTYPE_PS, 0}},
 	{13, &JitArm::Default}, //"ps_muls1", OPTYPE_PS, 0}},
@@ -157,7 +157,7 @@ static GekkoOPTemplate table4_2[] =
 	{25, &JitArm::ps_mul}, //"ps_mul", OPTYPE_PS, 0}},
 	{26, &JitArm::Default}, //"ps_rsqrte", OPTYPE_PS, 0, 1}},
 	{28, &JitArm::Default}, //"ps_msub", OPTYPE_PS, 0}},
-	{29, &JitArm::Default}, //"ps_madd", OPTYPE_PS, 0}},
+	{29, &JitArm::ps_madd}, //"ps_madd", OPTYPE_PS, 0}},
 	{30, &JitArm::Default}, //"ps_nmsub", OPTYPE_PS, 0}},
 	{31, &JitArm::Default}, //"ps_nmadd", OPTYPE_PS, 0}},
 };