diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 1016ddec9f..cd070d1feb 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -157,8 +157,6 @@ public: // Paired void ps_abs(UGeckoInstruction inst); - void ps_add(UGeckoInstruction inst); - void ps_div(UGeckoInstruction inst); void ps_madd(UGeckoInstruction inst); void ps_madds0(UGeckoInstruction inst); void ps_madds1(UGeckoInstruction inst); @@ -168,7 +166,6 @@ public: void ps_merge11(UGeckoInstruction inst); void ps_mr(UGeckoInstruction inst); void ps_msub(UGeckoInstruction inst); - void ps_mul(UGeckoInstruction inst); void ps_muls0(UGeckoInstruction inst); void ps_muls1(UGeckoInstruction inst); void ps_nabs(UGeckoInstruction inst); @@ -177,7 +174,6 @@ public: void ps_neg(UGeckoInstruction inst); void ps_res(UGeckoInstruction inst); void ps_sel(UGeckoInstruction inst); - void ps_sub(UGeckoInstruction inst); void ps_sum0(UGeckoInstruction inst); void ps_sum1(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index e30a3ca3aa..1062e302c2 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -40,31 +40,41 @@ void JitArm64::fp_arith(UGeckoInstruction inst) u32 a = inst.FA, d = inst.FD; u32 b = inst.SUBOP5 == 25 ? inst.FC : inst.FB; - bool single = inst.OPCD == 4 || inst.OPCD == 59; + bool single = inst.OPCD == 59; + bool packed = inst.OPCD == 4; - ARM64Reg VA = EncodeRegToDouble(fpr.R(a, REG_IS_LOADED)); - ARM64Reg VB = EncodeRegToDouble(fpr.R(b, REG_IS_LOADED)); - ARM64Reg VD = EncodeRegToDouble(fpr.RW(d, single ? REG_DUP : REG_LOWER_PAIR)); - - switch (inst.SUBOP5) + if (packed) { - case 18: - m_float_emit.FDIV(VD, VA, VB); - break; - case 20: - m_float_emit.FSUB(VD, VA, VB); - break; - case 21: - m_float_emit.FADD(VD, VA, VB); - break; - case 25: - m_float_emit.FMUL(VD, VA, VB); - break; - default: - _assert_msg_(DYNA_REC, 0, "fp_arith WTF!!!"); + ARM64Reg VA = fpr.R(a, REG_REG); + ARM64Reg VB = fpr.R(b, REG_REG); + ARM64Reg VD = fpr.RW(d, REG_REG); + + switch (inst.SUBOP5) + { + case 18: m_float_emit.FDIV(64, VD, VA, VB); break; + case 20: m_float_emit.FSUB(64, VD, VA, VB); break; + case 21: m_float_emit.FADD(64, VD, VA, VB); break; + case 25: m_float_emit.FMUL(64, VD, VA, VB); break; + default: _assert_msg_(DYNA_REC, 0, "fp_arith WTF!!!"); + } + } + else + { + ARM64Reg VA = EncodeRegToDouble(fpr.R(a, REG_IS_LOADED)); + ARM64Reg VB = EncodeRegToDouble(fpr.R(b, REG_IS_LOADED)); + ARM64Reg VD = EncodeRegToDouble(fpr.RW(d, single ? REG_DUP : REG_LOWER_PAIR)); + + switch (inst.SUBOP5) + { + case 18: m_float_emit.FDIV(VD, VA, VB); break; + case 20: m_float_emit.FSUB(VD, VA, VB); break; + case 21: m_float_emit.FADD(VD, VA, VB); break; + case 25: m_float_emit.FMUL(VD, VA, VB); break; + default: _assert_msg_(DYNA_REC, 0, "fp_arith WTF!!!"); + } } - if (single) + if (single || packed) fpr.FixSinglePrecision(d); } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp index 8b774978cd..fc84d0d451 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp @@ -31,40 +31,6 @@ void JitArm64::ps_abs(UGeckoInstruction inst) m_float_emit.FABS(64, VD, VB); } -void JitArm64::ps_add(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - - m_float_emit.FADD(64, VD, VA, VB); - fpr.FixSinglePrecision(d); -} - -void JitArm64::ps_div(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - - m_float_emit.FDIV(64, VD, VA, VB); - fpr.FixSinglePrecision(d); -} - void JitArm64::ps_madd(UGeckoInstruction inst) { INSTRUCTION_START @@ -223,23 +189,6 @@ void JitArm64::ps_mr(UGeckoInstruction inst) m_float_emit.ORR(VD, VB, VB); } -void JitArm64::ps_mul(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, c = inst.FC, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VC = fpr.R(c, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - - m_float_emit.FMUL(64, VD, VA, VC); - fpr.FixSinglePrecision(d); -} - void JitArm64::ps_muls0(UGeckoInstruction inst) { INSTRUCTION_START @@ -421,23 +370,6 @@ void JitArm64::ps_sel(UGeckoInstruction inst) } } -void JitArm64::ps_sub(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - - m_float_emit.FSUB(64, VD, VA, VB); - fpr.FixSinglePrecision(d); -} - void JitArm64::ps_sum0(UGeckoInstruction inst) { INSTRUCTION_START diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp index 0bade60745..bb7cc8fe6c 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -126,12 +126,12 @@ static GekkoOPTemplate table4_2[] = {13, &JitArm64::ps_muls1}, // ps_muls1 {14, &JitArm64::ps_madds0}, // ps_madds0 {15, &JitArm64::ps_madds1}, // ps_madds1 - {18, &JitArm64::ps_div}, // ps_div - {20, &JitArm64::ps_sub}, // ps_sub - {21, &JitArm64::ps_add}, // ps_add + {18, &JitArm64::fp_arith}, // ps_div + {20, &JitArm64::fp_arith}, // ps_sub + {21, &JitArm64::fp_arith}, // ps_add {23, &JitArm64::ps_sel}, // ps_sel {24, &JitArm64::ps_res}, // ps_res - {25, &JitArm64::ps_mul}, // ps_mul + {25, &JitArm64::fp_arith}, // ps_mul {26, &JitArm64::FallBackToInterpreter}, // ps_rsqrte {28, &JitArm64::ps_msub}, // ps_msub {29, &JitArm64::ps_madd}, // ps_madd