diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index dfc2f58fe4..a15712d36f 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -145,13 +145,9 @@ public: void fctiwzx(UGeckoInstruction inst); // Paired - void ps_madd(UGeckoInstruction inst); - void ps_maddsX(UGeckoInstruction inst); + void ps_maddXX(UGeckoInstruction inst); void ps_mergeXX(UGeckoInstruction inst); - void ps_msub(UGeckoInstruction inst); void ps_mulsX(UGeckoInstruction inst); - void ps_nmadd(UGeckoInstruction inst); - void ps_nmsub(UGeckoInstruction inst); void ps_res(UGeckoInstruction inst); void ps_sel(UGeckoInstruction inst); void ps_sumX(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp index 67d58bde05..c17c22b549 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp @@ -83,7 +83,8 @@ void JitArm64::ps_mulsX(UGeckoInstruction inst) fpr.FixSinglePrecision(d); fpr.Unlock(V0); } -void JitArm64::ps_madd(UGeckoInstruction inst) + +void JitArm64::ps_maddXX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITPairedOff); @@ -91,6 +92,7 @@ void JitArm64::ps_madd(UGeckoInstruction inst) FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; + u32 op5 = inst.SUBOP5; ARM64Reg VA = fpr.R(a, REG_REG); ARM64Reg VB = fpr.R(b, REG_REG); @@ -98,101 +100,40 @@ void JitArm64::ps_madd(UGeckoInstruction inst) ARM64Reg VD = fpr.RW(d, REG_REG); ARM64Reg V0 = fpr.GetReg(); - m_float_emit.FMUL(64, V0, VA, VC); - m_float_emit.FADD(64, VD, V0, VB); - fpr.FixSinglePrecision(d); - - fpr.Unlock(V0); -} - -void JitArm64::ps_maddsX(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - bool upper = inst.SUBOP5 == 15; - - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VC = fpr.R(c, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - ARM64Reg V0 = fpr.GetReg(); - - m_float_emit.DUP(64, V0, VC, upper ? 1 : 0); - m_float_emit.FMUL(64, V0, V0, VA); - m_float_emit.FADD(64, VD, V0, VB); - fpr.FixSinglePrecision(d); - - fpr.Unlock(V0); -} - -void JitArm64::ps_msub(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VC = fpr.R(c, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - ARM64Reg V0 = fpr.GetReg(); - - m_float_emit.FMUL(64, V0, VA, VC); - m_float_emit.FSUB(64, VD, V0, VB); - fpr.FixSinglePrecision(d); - - fpr.Unlock(V0); -} - -void JitArm64::ps_nmadd(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VC = fpr.R(c, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - ARM64Reg V0 = fpr.GetReg(); - - m_float_emit.FMUL(64, V0, VA, VC); - m_float_emit.FADD(64, VD, V0, VB); - m_float_emit.FNEG(64, VD, VD); - fpr.FixSinglePrecision(d); - - fpr.Unlock(V0); -} - -void JitArm64::ps_nmsub(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARM64Reg VA = fpr.R(a, REG_REG); - ARM64Reg VB = fpr.R(b, REG_REG); - ARM64Reg VC = fpr.R(c, REG_REG); - ARM64Reg VD = fpr.RW(d, REG_REG); - ARM64Reg V0 = fpr.GetReg(); - - m_float_emit.FMUL(64, V0, VA, VC); - m_float_emit.FSUB(64, VD, V0, VB); - m_float_emit.FNEG(64, VD, VD); + switch (op5) + { + case 14: // ps_madds0 + m_float_emit.DUP(64, V0, VC, 0); + m_float_emit.FMUL(64, V0, V0, VA); + m_float_emit.FADD(64, VD, V0, VB); + break; + case 15: // ps_madds1 + m_float_emit.DUP(64, V0, VC, 1); + m_float_emit.FMUL(64, V0, V0, VA); + m_float_emit.FADD(64, VD, V0, VB); + break; + case 28: // ps_msub + m_float_emit.FMUL(64, V0, VA, VC); + m_float_emit.FSUB(64, VD, V0, VB); + break; + case 29: // ps_madd + m_float_emit.FMUL(64, V0, VA, VC); + m_float_emit.FADD(64, VD, V0, VB); + break; + case 30: // ps_nmsub + m_float_emit.FMUL(64, V0, VA, VC); + m_float_emit.FSUB(64, VD, V0, VB); + m_float_emit.FNEG(64, VD, VD); + break; + case 31: // ps_nmadd + m_float_emit.FMUL(64, V0, VA, VC); + m_float_emit.FADD(64, VD, V0, VB); + m_float_emit.FNEG(64, VD, VD); + break; + default: + _assert_msg_(DYNA_REC, 0, "ps_madd - invalid op"); + break; + } fpr.FixSinglePrecision(d); fpr.Unlock(V0); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp index eb6722a474..700cf42df9 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -124,8 +124,8 @@ static GekkoOPTemplate table4_2[] = {11, &JitArm64::ps_sumX}, // ps_sum1 {12, &JitArm64::ps_mulsX}, // ps_muls0 {13, &JitArm64::ps_mulsX}, // ps_muls1 - {14, &JitArm64::ps_maddsX}, // ps_madds0 - {15, &JitArm64::ps_maddsX}, // ps_madds1 + {14, &JitArm64::ps_maddXX}, // ps_madds0 + {15, &JitArm64::ps_maddXX}, // ps_madds1 {18, &JitArm64::fp_arith}, // ps_div {20, &JitArm64::fp_arith}, // ps_sub {21, &JitArm64::fp_arith}, // ps_add @@ -133,10 +133,10 @@ static GekkoOPTemplate table4_2[] = {24, &JitArm64::ps_res}, // ps_res {25, &JitArm64::fp_arith}, // ps_mul {26, &JitArm64::FallBackToInterpreter}, // ps_rsqrte - {28, &JitArm64::ps_msub}, // ps_msub - {29, &JitArm64::ps_madd}, // ps_madd - {30, &JitArm64::ps_nmsub}, // ps_nmsub - {31, &JitArm64::ps_nmadd}, // ps_nmadd + {28, &JitArm64::ps_maddXX}, // ps_msub + {29, &JitArm64::ps_maddXX}, // ps_madd + {30, &JitArm64::ps_maddXX}, // ps_nmsub + {31, &JitArm64::ps_maddXX}, // ps_nmadd };