From adbf6d55dacf256fdd586ee5069b5e20be4eacc7 Mon Sep 17 00:00:00 2001
From: JosJuice
Date: Thu, 17 Jun 2021 18:05:07 +0200
Subject: [PATCH] JitArm64: Implement ps_cmpXX

---
 Source/Core/Core/PowerPC/JitArm64/Jit.h       |  3 +
 .../JitArm64/JitArm64_FloatingPoint.cpp       | 55 +++++++++++++++----
 .../Core/PowerPC/JitArm64/JitArm64_Paired.cpp |  9 +++
 .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp | 24 ++++----
 4 files changed, 69 insertions(+), 22 deletions(-)

diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index 43e3b44b23..ddab950334 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -152,6 +152,7 @@ public:
   void ps_sumX(UGeckoInstruction inst);
   void ps_res(UGeckoInstruction inst);
   void ps_rsqrte(UGeckoInstruction inst);
+  void ps_cmpXX(UGeckoInstruction inst);
 
   // Loadstore paired
   void psq_l(UGeckoInstruction inst);
@@ -168,6 +169,8 @@ public:
                                  Arm64Gen::ARM64Reg src_reg,
                                  Arm64Gen::ARM64Reg scratch_reg = Arm64Gen::ARM64Reg::INVALID_REG);
 
+  void FloatCompare(UGeckoInstruction inst, bool upper = false);  // common body of fcmpX / ps_cmpXX
+
   bool IsFPRStoreSafe(size_t guest_reg) const;
 
 protected:
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
index a3411c642e..e99c6d6a92 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
@@ -363,11 +363,8 @@ void JitArm64::frspx(UGeckoInstruction inst)
   }
 }
 
-void JitArm64::fcmpX(UGeckoInstruction inst)
+void JitArm64::FloatCompare(UGeckoInstruction inst, bool upper)
 {
-  INSTRUCTION_START
-  JITDISABLE(bJITFloatingPointOff);
-
   const bool fprf = SConfig::GetInstance().bFPRF && js.op->wantsFPRF;
 
   const u32 a = inst.FA;
@@ -386,12 +383,15 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
   const bool input_ftz_workaround =
       !cpu_info.bAFP && (!js.fpr_is_store_safe[a] || !js.fpr_is_store_safe[b]);
 
-  const bool singles = fpr.IsSingle(a, true) && fpr.IsSingle(b, true) && !input_ftz_workaround;
-  const RegType type = singles ? RegType::LowerPairSingle : RegType::LowerPair;
+  const bool singles = fpr.IsSingle(a, !upper) && fpr.IsSingle(b, !upper) && !input_ftz_workaround;
+  const RegType lower_type = singles ? RegType::LowerPairSingle : RegType::LowerPair;
+  const RegType upper_type = singles ? RegType::Single : RegType::Register;
   const auto reg_encoder = singles ? EncodeRegToSingle : EncodeRegToDouble;
 
-  const ARM64Reg VA = reg_encoder(fpr.R(a, type));
-  const ARM64Reg VB = reg_encoder(fpr.R(b, type));
+  const bool upper_a = upper && !js.op->fprIsDuplicated[a];  // false => lower_type, no DUP
+  const bool upper_b = upper && !js.op->fprIsDuplicated[b];  // false => lower_type, no DUP
+  ARM64Reg VA = reg_encoder(fpr.R(a, upper_a ? upper_type : lower_type));
+  ARM64Reg VB = reg_encoder(fpr.R(b, upper_b ? upper_type : lower_type));
 
   gpr.BindCRToRegister(crf, false);
   const ARM64Reg XA = gpr.CR(crf);
@@ -404,12 +404,39 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
     ANDI2R(fpscr_reg, fpscr_reg, ~FPCC_MASK);
   }
 
+  ARM64Reg V0Q = ARM64Reg::INVALID_REG;
+  ARM64Reg V1Q = ARM64Reg::INVALID_REG;
+  if (upper_a)
+  {
+    V0Q = fpr.GetReg();
+    m_float_emit.DUP(singles ? 32 : 64, reg_encoder(V0Q), VA, 1);  // presumably lane 1 of VA
+    VA = reg_encoder(V0Q);
+  }
+  if (upper_b)
+  {
+    if (a == b)
+    {
+      VB = VA;  // a == b here, so upper_a was true and VA already names the temp
+    }
+    else
+    {
+      V1Q = fpr.GetReg();
+      m_float_emit.DUP(singles ? 32 : 64, reg_encoder(V1Q), VB, 1);  // presumably lane 1 of VB
+      VB = reg_encoder(V1Q);
+    }
+  }
+
+  m_float_emit.FCMP(VA, VB);  // emitted before V0Q/V1Q are unlocked below
+
+  if (V0Q != ARM64Reg::INVALID_REG)
+    fpr.Unlock(V0Q);
+  if (V1Q != ARM64Reg::INVALID_REG)
+    fpr.Unlock(V1Q);
+
   FixupBranch pNaN, pLesser, pGreater;
   FixupBranch continue1, continue2, continue3;
   ORR(XA, ARM64Reg::ZR, 32, 0, true);
 
-  m_float_emit.FCMP(VA, VB);
-
   if (a != b)
   {
     // if B > A goto Greater's jump target
@@ -465,6 +492,14 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
   }
 }
 
+void JitArm64::fcmpX(UGeckoInstruction inst)
+{
+  INSTRUCTION_START
+  JITDISABLE(bJITFloatingPointOff);
+
+  FloatCompare(inst);  // upper takes its declared default, false
+}
+
 void JitArm64::fctiwzx(UGeckoInstruction inst)
 {
   INSTRUCTION_START
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp
index 074c9a76e0..86a5112d59 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp
@@ -451,3 +451,12 @@ void JitArm64::ps_rsqrte(UGeckoInstruction inst)
 
   SetFPRFIfNeeded(true, VD);
 }
+
+void JitArm64::ps_cmpXX(UGeckoInstruction inst)
+{
+  INSTRUCTION_START
+  JITDISABLE(bJITPairedOff);
+
+  const bool upper = inst.SUBOP10 & 64;  // set for 64/96 (ps_cmpu1/ps_cmpo1), clear for 0/32
+  FloatCompare(inst, upper);
+}
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
index da8fb78682..082c5ec402 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp
@@ -88,18 +88,18 @@ constexpr std::array primarytable{{
 
 constexpr std::array table4{{
     //    SUBOP10
-    {0, &JitArm64::FallBackToInterpreter},   // ps_cmpu0
-    {32, &JitArm64::FallBackToInterpreter},  // ps_cmpo0
-    {40, &JitArm64::fp_logic},               // ps_neg
-    {136, &JitArm64::fp_logic},              // ps_nabs
-    {264, &JitArm64::fp_logic},              // ps_abs
-    {64, &JitArm64::FallBackToInterpreter},  // ps_cmpu1
-    {72, &JitArm64::fp_logic},               // ps_mr
-    {96, &JitArm64::FallBackToInterpreter},  // ps_cmpo1
-    {528, &JitArm64::ps_mergeXX},            // ps_merge00
-    {560, &JitArm64::ps_mergeXX},            // ps_merge01
-    {592, &JitArm64::ps_mergeXX},            // ps_merge10
-    {624, &JitArm64::ps_mergeXX},            // ps_merge11
+    {0, &JitArm64::ps_cmpXX},      // ps_cmpu0
+    {32, &JitArm64::ps_cmpXX},     // ps_cmpo0
+    {40, &JitArm64::fp_logic},     // ps_neg
+    {136, &JitArm64::fp_logic},    // ps_nabs
+    {264, &JitArm64::fp_logic},    // ps_abs
+    {64, &JitArm64::ps_cmpXX},     // ps_cmpu1
+    {72, &JitArm64::fp_logic},     // ps_mr
+    {96, &JitArm64::ps_cmpXX},     // ps_cmpo1
+    {528, &JitArm64::ps_mergeXX},  // ps_merge00
+    {560, &JitArm64::ps_mergeXX},  // ps_merge01
+    {592, &JitArm64::ps_mergeXX},  // ps_merge10
+    {624, &JitArm64::ps_mergeXX},  // ps_merge11
 
     {1014, &JitArm64::FallBackToInterpreter},  // dcbz_l
 }};