From 88f3fec04e3e3b277d071aa0db19cbe77db2696e Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 24 Jan 2021 14:10:04 +0100 Subject: [PATCH 1/3] JitArm64: Add asserts for unexpected single to float conversions If the register pressure is high when allocating registers, Arm64FPRCache may spill a guest register which we are going to allocate later during the current instruction, which has the side effect of turning it into double precision. This will have bad consequences if we are assuming that it is single precision, so let's add some asserts to detect if that ever happens. --- .../JitArm64/JitArm64_FloatingPoint.cpp | 37 ++++++++++++++++--- .../Core/PowerPC/JitArm64/JitArm64_Paired.cpp | 16 ++++++++ 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index 8a4eddaf5e..942342707d 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -32,8 +32,11 @@ void JitArm64::fp_arith(UGeckoInstruction inst) bool use_c = op5 >= 25; // fmul and all kind of fmaddXX bool use_b = op5 != 25; // fmul uses no B - bool inputs_are_singles = fpr.IsSingle(a, !packed) && (!use_b || fpr.IsSingle(b, !packed)) && - (!use_c || fpr.IsSingle(c, !packed)); + const auto inputs_are_singles_func = [&] { + return fpr.IsSingle(a, !packed) && (!use_b || fpr.IsSingle(b, !packed)) && + (!use_c || fpr.IsSingle(c, !packed)); + }; + const bool inputs_are_singles = inputs_are_singles_func(); ARM64Reg VA{}, VB{}, VC{}, VD{}; @@ -117,6 +120,9 @@ void JitArm64::fp_arith(UGeckoInstruction inst) } } + ASSERT_MSG(DYNA_REC, inputs_are_singles == inputs_are_singles_func(), + "Register allocation turned singles into doubles in the middle of fp_arith"); + if (single || packed) fpr.FixSinglePrecision(d); } @@ -196,6 +202,9 @@ void JitArm64::fp_logic(UGeckoInstruction inst) break; } } + + ASSERT_MSG(DYNA_REC, single == fpr.IsSingle(b, !packed), + "Register allocation turned singles into doubles in the middle of fp_logic"); } void JitArm64::fselx(UGeckoInstruction inst) @@ -209,6 +218,7 @@ void JitArm64::fselx(UGeckoInstruction inst) const u32 c = inst.FC; const u32 d = inst.FD; + const bool a_single = fpr.IsSingle(a, true); if (fpr.IsSingle(a, true)) { const ARM64Reg VA = fpr.R(a, RegType::LowerPairSingle); @@ -220,15 +230,20 @@ void JitArm64::fselx(UGeckoInstruction inst) m_float_emit.FCMPE(EncodeRegToDouble(VA)); } - const bool single = fpr.IsSingle(b, true) && fpr.IsSingle(c, true); - const RegType type = single ? RegType::LowerPairSingle : RegType::LowerPair; - const auto reg_encoder = single ? EncodeRegToSingle : EncodeRegToDouble; + const bool b_and_c_singles = fpr.IsSingle(b, true) && fpr.IsSingle(c, true); + const RegType type = b_and_c_singles ? RegType::LowerPairSingle : RegType::LowerPair; + const auto reg_encoder = b_and_c_singles ? EncodeRegToSingle : EncodeRegToDouble; const ARM64Reg VB = fpr.R(b, type); const ARM64Reg VC = fpr.R(c, type); const ARM64Reg VD = fpr.RW(d, type); m_float_emit.FCSEL(reg_encoder(VD), reg_encoder(VC), reg_encoder(VB), CC_GE); + + ASSERT_MSG(DYNA_REC, + a_single == fpr.IsSingle(a, true) && + b_and_c_singles == (fpr.IsSingle(b, true) && fpr.IsSingle(c, true)), + "Register allocation turned singles into doubles in the middle of fselx"); } void JitArm64::frspx(UGeckoInstruction inst) @@ -241,7 +256,8 @@ void JitArm64::frspx(UGeckoInstruction inst) const u32 b = inst.FB; const u32 d = inst.FD; - if (fpr.IsSingle(b, true)) + const bool single = fpr.IsSingle(b, true); + if (single) { // Source is already in single precision, so no need to do anything but to copy to PSR1. const ARM64Reg VB = fpr.R(b, RegType::LowerPairSingle); @@ -257,6 +273,9 @@ void JitArm64::frspx(UGeckoInstruction inst) m_float_emit.FCVT(32, 64, EncodeRegToDouble(VD), EncodeRegToDouble(VB)); } + + ASSERT_MSG(DYNA_REC, b == d || single == fpr.IsSingle(b, true), + "Register allocation turned singles into doubles in the middle of frspx"); } void JitArm64::fcmpX(UGeckoInstruction inst) @@ -320,6 +339,9 @@ void JitArm64::fcmpX(UGeckoInstruction inst) SetJumpTarget(continue3); } SetJumpTarget(continue1); + + ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a, true) && fpr.IsSingle(b, true)), + "Register allocation turned singles into doubles in the middle of fcmpX"); } void JitArm64::fctiwzx(UGeckoInstruction inst) @@ -357,4 +379,7 @@ void JitArm64::fctiwzx(UGeckoInstruction inst) } m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0)); fpr.Unlock(V0); + + ASSERT_MSG(DYNA_REC, b == d || single == fpr.IsSingle(b, true), + "Register allocation turned singles into doubles in the middle of fctiwzx"); } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp index faa58e6ad0..00cd92c39b 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp @@ -66,6 +66,9 @@ void JitArm64::ps_mergeXX(UGeckoInstruction inst) ASSERT_MSG(DYNA_REC, 0, "ps_merge - invalid op"); break; } + + ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b)), + "Register allocation turned singles into doubles in the middle of ps_mergeXX"); } void JitArm64::ps_mulsX(UGeckoInstruction inst) @@ -92,6 +95,9 @@ void JitArm64::ps_mulsX(UGeckoInstruction inst) m_float_emit.FMUL(size, reg_encoder(VD), reg_encoder(VA), reg_encoder(VC), upper ? 1 : 0); + ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(c)), + "Register allocation turned singles into doubles in the middle of ps_mulsX"); + fpr.FixSinglePrecision(d); } @@ -250,6 +256,10 @@ void JitArm64::ps_maddXX(UGeckoInstruction inst) ASSERT_MSG(DYNA_REC, 0, "ps_madd - invalid op"); break; } + + ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)), + "Register allocation turned singles into doubles in the middle of ps_maddXX"); + fpr.FixSinglePrecision(d); if (V0Q != INVALID_REG) @@ -291,6 +301,9 @@ void JitArm64::ps_sel(UGeckoInstruction inst) m_float_emit.MOV(VD, V0); fpr.Unlock(V0Q); } + + ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)), + "Register allocation turned singles into doubles in the middle of ps_sel"); } void JitArm64::ps_sumX(UGeckoInstruction inst) @@ -330,6 +343,9 @@ void JitArm64::ps_sumX(UGeckoInstruction inst) m_float_emit.INS(size, VD, upper ? 1 : 0, V0, upper ? 1 : 0); } + ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)), + "Register allocation turned singles into doubles in the middle of ps_sumX"); + fpr.FixSinglePrecision(d); fpr.Unlock(V0); From f17cd3750a5785033d9f2f9e80e97ee3d0a66ba8 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 24 Jan 2021 14:27:07 +0100 Subject: [PATCH 2/3] JitArm64: Remove default parameters from Arm64FPRCache::R/RW It obscures more than it helps in my opinion. --- Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp | 2 +- Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index 942342707d..08e3d2ab9b 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -356,7 +356,7 @@ void JitArm64::fctiwzx(UGeckoInstruction inst) const bool single = fpr.IsSingle(b, true); const ARM64Reg VB = fpr.R(b, single ? RegType::LowerPairSingle : RegType::LowerPair); - const ARM64Reg VD = fpr.RW(d); + const ARM64Reg VD = fpr.RW(d, RegType::LowerPair); const ARM64Reg V0 = fpr.GetReg(); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index b074001830..4e37df9528 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -281,9 +281,9 @@ public: // Returns a guest register inside of a host register // Will dump an immediate to the host register as well - Arm64Gen::ARM64Reg R(size_t preg, RegType type = RegType::LowerPair); + Arm64Gen::ARM64Reg R(size_t preg, RegType type); - Arm64Gen::ARM64Reg RW(size_t preg, RegType type = RegType::LowerPair); + Arm64Gen::ARM64Reg RW(size_t preg, RegType type); BitSet32 GetCallerSavedUsed() const override; From d00430470b73888896ee30caed5c1a1a72867c30 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 24 Jan 2021 14:56:38 +0100 Subject: [PATCH 3/3] JitArm64: Update registers last used before start of instruction Let's reset m_last_used for each register that will be used in an instruction before we start allocating any of them, so that one of the earlier allocations doesn't spill a register that we want in a later allocation. (We must still also increment/reset m_last_used in R and RW, otherwise we end up in trouble when emulating lmw/stmw since those access more guest registers than there are available host registers.) This should ensure that the asserts added earlier in this pull request are never triggered. --- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 9 +++++++++ .../Core/PowerPC/JitArm64/JitArm64_RegCache.cpp | 14 +++++++++++++- .../Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h | 2 ++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 4a22d1c964..8555f2b5f2 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -694,6 +694,15 @@ void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) if (!SConfig::GetInstance().bEnableDebugging) js.downcountAmount += PatchEngine::GetSpeedhackCycles(js.compilerPC); + // Skip calling UpdateLastUsed for lmw/stmw - it usually hurts more than it helps + if (op.inst.OPCD != 46 && op.inst.OPCD != 47) + gpr.UpdateLastUsed(op.regsIn | op.regsOut); + + BitSet32 fpr_used = op.fregsIn; + if (op.fregOut >= 0) + fpr_used[op.fregOut] = true; + fpr.UpdateLastUsed(fpr_used); + // Gather pipe writes using a non-immediate address are discovered by profiling. bool gatherPipeIntCheck = js.fifoWriteAddresses.find(op.address) != js.fifoWriteAddresses.end(); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index 4a3f255c71..d0d861d992 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -41,10 +41,22 @@ ARM64Reg Arm64RegCache::GetReg() // Holy cow, how did you run out of registers? // We can't return anything reasonable in this case. Return INVALID_REG and watch the failure // happen - WARN_LOG_FMT(DYNA_REC, "All available registers are locked dumb dumb"); + ASSERT_MSG(DYNA_REC, 0, "All available registers are locked!"); return INVALID_REG; } +void Arm64RegCache::UpdateLastUsed(BitSet32 regs_used) +{ + for (size_t i = 0; i < m_guest_registers.size(); ++i) + { + OpArg& reg = m_guest_registers[i]; + if (i < 32 && regs_used[i]) + reg.ResetLastUsed(); + else + reg.IncrementLastUsed(); + } +} + u32 Arm64RegCache::GetUnlockedRegisterCount() const { u32 unlocked_registers = 0; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index 4e37df9528..55c86bd4ab 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -140,6 +140,8 @@ public: // Requires unlocking after done Arm64Gen::ARM64Reg GetReg(); + void UpdateLastUsed(BitSet32 regs_used); + // Locks a register so a cache cannot use it // Useful for function calls template