Merge pull request #9472 from JosJuice/jitarm64-unexpected-double

JitArm64: Update registers last used before start of instruction
2021-01-27 12:43:11 +01:00 · 2021-01-27 12:43:11 +01:00 · 2d75b0d2bc
parent 2a6fffd60e d00430470b
commit 2d75b0d2bc
5 changed files with 74 additions and 10 deletions
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
@ -694,6 +694,15 @@ void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
    if (!SConfig::GetInstance().bEnableDebugging)
      js.downcountAmount += PatchEngine::GetSpeedhackCycles(js.compilerPC);
    // Skip calling UpdateLastUsed for lmw/stmw - it usually hurts more than it helps
    if (op.inst.OPCD != 46 && op.inst.OPCD != 47)
      gpr.UpdateLastUsed(op.regsIn | op.regsOut);
    BitSet32 fpr_used = op.fregsIn;
    if (op.fregOut >= 0)
      fpr_used[op.fregOut] = true;
    fpr.UpdateLastUsed(fpr_used);
    // Gather pipe writes using a non-immediate address are discovered by profiling.
    bool gatherPipeIntCheck = js.fifoWriteAddresses.find(op.address) != js.fifoWriteAddresses.end();
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
@ -32,8 +32,11 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
  bool use_c = op5 >= 25;  // fmul and all kind of fmaddXX
  bool use_b = op5 != 25;  // fmul uses no B
-  bool inputs_are_singles = fpr.IsSingle(a, !packed) && (!use_b || fpr.IsSingle(b, !packed)) &&
+  const auto inputs_are_singles_func = [&] {
-                            (!use_c || fpr.IsSingle(c, !packed));
+    return fpr.IsSingle(a, !packed) && (!use_b || fpr.IsSingle(b, !packed)) &&
           (!use_c || fpr.IsSingle(c, !packed));
  };
  const bool inputs_are_singles = inputs_are_singles_func();
  ARM64Reg VA{}, VB{}, VC{}, VD{};
@ -117,6 +120,9 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
    }
  }
  ASSERT_MSG(DYNA_REC, inputs_are_singles == inputs_are_singles_func(),
             "Register allocation turned singles into doubles in the middle of fp_arith");
  if (single || packed)
    fpr.FixSinglePrecision(d);
 }
@ -196,6 +202,9 @@ void JitArm64::fp_logic(UGeckoInstruction inst)
      break;
    }
  }
  ASSERT_MSG(DYNA_REC, single == fpr.IsSingle(b, !packed),
             "Register allocation turned singles into doubles in the middle of fp_logic");
 }
 void JitArm64::fselx(UGeckoInstruction inst)
@ -209,6 +218,7 @@ void JitArm64::fselx(UGeckoInstruction inst)
  const u32 c = inst.FC;
  const u32 d = inst.FD;
  const bool a_single = fpr.IsSingle(a, true);
  if (fpr.IsSingle(a, true))
  {
    const ARM64Reg VA = fpr.R(a, RegType::LowerPairSingle);
@ -220,15 +230,20 @@ void JitArm64::fselx(UGeckoInstruction inst)
    m_float_emit.FCMPE(EncodeRegToDouble(VA));
  }
-  const bool single = fpr.IsSingle(b, true) && fpr.IsSingle(c, true);
+  const bool b_and_c_singles = fpr.IsSingle(b, true) && fpr.IsSingle(c, true);
-  const RegType type = single ? RegType::LowerPairSingle : RegType::LowerPair;
+  const RegType type = b_and_c_singles ? RegType::LowerPairSingle : RegType::LowerPair;
-  const auto reg_encoder = single ? EncodeRegToSingle : EncodeRegToDouble;
+  const auto reg_encoder = b_and_c_singles ? EncodeRegToSingle : EncodeRegToDouble;
  const ARM64Reg VB = fpr.R(b, type);
  const ARM64Reg VC = fpr.R(c, type);
  const ARM64Reg VD = fpr.RW(d, type);
  m_float_emit.FCSEL(reg_encoder(VD), reg_encoder(VC), reg_encoder(VB), CC_GE);
  ASSERT_MSG(DYNA_REC,
             a_single == fpr.IsSingle(a, true) &&
                 b_and_c_singles == (fpr.IsSingle(b, true) && fpr.IsSingle(c, true)),
             "Register allocation turned singles into doubles in the middle of fselx");
 }
 void JitArm64::frspx(UGeckoInstruction inst)
@ -241,7 +256,8 @@ void JitArm64::frspx(UGeckoInstruction inst)
  const u32 b = inst.FB;
  const u32 d = inst.FD;
-  if (fpr.IsSingle(b, true))
+  const bool single = fpr.IsSingle(b, true);
  if (single)
  {
    // Source is already in single precision, so no need to do anything but to copy to PSR1.
    const ARM64Reg VB = fpr.R(b, RegType::LowerPairSingle);
@ -257,6 +273,9 @@ void JitArm64::frspx(UGeckoInstruction inst)
    m_float_emit.FCVT(32, 64, EncodeRegToDouble(VD), EncodeRegToDouble(VB));
  }
  ASSERT_MSG(DYNA_REC, b == d || single == fpr.IsSingle(b, true),
             "Register allocation turned singles into doubles in the middle of frspx");
 }
 void JitArm64::fcmpX(UGeckoInstruction inst)
@ -320,6 +339,9 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
    SetJumpTarget(continue3);
  }
  SetJumpTarget(continue1);
  ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a, true) && fpr.IsSingle(b, true)),
             "Register allocation turned singles into doubles in the middle of fcmpX");
 }
 void JitArm64::fctiwzx(UGeckoInstruction inst)
@ -334,7 +356,7 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
  const bool single = fpr.IsSingle(b, true);
  const ARM64Reg VB = fpr.R(b, single ? RegType::LowerPairSingle : RegType::LowerPair);
-  const ARM64Reg VD = fpr.RW(d);
+  const ARM64Reg VD = fpr.RW(d, RegType::LowerPair);
  const ARM64Reg V0 = fpr.GetReg();
@ -357,4 +379,7 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
  }
  m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0));
  fpr.Unlock(V0);
  ASSERT_MSG(DYNA_REC, b == d || single == fpr.IsSingle(b, true),
             "Register allocation turned singles into doubles in the middle of fctiwzx");
 }
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp
@ -66,6 +66,9 @@ void JitArm64::ps_mergeXX(UGeckoInstruction inst)
    ASSERT_MSG(DYNA_REC, 0, "ps_merge - invalid op");
    break;
  }
  ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b)),
             "Register allocation turned singles into doubles in the middle of ps_mergeXX");
 }
 void JitArm64::ps_mulsX(UGeckoInstruction inst)
@ -92,6 +95,9 @@ void JitArm64::ps_mulsX(UGeckoInstruction inst)
  m_float_emit.FMUL(size, reg_encoder(VD), reg_encoder(VA), reg_encoder(VC), upper ? 1 : 0);
  ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(c)),
             "Register allocation turned singles into doubles in the middle of ps_mulsX");
  fpr.FixSinglePrecision(d);
 }
@ -250,6 +256,10 @@ void JitArm64::ps_maddXX(UGeckoInstruction inst)
    ASSERT_MSG(DYNA_REC, 0, "ps_madd - invalid op");
    break;
  }
  ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
             "Register allocation turned singles into doubles in the middle of ps_maddXX");
  fpr.FixSinglePrecision(d);
  if (V0Q != INVALID_REG)
@ -291,6 +301,9 @@ void JitArm64::ps_sel(UGeckoInstruction inst)
    m_float_emit.MOV(VD, V0);
    fpr.Unlock(V0Q);
  }
  ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
             "Register allocation turned singles into doubles in the middle of ps_sel");
 }
 void JitArm64::ps_sumX(UGeckoInstruction inst)
@ -330,6 +343,9 @@ void JitArm64::ps_sumX(UGeckoInstruction inst)
    m_float_emit.INS(size, VD, upper ? 1 : 0, V0, upper ? 1 : 0);
  }
  ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
             "Register allocation turned singles into doubles in the middle of ps_sumX");
  fpr.FixSinglePrecision(d);
  fpr.Unlock(V0);
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
@ -41,10 +41,22 @@ ARM64Reg Arm64RegCache::GetReg()
  // Holy cow, how did you run out of registers?
  // We can't return anything reasonable in this case. Return INVALID_REG and watch the failure
  // happen
-  WARN_LOG_FMT(DYNA_REC, "All available registers are locked dumb dumb");
+  ASSERT_MSG(DYNA_REC, 0, "All available registers are locked!");
  return INVALID_REG;
 }
 void Arm64RegCache::UpdateLastUsed(BitSet32 regs_used)
 {
  for (size_t i = 0; i < m_guest_registers.size(); ++i)
  {
    OpArg& reg = m_guest_registers[i];
    if (i < 32 && regs_used[i])
      reg.ResetLastUsed();
    else
      reg.IncrementLastUsed();
  }
 }
 u32 Arm64RegCache::GetUnlockedRegisterCount() const
 {
  u32 unlocked_registers = 0;
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
@ -140,6 +140,8 @@ public:
  // Requires unlocking after done
  Arm64Gen::ARM64Reg GetReg();
  void UpdateLastUsed(BitSet32 regs_used);
  // Locks a register so a cache cannot use it
  // Useful for function calls
  template <typename T = Arm64Gen::ARM64Reg, typename... Args>
@ -281,9 +283,9 @@ public:
  // Returns a guest register inside of a host register
  // Will dump an immediate to the host register as well
-  Arm64Gen::ARM64Reg R(size_t preg, RegType type = RegType::LowerPair);
+  Arm64Gen::ARM64Reg R(size_t preg, RegType type);
-  Arm64Gen::ARM64Reg RW(size_t preg, RegType type = RegType::LowerPair);
+  Arm64Gen::ARM64Reg RW(size_t preg, RegType type);
  BitSet32 GetCallerSavedUsed() const override;