Jit64: Don't use fregsIn in HandleNaNs

fregsIn will include FD for double-precision instructions, since for dependency tracking purposes the instruction does read the upper half of FD. This is not what we want in HandleNaNs. The consequence of this bug is that if an instruction was supposed to output a NaN and FD happens to contain a NaN and FD happens to be the same register as an unused register in the instruction encoding, the NaN in FD could get used as the output instead of the correct NaN. This isn't known to affect any games, which isn't especially surprising considering that there's only one game that needs AccurateNaNs anyway.
2022-10-08 18:19:37 +02:00 · 2022-10-08 18:19:37 +02:00 · 2a5f4a2390
parent cd1f89a02b
commit 2a5f4a2390
3 changed files with 16 additions and 16 deletions
--- a/Source/Core/Core/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.h
@ -126,7 +126,8 @@ public:
  void FinalizeSingleResult(Gen::X64Reg output, const Gen::OpArg& input, bool packed = true,
                            bool duplicate = false);
  void FinalizeDoubleResult(Gen::X64Reg output, const Gen::OpArg& input);
-  void HandleNaNs(UGeckoInstruction inst, Gen::X64Reg xmm, Gen::X64Reg clobber);
+  void HandleNaNs(UGeckoInstruction inst, Gen::X64Reg xmm, Gen::X64Reg clobber,
+                  std::vector<int> inputs);

  void MultiplyImmediate(u32 imm, int a, int d, bool overflow);

--- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
@ -92,7 +92,7 @@ void Jit64::FinalizeDoubleResult(X64Reg output, const OpArg& input)
  SetFPRFIfNeeded(input, false);
 }

-void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
+void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber, std::vector<int> inputs)
 {
  //                      | PowerPC  | x86
  // ---------------------+----------+---------
@ -107,14 +107,13 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)

  ASSERT(xmm != clobber);

-  std::vector<u32> inputs;
-  u32 a = inst.FA, b = inst.FB, c = inst.FC;
-  for (u32 i : {a, b, c})
+  // Remove duplicates from inputs
+  for (auto it = inputs.begin(); it != inputs.end();)
  {
-    if (!js.op->fregsIn[i])
-      continue;
-    if (std::find(inputs.begin(), inputs.end(), i) == inputs.end())
-      inputs.push_back(i);
+    if (std::find(inputs.begin(), it, *it) != it)
+      it = inputs.erase(it);
+    else
+      ++it;
  }

  if (inst.OPCD != 4)
@ -128,7 +127,7 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)

    // If any inputs are NaNs, pick the first NaN of them
    std::vector<FixupBranch> fixups;
-    for (u32 x : inputs)
+    for (int x : inputs)
    {
      RCOpArg Rx = fpr.Use(x, RCMode::Read);
      RegCache::Realize(Rx);
@ -168,7 +167,7 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
      BLENDVPD(xmm, MConst(psGeneratedQNaN));

      // If any inputs are NaNs, use those instead
-      for (u32 x : inputs)
+      for (int x : inputs)
      {
        RCOpArg Rx = fpr.Use(x, RCMode::Read);
        RegCache::Realize(Rx);
@ -198,7 +197,7 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm, X64Reg clobber)
      MOVAPD(xmm, tmp);

      // If any inputs are NaNs, use those instead
-      for (u32 x : inputs)
+      for (int x : inputs)
      {
        RCOpArg Rx = fpr.Use(x, RCMode::Read);
        RegCache::Realize(Rx);
@ -277,7 +276,7 @@ void Jit64::fp_arith(UGeckoInstruction inst)
      avx_op(avxOp, sseOp, dest, Rop1, Rop2, packed, reversible);
    }

-    HandleNaNs(inst, dest, XMM0);
+    HandleNaNs(inst, dest, XMM0, {op1, op2});
    if (single)
      FinalizeSingleResult(Rd, R(dest), packed, true);
    else
@ -500,7 +499,7 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
    result_xmm = Rd;
  }

-  HandleNaNs(inst, result_xmm, XMM0);
+  HandleNaNs(inst, result_xmm, XMM0, {a, b, c});

  if (single)
    FinalizeSingleResult(Rd, R(result_xmm), packed, true);
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp
@ -77,7 +77,7 @@ void Jit64::ps_sum(UGeckoInstruction inst)
  default:
    PanicAlertFmt("ps_sum WTF!!!");
  }
-  HandleNaNs(inst, tmp, tmp == XMM1 ? XMM0 : XMM1);
+  HandleNaNs(inst, tmp, tmp == XMM1 ? XMM0 : XMM1, {a, b, c});
  FinalizeSingleResult(Rd, R(tmp));
 }

@ -112,7 +112,7 @@ void Jit64::ps_muls(UGeckoInstruction inst)
  if (round_input)
    Force25BitPrecision(XMM1, R(XMM1), XMM0);
  MULPD(XMM1, Ra);
-  HandleNaNs(inst, XMM1, XMM0);
+  HandleNaNs(inst, XMM1, XMM0, {a, c});
  FinalizeSingleResult(Rd, R(XMM1));
 }