Merge pull request #7624 from lioncash/paired-single

PowerPC: Remove separate macros for paired singles
2018-12-28 06:32:45 -05:00 · 2018-12-28 06:32:45 -05:00 · b7db1f020b
parent 8c9f553734 244d083f0e
commit b7db1f020b
15 changed files with 457 additions and 235 deletions
--- a/Source/Core/Core/GeckoCode.cpp
+++ b/Source/Core/Core/GeckoCode.cpp
@@ -272,8 +272,8 @@ void RunCodeHandler()
  // Registers FPR0->13 are volatile
  for (int i = 0; i < 14; ++i)
  {
-    PowerPC::HostWrite_U64(riPS0(i), SP + 24 + 2 * i * sizeof(u64));
-    PowerPC::HostWrite_U64(riPS1(i), SP + 24 + (2 * i + 1) * sizeof(u64));
+    PowerPC::HostWrite_U64(rPS(i).PS0AsU64(), SP + 24 + 2 * i * sizeof(u64));
+    PowerPC::HostWrite_U64(rPS(i).PS1AsU64(), SP + 24 + (2 * i + 1) * sizeof(u64));
  }
  DEBUG_LOG(ACTIONREPLAY,
            "GeckoCodes: Initiating phantom branch-and-link. "
--- a/Source/Core/Core/HLE/HLE_Misc.cpp
+++ b/Source/Core/Core/HLE/HLE_Misc.cpp
@@ -64,8 +64,8 @@ void GeckoReturnTrampoline()
  PowerPC::ExpandCR(PowerPC::HostRead_U32(SP + 20));
  for (int i = 0; i < 14; ++i)
  {
-    riPS0(i) = PowerPC::HostRead_U64(SP + 24 + 2 * i * sizeof(u64));
-    riPS1(i) = PowerPC::HostRead_U64(SP + 24 + (2 * i + 1) * sizeof(u64));
+    rPS(i).SetBoth(PowerPC::HostRead_U64(SP + 24 + 2 * i * sizeof(u64)),
+                   PowerPC::HostRead_U64(SP + 24 + (2 * i + 1) * sizeof(u64)));
  }
 }
 }
--- a/Source/Core/Core/HLE/HLE_VarArgs.cpp
+++ b/Source/Core/Core/HLE/HLE_VarArgs.cpp
@@ -15,7 +15,7 @@ u32 HLE::SystemVABI::VAList::GetGPR(u32 gpr) const

 double HLE::SystemVABI::VAList::GetFPR(u32 fpr) const
 {
-  return rPS0(fpr);
+  return rPS(fpr).PS0AsDouble();
 }

 HLE::SystemVABI::VAListStruct::VAListStruct(u32 address)
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp
@@ -125,8 +125,10 @@ static void Trace(UGeckoInstruction& inst)
  std::string fregs = "";
  for (int i = 0; i < 32; i++)
  {
-    fregs += StringFromFormat("f%02d: %08" PRIx64 " %08" PRIx64 " ", i, PowerPC::ppcState.ps[i][0],
-                              PowerPC::ppcState.ps[i][1]);
+    const auto& ps = PowerPC::ppcState.ps[i];
+
+    fregs +=
+        StringFromFormat("f%02d: %08" PRIx64 " %08" PRIx64 " ", i, ps.PS0AsU64(), ps.PS1AsU64());
  }

  const std::string ppc_inst = Common::GekkoDisassembler::Disassemble(inst.hex, PC);
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp
@@ -27,7 +27,7 @@ enum class RoundingMode
 // The Programming Environments Manual for 32 and 64-bit Microprocessors
 void ConvertToInteger(UGeckoInstruction inst, RoundingMode rounding_mode)
 {
-  const double b = rPS0(inst.FB);
+  const double b = rPS(inst.FB).PS0AsDouble();
  u32 value;
  bool exception_occurred = false;

@@ -111,9 +111,11 @@ void ConvertToInteger(UGeckoInstruction inst, RoundingMode rounding_mode)
  {
    // Based on HW tests
    // FPRF is not affected
-    riPS0(inst.FD) = 0xfff8000000000000ull | value;
+    u64 result = 0xfff8000000000000ull | value;
    if (value == 0 && std::signbit(b))
-      riPS0(inst.FD) |= 0x100000000ull;
+      result |= 0x100000000ull;
+
+    rPS(inst.FD).SetPS0(result);
  }

  if (inst.Rc)
@@ -198,12 +200,18 @@ void Interpreter::Helper_FloatCompareUnordered(UGeckoInstruction inst, double fa

 void Interpreter::fcmpo(UGeckoInstruction inst)
 {
-  Helper_FloatCompareOrdered(inst, rPS0(inst.FA), rPS0(inst.FB));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+
+  Helper_FloatCompareOrdered(inst, a.PS0AsDouble(), b.PS0AsDouble());
 }

 void Interpreter::fcmpu(UGeckoInstruction inst)
 {
-  Helper_FloatCompareUnordered(inst, rPS0(inst.FA), rPS0(inst.FB));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+
+  Helper_FloatCompareUnordered(inst, a.PS0AsDouble(), b.PS0AsDouble());
 }

 void Interpreter::fctiwx(UGeckoInstruction inst)
@@ -218,7 +226,7 @@ void Interpreter::fctiwzx(UGeckoInstruction inst)

 void Interpreter::fmrx(UGeckoInstruction inst)
 {
-  riPS0(inst.FD) = riPS0(inst.FB);
+  rPS(inst.FD).SetPS0(rPS(inst.FB).PS0AsU64());

  // This is a binary instruction. Does not alter FPSCR
  if (inst.Rc)
@@ -227,7 +235,7 @@ void Interpreter::fmrx(UGeckoInstruction inst)

 void Interpreter::fabsx(UGeckoInstruction inst)
 {
-  rPS0(inst.FD) = fabs(rPS0(inst.FB));
+  rPS(inst.FD).SetPS0(fabs(rPS(inst.FB).PS0AsDouble()));

  // This is a binary instruction. Does not alter FPSCR
  if (inst.Rc)
@@ -236,7 +244,7 @@ void Interpreter::fabsx(UGeckoInstruction inst)

 void Interpreter::fnabsx(UGeckoInstruction inst)
 {
-  riPS0(inst.FD) = riPS0(inst.FB) | (1ULL << 63);
+  rPS(inst.FD).SetPS0(rPS(inst.FB).PS0AsU64() | (UINT64_C(1) << 63));

  // This is a binary instruction. Does not alter FPSCR
  if (inst.Rc)
@@ -245,7 +253,7 @@ void Interpreter::fnabsx(UGeckoInstruction inst)

 void Interpreter::fnegx(UGeckoInstruction inst)
 {
-  riPS0(inst.FD) = riPS0(inst.FB) ^ (1ULL << 63);
+  rPS(inst.FD).SetPS0(rPS(inst.FB).PS0AsU64() ^ (UINT64_C(1) << 63));

  // This is a binary instruction. Does not alter FPSCR
  if (inst.Rc)
@@ -254,7 +262,11 @@ void Interpreter::fnegx(UGeckoInstruction inst)

 void Interpreter::fselx(UGeckoInstruction inst)
 {
-  rPS0(inst.FD) = (rPS0(inst.FA) >= -0.0) ? rPS0(inst.FC) : rPS0(inst.FB);
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+  const auto& c = rPS(inst.FC);
+
+  rPS(inst.FD).SetPS0((a.PS0AsDouble() >= -0.0) ? c.PS0AsDouble() : b.PS0AsDouble());

  // This is a binary instruction. Does not alter FPSCR
  if (inst.Rc)
@@ -266,7 +278,7 @@ void Interpreter::fselx(UGeckoInstruction inst)
 // PS1 is said to be undefined
 void Interpreter::frspx(UGeckoInstruction inst)  // round to single
 {
-  const double b = rPS0(inst.FB);
+  const double b = rPS(inst.FB).PS0AsDouble();
  const double rounded = ForceSingle(b);

  if (std::isnan(b))
@@ -278,8 +290,7 @@ void Interpreter::frspx(UGeckoInstruction inst)  // round to single

    if (!is_snan || FPSCR.VE == 0)
    {
-      rPS0(inst.FD) = rounded;
-      rPS1(inst.FD) = rounded;
+      rPS(inst.FD).Fill(rounded);
      PowerPC::UpdateFPRF(b);
    }

@@ -290,8 +301,7 @@ void Interpreter::frspx(UGeckoInstruction inst)  // round to single
    SetFI(b != rounded);
    FPSCR.FR = fabs(rounded) > fabs(b);
    PowerPC::UpdateFPRF(rounded);
-    rPS0(inst.FD) = rounded;
-    rPS1(inst.FD) = rounded;
+    rPS(inst.FD).Fill(rounded);
  }

  if (inst.Rc)
@@ -300,13 +310,16 @@ void Interpreter::frspx(UGeckoInstruction inst)  // round to single

 void Interpreter::fmulx(UGeckoInstruction inst)
 {
-  const FPResult product = NI_mul(rPS0(inst.FA), rPS0(inst.FC));
+  const auto& a = rPS(inst.FA);
+  const auto& c = rPS(inst.FC);
+
+  const FPResult product = NI_mul(a.PS0AsDouble(), c.PS0AsDouble());

  if (FPSCR.VE == 0 || product.HasNoInvalidExceptions())
  {
    const double result = ForceDouble(product.value);

-    rPS0(inst.FD) = result;
+    rPS(inst.FD).SetPS0(result);
    FPSCR.FI = 0;  // are these flags important?
    FPSCR.FR = 0;
    PowerPC::UpdateFPRF(result);
@@ -317,14 +330,17 @@ void Interpreter::fmulx(UGeckoInstruction inst)
 }
 void Interpreter::fmulsx(UGeckoInstruction inst)
 {
-  const double c_value = Force25Bit(rPS0(inst.FC));
-  const FPResult d_value = NI_mul(rPS0(inst.FA), c_value);
+  const auto& a = rPS(inst.FA);
+  const auto& c = rPS(inst.FC);
+
+  const double c_value = Force25Bit(c.PS0AsDouble());
+  const FPResult d_value = NI_mul(a.PS0AsDouble(), c_value);

  if (FPSCR.VE == 0 || d_value.HasNoInvalidExceptions())
  {
    const double result = ForceSingle(d_value.value);

-    rPS0(inst.FD) = rPS1(inst.FD) = result;
+    rPS(inst.FD).Fill(result);
    FPSCR.FI = 0;
    FPSCR.FR = 0;
    PowerPC::UpdateFPRF(result);
@@ -336,12 +352,15 @@ void Interpreter::fmulsx(UGeckoInstruction inst)

 void Interpreter::fmaddx(UGeckoInstruction inst)
 {
-  const FPResult product = NI_madd(rPS0(inst.FA), rPS0(inst.FC), rPS0(inst.FB));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+  const auto& c = rPS(inst.FC);
+  const FPResult product = NI_madd(a.PS0AsDouble(), c.PS0AsDouble(), b.PS0AsDouble());

  if (FPSCR.VE == 0 || product.HasNoInvalidExceptions())
  {
    const double result = ForceDouble(product.value);
-    rPS0(inst.FD) = result;
+    rPS(inst.FD).SetPS0(result);
    PowerPC::UpdateFPRF(result);
  }

@@ -351,14 +370,18 @@ void Interpreter::fmaddx(UGeckoInstruction inst)

 void Interpreter::fmaddsx(UGeckoInstruction inst)
 {
-  const double c_value = Force25Bit(rPS0(inst.FC));
-  const FPResult d_value = NI_madd(rPS0(inst.FA), c_value, rPS0(inst.FB));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+  const auto& c = rPS(inst.FC);
+
+  const double c_value = Force25Bit(c.PS0AsDouble());
+  const FPResult d_value = NI_madd(a.PS0AsDouble(), c_value, b.PS0AsDouble());

  if (FPSCR.VE == 0 || d_value.HasNoInvalidExceptions())
  {
    const double result = ForceSingle(d_value.value);

-    rPS0(inst.FD) = rPS1(inst.FD) = result;
+    rPS(inst.FD).Fill(result);
    FPSCR.FI = d_value.value != result;
    FPSCR.FR = 0;
    PowerPC::UpdateFPRF(result);
@@ -370,12 +393,15 @@ void Interpreter::fmaddsx(UGeckoInstruction inst)

 void Interpreter::faddx(UGeckoInstruction inst)
 {
-  const FPResult sum = NI_add(rPS0(inst.FA), rPS0(inst.FB));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+
+  const FPResult sum = NI_add(a.PS0AsDouble(), b.PS0AsDouble());

  if (FPSCR.VE == 0 || sum.HasNoInvalidExceptions())
  {
    const double result = ForceDouble(sum.value);
-    rPS0(inst.FD) = result;
+    rPS(inst.FD).SetPS0(result);
    PowerPC::UpdateFPRF(result);
  }

@@ -384,12 +410,15 @@ void Interpreter::faddx(UGeckoInstruction inst)
 }
 void Interpreter::faddsx(UGeckoInstruction inst)
 {
-  const FPResult sum = NI_add(rPS0(inst.FA), rPS0(inst.FB));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+
+  const FPResult sum = NI_add(a.PS0AsDouble(), b.PS0AsDouble());

  if (FPSCR.VE == 0 || sum.HasNoInvalidExceptions())
  {
    const double result = ForceSingle(sum.value);
-    rPS0(inst.FD) = rPS1(inst.FD) = result;
+    rPS(inst.FD).Fill(result);
    PowerPC::UpdateFPRF(result);
  }

@@ -399,14 +428,17 @@ void Interpreter::faddsx(UGeckoInstruction inst)

 void Interpreter::fdivx(UGeckoInstruction inst)
 {
-  const FPResult quotient = NI_div(rPS0(inst.FA), rPS0(inst.FB));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+
+  const FPResult quotient = NI_div(a.PS0AsDouble(), b.PS0AsDouble());
  const bool not_divide_by_zero = FPSCR.ZE == 0 || quotient.exception != FPSCR_ZX;
  const bool not_invalid = FPSCR.VE == 0 || quotient.HasNoInvalidExceptions();

  if (not_divide_by_zero && not_invalid)
  {
    const double result = ForceDouble(quotient.value);
-    rPS0(inst.FD) = result;
+    rPS(inst.FD).SetPS0(result);
    PowerPC::UpdateFPRF(result);
  }

@@ -416,14 +448,17 @@ void Interpreter::fdivx(UGeckoInstruction inst)
 }
 void Interpreter::fdivsx(UGeckoInstruction inst)
 {
-  const FPResult quotient = NI_div(rPS0(inst.FA), rPS0(inst.FB));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+
+  const FPResult quotient = NI_div(a.PS0AsDouble(), b.PS0AsDouble());
  const bool not_divide_by_zero = FPSCR.ZE == 0 || quotient.exception != FPSCR_ZX;
  const bool not_invalid = FPSCR.VE == 0 || quotient.HasNoInvalidExceptions();

  if (not_divide_by_zero && not_invalid)
  {
    const double result = ForceSingle(quotient.value);
-    rPS0(inst.FD) = rPS1(inst.FD) = result;
+    rPS(inst.FD).Fill(result);
    PowerPC::UpdateFPRF(result);
  }

@@ -434,11 +469,11 @@ void Interpreter::fdivsx(UGeckoInstruction inst)
 // Single precision only.
 void Interpreter::fresx(UGeckoInstruction inst)
 {
-  const double b = rPS0(inst.FB);
+  const double b = rPS(inst.FB).PS0AsDouble();

  const auto compute_result = [inst](double value) {
    const double result = Common::ApproximateReciprocal(value);
-    rPS0(inst.FD) = rPS1(inst.FD) = result;
+    rPS(inst.FD).Fill(result);
    PowerPC::UpdateFPRF(result);
  };

@@ -472,11 +507,11 @@ void Interpreter::fresx(UGeckoInstruction inst)

 void Interpreter::frsqrtex(UGeckoInstruction inst)
 {
-  const double b = rPS0(inst.FB);
+  const double b = rPS(inst.FB).PS0AsDouble();

  const auto compute_result = [inst](double value) {
    const double result = Common::ApproximateReciprocalSquareRoot(value);
-    rPS0(inst.FD) = result;
+    rPS(inst.FD).SetPS0(result);
    PowerPC::UpdateFPRF(result);
  };

@@ -518,12 +553,16 @@ void Interpreter::frsqrtex(UGeckoInstruction inst)

 void Interpreter::fmsubx(UGeckoInstruction inst)
 {
-  const FPResult product = NI_msub(rPS0(inst.FA), rPS0(inst.FC), rPS0(inst.FB));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+  const auto& c = rPS(inst.FC);
+
+  const FPResult product = NI_msub(a.PS0AsDouble(), c.PS0AsDouble(), b.PS0AsDouble());

  if (FPSCR.VE == 0 || product.HasNoInvalidExceptions())
  {
    const double result = ForceDouble(product.value);
-    rPS0(inst.FD) = result;
+    rPS(inst.FD).SetPS0(result);
    PowerPC::UpdateFPRF(result);
  }

@@ -533,13 +572,17 @@ void Interpreter::fmsubx(UGeckoInstruction inst)

 void Interpreter::fmsubsx(UGeckoInstruction inst)
 {
-  const double c_value = Force25Bit(rPS0(inst.FC));
-  const FPResult product = NI_msub(rPS0(inst.FA), c_value, rPS0(inst.FB));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+  const auto& c = rPS(inst.FC);
+
+  const double c_value = Force25Bit(c.PS0AsDouble());
+  const FPResult product = NI_msub(a.PS0AsDouble(), c_value, b.PS0AsDouble());

  if (FPSCR.VE == 0 || product.HasNoInvalidExceptions())
  {
    const double result = ForceSingle(product.value);
-    rPS0(inst.FD) = rPS1(inst.FD) = result;
+    rPS(inst.FD).Fill(result);
    PowerPC::UpdateFPRF(result);
  }

@@ -549,13 +592,19 @@ void Interpreter::fmsubsx(UGeckoInstruction inst)

 void Interpreter::fnmaddx(UGeckoInstruction inst)
 {
-  const FPResult product = NI_madd(rPS0(inst.FA), rPS0(inst.FC), rPS0(inst.FB));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+  const auto& c = rPS(inst.FC);
+
+  const FPResult product = NI_madd(a.PS0AsDouble(), c.PS0AsDouble(), b.PS0AsDouble());

  if (FPSCR.VE == 0 || product.HasNoInvalidExceptions())
  {
-    const double result = ForceDouble(product.value);
-    rPS0(inst.FD) = std::isnan(result) ? result : -result;
-    PowerPC::UpdateFPRF(rPS0(inst.FD));
+    const double tmp = ForceDouble(product.value);
+    const double result = std::isnan(tmp) ? tmp : -tmp;
+
+    rPS(inst.FD).SetPS0(result);
+    PowerPC::UpdateFPRF(result);
  }

  if (inst.Rc)
@@ -564,14 +613,20 @@ void Interpreter::fnmaddx(UGeckoInstruction inst)

 void Interpreter::fnmaddsx(UGeckoInstruction inst)
 {
-  const double c_value = Force25Bit(rPS0(inst.FC));
-  const FPResult product = NI_madd(rPS0(inst.FA), c_value, rPS0(inst.FB));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+  const auto& c = rPS(inst.FC);
+
+  const double c_value = Force25Bit(c.PS0AsDouble());
+  const FPResult product = NI_madd(a.PS0AsDouble(), c_value, b.PS0AsDouble());

  if (FPSCR.VE == 0 || product.HasNoInvalidExceptions())
  {
-    const double result = ForceSingle(product.value);
-    rPS0(inst.FD) = rPS1(inst.FD) = std::isnan(result) ? result : -result;
-    PowerPC::UpdateFPRF(rPS0(inst.FD));
+    const double tmp = ForceSingle(product.value);
+    const double result = std::isnan(tmp) ? tmp : -tmp;
+
+    rPS(inst.FD).Fill(result);
+    PowerPC::UpdateFPRF(result);
  }

  if (inst.Rc)
@@ -580,13 +635,19 @@ void Interpreter::fnmaddsx(UGeckoInstruction inst)

 void Interpreter::fnmsubx(UGeckoInstruction inst)
 {
-  const FPResult product = NI_msub(rPS0(inst.FA), rPS0(inst.FC), rPS0(inst.FB));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+  const auto& c = rPS(inst.FC);
+
+  const FPResult product = NI_msub(a.PS0AsDouble(), c.PS0AsDouble(), b.PS0AsDouble());

  if (FPSCR.VE == 0 || product.HasNoInvalidExceptions())
  {
-    const double result = ForceDouble(product.value);
-    rPS0(inst.FD) = std::isnan(result) ? result : -result;
-    PowerPC::UpdateFPRF(rPS0(inst.FD));
+    const double tmp = ForceDouble(product.value);
+    const double result = std::isnan(tmp) ? tmp : -tmp;
+
+    rPS(inst.FD).SetPS0(result);
+    PowerPC::UpdateFPRF(result);
  }

  if (inst.Rc)
@@ -595,14 +656,20 @@ void Interpreter::fnmsubx(UGeckoInstruction inst)

 void Interpreter::fnmsubsx(UGeckoInstruction inst)
 {
-  const double c_value = Force25Bit(rPS0(inst.FC));
-  const FPResult product = NI_msub(rPS0(inst.FA), c_value, rPS0(inst.FB));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+  const auto& c = rPS(inst.FC);
+
+  const double c_value = Force25Bit(c.PS0AsDouble());
+  const FPResult product = NI_msub(a.PS0AsDouble(), c_value, b.PS0AsDouble());

  if (FPSCR.VE == 0 || product.HasNoInvalidExceptions())
  {
-    const double result = ForceSingle(product.value);
-    rPS0(inst.FD) = rPS1(inst.FD) = std::isnan(result) ? result : -result;
-    PowerPC::UpdateFPRF(rPS0(inst.FD));
+    const double tmp = ForceSingle(product.value);
+    const double result = std::isnan(tmp) ? tmp : -tmp;
+
+    rPS(inst.FD).Fill(result);
+    PowerPC::UpdateFPRF(result);
  }

  if (inst.Rc)
@@ -611,12 +678,15 @@ void Interpreter::fnmsubsx(UGeckoInstruction inst)

 void Interpreter::fsubx(UGeckoInstruction inst)
 {
-  const FPResult difference = NI_sub(rPS0(inst.FA), rPS0(inst.FB));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+
+  const FPResult difference = NI_sub(a.PS0AsDouble(), b.PS0AsDouble());

  if (FPSCR.VE == 0 || difference.HasNoInvalidExceptions())
  {
    const double result = ForceDouble(difference.value);
-    rPS0(inst.FD) = result;
+    rPS(inst.FD).SetPS0(result);
    PowerPC::UpdateFPRF(result);
  }

@@ -626,12 +696,15 @@ void Interpreter::fsubx(UGeckoInstruction inst)

 void Interpreter::fsubsx(UGeckoInstruction inst)
 {
-  const FPResult difference = NI_sub(rPS0(inst.FA), rPS0(inst.FB));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+
+  const FPResult difference = NI_sub(a.PS0AsDouble(), b.PS0AsDouble());

  if (FPSCR.VE == 0 || difference.HasNoInvalidExceptions())
  {
    const double result = ForceSingle(difference.value);
-    rPS0(inst.FD) = rPS1(inst.FD) = result;
+    rPS(inst.FD).Fill(result);
    PowerPC::UpdateFPRF(result);
  }

--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp
@@ -72,7 +72,7 @@ void Interpreter::lfd(UGeckoInstruction inst)
  const u64 temp = PowerPC::Read_U64(address);

  if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
-    riPS0(inst.FD) = temp;
+    rPS(inst.FD).SetPS0(temp);
 }

 void Interpreter::lfdu(UGeckoInstruction inst)
@@ -89,7 +89,7 @@ void Interpreter::lfdu(UGeckoInstruction inst)

  if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
  {
-    riPS0(inst.FD) = temp;
+    rPS(inst.FD).SetPS0(temp);
    rGPR[inst.RA] = address;
  }
 }
@@ -108,7 +108,7 @@ void Interpreter::lfdux(UGeckoInstruction inst)

  if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
  {
-    riPS0(inst.FD) = temp;
+    rPS(inst.FD).SetPS0(temp);
    rGPR[inst.RA] = address;
  }
 }
@@ -126,7 +126,7 @@ void Interpreter::lfdx(UGeckoInstruction inst)
  const u64 temp = PowerPC::Read_U64(address);

  if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
-    riPS0(inst.FD) = temp;
+    rPS(inst.FD).SetPS0(temp);
 }

 void Interpreter::lfs(UGeckoInstruction inst)
@@ -144,8 +144,7 @@ void Interpreter::lfs(UGeckoInstruction inst)
  if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
  {
    const u64 value = ConvertToDouble(temp);
-    riPS0(inst.FD) = value;
-    riPS1(inst.FD) = value;
+    rPS(inst.FD).Fill(value);
  }
 }

@@ -164,8 +163,7 @@ void Interpreter::lfsu(UGeckoInstruction inst)
  if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
  {
    const u64 value = ConvertToDouble(temp);
-    riPS0(inst.FD) = value;
-    riPS1(inst.FD) = value;
+    rPS(inst.FD).Fill(value);
    rGPR[inst.RA] = address;
  }
 }
@@ -184,9 +182,8 @@ void Interpreter::lfsux(UGeckoInstruction inst)

  if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
  {
-    u64 value = ConvertToDouble(temp);
-    riPS0(inst.FD) = value;
-    riPS1(inst.FD) = value;
+    const u64 value = ConvertToDouble(temp);
+    rPS(inst.FD).Fill(value);
    rGPR[inst.RA] = address;
  }
 }
@@ -206,8 +203,7 @@ void Interpreter::lfsx(UGeckoInstruction inst)
  if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
  {
    const u64 value = ConvertToDouble(temp);
-    riPS0(inst.FD) = value;
-    riPS1(inst.FD) = value;
+    rPS(inst.FD).Fill(value);
  }
 }

@@ -355,7 +351,7 @@ void Interpreter::stfd(UGeckoInstruction inst)
    return;
  }

-  PowerPC::Write_U64(riPS0(inst.FS), address);
+  PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address);
 }

 void Interpreter::stfdu(UGeckoInstruction inst)
@@ -368,7 +364,7 @@ void Interpreter::stfdu(UGeckoInstruction inst)
    return;
  }

-  PowerPC::Write_U64(riPS0(inst.FS), address);
+  PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address);
  if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
  {
    rGPR[inst.RA] = address;
@@ -385,7 +381,7 @@ void Interpreter::stfs(UGeckoInstruction inst)
    return;
  }

-  PowerPC::Write_U32(ConvertToSingle(riPS0(inst.FS)), address);
+  PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address);
 }

 void Interpreter::stfsu(UGeckoInstruction inst)
@@ -398,7 +394,7 @@ void Interpreter::stfsu(UGeckoInstruction inst)
    return;
  }

-  PowerPC::Write_U32(ConvertToSingle(riPS0(inst.FS)), address);
+  PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address);
  if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
  {
    rGPR[inst.RA] = address;
@@ -761,7 +757,7 @@ void Interpreter::stfdux(UGeckoInstruction inst)
    return;
  }

-  PowerPC::Write_U64(riPS0(inst.FS), address);
+  PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address);
  if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
  {
    rGPR[inst.RA] = address;
@@ -778,7 +774,7 @@ void Interpreter::stfdx(UGeckoInstruction inst)
    return;
  }

-  PowerPC::Write_U64(riPS0(inst.FS), address);
+  PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address);
 }

 // Stores Floating points into Integers indeXed
@@ -792,7 +788,7 @@ void Interpreter::stfiwx(UGeckoInstruction inst)
    return;
  }

-  PowerPC::Write_U32((u32)riPS0(inst.FS), address);
+  PowerPC::Write_U32(rPS(inst.FS).PS0AsU32(), address);
 }

 void Interpreter::stfsux(UGeckoInstruction inst)
@@ -805,7 +801,7 @@ void Interpreter::stfsux(UGeckoInstruction inst)
    return;
  }

-  PowerPC::Write_U32(ConvertToSingle(riPS0(inst.FS)), address);
+  PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address);
  if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
  {
    rGPR[inst.RA] = address;
@@ -822,7 +818,7 @@ void Interpreter::stfsx(UGeckoInstruction inst)
    return;
  }

-  PowerPC::Write_U32(ConvertToSingle(riPS0(inst.FS)), address);
+  PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address);
 }

 void Interpreter::sthbrx(UGeckoInstruction inst)
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp
@@ -176,8 +176,8 @@ void Interpreter::Helper_Quantize(u32 addr, u32 instI, u32 instRS, u32 instW)
  const EQuantizeType stType = gqr.st_type;
  const unsigned int stScale = gqr.st_scale;

-  const double ps0 = rPS0(instRS);
-  const double ps1 = rPS1(instRS);
+  const double ps0 = rPS(instRS).PS0AsDouble();
+  const double ps1 = rPS(instRS).PS1AsDouble();

  switch (stType)
  {
@@ -301,8 +301,7 @@ void Interpreter::Helper_Dequantize(u32 addr, u32 instI, u32 instRD, u32 instW)
    return;
  }

-  rPS0(instRD) = ps0;
-  rPS1(instRD) = ps1;
+  rPS(instRD).SetBoth(ps0, ps1);
 }

 void Interpreter::psq_l(UGeckoInstruction inst)
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp
@@ -13,8 +13,12 @@
 // These "binary instructions" do not alter FPSCR.
 void Interpreter::ps_sel(UGeckoInstruction inst)
 {
-  rPS0(inst.FD) = rPS0(inst.FA) >= -0.0 ? rPS0(inst.FC) : rPS0(inst.FB);
-  rPS1(inst.FD) = rPS1(inst.FA) >= -0.0 ? rPS1(inst.FC) : rPS1(inst.FB);
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+  const auto& c = rPS(inst.FC);
+
+  rPS(inst.FD).SetBoth(a.PS0AsDouble() >= -0.0 ? c.PS0AsDouble() : b.PS0AsDouble(),
+                       a.PS1AsDouble() >= -0.0 ? c.PS1AsDouble() : b.PS1AsDouble());

  if (inst.Rc)
    Helper_UpdateCR1();
@@ -22,8 +26,9 @@ void Interpreter::ps_sel(UGeckoInstruction inst)

 void Interpreter::ps_neg(UGeckoInstruction inst)
 {
-  riPS0(inst.FD) = riPS0(inst.FB) ^ (1ULL << 63);
-  riPS1(inst.FD) = riPS1(inst.FB) ^ (1ULL << 63);
+  const auto& b = rPS(inst.FB);
+
+  rPS(inst.FD).SetBoth(b.PS0AsU64() ^ (UINT64_C(1) << 63), b.PS1AsU64() ^ (UINT64_C(1) << 63));

  if (inst.Rc)
    Helper_UpdateCR1();
@@ -31,8 +36,7 @@ void Interpreter::ps_neg(UGeckoInstruction inst)

 void Interpreter::ps_mr(UGeckoInstruction inst)
 {
-  rPS0(inst.FD) = rPS0(inst.FB);
-  rPS1(inst.FD) = rPS1(inst.FB);
+  rPS(inst.FD) = rPS(inst.FB);

  if (inst.Rc)
    Helper_UpdateCR1();
@@ -40,8 +44,9 @@ void Interpreter::ps_mr(UGeckoInstruction inst)

 void Interpreter::ps_nabs(UGeckoInstruction inst)
 {
-  riPS0(inst.FD) = riPS0(inst.FB) | (1ULL << 63);
-  riPS1(inst.FD) = riPS1(inst.FB) | (1ULL << 63);
+  const auto& b = rPS(inst.FB);
+
+  rPS(inst.FD).SetBoth(b.PS0AsU64() | (UINT64_C(1) << 63), b.PS1AsU64() | (UINT64_C(1) << 63));

  if (inst.Rc)
    Helper_UpdateCR1();
@@ -49,8 +54,9 @@ void Interpreter::ps_nabs(UGeckoInstruction inst)

 void Interpreter::ps_abs(UGeckoInstruction inst)
 {
-  riPS0(inst.FD) = riPS0(inst.FB) & ~(1ULL << 63);
-  riPS1(inst.FD) = riPS1(inst.FB) & ~(1ULL << 63);
+  const auto& b = rPS(inst.FB);
+
+  rPS(inst.FD).SetBoth(b.PS0AsU64() & ~(UINT64_C(1) << 63), b.PS1AsU64() & ~(UINT64_C(1) << 63));

  if (inst.Rc)
    Helper_UpdateCR1();
@@ -59,10 +65,10 @@ void Interpreter::ps_abs(UGeckoInstruction inst)
 // These are just moves, double is OK.
 void Interpreter::ps_merge00(UGeckoInstruction inst)
 {
-  double p0 = rPS0(inst.FA);
-  double p1 = rPS0(inst.FB);
-  rPS0(inst.FD) = p0;
-  rPS1(inst.FD) = p1;
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+
+  rPS(inst.FD).SetBoth(a.PS0AsDouble(), b.PS0AsDouble());

  if (inst.Rc)
    Helper_UpdateCR1();
@@ -70,10 +76,10 @@ void Interpreter::ps_merge00(UGeckoInstruction inst)

 void Interpreter::ps_merge01(UGeckoInstruction inst)
 {
-  double p0 = rPS0(inst.FA);
-  double p1 = rPS1(inst.FB);
-  rPS0(inst.FD) = p0;
-  rPS1(inst.FD) = p1;
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+
+  rPS(inst.FD).SetBoth(a.PS0AsDouble(), b.PS1AsDouble());

  if (inst.Rc)
    Helper_UpdateCR1();
@@ -81,10 +87,10 @@ void Interpreter::ps_merge01(UGeckoInstruction inst)

 void Interpreter::ps_merge10(UGeckoInstruction inst)
 {
-  double p0 = rPS1(inst.FA);
-  double p1 = rPS0(inst.FB);
-  rPS0(inst.FD) = p0;
-  rPS1(inst.FD) = p1;
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+
+  rPS(inst.FD).SetBoth(a.PS1AsDouble(), b.PS0AsDouble());

  if (inst.Rc)
    Helper_UpdateCR1();
@@ -92,10 +98,10 @@ void Interpreter::ps_merge10(UGeckoInstruction inst)

 void Interpreter::ps_merge11(UGeckoInstruction inst)
 {
-  double p0 = rPS1(inst.FA);
-  double p1 = rPS1(inst.FB);
-  rPS0(inst.FD) = p0;
-  rPS1(inst.FD) = p1;
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+
+  rPS(inst.FD).SetBoth(a.PS1AsDouble(), b.PS1AsDouble());

  if (inst.Rc)
    Helper_UpdateCR1();
@@ -104,9 +110,14 @@ void Interpreter::ps_merge11(UGeckoInstruction inst)
 // From here on, the real deal.
 void Interpreter::ps_div(UGeckoInstruction inst)
 {
-  rPS0(inst.FD) = ForceSingle(NI_div(rPS0(inst.FA), rPS0(inst.FB)).value);
-  rPS1(inst.FD) = ForceSingle(NI_div(rPS1(inst.FA), rPS1(inst.FB)).value);
-  PowerPC::UpdateFPRF(rPS0(inst.FD));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+
+  const double ps0 = ForceSingle(NI_div(a.PS0AsDouble(), b.PS0AsDouble()).value);
+  const double ps1 = ForceSingle(NI_div(a.PS1AsDouble(), b.PS1AsDouble()).value);
+
+  rPS(inst.FD).SetBoth(ps0, ps1);
+  PowerPC::UpdateFPRF(ps0);

  if (inst.Rc)
    Helper_UpdateCR1();
@@ -115,8 +126,8 @@ void Interpreter::ps_div(UGeckoInstruction inst)
 void Interpreter::ps_res(UGeckoInstruction inst)
 {
  // this code is based on the real hardware tests
-  const double a = rPS0(inst.FB);
-  const double b = rPS1(inst.FB);
+  const double a = rPS(inst.FB).PS0AsDouble();
+  const double b = rPS(inst.FB).PS1AsDouble();

  if (a == 0.0 || b == 0.0)
  {
@@ -130,9 +141,11 @@ void Interpreter::ps_res(UGeckoInstruction inst)
  if (Common::IsSNAN(a) || Common::IsSNAN(b))
    SetFPException(FPSCR_VXSNAN);

-  rPS0(inst.FD) = Common::ApproximateReciprocal(a);
-  rPS1(inst.FD) = Common::ApproximateReciprocal(b);
-  PowerPC::UpdateFPRF(rPS0(inst.FD));
+  const double ps0 = Common::ApproximateReciprocal(a);
+  const double ps1 = Common::ApproximateReciprocal(b);
+
+  rPS(inst.FD).SetBoth(ps0, ps1);
+  PowerPC::UpdateFPRF(ps0);

  if (inst.Rc)
    Helper_UpdateCR1();
@@ -140,8 +153,8 @@ void Interpreter::ps_res(UGeckoInstruction inst)

 void Interpreter::ps_rsqrte(UGeckoInstruction inst)
 {
-  const double ps0 = rPS0(inst.FB);
-  const double ps1 = rPS1(inst.FB);
+  const double ps0 = rPS(inst.FB).PS0AsDouble();
+  const double ps1 = rPS(inst.FB).PS1AsDouble();

  if (ps0 == 0.0 || ps1 == 0.0)
  {
@@ -161,10 +174,11 @@ void Interpreter::ps_rsqrte(UGeckoInstruction inst)
  if (Common::IsSNAN(ps0) || Common::IsSNAN(ps1))
    SetFPException(FPSCR_VXSNAN);

-  rPS0(inst.FD) = ForceSingle(Common::ApproximateReciprocalSquareRoot(ps0));
-  rPS1(inst.FD) = ForceSingle(Common::ApproximateReciprocalSquareRoot(ps1));
+  const double dst_ps0 = ForceSingle(Common::ApproximateReciprocalSquareRoot(ps0));
+  const double dst_ps1 = ForceSingle(Common::ApproximateReciprocalSquareRoot(ps1));

-  PowerPC::UpdateFPRF(rPS0(inst.FD));
+  rPS(inst.FD).SetBoth(dst_ps0, dst_ps1);
+  PowerPC::UpdateFPRF(dst_ps0);

  if (inst.Rc)
    Helper_UpdateCR1();
@@ -172,9 +186,14 @@ void Interpreter::ps_rsqrte(UGeckoInstruction inst)

 void Interpreter::ps_sub(UGeckoInstruction inst)
 {
-  rPS0(inst.FD) = ForceSingle(NI_sub(rPS0(inst.FA), rPS0(inst.FB)).value);
-  rPS1(inst.FD) = ForceSingle(NI_sub(rPS1(inst.FA), rPS1(inst.FB)).value);
-  PowerPC::UpdateFPRF(rPS0(inst.FD));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+
+  const double ps0 = ForceSingle(NI_sub(a.PS0AsDouble(), b.PS0AsDouble()).value);
+  const double ps1 = ForceSingle(NI_sub(a.PS1AsDouble(), b.PS1AsDouble()).value);
+
+  rPS(inst.FD).SetBoth(ps0, ps1);
+  PowerPC::UpdateFPRF(ps0);

  if (inst.Rc)
    Helper_UpdateCR1();
@@ -182,9 +201,14 @@ void Interpreter::ps_sub(UGeckoInstruction inst)

 void Interpreter::ps_add(UGeckoInstruction inst)
 {
-  rPS0(inst.FD) = ForceSingle(NI_add(rPS0(inst.FA), rPS0(inst.FB)).value);
-  rPS1(inst.FD) = ForceSingle(NI_add(rPS1(inst.FA), rPS1(inst.FB)).value);
-  PowerPC::UpdateFPRF(rPS0(inst.FD));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+
+  const double ps0 = ForceSingle(NI_add(a.PS0AsDouble(), b.PS0AsDouble()).value);
+  const double ps1 = ForceSingle(NI_add(a.PS1AsDouble(), b.PS1AsDouble()).value);
+
+  rPS(inst.FD).SetBoth(ps0, ps1);
+  PowerPC::UpdateFPRF(ps0);

  if (inst.Rc)
    Helper_UpdateCR1();
@ -192,11 +216,17 @@ void Interpreter::ps_add(UGeckoInstruction inst)

 void Interpreter::ps_mul(UGeckoInstruction inst)
 {
-  const double c0 = Force25Bit(rPS0(inst.FC));
-  const double c1 = Force25Bit(rPS1(inst.FC));
-  rPS0(inst.FD) = ForceSingle(NI_mul(rPS0(inst.FA), c0).value);
-  rPS1(inst.FD) = ForceSingle(NI_mul(rPS1(inst.FA), c1).value);
-  PowerPC::UpdateFPRF(rPS0(inst.FD));
+  const auto& a = rPS(inst.FA);
+  const auto& c = rPS(inst.FC);
+
+  const double c0 = Force25Bit(c.PS0AsDouble());
+  const double c1 = Force25Bit(c.PS1AsDouble());
+
+  const double ps0 = ForceSingle(NI_mul(a.PS0AsDouble(), c0).value);
+  const double ps1 = ForceSingle(NI_mul(a.PS1AsDouble(), c1).value);
+
+  rPS(inst.FD).SetBoth(ps0, ps1);
+  PowerPC::UpdateFPRF(ps0);

  if (inst.Rc)
    Helper_UpdateCR1();
@ -204,11 +234,18 @@ void Interpreter::ps_mul(UGeckoInstruction inst)

 void Interpreter::ps_msub(UGeckoInstruction inst)
 {
-  const double c0 = Force25Bit(rPS0(inst.FC));
-  const double c1 = Force25Bit(rPS1(inst.FC));
-  rPS0(inst.FD) = ForceSingle(NI_msub(rPS0(inst.FA), c0, rPS0(inst.FB)).value);
-  rPS1(inst.FD) = ForceSingle(NI_msub(rPS1(inst.FA), c1, rPS1(inst.FB)).value);
-  PowerPC::UpdateFPRF(rPS0(inst.FD));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+  const auto& c = rPS(inst.FC);
+
+  const double c0 = Force25Bit(c.PS0AsDouble());
+  const double c1 = Force25Bit(c.PS1AsDouble());
+
+  const double ps0 = ForceSingle(NI_msub(a.PS0AsDouble(), c0, b.PS0AsDouble()).value);
+  const double ps1 = ForceSingle(NI_msub(a.PS1AsDouble(), c1, b.PS1AsDouble()).value);
+
+  rPS(inst.FD).SetBoth(ps0, ps1);
+  PowerPC::UpdateFPRF(ps0);

  if (inst.Rc)
    Helper_UpdateCR1();
@ -216,11 +253,18 @@ void Interpreter::ps_msub(UGeckoInstruction inst)

 void Interpreter::ps_madd(UGeckoInstruction inst)
 {
-  const double c0 = Force25Bit(rPS0(inst.FC));
-  const double c1 = Force25Bit(rPS1(inst.FC));
-  rPS0(inst.FD) = ForceSingle(NI_madd(rPS0(inst.FA), c0, rPS0(inst.FB)).value);
-  rPS1(inst.FD) = ForceSingle(NI_madd(rPS1(inst.FA), c1, rPS1(inst.FB)).value);
-  PowerPC::UpdateFPRF(rPS0(inst.FD));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+  const auto& c = rPS(inst.FC);
+
+  const double c0 = Force25Bit(c.PS0AsDouble());
+  const double c1 = Force25Bit(c.PS1AsDouble());
+
+  const double ps0 = ForceSingle(NI_madd(a.PS0AsDouble(), c0, b.PS0AsDouble()).value);
+  const double ps1 = ForceSingle(NI_madd(a.PS1AsDouble(), c1, b.PS1AsDouble()).value);
+
+  rPS(inst.FD).SetBoth(ps0, ps1);
+  PowerPC::UpdateFPRF(ps0);

  if (inst.Rc)
    Helper_UpdateCR1();
@ -228,13 +272,21 @@ void Interpreter::ps_madd(UGeckoInstruction inst)

 void Interpreter::ps_nmsub(UGeckoInstruction inst)
 {
-  const double c0 = Force25Bit(rPS0(inst.FC));
-  const double c1 = Force25Bit(rPS1(inst.FC));
-  const double result0 = ForceSingle(NI_msub(rPS0(inst.FA), c0, rPS0(inst.FB)).value);
-  const double result1 = ForceSingle(NI_msub(rPS1(inst.FA), c1, rPS1(inst.FB)).value);
-  rPS0(inst.FD) = std::isnan(result0) ? result0 : -result0;
-  rPS1(inst.FD) = std::isnan(result1) ? result1 : -result1;
-  PowerPC::UpdateFPRF(rPS0(inst.FD));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+  const auto& c = rPS(inst.FC);
+
+  const double c0 = Force25Bit(c.PS0AsDouble());
+  const double c1 = Force25Bit(c.PS1AsDouble());
+
+  const double tmp0 = ForceSingle(NI_msub(a.PS0AsDouble(), c0, b.PS0AsDouble()).value);
+  const double tmp1 = ForceSingle(NI_msub(a.PS1AsDouble(), c1, b.PS1AsDouble()).value);
+
+  const double ps0 = std::isnan(tmp0) ? tmp0 : -tmp0;
+  const double ps1 = std::isnan(tmp1) ? tmp1 : -tmp1;
+
+  rPS(inst.FD).SetBoth(ps0, ps1);
+  PowerPC::UpdateFPRF(ps0);

  if (inst.Rc)
    Helper_UpdateCR1();
@ -242,13 +294,21 @@ void Interpreter::ps_nmsub(UGeckoInstruction inst)

 void Interpreter::ps_nmadd(UGeckoInstruction inst)
 {
-  const double c0 = Force25Bit(rPS0(inst.FC));
-  const double c1 = Force25Bit(rPS1(inst.FC));
-  const double result0 = ForceSingle(NI_madd(rPS0(inst.FA), c0, rPS0(inst.FB)).value);
-  const double result1 = ForceSingle(NI_madd(rPS1(inst.FA), c1, rPS1(inst.FB)).value);
-  rPS0(inst.FD) = std::isnan(result0) ? result0 : -result0;
-  rPS1(inst.FD) = std::isnan(result1) ? result1 : -result1;
-  PowerPC::UpdateFPRF(rPS0(inst.FD));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+  const auto& c = rPS(inst.FC);
+
+  const double c0 = Force25Bit(c.PS0AsDouble());
+  const double c1 = Force25Bit(c.PS1AsDouble());
+
+  const double tmp0 = ForceSingle(NI_madd(a.PS0AsDouble(), c0, b.PS0AsDouble()).value);
+  const double tmp1 = ForceSingle(NI_madd(a.PS1AsDouble(), c1, b.PS1AsDouble()).value);
+
+  const double ps0 = std::isnan(tmp0) ? tmp0 : -tmp0;
+  const double ps1 = std::isnan(tmp1) ? tmp1 : -tmp1;
+
+  rPS(inst.FD).SetBoth(ps0, ps1);
+  PowerPC::UpdateFPRF(ps0);

  if (inst.Rc)
    Helper_UpdateCR1();
@ -256,11 +316,15 @@ void Interpreter::ps_nmadd(UGeckoInstruction inst)

 void Interpreter::ps_sum0(UGeckoInstruction inst)
 {
-  const double p0 = ForceSingle(NI_add(rPS0(inst.FA), rPS1(inst.FB)).value);
-  const double p1 = ForceSingle(rPS1(inst.FC));
-  rPS0(inst.FD) = p0;
-  rPS1(inst.FD) = p1;
-  PowerPC::UpdateFPRF(rPS0(inst.FD));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+  const auto& c = rPS(inst.FC);
+
+  const double ps0 = ForceSingle(NI_add(a.PS0AsDouble(), b.PS1AsDouble()).value);
+  const double ps1 = ForceSingle(c.PS1AsDouble());
+
+  rPS(inst.FD).SetBoth(ps0, ps1);
+  PowerPC::UpdateFPRF(ps0);

  if (inst.Rc)
    Helper_UpdateCR1();
@ -268,11 +332,15 @@ void Interpreter::ps_sum0(UGeckoInstruction inst)

 void Interpreter::ps_sum1(UGeckoInstruction inst)
 {
-  const double p0 = ForceSingle(rPS0(inst.FC));
-  const double p1 = ForceSingle(NI_add(rPS0(inst.FA), rPS1(inst.FB)).value);
-  rPS0(inst.FD) = p0;
-  rPS1(inst.FD) = p1;
-  PowerPC::UpdateFPRF(rPS1(inst.FD));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+  const auto& c = rPS(inst.FC);
+
+  const double ps0 = ForceSingle(c.PS0AsDouble());
+  const double ps1 = ForceSingle(NI_add(a.PS0AsDouble(), b.PS1AsDouble()).value);
+
+  rPS(inst.FD).SetBoth(ps0, ps1);
+  PowerPC::UpdateFPRF(ps1);

  if (inst.Rc)
    Helper_UpdateCR1();
@ -280,12 +348,15 @@ void Interpreter::ps_sum1(UGeckoInstruction inst)

 void Interpreter::ps_muls0(UGeckoInstruction inst)
 {
-  const double c0 = Force25Bit(rPS0(inst.FC));
-  const double p0 = ForceSingle(NI_mul(rPS0(inst.FA), c0).value);
-  const double p1 = ForceSingle(NI_mul(rPS1(inst.FA), c0).value);
-  rPS0(inst.FD) = p0;
-  rPS1(inst.FD) = p1;
-  PowerPC::UpdateFPRF(rPS0(inst.FD));
+  const auto& a = rPS(inst.FA);
+  const auto& c = rPS(inst.FC);
+
+  const double c0 = Force25Bit(c.PS0AsDouble());
+  const double ps0 = ForceSingle(NI_mul(a.PS0AsDouble(), c0).value);
+  const double ps1 = ForceSingle(NI_mul(a.PS1AsDouble(), c0).value);
+
+  rPS(inst.FD).SetBoth(ps0, ps1);
+  PowerPC::UpdateFPRF(ps0);

  if (inst.Rc)
    Helper_UpdateCR1();
@ -293,12 +364,15 @@ void Interpreter::ps_muls0(UGeckoInstruction inst)

 void Interpreter::ps_muls1(UGeckoInstruction inst)
 {
-  const double c1 = Force25Bit(rPS1(inst.FC));
-  const double p0 = ForceSingle(NI_mul(rPS0(inst.FA), c1).value);
-  const double p1 = ForceSingle(NI_mul(rPS1(inst.FA), c1).value);
-  rPS0(inst.FD) = p0;
-  rPS1(inst.FD) = p1;
-  PowerPC::UpdateFPRF(rPS0(inst.FD));
+  const auto& a = rPS(inst.FA);
+  const auto& c = rPS(inst.FC);
+
+  const double c1 = Force25Bit(c.PS1AsDouble());
+  const double ps0 = ForceSingle(NI_mul(a.PS0AsDouble(), c1).value);
+  const double ps1 = ForceSingle(NI_mul(a.PS1AsDouble(), c1).value);
+
+  rPS(inst.FD).SetBoth(ps0, ps1);
+  PowerPC::UpdateFPRF(ps0);

  if (inst.Rc)
    Helper_UpdateCR1();
@ -306,12 +380,16 @@ void Interpreter::ps_muls1(UGeckoInstruction inst)

 void Interpreter::ps_madds0(UGeckoInstruction inst)
 {
-  const double c0 = Force25Bit(rPS0(inst.FC));
-  const double p0 = ForceSingle(NI_madd(rPS0(inst.FA), c0, rPS0(inst.FB)).value);
-  const double p1 = ForceSingle(NI_madd(rPS1(inst.FA), c0, rPS1(inst.FB)).value);
-  rPS0(inst.FD) = p0;
-  rPS1(inst.FD) = p1;
-  PowerPC::UpdateFPRF(rPS0(inst.FD));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+  const auto& c = rPS(inst.FC);
+
+  const double c0 = Force25Bit(c.PS0AsDouble());
+  const double ps0 = ForceSingle(NI_madd(a.PS0AsDouble(), c0, b.PS0AsDouble()).value);
+  const double ps1 = ForceSingle(NI_madd(a.PS1AsDouble(), c0, b.PS1AsDouble()).value);
+
+  rPS(inst.FD).SetBoth(ps0, ps1);
+  PowerPC::UpdateFPRF(ps0);

  if (inst.Rc)
    Helper_UpdateCR1();
@ -319,12 +397,16 @@ void Interpreter::ps_madds0(UGeckoInstruction inst)

 void Interpreter::ps_madds1(UGeckoInstruction inst)
 {
-  const double c1 = Force25Bit(rPS1(inst.FC));
-  const double p0 = ForceSingle(NI_madd(rPS0(inst.FA), c1, rPS0(inst.FB)).value);
-  const double p1 = ForceSingle(NI_madd(rPS1(inst.FA), c1, rPS1(inst.FB)).value);
-  rPS0(inst.FD) = p0;
-  rPS1(inst.FD) = p1;
-  PowerPC::UpdateFPRF(rPS0(inst.FD));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+  const auto& c = rPS(inst.FC);
+
+  const double c1 = Force25Bit(c.PS1AsDouble());
+  const double ps0 = ForceSingle(NI_madd(a.PS0AsDouble(), c1, b.PS0AsDouble()).value);
+  const double ps1 = ForceSingle(NI_madd(a.PS1AsDouble(), c1, b.PS1AsDouble()).value);
+
+  rPS(inst.FD).SetBoth(ps0, ps1);
+  PowerPC::UpdateFPRF(ps0);

  if (inst.Rc)
    Helper_UpdateCR1();
@ -332,20 +414,32 @@ void Interpreter::ps_madds1(UGeckoInstruction inst)

 void Interpreter::ps_cmpu0(UGeckoInstruction inst)
 {
-  Helper_FloatCompareUnordered(inst, rPS0(inst.FA), rPS0(inst.FB));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+
+  Helper_FloatCompareUnordered(inst, a.PS0AsDouble(), b.PS0AsDouble());
 }

 void Interpreter::ps_cmpo0(UGeckoInstruction inst)
 {
-  Helper_FloatCompareOrdered(inst, rPS0(inst.FA), rPS0(inst.FB));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+
+  Helper_FloatCompareOrdered(inst, a.PS0AsDouble(), b.PS0AsDouble());
 }

 void Interpreter::ps_cmpu1(UGeckoInstruction inst)
 {
-  Helper_FloatCompareUnordered(inst, rPS1(inst.FA), rPS1(inst.FB));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+
+  Helper_FloatCompareUnordered(inst, a.PS1AsDouble(), b.PS1AsDouble());
 }

 void Interpreter::ps_cmpo1(UGeckoInstruction inst)
 {
-  Helper_FloatCompareOrdered(inst, rPS1(inst.FA), rPS1(inst.FB));
+  const auto& a = rPS(inst.FA);
+  const auto& b = rPS(inst.FB);
+
+  Helper_FloatCompareOrdered(inst, a.PS1AsDouble(), b.PS1AsDouble());
 }
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
@ -98,7 +98,7 @@ void Interpreter::mtfsfx(UGeckoInstruction inst)
      m |= (0xFU << (i * 4));
  }

-  FPSCR = (FPSCR.Hex & ~m) | (static_cast<u32>(riPS0(inst.FB)) & m);
+  FPSCR = (FPSCR.Hex & ~m) | (static_cast<u32>(rPS(inst.FB).PS0AsU64()) & m);
  FPSCRtoFPUSettings(FPSCR);

  if (inst.Rc)
@ -555,7 +555,7 @@ void Interpreter::mffsx(UGeckoInstruction inst)
  // TODO(ector): grab all overflow flags etc and set them in FPSCR

  UpdateFPSCR();
-  riPS0(inst.FD) = 0xFFF8000000000000 | FPSCR.Hex;
+  rPS(inst.FD).SetPS0(UINT64_C(0xFFF8000000000000) | FPSCR.Hex);

  if (inst.Rc)
    Helper_UpdateCR1();
--- a/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp
@ -34,7 +34,7 @@ const X64Reg* FPURegCache::GetAllocationOrder(size_t* count) const

 OpArg FPURegCache::GetDefaultLocation(preg_t preg) const
 {
-  return PPCSTATE(ps[preg][0]);
+  return PPCSTATE(ps[preg].ps0);
 }

 BitSet32 FPURegCache::GetRegUtilization() const
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
@ -452,7 +452,7 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type)
      // Load the high 64bits from the file and insert them in to the high 64bits of the host
      // register
      ARM64Reg tmp_reg = GetReg();
-      m_float_emit->LDR(64, INDEX_UNSIGNED, tmp_reg, PPC_REG, PPCSTATE_OFF(ps[preg][1]));
+      m_float_emit->LDR(64, INDEX_UNSIGNED, tmp_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps1));
      m_float_emit->INS(64, host_reg, 1, tmp_reg, 0);
      UnlockRegister(tmp_reg);

@ -506,7 +506,7 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type)
      reg.Load(host_reg, REG_LOWER_PAIR);
    }
    reg.SetDirty(false);
-    m_float_emit->LDR(load_size, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0]));
+    m_float_emit->LDR(load_size, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps0));
    return host_reg;
  }
  default:
@ -554,7 +554,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type)
      // We are doing a full 128bit store because it takes 2 cycles on a Cortex-A57 to do a 128bit
      // store.
      // It would take longer to do an insert to a temporary and a 64bit store than to just do this.
-      m_float_emit->STR(128, INDEX_UNSIGNED, flush_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0]));
+      m_float_emit->STR(128, INDEX_UNSIGNED, flush_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps0));
      break;
    case REG_DUP_SINGLE:
      flush_reg = GetReg();
@ -562,7 +562,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type)
    // fall through
    case REG_DUP:
      // Store PSR1 (which is equal to PSR0) in memory.
-      m_float_emit->STR(64, INDEX_UNSIGNED, flush_reg, PPC_REG, PPCSTATE_OFF(ps[preg][1]));
+      m_float_emit->STR(64, INDEX_UNSIGNED, flush_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps1));
      break;
    default:
      // All other types doesn't store anything in PSR1.
@ -687,7 +687,7 @@ void Arm64FPRCache::FlushRegister(size_t preg, bool maintain_state)
      store_size = 64;

    if (dirty)
-      m_float_emit->STR(store_size, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0]));
+      m_float_emit->STR(store_size, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps0));

    if (!maintain_state)
    {
@ -702,9 +702,9 @@ void Arm64FPRCache::FlushRegister(size_t preg, bool maintain_state)
      // If the paired registers were at the start of ppcState we could do an STP here.
      // Too bad moving them would break savestate compatibility between x86_64 and AArch64
      // m_float_emit->STP(64, INDEX_SIGNED, host_reg, host_reg, PPC_REG,
-      // PPCSTATE_OFF(ps[preg][0]));
-      m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0]));
-      m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][1]));
+      // PPCSTATE_OFF(ps[preg].ps0));
+      m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps0));
+      m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps1));
    }

    if (!maintain_state)
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
@ -24,7 +24,7 @@ static const Arm64Gen::ARM64Reg DISPATCHER_PC =

 // Some asserts to make sure we will be able to load everything
 static_assert(PPCSTATE_OFF(spr[1023]) <= 16380, "LDR(32bit) can't reach the last SPR");
-static_assert((PPCSTATE_OFF(ps[0][0]) % 8) == 0,
+static_assert((PPCSTATE_OFF(ps[0].ps0) % 8) == 0,
              "LDR(64bit VFP) requires FPRs to be 8 byte aligned");
 static_assert(PPCSTATE_OFF(xer_ca) < 4096, "STRB can't store xer_ca!");
 static_assert(PPCSTATE_OFF(xer_so_ov) < 4096, "STRB can't store xer_so_ov!");
--- a/Source/Core/Core/PowerPC/PowerPC.cpp
+++ b/Source/Core/Core/PowerPC/PowerPC.cpp
@ -4,6 +4,7 @@

 #include "Core/PowerPC/PowerPC.h"

+#include <algorithm>
 #include <cstring>
 #include <istream>
 #include <ostream>
@ -11,6 +12,7 @@
 #include <vector>

 #include "Common/Assert.h"
+#include "Common/BitUtils.h"
 #include "Common/ChunkFile.h"
 #include "Common/CommonTypes.h"
 #include "Common/FPURoundMode.h"
@ -42,6 +44,27 @@ MemChecks memchecks;
 PPCDebugInterface debug_interface;

 static CoreTiming::EventType* s_invalidate_cache_thread_safe;
+
+double PairedSingle::PS0AsDouble() const
+{
+  return Common::BitCast<double>(ps0);
+}
+
+double PairedSingle::PS1AsDouble() const
+{
+  return Common::BitCast<double>(ps1);
+}
+
+void PairedSingle::SetPS0(double value)
+{
+  ps0 = Common::BitCast<u64>(value);
+}
+
+void PairedSingle::SetPS1(double value)
+{
+  ps1 = Common::BitCast<u64>(value);
+}
+
 static void InvalidateCacheThreadSafe(u64 userdata, s64 cyclesLate)
 {
  ppcState.iCache.Invalidate(static_cast<u32>(userdata));
@ -135,10 +158,11 @@ void DoState(PointerWrap& p)

 static void ResetRegisters()
 {
-  memset(ppcState.ps, 0, sizeof(ppcState.ps));
-  memset(ppcState.sr, 0, sizeof(ppcState.sr));
-  memset(ppcState.gpr, 0, sizeof(ppcState.gpr));
-  memset(ppcState.spr, 0, sizeof(ppcState.spr));
+  std::fill(std::begin(ppcState.ps), std::end(ppcState.ps), PairedSingle{});
+  std::fill(std::begin(ppcState.sr), std::end(ppcState.sr), 0U);
+  std::fill(std::begin(ppcState.gpr), std::end(ppcState.gpr), 0U);
+  std::fill(std::begin(ppcState.spr), std::end(ppcState.spr), 0U);
+
  /*
  0x00080200 = lonestar 2.0
  0x00088202 = lonestar 2.2
--- a/Source/Core/Core/PowerPC/PowerPC.h
+++ b/Source/Core/Core/PowerPC/PowerPC.h
@ -8,6 +8,7 @@
 #include <cstddef>
 #include <iosfwd>
 #include <tuple>
+#include <type_traits>
 #include <vector>

 #include "Common/CommonTypes.h"
@ -57,6 +58,43 @@ struct TLBEntry
  u8 recent = 0;
 };

+struct PairedSingle
+{
+  u64 PS0AsU64() const { return ps0; }
+  u64 PS1AsU64() const { return ps1; }
+
+  u32 PS0AsU32() const { return static_cast<u32>(ps0); }
+  u32 PS1AsU32() const { return static_cast<u32>(ps1); }
+
+  double PS0AsDouble() const;
+  double PS1AsDouble() const;
+
+  void SetPS0(u64 value) { ps0 = value; }
+  void SetPS0(double value);
+
+  void SetPS1(u64 value) { ps1 = value; }
+  void SetPS1(double value);
+
+  void SetBoth(u64 lhs, u64 rhs)
+  {
+    SetPS0(lhs);
+    SetPS1(rhs);
+  }
+  void SetBoth(double lhs, double rhs)
+  {
+    SetPS0(lhs);
+    SetPS1(rhs);
+  }
+
+  void Fill(u64 value) { SetBoth(value, value); }
+  void Fill(double value) { SetBoth(value, value); }
+
+  u64 ps0 = 0;
+  u64 ps1 = 0;
+};
+// PairedSingle must be standard layout in order for offsetof to work; the JITs rely on offsetof.
+static_assert(std::is_standard_layout<PairedSingle>(), "PairedSingle must be standard layout");
+
 // This contains the entire state of the emulated PowerPC "Gekko" CPU.
 struct PowerPCState
 {
@ -114,7 +152,7 @@ struct PowerPCState
  // The paired singles are strange : PS0 is stored in the full 64 bits of each FPR
  // but ps calculations are only done in 32-bit precision, and PS1 is only 32 bits.
  // Since we want to use SIMD, SSE2 is the only viable alternative - 2x double.
-  alignas(16) u64 ps[32][2];
+  alignas(16) PairedSingle ps[32];

  u32 sr[16];  // Segment registers.

@ -212,11 +250,7 @@ void UpdatePerformanceMonitor(u32 cycles, u32 num_load_stores, u32 num_fp_inst);
 #define TL PowerPC::ppcState.spr[SPR_TL]
 #define TU PowerPC::ppcState.spr[SPR_TU]

-#define rPS0(i) (*(double*)(&PowerPC::ppcState.ps[i][0]))
-#define rPS1(i) (*(double*)(&PowerPC::ppcState.ps[i][1]))
-
-#define riPS0(i) (*(u64*)(&PowerPC::ppcState.ps[i][0]))
-#define riPS1(i) (*(u64*)(&PowerPC::ppcState.ps[i][1]))
+#define rPS(i) (PowerPC::ppcState.ps[(i)])

 enum CRBits
 {
--- a/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp
+++ b/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp
@ -227,11 +227,11 @@ void RegisterWidget::PopulateTable()
                [i](u64 value) { GPR(i) = value; });

    // Floating point registers (double)
-    AddRegister(i, 2, RegisterType::fpr, "f" + std::to_string(i), [i] { return riPS0(i); },
-                [i](u64 value) { riPS0(i) = value; });
+    AddRegister(i, 2, RegisterType::fpr, "f" + std::to_string(i), [i] { return rPS(i).PS0AsU64(); },
+                [i](u64 value) { rPS(i).SetPS0(value); });

-    AddRegister(i, 4, RegisterType::fpr, "", [i] { return riPS1(i); },
-                [i](u64 value) { riPS1(i) = value; });
+    AddRegister(i, 4, RegisterType::fpr, "", [i] { return rPS(i).PS1AsU64(); },
+                [i](u64 value) { rPS(i).SetPS1(value); });
  }

  for (int i = 0; i < 8; i++)