From 244d083f0e2e0084fb5b597db4ffd0884b0e7495 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 14 Dec 2018 13:23:12 -0500 Subject: [PATCH] PowerPC: Remove separate macros for paired singles Previously, PowerPC.h had four macros in it like so: #define rPS0(i) (*(double*)(&PowerPC::ppcState.ps[i][0])) #define rPS1(i) (*(double*)(&PowerPC::ppcState.ps[i][1])) #define riPS0(i) (*(u64*)(&PowerPC::ppcState.ps[i][0])) #define riPS1(i) (*(u64*)(&PowerPC::ppcState.ps[i][1])) Casting between object representations like this is undefined behavior. Given this is used heavily by the interpreter (that is, the most accurate, but slowest CPU backend), we don't exactly want to allow undefined behavior to creep into it. Instead, this adds a helper struct for operating on the paired singles, and replaces the four macros with a single macro for accessing the paired-singles/floating-point registers. This way, it's left up to the caller to explicitly decide how it wants to interpret the data (which also makes it more obvious where different interpretations of the same data occur, as there'll be a call to one of the [x]AsDouble() functions).
--- Source/Core/Core/GeckoCode.cpp | 4 +- Source/Core/Core/HLE/HLE_Misc.cpp | 4 +- Source/Core/Core/HLE/HLE_VarArgs.cpp | 2 +- .../Core/PowerPC/Interpreter/Interpreter.cpp | 6 +- .../Interpreter/Interpreter_FloatingPoint.cpp | 201 +++++++---- .../Interpreter/Interpreter_LoadStore.cpp | 40 +-- .../Interpreter_LoadStorePaired.cpp | 7 +- .../Interpreter/Interpreter_Paired.cpp | 318 ++++++++++++------ .../Interpreter_SystemRegisters.cpp | 4 +- .../PowerPC/Jit64/RegCache/FPURegCache.cpp | 2 +- .../PowerPC/JitArm64/JitArm64_RegCache.cpp | 16 +- .../Core/PowerPC/JitArm64/JitArm64_RegCache.h | 2 +- Source/Core/Core/PowerPC/PowerPC.cpp | 32 +- Source/Core/Core/PowerPC/PowerPC.h | 46 ++- .../DolphinQt/Debugger/RegisterWidget.cpp | 8 +- 15 files changed, 457 insertions(+), 235 deletions(-) diff --git a/Source/Core/Core/GeckoCode.cpp b/Source/Core/Core/GeckoCode.cpp index eb90eda5a0..ae3d0f66f5 100644 --- a/Source/Core/Core/GeckoCode.cpp +++ b/Source/Core/Core/GeckoCode.cpp @@ -272,8 +272,8 @@ void RunCodeHandler() // Registers FPR0->13 are volatile for (int i = 0; i < 14; ++i) { - PowerPC::HostWrite_U64(riPS0(i), SP + 24 + 2 * i * sizeof(u64)); - PowerPC::HostWrite_U64(riPS1(i), SP + 24 + (2 * i + 1) * sizeof(u64)); + PowerPC::HostWrite_U64(rPS(i).PS0AsU64(), SP + 24 + 2 * i * sizeof(u64)); + PowerPC::HostWrite_U64(rPS(i).PS1AsU64(), SP + 24 + (2 * i + 1) * sizeof(u64)); } DEBUG_LOG(ACTIONREPLAY, "GeckoCodes: Initiating phantom branch-and-link. 
" diff --git a/Source/Core/Core/HLE/HLE_Misc.cpp b/Source/Core/Core/HLE/HLE_Misc.cpp index bfba82ed24..27cb3012a7 100644 --- a/Source/Core/Core/HLE/HLE_Misc.cpp +++ b/Source/Core/Core/HLE/HLE_Misc.cpp @@ -64,8 +64,8 @@ void GeckoReturnTrampoline() PowerPC::ExpandCR(PowerPC::HostRead_U32(SP + 20)); for (int i = 0; i < 14; ++i) { - riPS0(i) = PowerPC::HostRead_U64(SP + 24 + 2 * i * sizeof(u64)); - riPS1(i) = PowerPC::HostRead_U64(SP + 24 + (2 * i + 1) * sizeof(u64)); + rPS(i).SetBoth(PowerPC::HostRead_U64(SP + 24 + 2 * i * sizeof(u64)), + PowerPC::HostRead_U64(SP + 24 + (2 * i + 1) * sizeof(u64))); } } } diff --git a/Source/Core/Core/HLE/HLE_VarArgs.cpp b/Source/Core/Core/HLE/HLE_VarArgs.cpp index 0517fa8637..e7e6a9099e 100644 --- a/Source/Core/Core/HLE/HLE_VarArgs.cpp +++ b/Source/Core/Core/HLE/HLE_VarArgs.cpp @@ -15,7 +15,7 @@ u32 HLE::SystemVABI::VAList::GetGPR(u32 gpr) const double HLE::SystemVABI::VAList::GetFPR(u32 fpr) const { - return rPS0(fpr); + return rPS(fpr).PS0AsDouble(); } HLE::SystemVABI::VAListStruct::VAListStruct(u32 address) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp index 8f912cd73b..8a26ade061 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp @@ -88,8 +88,10 @@ static void Trace(UGeckoInstruction& inst) std::string fregs = ""; for (int i = 0; i < 32; i++) { - fregs += StringFromFormat("f%02d: %08" PRIx64 " %08" PRIx64 " ", i, PowerPC::ppcState.ps[i][0], - PowerPC::ppcState.ps[i][1]); + const auto& ps = PowerPC::ppcState.ps[i]; + + fregs += + StringFromFormat("f%02d: %08" PRIx64 " %08" PRIx64 " ", i, ps.PS0AsU64(), ps.PS1AsU64()); } const std::string ppc_inst = Common::GekkoDisassembler::Disassemble(inst.hex, PC); diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp index 3a68997bf0..e39c39e1fa 100644 
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp @@ -27,7 +27,7 @@ enum class RoundingMode // The Programming Environments Manual for 32 and 64-bit Microprocessors void ConvertToInteger(UGeckoInstruction inst, RoundingMode rounding_mode) { - const double b = rPS0(inst.FB); + const double b = rPS(inst.FB).PS0AsDouble(); u32 value; bool exception_occurred = false; @@ -111,9 +111,11 @@ void ConvertToInteger(UGeckoInstruction inst, RoundingMode rounding_mode) { // Based on HW tests // FPRF is not affected - riPS0(inst.FD) = 0xfff8000000000000ull | value; + u64 result = 0xfff8000000000000ull | value; if (value == 0 && std::signbit(b)) - riPS0(inst.FD) |= 0x100000000ull; + result |= 0x100000000ull; + + rPS(inst.FD).SetPS0(result); } if (inst.Rc) @@ -198,12 +200,18 @@ void Interpreter::Helper_FloatCompareUnordered(UGeckoInstruction inst, double fa void Interpreter::fcmpo(UGeckoInstruction inst) { - Helper_FloatCompareOrdered(inst, rPS0(inst.FA), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + Helper_FloatCompareOrdered(inst, a.PS0AsDouble(), b.PS0AsDouble()); } void Interpreter::fcmpu(UGeckoInstruction inst) { - Helper_FloatCompareUnordered(inst, rPS0(inst.FA), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + Helper_FloatCompareUnordered(inst, a.PS0AsDouble(), b.PS0AsDouble()); } void Interpreter::fctiwx(UGeckoInstruction inst) @@ -218,7 +226,7 @@ void Interpreter::fctiwzx(UGeckoInstruction inst) void Interpreter::fmrx(UGeckoInstruction inst) { - riPS0(inst.FD) = riPS0(inst.FB); + rPS(inst.FD).SetPS0(rPS(inst.FB).PS0AsU64()); // This is a binary instruction. 
Does not alter FPSCR if (inst.Rc) @@ -227,7 +235,7 @@ void Interpreter::fmrx(UGeckoInstruction inst) void Interpreter::fabsx(UGeckoInstruction inst) { - rPS0(inst.FD) = fabs(rPS0(inst.FB)); + rPS(inst.FD).SetPS0(fabs(rPS(inst.FB).PS0AsDouble())); // This is a binary instruction. Does not alter FPSCR if (inst.Rc) @@ -236,7 +244,7 @@ void Interpreter::fabsx(UGeckoInstruction inst) void Interpreter::fnabsx(UGeckoInstruction inst) { - riPS0(inst.FD) = riPS0(inst.FB) | (1ULL << 63); + rPS(inst.FD).SetPS0(rPS(inst.FB).PS0AsU64() | (UINT64_C(1) << 63)); // This is a binary instruction. Does not alter FPSCR if (inst.Rc) @@ -245,7 +253,7 @@ void Interpreter::fnabsx(UGeckoInstruction inst) void Interpreter::fnegx(UGeckoInstruction inst) { - riPS0(inst.FD) = riPS0(inst.FB) ^ (1ULL << 63); + rPS(inst.FD).SetPS0(rPS(inst.FB).PS0AsU64() ^ (UINT64_C(1) << 63)); // This is a binary instruction. Does not alter FPSCR if (inst.Rc) @@ -254,7 +262,11 @@ void Interpreter::fnegx(UGeckoInstruction inst) void Interpreter::fselx(UGeckoInstruction inst) { - rPS0(inst.FD) = (rPS0(inst.FA) >= -0.0) ? rPS0(inst.FC) : rPS0(inst.FB); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + rPS(inst.FD).SetPS0((a.PS0AsDouble() >= -0.0) ? c.PS0AsDouble() : b.PS0AsDouble()); // This is a binary instruction. 
Does not alter FPSCR if (inst.Rc) @@ -266,7 +278,7 @@ void Interpreter::fselx(UGeckoInstruction inst) // PS1 is said to be undefined void Interpreter::frspx(UGeckoInstruction inst) // round to single { - const double b = rPS0(inst.FB); + const double b = rPS(inst.FB).PS0AsDouble(); const double rounded = ForceSingle(b); if (std::isnan(b)) @@ -278,8 +290,7 @@ void Interpreter::frspx(UGeckoInstruction inst) // round to single if (!is_snan || FPSCR.VE == 0) { - rPS0(inst.FD) = rounded; - rPS1(inst.FD) = rounded; + rPS(inst.FD).Fill(rounded); PowerPC::UpdateFPRF(b); } @@ -290,8 +301,7 @@ void Interpreter::frspx(UGeckoInstruction inst) // round to single SetFI(b != rounded); FPSCR.FR = fabs(rounded) > fabs(b); PowerPC::UpdateFPRF(rounded); - rPS0(inst.FD) = rounded; - rPS1(inst.FD) = rounded; + rPS(inst.FD).Fill(rounded); } if (inst.Rc) @@ -300,13 +310,16 @@ void Interpreter::frspx(UGeckoInstruction inst) // round to single void Interpreter::fmulx(UGeckoInstruction inst) { - const FPResult product = NI_mul(rPS0(inst.FA), rPS0(inst.FC)); + const auto& a = rPS(inst.FA); + const auto& c = rPS(inst.FC); + + const FPResult product = NI_mul(a.PS0AsDouble(), c.PS0AsDouble()); if (FPSCR.VE == 0 || product.HasNoInvalidExceptions()) { const double result = ForceDouble(product.value); - rPS0(inst.FD) = result; + rPS(inst.FD).SetPS0(result); FPSCR.FI = 0; // are these flags important? 
FPSCR.FR = 0; PowerPC::UpdateFPRF(result); @@ -317,14 +330,17 @@ void Interpreter::fmulx(UGeckoInstruction inst) } void Interpreter::fmulsx(UGeckoInstruction inst) { - const double c_value = Force25Bit(rPS0(inst.FC)); - const FPResult d_value = NI_mul(rPS0(inst.FA), c_value); + const auto& a = rPS(inst.FA); + const auto& c = rPS(inst.FC); + + const double c_value = Force25Bit(c.PS0AsDouble()); + const FPResult d_value = NI_mul(a.PS0AsDouble(), c_value); if (FPSCR.VE == 0 || d_value.HasNoInvalidExceptions()) { const double result = ForceSingle(d_value.value); - rPS0(inst.FD) = rPS1(inst.FD) = result; + rPS(inst.FD).Fill(result); FPSCR.FI = 0; FPSCR.FR = 0; PowerPC::UpdateFPRF(result); @@ -336,12 +352,15 @@ void Interpreter::fmulsx(UGeckoInstruction inst) void Interpreter::fmaddx(UGeckoInstruction inst) { - const FPResult product = NI_madd(rPS0(inst.FA), rPS0(inst.FC), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + const FPResult product = NI_madd(a.PS0AsDouble(), c.PS0AsDouble(), b.PS0AsDouble()); if (FPSCR.VE == 0 || product.HasNoInvalidExceptions()) { const double result = ForceDouble(product.value); - rPS0(inst.FD) = result; + rPS(inst.FD).SetPS0(result); PowerPC::UpdateFPRF(result); } @@ -351,14 +370,18 @@ void Interpreter::fmaddx(UGeckoInstruction inst) void Interpreter::fmaddsx(UGeckoInstruction inst) { - const double c_value = Force25Bit(rPS0(inst.FC)); - const FPResult d_value = NI_madd(rPS0(inst.FA), c_value, rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const double c_value = Force25Bit(c.PS0AsDouble()); + const FPResult d_value = NI_madd(a.PS0AsDouble(), c_value, b.PS0AsDouble()); if (FPSCR.VE == 0 || d_value.HasNoInvalidExceptions()) { const double result = ForceSingle(d_value.value); - rPS0(inst.FD) = rPS1(inst.FD) = result; + rPS(inst.FD).Fill(result); FPSCR.FI = d_value.value != result; FPSCR.FR = 0; 
PowerPC::UpdateFPRF(result); @@ -370,12 +393,15 @@ void Interpreter::fmaddsx(UGeckoInstruction inst) void Interpreter::faddx(UGeckoInstruction inst) { - const FPResult sum = NI_add(rPS0(inst.FA), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + const FPResult sum = NI_add(a.PS0AsDouble(), b.PS0AsDouble()); if (FPSCR.VE == 0 || sum.HasNoInvalidExceptions()) { const double result = ForceDouble(sum.value); - rPS0(inst.FD) = result; + rPS(inst.FD).SetPS0(result); PowerPC::UpdateFPRF(result); } @@ -384,12 +410,15 @@ void Interpreter::faddx(UGeckoInstruction inst) } void Interpreter::faddsx(UGeckoInstruction inst) { - const FPResult sum = NI_add(rPS0(inst.FA), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + const FPResult sum = NI_add(a.PS0AsDouble(), b.PS0AsDouble()); if (FPSCR.VE == 0 || sum.HasNoInvalidExceptions()) { const double result = ForceSingle(sum.value); - rPS0(inst.FD) = rPS1(inst.FD) = result; + rPS(inst.FD).Fill(result); PowerPC::UpdateFPRF(result); } @@ -399,14 +428,17 @@ void Interpreter::faddsx(UGeckoInstruction inst) void Interpreter::fdivx(UGeckoInstruction inst) { - const FPResult quotient = NI_div(rPS0(inst.FA), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + const FPResult quotient = NI_div(a.PS0AsDouble(), b.PS0AsDouble()); const bool not_divide_by_zero = FPSCR.ZE == 0 || quotient.exception != FPSCR_ZX; const bool not_invalid = FPSCR.VE == 0 || quotient.HasNoInvalidExceptions(); if (not_divide_by_zero && not_invalid) { const double result = ForceDouble(quotient.value); - rPS0(inst.FD) = result; + rPS(inst.FD).SetPS0(result); PowerPC::UpdateFPRF(result); } @@ -416,14 +448,17 @@ void Interpreter::fdivx(UGeckoInstruction inst) } void Interpreter::fdivsx(UGeckoInstruction inst) { - const FPResult quotient = NI_div(rPS0(inst.FA), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + const FPResult quotient = 
NI_div(a.PS0AsDouble(), b.PS0AsDouble()); const bool not_divide_by_zero = FPSCR.ZE == 0 || quotient.exception != FPSCR_ZX; const bool not_invalid = FPSCR.VE == 0 || quotient.HasNoInvalidExceptions(); if (not_divide_by_zero && not_invalid) { const double result = ForceSingle(quotient.value); - rPS0(inst.FD) = rPS1(inst.FD) = result; + rPS(inst.FD).Fill(result); PowerPC::UpdateFPRF(result); } @@ -434,11 +469,11 @@ void Interpreter::fdivsx(UGeckoInstruction inst) // Single precision only. void Interpreter::fresx(UGeckoInstruction inst) { - const double b = rPS0(inst.FB); + const double b = rPS(inst.FB).PS0AsDouble(); const auto compute_result = [inst](double value) { const double result = Common::ApproximateReciprocal(value); - rPS0(inst.FD) = rPS1(inst.FD) = result; + rPS(inst.FD).Fill(result); PowerPC::UpdateFPRF(result); }; @@ -472,11 +507,11 @@ void Interpreter::fresx(UGeckoInstruction inst) void Interpreter::frsqrtex(UGeckoInstruction inst) { - const double b = rPS0(inst.FB); + const double b = rPS(inst.FB).PS0AsDouble(); const auto compute_result = [inst](double value) { const double result = Common::ApproximateReciprocalSquareRoot(value); - rPS0(inst.FD) = result; + rPS(inst.FD).SetPS0(result); PowerPC::UpdateFPRF(result); }; @@ -518,12 +553,16 @@ void Interpreter::frsqrtex(UGeckoInstruction inst) void Interpreter::fmsubx(UGeckoInstruction inst) { - const FPResult product = NI_msub(rPS0(inst.FA), rPS0(inst.FC), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const FPResult product = NI_msub(a.PS0AsDouble(), c.PS0AsDouble(), b.PS0AsDouble()); if (FPSCR.VE == 0 || product.HasNoInvalidExceptions()) { const double result = ForceDouble(product.value); - rPS0(inst.FD) = result; + rPS(inst.FD).SetPS0(result); PowerPC::UpdateFPRF(result); } @@ -533,13 +572,17 @@ void Interpreter::fmsubx(UGeckoInstruction inst) void Interpreter::fmsubsx(UGeckoInstruction inst) { - const double c_value = 
Force25Bit(rPS0(inst.FC)); - const FPResult product = NI_msub(rPS0(inst.FA), c_value, rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const double c_value = Force25Bit(c.PS0AsDouble()); + const FPResult product = NI_msub(a.PS0AsDouble(), c_value, b.PS0AsDouble()); if (FPSCR.VE == 0 || product.HasNoInvalidExceptions()) { const double result = ForceSingle(product.value); - rPS0(inst.FD) = rPS1(inst.FD) = result; + rPS(inst.FD).Fill(result); PowerPC::UpdateFPRF(result); } @@ -549,13 +592,19 @@ void Interpreter::fmsubsx(UGeckoInstruction inst) void Interpreter::fnmaddx(UGeckoInstruction inst) { - const FPResult product = NI_madd(rPS0(inst.FA), rPS0(inst.FC), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const FPResult product = NI_madd(a.PS0AsDouble(), c.PS0AsDouble(), b.PS0AsDouble()); if (FPSCR.VE == 0 || product.HasNoInvalidExceptions()) { - const double result = ForceDouble(product.value); - rPS0(inst.FD) = std::isnan(result) ? result : -result; - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const double tmp = ForceDouble(product.value); + const double result = std::isnan(tmp) ? tmp : -tmp; + + rPS(inst.FD).SetPS0(result); + PowerPC::UpdateFPRF(result); } if (inst.Rc) @@ -564,14 +613,20 @@ void Interpreter::fnmaddx(UGeckoInstruction inst) void Interpreter::fnmaddsx(UGeckoInstruction inst) { - const double c_value = Force25Bit(rPS0(inst.FC)); - const FPResult product = NI_madd(rPS0(inst.FA), c_value, rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const double c_value = Force25Bit(c.PS0AsDouble()); + const FPResult product = NI_madd(a.PS0AsDouble(), c_value, b.PS0AsDouble()); if (FPSCR.VE == 0 || product.HasNoInvalidExceptions()) { - const double result = ForceSingle(product.value); - rPS0(inst.FD) = rPS1(inst.FD) = std::isnan(result) ? 
result : -result; - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const double tmp = ForceSingle(product.value); + const double result = std::isnan(tmp) ? tmp : -tmp; + + rPS(inst.FD).Fill(result); + PowerPC::UpdateFPRF(result); } if (inst.Rc) @@ -580,13 +635,19 @@ void Interpreter::fnmaddsx(UGeckoInstruction inst) void Interpreter::fnmsubx(UGeckoInstruction inst) { - const FPResult product = NI_msub(rPS0(inst.FA), rPS0(inst.FC), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const FPResult product = NI_msub(a.PS0AsDouble(), c.PS0AsDouble(), b.PS0AsDouble()); if (FPSCR.VE == 0 || product.HasNoInvalidExceptions()) { - const double result = ForceDouble(product.value); - rPS0(inst.FD) = std::isnan(result) ? result : -result; - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const double tmp = ForceDouble(product.value); + const double result = std::isnan(tmp) ? tmp : -tmp; + + rPS(inst.FD).SetPS0(result); + PowerPC::UpdateFPRF(result); } if (inst.Rc) @@ -595,14 +656,20 @@ void Interpreter::fnmsubx(UGeckoInstruction inst) void Interpreter::fnmsubsx(UGeckoInstruction inst) { - const double c_value = Force25Bit(rPS0(inst.FC)); - const FPResult product = NI_msub(rPS0(inst.FA), c_value, rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const double c_value = Force25Bit(c.PS0AsDouble()); + const FPResult product = NI_msub(a.PS0AsDouble(), c_value, b.PS0AsDouble()); if (FPSCR.VE == 0 || product.HasNoInvalidExceptions()) { - const double result = ForceSingle(product.value); - rPS0(inst.FD) = rPS1(inst.FD) = std::isnan(result) ? result : -result; - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const double tmp = ForceSingle(product.value); + const double result = std::isnan(tmp) ? 
tmp : -tmp; + + rPS(inst.FD).Fill(result); + PowerPC::UpdateFPRF(result); } if (inst.Rc) @@ -611,12 +678,15 @@ void Interpreter::fnmsubsx(UGeckoInstruction inst) void Interpreter::fsubx(UGeckoInstruction inst) { - const FPResult difference = NI_sub(rPS0(inst.FA), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + const FPResult difference = NI_sub(a.PS0AsDouble(), b.PS0AsDouble()); if (FPSCR.VE == 0 || difference.HasNoInvalidExceptions()) { const double result = ForceDouble(difference.value); - rPS0(inst.FD) = result; + rPS(inst.FD).SetPS0(result); PowerPC::UpdateFPRF(result); } @@ -626,12 +696,15 @@ void Interpreter::fsubx(UGeckoInstruction inst) void Interpreter::fsubsx(UGeckoInstruction inst) { - const FPResult difference = NI_sub(rPS0(inst.FA), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + const FPResult difference = NI_sub(a.PS0AsDouble(), b.PS0AsDouble()); if (FPSCR.VE == 0 || difference.HasNoInvalidExceptions()) { const double result = ForceSingle(difference.value); - rPS0(inst.FD) = rPS1(inst.FD) = result; + rPS(inst.FD).Fill(result); PowerPC::UpdateFPRF(result); } diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp index e9eb2820b8..8e270dfc9f 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp @@ -72,7 +72,7 @@ void Interpreter::lfd(UGeckoInstruction inst) const u64 temp = PowerPC::Read_U64(address); if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI)) - riPS0(inst.FD) = temp; + rPS(inst.FD).SetPS0(temp); } void Interpreter::lfdu(UGeckoInstruction inst) @@ -89,7 +89,7 @@ void Interpreter::lfdu(UGeckoInstruction inst) if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI)) { - riPS0(inst.FD) = temp; + rPS(inst.FD).SetPS0(temp); rGPR[inst.RA] = address; } } @@ -108,7 +108,7 @@ void 
Interpreter::lfdux(UGeckoInstruction inst) if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI)) { - riPS0(inst.FD) = temp; + rPS(inst.FD).SetPS0(temp); rGPR[inst.RA] = address; } } @@ -126,7 +126,7 @@ void Interpreter::lfdx(UGeckoInstruction inst) const u64 temp = PowerPC::Read_U64(address); if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI)) - riPS0(inst.FD) = temp; + rPS(inst.FD).SetPS0(temp); } void Interpreter::lfs(UGeckoInstruction inst) @@ -144,8 +144,7 @@ void Interpreter::lfs(UGeckoInstruction inst) if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI)) { const u64 value = ConvertToDouble(temp); - riPS0(inst.FD) = value; - riPS1(inst.FD) = value; + rPS(inst.FD).Fill(value); } } @@ -164,8 +163,7 @@ void Interpreter::lfsu(UGeckoInstruction inst) if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI)) { const u64 value = ConvertToDouble(temp); - riPS0(inst.FD) = value; - riPS1(inst.FD) = value; + rPS(inst.FD).Fill(value); rGPR[inst.RA] = address; } } @@ -184,9 +182,8 @@ void Interpreter::lfsux(UGeckoInstruction inst) if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI)) { - u64 value = ConvertToDouble(temp); - riPS0(inst.FD) = value; - riPS1(inst.FD) = value; + const u64 value = ConvertToDouble(temp); + rPS(inst.FD).Fill(value); rGPR[inst.RA] = address; } } @@ -206,8 +203,7 @@ void Interpreter::lfsx(UGeckoInstruction inst) if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI)) { const u64 value = ConvertToDouble(temp); - riPS0(inst.FD) = value; - riPS1(inst.FD) = value; + rPS(inst.FD).Fill(value); } } @@ -355,7 +351,7 @@ void Interpreter::stfd(UGeckoInstruction inst) return; } - PowerPC::Write_U64(riPS0(inst.FS), address); + PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address); } void Interpreter::stfdu(UGeckoInstruction inst) @@ -368,7 +364,7 @@ void Interpreter::stfdu(UGeckoInstruction inst) return; } - PowerPC::Write_U64(riPS0(inst.FS), address); + PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address); if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI)) { 
rGPR[inst.RA] = address; @@ -385,7 +381,7 @@ void Interpreter::stfs(UGeckoInstruction inst) return; } - PowerPC::Write_U32(ConvertToSingle(riPS0(inst.FS)), address); + PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address); } void Interpreter::stfsu(UGeckoInstruction inst) @@ -398,7 +394,7 @@ void Interpreter::stfsu(UGeckoInstruction inst) return; } - PowerPC::Write_U32(ConvertToSingle(riPS0(inst.FS)), address); + PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address); if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI)) { rGPR[inst.RA] = address; @@ -761,7 +757,7 @@ void Interpreter::stfdux(UGeckoInstruction inst) return; } - PowerPC::Write_U64(riPS0(inst.FS), address); + PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address); if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI)) { rGPR[inst.RA] = address; @@ -778,7 +774,7 @@ void Interpreter::stfdx(UGeckoInstruction inst) return; } - PowerPC::Write_U64(riPS0(inst.FS), address); + PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address); } // Stores Floating points into Integers indeXed @@ -792,7 +788,7 @@ void Interpreter::stfiwx(UGeckoInstruction inst) return; } - PowerPC::Write_U32((u32)riPS0(inst.FS), address); + PowerPC::Write_U32(rPS(inst.FS).PS0AsU32(), address); } void Interpreter::stfsux(UGeckoInstruction inst) @@ -805,7 +801,7 @@ void Interpreter::stfsux(UGeckoInstruction inst) return; } - PowerPC::Write_U32(ConvertToSingle(riPS0(inst.FS)), address); + PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address); if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI)) { rGPR[inst.RA] = address; @@ -822,7 +818,7 @@ void Interpreter::stfsx(UGeckoInstruction inst) return; } - PowerPC::Write_U32(ConvertToSingle(riPS0(inst.FS)), address); + PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address); } void Interpreter::sthbrx(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp 
b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp index c6fc5f507b..af96d59df7 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp @@ -176,8 +176,8 @@ void Interpreter::Helper_Quantize(u32 addr, u32 instI, u32 instRS, u32 instW) const EQuantizeType stType = gqr.st_type; const unsigned int stScale = gqr.st_scale; - const double ps0 = rPS0(instRS); - const double ps1 = rPS1(instRS); + const double ps0 = rPS(instRS).PS0AsDouble(); + const double ps1 = rPS(instRS).PS1AsDouble(); switch (stType) { @@ -301,8 +301,7 @@ void Interpreter::Helper_Dequantize(u32 addr, u32 instI, u32 instRD, u32 instW) return; } - rPS0(instRD) = ps0; - rPS1(instRD) = ps1; + rPS(instRD).SetBoth(ps0, ps1); } void Interpreter::psq_l(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp index 02df9993a9..81054866ac 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp @@ -13,8 +13,12 @@ // These "binary instructions" do not alter FPSCR. void Interpreter::ps_sel(UGeckoInstruction inst) { - rPS0(inst.FD) = rPS0(inst.FA) >= -0.0 ? rPS0(inst.FC) : rPS0(inst.FB); - rPS1(inst.FD) = rPS1(inst.FA) >= -0.0 ? rPS1(inst.FC) : rPS1(inst.FB); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + rPS(inst.FD).SetBoth(a.PS0AsDouble() >= -0.0 ? c.PS0AsDouble() : b.PS0AsDouble(), + a.PS1AsDouble() >= -0.0 ? 
c.PS1AsDouble() : b.PS1AsDouble()); if (inst.Rc) Helper_UpdateCR1(); @@ -22,8 +26,9 @@ void Interpreter::ps_sel(UGeckoInstruction inst) void Interpreter::ps_neg(UGeckoInstruction inst) { - riPS0(inst.FD) = riPS0(inst.FB) ^ (1ULL << 63); - riPS1(inst.FD) = riPS1(inst.FB) ^ (1ULL << 63); + const auto& b = rPS(inst.FB); + + rPS(inst.FD).SetBoth(b.PS0AsU64() ^ (UINT64_C(1) << 63), b.PS1AsU64() ^ (UINT64_C(1) << 63)); if (inst.Rc) Helper_UpdateCR1(); @@ -31,8 +36,7 @@ void Interpreter::ps_neg(UGeckoInstruction inst) void Interpreter::ps_mr(UGeckoInstruction inst) { - rPS0(inst.FD) = rPS0(inst.FB); - rPS1(inst.FD) = rPS1(inst.FB); + rPS(inst.FD) = rPS(inst.FB); if (inst.Rc) Helper_UpdateCR1(); @@ -40,8 +44,9 @@ void Interpreter::ps_mr(UGeckoInstruction inst) void Interpreter::ps_nabs(UGeckoInstruction inst) { - riPS0(inst.FD) = riPS0(inst.FB) | (1ULL << 63); - riPS1(inst.FD) = riPS1(inst.FB) | (1ULL << 63); + const auto& b = rPS(inst.FB); + + rPS(inst.FD).SetBoth(b.PS0AsU64() | (UINT64_C(1) << 63), b.PS1AsU64() | (UINT64_C(1) << 63)); if (inst.Rc) Helper_UpdateCR1(); @@ -49,8 +54,9 @@ void Interpreter::ps_nabs(UGeckoInstruction inst) void Interpreter::ps_abs(UGeckoInstruction inst) { - riPS0(inst.FD) = riPS0(inst.FB) & ~(1ULL << 63); - riPS1(inst.FD) = riPS1(inst.FB) & ~(1ULL << 63); + const auto& b = rPS(inst.FB); + + rPS(inst.FD).SetBoth(b.PS0AsU64() & ~(UINT64_C(1) << 63), b.PS1AsU64() & ~(UINT64_C(1) << 63)); if (inst.Rc) Helper_UpdateCR1(); @@ -59,10 +65,10 @@ void Interpreter::ps_abs(UGeckoInstruction inst) // These are just moves, double is OK. 
void Interpreter::ps_merge00(UGeckoInstruction inst) { - double p0 = rPS0(inst.FA); - double p1 = rPS0(inst.FB); - rPS0(inst.FD) = p0; - rPS1(inst.FD) = p1; + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + rPS(inst.FD).SetBoth(a.PS0AsDouble(), b.PS0AsDouble()); if (inst.Rc) Helper_UpdateCR1(); @@ -70,10 +76,10 @@ void Interpreter::ps_merge00(UGeckoInstruction inst) void Interpreter::ps_merge01(UGeckoInstruction inst) { - double p0 = rPS0(inst.FA); - double p1 = rPS1(inst.FB); - rPS0(inst.FD) = p0; - rPS1(inst.FD) = p1; + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + rPS(inst.FD).SetBoth(a.PS0AsDouble(), b.PS1AsDouble()); if (inst.Rc) Helper_UpdateCR1(); @@ -81,10 +87,10 @@ void Interpreter::ps_merge01(UGeckoInstruction inst) void Interpreter::ps_merge10(UGeckoInstruction inst) { - double p0 = rPS1(inst.FA); - double p1 = rPS0(inst.FB); - rPS0(inst.FD) = p0; - rPS1(inst.FD) = p1; + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + rPS(inst.FD).SetBoth(a.PS1AsDouble(), b.PS0AsDouble()); if (inst.Rc) Helper_UpdateCR1(); @@ -92,10 +98,10 @@ void Interpreter::ps_merge10(UGeckoInstruction inst) void Interpreter::ps_merge11(UGeckoInstruction inst) { - double p0 = rPS1(inst.FA); - double p1 = rPS1(inst.FB); - rPS0(inst.FD) = p0; - rPS1(inst.FD) = p1; + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + rPS(inst.FD).SetBoth(a.PS1AsDouble(), b.PS1AsDouble()); if (inst.Rc) Helper_UpdateCR1(); @@ -104,9 +110,14 @@ void Interpreter::ps_merge11(UGeckoInstruction inst) // From here on, the real deal. 
void Interpreter::ps_div(UGeckoInstruction inst) { - rPS0(inst.FD) = ForceSingle(NI_div(rPS0(inst.FA), rPS0(inst.FB)).value); - rPS1(inst.FD) = ForceSingle(NI_div(rPS1(inst.FA), rPS1(inst.FB)).value); - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + const double ps0 = ForceSingle(NI_div(a.PS0AsDouble(), b.PS0AsDouble()).value); + const double ps1 = ForceSingle(NI_div(a.PS1AsDouble(), b.PS1AsDouble()).value); + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -115,8 +126,8 @@ void Interpreter::ps_div(UGeckoInstruction inst) void Interpreter::ps_res(UGeckoInstruction inst) { // this code is based on the real hardware tests - const double a = rPS0(inst.FB); - const double b = rPS1(inst.FB); + const double a = rPS(inst.FB).PS0AsDouble(); + const double b = rPS(inst.FB).PS1AsDouble(); if (a == 0.0 || b == 0.0) { @@ -130,9 +141,11 @@ void Interpreter::ps_res(UGeckoInstruction inst) if (Common::IsSNAN(a) || Common::IsSNAN(b)) SetFPException(FPSCR_VXSNAN); - rPS0(inst.FD) = Common::ApproximateReciprocal(a); - rPS1(inst.FD) = Common::ApproximateReciprocal(b); - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const double ps0 = Common::ApproximateReciprocal(a); + const double ps1 = Common::ApproximateReciprocal(b); + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -140,8 +153,8 @@ void Interpreter::ps_res(UGeckoInstruction inst) void Interpreter::ps_rsqrte(UGeckoInstruction inst) { - const double ps0 = rPS0(inst.FB); - const double ps1 = rPS1(inst.FB); + const double ps0 = rPS(inst.FB).PS0AsDouble(); + const double ps1 = rPS(inst.FB).PS1AsDouble(); if (ps0 == 0.0 || ps1 == 0.0) { @@ -161,10 +174,11 @@ void Interpreter::ps_rsqrte(UGeckoInstruction inst) if (Common::IsSNAN(ps0) || Common::IsSNAN(ps1)) SetFPException(FPSCR_VXSNAN); - rPS0(inst.FD) = ForceSingle(Common::ApproximateReciprocalSquareRoot(ps0)); - rPS1(inst.FD) = 
ForceSingle(Common::ApproximateReciprocalSquareRoot(ps1)); + const double dst_ps0 = ForceSingle(Common::ApproximateReciprocalSquareRoot(ps0)); + const double dst_ps1 = ForceSingle(Common::ApproximateReciprocalSquareRoot(ps1)); - PowerPC::UpdateFPRF(rPS0(inst.FD)); + rPS(inst.FD).SetBoth(dst_ps0, dst_ps1); + PowerPC::UpdateFPRF(dst_ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -172,9 +186,14 @@ void Interpreter::ps_rsqrte(UGeckoInstruction inst) void Interpreter::ps_sub(UGeckoInstruction inst) { - rPS0(inst.FD) = ForceSingle(NI_sub(rPS0(inst.FA), rPS0(inst.FB)).value); - rPS1(inst.FD) = ForceSingle(NI_sub(rPS1(inst.FA), rPS1(inst.FB)).value); - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + const double ps0 = ForceSingle(NI_sub(a.PS0AsDouble(), b.PS0AsDouble()).value); + const double ps1 = ForceSingle(NI_sub(a.PS1AsDouble(), b.PS1AsDouble()).value); + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -182,9 +201,14 @@ void Interpreter::ps_sub(UGeckoInstruction inst) void Interpreter::ps_add(UGeckoInstruction inst) { - rPS0(inst.FD) = ForceSingle(NI_add(rPS0(inst.FA), rPS0(inst.FB)).value); - rPS1(inst.FD) = ForceSingle(NI_add(rPS1(inst.FA), rPS1(inst.FB)).value); - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + const double ps0 = ForceSingle(NI_add(a.PS0AsDouble(), b.PS0AsDouble()).value); + const double ps1 = ForceSingle(NI_add(a.PS1AsDouble(), b.PS1AsDouble()).value); + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -192,11 +216,17 @@ void Interpreter::ps_add(UGeckoInstruction inst) void Interpreter::ps_mul(UGeckoInstruction inst) { - const double c0 = Force25Bit(rPS0(inst.FC)); - const double c1 = Force25Bit(rPS1(inst.FC)); - rPS0(inst.FD) = ForceSingle(NI_mul(rPS0(inst.FA), c0).value); - rPS1(inst.FD) = ForceSingle(NI_mul(rPS1(inst.FA), c1).value); - 
PowerPC::UpdateFPRF(rPS0(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& c = rPS(inst.FC); + + const double c0 = Force25Bit(c.PS0AsDouble()); + const double c1 = Force25Bit(c.PS1AsDouble()); + + const double ps0 = ForceSingle(NI_mul(a.PS0AsDouble(), c0).value); + const double ps1 = ForceSingle(NI_mul(a.PS1AsDouble(), c1).value); + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -204,11 +234,18 @@ void Interpreter::ps_mul(UGeckoInstruction inst) void Interpreter::ps_msub(UGeckoInstruction inst) { - const double c0 = Force25Bit(rPS0(inst.FC)); - const double c1 = Force25Bit(rPS1(inst.FC)); - rPS0(inst.FD) = ForceSingle(NI_msub(rPS0(inst.FA), c0, rPS0(inst.FB)).value); - rPS1(inst.FD) = ForceSingle(NI_msub(rPS1(inst.FA), c1, rPS1(inst.FB)).value); - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const double c0 = Force25Bit(c.PS0AsDouble()); + const double c1 = Force25Bit(c.PS1AsDouble()); + + const double ps0 = ForceSingle(NI_msub(a.PS0AsDouble(), c0, b.PS0AsDouble()).value); + const double ps1 = ForceSingle(NI_msub(a.PS1AsDouble(), c1, b.PS1AsDouble()).value); + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -216,11 +253,18 @@ void Interpreter::ps_msub(UGeckoInstruction inst) void Interpreter::ps_madd(UGeckoInstruction inst) { - const double c0 = Force25Bit(rPS0(inst.FC)); - const double c1 = Force25Bit(rPS1(inst.FC)); - rPS0(inst.FD) = ForceSingle(NI_madd(rPS0(inst.FA), c0, rPS0(inst.FB)).value); - rPS1(inst.FD) = ForceSingle(NI_madd(rPS1(inst.FA), c1, rPS1(inst.FB)).value); - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const double c0 = Force25Bit(c.PS0AsDouble()); + const double c1 = Force25Bit(c.PS1AsDouble()); + + const double ps0 = ForceSingle(NI_madd(a.PS0AsDouble(), c0, 
b.PS0AsDouble()).value); + const double ps1 = ForceSingle(NI_madd(a.PS1AsDouble(), c1, b.PS1AsDouble()).value); + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -228,13 +272,21 @@ void Interpreter::ps_madd(UGeckoInstruction inst) void Interpreter::ps_nmsub(UGeckoInstruction inst) { - const double c0 = Force25Bit(rPS0(inst.FC)); - const double c1 = Force25Bit(rPS1(inst.FC)); - const double result0 = ForceSingle(NI_msub(rPS0(inst.FA), c0, rPS0(inst.FB)).value); - const double result1 = ForceSingle(NI_msub(rPS1(inst.FA), c1, rPS1(inst.FB)).value); - rPS0(inst.FD) = std::isnan(result0) ? result0 : -result0; - rPS1(inst.FD) = std::isnan(result1) ? result1 : -result1; - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const double c0 = Force25Bit(c.PS0AsDouble()); + const double c1 = Force25Bit(c.PS1AsDouble()); + + const double tmp0 = ForceSingle(NI_msub(a.PS0AsDouble(), c0, b.PS0AsDouble()).value); + const double tmp1 = ForceSingle(NI_msub(a.PS1AsDouble(), c1, b.PS1AsDouble()).value); + + const double ps0 = std::isnan(tmp0) ? tmp0 : -tmp0; + const double ps1 = std::isnan(tmp1) ? tmp1 : -tmp1; + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -242,13 +294,21 @@ void Interpreter::ps_nmsub(UGeckoInstruction inst) void Interpreter::ps_nmadd(UGeckoInstruction inst) { - const double c0 = Force25Bit(rPS0(inst.FC)); - const double c1 = Force25Bit(rPS1(inst.FC)); - const double result0 = ForceSingle(NI_madd(rPS0(inst.FA), c0, rPS0(inst.FB)).value); - const double result1 = ForceSingle(NI_madd(rPS1(inst.FA), c1, rPS1(inst.FB)).value); - rPS0(inst.FD) = std::isnan(result0) ? result0 : -result0; - rPS1(inst.FD) = std::isnan(result1) ? 
result1 : -result1; - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const double c0 = Force25Bit(c.PS0AsDouble()); + const double c1 = Force25Bit(c.PS1AsDouble()); + + const double tmp0 = ForceSingle(NI_madd(a.PS0AsDouble(), c0, b.PS0AsDouble()).value); + const double tmp1 = ForceSingle(NI_madd(a.PS1AsDouble(), c1, b.PS1AsDouble()).value); + + const double ps0 = std::isnan(tmp0) ? tmp0 : -tmp0; + const double ps1 = std::isnan(tmp1) ? tmp1 : -tmp1; + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -256,11 +316,15 @@ void Interpreter::ps_nmadd(UGeckoInstruction inst) void Interpreter::ps_sum0(UGeckoInstruction inst) { - const double p0 = ForceSingle(NI_add(rPS0(inst.FA), rPS1(inst.FB)).value); - const double p1 = ForceSingle(rPS1(inst.FC)); - rPS0(inst.FD) = p0; - rPS1(inst.FD) = p1; - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const double ps0 = ForceSingle(NI_add(a.PS0AsDouble(), b.PS1AsDouble()).value); + const double ps1 = ForceSingle(c.PS1AsDouble()); + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -268,11 +332,15 @@ void Interpreter::ps_sum0(UGeckoInstruction inst) void Interpreter::ps_sum1(UGeckoInstruction inst) { - const double p0 = ForceSingle(rPS0(inst.FC)); - const double p1 = ForceSingle(NI_add(rPS0(inst.FA), rPS1(inst.FB)).value); - rPS0(inst.FD) = p0; - rPS1(inst.FD) = p1; - PowerPC::UpdateFPRF(rPS1(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const double ps0 = ForceSingle(c.PS0AsDouble()); + const double ps1 = ForceSingle(NI_add(a.PS0AsDouble(), b.PS1AsDouble()).value); + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps1); if (inst.Rc) Helper_UpdateCR1(); @@ -280,12 +348,15 @@ void 
Interpreter::ps_sum1(UGeckoInstruction inst) void Interpreter::ps_muls0(UGeckoInstruction inst) { - const double c0 = Force25Bit(rPS0(inst.FC)); - const double p0 = ForceSingle(NI_mul(rPS0(inst.FA), c0).value); - const double p1 = ForceSingle(NI_mul(rPS1(inst.FA), c0).value); - rPS0(inst.FD) = p0; - rPS1(inst.FD) = p1; - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& c = rPS(inst.FC); + + const double c0 = Force25Bit(c.PS0AsDouble()); + const double ps0 = ForceSingle(NI_mul(a.PS0AsDouble(), c0).value); + const double ps1 = ForceSingle(NI_mul(a.PS1AsDouble(), c0).value); + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -293,12 +364,15 @@ void Interpreter::ps_muls0(UGeckoInstruction inst) void Interpreter::ps_muls1(UGeckoInstruction inst) { - const double c1 = Force25Bit(rPS1(inst.FC)); - const double p0 = ForceSingle(NI_mul(rPS0(inst.FA), c1).value); - const double p1 = ForceSingle(NI_mul(rPS1(inst.FA), c1).value); - rPS0(inst.FD) = p0; - rPS1(inst.FD) = p1; - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& c = rPS(inst.FC); + + const double c1 = Force25Bit(c.PS1AsDouble()); + const double ps0 = ForceSingle(NI_mul(a.PS0AsDouble(), c1).value); + const double ps1 = ForceSingle(NI_mul(a.PS1AsDouble(), c1).value); + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -306,12 +380,16 @@ void Interpreter::ps_muls1(UGeckoInstruction inst) void Interpreter::ps_madds0(UGeckoInstruction inst) { - const double c0 = Force25Bit(rPS0(inst.FC)); - const double p0 = ForceSingle(NI_madd(rPS0(inst.FA), c0, rPS0(inst.FB)).value); - const double p1 = ForceSingle(NI_madd(rPS1(inst.FA), c0, rPS1(inst.FB)).value); - rPS0(inst.FD) = p0; - rPS1(inst.FD) = p1; - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const double c0 = 
Force25Bit(c.PS0AsDouble()); + const double ps0 = ForceSingle(NI_madd(a.PS0AsDouble(), c0, b.PS0AsDouble()).value); + const double ps1 = ForceSingle(NI_madd(a.PS1AsDouble(), c0, b.PS1AsDouble()).value); + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -319,12 +397,16 @@ void Interpreter::ps_madds0(UGeckoInstruction inst) void Interpreter::ps_madds1(UGeckoInstruction inst) { - const double c1 = Force25Bit(rPS1(inst.FC)); - const double p0 = ForceSingle(NI_madd(rPS0(inst.FA), c1, rPS0(inst.FB)).value); - const double p1 = ForceSingle(NI_madd(rPS1(inst.FA), c1, rPS1(inst.FB)).value); - rPS0(inst.FD) = p0; - rPS1(inst.FD) = p1; - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const double c1 = Force25Bit(c.PS1AsDouble()); + const double ps0 = ForceSingle(NI_madd(a.PS0AsDouble(), c1, b.PS0AsDouble()).value); + const double ps1 = ForceSingle(NI_madd(a.PS1AsDouble(), c1, b.PS1AsDouble()).value); + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -332,20 +414,32 @@ void Interpreter::ps_madds1(UGeckoInstruction inst) void Interpreter::ps_cmpu0(UGeckoInstruction inst) { - Helper_FloatCompareUnordered(inst, rPS0(inst.FA), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + Helper_FloatCompareUnordered(inst, a.PS0AsDouble(), b.PS0AsDouble()); } void Interpreter::ps_cmpo0(UGeckoInstruction inst) { - Helper_FloatCompareOrdered(inst, rPS0(inst.FA), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + Helper_FloatCompareOrdered(inst, a.PS0AsDouble(), b.PS0AsDouble()); } void Interpreter::ps_cmpu1(UGeckoInstruction inst) { - Helper_FloatCompareUnordered(inst, rPS1(inst.FA), rPS1(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + Helper_FloatCompareUnordered(inst, a.PS1AsDouble(), b.PS1AsDouble()); } void 
Interpreter::ps_cmpo1(UGeckoInstruction inst) { - Helper_FloatCompareOrdered(inst, rPS1(inst.FA), rPS1(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + Helper_FloatCompareOrdered(inst, a.PS1AsDouble(), b.PS1AsDouble()); } diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp index 6de8588307..19cae33ce5 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp @@ -98,7 +98,7 @@ void Interpreter::mtfsfx(UGeckoInstruction inst) m |= (0xFU << (i * 4)); } - FPSCR = (FPSCR.Hex & ~m) | (static_cast(riPS0(inst.FB)) & m); + FPSCR = (FPSCR.Hex & ~m) | (static_cast(rPS(inst.FB).PS0AsU64()) & m); FPSCRtoFPUSettings(FPSCR); if (inst.Rc) @@ -554,7 +554,7 @@ void Interpreter::mffsx(UGeckoInstruction inst) // TODO(ector): grab all overflow flags etc and set them in FPSCR UpdateFPSCR(); - riPS0(inst.FD) = 0xFFF8000000000000 | FPSCR.Hex; + rPS(inst.FD).SetPS0(UINT64_C(0xFFF8000000000000) | FPSCR.Hex); if (inst.Rc) Helper_UpdateCR1(); diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp index f671afb74c..39a73c6963 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp @@ -34,7 +34,7 @@ const X64Reg* FPURegCache::GetAllocationOrder(size_t* count) const OpArg FPURegCache::GetDefaultLocation(preg_t preg) const { - return PPCSTATE(ps[preg][0]); + return PPCSTATE(ps[preg].ps0); } BitSet32 FPURegCache::GetRegUtilization() const diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index 64532c968f..78fd2633d9 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ 
-452,7 +452,7 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type) // Load the high 64bits from the file and insert them in to the high 64bits of the host // register ARM64Reg tmp_reg = GetReg(); - m_float_emit->LDR(64, INDEX_UNSIGNED, tmp_reg, PPC_REG, PPCSTATE_OFF(ps[preg][1])); + m_float_emit->LDR(64, INDEX_UNSIGNED, tmp_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps1)); m_float_emit->INS(64, host_reg, 1, tmp_reg, 0); UnlockRegister(tmp_reg); @@ -506,7 +506,7 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type) reg.Load(host_reg, REG_LOWER_PAIR); } reg.SetDirty(false); - m_float_emit->LDR(load_size, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0])); + m_float_emit->LDR(load_size, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps0)); return host_reg; } default: @@ -554,7 +554,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type) // We are doing a full 128bit store because it takes 2 cycles on a Cortex-A57 to do a 128bit // store. // It would take longer to do an insert to a temporary and a 64bit store than to just do this. - m_float_emit->STR(128, INDEX_UNSIGNED, flush_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0])); + m_float_emit->STR(128, INDEX_UNSIGNED, flush_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps0)); break; case REG_DUP_SINGLE: flush_reg = GetReg(); @@ -562,7 +562,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type) // fall through case REG_DUP: // Store PSR1 (which is equal to PSR0) in memory. - m_float_emit->STR(64, INDEX_UNSIGNED, flush_reg, PPC_REG, PPCSTATE_OFF(ps[preg][1])); + m_float_emit->STR(64, INDEX_UNSIGNED, flush_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps1)); break; default: // All other types doesn't store anything in PSR1. 
@@ -687,7 +687,7 @@ void Arm64FPRCache::FlushRegister(size_t preg, bool maintain_state) store_size = 64; if (dirty) - m_float_emit->STR(store_size, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0])); + m_float_emit->STR(store_size, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps0)); if (!maintain_state) { @@ -702,9 +702,9 @@ void Arm64FPRCache::FlushRegister(size_t preg, bool maintain_state) // If the paired registers were at the start of ppcState we could do an STP here. // Too bad moving them would break savestate compatibility between x86_64 and AArch64 // m_float_emit->STP(64, INDEX_SIGNED, host_reg, host_reg, PPC_REG, - // PPCSTATE_OFF(ps[preg][0])); - m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0])); - m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][1])); + // PPCSTATE_OFF(ps[preg].ps0)); + m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps0)); + m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps1)); } if (!maintain_state) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index 3d8c6a9837..9860e4843e 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -24,7 +24,7 @@ static const Arm64Gen::ARM64Reg DISPATCHER_PC = // Some asserts to make sure we will be able to load everything static_assert(PPCSTATE_OFF(spr[1023]) <= 16380, "LDR(32bit) can't reach the last SPR"); -static_assert((PPCSTATE_OFF(ps[0][0]) % 8) == 0, +static_assert((PPCSTATE_OFF(ps[0].ps0) % 8) == 0, "LDR(64bit VFP) requires FPRs to be 8 byte aligned"); static_assert(PPCSTATE_OFF(xer_ca) < 4096, "STRB can't store xer_ca!"); static_assert(PPCSTATE_OFF(xer_so_ov) < 4096, "STRB can't store xer_so_ov!"); diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index 
174406d1ce..52d19543a3 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -4,6 +4,7 @@ #include "Core/PowerPC/PowerPC.h" +#include #include #include #include @@ -11,6 +12,7 @@ #include #include "Common/Assert.h" +#include "Common/BitUtils.h" #include "Common/ChunkFile.h" #include "Common/CommonTypes.h" #include "Common/FPURoundMode.h" @@ -42,6 +44,27 @@ MemChecks memchecks; PPCDebugInterface debug_interface; static CoreTiming::EventType* s_invalidate_cache_thread_safe; + +double PairedSingle::PS0AsDouble() const +{ + return Common::BitCast(ps0); +} + +double PairedSingle::PS1AsDouble() const +{ + return Common::BitCast(ps1); +} + +void PairedSingle::SetPS0(double value) +{ + ps0 = Common::BitCast(value); +} + +void PairedSingle::SetPS1(double value) +{ + ps1 = Common::BitCast(value); +} + static void InvalidateCacheThreadSafe(u64 userdata, s64 cyclesLate) { ppcState.iCache.Invalidate(static_cast(userdata)); @@ -135,10 +158,11 @@ void DoState(PointerWrap& p) static void ResetRegisters() { - memset(ppcState.ps, 0, sizeof(ppcState.ps)); - memset(ppcState.sr, 0, sizeof(ppcState.sr)); - memset(ppcState.gpr, 0, sizeof(ppcState.gpr)); - memset(ppcState.spr, 0, sizeof(ppcState.spr)); + std::fill(std::begin(ppcState.ps), std::end(ppcState.ps), PairedSingle{}); + std::fill(std::begin(ppcState.sr), std::end(ppcState.sr), 0U); + std::fill(std::begin(ppcState.gpr), std::end(ppcState.gpr), 0U); + std::fill(std::begin(ppcState.spr), std::end(ppcState.spr), 0U); + /* 0x00080200 = lonestar 2.0 0x00088202 = lonestar 2.2 diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 28dd4dc26b..2c43bca14f 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "Common/CommonTypes.h" @@ -57,6 +58,43 @@ struct TLBEntry u8 recent = 0; }; +struct PairedSingle +{ + u64 PS0AsU64() const { return ps0; } + u64 
PS1AsU64() const { return ps1; } + + u32 PS0AsU32() const { return static_cast(ps0); } + u32 PS1AsU32() const { return static_cast(ps1); } + + double PS0AsDouble() const; + double PS1AsDouble() const; + + void SetPS0(u64 value) { ps0 = value; } + void SetPS0(double value); + + void SetPS1(u64 value) { ps1 = value; } + void SetPS1(double value); + + void SetBoth(u64 lhs, u64 rhs) + { + SetPS0(lhs); + SetPS1(rhs); + } + void SetBoth(double lhs, double rhs) + { + SetPS0(lhs); + SetPS1(rhs); + } + + void Fill(u64 value) { SetBoth(value, value); } + void Fill(double value) { SetBoth(value, value); } + + u64 ps0 = 0; + u64 ps1 = 0; +}; +// Paired single must be standard layout in order for offsetof to work, which is used by the JITs +static_assert(std::is_standard_layout(), "PairedSingle must be standard layout"); + // This contains the entire state of the emulated PowerPC "Gekko" CPU. struct PowerPCState { @@ -114,7 +152,7 @@ struct PowerPCState // The paired singles are strange : PS0 is stored in the full 64 bits of each FPR // but ps calculations are only done in 32-bit precision, and PS1 is only 32 bits. // Since we want to use SIMD, SSE2 is the only viable alternative - 2x double. - alignas(16) u64 ps[32][2]; + alignas(16) PairedSingle ps[32]; u32 sr[16]; // Segment registers. 
@@ -212,11 +250,7 @@ void UpdatePerformanceMonitor(u32 cycles, u32 num_load_stores, u32 num_fp_inst); #define TL PowerPC::ppcState.spr[SPR_TL] #define TU PowerPC::ppcState.spr[SPR_TU] -#define rPS0(i) (*(double*)(&PowerPC::ppcState.ps[i][0])) -#define rPS1(i) (*(double*)(&PowerPC::ppcState.ps[i][1])) - -#define riPS0(i) (*(u64*)(&PowerPC::ppcState.ps[i][0])) -#define riPS1(i) (*(u64*)(&PowerPC::ppcState.ps[i][1])) +#define rPS(i) (PowerPC::ppcState.ps[(i)]) enum CRBits { diff --git a/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp b/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp index f1e06bdfb4..93bc112f67 100644 --- a/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp +++ b/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp @@ -227,11 +227,11 @@ void RegisterWidget::PopulateTable() [i](u64 value) { GPR(i) = value; }); // Floating point registers (double) - AddRegister(i, 2, RegisterType::fpr, "f" + std::to_string(i), [i] { return riPS0(i); }, - [i](u64 value) { riPS0(i) = value; }); + AddRegister(i, 2, RegisterType::fpr, "f" + std::to_string(i), [i] { return rPS(i).PS0AsU64(); }, + [i](u64 value) { rPS(i).SetPS0(value); }); - AddRegister(i, 4, RegisterType::fpr, "", [i] { return riPS1(i); }, - [i](u64 value) { riPS1(i) = value; }); + AddRegister(i, 4, RegisterType::fpr, "", [i] { return rPS(i).PS1AsU64(); }, + [i](u64 value) { rPS(i).SetPS1(value); }); } for (int i = 0; i < 8; i++)