PowerPC: Remove separate macros for paired singles

Previously, PowerPC.h had four macros in it like so:

\#define rPS0(i) (*(double*)(&PowerPC::ppcState.ps[i][0]))
\#define rPS1(i) (*(double*)(&PowerPC::ppcState.ps[i][1]))

\#define riPS0(i) (*(u64*)(&PowerPC::ppcState.ps[i][0]))
\#define riPS1(i) (*(u64*)(&PowerPC::ppcState.ps[i][1]))

Reinterpreting an object's storage as a different type through a pointer cast like this is undefined behavior.
Given this is used heavily with the interpreter (that is, the most
accurate, but slowest CPU backend), we don't exactly want to allow
undefined behavior to creep into it.

Instead, this commit adds a helper struct for operating on the paired singles,
and replaces the four macros with a single macro for accessing the
paired-singles/floating-point registers.

This way, it's left up to the caller to explicitly decide how it wants to interpret
the data, and it becomes more obvious where different interpretations of
the same data occur, as there will be a call to one of the
PS[x]As[Type]() accessors (e.g. PS0AsDouble() or PS1AsU64()).
This commit is contained in:
Lioncash 2018-12-14 13:23:12 -05:00
parent 2dcd058f7d
commit 244d083f0e
15 changed files with 457 additions and 235 deletions

View File

@ -272,8 +272,8 @@ void RunCodeHandler()
// Registers FPR0->13 are volatile
for (int i = 0; i < 14; ++i)
{
PowerPC::HostWrite_U64(riPS0(i), SP + 24 + 2 * i * sizeof(u64));
PowerPC::HostWrite_U64(riPS1(i), SP + 24 + (2 * i + 1) * sizeof(u64));
PowerPC::HostWrite_U64(rPS(i).PS0AsU64(), SP + 24 + 2 * i * sizeof(u64));
PowerPC::HostWrite_U64(rPS(i).PS1AsU64(), SP + 24 + (2 * i + 1) * sizeof(u64));
}
DEBUG_LOG(ACTIONREPLAY,
"GeckoCodes: Initiating phantom branch-and-link. "

View File

@ -64,8 +64,8 @@ void GeckoReturnTrampoline()
PowerPC::ExpandCR(PowerPC::HostRead_U32(SP + 20));
for (int i = 0; i < 14; ++i)
{
riPS0(i) = PowerPC::HostRead_U64(SP + 24 + 2 * i * sizeof(u64));
riPS1(i) = PowerPC::HostRead_U64(SP + 24 + (2 * i + 1) * sizeof(u64));
rPS(i).SetBoth(PowerPC::HostRead_U64(SP + 24 + 2 * i * sizeof(u64)),
PowerPC::HostRead_U64(SP + 24 + (2 * i + 1) * sizeof(u64)));
}
}
}

View File

@ -15,7 +15,7 @@ u32 HLE::SystemVABI::VAList::GetGPR(u32 gpr) const
double HLE::SystemVABI::VAList::GetFPR(u32 fpr) const
{
return rPS0(fpr);
return rPS(fpr).PS0AsDouble();
}
HLE::SystemVABI::VAListStruct::VAListStruct(u32 address)

View File

@ -88,8 +88,10 @@ static void Trace(UGeckoInstruction& inst)
std::string fregs = "";
for (int i = 0; i < 32; i++)
{
fregs += StringFromFormat("f%02d: %08" PRIx64 " %08" PRIx64 " ", i, PowerPC::ppcState.ps[i][0],
PowerPC::ppcState.ps[i][1]);
const auto& ps = PowerPC::ppcState.ps[i];
fregs +=
StringFromFormat("f%02d: %08" PRIx64 " %08" PRIx64 " ", i, ps.PS0AsU64(), ps.PS1AsU64());
}
const std::string ppc_inst = Common::GekkoDisassembler::Disassemble(inst.hex, PC);

View File

@ -27,7 +27,7 @@ enum class RoundingMode
// The Programming Environments Manual for 32 and 64-bit Microprocessors
void ConvertToInteger(UGeckoInstruction inst, RoundingMode rounding_mode)
{
const double b = rPS0(inst.FB);
const double b = rPS(inst.FB).PS0AsDouble();
u32 value;
bool exception_occurred = false;
@ -111,9 +111,11 @@ void ConvertToInteger(UGeckoInstruction inst, RoundingMode rounding_mode)
{
// Based on HW tests
// FPRF is not affected
riPS0(inst.FD) = 0xfff8000000000000ull | value;
u64 result = 0xfff8000000000000ull | value;
if (value == 0 && std::signbit(b))
riPS0(inst.FD) |= 0x100000000ull;
result |= 0x100000000ull;
rPS(inst.FD).SetPS0(result);
}
if (inst.Rc)
@ -198,12 +200,18 @@ void Interpreter::Helper_FloatCompareUnordered(UGeckoInstruction inst, double fa
void Interpreter::fcmpo(UGeckoInstruction inst)
{
Helper_FloatCompareOrdered(inst, rPS0(inst.FA), rPS0(inst.FB));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
Helper_FloatCompareOrdered(inst, a.PS0AsDouble(), b.PS0AsDouble());
}
void Interpreter::fcmpu(UGeckoInstruction inst)
{
Helper_FloatCompareUnordered(inst, rPS0(inst.FA), rPS0(inst.FB));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
Helper_FloatCompareUnordered(inst, a.PS0AsDouble(), b.PS0AsDouble());
}
void Interpreter::fctiwx(UGeckoInstruction inst)
@ -218,7 +226,7 @@ void Interpreter::fctiwzx(UGeckoInstruction inst)
void Interpreter::fmrx(UGeckoInstruction inst)
{
riPS0(inst.FD) = riPS0(inst.FB);
rPS(inst.FD).SetPS0(rPS(inst.FB).PS0AsU64());
// This is a binary instruction. Does not alter FPSCR
if (inst.Rc)
@ -227,7 +235,7 @@ void Interpreter::fmrx(UGeckoInstruction inst)
void Interpreter::fabsx(UGeckoInstruction inst)
{
rPS0(inst.FD) = fabs(rPS0(inst.FB));
rPS(inst.FD).SetPS0(fabs(rPS(inst.FB).PS0AsDouble()));
// This is a binary instruction. Does not alter FPSCR
if (inst.Rc)
@ -236,7 +244,7 @@ void Interpreter::fabsx(UGeckoInstruction inst)
void Interpreter::fnabsx(UGeckoInstruction inst)
{
riPS0(inst.FD) = riPS0(inst.FB) | (1ULL << 63);
rPS(inst.FD).SetPS0(rPS(inst.FB).PS0AsU64() | (UINT64_C(1) << 63));
// This is a binary instruction. Does not alter FPSCR
if (inst.Rc)
@ -245,7 +253,7 @@ void Interpreter::fnabsx(UGeckoInstruction inst)
void Interpreter::fnegx(UGeckoInstruction inst)
{
riPS0(inst.FD) = riPS0(inst.FB) ^ (1ULL << 63);
rPS(inst.FD).SetPS0(rPS(inst.FB).PS0AsU64() ^ (UINT64_C(1) << 63));
// This is a binary instruction. Does not alter FPSCR
if (inst.Rc)
@ -254,7 +262,11 @@ void Interpreter::fnegx(UGeckoInstruction inst)
void Interpreter::fselx(UGeckoInstruction inst)
{
rPS0(inst.FD) = (rPS0(inst.FA) >= -0.0) ? rPS0(inst.FC) : rPS0(inst.FB);
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const auto& c = rPS(inst.FC);
rPS(inst.FD).SetPS0((a.PS0AsDouble() >= -0.0) ? c.PS0AsDouble() : b.PS0AsDouble());
// This is a binary instruction. Does not alter FPSCR
if (inst.Rc)
@ -266,7 +278,7 @@ void Interpreter::fselx(UGeckoInstruction inst)
// PS1 is said to be undefined
void Interpreter::frspx(UGeckoInstruction inst) // round to single
{
const double b = rPS0(inst.FB);
const double b = rPS(inst.FB).PS0AsDouble();
const double rounded = ForceSingle(b);
if (std::isnan(b))
@ -278,8 +290,7 @@ void Interpreter::frspx(UGeckoInstruction inst) // round to single
if (!is_snan || FPSCR.VE == 0)
{
rPS0(inst.FD) = rounded;
rPS1(inst.FD) = rounded;
rPS(inst.FD).Fill(rounded);
PowerPC::UpdateFPRF(b);
}
@ -290,8 +301,7 @@ void Interpreter::frspx(UGeckoInstruction inst) // round to single
SetFI(b != rounded);
FPSCR.FR = fabs(rounded) > fabs(b);
PowerPC::UpdateFPRF(rounded);
rPS0(inst.FD) = rounded;
rPS1(inst.FD) = rounded;
rPS(inst.FD).Fill(rounded);
}
if (inst.Rc)
@ -300,13 +310,16 @@ void Interpreter::frspx(UGeckoInstruction inst) // round to single
void Interpreter::fmulx(UGeckoInstruction inst)
{
const FPResult product = NI_mul(rPS0(inst.FA), rPS0(inst.FC));
const auto& a = rPS(inst.FA);
const auto& c = rPS(inst.FC);
const FPResult product = NI_mul(a.PS0AsDouble(), c.PS0AsDouble());
if (FPSCR.VE == 0 || product.HasNoInvalidExceptions())
{
const double result = ForceDouble(product.value);
rPS0(inst.FD) = result;
rPS(inst.FD).SetPS0(result);
FPSCR.FI = 0; // are these flags important?
FPSCR.FR = 0;
PowerPC::UpdateFPRF(result);
@ -317,14 +330,17 @@ void Interpreter::fmulx(UGeckoInstruction inst)
}
void Interpreter::fmulsx(UGeckoInstruction inst)
{
const double c_value = Force25Bit(rPS0(inst.FC));
const FPResult d_value = NI_mul(rPS0(inst.FA), c_value);
const auto& a = rPS(inst.FA);
const auto& c = rPS(inst.FC);
const double c_value = Force25Bit(c.PS0AsDouble());
const FPResult d_value = NI_mul(a.PS0AsDouble(), c_value);
if (FPSCR.VE == 0 || d_value.HasNoInvalidExceptions())
{
const double result = ForceSingle(d_value.value);
rPS0(inst.FD) = rPS1(inst.FD) = result;
rPS(inst.FD).Fill(result);
FPSCR.FI = 0;
FPSCR.FR = 0;
PowerPC::UpdateFPRF(result);
@ -336,12 +352,15 @@ void Interpreter::fmulsx(UGeckoInstruction inst)
void Interpreter::fmaddx(UGeckoInstruction inst)
{
const FPResult product = NI_madd(rPS0(inst.FA), rPS0(inst.FC), rPS0(inst.FB));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const auto& c = rPS(inst.FC);
const FPResult product = NI_madd(a.PS0AsDouble(), c.PS0AsDouble(), b.PS0AsDouble());
if (FPSCR.VE == 0 || product.HasNoInvalidExceptions())
{
const double result = ForceDouble(product.value);
rPS0(inst.FD) = result;
rPS(inst.FD).SetPS0(result);
PowerPC::UpdateFPRF(result);
}
@ -351,14 +370,18 @@ void Interpreter::fmaddx(UGeckoInstruction inst)
void Interpreter::fmaddsx(UGeckoInstruction inst)
{
const double c_value = Force25Bit(rPS0(inst.FC));
const FPResult d_value = NI_madd(rPS0(inst.FA), c_value, rPS0(inst.FB));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const auto& c = rPS(inst.FC);
const double c_value = Force25Bit(c.PS0AsDouble());
const FPResult d_value = NI_madd(a.PS0AsDouble(), c_value, b.PS0AsDouble());
if (FPSCR.VE == 0 || d_value.HasNoInvalidExceptions())
{
const double result = ForceSingle(d_value.value);
rPS0(inst.FD) = rPS1(inst.FD) = result;
rPS(inst.FD).Fill(result);
FPSCR.FI = d_value.value != result;
FPSCR.FR = 0;
PowerPC::UpdateFPRF(result);
@ -370,12 +393,15 @@ void Interpreter::fmaddsx(UGeckoInstruction inst)
void Interpreter::faddx(UGeckoInstruction inst)
{
const FPResult sum = NI_add(rPS0(inst.FA), rPS0(inst.FB));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const FPResult sum = NI_add(a.PS0AsDouble(), b.PS0AsDouble());
if (FPSCR.VE == 0 || sum.HasNoInvalidExceptions())
{
const double result = ForceDouble(sum.value);
rPS0(inst.FD) = result;
rPS(inst.FD).SetPS0(result);
PowerPC::UpdateFPRF(result);
}
@ -384,12 +410,15 @@ void Interpreter::faddx(UGeckoInstruction inst)
}
void Interpreter::faddsx(UGeckoInstruction inst)
{
const FPResult sum = NI_add(rPS0(inst.FA), rPS0(inst.FB));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const FPResult sum = NI_add(a.PS0AsDouble(), b.PS0AsDouble());
if (FPSCR.VE == 0 || sum.HasNoInvalidExceptions())
{
const double result = ForceSingle(sum.value);
rPS0(inst.FD) = rPS1(inst.FD) = result;
rPS(inst.FD).Fill(result);
PowerPC::UpdateFPRF(result);
}
@ -399,14 +428,17 @@ void Interpreter::faddsx(UGeckoInstruction inst)
void Interpreter::fdivx(UGeckoInstruction inst)
{
const FPResult quotient = NI_div(rPS0(inst.FA), rPS0(inst.FB));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const FPResult quotient = NI_div(a.PS0AsDouble(), b.PS0AsDouble());
const bool not_divide_by_zero = FPSCR.ZE == 0 || quotient.exception != FPSCR_ZX;
const bool not_invalid = FPSCR.VE == 0 || quotient.HasNoInvalidExceptions();
if (not_divide_by_zero && not_invalid)
{
const double result = ForceDouble(quotient.value);
rPS0(inst.FD) = result;
rPS(inst.FD).SetPS0(result);
PowerPC::UpdateFPRF(result);
}
@ -416,14 +448,17 @@ void Interpreter::fdivx(UGeckoInstruction inst)
}
void Interpreter::fdivsx(UGeckoInstruction inst)
{
const FPResult quotient = NI_div(rPS0(inst.FA), rPS0(inst.FB));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const FPResult quotient = NI_div(a.PS0AsDouble(), b.PS0AsDouble());
const bool not_divide_by_zero = FPSCR.ZE == 0 || quotient.exception != FPSCR_ZX;
const bool not_invalid = FPSCR.VE == 0 || quotient.HasNoInvalidExceptions();
if (not_divide_by_zero && not_invalid)
{
const double result = ForceSingle(quotient.value);
rPS0(inst.FD) = rPS1(inst.FD) = result;
rPS(inst.FD).Fill(result);
PowerPC::UpdateFPRF(result);
}
@ -434,11 +469,11 @@ void Interpreter::fdivsx(UGeckoInstruction inst)
// Single precision only.
void Interpreter::fresx(UGeckoInstruction inst)
{
const double b = rPS0(inst.FB);
const double b = rPS(inst.FB).PS0AsDouble();
const auto compute_result = [inst](double value) {
const double result = Common::ApproximateReciprocal(value);
rPS0(inst.FD) = rPS1(inst.FD) = result;
rPS(inst.FD).Fill(result);
PowerPC::UpdateFPRF(result);
};
@ -472,11 +507,11 @@ void Interpreter::fresx(UGeckoInstruction inst)
void Interpreter::frsqrtex(UGeckoInstruction inst)
{
const double b = rPS0(inst.FB);
const double b = rPS(inst.FB).PS0AsDouble();
const auto compute_result = [inst](double value) {
const double result = Common::ApproximateReciprocalSquareRoot(value);
rPS0(inst.FD) = result;
rPS(inst.FD).SetPS0(result);
PowerPC::UpdateFPRF(result);
};
@ -518,12 +553,16 @@ void Interpreter::frsqrtex(UGeckoInstruction inst)
void Interpreter::fmsubx(UGeckoInstruction inst)
{
const FPResult product = NI_msub(rPS0(inst.FA), rPS0(inst.FC), rPS0(inst.FB));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const auto& c = rPS(inst.FC);
const FPResult product = NI_msub(a.PS0AsDouble(), c.PS0AsDouble(), b.PS0AsDouble());
if (FPSCR.VE == 0 || product.HasNoInvalidExceptions())
{
const double result = ForceDouble(product.value);
rPS0(inst.FD) = result;
rPS(inst.FD).SetPS0(result);
PowerPC::UpdateFPRF(result);
}
@ -533,13 +572,17 @@ void Interpreter::fmsubx(UGeckoInstruction inst)
void Interpreter::fmsubsx(UGeckoInstruction inst)
{
const double c_value = Force25Bit(rPS0(inst.FC));
const FPResult product = NI_msub(rPS0(inst.FA), c_value, rPS0(inst.FB));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const auto& c = rPS(inst.FC);
const double c_value = Force25Bit(c.PS0AsDouble());
const FPResult product = NI_msub(a.PS0AsDouble(), c_value, b.PS0AsDouble());
if (FPSCR.VE == 0 || product.HasNoInvalidExceptions())
{
const double result = ForceSingle(product.value);
rPS0(inst.FD) = rPS1(inst.FD) = result;
rPS(inst.FD).Fill(result);
PowerPC::UpdateFPRF(result);
}
@ -549,13 +592,19 @@ void Interpreter::fmsubsx(UGeckoInstruction inst)
void Interpreter::fnmaddx(UGeckoInstruction inst)
{
const FPResult product = NI_madd(rPS0(inst.FA), rPS0(inst.FC), rPS0(inst.FB));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const auto& c = rPS(inst.FC);
const FPResult product = NI_madd(a.PS0AsDouble(), c.PS0AsDouble(), b.PS0AsDouble());
if (FPSCR.VE == 0 || product.HasNoInvalidExceptions())
{
const double result = ForceDouble(product.value);
rPS0(inst.FD) = std::isnan(result) ? result : -result;
PowerPC::UpdateFPRF(rPS0(inst.FD));
const double tmp = ForceDouble(product.value);
const double result = std::isnan(tmp) ? tmp : -tmp;
rPS(inst.FD).SetPS0(result);
PowerPC::UpdateFPRF(result);
}
if (inst.Rc)
@ -564,14 +613,20 @@ void Interpreter::fnmaddx(UGeckoInstruction inst)
void Interpreter::fnmaddsx(UGeckoInstruction inst)
{
const double c_value = Force25Bit(rPS0(inst.FC));
const FPResult product = NI_madd(rPS0(inst.FA), c_value, rPS0(inst.FB));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const auto& c = rPS(inst.FC);
const double c_value = Force25Bit(c.PS0AsDouble());
const FPResult product = NI_madd(a.PS0AsDouble(), c_value, b.PS0AsDouble());
if (FPSCR.VE == 0 || product.HasNoInvalidExceptions())
{
const double result = ForceSingle(product.value);
rPS0(inst.FD) = rPS1(inst.FD) = std::isnan(result) ? result : -result;
PowerPC::UpdateFPRF(rPS0(inst.FD));
const double tmp = ForceSingle(product.value);
const double result = std::isnan(tmp) ? tmp : -tmp;
rPS(inst.FD).Fill(result);
PowerPC::UpdateFPRF(result);
}
if (inst.Rc)
@ -580,13 +635,19 @@ void Interpreter::fnmaddsx(UGeckoInstruction inst)
void Interpreter::fnmsubx(UGeckoInstruction inst)
{
const FPResult product = NI_msub(rPS0(inst.FA), rPS0(inst.FC), rPS0(inst.FB));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const auto& c = rPS(inst.FC);
const FPResult product = NI_msub(a.PS0AsDouble(), c.PS0AsDouble(), b.PS0AsDouble());
if (FPSCR.VE == 0 || product.HasNoInvalidExceptions())
{
const double result = ForceDouble(product.value);
rPS0(inst.FD) = std::isnan(result) ? result : -result;
PowerPC::UpdateFPRF(rPS0(inst.FD));
const double tmp = ForceDouble(product.value);
const double result = std::isnan(tmp) ? tmp : -tmp;
rPS(inst.FD).SetPS0(result);
PowerPC::UpdateFPRF(result);
}
if (inst.Rc)
@ -595,14 +656,20 @@ void Interpreter::fnmsubx(UGeckoInstruction inst)
void Interpreter::fnmsubsx(UGeckoInstruction inst)
{
const double c_value = Force25Bit(rPS0(inst.FC));
const FPResult product = NI_msub(rPS0(inst.FA), c_value, rPS0(inst.FB));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const auto& c = rPS(inst.FC);
const double c_value = Force25Bit(c.PS0AsDouble());
const FPResult product = NI_msub(a.PS0AsDouble(), c_value, b.PS0AsDouble());
if (FPSCR.VE == 0 || product.HasNoInvalidExceptions())
{
const double result = ForceSingle(product.value);
rPS0(inst.FD) = rPS1(inst.FD) = std::isnan(result) ? result : -result;
PowerPC::UpdateFPRF(rPS0(inst.FD));
const double tmp = ForceSingle(product.value);
const double result = std::isnan(tmp) ? tmp : -tmp;
rPS(inst.FD).Fill(result);
PowerPC::UpdateFPRF(result);
}
if (inst.Rc)
@ -611,12 +678,15 @@ void Interpreter::fnmsubsx(UGeckoInstruction inst)
void Interpreter::fsubx(UGeckoInstruction inst)
{
const FPResult difference = NI_sub(rPS0(inst.FA), rPS0(inst.FB));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const FPResult difference = NI_sub(a.PS0AsDouble(), b.PS0AsDouble());
if (FPSCR.VE == 0 || difference.HasNoInvalidExceptions())
{
const double result = ForceDouble(difference.value);
rPS0(inst.FD) = result;
rPS(inst.FD).SetPS0(result);
PowerPC::UpdateFPRF(result);
}
@ -626,12 +696,15 @@ void Interpreter::fsubx(UGeckoInstruction inst)
void Interpreter::fsubsx(UGeckoInstruction inst)
{
const FPResult difference = NI_sub(rPS0(inst.FA), rPS0(inst.FB));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const FPResult difference = NI_sub(a.PS0AsDouble(), b.PS0AsDouble());
if (FPSCR.VE == 0 || difference.HasNoInvalidExceptions())
{
const double result = ForceSingle(difference.value);
rPS0(inst.FD) = rPS1(inst.FD) = result;
rPS(inst.FD).Fill(result);
PowerPC::UpdateFPRF(result);
}

View File

@ -72,7 +72,7 @@ void Interpreter::lfd(UGeckoInstruction inst)
const u64 temp = PowerPC::Read_U64(address);
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
riPS0(inst.FD) = temp;
rPS(inst.FD).SetPS0(temp);
}
void Interpreter::lfdu(UGeckoInstruction inst)
@ -89,7 +89,7 @@ void Interpreter::lfdu(UGeckoInstruction inst)
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
{
riPS0(inst.FD) = temp;
rPS(inst.FD).SetPS0(temp);
rGPR[inst.RA] = address;
}
}
@ -108,7 +108,7 @@ void Interpreter::lfdux(UGeckoInstruction inst)
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
{
riPS0(inst.FD) = temp;
rPS(inst.FD).SetPS0(temp);
rGPR[inst.RA] = address;
}
}
@ -126,7 +126,7 @@ void Interpreter::lfdx(UGeckoInstruction inst)
const u64 temp = PowerPC::Read_U64(address);
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
riPS0(inst.FD) = temp;
rPS(inst.FD).SetPS0(temp);
}
void Interpreter::lfs(UGeckoInstruction inst)
@ -144,8 +144,7 @@ void Interpreter::lfs(UGeckoInstruction inst)
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
{
const u64 value = ConvertToDouble(temp);
riPS0(inst.FD) = value;
riPS1(inst.FD) = value;
rPS(inst.FD).Fill(value);
}
}
@ -164,8 +163,7 @@ void Interpreter::lfsu(UGeckoInstruction inst)
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
{
const u64 value = ConvertToDouble(temp);
riPS0(inst.FD) = value;
riPS1(inst.FD) = value;
rPS(inst.FD).Fill(value);
rGPR[inst.RA] = address;
}
}
@ -184,9 +182,8 @@ void Interpreter::lfsux(UGeckoInstruction inst)
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
{
u64 value = ConvertToDouble(temp);
riPS0(inst.FD) = value;
riPS1(inst.FD) = value;
const u64 value = ConvertToDouble(temp);
rPS(inst.FD).Fill(value);
rGPR[inst.RA] = address;
}
}
@ -206,8 +203,7 @@ void Interpreter::lfsx(UGeckoInstruction inst)
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
{
const u64 value = ConvertToDouble(temp);
riPS0(inst.FD) = value;
riPS1(inst.FD) = value;
rPS(inst.FD).Fill(value);
}
}
@ -355,7 +351,7 @@ void Interpreter::stfd(UGeckoInstruction inst)
return;
}
PowerPC::Write_U64(riPS0(inst.FS), address);
PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address);
}
void Interpreter::stfdu(UGeckoInstruction inst)
@ -368,7 +364,7 @@ void Interpreter::stfdu(UGeckoInstruction inst)
return;
}
PowerPC::Write_U64(riPS0(inst.FS), address);
PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address);
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
{
rGPR[inst.RA] = address;
@ -385,7 +381,7 @@ void Interpreter::stfs(UGeckoInstruction inst)
return;
}
PowerPC::Write_U32(ConvertToSingle(riPS0(inst.FS)), address);
PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address);
}
void Interpreter::stfsu(UGeckoInstruction inst)
@ -398,7 +394,7 @@ void Interpreter::stfsu(UGeckoInstruction inst)
return;
}
PowerPC::Write_U32(ConvertToSingle(riPS0(inst.FS)), address);
PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address);
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
{
rGPR[inst.RA] = address;
@ -761,7 +757,7 @@ void Interpreter::stfdux(UGeckoInstruction inst)
return;
}
PowerPC::Write_U64(riPS0(inst.FS), address);
PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address);
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
{
rGPR[inst.RA] = address;
@ -778,7 +774,7 @@ void Interpreter::stfdx(UGeckoInstruction inst)
return;
}
PowerPC::Write_U64(riPS0(inst.FS), address);
PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address);
}
// Stores Floating points into Integers indeXed
@ -792,7 +788,7 @@ void Interpreter::stfiwx(UGeckoInstruction inst)
return;
}
PowerPC::Write_U32((u32)riPS0(inst.FS), address);
PowerPC::Write_U32(rPS(inst.FS).PS0AsU32(), address);
}
void Interpreter::stfsux(UGeckoInstruction inst)
@ -805,7 +801,7 @@ void Interpreter::stfsux(UGeckoInstruction inst)
return;
}
PowerPC::Write_U32(ConvertToSingle(riPS0(inst.FS)), address);
PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address);
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
{
rGPR[inst.RA] = address;
@ -822,7 +818,7 @@ void Interpreter::stfsx(UGeckoInstruction inst)
return;
}
PowerPC::Write_U32(ConvertToSingle(riPS0(inst.FS)), address);
PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address);
}
void Interpreter::sthbrx(UGeckoInstruction inst)

View File

@ -176,8 +176,8 @@ void Interpreter::Helper_Quantize(u32 addr, u32 instI, u32 instRS, u32 instW)
const EQuantizeType stType = gqr.st_type;
const unsigned int stScale = gqr.st_scale;
const double ps0 = rPS0(instRS);
const double ps1 = rPS1(instRS);
const double ps0 = rPS(instRS).PS0AsDouble();
const double ps1 = rPS(instRS).PS1AsDouble();
switch (stType)
{
@ -301,8 +301,7 @@ void Interpreter::Helper_Dequantize(u32 addr, u32 instI, u32 instRD, u32 instW)
return;
}
rPS0(instRD) = ps0;
rPS1(instRD) = ps1;
rPS(instRD).SetBoth(ps0, ps1);
}
void Interpreter::psq_l(UGeckoInstruction inst)

View File

@ -13,8 +13,12 @@
// These "binary instructions" do not alter FPSCR.
void Interpreter::ps_sel(UGeckoInstruction inst)
{
rPS0(inst.FD) = rPS0(inst.FA) >= -0.0 ? rPS0(inst.FC) : rPS0(inst.FB);
rPS1(inst.FD) = rPS1(inst.FA) >= -0.0 ? rPS1(inst.FC) : rPS1(inst.FB);
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const auto& c = rPS(inst.FC);
rPS(inst.FD).SetBoth(a.PS0AsDouble() >= -0.0 ? c.PS0AsDouble() : b.PS0AsDouble(),
a.PS1AsDouble() >= -0.0 ? c.PS1AsDouble() : b.PS1AsDouble());
if (inst.Rc)
Helper_UpdateCR1();
@ -22,8 +26,9 @@ void Interpreter::ps_sel(UGeckoInstruction inst)
void Interpreter::ps_neg(UGeckoInstruction inst)
{
riPS0(inst.FD) = riPS0(inst.FB) ^ (1ULL << 63);
riPS1(inst.FD) = riPS1(inst.FB) ^ (1ULL << 63);
const auto& b = rPS(inst.FB);
rPS(inst.FD).SetBoth(b.PS0AsU64() ^ (UINT64_C(1) << 63), b.PS1AsU64() ^ (UINT64_C(1) << 63));
if (inst.Rc)
Helper_UpdateCR1();
@ -31,8 +36,7 @@ void Interpreter::ps_neg(UGeckoInstruction inst)
void Interpreter::ps_mr(UGeckoInstruction inst)
{
rPS0(inst.FD) = rPS0(inst.FB);
rPS1(inst.FD) = rPS1(inst.FB);
rPS(inst.FD) = rPS(inst.FB);
if (inst.Rc)
Helper_UpdateCR1();
@ -40,8 +44,9 @@ void Interpreter::ps_mr(UGeckoInstruction inst)
void Interpreter::ps_nabs(UGeckoInstruction inst)
{
riPS0(inst.FD) = riPS0(inst.FB) | (1ULL << 63);
riPS1(inst.FD) = riPS1(inst.FB) | (1ULL << 63);
const auto& b = rPS(inst.FB);
rPS(inst.FD).SetBoth(b.PS0AsU64() | (UINT64_C(1) << 63), b.PS1AsU64() | (UINT64_C(1) << 63));
if (inst.Rc)
Helper_UpdateCR1();
@ -49,8 +54,9 @@ void Interpreter::ps_nabs(UGeckoInstruction inst)
void Interpreter::ps_abs(UGeckoInstruction inst)
{
riPS0(inst.FD) = riPS0(inst.FB) & ~(1ULL << 63);
riPS1(inst.FD) = riPS1(inst.FB) & ~(1ULL << 63);
const auto& b = rPS(inst.FB);
rPS(inst.FD).SetBoth(b.PS0AsU64() & ~(UINT64_C(1) << 63), b.PS1AsU64() & ~(UINT64_C(1) << 63));
if (inst.Rc)
Helper_UpdateCR1();
@ -59,10 +65,10 @@ void Interpreter::ps_abs(UGeckoInstruction inst)
// These are just moves, double is OK.
void Interpreter::ps_merge00(UGeckoInstruction inst)
{
double p0 = rPS0(inst.FA);
double p1 = rPS0(inst.FB);
rPS0(inst.FD) = p0;
rPS1(inst.FD) = p1;
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
rPS(inst.FD).SetBoth(a.PS0AsDouble(), b.PS0AsDouble());
if (inst.Rc)
Helper_UpdateCR1();
@ -70,10 +76,10 @@ void Interpreter::ps_merge00(UGeckoInstruction inst)
void Interpreter::ps_merge01(UGeckoInstruction inst)
{
double p0 = rPS0(inst.FA);
double p1 = rPS1(inst.FB);
rPS0(inst.FD) = p0;
rPS1(inst.FD) = p1;
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
rPS(inst.FD).SetBoth(a.PS0AsDouble(), b.PS1AsDouble());
if (inst.Rc)
Helper_UpdateCR1();
@ -81,10 +87,10 @@ void Interpreter::ps_merge01(UGeckoInstruction inst)
void Interpreter::ps_merge10(UGeckoInstruction inst)
{
double p0 = rPS1(inst.FA);
double p1 = rPS0(inst.FB);
rPS0(inst.FD) = p0;
rPS1(inst.FD) = p1;
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
rPS(inst.FD).SetBoth(a.PS1AsDouble(), b.PS0AsDouble());
if (inst.Rc)
Helper_UpdateCR1();
@ -92,10 +98,10 @@ void Interpreter::ps_merge10(UGeckoInstruction inst)
void Interpreter::ps_merge11(UGeckoInstruction inst)
{
double p0 = rPS1(inst.FA);
double p1 = rPS1(inst.FB);
rPS0(inst.FD) = p0;
rPS1(inst.FD) = p1;
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
rPS(inst.FD).SetBoth(a.PS1AsDouble(), b.PS1AsDouble());
if (inst.Rc)
Helper_UpdateCR1();
@ -104,9 +110,14 @@ void Interpreter::ps_merge11(UGeckoInstruction inst)
// From here on, the real deal.
void Interpreter::ps_div(UGeckoInstruction inst)
{
rPS0(inst.FD) = ForceSingle(NI_div(rPS0(inst.FA), rPS0(inst.FB)).value);
rPS1(inst.FD) = ForceSingle(NI_div(rPS1(inst.FA), rPS1(inst.FB)).value);
PowerPC::UpdateFPRF(rPS0(inst.FD));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const double ps0 = ForceSingle(NI_div(a.PS0AsDouble(), b.PS0AsDouble()).value);
const double ps1 = ForceSingle(NI_div(a.PS1AsDouble(), b.PS1AsDouble()).value);
rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0);
if (inst.Rc)
Helper_UpdateCR1();
@ -115,8 +126,8 @@ void Interpreter::ps_div(UGeckoInstruction inst)
void Interpreter::ps_res(UGeckoInstruction inst)
{
// this code is based on the real hardware tests
const double a = rPS0(inst.FB);
const double b = rPS1(inst.FB);
const double a = rPS(inst.FB).PS0AsDouble();
const double b = rPS(inst.FB).PS1AsDouble();
if (a == 0.0 || b == 0.0)
{
@ -130,9 +141,11 @@ void Interpreter::ps_res(UGeckoInstruction inst)
if (Common::IsSNAN(a) || Common::IsSNAN(b))
SetFPException(FPSCR_VXSNAN);
rPS0(inst.FD) = Common::ApproximateReciprocal(a);
rPS1(inst.FD) = Common::ApproximateReciprocal(b);
PowerPC::UpdateFPRF(rPS0(inst.FD));
const double ps0 = Common::ApproximateReciprocal(a);
const double ps1 = Common::ApproximateReciprocal(b);
rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0);
if (inst.Rc)
Helper_UpdateCR1();
@ -140,8 +153,8 @@ void Interpreter::ps_res(UGeckoInstruction inst)
void Interpreter::ps_rsqrte(UGeckoInstruction inst)
{
const double ps0 = rPS0(inst.FB);
const double ps1 = rPS1(inst.FB);
const double ps0 = rPS(inst.FB).PS0AsDouble();
const double ps1 = rPS(inst.FB).PS1AsDouble();
if (ps0 == 0.0 || ps1 == 0.0)
{
@ -161,10 +174,11 @@ void Interpreter::ps_rsqrte(UGeckoInstruction inst)
if (Common::IsSNAN(ps0) || Common::IsSNAN(ps1))
SetFPException(FPSCR_VXSNAN);
rPS0(inst.FD) = ForceSingle(Common::ApproximateReciprocalSquareRoot(ps0));
rPS1(inst.FD) = ForceSingle(Common::ApproximateReciprocalSquareRoot(ps1));
const double dst_ps0 = ForceSingle(Common::ApproximateReciprocalSquareRoot(ps0));
const double dst_ps1 = ForceSingle(Common::ApproximateReciprocalSquareRoot(ps1));
PowerPC::UpdateFPRF(rPS0(inst.FD));
rPS(inst.FD).SetBoth(dst_ps0, dst_ps1);
PowerPC::UpdateFPRF(dst_ps0);
if (inst.Rc)
Helper_UpdateCR1();
@ -172,9 +186,14 @@ void Interpreter::ps_rsqrte(UGeckoInstruction inst)
void Interpreter::ps_sub(UGeckoInstruction inst)
{
rPS0(inst.FD) = ForceSingle(NI_sub(rPS0(inst.FA), rPS0(inst.FB)).value);
rPS1(inst.FD) = ForceSingle(NI_sub(rPS1(inst.FA), rPS1(inst.FB)).value);
PowerPC::UpdateFPRF(rPS0(inst.FD));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const double ps0 = ForceSingle(NI_sub(a.PS0AsDouble(), b.PS0AsDouble()).value);
const double ps1 = ForceSingle(NI_sub(a.PS1AsDouble(), b.PS1AsDouble()).value);
rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0);
if (inst.Rc)
Helper_UpdateCR1();
@ -182,9 +201,14 @@ void Interpreter::ps_sub(UGeckoInstruction inst)
void Interpreter::ps_add(UGeckoInstruction inst)
{
rPS0(inst.FD) = ForceSingle(NI_add(rPS0(inst.FA), rPS0(inst.FB)).value);
rPS1(inst.FD) = ForceSingle(NI_add(rPS1(inst.FA), rPS1(inst.FB)).value);
PowerPC::UpdateFPRF(rPS0(inst.FD));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const double ps0 = ForceSingle(NI_add(a.PS0AsDouble(), b.PS0AsDouble()).value);
const double ps1 = ForceSingle(NI_add(a.PS1AsDouble(), b.PS1AsDouble()).value);
rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0);
if (inst.Rc)
Helper_UpdateCR1();
@ -192,11 +216,17 @@ void Interpreter::ps_add(UGeckoInstruction inst)
void Interpreter::ps_mul(UGeckoInstruction inst)
{
const double c0 = Force25Bit(rPS0(inst.FC));
const double c1 = Force25Bit(rPS1(inst.FC));
rPS0(inst.FD) = ForceSingle(NI_mul(rPS0(inst.FA), c0).value);
rPS1(inst.FD) = ForceSingle(NI_mul(rPS1(inst.FA), c1).value);
PowerPC::UpdateFPRF(rPS0(inst.FD));
const auto& a = rPS(inst.FA);
const auto& c = rPS(inst.FC);
const double c0 = Force25Bit(c.PS0AsDouble());
const double c1 = Force25Bit(c.PS1AsDouble());
const double ps0 = ForceSingle(NI_mul(a.PS0AsDouble(), c0).value);
const double ps1 = ForceSingle(NI_mul(a.PS1AsDouble(), c1).value);
rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0);
if (inst.Rc)
Helper_UpdateCR1();
@ -204,11 +234,18 @@ void Interpreter::ps_mul(UGeckoInstruction inst)
void Interpreter::ps_msub(UGeckoInstruction inst)
{
const double c0 = Force25Bit(rPS0(inst.FC));
const double c1 = Force25Bit(rPS1(inst.FC));
rPS0(inst.FD) = ForceSingle(NI_msub(rPS0(inst.FA), c0, rPS0(inst.FB)).value);
rPS1(inst.FD) = ForceSingle(NI_msub(rPS1(inst.FA), c1, rPS1(inst.FB)).value);
PowerPC::UpdateFPRF(rPS0(inst.FD));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const auto& c = rPS(inst.FC);
const double c0 = Force25Bit(c.PS0AsDouble());
const double c1 = Force25Bit(c.PS1AsDouble());
const double ps0 = ForceSingle(NI_msub(a.PS0AsDouble(), c0, b.PS0AsDouble()).value);
const double ps1 = ForceSingle(NI_msub(a.PS1AsDouble(), c1, b.PS1AsDouble()).value);
rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0);
if (inst.Rc)
Helper_UpdateCR1();
@ -216,11 +253,18 @@ void Interpreter::ps_msub(UGeckoInstruction inst)
void Interpreter::ps_madd(UGeckoInstruction inst)
{
const double c0 = Force25Bit(rPS0(inst.FC));
const double c1 = Force25Bit(rPS1(inst.FC));
rPS0(inst.FD) = ForceSingle(NI_madd(rPS0(inst.FA), c0, rPS0(inst.FB)).value);
rPS1(inst.FD) = ForceSingle(NI_madd(rPS1(inst.FA), c1, rPS1(inst.FB)).value);
PowerPC::UpdateFPRF(rPS0(inst.FD));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const auto& c = rPS(inst.FC);
const double c0 = Force25Bit(c.PS0AsDouble());
const double c1 = Force25Bit(c.PS1AsDouble());
const double ps0 = ForceSingle(NI_madd(a.PS0AsDouble(), c0, b.PS0AsDouble()).value);
const double ps1 = ForceSingle(NI_madd(a.PS1AsDouble(), c1, b.PS1AsDouble()).value);
rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0);
if (inst.Rc)
Helper_UpdateCR1();
@ -228,13 +272,21 @@ void Interpreter::ps_madd(UGeckoInstruction inst)
void Interpreter::ps_nmsub(UGeckoInstruction inst)
{
const double c0 = Force25Bit(rPS0(inst.FC));
const double c1 = Force25Bit(rPS1(inst.FC));
const double result0 = ForceSingle(NI_msub(rPS0(inst.FA), c0, rPS0(inst.FB)).value);
const double result1 = ForceSingle(NI_msub(rPS1(inst.FA), c1, rPS1(inst.FB)).value);
rPS0(inst.FD) = std::isnan(result0) ? result0 : -result0;
rPS1(inst.FD) = std::isnan(result1) ? result1 : -result1;
PowerPC::UpdateFPRF(rPS0(inst.FD));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const auto& c = rPS(inst.FC);
const double c0 = Force25Bit(c.PS0AsDouble());
const double c1 = Force25Bit(c.PS1AsDouble());
const double tmp0 = ForceSingle(NI_msub(a.PS0AsDouble(), c0, b.PS0AsDouble()).value);
const double tmp1 = ForceSingle(NI_msub(a.PS1AsDouble(), c1, b.PS1AsDouble()).value);
const double ps0 = std::isnan(tmp0) ? tmp0 : -tmp0;
const double ps1 = std::isnan(tmp1) ? tmp1 : -tmp1;
rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0);
if (inst.Rc)
Helper_UpdateCR1();
@ -242,13 +294,21 @@ void Interpreter::ps_nmsub(UGeckoInstruction inst)
void Interpreter::ps_nmadd(UGeckoInstruction inst)
{
const double c0 = Force25Bit(rPS0(inst.FC));
const double c1 = Force25Bit(rPS1(inst.FC));
const double result0 = ForceSingle(NI_madd(rPS0(inst.FA), c0, rPS0(inst.FB)).value);
const double result1 = ForceSingle(NI_madd(rPS1(inst.FA), c1, rPS1(inst.FB)).value);
rPS0(inst.FD) = std::isnan(result0) ? result0 : -result0;
rPS1(inst.FD) = std::isnan(result1) ? result1 : -result1;
PowerPC::UpdateFPRF(rPS0(inst.FD));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const auto& c = rPS(inst.FC);
const double c0 = Force25Bit(c.PS0AsDouble());
const double c1 = Force25Bit(c.PS1AsDouble());
const double tmp0 = ForceSingle(NI_madd(a.PS0AsDouble(), c0, b.PS0AsDouble()).value);
const double tmp1 = ForceSingle(NI_madd(a.PS1AsDouble(), c1, b.PS1AsDouble()).value);
const double ps0 = std::isnan(tmp0) ? tmp0 : -tmp0;
const double ps1 = std::isnan(tmp1) ? tmp1 : -tmp1;
rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0);
if (inst.Rc)
Helper_UpdateCR1();
@ -256,11 +316,15 @@ void Interpreter::ps_nmadd(UGeckoInstruction inst)
void Interpreter::ps_sum0(UGeckoInstruction inst)
{
const double p0 = ForceSingle(NI_add(rPS0(inst.FA), rPS1(inst.FB)).value);
const double p1 = ForceSingle(rPS1(inst.FC));
rPS0(inst.FD) = p0;
rPS1(inst.FD) = p1;
PowerPC::UpdateFPRF(rPS0(inst.FD));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const auto& c = rPS(inst.FC);
const double ps0 = ForceSingle(NI_add(a.PS0AsDouble(), b.PS1AsDouble()).value);
const double ps1 = ForceSingle(c.PS1AsDouble());
rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0);
if (inst.Rc)
Helper_UpdateCR1();
@ -268,11 +332,15 @@ void Interpreter::ps_sum0(UGeckoInstruction inst)
void Interpreter::ps_sum1(UGeckoInstruction inst)
{
const double p0 = ForceSingle(rPS0(inst.FC));
const double p1 = ForceSingle(NI_add(rPS0(inst.FA), rPS1(inst.FB)).value);
rPS0(inst.FD) = p0;
rPS1(inst.FD) = p1;
PowerPC::UpdateFPRF(rPS1(inst.FD));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const auto& c = rPS(inst.FC);
const double ps0 = ForceSingle(c.PS0AsDouble());
const double ps1 = ForceSingle(NI_add(a.PS0AsDouble(), b.PS1AsDouble()).value);
rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps1);
if (inst.Rc)
Helper_UpdateCR1();
@ -280,12 +348,15 @@ void Interpreter::ps_sum1(UGeckoInstruction inst)
void Interpreter::ps_muls0(UGeckoInstruction inst)
{
const double c0 = Force25Bit(rPS0(inst.FC));
const double p0 = ForceSingle(NI_mul(rPS0(inst.FA), c0).value);
const double p1 = ForceSingle(NI_mul(rPS1(inst.FA), c0).value);
rPS0(inst.FD) = p0;
rPS1(inst.FD) = p1;
PowerPC::UpdateFPRF(rPS0(inst.FD));
const auto& a = rPS(inst.FA);
const auto& c = rPS(inst.FC);
const double c0 = Force25Bit(c.PS0AsDouble());
const double ps0 = ForceSingle(NI_mul(a.PS0AsDouble(), c0).value);
const double ps1 = ForceSingle(NI_mul(a.PS1AsDouble(), c0).value);
rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0);
if (inst.Rc)
Helper_UpdateCR1();
@ -293,12 +364,15 @@ void Interpreter::ps_muls0(UGeckoInstruction inst)
void Interpreter::ps_muls1(UGeckoInstruction inst)
{
const double c1 = Force25Bit(rPS1(inst.FC));
const double p0 = ForceSingle(NI_mul(rPS0(inst.FA), c1).value);
const double p1 = ForceSingle(NI_mul(rPS1(inst.FA), c1).value);
rPS0(inst.FD) = p0;
rPS1(inst.FD) = p1;
PowerPC::UpdateFPRF(rPS0(inst.FD));
const auto& a = rPS(inst.FA);
const auto& c = rPS(inst.FC);
const double c1 = Force25Bit(c.PS1AsDouble());
const double ps0 = ForceSingle(NI_mul(a.PS0AsDouble(), c1).value);
const double ps1 = ForceSingle(NI_mul(a.PS1AsDouble(), c1).value);
rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0);
if (inst.Rc)
Helper_UpdateCR1();
@ -306,12 +380,16 @@ void Interpreter::ps_muls1(UGeckoInstruction inst)
void Interpreter::ps_madds0(UGeckoInstruction inst)
{
const double c0 = Force25Bit(rPS0(inst.FC));
const double p0 = ForceSingle(NI_madd(rPS0(inst.FA), c0, rPS0(inst.FB)).value);
const double p1 = ForceSingle(NI_madd(rPS1(inst.FA), c0, rPS1(inst.FB)).value);
rPS0(inst.FD) = p0;
rPS1(inst.FD) = p1;
PowerPC::UpdateFPRF(rPS0(inst.FD));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const auto& c = rPS(inst.FC);
const double c0 = Force25Bit(c.PS0AsDouble());
const double ps0 = ForceSingle(NI_madd(a.PS0AsDouble(), c0, b.PS0AsDouble()).value);
const double ps1 = ForceSingle(NI_madd(a.PS1AsDouble(), c0, b.PS1AsDouble()).value);
rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0);
if (inst.Rc)
Helper_UpdateCR1();
@ -319,12 +397,16 @@ void Interpreter::ps_madds0(UGeckoInstruction inst)
void Interpreter::ps_madds1(UGeckoInstruction inst)
{
const double c1 = Force25Bit(rPS1(inst.FC));
const double p0 = ForceSingle(NI_madd(rPS0(inst.FA), c1, rPS0(inst.FB)).value);
const double p1 = ForceSingle(NI_madd(rPS1(inst.FA), c1, rPS1(inst.FB)).value);
rPS0(inst.FD) = p0;
rPS1(inst.FD) = p1;
PowerPC::UpdateFPRF(rPS0(inst.FD));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
const auto& c = rPS(inst.FC);
const double c1 = Force25Bit(c.PS1AsDouble());
const double ps0 = ForceSingle(NI_madd(a.PS0AsDouble(), c1, b.PS0AsDouble()).value);
const double ps1 = ForceSingle(NI_madd(a.PS1AsDouble(), c1, b.PS1AsDouble()).value);
rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRF(ps0);
if (inst.Rc)
Helper_UpdateCR1();
@ -332,20 +414,32 @@ void Interpreter::ps_madds1(UGeckoInstruction inst)
// Paired-single compare on slot 0: passes the instruction plus the PS0 values of registers FA and
// FB, read as doubles, to Helper_FloatCompareUnordered.
void Interpreter::ps_cmpu0(UGeckoInstruction inst)
{
// NOTE(review): this call goes through the rPS0() macro and repeats the comparison issued at the
// end of the function; it looks like the pre-refactor line from the removed side of the diff.
// Confirm which of the two calls is meant to stay.
Helper_FloatCompareUnordered(inst, rPS0(inst.FA), rPS0(inst.FB));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
Helper_FloatCompareUnordered(inst, a.PS0AsDouble(), b.PS0AsDouble());
}
// Paired-single compare on slot 0: passes the instruction plus the PS0 values of registers FA and
// FB, read as doubles, to Helper_FloatCompareOrdered.
void Interpreter::ps_cmpo0(UGeckoInstruction inst)
{
// NOTE(review): this call goes through the rPS0() macro and repeats the comparison issued at the
// end of the function; it looks like the pre-refactor line from the removed side of the diff.
// Confirm which of the two calls is meant to stay.
Helper_FloatCompareOrdered(inst, rPS0(inst.FA), rPS0(inst.FB));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
Helper_FloatCompareOrdered(inst, a.PS0AsDouble(), b.PS0AsDouble());
}
// Paired-single compare on slot 1: passes the instruction plus the PS1 values of registers FA and
// FB, read as doubles, to Helper_FloatCompareUnordered.
void Interpreter::ps_cmpu1(UGeckoInstruction inst)
{
// NOTE(review): this call goes through the rPS1() macro and repeats the comparison issued at the
// end of the function; it looks like the pre-refactor line from the removed side of the diff.
// Confirm which of the two calls is meant to stay.
Helper_FloatCompareUnordered(inst, rPS1(inst.FA), rPS1(inst.FB));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
Helper_FloatCompareUnordered(inst, a.PS1AsDouble(), b.PS1AsDouble());
}
// Paired-single compare on slot 1: passes the instruction plus the PS1 values of registers FA and
// FB, read as doubles, to Helper_FloatCompareOrdered.
void Interpreter::ps_cmpo1(UGeckoInstruction inst)
{
// NOTE(review): this call goes through the rPS1() macro and repeats the comparison issued at the
// end of the function; it looks like the pre-refactor line from the removed side of the diff.
// Confirm which of the two calls is meant to stay.
Helper_FloatCompareOrdered(inst, rPS1(inst.FA), rPS1(inst.FB));
const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB);
Helper_FloatCompareOrdered(inst, a.PS1AsDouble(), b.PS1AsDouble());
}

View File

@ -98,7 +98,7 @@ void Interpreter::mtfsfx(UGeckoInstruction inst)
m |= (0xFU << (i * 4));
}
FPSCR = (FPSCR.Hex & ~m) | (static_cast<u32>(riPS0(inst.FB)) & m);
FPSCR = (FPSCR.Hex & ~m) | (static_cast<u32>(rPS(inst.FB).PS0AsU64()) & m);
FPSCRtoFPUSettings(FPSCR);
if (inst.Rc)
@ -554,7 +554,7 @@ void Interpreter::mffsx(UGeckoInstruction inst)
// TODO(ector): grab all overflow flags etc and set them in FPSCR
UpdateFPSCR();
riPS0(inst.FD) = 0xFFF8000000000000 | FPSCR.Hex;
rPS(inst.FD).SetPS0(UINT64_C(0xFFF8000000000000) | FPSCR.Hex);
if (inst.Rc)
Helper_UpdateCR1();

View File

@ -34,7 +34,7 @@ const X64Reg* FPURegCache::GetAllocationOrder(size_t* count) const
OpArg FPURegCache::GetDefaultLocation(preg_t preg) const
{
return PPCSTATE(ps[preg][0]);
return PPCSTATE(ps[preg].ps0);
}
BitSet32 FPURegCache::GetRegUtilization() const

View File

@ -452,7 +452,7 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type)
// Load the high 64 bits from the file and insert them into the high 64 bits of the host
// register
ARM64Reg tmp_reg = GetReg();
m_float_emit->LDR(64, INDEX_UNSIGNED, tmp_reg, PPC_REG, PPCSTATE_OFF(ps[preg][1]));
m_float_emit->LDR(64, INDEX_UNSIGNED, tmp_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps1));
m_float_emit->INS(64, host_reg, 1, tmp_reg, 0);
UnlockRegister(tmp_reg);
@ -506,7 +506,7 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type)
reg.Load(host_reg, REG_LOWER_PAIR);
}
reg.SetDirty(false);
m_float_emit->LDR(load_size, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0]));
m_float_emit->LDR(load_size, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps0));
return host_reg;
}
default:
@ -554,7 +554,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type)
// We are doing a full 128bit store because it takes 2 cycles on a Cortex-A57 to do a 128bit
// store.
// It would take longer to do an insert to a temporary and a 64bit store than to just do this.
m_float_emit->STR(128, INDEX_UNSIGNED, flush_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0]));
m_float_emit->STR(128, INDEX_UNSIGNED, flush_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps0));
break;
case REG_DUP_SINGLE:
flush_reg = GetReg();
@ -562,7 +562,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type)
// fall through
case REG_DUP:
// Store PSR1 (which is equal to PSR0) in memory.
m_float_emit->STR(64, INDEX_UNSIGNED, flush_reg, PPC_REG, PPCSTATE_OFF(ps[preg][1]));
m_float_emit->STR(64, INDEX_UNSIGNED, flush_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps1));
break;
default:
// All other types don't store anything in PSR1.
@ -687,7 +687,7 @@ void Arm64FPRCache::FlushRegister(size_t preg, bool maintain_state)
store_size = 64;
if (dirty)
m_float_emit->STR(store_size, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0]));
m_float_emit->STR(store_size, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps0));
if (!maintain_state)
{
@ -702,9 +702,9 @@ void Arm64FPRCache::FlushRegister(size_t preg, bool maintain_state)
// If the paired registers were at the start of ppcState we could do an STP here.
// Too bad moving them would break savestate compatibility between x86_64 and AArch64
// m_float_emit->STP(64, INDEX_SIGNED, host_reg, host_reg, PPC_REG,
// PPCSTATE_OFF(ps[preg][0]));
m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0]));
m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][1]));
// PPCSTATE_OFF(ps[preg].ps0));
m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps0));
m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps1));
}
if (!maintain_state)

View File

@ -24,7 +24,7 @@ static const Arm64Gen::ARM64Reg DISPATCHER_PC =
// Some asserts to make sure we will be able to load everything
static_assert(PPCSTATE_OFF(spr[1023]) <= 16380, "LDR(32bit) can't reach the last SPR");
static_assert((PPCSTATE_OFF(ps[0][0]) % 8) == 0,
static_assert((PPCSTATE_OFF(ps[0].ps0) % 8) == 0,
"LDR(64bit VFP) requires FPRs to be 8 byte aligned");
static_assert(PPCSTATE_OFF(xer_ca) < 4096, "STRB can't store xer_ca!");
static_assert(PPCSTATE_OFF(xer_so_ov) < 4096, "STRB can't store xer_so_ov!");

View File

@ -4,6 +4,7 @@
#include "Core/PowerPC/PowerPC.h"
#include <algorithm>
#include <cstring>
#include <istream>
#include <ostream>
@ -11,6 +12,7 @@
#include <vector>
#include "Common/Assert.h"
#include "Common/BitUtils.h"
#include "Common/ChunkFile.h"
#include "Common/CommonTypes.h"
#include "Common/FPURoundMode.h"
@ -42,6 +44,27 @@ MemChecks memchecks;
PPCDebugInterface debug_interface;
static CoreTiming::EventType* s_invalidate_cache_thread_safe;
// Returns the ps0 slot converted to a double through Common::BitCast.
double PairedSingle::PS0AsDouble() const
{
  const double as_double = Common::BitCast<double>(ps0);
  return as_double;
}
// Returns the ps1 slot converted to a double through Common::BitCast.
double PairedSingle::PS1AsDouble() const
{
  const double as_double = Common::BitCast<double>(ps1);
  return as_double;
}
void PairedSingle::SetPS0(double value)
{
ps0 = Common::BitCast<u64>(value);
}
void PairedSingle::SetPS1(double value)
{
ps1 = Common::BitCast<u64>(value);
}
static void InvalidateCacheThreadSafe(u64 userdata, s64 cyclesLate)
{
ppcState.iCache.Invalidate(static_cast<u32>(userdata));
@ -135,10 +158,11 @@ void DoState(PointerWrap& p)
static void ResetRegisters()
{
memset(ppcState.ps, 0, sizeof(ppcState.ps));
memset(ppcState.sr, 0, sizeof(ppcState.sr));
memset(ppcState.gpr, 0, sizeof(ppcState.gpr));
memset(ppcState.spr, 0, sizeof(ppcState.spr));
std::fill(std::begin(ppcState.ps), std::end(ppcState.ps), PairedSingle{});
std::fill(std::begin(ppcState.sr), std::end(ppcState.sr), 0U);
std::fill(std::begin(ppcState.gpr), std::end(ppcState.gpr), 0U);
std::fill(std::begin(ppcState.spr), std::end(ppcState.spr), 0U);
/*
0x00080200 = lonestar 2.0
0x00088202 = lonestar 2.2

View File

@ -8,6 +8,7 @@
#include <cstddef>
#include <iosfwd>
#include <tuple>
#include <type_traits>
#include <vector>
#include "Common/CommonTypes.h"
@ -57,6 +58,43 @@ struct TLBEntry
u8 recent = 0;
};
// Holds one register as two u64 slots (ps0 and ps1). Callers choose how to view each slot
// (u64, u32 or double) through the accessors, and write slots through the matching setters.
struct PairedSingle
{
  // Slot contents exactly as stored.
  u64 PS0AsU64() const { return ps0; }
  u64 PS1AsU64() const { return ps1; }

  // Slot contents narrowed to u32 with static_cast.
  u32 PS0AsU32() const { return static_cast<u32>(PS0AsU64()); }
  u32 PS1AsU32() const { return static_cast<u32>(PS1AsU64()); }

  // Slot contents as a double; declared here, defined out of line.
  double PS0AsDouble() const;
  double PS1AsDouble() const;

  // Per-slot setters. The u64 overloads store the value as-is; the double overloads are
  // defined out of line.
  void SetPS0(u64 value) { ps0 = value; }
  void SetPS0(double value);
  void SetPS1(u64 value) { ps1 = value; }
  void SetPS1(double value);

  // Writes lhs to ps0 and rhs to ps1.
  void SetBoth(u64 lhs, u64 rhs)
  {
    ps0 = lhs;
    ps1 = rhs;
  }
  void SetBoth(double lhs, double rhs)
  {
    SetPS0(lhs);
    SetPS1(rhs);
  }

  // Writes the same value to both slots.
  void Fill(u64 value)
  {
    ps0 = value;
    ps1 = value;
  }
  void Fill(double value)
  {
    SetPS0(value);
    SetPS1(value);
  }

  u64 ps0 = 0;
  u64 ps1 = 0;
};
// Paired single must be standard layout in order for offsetof to work, which is used by the JITs
static_assert(std::is_standard_layout<PairedSingle>(), "PairedSingle must be standard layout");
// This contains the entire state of the emulated PowerPC "Gekko" CPU.
struct PowerPCState
{
@ -114,7 +152,7 @@ struct PowerPCState
// The paired singles are strange : PS0 is stored in the full 64 bits of each FPR
// but ps calculations are only done in 32-bit precision, and PS1 is only 32 bits.
// Since we want to use SIMD, SSE2 is the only viable alternative - 2x double.
alignas(16) u64 ps[32][2];
alignas(16) PairedSingle ps[32];
u32 sr[16]; // Segment registers.
@ -212,11 +250,7 @@ void UpdatePerformanceMonitor(u32 cycles, u32 num_load_stores, u32 num_fp_inst);
#define TL PowerPC::ppcState.spr[SPR_TL]
#define TU PowerPC::ppcState.spr[SPR_TU]
#define rPS0(i) (*(double*)(&PowerPC::ppcState.ps[i][0]))
#define rPS1(i) (*(double*)(&PowerPC::ppcState.ps[i][1]))
#define riPS0(i) (*(u64*)(&PowerPC::ppcState.ps[i][0]))
#define riPS1(i) (*(u64*)(&PowerPC::ppcState.ps[i][1]))
#define rPS(i) (PowerPC::ppcState.ps[(i)])
enum CRBits
{

View File

@ -227,11 +227,11 @@ void RegisterWidget::PopulateTable()
[i](u64 value) { GPR(i) = value; });
// Floating point registers (double)
AddRegister(i, 2, RegisterType::fpr, "f" + std::to_string(i), [i] { return riPS0(i); },
[i](u64 value) { riPS0(i) = value; });
AddRegister(i, 2, RegisterType::fpr, "f" + std::to_string(i), [i] { return rPS(i).PS0AsU64(); },
[i](u64 value) { rPS(i).SetPS0(value); });
AddRegister(i, 4, RegisterType::fpr, "", [i] { return riPS1(i); },
[i](u64 value) { riPS1(i) = value; });
AddRegister(i, 4, RegisterType::fpr, "", [i] { return rPS(i).PS1AsU64(); },
[i](u64 value) { rPS(i).SetPS1(value); });
}
for (int i = 0; i < 8; i++)