Interpreter: Return float from ForceSingle

This is a performance optimization that also makes the code a little
neater: it saves us from performing a single -> double -> single
conversion when calling UpdateFPRFSingle.
This commit is contained in:
JosJuice 2021-06-13 14:45:09 +02:00
parent d56721ebb9
commit 8d2c069c34
3 changed files with 47 additions and 57 deletions

View File

@ -36,15 +36,13 @@ inline void SetFPException(UReg_FPSCR* fpscr, u32 mask)
fpscr->VX = (fpscr->Hex & FPSCR_VX_ANY) != 0; fpscr->VX = (fpscr->Hex & FPSCR_VX_ANY) != 0;
} }
inline double ForceSingle(const UReg_FPSCR& fpscr, double value) inline float ForceSingle(const UReg_FPSCR& fpscr, double value)
{ {
// convert to float... float x = static_cast<float>(value);
float x = (float)value;
if (!cpu_info.bFlushToZero && fpscr.NI) if (!cpu_info.bFlushToZero && fpscr.NI)
{ {
x = Common::FlushToZero(x); x = Common::FlushToZero(x);
} }
// ...and back to double:
return x; return x;
} }

View File

@ -290,7 +290,7 @@ void Interpreter::fselx(UGeckoInstruction inst)
void Interpreter::frspx(UGeckoInstruction inst) // round to single void Interpreter::frspx(UGeckoInstruction inst) // round to single
{ {
const double b = rPS(inst.FB).PS0AsDouble(); const double b = rPS(inst.FB).PS0AsDouble();
const double rounded = ForceSingle(FPSCR, b); const float rounded = ForceSingle(FPSCR, b);
if (std::isnan(b)) if (std::isnan(b))
{ {
@ -349,7 +349,7 @@ void Interpreter::fmulsx(UGeckoInstruction inst)
if (FPSCR.VE == 0 || d_value.HasNoInvalidExceptions()) if (FPSCR.VE == 0 || d_value.HasNoInvalidExceptions())
{ {
const double result = ForceSingle(FPSCR, d_value.value); const float result = ForceSingle(FPSCR, d_value.value);
rPS(inst.FD).Fill(result); rPS(inst.FD).Fill(result);
FPSCR.FI = 0; FPSCR.FI = 0;
@ -390,7 +390,7 @@ void Interpreter::fmaddsx(UGeckoInstruction inst)
if (FPSCR.VE == 0 || d_value.HasNoInvalidExceptions()) if (FPSCR.VE == 0 || d_value.HasNoInvalidExceptions())
{ {
const double result = ForceSingle(FPSCR, d_value.value); const float result = ForceSingle(FPSCR, d_value.value);
rPS(inst.FD).Fill(result); rPS(inst.FD).Fill(result);
FPSCR.FI = d_value.value != result; FPSCR.FI = d_value.value != result;
@ -428,7 +428,7 @@ void Interpreter::faddsx(UGeckoInstruction inst)
if (FPSCR.VE == 0 || sum.HasNoInvalidExceptions()) if (FPSCR.VE == 0 || sum.HasNoInvalidExceptions())
{ {
const double result = ForceSingle(FPSCR, sum.value); const float result = ForceSingle(FPSCR, sum.value);
rPS(inst.FD).Fill(result); rPS(inst.FD).Fill(result);
PowerPC::UpdateFPRFSingle(result); PowerPC::UpdateFPRFSingle(result);
} }
@ -468,7 +468,7 @@ void Interpreter::fdivsx(UGeckoInstruction inst)
if (not_divide_by_zero && not_invalid) if (not_divide_by_zero && not_invalid)
{ {
const double result = ForceSingle(FPSCR, quotient.value); const float result = ForceSingle(FPSCR, quotient.value);
rPS(inst.FD).Fill(result); rPS(inst.FD).Fill(result);
PowerPC::UpdateFPRFSingle(result); PowerPC::UpdateFPRFSingle(result);
} }
@ -592,7 +592,7 @@ void Interpreter::fmsubsx(UGeckoInstruction inst)
if (FPSCR.VE == 0 || product.HasNoInvalidExceptions()) if (FPSCR.VE == 0 || product.HasNoInvalidExceptions())
{ {
const double result = ForceSingle(FPSCR, product.value); const float result = ForceSingle(FPSCR, product.value);
rPS(inst.FD).Fill(result); rPS(inst.FD).Fill(result);
PowerPC::UpdateFPRFSingle(result); PowerPC::UpdateFPRFSingle(result);
} }
@ -633,8 +633,8 @@ void Interpreter::fnmaddsx(UGeckoInstruction inst)
if (FPSCR.VE == 0 || product.HasNoInvalidExceptions()) if (FPSCR.VE == 0 || product.HasNoInvalidExceptions())
{ {
const double tmp = ForceSingle(FPSCR, product.value); const float tmp = ForceSingle(FPSCR, product.value);
const double result = std::isnan(tmp) ? tmp : -tmp; const float result = std::isnan(tmp) ? tmp : -tmp;
rPS(inst.FD).Fill(result); rPS(inst.FD).Fill(result);
PowerPC::UpdateFPRFSingle(result); PowerPC::UpdateFPRFSingle(result);
@ -676,8 +676,8 @@ void Interpreter::fnmsubsx(UGeckoInstruction inst)
if (FPSCR.VE == 0 || product.HasNoInvalidExceptions()) if (FPSCR.VE == 0 || product.HasNoInvalidExceptions())
{ {
const double tmp = ForceSingle(FPSCR, product.value); const float tmp = ForceSingle(FPSCR, product.value);
const double result = std::isnan(tmp) ? tmp : -tmp; const float result = std::isnan(tmp) ? tmp : -tmp;
rPS(inst.FD).Fill(result); rPS(inst.FD).Fill(result);
PowerPC::UpdateFPRFSingle(result); PowerPC::UpdateFPRFSingle(result);
@ -714,7 +714,7 @@ void Interpreter::fsubsx(UGeckoInstruction inst)
if (FPSCR.VE == 0 || difference.HasNoInvalidExceptions()) if (FPSCR.VE == 0 || difference.HasNoInvalidExceptions())
{ {
const double result = ForceSingle(FPSCR, difference.value); const float result = ForceSingle(FPSCR, difference.value);
rPS(inst.FD).Fill(result); rPS(inst.FD).Fill(result);
PowerPC::UpdateFPRFSingle(result); PowerPC::UpdateFPRFSingle(result);
} }

View File

@ -113,8 +113,8 @@ void Interpreter::ps_div(UGeckoInstruction inst)
const auto& a = rPS(inst.FA); const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB); const auto& b = rPS(inst.FB);
const double ps0 = ForceSingle(FPSCR, NI_div(&FPSCR, a.PS0AsDouble(), b.PS0AsDouble()).value); const float ps0 = ForceSingle(FPSCR, NI_div(&FPSCR, a.PS0AsDouble(), b.PS0AsDouble()).value);
const double ps1 = ForceSingle(FPSCR, NI_div(&FPSCR, a.PS1AsDouble(), b.PS1AsDouble()).value); const float ps1 = ForceSingle(FPSCR, NI_div(&FPSCR, a.PS1AsDouble(), b.PS1AsDouble()).value);
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRFSingle(ps0); PowerPC::UpdateFPRFSingle(ps0);
@ -174,8 +174,8 @@ void Interpreter::ps_rsqrte(UGeckoInstruction inst)
if (Common::IsSNAN(ps0) || Common::IsSNAN(ps1)) if (Common::IsSNAN(ps0) || Common::IsSNAN(ps1))
SetFPException(&FPSCR, FPSCR_VXSNAN); SetFPException(&FPSCR, FPSCR_VXSNAN);
const double dst_ps0 = ForceSingle(FPSCR, Common::ApproximateReciprocalSquareRoot(ps0)); const float dst_ps0 = ForceSingle(FPSCR, Common::ApproximateReciprocalSquareRoot(ps0));
const double dst_ps1 = ForceSingle(FPSCR, Common::ApproximateReciprocalSquareRoot(ps1)); const float dst_ps1 = ForceSingle(FPSCR, Common::ApproximateReciprocalSquareRoot(ps1));
rPS(inst.FD).SetBoth(dst_ps0, dst_ps1); rPS(inst.FD).SetBoth(dst_ps0, dst_ps1);
PowerPC::UpdateFPRFSingle(dst_ps0); PowerPC::UpdateFPRFSingle(dst_ps0);
@ -189,8 +189,8 @@ void Interpreter::ps_sub(UGeckoInstruction inst)
const auto& a = rPS(inst.FA); const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB); const auto& b = rPS(inst.FB);
const double ps0 = ForceSingle(FPSCR, NI_sub(&FPSCR, a.PS0AsDouble(), b.PS0AsDouble()).value); const float ps0 = ForceSingle(FPSCR, NI_sub(&FPSCR, a.PS0AsDouble(), b.PS0AsDouble()).value);
const double ps1 = ForceSingle(FPSCR, NI_sub(&FPSCR, a.PS1AsDouble(), b.PS1AsDouble()).value); const float ps1 = ForceSingle(FPSCR, NI_sub(&FPSCR, a.PS1AsDouble(), b.PS1AsDouble()).value);
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRFSingle(ps0); PowerPC::UpdateFPRFSingle(ps0);
@ -204,8 +204,8 @@ void Interpreter::ps_add(UGeckoInstruction inst)
const auto& a = rPS(inst.FA); const auto& a = rPS(inst.FA);
const auto& b = rPS(inst.FB); const auto& b = rPS(inst.FB);
const double ps0 = ForceSingle(FPSCR, NI_add(&FPSCR, a.PS0AsDouble(), b.PS0AsDouble()).value); const float ps0 = ForceSingle(FPSCR, NI_add(&FPSCR, a.PS0AsDouble(), b.PS0AsDouble()).value);
const double ps1 = ForceSingle(FPSCR, NI_add(&FPSCR, a.PS1AsDouble(), b.PS1AsDouble()).value); const float ps1 = ForceSingle(FPSCR, NI_add(&FPSCR, a.PS1AsDouble(), b.PS1AsDouble()).value);
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRFSingle(ps0); PowerPC::UpdateFPRFSingle(ps0);
@ -222,8 +222,8 @@ void Interpreter::ps_mul(UGeckoInstruction inst)
const double c0 = Force25Bit(c.PS0AsDouble()); const double c0 = Force25Bit(c.PS0AsDouble());
const double c1 = Force25Bit(c.PS1AsDouble()); const double c1 = Force25Bit(c.PS1AsDouble());
const double ps0 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS0AsDouble(), c0).value); const float ps0 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS0AsDouble(), c0).value);
const double ps1 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS1AsDouble(), c1).value); const float ps1 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS1AsDouble(), c1).value);
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRFSingle(ps0); PowerPC::UpdateFPRFSingle(ps0);
@ -241,10 +241,8 @@ void Interpreter::ps_msub(UGeckoInstruction inst)
const double c0 = Force25Bit(c.PS0AsDouble()); const double c0 = Force25Bit(c.PS0AsDouble());
const double c1 = Force25Bit(c.PS1AsDouble()); const double c1 = Force25Bit(c.PS1AsDouble());
const double ps0 = const float ps0 = ForceSingle(FPSCR, NI_msub(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value);
ForceSingle(FPSCR, NI_msub(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value); const float ps1 = ForceSingle(FPSCR, NI_msub(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value);
const double ps1 =
ForceSingle(FPSCR, NI_msub(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value);
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRFSingle(ps0); PowerPC::UpdateFPRFSingle(ps0);
@ -262,10 +260,8 @@ void Interpreter::ps_madd(UGeckoInstruction inst)
const double c0 = Force25Bit(c.PS0AsDouble()); const double c0 = Force25Bit(c.PS0AsDouble());
const double c1 = Force25Bit(c.PS1AsDouble()); const double c1 = Force25Bit(c.PS1AsDouble());
const double ps0 = const float ps0 = ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value);
ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value); const float ps1 = ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value);
const double ps1 =
ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value);
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRFSingle(ps0); PowerPC::UpdateFPRFSingle(ps0);
@ -283,13 +279,13 @@ void Interpreter::ps_nmsub(UGeckoInstruction inst)
const double c0 = Force25Bit(c.PS0AsDouble()); const double c0 = Force25Bit(c.PS0AsDouble());
const double c1 = Force25Bit(c.PS1AsDouble()); const double c1 = Force25Bit(c.PS1AsDouble());
const double tmp0 = const float tmp0 =
ForceSingle(FPSCR, NI_msub(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value); ForceSingle(FPSCR, NI_msub(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value);
const double tmp1 = const float tmp1 =
ForceSingle(FPSCR, NI_msub(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value); ForceSingle(FPSCR, NI_msub(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value);
const double ps0 = std::isnan(tmp0) ? tmp0 : -tmp0; const float ps0 = std::isnan(tmp0) ? tmp0 : -tmp0;
const double ps1 = std::isnan(tmp1) ? tmp1 : -tmp1; const float ps1 = std::isnan(tmp1) ? tmp1 : -tmp1;
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRFSingle(ps0); PowerPC::UpdateFPRFSingle(ps0);
@ -307,13 +303,13 @@ void Interpreter::ps_nmadd(UGeckoInstruction inst)
const double c0 = Force25Bit(c.PS0AsDouble()); const double c0 = Force25Bit(c.PS0AsDouble());
const double c1 = Force25Bit(c.PS1AsDouble()); const double c1 = Force25Bit(c.PS1AsDouble());
const double tmp0 = const float tmp0 =
ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value); ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value);
const double tmp1 = const float tmp1 =
ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value); ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value);
const double ps0 = std::isnan(tmp0) ? tmp0 : -tmp0; const float ps0 = std::isnan(tmp0) ? tmp0 : -tmp0;
const double ps1 = std::isnan(tmp1) ? tmp1 : -tmp1; const float ps1 = std::isnan(tmp1) ? tmp1 : -tmp1;
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRFSingle(ps0); PowerPC::UpdateFPRFSingle(ps0);
@ -328,8 +324,8 @@ void Interpreter::ps_sum0(UGeckoInstruction inst)
const auto& b = rPS(inst.FB); const auto& b = rPS(inst.FB);
const auto& c = rPS(inst.FC); const auto& c = rPS(inst.FC);
const double ps0 = ForceSingle(FPSCR, NI_add(&FPSCR, a.PS0AsDouble(), b.PS1AsDouble()).value); const float ps0 = ForceSingle(FPSCR, NI_add(&FPSCR, a.PS0AsDouble(), b.PS1AsDouble()).value);
const double ps1 = ForceSingle(FPSCR, c.PS1AsDouble()); const float ps1 = ForceSingle(FPSCR, c.PS1AsDouble());
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRFSingle(ps0); PowerPC::UpdateFPRFSingle(ps0);
@ -344,8 +340,8 @@ void Interpreter::ps_sum1(UGeckoInstruction inst)
const auto& b = rPS(inst.FB); const auto& b = rPS(inst.FB);
const auto& c = rPS(inst.FC); const auto& c = rPS(inst.FC);
const double ps0 = ForceSingle(FPSCR, c.PS0AsDouble()); const float ps0 = ForceSingle(FPSCR, c.PS0AsDouble());
const double ps1 = ForceSingle(FPSCR, NI_add(&FPSCR, a.PS0AsDouble(), b.PS1AsDouble()).value); const float ps1 = ForceSingle(FPSCR, NI_add(&FPSCR, a.PS0AsDouble(), b.PS1AsDouble()).value);
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRFSingle(ps1); PowerPC::UpdateFPRFSingle(ps1);
@ -360,8 +356,8 @@ void Interpreter::ps_muls0(UGeckoInstruction inst)
const auto& c = rPS(inst.FC); const auto& c = rPS(inst.FC);
const double c0 = Force25Bit(c.PS0AsDouble()); const double c0 = Force25Bit(c.PS0AsDouble());
const double ps0 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS0AsDouble(), c0).value); const float ps0 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS0AsDouble(), c0).value);
const double ps1 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS1AsDouble(), c0).value); const float ps1 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS1AsDouble(), c0).value);
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRFSingle(ps0); PowerPC::UpdateFPRFSingle(ps0);
@ -376,8 +372,8 @@ void Interpreter::ps_muls1(UGeckoInstruction inst)
const auto& c = rPS(inst.FC); const auto& c = rPS(inst.FC);
const double c1 = Force25Bit(c.PS1AsDouble()); const double c1 = Force25Bit(c.PS1AsDouble());
const double ps0 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS0AsDouble(), c1).value); const float ps0 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS0AsDouble(), c1).value);
const double ps1 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS1AsDouble(), c1).value); const float ps1 = ForceSingle(FPSCR, NI_mul(&FPSCR, a.PS1AsDouble(), c1).value);
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRFSingle(ps0); PowerPC::UpdateFPRFSingle(ps0);
@ -393,10 +389,8 @@ void Interpreter::ps_madds0(UGeckoInstruction inst)
const auto& c = rPS(inst.FC); const auto& c = rPS(inst.FC);
const double c0 = Force25Bit(c.PS0AsDouble()); const double c0 = Force25Bit(c.PS0AsDouble());
const double ps0 = const float ps0 = ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value);
ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS0AsDouble(), c0, b.PS0AsDouble()).value); const float ps1 = ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS1AsDouble(), c0, b.PS1AsDouble()).value);
const double ps1 =
ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS1AsDouble(), c0, b.PS1AsDouble()).value);
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRFSingle(ps0); PowerPC::UpdateFPRFSingle(ps0);
@ -412,10 +406,8 @@ void Interpreter::ps_madds1(UGeckoInstruction inst)
const auto& c = rPS(inst.FC); const auto& c = rPS(inst.FC);
const double c1 = Force25Bit(c.PS1AsDouble()); const double c1 = Force25Bit(c.PS1AsDouble());
const double ps0 = const float ps0 = ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS0AsDouble(), c1, b.PS0AsDouble()).value);
ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS0AsDouble(), c1, b.PS0AsDouble()).value); const float ps1 = ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value);
const double ps1 =
ForceSingle(FPSCR, NI_madd(&FPSCR, a.PS1AsDouble(), c1, b.PS1AsDouble()).value);
rPS(inst.FD).SetBoth(ps0, ps1); rPS(inst.FD).SetBoth(ps0, ps1);
PowerPC::UpdateFPRFSingle(ps0); PowerPC::UpdateFPRFSingle(ps0);