Jit64: optimize floating-point/paired-single arith

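The "else if (b != d)" branch was dead code: it could only be reached when
b == d, so its condition was never true. Its single-temporary sequence is now
used for the non-reversible b == d case, so the last else block with two
temporary XMM registers is not needed anymore.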
This commit is contained in:
Tillmann Karras 2013-10-15 23:47:35 +02:00 committed by degasus
parent b4513313bb
commit c234dc97c0
2 changed files with 29 additions and 39 deletions

View File

@ -25,33 +25,28 @@ void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEm
fpr.BindToRegister(d, true); fpr.BindToRegister(d, true);
(this->*op)(fpr.RX(d), fpr.R(b)); (this->*op)(fpr.RX(d), fpr.R(b));
} }
else if (d == b && reversible) else if (d == b)
{
if (reversible)
{ {
fpr.BindToRegister(d, true); fpr.BindToRegister(d, true);
(this->*op)(fpr.RX(d), fpr.R(a)); (this->*op)(fpr.RX(d), fpr.R(a));
} }
else if (a != d && b != d) else
{
MOVSD(XMM0, fpr.R(b));
fpr.BindToRegister(d, !dupe);
MOVSD(fpr.RX(d), fpr.R(a));
(this->*op)(fpr.RX(d), Gen::R(XMM0));
}
}
else
{ {
// Sources different from d, can use rather quick solution // Sources different from d, can use rather quick solution
fpr.BindToRegister(d, !dupe); fpr.BindToRegister(d, !dupe);
MOVSD(fpr.RX(d), fpr.R(a)); MOVSD(fpr.RX(d), fpr.R(a));
(this->*op)(fpr.RX(d), fpr.R(b)); (this->*op)(fpr.RX(d), fpr.R(b));
} }
else if (b != d)
{
fpr.BindToRegister(d, !dupe);
MOVSD(XMM0, fpr.R(b));
MOVSD(fpr.RX(d), fpr.R(a));
(this->*op)(fpr.RX(d), Gen::R(XMM0));
}
else // Other combo, must use two temps :(
{
MOVSD(XMM0, fpr.R(a));
MOVSD(XMM1, fpr.R(b));
fpr.BindToRegister(d, !dupe);
(this->*op)(XMM0, Gen::R(XMM1));
MOVSD(fpr.RX(d), Gen::R(XMM0));
}
if (dupe) if (dupe)
{ {
ForceSinglePrecisionS(fpr.RX(d)); ForceSinglePrecisionS(fpr.RX(d));

View File

@ -152,33 +152,28 @@ void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(X6
fpr.BindToRegister(d, true); fpr.BindToRegister(d, true);
(this->*op)(fpr.RX(d), fpr.R(b)); (this->*op)(fpr.RX(d), fpr.R(b));
} }
else if (d == b && reversible) else if (d == b)
{
if (reversible)
{ {
fpr.BindToRegister(d, true); fpr.BindToRegister(d, true);
(this->*op)(fpr.RX(d), fpr.R(a)); (this->*op)(fpr.RX(d), fpr.R(a));
} }
else if (a != d && b != d) else
{
MOVAPD(XMM0, fpr.R(b));
fpr.BindToRegister(d, false);
MOVAPD(fpr.RX(d), fpr.R(a));
(this->*op)(fpr.RX(d), Gen::R(XMM0));
}
}
else
{ {
//sources different from d, can use rather quick solution //sources different from d, can use rather quick solution
fpr.BindToRegister(d, false); fpr.BindToRegister(d, false);
MOVAPD(fpr.RX(d), fpr.R(a)); MOVAPD(fpr.RX(d), fpr.R(a));
(this->*op)(fpr.RX(d), fpr.R(b)); (this->*op)(fpr.RX(d), fpr.R(b));
} }
else if (b != d)
{
fpr.BindToRegister(d, false);
MOVAPD(XMM0, fpr.R(b));
MOVAPD(fpr.RX(d), fpr.R(a));
(this->*op)(fpr.RX(d), Gen::R(XMM0));
}
else //Other combo, must use two temps :(
{
MOVAPD(XMM0, fpr.R(a));
MOVAPD(XMM1, fpr.R(b));
fpr.BindToRegister(d, false);
(this->*op)(XMM0, Gen::R(XMM1));
MOVAPD(fpr.RX(d), Gen::R(XMM0));
}
ForceSinglePrecisionP(fpr.RX(d)); ForceSinglePrecisionP(fpr.RX(d));
fpr.UnlockAll(); fpr.UnlockAll();
} }