Jit64: optimize ps_sum
This commit is contained in:
parent
6593ba7ecc
commit
05a55de08f
|
@ -156,24 +156,40 @@ void Jit64::ps_sum(UGeckoInstruction inst)
|
||||||
int b = inst.FB;
|
int b = inst.FB;
|
||||||
int c = inst.FC;
|
int c = inst.FC;
|
||||||
fpr.Lock(a, b, c, d);
|
fpr.Lock(a, b, c, d);
|
||||||
|
OpArg op_a = fpr.R(a);
|
||||||
|
fpr.BindToRegister(d, false);
|
||||||
|
X64Reg tmp = d == b || d == c ? XMM0 : fpr.RX(d);
|
||||||
|
MOVDDUP(tmp, op_a); // {a.ps0, a.ps0}
|
||||||
|
ADDPD(tmp, fpr.R(b)); // {a.ps0 + b.ps0, a.ps0 + b.ps1}
|
||||||
switch (inst.SUBOP5)
|
switch (inst.SUBOP5)
|
||||||
{
|
{
|
||||||
case 10:
|
case 10: // ps_sum0
|
||||||
MOVDDUP(XMM0, fpr.R(a)); // {a.ps0, a.ps0}
|
UNPCKHPD(tmp, fpr.R(c)); // {a.ps0 + b.ps1, c.ps1}
|
||||||
ADDPD(XMM0, fpr.R(b)); // {a.ps0 + b.ps0, a.ps0 + b.ps1}
|
|
||||||
UNPCKHPD(XMM0, fpr.R(c)); // {a.ps0 + b.ps1, c.ps1}
|
|
||||||
break;
|
break;
|
||||||
case 11:
|
case 11: // ps_sum1
|
||||||
MOVDDUP(XMM1, fpr.R(a)); // {a.ps0, a.ps0}
|
// {c.ps0, a.ps0 + b.ps1}
|
||||||
ADDPD(XMM1, fpr.R(b)); // {a.ps0 + b.ps0, a.ps0 + b.ps1}
|
if (fpr.R(c).IsSimpleReg())
|
||||||
MOVAPD(XMM0, fpr.R(c));
|
{
|
||||||
SHUFPD(XMM0, R(XMM1), 2); // {c.ps0, a.ps0 + b.ps1}
|
if (cpu_info.bSSE4_1)
|
||||||
|
{
|
||||||
|
BLENDPD(tmp, fpr.R(c), 1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
MOVAPD(XMM1, fpr.R(c));
|
||||||
|
SHUFPD(XMM1, R(tmp), 2);
|
||||||
|
tmp = XMM1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
MOVLPD(tmp, fpr.R(c));
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
PanicAlert("ps_sum WTF!!!");
|
PanicAlert("ps_sum WTF!!!");
|
||||||
}
|
}
|
||||||
fpr.BindToRegister(d, false);
|
ForceSinglePrecision(fpr.RX(d), R(tmp));
|
||||||
ForceSinglePrecision(fpr.RX(d), R(XMM0));
|
|
||||||
SetFPRFIfNeeded(fpr.RX(d));
|
SetFPRFIfNeeded(fpr.RX(d));
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
@ -192,10 +208,10 @@ void Jit64::ps_muls(UGeckoInstruction inst)
|
||||||
fpr.Lock(a, c, d);
|
fpr.Lock(a, c, d);
|
||||||
switch (inst.SUBOP5)
|
switch (inst.SUBOP5)
|
||||||
{
|
{
|
||||||
case 12:
|
case 12: // ps_muls0
|
||||||
MOVDDUP(XMM0, fpr.R(c));
|
MOVDDUP(XMM0, fpr.R(c));
|
||||||
break;
|
break;
|
||||||
case 13:
|
case 13: // ps_muls1
|
||||||
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, XMM0, fpr.R(c), fpr.R(c), 3);
|
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, XMM0, fpr.R(c), fpr.R(c), 3);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
|
Loading…
Reference in New Issue