Jit64: optimize ps_sum

This commit is contained in:
Tillmann Karras 2015-05-21 12:33:36 +02:00
parent 6593ba7ecc
commit 05a55de08f
1 changed file with 29 additions and 13 deletions

View File

@ -156,24 +156,40 @@ void Jit64::ps_sum(UGeckoInstruction inst)
int b = inst.FB; int b = inst.FB;
int c = inst.FC; int c = inst.FC;
fpr.Lock(a, b, c, d); fpr.Lock(a, b, c, d);
OpArg op_a = fpr.R(a);
fpr.BindToRegister(d, false);
X64Reg tmp = d == b || d == c ? XMM0 : fpr.RX(d);
MOVDDUP(tmp, op_a); // {a.ps0, a.ps0}
ADDPD(tmp, fpr.R(b)); // {a.ps0 + b.ps0, a.ps0 + b.ps1}
switch (inst.SUBOP5) switch (inst.SUBOP5)
{ {
case 10: case 10: // ps_sum0
MOVDDUP(XMM0, fpr.R(a)); // {a.ps0, a.ps0} UNPCKHPD(tmp, fpr.R(c)); // {a.ps0 + b.ps1, c.ps1}
ADDPD(XMM0, fpr.R(b)); // {a.ps0 + b.ps0, a.ps0 + b.ps1}
UNPCKHPD(XMM0, fpr.R(c)); // {a.ps0 + b.ps1, c.ps1}
break; break;
case 11: case 11: // ps_sum1
MOVDDUP(XMM1, fpr.R(a)); // {a.ps0, a.ps0} // {c.ps0, a.ps0 + b.ps1}
ADDPD(XMM1, fpr.R(b)); // {a.ps0 + b.ps0, a.ps0 + b.ps1} if (fpr.R(c).IsSimpleReg())
MOVAPD(XMM0, fpr.R(c)); {
SHUFPD(XMM0, R(XMM1), 2); // {c.ps0, a.ps0 + b.ps1} if (cpu_info.bSSE4_1)
{
BLENDPD(tmp, fpr.R(c), 1);
}
else
{
MOVAPD(XMM1, fpr.R(c));
SHUFPD(XMM1, R(tmp), 2);
tmp = XMM1;
}
}
else
{
MOVLPD(tmp, fpr.R(c));
}
break; break;
default: default:
PanicAlert("ps_sum WTF!!!"); PanicAlert("ps_sum WTF!!!");
} }
fpr.BindToRegister(d, false); ForceSinglePrecision(fpr.RX(d), R(tmp));
ForceSinglePrecision(fpr.RX(d), R(XMM0));
SetFPRFIfNeeded(fpr.RX(d)); SetFPRFIfNeeded(fpr.RX(d));
fpr.UnlockAll(); fpr.UnlockAll();
} }
@ -192,10 +208,10 @@ void Jit64::ps_muls(UGeckoInstruction inst)
fpr.Lock(a, c, d); fpr.Lock(a, c, d);
switch (inst.SUBOP5) switch (inst.SUBOP5)
{ {
case 12: case 12: // ps_muls0
MOVDDUP(XMM0, fpr.R(c)); MOVDDUP(XMM0, fpr.R(c));
break; break;
case 13: case 13: // ps_muls1
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, XMM0, fpr.R(c), fpr.R(c), 3); avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, XMM0, fpr.R(c), fpr.R(c), 3);
break; break;
default: default: