Jit64: fselx - Optimize AVX packed

For the packed variant, we can skip the final MOVAPS and have VBLENDVPD
write its result directly into the destination register.

Before:
66 0F 57 C0          xorpd       xmm0,xmm0
66 41 0F C2 C1 06    cmpnlepd    xmm0,xmm9
C4 C3 09 4B CC 00    vblendvpd   xmm1,xmm14,xmm12,xmm0
44 0F 28 F1          movaps      xmm14,xmm1

After:
66 0F 57 C0          xorpd       xmm0,xmm0
66 41 0F C2 C1 06    cmpnlepd    xmm0,xmm9
C4 43 09 4B F4 00    vblendvpd   xmm14,xmm14,xmm12,xmm0
This commit is contained in:
Sintendo 2020-07-28 23:09:58 +02:00
parent a52774ca63
commit afb86a12ab
1 changed file with 6 additions and 0 deletions

View File

@@ -449,6 +449,12 @@ void Jit64::fselx(UGeckoInstruction inst)
MOVAPD(XMM1, Rc);
}
if (packed)
{
VBLENDVPD(Rd, src1, Rb, XMM0);
return;
}
VBLENDVPD(XMM1, src1, Rb, XMM0);
}
else if (cpu_info.bSSE4_1)