From afb86a12abff89116ce012e3d5fbbbd264368be7 Mon Sep 17 00:00:00 2001
From: Sintendo
Date: Tue, 28 Jul 2020 23:09:58 +0200
Subject: [PATCH] Jit64: fselx - Optimize AVX packed

For the packed variant, we can skip the final MOVAPS and write the
result directly into the destination register.

Before:
66 0F 57 C0          xorpd       xmm0,xmm0
66 41 0F C2 C1 06    cmpnlepd    xmm0,xmm9
C4 C3 09 4B CC 00    vblendvpd   xmm1,xmm14,xmm12,xmm0
44 0F 28 F1          movaps      xmm14,xmm1

After:
66 0F 57 C0          xorpd       xmm0,xmm0
66 41 0F C2 C1 06    cmpnlepd    xmm0,xmm9
C4 43 09 4B F4 00    vblendvpd   xmm14,xmm14,xmm12,xmm0
---
 Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
index 0d6bff0b1a..a0895b47c9 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
@@ -449,6 +449,12 @@ void Jit64::fselx(UGeckoInstruction inst)
       MOVAPD(XMM1, Rc);
     }
 
+    if (packed)
+    {
+      VBLENDVPD(Rd, src1, Rb, XMM0);
+      return;
+    }
+
     VBLENDVPD(XMM1, src1, Rb, XMM0);
   }
   else if (cpu_info.bSSE4_1)