From afb86a12abff89116ce012e3d5fbbbd264368be7 Mon Sep 17 00:00:00 2001
From: Sintendo <bram.speeckaert@gmail.com>
Date: Tue, 28 Jul 2020 23:09:58 +0200
Subject: [PATCH] Jit64: fselx - Optimize AVX packed

For the packed variant, we can skip the final MOVAPS and have VBLENDVPD
write the result directly into the destination register.

Before:
66 0F 57 C0          xorpd       xmm0,xmm0
66 41 0F C2 C1 06    cmpnlepd    xmm0,xmm9
C4 C3 09 4B CC 00    vblendvpd   xmm1,xmm14,xmm12,xmm0
44 0F 28 F1          movaps      xmm14,xmm1

After:
66 0F 57 C0          xorpd       xmm0,xmm0
66 41 0F C2 C1 06    cmpnlepd    xmm0,xmm9
C4 43 09 4B F4 00    vblendvpd   xmm14,xmm14,xmm12,xmm0
---
 Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
index 0d6bff0b1a..a0895b47c9 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
@@ -449,6 +449,12 @@ void Jit64::fselx(UGeckoInstruction inst)
       MOVAPD(XMM1, Rc);
     }
 
+    if (packed)
+    {
+      VBLENDVPD(Rd, src1, Rb, XMM0);
+      return;
+    }
+
     VBLENDVPD(XMM1, src1, Rb, XMM0);
   }
   else if (cpu_info.bSSE4_1)