From ba75a016b4b1121d56dfc7dd0029a2680edaf8ad Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Sun, 23 Oct 2022 21:27:36 -0700 Subject: [PATCH] [x64] Add AVX512 optimization for `OPCODE_SELECT`(V128) Uses `vpternlogd` to collapse the bitwise select operation into one instruction. Though it needs a `vmovdqa` instruction since `vpternlogd` reads and writes to the first argument. --- src/xenia/cpu/backend/x64/x64_sequences.cc | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc index 135a3753c..391c105ce 100644 --- a/src/xenia/cpu/backend/x64/x64_sequences.cc +++ b/src/xenia/cpu/backend/x64/x64_sequences.cc @@ -35,6 +35,7 @@ #include "xenia/cpu/backend/x64/x64_emitter.h" #include "xenia/cpu/backend/x64/x64_op.h" #include "xenia/cpu/backend/x64/x64_tracers.h" +#include "xenia/cpu/backend/x64/x64_util.h" #include "xenia/cpu/hir/hir_builder.h" #include "xenia/cpu/processor.h" @@ -745,21 +746,30 @@ struct SELECT_V128_V128 : Sequence> { static void Emit(X64Emitter& e, const EmitArgType& i) { - Xmm src1 = i.src1.is_constant ? e.xmm0 : i.src1; + const Xmm src1 = i.src1.is_constant ? e.xmm0 : i.src1; if (i.src1.is_constant) { e.LoadConstantXmm(src1, i.src1.constant()); } - Xmm src2 = i.src2.is_constant ? e.xmm1 : i.src2; + const Xmm src2 = i.src2.is_constant ? e.xmm1 : i.src2; if (i.src2.is_constant) { e.LoadConstantXmm(src2, i.src2.constant()); } - Xmm src3 = i.src3.is_constant ? e.xmm2 : i.src3; + const Xmm src3 = i.src3.is_constant ? e.xmm2 : i.src3; if (i.src3.is_constant) { e.LoadConstantXmm(src3, i.src3.constant()); } + if (e.IsFeatureEnabled(kX64EmitAVX512Ortho)) { + e.vmovdqa(e.xmm3, src1); + e.vpternlogd(e.xmm3, src2, src3, + (~TernaryOperand::a & TernaryOperand::b) | + (TernaryOperand::c & TernaryOperand::a)); + e.vmovdqa(i.dest, e.xmm3); + return; + } + // src1 ? src2 : src3; e.vpandn(e.xmm3, src1, src2); e.vpand(i.dest, src1, src3);