From be8b9c512f67dca8df6d39f96e0c00c3ef5fcb00 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Mon, 21 Feb 2022 14:00:20 -0800 Subject: [PATCH] [x64] Add GFNI optimization for SPLAT(int8) `pxor` is a zero-uop register-rename, and `gf2p8affineqb dest, zero, int8` is a very quick single-instruction way to use affine Galois transformations to fill a register with an immediate byte without touching memory. --- src/xenia/cpu/backend/x64/x64_seq_vector.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/xenia/cpu/backend/x64/x64_seq_vector.cc b/src/xenia/cpu/backend/x64/x64_seq_vector.cc index 7cf4650b5..09eb2b00e 100644 --- a/src/xenia/cpu/backend/x64/x64_seq_vector.cc +++ b/src/xenia/cpu/backend/x64/x64_seq_vector.cc @@ -1574,7 +1574,11 @@ EMITTER_OPCODE_TABLE(OPCODE_EXTRACT, EXTRACT_I8, EXTRACT_I16, EXTRACT_I32); struct SPLAT_I8 : Sequence> { static void Emit(X64Emitter& e, const EmitArgType& i) { if (i.src1.is_constant) { - // TODO(benvanik): faster constant splats. + if (e.IsFeatureEnabled(kX64EmitGFNI)) { + e.pxor(e.xmm0, e.xmm0); + e.gf2p8affineqb(i.dest, e.xmm0, i.src1.constant()); + return; + } e.mov(e.eax, i.src1.constant()); e.vmovd(e.xmm0, e.eax); } else {