[x64] Add GFNI optimization for SPLAT(int8)

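`pxor` of a register with itself is a zeroing idiom that is resolved at
register rename (zero uops), and `gf2p8affineqb dest, zero, int8` is a
very quick single-instruction way to use the Galois-field affine
transformation to fill a register with an immediate byte without
touching memory: with an all-zero matrix operand the matrix product is
zero, so every result byte is just the immediate.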
This commit is contained in:
Wunkolo 2022-02-21 14:00:20 -08:00 committed by Rick Gibbed
parent 3a115ae6a0
commit be8b9c512f
1 changed file with 5 additions and 1 deletion

View File

@ -1574,7 +1574,11 @@ EMITTER_OPCODE_TABLE(OPCODE_EXTRACT, EXTRACT_I8, EXTRACT_I16, EXTRACT_I32);
struct SPLAT_I8 : Sequence<SPLAT_I8, I<OPCODE_SPLAT, V128Op, I8Op>> { struct SPLAT_I8 : Sequence<SPLAT_I8, I<OPCODE_SPLAT, V128Op, I8Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
if (i.src1.is_constant) { if (i.src1.is_constant) {
// TODO(benvanik): faster constant splats. if (e.IsFeatureEnabled(kX64EmitGFNI)) {
e.pxor(e.xmm0, e.xmm0);
e.gf2p8affineqb(i.dest, e.xmm0, i.src1.constant());
return;
}
e.mov(e.eax, i.src1.constant()); e.mov(e.eax, i.src1.constant());
e.vmovd(e.xmm0, e.eax); e.vmovd(e.xmm0, e.eax);
} else { } else {