[x64] Add AVX512 optimizations for `OPCODE_VECTOR_COMPARE_UGT`(Integer)
AVX512 has native unsigned integer comparison instructions, removing the need to XOR the most-significant-bit with a constant in memory to use the signed comparison instructions. These instructions only write to a k-mask register though and need an additional call to `vpmovm2*` to turn the mask-register into a vector-mask register. As of Icelake: `vpcmpu*` is all L3/T1, `vpmovm2d` is L1/T0.33, and `vpmovm2{b,w}` is L3/T0.33. As of Zen4: `vpcmpu*` is all L3/T0.50 and `vpmovm2*` is all L1/T0.25.
This commit is contained in:
parent
121bf93cbe
commit
6ee2e3718f
|
@ -409,6 +409,43 @@ struct VECTOR_COMPARE_UGT_V128
|
|||
: Sequence<VECTOR_COMPARE_UGT_V128,
|
||||
I<OPCODE_VECTOR_COMPARE_UGT, V128Op, V128Op, V128Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
if (e.IsFeatureEnabled(kX64EmitAVX512Ortho | kX64EmitAVX512BW |
|
||||
kX64EmitAVX512DQ) &&
|
||||
(i.instr->flags != FLOAT32_TYPE)) {
|
||||
Xmm src1 = e.xmm0;
|
||||
if (i.src1.is_constant) {
|
||||
e.LoadConstantXmm(src1, i.src1.constant());
|
||||
} else {
|
||||
src1 = i.src1;
|
||||
}
|
||||
|
||||
Xmm src2 = e.xmm1;
|
||||
if (i.src2.is_constant) {
|
||||
e.LoadConstantXmm(src2, i.src2.constant());
|
||||
} else {
|
||||
src2 = i.src2;
|
||||
}
|
||||
|
||||
switch (i.instr->flags) {
|
||||
case INT8_TYPE:
|
||||
e.vpcmpub(e.k1, src1, src2, 0x6);
|
||||
e.vpmovm2b(i.dest, e.k1);
|
||||
break;
|
||||
case INT16_TYPE:
|
||||
e.vpcmpuw(e.k1, src1, src2, 0x6);
|
||||
e.vpmovm2w(i.dest, e.k1);
|
||||
break;
|
||||
case INT32_TYPE:
|
||||
e.vpcmpud(e.k1, src1, src2, 0x6);
|
||||
e.vpmovm2d(i.dest, e.k1);
|
||||
break;
|
||||
default:
|
||||
assert_always();
|
||||
break;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
Xbyak::Address sign_addr = e.ptr[e.rax]; // dummy
|
||||
switch (i.instr->flags) {
|
||||
case INT8_TYPE:
|
||||
|
|
Loading…
Reference in New Issue