Add non-AVX2 fallbacks for OPCODE_VECTOR_SHA (INT32) and SPLAT_I16
This commit is contained in:
parent
3249f84700
commit
cff09a4509
|
@ -4901,6 +4901,16 @@ EMITTER(VECTOR_SHA_V128, MATCH(I<OPCODE_VECTOR_SHA, V128<>, V128<>, V128<>>)) {
|
||||||
}
|
}
|
||||||
return _mm_load_si128(reinterpret_cast<__m128i*>(value));
|
return _mm_load_si128(reinterpret_cast<__m128i*>(value));
|
||||||
}
|
}
|
||||||
|
// Software fallback for a per-lane 32-bit right shift of a 128-bit vector.
// Treats src1 as four int32_t lanes and shifts each lane right by the low
// five bits of the matching lane in src2, returning the four results packed
// back into a vector.
//
// The first (unnamed) parameter is never read by this function.
// NOTE(review): presumably it is the context pointer supplied by the
// native-call thunk - confirm against CallNativeSafe.
static __m128i EmulateVectorShaI32(void*, __m128i src1, __m128i src2) {
|
||||||
|
// 16-byte alignment is required by the aligned store/load intrinsics below.
alignas(16) int32_t value[4];
|
||||||
|
alignas(16) int32_t shamt[4];
|
||||||
|
// Spill both vectors to memory so the lanes can be processed as scalars.
_mm_store_si128(reinterpret_cast<__m128i*>(value), src1);
|
||||||
|
_mm_store_si128(reinterpret_cast<__m128i*>(shamt), src2);
|
||||||
|
for (size_t i = 0; i < 4; ++i) {
|
||||||
|
// Masking with 0x1F keeps the shift count in 0..31, so it never reaches the
// width of the lane. value[] is signed, so this is a signed right shift.
// NOTE(review): sign-propagating on the usual x64 compilers, but
// implementation-defined for negative values before C++20.
value[i] = value[i] >> (shamt[i] & 0x1F);
|
||||||
|
}
|
||||||
|
// Repack the four shifted lanes into the result vector.
return _mm_load_si128(reinterpret_cast<__m128i*>(value));
|
||||||
|
}
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
switch (i.instr->flags) {
|
switch (i.instr->flags) {
|
||||||
case INT8_TYPE:
|
case INT8_TYPE:
|
||||||
|
@ -4928,6 +4938,7 @@ EMITTER(VECTOR_SHA_V128, MATCH(I<OPCODE_VECTOR_SHA, V128<>, V128<>, V128<>>)) {
|
||||||
e.vmovaps(i.dest, e.xmm0);
|
e.vmovaps(i.dest, e.xmm0);
|
||||||
break;
|
break;
|
||||||
case INT32_TYPE:
|
case INT32_TYPE:
|
||||||
|
if (e.cpu()->has(Xbyak::util::Cpu::tAVX2)) {
|
||||||
// src shift mask may have values >31, and x86 sets to zero when
|
// src shift mask may have values >31, and x86 sets to zero when
|
||||||
// that happens so we mask.
|
// that happens so we mask.
|
||||||
if (i.src2.is_constant) {
|
if (i.src2.is_constant) {
|
||||||
|
@ -4937,6 +4948,19 @@ EMITTER(VECTOR_SHA_V128, MATCH(I<OPCODE_VECTOR_SHA, V128<>, V128<>, V128<>>)) {
|
||||||
e.vandps(e.xmm0, i.src2, e.GetXmmConstPtr(XMMShiftMaskPS));
|
e.vandps(e.xmm0, i.src2, e.GetXmmConstPtr(XMMShiftMaskPS));
|
||||||
}
|
}
|
||||||
e.vpsravd(i.dest, i.src1, e.xmm0);
|
e.vpsravd(i.dest, i.src1, e.xmm0);
|
||||||
|
} else {
|
||||||
|
// Emulated for now...
|
||||||
|
// TODO: Native version
|
||||||
|
if (i.src2.is_constant) {
|
||||||
|
e.LoadConstantXmm(e.xmm0, i.src2.constant());
|
||||||
|
e.lea(e.r9, e.StashXmm(1, e.xmm0));
|
||||||
|
} else {
|
||||||
|
e.lea(e.r9, e.StashXmm(1, i.src2));
|
||||||
|
}
|
||||||
|
e.lea(e.r8, e.StashXmm(0, i.src1));
|
||||||
|
e.CallNativeSafe(reinterpret_cast<void*>(EmulateVectorShaI32));
|
||||||
|
e.vmovaps(i.dest, e.xmm0);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert_always();
|
assert_always();
|
||||||
|
@ -5475,8 +5499,16 @@ EMITTER(SPLAT_I16, MATCH(I<OPCODE_SPLAT, V128<>, I16<>>)) {
|
||||||
e.vpbroadcastw(i.dest, e.xmm0);
|
e.vpbroadcastw(i.dest, e.xmm0);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// TODO
|
if (i.src1.is_constant) {
|
||||||
e.DebugBreak();
|
e.mov(e.eax, i.src1.constant());
|
||||||
|
e.movd(e.xmm0, e.eax);
|
||||||
|
} else {
|
||||||
|
e.movd(e.xmm0, i.src1.reg().cvt32());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Credits: VC++ compiler (i love you so much)
|
||||||
|
e.punpcklwd(e.xmm0, e.xmm0); // unpack low word data
|
||||||
|
e.pshufd(i.dest, e.xmm0, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue