Add untested variable-shift implementations for 16-bit (INT16) VECTOR_SHA and VECTOR_SHL.
This commit is contained in:
parent
810c256f21
commit
07afc58264
|
@ -539,6 +539,10 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) {
|
||||||
0xFFFFFF0Cu, 0xFFFFFF0Fu),
|
0xFFFFFF0Cu, 0xFFFFFF0Fu),
|
||||||
/* XMMOneOver255 */ vec128f(1.0f / 255.0f, 1.0f / 255.0f,
|
/* XMMOneOver255 */ vec128f(1.0f / 255.0f, 1.0f / 255.0f,
|
||||||
1.0f / 255.0f, 1.0f / 255.0f),
|
1.0f / 255.0f, 1.0f / 255.0f),
|
||||||
|
/* XMMMaskEvenPI16 */ vec128i(0x0000FFFFu, 0x0000FFFFu,
|
||||||
|
0x0000FFFFu, 0x0000FFFFu),
|
||||||
|
/* XMMShiftMaskEvenPI16 */ vec128i(0x0000000Fu, 0x0000000Fu,
|
||||||
|
0x0000000Fu, 0x0000000Fu),
|
||||||
/* XMMShiftMaskPS */ vec128i(0x0000001Fu, 0x0000001Fu,
|
/* XMMShiftMaskPS */ vec128i(0x0000001Fu, 0x0000001Fu,
|
||||||
0x0000001Fu, 0x0000001Fu),
|
0x0000001Fu, 0x0000001Fu),
|
||||||
/* XMMShiftByteMask */ vec128i(0x000000FFu, 0x000000FFu,
|
/* XMMShiftByteMask */ vec128i(0x000000FFu, 0x000000FFu,
|
||||||
|
|
|
@ -54,6 +54,8 @@ enum XmmConst {
|
||||||
XMMPackD3DCOLOR,
|
XMMPackD3DCOLOR,
|
||||||
XMMUnpackD3DCOLOR,
|
XMMUnpackD3DCOLOR,
|
||||||
XMMOneOver255,
|
XMMOneOver255,
|
||||||
|
XMMMaskEvenPI16,
|
||||||
|
XMMShiftMaskEvenPI16,
|
||||||
XMMShiftMaskPS,
|
XMMShiftMaskPS,
|
||||||
XMMShiftByteMask,
|
XMMShiftByteMask,
|
||||||
XMMUnsignedDwordMax,
|
XMMUnsignedDwordMax,
|
||||||
|
|
|
@ -4150,7 +4150,23 @@ EMITTER(VECTOR_SHL_V128, MATCH(I<OPCODE_VECTOR_SHL, V128<>, V128<>, V128<>>)) {
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Fully variable shift.
|
// Fully variable shift.
|
||||||
assert_always();
|
// TODO(benvanik): find a better sequence.
|
||||||
|
Xmm temp = i.dest;
|
||||||
|
if (i.dest == i.src1 || i.dest == i.src2) {
|
||||||
|
temp = e.xmm2;
|
||||||
|
}
|
||||||
|
// Even:
|
||||||
|
e.vpand(e.xmm0, i.src2, e.GetXmmConstPtr(XMMShiftMaskEvenPI16));
|
||||||
|
e.vpsllvd(e.xmm1, i.src1, e.xmm0);
|
||||||
|
e.vpand(e.xmm1, e.GetXmmConstPtr(XMMMaskEvenPI16));
|
||||||
|
// Odd:
|
||||||
|
e.vpsrld(e.xmm0, i.src2, 16);
|
||||||
|
e.vpand(e.xmm0, e.GetXmmConstPtr(XMMShiftMaskEvenPI16));
|
||||||
|
e.vpsrld(i.dest, i.src1, 16);
|
||||||
|
e.vpsllvd(i.dest, i.dest, e.xmm0);
|
||||||
|
e.vpslld(i.dest, 8);
|
||||||
|
// Merge:
|
||||||
|
e.vpor(i.dest, e.xmm1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
static void EmitInt32(X64Emitter& e, const EmitArgType& i) {
|
static void EmitInt32(X64Emitter& e, const EmitArgType& i) {
|
||||||
|
@ -4308,6 +4324,20 @@ EMITTER_OPCODE_TABLE(
|
||||||
EMITTER(VECTOR_SHA_V128, MATCH(I<OPCODE_VECTOR_SHA, V128<>, V128<>, V128<>>)) {
|
EMITTER(VECTOR_SHA_V128, MATCH(I<OPCODE_VECTOR_SHA, V128<>, V128<>, V128<>>)) {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
switch (i.instr->flags) {
|
switch (i.instr->flags) {
|
||||||
|
case INT16_TYPE:
|
||||||
|
// Even halfwords:
|
||||||
|
e.vpand(e.xmm0, i.src2, e.GetXmmConstPtr(XMMShiftMaskEvenPI16));
|
||||||
|
e.vpslld(e.xmm1, i.src1, 16);
|
||||||
|
e.vpsrad(e.xmm1, 8);
|
||||||
|
e.vpsravd(e.xmm1, e.xmm1, e.xmm0);
|
||||||
|
// Odd halfwords:
|
||||||
|
e.vpsrld(e.xmm0, i.src2, 16);
|
||||||
|
e.vpand(e.xmm0, e.GetXmmConstPtr(XMMShiftMaskEvenPI16));
|
||||||
|
e.vpslld(i.dest, i.src1, 16);
|
||||||
|
e.vpsravd(i.dest, i.dest, e.xmm0);
|
||||||
|
// Merge:
|
||||||
|
e.vpor(i.dest, e.xmm1);
|
||||||
|
break;
|
||||||
case INT32_TYPE:
|
case INT32_TYPE:
|
||||||
// src shift mask may have values >31, and x86 sets to zero when
|
// src shift mask may have values >31, and x86 sets to zero when
|
||||||
// that happens so we mask.
|
// that happens so we mask.
|
||||||
|
|
Loading…
Reference in New Issue