Implement variable vector_shl for int8.
This commit is contained in:
parent
7635bb71a0
commit
e32342e956
|
@ -443,6 +443,7 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) {
|
||||||
/* XMMUnpackD3DCOLOR */ vec128i(0xFFFFFF02, 0xFFFFFF01, 0xFFFFFF00, 0xFFFFFF02),
|
/* XMMUnpackD3DCOLOR */ vec128i(0xFFFFFF02, 0xFFFFFF01, 0xFFFFFF00, 0xFFFFFF02),
|
||||||
/* XMMOneOver255 */ vec128f(1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f),
|
/* XMMOneOver255 */ vec128f(1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f),
|
||||||
/* XMMShiftMaskPS */ vec128i(0x0000001Fu, 0x0000001Fu, 0x0000001Fu, 0x0000001Fu),
|
/* XMMShiftMaskPS */ vec128i(0x0000001Fu, 0x0000001Fu, 0x0000001Fu, 0x0000001Fu),
|
||||||
|
/* XMMShiftByteMask */ vec128i(0x000000FFu, 0x000000FFu, 0x000000FFu, 0x000000FFu),
|
||||||
};
|
};
|
||||||
// TODO(benvanik): cache base pointer somewhere? stack? It'd be nice to
|
// TODO(benvanik): cache base pointer somewhere? stack? It'd be nice to
|
||||||
// prevent this move.
|
// prevent this move.
|
||||||
|
|
|
@ -51,6 +51,7 @@ enum XmmConst {
|
||||||
XMMUnpackD3DCOLOR = 12,
|
XMMUnpackD3DCOLOR = 12,
|
||||||
XMMOneOver255 = 13,
|
XMMOneOver255 = 13,
|
||||||
XMMShiftMaskPS = 14,
|
XMMShiftMaskPS = 14,
|
||||||
|
XMMShiftByteMask = 15,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Unfortunately due to the design of xbyak we have to pass this to the ctor.
|
// Unfortunately due to the design of xbyak we have to pass this to the ctor.
|
||||||
|
|
|
@ -3734,7 +3734,41 @@ EMITTER(VECTOR_SHL_V128, MATCH(I<OPCODE_VECTOR_SHL, V128<>, V128<>, V128<>>)) {
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Fully variable shift.
|
// Fully variable shift.
|
||||||
XEASSERTALWAYS();
|
// TODO(benvanik): find a better sequence.
|
||||||
|
Xmm temp = i.dest;
|
||||||
|
if (i.dest == i.src1 || i.dest == i.src2) {
|
||||||
|
temp = e.xmm2;
|
||||||
|
}
|
||||||
|
auto byte_mask = e.GetXmmConstPtr(XMMShiftByteMask);
|
||||||
|
// AABBCCDD|EEFFGGHH|IIJJKKLL|MMNNOOPP
|
||||||
|
//       DD|      HH|      LL|      PP
|
||||||
|
e.vpand(e.xmm0, i.src1, byte_mask);
|
||||||
|
e.vpand(e.xmm1, i.src2, byte_mask);
|
||||||
|
e.vpsllvd(temp, e.xmm0, e.xmm1);
|
||||||
|
//     CC  |    GG  |    KK  |    OO
|
||||||
|
e.vpsrld(e.xmm0, i.src1, 8);
|
||||||
|
e.vpand(e.xmm0, byte_mask);
|
||||||
|
e.vpsrld(e.xmm1, i.src2, 8);
|
||||||
|
e.vpand(e.xmm1, byte_mask);
|
||||||
|
e.vpsllvd(e.xmm0, e.xmm0, e.xmm1);
|
||||||
|
e.vpslld(e.xmm0, 8);
|
||||||
|
e.vpor(temp, e.xmm0);
|
||||||
|
//   BB    |  FF    |  JJ    |  NN
|
||||||
|
e.vpsrld(e.xmm0, i.src1, 16);
|
||||||
|
e.vpand(e.xmm0, byte_mask);
|
||||||
|
e.vpsrld(e.xmm1, i.src2, 16);
|
||||||
|
e.vpand(e.xmm1, byte_mask);
|
||||||
|
e.vpsllvd(e.xmm0, e.xmm0, e.xmm1);
|
||||||
|
e.vpslld(e.xmm0, 16);
|
||||||
|
e.vpor(temp, e.xmm0);
|
||||||
|
// AA      |EE      |II      |MM
|
||||||
|
e.vpsrld(e.xmm0, i.src1, 24);
|
||||||
|
e.vpand(e.xmm0, byte_mask);
|
||||||
|
e.vpsrld(e.xmm1, i.src2, 24);
|
||||||
|
e.vpand(e.xmm1, byte_mask);
|
||||||
|
e.vpsllvd(e.xmm0, e.xmm0, e.xmm1);
|
||||||
|
e.vpslld(e.xmm0, 24);
|
||||||
|
e.vpor(i.dest, temp, e.xmm0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
static void EmitInt16(X64Emitter& e, const EmitArgType& i) {
|
static void EmitInt16(X64Emitter& e, const EmitArgType& i) {
|
||||||
|
|
Loading…
Reference in New Issue