Variable vector_shl int8.
This commit is contained in:
parent
7635bb71a0
commit
e32342e956
|
@ -443,6 +443,7 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) {
|
|||
/* XMMUnpackD3DCOLOR */ vec128i(0xFFFFFF02, 0xFFFFFF01, 0xFFFFFF00, 0xFFFFFF02),
|
||||
/* XMMOneOver255 */ vec128f(1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f),
|
||||
/* XMMShiftMaskPS */ vec128i(0x0000001Fu, 0x0000001Fu, 0x0000001Fu, 0x0000001Fu),
|
||||
/* XMMShiftByteMask */ vec128i(0x000000FFu, 0x000000FFu, 0x000000FFu, 0x000000FFu),
|
||||
};
|
||||
// TODO(benvanik): cache base pointer somewhere? stack? It'd be nice to
|
||||
// prevent this move.
|
||||
|
|
|
@ -51,6 +51,7 @@ enum XmmConst {
|
|||
XMMUnpackD3DCOLOR = 12,
|
||||
XMMOneOver255 = 13,
|
||||
XMMShiftMaskPS = 14,
|
||||
XMMShiftByteMask = 15,
|
||||
};
|
||||
|
||||
// Unfortunately due to the design of xbyak we have to pass this to the ctor.
|
||||
|
|
|
@ -3734,7 +3734,41 @@ EMITTER(VECTOR_SHL_V128, MATCH(I<OPCODE_VECTOR_SHL, V128<>, V128<>, V128<>>)) {
|
|||
}
|
||||
} else {
|
||||
// Fully variable shift.
|
||||
XEASSERTALWAYS();
|
||||
// TODO(benvanik): find a better sequence.
|
||||
Xmm temp = i.dest;
|
||||
if (i.dest == i.src1 || i.dest == i.src2) {
|
||||
temp = e.xmm2;
|
||||
}
|
||||
auto byte_mask = e.GetXmmConstPtr(XMMShiftByteMask);
|
||||
// AABBCCDD|EEFFGGHH|IIJJKKLL|MMNNOOPP
|
||||
//       DD|      HH|      LL|      PP
|
||||
e.vpand(e.xmm0, i.src1, byte_mask);
|
||||
e.vpand(e.xmm1, i.src2, byte_mask);
|
||||
e.vpsllvd(temp, e.xmm0, e.xmm1);
|
||||
//     CC  |    GG  |    KK  |    OO
|
||||
e.vpsrld(e.xmm0, i.src1, 8);
|
||||
e.vpand(e.xmm0, byte_mask);
|
||||
e.vpsrld(e.xmm1, i.src2, 8);
|
||||
e.vpand(e.xmm1, byte_mask);
|
||||
e.vpsllvd(e.xmm0, e.xmm0, e.xmm1);
|
||||
e.vpslld(e.xmm0, 8);
|
||||
e.vpor(temp, e.xmm0);
|
||||
//   BB    |  FF    |  JJ    |  NN
|
||||
e.vpsrld(e.xmm0, i.src1, 16);
|
||||
e.vpand(e.xmm0, byte_mask);
|
||||
e.vpsrld(e.xmm1, i.src2, 16);
|
||||
e.vpand(e.xmm1, byte_mask);
|
||||
e.vpsllvd(e.xmm0, e.xmm0, e.xmm1);
|
||||
e.vpslld(e.xmm0, 16);
|
||||
e.vpor(temp, e.xmm0);
|
||||
// AA      |EE      |II      |MM
|
||||
e.vpsrld(e.xmm0, i.src1, 24);
|
||||
e.vpand(e.xmm0, byte_mask);
|
||||
e.vpsrld(e.xmm1, i.src2, 24);
|
||||
e.vpand(e.xmm1, byte_mask);
|
||||
e.vpsllvd(e.xmm0, e.xmm0, e.xmm1);
|
||||
e.vpslld(e.xmm0, 24);
|
||||
e.vpor(i.dest, temp, e.xmm0);
|
||||
}
|
||||
}
|
||||
static void EmitInt16(X64Emitter& e, const EmitArgType& i) {
|
||||
|
|
Loading…
Reference in New Issue