Implement fully variable vector_shl for int8.

This commit is contained in:
Ben Vanik 2014-05-27 13:02:00 -07:00
parent 7635bb71a0
commit e32342e956
3 changed files with 37 additions and 1 deletions

View File

@ -443,6 +443,7 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) {
/* XMMUnpackD3DCOLOR */ vec128i(0xFFFFFF02, 0xFFFFFF01, 0xFFFFFF00, 0xFFFFFF02), /* XMMUnpackD3DCOLOR */ vec128i(0xFFFFFF02, 0xFFFFFF01, 0xFFFFFF00, 0xFFFFFF02),
/* XMMOneOver255 */ vec128f(1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f), /* XMMOneOver255 */ vec128f(1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f),
/* XMMShiftMaskPS */ vec128i(0x0000001Fu, 0x0000001Fu, 0x0000001Fu, 0x0000001Fu), /* XMMShiftMaskPS */ vec128i(0x0000001Fu, 0x0000001Fu, 0x0000001Fu, 0x0000001Fu),
/* XMMShiftByteMask */ vec128i(0x000000FFu, 0x000000FFu, 0x000000FFu, 0x000000FFu),
}; };
// TODO(benvanik): cache base pointer somewhere? stack? It'd be nice to // TODO(benvanik): cache base pointer somewhere? stack? It'd be nice to
// prevent this move. // prevent this move.

View File

@ -51,6 +51,7 @@ enum XmmConst {
XMMUnpackD3DCOLOR = 12, XMMUnpackD3DCOLOR = 12,
XMMOneOver255 = 13, XMMOneOver255 = 13,
XMMShiftMaskPS = 14, XMMShiftMaskPS = 14,
XMMShiftByteMask = 15,
}; };
// Unfortunately due to the design of xbyak we have to pass this to the ctor. // Unfortunately due to the design of xbyak we have to pass this to the ctor.

View File

@ -3734,7 +3734,41 @@ EMITTER(VECTOR_SHL_V128, MATCH(I<OPCODE_VECTOR_SHL, V128<>, V128<>, V128<>>)) {
} }
} else { } else {
// Fully variable shift. // Fully variable shift.
XEASSERTALWAYS(); // TODO(benvanik): find a better sequence.
// Emulates a per-byte variable left shift with 32-bit-lane instructions:
// each of the four byte positions inside a dword is isolated from src1 and
// src2, shifted with vpsllvd (per-dword variable shift), moved back to its
// byte position and OR'd into an accumulator. xmm0 and xmm1 are used as
// scratch registers throughout.
// NOTE(review): the shift counts are masked with XMMShiftByteMask (0xFF per
// dword) rather than to the low 3 bits, and the shifted lane is not masked
// back to 8 bits before it is OR'd in, so bits shifted past bit 7 look like
// they can spill into the neighbouring byte -- confirm against the intended
// int8 VECTOR_SHL semantics.
// Accumulate in xmm2 when dest aliases either source, so both sources are
// still intact when the later byte positions read them.
Xmm temp = i.dest;
if (i.dest == i.src1 || i.dest == i.src2) {
temp = e.xmm2;
}
auto byte_mask = e.GetXmmConstPtr(XMMShiftByteMask);
// Byte layout of the four dwords (most significant byte first):
// AABBCCDD|EEFFGGHH|IIJJKKLL|MMNNOOPP
// Byte 0 of each dword (already in place, no repositioning needed):
// DD| HH| LL| PP
e.vpand(e.xmm0, i.src1, byte_mask);
e.vpand(e.xmm1, i.src2, byte_mask);
// This first result initialises the accumulator.
e.vpsllvd(temp, e.xmm0, e.xmm1);
// Byte 1 of each dword: bring it down to bits 0-7, shift, move back up by 8.
// CC | GG | KK | OO
e.vpsrld(e.xmm0, i.src1, 8);
e.vpand(e.xmm0, byte_mask);
e.vpsrld(e.xmm1, i.src2, 8);
e.vpand(e.xmm1, byte_mask);
e.vpsllvd(e.xmm0, e.xmm0, e.xmm1);
e.vpslld(e.xmm0, 8);
e.vpor(temp, e.xmm0);
// Byte 2 of each dword: same pattern with a 16-bit offset.
// BB | FF | JJ | NN
e.vpsrld(e.xmm0, i.src1, 16);
e.vpand(e.xmm0, byte_mask);
e.vpsrld(e.xmm1, i.src2, 16);
e.vpand(e.xmm1, byte_mask);
e.vpsllvd(e.xmm0, e.xmm0, e.xmm1);
e.vpslld(e.xmm0, 16);
e.vpor(temp, e.xmm0);
// Byte 3 of each dword: same pattern with a 24-bit offset.
// AA |EE |II |MM
e.vpsrld(e.xmm0, i.src1, 24);
e.vpand(e.xmm0, byte_mask);
e.vpsrld(e.xmm1, i.src2, 24);
e.vpand(e.xmm1, byte_mask);
e.vpsllvd(e.xmm0, e.xmm0, e.xmm1);
e.vpslld(e.xmm0, 24);
// Final merge writes the combined result into the real destination.
e.vpor(i.dest, temp, e.xmm0);
} }
} }
static void EmitInt16(X64Emitter& e, const EmitArgType& i) { static void EmitInt16(X64Emitter& e, const EmitArgType& i) {