From e32342e956568128b620d0dd133145cb3e069938 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Tue, 27 May 2014 13:02:00 -0700 Subject: [PATCH] Variable vector_shl int8. --- src/alloy/backend/x64/x64_emitter.cc | 1 + src/alloy/backend/x64/x64_emitter.h | 1 + src/alloy/backend/x64/x64_sequences.cc | 36 +++++++++++++++++++++++++- 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/alloy/backend/x64/x64_emitter.cc b/src/alloy/backend/x64/x64_emitter.cc index aad925ef7..6e94a660a 100644 --- a/src/alloy/backend/x64/x64_emitter.cc +++ b/src/alloy/backend/x64/x64_emitter.cc @@ -443,6 +443,7 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) { /* XMMUnpackD3DCOLOR */ vec128i(0xFFFFFF02, 0xFFFFFF01, 0xFFFFFF00, 0xFFFFFF02), /* XMMOneOver255 */ vec128f(1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f), /* XMMShiftMaskPS */ vec128i(0x0000001Fu, 0x0000001Fu, 0x0000001Fu, 0x0000001Fu), + /* XMMShiftByteMask */ vec128i(0x000000FFu, 0x000000FFu, 0x000000FFu, 0x000000FFu), }; // TODO(benvanik): cache base pointer somewhere? stack? It'd be nice to // prevent this move. diff --git a/src/alloy/backend/x64/x64_emitter.h b/src/alloy/backend/x64/x64_emitter.h index 7a36e3837..d67348e18 100644 --- a/src/alloy/backend/x64/x64_emitter.h +++ b/src/alloy/backend/x64/x64_emitter.h @@ -51,6 +51,7 @@ enum XmmConst { XMMUnpackD3DCOLOR = 12, XMMOneOver255 = 13, XMMShiftMaskPS = 14, + XMMShiftByteMask = 15, }; // Unfortunately due to the design of xbyak we have to pass this to the ctor. diff --git a/src/alloy/backend/x64/x64_sequences.cc b/src/alloy/backend/x64/x64_sequences.cc index 1a3f90abc..7f0850d6c 100644 --- a/src/alloy/backend/x64/x64_sequences.cc +++ b/src/alloy/backend/x64/x64_sequences.cc @@ -3734,7 +3734,41 @@ EMITTER(VECTOR_SHL_V128, MATCH(I, V128<>, V128<>>)) { } } else { // Fully variable shift. - XEASSERTALWAYS(); + // TODO(benvanik): find a better sequence. 
+      // Per-byte variable left shift built from 32-bit lane operations: each of
+      // the four byte positions within a dword is isolated, shifted with
+      // vpsllvd, masked back to 8 bits, moved to its position, and OR-ed into
+      // the accumulated result.
+      //
+      // The intermediate result is accumulated in a scratch register whenever
+      // dest aliases a source, because both sources are re-read for every byte
+      // position.
+      // NOTE(review): assumes xmm0-xmm2 are scratch registers that never alias
+      // src1/src2 - confirm against the register allocator.
+      // NOTE(review): shift counts are masked to 8 bits, so any count >= 8
+      // yields a zero byte. If this opcode is meant to use only the low 3 bits
+      // of each count, the counts need a 0x07 mask instead - confirm.
+      Xmm temp = i.dest;
+      if (i.dest == i.src1 || i.dest == i.src2) {
+        temp = e.xmm2;
+      }
+      auto byte_mask = e.GetXmmConstPtr(XMMShiftByteMask);
+      // AABBCCDD|EEFFGGHH|IIJJKKLL|MMNNOOPP
+      //       DD|      HH|      LL|      PP
+      e.vpand(e.xmm0, i.src1, byte_mask);
+      e.vpand(e.xmm1, i.src2, byte_mask);
+      e.vpsllvd(temp, e.xmm0, e.xmm1);
+      // Drop bits shifted past bit 7 so they cannot leak into the next byte.
+      e.vpand(temp, byte_mask);
+      //     CC  |    GG  |    KK  |    OO
+      e.vpsrld(e.xmm0, i.src1, 8);
+      e.vpand(e.xmm0, byte_mask);
+      e.vpsrld(e.xmm1, i.src2, 8);
+      e.vpand(e.xmm1, byte_mask);
+      e.vpsllvd(e.xmm0, e.xmm0, e.xmm1);
+      // Drop bits shifted past bit 7 before moving the byte back into place.
+      e.vpand(e.xmm0, byte_mask);
+      e.vpslld(e.xmm0, 8);
+      e.vpor(temp, e.xmm0);
+      //   BB    |  FF    |  JJ    |  NN
+      e.vpsrld(e.xmm0, i.src1, 16);
+      e.vpand(e.xmm0, byte_mask);
+      e.vpsrld(e.xmm1, i.src2, 16);
+      e.vpand(e.xmm1, byte_mask);
+      e.vpsllvd(e.xmm0, e.xmm0, e.xmm1);
+      // Drop bits shifted past bit 7 before moving the byte back into place.
+      e.vpand(e.xmm0, byte_mask);
+      e.vpslld(e.xmm0, 16);
+      e.vpor(temp, e.xmm0);
+      // AA      |EE      |II      |MM
+      e.vpsrld(e.xmm0, i.src1, 24);
+      e.vpand(e.xmm0, byte_mask);
+      e.vpsrld(e.xmm1, i.src2, 24);
+      e.vpand(e.xmm1, byte_mask);
+      e.vpsllvd(e.xmm0, e.xmm0, e.xmm1);
+      // No post-shift mask needed here: shifting left by 24 pushes any bits
+      // above bit 7 out of the 32-bit lane.
+      e.vpslld(e.xmm0, 24);
+      e.vpor(i.dest, temp, e.xmm0);
     }
   }
   static void EmitInt16(X64Emitter& e, const EmitArgType& i) {