From d234f2bc47a9a808a3d8fd62f1ae6eae5f23e524 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sun, 11 Jan 2015 15:17:16 -0800 Subject: [PATCH] vperm test + fix for % byte. --- src/alloy/backend/x64/x64_emitter.cc | 1 + src/alloy/backend/x64/x64_emitter.h | 1 + src/alloy/backend/x64/x64_sequences.cc | 4 ++ .../frontend/ppc/test/bin/instr_vperm.bin | Bin 0 -> 32 bytes .../frontend/ppc/test/bin/instr_vperm.dis | 21 +++++++++ .../frontend/ppc/test/bin/instr_vperm.map | 4 ++ src/alloy/frontend/ppc/test/instr_vperm.s | 44 ++++++++++++++++++ 7 files changed, 75 insertions(+) create mode 100644 src/alloy/frontend/ppc/test/bin/instr_vperm.bin create mode 100644 src/alloy/frontend/ppc/test/bin/instr_vperm.dis create mode 100644 src/alloy/frontend/ppc/test/bin/instr_vperm.map create mode 100644 src/alloy/frontend/ppc/test/instr_vperm.s diff --git a/src/alloy/backend/x64/x64_emitter.cc b/src/alloy/backend/x64/x64_emitter.cc index c26aa59a1..00723703c 100644 --- a/src/alloy/backend/x64/x64_emitter.cc +++ b/src/alloy/backend/x64/x64_emitter.cc @@ -810,6 +810,7 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) { /* XMMByteOrderMask */ vec128i(0x01000302u, 0x05040706u, 0x09080B0Au, 0x0D0C0F0Eu), /* XMMPermuteControl15 */ vec128b(15), + /* XMMPermuteByteMask */ vec128b(0x1F), /* XMMPackD3DCOLORSat */ vec128i(0x404000FFu), /* XMMPackD3DCOLOR */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0x0C000408u), diff --git a/src/alloy/backend/x64/x64_emitter.h b/src/alloy/backend/x64/x64_emitter.h index b54bc8267..9245f408d 100644 --- a/src/alloy/backend/x64/x64_emitter.h +++ b/src/alloy/backend/x64/x64_emitter.h @@ -57,6 +57,7 @@ enum XmmConst { XMMByteSwapMask, XMMByteOrderMask, XMMPermuteControl15, + XMMPermuteByteMask, XMMPackD3DCOLORSat, XMMPackD3DCOLOR, XMMUnpackD3DCOLOR, diff --git a/src/alloy/backend/x64/x64_sequences.cc b/src/alloy/backend/x64/x64_sequences.cc index 81803d8c2..0d0c56270 100644 --- a/src/alloy/backend/x64/x64_sequences.cc +++ b/src/alloy/backend/x64/x64_sequences.cc @@ -5007,6 +5007,7 @@ EMITTER(PERMUTE_I32, MATCH(I, I32<>, V128<>, V128<>>)) { }; EMITTER(PERMUTE_V128, MATCH(I, V128<>, V128<>, V128<>>)) { static void Emit(X64Emitter& e, const EmitArgType& i) { + assert_true(i.instr->flags == INT8_TYPE); // TODO(benvanik): find out how to do this with only one temp register! // Permute bytes between src2 and src3. if (i.src3.value->IsConstantZero()) { @@ -5022,6 +5023,7 @@ EMITTER(PERMUTE_V128, MATCH(I, V128<>, V128<>, V128<>>)) } else { e.vxorps(e.xmm0, i.src1, e.GetXmmConstPtr(XMMSwapWordMask)); } + e.vpand(e.xmm0, e.GetXmmConstPtr(XMMPermuteByteMask)); if (i.src2.is_constant) { e.LoadConstantXmm(i.dest, i.src2.constant()); e.vpshufb(i.dest, i.dest, e.xmm0); @@ -5035,12 +5037,14 @@ EMITTER(PERMUTE_V128, MATCH(I, V128<>, V128<>, V128<>>)) } else { // General permute. // Control mask needs to be shuffled. + // TODO(benvanik): do constants here instead of in generated code. if (i.src1.is_constant) { e.LoadConstantXmm(e.xmm2, i.src1.constant()); e.vxorps(e.xmm2, e.xmm2, e.GetXmmConstPtr(XMMSwapWordMask)); } else { e.vxorps(e.xmm2, i.src1, e.GetXmmConstPtr(XMMSwapWordMask)); } + e.vpand(e.xmm2, e.GetXmmConstPtr(XMMPermuteByteMask)); Xmm src2_shuf = e.xmm0; if (i.src2.value->IsConstantZero()) { e.vpxor(src2_shuf, src2_shuf); diff --git a/src/alloy/frontend/ppc/test/bin/instr_vperm.bin b/src/alloy/frontend/ppc/test/bin/instr_vperm.bin new file mode 100644 index 0000000000000000000000000000000000000000..25523ecae35522cbc10682b27e6db55dc4d198dc GIT binary patch literal 32 ScmWeQteEZBz@Q+2g9ZScxCu1? literal 0 HcmV?d00001 diff --git a/src/alloy/frontend/ppc/test/bin/instr_vperm.dis b/src/alloy/frontend/ppc/test/bin/instr_vperm.dis new file mode 100644 index 000000000..353683ccb --- /dev/null +++ b/src/alloy/frontend/ppc/test/bin/instr_vperm.dis @@ -0,0 +1,21 @@ + +/vagrant/src/alloy/frontend/ppc/test/bin//instr_vperm.o: file format elf64-powerpc + + +Disassembly of section .text: + +0000000000100000 : + 100000: 10 c3 21 6b vperm v6,v3,v4,v5 + 100004: 4e 80 00 20 blr + +0000000000100008 : + 100008: 10 c3 21 6b vperm v6,v3,v4,v5 + 10000c: 4e 80 00 20 blr + +0000000000100010 : + 100010: 10 c3 21 6b vperm v6,v3,v4,v5 + 100014: 4e 80 00 20 blr + +0000000000100018 : + 100018: 10 c3 21 6b vperm v6,v3,v4,v5 + 10001c: 4e 80 00 20 blr diff --git a/src/alloy/frontend/ppc/test/bin/instr_vperm.map b/src/alloy/frontend/ppc/test/bin/instr_vperm.map new file mode 100644 index 000000000..18e6711bc --- /dev/null +++ b/src/alloy/frontend/ppc/test/bin/instr_vperm.map @@ -0,0 +1,4 @@ +0000000000000000 t test_vperm_1 +0000000000000008 t test_vperm_2 +0000000000000010 t test_vperm_3 +0000000000000018 t test_vperm_4 diff --git a/src/alloy/frontend/ppc/test/instr_vperm.s b/src/alloy/frontend/ppc/test/instr_vperm.s new file mode 100644 index 000000000..d709b5ae0 --- /dev/null +++ b/src/alloy/frontend/ppc/test/instr_vperm.s @@ -0,0 +1,44 @@ +test_vperm_1: + #_ REGISTER_IN v3 [00010203, 04050607, 08090A0B, 0C0D0E0F] + #_ REGISTER_IN v4 [10111213, 14151617, 18191A1B, 1C1D1E1F] + #_ REGISTER_IN v5 [00000000, 00000000, 00000000, 00000000] + vperm v6, v3, v4, v5 + blr + #_ REGISTER_OUT v3 [00010203, 04050607, 08090A0B, 0C0D0E0F] + #_ REGISTER_OUT v4 [10111213, 14151617, 18191A1B, 1C1D1E1F] + #_ REGISTER_OUT v5 [00000000, 00000000, 00000000, 00000000] + #_ REGISTER_OUT v6 [00000000, 00000000, 00000000, 00000000] + +test_vperm_2: + #_ REGISTER_IN v3 [00010203, 04050607, 08090A0B, 0C0D0E0F] + #_ REGISTER_IN v4 [10111213, 14151617, 18191A1B, 1C1D1E1F] + #_ REGISTER_IN v5 [01010101, 01010101, 01010101, 01010101] + vperm v6, v3, v4, v5 + blr + #_ REGISTER_OUT v3 [00010203, 04050607, 08090A0B, 0C0D0E0F] + #_ REGISTER_OUT v4 [10111213, 14151617, 18191A1B, 1C1D1E1F] + #_ REGISTER_OUT v5 [01010101, 01010101, 01010101, 01010101] + #_ REGISTER_OUT v6 [01010101, 01010101, 01010101, 01010101] + +test_vperm_3: + #_ REGISTER_IN v3 [00010203, 04050607, 08090A0B, 0C0D0E0F] + #_ REGISTER_IN v4 [10111213, 14151617, 18191A1B, 1C1D1E1F] + #_ REGISTER_IN v5 [11111111, 11111111, 11111111, 11111111] + vperm v6, v3, v4, v5 + blr + #_ REGISTER_OUT v3 [00010203, 04050607, 08090A0B, 0C0D0E0F] + #_ REGISTER_OUT v4 [10111213, 14151617, 18191A1B, 1C1D1E1F] + #_ REGISTER_OUT v5 [11111111, 11111111, 11111111, 11111111] + #_ REGISTER_OUT v6 [11111111, 11111111, 11111111, 11111111] + +test_vperm_4: + # try with > 32b values (should mod) + #_ REGISTER_IN v3 [00010203, 04050607, 08090A0B, 0C0D0E0F] + #_ REGISTER_IN v4 [10111213, 14151617, 18191A1B, 1C1D1E1F] + #_ REGISTER_IN v5 [21212121, 21212121, 21212121, 21212121] + vperm v6, v3, v4, v5 + blr + #_ REGISTER_OUT v3 [00010203, 04050607, 08090A0B, 0C0D0E0F] + #_ REGISTER_OUT v4 [10111213, 14151617, 18191A1B, 1C1D1E1F] + #_ REGISTER_OUT v5 [21212121, 21212121, 21212121, 21212121] + #_ REGISTER_OUT v6 [01010101, 01010101, 01010101, 01010101]