Add vperm tests and fix permute control bytes to wrap modulo 32 (mask with 0x1F).

This commit is contained in:
Ben Vanik 2015-01-11 15:17:16 -08:00
parent 229daab25b
commit d234f2bc47
7 changed files with 75 additions and 0 deletions

View File

@ -810,6 +810,7 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) {
/* XMMByteOrderMask */ vec128i(0x01000302u, 0x05040706u,
0x09080B0Au, 0x0D0C0F0Eu),
/* XMMPermuteControl15 */ vec128b(15),
/* XMMPermuteByteMask */ vec128b(0x1F),
/* XMMPackD3DCOLORSat */ vec128i(0x404000FFu),
/* XMMPackD3DCOLOR */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu,
0xFFFFFFFFu, 0x0C000408u),

View File

@ -57,6 +57,7 @@ enum XmmConst {
XMMByteSwapMask,
XMMByteOrderMask,
XMMPermuteControl15,
XMMPermuteByteMask,
XMMPackD3DCOLORSat,
XMMPackD3DCOLOR,
XMMUnpackD3DCOLOR,

View File

@ -5007,6 +5007,7 @@ EMITTER(PERMUTE_I32, MATCH(I<OPCODE_PERMUTE, V128<>, I32<>, V128<>, V128<>>)) {
};
EMITTER(PERMUTE_V128, MATCH(I<OPCODE_PERMUTE, V128<>, V128<>, V128<>, V128<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) {
assert_true(i.instr->flags == INT8_TYPE);
// TODO(benvanik): find out how to do this with only one temp register!
// Permute bytes between src2 and src3.
if (i.src3.value->IsConstantZero()) {
@ -5022,6 +5023,7 @@ EMITTER(PERMUTE_V128, MATCH(I<OPCODE_PERMUTE, V128<>, V128<>, V128<>, V128<>>))
} else {
e.vxorps(e.xmm0, i.src1, e.GetXmmConstPtr(XMMSwapWordMask));
}
e.vpand(e.xmm0, e.GetXmmConstPtr(XMMPermuteByteMask));
if (i.src2.is_constant) {
e.LoadConstantXmm(i.dest, i.src2.constant());
e.vpshufb(i.dest, i.dest, e.xmm0);
@ -5035,12 +5037,14 @@ EMITTER(PERMUTE_V128, MATCH(I<OPCODE_PERMUTE, V128<>, V128<>, V128<>, V128<>>))
} else {
// General permute.
// Control mask needs to be shuffled.
// TODO(benvanik): do constants here instead of in generated code.
if (i.src1.is_constant) {
e.LoadConstantXmm(e.xmm2, i.src1.constant());
e.vxorps(e.xmm2, e.xmm2, e.GetXmmConstPtr(XMMSwapWordMask));
} else {
e.vxorps(e.xmm2, i.src1, e.GetXmmConstPtr(XMMSwapWordMask));
}
e.vpand(e.xmm2, e.GetXmmConstPtr(XMMPermuteByteMask));
Xmm src2_shuf = e.xmm0;
if (i.src2.value->IsConstantZero()) {
e.vpxor(src2_shuf, src2_shuf);

Binary file not shown.

View File

@ -0,0 +1,21 @@
/vagrant/src/alloy/frontend/ppc/test/bin//instr_vperm.o: file format elf64-powerpc
Disassembly of section .text:
0000000000100000 <test_vperm_1>:
100000: 10 c3 21 6b vperm v6,v3,v4,v5
100004: 4e 80 00 20 blr
0000000000100008 <test_vperm_2>:
100008: 10 c3 21 6b vperm v6,v3,v4,v5
10000c: 4e 80 00 20 blr
0000000000100010 <test_vperm_3>:
100010: 10 c3 21 6b vperm v6,v3,v4,v5
100014: 4e 80 00 20 blr
0000000000100018 <test_vperm_4>:
100018: 10 c3 21 6b vperm v6,v3,v4,v5
10001c: 4e 80 00 20 blr

View File

@ -0,0 +1,4 @@
0000000000000000 t test_vperm_1
0000000000000008 t test_vperm_2
0000000000000010 t test_vperm_3
0000000000000018 t test_vperm_4

View File

@ -0,0 +1,44 @@
test_vperm_1:
# Control vector v5 is all 0x00: every result byte selects byte 0 of the
# 32-byte v3:v4 source pair, i.e. the first byte of v3 (0x00).
#_ REGISTER_IN v3 [00010203, 04050607, 08090A0B, 0C0D0E0F]
#_ REGISTER_IN v4 [10111213, 14151617, 18191A1B, 1C1D1E1F]
#_ REGISTER_IN v5 [00000000, 00000000, 00000000, 00000000]
vperm v6, v3, v4, v5
blr
#_ REGISTER_OUT v3 [00010203, 04050607, 08090A0B, 0C0D0E0F]
#_ REGISTER_OUT v4 [10111213, 14151617, 18191A1B, 1C1D1E1F]
#_ REGISTER_OUT v5 [00000000, 00000000, 00000000, 00000000]
#_ REGISTER_OUT v6 [00000000, 00000000, 00000000, 00000000]
test_vperm_2:
# Control vector v5 is all 0x01: every result byte selects byte 1 of the
# v3:v4 source pair, i.e. the second byte of v3 (0x01).
#_ REGISTER_IN v3 [00010203, 04050607, 08090A0B, 0C0D0E0F]
#_ REGISTER_IN v4 [10111213, 14151617, 18191A1B, 1C1D1E1F]
#_ REGISTER_IN v5 [01010101, 01010101, 01010101, 01010101]
vperm v6, v3, v4, v5
blr
#_ REGISTER_OUT v3 [00010203, 04050607, 08090A0B, 0C0D0E0F]
#_ REGISTER_OUT v4 [10111213, 14151617, 18191A1B, 1C1D1E1F]
#_ REGISTER_OUT v5 [01010101, 01010101, 01010101, 01010101]
#_ REGISTER_OUT v6 [01010101, 01010101, 01010101, 01010101]
test_vperm_3:
# Control vector v5 is all 0x11 (17): selectors 16..31 index into the second
# source, so every result byte is byte 1 of v4 (0x11).
#_ REGISTER_IN v3 [00010203, 04050607, 08090A0B, 0C0D0E0F]
#_ REGISTER_IN v4 [10111213, 14151617, 18191A1B, 1C1D1E1F]
#_ REGISTER_IN v5 [11111111, 11111111, 11111111, 11111111]
vperm v6, v3, v4, v5
blr
#_ REGISTER_OUT v3 [00010203, 04050607, 08090A0B, 0C0D0E0F]
#_ REGISTER_OUT v4 [10111213, 14151617, 18191A1B, 1C1D1E1F]
#_ REGISTER_OUT v5 [11111111, 11111111, 11111111, 11111111]
#_ REGISTER_OUT v6 [11111111, 11111111, 11111111, 11111111]
test_vperm_4:
# Selector bytes >= 32: only the low 5 bits of each control byte are used
# (index modulo 32), so 0x21 behaves like 0x01 and selects byte 1 of v3 (0x01).
#_ REGISTER_IN v3 [00010203, 04050607, 08090A0B, 0C0D0E0F]
#_ REGISTER_IN v4 [10111213, 14151617, 18191A1B, 1C1D1E1F]
#_ REGISTER_IN v5 [21212121, 21212121, 21212121, 21212121]
vperm v6, v3, v4, v5
blr
#_ REGISTER_OUT v3 [00010203, 04050607, 08090A0B, 0C0D0E0F]
#_ REGISTER_OUT v4 [10111213, 14151617, 18191A1B, 1C1D1E1F]
#_ REGISTER_OUT v5 [21212121, 21212121, 21212121, 21212121]
#_ REGISTER_OUT v6 [01010101, 01010101, 01010101, 01010101]