Add vperm test and fix permute control-byte masking (indices are taken mod 32).
This commit is contained in:
parent
229daab25b
commit
d234f2bc47
|
@ -810,6 +810,7 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) {
|
||||||
/* XMMByteOrderMask */ vec128i(0x01000302u, 0x05040706u,
|
/* XMMByteOrderMask */ vec128i(0x01000302u, 0x05040706u,
|
||||||
0x09080B0Au, 0x0D0C0F0Eu),
|
0x09080B0Au, 0x0D0C0F0Eu),
|
||||||
/* XMMPermuteControl15 */ vec128b(15),
|
/* XMMPermuteControl15 */ vec128b(15),
|
||||||
|
/* XMMPermuteByteMask */ vec128b(0x1F),
|
||||||
/* XMMPackD3DCOLORSat */ vec128i(0x404000FFu),
|
/* XMMPackD3DCOLORSat */ vec128i(0x404000FFu),
|
||||||
/* XMMPackD3DCOLOR */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu,
|
/* XMMPackD3DCOLOR */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu,
|
||||||
0xFFFFFFFFu, 0x0C000408u),
|
0xFFFFFFFFu, 0x0C000408u),
|
||||||
|
|
|
@ -57,6 +57,7 @@ enum XmmConst {
|
||||||
XMMByteSwapMask,
|
XMMByteSwapMask,
|
||||||
XMMByteOrderMask,
|
XMMByteOrderMask,
|
||||||
XMMPermuteControl15,
|
XMMPermuteControl15,
|
||||||
|
XMMPermuteByteMask,
|
||||||
XMMPackD3DCOLORSat,
|
XMMPackD3DCOLORSat,
|
||||||
XMMPackD3DCOLOR,
|
XMMPackD3DCOLOR,
|
||||||
XMMUnpackD3DCOLOR,
|
XMMUnpackD3DCOLOR,
|
||||||
|
|
|
@ -5007,6 +5007,7 @@ EMITTER(PERMUTE_I32, MATCH(I<OPCODE_PERMUTE, V128<>, I32<>, V128<>, V128<>>)) {
|
||||||
};
|
};
|
||||||
EMITTER(PERMUTE_V128, MATCH(I<OPCODE_PERMUTE, V128<>, V128<>, V128<>, V128<>>)) {
|
EMITTER(PERMUTE_V128, MATCH(I<OPCODE_PERMUTE, V128<>, V128<>, V128<>, V128<>>)) {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
|
assert_true(i.instr->flags == INT8_TYPE);
|
||||||
// TODO(benvanik): find out how to do this with only one temp register!
|
// TODO(benvanik): find out how to do this with only one temp register!
|
||||||
// Permute bytes between src2 and src3.
|
// Permute bytes between src2 and src3.
|
||||||
if (i.src3.value->IsConstantZero()) {
|
if (i.src3.value->IsConstantZero()) {
|
||||||
|
@ -5022,6 +5023,7 @@ EMITTER(PERMUTE_V128, MATCH(I<OPCODE_PERMUTE, V128<>, V128<>, V128<>, V128<>>))
|
||||||
} else {
|
} else {
|
||||||
e.vxorps(e.xmm0, i.src1, e.GetXmmConstPtr(XMMSwapWordMask));
|
e.vxorps(e.xmm0, i.src1, e.GetXmmConstPtr(XMMSwapWordMask));
|
||||||
}
|
}
|
||||||
|
e.vpand(e.xmm0, e.GetXmmConstPtr(XMMPermuteByteMask));
|
||||||
if (i.src2.is_constant) {
|
if (i.src2.is_constant) {
|
||||||
e.LoadConstantXmm(i.dest, i.src2.constant());
|
e.LoadConstantXmm(i.dest, i.src2.constant());
|
||||||
e.vpshufb(i.dest, i.dest, e.xmm0);
|
e.vpshufb(i.dest, i.dest, e.xmm0);
|
||||||
|
@ -5035,12 +5037,14 @@ EMITTER(PERMUTE_V128, MATCH(I<OPCODE_PERMUTE, V128<>, V128<>, V128<>, V128<>>))
|
||||||
} else {
|
} else {
|
||||||
// General permute.
|
// General permute.
|
||||||
// Control mask needs to be shuffled.
|
// Control mask needs to be shuffled.
|
||||||
|
// TODO(benvanik): do constants here instead of in generated code.
|
||||||
if (i.src1.is_constant) {
|
if (i.src1.is_constant) {
|
||||||
e.LoadConstantXmm(e.xmm2, i.src1.constant());
|
e.LoadConstantXmm(e.xmm2, i.src1.constant());
|
||||||
e.vxorps(e.xmm2, e.xmm2, e.GetXmmConstPtr(XMMSwapWordMask));
|
e.vxorps(e.xmm2, e.xmm2, e.GetXmmConstPtr(XMMSwapWordMask));
|
||||||
} else {
|
} else {
|
||||||
e.vxorps(e.xmm2, i.src1, e.GetXmmConstPtr(XMMSwapWordMask));
|
e.vxorps(e.xmm2, i.src1, e.GetXmmConstPtr(XMMSwapWordMask));
|
||||||
}
|
}
|
||||||
|
e.vpand(e.xmm2, e.GetXmmConstPtr(XMMPermuteByteMask));
|
||||||
Xmm src2_shuf = e.xmm0;
|
Xmm src2_shuf = e.xmm0;
|
||||||
if (i.src2.value->IsConstantZero()) {
|
if (i.src2.value->IsConstantZero()) {
|
||||||
e.vpxor(src2_shuf, src2_shuf);
|
e.vpxor(src2_shuf, src2_shuf);
|
||||||
|
|
Binary file not shown.
|
@ -0,0 +1,21 @@
|
||||||
|
|
||||||
|
/vagrant/src/alloy/frontend/ppc/test/bin//instr_vperm.o: file format elf64-powerpc
|
||||||
|
|
||||||
|
|
||||||
|
Disassembly of section .text:
|
||||||
|
|
||||||
|
0000000000100000 <test_vperm_1>:
|
||||||
|
100000: 10 c3 21 6b vperm v6,v3,v4,v5
|
||||||
|
100004: 4e 80 00 20 blr
|
||||||
|
|
||||||
|
0000000000100008 <test_vperm_2>:
|
||||||
|
100008: 10 c3 21 6b vperm v6,v3,v4,v5
|
||||||
|
10000c: 4e 80 00 20 blr
|
||||||
|
|
||||||
|
0000000000100010 <test_vperm_3>:
|
||||||
|
100010: 10 c3 21 6b vperm v6,v3,v4,v5
|
||||||
|
100014: 4e 80 00 20 blr
|
||||||
|
|
||||||
|
0000000000100018 <test_vperm_4>:
|
||||||
|
100018: 10 c3 21 6b vperm v6,v3,v4,v5
|
||||||
|
10001c: 4e 80 00 20 blr
|
|
@ -0,0 +1,4 @@
|
||||||
|
0000000000000000 t test_vperm_1
|
||||||
|
0000000000000008 t test_vperm_2
|
||||||
|
0000000000000010 t test_vperm_3
|
||||||
|
0000000000000018 t test_vperm_4
|
|
@ -0,0 +1,44 @@
|
||||||
|
test_vperm_1:
|
||||||
|
#_ REGISTER_IN v3 [00010203, 04050607, 08090A0B, 0C0D0E0F]
|
||||||
|
#_ REGISTER_IN v4 [10111213, 14151617, 18191A1B, 1C1D1E1F]
|
||||||
|
#_ REGISTER_IN v5 [00000000, 00000000, 00000000, 00000000]
|
||||||
|
vperm v6, v3, v4, v5
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT v3 [00010203, 04050607, 08090A0B, 0C0D0E0F]
|
||||||
|
#_ REGISTER_OUT v4 [10111213, 14151617, 18191A1B, 1C1D1E1F]
|
||||||
|
#_ REGISTER_OUT v5 [00000000, 00000000, 00000000, 00000000]
|
||||||
|
#_ REGISTER_OUT v6 [00000000, 00000000, 00000000, 00000000]
|
||||||
|
|
||||||
|
test_vperm_2:
|
||||||
|
#_ REGISTER_IN v3 [00010203, 04050607, 08090A0B, 0C0D0E0F]
|
||||||
|
#_ REGISTER_IN v4 [10111213, 14151617, 18191A1B, 1C1D1E1F]
|
||||||
|
#_ REGISTER_IN v5 [01010101, 01010101, 01010101, 01010101]
|
||||||
|
vperm v6, v3, v4, v5
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT v3 [00010203, 04050607, 08090A0B, 0C0D0E0F]
|
||||||
|
#_ REGISTER_OUT v4 [10111213, 14151617, 18191A1B, 1C1D1E1F]
|
||||||
|
#_ REGISTER_OUT v5 [01010101, 01010101, 01010101, 01010101]
|
||||||
|
#_ REGISTER_OUT v6 [01010101, 01010101, 01010101, 01010101]
|
||||||
|
|
||||||
|
test_vperm_3:
|
||||||
|
#_ REGISTER_IN v3 [00010203, 04050607, 08090A0B, 0C0D0E0F]
|
||||||
|
#_ REGISTER_IN v4 [10111213, 14151617, 18191A1B, 1C1D1E1F]
|
||||||
|
#_ REGISTER_IN v5 [11111111, 11111111, 11111111, 11111111]
|
||||||
|
vperm v6, v3, v4, v5
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT v3 [00010203, 04050607, 08090A0B, 0C0D0E0F]
|
||||||
|
#_ REGISTER_OUT v4 [10111213, 14151617, 18191A1B, 1C1D1E1F]
|
||||||
|
#_ REGISTER_OUT v5 [11111111, 11111111, 11111111, 11111111]
|
||||||
|
#_ REGISTER_OUT v6 [11111111, 11111111, 11111111, 11111111]
|
||||||
|
|
||||||
|
test_vperm_4:
|
||||||
|
# try with control bytes > 31 (only the low 5 bits are used, i.e. index mod 32)
|
||||||
|
#_ REGISTER_IN v3 [00010203, 04050607, 08090A0B, 0C0D0E0F]
|
||||||
|
#_ REGISTER_IN v4 [10111213, 14151617, 18191A1B, 1C1D1E1F]
|
||||||
|
#_ REGISTER_IN v5 [21212121, 21212121, 21212121, 21212121]
|
||||||
|
vperm v6, v3, v4, v5
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT v3 [00010203, 04050607, 08090A0B, 0C0D0E0F]
|
||||||
|
#_ REGISTER_OUT v4 [10111213, 14151617, 18191A1B, 1C1D1E1F]
|
||||||
|
#_ REGISTER_OUT v5 [21212121, 21212121, 21212121, 21212121]
|
||||||
|
#_ REGISTER_OUT v6 [01010101, 01010101, 01010101, 01010101]
|
Loading…
Reference in New Issue