diff --git a/src/alloy/backend/x64/x64_sequences.cc b/src/alloy/backend/x64/x64_sequences.cc index 0d0c56270..53995f4c3 100644 --- a/src/alloy/backend/x64/x64_sequences.cc +++ b/src/alloy/backend/x64/x64_sequences.cc @@ -5364,8 +5364,11 @@ EMITTER(UNPACK, MATCH(I, V128<>>)) { } // Shuffle bytes. e.vpshufb(i.dest, src, e.GetXmmConstPtr(XMMUnpackSHORT_2)); + // Sign extend words. + e.vpslld(i.dest, 16); + e.vpsrad(i.dest, 16); // Add 3,3,0,1. - e.vpor(i.dest, e.GetXmmConstPtr(XMM3301)); + e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3301)); } static void Emit8_IN_16(X64Emitter& e, const EmitArgType& i, uint32_t flags) { assert_false(IsPackOutSaturate(flags)); diff --git a/src/alloy/frontend/ppc/test/bin/instr_vupkd3d128.bin b/src/alloy/frontend/ppc/test/bin/instr_vupkd3d128.bin new file mode 100644 index 000000000..a87c63968 Binary files /dev/null and b/src/alloy/frontend/ppc/test/bin/instr_vupkd3d128.bin differ diff --git a/src/alloy/frontend/ppc/test/bin/instr_vupkd3d128.dis b/src/alloy/frontend/ppc/test/bin/instr_vupkd3d128.dis new file mode 100644 index 000000000..8f6225d63 --- /dev/null +++ b/src/alloy/frontend/ppc/test/bin/instr_vupkd3d128.dis @@ -0,0 +1,29 @@ + +/vagrant/src/alloy/frontend/ppc/test/bin//instr_vupkd3d128.o: file format elf64-powerpc + + +Disassembly of section .text: + +0000000000100000 : + 100000: 18 60 1f f0 vupkd3d128 v3,v3,0 + 100004: 4e 80 00 20 blr + +0000000000100008 : + 100008: 18 64 1f f0 vupkd3d128 v3,v3,4 + 10000c: 4e 80 00 20 blr + +0000000000100010 : + 100010: 18 64 1f f0 vupkd3d128 v3,v3,4 + 100014: 4e 80 00 20 blr + +0000000000100018 : + 100018: 18 64 1f f0 vupkd3d128 v3,v3,4 + 10001c: 4e 80 00 20 blr + +0000000000100020 : + 100020: 18 6c 1f f0 vupkd3d128 v3,v3,12 + 100024: 4e 80 00 20 blr + +0000000000100028 : + 100028: 18 74 1f f0 vupkd3d128 v3,v3,20 + 10002c: 4e 80 00 20 blr diff --git a/src/alloy/frontend/ppc/test/bin/instr_vupkd3d128.map b/src/alloy/frontend/ppc/test/bin/instr_vupkd3d128.map new file mode 100644 index 000000000..1e52624e8 --- /dev/null +++ b/src/alloy/frontend/ppc/test/bin/instr_vupkd3d128.map @@ -0,0 +1,6 @@ +0000000000000000 t test_vupkd3d128_d3dcolor +0000000000000008 t test_vupkd3d128_short2_0 +0000000000000010 t test_vupkd3d128_short2_1 +0000000000000018 t test_vupkd3d128_short2_2 +0000000000000020 t test_vupkd3d128_float16_2_0 +0000000000000028 t test_vupkd3d128_float16_4_0 diff --git a/src/alloy/frontend/ppc/test/instr_vupkd3d128.s b/src/alloy/frontend/ppc/test/instr_vupkd3d128.s new file mode 100644 index 000000000..31800b3a2 --- /dev/null +++ b/src/alloy/frontend/ppc/test/instr_vupkd3d128.s @@ -0,0 +1,48 @@ +# vupkd3d128 dest, src, type +# type: +# 0 = PACK_TYPE_D3DCOLOR +# 1 = PACK_TYPE_SHORT_2 +# 3 = PACK_TYPE_FLOAT16_2 +# 5 = PACK_TYPE_FLOAT16_4 + +# vupkd3d128 is broken in binutils, so these are hand coded + +test_vupkd3d128_d3dcolor: + #_ REGISTER_IN v3 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 04010203] + # vupkd3d128 v3, v3, 0 + .long 0x18601FF0 + blr + #_ REGISTER_OUT v3 [3f800001, 3f800002, 3f800003, 3f800004] + +test_vupkd3d128_short2_0: + #_ REGISTER_IN v3 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 7FFF8001] + # vupkd3d128 v3, v3, 1 + .long 0x18641FF0 + blr + #_ REGISTER_OUT v3 [40407fff, 403f8001, 00000000, 3f800000] +test_vupkd3d128_short2_1: + #_ REGISTER_IN v3 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 4000C000] + # vupkd3d128 v3, v3, 1 + .long 0x18641FF0 + blr + #_ REGISTER_OUT v3 [40404000, 403FC000, 00000000, 3f800000] +test_vupkd3d128_short2_2: + #_ REGISTER_IN v3 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 7FFFF333] + # vupkd3d128 v3, v3, 1 + .long 0x18641FF0 + blr + #_ REGISTER_OUT v3 [40407FFF, 403FF333, 00000000, 3f800000] + +test_vupkd3d128_float16_2_0: + #_ REGISTER_IN v3 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 3800B800] + # vupkd3d128 v3, v3, 3 + .long 0x186C1FF0 + blr + #_ REGISTER_OUT v3 [3F000000, BF000000, 00000000, 3f800000] + +test_vupkd3d128_float16_4_0: + #_ REGISTER_IN v3 [CDCDCDCD, CDCDCDCD, 3800B801, 3802B803] + # vupkd3d128 v3, v3, 5 + .long 0x18741FF0 + blr + #_ REGISTER_OUT v3 [3F000000, bf002000, 3f004000, bf006000]