From e767c2e90aae6f6bf6593d5bbda04c2d401a2d1f Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sat, 25 Oct 2014 15:23:27 -0700 Subject: [PATCH] Fixing permute, I think. --- src/alloy/backend/x64/x64_emitter.cc | 2 + src/alloy/backend/x64/x64_emitter.h | 1 + src/alloy/backend/x64/x64_sequences.cc | 8 ++-- src/alloy/frontend/ppc/test/bin/instr_lvl.bin | Bin 0 -> 8 bytes src/alloy/frontend/ppc/test/bin/instr_lvl.dis | 9 ++++ src/alloy/frontend/ppc/test/bin/instr_lvl.map | 1 + src/alloy/frontend/ppc/test/instr_lvl.s | 7 +++ src/alloy/test/test.gypi | 2 +- src/alloy/test/test_byte_swap.cc | 42 ++++++++++++++++++ 9 files changed, 67 insertions(+), 5 deletions(-) create mode 100644 src/alloy/frontend/ppc/test/bin/instr_lvl.bin create mode 100644 src/alloy/frontend/ppc/test/bin/instr_lvl.dis create mode 100644 src/alloy/frontend/ppc/test/bin/instr_lvl.map create mode 100644 src/alloy/frontend/ppc/test/instr_lvl.s create mode 100644 src/alloy/test/test_byte_swap.cc diff --git a/src/alloy/backend/x64/x64_emitter.cc b/src/alloy/backend/x64/x64_emitter.cc index 0dac2fb36..204d614cc 100644 --- a/src/alloy/backend/x64/x64_emitter.cc +++ b/src/alloy/backend/x64/x64_emitter.cc @@ -835,6 +835,8 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) { 0x0000001Fu, 0x0000001Fu), /* XMMShiftByteMask */ vec128i(0x000000FFu, 0x000000FFu, 0x000000FFu, 0x000000FFu), + /* XMMSwapWordMask */ vec128i(0x03030303u, 0x03030303u, + 0x03030303u, 0x03030303u), /* XMMUnsignedDwordMax */ vec128i(0xFFFFFFFFu, 0x00000000u, 0xFFFFFFFFu, 0x00000000u), /* XMM255 */ vec128f(255.0f), diff --git a/src/alloy/backend/x64/x64_emitter.h b/src/alloy/backend/x64/x64_emitter.h index 7755c8cdb..009f72e92 100644 --- a/src/alloy/backend/x64/x64_emitter.h +++ b/src/alloy/backend/x64/x64_emitter.h @@ -67,6 +67,7 @@ enum XmmConst { XMMShiftMaskEvenPI16, XMMShiftMaskPS, XMMShiftByteMask, + XMMSwapWordMask, XMMUnsignedDwordMax, XMM255, XMMPI32, diff --git a/src/alloy/backend/x64/x64_sequences.cc 
b/src/alloy/backend/x64/x64_sequences.cc index 3b9c5c0d5..0a1db4b2a 100644 --- a/src/alloy/backend/x64/x64_sequences.cc +++ b/src/alloy/backend/x64/x64_sequences.cc @@ -4999,9 +4999,9 @@ EMITTER(PERMUTE_V128, MATCH(I, V128<>, V128<>, V128<>>)) // Control mask needs to be shuffled. if (i.src1.is_constant) { e.LoadConstantXmm(e.xmm0, i.src1.constant()); - e.vpshufb(e.xmm0, e.xmm0, e.GetXmmConstPtr(XMMByteSwapMask)); + e.vxorps(e.xmm0, e.xmm0, e.GetXmmConstPtr(XMMSwapWordMask)); } else { - e.vpshufb(e.xmm0, i.src1, e.GetXmmConstPtr(XMMByteSwapMask)); + e.vxorps(e.xmm0, i.src1, e.GetXmmConstPtr(XMMSwapWordMask)); } if (i.src2.is_constant) { e.LoadConstantXmm(i.dest, i.src2.constant()); @@ -5018,9 +5018,9 @@ EMITTER(PERMUTE_V128, MATCH(I, V128<>, V128<>, V128<>>)) // Control mask needs to be shuffled. if (i.src1.is_constant) { e.LoadConstantXmm(e.xmm2, i.src1.constant()); - e.vpshufb(e.xmm2, e.xmm2, e.GetXmmConstPtr(XMMByteSwapMask)); + e.vxorps(e.xmm2, e.xmm2, e.GetXmmConstPtr(XMMSwapWordMask)); } else { - e.vpshufb(e.xmm2, i.src1, e.GetXmmConstPtr(XMMByteSwapMask)); + e.vxorps(e.xmm2, i.src1, e.GetXmmConstPtr(XMMSwapWordMask)); } Xmm src2_shuf = e.xmm0; if (i.src2.value->IsConstantZero()) { diff --git a/src/alloy/frontend/ppc/test/bin/instr_lvl.bin b/src/alloy/frontend/ppc/test/bin/instr_lvl.bin new file mode 100644 index 0000000000000000000000000000000000000000..671e6951f3433fdcd6a86adb041a2f331d109b37 GIT binary patch literal 8 Pcmb: + 100000: 7c 64 04 0e lvlx v3,r4,r0 + 100004: 4e 80 00 20 blr diff --git a/src/alloy/frontend/ppc/test/bin/instr_lvl.map b/src/alloy/frontend/ppc/test/bin/instr_lvl.map new file mode 100644 index 000000000..a5a3ba36c --- /dev/null +++ b/src/alloy/frontend/ppc/test/bin/instr_lvl.map @@ -0,0 +1 @@ +0000000000000000 t test_lvl_1 diff --git a/src/alloy/frontend/ppc/test/instr_lvl.s b/src/alloy/frontend/ppc/test/instr_lvl.s new file mode 100644 index 000000000..10b61676e --- /dev/null +++ b/src/alloy/frontend/ppc/test/instr_lvl.s @@ -0,0 
+1,7 @@ +test_lvl_1: + #_ MEMORY_IN 00001077 0a 0b 0c 0d 0e 0f 10 13 0c 0d 0e 10 11 12 13 14 ff ff ff ff ff ff + #_ REGISTER_IN r4 0x1077 + lvlx v3, r4, r0 + blr + #_ REGISTER_OUT r4 0x1077 + #_ REGISTER_OUT v3 [0A0B0C0D, 0E0F1013, 0C000000, 00000000] diff --git a/src/alloy/test/test.gypi b/src/alloy/test/test.gypi index 9f5825fd2..508acb087 100644 --- a/src/alloy/test/test.gypi +++ b/src/alloy/test/test.gypi @@ -57,7 +57,7 @@ #'test_atomic_exchange.cc', #'test_atomic_sub.cc', #'test_branch.cc', - #'test_byte_swap.cc', + 'test_byte_swap.cc', #'test_cast.cc', #'test_cntlz.cc', #'test_compare.cc', diff --git a/src/alloy/test/test_byte_swap.cc b/src/alloy/test/test_byte_swap.cc new file mode 100644 index 000000000..03131d031 --- /dev/null +++ b/src/alloy/test/test_byte_swap.cc @@ -0,0 +1,42 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include + +using namespace alloy; +using namespace alloy::hir; +using namespace alloy::runtime; +using namespace alloy::test; +using alloy::frontend::ppc::PPCContext; + +TEST_CASE("BYTE_SWAP_V128", "[instr]") { /* Builds a function that loads v4, applies ByteSwap, and stores the result to v3; runs it on two inputs. */ + TestFunction([](hir::HIRBuilder& b) { + StoreVR(b, 3, b.ByteSwap(LoadVR(b, 4))); + b.Return(); + }).Run([](PPCContext* ctx) { /* Case 1 input: the sixteen byte values 0..15 in ascending order. */ + ctx->v[4] = vec128b(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15); + }, + [](PPCContext* ctx) { /* Case 1 check: the values come back reversed within each consecutive group of four. */ + auto result = ctx->v[3]; + REQUIRE(result == vec128b(3, 2, 1, 0, 7, 6, 5, 4, 11, + 10, 9, 8, 15, 14, 13, 12)); + }); + TestFunction([](hir::HIRBuilder& b) { + StoreVR(b, 3, b.ByteSwap(LoadVR(b, 4))); + b.Return(); + }).Run([](PPCContext* ctx) { /* Case 2 input: same function body, fed four 32-bit values (three non-zero, one zero). */ + ctx->v[4] = vec128i(0x0C13100F, 0x0E0D0C0B, 0x0A000000, + 0x00000000); + }, + [](PPCContext* ctx) { /* Case 2 check: each 32-bit value has its byte order reversed, e.g. 0x0C13100F becomes 0x0F10130C. */ + auto result = ctx->v[3]; + REQUIRE(result == vec128i(0x0F10130C, 0x0B0C0D0E, 0x0000000A, 0x00000000)); + }); +}