From 5bec5bb203aaf96666da6700a98d16265a93387b Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sun, 26 Oct 2014 02:01:02 -0700 Subject: [PATCH] Fixing stvl/stvr. --- src/alloy/frontend/ppc/ppc_emit_altivec.cc | 30 ++++++++---------- .../frontend/ppc/test/bin/instr_stvl.bin | Bin 8 -> 16 bytes .../frontend/ppc/test/bin/instr_stvl.dis | 4 +++ .../frontend/ppc/test/bin/instr_stvl.map | 1 + .../frontend/ppc/test/bin/instr_stvr.bin | Bin 8 -> 16 bytes .../frontend/ppc/test/bin/instr_stvr.dis | 4 +++ .../frontend/ppc/test/bin/instr_stvr.map | 1 + src/alloy/frontend/ppc/test/instr_stvl.s | 10 ++++++ src/alloy/frontend/ppc/test/instr_stvr.s | 10 ++++++ 9 files changed, 44 insertions(+), 16 deletions(-) diff --git a/src/alloy/frontend/ppc/ppc_emit_altivec.cc b/src/alloy/frontend/ppc/ppc_emit_altivec.cc index 7bc00959e..f5ad96053 100644 --- a/src/alloy/frontend/ppc/ppc_emit_altivec.cc +++ b/src/alloy/frontend/ppc/ppc_emit_altivec.cc @@ -265,18 +265,16 @@ int InstrEmit_stvlx_(PPCHIRBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, // we could optimize this to prevent the other load/mask, in that case. Value* ea = CalculateEA_0(f, ra, rb); Value* eb = f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstant((int8_t)0xF)); - Value* new_value = f.LoadVR(vd); // ea &= ~0xF ea = f.And(ea, f.LoadConstant(~0xFull)); + // v = (old & ~mask) | ((new >> eb) & mask) + Value* new_value = f.Permute(f.LoadVectorShr(eb), f.LoadZero(VEC128_TYPE), + f.LoadVR(vd), INT8_TYPE); Value* old_value = f.ByteSwap(f.Load(ea, VEC128_TYPE)); - // v = (new >> eb) | (old & (ONE << (16 - eb))) - Value* v = f.Permute(f.LoadVectorShr(eb), f.LoadZero(VEC128_TYPE), new_value, - INT8_TYPE); - v = f.Or( - v, f.And(old_value, - f.Permute(f.LoadVectorShl(f.Sub(f.LoadConstant((int8_t)16), eb)), - f.Not(f.LoadZero(VEC128_TYPE)), - f.LoadZero(VEC128_TYPE), INT8_TYPE))); + // mask = FFFF... >> eb + Value* mask = f.Permute(f.LoadVectorShr(eb), f.LoadZero(VEC128_TYPE), + f.Not(f.LoadZero(VEC128_TYPE)), INT8_TYPE); + Value* v = f.Or(f.And(old_value, f.Not(mask)), f.And(new_value, mask)); // ea &= ~0xF (handled above) f.Store(ea, f.ByteSwap(v)); return 0; @@ -301,16 +299,16 @@ int InstrEmit_stvrx_(PPCHIRBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, // we could optimize this to prevent the other load/mask, in that case. Value* ea = CalculateEA_0(f, ra, rb); Value* eb = f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstant((int8_t)0xF)); - Value* new_value = f.LoadVR(vd); // ea &= ~0xF ea = f.And(ea, f.LoadConstant(~0xFull)); + // v = (old & ~mask) | ((new << eb) & mask) + Value* new_value = f.Permute(f.LoadVectorShr(eb), f.LoadVR(vd), + f.LoadZero(VEC128_TYPE), INT8_TYPE); Value* old_value = f.ByteSwap(f.Load(ea, VEC128_TYPE)); - // v = (new << (16 - eb)) | (old & (ONE >> eb)) - Value* v = f.Permute(f.LoadVectorShl(f.Sub(f.LoadConstant((int8_t)16), eb)), - new_value, f.LoadZero(VEC128_TYPE), INT8_TYPE); - v = f.Or(v, f.And(old_value, - f.Permute(f.LoadVectorShr(eb), f.LoadZero(VEC128_TYPE), - f.Not(f.LoadZero(VEC128_TYPE)), INT8_TYPE))); + // mask = ~FFFF... >> eb + Value* mask = f.Permute(f.LoadVectorShr(eb), f.Not(f.LoadZero(VEC128_TYPE)), + f.LoadZero(VEC128_TYPE), INT8_TYPE); + Value* v = f.Or(f.And(old_value, f.Not(mask)), f.And(new_value, mask)); // ea &= ~0xF (handled above) f.Store(ea, f.ByteSwap(v)); return 0; diff --git a/src/alloy/frontend/ppc/test/bin/instr_stvl.bin b/src/alloy/frontend/ppc/test/bin/instr_stvl.bin index b079eec533f3ad9fe7e1bd6058606d865ef63895..4ec644997db0d107310e362f142152e1c69b65e3 100644 GIT binary patch literal 16 Rcmb: 100000: 7c 64 05 0e stvlx v3,r4,r0 100004: 4e 80 00 20 blr + +0000000000100008 : + 100008: 7c 64 05 0e stvlx v3,r4,r0 + 10000c: 4e 80 00 20 blr diff --git a/src/alloy/frontend/ppc/test/bin/instr_stvl.map b/src/alloy/frontend/ppc/test/bin/instr_stvl.map index 3afd818c7..e4549f6e4 100644 --- a/src/alloy/frontend/ppc/test/bin/instr_stvl.map +++ b/src/alloy/frontend/ppc/test/bin/instr_stvl.map @@ -1 +1,2 @@ 0000000000000000 t test_stvl_1 +0000000000000008 t test_stvl_2 diff --git a/src/alloy/frontend/ppc/test/bin/instr_stvr.bin b/src/alloy/frontend/ppc/test/bin/instr_stvr.bin index b7195fac17e4eca5ef96fd2bb45ce4ce2780bbeb..4e8fde8776955572edc410c379159a5a66705b2e 100644 GIT binary patch literal 16 Tcmb: 100000: 7c 64 2d 4e stvrx v3,r4,r5 100004: 4e 80 00 20 blr + +0000000000100008 : + 100008: 7c 64 05 4e stvrx v3,r4,r0 + 10000c: 4e 80 00 20 blr diff --git a/src/alloy/frontend/ppc/test/bin/instr_stvr.map b/src/alloy/frontend/ppc/test/bin/instr_stvr.map index b0296d550..4d82bf66b 100644 --- a/src/alloy/frontend/ppc/test/bin/instr_stvr.map +++ b/src/alloy/frontend/ppc/test/bin/instr_stvr.map @@ -1 +1,2 @@ 0000000000000000 t test_stvr_1 +0000000000000008 t test_stvr_2 diff --git a/src/alloy/frontend/ppc/test/instr_stvl.s b/src/alloy/frontend/ppc/test/instr_stvl.s index ed95423e9..ce1dfef01 100644 --- a/src/alloy/frontend/ppc/test/instr_stvl.s +++ b/src/alloy/frontend/ppc/test/instr_stvl.s @@ -7,3 +7,13 @@ test_stvl_1: #_ REGISTER_OUT r4 0x1040 #_ REGISTER_OUT v3 [BE74FCBD, BD912ABA, BF317BBB, BF2D135F] #_ MEMORY_OUT 00001040 BE74FCBD BD912ABA BF317BBB BF2D135F + +test_stvl_2: + #_ MEMORY_IN 00001040 00010203 04050607 08090A0B 0C0D0E0F + #_ REGISTER_IN r4 0x1044 + #_ REGISTER_IN v3 [F0F1F2F3, F4F5F6F7, F8F9FAFB, FCFDFEFF] + stvlx v3, r4, r0 + blr + #_ REGISTER_OUT r4 0x1044 + #_ REGISTER_OUT v3 [F0F1F2F3, F4F5F6F7, F8F9FAFB, FCFDFEFF] + #_ MEMORY_OUT 00001040 00010203 F0F1F2F3 F4F5F6F7 F8F9FAFB diff --git a/src/alloy/frontend/ppc/test/instr_stvr.s b/src/alloy/frontend/ppc/test/instr_stvr.s index 2e8e8cb9d..a651f0ba0 100644 --- a/src/alloy/frontend/ppc/test/instr_stvr.s +++ b/src/alloy/frontend/ppc/test/instr_stvr.s @@ -11,3 +11,13 @@ test_stvr_1: #_ REGISTER_OUT v3 [BE74FCBD, BD912ABA, BF317BBB, BF2D135F] #_ MEMORY_OUT 00001040 BE74FCBD BD912ABA BF317BBB BF2D135F #_ MEMORY_OUT 00001050 00000000 00000000 00000000 00000000 + +test_stvr_2: + #_ MEMORY_IN 00001040 00010203 04050607 08090A0B 0C0D0E0F + #_ REGISTER_IN r4 0x1044 + #_ REGISTER_IN v3 [F0F1F2F3, F4F5F6F7, F8F9FAFB, FCFDFEFF] + stvrx v3, r4, r0 + blr + #_ REGISTER_OUT r4 0x1044 + #_ REGISTER_OUT v3 [F0F1F2F3, F4F5F6F7, F8F9FAFB, FCFDFEFF] + #_ MEMORY_OUT 00001040 FCFDFEFF 04050607 08090A0B 0C0D0E0F