Fixing stvl/stvr.

This commit is contained in:
Ben Vanik 2014-10-26 02:01:02 -07:00
parent 9649eb0e8d
commit 5bec5bb203
9 changed files with 44 additions and 16 deletions

View File

@ -265,18 +265,16 @@ int InstrEmit_stvlx_(PPCHIRBuilder& f, InstrData& i, uint32_t vd, uint32_t ra,
// we could optimize this to prevent the other load/mask, in that case. // we could optimize this to prevent the other load/mask, in that case.
Value* ea = CalculateEA_0(f, ra, rb); Value* ea = CalculateEA_0(f, ra, rb);
Value* eb = f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstant((int8_t)0xF)); Value* eb = f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstant((int8_t)0xF));
Value* new_value = f.LoadVR(vd);
// ea &= ~0xF // ea &= ~0xF
ea = f.And(ea, f.LoadConstant(~0xFull)); ea = f.And(ea, f.LoadConstant(~0xFull));
// v = (old & ~mask) | ((new >> eb) & mask)
Value* new_value = f.Permute(f.LoadVectorShr(eb), f.LoadZero(VEC128_TYPE),
f.LoadVR(vd), INT8_TYPE);
Value* old_value = f.ByteSwap(f.Load(ea, VEC128_TYPE)); Value* old_value = f.ByteSwap(f.Load(ea, VEC128_TYPE));
// v = (new >> eb) | (old & (ONE << (16 - eb))) // mask = FFFF... >> eb
Value* v = f.Permute(f.LoadVectorShr(eb), f.LoadZero(VEC128_TYPE), new_value, Value* mask = f.Permute(f.LoadVectorShr(eb), f.LoadZero(VEC128_TYPE),
INT8_TYPE); f.Not(f.LoadZero(VEC128_TYPE)), INT8_TYPE);
v = f.Or( Value* v = f.Or(f.And(old_value, f.Not(mask)), f.And(new_value, mask));
v, f.And(old_value,
f.Permute(f.LoadVectorShl(f.Sub(f.LoadConstant((int8_t)16), eb)),
f.Not(f.LoadZero(VEC128_TYPE)),
f.LoadZero(VEC128_TYPE), INT8_TYPE)));
// ea &= ~0xF (handled above) // ea &= ~0xF (handled above)
f.Store(ea, f.ByteSwap(v)); f.Store(ea, f.ByteSwap(v));
return 0; return 0;
@ -301,16 +299,16 @@ int InstrEmit_stvrx_(PPCHIRBuilder& f, InstrData& i, uint32_t vd, uint32_t ra,
// we could optimize this to prevent the other load/mask, in that case. // we could optimize this to prevent the other load/mask, in that case.
Value* ea = CalculateEA_0(f, ra, rb); Value* ea = CalculateEA_0(f, ra, rb);
Value* eb = f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstant((int8_t)0xF)); Value* eb = f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstant((int8_t)0xF));
Value* new_value = f.LoadVR(vd);
// ea &= ~0xF // ea &= ~0xF
ea = f.And(ea, f.LoadConstant(~0xFull)); ea = f.And(ea, f.LoadConstant(~0xFull));
// v = (old & ~mask) | ((new << (16 - eb)) & mask)
Value* new_value = f.Permute(f.LoadVectorShr(eb), f.LoadVR(vd),
f.LoadZero(VEC128_TYPE), INT8_TYPE);
Value* old_value = f.ByteSwap(f.Load(ea, VEC128_TYPE)); Value* old_value = f.ByteSwap(f.Load(ea, VEC128_TYPE));
// v = (new << (16 - eb)) | (old & (ONE >> eb)) // mask = ~FFFF... >> eb
Value* v = f.Permute(f.LoadVectorShl(f.Sub(f.LoadConstant((int8_t)16), eb)), Value* mask = f.Permute(f.LoadVectorShr(eb), f.Not(f.LoadZero(VEC128_TYPE)),
new_value, f.LoadZero(VEC128_TYPE), INT8_TYPE); f.LoadZero(VEC128_TYPE), INT8_TYPE);
v = f.Or(v, f.And(old_value, Value* v = f.Or(f.And(old_value, f.Not(mask)), f.And(new_value, mask));
f.Permute(f.LoadVectorShr(eb), f.LoadZero(VEC128_TYPE),
f.Not(f.LoadZero(VEC128_TYPE)), INT8_TYPE)));
// ea &= ~0xF (handled above) // ea &= ~0xF (handled above)
f.Store(ea, f.ByteSwap(v)); f.Store(ea, f.ByteSwap(v));
return 0; return 0;

View File

@ -7,3 +7,7 @@ Disassembly of section .text:
0000000000100000 <test_stvl_1>: 0000000000100000 <test_stvl_1>:
100000: 7c 64 05 0e stvlx v3,r4,r0 100000: 7c 64 05 0e stvlx v3,r4,r0
100004: 4e 80 00 20 blr 100004: 4e 80 00 20 blr
0000000000100008 <test_stvl_2>:
100008: 7c 64 05 0e stvlx v3,r4,r0
10000c: 4e 80 00 20 blr

View File

@ -1 +1,2 @@
0000000000000000 t test_stvl_1 0000000000000000 t test_stvl_1
0000000000000008 t test_stvl_2

View File

@ -7,3 +7,7 @@ Disassembly of section .text:
0000000000100000 <test_stvr_1>: 0000000000100000 <test_stvr_1>:
100000: 7c 64 2d 4e stvrx v3,r4,r5 100000: 7c 64 2d 4e stvrx v3,r4,r5
100004: 4e 80 00 20 blr 100004: 4e 80 00 20 blr
0000000000100008 <test_stvr_2>:
100008: 7c 64 05 4e stvrx v3,r4,r0
10000c: 4e 80 00 20 blr

View File

@ -1 +1,2 @@
0000000000000000 t test_stvr_1 0000000000000000 t test_stvr_1
0000000000000008 t test_stvr_2

View File

@ -7,3 +7,13 @@ test_stvl_1:
#_ REGISTER_OUT r4 0x1040 #_ REGISTER_OUT r4 0x1040
#_ REGISTER_OUT v3 [BE74FCBD, BD912ABA, BF317BBB, BF2D135F] #_ REGISTER_OUT v3 [BE74FCBD, BD912ABA, BF317BBB, BF2D135F]
#_ MEMORY_OUT 00001040 BE74FCBD BD912ABA BF317BBB BF2D135F #_ MEMORY_OUT 00001040 BE74FCBD BD912ABA BF317BBB BF2D135F
test_stvl_2:
#_ MEMORY_IN 00001040 00010203 04050607 08090A0B 0C0D0E0F
#_ REGISTER_IN r4 0x1044
#_ REGISTER_IN v3 [F0F1F2F3, F4F5F6F7, F8F9FAFB, FCFDFEFF]
stvlx v3, r4, r0
blr
#_ REGISTER_OUT r4 0x1044
#_ REGISTER_OUT v3 [F0F1F2F3, F4F5F6F7, F8F9FAFB, FCFDFEFF]
#_ MEMORY_OUT 00001040 00010203 F0F1F2F3 F4F5F6F7 F8F9FAFB

View File

@ -11,3 +11,13 @@ test_stvr_1:
#_ REGISTER_OUT v3 [BE74FCBD, BD912ABA, BF317BBB, BF2D135F] #_ REGISTER_OUT v3 [BE74FCBD, BD912ABA, BF317BBB, BF2D135F]
#_ MEMORY_OUT 00001040 BE74FCBD BD912ABA BF317BBB BF2D135F #_ MEMORY_OUT 00001040 BE74FCBD BD912ABA BF317BBB BF2D135F
#_ MEMORY_OUT 00001050 00000000 00000000 00000000 00000000 #_ MEMORY_OUT 00001050 00000000 00000000 00000000 00000000
test_stvr_2:
#_ MEMORY_IN 00001040 00010203 04050607 08090A0B 0C0D0E0F
#_ REGISTER_IN r4 0x1044
#_ REGISTER_IN v3 [F0F1F2F3, F4F5F6F7, F8F9FAFB, FCFDFEFF]
stvrx v3, r4, r0
blr
#_ REGISTER_OUT r4 0x1044
#_ REGISTER_OUT v3 [F0F1F2F3, F4F5F6F7, F8F9FAFB, FCFDFEFF]
#_ MEMORY_OUT 00001040 FCFDFEFF 04050607 08090A0B 0C0D0E0F