Fixing stvl/stvr.
This commit is contained in:
parent
9649eb0e8d
commit
5bec5bb203
|
@ -265,18 +265,16 @@ int InstrEmit_stvlx_(PPCHIRBuilder& f, InstrData& i, uint32_t vd, uint32_t ra,
|
||||||
// we could optimize this to prevent the other load/mask, in that case.
|
// we could optimize this to prevent the other load/mask, in that case.
|
||||||
Value* ea = CalculateEA_0(f, ra, rb);
|
Value* ea = CalculateEA_0(f, ra, rb);
|
||||||
Value* eb = f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstant((int8_t)0xF));
|
Value* eb = f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstant((int8_t)0xF));
|
||||||
Value* new_value = f.LoadVR(vd);
|
|
||||||
// ea &= ~0xF
|
// ea &= ~0xF
|
||||||
ea = f.And(ea, f.LoadConstant(~0xFull));
|
ea = f.And(ea, f.LoadConstant(~0xFull));
|
||||||
|
// v = (old & ~mask) | ((new >> eb) & mask)
|
||||||
|
Value* new_value = f.Permute(f.LoadVectorShr(eb), f.LoadZero(VEC128_TYPE),
|
||||||
|
f.LoadVR(vd), INT8_TYPE);
|
||||||
Value* old_value = f.ByteSwap(f.Load(ea, VEC128_TYPE));
|
Value* old_value = f.ByteSwap(f.Load(ea, VEC128_TYPE));
|
||||||
// v = (new >> eb) | (old & (ONE << (16 - eb)))
|
// mask = FFFF... >> eb
|
||||||
Value* v = f.Permute(f.LoadVectorShr(eb), f.LoadZero(VEC128_TYPE), new_value,
|
Value* mask = f.Permute(f.LoadVectorShr(eb), f.LoadZero(VEC128_TYPE),
|
||||||
INT8_TYPE);
|
f.Not(f.LoadZero(VEC128_TYPE)), INT8_TYPE);
|
||||||
v = f.Or(
|
Value* v = f.Or(f.And(old_value, f.Not(mask)), f.And(new_value, mask));
|
||||||
v, f.And(old_value,
|
|
||||||
f.Permute(f.LoadVectorShl(f.Sub(f.LoadConstant((int8_t)16), eb)),
|
|
||||||
f.Not(f.LoadZero(VEC128_TYPE)),
|
|
||||||
f.LoadZero(VEC128_TYPE), INT8_TYPE)));
|
|
||||||
// ea &= ~0xF (handled above)
|
// ea &= ~0xF (handled above)
|
||||||
f.Store(ea, f.ByteSwap(v));
|
f.Store(ea, f.ByteSwap(v));
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -301,16 +299,16 @@ int InstrEmit_stvrx_(PPCHIRBuilder& f, InstrData& i, uint32_t vd, uint32_t ra,
|
||||||
// we could optimize this to prevent the other load/mask, in that case.
|
// we could optimize this to prevent the other load/mask, in that case.
|
||||||
Value* ea = CalculateEA_0(f, ra, rb);
|
Value* ea = CalculateEA_0(f, ra, rb);
|
||||||
Value* eb = f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstant((int8_t)0xF));
|
Value* eb = f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstant((int8_t)0xF));
|
||||||
Value* new_value = f.LoadVR(vd);
|
|
||||||
// ea &= ~0xF
|
// ea &= ~0xF
|
||||||
ea = f.And(ea, f.LoadConstant(~0xFull));
|
ea = f.And(ea, f.LoadConstant(~0xFull));
|
||||||
|
// v = (old & ~mask) | ((new << eb) & mask)
|
||||||
|
Value* new_value = f.Permute(f.LoadVectorShr(eb), f.LoadVR(vd),
|
||||||
|
f.LoadZero(VEC128_TYPE), INT8_TYPE);
|
||||||
Value* old_value = f.ByteSwap(f.Load(ea, VEC128_TYPE));
|
Value* old_value = f.ByteSwap(f.Load(ea, VEC128_TYPE));
|
||||||
// v = (new << (16 - eb)) | (old & (ONE >> eb))
|
// mask = ~FFFF... >> eb
|
||||||
Value* v = f.Permute(f.LoadVectorShl(f.Sub(f.LoadConstant((int8_t)16), eb)),
|
Value* mask = f.Permute(f.LoadVectorShr(eb), f.Not(f.LoadZero(VEC128_TYPE)),
|
||||||
new_value, f.LoadZero(VEC128_TYPE), INT8_TYPE);
|
f.LoadZero(VEC128_TYPE), INT8_TYPE);
|
||||||
v = f.Or(v, f.And(old_value,
|
Value* v = f.Or(f.And(old_value, f.Not(mask)), f.And(new_value, mask));
|
||||||
f.Permute(f.LoadVectorShr(eb), f.LoadZero(VEC128_TYPE),
|
|
||||||
f.Not(f.LoadZero(VEC128_TYPE)), INT8_TYPE)));
|
|
||||||
// ea &= ~0xF (handled above)
|
// ea &= ~0xF (handled above)
|
||||||
f.Store(ea, f.ByteSwap(v));
|
f.Store(ea, f.ByteSwap(v));
|
||||||
return 0;
|
return 0;
|
||||||
|
|
Binary file not shown.
|
@ -7,3 +7,7 @@ Disassembly of section .text:
|
||||||
0000000000100000 <test_stvl_1>:
|
0000000000100000 <test_stvl_1>:
|
||||||
100000: 7c 64 05 0e stvlx v3,r4,r0
|
100000: 7c 64 05 0e stvlx v3,r4,r0
|
||||||
100004: 4e 80 00 20 blr
|
100004: 4e 80 00 20 blr
|
||||||
|
|
||||||
|
0000000000100008 <test_stvl_2>:
|
||||||
|
100008: 7c 64 05 0e stvlx v3,r4,r0
|
||||||
|
10000c: 4e 80 00 20 blr
|
||||||
|
|
|
@ -1 +1,2 @@
|
||||||
0000000000000000 t test_stvl_1
|
0000000000000000 t test_stvl_1
|
||||||
|
0000000000000008 t test_stvl_2
|
||||||
|
|
Binary file not shown.
|
@ -7,3 +7,7 @@ Disassembly of section .text:
|
||||||
0000000000100000 <test_stvr_1>:
|
0000000000100000 <test_stvr_1>:
|
||||||
100000: 7c 64 2d 4e stvrx v3,r4,r5
|
100000: 7c 64 2d 4e stvrx v3,r4,r5
|
||||||
100004: 4e 80 00 20 blr
|
100004: 4e 80 00 20 blr
|
||||||
|
|
||||||
|
0000000000100008 <test_stvr_2>:
|
||||||
|
100008: 7c 64 05 4e stvrx v3,r4,r0
|
||||||
|
10000c: 4e 80 00 20 blr
|
||||||
|
|
|
@ -1 +1,2 @@
|
||||||
0000000000000000 t test_stvr_1
|
0000000000000000 t test_stvr_1
|
||||||
|
0000000000000008 t test_stvr_2
|
||||||
|
|
|
@ -7,3 +7,13 @@ test_stvl_1:
|
||||||
#_ REGISTER_OUT r4 0x1040
|
#_ REGISTER_OUT r4 0x1040
|
||||||
#_ REGISTER_OUT v3 [BE74FCBD, BD912ABA, BF317BBB, BF2D135F]
|
#_ REGISTER_OUT v3 [BE74FCBD, BD912ABA, BF317BBB, BF2D135F]
|
||||||
#_ MEMORY_OUT 00001040 BE74FCBD BD912ABA BF317BBB BF2D135F
|
#_ MEMORY_OUT 00001040 BE74FCBD BD912ABA BF317BBB BF2D135F
|
||||||
|
|
||||||
|
test_stvl_2:
|
||||||
|
#_ MEMORY_IN 00001040 00010203 04050607 08090A0B 0C0D0E0F
|
||||||
|
#_ REGISTER_IN r4 0x1044
|
||||||
|
#_ REGISTER_IN v3 [F0F1F2F3, F4F5F6F7, F8F9FAFB, FCFDFEFF]
|
||||||
|
stvlx v3, r4, r0
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT r4 0x1044
|
||||||
|
#_ REGISTER_OUT v3 [F0F1F2F3, F4F5F6F7, F8F9FAFB, FCFDFEFF]
|
||||||
|
#_ MEMORY_OUT 00001040 00010203 F0F1F2F3 F4F5F6F7 F8F9FAFB
|
||||||
|
|
|
@ -11,3 +11,13 @@ test_stvr_1:
|
||||||
#_ REGISTER_OUT v3 [BE74FCBD, BD912ABA, BF317BBB, BF2D135F]
|
#_ REGISTER_OUT v3 [BE74FCBD, BD912ABA, BF317BBB, BF2D135F]
|
||||||
#_ MEMORY_OUT 00001040 BE74FCBD BD912ABA BF317BBB BF2D135F
|
#_ MEMORY_OUT 00001040 BE74FCBD BD912ABA BF317BBB BF2D135F
|
||||||
#_ MEMORY_OUT 00001050 00000000 00000000 00000000 00000000
|
#_ MEMORY_OUT 00001050 00000000 00000000 00000000 00000000
|
||||||
|
|
||||||
|
test_stvr_2:
|
||||||
|
#_ MEMORY_IN 00001040 00010203 04050607 08090A0B 0C0D0E0F
|
||||||
|
#_ REGISTER_IN r4 0x1044
|
||||||
|
#_ REGISTER_IN v3 [F0F1F2F3, F4F5F6F7, F8F9FAFB, FCFDFEFF]
|
||||||
|
stvrx v3, r4, r0
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT r4 0x1044
|
||||||
|
#_ REGISTER_OUT v3 [F0F1F2F3, F4F5F6F7, F8F9FAFB, FCFDFEFF]
|
||||||
|
#_ MEMORY_OUT 00001040 FCFDFEFF 04050607 08090A0B 0C0D0E0F
|
||||||
|
|
Loading…
Reference in New Issue