Short-circuiting lvrx/stvrx. Should help bad accesses in many games.
Fixes #411.
This commit is contained in:
parent
dfa5b90c36
commit
65812438c4
|
@ -245,14 +245,26 @@ XEEMITTER(lvlxl128, VX128_1(4, 1539), VX128_1)(PPCHIRBuilder& f, InstrData& i) {
|
|||
|
||||
int InstrEmit_lvrx_(PPCHIRBuilder& f, InstrData& i, uint32_t vd, uint32_t ra,
|
||||
uint32_t rb) {
|
||||
// NOTE: if eb == 0 (so 16b aligned) then no data is loaded. This is important
|
||||
// as often times memcpy's will use this to handle the remaining <=16b of a
|
||||
// buffer, which sometimes may be nothing and hang off the end of the valid
|
||||
// page area. We still need to zero the resulting register, though.
|
||||
Value* ea = CalculateEA_0(f, ra, rb);
|
||||
Value* eb = f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstantInt8(0xF));
|
||||
// Skip if %16=0 (just load zero).
|
||||
auto load_label = f.NewLabel();
|
||||
auto end_label = f.NewLabel();
|
||||
f.BranchTrue(eb, load_label);
|
||||
f.StoreVR(vd, f.LoadZeroVec128());
|
||||
f.Branch(end_label);
|
||||
f.MarkLabel(load_label);
|
||||
// ea &= ~0xF
|
||||
ea = f.And(ea, f.LoadConstantUint64(~0xFull));
|
||||
// v = (new >> (16 - eb))
|
||||
Value* v = f.Permute(f.LoadVectorShl(eb), f.LoadZeroVec128(),
|
||||
f.ByteSwap(f.Load(ea, VEC128_TYPE)), INT8_TYPE);
|
||||
f.StoreVR(vd, v);
|
||||
f.MarkLabel(end_label);
|
||||
return 0;
|
||||
}
|
||||
XEEMITTER(lvrx, 0x7C00044E, X)(PPCHIRBuilder& f, InstrData& i) {
|
||||
|
@ -304,10 +316,15 @@ XEEMITTER(stvlxl128, VX128_1(4, 1795), VX128_1)(PPCHIRBuilder& f,
|
|||
|
||||
int InstrEmit_stvrx_(PPCHIRBuilder& f, InstrData& i, uint32_t vd, uint32_t ra,
|
||||
uint32_t rb) {
|
||||
// NOTE: if eb == 0 (so 16b aligned) this equals new_value
|
||||
// we could optimize this to prevent the other load/mask, in that case.
|
||||
// NOTE: if eb == 0 (so 16b aligned) then no data is loaded. This is important
|
||||
// as often times memcpy's will use this to handle the remaining <=16b of a
|
||||
// buffer, which sometimes may be nothing and hang off the end of the valid
|
||||
// page area.
|
||||
Value* ea = CalculateEA_0(f, ra, rb);
|
||||
Value* eb = f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstantInt8(0xF));
|
||||
// Skip if %16=0 (no data to store).
|
||||
auto skip_label = f.NewLabel();
|
||||
f.BranchFalse(eb, skip_label);
|
||||
// ea &= ~0xF
|
||||
ea = f.And(ea, f.LoadConstantUint64(~0xFull));
|
||||
// v = (old & ~mask) | ((new << eb) & mask)
|
||||
|
@ -320,6 +337,7 @@ int InstrEmit_stvrx_(PPCHIRBuilder& f, InstrData& i, uint32_t vd, uint32_t ra,
|
|||
Value* v = f.Or(f.And(old_value, f.Not(mask)), f.And(new_value, mask));
|
||||
// ea &= ~0xF (handled above)
|
||||
f.Store(ea, f.ByteSwap(v));
|
||||
f.MarkLabel(skip_label);
|
||||
return 0;
|
||||
}
|
||||
XEEMITTER(stvrx, 0x7C00054E, X)(PPCHIRBuilder& f, InstrData& i) {
|
||||
|
|
|
@ -18,3 +18,23 @@ test_lvr_1_constant:
|
|||
#_ REGISTER_OUT r4 0x100010B7
|
||||
#_ REGISTER_OUT r5 0x10
|
||||
#_ REGISTER_OUT v3 [00000000, 00000000, 000D0E10, 11121314]
|
||||
|
||||
test_lvr_2:
|
||||
#_ REGISTER_IN r4 0x20000000
|
||||
#_ REGISTER_IN r5 0x10
|
||||
#_ REGISTER_IN v3 [FFFFFFFF, FFFFFFFF, FFFFFFFF, FFFFFFFF]
|
||||
lvrx v3, r4, r5
|
||||
blr
|
||||
#_ REGISTER_OUT r4 0x20000000
|
||||
#_ REGISTER_OUT r5 0x10
|
||||
#_ REGISTER_OUT v3 [00000000, 00000000, 00000000, 00000000]
|
||||
|
||||
test_lvr_2_constant:
|
||||
#_ REGISTER_IN v3 [FFFFFFFF, FFFFFFFF, FFFFFFFF, FFFFFFFF]
|
||||
lis r4, 0x2000
|
||||
li r5, 0x10
|
||||
lvrx v3, r4, r5
|
||||
blr
|
||||
#_ REGISTER_OUT r4 0x20000000
|
||||
#_ REGISTER_OUT r5 0x10
|
||||
#_ REGISTER_OUT v3 [00000000, 00000000, 00000000, 00000000]
|
||||
|
|
|
@ -47,3 +47,21 @@ test_stvr_2_constant:
|
|||
#_ REGISTER_OUT r4 0x10001044
|
||||
#_ REGISTER_OUT v3 [F0F1F2F3, F4F5F6F7, F8F9FAFB, FCFDFEFF]
|
||||
#_ MEMORY_OUT 10001040 FCFDFEFF 04050607 08090A0B 0C0D0E0F
|
||||
|
||||
test_stvr_3:
|
||||
#_ REGISTER_IN r4 0x10010000
|
||||
#_ REGISTER_IN r5 0x0
|
||||
#_ REGISTER_IN v3 [BE74FCBD, BD912ABA, BF317BBB, BF2D135F]
|
||||
stvrx v3, r4, r5
|
||||
blr
|
||||
#_ REGISTER_OUT r4 0x10010000
|
||||
#_ REGISTER_OUT r5 0x0
|
||||
#_ REGISTER_OUT v3 [BE74FCBD, BD912ABA, BF317BBB, BF2D135F]
|
||||
|
||||
test_stvr_3_constant:
|
||||
#_ REGISTER_IN v3 [BE74FCBD, BD912ABA, BF317BBB, BF2D135F]
|
||||
lis r4, 0x1001
|
||||
stvrx v3, r4, r0
|
||||
blr
|
||||
#_ REGISTER_OUT r4 0x10010000
|
||||
#_ REGISTER_OUT v3 [BE74FCBD, BD912ABA, BF317BBB, BF2D135F]
|
||||
|
|
Loading…
Reference in New Issue