Short-circuiting lvrx/stvrx when the effective address is 16-byte aligned. Should help avoid bad memory accesses in many games.

Fixes #411.
This commit is contained in:
Ben Vanik 2015-08-30 16:38:01 -07:00
parent dfa5b90c36
commit 65812438c4
3 changed files with 58 additions and 2 deletions

View File

@ -245,14 +245,26 @@ XEEMITTER(lvlxl128, VX128_1(4, 1539), VX128_1)(PPCHIRBuilder& f, InstrData& i) {
// Emits the HIR for an lvrx-style load into vector register vd.
//
// f      - builder that receives the emitted instructions.
// i      - decoded instruction data (not referenced in this body).
// vd     - index of the destination vector register.
// ra, rb - register indices passed to CalculateEA_0 to form the address.
//
// Two paths are emitted: when the low four bits of the address are zero the
// destination is simply cleared; otherwise a 16-byte load from the aligned
// address is permuted against a zero vector and stored to vd.
// Always returns 0.
int InstrEmit_lvrx_(PPCHIRBuilder& f, InstrData& i, uint32_t vd, uint32_t ra,
uint32_t rb) {
// NOTE: if eb == 0 (so 16b aligned) then no data is loaded. This is important
// as oftentimes memcpys will use this to handle the remaining <=16b of a
// buffer, which sometimes may be nothing and hang off the end of the valid
// page area. We still need to zero the resulting register, though.
Value* ea = CalculateEA_0(f, ra, rb);
// eb = ea & 0xF, i.e. the byte offset of ea within its 16-byte block.
Value* eb = f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstantInt8(0xF));
// Skip if %16=0 (just load zero).
auto load_label = f.NewLabel();
auto end_label = f.NewLabel();
// A non-zero eb jumps to the real load; otherwise fall through, write a zero
// vector to vd and jump past the load so memory is never touched.
f.BranchTrue(eb, load_label);
f.StoreVR(vd, f.LoadZeroVec128());
f.Branch(end_label);
f.MarkLabel(load_label);
// ea &= ~0xF
ea = f.And(ea, f.LoadConstantUint64(~0xFull));
// v = (new >> (16 - eb))
// Built as a byte permute (control vector from LoadVectorShl(eb)) over a zero
// vector and the byte-swapped 16 bytes loaded from the aligned address.
Value* v = f.Permute(f.LoadVectorShl(eb), f.LoadZeroVec128(),
f.ByteSwap(f.Load(ea, VEC128_TYPE)), INT8_TYPE);
f.StoreVR(vd, v);
// Both paths rejoin here.
f.MarkLabel(end_label);
return 0;
}
XEEMITTER(lvrx, 0x7C00044E, X)(PPCHIRBuilder& f, InstrData& i) {
@ -304,10 +316,15 @@ XEEMITTER(stvlxl128, VX128_1(4, 1795), VX128_1)(PPCHIRBuilder& f,
int InstrEmit_stvrx_(PPCHIRBuilder& f, InstrData& i, uint32_t vd, uint32_t ra,
uint32_t rb) {
// NOTE: if eb == 0 (so 16b aligned) this equals new_value
// we could optimize this to prevent the other load/mask, in that case.
// NOTE: if eb == 0 (so 16b aligned) then no data is stored. This is important
// as oftentimes memcpys will use this to handle the remaining <=16b of a
// buffer, which sometimes may be nothing and hang off the end of the valid
// page area.
Value* ea = CalculateEA_0(f, ra, rb);
Value* eb = f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstantInt8(0xF));
// Skip if %16=0 (no data to store).
auto skip_label = f.NewLabel();
f.BranchFalse(eb, skip_label);
// ea &= ~0xF
ea = f.And(ea, f.LoadConstantUint64(~0xFull));
// v = (old & ~mask) | ((new << eb) & mask)
@ -320,6 +337,7 @@ int InstrEmit_stvrx_(PPCHIRBuilder& f, InstrData& i, uint32_t vd, uint32_t ra,
Value* v = f.Or(f.And(old_value, f.Not(mask)), f.And(new_value, mask));
// ea &= ~0xF (handled above)
f.Store(ea, f.ByteSwap(v));
f.MarkLabel(skip_label);
return 0;
}
XEEMITTER(stvrx, 0x7C00054E, X)(PPCHIRBuilder& f, InstrData& i) {

View File

@ -18,3 +18,23 @@ test_lvr_1_constant:
#_ REGISTER_OUT r4 0x100010B7
#_ REGISTER_OUT r5 0x10
#_ REGISTER_OUT v3 [00000000, 00000000, 000D0E10, 11121314]
# lvrx with a 16-byte-aligned effective address (r4 + r5 = 0x20000010).
# v3 is preloaded with all ones and is expected to come back fully zeroed,
# while r4 and r5 are left unchanged.
# NOTE(review): no MEMORY_IN is declared for this address, so it is presumably
# unmapped and the test doubles as a check that no load is attempted - confirm.
test_lvr_2:
#_ REGISTER_IN r4 0x20000000
#_ REGISTER_IN r5 0x10
#_ REGISTER_IN v3 [FFFFFFFF, FFFFFFFF, FFFFFFFF, FFFFFFFF]
lvrx v3, r4, r5
blr
#_ REGISTER_OUT r4 0x20000000
#_ REGISTER_OUT r5 0x10
#_ REGISTER_OUT v3 [00000000, 00000000, 00000000, 00000000]
# Same aligned-address lvrx case as the register-input variant, but r4 and r5
# are materialised with lis/li inside the test body instead of REGISTER_IN.
# Expected outputs: r4 = 0x20000000, r5 = 0x10, v3 fully zeroed.
test_lvr_2_constant:
#_ REGISTER_IN v3 [FFFFFFFF, FFFFFFFF, FFFFFFFF, FFFFFFFF]
lis r4, 0x2000
li r5, 0x10
lvrx v3, r4, r5
blr
#_ REGISTER_OUT r4 0x20000000
#_ REGISTER_OUT r5 0x10
#_ REGISTER_OUT v3 [00000000, 00000000, 00000000, 00000000]

View File

@ -47,3 +47,21 @@ test_stvr_2_constant:
#_ REGISTER_OUT r4 0x10001044
#_ REGISTER_OUT v3 [F0F1F2F3, F4F5F6F7, F8F9FAFB, FCFDFEFF]
#_ MEMORY_OUT 10001040 FCFDFEFF 04050607 08090A0B 0C0D0E0F
# stvrx with a 16-byte-aligned effective address (r4 + r5 = 0x10010000).
# Only register state is asserted: r4, r5 and v3 must be unchanged afterwards.
# NOTE(review): there is no MEMORY_IN/MEMORY_OUT here, so the "nothing is
# stored" behaviour is only checked indirectly (presumably the address is
# unmapped and a store would fault) - confirm.
test_stvr_3:
#_ REGISTER_IN r4 0x10010000
#_ REGISTER_IN r5 0x0
#_ REGISTER_IN v3 [BE74FCBD, BD912ABA, BF317BBB, BF2D135F]
stvrx v3, r4, r5
blr
#_ REGISTER_OUT r4 0x10010000
#_ REGISTER_OUT r5 0x0
#_ REGISTER_OUT v3 [BE74FCBD, BD912ABA, BF317BBB, BF2D135F]
# Aligned-address stvrx case with r4 materialised via lis inside the test body.
# Expected outputs: r4 = 0x10010000 and v3 unchanged.
# NOTE(review): the index operand is r0, which this test never initialises;
# the aligned-address expectation presumably relies on the harness starting
# with r0 == 0 - confirm, or load an explicit zero into the index register.
test_stvr_3_constant:
#_ REGISTER_IN v3 [BE74FCBD, BD912ABA, BF317BBB, BF2D135F]
lis r4, 0x1001
stvrx v3, r4, r0
blr
#_ REGISTER_OUT r4 0x10010000
#_ REGISTER_OUT v3 [BE74FCBD, BD912ABA, BF317BBB, BF2D135F]