Short-circuiting lvrx/stvrx. Should help bad accesses in many games.
Fixes #411.
This commit is contained in:
parent
dfa5b90c36
commit
65812438c4
|
@ -245,14 +245,26 @@ XEEMITTER(lvlxl128, VX128_1(4, 1539), VX128_1)(PPCHIRBuilder& f, InstrData& i) {
|
||||||
|
|
||||||
int InstrEmit_lvrx_(PPCHIRBuilder& f, InstrData& i, uint32_t vd, uint32_t ra,
|
int InstrEmit_lvrx_(PPCHIRBuilder& f, InstrData& i, uint32_t vd, uint32_t ra,
|
||||||
uint32_t rb) {
|
uint32_t rb) {
|
||||||
|
// NOTE: if eb == 0 (so 16b aligned) then no data is loaded. This is important
|
||||||
|
// as memcpy implementations will often use this to handle the remaining <=16b of a
|
||||||
|
// buffer, which sometimes may be nothing and hang off the end of the valid
|
||||||
|
// page area. We still need to zero the resulting register, though.
|
||||||
Value* ea = CalculateEA_0(f, ra, rb);
|
Value* ea = CalculateEA_0(f, ra, rb);
|
||||||
Value* eb = f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstantInt8(0xF));
|
Value* eb = f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstantInt8(0xF));
|
||||||
|
// Skip if %16=0 (just load zero).
|
||||||
|
auto load_label = f.NewLabel();
|
||||||
|
auto end_label = f.NewLabel();
|
||||||
|
f.BranchTrue(eb, load_label);
|
||||||
|
f.StoreVR(vd, f.LoadZeroVec128());
|
||||||
|
f.Branch(end_label);
|
||||||
|
f.MarkLabel(load_label);
|
||||||
// ea &= ~0xF
|
// ea &= ~0xF
|
||||||
ea = f.And(ea, f.LoadConstantUint64(~0xFull));
|
ea = f.And(ea, f.LoadConstantUint64(~0xFull));
|
||||||
// v = (new >> (16 - eb))
|
// v = (new >> (16 - eb))
|
||||||
Value* v = f.Permute(f.LoadVectorShl(eb), f.LoadZeroVec128(),
|
Value* v = f.Permute(f.LoadVectorShl(eb), f.LoadZeroVec128(),
|
||||||
f.ByteSwap(f.Load(ea, VEC128_TYPE)), INT8_TYPE);
|
f.ByteSwap(f.Load(ea, VEC128_TYPE)), INT8_TYPE);
|
||||||
f.StoreVR(vd, v);
|
f.StoreVR(vd, v);
|
||||||
|
f.MarkLabel(end_label);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
XEEMITTER(lvrx, 0x7C00044E, X)(PPCHIRBuilder& f, InstrData& i) {
|
XEEMITTER(lvrx, 0x7C00044E, X)(PPCHIRBuilder& f, InstrData& i) {
|
||||||
|
@ -304,10 +316,15 @@ XEEMITTER(stvlxl128, VX128_1(4, 1795), VX128_1)(PPCHIRBuilder& f,
|
||||||
|
|
||||||
int InstrEmit_stvrx_(PPCHIRBuilder& f, InstrData& i, uint32_t vd, uint32_t ra,
|
int InstrEmit_stvrx_(PPCHIRBuilder& f, InstrData& i, uint32_t vd, uint32_t ra,
|
||||||
uint32_t rb) {
|
uint32_t rb) {
|
||||||
// NOTE: if eb == 0 (so 16b aligned) this equals new_value
|
// NOTE: if eb == 0 (so 16b aligned) then no data is stored. This is important
|
||||||
// we could optimize this to prevent the other load/mask, in that case.
|
// as memcpy implementations will often use this to handle the remaining <=16b of a
|
||||||
|
// buffer, which sometimes may be nothing and hang off the end of the valid
|
||||||
|
// page area.
|
||||||
Value* ea = CalculateEA_0(f, ra, rb);
|
Value* ea = CalculateEA_0(f, ra, rb);
|
||||||
Value* eb = f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstantInt8(0xF));
|
Value* eb = f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstantInt8(0xF));
|
||||||
|
// Skip if %16=0 (no data to store).
|
||||||
|
auto skip_label = f.NewLabel();
|
||||||
|
f.BranchFalse(eb, skip_label);
|
||||||
// ea &= ~0xF
|
// ea &= ~0xF
|
||||||
ea = f.And(ea, f.LoadConstantUint64(~0xFull));
|
ea = f.And(ea, f.LoadConstantUint64(~0xFull));
|
||||||
// v = (old & ~mask) | ((new << eb) & mask)
|
// v = (old & ~mask) | ((new << eb) & mask)
|
||||||
|
@ -320,6 +337,7 @@ int InstrEmit_stvrx_(PPCHIRBuilder& f, InstrData& i, uint32_t vd, uint32_t ra,
|
||||||
Value* v = f.Or(f.And(old_value, f.Not(mask)), f.And(new_value, mask));
|
Value* v = f.Or(f.And(old_value, f.Not(mask)), f.And(new_value, mask));
|
||||||
// ea &= ~0xF (handled above)
|
// ea &= ~0xF (handled above)
|
||||||
f.Store(ea, f.ByteSwap(v));
|
f.Store(ea, f.ByteSwap(v));
|
||||||
|
f.MarkLabel(skip_label);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
XEEMITTER(stvrx, 0x7C00054E, X)(PPCHIRBuilder& f, InstrData& i) {
|
XEEMITTER(stvrx, 0x7C00054E, X)(PPCHIRBuilder& f, InstrData& i) {
|
||||||
|
|
|
@ -18,3 +18,23 @@ test_lvr_1_constant:
|
||||||
#_ REGISTER_OUT r4 0x100010B7
|
#_ REGISTER_OUT r4 0x100010B7
|
||||||
#_ REGISTER_OUT r5 0x10
|
#_ REGISTER_OUT r5 0x10
|
||||||
#_ REGISTER_OUT v3 [00000000, 00000000, 000D0E10, 11121314]
|
#_ REGISTER_OUT v3 [00000000, 00000000, 000D0E10, 11121314]
|
||||||
|
|
||||||
|
test_lvr_2:
|
||||||
|
#_ REGISTER_IN r4 0x20000000
|
||||||
|
#_ REGISTER_IN r5 0x10
|
||||||
|
#_ REGISTER_IN v3 [FFFFFFFF, FFFFFFFF, FFFFFFFF, FFFFFFFF]
|
||||||
|
lvrx v3, r4, r5
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT r4 0x20000000
|
||||||
|
#_ REGISTER_OUT r5 0x10
|
||||||
|
#_ REGISTER_OUT v3 [00000000, 00000000, 00000000, 00000000]
|
||||||
|
|
||||||
|
test_lvr_2_constant:
|
||||||
|
#_ REGISTER_IN v3 [FFFFFFFF, FFFFFFFF, FFFFFFFF, FFFFFFFF]
|
||||||
|
lis r4, 0x2000
|
||||||
|
li r5, 0x10
|
||||||
|
lvrx v3, r4, r5
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT r4 0x20000000
|
||||||
|
#_ REGISTER_OUT r5 0x10
|
||||||
|
#_ REGISTER_OUT v3 [00000000, 00000000, 00000000, 00000000]
|
||||||
|
|
|
@ -47,3 +47,21 @@ test_stvr_2_constant:
|
||||||
#_ REGISTER_OUT r4 0x10001044
|
#_ REGISTER_OUT r4 0x10001044
|
||||||
#_ REGISTER_OUT v3 [F0F1F2F3, F4F5F6F7, F8F9FAFB, FCFDFEFF]
|
#_ REGISTER_OUT v3 [F0F1F2F3, F4F5F6F7, F8F9FAFB, FCFDFEFF]
|
||||||
#_ MEMORY_OUT 10001040 FCFDFEFF 04050607 08090A0B 0C0D0E0F
|
#_ MEMORY_OUT 10001040 FCFDFEFF 04050607 08090A0B 0C0D0E0F
|
||||||
|
|
||||||
|
test_stvr_3:
|
||||||
|
#_ REGISTER_IN r4 0x10010000
|
||||||
|
#_ REGISTER_IN r5 0x0
|
||||||
|
#_ REGISTER_IN v3 [BE74FCBD, BD912ABA, BF317BBB, BF2D135F]
|
||||||
|
stvrx v3, r4, r5
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT r4 0x10010000
|
||||||
|
#_ REGISTER_OUT r5 0x0
|
||||||
|
#_ REGISTER_OUT v3 [BE74FCBD, BD912ABA, BF317BBB, BF2D135F]
|
||||||
|
|
||||||
|
test_stvr_3_constant:
|
||||||
|
#_ REGISTER_IN v3 [BE74FCBD, BD912ABA, BF317BBB, BF2D135F]
|
||||||
|
lis r4, 0x1001
|
||||||
|
stvrx v3, r4, r0
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT r4 0x10010000
|
||||||
|
#_ REGISTER_OUT v3 [BE74FCBD, BD912ABA, BF317BBB, BF2D135F]
|
||||||
|
|
Loading…
Reference in New Issue