JitArm64: Optimize shifting and masking PC in slow dispatcher
Instead of shifting left by 1, we can first shift right by 2 and then left by 3. This is both faster and smaller, because we get the right shift for free with the masking and the left shift for free with the address calculation. It also happens to match the pseudocode more closely, which is always nice for readability.
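As a rough sanity check (not part of the commit), here is a minimal C++ sketch of the index arithmetic: assuming 8-byte table entries, a word-aligned PC, and a power-of-two table size, the old byte-offset formula (pc << 1) & (MASK << 3) and the new one ((pc >> 2) & MASK) << 3 agree. ELEMENTS and MASK below are illustrative stand-ins for JitBaseBlockCache::FAST_BLOCK_MAP_FALLBACK_ELEMENTS/_MASK.

    #include <cassert>
    #include <cstdint>

    // Minimal sketch (not Dolphin code): check that the old and new byte-offset
    // formulas agree for word-aligned PCs. ELEMENTS/MASK are illustrative
    // stand-ins for JitBaseBlockCache::FAST_BLOCK_MAP_FALLBACK_ELEMENTS/_MASK.
    constexpr uint64_t ELEMENTS = 0x10000;  // assumed power of two
    constexpr uint64_t MASK = ELEMENTS - 1;

    // Old: shift the PC left by 1, then AND with the mask pre-shifted left by 3.
    constexpr uint64_t offset_old(uint32_t pc)
    {
      return (uint64_t{pc} << 1) & (MASK << 3);
    }

    // New: shift right by 2 and mask (one UBFX), then scale by 8 in the load
    // (LDR's shifted register offset), i.e. the left shift by 3 comes for free.
    constexpr uint64_t offset_new(uint32_t pc)
    {
      return ((uint64_t{pc} >> 2) & MASK) << 3;
    }

    int main()
    {
      // PowerPC instructions are 4-byte aligned, so only word-aligned PCs matter.
      for (uint32_t pc = 0; pc < 0x200000; pc += 4)
        assert(offset_old(pc) == offset_new(pc));
    }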
commit 9e970bcb30
parent c9347a2a19
@@ -123,11 +123,10 @@ void JitArm64::GenerateAsm()
   ARM64Reg msr2 = ARM64Reg::W13;
 
   // iCache[(address >> 2) & iCache_Mask];
-  ORR(pc_masked, ARM64Reg::WZR,
-      LogicalImm(JitBaseBlockCache::FAST_BLOCK_MAP_FALLBACK_MASK << 3, 32));
-  AND(pc_masked, pc_masked, DISPATCHER_PC, ArithOption(DISPATCHER_PC, ShiftType::LSL, 1));
+  UBFX(pc_masked, DISPATCHER_PC, 2,
+       MathUtil::IntLog2(JitBaseBlockCache::FAST_BLOCK_MAP_FALLBACK_ELEMENTS) - 2);
   MOVP2R(cache_base, GetBlockCache()->GetFastBlockMapFallback());
-  LDR(block, cache_base, EncodeRegTo64(pc_masked));
+  LDR(block, cache_base, ArithOption(EncodeRegTo64(pc_masked), true));
   FixupBranch not_found = CBZ(block);
 
   // b.effectiveAddress != addr || b.msrBits != msr
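For reference, a small sketch of why both shifts come for free, written out as plain C++ under the usual AArch64 semantics (illustrative only, not emitter code): UBFX performs the right shift and the mask in a single instruction, and LDR with a shifted register offset applies the left shift by 3 as part of the address calculation for an 8-byte load.

    #include <cstdint>

    // UBFX Wd, Wn, #lsb, #width extracts `width` bits starting at bit `lsb`,
    // i.e. it does the right shift and the mask in one instruction.
    constexpr uint32_t ubfx32(uint32_t value, unsigned lsb, unsigned width)
    {
      const uint32_t mask = (width >= 32) ? ~0u : ((1u << width) - 1u);
      return (value >> lsb) & mask;
    }

    // LDR Xt, [Xbase, Xindex, LSL #3] loads from base + (index << 3), so the
    // left shift by 3 (8-byte table entries) costs nothing extra.
    inline uint64_t ldr_scaled(const uint64_t* base, uint64_t index)
    {
      return base[index];  // address = base + index * sizeof(uint64_t)
    }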