Jit: Perform BAT lookup in dcbf/dcbi/dcbst
When 66b992c
fixed https://bugs.dolphin-emu.org/issues/12133,
it did so by removing the broken address calculation entirely and
always using the slow path. This caused a performance regression,
https://bugs.dolphin-emu.org/issues/12477.
This commit instead replaces the broken address calculation with
a BAT lookup. If the BAT lookup succeeds, we can use the old fast
path. Otherwise we use the slow path.
Intends to improve https://bugs.dolphin-emu.org/issues/12477.
This commit is contained in:
parent
b84a0704cd
commit
92d1d60ff1
|
@ -240,31 +240,46 @@ void Jit64::dcbx(UGeckoInstruction inst)
|
||||||
RCX64Reg tmp = gpr.Scratch();
|
RCX64Reg tmp = gpr.Scratch();
|
||||||
RegCache::Realize(Ra, Rb, tmp);
|
RegCache::Realize(Ra, Rb, tmp);
|
||||||
|
|
||||||
MOV_sum(32, addr, Ra, Rb);
|
// Translate effective address to physical address.
|
||||||
|
MOV_sum(32, value, Ra, Rb);
|
||||||
|
FixupBranch bat_lookup_failed;
|
||||||
|
if (MSR.IR)
|
||||||
|
{
|
||||||
|
MOV(32, R(addr), R(value));
|
||||||
|
bat_lookup_failed = BATAddressLookup(value, tmp);
|
||||||
|
AND(32, R(addr), Imm32(0x0001ffff));
|
||||||
|
AND(32, R(value), Imm32(0xfffe0000));
|
||||||
|
OR(32, R(value), R(addr));
|
||||||
|
}
|
||||||
|
MOV(32, R(addr), R(value));
|
||||||
|
|
||||||
// Check whether a JIT cache line needs to be invalidated.
|
// Check whether a JIT cache line needs to be invalidated.
|
||||||
LEA(32, value, MScaled(addr, SCALE_8, 0)); // addr << 3 (masks the first 3 bits)
|
SHR(32, R(value), Imm8(5 + 5)); // >> 5 for cache line size, >> 5 for width of bitset
|
||||||
SHR(32, R(value), Imm8(3 + 5 + 5)); // >> 5 for cache line size, >> 5 for width of bitset
|
|
||||||
MOV(64, R(tmp), ImmPtr(GetBlockCache()->GetBlockBitSet()));
|
MOV(64, R(tmp), ImmPtr(GetBlockCache()->GetBlockBitSet()));
|
||||||
MOV(32, R(value), MComplex(tmp, value, SCALE_4, 0));
|
MOV(32, R(value), MComplex(tmp, value, SCALE_4, 0));
|
||||||
SHR(32, R(addr), Imm8(5));
|
SHR(32, R(addr), Imm8(5));
|
||||||
BT(32, R(value), R(addr));
|
BT(32, R(value), R(addr));
|
||||||
|
FixupBranch invalidate_needed = J_CC(CC_C, true);
|
||||||
|
|
||||||
FixupBranch c = J_CC(CC_C, true);
|
|
||||||
SwitchToFarCode();
|
SwitchToFarCode();
|
||||||
SetJumpTarget(c);
|
SetJumpTarget(invalidate_needed);
|
||||||
|
SHL(32, R(addr), Imm8(5));
|
||||||
|
if (MSR.IR)
|
||||||
|
SetJumpTarget(bat_lookup_failed);
|
||||||
|
|
||||||
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
||||||
|
registersInUse[X64Reg(tmp)] = false;
|
||||||
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
||||||
MOV(32, R(ABI_PARAM1), R(addr));
|
MOV(32, R(ABI_PARAM1), R(addr));
|
||||||
SHL(32, R(ABI_PARAM1), Imm8(5));
|
|
||||||
MOV(32, R(ABI_PARAM2), Imm32(32));
|
MOV(32, R(ABI_PARAM2), Imm32(32));
|
||||||
XOR(32, R(ABI_PARAM3), R(ABI_PARAM3));
|
XOR(32, R(ABI_PARAM3), R(ABI_PARAM3));
|
||||||
ABI_CallFunction(JitInterface::InvalidateICache);
|
ABI_CallFunction(JitInterface::InvalidateICache);
|
||||||
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
|
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
|
||||||
asm_routines.ResetStack(*this);
|
asm_routines.ResetStack(*this);
|
||||||
c = J(true);
|
|
||||||
|
FixupBranch done = J(true);
|
||||||
SwitchToNearCode();
|
SwitchToNearCode();
|
||||||
SetJumpTarget(c);
|
SetJumpTarget(done);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::dcbt(UGeckoInstruction inst)
|
void Jit64::dcbt(UGeckoInstruction inst)
|
||||||
|
|
|
@ -91,6 +91,16 @@ void EmuCodeBlock::SwitchToNearCode()
|
||||||
SetCodePtr(m_near_code, m_near_code_end, m_near_code_write_failed);
|
SetCodePtr(m_near_code, m_near_code_end, m_near_code_write_failed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits a data-BAT table lookup for the effective address held in addr.
// addr is shifted right by PowerPC::BAT_INDEX_SHIFT, used as an index into
// PowerPC::dbat_table, and then overwritten with the loaded table entry.
// tmp is clobbered: it holds the table base pointer.
// Returns a branch that is taken when PowerPC::BAT_MAPPED_BIT is clear in the
// loaded entry (lookup failed); the caller must give it a target.
FixupBranch EmuCodeBlock::BATAddressLookup(X64Reg addr, X64Reg tmp)
{
  MOV(64, R(tmp), ImmPtr(&PowerPC::dbat_table[0]));
  SHR(32, R(addr), Imm8(PowerPC::BAT_INDEX_SHIFT));
  MOV(32, R(addr), MComplex(tmp, addr, SCALE_4, 0));
  BT(32, R(addr), Imm8(IntLog2(PowerPC::BAT_MAPPED_BIT)));

  // BT copies the tested bit into CF and leaves ZF unaffected, so "not mapped"
  // must be detected with CC_NC. Branching on ZF would act on the stale result
  // of the SHR above rather than on the mapped bit.
  return J_CC(CC_NC, m_far_code.Enabled());
}
|
||||||
|
|
||||||
FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_addr,
|
FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_addr,
|
||||||
BitSet32 registers_in_use)
|
BitSet32 registers_in_use)
|
||||||
{
|
{
|
||||||
|
|
|
@ -49,6 +49,10 @@ public:
|
||||||
return Gen::M(m_const_pool.GetConstant(&value, sizeof(T), N, index));
|
return Gen::M(m_const_pool.GetConstant(&value, sizeof(T), N, index));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Writes upper 15 bits of physical address to addr and clobbers the lower 17 bits of addr.
|
||||||
|
// Jumps to the returned FixupBranch if lookup fails.
|
||||||
|
Gen::FixupBranch BATAddressLookup(Gen::X64Reg addr, Gen::X64Reg tmp);
|
||||||
|
|
||||||
Gen::FixupBranch CheckIfSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr,
|
Gen::FixupBranch CheckIfSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr,
|
||||||
BitSet32 registers_in_use);
|
BitSet32 registers_in_use);
|
||||||
void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize,
|
void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize,
|
||||||
|
|
|
@ -229,6 +229,10 @@ protected:
|
||||||
// Loadstore routines
|
// Loadstore routines
|
||||||
void SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update);
|
void SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update);
|
||||||
void SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset);
|
void SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset);
|
||||||
|
// If lookup succeeds, writes upper 15 bits of physical address to addr_out. If not,
|
||||||
|
// jumps to the returned FixupBranch. Clobbers tmp and the 17 lower bits of addr_out.
|
||||||
|
Arm64Gen::FixupBranch BATAddressLookup(Arm64Gen::ARM64Reg addr_out, Arm64Gen::ARM64Reg addr_in,
|
||||||
|
Arm64Gen::ARM64Reg tmp);
|
||||||
|
|
||||||
void DoJit(u32 em_address, JitBlock* b, u32 nextPC);
|
void DoJit(u32 em_address, JitBlock* b, u32 nextPC);
|
||||||
|
|
||||||
|
|
|
@ -274,6 +274,19 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
|
||||||
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits a data-BAT table lookup: indexes PowerPC::dbat_table with
// addr_in >> PowerPC::BAT_INDEX_SHIFT and leaves the loaded entry in addr_out.
// tmp is overwritten with the table base pointer.
// The returned branch is taken when PowerPC::BAT_MAPPED_BIT is clear in the
// loaded entry; the caller is responsible for setting its target.
FixupBranch JitArm64::BATAddressLookup(ARM64Reg addr_out, ARM64Reg addr_in, ARM64Reg tmp)
{
  const ARM64Reg table_base = EncodeRegTo64(tmp);

  MOVP2R(table_base, PowerPC::dbat_table.data());
  LSR(addr_out, addr_in, PowerPC::BAT_INDEX_SHIFT);
  LDR(addr_out, table_base, ArithOption(addr_out, true));

  // Hop over the failure branch when the entry is marked as mapped.
  FixupBranch entry_mapped = TBNZ(addr_out, IntLog2(PowerPC::BAT_MAPPED_BIT));
  FixupBranch lookup_failed = B();
  SetJumpTarget(entry_mapped);

  return lookup_failed;
}
|
||||||
|
|
||||||
void JitArm64::lXX(UGeckoInstruction inst)
|
void JitArm64::lXX(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
|
@ -539,46 +552,59 @@ void JitArm64::dcbx(UGeckoInstruction inst)
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(bJITLoadStoreOff);
|
JITDISABLE(bJITLoadStoreOff);
|
||||||
|
|
||||||
gpr.Lock(ARM64Reg::W30);
|
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
|
||||||
|
|
||||||
ARM64Reg addr = gpr.GetReg();
|
ARM64Reg effective_addr = ARM64Reg::W0;
|
||||||
|
ARM64Reg physical_addr = MSR.IR ? gpr.GetReg() : effective_addr;
|
||||||
ARM64Reg value = gpr.GetReg();
|
ARM64Reg value = gpr.GetReg();
|
||||||
ARM64Reg WA = ARM64Reg::W30;
|
ARM64Reg WA = ARM64Reg::W30;
|
||||||
|
|
||||||
u32 a = inst.RA, b = inst.RB;
|
u32 a = inst.RA, b = inst.RB;
|
||||||
|
|
||||||
if (a)
|
if (a)
|
||||||
ADD(addr, gpr.R(a), gpr.R(b));
|
ADD(effective_addr, gpr.R(a), gpr.R(b));
|
||||||
else
|
else
|
||||||
MOV(addr, gpr.R(b));
|
MOV(effective_addr, gpr.R(b));
|
||||||
|
|
||||||
|
// Translate effective address to physical address.
|
||||||
|
FixupBranch bat_lookup_failed;
|
||||||
|
if (MSR.IR)
|
||||||
|
{
|
||||||
|
bat_lookup_failed = BATAddressLookup(physical_addr, effective_addr, WA);
|
||||||
|
BFI(physical_addr, effective_addr, 0, PowerPC::BAT_INDEX_SHIFT);
|
||||||
|
}
|
||||||
|
|
||||||
// Check whether a JIT cache line needs to be invalidated.
|
// Check whether a JIT cache line needs to be invalidated.
|
||||||
AND(value, addr, LogicalImm(0x1ffffc00, 32)); // upper three bits and last 10 bit are masked for
|
LSR(value, physical_addr, 5 + 5); // >> 5 for cache line size, >> 5 for width of bitset
|
||||||
// the bitset of cachelines, 0x1ffffc00
|
|
||||||
LSR(value, value, 5 + 5); // >> 5 for cache line size, >> 5 for width of bitset
|
|
||||||
MOVP2R(EncodeRegTo64(WA), GetBlockCache()->GetBlockBitSet());
|
MOVP2R(EncodeRegTo64(WA), GetBlockCache()->GetBlockBitSet());
|
||||||
LDR(value, EncodeRegTo64(WA), ArithOption(EncodeRegTo64(value), true));
|
LDR(value, EncodeRegTo64(WA), ArithOption(EncodeRegTo64(value), true));
|
||||||
|
|
||||||
LSR(addr, addr, 5); // mask sizeof cacheline, & 0x1f is the position within the bitset
|
LSR(WA, physical_addr, 5); // mask sizeof cacheline, & 0x1f is the position within the bitset
|
||||||
|
|
||||||
LSRV(value, value, addr); // move current bit to bit 0
|
LSRV(value, value, WA); // move current bit to bit 0
|
||||||
|
|
||||||
FixupBranch bit_not_set = TBZ(value, 0);
|
FixupBranch bit_not_set = TBZ(value, 0);
|
||||||
FixupBranch far_addr = B();
|
FixupBranch far_addr = B();
|
||||||
SwitchToFarCode();
|
SwitchToFarCode();
|
||||||
SetJumpTarget(far_addr);
|
SetJumpTarget(far_addr);
|
||||||
|
if (MSR.IR)
|
||||||
|
SetJumpTarget(bat_lookup_failed);
|
||||||
|
|
||||||
BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
|
BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
|
||||||
BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
|
BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
|
||||||
|
gprs_to_push[DecodeReg(effective_addr)] = false;
|
||||||
|
gprs_to_push[DecodeReg(physical_addr)] = false;
|
||||||
|
gprs_to_push[DecodeReg(value)] = false;
|
||||||
|
gprs_to_push[DecodeReg(WA)] = false;
|
||||||
|
|
||||||
ABI_PushRegisters(gprs_to_push);
|
ABI_PushRegisters(gprs_to_push);
|
||||||
m_float_emit.ABI_PushRegisters(fprs_to_push, ARM64Reg::X30);
|
m_float_emit.ABI_PushRegisters(fprs_to_push, ARM64Reg::X30);
|
||||||
|
|
||||||
LSL(ARM64Reg::W0, addr, 5);
|
MOVP2R(ARM64Reg::X8, &JitInterface::InvalidateICache);
|
||||||
MOVI2R(ARM64Reg::X1, 32);
|
// W0 was already set earlier
|
||||||
MOVI2R(ARM64Reg::X2, 0);
|
MOVI2R(ARM64Reg::W1, 32);
|
||||||
MOVP2R(ARM64Reg::X3, &JitInterface::InvalidateICache);
|
MOVI2R(ARM64Reg::W2, 0);
|
||||||
BLR(ARM64Reg::X3);
|
BLR(ARM64Reg::X8);
|
||||||
|
|
||||||
m_float_emit.ABI_PopRegisters(fprs_to_push, ARM64Reg::X30);
|
m_float_emit.ABI_PopRegisters(fprs_to_push, ARM64Reg::X30);
|
||||||
ABI_PopRegisters(gprs_to_push);
|
ABI_PopRegisters(gprs_to_push);
|
||||||
|
@ -588,7 +614,9 @@ void JitArm64::dcbx(UGeckoInstruction inst)
|
||||||
SetJumpTarget(bit_not_set);
|
SetJumpTarget(bit_not_set);
|
||||||
SetJumpTarget(near_addr);
|
SetJumpTarget(near_addr);
|
||||||
|
|
||||||
gpr.Unlock(addr, value, ARM64Reg::W30);
|
gpr.Unlock(effective_addr, value, WA);
|
||||||
|
if (MSR.IR)
|
||||||
|
gpr.Unlock(physical_addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::dcbt(UGeckoInstruction inst)
|
void JitArm64::dcbt(UGeckoInstruction inst)
|
||||||
|
|
Loading…
Reference in New Issue