JitArm64: Never check downcount on block entry
Jumping between linked blocks currently works as follows: First, at the
end of the first block, we check if the downcount is greater than zero.
If it is, we jump to the `normalEntry` of the block. So far so good. But
if the downcount wasn't greater than zero, we jump to the `checkedEntry`
of the block, which checks the downcount *again* and then jumps to
`do_timing` if it's less than zero (which seems like an off-by-one error
- Jit64 doesn't do anything like this). This second check is rather
redundant. Let's jump to `do_timing` where we previously jumped to
`checkedEntry`.
Jit64 doesn't check the downcount on block entry. See 5236dc3.
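
To make the off-by-one concrete, here is a minimal, compilable model of the decision logic described above (names are illustrative; this models the checks, not the emitted AArch64):

    #include <cstdio>

    enum class Target { RunBlock, DoTiming };

    // The old two-stage check: downcount > 0 (CC_GT) at the block exit, then
    // downcount >= 0 (CC_PL) again at checkedEntry. They disagree at zero.
    Target OldTwoStageCheck(int downcount)
    {
      if (downcount > 0)   // check #1, at the exit of the previous block
        return Target::RunBlock;
      if (downcount >= 0)  // check #2, at checkedEntry
        return Target::RunBlock;
      return Target::DoTiming;
    }

    int main()
    {
      // With downcount == 0, check #1 says "stop" but check #2 runs the block
      // anyway, so the second check both re-does and contradicts the first.
      std::printf("OldTwoStageCheck(0) runs the block: %s\n",
                  OldTwoStageCheck(0) == Target::RunBlock ? "yes" : "no");
      return 0;
    }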
commit f78ba9ac55
parent bed3874497
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
@@ -361,11 +361,14 @@ void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return)
 
   LK &= m_enable_blr_optimization;
 
+  const u8* host_address_after_return;
   if (LK)
   {
-    // Push {ARM_PC+20; PPC_PC} on the stack
+    // Push {ARM_PC; PPC_PC} on the stack
     MOVI2R(ARM64Reg::X1, exit_address_after_return);
-    ADR(ARM64Reg::X0, 20);
+    constexpr s32 adr_offset = JitArm64BlockCache::BLOCK_LINK_SIZE + sizeof(u32) * 2;
+    host_address_after_return = GetCodePtr() + adr_offset;
+    ADR(ARM64Reg::X0, adr_offset);
     STP(IndexType::Pre, ARM64Reg::X0, ARM64Reg::X1, ARM64Reg::SP, -16);
   }
 
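As a worked check of the new `adr_offset`: the ADR and STP emitted here occupy two 4-byte instructions, and the fixed-size link block that follows (`BLOCK_LINK_SIZE`, introduced in JitArm64Cache.h later in this commit) occupies five more, so the pushed host return address sits 28 bytes past the ADR. A compilable sketch, with standard types standing in for Dolphin's `u32`/`s32`:

    #include <cstddef>
    #include <cstdint>

    constexpr std::size_t BLOCK_LINK_SIZE = 5 * sizeof(std::uint32_t);  // the 20-byte link stub
    constexpr std::int32_t adr_offset =
        static_cast<std::int32_t>(BLOCK_LINK_SIZE + sizeof(std::uint32_t) * 2);  // + ADR and STP

    // ADR(X0, 28) makes X0 point at the first byte after ADR, STP, and the stub,
    // which is exactly where execution resumes after the fake call returns.
    static_assert(adr_offset == 28, "seven instructions ahead of the ADR");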
@@ -381,6 +384,8 @@ void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return)
 
   if (LK)
   {
+    DEBUG_ASSERT(GetCodePtr() == host_address_after_return || HasWriteFailed());
+
     // Write the regular exit node after the return.
     linkData.exitAddress = exit_address_after_return;
     linkData.exitPtrs = GetWritableCodePtr();
@@ -411,10 +416,13 @@ void JitArm64::WriteExit(Arm64Gen::ARM64Reg dest, bool LK, u32 exit_address_after_return)
   {
     // Push {ARM_PC, PPC_PC} on the stack
     MOVI2R(ARM64Reg::X1, exit_address_after_return);
-    ADR(ARM64Reg::X0, 12);
+    constexpr s32 adr_offset = sizeof(u32) * 3;
+    const u8* host_address_after_return = GetCodePtr() + adr_offset;
+    ADR(ARM64Reg::X0, adr_offset);
     STP(IndexType::Pre, ARM64Reg::X0, ARM64Reg::X1, ARM64Reg::SP, -16);
 
     BL(dispatcher);
+    DEBUG_ASSERT(GetCodePtr() == host_address_after_return || HasWriteFailed());
 
     // Write the regular exit node after the return.
     JitBlock* b = js.curBlock;
@@ -440,11 +448,14 @@ void JitArm64::FakeLKExit(u32 exit_address_after_return)
   ARM64Reg after_reg = gpr.GetReg();
   ARM64Reg code_reg = gpr.GetReg();
   MOVI2R(after_reg, exit_address_after_return);
-  ADR(EncodeRegTo64(code_reg), 12);
+  constexpr s32 adr_offset = sizeof(u32) * 3;
+  const u8* host_address_after_return = GetCodePtr() + adr_offset;
+  ADR(EncodeRegTo64(code_reg), adr_offset);
   STP(IndexType::Pre, EncodeRegTo64(code_reg), EncodeRegTo64(after_reg), ARM64Reg::SP, -16);
   gpr.Unlock(after_reg, code_reg);
 
   FixupBranch skip_exit = BL();
+  DEBUG_ASSERT(GetCodePtr() == host_address_after_return || HasWriteFailed());
   gpr.Unlock(ARM64Reg::W30);
 
   // Write the regular exit node after the return.
@@ -839,17 +850,7 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
 
   u8* const start = GetWritableCodePtr();
   b->checkedEntry = start;
-
-  // Downcount flag check, Only valid for linked blocks
-  {
-    FixupBranch bail = B(CC_PL);
-    MOVI2R(DISPATCHER_PC, js.blockStart);
-    B(do_timing);
-    SetJumpTarget(bail);
-  }
-
-  // Normal entry doesn't need to check for downcount.
-  b->normalEntry = GetWritableCodePtr();
+  b->normalEntry = start;
 
   // Conditionally add profiling code.
   if (jo.profile_blocks)
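The prologue removed here is the `checkedEntry` guard discussed in the commit message. Removing it is safe because, after this commit, every path into a block either comes from the dispatcher (which has already serviced timing) or from a link stub that does its own routing, as in this minimal model (illustrative names, not Dolphin code):

    #include <cstdio>
    #include <initializer_list>

    enum class Route { NormalEntry, DoTiming };

    // The rewritten link stub's decision: enter the next block's body only when
    // cycles remain; otherwise go service events. It never enters a block with
    // downcount <= 0, so a per-block entry check has nothing left to catch.
    Route LinkStub(int downcount)
    {
      return downcount > 0 ? Route::NormalEntry : Route::DoTiming;
    }

    int main()
    {
      for (int downcount : {-1, 0, 1})
      {
        std::printf("downcount=%2d -> %s\n", downcount,
                    LinkStub(downcount) == Route::NormalEntry ? "normalEntry" : "do_timing");
      }
      return 0;
    }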
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp
@@ -22,52 +22,73 @@ void JitArm64BlockCache::Init()
 void JitArm64BlockCache::WriteLinkBlock(Arm64Gen::ARM64XEmitter& emit,
                                         const JitBlock::LinkData& source, const JitBlock* dest)
 {
+  const u8* start = emit.GetCodePtr();
+
   if (!dest)
   {
-    // Use a fixed amount of instructions, so we can assume to use 3 instructions on patching.
-    emit.MOVZ(DISPATCHER_PC, source.exitAddress & 0xFFFF, ShiftAmount::Shift0);
-    emit.MOVK(DISPATCHER_PC, source.exitAddress >> 16, ShiftAmount::Shift16);
-
+    emit.MOVI2R(DISPATCHER_PC, source.exitAddress);
     if (source.call)
+    {
+      while (emit.GetCodePtr() < start + BLOCK_LINK_FAST_BL_OFFSET && !emit.HasWriteFailed())
+        emit.NOP();
       emit.BL(m_jit.GetAsmRoutines()->dispatcher);
+    }
     else
+    {
       emit.B(m_jit.GetAsmRoutines()->dispatcher);
-    return;
+    }
   }
-
-  if (source.call)
-  {
-    // The "fast" BL must be the third instruction. So just use the former two to inline the
-    // downcount check here. It's better to do this near jump before the long jump to the other
-    // block.
-    FixupBranch fast_link = emit.B(CC_GT);
-    emit.BL(dest->checkedEntry);
-    emit.SetJumpTarget(fast_link);
-    emit.BL(dest->normalEntry);
-    return;
-  }
-
-  // Are we able to jump directly to the normal entry?
-  s64 distance = ((s64)dest->normalEntry - (s64)emit.GetCodePtr()) >> 2;
-  if (distance >= -0x40000 && distance <= 0x3FFFF)
-  {
-    emit.B(CC_GT, dest->normalEntry);
-    emit.B(dest->checkedEntry);
-    emit.BRK(101);
-    return;
-  }
-
-  FixupBranch fast_link = emit.B(CC_GT);
-  emit.B(dest->checkedEntry);
-  emit.SetJumpTarget(fast_link);
-  emit.B(dest->normalEntry);
+  else
+  {
+    if (source.call)
+    {
+      // The "fast" BL should be the last instruction, so that the return address matches the
+      // address that was pushed onto the stack by the function that called WriteLinkBlock
+      FixupBranch fast = emit.B(CC_GT);
+      emit.MOVI2R(DISPATCHER_PC, source.exitAddress);
+      emit.BL(m_jit.GetAsmRoutines()->do_timing);
+      while (emit.GetCodePtr() < start + BLOCK_LINK_FAST_BL_OFFSET && !emit.HasWriteFailed())
+        emit.BRK(101);
+      emit.SetJumpTarget(fast);
+      emit.BL(dest->normalEntry);
+    }
+    else
+    {
+      // Are we able to jump directly to the block?
+      s64 block_distance = ((s64)dest->normalEntry - (s64)emit.GetCodePtr()) >> 2;
+      if (block_distance >= -0x40000 && block_distance <= 0x3FFFF)
+      {
+        emit.B(CC_GT, dest->normalEntry);
+        emit.MOVI2R(DISPATCHER_PC, source.exitAddress);
+        emit.B(m_jit.GetAsmRoutines()->do_timing);
+      }
+      else
+      {
+        FixupBranch slow = emit.B(CC_LE);
+        emit.B(dest->normalEntry);
+        emit.SetJumpTarget(slow);
+        emit.MOVI2R(DISPATCHER_PC, source.exitAddress);
+        emit.B(m_jit.GetAsmRoutines()->do_timing);
+      }
+    }
+  }
+
+  // Use a fixed number of instructions so we have enough room for any patching needed later.
+  const u8* end = start + BLOCK_LINK_SIZE;
+  while (emit.GetCodePtr() < end)
+  {
+    emit.BRK(101);
+    if (emit.HasWriteFailed())
+      return;
+  }
+  ASSERT(emit.GetCodePtr() == end);
 }
 
 void JitArm64BlockCache::WriteLinkBlock(const JitBlock::LinkData& source, const JitBlock* dest)
 {
   const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;
   u8* location = source.exitPtrs;
-  ARM64XEmitter emit(location, location + 12);
+  ARM64XEmitter emit(location, location + BLOCK_LINK_SIZE);
 
   WriteLinkBlock(emit, source, dest);
   emit.FlushIcache();
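Both `while` loops above maintain the same invariant: the "fast" BL ends up in the stub's last instruction slot, so the host return address that WriteExit pushed (computed from `BLOCK_LINK_SIZE` before the stub was even written) stays valid no matter how the stub is later repatched. A compilable sketch of the padding arithmetic, under the assumption that `MOVI2R` emitted one instruction (it may emit two, which the loops also handle):

    #include <cstddef>
    #include <cstdint>

    // Stub geometry from JitArm64Cache.h, with standard types standing in for
    // Dolphin's u32: five 4-byte instructions, fast BL in the final slot.
    constexpr std::size_t BLOCK_LINK_SIZE = 5 * sizeof(std::uint32_t);
    constexpr std::size_t BLOCK_LINK_FAST_BL_OFFSET = BLOCK_LINK_SIZE - sizeof(std::uint32_t);

    // Hypothetical helper mirroring the while loops above: how many filler
    // instructions a stub needs so that its BL lands at offset 16.
    constexpr std::size_t PaddingBeforeFastBL(std::size_t bytes_emitted)
    {
      return (BLOCK_LINK_FAST_BL_OFFSET - bytes_emitted) / sizeof(std::uint32_t);
    }

    // Linked call stub: B.GT + MOVI2R + BL do_timing = 12 bytes -> one BRK.
    static_assert(PaddingBeforeFastBL(12) == 1, "one BRK before the fast BL");
    // Unlinked call stub with a one-instruction MOVI2R: 4 bytes -> three NOPs.
    static_assert(PaddingBeforeFastBL(4) == 3, "three NOPs before the BL to the dispatcher");

Note the asymmetry in the filler: unlinked stubs pad with NOPs, which execution falls through on its way to the BL, while the linked variants pad with BRK after an unconditional branch, where execution should never land.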
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.h
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.h
@@ -29,6 +29,9 @@ public:
   void WriteLinkBlock(Arm64Gen::ARM64XEmitter& emit, const JitBlock::LinkData& source,
                       const JitBlock* dest = nullptr);
 
+  static constexpr size_t BLOCK_LINK_SIZE = 5 * sizeof(u32);
+  static constexpr size_t BLOCK_LINK_FAST_BL_OFFSET = BLOCK_LINK_SIZE - sizeof(u32);
+
 private:
   void WriteLinkBlock(const JitBlock::LinkData& source, const JitBlock* dest) override;
   void WriteDestroyBlock(const JitBlock& block) override;