Merge pull request #11958 from JosJuice/jitarm64-dispatcher-microopt
JitArm64: Dispatcher optimizations
commit 89963c287c
@@ -19,11 +19,6 @@
 #include "Core/System.h"

 using namespace Gen;

-// These need to be next of each other so that the assembly
-// code can compare them easily.
-static_assert(offsetof(JitBlockData, effectiveAddress) + 4 == offsetof(JitBlockData, msrBits));
-
 Jit64AsmRoutineManager::Jit64AsmRoutineManager(Jit64& jit) : CommonAsmRoutines(jit)
 {
 }
@@ -168,12 +163,14 @@ void Jit64AsmRoutineManager::Generate()
   // Check block.msrBits.
   MOV(32, R(RSCRATCH2), PPCSTATE(msr));
   AND(32, R(RSCRATCH2), Imm32(JitBaseBlockCache::JIT_CACHE_MSR_MASK));
-  // Also check the block.effectiveAddress
-  SHL(64, R(RSCRATCH2), Imm8(32));
-  // RSCRATCH_EXTRA still has the PC.
+  // Also check the block.effectiveAddress. RSCRATCH_EXTRA still has the PC.
+  SHL(64, R(RSCRATCH_EXTRA), Imm8(32));
   OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA));
-  CMP(64, R(RSCRATCH2),
-      MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlockData, effectiveAddress))));
+  static_assert(offsetof(JitBlockData, msrBits) + 4 ==
+                offsetof(JitBlockData, effectiveAddress));
+  CMP(64, R(RSCRATCH2), MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlockData, msrBits))));
   state_mismatch = J_CC(CC_NE);
   // Success; branch to the block we found.
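A short C++ model of the check this hunk emits may help: the masked MSR and the PC are packed into one 64-bit value and compared against the two adjacent u32 fields of JitBlockData in a single 64-bit CMP. This is a minimal sketch assuming a little-endian host (which both JITs already require); the function names are illustrative, not Dolphin APIs.

```cpp
#include <cstdint>

// What the new sequence computes: SHL(RSCRATCH_EXTRA, 32) shifts the PC into the
// high half, OR merges in the masked MSR, and the 64-bit CMP at
// offsetof(JitBlockData, msrBits) reads {msrBits, effectiveAddress} as one value.
constexpr uint64_t PackState(uint32_t masked_msr, uint32_t pc)
{
  return (uint64_t{pc} << 32) | masked_msr;
}

constexpr bool StateMatches(uint64_t block_msr_and_address, uint32_t masked_msr, uint32_t pc)
{
  return block_msr_and_address == PackState(masked_msr, pc);
}
```

The old code packed the halves the other way around (PC low, masked MSR high) to match the old field order; the swap here follows the JitCache.h reordering at the bottom of this commit.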
@@ -41,17 +41,17 @@ void JitArm64::GenerateAsm()
   enter_code = GetCodePtr();

   ABI_PushRegisters(regs_to_save);
-  m_float_emit.ABI_PushRegisters(regs_to_save_fpr, ARM64Reg::X30);
+  m_float_emit.ABI_PushRegisters(regs_to_save_fpr, ARM64Reg::X8);

   MOVP2R(PPC_REG, &m_ppc_state);

   // Store the stack pointer, so we can reset it if the BLR optimization fails.
-  ADD(ARM64Reg::X0, ARM64Reg::SP, 0);
-  STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(stored_stack_pointer));
+  ADD(ARM64Reg::X8, ARM64Reg::SP, 0);
+  STR(IndexType::Unsigned, ARM64Reg::X8, PPC_REG, PPCSTATE_OFF(stored_stack_pointer));

   // Push {nullptr; -1} as invalid destination on the stack.
-  MOVI2R(ARM64Reg::X0, 0xFFFF'FFFF'FFFF'FFFF);
-  STP(IndexType::Pre, ARM64Reg::ZR, ARM64Reg::X0, ARM64Reg::SP, -16);
+  MOVI2R(ARM64Reg::X8, 0xFFFF'FFFF'FFFF'FFFF);
+  STP(IndexType::Pre, ARM64Reg::ZR, ARM64Reg::X8, ARM64Reg::SP, -16);

   // The PC will be loaded into DISPATCHER_PC after the call to CoreTiming::Advance().
   // Advance() does an exception check so we don't know what PC to use until afterwards.
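Why push {nullptr; -1}: the BLR optimization lets JIT-compiled calls leave {host return address, emulated LR} pairs on the native stack, and an emulated blr may return directly only when the popped pair matches the current guest LR. A conceptual sketch of that idea follows; the types and function are hypothetical stand-ins, not Dolphin's actual implementation (Dolphin stores the pairs directly on the native stack via the STP above). Since -1 can never equal a zero-extended 32-bit guest PC, the bottom-of-stack sentinel always forces the slow path back to the dispatcher.

```cpp
#include <cstdint>

struct LinkStackEntry  // hypothetical illustration of one stack pair
{
  const void* host_address;  // nullptr in the sentinel pushed by the dispatcher
  uint64_t emulated_pc;      // 0xFFFF'FFFF'FFFF'FFFF in the sentinel
};

// On an emulated blr, pop a pair and take the fast path only on a match.
const void* PopLinkStack(LinkStackEntry*& sp, uint32_t return_pc)
{
  const LinkStackEntry entry = *sp++;
  if (entry.emulated_pc == return_pc)
    return entry.host_address;  // fast path: jump straight back into JIT code
  return nullptr;               // mismatch: caller must go through the dispatcher
}
```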
@@ -86,9 +86,9 @@ void JitArm64::GenerateAsm()
   FixupBranch debug_exit;
   if (enable_debugging)
   {
-    LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0,
-        MOVPage2R(ARM64Reg::X0, cpu.GetStatePtr()));
-    debug_exit = CBNZ(ARM64Reg::W0);
+    LDR(IndexType::Unsigned, ARM64Reg::W8, ARM64Reg::X8,
+        MOVPage2R(ARM64Reg::X8, cpu.GetStatePtr()));
+    debug_exit = CBNZ(ARM64Reg::W8);
   }

   dispatcher_no_check = GetCodePtr();
@@ -100,9 +100,9 @@ void JitArm64::GenerateAsm()
   if (GetBlockCache()->GetEntryPoints())
   {
     // Check if there is a block
-    ARM64Reg pc_and_msr = ARM64Reg::X25;
-    ARM64Reg cache_base = ARM64Reg::X27;
-    ARM64Reg block = ARM64Reg::X30;
+    ARM64Reg pc_and_msr = ARM64Reg::X8;
+    ARM64Reg cache_base = ARM64Reg::X9;
+    ARM64Reg block = ARM64Reg::X10;
     LDR(IndexType::Unsigned, EncodeRegTo32(pc_and_msr), PPC_REG, PPCSTATE_OFF(msr));
     MOVP2R(cache_base, GetBlockCache()->GetEntryPoints());
     // The entry points map is indexed by ((msrBits << 26) | (address >> 2)).
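The last context line above describes the fast path's table index. A worked sketch of that computation, assuming JIT_CACHE_MSR_MASK is 0x30 (the MSR IR/DR translation bits; JitCache.h holds the authoritative value):

```cpp
#include <cstdint>

constexpr uint32_t kJitCacheMsrMask = 0x30;  // assumed value of JIT_CACHE_MSR_MASK

// ((msrBits << 26) | (address >> 2)): the two masked MSR bits land in bits 30-31
// and the word-aligned PC fills bits 0-29, so every (msr, pc) combination maps
// to a distinct slot in the entry points map.
constexpr uint32_t EntryPointsIndex(uint32_t msr, uint32_t address)
{
  const uint32_t msr_bits = msr & kJitCacheMsrMask;
  return (msr_bits << 26) | (address >> 2);
}

static_assert(EntryPointsIndex(0x30, 0x80003100) ==
              (0xC0000000u | (0x80003100u >> 2)));
```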
@@ -115,37 +115,40 @@ void JitArm64::GenerateAsm()
   }
   else
   {
+    ARM64Reg pc_masked = ARM64Reg::W8;
+    ARM64Reg cache_base = ARM64Reg::X9;
+    ARM64Reg block = ARM64Reg::X10;
+    ARM64Reg pc = ARM64Reg::W11;
+    ARM64Reg msr = ARM64Reg::W12;
+    ARM64Reg msr2 = ARM64Reg::W13;
+    ARM64Reg entry = ARM64Reg::X14;
+
     // iCache[(address >> 2) & iCache_Mask];
-    ARM64Reg pc_masked = ARM64Reg::W25;
-    ARM64Reg cache_base = ARM64Reg::X27;
-    ARM64Reg block = ARM64Reg::X30;
-    ORR(pc_masked, ARM64Reg::WZR,
-        LogicalImm(JitBaseBlockCache::FAST_BLOCK_MAP_FALLBACK_MASK << 3, 32));
-    AND(pc_masked, pc_masked, DISPATCHER_PC, ArithOption(DISPATCHER_PC, ShiftType::LSL, 1));
     MOVP2R(cache_base, GetBlockCache()->GetFastBlockMapFallback());
-    LDR(block, cache_base, EncodeRegTo64(pc_masked));
+    UBFX(pc_masked, DISPATCHER_PC, 2,
+         MathUtil::IntLog2(JitBaseBlockCache::FAST_BLOCK_MAP_FALLBACK_ELEMENTS) - 2);
+    LDR(block, cache_base, ArithOption(EncodeRegTo64(pc_masked), true));
     FixupBranch not_found = CBZ(block);

     // b.effectiveAddress != addr || b.msrBits != msr
-    ARM64Reg pc_and_msr = ARM64Reg::W25;
-    ARM64Reg pc_and_msr2 = ARM64Reg::W24;
-    LDR(IndexType::Unsigned, pc_and_msr, block, offsetof(JitBlockData, effectiveAddress));
-    CMP(pc_and_msr, DISPATCHER_PC);
-    FixupBranch pc_missmatch = B(CC_NEQ);
-
-    LDR(IndexType::Unsigned, pc_and_msr2, PPC_REG, PPCSTATE_OFF(msr));
-    AND(pc_and_msr2, pc_and_msr2, LogicalImm(JitBaseBlockCache::JIT_CACHE_MSR_MASK, 32));
-    LDR(IndexType::Unsigned, pc_and_msr, block, offsetof(JitBlockData, msrBits));
-    CMP(pc_and_msr, pc_and_msr2);
-    FixupBranch msr_missmatch = B(CC_NEQ);
+    static_assert(offsetof(JitBlockData, msrBits) + 4 ==
+                  offsetof(JitBlockData, effectiveAddress));
+    LDP(IndexType::Signed, msr, pc, block, offsetof(JitBlockData, msrBits));
+    LDR(IndexType::Unsigned, msr2, PPC_REG, PPCSTATE_OFF(msr));
+    CMP(pc, DISPATCHER_PC);
+    FixupBranch pc_mismatch = B(CC_NEQ);
+
+    LDR(IndexType::Unsigned, entry, block, offsetof(JitBlockData, normalEntry));
+    AND(msr2, msr2, LogicalImm(JitBaseBlockCache::JIT_CACHE_MSR_MASK, 32));
+    CMP(msr, msr2);
+    FixupBranch msr_mismatch = B(CC_NEQ);

     // return blocks[block_num].normalEntry;
-    LDR(IndexType::Unsigned, block, block, offsetof(JitBlockData, normalEntry));
-    BR(block);
+    BR(entry);

     SetJumpTarget(not_found);
-    SetJumpTarget(pc_missmatch);
-    SetJumpTarget(msr_missmatch);
+    SetJumpTarget(pc_mismatch);
+    SetJumpTarget(msr_mismatch);
   }
 }
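The fallback path now builds its table index with a single UBFX instead of the old ORR-immediate/AND pair, and lets the LDR scale the index. A minimal sketch of the lookup this encodes, under stated assumptions: the helper names are illustrative, the element count is left generic, and ArithOption(..., true) is taken to mean the emitter's scaled-register form (index shifted left by 3 for an 8-byte load).

```cpp
#include <cstdint>

struct JitBlock;  // opaque here; only pointers are stored in the map

// UBFX(dst, src, lsb, width) extracts 'width' bits of 'src' starting at 'lsb'.
constexpr uint64_t Ubfx(uint64_t src, unsigned lsb, unsigned width)
{
  return (src >> lsb) & ((uint64_t{1} << width) - 1);
}

// Equivalent of the emitted UBFX + LDR pair: block = cache_base[Ubfx(pc, 2, n - 2)],
// where n is log2 of the fallback map's element count. PPC instructions are
// 4-byte aligned, so the index starts at bit 2 of the PC.
JitBlock* Lookup(JitBlock** cache_base, uint32_t pc, unsigned log2_elements)
{
  const uint64_t index = Ubfx(pc, 2, log2_elements - 2);
  return cache_base[index];  // LDR with ArithOption(..., true) scales by 8 bytes
}
```

This trims an ALU instruction and a temporary from the hot path, and the LDP above similarly fuses the two field loads the old code issued separately. (Note the new-side LDP offset is written here as offsetof(JitBlockData, msrBits): msr is the first register of the pair and so is loaded from the lower address, which the static_assert and the matching x64 hunk both require.)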
@@ -182,8 +185,8 @@ void JitArm64::GenerateAsm()

   // Check the state pointer to see if we are exiting
   // Gets checked on at the end of every slice
-  LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0, MOVPage2R(ARM64Reg::X0, cpu.GetStatePtr()));
-  FixupBranch exit = CBNZ(ARM64Reg::W0);
+  LDR(IndexType::Unsigned, ARM64Reg::W8, ARM64Reg::X8, MOVPage2R(ARM64Reg::X8, cpu.GetStatePtr()));
+  FixupBranch exit = CBNZ(ARM64Reg::W8);

   SetJumpTarget(to_start_of_timing_slice);
   ABI_CallFunction(&CoreTiming::GlobalAdvance);
@@ -212,10 +215,10 @@ void JitArm64::GenerateAsm()

   // Reset the stack pointer, since the BLR optimization may have pushed things onto the stack
   // without popping them.
-  LDR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(stored_stack_pointer));
-  ADD(ARM64Reg::SP, ARM64Reg::X0, 0);
+  LDR(IndexType::Unsigned, ARM64Reg::X8, PPC_REG, PPCSTATE_OFF(stored_stack_pointer));
+  ADD(ARM64Reg::SP, ARM64Reg::X8, 0);

-  m_float_emit.ABI_PopRegisters(regs_to_save_fpr, ARM64Reg::X30);
+  m_float_emit.ABI_PopRegisters(regs_to_save_fpr, ARM64Reg::X8);
   ABI_PopRegisters(regs_to_save);
   RET(ARM64Reg::X30);
@@ -33,10 +33,10 @@ struct JitBlockData
   // The normal entry point for the block, returned by Dispatch().
   u8* normalEntry;

-  // The effective address (PC) for the beginning of the block.
-  u32 effectiveAddress;
   // The MSR bits expected for this block to be valid; see JIT_CACHE_MSR_MASK.
   u32 msrBits;
+  // The effective address (PC) for the beginning of the block.
+  u32 effectiveAddress;
   // The physical address of the code represented by this block.
   // Various maps in the cache are indexed by this (block_map
   // and valid_block in particular). This is useful because of
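This field swap is what both dispatcher hunks rely on: with msrBits first, a single 64-bit load at offsetof(JitBlockData, msrBits) sees the MSR bits in the low half and the PC in the high half on a little-endian host. A small sketch of that invariant; the struct below is a stand-in mirroring the new layout, not the real JitBlockData.

```cpp
#include <cstdint>
#include <cstring>

struct BlockKey  // stand-in mirroring the new field order
{
  uint32_t msrBits;
  uint32_t effectiveAddress;
};

// The view taken by the x64 64-bit CMP and the ARM64 LDP.
static uint64_t LoadAsOneWord(const BlockKey& key)
{
  uint64_t packed;
  std::memcpy(&packed, &key, sizeof(packed));
  return packed;  // little-endian: msrBits low, effectiveAddress high
}

// Matches what the dispatchers build in registers before comparing.
static uint64_t PackState(uint32_t masked_msr, uint32_t pc)
{
  return (uint64_t{pc} << 32) | masked_msr;
}
```

On ARM64 this also lets one LDP fill msr and pc together, which is why the static_assert moved from the top of the x64 file into both dispatchers next to the code that depends on it.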