JitArm64: Avoid loading compilerPC multiple times if it's already in a register.
This commit is contained in:
parent
cd31da97d6
commit
7daa19f40d
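In short: call sites such as bx() already materialize js.compilerPC + 4 into a register for the SPR_LR store, so WriteExit and FakeLKExit gain an optional exit_address_after_return_reg parameter and only emit their own MOVI2R when that parameter is ARM64Reg::INVALID_REG. A minimal, self-contained sketch of the pattern follows (not Dolphin code; Reg, kInvalidReg and write_exit are illustrative stand-ins for ARM64Reg, ARM64Reg::INVALID_REG and WriteExit/FakeLKExit):

// Illustrative sketch only: models how forwarding an already-loaded register
// avoids a second immediate load in the exit helpers.
#include <cstdint>
#include <iostream>

using Reg = int;                 // stand-in for an ARM64 register id
constexpr Reg kInvalidReg = -1;  // stand-in for ARM64Reg::INVALID_REG

static int immediate_loads = 0;  // counts emitted MOVI2R-style loads

// Stand-in for WriteExit/FakeLKExit: push the PPC return address, reusing the
// caller's register when one is provided.
void write_exit(uint32_t exit_address_after_return, Reg exit_address_after_return_reg = kInvalidReg)
{
  Reg reg_to_push = exit_address_after_return_reg;
  if (exit_address_after_return_reg == kInvalidReg)
  {
    ++immediate_loads;  // would be MOVI2R(X1, exit_address_after_return)
    reg_to_push = 1;    // scratch register, e.g. X1
  }
  std::cout << "push r" << reg_to_push << " = 0x" << std::hex
            << exit_address_after_return << std::dec << '\n';
}

int main()
{
  const uint32_t compiler_pc = 0x80001234;

  // Before this commit: the caller loaded compiler_pc + 4 for the LR store,
  // unlocked the register, and the exit helper loaded the same constant again.
  write_exit(compiler_pc + 4);

  // After: the caller keeps that register alive and forwards it, so the helper
  // emits no extra load.
  const Reg wa = 5;  // pretend W5 already holds compiler_pc + 4
  write_exit(compiler_pc + 4, wa);

  std::cout << "extra immediate loads: " << immediate_loads << '\n';
}

The hunks below apply this shape to WriteExit, FakeLKExit and their callers bx, bcx and bcctrx.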
@@ -374,7 +374,8 @@ void JitArm64::EmitStoreMembase(const ARM64Reg& msr)
   gpr.Unlock(WD);
 }
 
-void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return)
+void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return,
+                         ARM64Reg exit_address_after_return_reg)
 {
   Cleanup();
   EndTimeProfile(js.curBlock);
@@ -386,11 +387,16 @@ void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return
   if (LK)
   {
     // Push {ARM_PC; PPC_PC} on the stack
-    MOVI2R(ARM64Reg::X1, exit_address_after_return);
+    ARM64Reg reg_to_push = exit_address_after_return_reg;
+    if (exit_address_after_return_reg == ARM64Reg::INVALID_REG)
+    {
+      MOVI2R(ARM64Reg::X1, exit_address_after_return);
+      reg_to_push = ARM64Reg::X1;
+    }
     constexpr s32 adr_offset = JitArm64BlockCache::BLOCK_LINK_SIZE + sizeof(u32) * 2;
     host_address_after_return = GetCodePtr() + adr_offset;
     ADR(ARM64Reg::X0, adr_offset);
-    STP(IndexType::Pre, ARM64Reg::X0, ARM64Reg::X1, ARM64Reg::SP, -16);
+    STP(IndexType::Pre, ARM64Reg::X0, reg_to_push, ARM64Reg::SP, -16);
   }

   constexpr size_t primary_farcode_size = 3 * sizeof(u32);
@@ -457,7 +463,8 @@ void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return
   SwitchToNearCode();
 }
 
-void JitArm64::WriteExit(Arm64Gen::ARM64Reg dest, bool LK, u32 exit_address_after_return)
+void JitArm64::WriteExit(Arm64Gen::ARM64Reg dest, bool LK, u32 exit_address_after_return,
+                         ARM64Reg exit_address_after_return_reg)
 {
   if (dest != DISPATCHER_PC)
     MOV(DISPATCHER_PC, dest);
@@ -475,11 +482,17 @@ void JitArm64::WriteExit(Arm64Gen::ARM64Reg dest, bool LK, u32 exit_address_afte
   else
   {
     // Push {ARM_PC, PPC_PC} on the stack
-    MOVI2R(ARM64Reg::X1, exit_address_after_return);
+    ARM64Reg reg_to_push = exit_address_after_return_reg;
+    if (exit_address_after_return_reg == ARM64Reg::INVALID_REG)
+    {
+      MOVI2R(ARM64Reg::X1, exit_address_after_return);
+      reg_to_push = ARM64Reg::X1;
+    }
     constexpr s32 adr_offset = sizeof(u32) * 3;
     const u8* host_address_after_return = GetCodePtr() + adr_offset;
     ADR(ARM64Reg::X0, adr_offset);
-    STP(IndexType::Pre, ARM64Reg::X0, ARM64Reg::X1, ARM64Reg::SP, -16);
+    STP(IndexType::Pre, ARM64Reg::X0, reg_to_push, ARM64Reg::SP, -16);

     BL(dispatcher);
     DEBUG_ASSERT(GetCodePtr() == host_address_after_return || HasWriteFailed());
@@ -515,26 +528,43 @@ void JitArm64::WriteExit(Arm64Gen::ARM64Reg dest, bool LK, u32 exit_address_afte
   }
 }
 
-void JitArm64::FakeLKExit(u32 exit_address_after_return)
+void JitArm64::FakeLKExit(u32 exit_address_after_return, ARM64Reg exit_address_after_return_reg)
 {
   if (!m_enable_blr_optimization)
     return;
 
   // We may need to fake the BLR stack on inlined CALL instructions.
   // Else we can't return to this location any more.
-  gpr.Lock(ARM64Reg::W30);
-  ARM64Reg after_reg = gpr.GetReg();
+  if (exit_address_after_return_reg != ARM64Reg::W30)
+  {
+    // Do not lock W30 if it is the same as the exit address register, since
+    // it's already locked. It'll only get clobbered at the BL (below) where
+    // we do not need its value anymore.
+    // NOTE: This means W30 won't contain the return address anymore after this
+    // function has been called!
+    gpr.Lock(ARM64Reg::W30);
+  }
+  ARM64Reg after_reg = exit_address_after_return_reg;
+  if (exit_address_after_return_reg == ARM64Reg::INVALID_REG)
+  {
+    after_reg = gpr.GetReg();
+    MOVI2R(after_reg, exit_address_after_return);
+  }
   ARM64Reg code_reg = gpr.GetReg();
-  MOVI2R(after_reg, exit_address_after_return);
   constexpr s32 adr_offset = sizeof(u32) * 3;
   const u8* host_address_after_return = GetCodePtr() + adr_offset;
   ADR(EncodeRegTo64(code_reg), adr_offset);
   STP(IndexType::Pre, EncodeRegTo64(code_reg), EncodeRegTo64(after_reg), ARM64Reg::SP, -16);
-  gpr.Unlock(after_reg, code_reg);
+  gpr.Unlock(code_reg);
+  if (after_reg != exit_address_after_return_reg)
+    gpr.Unlock(after_reg);
 
   FixupBranch skip_exit = BL();
   DEBUG_ASSERT(GetCodePtr() == host_address_after_return || HasWriteFailed());
-  gpr.Unlock(ARM64Reg::W30);
+  if (exit_address_after_return_reg != ARM64Reg::W30)
+  {
+    gpr.Unlock(ARM64Reg::W30);
+  }
 
   // Write the regular exit node after the return.
   JitBlock* b = js.curBlock;
@@ -315,8 +315,12 @@ protected:
   void EmitStoreMembase(const Arm64Gen::ARM64Reg& msr);
 
   // Exits
-  void WriteExit(u32 destination, bool LK = false, u32 exit_address_after_return = 0);
-  void WriteExit(Arm64Gen::ARM64Reg dest, bool LK = false, u32 exit_address_after_return = 0);
+  void
+  WriteExit(u32 destination, bool LK = false, u32 exit_address_after_return = 0,
+            Arm64Gen::ARM64Reg exit_address_after_return_reg = Arm64Gen::ARM64Reg::INVALID_REG);
+  void
+  WriteExit(Arm64Gen::ARM64Reg dest, bool LK = false, u32 exit_address_after_return = 0,
+            Arm64Gen::ARM64Reg exit_address_after_return_reg = Arm64Gen::ARM64Reg::INVALID_REG);
   void WriteExceptionExit(u32 destination, bool only_external = false,
                           bool always_exception = false);
   void WriteExceptionExit(Arm64Gen::ARM64Reg dest, bool only_external = false,
@@ -325,7 +329,9 @@ protected:
   void WriteConditionalExceptionExit(int exception, Arm64Gen::ARM64Reg temp_gpr,
                                      Arm64Gen::ARM64Reg temp_fpr = Arm64Gen::ARM64Reg::INVALID_REG,
                                      u64 increment_sp_on_exit = 0);
-  void FakeLKExit(u32 exit_address_after_return);
+  void
+  FakeLKExit(u32 exit_address_after_return,
+             Arm64Gen::ARM64Reg exit_address_after_return_reg = Arm64Gen::ARM64Reg::INVALID_REG);
   void WriteBLRExit(Arm64Gen::ARM64Reg dest);
 
   Arm64Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set);
@@ -79,12 +79,12 @@ void JitArm64::bx(UGeckoInstruction inst)
   INSTRUCTION_START
   JITDISABLE(bJITBranchOff);
 
+  ARM64Reg WA = ARM64Reg::INVALID_REG;
   if (inst.LK)
   {
-    ARM64Reg WA = gpr.GetReg();
+    WA = gpr.GetReg();
     MOVI2R(WA, js.compilerPC + 4);
     STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_LR));
-    gpr.Unlock(WA);
   }
 
   if (!js.isLastInstruction)
@@ -94,8 +94,12 @@ void JitArm64::bx(UGeckoInstruction inst)
       // We have to fake the stack as the RET instruction was not
       // found in the same block. This is a big overhead, but still
       // better than calling the dispatcher.
-      FakeLKExit(js.compilerPC + 4);
+      FakeLKExit(js.compilerPC + 4, WA);
     }
 
+    if (WA != ARM64Reg::INVALID_REG)
+      gpr.Unlock(WA);
+
     return;
   }
@@ -104,19 +108,24 @@ void JitArm64::bx(UGeckoInstruction inst)

   if (js.op->branchIsIdleLoop)
   {
-    // make idle loops go faster
-    ARM64Reg WA = gpr.GetReg();
-    ARM64Reg XA = EncodeRegTo64(WA);
+    if (WA != ARM64Reg::INVALID_REG)
+      gpr.Unlock(WA);

-    MOVP2R(XA, &CoreTiming::GlobalIdle);
-    BLR(XA);
-    gpr.Unlock(WA);
+    // make idle loops go faster
+    ARM64Reg WB = gpr.GetReg();
+    ARM64Reg XB = EncodeRegTo64(WB);
+
+    MOVP2R(XB, &CoreTiming::GlobalIdle);
+    BLR(XB);
+    gpr.Unlock(WB);

     WriteExceptionExit(js.op->branchTo);
     return;
   }

-  WriteExit(js.op->branchTo, inst.LK, js.compilerPC + 4);
+  WriteExit(js.op->branchTo, inst.LK, js.compilerPC + 4, inst.LK ? WA : ARM64Reg::INVALID_REG);
+  if (WA != ARM64Reg::INVALID_REG)
+    gpr.Unlock(WA);
 }
 
 void JitArm64::bcx(UGeckoInstruction inst)
@@ -125,6 +134,8 @@ void JitArm64::bcx(UGeckoInstruction inst)
   JITDISABLE(bJITBranchOff);
 
   ARM64Reg WA = gpr.GetReg();
+  ARM64Reg WB = inst.LK ? gpr.GetReg() : WA;
+
   FixupBranch pCTRDontBranch;
   if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR
   {
@@ -156,7 +167,7 @@ void JitArm64::bcx(UGeckoInstruction inst)
     STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_LR));
   }
 
-  gpr.Flush(FlushMode::MaintainState, WA);
+  gpr.Flush(FlushMode::MaintainState, WB);
   fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG);
 
   if (js.op->branchIsIdleLoop)
@@ -171,7 +182,7 @@ void JitArm64::bcx(UGeckoInstruction inst)
   }
   else
   {
-    WriteExit(js.op->branchTo, inst.LK, js.compilerPC + 4);
+    WriteExit(js.op->branchTo, inst.LK, js.compilerPC + 4, inst.LK ? WA : ARM64Reg::INVALID_REG);
   }
 
   SwitchToNearCode();
@@ -189,6 +200,8 @@ void JitArm64::bcx(UGeckoInstruction inst)
   }
 
   gpr.Unlock(WA);
+  if (WB != WA)
+    gpr.Unlock(WB);
 }
 
 void JitArm64::bcctrx(UGeckoInstruction inst)
@@ -211,12 +224,12 @@ void JitArm64::bcctrx(UGeckoInstruction inst)
   gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
   fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
 
+  ARM64Reg WB = ARM64Reg::INVALID_REG;
   if (inst.LK_3)
   {
-    ARM64Reg WB = gpr.GetReg();
+    WB = gpr.GetReg();
     MOVI2R(WB, js.compilerPC + 4);
     STR(IndexType::Unsigned, WB, PPC_REG, PPCSTATE_OFF_SPR(SPR_LR));
-    gpr.Unlock(WB);
   }
 
   ARM64Reg WA = gpr.GetReg();
@@ -224,8 +237,10 @@ void JitArm64::bcctrx(UGeckoInstruction inst)
   LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_CTR));
   AND(WA, WA, LogicalImm(~0x3, 32));
 
-  WriteExit(WA, inst.LK_3, js.compilerPC + 4);
+  WriteExit(WA, inst.LK_3, js.compilerPC + 4, inst.LK_3 ? WB : ARM64Reg::INVALID_REG);
 
+  if (WB != ARM64Reg::INVALID_REG)
+    gpr.Unlock(WB);
   gpr.Unlock(WA);
 }