Merge pull request #12086 from krnlyng/jitcompilerPCload
JitArm64: Avoid loading compilerPC multiple times if it's already in a register.
This commit is contained in:
commit
7ac0db70c6
|
@ -374,7 +374,8 @@ void JitArm64::EmitStoreMembase(const ARM64Reg& msr)
|
||||||
gpr.Unlock(WD);
|
gpr.Unlock(WD);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return)
|
void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return,
|
||||||
|
ARM64Reg exit_address_after_return_reg)
|
||||||
{
|
{
|
||||||
Cleanup();
|
Cleanup();
|
||||||
EndTimeProfile(js.curBlock);
|
EndTimeProfile(js.curBlock);
|
||||||
|
@ -386,11 +387,16 @@ void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return
|
||||||
if (LK)
|
if (LK)
|
||||||
{
|
{
|
||||||
// Push {ARM_PC; PPC_PC} on the stack
|
// Push {ARM_PC; PPC_PC} on the stack
|
||||||
|
ARM64Reg reg_to_push = exit_address_after_return_reg;
|
||||||
|
if (exit_address_after_return_reg == ARM64Reg::INVALID_REG)
|
||||||
|
{
|
||||||
MOVI2R(ARM64Reg::X1, exit_address_after_return);
|
MOVI2R(ARM64Reg::X1, exit_address_after_return);
|
||||||
|
reg_to_push = ARM64Reg::X1;
|
||||||
|
}
|
||||||
constexpr s32 adr_offset = JitArm64BlockCache::BLOCK_LINK_SIZE + sizeof(u32) * 2;
|
constexpr s32 adr_offset = JitArm64BlockCache::BLOCK_LINK_SIZE + sizeof(u32) * 2;
|
||||||
host_address_after_return = GetCodePtr() + adr_offset;
|
host_address_after_return = GetCodePtr() + adr_offset;
|
||||||
ADR(ARM64Reg::X0, adr_offset);
|
ADR(ARM64Reg::X0, adr_offset);
|
||||||
STP(IndexType::Pre, ARM64Reg::X0, ARM64Reg::X1, ARM64Reg::SP, -16);
|
STP(IndexType::Pre, ARM64Reg::X0, reg_to_push, ARM64Reg::SP, -16);
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr size_t primary_farcode_size = 3 * sizeof(u32);
|
constexpr size_t primary_farcode_size = 3 * sizeof(u32);
|
||||||
|
@ -457,7 +463,8 @@ void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return
|
||||||
SwitchToNearCode();
|
SwitchToNearCode();
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::WriteExit(Arm64Gen::ARM64Reg dest, bool LK, u32 exit_address_after_return)
|
void JitArm64::WriteExit(Arm64Gen::ARM64Reg dest, bool LK, u32 exit_address_after_return,
|
||||||
|
ARM64Reg exit_address_after_return_reg)
|
||||||
{
|
{
|
||||||
if (dest != DISPATCHER_PC)
|
if (dest != DISPATCHER_PC)
|
||||||
MOV(DISPATCHER_PC, dest);
|
MOV(DISPATCHER_PC, dest);
|
||||||
|
@ -475,11 +482,17 @@ void JitArm64::WriteExit(Arm64Gen::ARM64Reg dest, bool LK, u32 exit_address_afte
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Push {ARM_PC, PPC_PC} on the stack
|
// Push {ARM_PC, PPC_PC} on the stack
|
||||||
|
ARM64Reg reg_to_push = exit_address_after_return_reg;
|
||||||
|
if (exit_address_after_return_reg == ARM64Reg::INVALID_REG)
|
||||||
|
{
|
||||||
|
MOVI2R(ARM64Reg::X1, exit_address_after_return);
|
||||||
|
reg_to_push = ARM64Reg::X1;
|
||||||
|
}
|
||||||
MOVI2R(ARM64Reg::X1, exit_address_after_return);
|
MOVI2R(ARM64Reg::X1, exit_address_after_return);
|
||||||
constexpr s32 adr_offset = sizeof(u32) * 3;
|
constexpr s32 adr_offset = sizeof(u32) * 3;
|
||||||
const u8* host_address_after_return = GetCodePtr() + adr_offset;
|
const u8* host_address_after_return = GetCodePtr() + adr_offset;
|
||||||
ADR(ARM64Reg::X0, adr_offset);
|
ADR(ARM64Reg::X0, adr_offset);
|
||||||
STP(IndexType::Pre, ARM64Reg::X0, ARM64Reg::X1, ARM64Reg::SP, -16);
|
STP(IndexType::Pre, ARM64Reg::X0, reg_to_push, ARM64Reg::SP, -16);
|
||||||
|
|
||||||
BL(dispatcher);
|
BL(dispatcher);
|
||||||
DEBUG_ASSERT(GetCodePtr() == host_address_after_return || HasWriteFailed());
|
DEBUG_ASSERT(GetCodePtr() == host_address_after_return || HasWriteFailed());
|
||||||
|
@ -515,26 +528,43 @@ void JitArm64::WriteExit(Arm64Gen::ARM64Reg dest, bool LK, u32 exit_address_afte
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::FakeLKExit(u32 exit_address_after_return)
|
void JitArm64::FakeLKExit(u32 exit_address_after_return, ARM64Reg exit_address_after_return_reg)
|
||||||
{
|
{
|
||||||
if (!m_enable_blr_optimization)
|
if (!m_enable_blr_optimization)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// We may need to fake the BLR stack on inlined CALL instructions.
|
// We may need to fake the BLR stack on inlined CALL instructions.
|
||||||
// Else we can't return to this location any more.
|
// Else we can't return to this location any more.
|
||||||
|
if (exit_address_after_return_reg != ARM64Reg::W30)
|
||||||
|
{
|
||||||
|
// Do not lock W30 if it is the same as the exit address register, since
|
||||||
|
// it's already locked. It'll only get clobbered at the BL (below) where
|
||||||
|
// we do not need its value anymore.
|
||||||
|
// NOTE: This means W30 won't contain the return address anymore after this
|
||||||
|
// function has been called!
|
||||||
gpr.Lock(ARM64Reg::W30);
|
gpr.Lock(ARM64Reg::W30);
|
||||||
ARM64Reg after_reg = gpr.GetReg();
|
}
|
||||||
ARM64Reg code_reg = gpr.GetReg();
|
ARM64Reg after_reg = exit_address_after_return_reg;
|
||||||
|
if (exit_address_after_return_reg == ARM64Reg::INVALID_REG)
|
||||||
|
{
|
||||||
|
after_reg = gpr.GetReg();
|
||||||
MOVI2R(after_reg, exit_address_after_return);
|
MOVI2R(after_reg, exit_address_after_return);
|
||||||
|
}
|
||||||
|
ARM64Reg code_reg = gpr.GetReg();
|
||||||
constexpr s32 adr_offset = sizeof(u32) * 3;
|
constexpr s32 adr_offset = sizeof(u32) * 3;
|
||||||
const u8* host_address_after_return = GetCodePtr() + adr_offset;
|
const u8* host_address_after_return = GetCodePtr() + adr_offset;
|
||||||
ADR(EncodeRegTo64(code_reg), adr_offset);
|
ADR(EncodeRegTo64(code_reg), adr_offset);
|
||||||
STP(IndexType::Pre, EncodeRegTo64(code_reg), EncodeRegTo64(after_reg), ARM64Reg::SP, -16);
|
STP(IndexType::Pre, EncodeRegTo64(code_reg), EncodeRegTo64(after_reg), ARM64Reg::SP, -16);
|
||||||
gpr.Unlock(after_reg, code_reg);
|
gpr.Unlock(code_reg);
|
||||||
|
if (after_reg != exit_address_after_return_reg)
|
||||||
|
gpr.Unlock(after_reg);
|
||||||
|
|
||||||
FixupBranch skip_exit = BL();
|
FixupBranch skip_exit = BL();
|
||||||
DEBUG_ASSERT(GetCodePtr() == host_address_after_return || HasWriteFailed());
|
DEBUG_ASSERT(GetCodePtr() == host_address_after_return || HasWriteFailed());
|
||||||
|
if (exit_address_after_return_reg != ARM64Reg::W30)
|
||||||
|
{
|
||||||
gpr.Unlock(ARM64Reg::W30);
|
gpr.Unlock(ARM64Reg::W30);
|
||||||
|
}
|
||||||
|
|
||||||
// Write the regular exit node after the return.
|
// Write the regular exit node after the return.
|
||||||
JitBlock* b = js.curBlock;
|
JitBlock* b = js.curBlock;
|
||||||
|
|
|
@ -315,8 +315,12 @@ protected:
|
||||||
void EmitStoreMembase(const Arm64Gen::ARM64Reg& msr);
|
void EmitStoreMembase(const Arm64Gen::ARM64Reg& msr);
|
||||||
|
|
||||||
// Exits
|
// Exits
|
||||||
void WriteExit(u32 destination, bool LK = false, u32 exit_address_after_return = 0);
|
void
|
||||||
void WriteExit(Arm64Gen::ARM64Reg dest, bool LK = false, u32 exit_address_after_return = 0);
|
WriteExit(u32 destination, bool LK = false, u32 exit_address_after_return = 0,
|
||||||
|
Arm64Gen::ARM64Reg exit_address_after_return_reg = Arm64Gen::ARM64Reg::INVALID_REG);
|
||||||
|
void
|
||||||
|
WriteExit(Arm64Gen::ARM64Reg dest, bool LK = false, u32 exit_address_after_return = 0,
|
||||||
|
Arm64Gen::ARM64Reg exit_address_after_return_reg = Arm64Gen::ARM64Reg::INVALID_REG);
|
||||||
void WriteExceptionExit(u32 destination, bool only_external = false,
|
void WriteExceptionExit(u32 destination, bool only_external = false,
|
||||||
bool always_exception = false);
|
bool always_exception = false);
|
||||||
void WriteExceptionExit(Arm64Gen::ARM64Reg dest, bool only_external = false,
|
void WriteExceptionExit(Arm64Gen::ARM64Reg dest, bool only_external = false,
|
||||||
|
@ -325,7 +329,9 @@ protected:
|
||||||
void WriteConditionalExceptionExit(int exception, Arm64Gen::ARM64Reg temp_gpr,
|
void WriteConditionalExceptionExit(int exception, Arm64Gen::ARM64Reg temp_gpr,
|
||||||
Arm64Gen::ARM64Reg temp_fpr = Arm64Gen::ARM64Reg::INVALID_REG,
|
Arm64Gen::ARM64Reg temp_fpr = Arm64Gen::ARM64Reg::INVALID_REG,
|
||||||
u64 increment_sp_on_exit = 0);
|
u64 increment_sp_on_exit = 0);
|
||||||
void FakeLKExit(u32 exit_address_after_return);
|
void
|
||||||
|
FakeLKExit(u32 exit_address_after_return,
|
||||||
|
Arm64Gen::ARM64Reg exit_address_after_return_reg = Arm64Gen::ARM64Reg::INVALID_REG);
|
||||||
void WriteBLRExit(Arm64Gen::ARM64Reg dest);
|
void WriteBLRExit(Arm64Gen::ARM64Reg dest);
|
||||||
|
|
||||||
Arm64Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set);
|
Arm64Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set);
|
||||||
|
|
|
@ -79,12 +79,12 @@ void JitArm64::bx(UGeckoInstruction inst)
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(bJITBranchOff);
|
JITDISABLE(bJITBranchOff);
|
||||||
|
|
||||||
|
ARM64Reg WA = ARM64Reg::INVALID_REG;
|
||||||
if (inst.LK)
|
if (inst.LK)
|
||||||
{
|
{
|
||||||
ARM64Reg WA = gpr.GetReg();
|
WA = gpr.GetReg();
|
||||||
MOVI2R(WA, js.compilerPC + 4);
|
MOVI2R(WA, js.compilerPC + 4);
|
||||||
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_LR));
|
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_LR));
|
||||||
gpr.Unlock(WA);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!js.isLastInstruction)
|
if (!js.isLastInstruction)
|
||||||
|
@ -94,8 +94,12 @@ void JitArm64::bx(UGeckoInstruction inst)
|
||||||
// We have to fake the stack as the RET instruction was not
|
// We have to fake the stack as the RET instruction was not
|
||||||
// found in the same block. This is a big overhead, but still
|
// found in the same block. This is a big overhead, but still
|
||||||
// better than calling the dispatcher.
|
// better than calling the dispatcher.
|
||||||
FakeLKExit(js.compilerPC + 4);
|
FakeLKExit(js.compilerPC + 4, WA);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (WA != ARM64Reg::INVALID_REG)
|
||||||
|
gpr.Unlock(WA);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -104,19 +108,24 @@ void JitArm64::bx(UGeckoInstruction inst)
|
||||||
|
|
||||||
if (js.op->branchIsIdleLoop)
|
if (js.op->branchIsIdleLoop)
|
||||||
{
|
{
|
||||||
// make idle loops go faster
|
if (WA != ARM64Reg::INVALID_REG)
|
||||||
ARM64Reg WA = gpr.GetReg();
|
|
||||||
ARM64Reg XA = EncodeRegTo64(WA);
|
|
||||||
|
|
||||||
MOVP2R(XA, &CoreTiming::GlobalIdle);
|
|
||||||
BLR(XA);
|
|
||||||
gpr.Unlock(WA);
|
gpr.Unlock(WA);
|
||||||
|
|
||||||
|
// make idle loops go faster
|
||||||
|
ARM64Reg WB = gpr.GetReg();
|
||||||
|
ARM64Reg XB = EncodeRegTo64(WB);
|
||||||
|
|
||||||
|
MOVP2R(XB, &CoreTiming::GlobalIdle);
|
||||||
|
BLR(XB);
|
||||||
|
gpr.Unlock(WB);
|
||||||
|
|
||||||
WriteExceptionExit(js.op->branchTo);
|
WriteExceptionExit(js.op->branchTo);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
WriteExit(js.op->branchTo, inst.LK, js.compilerPC + 4);
|
WriteExit(js.op->branchTo, inst.LK, js.compilerPC + 4, inst.LK ? WA : ARM64Reg::INVALID_REG);
|
||||||
|
if (WA != ARM64Reg::INVALID_REG)
|
||||||
|
gpr.Unlock(WA);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::bcx(UGeckoInstruction inst)
|
void JitArm64::bcx(UGeckoInstruction inst)
|
||||||
|
@ -125,6 +134,8 @@ void JitArm64::bcx(UGeckoInstruction inst)
|
||||||
JITDISABLE(bJITBranchOff);
|
JITDISABLE(bJITBranchOff);
|
||||||
|
|
||||||
ARM64Reg WA = gpr.GetReg();
|
ARM64Reg WA = gpr.GetReg();
|
||||||
|
ARM64Reg WB = inst.LK ? gpr.GetReg() : WA;
|
||||||
|
|
||||||
FixupBranch pCTRDontBranch;
|
FixupBranch pCTRDontBranch;
|
||||||
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR
|
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR
|
||||||
{
|
{
|
||||||
|
@ -156,7 +167,7 @@ void JitArm64::bcx(UGeckoInstruction inst)
|
||||||
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_LR));
|
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_LR));
|
||||||
}
|
}
|
||||||
|
|
||||||
gpr.Flush(FlushMode::MaintainState, WA);
|
gpr.Flush(FlushMode::MaintainState, WB);
|
||||||
fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG);
|
fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG);
|
||||||
|
|
||||||
if (js.op->branchIsIdleLoop)
|
if (js.op->branchIsIdleLoop)
|
||||||
|
@ -171,7 +182,7 @@ void JitArm64::bcx(UGeckoInstruction inst)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
WriteExit(js.op->branchTo, inst.LK, js.compilerPC + 4);
|
WriteExit(js.op->branchTo, inst.LK, js.compilerPC + 4, inst.LK ? WA : ARM64Reg::INVALID_REG);
|
||||||
}
|
}
|
||||||
|
|
||||||
SwitchToNearCode();
|
SwitchToNearCode();
|
||||||
|
@ -189,6 +200,8 @@ void JitArm64::bcx(UGeckoInstruction inst)
|
||||||
}
|
}
|
||||||
|
|
||||||
gpr.Unlock(WA);
|
gpr.Unlock(WA);
|
||||||
|
if (WB != WA)
|
||||||
|
gpr.Unlock(WB);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::bcctrx(UGeckoInstruction inst)
|
void JitArm64::bcctrx(UGeckoInstruction inst)
|
||||||
|
@ -211,12 +224,12 @@ void JitArm64::bcctrx(UGeckoInstruction inst)
|
||||||
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||||
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||||
|
|
||||||
|
ARM64Reg WB = ARM64Reg::INVALID_REG;
|
||||||
if (inst.LK_3)
|
if (inst.LK_3)
|
||||||
{
|
{
|
||||||
ARM64Reg WB = gpr.GetReg();
|
WB = gpr.GetReg();
|
||||||
MOVI2R(WB, js.compilerPC + 4);
|
MOVI2R(WB, js.compilerPC + 4);
|
||||||
STR(IndexType::Unsigned, WB, PPC_REG, PPCSTATE_OFF_SPR(SPR_LR));
|
STR(IndexType::Unsigned, WB, PPC_REG, PPCSTATE_OFF_SPR(SPR_LR));
|
||||||
gpr.Unlock(WB);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ARM64Reg WA = gpr.GetReg();
|
ARM64Reg WA = gpr.GetReg();
|
||||||
|
@ -224,8 +237,10 @@ void JitArm64::bcctrx(UGeckoInstruction inst)
|
||||||
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_CTR));
|
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_CTR));
|
||||||
AND(WA, WA, LogicalImm(~0x3, 32));
|
AND(WA, WA, LogicalImm(~0x3, 32));
|
||||||
|
|
||||||
WriteExit(WA, inst.LK_3, js.compilerPC + 4);
|
WriteExit(WA, inst.LK_3, js.compilerPC + 4, inst.LK_3 ? WB : ARM64Reg::INVALID_REG);
|
||||||
|
|
||||||
|
if (WB != ARM64Reg::INVALID_REG)
|
||||||
|
gpr.Unlock(WB);
|
||||||
gpr.Unlock(WA);
|
gpr.Unlock(WA);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue