PowerPC: More idle loop detections.
This commit is contained in:
parent
55db7c7a05
commit
b8b4b4a383
|
@ -285,8 +285,20 @@ void Jit64::bclrx(UGeckoInstruction inst)
|
|||
RCForkGuard fpr_guard = fpr.Fork();
|
||||
gpr.Flush();
|
||||
fpr.Flush();
|
||||
|
||||
if (js.op->branchIsIdleLoop)
|
||||
{
|
||||
ABI_PushRegistersAndAdjustStack({}, 0);
|
||||
ABI_CallFunction(CoreTiming::Idle);
|
||||
ABI_PopRegistersAndAdjustStack({}, 0);
|
||||
MOV(32, PPCSTATE(pc), Imm32(js.op->branchTo));
|
||||
WriteExceptionExit();
|
||||
}
|
||||
else
|
||||
{
|
||||
WriteBLRExit();
|
||||
}
|
||||
}
|
||||
|
||||
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
|
||||
SetJumpTarget(pConditionDontBranch);
|
||||
|
|
|
@ -278,7 +278,20 @@ void JitArm64::bclrx(UGeckoInstruction inst)
|
|||
gpr.Flush(conditional ? FlushMode::FLUSH_MAINTAIN_STATE : FlushMode::FLUSH_ALL);
|
||||
fpr.Flush(conditional ? FlushMode::FLUSH_MAINTAIN_STATE : FlushMode::FLUSH_ALL);
|
||||
|
||||
if (js.op->branchIsIdleLoop)
|
||||
{
|
||||
// make idle loops go faster
|
||||
ARM64Reg XA = EncodeRegTo64(WA);
|
||||
|
||||
MOVP2R(XA, &CoreTiming::Idle);
|
||||
BLR(XA);
|
||||
|
||||
WriteExceptionExit(js.op->branchTo);
|
||||
}
|
||||
else
|
||||
{
|
||||
WriteBLRExit(WA);
|
||||
}
|
||||
|
||||
gpr.Unlock(WA);
|
||||
|
||||
|
|
|
@ -782,9 +782,6 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, std:
|
|||
|
||||
SetInstructionStats(block, &code[i], opinfo, static_cast<u32>(i));
|
||||
|
||||
code[i].branchIsIdleLoop =
|
||||
code[i].branchTo == block->m_address && IsBusyWaitLoop(block, code, i);
|
||||
|
||||
bool follow = false;
|
||||
|
||||
bool conditional_continue = false;
|
||||
|
@ -793,7 +790,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, std:
|
|||
// If it is small, the performance will be down.
|
||||
// If it is big, the size of generated code will be big and
|
||||
// cache clearing will happen many times.
|
||||
if (enable_follow && HasOption(OPTION_BRANCH_FOLLOW) && numFollows < BRANCH_FOLLOWING_THRESHOLD)
|
||||
if (enable_follow && HasOption(OPTION_BRANCH_FOLLOW))
|
||||
{
|
||||
if (inst.OPCD == 18 && block_size > 1)
|
||||
{
|
||||
|
@ -816,8 +813,11 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, std:
|
|||
caller = i;
|
||||
}
|
||||
}
|
||||
else if (inst.OPCD == 19 && inst.SUBOP10 == 16 && !inst.LK && found_call &&
|
||||
(inst.BO & BO_DONT_DECREMENT_FLAG) && (inst.BO & BO_DONT_CHECK_CONDITION))
|
||||
else if (inst.OPCD == 19 && inst.SUBOP10 == 16 && !inst.LK && found_call)
|
||||
{
|
||||
code[i].branchTo = code[caller].address + 4;
|
||||
if ((inst.BO & BO_DONT_DECREMENT_FLAG) && (inst.BO & BO_DONT_CHECK_CONDITION) &&
|
||||
numFollows < BRANCH_FOLLOWING_THRESHOLD)
|
||||
{
|
||||
// bclrx with unconditional branch = return
|
||||
// Follow it if we can propagate the LR value of the last CALL instruction.
|
||||
|
@ -826,13 +826,13 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, std:
|
|||
// the LR value on the stack as there are no spare registers. So we'd need
|
||||
// to check all store instruction to not alias with the stack.
|
||||
follow = true;
|
||||
code[i].branchTo = code[caller].address + 4;
|
||||
found_call = false;
|
||||
code[i].skip = true;
|
||||
|
||||
// Skip the RET, so also don't generate the stack entry for the BLR optimization.
|
||||
code[caller].skipLRStack = true;
|
||||
}
|
||||
}
|
||||
else if (inst.OPCD == 31 && inst.SUBOP10 == 467)
|
||||
{
|
||||
// mtspr, skip CALL/RET merging as LR is overwritten.
|
||||
|
@ -874,7 +874,10 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, std:
|
|||
}
|
||||
}
|
||||
|
||||
if (follow)
|
||||
code[i].branchIsIdleLoop =
|
||||
code[i].branchTo == block->m_address && IsBusyWaitLoop(block, code, i);
|
||||
|
||||
if (follow && numFollows < BRANCH_FOLLOWING_THRESHOLD)
|
||||
{
|
||||
// Follow the unconditional branch.
|
||||
numFollows++;
|
||||
|
|
Loading…
Reference in New Issue