PowerPC: More idle loop detections.
This commit is contained in:
parent
55db7c7a05
commit
b8b4b4a383
|
@ -285,7 +285,19 @@ void Jit64::bclrx(UGeckoInstruction inst)
|
||||||
RCForkGuard fpr_guard = fpr.Fork();
|
RCForkGuard fpr_guard = fpr.Fork();
|
||||||
gpr.Flush();
|
gpr.Flush();
|
||||||
fpr.Flush();
|
fpr.Flush();
|
||||||
WriteBLRExit();
|
|
||||||
|
if (js.op->branchIsIdleLoop)
|
||||||
|
{
|
||||||
|
ABI_PushRegistersAndAdjustStack({}, 0);
|
||||||
|
ABI_CallFunction(CoreTiming::Idle);
|
||||||
|
ABI_PopRegistersAndAdjustStack({}, 0);
|
||||||
|
MOV(32, PPCSTATE(pc), Imm32(js.op->branchTo));
|
||||||
|
WriteExceptionExit();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
WriteBLRExit();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
|
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
|
||||||
|
|
|
@ -278,7 +278,20 @@ void JitArm64::bclrx(UGeckoInstruction inst)
|
||||||
gpr.Flush(conditional ? FlushMode::FLUSH_MAINTAIN_STATE : FlushMode::FLUSH_ALL);
|
gpr.Flush(conditional ? FlushMode::FLUSH_MAINTAIN_STATE : FlushMode::FLUSH_ALL);
|
||||||
fpr.Flush(conditional ? FlushMode::FLUSH_MAINTAIN_STATE : FlushMode::FLUSH_ALL);
|
fpr.Flush(conditional ? FlushMode::FLUSH_MAINTAIN_STATE : FlushMode::FLUSH_ALL);
|
||||||
|
|
||||||
WriteBLRExit(WA);
|
if (js.op->branchIsIdleLoop)
|
||||||
|
{
|
||||||
|
// make idle loops go faster
|
||||||
|
ARM64Reg XA = EncodeRegTo64(WA);
|
||||||
|
|
||||||
|
MOVP2R(XA, &CoreTiming::Idle);
|
||||||
|
BLR(XA);
|
||||||
|
|
||||||
|
WriteExceptionExit(js.op->branchTo);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
WriteBLRExit(WA);
|
||||||
|
}
|
||||||
|
|
||||||
gpr.Unlock(WA);
|
gpr.Unlock(WA);
|
||||||
|
|
||||||
|
|
|
@ -782,9 +782,6 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, std:
|
||||||
|
|
||||||
SetInstructionStats(block, &code[i], opinfo, static_cast<u32>(i));
|
SetInstructionStats(block, &code[i], opinfo, static_cast<u32>(i));
|
||||||
|
|
||||||
code[i].branchIsIdleLoop =
|
|
||||||
code[i].branchTo == block->m_address && IsBusyWaitLoop(block, code, i);
|
|
||||||
|
|
||||||
bool follow = false;
|
bool follow = false;
|
||||||
|
|
||||||
bool conditional_continue = false;
|
bool conditional_continue = false;
|
||||||
|
@ -793,7 +790,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, std:
|
||||||
// If it is small, the performance will be down.
|
// If it is small, the performance will be down.
|
||||||
// If it is big, the size of generated code will be big and
|
// If it is big, the size of generated code will be big and
|
||||||
// cache clearing will happen many times.
|
// cache clearing will happen many times.
|
||||||
if (enable_follow && HasOption(OPTION_BRANCH_FOLLOW) && numFollows < BRANCH_FOLLOWING_THRESHOLD)
|
if (enable_follow && HasOption(OPTION_BRANCH_FOLLOW))
|
||||||
{
|
{
|
||||||
if (inst.OPCD == 18 && block_size > 1)
|
if (inst.OPCD == 18 && block_size > 1)
|
||||||
{
|
{
|
||||||
|
@ -816,22 +813,25 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, std:
|
||||||
caller = i;
|
caller = i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (inst.OPCD == 19 && inst.SUBOP10 == 16 && !inst.LK && found_call &&
|
else if (inst.OPCD == 19 && inst.SUBOP10 == 16 && !inst.LK && found_call)
|
||||||
(inst.BO & BO_DONT_DECREMENT_FLAG) && (inst.BO & BO_DONT_CHECK_CONDITION))
|
|
||||||
{
|
{
|
||||||
// bclrx with unconditional branch = return
|
|
||||||
// Follow it if we can propagate the LR value of the last CALL instruction.
|
|
||||||
// Though it would be easy to track the upper level of call/return,
|
|
||||||
// we can't guarantee the LR value. The PPC ABI forces all functions to push
|
|
||||||
// the LR value on the stack as there are no spare registers. So we'd need
|
|
||||||
// to check all store instruction to not alias with the stack.
|
|
||||||
follow = true;
|
|
||||||
code[i].branchTo = code[caller].address + 4;
|
code[i].branchTo = code[caller].address + 4;
|
||||||
found_call = false;
|
if ((inst.BO & BO_DONT_DECREMENT_FLAG) && (inst.BO & BO_DONT_CHECK_CONDITION) &&
|
||||||
code[i].skip = true;
|
numFollows < BRANCH_FOLLOWING_THRESHOLD)
|
||||||
|
{
|
||||||
|
// bclrx with unconditional branch = return
|
||||||
|
// Follow it if we can propagate the LR value of the last CALL instruction.
|
||||||
|
// Though it would be easy to track the upper level of call/return,
|
||||||
|
// we can't guarantee the LR value. The PPC ABI forces all functions to push
|
||||||
|
// the LR value on the stack as there are no spare registers. So we'd need
|
||||||
|
// to check all store instruction to not alias with the stack.
|
||||||
|
follow = true;
|
||||||
|
found_call = false;
|
||||||
|
code[i].skip = true;
|
||||||
|
|
||||||
// Skip the RET, so also don't generate the stack entry for the BLR optimization.
|
// Skip the RET, so also don't generate the stack entry for the BLR optimization.
|
||||||
code[caller].skipLRStack = true;
|
code[caller].skipLRStack = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (inst.OPCD == 31 && inst.SUBOP10 == 467)
|
else if (inst.OPCD == 31 && inst.SUBOP10 == 467)
|
||||||
{
|
{
|
||||||
|
@ -874,7 +874,10 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, std:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (follow)
|
code[i].branchIsIdleLoop =
|
||||||
|
code[i].branchTo == block->m_address && IsBusyWaitLoop(block, code, i);
|
||||||
|
|
||||||
|
if (follow && numFollows < BRANCH_FOLLOWING_THRESHOLD)
|
||||||
{
|
{
|
||||||
// Follow the unconditional branch.
|
// Follow the unconditional branch.
|
||||||
numFollows++;
|
numFollows++;
|
||||||
|
|
Loading…
Reference in New Issue