From b8b4b4a3835a54b562499fe077a7fd0d291b7ffc Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 9 Aug 2018 09:40:12 +0200 Subject: [PATCH] PowerPC: More idle loop detections. --- Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp | 14 ++++++- .../Core/PowerPC/JitArm64/JitArm64_Branch.cpp | 15 ++++++- Source/Core/Core/PowerPC/PPCAnalyst.cpp | 39 ++++++++++--------- 3 files changed, 48 insertions(+), 20 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp index 3e6ef5e255..2ebb8e4175 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp @@ -285,7 +285,19 @@ void Jit64::bclrx(UGeckoInstruction inst) RCForkGuard fpr_guard = fpr.Fork(); gpr.Flush(); fpr.Flush(); - WriteBLRExit(); + + if (js.op->branchIsIdleLoop) + { + ABI_PushRegistersAndAdjustStack({}, 0); + ABI_CallFunction(CoreTiming::Idle); + ABI_PopRegistersAndAdjustStack({}, 0); + MOV(32, PPCSTATE(pc), Imm32(js.op->branchTo)); + WriteExceptionExit(); + } + else + { + WriteBLRExit(); + } } if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp index 08fc9c8aa4..97a81df2d4 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp @@ -278,7 +278,20 @@ void JitArm64::bclrx(UGeckoInstruction inst) gpr.Flush(conditional ? FlushMode::FLUSH_MAINTAIN_STATE : FlushMode::FLUSH_ALL); fpr.Flush(conditional ? 
FlushMode::FLUSH_MAINTAIN_STATE : FlushMode::FLUSH_ALL); - WriteBLRExit(WA); + if (js.op->branchIsIdleLoop) + { + // make idle loops go faster + ARM64Reg XA = EncodeRegTo64(WA); + + MOVP2R(XA, &CoreTiming::Idle); + BLR(XA); + + WriteExceptionExit(js.op->branchTo); + } + else + { + WriteBLRExit(WA); + } gpr.Unlock(WA); diff --git a/Source/Core/Core/PowerPC/PPCAnalyst.cpp b/Source/Core/Core/PowerPC/PPCAnalyst.cpp index 3866b2e672..7b18aa378f 100644 --- a/Source/Core/Core/PowerPC/PPCAnalyst.cpp +++ b/Source/Core/Core/PowerPC/PPCAnalyst.cpp @@ -782,9 +782,6 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, std: SetInstructionStats(block, &code[i], opinfo, static_cast(i)); - code[i].branchIsIdleLoop = - code[i].branchTo == block->m_address && IsBusyWaitLoop(block, code, i); - bool follow = false; bool conditional_continue = false; @@ -793,7 +790,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, std: // If it is small, the performance will be down. // If it is big, the size of generated code will be big and // cache clearning will happen many times. - if (enable_follow && HasOption(OPTION_BRANCH_FOLLOW) && numFollows < BRANCH_FOLLOWING_THRESHOLD) + if (enable_follow && HasOption(OPTION_BRANCH_FOLLOW)) { if (inst.OPCD == 18 && block_size > 1) { @@ -816,22 +813,25 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, std: caller = i; } } - else if (inst.OPCD == 19 && inst.SUBOP10 == 16 && !inst.LK && found_call && - (inst.BO & BO_DONT_DECREMENT_FLAG) && (inst.BO & BO_DONT_CHECK_CONDITION)) + else if (inst.OPCD == 19 && inst.SUBOP10 == 16 && !inst.LK && found_call) { - // bclrx with unconditional branch = return - // Follow it if we can propagate the LR value of the last CALL instruction. - // Through it would be easy to track the upper level of call/return, - // we can't guarantee the LR value. 
The PPC ABI forces all functions to push - // the LR value on the stack as there are no spare registers. So we'd need - // to check all store instruction to not alias with the stack. - follow = true; code[i].branchTo = code[caller].address + 4; - found_call = false; - code[i].skip = true; + if ((inst.BO & BO_DONT_DECREMENT_FLAG) && (inst.BO & BO_DONT_CHECK_CONDITION) && + numFollows < BRANCH_FOLLOWING_THRESHOLD) + { + // bclrx with unconditional branch = return + // Follow it if we can propagate the LR value of the last CALL instruction. + // Though it would be easy to track the upper level of call/return, + // we can't guarantee the LR value. The PPC ABI forces all functions to push + // the LR value on the stack as there are no spare registers. So we'd need + // to check all store instructions to not alias with the stack. + follow = true; + found_call = false; + code[i].skip = true; - // Skip the RET, so also don't generate the stack entry for the BLR optimization. - code[caller].skipLRStack = true; + // Skip the RET, so also don't generate the stack entry for the BLR optimization. + code[caller].skipLRStack = true; + } } else if (inst.OPCD == 31 && inst.SUBOP10 == 467) { @@ -874,7 +874,10 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, std: } } - if (follow) + code[i].branchIsIdleLoop = + code[i].branchTo == block->m_address && IsBusyWaitLoop(block, code, i); + + if (follow && numFollows < BRANCH_FOLLOWING_THRESHOLD) { // Follow the unconditional branch. numFollows++;