JIT: use BLR optimization to avoid ANDing LR with 0xFFFFFFFC

Should save roughly one instruction per blr.
2014-10-08 20:06:46 -07:00 · 2014-10-08 20:06:46 -07:00 · 7388c62439
parent cec2cb9d38
commit 7388c62439
3 changed files with 8 additions and 2 deletions
--- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
@@ -48,6 +48,7 @@ void Jit64AsmRoutineManager::Generate()
 		ABI_PopRegistersAndAdjustStack({}, 0);
 		FixupBranch skipToRealDispatch = J(SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging); //skip the sync and compare first time
 		dispatcherMispredictedBLR = GetCodePtr();
+		AND(32, PPCSTATE(pc), Imm32(0xFFFFFFFC));

 		#if 0 // debug mispredicts
 		MOV(32, R(ABI_PARAM1), MDisp(RSP, 8)); // guessed_pc
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp
@@ -229,7 +229,11 @@ void Jit64::bclrx(UGeckoInstruction inst)
 #endif

 	MOV(32, R(RSCRATCH), PPCSTATE_LR);
-	AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
+	// With the BLR optimization on, we can skip masking LR here because WriteBLRExit handles it for us. Since we
+	// only ever push divisible-by-four instruction addresses onto the stack, a return address that matches is
+	// already aligned. If it doesn't match, the mispredicted-BLR code handles the fixup.
+	if (!m_enable_blr_optimization)
+		AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
 	if (inst.LK)
 		MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4));

--- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
@@ -355,7 +355,8 @@ void Jit64::DoMergedBranch()
 	else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx
 	{
 		MOV(32, R(RSCRATCH), M(&LR));
-		AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
+		if (!m_enable_blr_optimization)
+			AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
 		if (js.next_inst.LK)
 			MOV(32, M(&LR), Imm32(js.next_compilerPC + 4));
 		WriteBLRExit();