From cec2cb9d38c857bd05c02604e10888d0cdf74e38 Mon Sep 17 00:00:00 2001
From: Fiora
Date: Wed, 8 Oct 2014 11:44:14 -0700
Subject: [PATCH] JIT: micro-optimize cmpXX a bit more for lower latency/code
 size

---
 Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
index 399020b03d..ae3b74ee3d 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
@@ -544,7 +544,16 @@ void Jit64::cmpXX(UGeckoInstruction inst)
     MOV(64, PPCSTATE(cr_val[crf]), R(input));
     // Place the comparison next to the branch for macro-op fusion
     if (merge_branch)
-      TEST(64, R(input), R(input));
+    {
+      // We only need to do a 32-bit compare, since the flags set will be the same as a
+      // sign-extended result.
+      // We should also test against gpr.R(a) if it's bound, since that's one less cycle of
+      // latency (the CPU doesn't have to wait for the movsxd to finish to resolve the branch).
+      if (gpr.R(a).IsSimpleReg())
+        TEST(32, gpr.R(a), gpr.R(a));
+      else
+        TEST(32, R(input), R(input));
+    }
   }
   else
   {
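
Note (illustration, not part of the patch): the first comment relies on TEST's
flag semantics. TEST reg, reg sets ZF when the operand is zero and SF from its
top bit (CF and OF are cleared at either width), and MOVSXD preserves both
zero-ness and sign, so a 32-bit TEST on the un-extended value resolves the
fused branch identically to the old 64-bit TEST on `input`. Below is a minimal
standalone C++ sketch of that equivalence; the helpers `test32`/`test64` are
invented for illustration and just model the SF/ZF a conditional branch would
consume.

#include <cassert>
#include <cstdint>

// Model of the flags a Jcc consumes after "test reg, reg".
struct Flags { bool sf, zf; };

static Flags test32(int32_t x) { return { x < 0, x == 0 }; }  // test eax, eax
static Flags test64(int64_t x) { return { x < 0, x == 0 }; }  // test rax, rax

int main()
{
  const int32_t samples[] = { 0, 1, -1, INT32_MIN, INT32_MAX, 42, -42 };
  for (int32_t x : samples)
  {
    int64_t sext = static_cast<int64_t>(x);  // what movsxd produces
    Flags a = test32(x);
    Flags b = test64(sext);
    assert(a.sf == b.sf && a.zf == b.zf);    // identical branch resolution
  }
  return 0;
}

The gpr.R(a) branch of the new code is purely a latency win: when the guest
register is already bound to a host register, the TEST can issue without
waiting on the MOVSXD that produced `input`, so the fused TEST+Jcc pair
resolves one cycle earlier, exactly as the patch comment says.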