From cec2cb9d38c857bd05c02604e10888d0cdf74e38 Mon Sep 17 00:00:00 2001
From: Fiora
Date: Wed, 8 Oct 2014 11:44:14 -0700
Subject: [PATCH] JIT: micro-optimize cmpXX a bit more for lower latency/code
 size

---
 Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
index 399020b03d..ae3b74ee3d 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
@@ -544,7 +544,16 @@ void Jit64::cmpXX(UGeckoInstruction inst)
     MOV(64, PPCSTATE(cr_val[crf]), R(input));
     // Place the comparison next to the branch for macro-op fusion
     if (merge_branch)
-      TEST(64, R(input), R(input));
+    {
+      // We only need to do a 32-bit compare, since the flags set will be the same as a
+      // sign-extended result.
+      // We should also test against gpr.R(a) if it's bound, since that's one less cycle of
+      // latency (the CPU doesn't have to wait for the movsxd to finish to resolve the branch).
+      if (gpr.R(a).IsSimpleReg())
+        TEST(32, gpr.R(a), gpr.R(a));
+      else
+        TEST(32, R(input), R(input));
+    }
   }
   else
   {
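
Note (illustration, not part of the patch): the first comment relies on TEST's
flag semantics. TEST reg, reg sets ZF when the operand is zero and SF from its
top bit (CF and OF are cleared at either width), and MOVSXD preserves both
zero-ness and sign, so a 32-bit TEST on the un-extended value resolves the
fused branch identically to the old 64-bit TEST on `input`. Below is a minimal
standalone C++ sketch of that equivalence; the helpers `test32`/`test64` are
invented for illustration and just model the SF/ZF a conditional branch would
consume.

#include <cassert>
#include <cstdint>

// Model of the flags a Jcc consumes after "test reg, reg".
struct Flags { bool sf, zf; };

static Flags test32(int32_t x) { return { x < 0, x == 0 }; }  // test eax, eax
static Flags test64(int64_t x) { return { x < 0, x == 0 }; }  // test rax, rax

int main()
{
  const int32_t samples[] = { 0, 1, -1, INT32_MIN, INT32_MAX, 42, -42 };
  for (int32_t x : samples)
  {
    int64_t sext = static_cast<int64_t>(x);  // what movsxd produces
    Flags a = test32(x);
    Flags b = test64(sext);
    assert(a.sf == b.sf && a.zf == b.zf);    // identical branch resolution
  }
  return 0;
}

The gpr.R(a) branch of the new code is purely a latency win: when the guest
register is already bound to a host register, the TEST can issue without
waiting on the MOVSXD that produced `input`, so the fused TEST+Jcc pair
resolves one cycle earlier, exactly as the patch comment says.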