JitArm64: Optimize rlwinmx.

The new code adds fast paths for the most common usages, each of which fits in a single instruction with one-cycle latency.
2017-08-11 23:32:36 +02:00 · 2017-08-11 23:32:36 +02:00 · 5ee7f86199
parent 0a8e1bd864
commit 5ee7f86199
1 changed files with 22 additions and 5 deletions
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
@@ -516,11 +516,28 @@ void JitArm64::rlwinmx(UGeckoInstruction inst)

  gpr.BindToRegister(a, a == s);

-  ARM64Reg WA = gpr.GetReg();
-  ArithOption Shift(gpr.R(s), ST_ROR, 32 - inst.SH);
-  MOVI2R(WA, mask);
-  AND(gpr.R(a), WA, gpr.R(s), Shift);
-  gpr.Unlock(WA);
+  if (!inst.SH)
+  {
+    // Immediate mask
+    ANDI2R(gpr.R(a), gpr.R(s), mask);
+  }
+  else if (inst.ME == 31 && 31 < inst.SH + inst.MB)
+  {
+    // Bit select of the upper part
+    UBFX(gpr.R(a), gpr.R(s), 32 - inst.SH, 32 - inst.MB);
+  }
+  else if (inst.ME == 31 - inst.SH && 32 > inst.SH + inst.MB)
+  {
+    // Bit select of the lower part
+    UBFIZ(gpr.R(a), gpr.R(s), inst.SH, 32 - inst.SH - inst.MB);
+  }
+  else
+  {
+    ARM64Reg WA = gpr.GetReg();
+    MOVI2R(WA, mask);
+    AND(gpr.R(a), WA, gpr.R(s), ArithOption(gpr.R(s), ST_ROR, 32 - inst.SH));
+    gpr.Unlock(WA);
+  }

  if (inst.Rc)
    ComputeRC(gpr.R(a), 0);