BitUtils: Add CountLeadingZeros

2020-12-27 22:37:37 +00:00 · 2020-12-27 22:37:37 +00:00 · d695fcb126
parent 4705af59c6
commit d695fcb126
4 changed files with 52 additions and 45 deletions
--- a/Source/Core/Common/Arm64Emitter.cpp
+++ b/Source/Core/Common/Arm64Emitter.cpp
@ -26,20 +26,6 @@ namespace
 const int kWRegSizeInBits = 32;
 const int kXRegSizeInBits = 64;
 // The below few functions are taken from V8.
 // Counts the consecutive zero bits at the top of the low `width` bits of
 // `value`, scanning downwards from bit (width - 1). Yields `width` when none
 // of those bits are set.
 int CountLeadingZeros(uint64_t value, int width)
 {
   // TODO(jbramley): Optimize this for ARM64 hosts.
   int zeros = 0;
   for (uint64_t probe = 1ULL << (width - 1); zeros < width; probe >>= 1)
   {
     // Stop at the first set bit encountered from the top.
     if ((value & probe) != 0)
       break;
     ++zeros;
   }
   return zeros;
 }
 uint64_t LargestPowerOf2Divisor(uint64_t value)
 {
  return value & -(int64_t)value;
@ -155,8 +141,8 @@ bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned
    // Compute the repeat distance d, and set up a bitmask covering the basic
    // unit of repetition (i.e. a word with the bottom d bits set). Also, in all
    // of these cases the N bit of the output will be zero.
-    clz_a = CountLeadingZeros(a, kXRegSizeInBits);
+    clz_a = Common::CountLeadingZeros(a);
-    int clz_c = CountLeadingZeros(c, kXRegSizeInBits);
+    int clz_c = Common::CountLeadingZeros(c);
    d = clz_a - clz_c;
    mask = ((UINT64_C(1) << d) - 1);
    out_n = 0;
@ -182,7 +168,7 @@ bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned
      // of set bits in our word, meaning that we have the trivial case of
      // d == 64 and only one 'repetition'. Set up all the same variables as in
      // the general case above, and set the N bit in the output.
-      clz_a = CountLeadingZeros(a, kXRegSizeInBits);
+      clz_a = Common::CountLeadingZeros(a);
      d = 64;
      mask = ~UINT64_C(0);
      out_n = 1;
@ -214,7 +200,7 @@ bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned
      0x5555555555555555UL,
  }};
-  int multiplier_idx = CountLeadingZeros(d, kXRegSizeInBits) - 57;
+  int multiplier_idx = Common::CountLeadingZeros((u64)d) - 57;
  // Ensure that the index to the multipliers array is within bounds.
  DEBUG_ASSERT((multiplier_idx >= 0) && (static_cast<size_t>(multiplier_idx) < multipliers.size()));
@ -233,7 +219,7 @@ bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned
  // Count the set bits in our basic stretch. The special case of clz(0) == -1
  // makes the answer come out right for stretches that reach the very top of
  // the word (e.g. numbers like 0xffffc00000000000).
-  int clz_b = (b == 0) ? -1 : CountLeadingZeros(b, kXRegSizeInBits);
+  int clz_b = (b == 0) ? -1 : Common::CountLeadingZeros(b);
  int s = clz_a - clz_b;
  // Decide how many bits to rotate right by, to put the low bit of that basic
--- a/Source/Core/Common/BitUtils.h
+++ b/Source/Core/Common/BitUtils.h
@ -11,6 +11,10 @@
 #include <initializer_list>
 #include <type_traits>
 #ifdef _MSC_VER
 #include <intrin.h>
 #endif
 namespace Common
 {
 ///
@ -357,4 +361,44 @@ T ExpandValue(T value, size_t left_shift_amount)
         (T(-ExtractBit<0>(value)) >> (BitSize<T>() - left_shift_amount));
 }
 /// Counts the zero bits above the most significant set bit of a 64-bit value.
 ///
 /// @param value The value to inspect.
 /// @return The number of leading zero bits. An input of zero yields 64 in the
 ///         GCC/Clang, _BitScanReverse64 and portable branches.
 constexpr int CountLeadingZeros(uint64_t value)
 {
 #if defined(__GNUC__)
   // The result of __builtin_clzll is undefined for 0, so handle that input
   // explicitly to agree with the other branches.
   return value != 0 ? __builtin_clzll(value) : 64;
 #elif defined(_MSC_VER) && defined(_M_ARM_64)
   // NOTE(review): MSVC documents the ARM64 macro as _M_ARM64 - confirm that
   // _M_ARM_64 is ever defined, otherwise this branch is never selected.
   return _CountLeadingZeros64(value);
 #elif defined(_MSC_VER) && defined(_M_X86_64)
   // NOTE(review): MSVC documents the x64 macro as _M_X64 - confirm that
   // _M_X86_64 is ever defined, otherwise this branch is never selected.
   // Initialised because constexpr functions may not declare uninitialised
   // locals before C++20.
   unsigned long index = 0;
   // _BitScanReverse64 returns zero when no bit is set; map that to 64.
   return _BitScanReverse64(&index, value) ? 63 - index : 64;
 #else
   // Portable fallback: shift right until the value is exhausted, removing one
   // from the count for every bit position consumed.
   int result = 64;
   while (value)
   {
     result--;
     value >>= 1;
   }
   return result;
 #endif
 }
 /// Counts the zero bits above the most significant set bit of a 32-bit value.
 ///
 /// @param value The value to inspect.
 /// @return The number of leading zero bits. An input of zero yields 32 in the
 ///         GCC/Clang, _BitScanReverse and portable branches.
 constexpr int CountLeadingZeros(uint32_t value)
 {
 #if defined(__GNUC__)
   // The result of __builtin_clz is undefined for 0, so handle that input
   // explicitly to agree with the other branches.
   return value != 0 ? __builtin_clz(value) : 32;
 #elif defined(_MSC_VER) && defined(_M_ARM_64)
   // NOTE(review): MSVC documents the ARM64 macro as _M_ARM64 - confirm that
   // _M_ARM_64 is ever defined, otherwise this branch is never selected.
   return _CountLeadingZeros(value);
 #elif defined(_MSC_VER) && defined(_M_X86_64)
   // NOTE(review): MSVC documents the x64 macro as _M_X64 - confirm that
   // _M_X86_64 is ever defined, otherwise this branch is never selected.
   // Initialised because constexpr functions may not declare uninitialised
   // locals before C++20.
   unsigned long index = 0;
   // _BitScanReverse returns zero when no bit is set; map that to 32.
   return _BitScanReverse(&index, value) ? 31 - index : 32;
 #else
   // Portable fallback: shift right until the value is exhausted, removing one
   // from the count for every bit position consumed.
   int result = 32;
   while (value)
   {
     result--;
     value >>= 1;
   }
   return result;
 #endif
 }
 }  // namespace Common
--- a/Source/Core/Common/MathUtil.h
+++ b/Source/Core/Common/MathUtil.h
@ -9,12 +9,9 @@
 #include <type_traits>
 #include <vector>
 #include "Common/BitUtils.h"
 #include "Common/CommonTypes.h"
 #ifdef _MSC_VER
 #include <intrin.h>
 #endif
 namespace MathUtil
 {
 constexpr double TAU = 6.2831853071795865;
@ -154,21 +151,5 @@ float MathFloatVectorSum(const std::vector<float>&);
 // Rounds down. 0 -> undefined
 inline int IntLog2(u64 val)
 {
   // The index of the highest set bit is 63 minus the number of leading zero
   // bits. The per-compiler branches that used to live here are superseded by
   // the shared helper, so only this single return remains.
   return 63 - Common::CountLeadingZeros(val);
 }
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
@ -368,11 +368,7 @@ void JitArm64::cntlzwx(UGeckoInstruction inst)
  if (gpr.IsImm(s))
  {
-#ifdef _MSC_VER
+    gpr.SetImmediate(a, Common::CountLeadingZeros(gpr.GetImm(s)));
    gpr.SetImmediate(a, _CountLeadingZeros(gpr.GetImm(s)));
 #else
    gpr.SetImmediate(a, __builtin_clz(gpr.GetImm(s)));
 #endif
    if (inst.Rc)
      ComputeRC0(gpr.GetImm(a));
  }