Replace BitUtils with C++20: Counting Zeroes

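With the upgrade to C++20, std::countl_zero and std::countr_zero (along with std::countl_one and std::countr_one, where the old code counted zeroes of an inverted value) can replace the home-spun implementations from the BitUtils.h header.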
2022-10-10 04:03:15 -05:00 · 2022-10-10 04:03:15 -05:00 · 05bebee802
parent d853da3b0b
commit 05bebee802
8 changed files with 15 additions and 119 deletions
--- a/Source/Core/Common/Arm64Emitter.h
+++ b/Source/Core/Common/Arm64Emitter.h
@ -559,11 +559,11 @@ struct LogicalImm
    // pick the next sequence of ones. This ensures we get a complete element
    // that has not been cut-in-half due to rotation across the word boundary.

-    const int rotation = Common::CountTrailingZeros(value & (value + 1));
+    const int rotation = std::countr_zero(value & (value + 1));
    const u64 normalized = std::rotr(value, rotation);

-    const int element_size = Common::CountTrailingZeros(normalized & (normalized + 1));
-    const int ones = Common::CountTrailingZeros(~normalized);
+    const int element_size = std::countr_zero(normalized & (normalized + 1));
+    const int ones = std::countr_one(normalized);

    // Check the value is repeating; also ensures element size is a power of two.

@ -578,8 +578,8 @@ struct LogicalImm
    // segment.

    r = static_cast<u8>((element_size - rotation) & (element_size - 1));
-    s = (((~element_size + 1) << 1) | (ones - 1)) & 0x3f;
-    n = (element_size >> 6) & 1;
+    s = static_cast<u8>((((~element_size + 1) << 1) | (ones - 1)) & 0x3f);
+    n = Common::ExtractBit<6>(element_size);

    valid = true;
  }
--- a/Source/Core/Common/BitUtils.h
+++ b/Source/Core/Common/BitUtils.h
@ -11,10 +11,6 @@
 #include <initializer_list>
 #include <type_traits>

-#ifdef _MSC_VER
-#include <intrin.h>
-#endif
-
 namespace Common
 {
 ///
@ -316,105 +312,4 @@ T ExpandValue(T value, size_t left_shift_amount)
  return (value << left_shift_amount) |
         (T(-ExtractBit<0>(value)) >> (BitSize<T>() - left_shift_amount));
 }
-
-template <typename T>
-constexpr int CountLeadingZerosConst(T value)
-{
-  int result = sizeof(T) * 8;
-  while (value)
-  {
-    result--;
-    value >>= 1;
-  }
-  return result;
-}
-
-constexpr int CountLeadingZeros(uint64_t value)
-{
-#if defined(__GNUC__)
-  return value ? __builtin_clzll(value) : 64;
-#elif defined(_MSC_VER)
-  if (std::is_constant_evaluated())
-  {
-    return CountLeadingZerosConst(value);
-  }
-  else
-  {
-    unsigned long index = 0;
-    return _BitScanReverse64(&index, value) ? 63 - index : 64;
-  }
-#else
-  return CountLeadingZerosConst(value);
-#endif
-}
-
-constexpr int CountLeadingZeros(uint32_t value)
-{
-#if defined(__GNUC__)
-  return value ? __builtin_clz(value) : 32;
-#elif defined(_MSC_VER)
-  if (std::is_constant_evaluated())
-  {
-    return CountLeadingZerosConst(value);
-  }
-  else
-  {
-    unsigned long index = 0;
-    return _BitScanReverse(&index, value) ? 31 - index : 32;
-  }
-#else
-  return CountLeadingZerosConst(value);
-#endif
-}
-
-template <typename T>
-constexpr int CountTrailingZerosConst(T value)
-{
-  int result = sizeof(T) * 8;
-  while (value)
-  {
-    result--;
-    value <<= 1;
-  }
-  return result;
-}
-
-constexpr int CountTrailingZeros(uint64_t value)
-{
-#if defined(__GNUC__)
-  return value ? __builtin_ctzll(value) : 64;
-#elif defined(_MSC_VER)
-  if (std::is_constant_evaluated())
-  {
-    return CountTrailingZerosConst(value);
-  }
-  else
-  {
-    unsigned long index = 0;
-    return _BitScanForward64(&index, value) ? index : 64;
-  }
-#else
-  return CountTrailingZerosConst(value);
-#endif
-}
-
-constexpr int CountTrailingZeros(uint32_t value)
-{
-#if defined(__GNUC__)
-  return value ? __builtin_ctz(value) : 32;
-#elif defined(_MSC_VER)
-  if (std::is_constant_evaluated())
-  {
-    return CountTrailingZerosConst(value);
-  }
-  else
-  {
-    unsigned long index = 0;
-    return _BitScanForward(&index, value) ? index : 32;
-  }
-#else
-  return CountTrailingZerosConst(value);
-#endif
-}
-
 }  // namespace Common
--- a/Source/Core/Common/MathUtil.h
+++ b/Source/Core/Common/MathUtil.h
@ -4,12 +4,12 @@
 #pragma once

 #include <algorithm>
+#include <bit>
 #include <cmath>
 #include <limits>
 #include <type_traits>
 #include <vector>

-#include "Common/BitUtils.h"
 #include "Common/CommonTypes.h"

 namespace MathUtil
@ -193,5 +193,5 @@ float MathFloatVectorSum(const std::vector<float>&);
 // Rounds down. 0 -> undefined
 constexpr int IntLog2(u64 val)
 {
-  return 63 - Common::CountLeadingZeros(val);
+  return 63 - std::countl_zero(val);
 }
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp
@ -209,7 +209,7 @@ void Interpreter::cmpl(UGeckoInstruction inst)

 void Interpreter::cntlzwx(UGeckoInstruction inst)
 {
-  rGPR[inst.RA] = u32(Common::CountLeadingZeros(rGPR[inst.RS]));
+  rGPR[inst.RA] = u32(std::countl_zero(rGPR[inst.RS]));

  if (inst.Rc)
    Helper_UpdateCR0(rGPR[inst.RA]);
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
@ -2660,7 +2660,7 @@ void Jit64::cntlzwx(UGeckoInstruction inst)

  if (gpr.IsImm(s))
  {
-    gpr.SetImmediate32(a, Common::CountLeadingZeros(gpr.Imm32(s)));
+    gpr.SetImmediate32(a, static_cast<u32>(std::countl_zero(gpr.Imm32(s))));
  }
  else
  {
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
@ -528,7 +528,7 @@ void JitArm64::cntlzwx(UGeckoInstruction inst)

  if (gpr.IsImm(s))
  {
-    gpr.SetImmediate(a, Common::CountLeadingZeros(gpr.GetImm(s)));
+    gpr.SetImmediate(a, static_cast<u32>(std::countl_zero(gpr.GetImm(s))));
    if (inst.Rc)
      ComputeRC0(gpr.GetImm(a));
  }
--- a/Source/Core/VideoBackends/Metal/MTLStateTracker.mm
+++ b/Source/Core/VideoBackends/Metal/MTLStateTracker.mm
@ -4,10 +4,10 @@
 #include "VideoBackends/Metal/MTLStateTracker.h"

 #include <algorithm>
+#include <bit>
 #include <mutex>

 #include "Common/Assert.h"
-#include "Common/BitUtils.h"

 #include "VideoBackends/Metal/MTLObjectCache.h"
 #include "VideoBackends/Metal/MTLPerfQuery.h"
@ -713,8 +713,8 @@ static constexpr NSString* LABEL_UTIL = @"Utility Draw";
 static NSRange RangeOfBits(u32 value)
 {
  ASSERT(value && "Value must be nonzero");
-  u32 low = Common::CountTrailingZeros(value);
-  u32 high = 31 - Common::CountLeadingZeros(value);
+  int low = std::countr_zero(value);
+  int high = 31 - std::countl_zero(value);
  return NSMakeRange(low, high + 1 - low);
 }

--- a/Source/Core/VideoCommon/VideoCommon.h
+++ b/Source/Core/VideoCommon/VideoCommon.h
@ -4,6 +4,7 @@
 #pragma once

 #include <algorithm>
+#include <bit>

 #include "Common/BitUtils.h"
 #include "Common/CommonTypes.h"
@ -82,7 +83,7 @@ inline u32 CompressZ16(u32 z24depth, DepthFormat format)
  // If exponent is at the MAX (3, 7, or 12) then the next bit might still be a one, and can't
  // be skipped, so the mantissa simply contains the next 14/13/12 bits

-  u32 leading_ones = Common::CountLeadingZeros((~z24depth) << 8);
+  u32 leading_ones = static_cast<u32>(std::countl_one(z24depth << 8));
  bool next_bit_is_one = false;  // AKA: Did we clamp leading_ones?
  u32 exp_bits;