Dep: Update vixl to 662828c
commit f0c2832d03 (parent d45e218da7)
@@ -59,26 +59,30 @@ if(CPU_ARCH_ARM64)
     include/vixl/aarch64/constants-aarch64.h
     include/vixl/aarch64/cpu-aarch64.h
     include/vixl/aarch64/cpu-features-auditor-aarch64.h
+    include/vixl/aarch64/debugger-aarch64.h
     include/vixl/aarch64/decoder-aarch64.h
+    include/vixl/aarch64/decoder-constants-aarch64.h
+    include/vixl/aarch64/decoder-visitor-map-aarch64.h
     include/vixl/aarch64/disasm-aarch64.h
     include/vixl/aarch64/instructions-aarch64.h
-    include/vixl/aarch64/instrument-aarch64.h
     include/vixl/aarch64/macro-assembler-aarch64.h
     include/vixl/aarch64/operands-aarch64.h
+    include/vixl/aarch64/registers-aarch64.h
     include/vixl/aarch64/simulator-aarch64.h
     include/vixl/aarch64/simulator-constants-aarch64.h
     src/aarch64/assembler-aarch64.cc
+    src/aarch64/assembler-sve-aarch64.cc
     src/aarch64/cpu-aarch64.cc
     src/aarch64/cpu-features-auditor-aarch64.cc
     src/aarch64/decoder-aarch64.cc
     src/aarch64/disasm-aarch64.cc
     src/aarch64/instructions-aarch64.cc
-    src/aarch64/instrument-aarch64.cc
     src/aarch64/logic-aarch64.cc
     src/aarch64/macro-assembler-aarch64.cc
+    src/aarch64/macro-assembler-sve-aarch64.cc
     src/aarch64/operands-aarch64.cc
     src/aarch64/pointer-auth-aarch64.cc
-    src/aarch64/simulator-aarch64.cc
+    src/aarch64/registers-aarch64.cc
   )
   target_include_directories(vixl PRIVATE
     ${CMAKE_CURRENT_SOURCE_DIR}/include/vixl/aarch64
@@ -27,10 +27,10 @@
 #ifndef VIXL_AARCH32_ASSEMBLER_AARCH32_H_
 #define VIXL_AARCH32_ASSEMBLER_AARCH32_H_
 
-#include "../assembler-base-vixl.h"
+#include "assembler-base-vixl.h"
 
-#include "instructions-aarch32.h"
-#include "location-aarch32.h"
+#include "aarch32/instructions-aarch32.h"
+#include "aarch32/location-aarch32.h"
 
 namespace vixl {
 namespace aarch32 {
@@ -32,7 +32,7 @@ extern "C" {
 #include <stdint.h>
 }
 
-#include "../globals-vixl.h"
+#include "globals-vixl.h"
 
 
 namespace vixl {
@@ -33,8 +33,14 @@ extern "C" {
 
 #include <iomanip>
 
-#include "constants-aarch32.h"
-#include "operands-aarch32.h"
+#include "aarch32/constants-aarch32.h"
+#include "aarch32/operands-aarch32.h"
 
+// Microsoft Visual C++ defines a `mvn` macro that conflicts with our own
+// definition.
+#if defined(_MSC_VER) && defined(mvn)
+#undef mvn
+#endif
+
 namespace vixl {
 namespace aarch32 {
@@ -34,13 +34,14 @@ extern "C" {
 #include <algorithm>
 #include <ostream>
 
-#include "../code-buffer-vixl.h"
-#include "../utils-vixl.h"
+#include "code-buffer-vixl.h"
+#include "utils-vixl.h"
+#include "aarch32/constants-aarch32.h"
 
-#include "constants-aarch32.h"
-
-#ifdef __arm__
+#if defined(__arm__) && !defined(__SOFTFP__)
 #define HARDFLOAT __attribute__((noinline, pcs("aapcs-vfp")))
+#elif defined(_MSC_VER)
+#define HARDFLOAT __declspec(noinline)
 #else
 #define HARDFLOAT __attribute__((noinline))
 #endif
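The new #elif branch makes HARDFLOAT meaningful under MSVC as well. A minimal
sketch of a declaration that relies on it (hypothetical function name; on
__arm__ hard-float builds the macro also forces the AAPCS-VFP calling
convention):

// Hypothetical use of the HARDFLOAT macro defined in the hunk above: the
// annotation expands between the return type and the function name.
float HARDFLOAT AddFloats(float a, float b);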
@@ -492,6 +493,8 @@ class RegisterList {
   }
   Register GetFirstAvailableRegister() const;
   bool IsEmpty() const { return list_ == 0; }
+  bool IsSingleRegister() const { return IsPowerOf2(list_); }
+  int GetCount() const { return CountSetBits(list_); }
   static RegisterList Union(const RegisterList& list_1,
                             const RegisterList& list_2) {
     return RegisterList(list_1.list_ | list_2.list_);
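The two helpers added above rest on classic bit tricks. Sketches of typical
implementations (an assumption: vixl's own IsPowerOf2 and CountSetBits live in
utils-vixl.h and may be written differently):

#include <cstdint>

// Exactly one register set: clearing the lowest set bit must leave zero.
constexpr bool IsPowerOf2(uint32_t v) {
  return (v != 0) && ((v & (v - 1)) == 0);
}

// Kernighan's population count: each iteration clears one set bit.
inline int CountSetBits(uint32_t v) {
  int count = 0;
  while (v != 0) {
    v &= v - 1;
    ++count;
  }
  return count;
}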
@@ -1039,7 +1042,9 @@ class Sign {
   const char* GetName() const { return (IsPlus() ? "" : "-"); }
   bool IsPlus() const { return sign_ == plus; }
   bool IsMinus() const { return sign_ == minus; }
-  int32_t ApplyTo(uint32_t value) { return IsPlus() ? value : -value; }
+  int32_t ApplyTo(uint32_t value) {
+    return IsPlus() ? value : UnsignedNegate(value);
+  }
 
  private:
   SignType sign_;
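UnsignedNegate replaces a bare unary minus on an unsigned operand, which MSVC
flags with warning C4146. A sketch of the shape such a helper typically has
(an assumption, not necessarily vixl's exact definition):

#include <cstdint>

// Two's-complement negation spelled out, so no unary minus is applied to an
// unsigned value; ~value + 1 equals -value modulo 2^32.
constexpr uint32_t UnsignedNegate(uint32_t value) { return ~value + 1; }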
@@ -36,9 +36,9 @@ extern "C" {
 #include <iomanip>
 #include <list>
 
-#include "../invalset-vixl.h"
-#include "../pool-manager.h"
-#include "../utils-vixl.h"
+#include "invalset-vixl.h"
+#include "pool-manager.h"
+#include "utils-vixl.h"
 
 #include "constants-aarch32.h"
 #include "instructions-aarch32.h"
@@ -58,12 +58,12 @@ class Location : public LocationBase<int32_t> {
   // with the assembler methods for generating instructions, but will never
   // be handled by the pool manager.
   Location()
-      : LocationBase<int32_t>(kRawLocation, 1 /* dummy size*/),
+      : LocationBase<int32_t>(kRawLocation, 1 /* placeholder size*/),
         referenced_(false) {}
 
   typedef int32_t Offset;
 
-  ~Location() {
+  ~Location() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
 #ifdef VIXL_DEBUG
     if (IsReferenced() && !IsBound()) {
       VIXL_ABORT_WITH_MSG("Location, label or literal used but not bound.\n");
@@ -217,7 +217,7 @@ class Location : public LocationBase<int32_t> {
 
  protected:
   // Types passed to LocationBase. Must be distinct for unbound Locations (not
-  // relevant for bound locations, as they don't have a correspoding
+  // relevant for bound locations, as they don't have a corresponding
   // PoolObject).
   static const int kRawLocation = 0;  // Will not be used by the pool manager.
   static const int kVeneerType = 1;
@@ -28,15 +28,15 @@
 #ifndef VIXL_AARCH32_MACRO_ASSEMBLER_AARCH32_H_
 #define VIXL_AARCH32_MACRO_ASSEMBLER_AARCH32_H_
 
-#include "../code-generation-scopes-vixl.h"
-#include "../macro-assembler-interface.h"
-#include "../pool-manager-impl.h"
-#include "../pool-manager.h"
-#include "../utils-vixl.h"
+#include "code-generation-scopes-vixl.h"
+#include "macro-assembler-interface.h"
+#include "pool-manager-impl.h"
+#include "pool-manager.h"
+#include "utils-vixl.h"
 
-#include "assembler-aarch32.h"
-#include "instructions-aarch32.h"
-#include "operands-aarch32.h"
+#include "aarch32/assembler-aarch32.h"
+#include "aarch32/instructions-aarch32.h"
+#include "aarch32/operands-aarch32.h"
 
 namespace vixl {
 
@@ -268,7 +268,8 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
         generate_simulator_code_(VIXL_AARCH32_GENERATE_SIMULATOR_CODE),
         pool_end_(NULL) {
 #ifdef VIXL_DEBUG
-    SetAllowMacroInstructions(true);
+    SetAllowMacroInstructions(  // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall)
+        true);
 #else
     USE(allow_macro_instructions_);
 #endif
@@ -283,7 +284,8 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
         generate_simulator_code_(VIXL_AARCH32_GENERATE_SIMULATOR_CODE),
         pool_end_(NULL) {
 #ifdef VIXL_DEBUG
-    SetAllowMacroInstructions(true);
+    SetAllowMacroInstructions(  // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall)
+        true);
 #endif
   }
   MacroAssembler(byte* buffer, size_t size, InstructionSet isa = kDefaultISA)
@@ -296,7 +298,8 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
         generate_simulator_code_(VIXL_AARCH32_GENERATE_SIMULATOR_CODE),
         pool_end_(NULL) {
 #ifdef VIXL_DEBUG
-    SetAllowMacroInstructions(true);
+    SetAllowMacroInstructions(  // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall)
+        true);
 #endif
   }
 
@@ -399,13 +402,13 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
       VIXL_ASSERT(GetBuffer()->Is32bitAligned());
     }
     // If we need to add padding, check if we have to emit the pool.
-    const int32_t pc = GetCursorOffset();
-    if (label->Needs16BitPadding(pc)) {
+    const int32_t cursor = GetCursorOffset();
+    if (label->Needs16BitPadding(cursor)) {
       const int kPaddingBytes = 2;
-      if (pool_manager_.MustEmit(pc, kPaddingBytes)) {
-        int32_t new_pc = pool_manager_.Emit(this, pc, kPaddingBytes);
-        USE(new_pc);
-        VIXL_ASSERT(new_pc == GetCursorOffset());
+      if (pool_manager_.MustEmit(cursor, kPaddingBytes)) {
+        int32_t new_cursor = pool_manager_.Emit(this, cursor, kPaddingBytes);
+        USE(new_cursor);
+        VIXL_ASSERT(new_cursor == GetCursorOffset());
       }
     }
     pool_manager_.Bind(this, label, GetCursorOffset());
@@ -427,30 +430,30 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
                                Location* location,
                                Condition* cond = NULL) {
     int size = info->size;
-    int32_t pc = GetCursorOffset();
+    int32_t cursor = GetCursorOffset();
     // If we need to emit a branch over the instruction, take this into account.
     if ((cond != NULL) && NeedBranch(cond)) {
       size += kBranchSize;
-      pc += kBranchSize;
+      cursor += kBranchSize;
     }
-    int32_t from = pc;
+    int32_t from = cursor;
     from += IsUsingT32() ? kT32PcDelta : kA32PcDelta;
     if (info->pc_needs_aligning) from = AlignDown(from, 4);
     int32_t min = from + info->min_offset;
    int32_t max = from + info->max_offset;
-    ForwardReference<int32_t> temp_ref(pc,
+    ForwardReference<int32_t> temp_ref(cursor,
                                        info->size,
                                        min,
                                        max,
                                        info->alignment);
     if (pool_manager_.MustEmit(GetCursorOffset(), size, &temp_ref, location)) {
-      int32_t new_pc = pool_manager_.Emit(this,
-                                          GetCursorOffset(),
-                                          info->size,
-                                          &temp_ref,
-                                          location);
-      USE(new_pc);
-      VIXL_ASSERT(new_pc == GetCursorOffset());
+      int32_t new_cursor = pool_manager_.Emit(this,
+                                              GetCursorOffset(),
+                                              info->size,
+                                              &temp_ref,
+                                              location);
+      USE(new_cursor);
+      VIXL_ASSERT(new_cursor == GetCursorOffset());
     }
   }
 
@@ -461,13 +464,13 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
     // into account, as well as potential 16-bit padding needed to reach the
     // minimum accessible location.
     int alignment = literal->GetMaxAlignment();
-    int32_t pc = GetCursorOffset();
-    int total_size = AlignUp(pc, alignment) - pc + literal->GetSize();
-    if (literal->Needs16BitPadding(pc)) total_size += 2;
-    if (pool_manager_.MustEmit(pc, total_size)) {
-      int32_t new_pc = pool_manager_.Emit(this, pc, total_size);
-      USE(new_pc);
-      VIXL_ASSERT(new_pc == GetCursorOffset());
+    int32_t cursor = GetCursorOffset();
+    int total_size = AlignUp(cursor, alignment) - cursor + literal->GetSize();
+    if (literal->Needs16BitPadding(cursor)) total_size += 2;
+    if (pool_manager_.MustEmit(cursor, total_size)) {
+      int32_t new_cursor = pool_manager_.Emit(this, cursor, total_size);
+      USE(new_cursor);
+      VIXL_ASSERT(new_cursor == GetCursorOffset());
     }
     pool_manager_.Bind(this, literal, GetCursorOffset());
     literal->EmitPoolObject(this);
@@ -2894,8 +2897,13 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
     VIXL_ASSERT(OutsideITBlock());
     MacroEmissionCheckScope guard(this);
     ITScope it_scope(this, &cond, guard);
+    if (registers.IsSingleRegister() &&
+        (!IsUsingT32() || !registers.IsR0toR7orPC())) {
+      pop(cond, registers.GetFirstAvailableRegister());
+    } else if (!registers.IsEmpty()) {
       pop(cond, registers);
     }
+  }
   void Pop(RegisterList registers) { Pop(al, registers); }
 
   void Pop(Condition cond, Register rt) {
@@ -2914,8 +2922,13 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
     VIXL_ASSERT(OutsideITBlock());
     MacroEmissionCheckScope guard(this);
     ITScope it_scope(this, &cond, guard);
+    if (registers.IsSingleRegister() && !registers.Includes(sp) &&
+        (!IsUsingT32() || !registers.IsR0toR7orLR())) {
+      push(cond, registers.GetFirstAvailableRegister());
+    } else if (!registers.IsEmpty()) {
       push(cond, registers);
     }
+  }
   void Push(RegisterList registers) { Push(al, registers); }
 
   void Push(Condition cond, Register rt) {
@@ -2924,8 +2937,13 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
     VIXL_ASSERT(OutsideITBlock());
     MacroEmissionCheckScope guard(this);
     ITScope it_scope(this, &cond, guard);
+    if (IsUsingA32() && rt.IsSP()) {
+      // Only the A32 multiple-register form can push sp.
+      push(cond, RegisterList(rt));
+    } else {
       push(cond, rt);
     }
+  }
   void Push(Register rt) { Push(al, rt); }
 
   void Qadd(Condition cond, Register rd, Register rm, Register rn) {
@@ -11170,10 +11188,11 @@ class UseScratchRegisterScope {
   uint32_t old_available_;      // kRRegister
   uint64_t old_available_vfp_;  // kVRegister
 
-  VIXL_DEBUG_NO_RETURN UseScratchRegisterScope(const UseScratchRegisterScope&) {
+  VIXL_NO_RETURN_IN_DEBUG_MODE UseScratchRegisterScope(
+      const UseScratchRegisterScope&) {
     VIXL_UNREACHABLE();
   }
-  VIXL_DEBUG_NO_RETURN void operator=(const UseScratchRegisterScope&) {
+  VIXL_NO_RETURN_IN_DEBUG_MODE void operator=(const UseScratchRegisterScope&) {
     VIXL_UNREACHABLE();
   }
 };
@@ -28,7 +28,7 @@
 #ifndef VIXL_AARCH32_OPERANDS_AARCH32_H_
 #define VIXL_AARCH32_OPERANDS_AARCH32_H_
 
-#include "instructions-aarch32.h"
+#include "aarch32/instructions-aarch32.h"
 
 namespace vixl {
 namespace aarch32 {
@@ -54,28 +54,16 @@ class Operand {
   // This is allowed to be an implicit constructor because Operand is
   // a wrapper class that doesn't normally perform any type conversion.
   Operand(uint32_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoReg),
-        shift_(LSL),
-        amount_(0),
-        rs_(NoReg) {}
+      : imm_(immediate), rm_(NoReg), shift_(LSL), amount_(0), rs_(NoReg) {}
   Operand(int32_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoReg),
-        shift_(LSL),
-        amount_(0),
-        rs_(NoReg) {}
+      : imm_(immediate), rm_(NoReg), shift_(LSL), amount_(0), rs_(NoReg) {}
 
   // rm
   // where rm is the base register
   // This is allowed to be an implicit constructor because Operand is
   // a wrapper class that doesn't normally perform any type conversion.
   Operand(Register rm)  // NOLINT(runtime/explicit)
-      : imm_(0),
-        rm_(rm),
-        shift_(LSL),
-        amount_(0),
-        rs_(NoReg) {
+      : imm_(0), rm_(rm), shift_(LSL), amount_(0), rs_(NoReg) {
     VIXL_ASSERT(rm_.IsValid());
   }
 
@@ -202,7 +190,7 @@ class Operand {
   }
 
  private:
-  // Forbid implicitely creating operands around types that cannot be encoded
+  // Forbid implicitly creating operands around types that cannot be encoded
   // into a uint32_t without loss.
 #if __cplusplus >= 201103L
   Operand(int64_t) = delete;  // NOLINT(runtime/explicit)
@@ -245,22 +233,18 @@ class NeonImmediate {
   // This is allowed to be an implicit constructor because NeonImmediate is
   // a wrapper class that doesn't normally perform any type conversion.
   NeonImmediate(uint32_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        immediate_type_(I32) {}
+      : imm_(immediate), immediate_type_(I32) {}
   NeonImmediate(int immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        immediate_type_(I32) {}
+      : imm_(immediate), immediate_type_(I32) {}
 
   // { #<immediate> }
   // where <immediate> is a 64 bit number
   // This is allowed to be an implicit constructor because NeonImmediate is
   // a wrapper class that doesn't normally perform any type conversion.
   NeonImmediate(int64_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        immediate_type_(I64) {}
+      : imm_(immediate), immediate_type_(I64) {}
   NeonImmediate(uint64_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        immediate_type_(I64) {}
+      : imm_(immediate), immediate_type_(I64) {}
 
   // { #<immediate> }
   // where <immediate> is a non zero floating point number which can be encoded
@@ -268,11 +252,9 @@ class NeonImmediate {
   // This is allowed to be an implicit constructor because NeonImmediate is
   // a wrapper class that doesn't normally perform any type conversion.
   NeonImmediate(float immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        immediate_type_(F32) {}
+      : imm_(immediate), immediate_type_(F32) {}
   NeonImmediate(double immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        immediate_type_(F64) {}
+      : imm_(immediate), immediate_type_(F64) {}
 
   NeonImmediate(const NeonImmediate& src)
       : imm_(src.imm_), immediate_type_(src.immediate_type_) {}
@@ -311,7 +293,7 @@ class NeonImmediate {
 
   bool IsInteger32() const { return immediate_type_.Is(I32); }
   bool IsInteger64() const { return immediate_type_.Is(I64); }
-  bool IsInteger() const { return IsInteger32() | IsInteger64(); }
+  bool IsInteger() const { return IsInteger32() || IsInteger64(); }
   bool IsFloat() const { return immediate_type_.Is(F32); }
   bool IsDouble() const { return immediate_type_.Is(F64); }
   bool IsFloatZero() const {
@@ -374,29 +356,21 @@ std::ostream& operator<<(std::ostream& os, const NeonImmediate& operand);
 class NeonOperand {
  public:
   NeonOperand(int32_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoDReg) {}
+      : imm_(immediate), rm_(NoDReg) {}
   NeonOperand(uint32_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoDReg) {}
+      : imm_(immediate), rm_(NoDReg) {}
   NeonOperand(int64_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoDReg) {}
+      : imm_(immediate), rm_(NoDReg) {}
   NeonOperand(uint64_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoDReg) {}
+      : imm_(immediate), rm_(NoDReg) {}
   NeonOperand(float immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoDReg) {}
+      : imm_(immediate), rm_(NoDReg) {}
   NeonOperand(double immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoDReg) {}
+      : imm_(immediate), rm_(NoDReg) {}
   NeonOperand(const NeonImmediate& imm)  // NOLINT(runtime/explicit)
-      : imm_(imm),
-        rm_(NoDReg) {}
+      : imm_(imm), rm_(NoDReg) {}
   NeonOperand(const VRegister& rm)  // NOLINT(runtime/explicit)
-      : imm_(0),
-        rm_(rm) {
+      : imm_(0), rm_(rm) {
     VIXL_ASSERT(rm_.IsValid());
   }
 
@@ -641,7 +615,7 @@ class ImmediateVorn : public ImmediateVorr {
 // - a shifted index register <Rm>, <shift> #<amount>
 //
 // The index register may have an associated {+/-} sign,
-// which if ommitted, defaults to + .
+// which if omitted, defaults to + .
 //
 // We have two constructors for the offset:
 //
@@ -105,7 +105,7 @@ class ABI {
 
   // Stage C.1
   if (is_floating_point_type && (NSRN_ < 8)) {
-    return GenericOperand(FPRegister(NSRN_++, size * kBitsPerByte));
+    return GenericOperand(VRegister(NSRN_++, size * kBitsPerByte));
   }
   // Stages C.2, C.3, and C.4: Unsupported. Caught by the assertions above.
   // Stages C.5 and C.6
@@ -159,8 +159,8 @@ template <>
 inline GenericOperand ABI::GetReturnGenericOperand<void>() const {
   return GenericOperand();
 }
-}
-}  // namespace vixl::aarch64
+}  // namespace aarch64
+}  // namespace vixl
 
 #endif  // VIXL_AARCH64_ABI_AARCH64_H_
 
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -27,13 +27,219 @@
 #ifndef VIXL_CPU_AARCH64_H
 #define VIXL_CPU_AARCH64_H
 
+#include "../cpu-features.h"
 #include "../globals-vixl.h"
 
 #include "instructions-aarch64.h"
+#include "simulator-aarch64.h"
 
+#ifndef VIXL_INCLUDE_TARGET_AARCH64
+// The supporting .cc file is only compiled when the A64 target is selected.
+// Throw an explicit error now to avoid a harder-to-debug linker error later.
+//
+// These helpers _could_ work on any AArch64 host, even when generating AArch32
+// code, but we don't support this because the available features may differ
+// between AArch32 and AArch64 on the same platform, so basing AArch32 code
+// generation on aarch64::CPU features is probably broken.
+#error cpu-aarch64.h requires VIXL_INCLUDE_TARGET_AARCH64 (scons target=a64).
+#endif
+
 namespace vixl {
 namespace aarch64 {
 
+// A CPU ID register, for use with CPUFeatures::kIDRegisterEmulation. Fields
+// specific to each register are described in relevant subclasses.
+class IDRegister {
+ protected:
+  explicit IDRegister(uint64_t value = 0) : value_(value) {}
+
+  class Field {
+   public:
+    enum Type { kUnsigned, kSigned };
+
+    static const int kMaxWidthInBits = 4;
+
+    // This needs to be constexpr so that fields have "constant initialisation".
+    // This avoids initialisation order problems when these values are used to
+    // (dynamically) initialise static variables, etc.
+    explicit constexpr Field(int lsb,
+                             int bitWidth = kMaxWidthInBits,
+                             Type type = kUnsigned)
+        : lsb_(lsb), bitWidth_(bitWidth), type_(type) {}
+
+    int GetWidthInBits() const { return bitWidth_; }
+    int GetLsb() const { return lsb_; }
+    int GetMsb() const { return lsb_ + GetWidthInBits() - 1; }
+    Type GetType() const { return type_; }
+
+   private:
+    int lsb_;
+    int bitWidth_;
+    Type type_;
+  };
+
+ public:
+  // Extract the specified field, performing sign-extension for signed fields.
+  // This allows us to implement the 'value >= number' detection mechanism
+  // recommended by the Arm ARM, for both signed and unsigned fields.
+  int Get(Field field) const;
+
+ private:
+  uint64_t value_;
+};
+
+class AA64PFR0 : public IDRegister {
+ public:
+  explicit AA64PFR0(uint64_t value) : IDRegister(value) {}
+
+  CPUFeatures GetCPUFeatures() const;
+
+ private:
+  static const Field kFP;
+  static const Field kAdvSIMD;
+  static const Field kRAS;
+  static const Field kSVE;
+  static const Field kDIT;
+  static const Field kCSV2;
+  static const Field kCSV3;
+};
+
+class AA64PFR1 : public IDRegister {
+ public:
+  explicit AA64PFR1(uint64_t value) : IDRegister(value) {}
+
+  CPUFeatures GetCPUFeatures() const;
+
+ private:
+  static const Field kBT;
+  static const Field kSSBS;
+  static const Field kMTE;
+  static const Field kSME;
+};
+
+class AA64ISAR0 : public IDRegister {
+ public:
+  explicit AA64ISAR0(uint64_t value) : IDRegister(value) {}
+
+  CPUFeatures GetCPUFeatures() const;
+
+ private:
+  static const Field kAES;
+  static const Field kSHA1;
+  static const Field kSHA2;
+  static const Field kCRC32;
+  static const Field kAtomic;
+  static const Field kRDM;
+  static const Field kSHA3;
+  static const Field kSM3;
+  static const Field kSM4;
+  static const Field kDP;
+  static const Field kFHM;
+  static const Field kTS;
+  static const Field kRNDR;
+};
+
+class AA64ISAR1 : public IDRegister {
+ public:
+  explicit AA64ISAR1(uint64_t value) : IDRegister(value) {}
+
+  CPUFeatures GetCPUFeatures() const;
+
+ private:
+  static const Field kDPB;
+  static const Field kAPA;
+  static const Field kAPI;
+  static const Field kJSCVT;
+  static const Field kFCMA;
+  static const Field kLRCPC;
+  static const Field kGPA;
+  static const Field kGPI;
+  static const Field kFRINTTS;
+  static const Field kSB;
+  static const Field kSPECRES;
+  static const Field kBF16;
+  static const Field kDGH;
+  static const Field kI8MM;
+};
+
+class AA64ISAR2 : public IDRegister {
+ public:
+  explicit AA64ISAR2(uint64_t value) : IDRegister(value) {}
+
+  CPUFeatures GetCPUFeatures() const;
+
+ private:
+  static const Field kWFXT;
+  static const Field kRPRES;
+  static const Field kMOPS;
+  static const Field kCSSC;
+};
+
+class AA64MMFR0 : public IDRegister {
+ public:
+  explicit AA64MMFR0(uint64_t value) : IDRegister(value) {}
+
+  CPUFeatures GetCPUFeatures() const;
+
+ private:
+  static const Field kECV;
+};
+
+class AA64MMFR1 : public IDRegister {
+ public:
+  explicit AA64MMFR1(uint64_t value) : IDRegister(value) {}
+
+  CPUFeatures GetCPUFeatures() const;
+
+ private:
+  static const Field kLO;
+  static const Field kAFP;
+};
+
+class AA64MMFR2 : public IDRegister {
+ public:
+  explicit AA64MMFR2(uint64_t value) : IDRegister(value) {}
+
+  CPUFeatures GetCPUFeatures() const;
+
+ private:
+  static const Field kAT;
+};
+
+class AA64ZFR0 : public IDRegister {
+ public:
+  explicit AA64ZFR0(uint64_t value) : IDRegister(value) {}
+
+  CPUFeatures GetCPUFeatures() const;
+
+ private:
+  static const Field kSVEver;
+  static const Field kAES;
+  static const Field kBitPerm;
+  static const Field kBF16;
+  static const Field kSHA3;
+  static const Field kSM4;
+  static const Field kI8MM;
+  static const Field kF32MM;
+  static const Field kF64MM;
+};
+
+class AA64SMFR0 : public IDRegister {
+ public:
+  explicit AA64SMFR0(uint64_t value) : IDRegister(value) {}
+
+  CPUFeatures GetCPUFeatures() const;
+
+ private:
+  static const Field kSMEf32f32;
+  static const Field kSMEb16f32;
+  static const Field kSMEf16f32;
+  static const Field kSMEi8i32;
+  static const Field kSMEf64f64;
+  static const Field kSMEi16i64;
+  static const Field kSMEfa64;
+};
+
 class CPU {
  public:
   // Initialise CPU support.
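A hedged sketch of the extraction IDRegister::Get performs, per the comment
above (an assumed implementation; the real one lives in cpu-aarch64.cc):

#include <cstdint>

// Pull out the bits [lsb, lsb + width) and sign-extend when the field is
// signed, so callers can compare field values with ordinary signed compares.
int64_t ExtractIDField(uint64_t reg, int lsb, int width, bool is_signed) {
  uint64_t bits = (reg >> lsb) & ((UINT64_C(1) << width) - 1);
  if (is_signed && ((bits >> (width - 1)) & 1) != 0) {
    bits |= ~UINT64_C(0) << width;  // Propagate the sign bit upwards.
  }
  return static_cast<int64_t>(bits);
}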
@@ -45,6 +251,25 @@ class CPU {
   // safely run.
   static void EnsureIAndDCacheCoherency(void *address, size_t length);
 
+  // Read and interpret the ID registers. This requires
+  // CPUFeatures::kIDRegisterEmulation, and therefore cannot be called on
+  // non-AArch64 platforms.
+  static CPUFeatures InferCPUFeaturesFromIDRegisters();
+
+  // Read and interpret CPUFeatures reported by the OS. Failed queries (or
+  // unsupported platforms) return an empty list. Note that this is
+  // indistinguishable from a successful query on a platform that advertises no
+  // features.
+  //
+  // Non-AArch64 hosts are considered to be unsupported platforms, and this
+  // function returns an empty list.
+  static CPUFeatures InferCPUFeaturesFromOS(
+      CPUFeatures::QueryIDRegistersOption option =
+          CPUFeatures::kQueryIDRegistersIfAvailable);
+
+  // Query the SVE vector length. This requires CPUFeatures::kSVE.
+  static int ReadSVEVectorLengthInBits();
+
   // Handle tagged pointers.
   template <typename T>
   static T SetPointerTag(T pointer, uint64_t tag) {
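How the new query helpers compose, as a sketch (only APIs declared in this
hunk plus CPUFeatures::Has are used; the surrounding plumbing is assumed):

#include "aarch64/cpu-aarch64.h"

void ConfigureForHost(vixl::CPUFeatures* features) {
  // Ask the OS, falling back to reading the emulated ID registers where
  // available (the default QueryIDRegistersOption).
  *features = vixl::aarch64::CPU::InferCPUFeaturesFromOS();
  if (features->Has(vixl::CPUFeatures::kSVE)) {
    // Requires kSVE; e.g. used to size buffers for generated SVE code.
    int vl_bits = vixl::aarch64::CPU::ReadSVEVectorLengthInBits();
    (void)vl_bits;
  }
}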
@@ -72,6 +297,27 @@ class CPU {
   }
 
  private:
+#define VIXL_AARCH64_ID_REG_LIST(V) \
+  V(AA64PFR0, "ID_AA64PFR0_EL1") \
+  V(AA64PFR1, "ID_AA64PFR1_EL1") \
+  V(AA64ISAR0, "ID_AA64ISAR0_EL1") \
+  V(AA64ISAR1, "ID_AA64ISAR1_EL1") \
+  V(AA64MMFR0, "ID_AA64MMFR0_EL1") \
+  V(AA64MMFR1, "ID_AA64MMFR1_EL1") \
+  /* These registers are RES0 in the baseline Arm8.0. We can always safely */ \
+  /* read them, but some compilers don't accept the symbolic names. */ \
+  V(AA64SMFR0, "S3_0_C0_C4_5") \
+  V(AA64ISAR2, "S3_0_C0_C6_2") \
+  V(AA64MMFR2, "S3_0_C0_C7_2") \
+  V(AA64ZFR0, "S3_0_C0_C4_4")
+
+#define VIXL_READ_ID_REG(NAME, MRS_ARG) static NAME Read##NAME();
+  // On native AArch64 platforms, read the named CPU ID registers. These require
+  // CPUFeatures::kIDRegisterEmulation, and should not be called on non-AArch64
+  // platforms.
+  VIXL_AARCH64_ID_REG_LIST(VIXL_READ_ID_REG)
+#undef VIXL_READ_ID_REG
+
   // Return the content of the cache type register.
   static uint32_t GetCacheType();
 
@@ -27,10 +27,14 @@
 #ifndef VIXL_AARCH64_CPU_FEATURES_AUDITOR_AARCH64_H_
 #define VIXL_AARCH64_CPU_FEATURES_AUDITOR_AARCH64_H_
 
+#include <functional>
 #include <iostream>
+#include <unordered_map>
 
 #include "../cpu-features.h"
 
 #include "decoder-aarch64.h"
+#include "decoder-visitor-map-aarch64.h"
+
 namespace vixl {
 namespace aarch64 {
@@ -100,15 +104,16 @@ class CPUFeaturesAuditor : public DecoderVisitor {
     SetAvailableFeatures(available);
   }
 
-  // Declare all Visitor functions.
-#define DECLARE(A) \
-  virtual void Visit##A(const Instruction* instr) VIXL_OVERRIDE;
-  VISITOR_LIST(DECLARE)
-#undef DECLARE
+  virtual void Visit(Metadata* metadata,
+                     const Instruction* instr) VIXL_OVERRIDE;
 
  private:
   class RecordInstructionFeaturesScope;
 
+#define DECLARE(A) virtual void Visit##A(const Instruction* instr);
+  VISITOR_LIST(DECLARE)
+#undef DECLARE
+
   void LoadStoreHelper(const Instruction* instr);
   void LoadStorePairHelper(const Instruction* instr);
 
@@ -117,6 +122,12 @@ class CPUFeaturesAuditor : public DecoderVisitor {
   CPUFeatures available_;
 
   Decoder* decoder_;
+
+  using FormToVisitorFnMap = std::unordered_map<
+      uint32_t,
+      std::function<void(CPUFeaturesAuditor*, const Instruction*)>>;
+  static const FormToVisitorFnMap* GetFormToVisitorFnMap();
+  uint32_t form_hash_;
 };
 
 }  // namespace aarch64
@@ -0,0 +1,276 @@
+// Copyright 2023, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_AARCH64_DEBUGGER_AARCH64_H_
+#define VIXL_AARCH64_DEBUGGER_AARCH64_H_
+
+#include <optional>
+#include <unordered_set>
+#include <vector>
+
+#include "../globals-vixl.h"
+#include "../utils-vixl.h"
+#include "../cpu-features.h"
+
+#include "abi-aarch64.h"
+#include "cpu-features-auditor-aarch64.h"
+#include "disasm-aarch64.h"
+#include "instructions-aarch64.h"
+#include "simulator-aarch64.h"
+#include "simulator-constants-aarch64.h"
+
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+
+namespace vixl {
+namespace aarch64 {
+
+class Simulator;
+
+enum DebugReturn { DebugContinue, DebugExit };
+
+
+// A debugger command that performs some action when used by the simulator
+// debugger.
+class DebuggerCmd {
+ public:
+  DebuggerCmd(Simulator* sim,
+              std::string cmd_word,
+              std::string cmd_alias,
+              std::string usage,
+              std::string description);
+  virtual ~DebuggerCmd() {}
+
+  // Perform some action based on the arguments passed in. Returns true if the
+  // debugger should exit after the action, false otherwise.
+  virtual DebugReturn Action(const std::vector<std::string>& args) = 0;
+
+  // Return the command word.
+  std::string_view GetCommandWord() { return command_word_; }
+  // Return the alias for this command. Returns an empty string if this command
+  // has no alias.
+  std::string_view GetCommandAlias() { return command_alias_; }
+  // Return this commands usage.
+  std::string_view GetArgsString() { return args_str_; }
+  // Return this commands description.
+  std::string_view GetDescription() { return description_; }
+
+ protected:
+  // Simulator which this command will be performed on.
+  Simulator* sim_;
+  // Stream to output the result of the command to.
+  FILE* ostream_;
+  // Command word that, when given to the interactive debugger, calls Action.
+  std::string command_word_;
+  // Optional alias for the command_word.
+  std::string command_alias_;
+  // Optional string showing the arguments that can be passed to the command.
+  std::string args_str_;
+  // Optional description of the command.
+  std::string description_;
+};
+
+
+//
+// Base debugger command handlers:
+//
+
+class HelpCmd : public DebuggerCmd {
+ public:
+  HelpCmd(Simulator* sim)
+      : DebuggerCmd(sim, "help", "h", "", "Display this help message.") {}
+
+  DebugReturn Action(const std::vector<std::string>& args) override;
+};
+
+class BreakCmd : public DebuggerCmd {
+ public:
+  BreakCmd(Simulator* sim)
+      : DebuggerCmd(sim,
+                    "break",
+                    "b",
+                    "<address>",
+                    "Set or remove a breakpoint.") {}
+
+  DebugReturn Action(const std::vector<std::string>& args) override;
+};
+
+class StepCmd : public DebuggerCmd {
+ public:
+  StepCmd(Simulator* sim)
+      : DebuggerCmd(sim,
+                    "step",
+                    "s",
+                    "[<n>]",
+                    "Step n instructions, default step 1 instruction.") {}
+
+  DebugReturn Action(const std::vector<std::string>& args) override;
+};
+
+class ContinueCmd : public DebuggerCmd {
+ public:
+  ContinueCmd(Simulator* sim)
+      : DebuggerCmd(sim,
+                    "continue",
+                    "c",
+                    "",
+                    "Exit the debugger and continue executing instructions.") {}
+
+  DebugReturn Action(const std::vector<std::string>& args) override;
+};
+
+class PrintCmd : public DebuggerCmd {
+ public:
+  PrintCmd(Simulator* sim)
+      : DebuggerCmd(sim,
+                    "print",
+                    "p",
+                    "<register|all|system>",
+                    "Print the contents of a register, all registers or all"
+                    " system registers.") {}
+
+  DebugReturn Action(const std::vector<std::string>& args) override;
+};
+
+class TraceCmd : public DebuggerCmd {
+ public:
+  TraceCmd(Simulator* sim)
+      : DebuggerCmd(sim,
+                    "trace",
+                    "t",
+                    "",
+                    "Start/stop memory and register tracing.") {}
+
+  DebugReturn Action(const std::vector<std::string>& args) override;
+};
+
+class GdbCmd : public DebuggerCmd {
+ public:
+  GdbCmd(Simulator* sim)
+      : DebuggerCmd(sim,
+                    "gdb",
+                    "g",
+                    "",
+                    "Enter an already running instance of gdb.") {}
+
+  DebugReturn Action(const std::vector<std::string>& args) override;
+};
+
+
+// A debugger for the Simulator which takes input from the user in order to
+// control the running of the Simulator.
+class Debugger {
+ public:
+  // A pair consisting of a register character (e.g: W, X, V) and a register
+  // code (e.g: 0, 1 ...31) which represents a single parsed register.
+  //
+  // Note: the register character is guaranteed to be upper case.
+  using RegisterParsedFormat = std::pair<char, unsigned>;
+
+  Debugger(Simulator* sim);
+
+  // Set the input stream, from which commands are read, to a custom stream.
+  void SetInputStream(std::istream* stream) { input_stream_ = stream; }
+
+  // Register a new command for the debugger.
+  template <class T>
+  void RegisterCmd();
+
+  // Set a breakpoint at the given address.
+  void RegisterBreakpoint(uint64_t addr) { breakpoints_.insert(addr); }
+  // Remove a breakpoint at the given address.
+  void RemoveBreakpoint(uint64_t addr) { breakpoints_.erase(addr); }
+  // Return true if the address is the location of a breakpoint.
+  bool IsBreakpoint(uint64_t addr) const {
+    return (breakpoints_.find(addr) != breakpoints_.end());
+  }
+  // Return true if the simulator pc is a breakpoint.
+  bool IsAtBreakpoint() const;
+
+  // Main loop for the debugger. Keep prompting for user inputted debugger
+  // commands and try to execute them until a command is given that exits the
+  // interactive debugger.
+  void Debug();
+
+  // Get an unsigned integer value from a string and return it in 'value'.
+  // Base is used to determine the numeric base of the number to be read,
+  // i.e: 8 for octal, 10 for decimal, 16 for hexadecimal and 0 for
+  // auto-detect. Return true if an integer value was found, false otherwise.
+  static std::optional<uint64_t> ParseUint64String(std::string_view uint64_str,
+                                                   int base = 0);
+
+  // Get a register from a string and return it in 'reg'. Return true if a
+  // valid register character and code (e.g: W0, X29, V31) was found, false
+  // otherwise.
+  static std::optional<RegisterParsedFormat> ParseRegString(
+      std::string_view reg_str);
+
+  // Print the usage of each debugger command.
+  void PrintUsage();
+
+ private:
+  // Split a string based on the separator given (a single space character by
+  // default) and return as a std::vector of strings.
+  static std::vector<std::string> Tokenize(std::string_view input_line,
+                                           char separator = ' ');
+
+  // Try to execute a single debugger command.
+  DebugReturn ExecDebugCommand(const std::vector<std::string>& tokenized_cmd);
+
+  // Return true if the string is zero, i.e: all characters in the string
+  // (other than prefixes) are zero.
+  static bool IsZeroUint64String(std::string_view uint64_str, int base);
+
+  // The simulator that this debugger acts on.
+  Simulator* sim_;
+
+  // A vector of all commands recognised by the debugger.
+  std::vector<std::unique_ptr<DebuggerCmd>> debugger_cmds_;
+
+  // Input stream from which commands are read. Default is std::cin.
+  std::istream* input_stream_;
+
+  // Output stream from the simulator.
+  FILE* ostream_;
+
+  // A list of all instruction addresses that, when executed by the
+  // simulator, will start the interactive debugger if it hasn't already.
+  std::unordered_set<uint64_t> breakpoints_;
+};
+
+
+}  // namespace aarch64
+}  // namespace vixl
+
+#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
+
+#endif  // VIXL_AARCH64_DEBUGGER_AARCH64_H_
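A sketch of driving the new interactive debugger (only names declared in the
header above are used; how the Simulator itself is set up is assumed):

#include <cstdint>
#include "aarch64/debugger-aarch64.h"

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
void DebugFrom(vixl::aarch64::Simulator* sim, uint64_t entry_point) {
  vixl::aarch64::Debugger debugger(sim);
  debugger.RegisterBreakpoint(entry_point);  // Stop once execution gets here.
  // ParseUint64String auto-detects the base (a 0x prefix means hexadecimal).
  if (auto addr = vixl::aarch64::Debugger::ParseUint64String("0x400")) {
    debugger.RegisterBreakpoint(*addr);
  }
  debugger.Debug();  // Prompt for commands until e.g. "continue" is given.
}
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64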
@@ -1,4 +1,4 @@
-// Copyright 2014, VIXL authors
+// Copyright 2019, VIXL authors
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -28,14 +28,14 @@
 #define VIXL_AARCH64_DECODER_AARCH64_H_
 
 #include <list>
+#include <map>
+#include <string>
 
 #include "../globals-vixl.h"
 
 #include "instructions-aarch64.h"
 
-
 // List macro containing all visitors needed by the decoder class.
-
 #define VISITOR_LIST_THAT_RETURN(V) \
   V(AddSubExtended) \
   V(AddSubImmediate) \
@@ -54,6 +54,7 @@
   V(DataProcessing1Source) \
   V(DataProcessing2Source) \
   V(DataProcessing3Source) \
+  V(EvaluateIntoFlags) \
   V(Exception) \
   V(Extract) \
   V(FPCompare) \
@@ -67,12 +68,14 @@
   V(FPIntegerConvert) \
   V(LoadLiteral) \
   V(LoadStoreExclusive) \
+  V(LoadStorePAC) \
   V(LoadStorePairNonTemporal) \
   V(LoadStorePairOffset) \
   V(LoadStorePairPostIndex) \
   V(LoadStorePairPreIndex) \
   V(LoadStorePostIndex) \
   V(LoadStorePreIndex) \
+  V(LoadStoreRCpcUnscaledOffset) \
   V(LoadStoreRegisterOffset) \
   V(LoadStoreUnscaledOffset) \
   V(LoadStoreUnsignedOffset) \
@@ -108,15 +111,162 @@
   V(NEONShiftImmediate) \
   V(NEONTable) \
   V(PCRelAddressing) \
+  V(RotateRightIntoFlags) \
+  V(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets) \
+  V(SVE32BitGatherLoad_VectorPlusImm) \
+  V(SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets) \
+  V(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets) \
+  V(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets) \
+  V(SVE32BitGatherPrefetch_VectorPlusImm) \
+  V(SVE32BitScatterStore_ScalarPlus32BitScaledOffsets) \
+  V(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets) \
+  V(SVE32BitScatterStore_VectorPlusImm) \
+  V(SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets) \
+  V(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsets) \
+  V(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets) \
+  V(SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets) \
+  V(SVE64BitGatherLoad_VectorPlusImm) \
+  V(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets) \
+  V(SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets) \
+  V(SVE64BitGatherPrefetch_VectorPlusImm) \
+  V(SVE64BitScatterStore_ScalarPlus64BitScaledOffsets) \
+  V(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets) \
+  V(SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets) \
+  V(SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets) \
+  V(SVE64BitScatterStore_VectorPlusImm) \
+  V(SVEAddressGeneration) \
+  V(SVEBitwiseLogicalUnpredicated) \
+  V(SVEBitwiseShiftUnpredicated) \
+  V(SVEFFRInitialise) \
+  V(SVEFFRWriteFromPredicate) \
+  V(SVEFPAccumulatingReduction) \
+  V(SVEFPArithmeticUnpredicated) \
+  V(SVEFPCompareVectors) \
+  V(SVEFPCompareWithZero) \
+  V(SVEFPComplexAddition) \
+  V(SVEFPComplexMulAdd) \
+  V(SVEFPComplexMulAddIndex) \
+  V(SVEFPFastReduction) \
+  V(SVEFPMulIndex) \
+  V(SVEFPMulAdd) \
+  V(SVEFPMulAddIndex) \
+  V(SVEFPUnaryOpUnpredicated) \
+  V(SVEIncDecByPredicateCount) \
+  V(SVEIndexGeneration) \
+  V(SVEIntArithmeticUnpredicated) \
+  V(SVEIntCompareSignedImm) \
+  V(SVEIntCompareUnsignedImm) \
+  V(SVEIntCompareVectors) \
+  V(SVEIntMulAddPredicated) \
+  V(SVEIntMulAddUnpredicated) \
+  V(SVEIntReduction) \
+  V(SVEIntUnaryArithmeticPredicated) \
+  V(SVEMovprfx) \
+  V(SVEMulIndex) \
+  V(SVEPermuteVectorExtract) \
+  V(SVEPermuteVectorInterleaving) \
+  V(SVEPredicateCount) \
+  V(SVEPredicateLogical) \
+  V(SVEPropagateBreak) \
+  V(SVEStackFrameAdjustment) \
+  V(SVEStackFrameSize) \
+  V(SVEVectorSelect) \
+  V(SVEBitwiseLogical_Predicated) \
+  V(SVEBitwiseLogicalWithImm_Unpredicated) \
+  V(SVEBitwiseShiftByImm_Predicated) \
+  V(SVEBitwiseShiftByVector_Predicated) \
+  V(SVEBitwiseShiftByWideElements_Predicated) \
+  V(SVEBroadcastBitmaskImm) \
+  V(SVEBroadcastFPImm_Unpredicated) \
+  V(SVEBroadcastGeneralRegister) \
+  V(SVEBroadcastIndexElement) \
+  V(SVEBroadcastIntImm_Unpredicated) \
+  V(SVECompressActiveElements) \
+  V(SVEConditionallyBroadcastElementToVector) \
+  V(SVEConditionallyExtractElementToSIMDFPScalar) \
+  V(SVEConditionallyExtractElementToGeneralRegister) \
+  V(SVEConditionallyTerminateScalars) \
+  V(SVEConstructivePrefix_Unpredicated) \
+  V(SVEContiguousFirstFaultLoad_ScalarPlusScalar) \
+  V(SVEContiguousLoad_ScalarPlusImm) \
+  V(SVEContiguousLoad_ScalarPlusScalar) \
+  V(SVEContiguousNonFaultLoad_ScalarPlusImm) \
+  V(SVEContiguousNonTemporalLoad_ScalarPlusImm) \
+  V(SVEContiguousNonTemporalLoad_ScalarPlusScalar) \
+  V(SVEContiguousNonTemporalStore_ScalarPlusImm) \
+  V(SVEContiguousNonTemporalStore_ScalarPlusScalar) \
+  V(SVEContiguousPrefetch_ScalarPlusImm) \
+  V(SVEContiguousPrefetch_ScalarPlusScalar) \
+  V(SVEContiguousStore_ScalarPlusImm) \
+  V(SVEContiguousStore_ScalarPlusScalar) \
+  V(SVECopySIMDFPScalarRegisterToVector_Predicated) \
+  V(SVECopyFPImm_Predicated) \
+  V(SVECopyGeneralRegisterToVector_Predicated) \
+  V(SVECopyIntImm_Predicated) \
+  V(SVEElementCount) \
+  V(SVEExtractElementToSIMDFPScalarRegister) \
+  V(SVEExtractElementToGeneralRegister) \
+  V(SVEFPArithmetic_Predicated) \
+  V(SVEFPArithmeticWithImm_Predicated) \
+  V(SVEFPConvertPrecision) \
+  V(SVEFPConvertToInt) \
+  V(SVEFPExponentialAccelerator) \
+  V(SVEFPRoundToIntegralValue) \
+  V(SVEFPTrigMulAddCoefficient) \
+  V(SVEFPTrigSelectCoefficient) \
+  V(SVEFPUnaryOp) \
+  V(SVEIncDecRegisterByElementCount) \
+  V(SVEIncDecVectorByElementCount) \
+  V(SVEInsertSIMDFPScalarRegister) \
+  V(SVEInsertGeneralRegister) \
+  V(SVEIntAddSubtractImm_Unpredicated) \
+  V(SVEIntAddSubtractVectors_Predicated) \
+  V(SVEIntCompareScalarCountAndLimit) \
+  V(SVEIntConvertToFP) \
+  V(SVEIntDivideVectors_Predicated) \
+  V(SVEIntMinMaxImm_Unpredicated) \
+  V(SVEIntMinMaxDifference_Predicated) \
+  V(SVEIntMulImm_Unpredicated) \
+  V(SVEIntMulVectors_Predicated) \
+  V(SVELoadAndBroadcastElement) \
+  V(SVELoadAndBroadcastQOWord_ScalarPlusImm) \
+  V(SVELoadAndBroadcastQOWord_ScalarPlusScalar) \
+  V(SVELoadMultipleStructures_ScalarPlusImm) \
+  V(SVELoadMultipleStructures_ScalarPlusScalar) \
+  V(SVELoadPredicateRegister) \
+  V(SVELoadVectorRegister) \
+  V(SVEPartitionBreakCondition) \
+  V(SVEPermutePredicateElements) \
+  V(SVEPredicateFirstActive) \
+  V(SVEPredicateInitialize) \
+  V(SVEPredicateNextActive) \
+  V(SVEPredicateReadFromFFR_Predicated) \
+  V(SVEPredicateReadFromFFR_Unpredicated) \
+  V(SVEPredicateTest) \
+  V(SVEPredicateZero) \
+  V(SVEPropagateBreakToNextPartition) \
+  V(SVEReversePredicateElements) \
+  V(SVEReverseVectorElements) \
+  V(SVEReverseWithinElements) \
+  V(SVESaturatingIncDecRegisterByElementCount) \
+  V(SVESaturatingIncDecVectorByElementCount) \
+  V(SVEStoreMultipleStructures_ScalarPlusImm) \
+  V(SVEStoreMultipleStructures_ScalarPlusScalar) \
+  V(SVEStorePredicateRegister) \
+  V(SVEStoreVectorRegister) \
+  V(SVETableLookup) \
+  V(SVEUnpackPredicateElements) \
+  V(SVEUnpackVectorElements) \
+  V(SVEVectorSplice) \
   V(System) \
   V(TestBranch) \
-  V(UnconditionalBranch) \
-  V(UnconditionalBranchToRegister)
 
-#define VISITOR_LIST_THAT_DONT_RETURN(V) \
   V(Unallocated) \
+  V(UnconditionalBranch) \
+  V(UnconditionalBranchToRegister) \
   V(Unimplemented)
 
+#define VISITOR_LIST_THAT_DONT_RETURN(V) V(Reserved)
 
 #define VISITOR_LIST(V) \
   VISITOR_LIST_THAT_RETURN(V) \
   VISITOR_LIST_THAT_DONT_RETURN(V)
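These lists are X-macros: each client supplies its own V and the list expands once per instruction form. A minimal sketch of a client-side expansion (illustrative only, not part of the diff):

// Count the visitor forms by expanding each V(...) entry to "+1".
#define COUNT_ONE(A) +1
static const int kNumVisitorForms = 0 VISITOR_LIST(COUNT_ONE);
#undef COUNT_ONE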
@@ -124,8 +274,12 @@
 namespace vixl {
 namespace aarch64 {
 
-// The Visitor interface. Disassembler and simulator (and other tools)
-// must provide implementations for all of these functions.
+using Metadata = std::map<std::string, std::string>;
+
+// The Visitor interface consists only of the Visit() method. User classes
+// that inherit from this one must provide an implementation of the method.
+// Information about the instruction encountered by the Decoder is available
+// via the metadata pointer.
 class DecoderVisitor {
  public:
   enum VisitorConstness { kConstVisitor, kNonConstVisitor };
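A minimal sketch of a visitor under the new single-method interface (hypothetical class; the "form" metadata key is an assumption, not confirmed by this header):

#include <cstdio>

class PrintingVisitor : public DecoderVisitor {
 public:
  void Visit(Metadata* metadata, const Instruction* instr) VIXL_OVERRIDE {
    // Assumed: the decoder stores the decoded form name under "form".
    printf("%p: %s\n", reinterpret_cast<const void*>(instr),
           (*metadata)["form"].c_str());
  }
};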
@@ -134,9 +288,7 @@ class DecoderVisitor {
 
   virtual ~DecoderVisitor() {}
 
-#define DECLARE(A) virtual void Visit##A(const Instruction* instr) = 0;
-  VISITOR_LIST(DECLARE)
-#undef DECLARE
+  virtual void Visit(Metadata* metadata, const Instruction* instr) = 0;
 
   bool IsConstVisitor() const { return constness_ == kConstVisitor; }
   Instruction* MutableInstruction(const Instruction* instr) {
@@ -148,22 +300,22 @@ class DecoderVisitor {
   const VisitorConstness constness_;
 };
 
+class DecodeNode;
+class CompiledDecodeNode;
+
+// The instruction decoder is constructed from a graph of decode nodes. At each
+// node, a number of bits are sampled from the instruction being decoded. The
+// resulting value is used to look up the next node in the graph, which then
+// samples other bits, and moves to other decode nodes. Eventually, a visitor
+// node is reached, and the corresponding visitor function is called, which
+// handles the instruction.
 class Decoder {
  public:
-  Decoder() {}
+  Decoder() { ConstructDecodeGraph(); }
 
   // Top-level wrappers around the actual decoding function.
-  void Decode(const Instruction* instr) {
-    std::list<DecoderVisitor*>::iterator it;
-    for (it = visitors_.begin(); it != visitors_.end(); it++) {
-      VIXL_ASSERT((*it)->IsConstVisitor());
-    }
-    DecodeInstruction(instr);
-  }
-  void Decode(Instruction* instr) {
-    DecodeInstruction(const_cast<const Instruction*>(instr));
-  }
+  void Decode(const Instruction* instr);
+  void Decode(Instruction* instr);
 
   // Decode all instructions from start (inclusive) to end (exclusive).
   template <typename T>
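Typical use of the public surface declared here, as a sketch (the instruction pointer and output stream are assumptions):

Decoder decoder;                   // The constructor builds the decode graph.
PrintDisassembler disasm(stdout);  // Declared in disasm-aarch64.h, below.
decoder.AppendVisitor(&disasm);    // Visitors live in the decoder's list.
decoder.Decode(instr);             // Walks the graph, then calls disasm.Visit().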
@@ -212,76 +364,329 @@ class Decoder {
   // of visitors stored by the decoder.
   void RemoveVisitor(DecoderVisitor* visitor);
 
-#define DECLARE(A) void Visit##A(const Instruction* instr);
-  VISITOR_LIST(DECLARE)
-#undef DECLARE
+  void VisitNamedInstruction(const Instruction* instr, const std::string& name);
 
 
   std::list<DecoderVisitor*>* visitors() { return &visitors_; }
 
+  // Get a DecodeNode by name from the Decoder's map.
+  DecodeNode* GetDecodeNode(std::string name);
+
  private:
   // Decodes an instruction and calls the visitor functions registered with the
   // Decoder class.
   void DecodeInstruction(const Instruction* instr);
 
-  // Decode the PC relative addressing instruction, and call the corresponding
-  // visitors.
-  // On entry, instruction bits 27:24 = 0x0.
-  void DecodePCRelAddressing(const Instruction* instr);
-
-  // Decode the add/subtract immediate instruction, and call the correspoding
-  // visitors.
-  // On entry, instruction bits 27:24 = 0x1.
-  void DecodeAddSubImmediate(const Instruction* instr);
-
-  // Decode the branch, system command, and exception generation parts of
-  // the instruction tree, and call the corresponding visitors.
-  // On entry, instruction bits 27:24 = {0x4, 0x5, 0x6, 0x7}.
-  void DecodeBranchSystemException(const Instruction* instr);
-
-  // Decode the load and store parts of the instruction tree, and call
-  // the corresponding visitors.
-  // On entry, instruction bits 27:24 = {0x8, 0x9, 0xC, 0xD}.
-  void DecodeLoadStore(const Instruction* instr);
-
-  // Decode the logical immediate and move wide immediate parts of the
-  // instruction tree, and call the corresponding visitors.
-  // On entry, instruction bits 27:24 = 0x2.
-  void DecodeLogical(const Instruction* instr);
-
-  // Decode the bitfield and extraction parts of the instruction tree,
-  // and call the corresponding visitors.
-  // On entry, instruction bits 27:24 = 0x3.
-  void DecodeBitfieldExtract(const Instruction* instr);
-
-  // Decode the data processing parts of the instruction tree, and call the
-  // corresponding visitors.
-  // On entry, instruction bits 27:24 = {0x1, 0xA, 0xB}.
-  void DecodeDataProcessing(const Instruction* instr);
-
-  // Decode the floating point parts of the instruction tree, and call the
-  // corresponding visitors.
-  // On entry, instruction bits 27:24 = {0xE, 0xF}.
-  void DecodeFP(const Instruction* instr);
-
-  // Decode the Advanced SIMD (NEON) load/store part of the instruction tree,
-  // and call the corresponding visitors.
-  // On entry, instruction bits 29:25 = 0x6.
-  void DecodeNEONLoadStore(const Instruction* instr);
-
-  // Decode the Advanced SIMD (NEON) vector data processing part of the
-  // instruction tree, and call the corresponding visitors.
-  // On entry, instruction bits 28:25 = 0x7.
-  void DecodeNEONVectorDataProcessing(const Instruction* instr);
-
-  // Decode the Advanced SIMD (NEON) scalar data processing part of the
-  // instruction tree, and call the corresponding visitors.
-  // On entry, instruction bits 28:25 = 0xF.
-  void DecodeNEONScalarDataProcessing(const Instruction* instr);
-
- private:
+  // Add an initialised DecodeNode to the decode_node_ map.
+  void AddDecodeNode(const DecodeNode& node);
+
   // Visitors are registered in a list.
   std::list<DecoderVisitor*> visitors_;
 
+  // Compile the dynamically generated decode graph based on the static
+  // information in kDecodeMapping and kVisitorNodes.
+  void ConstructDecodeGraph();
+
+  // Root node for the compiled decoder graph, stored here to avoid a map lookup
+  // for every instruction decoded.
+  CompiledDecodeNode* compiled_decoder_root_;
+
+  // Map of node names to DecodeNodes.
+  std::map<std::string, DecodeNode> decode_nodes_;
+};
+
+typedef void (Decoder::*DecodeFnPtr)(const Instruction*);
+typedef uint32_t (Instruction::*BitExtractFn)(void) const;
+
+// A Visitor node maps the name of a visitor to the function that handles it.
+struct VisitorNode {
+  const char* name;
+  const DecodeFnPtr visitor_fn;
+};
+
+// DecodePattern and DecodeMapping represent the input data to the decoder
+// compilation stage. After compilation, the decoder is embodied in the graph
+// of CompiledDecodeNodes pointed to by compiled_decoder_root_.
+
+// A DecodePattern maps a pattern of set/unset/don't care (1, 0, x) bits encoded
+// as uint32_t to its handler.
+// The encoding uses two bits per symbol: 0 => 0b00, 1 => 0b01, x => 0b10.
+// 0b11 marks the edge of the most-significant bits of the pattern, which is
+// required to determine the length. For example, the pattern "1x01"_b is
+// encoded in a uint32_t as 0b11_01_10_00_01.
+struct DecodePattern {
+  uint32_t pattern;
+  const char* handler;
+};
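Working through the comment's own example: "1x01" has symbols 1, x, 0, 1 (most significant first), i.e. 0b01, 0b10, 0b00, 0b01, with the 0b11 marker in front:

// 0b11_01_10_00_01 == 0b1101100001 == 0x361.
const uint32_t kPattern1x01 = 0x361;
// The highest set bit is bit 9 (odd), 0x361 >> 8 == 0b11 is the end marker,
// and the length is 9 / 2 == 4 symbols.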
+
+// A DecodeMapping consists of the name of a handler, the bits sampled in the
+// instruction by that handler, and a mapping from the pattern that those
+// sampled bits match to the corresponding name of a node.
+struct DecodeMapping {
+  const char* name;
+  const std::vector<uint8_t> sampled_bits;
+  const std::vector<DecodePattern> mapping;
+};
+
+// For speed, before nodes can be used for decoding instructions, they must
+// be compiled. This converts the mapping "bit pattern strings to decoder name
+// string" stored in DecodeNodes to an array look up for the pointer to the next
+// node, stored in CompiledDecodeNodes. Compilation may also apply other
+// optimisations for simple decode patterns.
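A hypothetical entry, to show how the pieces combine (names invented; the real tables live in decoder-constants-aarch64.h):

// Sample bits 31 and 30; route "00" to node_a, anything else ("xx") to node_b.
const DecodeMapping kExampleMapping = {
    "example_node",
    {31, 30},
    {{0x30, "node_a"},   // "00" encoded: 0b11_00_00
     {0x3a, "node_b"}},  // "xx" encoded: 0b11_10_10
};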
+class CompiledDecodeNode {
+ public:
+  // Constructor for decode node, containing a decode table and pointer to a
+  // function that extracts the bits to be sampled.
+  CompiledDecodeNode(BitExtractFn bit_extract_fn, size_t decode_table_size)
+      : bit_extract_fn_(bit_extract_fn),
+        instruction_name_("node"),
+        decode_table_size_(decode_table_size),
+        decoder_(NULL) {
+    decode_table_ = new CompiledDecodeNode*[decode_table_size_];
+    memset(decode_table_, 0, decode_table_size_ * sizeof(decode_table_[0]));
+  }
+
+  // Constructor for wrappers around visitor functions. These require no
+  // decoding, so no bit extraction function or decode table is assigned.
+  explicit CompiledDecodeNode(std::string iname, Decoder* decoder)
+      : bit_extract_fn_(NULL),
+        instruction_name_(iname),
+        decode_table_(NULL),
+        decode_table_size_(0),
+        decoder_(decoder) {}
+
+  ~CompiledDecodeNode() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
+    // Free the decode table, if this is a compiled, non-leaf node.
+    if (decode_table_ != NULL) {
+      VIXL_ASSERT(!IsLeafNode());
+      delete[] decode_table_;
+    }
+  }
+
+  // Decode the instruction by either sampling the bits using the bit extract
+  // function to find the next node, or, if we're at a leaf, calling the visitor
+  // function.
+  void Decode(const Instruction* instr) const;
+
+  // A leaf node is a wrapper for a visitor function.
+  bool IsLeafNode() const {
+    VIXL_ASSERT(((instruction_name_ == "node") && (bit_extract_fn_ != NULL)) ||
+                ((instruction_name_ != "node") && (bit_extract_fn_ == NULL)));
+    return instruction_name_ != "node";
+  }
+
+  // Get a pointer to the next node required in the decode process, based on the
+  // bits sampled by the current node.
+  CompiledDecodeNode* GetNodeForBits(uint32_t bits) const {
+    VIXL_ASSERT(bits < decode_table_size_);
+    return decode_table_[bits];
+  }
+
+  // Set the next node in the decode process for the pattern of sampled bits in
+  // the current node.
+  void SetNodeForBits(uint32_t bits, CompiledDecodeNode* n) {
+    VIXL_ASSERT(bits < decode_table_size_);
+    VIXL_ASSERT(n != NULL);
+    decode_table_[bits] = n;
+  }
+
+ private:
+  // Pointer to an instantiated template function for extracting the bits
+  // sampled by this node. Set to NULL for leaf nodes.
+  const BitExtractFn bit_extract_fn_;
+
+  // Visitor function that handles the instruction identified. Set only for
+  // leaf nodes, where no extra decoding is required, otherwise NULL.
+  std::string instruction_name_;
+
+  // Mapping table from instruction bits to next decode stage.
+  CompiledDecodeNode** decode_table_;
+  const size_t decode_table_size_;
+
+  // Pointer to the decoder containing this node, used to call its visitor
+  // function for leaf nodes. Set to NULL for non-leaf nodes.
+  Decoder* decoder_;
+};
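Given those members, the out-of-line Decode() plausibly reduces to the following sketch (the real definition lives in decoder-aarch64.cc, which this view truncates):

// Sketch: leaves hand off to the decoder, interior nodes sample and recurse.
void CompiledDecodeNode::Decode(const Instruction* instr) const {
  if (IsLeafNode()) {
    decoder_->VisitNamedInstruction(instr, instruction_name_);
  } else {
    uint32_t bits = (instr->*bit_extract_fn_)();
    GetNodeForBits(bits)->Decode(instr);
  }
}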
+class DecodeNode {
+ public:
+  // Default constructor needed for map initialisation.
+  DecodeNode()
+      : sampled_bits_(DecodeNode::kEmptySampledBits),
+        pattern_table_(DecodeNode::kEmptyPatternTable),
+        compiled_node_(NULL) {}
+
+  // Constructor for DecodeNode wrappers around visitor functions. These are
+  // marked as "compiled", as there is no decoding left to do.
+  explicit DecodeNode(const std::string& iname, Decoder* decoder)
+      : name_(iname),
+        sampled_bits_(DecodeNode::kEmptySampledBits),
+        instruction_name_(iname),
+        pattern_table_(DecodeNode::kEmptyPatternTable),
+        decoder_(decoder),
+        compiled_node_(NULL) {}
+
+  // Constructor for DecodeNodes that map bit patterns to other DecodeNodes.
+  explicit DecodeNode(const DecodeMapping& map, Decoder* decoder = NULL)
+      : name_(map.name),
+        sampled_bits_(map.sampled_bits),
+        instruction_name_("node"),
+        pattern_table_(map.mapping),
+        decoder_(decoder),
+        compiled_node_(NULL) {
+    // With the current two bits per symbol encoding scheme, the maximum pattern
+    // length is (32 - 2) / 2 = 15 bits.
+    VIXL_CHECK(GetPatternLength(map.mapping[0].pattern) <= 15);
+    for (const DecodePattern& p : map.mapping) {
+      VIXL_CHECK(GetPatternLength(p.pattern) == map.sampled_bits.size());
+    }
+  }
+
+  ~DecodeNode() {
+    // Delete the compiled version of this node, if one was created.
+    if (compiled_node_ != NULL) {
+      delete compiled_node_;
+    }
+  }
+
+  // Get the bits sampled from the instruction by this node.
+  const std::vector<uint8_t>& GetSampledBits() const { return sampled_bits_; }
+
+  // Get the number of bits sampled from the instruction by this node.
+  size_t GetSampledBitsCount() const { return sampled_bits_.size(); }
+
+  // A leaf node is a DecodeNode that wraps the visitor function for the
+  // identified instruction class.
+  bool IsLeafNode() const { return instruction_name_ != "node"; }
+
+  std::string GetName() const { return name_; }
+
+  // Create a CompiledDecodeNode of specified table size that uses
+  // bit_extract_fn to sample bits from the instruction.
+  void CreateCompiledNode(BitExtractFn bit_extract_fn, size_t table_size) {
+    VIXL_ASSERT(bit_extract_fn != NULL);
+    VIXL_ASSERT(table_size > 0);
+    compiled_node_ = new CompiledDecodeNode(bit_extract_fn, table_size);
+  }
+
+  // Create a CompiledDecodeNode wrapping a visitor function. No decoding is
+  // required for this node; the visitor function is called instead.
+  void CreateVisitorNode() {
+    compiled_node_ = new CompiledDecodeNode(instruction_name_, decoder_);
+  }
+
+  // Find and compile the DecodeNode named "name", and set it as the node for
+  // the pattern "bits".
+  void CompileNodeForBits(Decoder* decoder, std::string name, uint32_t bits);
+
+  // Get a pointer to an instruction method that extracts the instruction bits
+  // specified by the mask argument, and returns those sampled bits as a
+  // contiguous sequence, suitable for indexing an array.
+  // For example, a mask of 0b1010 returns a function that, given an instruction
+  // 0bXYZW, will return 0bXZ.
+  BitExtractFn GetBitExtractFunction(uint32_t mask) {
+    return GetBitExtractFunctionHelper(mask, 0);
+  }
+
+  // Get a pointer to an Instruction method that applies a mask to the
+  // instruction bits, and tests if the result is equal to value. The returned
+  // function gives a 1 result if (inst & mask == value), 0 otherwise.
+  BitExtractFn GetBitExtractFunction(uint32_t mask, uint32_t value) {
+    return GetBitExtractFunctionHelper(value, mask);
+  }
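A worked instance of the documented behaviour (how the returned pointer is produced is an assumption based on the ExtractBits<M> template declared further down):

// GetBitExtractFunction(0b1010) plausibly yields
// &Instruction::ExtractBits<0b1010>, which compresses bits 3 and 1 of the
// instruction into a two-bit index, so the owning node's decode table needs
// only 1 << 2 == 4 slots.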
+
+  // Compile this DecodeNode into a new CompiledDecodeNode and return a pointer
+  // to it. This pointer is also stored inside the DecodeNode itself. Destroying
+  // a DecodeNode frees its associated CompiledDecodeNode.
+  CompiledDecodeNode* Compile(Decoder* decoder);
+
+  // Get a pointer to the CompiledDecodeNode associated with this DecodeNode.
+  // Returns NULL if the node has not been compiled yet.
+  CompiledDecodeNode* GetCompiledNode() const { return compiled_node_; }
+  bool IsCompiled() const { return GetCompiledNode() != NULL; }
+
+  enum class PatternSymbol { kSymbol0 = 0, kSymbol1 = 1, kSymbolX = 2 };
+  static const uint32_t kEndOfPattern = 3;
+  static const uint32_t kPatternSymbolMask = 3;
+
+  size_t GetPatternLength(uint32_t pattern) const {
+    uint32_t hsb = HighestSetBitPosition(pattern);
+    // The pattern length is signified by two set bits in a two bit-aligned
+    // position. Ensure that the pattern has a highest set bit, it's at an odd
+    // bit position, and that the bit to the right of the hsb is also set.
+    VIXL_ASSERT(((hsb % 2) == 1) && (pattern >> (hsb - 1)) == kEndOfPattern);
+    return hsb / 2;
+  }
+
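Applying this to the "1x01" example from the DecodePattern comment:

// pattern = 0x361 (0b11_01_10_00_01): HighestSetBitPosition() == 9, which is
// odd; 0x361 >> 8 == 0b11 == kEndOfPattern; the length is 9 / 2 == 4 symbols.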
+  bool PatternContainsSymbol(uint32_t pattern, PatternSymbol symbol) const {
+    while ((pattern & kPatternSymbolMask) != kEndOfPattern) {
+      if (static_cast<PatternSymbol>(pattern & kPatternSymbolMask) == symbol)
+        return true;
+      pattern >>= 2;
+    }
+    return false;
+  }
+
+  PatternSymbol GetSymbolAt(uint32_t pattern, size_t pos) const {
+    size_t len = GetPatternLength(pattern);
+    VIXL_ASSERT((pos < 15) && (pos < len));
+    uint32_t shift = static_cast<uint32_t>(2 * (len - pos - 1));
+    uint32_t sym = (pattern >> shift) & kPatternSymbolMask;
+    return static_cast<PatternSymbol>(sym);
+  }
+
+ private:
+  // Generate a mask and value pair from a pattern constructed from 0, 1 and x
+  // (don't care) 2-bit symbols.
+  // For example "10x1"_b should return mask = 0b1101, value = 0b1001.
+  typedef std::pair<Instr, Instr> MaskValuePair;
+  MaskValuePair GenerateMaskValuePair(uint32_t pattern) const;
+
+  // Generate a pattern ordered by the bit positions sampled by this node.
+  // The symbol corresponding to the lowest sample position is placed in the
+  // least-significant bits of the result pattern.
+  // For example, a pattern of "1x0"_b expected when sampling bits 31, 1 and 30
+  // returns the pattern "x01"_b; bit 1 should be 'x', bit 30 '0' and bit 31
+  // '1'.
+  // This output makes comparisons easier between the pattern and bits sampled
+  // from an instruction using the fast "compress" algorithm. See
+  // Instruction::Compress().
+  uint32_t GenerateOrderedPattern(uint32_t pattern) const;
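Spelling out GenerateMaskValuePair's documented example:

// "10x1": the 'x' slot is don't-care, so mask == 0b1101 and value == 0b1001;
// sampled bits match the pattern iff (bits & 0b1101) == 0b1001.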
+
+  // Generate a mask with a bit set at each sample position.
+  uint32_t GenerateSampledBitsMask() const;
+
+  // Try to compile a more optimised decode operation for this node, returning
+  // true if successful.
+  bool TryCompileOptimisedDecodeTable(Decoder* decoder);
+
+  // Helper function that returns a bit extracting function. If y is zero,
+  // x is a bit extraction mask. Otherwise, y is the mask, and x is the value
+  // to match after masking.
+  BitExtractFn GetBitExtractFunctionHelper(uint32_t x, uint32_t y);
+
+  // Name of this decoder node, used to construct edges in the decode graph.
+  std::string name_;
+
+  // Vector of bits sampled from an instruction to determine which node to look
+  // up next in the decode process.
+  const std::vector<uint8_t>& sampled_bits_;
+  static const std::vector<uint8_t> kEmptySampledBits;
+
+  // For leaf nodes, this is the name of the instruction form that the node
+  // represents. For other nodes, this is always set to "node".
+  std::string instruction_name_;
+
+  // Source mapping from bit pattern to name of next decode stage.
+  const std::vector<DecodePattern>& pattern_table_;
+  static const std::vector<DecodePattern> kEmptyPatternTable;
+
+  // Pointer to the decoder containing this node, used to call its visitor
+  // function for leaf nodes.
+  Decoder* decoder_;
+
+  // Pointer to the compiled version of this node. If this node hasn't been
+  // compiled yet, this pointer is NULL.
+  CompiledDecodeNode* compiled_node_;
 };
 
 }  // namespace aarch64
(Two file diffs suppressed because they are too large.)
@@ -27,11 +27,16 @@
 #ifndef VIXL_AARCH64_DISASM_AARCH64_H
 #define VIXL_AARCH64_DISASM_AARCH64_H
 
+#include <functional>
+#include <unordered_map>
+#include <utility>
+
 #include "../globals-vixl.h"
 #include "../utils-vixl.h"
 
 #include "cpu-features-auditor-aarch64.h"
 #include "decoder-aarch64.h"
+#include "decoder-visitor-map-aarch64.h"
 #include "instructions-aarch64.h"
 #include "operands-aarch64.h"
 
@@ -45,11 +50,9 @@ class Disassembler : public DecoderVisitor {
   virtual ~Disassembler();
   char* GetOutput();
 
   // Declare all Visitor functions.
-#define DECLARE(A) \
-  virtual void Visit##A(const Instruction* instr) VIXL_OVERRIDE;
-  VISITOR_LIST(DECLARE)
-#undef DECLARE
+  virtual void Visit(Metadata* metadata,
+                     const Instruction* instr) VIXL_OVERRIDE;
 
  protected:
   virtual void ProcessOutput(const Instruction* instr);
@@ -110,12 +113,145 @@ class Disassembler : public DecoderVisitor {
   int64_t CodeRelativeAddress(const void* instr);
 
  private:
+#define DECLARE(A) virtual void Visit##A(const Instruction* instr);
+  VISITOR_LIST(DECLARE)
+#undef DECLARE
+
+  using FormToVisitorFnMap = std::unordered_map<
+      uint32_t,
+      std::function<void(Disassembler*, const Instruction*)>>;
+  static const FormToVisitorFnMap* GetFormToVisitorFnMap();
+
+  std::string mnemonic_;
+  uint32_t form_hash_;
+
+  void SetMnemonicFromForm(const std::string& form) {
+    if (form != "unallocated") {
+      VIXL_ASSERT(form.find_first_of('_') != std::string::npos);
+      mnemonic_ = form.substr(0, form.find_first_of('_'));
+    }
+  }
+
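For example (hypothetical form string following the convention above), SetMnemonicFromForm("smax_z_p_zz") would store "smax", i.e. everything before the first '_'.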
+  void Disassemble_PdT_PgZ_ZnT_ZmT(const Instruction* instr);
+  void Disassemble_ZdB_Zn1B_Zn2B_imm(const Instruction* instr);
+  void Disassemble_ZdB_ZnB_ZmB(const Instruction* instr);
+  void Disassemble_ZdD_PgM_ZnS(const Instruction* instr);
+  void Disassemble_ZdD_ZnD_ZmD(const Instruction* instr);
+  void Disassemble_ZdD_ZnD_ZmD_imm(const Instruction* instr);
+  void Disassemble_ZdD_ZnS_ZmS_imm(const Instruction* instr);
+  void Disassemble_ZdH_PgM_ZnS(const Instruction* instr);
+  void Disassemble_ZdH_ZnH_ZmH_imm(const Instruction* instr);
+  void Disassemble_ZdS_PgM_ZnD(const Instruction* instr);
+  void Disassemble_ZdS_PgM_ZnH(const Instruction* instr);
+  void Disassemble_ZdS_PgM_ZnS(const Instruction* instr);
+  void Disassemble_ZdS_ZnH_ZmH_imm(const Instruction* instr);
+  void Disassemble_ZdS_ZnS_ZmS(const Instruction* instr);
+  void Disassemble_ZdS_ZnS_ZmS_imm(const Instruction* instr);
+  void Disassemble_ZdT_PgM_ZnT(const Instruction* instr);
+  void Disassemble_ZdT_PgZ_ZnT_ZmT(const Instruction* instr);
+  void Disassemble_ZdT_Pg_Zn1T_Zn2T(const Instruction* instr);
+  void Disassemble_ZdT_Zn1T_Zn2T_ZmT(const Instruction* instr);
+  void Disassemble_ZdT_ZnT_ZmT(const Instruction* instr);
+  void Disassemble_ZdT_ZnT_ZmTb(const Instruction* instr);
+  void Disassemble_ZdT_ZnTb(const Instruction* instr);
+  void Disassemble_ZdT_ZnTb_ZmTb(const Instruction* instr);
+  void Disassemble_ZdaD_ZnD_ZmD_imm(const Instruction* instr);
+  void Disassemble_ZdaD_ZnH_ZmH_imm_const(const Instruction* instr);
+  void Disassemble_ZdaD_ZnS_ZmS_imm(const Instruction* instr);
+  void Disassemble_ZdaH_ZnH_ZmH_imm(const Instruction* instr);
+  void Disassemble_ZdaH_ZnH_ZmH_imm_const(const Instruction* instr);
+  void Disassemble_ZdaS_ZnB_ZmB_imm_const(const Instruction* instr);
+  void Disassemble_ZdaS_ZnH_ZmH(const Instruction* instr);
+  void Disassemble_ZdaS_ZnH_ZmH_imm(const Instruction* instr);
+  void Disassemble_ZdaS_ZnS_ZmS_imm(const Instruction* instr);
+  void Disassemble_ZdaS_ZnS_ZmS_imm_const(const Instruction* instr);
+  void Disassemble_ZdaT_PgM_ZnTb(const Instruction* instr);
+  void Disassemble_ZdaT_ZnT_ZmT(const Instruction* instr);
+  void Disassemble_ZdaT_ZnT_ZmT_const(const Instruction* instr);
+  void Disassemble_ZdaT_ZnT_const(const Instruction* instr);
+  void Disassemble_ZdaT_ZnTb_ZmTb(const Instruction* instr);
+  void Disassemble_ZdaT_ZnTb_ZmTb_const(const Instruction* instr);
+  void Disassemble_ZdnB_ZdnB(const Instruction* instr);
+  void Disassemble_ZdnB_ZdnB_ZmB(const Instruction* instr);
+  void Disassemble_ZdnS_ZdnS_ZmS(const Instruction* instr);
+  void Disassemble_ZdnT_PgM_ZdnT_ZmT(const Instruction* instr);
+  void Disassemble_ZdnT_PgM_ZdnT_const(const Instruction* instr);
+  void Disassemble_ZdnT_ZdnT_ZmT_const(const Instruction* instr);
+  void Disassemble_ZtD_PgZ_ZnD_Xm(const Instruction* instr);
+  void Disassemble_ZtD_Pg_ZnD_Xm(const Instruction* instr);
+  void Disassemble_ZtS_PgZ_ZnS_Xm(const Instruction* instr);
+  void Disassemble_ZtS_Pg_ZnS_Xm(const Instruction* instr);
+  void Disassemble_ZdaS_ZnB_ZmB(const Instruction* instr);
+  void Disassemble_Vd4S_Vn16B_Vm16B(const Instruction* instr);
+
+  void DisassembleCpy(const Instruction* instr);
+  void DisassembleSet(const Instruction* instr);
+  void DisassembleMinMaxImm(const Instruction* instr);
+
+  void DisassembleSVEShiftLeftImm(const Instruction* instr);
+  void DisassembleSVEShiftRightImm(const Instruction* instr);
+  void DisassembleSVEAddSubCarry(const Instruction* instr);
+  void DisassembleSVEAddSubHigh(const Instruction* instr);
+  void DisassembleSVEComplexIntAddition(const Instruction* instr);
+  void DisassembleSVEBitwiseTernary(const Instruction* instr);
+  void DisassembleSVEFlogb(const Instruction* instr);
+  void DisassembleSVEFPPair(const Instruction* instr);
+
+  void DisassembleNoArgs(const Instruction* instr);
+
+  void DisassembleNEONMulByElementLong(const Instruction* instr);
+  void DisassembleNEONDotProdByElement(const Instruction* instr);
+  void DisassembleNEONFPMulByElement(const Instruction* instr);
+  void DisassembleNEONHalfFPMulByElement(const Instruction* instr);
+  void DisassembleNEONFPMulByElementLong(const Instruction* instr);
+  void DisassembleNEONComplexMulByElement(const Instruction* instr);
+  void DisassembleNEON2RegLogical(const Instruction* instr);
+  void DisassembleNEON2RegExtract(const Instruction* instr);
+  void DisassembleNEON2RegAddlp(const Instruction* instr);
+  void DisassembleNEON2RegCompare(const Instruction* instr);
+  void DisassembleNEON2RegFPCompare(const Instruction* instr);
+  void DisassembleNEON2RegFPConvert(const Instruction* instr);
+  void DisassembleNEON2RegFP(const Instruction* instr);
+  void DisassembleNEON3SameLogical(const Instruction* instr);
+  void DisassembleNEON3SameFHM(const Instruction* instr);
+  void DisassembleNEON3SameNoD(const Instruction* instr);
+  void DisassembleNEONShiftLeftLongImm(const Instruction* instr);
+  void DisassembleNEONShiftRightImm(const Instruction* instr);
+  void DisassembleNEONShiftRightNarrowImm(const Instruction* instr);
+  void DisassembleNEONScalarSatMulLongIndex(const Instruction* instr);
+  void DisassembleNEONFPScalarMulIndex(const Instruction* instr);
+  void DisassembleNEONFPScalar3Same(const Instruction* instr);
+  void DisassembleNEONScalar3SameOnlyD(const Instruction* instr);
+  void DisassembleNEONFPAcrossLanes(const Instruction* instr);
+  void DisassembleNEONFP16AcrossLanes(const Instruction* instr);
+  void DisassembleNEONScalarShiftImmOnlyD(const Instruction* instr);
+  void DisassembleNEONScalarShiftRightNarrowImm(const Instruction* instr);
+  void DisassembleNEONScalar2RegMiscOnlyD(const Instruction* instr);
+  void DisassembleNEONFPScalar2RegMisc(const Instruction* instr);
+  void DisassembleNEONPolynomialMul(const Instruction* instr);
+
+  void DisassembleMTELoadTag(const Instruction* instr);
+  void DisassembleMTEStoreTag(const Instruction* instr);
+  void DisassembleMTEStoreTagPair(const Instruction* instr);
+
+  void Disassemble_XdSP_XnSP_Xm(const Instruction* instr);
+  void Disassemble_XdSP_XnSP_uimm6_uimm4(const Instruction* instr);
+  void Disassemble_Xd_XnSP_Xm(const Instruction* instr);
+  void Disassemble_Xd_XnSP_XmSP(const Instruction* instr);
+
   void Format(const Instruction* instr,
               const char* mnemonic,
-              const char* format);
+              const char* format0,
+              const char* format1 = NULL);
+  void FormatWithDecodedMnemonic(const Instruction* instr,
+                                 const char* format0,
+                                 const char* format1 = NULL);
 
   void Substitute(const Instruction* instr, const char* string);
   int SubstituteField(const Instruction* instr, const char* format);
   int SubstituteRegisterField(const Instruction* instr, const char* format);
+  int SubstitutePredicateRegisterField(const Instruction* instr,
+                                       const char* format);
   int SubstituteImmediateField(const Instruction* instr, const char* format);
   int SubstituteLiteralField(const Instruction* instr, const char* format);
   int SubstituteBitfieldImmediateField(const Instruction* instr,
@@ -130,6 +266,14 @@ class Disassembler : public DecoderVisitor {
   int SubstituteBarrierField(const Instruction* instr, const char* format);
   int SubstituteSysOpField(const Instruction* instr, const char* format);
   int SubstituteCrField(const Instruction* instr, const char* format);
+  int SubstituteIntField(const Instruction* instr, const char* format);
+  int SubstituteSVESize(const Instruction* instr, const char* format);
+  int SubstituteTernary(const Instruction* instr, const char* format);
+
+  std::pair<unsigned, unsigned> GetRegNumForField(const Instruction* instr,
+                                                  char reg_prefix,
+                                                  const char* field);
 
   bool RdIsZROrSP(const Instruction* instr) const {
     return (instr->GetRd() == kZeroRegCode);
   }
@@ -173,6 +317,7 @@ class PrintDisassembler : public Disassembler {
       : cpu_features_auditor_(NULL),
         cpu_features_prefix_("// Needs: "),
         cpu_features_suffix_(""),
+        signed_addresses_(false),
         stream_(stream) {}
 
   // Convenience helpers for quick disassembly, without having to manually
@@ -201,12 +346,23 @@ class PrintDisassembler : public Disassembler {
     cpu_features_suffix_ = suffix;
   }
 
+  // By default, addresses are printed as simple, unsigned 64-bit hex values.
+  //
+  // With `PrintSignedAddresses(true)`:
+  //  - negative addresses are printed as "-0x1234...",
+  //  - positive addresses have a leading space, like " 0x1234...", to maintain
+  //    alignment.
+  //
+  // This is most useful in combination with Disassembler::MapCodeAddress(...).
+  void PrintSignedAddresses(bool s) { signed_addresses_ = s; }
+
  protected:
   virtual void ProcessOutput(const Instruction* instr) VIXL_OVERRIDE;
 
   CPUFeaturesAuditor* cpu_features_auditor_;
   const char* cpu_features_prefix_;
   const char* cpu_features_suffix_;
+  bool signed_addresses_;
 
  private:
   FILE* stream_;
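A plausible use of the new flag together with address mapping (sketch only; MapCodeAddress and DisassembleBuffer are assumed from the existing PrintDisassembler helpers, and all argument values are illustrative):

PrintDisassembler disasm(stdout);
disasm.MapCodeAddress(-0x80, code_start);  // code_start: assumed Instruction*.
disasm.PrintSignedAddresses(true);         // Align "-0x..." and " 0x...".
disasm.DisassembleBuffer(code_start, code_size_in_bytes);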
@@ -32,6 +32,11 @@
 
 #include "constants-aarch64.h"
 
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wdeprecated-enum-enum-conversion"
+#endif
+
 namespace vixl {
 namespace aarch64 {
 // ISA constants. --------------------------------------------------------------
@@ -81,6 +86,7 @@ const uint64_t kXRegMask = UINT64_C(0xffffffffffffffff);
 const uint64_t kHRegMask = UINT64_C(0xffff);
 const uint64_t kSRegMask = UINT64_C(0xffffffff);
 const uint64_t kDRegMask = UINT64_C(0xffffffffffffffff);
+const uint64_t kHSignMask = UINT64_C(0x8000);
 const uint64_t kSSignMask = UINT64_C(0x80000000);
 const uint64_t kDSignMask = UINT64_C(0x8000000000000000);
 const uint64_t kWSignMask = UINT64_C(0x80000000);
@@ -106,6 +112,8 @@ const unsigned kZeroRegCode = 31;
 const unsigned kSPRegInternalCode = 63;
 const unsigned kRegCodeMask = 0x1f;
 
+const unsigned kAtomicAccessGranule = 16;
+
 const unsigned kAddressTagOffset = 56;
 const unsigned kAddressTagWidth = 8;
 const uint64_t kAddressTagMask = ((UINT64_C(1) << kAddressTagWidth) - 1)
@@ -114,21 +122,49 @@ VIXL_STATIC_ASSERT(kAddressTagMask == UINT64_C(0xff00000000000000));
 
 const uint64_t kTTBRMask = UINT64_C(1) << 55;
 
+// We can't define a static kZRegSize because the size depends on the
+// implementation. However, it is sometimes useful to know the minimum and
+// maximum possible sizes.
+const unsigned kZRegMinSize = 128;
+const unsigned kZRegMinSizeLog2 = 7;
+const unsigned kZRegMinSizeInBytes = kZRegMinSize / 8;
+const unsigned kZRegMinSizeInBytesLog2 = kZRegMinSizeLog2 - 3;
+const unsigned kZRegMaxSize = 2048;
+const unsigned kZRegMaxSizeLog2 = 11;
+const unsigned kZRegMaxSizeInBytes = kZRegMaxSize / 8;
+const unsigned kZRegMaxSizeInBytesLog2 = kZRegMaxSizeLog2 - 3;
+
+// The P register size depends on the Z register size.
+const unsigned kZRegBitsPerPRegBit = kBitsPerByte;
+const unsigned kZRegBitsPerPRegBitLog2 = 3;
+const unsigned kPRegMinSize = kZRegMinSize / kZRegBitsPerPRegBit;
+const unsigned kPRegMinSizeLog2 = kZRegMinSizeLog2 - 3;
+const unsigned kPRegMinSizeInBytes = kPRegMinSize / 8;
+const unsigned kPRegMinSizeInBytesLog2 = kPRegMinSizeLog2 - 3;
+const unsigned kPRegMaxSize = kZRegMaxSize / kZRegBitsPerPRegBit;
+const unsigned kPRegMaxSizeLog2 = kZRegMaxSizeLog2 - 3;
+const unsigned kPRegMaxSizeInBytes = kPRegMaxSize / 8;
+const unsigned kPRegMaxSizeInBytesLog2 = kPRegMaxSizeLog2 - 3;
+
+const unsigned kMTETagGranuleInBytes = 16;
+const unsigned kMTETagGranuleInBytesLog2 = 4;
+const unsigned kMTETagWidth = 4;
+
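Worked numbers for the Z/P relationship above: one predicate bit covers one byte of Z register, so

// kPRegMinSize == 128 / 8 == 16 bits; kPRegMaxSize == 2048 / 8 == 256 bits.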
 // Make these moved float constants backwards compatible
 // with explicit vixl::aarch64:: namespace references.
-using vixl::kDoubleMantissaBits;
 using vixl::kDoubleExponentBits;
-using vixl::kFloatMantissaBits;
-using vixl::kFloatExponentBits;
-using vixl::kFloat16MantissaBits;
+using vixl::kDoubleMantissaBits;
 using vixl::kFloat16ExponentBits;
+using vixl::kFloat16MantissaBits;
+using vixl::kFloatExponentBits;
+using vixl::kFloatMantissaBits;
 
-using vixl::kFP16PositiveInfinity;
 using vixl::kFP16NegativeInfinity;
+using vixl::kFP16PositiveInfinity;
 using vixl::kFP32NegativeInfinity;
-using vixl::kFP32PositiveInfinity;
-using vixl::kFP64PositiveInfinity;
+using vixl::kFP32PositiveInfinity;
 using vixl::kFP64NegativeInfinity;
+using vixl::kFP64PositiveInfinity;
 
 using vixl::kFP16DefaultNaN;
 using vixl::kFP32DefaultNaN;
@@ -149,6 +185,49 @@ enum AddrMode { Offset, PreIndex, PostIndex };
 
 enum Reg31Mode { Reg31IsStackPointer, Reg31IsZeroRegister };
 
+enum VectorFormat {
+  kFormatUndefined = 0xffffffff,
+  kFormat8B = NEON_8B,
+  kFormat16B = NEON_16B,
+  kFormat4H = NEON_4H,
+  kFormat8H = NEON_8H,
+  kFormat2S = NEON_2S,
+  kFormat4S = NEON_4S,
+  kFormat1D = NEON_1D,
+  kFormat2D = NEON_2D,
+
+  // Scalar formats. We add the scalar bit to distinguish between scalar and
+  // vector enumerations; the bit is always set in the encoding of scalar ops
+  // and always clear for vector ops. Although kFormatD and kFormat1D appear
+  // to be the same, their meaning is subtly different. The first is a scalar
+  // operation, the second a vector operation that only affects one lane.
+  kFormatB = NEON_B | NEONScalar,
+  kFormatH = NEON_H | NEONScalar,
+  kFormatS = NEON_S | NEONScalar,
+  kFormatD = NEON_D | NEONScalar,
+
+  // An artificial value, used to distinguish from NEON format category.
+  kFormatSVE = 0x0000fffd,
+  // Artificial values. Q and O lane sizes aren't encoded in the usual size
+  // field.
+  kFormatSVEQ = 0x00080000,
+  kFormatSVEO = 0x00040000,
+
+  // Vector element width of SVE register with the unknown lane count since
+  // the vector length is implementation dependent.
+  kFormatVnB = SVE_B | kFormatSVE,
+  kFormatVnH = SVE_H | kFormatSVE,
+  kFormatVnS = SVE_S | kFormatSVE,
+  kFormatVnD = SVE_D | kFormatSVE,
+  kFormatVnQ = kFormatSVEQ | kFormatSVE,
+  kFormatVnO = kFormatSVEO | kFormatSVE,
+
+  // Artificial values, used by simulator trace tests and a few oddball
+  // instructions (such as FMLAL).
+  kFormat2H = 0xfffffffe,
+  kFormat1Q = 0xfffffffd
+};
+
 // Instructions. ---------------------------------------------------------------
 
 class Instruction {
@@ -176,6 +255,47 @@ class Instruction {
     return ExtractBits(msb, lsb);
   }
 
+  // Compress bit extraction operation from Hacker's Delight.
+  // https://github.com/hcs0/Hackers-Delight/blob/master/compress.c.txt
+  uint32_t Compress(uint32_t mask) const {
+    uint32_t mk, mp, mv, t;
+    uint32_t x = GetInstructionBits() & mask;  // Clear irrelevant bits.
+    mk = ~mask << 1;                           // We will count 0's to right.
+    for (int i = 0; i < 5; i++) {
+      mp = mk ^ (mk << 1);  // Parallel suffix.
+      mp = mp ^ (mp << 2);
+      mp = mp ^ (mp << 4);
+      mp = mp ^ (mp << 8);
+      mp = mp ^ (mp << 16);
+      mv = mp & mask;                         // Bits to move.
+      mask = (mask ^ mv) | (mv >> (1 << i));  // Compress mask.
+      t = x & mv;
+      x = (x ^ t) | (t >> (1 << i));  // Compress x.
+      mk = mk & ~mp;
+    }
+    return x;
+  }
+
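A concrete run of Compress(), matching the decoder's use of it:

// For instruction bits 0b1100 and mask 0b1010, the masked value keeps bits
// 3 and 1 (values 1 and 0) and packs them into the low bits, so
// Compress(0b1010) == 0b10. This is what lets a decode table be indexed
// densely by the sampled bits.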
+  template <uint32_t M>
+  uint32_t ExtractBits() const {
+    return Compress(M);
+  }
+
+  uint32_t ExtractBitsAbsent() const {
+    VIXL_UNREACHABLE();
+    return 0;
+  }
+
+  template <uint32_t M, uint32_t V>
+  uint32_t IsMaskedValue() const {
+    return (Mask(M) == V) ? 1 : 0;
+  }
+
+  uint32_t IsMaskedValueAbsent() const {
+    VIXL_UNREACHABLE();
+    return 0;
+  }
+
   int32_t ExtractSignedBits(int msb, int lsb) const {
     int32_t bits = *(reinterpret_cast<const int32_t*>(this));
     return ExtractSignedBitfield32(msb, lsb, bits);
@@ -196,6 +316,34 @@ class Instruction {
   INSTRUCTION_FIELDS_LIST(DEFINE_GETTER)
 #undef DEFINE_GETTER
 
+  template <int msb, int lsb>
+  int32_t GetRx() const {
+    // We don't have any register fields wider than five bits, so the result
+    // will always fit into an int32_t.
+    VIXL_ASSERT((msb - lsb + 1) <= 5);
+    return this->ExtractBits(msb, lsb);
+  }
+
+  VectorFormat GetSVEVectorFormat(int field_lsb = 22) const {
+    VIXL_ASSERT((field_lsb >= 0) && (field_lsb <= 30));
+    uint32_t instr = ExtractUnsignedBitfield32(field_lsb + 1,
+                                               field_lsb,
+                                               GetInstructionBits())
+                     << 22;
+    switch (instr & SVESizeFieldMask) {
+      case SVE_B:
+        return kFormatVnB;
+      case SVE_H:
+        return kFormatVnH;
+      case SVE_S:
+        return kFormatVnS;
+      case SVE_D:
+        return kFormatVnD;
+    }
+    VIXL_UNREACHABLE();
+    return kFormatUndefined;
+  }
+
   // ImmPCRel is a compound field (not present in INSTRUCTION_FIELDS_LIST),
   // formed from ImmPCRelLo and ImmPCRelHi.
   int GetImmPCRel() const {
@ -207,10 +355,40 @@ class Instruction {
|
||||||
}
|
}
|
||||||
VIXL_DEPRECATED("GetImmPCRel", int ImmPCRel() const) { return GetImmPCRel(); }
|
VIXL_DEPRECATED("GetImmPCRel", int ImmPCRel() const) { return GetImmPCRel(); }
|
||||||
|
|
||||||
|
// ImmLSPAC is a compound field (not present in INSTRUCTION_FIELDS_LIST),
|
||||||
|
// formed from ImmLSPACLo and ImmLSPACHi.
|
||||||
|
int GetImmLSPAC() const {
|
||||||
|
uint32_t hi = static_cast<uint32_t>(GetImmLSPACHi());
|
||||||
|
uint32_t lo = GetImmLSPACLo();
|
||||||
|
uint32_t offset = (hi << ImmLSPACLo_width) | lo;
|
||||||
|
int width = ImmLSPACLo_width + ImmLSPACHi_width;
|
||||||
|
return ExtractSignedBitfield32(width - 1, 0, offset) << 3;
|
||||||
|
}
|
||||||
|
|
||||||
uint64_t GetImmLogical() const;
|
uint64_t GetImmLogical() const;
|
||||||
VIXL_DEPRECATED("GetImmLogical", uint64_t ImmLogical() const) {
|
VIXL_DEPRECATED("GetImmLogical", uint64_t ImmLogical() const) {
|
||||||
return GetImmLogical();
|
return GetImmLogical();
|
||||||
}
|
}
|
||||||
+  uint64_t GetSVEImmLogical() const;
+  int GetSVEBitwiseImmLaneSizeInBytesLog2() const;
+  uint64_t DecodeImmBitMask(int32_t n,
+                            int32_t imm_s,
+                            int32_t imm_r,
+                            int32_t size) const;
+
+  std::pair<int, int> GetSVEPermuteIndexAndLaneSizeLog2() const;
+
+  std::pair<int, int> GetSVEMulZmAndIndex() const;
+  std::pair<int, int> GetSVEMulLongZmAndIndex() const;
+
+  std::pair<int, int> GetSVEImmShiftAndLaneSizeLog2(bool is_predicated) const;
+
+  int GetSVEExtractImmediate() const;
+
+  int GetSVEMsizeFromDtype(bool is_signed, int dtype_h_lsb = 23) const;
+
+  int GetSVEEsizeFromDtype(bool is_signed, int dtype_l_lsb = 21) const;
+
  unsigned GetImmNEONabcdefgh() const;
  VIXL_DEPRECATED("GetImmNEONabcdefgh", unsigned ImmNEONabcdefgh() const) {

@@ -237,6 +415,16 @@ class Instruction {
    return GetImmNEONFP64();
  }
+
+  Float16 GetSVEImmFP16() const { return Imm8ToFloat16(ExtractBits(12, 5)); }
+
+  float GetSVEImmFP32() const { return Imm8ToFP32(ExtractBits(12, 5)); }
+
+  double GetSVEImmFP64() const { return Imm8ToFP64(ExtractBits(12, 5)); }
+
+  static Float16 Imm8ToFloat16(uint32_t imm8);
+  static float Imm8ToFP32(uint32_t imm8);
+  static double Imm8ToFP64(uint32_t imm8);
+
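The Imm8To* helpers implement the standard AArch64 modified-immediate floating-point expansion (values of the form +/-n/16 * 2^r). A minimal sketch for the FP32 case, my own illustration rather than vixl's implementation:

    #include <cstdint>
    #include <cstring>

    float Imm8ToFP32Sketch(uint32_t imm8) {
      // imm8 = abcdefgh expands to: a : NOT(b) : bbbbb : cdefgh : 19 zeros.
      uint32_t a = (imm8 >> 7) & 1;
      uint32_t b = (imm8 >> 6) & 1;
      uint32_t cdefgh = imm8 & 0x3f;
      uint32_t bits = (a << 31) | ((b ^ 1) << 30) |
                      (b ? 0x3e000000u : 0u) | (cdefgh << 19);
      float result;
      std::memcpy(&result, &bits, sizeof(result));
      return result;  // e.g. imm8 == 0x70 yields 1.0f
    }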
  unsigned GetSizeLS() const {
    return CalcLSDataSize(static_cast<LoadStoreOp>(Mask(LoadStoreMask)));
  }

@@ -299,6 +487,10 @@ class Instruction {
    return Mask(LoadStoreAnyFMask) == LoadStoreAnyFixed;
  }
+
+  // True if `this` is valid immediately after the provided movprfx instruction.
+  bool CanTakeSVEMovprfx(uint32_t form_hash, Instruction const* movprfx) const;
+  bool CanTakeSVEMovprfx(const char* form, Instruction const* movprfx) const;
+
  bool IsLoad() const;
  bool IsStore() const;

@@ -312,6 +504,83 @@ class Instruction {
           (Mask(MoveWideImmediateMask) == MOVN_w);
  }
+
+  bool IsException() const { return Mask(ExceptionFMask) == ExceptionFixed; }
+
+  bool IsPAuth() const { return Mask(SystemPAuthFMask) == SystemPAuthFixed; }
+
+  bool IsBti() const {
+    if (Mask(SystemHintFMask) == SystemHintFixed) {
+      int imm_hint = GetImmHint();
+      switch (imm_hint) {
+        case BTI:
+        case BTI_c:
+        case BTI_j:
+        case BTI_jc:
+          return true;
+      }
+    }
+    return false;
+  }
+
+  bool IsMOPSPrologueOf(const Instruction* instr, uint32_t mops_type) const {
+    VIXL_ASSERT((mops_type == "set"_h) || (mops_type == "setg"_h) ||
+                (mops_type == "cpy"_h));
+    const int op_lsb = (mops_type == "cpy"_h) ? 22 : 14;
+    return GetInstructionBits() == instr->Mask(~(0x3U << op_lsb));
+  }
+
+  bool IsMOPSMainOf(const Instruction* instr, uint32_t mops_type) const {
+    VIXL_ASSERT((mops_type == "set"_h) || (mops_type == "setg"_h) ||
+                (mops_type == "cpy"_h));
+    const int op_lsb = (mops_type == "cpy"_h) ? 22 : 14;
+    return GetInstructionBits() ==
+           (instr->Mask(~(0x3U << op_lsb)) | (0x1 << op_lsb));
+  }
+
+  bool IsMOPSEpilogueOf(const Instruction* instr, uint32_t mops_type) const {
+    VIXL_ASSERT((mops_type == "set"_h) || (mops_type == "setg"_h) ||
+                (mops_type == "cpy"_h));
+    const int op_lsb = (mops_type == "cpy"_h) ? 22 : 14;
+    return GetInstructionBits() ==
+           (instr->Mask(~(0x3U << op_lsb)) | (0x2 << op_lsb));
+  }
+
+  template <uint32_t mops_type>
+  bool IsConsistentMOPSTriplet() const {
+    VIXL_STATIC_ASSERT((mops_type == "set"_h) || (mops_type == "setg"_h) ||
+                       (mops_type == "cpy"_h));
+
+    int64_t isize = static_cast<int64_t>(kInstructionSize);
+    const Instruction* prev2 = GetInstructionAtOffset(-2 * isize);
+    const Instruction* prev1 = GetInstructionAtOffset(-1 * isize);
+    const Instruction* next1 = GetInstructionAtOffset(1 * isize);
+    const Instruction* next2 = GetInstructionAtOffset(2 * isize);
+
+    // Use the encoding of the current instruction to determine the expected
+    // adjacent instructions. NB. this doesn't check if the nearby instructions
+    // are MOPS-type, but checks that they form a consistent triplet if they
+    // are. For example, 'mov x0, #0; mov x0, #512; mov x0, #1024' is a
+    // consistent triplet, but they are not MOPS instructions.
+    const int op_lsb = (mops_type == "cpy"_h) ? 22 : 14;
+    const uint32_t kMOPSOpfield = 0x3 << op_lsb;
+    const uint32_t kMOPSPrologue = 0;
+    const uint32_t kMOPSMain = 0x1 << op_lsb;
+    const uint32_t kMOPSEpilogue = 0x2 << op_lsb;
+    switch (Mask(kMOPSOpfield)) {
+      case kMOPSPrologue:
+        return next1->IsMOPSMainOf(this, mops_type) &&
+               next2->IsMOPSEpilogueOf(this, mops_type);
+      case kMOPSMain:
+        return prev1->IsMOPSPrologueOf(this, mops_type) &&
+               next1->IsMOPSEpilogueOf(this, mops_type);
+      case kMOPSEpilogue:
+        return prev2->IsMOPSPrologueOf(this, mops_type) &&
+               prev1->IsMOPSMainOf(this, mops_type);
+      default:
+        VIXL_ABORT_WITH_MSG("Undefined MOPS operation\n");
+    }
+  }
+
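The MOPS helpers above hinge on a two-bit op field that distinguishes the three instructions of a memory-operations triplet. A standalone sketch of that decode, illustrative only (the field positions are taken from the code above; the mnemonics in the comments assume the usual FEAT_MOPS naming):

    #include <cstdint>

    enum class MopsStage { kPrologue, kMain, kEpilogue, kUndefined };

    MopsStage StageOf(uint32_t instr_bits, bool is_cpy) {
      const int op_lsb = is_cpy ? 22 : 14;  // bit 22 for cpy, bit 14 for set/setg
      switch ((instr_bits >> op_lsb) & 0x3) {
        case 0: return MopsStage::kPrologue;  // e.g. cpyp / setp
        case 1: return MopsStage::kMain;      // e.g. cpym / setm
        case 2: return MopsStage::kEpilogue;  // e.g. cpye / sete
        default: return MopsStage::kUndefined;
      }
    }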
  static int GetImmBranchRangeBitwidth(ImmBranchType branch_type);
  VIXL_DEPRECATED(
      "GetImmBranchRangeBitwidth",

@@ -496,40 +765,12 @@ class Instruction {
 private:
  int GetImmBranch() const;

-  static Float16 Imm8ToFloat16(uint32_t imm8);
-  static float Imm8ToFP32(uint32_t imm8);
-  static double Imm8ToFP64(uint32_t imm8);
-
  void SetPCRelImmTarget(const Instruction* target);
  void SetBranchImmTarget(const Instruction* target);
};


-// Functions for handling NEON vector format information.
-enum VectorFormat {
-  kFormatUndefined = 0xffffffff,
-  kFormat8B = NEON_8B,
-  kFormat16B = NEON_16B,
-  kFormat4H = NEON_4H,
-  kFormat8H = NEON_8H,
-  kFormat2S = NEON_2S,
-  kFormat4S = NEON_4S,
-  kFormat1D = NEON_1D,
-  kFormat2D = NEON_2D,
-
-  // Scalar formats. We add the scalar bit to distinguish between scalar and
-  // vector enumerations; the bit is always set in the encoding of scalar ops
-  // and always clear for vector ops. Although kFormatD and kFormat1D appear
-  // to be the same, their meaning is subtly different. The first is a scalar
-  // operation, the second a vector operation that only affects one lane.
-  kFormatB = NEON_B | NEONScalar,
-  kFormatH = NEON_H | NEONScalar,
-  kFormatS = NEON_S | NEONScalar,
-  kFormatD = NEON_D | NEONScalar,
-
-  // A value invented solely for FP16 scalar pairwise simulator trace tests.
-  kFormat2H = 0xfffffffe
-};
+// Functions for handling NEON and SVE vector format information.
const int kMaxLanesPerVector = 16;

@@ -537,12 +778,16 @@ VectorFormat VectorFormatHalfWidth(VectorFormat vform);
VectorFormat VectorFormatDoubleWidth(VectorFormat vform);
VectorFormat VectorFormatDoubleLanes(VectorFormat vform);
VectorFormat VectorFormatHalfLanes(VectorFormat vform);
-VectorFormat ScalarFormatFromLaneSize(int lanesize);
+VectorFormat ScalarFormatFromLaneSize(int lane_size_in_bits);
VectorFormat VectorFormatHalfWidthDoubleLanes(VectorFormat vform);
VectorFormat VectorFormatFillQ(VectorFormat vform);
VectorFormat ScalarFormatFromFormat(VectorFormat vform);
+VectorFormat SVEFormatFromLaneSizeInBits(int lane_size_in_bits);
+VectorFormat SVEFormatFromLaneSizeInBytes(int lane_size_in_bytes);
+VectorFormat SVEFormatFromLaneSizeInBytesLog2(int lane_size_in_bytes_log_2);
unsigned RegisterSizeInBitsFromFormat(VectorFormat vform);
unsigned RegisterSizeInBytesFromFormat(VectorFormat vform);
+bool IsSVEFormat(VectorFormat vform);
// TODO: Make the return types of these functions consistent.
unsigned LaneSizeInBitsFromFormat(VectorFormat vform);
int LaneSizeInBytesFromFormat(VectorFormat vform);
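A short usage sketch of these free functions (illustrative; it assumes the conventional NEON format values, e.g. that kFormat4S denotes a 128-bit register with 32-bit lanes):

    #include "aarch64/instructions-aarch64.h"
    using namespace vixl::aarch64;

    void FormatHelpersExample() {
      VectorFormat f = kFormat4S;                           // four 32-bit lanes
      unsigned reg_bits = RegisterSizeInBitsFromFormat(f);  // 128
      unsigned lane_bits = LaneSizeInBitsFromFormat(f);     // 32
      VectorFormat half = VectorFormatHalfWidth(f);         // kFormat4H
      (void)reg_bits;
      (void)lane_bits;
      (void)half;
    }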

@@ -588,7 +833,7 @@ class NEONFormatDecoder {
  enum SubstitutionMode { kPlaceholder, kFormat };

  // Construct a format decoder with increasingly specific format maps for each
-  // subsitution. If no format map is specified, the default is the integer
+  // substitution. If no format map is specified, the default is the integer
  // format map.
  explicit NEONFormatDecoder(const Instruction* instr) {
    instrbits_ = instr->GetInstructionBits();

@@ -639,18 +884,26 @@ class NEONFormatDecoder {
                          SubstitutionMode mode0 = kFormat,
                          SubstitutionMode mode1 = kFormat,
                          SubstitutionMode mode2 = kFormat) {
+    const char* subst0 = GetSubstitute(0, mode0);
+    const char* subst1 = GetSubstitute(1, mode1);
+    const char* subst2 = GetSubstitute(2, mode2);
+
+    if ((subst0 == NULL) || (subst1 == NULL) || (subst2 == NULL)) {
+      return NULL;
+    }
+
    snprintf(form_buffer_,
             sizeof(form_buffer_),
             string,
-             GetSubstitute(0, mode0),
-             GetSubstitute(1, mode1),
-             GetSubstitute(2, mode2));
+             subst0,
+             subst1,
+             subst2);
    return form_buffer_;
  }
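The point of this change is that passing NULL to snprintf for a %s argument is undefined behaviour, so unavailable substitutions must be caught first. A minimal standalone analogue of the pattern (my own sketch, not vixl code):

    #include <cstdio>

    const char* FormatOrNull(char* buf, size_t n, const char* fmt,
                             const char* s0, const char* s1) {
      // Propagate failure to the caller instead of feeding NULL to snprintf.
      if ((s0 == NULL) || (s1 == NULL)) return NULL;
      snprintf(buf, n, fmt, s0, s1);
      return buf;
    }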

-  // Append a "2" to a mnemonic string based of the state of the Q bit.
+  // Append a "2" to a mnemonic string based on the state of the Q bit.
  const char* Mnemonic(const char* mnemonic) {
-    if ((instrbits_ & NEON_Q) != 0) {
+    if ((mnemonic != NULL) && (instrbits_ & NEON_Q) != 0) {
      snprintf(mne_buffer_, sizeof(mne_buffer_), "%s2", mnemonic);
      return mne_buffer_;
    }

@@ -745,6 +998,33 @@ class NEONFormatDecoder {
    return &map;
  }

+  // The shift immediate map uses between two and five bits to encode the NEON
+  // vector format:
+  //    00010->8B, 00011->16B, 001x0->4H, 001x1->8H,
+  //    01xx0->2S, 01xx1->4S, 1xxx1->2D, all others undefined.
+  static const NEONFormatMap* ShiftImmFormatMap() {
+    static const NEONFormatMap map = {{22, 21, 20, 19, 30},
+                                      {NF_UNDEF, NF_UNDEF, NF_8B, NF_16B,
+                                       NF_4H, NF_8H, NF_4H, NF_8H,
+                                       NF_2S, NF_4S, NF_2S, NF_4S,
+                                       NF_2S, NF_4S, NF_2S, NF_4S,
+                                       NF_UNDEF, NF_2D, NF_UNDEF, NF_2D,
+                                       NF_UNDEF, NF_2D, NF_UNDEF, NF_2D,
+                                       NF_UNDEF, NF_2D, NF_UNDEF, NF_2D,
+                                       NF_UNDEF, NF_2D, NF_UNDEF, NF_2D}};
+    return &map;
+  }
+
+  // The shift long/narrow immediate map uses between two and four bits to
+  // encode the NEON vector format:
+  //    0001->8H, 001x->4S, 01xx->2D, all others undefined.
+  static const NEONFormatMap* ShiftLongNarrowImmFormatMap() {
+    static const NEONFormatMap map =
+        {{22, 21, 20, 19},
+         {NF_UNDEF, NF_8H, NF_4S, NF_4S, NF_2D, NF_2D, NF_2D, NF_2D}};
+    return &map;
+  }
+
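How such a map is consumed: the listed instruction bits are concatenated, most significant first, into an index into the format table. A simplified standalone analogue (illustrative; not vixl's actual lookup code):

    #include <cstdint>

    int MapIndex(uint32_t instr, const int (&bit_positions)[4]) {
      int index = 0;
      for (int pos : bit_positions) {
        index = (index << 1) | ((instr >> pos) & 1);
      }
      // e.g. for bits {22, 21, 20, 19}, the pattern 0b0001 selects entry 1,
      // which is NF_8H in the shift long/narrow map above.
      return index;
    }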
  // The scalar format map uses two bits (size<1:0>) to encode the NEON scalar
  // formats: NF_B, NF_H, NF_S, NF_D.
  static const NEONFormatMap* ScalarFormatMap() {
@ -818,7 +1098,7 @@ class NEONFormatDecoder {
|
||||||
static const char* NEONFormatAsString(NEONFormat format) {
|
static const char* NEONFormatAsString(NEONFormat format) {
|
||||||
// clang-format off
|
// clang-format off
|
||||||
static const char* formats[] = {
|
static const char* formats[] = {
|
||||||
"undefined",
|
NULL,
|
||||||
"8b", "16b", "4h", "8h", "2s", "4s", "1d", "2d",
|
"8b", "16b", "4h", "8h", "2s", "4s", "1d", "2d",
|
||||||
"b", "h", "s", "d"
|
"b", "h", "s", "d"
|
||||||
};
|
};
|
||||||

@@ -833,9 +1113,9 @@ class NEONFormatDecoder {
                (format == NF_D) || (format == NF_UNDEF));
    // clang-format off
    static const char* formats[] = {
-      "undefined",
-      "undefined", "undefined", "undefined", "undefined",
-      "undefined", "undefined", "undefined", "undefined",
+      NULL,
+      NULL, NULL, NULL, NULL,
+      NULL, NULL, NULL, NULL,
      "'B", "'H", "'S", "'D"
    };
    // clang-format on

@@ -862,4 +1142,8 @@ class NEONFormatDecoder {
}  // namespace aarch64
}  // namespace vixl

+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
#endif  // VIXL_AARCH64_INSTRUCTIONS_AARCH64_H_
@@ -1,117 +0,0 @@
// Copyright 2014, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef VIXL_AARCH64_INSTRUMENT_AARCH64_H_
#define VIXL_AARCH64_INSTRUMENT_AARCH64_H_

#include "../globals-vixl.h"
#include "../utils-vixl.h"

#include "constants-aarch64.h"
#include "decoder-aarch64.h"
#include "instrument-aarch64.h"

namespace vixl {
namespace aarch64 {

const int kCounterNameMaxLength = 256;
const uint64_t kDefaultInstrumentationSamplingPeriod = 1 << 22;

enum InstrumentState { InstrumentStateDisable = 0, InstrumentStateEnable = 1 };

enum CounterType {
  Gauge = 0,      // Gauge counters reset themselves after reading.
  Cumulative = 1  // Cumulative counters keep their value after reading.
};

class Counter {
 public:
  explicit Counter(const char* name, CounterType type = Gauge);

  void Increment();
  void Enable();
  void Disable();
  bool IsEnabled();
  uint64_t GetCount();
  VIXL_DEPRECATED("GetCount", uint64_t count()) { return GetCount(); }

  const char* GetName();
  VIXL_DEPRECATED("GetName", const char* name()) { return GetName(); }

  CounterType GetType();
  VIXL_DEPRECATED("GetType", CounterType type()) { return GetType(); }

 private:
  char name_[kCounterNameMaxLength];
  uint64_t count_;
  bool enabled_;
  CounterType type_;
};

class Instrument : public DecoderVisitor {
 public:
  explicit Instrument(
      const char* datafile = NULL,
      uint64_t sample_period = kDefaultInstrumentationSamplingPeriod);
  ~Instrument();

  void Enable();
  void Disable();

  // Declare all Visitor functions.
#define DECLARE(A) void Visit##A(const Instruction* instr) VIXL_OVERRIDE;
  VISITOR_LIST(DECLARE)
#undef DECLARE

 private:
  void Update();
  void DumpCounters();
  void DumpCounterNames();
  void DumpEventMarker(unsigned marker);
  void HandleInstrumentationEvent(unsigned event);
  Counter* GetCounter(const char* name);

  void InstrumentLoadStore(const Instruction* instr);
  void InstrumentLoadStorePair(const Instruction* instr);

  std::list<Counter*> counters_;

  FILE* output_stream_;

  // Counter information is dumped every sample_period_ instructions decoded.
  // For a sample_period_ = 0 a final counter value is only produced when the
  // Instrumentation class is destroyed.
  uint64_t sample_period_;
};

}  // namespace aarch64
}  // namespace vixl

#endif  // VIXL_AARCH64_INSTRUMENT_AARCH64_H_
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -0,0 +1,902 @@
// Copyright 2019, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef VIXL_AARCH64_REGISTERS_AARCH64_H_
#define VIXL_AARCH64_REGISTERS_AARCH64_H_

#include <string>

#include "instructions-aarch64.h"

namespace vixl {
namespace aarch64 {

// An integer type capable of representing a homogeneous, non-overlapping set of
// registers as a bitmask of their codes.
typedef uint64_t RegList;
static const int kRegListSizeInBits = sizeof(RegList) * 8;

class Register;
class WRegister;
class XRegister;

class VRegister;
class BRegister;
class HRegister;
class SRegister;
class DRegister;
class QRegister;

class ZRegister;

class PRegister;
class PRegisterWithLaneSize;
class PRegisterM;
class PRegisterZ;

// A container for any single register supported by the processor. Selected
// qualifications are also supported. Basic registers can be constructed
// directly as CPURegister objects. Other variants should be constructed as one
// of the derived classes.
//
// CPURegister aims to support any getter that would also be available to more
// specialised register types. However, using the equivalent functions on the
// specialised register types can avoid run-time checks, and should therefore be
// preferred where run-time polymorphism isn't required.
//
// Type-specific modifiers are typically implemented only on the derived
// classes.
//
// The encoding is such that CPURegister objects are cheap to pass by value.
class CPURegister {
 public:
  enum RegisterBank : uint8_t {
    kNoRegisterBank = 0,
    kRRegisterBank,
    kVRegisterBank,
    kPRegisterBank
  };
  enum RegisterType {
    kNoRegister,
    kRegister,
    kVRegister,
    kZRegister,
    kPRegister
  };

  static const unsigned kUnknownSize = 0;

  VIXL_CONSTEXPR CPURegister()
      : code_(0),
        bank_(kNoRegisterBank),
        size_(kEncodedUnknownSize),
        qualifiers_(kNoQualifiers),
        lane_size_(kEncodedUnknownSize) {}

  CPURegister(int code, int size_in_bits, RegisterType type)
      : code_(code),
        bank_(GetBankFor(type)),
        size_(EncodeSizeInBits(size_in_bits)),
        qualifiers_(kNoQualifiers),
        lane_size_(EncodeSizeInBits(size_in_bits)) {
    VIXL_ASSERT(IsValid());
  }

  // Basic accessors.

  // TODO: Make this return 'int'.
  unsigned GetCode() const { return code_; }

  RegisterBank GetBank() const { return bank_; }

  // For scalar registers, the lane size matches the register size, and is
  // always known.
  bool HasSize() const { return size_ != kEncodedUnknownSize; }
  bool HasLaneSize() const { return lane_size_ != kEncodedUnknownSize; }

  RegList GetBit() const {
    if (IsNone()) return 0;
    VIXL_ASSERT(code_ < kRegListSizeInBits);
    return static_cast<RegList>(1) << code_;
  }

  // Return the architectural name for this register.
  // TODO: This is temporary. Ultimately, we should move the
  // Simulator::*RegNameForCode helpers out of the simulator, and provide an
  // independent way to obtain the name of a register.
  std::string GetArchitecturalName() const;

  // Return the highest valid register code for this type, to allow generic
  // loops to be written. This excludes kSPRegInternalCode, since it is not
  // contiguous, and sp usually requires special handling anyway.
  unsigned GetMaxCode() const { return GetMaxCodeFor(GetBank()); }

  // Registers without a known size report kUnknownSize.
  int GetSizeInBits() const { return DecodeSizeInBits(size_); }
  int GetSizeInBytes() const { return DecodeSizeInBytes(size_); }
  // TODO: Make these return 'int'.
  unsigned GetLaneSizeInBits() const { return DecodeSizeInBits(lane_size_); }
  unsigned GetLaneSizeInBytes() const { return DecodeSizeInBytes(lane_size_); }
  unsigned GetLaneSizeInBytesLog2() const {
    VIXL_ASSERT(HasLaneSize());
    return DecodeSizeInBytesLog2(lane_size_);
  }

  int GetLanes() const {
    if (HasSize() && HasLaneSize()) {
      // Take advantage of the size encoding to calculate this efficiently.
      VIXL_STATIC_ASSERT(kEncodedHRegSize == (kEncodedBRegSize + 1));
      VIXL_STATIC_ASSERT(kEncodedSRegSize == (kEncodedHRegSize + 1));
      VIXL_STATIC_ASSERT(kEncodedDRegSize == (kEncodedSRegSize + 1));
      VIXL_STATIC_ASSERT(kEncodedQRegSize == (kEncodedDRegSize + 1));
      int log2_delta = static_cast<int>(size_) - static_cast<int>(lane_size_);
      VIXL_ASSERT(log2_delta >= 0);
      return 1 << log2_delta;
    }
    return kUnknownSize;
  }
  bool Is8Bits() const { return size_ == kEncodedBRegSize; }
  bool Is16Bits() const { return size_ == kEncodedHRegSize; }
  bool Is32Bits() const { return size_ == kEncodedSRegSize; }
  bool Is64Bits() const { return size_ == kEncodedDRegSize; }
  bool Is128Bits() const { return size_ == kEncodedQRegSize; }

  bool IsLaneSizeB() const { return lane_size_ == kEncodedBRegSize; }
  bool IsLaneSizeH() const { return lane_size_ == kEncodedHRegSize; }
  bool IsLaneSizeS() const { return lane_size_ == kEncodedSRegSize; }
  bool IsLaneSizeD() const { return lane_size_ == kEncodedDRegSize; }
  bool IsLaneSizeQ() const { return lane_size_ == kEncodedQRegSize; }

  // If Is<Foo>Register(), then it is valid to convert the CPURegister to some
  // <Foo>Register<Bar> type.
  //
  // If...                               ... then it is safe to construct ...
  //     r.IsRegister()                      -> Register(r)
  //     r.IsVRegister()                     -> VRegister(r)
  //     r.IsZRegister()                     -> ZRegister(r)
  //     r.IsPRegister()                     -> PRegister(r)
  //
  //     r.IsPRegister() && HasLaneSize()    -> PRegisterWithLaneSize(r)
  //     r.IsPRegister() && IsMerging()      -> PRegisterM(r)
  //     r.IsPRegister() && IsZeroing()      -> PRegisterZ(r)
  bool IsRegister() const { return GetType() == kRegister; }
  bool IsVRegister() const { return GetType() == kVRegister; }
  bool IsZRegister() const { return GetType() == kZRegister; }
  bool IsPRegister() const { return GetType() == kPRegister; }

  bool IsNone() const { return GetType() == kNoRegister; }

  // `GetType() == kNoRegister` implies IsNone(), and vice-versa.
  // `GetType() == k<Foo>Register` implies Is<Foo>Register(), and vice-versa.
  RegisterType GetType() const {
    switch (bank_) {
      case kNoRegisterBank:
        return kNoRegister;
      case kRRegisterBank:
        return kRegister;
      case kVRegisterBank:
        return HasSize() ? kVRegister : kZRegister;
      case kPRegisterBank:
        return kPRegister;
    }
    VIXL_UNREACHABLE();
    return kNoRegister;
  }

  // IsFPRegister() is true for scalar FP types (and therefore implies
  // IsVRegister()). There is no corresponding FPRegister type.
  bool IsFPRegister() const { return Is1H() || Is1S() || Is1D(); }

  // TODO: These are stricter forms of the helpers above. We should make the
  // basic helpers strict, and remove these.
  bool IsValidRegister() const;
  bool IsValidVRegister() const;
  bool IsValidFPRegister() const;
  bool IsValidZRegister() const;
  bool IsValidPRegister() const;

  bool IsValid() const;
  bool IsValidOrNone() const { return IsNone() || IsValid(); }

  bool IsVector() const { return HasLaneSize() && (size_ != lane_size_); }
  bool IsScalar() const { return HasLaneSize() && (size_ == lane_size_); }

  bool IsSameType(const CPURegister& other) const {
    return GetType() == other.GetType();
  }

  bool IsSameBank(const CPURegister& other) const {
    return GetBank() == other.GetBank();
  }

  // Two registers with unknown size are considered to have the same size if
  // they also have the same type. For example, all Z registers have the same
  // size, even though we don't know what that is.
  bool IsSameSizeAndType(const CPURegister& other) const {
    return IsSameType(other) && (size_ == other.size_);
  }

  bool IsSameFormat(const CPURegister& other) const {
    return IsSameSizeAndType(other) && (lane_size_ == other.lane_size_);
  }

  // Note that NoReg aliases itself, so that 'Is' implies 'Aliases'.
  bool Aliases(const CPURegister& other) const {
    return IsSameBank(other) && (code_ == other.code_);
  }

  bool Is(const CPURegister& other) const {
    if (IsRegister() || IsVRegister()) {
      // For core (W, X) and FP/NEON registers, we only consider the code, size
      // and type. This is legacy behaviour.
      // TODO: We should probably check every field for all registers.
      return Aliases(other) && (size_ == other.size_);
    } else {
      // For Z and P registers, we require all fields to match exactly.
      VIXL_ASSERT(IsNone() || IsZRegister() || IsPRegister());
      return (code_ == other.code_) && (bank_ == other.bank_) &&
             (size_ == other.size_) && (qualifiers_ == other.qualifiers_) &&
             (lane_size_ == other.lane_size_);
    }
  }

  // Conversions to specific register types. The result is a register that
  // aliases the original CPURegister. That is, the original register bank
  // (`GetBank()`) is checked and the code (`GetCode()`) preserved, but all
  // other properties are ignored.
  //
  // Typical usage:
  //
  //     if (reg.GetBank() == kVRegisterBank) {
  //       DRegister d = reg.D();
  //       ...
  //     }
  //
  // These could all return types with compile-time guarantees (like XRegister),
  // but this breaks backwards-compatibility quite severely, particularly with
  // code like `cond ? reg.W() : reg.X()`, which would have indeterminate type.

  // Core registers, like "w0".
  Register W() const;
  Register X() const;
  // FP/NEON registers, like "b0".
  VRegister B() const;
  VRegister H() const;
  VRegister S() const;
  VRegister D() const;
  VRegister Q() const;
  VRegister V() const;
  // SVE registers, like "z0".
  ZRegister Z() const;
  PRegister P() const;

  // Utilities for kRegister types.

  bool IsZero() const { return IsRegister() && (code_ == kZeroRegCode); }
  bool IsSP() const { return IsRegister() && (code_ == kSPRegInternalCode); }
  bool IsW() const { return IsRegister() && Is32Bits(); }
  bool IsX() const { return IsRegister() && Is64Bits(); }

  // Utilities for FP/NEON kVRegister types.

  // These helpers ensure that the size and type of the register are as
  // described. They do not consider the number of lanes that make up a vector.
  // So, for example, Is8B() implies IsD(), and Is1D() implies IsD(), but IsD()
  // does not imply Is1D() or Is8B().
  // Check the number of lanes, i.e. the format of the vector, using methods
  // such as Is8B(), Is1D(), etc.
  bool IsB() const { return IsVRegister() && Is8Bits(); }
  bool IsH() const { return IsVRegister() && Is16Bits(); }
  bool IsS() const { return IsVRegister() && Is32Bits(); }
  bool IsD() const { return IsVRegister() && Is64Bits(); }
  bool IsQ() const { return IsVRegister() && Is128Bits(); }

  // As above, but also check that the register has exactly one lane. For
  // example, reg.Is1D() implies DRegister(reg).IsValid(), but reg.IsD() does
  // not.
  bool Is1B() const { return IsB() && IsScalar(); }
  bool Is1H() const { return IsH() && IsScalar(); }
  bool Is1S() const { return IsS() && IsScalar(); }
  bool Is1D() const { return IsD() && IsScalar(); }
  bool Is1Q() const { return IsQ() && IsScalar(); }

  // Check the specific NEON format.
  bool Is8B() const { return IsD() && IsLaneSizeB(); }
  bool Is16B() const { return IsQ() && IsLaneSizeB(); }
  bool Is2H() const { return IsS() && IsLaneSizeH(); }
  bool Is4H() const { return IsD() && IsLaneSizeH(); }
  bool Is8H() const { return IsQ() && IsLaneSizeH(); }
  bool Is2S() const { return IsD() && IsLaneSizeS(); }
  bool Is4S() const { return IsQ() && IsLaneSizeS(); }
  bool Is2D() const { return IsQ() && IsLaneSizeD(); }

  // A semantic alias for sdot and udot (indexed and by element) instructions.
  // The current CPURegister implementation cannot tell this from Is1S(),
  // but it might do later.
  // TODO: Do this with the qualifiers_ field.
  bool Is1S4B() const { return Is1S(); }

  // Utilities for SVE registers.

  bool IsUnqualified() const { return qualifiers_ == kNoQualifiers; }
  bool IsMerging() const { return IsPRegister() && (qualifiers_ == kMerging); }
  bool IsZeroing() const { return IsPRegister() && (qualifiers_ == kZeroing); }

  // SVE types have unknown sizes, but within known bounds.

  int GetMaxSizeInBytes() const {
    switch (GetType()) {
      case kZRegister:
        return kZRegMaxSizeInBytes;
      case kPRegister:
        return kPRegMaxSizeInBytes;
      default:
        VIXL_ASSERT(HasSize());
        return GetSizeInBits();
    }
  }

  int GetMinSizeInBytes() const {
    switch (GetType()) {
      case kZRegister:
        return kZRegMinSizeInBytes;
      case kPRegister:
        return kPRegMinSizeInBytes;
      default:
        VIXL_ASSERT(HasSize());
        return GetSizeInBits();
    }
  }

  int GetMaxSizeInBits() const { return GetMaxSizeInBytes() * kBitsPerByte; }
  int GetMinSizeInBits() const { return GetMinSizeInBytes() * kBitsPerByte; }

  static RegisterBank GetBankFor(RegisterType type) {
    switch (type) {
      case kNoRegister:
        return kNoRegisterBank;
      case kRegister:
        return kRRegisterBank;
      case kVRegister:
      case kZRegister:
        return kVRegisterBank;
      case kPRegister:
        return kPRegisterBank;
    }
    VIXL_UNREACHABLE();
    return kNoRegisterBank;
  }

  static unsigned GetMaxCodeFor(CPURegister::RegisterType type) {
    return GetMaxCodeFor(GetBankFor(type));
  }

 protected:
  enum EncodedSize : uint8_t {
    // Ensure that kUnknownSize (and therefore kNoRegister) is encoded as zero.
    kEncodedUnknownSize = 0,

    // The implementation assumes that the remaining sizes are encoded as
    // `log2(size) + c`, so the following names must remain in sequence.
    kEncodedBRegSize,
    kEncodedHRegSize,
    kEncodedSRegSize,
    kEncodedDRegSize,
    kEncodedQRegSize,

    kEncodedWRegSize = kEncodedSRegSize,
    kEncodedXRegSize = kEncodedDRegSize
  };
  VIXL_STATIC_ASSERT(kSRegSize == kWRegSize);
  VIXL_STATIC_ASSERT(kDRegSize == kXRegSize);

  char GetLaneSizeSymbol() const {
    switch (lane_size_) {
      case kEncodedBRegSize:
        return 'B';
      case kEncodedHRegSize:
        return 'H';
      case kEncodedSRegSize:
        return 'S';
      case kEncodedDRegSize:
        return 'D';
      case kEncodedQRegSize:
        return 'Q';
      case kEncodedUnknownSize:
        break;
    }
    VIXL_UNREACHABLE();
    return '?';
  }

  static EncodedSize EncodeSizeInBits(int size_in_bits) {
    switch (size_in_bits) {
      case kUnknownSize:
        return kEncodedUnknownSize;
      case kBRegSize:
        return kEncodedBRegSize;
      case kHRegSize:
        return kEncodedHRegSize;
      case kSRegSize:
        return kEncodedSRegSize;
      case kDRegSize:
        return kEncodedDRegSize;
      case kQRegSize:
        return kEncodedQRegSize;
    }
    VIXL_UNREACHABLE();
    return kEncodedUnknownSize;
  }

  static int DecodeSizeInBytesLog2(EncodedSize encoded_size) {
    switch (encoded_size) {
      case kEncodedUnknownSize:
        // Log2 of B-sized lane in bytes is 0, so we can't just return 0 here.
        VIXL_UNREACHABLE();
        return -1;
      case kEncodedBRegSize:
        return kBRegSizeInBytesLog2;
      case kEncodedHRegSize:
        return kHRegSizeInBytesLog2;
      case kEncodedSRegSize:
        return kSRegSizeInBytesLog2;
      case kEncodedDRegSize:
        return kDRegSizeInBytesLog2;
      case kEncodedQRegSize:
        return kQRegSizeInBytesLog2;
    }
    VIXL_UNREACHABLE();
    return kUnknownSize;
  }

  static int DecodeSizeInBytes(EncodedSize encoded_size) {
    if (encoded_size == kEncodedUnknownSize) {
      return kUnknownSize;
    }
    return 1 << DecodeSizeInBytesLog2(encoded_size);
  }

  static int DecodeSizeInBits(EncodedSize encoded_size) {
    VIXL_STATIC_ASSERT(kUnknownSize == 0);
    return DecodeSizeInBytes(encoded_size) * kBitsPerByte;
  }

  static unsigned GetMaxCodeFor(CPURegister::RegisterBank bank);

  enum Qualifiers : uint8_t {
    kNoQualifiers = 0,
    // Used by P registers.
    kMerging,
    kZeroing
  };

  // An unchecked constructor, for use by derived classes.
  CPURegister(int code,
              EncodedSize size,
              RegisterBank bank,
              EncodedSize lane_size,
              Qualifiers qualifiers = kNoQualifiers)
      : code_(code),
        bank_(bank),
        size_(size),
        qualifiers_(qualifiers),
        lane_size_(lane_size) {}

  // TODO: Check that access to these fields is reasonably efficient.
  uint8_t code_;
  RegisterBank bank_;
  EncodedSize size_;
  Qualifiers qualifiers_;
  EncodedSize lane_size_;
};
// Ensure that CPURegisters can fit in a single (64-bit) register. This is a
// proxy for being "cheap to pass by value", which is hard to check directly.
VIXL_STATIC_ASSERT(sizeof(CPURegister) <= sizeof(uint64_t));
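A short usage sketch of the CPURegister accessors declared above (illustrative only; it simply exercises the getters as declared):

    #include "aarch64/registers-aarch64.h"
    using namespace vixl::aarch64;

    void DescribeRegister(const CPURegister& reg) {
      if (reg.IsVector()) {
        // e.g. a V register formatted as 4S: 128 bits, 32-bit lanes, 4 lanes.
        int lanes = reg.GetLanes();
        unsigned lane_bits = reg.GetLaneSizeInBits();
        (void)lanes;
        (void)lane_bits;
      } else if (reg.IsScalar()) {
        // Scalar registers report a lane size equal to the register size.
        VIXL_ASSERT(reg.GetLaneSizeInBits() ==
                    static_cast<unsigned>(reg.GetSizeInBits()));
      }
    }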

// TODO: Add constexpr constructors.
#define VIXL_DECLARE_REGISTER_COMMON(NAME, REGISTER_TYPE, PARENT_TYPE) \
  VIXL_CONSTEXPR NAME() : PARENT_TYPE() {}                             \
                                                                       \
  explicit NAME(CPURegister other) : PARENT_TYPE(other) {              \
    VIXL_ASSERT(IsValid());                                            \
  }                                                                    \
                                                                       \
  VIXL_CONSTEXPR static unsigned GetMaxCode() {                        \
    return kNumberOf##REGISTER_TYPE##s - 1;                            \
  }

// Any W or X register, including the zero register and the stack pointer.
class Register : public CPURegister {
 public:
  VIXL_DECLARE_REGISTER_COMMON(Register, Register, CPURegister)

  Register(int code, int size_in_bits)
      : CPURegister(code, size_in_bits, kRegister) {
    VIXL_ASSERT(IsValidRegister());
  }

  bool IsValid() const { return IsValidRegister(); }
};

// Any FP or NEON V register, including vector (V.<T>) and scalar forms
// (B, H, S, D, Q).
class VRegister : public CPURegister {
 public:
  VIXL_DECLARE_REGISTER_COMMON(VRegister, VRegister, CPURegister)

  // For historical reasons, VRegister(0) returns v0.1Q (or equivalently, q0).
  explicit VRegister(int code, int size_in_bits = kQRegSize, int lanes = 1)
      : CPURegister(code,
                    EncodeSizeInBits(size_in_bits),
                    kVRegisterBank,
                    EncodeLaneSizeInBits(size_in_bits, lanes)) {
    VIXL_ASSERT(IsValidVRegister());
  }

  VRegister(int code, VectorFormat format)
      : CPURegister(code,
                    EncodeSizeInBits(RegisterSizeInBitsFromFormat(format)),
                    kVRegisterBank,
                    EncodeSizeInBits(LaneSizeInBitsFromFormat(format)),
                    kNoQualifiers) {
    VIXL_ASSERT(IsValid());
  }

  VRegister V8B() const;
  VRegister V16B() const;
  VRegister V2H() const;
  VRegister V4H() const;
  VRegister V8H() const;
  VRegister V2S() const;
  VRegister V4S() const;
  VRegister V1D() const;
  VRegister V2D() const;
  VRegister V1Q() const;
  VRegister S4B() const;

  bool IsValid() const { return IsValidVRegister(); }

 protected:
  static EncodedSize EncodeLaneSizeInBits(int size_in_bits, int lanes) {
    VIXL_ASSERT(lanes >= 1);
    VIXL_ASSERT((size_in_bits % lanes) == 0);
    return EncodeSizeInBits(size_in_bits / lanes);
  }
};

// Any SVE Z register, with or without a lane size specifier.
class ZRegister : public CPURegister {
 public:
  VIXL_DECLARE_REGISTER_COMMON(ZRegister, ZRegister, CPURegister)

  explicit ZRegister(int code, int lane_size_in_bits = kUnknownSize)
      : CPURegister(code,
                    kEncodedUnknownSize,
                    kVRegisterBank,
                    EncodeSizeInBits(lane_size_in_bits)) {
    VIXL_ASSERT(IsValid());
  }

  ZRegister(int code, VectorFormat format)
      : CPURegister(code,
                    kEncodedUnknownSize,
                    kVRegisterBank,
                    EncodeSizeInBits(LaneSizeInBitsFromFormat(format)),
                    kNoQualifiers) {
    VIXL_ASSERT(IsValid());
  }

  // Return a Z register with a known lane size (like "z0.B").
  ZRegister VnB() const { return ZRegister(GetCode(), kBRegSize); }
  ZRegister VnH() const { return ZRegister(GetCode(), kHRegSize); }
  ZRegister VnS() const { return ZRegister(GetCode(), kSRegSize); }
  ZRegister VnD() const { return ZRegister(GetCode(), kDRegSize); }
  ZRegister VnQ() const { return ZRegister(GetCode(), kQRegSize); }

  template <typename T>
  ZRegister WithLaneSize(T format) const {
    return ZRegister(GetCode(), format);
  }

  ZRegister WithSameLaneSizeAs(const CPURegister& other) const {
    VIXL_ASSERT(other.HasLaneSize());
    return this->WithLaneSize(other.GetLaneSizeInBits());
  }

  bool IsValid() const { return IsValidZRegister(); }
};
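A usage sketch for the ZRegister lane-size helpers just declared (illustrative only; the register codes are arbitrary):

    ZRegister z_bytes = ZRegister(0).VnB();   // z0.B: lane size known, 8 bits
    ZRegister z_plain(1);                     // z1: lane size unknown
    ZRegister z_like = z_plain.WithSameLaneSizeAs(z_bytes);  // z1.B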

// Any SVE P register, with or without a qualifier or lane size specifier.
class PRegister : public CPURegister {
 public:
  VIXL_DECLARE_REGISTER_COMMON(PRegister, PRegister, CPURegister)

  explicit PRegister(int code) : CPURegister(code, kUnknownSize, kPRegister) {
    VIXL_ASSERT(IsValid());
  }

  bool IsValid() const {
    return IsValidPRegister() && !HasLaneSize() && IsUnqualified();
  }

  // Return a P register with a known lane size (like "p0.B").
  PRegisterWithLaneSize VnB() const;
  PRegisterWithLaneSize VnH() const;
  PRegisterWithLaneSize VnS() const;
  PRegisterWithLaneSize VnD() const;

  template <typename T>
  PRegisterWithLaneSize WithLaneSize(T format) const;

  PRegisterWithLaneSize WithSameLaneSizeAs(const CPURegister& other) const;

  // SVE predicates are specified (in normal assembly) with a "/z" (zeroing) or
  // "/m" (merging) suffix. These methods are VIXL's equivalents.
  PRegisterZ Zeroing() const;
  PRegisterM Merging() const;

 protected:
  // Unchecked constructors, for use by derived classes.
  PRegister(int code, EncodedSize encoded_lane_size)
      : CPURegister(code,
                    kEncodedUnknownSize,
                    kPRegisterBank,
                    encoded_lane_size,
                    kNoQualifiers) {}

  PRegister(int code, Qualifiers qualifiers)
      : CPURegister(code,
                    kEncodedUnknownSize,
                    kPRegisterBank,
                    kEncodedUnknownSize,
                    qualifiers) {}
};

// Any SVE P register with a known lane size (like "p0.B").
class PRegisterWithLaneSize : public PRegister {
 public:
  VIXL_DECLARE_REGISTER_COMMON(PRegisterWithLaneSize, PRegister, PRegister)

  PRegisterWithLaneSize(int code, int lane_size_in_bits)
      : PRegister(code, EncodeSizeInBits(lane_size_in_bits)) {
    VIXL_ASSERT(IsValid());
  }

  PRegisterWithLaneSize(int code, VectorFormat format)
      : PRegister(code, EncodeSizeInBits(LaneSizeInBitsFromFormat(format))) {
    VIXL_ASSERT(IsValid());
  }

  bool IsValid() const {
    return IsValidPRegister() && HasLaneSize() && IsUnqualified();
  }

  // Overload lane size accessors so we can assert `HasLaneSize()`. This allows
  // tools such as clang-tidy to prove that the result of GetLaneSize* is
  // non-zero.

  // TODO: Make these return 'int'.
  unsigned GetLaneSizeInBits() const {
    VIXL_ASSERT(HasLaneSize());
    return PRegister::GetLaneSizeInBits();
  }

  unsigned GetLaneSizeInBytes() const {
    VIXL_ASSERT(HasLaneSize());
    return PRegister::GetLaneSizeInBytes();
  }
};

// Any SVE P register with the zeroing qualifier (like "p0/z").
class PRegisterZ : public PRegister {
 public:
  VIXL_DECLARE_REGISTER_COMMON(PRegisterZ, PRegister, PRegister)

  explicit PRegisterZ(int code) : PRegister(code, kZeroing) {
    VIXL_ASSERT(IsValid());
  }

  bool IsValid() const {
    return IsValidPRegister() && !HasLaneSize() && IsZeroing();
  }
};

// Any SVE P register with the merging qualifier (like "p0/m").
class PRegisterM : public PRegister {
 public:
  VIXL_DECLARE_REGISTER_COMMON(PRegisterM, PRegister, PRegister)

  explicit PRegisterM(int code) : PRegister(code, kMerging) {
    VIXL_ASSERT(IsValid());
  }

  bool IsValid() const {
    return IsValidPRegister() && !HasLaneSize() && IsMerging();
  }
};

inline PRegisterWithLaneSize PRegister::VnB() const {
  return PRegisterWithLaneSize(GetCode(), kBRegSize);
}
inline PRegisterWithLaneSize PRegister::VnH() const {
  return PRegisterWithLaneSize(GetCode(), kHRegSize);
}
inline PRegisterWithLaneSize PRegister::VnS() const {
  return PRegisterWithLaneSize(GetCode(), kSRegSize);
}
inline PRegisterWithLaneSize PRegister::VnD() const {
  return PRegisterWithLaneSize(GetCode(), kDRegSize);
}

template <typename T>
inline PRegisterWithLaneSize PRegister::WithLaneSize(T format) const {
  return PRegisterWithLaneSize(GetCode(), format);
}

inline PRegisterWithLaneSize PRegister::WithSameLaneSizeAs(
    const CPURegister& other) const {
  VIXL_ASSERT(other.HasLaneSize());
  return this->WithLaneSize(other.GetLaneSizeInBits());
}

inline PRegisterZ PRegister::Zeroing() const { return PRegisterZ(GetCode()); }
inline PRegisterM PRegister::Merging() const { return PRegisterM(GetCode()); }
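A brief sketch of how these predicate qualifiers are used in practice (illustrative only; the register code is arbitrary):

    void PredicateQualifiersExample() {
      PRegister pred(3);
      PRegisterM pred_m = pred.Merging();          // "p3/m"
      PRegisterZ pred_z = pred.Zeroing();          // "p3/z"
      PRegisterWithLaneSize pred_s = pred.VnS();   // "p3.S"
      (void)pred_m;
      (void)pred_z;
      (void)pred_s;
    }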
|
||||||
|
#define VIXL_REGISTER_WITH_SIZE_LIST(V) \
|
||||||
|
V(WRegister, kWRegSize, Register) \
|
||||||
|
V(XRegister, kXRegSize, Register) \
|
||||||
|
V(QRegister, kQRegSize, VRegister) \
|
||||||
|
V(DRegister, kDRegSize, VRegister) \
|
||||||
|
V(SRegister, kSRegSize, VRegister) \
|
||||||
|
V(HRegister, kHRegSize, VRegister) \
|
||||||
|
V(BRegister, kBRegSize, VRegister)
|
||||||
|
|
||||||
|
#define VIXL_DEFINE_REGISTER_WITH_SIZE(NAME, SIZE, PARENT) \
|
||||||
|
class NAME : public PARENT { \
|
||||||
|
public: \
|
||||||
|
VIXL_CONSTEXPR NAME() : PARENT() {} \
|
||||||
|
explicit NAME(int code) : PARENT(code, SIZE) {} \
|
||||||
|
\
|
||||||
|
explicit NAME(PARENT other) : PARENT(other) { \
|
||||||
|
VIXL_ASSERT(GetSizeInBits() == SIZE); \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
PARENT As##PARENT() const { return *this; } \
|
||||||
|
\
|
||||||
|
VIXL_CONSTEXPR int GetSizeInBits() const { return SIZE; } \
|
||||||
|
\
|
||||||
|
bool IsValid() const { \
|
||||||
|
return PARENT::IsValid() && (PARENT::GetSizeInBits() == SIZE); \
|
||||||
|
} \
|
||||||
|
};
|
||||||
|
|
||||||
|
VIXL_REGISTER_WITH_SIZE_LIST(VIXL_DEFINE_REGISTER_WITH_SIZE)

// No*Reg is used to provide default values for unused arguments, error cases
// and so on. Note that these (and the default constructors) all compare equal
// (using the Is() method).
const Register NoReg;
const VRegister NoVReg;
const CPURegister NoCPUReg;
const ZRegister NoZReg;

// TODO: Ideally, these would use specialised register types (like XRegister and
// so on). However, doing so throws up template overloading problems elsewhere.
#define VIXL_DEFINE_REGISTERS(N)       \
  const Register w##N = WRegister(N);  \
  const Register x##N = XRegister(N);  \
  const VRegister b##N = BRegister(N); \
  const VRegister h##N = HRegister(N); \
  const VRegister s##N = SRegister(N); \
  const VRegister d##N = DRegister(N); \
  const VRegister q##N = QRegister(N); \
  const VRegister v##N(N);             \
  const ZRegister z##N(N);
AARCH64_REGISTER_CODE_LIST(VIXL_DEFINE_REGISTERS)
#undef VIXL_DEFINE_REGISTERS

#define VIXL_DEFINE_P_REGISTERS(N) const PRegister p##N(N);
AARCH64_P_REGISTER_CODE_LIST(VIXL_DEFINE_P_REGISTERS)
#undef VIXL_DEFINE_P_REGISTERS

// VIXL represents 'sp' with a unique code, to tell it apart from 'xzr'.
const Register wsp = WRegister(kSPRegInternalCode);
const Register sp = XRegister(kSPRegInternalCode);

// Standard aliases.
const Register ip0 = x16;
const Register ip1 = x17;
const Register lr = x30;
const Register xzr = x31;
const Register wzr = w31;

// AreAliased returns true if any of the named registers overlap. Arguments
// set to NoReg are ignored. The system stack pointer may be specified.
bool AreAliased(const CPURegister& reg1,
                const CPURegister& reg2,
                const CPURegister& reg3 = NoReg,
                const CPURegister& reg4 = NoReg,
                const CPURegister& reg5 = NoReg,
                const CPURegister& reg6 = NoReg,
                const CPURegister& reg7 = NoReg,
                const CPURegister& reg8 = NoReg);

// AreSameSizeAndType returns true if all of the specified registers have the
// same size, and are of the same type. The system stack pointer may be
// specified. Arguments set to NoReg are ignored, as are any subsequent
// arguments. At least one argument (reg1) must be valid (not NoCPUReg).
bool AreSameSizeAndType(const CPURegister& reg1,
                        const CPURegister& reg2,
                        const CPURegister& reg3 = NoCPUReg,
                        const CPURegister& reg4 = NoCPUReg,
                        const CPURegister& reg5 = NoCPUReg,
                        const CPURegister& reg6 = NoCPUReg,
                        const CPURegister& reg7 = NoCPUReg,
                        const CPURegister& reg8 = NoCPUReg);

// AreEven returns true if all of the specified registers have even register
// indices. Arguments set to NoReg are ignored, as are any subsequent
// arguments. At least one argument (reg1) must be valid (not NoCPUReg).
bool AreEven(const CPURegister& reg1,
             const CPURegister& reg2,
             const CPURegister& reg3 = NoReg,
             const CPURegister& reg4 = NoReg,
             const CPURegister& reg5 = NoReg,
             const CPURegister& reg6 = NoReg,
             const CPURegister& reg7 = NoReg,
             const CPURegister& reg8 = NoReg);

// AreConsecutive returns true if all of the specified registers are
// consecutive in the register file. Arguments set to NoReg are ignored, as are
// any subsequent arguments. At least one argument (reg1) must be valid
// (not NoCPUReg).
bool AreConsecutive(const CPURegister& reg1,
                    const CPURegister& reg2,
                    const CPURegister& reg3 = NoCPUReg,
                    const CPURegister& reg4 = NoCPUReg);

// AreSameFormat returns true if all of the specified registers have the same
// vector format. Arguments set to NoReg are ignored, as are any subsequent
// arguments. At least one argument (reg1) must be valid (not NoVReg).
bool AreSameFormat(const CPURegister& reg1,
                   const CPURegister& reg2,
                   const CPURegister& reg3 = NoCPUReg,
                   const CPURegister& reg4 = NoCPUReg);

// AreSameLaneSize returns true if all of the specified registers have the same
// element lane size, B, H, S or D. It doesn't compare the type of registers.
// Arguments set to NoReg are ignored, as are any subsequent arguments.
// At least one argument (reg1) must be valid (not NoVReg).
// TODO: Remove this, and replace its uses with AreSameFormat.
bool AreSameLaneSize(const CPURegister& reg1,
                     const CPURegister& reg2,
                     const CPURegister& reg3 = NoCPUReg,
                     const CPURegister& reg4 = NoCPUReg);
}  // namespace aarch64
}  // namespace vixl

#endif  // VIXL_AARCH64_REGISTERS_AARCH64_H_
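A quick usage sketch (not part of the commit): the No*Reg defaults let the Are*() predicates accept a variable number of registers, and the standard aliases are ordinary Register constants. The helper below is hypothetical.

// Hypothetical example: reject a scratch register that overlaps an operand or
// a reserved register. AreAliased() ignores the trailing NoReg defaults, so
// any argument count from two to eight works.
bool IsUsableScratch(const vixl::aarch64::Register& scratch,
                     const vixl::aarch64::Register& dst) {
  using namespace vixl::aarch64;
  // lr (x30) and ip0/ip1 (x16/x17) are reserved by common conventions; note
  // that w16 and x16 alias (same register code, different sizes).
  return !AreAliased(scratch, dst) && !AreAliased(scratch, ip0, ip1, lr);
}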
File diff suppressed because it is too large
@@ -56,6 +56,8 @@ enum DebugHltOpcode {
   kDisableCPUFeaturesOpcode,
   kSaveCPUFeaturesOpcode,
   kRestoreCPUFeaturesOpcode,
+  kMTEActive,
+  kMTEInactive,
   // Aliases.
   kDebugHltFirstOpcode = kUnreachableOpcode,
   kDebugHltLastOpcode = kLogOpcode
@@ -88,7 +90,7 @@ VIXL_DEPRECATED("DebugHltOpcode", typedef DebugHltOpcode DebugHltOpcodes);
 //    call):
 //    x0: The format string
 //    x1-x7: Optional arguments, if type == CPURegister::kRegister
-//    d0-d7: Optional arguments, if type == CPURegister::kFPRegister
+//    d0-d7: Optional arguments, if type == CPURegister::kVRegister
 const unsigned kPrintfArgCountOffset = 1 * kInstructionSize;
 const unsigned kPrintfArgPatternListOffset = 2 * kInstructionSize;
 const unsigned kPrintfLength = 3 * kInstructionSize;
@@ -121,7 +123,7 @@ const unsigned kTraceLength = 3 * kInstructionSize;
 enum TraceParameters {
   LOG_DISASM = 1 << 0,   // Log disassembly.
   LOG_REGS = 1 << 1,     // Log general purpose registers.
-  LOG_VREGS = 1 << 2,    // Log NEON and floating-point registers.
+  LOG_VREGS = 1 << 2,    // Log SVE, NEON and floating-point registers.
   LOG_SYSREGS = 1 << 3,  // Log the flags and system registers.
   LOG_WRITE = 1 << 4,    // Log writes to memory.
   LOG_BRANCH = 1 << 5,   // Log taken branches.
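The TraceParameters values are bit flags and are meant to be combined with bitwise OR. A hedged sketch of how they are consumed (SetTraceParameters is assumed to be the vixl AArch64 Simulator entry point for these flags):

// Sketch: trace disassembly and memory writes, but not register state.
// 'simulator' is assumed to be a vixl::aarch64::Simulator instance.
int trace = LOG_DISASM | LOG_WRITE;
simulator.SetTraceParameters(trace);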
@@ -29,6 +29,12 @@
 #include "code-buffer-vixl.h"

+// Microsoft Visual C++ defines a `mvn` macro that conflicts with our own
+// definition.
+#if defined(_MSC_VER) && defined(mvn)
+#undef mvn
+#endif
+
 namespace vixl {

 class CodeBufferCheckScope;
@@ -37,9 +43,8 @@ namespace internal {

 class AssemblerBase {
  public:
-  AssemblerBase() : allow_assembler_(false) {}
-  explicit AssemblerBase(size_t capacity)
-      : buffer_(capacity), allow_assembler_(false) {}
+  AssemblerBase()
+      : allow_assembler_(false) {}
   AssemblerBase(byte* buffer, size_t capacity)
       : buffer_(buffer, capacity), allow_assembler_(false) {}
@@ -36,24 +36,12 @@ namespace vixl {

 class CodeBuffer {
  public:
-  static const size_t kDefaultCapacity = 4 * KBytes;
-
-  explicit CodeBuffer(size_t capacity = kDefaultCapacity);
+  CodeBuffer();
+
   CodeBuffer(byte* buffer, size_t capacity);
-  ~CodeBuffer();
+  ~CodeBuffer() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION;

   void Reset();
-  void Reset(byte* buffer, size_t capacity, bool managed = false);
-
-#ifdef VIXL_CODE_BUFFER_MMAP
-  void SetExecutable();
-  void SetWritable();
-#else
-  // These require page-aligned memory blocks, which we can only guarantee with
-  // mmap.
-  VIXL_NO_RETURN_IN_DEBUG_MODE void SetExecutable() { VIXL_UNIMPLEMENTED(); }
-  VIXL_NO_RETURN_IN_DEBUG_MODE void SetWritable() { VIXL_UNIMPLEMENTED(); }
-#endif
+  void Reset(byte* buffer, size_t capacity);

   ptrdiff_t GetOffsetFrom(ptrdiff_t offset) const {
     ptrdiff_t cursor_offset = cursor_ - buffer_;
@@ -128,8 +116,9 @@ class CodeBuffer {
   void Emit(T value) {
     VIXL_ASSERT(HasSpaceFor(sizeof(value)));
     dirty_ = true;
-    memcpy(cursor_, &value, sizeof(value));
-    cursor_ += sizeof(value);
+    byte* c = cursor_;
+    memcpy(c, &value, sizeof(value));
+    cursor_ = c + sizeof(value);
   }

   void UpdateData(size_t offset, const void* data, size_t size);
@@ -149,10 +138,6 @@ class CodeBuffer {
     return GetCapacity();
   }

-  bool IsManaged() const { return managed_; }
-
-  void Grow(size_t new_capacity);
-
   bool IsDirty() const { return dirty_; }

   void SetClean() { dirty_ = false; }
@@ -161,24 +146,9 @@ class CodeBuffer {
     return GetRemainingBytes() >= amount;
   }

-  void EnsureSpaceFor(size_t amount, bool* has_grown) {
-    bool is_full = !HasSpaceFor(amount);
-    if (is_full) Grow(capacity_ * 2 + amount);
-    VIXL_ASSERT(has_grown != NULL);
-    *has_grown = is_full;
-  }
-  void EnsureSpaceFor(size_t amount) {
-    bool dummy;
-    EnsureSpaceFor(amount, &dummy);
-  }
-
  private:
   // Backing store of the buffer.
   byte* buffer_;
-  // If true the backing store is allocated and deallocated by the buffer. The
-  // backing store can then grow on demand. If false the backing store is
-  // provided by the user and cannot be resized internally.
-  bool managed_;
   // Pointer to the next location to be written.
   byte* cursor_;
   // True if there has been any write since the buffer was created or cleaned.
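With the growable (managed) backing store removed, a CodeBuffer can no longer resize itself, so callers must check for space before emitting. A minimal sketch under that assumption:

// Sketch: emit a 32-bit instruction word only if the fixed buffer has room.
void TryEmitNop(vixl::CodeBuffer* buffer) {
  const uint32_t kNop = 0xd503201f;  // AArch64 NOP encoding.
  if (buffer->HasSpaceFor(sizeof(kNop))) {
    buffer->Emit(kNop);  // Copies via memcpy and advances the cursor.
  }
}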
@@ -68,14 +68,19 @@ class CodeBufferCheckScope {
                        size_t size,
                        BufferSpacePolicy check_policy = kReserveBufferSpace,
                        SizePolicy size_policy = kMaximumSize)
-      : assembler_(NULL), initialised_(false) {
+      : CodeBufferCheckScope() {
     Open(assembler, size, check_policy, size_policy);
   }

   // This constructor does not implicitly initialise the scope. Instead, the
   // user is required to explicitly call the `Open` function before using the
   // scope.
-  CodeBufferCheckScope() : assembler_(NULL), initialised_(false) {
+  CodeBufferCheckScope()
+      : assembler_(NULL),
+        assert_policy_(kMaximumSize),
+        limit_(0),
+        previous_allow_assembler_(false),
+        initialised_(false) {
     // Nothing to do.
   }
@@ -90,7 +95,7 @@ class CodeBufferCheckScope {
     VIXL_ASSERT(assembler != NULL);
     assembler_ = assembler;
     if (check_policy == kReserveBufferSpace) {
-      assembler->GetBuffer()->EnsureSpaceFor(size);
+      VIXL_ASSERT(assembler->GetBuffer()->HasSpaceFor(size));
     }
 #ifdef VIXL_DEBUG
     limit_ = assembler_->GetSizeOfCodeGenerated() + size;
@@ -152,14 +157,15 @@ class EmissionCheckScope : public CodeBufferCheckScope {
   // constructed.
   EmissionCheckScope(MacroAssemblerInterface* masm,
                      size_t size,
-                     SizePolicy size_policy = kMaximumSize) {
+                     SizePolicy size_policy = kMaximumSize)
+      : EmissionCheckScope() {
     Open(masm, size, size_policy);
   }

   // This constructor does not implicitly initialise the scope. Instead, the
   // user is required to explicitly call the `Open` function before using the
   // scope.
-  EmissionCheckScope() {}
+  EmissionCheckScope() : masm_(nullptr), pool_policy_(kBlockPools) {}

   virtual ~EmissionCheckScope() { Close(); }
@@ -250,14 +256,15 @@ class ExactAssemblyScope : public EmissionCheckScope {
   // constructed.
   ExactAssemblyScope(MacroAssemblerInterface* masm,
                      size_t size,
-                     SizePolicy size_policy = kExactSize) {
+                     SizePolicy size_policy = kExactSize)
+      : ExactAssemblyScope() {
     Open(masm, size, size_policy);
   }

   // This constructor does not implicitly initialise the scope. Instead, the
   // user is required to explicitly call the `Open` function before using the
   // scope.
-  ExactAssemblyScope() {}
+  ExactAssemblyScope() : previous_allow_macro_assembler_(false) {}

   virtual ~ExactAssemblyScope() { Close(); }
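The delegating constructors added above mean every member is initialised before Open() runs, even for default-constructed scopes. Typical usage is unchanged; a sketch, assuming 'masm' is a vixl::aarch64::MacroAssembler:

{
  // Reserve space for exactly one instruction; only raw assembler
  // instructions (not macro-instructions) may be emitted inside the scope.
  vixl::ExactAssemblyScope scope(&masm,
                                 vixl::aarch64::kInstructionSize,
                                 vixl::ExactAssemblyScope::kExactSize);
  masm.nop();
}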
@@ -28,6 +28,8 @@
 #ifndef VIXL_COMPILER_INTRINSICS_H
 #define VIXL_COMPILER_INTRINSICS_H

+#include <limits.h>
+
 #include "globals-vixl.h"

 namespace vixl {
@@ -104,16 +106,23 @@ int CountTrailingZerosFallBack(uint64_t value, int width);
 // TODO: The implementations could be improved for sizes different from 32bit
 // and 64bit: we could mask the values and call the appropriate builtin.

+// Return the number of leading bits that match the topmost (sign) bit,
+// excluding the topmost bit itself.
 template <typename V>
 inline int CountLeadingSignBits(V value, int width = (sizeof(V) * 8)) {
+  VIXL_ASSERT(IsPowerOf2(width) && (width <= 64));
 #if COMPILER_HAS_BUILTIN_CLRSB
-  if (width == 32) {
-    return __builtin_clrsb(value);
-  } else if (width == 64) {
-    return __builtin_clrsbll(value);
-  }
-#endif
+  VIXL_ASSERT((LLONG_MIN <= value) && (value <= LLONG_MAX));
+  int ll_width =
+      sizeof(long long) * kBitsPerByte;  // NOLINT(google-runtime-int)
+  int result = __builtin_clrsbll(value) - (ll_width - width);
+  // Check that the value fits in the specified width.
+  VIXL_ASSERT(result >= 0);
+  return result;
+#else
+  VIXL_ASSERT((INT64_MIN <= value) && (value <= INT64_MAX));
   return CountLeadingSignBitsFallBack(value, width);
+#endif
 }
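A worked example of the new width adjustment: for the 8-bit value 0xf0 (-16), the sign-extended 64-bit value has 59 redundant sign bits, so __builtin_clrsbll() returns 59; subtracting the width difference (64 - 8 = 56) gives 3, meaning bits 6 to 4 match the sign bit.

// Sketch, with values per the arithmetic above:
int8_t v = static_cast<int8_t>(0xf0);      // 0b11110000
int n = vixl::CountLeadingSignBits(v, 8);  // n == 3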
@@ -27,6 +27,7 @@
 #ifndef VIXL_CPU_FEATURES_H
 #define VIXL_CPU_FEATURES_H

+#include <bitset>
 #include <ostream>

 #include "globals-vixl.h"
@@ -34,16 +35,65 @@
 namespace vixl {


+// VIXL aims to handle and detect all architectural features that are likely to
+// influence code-generation decisions at EL0 (user-space).
+//
+// - There may be multiple VIXL feature flags for a given architectural
+//   extension. This occurs where the extension allows components to be
+//   implemented independently, or where kernel support is needed, and is
+//   likely to be fragmented.
+//
+//   For example, Pointer Authentication (kPAuth*) has a separate feature flag
+//   for access to PACGA, and to indicate that the QARMA algorithm is
+//   implemented.
+//
+// - Conversely, some extensions have configuration options that do not affect
+//   EL0, so these are presented as a single VIXL feature.
+//
+//   For example, the RAS extension (kRAS) has several variants, but the only
+//   feature relevant to VIXL is the addition of the ESB instruction so we only
+//   need a single flag.
+//
+// - VIXL offers separate flags for separate features even if they're
+//   architecturally linked.
+//
+//   For example, the architecture requires kFPHalf and kNEONHalf to be equal,
+//   but they have separate hardware ID register fields so VIXL presents them
+//   as separate features.
+//
+// - VIXL can detect every feature for which it can generate code.
+//
+// - VIXL can detect some features for which it cannot generate code.
+//
+// The CPUFeatures::Feature enum, derived from the macro list below, is
+// frequently extended. New features may be added to the list at any point, and
+// no assumptions should be made about the numerical values assigned to each
+// enum constant. The symbolic names can be considered to be stable.
+//
+// The debug descriptions are used only for debug output. The 'cpuinfo' strings
+// are informative; VIXL does not use /proc/cpuinfo for feature detection.

 // clang-format off
 #define VIXL_CPU_FEATURE_LIST(V)                                              \
   /* If set, the OS traps and emulates MRS accesses to relevant (EL1) ID_* */ \
   /* registers, so that the detailed feature registers can be read         */ \
   /* directly.                                                             */ \
+                                                                              \
+  /* Constant name          Debug description        Linux 'cpuinfo' string */ \
   V(kIDRegisterEmulation,   "ID register emulation", "cpuid")                 \
                                                                               \
   V(kFP,                    "FP",                    "fp")                    \
   V(kNEON,                  "NEON",                  "asimd")                 \
   V(kCRC32,                 "CRC32",                 "crc32")                 \
+  V(kDGH,                   "DGH",                   "dgh")                   \
+  /* Speculation control features. */                                         \
+  V(kCSV2,                  "CSV2",                  NULL)                    \
+  V(kSCXTNUM,               "SCXTNUM",               NULL)                    \
+  V(kCSV3,                  "CSV3",                  NULL)                    \
+  V(kSB,                    "SB",                    "sb")                    \
+  V(kSPECRES,               "SPECRES",               NULL)                    \
+  V(kSSBS,                  "SSBS",                  NULL)                    \
+  V(kSSBSControl,           "SSBS (PSTATE control)", "ssbs")                  \
   /* Cryptographic support instructions. */                                   \
   V(kAES,                   "AES",                   "aes")                   \
   V(kSHA1,                  "SHA1",                  "sha1")                  \
@@ -56,34 +106,102 @@ namespace vixl {
   V(kLORegions,             "LORegions",             NULL)                    \
   /* Rounding doubling multiply add/subtract: SQRDMLAH and SQRDMLSH. */       \
   V(kRDM,                   "RDM",                   "asimdrdm")              \
+  /* Scalable Vector Extension. */                                            \
+  V(kSVE,                   "SVE",                   "sve")                   \
+  V(kSVEF64MM,              "SVE F64MM",             "svef64mm")              \
+  V(kSVEF32MM,              "SVE F32MM",             "svef32mm")              \
+  V(kSVEI8MM,               "SVE I8MM",              "svei8imm")              \
+  V(kSVEBF16,               "SVE BFloat16",          "svebf16")               \
   /* SDOT and UDOT support (in NEON). */                                      \
   V(kDotProduct,            "DotProduct",            "asimddp")               \
+  /* Int8 matrix multiplication (in NEON). */                                 \
+  V(kI8MM,                  "NEON I8MM",             "i8mm")                  \
   /* Half-precision (FP16) support for FP and NEON, respectively. */          \
   V(kFPHalf,                "FPHalf",                "fphp")                  \
   V(kNEONHalf,              "NEONHalf",              "asimdhp")               \
+  /* BFloat16 support (in both FP and NEON.) */                               \
+  V(kBF16,                  "FP/NEON BFloat 16",     "bf16")                  \
   /* The RAS extension, including the ESB instruction. */                     \
   V(kRAS,                   "RAS",                   NULL)                    \
   /* Data cache clean to the point of persistence: DC CVAP. */                \
   V(kDCPoP,                 "DCPoP",                 "dcpop")                 \
+  /* Data cache clean to the point of deep persistence: DC CVADP. */          \
+  V(kDCCVADP,               "DCCVADP",               "dcpodp")                \
   /* Cryptographic support instructions. */                                   \
   V(kSHA3,                  "SHA3",                  "sha3")                  \
   V(kSHA512,                "SHA512",                "sha512")                \
   V(kSM3,                   "SM3",                   "sm3")                   \
   V(kSM4,                   "SM4",                   "sm4")                   \
   /* Pointer authentication for addresses. */                                 \
-  V(kPAuth,                 "PAuth",                 NULL)                    \
+  V(kPAuth,                 "PAuth",                 "paca")                  \
   /* Pointer authentication for addresses uses QARMA. */                      \
   V(kPAuthQARMA,            "PAuthQARMA",            NULL)                    \
   /* Generic authentication (using the PACGA instruction). */                 \
-  V(kPAuthGeneric,          "PAuthGeneric",          NULL)                    \
+  V(kPAuthGeneric,          "PAuthGeneric",          "pacg")                  \
   /* Generic authentication uses QARMA. */                                    \
   V(kPAuthGenericQARMA,     "PAuthGenericQARMA",     NULL)                    \
-  /* JavaScript-style FP <-> integer conversion instruction: FJCVTZS. */      \
+  /* JavaScript-style FP -> integer conversion instruction: FJCVTZS. */       \
   V(kJSCVT,                 "JSCVT",                 "jscvt")                 \
+  /* Complex number support for NEON: FCMLA and FCADD. */                     \
+  V(kFcma,                  "Fcma",                  "fcma")                  \
   /* RCpc-based model (for weaker release consistency): LDAPR and variants. */ \
   V(kRCpc,                  "RCpc",                  "lrcpc")                 \
-  /* Complex number support for NEON: FCMLA and FCADD. */                     \
-  V(kFcma,                  "Fcma",                  "fcma")
+  V(kRCpcImm,               "RCpc (imm)",            "ilrcpc")                \
+  /* Flag manipulation instructions: SETF{8,16}, CFINV, RMIF. */              \
+  V(kFlagM,                 "FlagM",                 "flagm")                 \
+  /* Unaligned single-copy atomicity. */                                      \
+  V(kUSCAT,                 "USCAT",                 "uscat")                 \
+  /* FP16 fused multiply-add or -subtract long: FMLAL{2}, FMLSL{2}. */        \
+  V(kFHM,                   "FHM",                   "asimdfhm")              \
+  /* Data-independent timing (for selected instructions). */                  \
+  V(kDIT,                   "DIT",                   "dit")                   \
+  /* Branch target identification. */                                         \
+  V(kBTI,                   "BTI",                   "bti")                   \
+  /* Flag manipulation instructions: {AX,XA}FLAG */                           \
+  V(kAXFlag,                "AXFlag",                "flagm2")                \
+  /* Random number generation extension, */                                   \
+  V(kRNG,                   "RNG",                   "rng")                   \
+  /* Floating-point round to {32,64}-bit integer. */                          \
+  V(kFrintToFixedSizedInt,  "Frint (bounded)",       "frint")                 \
+  /* Memory Tagging Extension. */                                             \
+  V(kMTEInstructions,       "MTE (EL0 instructions)", NULL)                   \
+  V(kMTE,                   "MTE",                   NULL)                    \
+  V(kMTE3,                  "MTE (asymmetric)",      "mte3")                  \
+  /* PAuth extensions. */                                                     \
+  V(kPAuthEnhancedPAC,      "PAuth EnhancedPAC",     NULL)                    \
+  V(kPAuthEnhancedPAC2,     "PAuth EnhancedPAC2",    NULL)                    \
+  V(kPAuthFPAC,             "PAuth FPAC",            NULL)                    \
+  V(kPAuthFPACCombined,     "PAuth FPACCombined",    NULL)                    \
+  /* Scalable Vector Extension 2. */                                          \
+  V(kSVE2,                  "SVE2",                  "sve2")                  \
+  V(kSVESM4,                "SVE SM4",               "svesm4")                \
+  V(kSVESHA3,               "SVE SHA3",              "svesha3")               \
+  V(kSVEBitPerm,            "SVE BitPerm",           "svebitperm")            \
+  V(kSVEAES,                "SVE AES",               "sveaes")                \
+  V(kSVEPmull128,           "SVE Pmull128",          "svepmull")              \
+  /* Alternate floating-point behavior */                                     \
+  V(kAFP,                   "AFP",                   "afp")                   \
+  /* Enhanced Counter Virtualization */                                       \
+  V(kECV,                   "ECV",                   "ecv")                   \
+  /* Increased precision of Reciprocal Estimate and Square Root Estimate */   \
+  V(kRPRES,                 "RPRES",                 "rpres")                 \
+  /* Memory operation instructions, for memcpy, memset */                     \
+  V(kMOPS,                  "Memory ops",            NULL)                    \
+  /* Scalable Matrix Extension (SME) */                                       \
+  V(kSME,                   "SME",                   "sme")                   \
+  V(kSMEi16i64,             "SME (i16i64)",          "smei16i64")             \
+  V(kSMEf64f64,             "SME (f64f64)",          "smef64f64")             \
+  V(kSMEi8i32,              "SME (i8i32)",           "smei8i32")              \
+  V(kSMEf16f32,             "SME (f16f32)",          "smef16f32")             \
+  V(kSMEb16f32,             "SME (b16f32)",          "smeb16f32")             \
+  V(kSMEf32f32,             "SME (f32f32)",          "smef32f32")             \
+  V(kSMEfa64,               "SME (fa64)",            "smefa64")               \
+  /* WFET and WFIT instruction support */                                     \
+  V(kWFXT,                  "WFXT",                  "wfxt")                  \
+  /* Extended BFloat16 instructions */                                        \
+  V(kEBF16,                 "EBF16",                 "ebf16")                 \
+  V(kSVE_EBF16,             "EBF16 (SVE)",           "sveebf16")              \
+  V(kCSSC,                  "CSSC",                  "cssc")
 // clang-format on
@@ -176,13 +294,13 @@ class CPUFeatures {
 // clang-format on

   // By default, construct with no features enabled.
-  CPUFeatures() : features_(0) {}
+  CPUFeatures() : features_{} {}

   // Construct with some features already enabled.
-  CPUFeatures(Feature feature0,
-              Feature feature1 = kNone,
-              Feature feature2 = kNone,
-              Feature feature3 = kNone);
+  template <typename T, typename... U>
+  CPUFeatures(T first, U... others) : features_{} {
+    Combine(first, others...);
+  }

   // Construct with all features enabled. This can be used to disable feature
   // checking: `Has(...)` returns true regardless of the argument.
@@ -198,51 +316,80 @@ class CPUFeatures {
     return CPUFeatures(kFP, kNEON, kCRC32);
   }

+  // Construct a new CPUFeatures object using ID registers. This assumes that
+  // kIDRegisterEmulation is present.
+  static CPUFeatures InferFromIDRegisters();
+
+  enum QueryIDRegistersOption {
+    kDontQueryIDRegisters,
+    kQueryIDRegistersIfAvailable
+  };
+
   // Construct a new CPUFeatures object based on what the OS reports.
-  static CPUFeatures InferFromOS();
+  static CPUFeatures InferFromOS(
+      QueryIDRegistersOption option = kQueryIDRegistersIfAvailable);

   // Combine another CPUFeatures object into this one. Features that already
   // exist in this set are left unchanged.
   void Combine(const CPUFeatures& other);

-  // Combine specific features into this set. Features that already exist in
-  // this set are left unchanged.
-  void Combine(Feature feature0,
-              Feature feature1 = kNone,
-              Feature feature2 = kNone,
-              Feature feature3 = kNone);
+  // Combine a specific feature into this set. If it already exists in the set,
+  // the set is left unchanged.
+  void Combine(Feature feature);
+
+  // Combine multiple features (or feature sets) into this set.
+  template <typename T, typename... U>
+  void Combine(T first, U... others) {
+    Combine(first);
+    Combine(others...);
+  }

   // Remove features in another CPUFeatures object from this one.
   void Remove(const CPUFeatures& other);

-  // Remove specific features from this set.
-  void Remove(Feature feature0,
-              Feature feature1 = kNone,
-              Feature feature2 = kNone,
-              Feature feature3 = kNone);
-
-  // Chaining helpers for convenient construction.
-  CPUFeatures With(const CPUFeatures& other) const;
-  CPUFeatures With(Feature feature0,
-                   Feature feature1 = kNone,
-                   Feature feature2 = kNone,
-                   Feature feature3 = kNone) const;
-  CPUFeatures Without(const CPUFeatures& other) const;
-  CPUFeatures Without(Feature feature0,
-                      Feature feature1 = kNone,
-                      Feature feature2 = kNone,
-                      Feature feature3 = kNone) const;
-
-  // Query features.
-  // Note that an empty query (like `Has(kNone)`) always returns true.
+  // Remove a specific feature from this set. This has no effect if the feature
+  // doesn't exist in the set.
+  void Remove(Feature feature0);
+
+  // Remove multiple features (or feature sets) from this set.
+  template <typename T, typename... U>
+  void Remove(T first, U... others) {
+    Remove(first);
+    Remove(others...);
+  }
+
+  // Chaining helpers for convenient construction by combining other
+  // CPUFeatures or individual Features.
+  template <typename... T>
+  CPUFeatures With(T... others) const {
+    CPUFeatures f(*this);
+    f.Combine(others...);
+    return f;
+  }
+
+  template <typename... T>
+  CPUFeatures Without(T... others) const {
+    CPUFeatures f(*this);
+    f.Remove(others...);
+    return f;
+  }
+
+  // Test whether the `other` feature set is equal to or a subset of this one.
   bool Has(const CPUFeatures& other) const;
-  bool Has(Feature feature0,
-           Feature feature1 = kNone,
-           Feature feature2 = kNone,
-           Feature feature3 = kNone) const;
+
+  // Test whether a single feature exists in this set.
+  // Note that `Has(kNone)` always returns true.
+  bool Has(Feature feature) const;
+
+  // Test whether all of the specified features exist in this set.
+  template <typename T, typename... U>
+  bool Has(T first, U... others) const {
+    return Has(first) && Has(others...);
+  }

   // Return the number of enabled features.
   size_t Count() const;
+  bool HasNoFeatures() const { return Count() == 0; }

   // Check for equivalence.
   bool operator==(const CPUFeatures& other) const {
@@ -256,9 +403,8 @@ class CPUFeatures {
   const_iterator end() const;

  private:
-  // Each bit represents a feature. This field will be replaced as needed if
-  // features are added.
-  uint64_t features_;
+  // Each bit represents a feature. This set will be extended as needed.
+  std::bitset<kNumberOfFeatures> features_;

   friend std::ostream& operator<<(std::ostream& os,
                                   const vixl::CPUFeatures& features);
@@ -281,8 +427,8 @@ class CPUFeaturesConstIterator {
   bool operator!=(const CPUFeaturesConstIterator& other) const {
     return !(*this == other);
   }
-  CPUFeatures::Feature operator++();
-  CPUFeatures::Feature operator++(int);
+  CPUFeaturesConstIterator& operator++();
+  CPUFeaturesConstIterator operator++(int);

   CPUFeatures::Feature operator*() const {
     VIXL_ASSERT(IsValid());
@@ -301,8 +447,10 @@ class CPUFeaturesConstIterator {
   CPUFeatures::Feature feature_;

   bool IsValid() const {
-    return ((cpu_features_ == NULL) && (feature_ == CPUFeatures::kNone)) ||
-           cpu_features_->Has(feature_);
+    if (cpu_features_ == NULL) {
+      return feature_ == CPUFeatures::kNone;
+    }
+    return cpu_features_->Has(feature_);
   }
 };
@@ -325,21 +473,17 @@ class CPUFeaturesScope {
   // Start a CPUFeaturesScope on any object that implements
   // `CPUFeatures* GetCPUFeatures()`.
   template <typename T>
-  explicit CPUFeaturesScope(T* cpu_features_wrapper,
-                            CPUFeatures::Feature feature0 = CPUFeatures::kNone,
-                            CPUFeatures::Feature feature1 = CPUFeatures::kNone,
-                            CPUFeatures::Feature feature2 = CPUFeatures::kNone,
-                            CPUFeatures::Feature feature3 = CPUFeatures::kNone)
+  explicit CPUFeaturesScope(T* cpu_features_wrapper)
       : cpu_features_(cpu_features_wrapper->GetCPUFeatures()),
-        old_features_(*cpu_features_) {
-    cpu_features_->Combine(feature0, feature1, feature2, feature3);
-  }
+        old_features_(*cpu_features_) {}

-  template <typename T>
-  CPUFeaturesScope(T* cpu_features_wrapper, const CPUFeatures& other)
+  // Start a CPUFeaturesScope on any object that implements
+  // `CPUFeatures* GetCPUFeatures()`, with the specified features enabled.
+  template <typename T, typename U, typename... V>
+  CPUFeaturesScope(T* cpu_features_wrapper, U first, V... features)
       : cpu_features_(cpu_features_wrapper->GetCPUFeatures()),
         old_features_(*cpu_features_) {
-    cpu_features_->Combine(other);
+    cpu_features_->Combine(first, features...);
   }

   ~CPUFeaturesScope() { *cpu_features_ = old_features_; }
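The variadic templates replace the old fixed four-argument overloads, so any number of Features (or whole CPUFeatures sets) can be combined, removed, or queried in one call. A sketch of the resulting interface, using only names from the diff above:

using vixl::CPUFeatures;

CPUFeatures f(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::kCRC32);
f.Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2);  // Add several at once.
if (f.Has(CPUFeatures::kFP, CPUFeatures::kSVE)) {  // All must be present.
  CPUFeatures no_sve = f.Without(CPUFeatures::kSVE, CPUFeatures::kSVE2);
}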
@@ -27,6 +27,10 @@
 #ifndef VIXL_GLOBALS_H
 #define VIXL_GLOBALS_H

+#if __cplusplus < 201703L
+#error VIXL requires C++17
+#endif
+
 // Get standard C99 macros for integer types.
 #ifndef __STDC_CONSTANT_MACROS
 #define __STDC_CONSTANT_MACROS
@@ -66,7 +70,8 @@ typedef uint8_t byte;
 const int KBytes = 1024;
 const int MBytes = 1024 * KBytes;

-const int kBitsPerByte = 8;
+const int kBitsPerByteLog2 = 3;
+const int kBitsPerByte = 1 << kBitsPerByteLog2;

 template <int SizeInBits>
 struct Unsigned;
@@ -153,7 +158,7 @@ struct Unsigned<64> {
 #endif
 // This is not as powerful as template based assertions, but it is simple.
 // It assumes that the descriptions are unique. If this starts being a problem,
-// we can switch to a different implemention.
+// we can switch to a different implementation.
 #define VIXL_CONCAT(a, b) a##b
 #if __cplusplus >= 201103L
 #define VIXL_STATIC_ASSERT_LINE(line_unused, condition, message) \
@@ -187,8 +192,7 @@ inline void USE(const T1&, const T2&, const T3&, const T4&) {}

 #define VIXL_ALIGNMENT_EXCEPTION()                \
   do {                                            \
-    fprintf(stderr, "ALIGNMENT EXCEPTION\t");     \
-    VIXL_ABORT();                                 \
+    VIXL_ABORT_WITH_MSG("ALIGNMENT EXCEPTION\t"); \
   } while (0)

 // The clang::fallthrough attribute is used along with the Wimplicit-fallthrough
@@ -203,7 +207,7 @@ inline void USE(const T1&, const T2&, const T3&, const T4&) {}
 #if __has_warning("-Wimplicit-fallthrough") && __cplusplus >= 201103L
 #define VIXL_FALLTHROUGH() [[clang::fallthrough]]
 // Fallthrough annotation for GCC >= 7.
-#elif __GNUC__ >= 7
+#elif defined(__GNUC__) && __GNUC__ >= 7
 #define VIXL_FALLTHROUGH() __attribute__((fallthrough))
 #else
 #define VIXL_FALLTHROUGH() \
@@ -211,6 +215,18 @@ inline void USE(const T1&, const T2&, const T3&, const T4&) {}
   } while (0)
 #endif

+// Evaluate 'init' to an std::optional and return if it's empty. If 'init' is
+// not empty then define a variable 'name' with the value inside the
+// std::optional.
+#define VIXL_DEFINE_OR_RETURN(name, init) \
+  auto opt##name = init;                  \
+  if (!opt##name) return;                 \
+  auto name = *opt##name;
+#define VIXL_DEFINE_OR_RETURN_FALSE(name, init) \
+  auto opt##name = init;                        \
+  if (!opt##name) return false;                 \
+  auto name = *opt##name;
+
 #if __cplusplus >= 201103L
 #define VIXL_NO_RETURN [[noreturn]]
 #else
@@ -224,17 +240,19 @@ inline void USE(const T1&, const T2&, const T3&, const T4&) {}

 #if __cplusplus >= 201103L
 #define VIXL_OVERRIDE override
+#define VIXL_CONSTEXPR constexpr
+#define VIXL_HAS_CONSTEXPR 1
 #else
 #define VIXL_OVERRIDE
+#define VIXL_CONSTEXPR
 #endif

-// Some functions might only be marked as "noreturn" for the DEBUG build. This
-// macro should be used for such cases (for more details see what
-// VIXL_UNREACHABLE expands to).
-#ifdef VIXL_DEBUG
-#define VIXL_DEBUG_NO_RETURN VIXL_NO_RETURN
+// With VIXL_NEGATIVE_TESTING on, VIXL_ASSERT and VIXL_CHECK will throw
+// exceptions but C++11 marks destructors as noexcept(true) by default.
+#if defined(VIXL_NEGATIVE_TESTING) && __cplusplus >= 201103L
+#define VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION noexcept(false)
 #else
-#define VIXL_DEBUG_NO_RETURN
+#define VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION
 #endif

 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
@@ -269,16 +287,24 @@ inline void USE(const T1&, const T2&, const T3&, const T4&) {}

 // Target Architecture/ISA
 #ifdef VIXL_INCLUDE_TARGET_A64
+#ifndef VIXL_INCLUDE_TARGET_AARCH64
 #define VIXL_INCLUDE_TARGET_AARCH64
 #endif
+#endif

 #if defined(VIXL_INCLUDE_TARGET_A32) && defined(VIXL_INCLUDE_TARGET_T32)
+#ifndef VIXL_INCLUDE_TARGET_AARCH32
 #define VIXL_INCLUDE_TARGET_AARCH32
+#endif
 #elif defined(VIXL_INCLUDE_TARGET_A32)
+#ifndef VIXL_INCLUDE_TARGET_A32_ONLY
 #define VIXL_INCLUDE_TARGET_A32_ONLY
+#endif
 #else
+#ifndef VIXL_INCLUDE_TARGET_T32_ONLY
 #define VIXL_INCLUDE_TARGET_T32_ONLY
 #endif
+#endif
 #endif


 #endif  // VIXL_GLOBALS_H
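The two new macros encode an early return on an empty std::optional. A sketch with a hypothetical decoder helper (TryGetShiftAmount is illustrative only):

// Hypothetical helper returning std::optional<unsigned>.
std::optional<unsigned> TryGetShiftAmount(uint32_t instr);

bool CheckShift(uint32_t instr) {
  // Expands to: auto optamount = TryGetShiftAmount(instr);
  //             if (!optamount) return false;
  //             auto amount = *optamount;
  VIXL_DEFINE_OR_RETURN_FALSE(amount, TryGetShiftAmount(instr));
  return amount < 64;
}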
@@ -27,9 +27,8 @@
 #ifndef VIXL_INVALSET_H_
 #define VIXL_INVALSET_H_

-#include <cstring>
-
 #include <algorithm>
+#include <cstring>
 #include <vector>

 #include "globals-vixl.h"
@@ -91,7 +90,7 @@ template <TEMPLATE_INVALSET_P_DECL>
 class InvalSet {
  public:
   InvalSet();
-  ~InvalSet();
+  ~InvalSet() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION;

   static const size_t kNPreallocatedElements = N_PREALLOCATED_ELEMENTS;
   static const KeyType kInvalidKey = INVALID_KEY;
@@ -112,7 +111,7 @@ class InvalSet {
   size_t size() const;

   // Returns true if no elements are stored in the set.
-  // Note that this does not mean the the backing storage is empty: it can still
+  // Note that this does not mean the backing storage is empty: it can still
   // contain invalid elements.
   bool empty() const;
@@ -244,8 +243,13 @@ class InvalSet {

 template <class S>
-class InvalSetIterator/* : public std::iterator<std::forward_iterator_tag,
-                          typename S::_ElementType> */{
+class InvalSetIterator {
+  using iterator_category = std::forward_iterator_tag;
+  using value_type = typename S::_ElementType;
+  using difference_type = std::ptrdiff_t;
+  using pointer = S*;
+  using reference = S&;
+
  private:
   // Redefine types to mirror the associated set types.
   typedef typename S::_ElementType ElementType;
@@ -323,7 +327,8 @@ InvalSet<TEMPLATE_INVALSET_P_DEF>::InvalSet()

 template <TEMPLATE_INVALSET_P_DECL>
-InvalSet<TEMPLATE_INVALSET_P_DEF>::~InvalSet() {
+InvalSet<TEMPLATE_INVALSET_P_DEF>::~InvalSet()
+    VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
   VIXL_ASSERT(monitor_ == 0);
   delete vector_;
 }
@@ -841,9 +846,7 @@ InvalSetIterator<S>::InvalSetIterator(const InvalSetIterator<S>& other)
 #if __cplusplus >= 201103L
 template <class S>
 InvalSetIterator<S>::InvalSetIterator(InvalSetIterator<S>&& other) noexcept
-    : using_vector_(false),
-      index_(0),
-      inval_set_(NULL) {
+    : using_vector_(false), index_(0), inval_set_(NULL) {
   swap(*this, other);
 }
 #endif
@@ -35,7 +35,7 @@ class MacroAssemblerInterface {
  public:
   virtual internal::AssemblerBase* AsAssemblerBase() = 0;

-  virtual ~MacroAssemblerInterface() {}
+  virtual ~MacroAssemblerInterface() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {}

   virtual bool AllowMacroInstructions() const = 0;
   virtual bool ArePoolsBlocked() const = 0;
@@ -27,10 +27,10 @@
 #ifndef VIXL_POOL_MANAGER_IMPL_H_
 #define VIXL_POOL_MANAGER_IMPL_H_

-#include "pool-manager.h"
-
 #include <algorithm>

 #include "assembler-base-vixl.h"
+#include "pool-manager.h"

 namespace vixl {
@@ -264,14 +264,14 @@ bool PoolManager<T>::MustEmit(T pc,
     if (checkpoint < temp.min_location_) return true;
   }

-  bool tempNotPlacedYet = true;
+  bool temp_not_placed_yet = true;
   for (int i = static_cast<int>(objects_.size()) - 1; i >= 0; --i) {
     const PoolObject<T>& current = objects_[i];
-    if (tempNotPlacedYet && PoolObjectLessThan(current, temp)) {
+    if (temp_not_placed_yet && PoolObjectLessThan(current, temp)) {
       checkpoint = UpdateCheckpointForObject(checkpoint, &temp);
       if (checkpoint < temp.min_location_) return true;
       if (CheckFuturePC(pc, checkpoint)) return true;
-      tempNotPlacedYet = false;
+      temp_not_placed_yet = false;
     }
     if (current.label_base_ == label_base) continue;
     checkpoint = UpdateCheckpointForObject(checkpoint, &current);
@@ -279,7 +279,7 @@ bool PoolManager<T>::MustEmit(T pc,
     if (CheckFuturePC(pc, checkpoint)) return true;
   }
   // temp is the object with the smallest max_location_.
-  if (tempNotPlacedYet) {
+  if (temp_not_placed_yet) {
     checkpoint = UpdateCheckpointForObject(checkpoint, &temp);
     if (checkpoint < temp.min_location_) return true;
   }
@@ -487,7 +487,7 @@ void PoolManager<T>::Release(T pc) {
 }

 template <typename T>
-PoolManager<T>::~PoolManager<T>() {
+PoolManager<T>::~PoolManager<T>() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
 #ifdef VIXL_DEBUG
   // Check for unbound objects.
   for (objects_iter iter = objects_.begin(); iter != objects_.end(); ++iter) {
@@ -497,7 +497,7 @@ PoolManager<T>::~PoolManager<T>() {
   }
 #endif
   // Delete objects the pool manager owns.
-  for (typename std::vector<LocationBase<T> *>::iterator
+  for (typename std::vector<LocationBase<T>*>::iterator
            iter = delete_on_destruction_.begin(),
            end = delete_on_destruction_.end();
        iter != end;
@@ -517,6 +517,6 @@ int PoolManager<T>::GetPoolSizeForTest() const {
   }
   return size;
 }
-}
+}  // namespace vixl

 #endif  // VIXL_POOL_MANAGER_IMPL_H_
@@ -27,11 +27,10 @@
 #ifndef VIXL_POOL_MANAGER_H_
 #define VIXL_POOL_MANAGER_H_

-#include <stdint.h>
-
 #include <cstddef>
 #include <limits>
 #include <map>
+#include <stdint.h>
 #include <vector>

 #include "globals-vixl.h"
@@ -142,7 +141,7 @@ class LocationBase {
         is_bound_(true),
         location_(location) {}

-  virtual ~LocationBase() {}
+  virtual ~LocationBase() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {}

   // The PoolManager should assume ownership of some objects, and delete them
   // after they have been placed. This can happen for example for literals that
@@ -369,8 +368,8 @@ class ForwardReference {

   // Specify the possible locations where the object could be stored. AArch32's
   // PC offset, and T32's PC alignment calculations should be applied by the
-  // Assembler, not here. The PoolManager deals only with simple locationes.
-  // Including min_object_adddress_ is necessary to handle AArch32 some
+  // Assembler, not here. The PoolManager deals only with simple locations.
+  // Including min_object_address_ is necessary to handle AArch32 some
   // instructions which have a minimum offset of 0, but also have the implicit
   // PC offset.
   // Note that this structure cannot handle sparse ranges, such as A32's ADR,
@@ -397,7 +396,7 @@ class PoolManager {
         max_pool_size_(0),
         monitor_(0) {}

-  ~PoolManager();
+  ~PoolManager() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION;

   // Check if we will need to emit the pool at location 'pc', when planning to
   // generate a certain number of bytes. This optionally takes a
@@ -30,6 +30,7 @@
 #include <cmath>
 #include <cstring>
 #include <limits>
+#include <type_traits>
 #include <vector>

 #include "compiler-intrinsics-vixl.h"
@@ -67,29 +68,40 @@ namespace vixl {
 #endif

 template <typename T, size_t n>
-size_t ArrayLength(const T (&)[n]) {
+constexpr size_t ArrayLength(const T (&)[n]) {
   return n;
 }

+inline uint64_t GetUintMask(unsigned bits) {
+  VIXL_ASSERT(bits <= 64);
+  uint64_t base = (bits >= 64) ? 0 : (UINT64_C(1) << bits);
+  return base - 1;
+}
+
+inline uint64_t GetSignMask(unsigned bits) {
+  VIXL_ASSERT(bits <= 64);
+  return UINT64_C(1) << (bits - 1);
+}
+
 // Check number width.
 // TODO: Refactor these using templates.
 inline bool IsIntN(unsigned n, uint32_t x) {
-  VIXL_ASSERT((0 < n) && (n < 32));
-  uint32_t limit = UINT32_C(1) << (n - 1);
-  return x < limit;
+  VIXL_ASSERT((0 < n) && (n <= 32));
+  return x <= static_cast<uint32_t>(INT32_MAX >> (32 - n));
 }
 inline bool IsIntN(unsigned n, int32_t x) {
-  VIXL_ASSERT((0 < n) && (n < 32));
+  VIXL_ASSERT((0 < n) && (n <= 32));
+  if (n == 32) return true;
   int32_t limit = INT32_C(1) << (n - 1);
   return (-limit <= x) && (x < limit);
 }
 inline bool IsIntN(unsigned n, uint64_t x) {
-  VIXL_ASSERT((0 < n) && (n < 64));
-  uint64_t limit = UINT64_C(1) << (n - 1);
-  return x < limit;
+  VIXL_ASSERT((0 < n) && (n <= 64));
+  return x <= static_cast<uint64_t>(INT64_MAX >> (64 - n));
 }
 inline bool IsIntN(unsigned n, int64_t x) {
-  VIXL_ASSERT((0 < n) && (n < 64));
+  VIXL_ASSERT((0 < n) && (n <= 64));
+  if (n == 64) return true;
   int64_t limit = INT64_C(1) << (n - 1);
   return (-limit <= x) && (x < limit);
 }
@@ -98,7 +110,8 @@ VIXL_DEPRECATED("IsIntN", inline bool is_intn(unsigned n, int64_t x)) {
 }

 inline bool IsUintN(unsigned n, uint32_t x) {
-  VIXL_ASSERT((0 < n) && (n < 32));
+  VIXL_ASSERT((0 < n) && (n <= 32));
+  if (n >= 32) return true;
   return !(x >> n);
 }
 inline bool IsUintN(unsigned n, int32_t x) {
@@ -107,7 +120,8 @@ inline bool IsUintN(unsigned n, int32_t x) {
   return !(static_cast<uint32_t>(x) >> n);
 }
 inline bool IsUintN(unsigned n, uint64_t x) {
-  VIXL_ASSERT((0 < n) && (n < 64));
+  VIXL_ASSERT((0 < n) && (n <= 64));
+  if (n >= 64) return true;
   return !(x >> n);
 }
 inline bool IsUintN(unsigned n, int64_t x) {
@@ -183,14 +197,14 @@ inline uint64_t ExtractUnsignedBitfield64(int msb, int lsb, uint64_t x) {
 }

-inline uint32_t ExtractUnsignedBitfield32(int msb, int lsb, uint32_t x) {
+inline uint32_t ExtractUnsignedBitfield32(int msb, int lsb, uint64_t x) {
   VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
               (msb >= lsb));
   return TruncateToUint32(ExtractUnsignedBitfield64(msb, lsb, x));
 }

-inline int64_t ExtractSignedBitfield64(int msb, int lsb, int64_t x) {
+inline int64_t ExtractSignedBitfield64(int msb, int lsb, uint64_t x) {
   VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
               (msb >= lsb));
   uint64_t temp = ExtractUnsignedBitfield64(msb, lsb, x);
@@ -203,8 +217,7 @@ inline int64_t ExtractSignedBitfield64(int msb, int lsb, int64_t x) {
   return result;
 }

-
-inline int32_t ExtractSignedBitfield32(int msb, int lsb, int32_t x) {
+inline int32_t ExtractSignedBitfield32(int msb, int lsb, uint64_t x) {
   VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
               (msb >= lsb));
   uint32_t temp = TruncateToUint32(ExtractSignedBitfield64(msb, lsb, x));
@@ -213,7 +226,6 @@ inline int32_t ExtractSignedBitfield32(int msb, int lsb, int32_t x) {
   return result;
 }

-
 inline uint64_t RotateRight(uint64_t value,
                             unsigned int rotate,
                             unsigned int width) {
@ -271,6 +283,39 @@ VIXL_DEPRECATED("RawbitsToDouble",
|
||||||
return RawbitsToDouble(bits);
|
return RawbitsToDouble(bits);
|
||||||
}
|
}
|
||||||
|
|
||||||
+// Some compilers dislike negating unsigned integers,
+// so we provide an equivalent.
+template <typename T>
+T UnsignedNegate(T value) {
+  VIXL_STATIC_ASSERT(std::is_unsigned<T>::value);
+  return ~value + 1;
+}
+
+// An absolute operation for signed integers that is defined for results outside
+// the representable range. Specifically, Abs(MIN_INT) is MIN_INT.
+template <typename T>
+T Abs(T val) {
+  // TODO: this static assertion is for signed integer inputs, as that's the
+  // only type tested. However, the code should work for all numeric inputs.
+  // Remove the assertion and this comment when more tests are available.
+  VIXL_STATIC_ASSERT(std::is_signed<T>::value && std::is_integral<T>::value);
+  return ((val >= -std::numeric_limits<T>::max()) && (val < 0)) ? -val : val;
+}
+
+// Convert unsigned to signed numbers in a well-defined way (using two's
+// complement representations).
+inline int64_t RawbitsToInt64(uint64_t bits) {
+  return (bits >= UINT64_C(0x8000000000000000))
+             ? (-static_cast<int64_t>(UnsignedNegate(bits) - 1) - 1)
+             : static_cast<int64_t>(bits);
+}
+
+inline int32_t RawbitsToInt32(uint32_t bits) {
+  return (bits >= UINT64_C(0x80000000))
+             ? (-static_cast<int32_t>(UnsignedNegate(bits) - 1) - 1)
+             : static_cast<int32_t>(bits);
+}
+
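`UnsignedNegate` sidesteps compiler warnings about applying unary minus to unsigned operands; `~value + 1` produces the same two's-complement result with well-defined wrap-around, and `RawbitsToInt64` builds on it to avoid implementation-defined narrowing casts. A small illustrative check, not part of the diff:

    #include <cstdint>
    #include <cassert>
    void CheckUnsignedNegate() {
      // ~x + 1 is two's-complement negation, with defined wrap-around.
      assert(vixl::UnsignedNegate(UINT32_C(1)) == UINT32_C(0xffffffff));
      assert(vixl::UnsignedNegate(UINT64_C(0)) == UINT64_C(0));
      // The all-ones pattern maps to -1 without any implementation-defined cast.
      assert(vixl::RawbitsToInt64(UINT64_C(0xffffffffffffffff)) == INT64_C(-1));
    }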
 namespace internal {
 
 // Internal simulation class used solely by the simulator to
@@ -294,7 +339,7 @@ class SimFloat16 : public Float16 {
   bool operator>(SimFloat16 rhs) const;
   bool operator==(SimFloat16 rhs) const;
   bool operator!=(SimFloat16 rhs) const;
-  // This is necessary for conversions peformed in (macro asm) Fmov.
+  // This is necessary for conversions performed in (macro asm) Fmov.
   bool operator==(double rhs) const;
   operator double() const;
 };
@@ -365,6 +410,10 @@ VIXL_DEPRECATED("Float16Classify", inline int float16classify(uint16_t value)) {
 
 bool IsZero(Float16 value);
 
+inline bool IsPositiveZero(double value) {
+  return (value == 0.0) && (copysign(1.0, value) > 0.0);
+}
+
 inline bool IsNaN(float value) { return std::isnan(value); }
 
 inline bool IsNaN(double value) { return std::isnan(value); }
@@ -447,7 +496,9 @@ inline float FusedMultiplyAdd(float op1, float op2, float a) {
 }
 
 
-inline uint64_t LowestSetBit(uint64_t value) { return value & static_cast<uint64_t>(-static_cast<int64_t>(value)); }
+inline uint64_t LowestSetBit(uint64_t value) {
+  return value & UnsignedNegate(value);
+}
 
 
 template <typename T>
@@ -484,11 +535,11 @@ T ReverseBits(T value) {
 
 
 template <typename T>
-inline T SignExtend(T val, int bitSize) {
-  VIXL_ASSERT(bitSize > 0);
-  T mask = (T(2) << (bitSize - 1)) - T(1);
+inline T SignExtend(T val, int size_in_bits) {
+  VIXL_ASSERT(size_in_bits > 0);
+  T mask = (T(2) << (size_in_bits - 1)) - T(1);
   val &= mask;
-  T sign_bits = -((val >> (bitSize - 1)) << bitSize);
+  T sign_bits = -((val >> (size_in_bits - 1)) << size_in_bits);
   val |= sign_bits;
   return val;
 }
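The rename is cosmetic; the mask-and-or trick itself is worth a worked case (not part of the diff, values illustrative):

    // Sign-extending the 4-bit pattern 0b1010 (-6 in 4-bit two's complement)
    // to a full int32_t, following the logic above:
    //   mask      = (2 << 3) - 1          = 0x0000000f
    //   val       = 0b1010 & mask         = 0x0000000a
    //   sign_bits = -((0xa >> 3) << 4)    = -16 = 0xfffffff0
    //   val | sign_bits                   = 0xfffffffa = -6
    int32_t r = vixl::SignExtend(INT32_C(0x0000000a), 4);  // r == -6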
@@ -570,7 +621,7 @@ T AlignUp(T pointer,
   // reinterpret_cast behaviour for other types.
 
   typename Unsigned<sizeof(T)* kBitsPerByte>::type pointer_raw =
-      (typename Unsigned<sizeof(T) * kBitsPerByte>::type)pointer;
+      (typename Unsigned<sizeof(T) * kBitsPerByte>::type) pointer;
   VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw));
 
   size_t mask = alignment - 1;
@@ -590,7 +641,7 @@ T AlignDown(T pointer,
   // reinterpret_cast behaviour for other types.
 
   typename Unsigned<sizeof(T)* kBitsPerByte>::type pointer_raw =
-      (typename Unsigned<sizeof(T) * kBitsPerByte>::type)pointer;
+      (typename Unsigned<sizeof(T) * kBitsPerByte>::type) pointer;
   VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw));
 
   size_t mask = alignment - 1;
@@ -801,7 +852,7 @@ class Uint32 {
   }
   int32_t GetSigned() const { return data_; }
   Uint32 operator~() const { return Uint32(~data_); }
-  Uint32 operator-() const { return Uint32(static_cast<uint32_t>(-static_cast<int32_t>(data_))); }
+  Uint32 operator-() const { return Uint32(UnsignedNegate(data_)); }
   bool operator==(Uint32 value) const { return data_ == value.data_; }
   bool operator!=(Uint32 value) const { return data_ != value.data_; }
   bool operator>(Uint32 value) const { return data_ > value.data_; }
@@ -869,7 +920,7 @@ class Uint64 {
   Uint32 GetHigh32() const { return Uint32(data_ >> 32); }
   Uint32 GetLow32() const { return Uint32(data_ & 0xffffffff); }
   Uint64 operator~() const { return Uint64(~data_); }
-  Uint64 operator-() const { return Uint64(static_cast<uint64_t>(-static_cast<int64_t>(data_))); }
+  Uint64 operator-() const { return Uint64(UnsignedNegate(data_)); }
   bool operator==(Uint64 value) const { return data_ == value.data_; }
   bool operator!=(Uint64 value) const { return data_ != value.data_; }
   Uint64 operator+(Uint64 value) const { return Uint64(data_ + value.data_); }
@@ -974,6 +1025,42 @@ Uint64::Uint64(Uint128 data) : data_(data.ToUint64().Get()) {}
 
 Int64 BitCount(Uint32 value);
 
+// The algorithm used is adapted from the one described in section 8.2 of
+// Hacker's Delight, by Henry S. Warren, Jr.
+template <unsigned N, typename T>
+int64_t MultiplyHigh(T u, T v) {
+  uint64_t u0, v0, w0, u1, v1, w1, w2, t;
+  VIXL_STATIC_ASSERT((N == 8) || (N == 16) || (N == 32) || (N == 64));
+  uint64_t sign_mask = UINT64_C(1) << (N - 1);
+  uint64_t sign_ext = 0;
+  unsigned half_bits = N / 2;
+  uint64_t half_mask = GetUintMask(half_bits);
+  if (std::numeric_limits<T>::is_signed) {
+    sign_ext = UINT64_C(0xffffffffffffffff) << half_bits;
+  }
+
+  VIXL_ASSERT(sizeof(u) == sizeof(uint64_t));
+  VIXL_ASSERT(sizeof(u) == sizeof(u0));
+
+  u0 = u & half_mask;
+  u1 = u >> half_bits | (((u & sign_mask) != 0) ? sign_ext : 0);
+  v0 = v & half_mask;
+  v1 = v >> half_bits | (((v & sign_mask) != 0) ? sign_ext : 0);
+
+  w0 = u0 * v0;
+  t = u1 * v0 + (w0 >> half_bits);
+
+  w1 = t & half_mask;
+  w2 = t >> half_bits | (((t & sign_mask) != 0) ? sign_ext : 0);
+  w1 = u0 * v1 + w1;
+  w1 = w1 >> half_bits | (((w1 & sign_mask) != 0) ? sign_ext : 0);
+
+  uint64_t value = u1 * v1 + w2 + w1;
+  int64_t result;
+  memcpy(&result, &value, sizeof(result));
+  return result;
+}
+
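The half-word decomposition computes the product in 32-bit pieces and propagates the carries and sign extensions by hand, so for N == 64 it should agree with the upper half of a full 128-bit product. A sanity sketch, assuming a compiler that provides __int128 (GCC/Clang), not part of the diff:

    #include <cstdint>
    #include <cassert>
    void CheckMultiplyHigh64(int64_t u, int64_t v) {
      // Reference: top 64 bits of the exact 128-bit signed product.
      __int128 full = static_cast<__int128>(u) * static_cast<__int128>(v);
      int64_t expected = static_cast<int64_t>(full >> 64);
      assert(vixl::internal::MultiplyHigh<64>(u, v) == expected);
    }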
 }  // namespace internal
 
 // The default NaN values (for FPCR.DN=1).
@@ -1139,7 +1226,7 @@ T FPRound(int64_t sign,
   // For subnormal outputs, the shift must be adjusted by the exponent. The +1
   // is necessary because the exponent of a subnormal value (encoded as 0) is
   // the same as the exponent of the smallest normal value (encoded as 1).
-  shift += -exponent + 1;
+  shift += static_cast<int>(-exponent + 1);
 
   // Handle inputs that would produce a zero output.
   //
@@ -1238,9 +1325,8 @@ inline Float16 FPRoundToFloat16(int64_t sign,
                                 uint64_t mantissa,
                                 FPRounding round_mode) {
   return RawbitsToFloat16(
-      FPRound<uint16_t,
-              kFloat16ExponentBits,
-              kFloat16MantissaBits>(sign, exponent, mantissa, round_mode));
+      FPRound<uint16_t, kFloat16ExponentBits, kFloat16MantissaBits>(
+          sign, exponent, mantissa, round_mode));
 }
 
 
@@ -1276,6 +1362,81 @@ Float16 FPToFloat16(double value,
                     FPRounding round_mode,
                     UseDefaultNaN DN,
                     bool* exception = NULL);
 
+// Like static_cast<T>(value), but with specialisations for the Float16 type.
+template <typename T, typename F>
+T StaticCastFPTo(F value) {
+  return static_cast<T>(value);
+}
+
+template <>
+inline float StaticCastFPTo<float, Float16>(Float16 value) {
+  return FPToFloat(value, kIgnoreDefaultNaN);
+}
+
+template <>
+inline double StaticCastFPTo<double, Float16>(Float16 value) {
+  return FPToDouble(value, kIgnoreDefaultNaN);
+}
+
+template <>
+inline Float16 StaticCastFPTo<Float16, float>(float value) {
+  return FPToFloat16(value, FPTieEven, kIgnoreDefaultNaN);
+}
+
+template <>
+inline Float16 StaticCastFPTo<Float16, double>(double value) {
+  return FPToFloat16(value, FPTieEven, kIgnoreDefaultNaN);
+}
+
+template <typename T>
+uint64_t FPToRawbitsWithSize(unsigned size_in_bits, T value) {
+  switch (size_in_bits) {
+    case 16:
+      return Float16ToRawbits(StaticCastFPTo<Float16>(value));
+    case 32:
+      return FloatToRawbits(StaticCastFPTo<float>(value));
+    case 64:
+      return DoubleToRawbits(StaticCastFPTo<double>(value));
+  }
+  VIXL_UNREACHABLE();
+  return 0;
+}
+
+template <typename T>
+T RawbitsWithSizeToFP(unsigned size_in_bits, uint64_t value) {
+  VIXL_ASSERT(IsUintN(size_in_bits, value));
+  switch (size_in_bits) {
+    case 16:
+      return StaticCastFPTo<T>(RawbitsToFloat16(static_cast<uint16_t>(value)));
+    case 32:
+      return StaticCastFPTo<T>(RawbitsToFloat(static_cast<uint32_t>(value)));
+    case 64:
+      return StaticCastFPTo<T>(RawbitsToDouble(value));
+  }
+  VIXL_UNREACHABLE();
+  return 0;
+}
+
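The pair gives the simulator one entry point for reading and writing FP registers of any lane size. A round-trip sketch (not part of the diff; assumes the helpers above are in scope):

    #include <cstdint>
    #include <cassert>
    void CheckRawbitsRoundTrip() {
      // 1.5f encodes as 0x3fc00000 in IEEE-754 binary32.
      uint64_t raw = vixl::FPToRawbitsWithSize(32, 1.5);
      assert(raw == UINT64_C(0x3fc00000));
      double back = vixl::RawbitsWithSizeToFP<double>(32, raw);
      assert(back == 1.5);
    }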
+// Jenkins one-at-a-time hash, based on
+// https://en.wikipedia.org/wiki/Jenkins_hash_function citing
+// https://www.drdobbs.com/database/algorithm-alley/184410284.
+constexpr uint32_t Hash(const char* str, uint32_t hash = 0) {
+  if (*str == '\0') {
+    hash += hash << 3;
+    hash ^= hash >> 11;
+    hash += hash << 15;
+    return hash;
+  } else {
+    hash += *str;
+    hash += hash << 10;
+    hash ^= hash >> 6;
+    return Hash(str + 1, hash);
+  }
+}
+
+constexpr uint32_t operator"" _h(const char* x, size_t) { return Hash(x); }
+
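The recursive formulation keeps the hash usable in constant expressions, so hashed strings can serve as switch-case labels. A usage sketch (the mnemonic strings and function are illustrative, not from the diff):

    // Dispatch on a compile-time-hashed mnemonic.
    uint32_t Classify(const char* mnemonic) {
      using vixl::operator"" _h;
      switch (vixl::Hash(mnemonic)) {
        case "add"_h:
          return 1;
        case "sub"_h:
          return 2;
        default:
          return 0;
      }
    }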
 }  // namespace vixl
 
 #endif  // VIXL_UTILS_H
@@ -2557,13 +2557,13 @@ void Assembler::adr(Condition cond,
    public:
     EmitOp() : Location::EmitOperator(T32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kT32PcDelta;
-      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
-      VIXL_ASSERT((offset >= 0) && (offset <= 1020) &&
-                  ((offset & 0x3) == 0));
-      const int32_t target = offset >> 2;
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kT32PcDelta;
+      Location::Offset off =
+          loc->GetLocation() - AlignDown(program_counter, 4);
+      VIXL_ASSERT((off >= 0) && (off <= 1020) && ((off & 0x3) == 0));
+      const int32_t target = off >> 2;
       return instr | (target & 0xff);
     }
   } immop;
@@ -2588,15 +2588,16 @@ void Assembler::adr(Condition cond,
    public:
     EmitOp() : Location::EmitOperator(T32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kT32PcDelta;
-      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kT32PcDelta;
+      Location::Offset off =
+          loc->GetLocation() - AlignDown(program_counter, 4);
       int32_t target;
-      if ((offset >= 0) && (offset <= 4095)) {
-        target = offset;
+      if ((off >= 0) && (off <= 4095)) {
+        target = off;
       } else {
-        target = -offset;
+        target = -off;
         VIXL_ASSERT((target >= 0) && (target <= 4095));
         // Emit the T2 encoding.
         instr |= 0x00a00000;
@@ -2622,19 +2623,20 @@ void Assembler::adr(Condition cond,
    public:
     EmitOp() : Location::EmitOperator(A32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kA32PcDelta;
-      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kA32PcDelta;
+      Location::Offset off =
+          loc->GetLocation() - AlignDown(program_counter, 4);
       int32_t target;
-      ImmediateA32 positive_immediate_a32(offset);
-      if (positive_immediate_a32.IsValid()) {
-        target = positive_immediate_a32.GetEncodingValue();
+      ImmediateA32 pos_imm_a32(off);
+      if (pos_imm_a32.IsValid()) {
+        target = pos_imm_a32.GetEncodingValue();
       } else {
-        ImmediateA32 negative_immediate_a32(-offset);
-        VIXL_ASSERT(negative_immediate_a32.IsValid());
+        ImmediateA32 neg_imm_a32(-off);
+        VIXL_ASSERT(neg_imm_a32.IsValid());
         // Emit the A2 encoding.
-        target = negative_immediate_a32.GetEncodingValue();
+        target = neg_imm_a32.GetEncodingValue();
         instr = (instr & ~0x00f00000) | 0x00400000;
       }
       return instr | (target & 0xfff);
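Every one of these encoders applies the same two steps before encoding: advance the raw instruction address by the architectural PC delta (reading PC yields the instruction address plus 4 in T32 and plus 8 in A32, matching kT32PcDelta/kA32PcDelta above), then, for adr and literal loads, align the result down to 4. A condensed sketch of that arithmetic (standalone illustrative helper, not from the diff):

    #include <cstdint>
    // T32 adr/ldr-literal use Align(PC, 4) as the base, so the encoded
    // offset is relative to that aligned address.
    int32_t T32AdrOffset(int32_t insn_addr, int32_t target_addr) {
      int32_t pc = insn_addr + 4;   // kT32PcDelta
      int32_t base = pc & ~3;       // AlignDown(pc, 4)
      return target_addr - base;    // the 'off' the Encode() hooks compute
    }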
@@ -3024,13 +3026,12 @@ void Assembler::b(Condition cond, EncodingSize size, Location* location) {
    public:
     EmitOp() : Location::EmitOperator(T32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kT32PcDelta;
-      Location::Offset offset = location->GetLocation() - pc;
-      VIXL_ASSERT((offset >= -256) && (offset <= 254) &&
-                  ((offset & 0x1) == 0));
-      const int32_t target = offset >> 1;
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kT32PcDelta;
+      Location::Offset off = loc->GetLocation() - program_counter;
+      VIXL_ASSERT((off >= -256) && (off <= 254) && ((off & 0x1) == 0));
+      const int32_t target = off >> 1;
       return instr | (target & 0xff);
     }
   } immop;
@@ -3051,13 +3052,12 @@ void Assembler::b(Condition cond, EncodingSize size, Location* location) {
    public:
     EmitOp() : Location::EmitOperator(T32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kT32PcDelta;
-      Location::Offset offset = location->GetLocation() - pc;
-      VIXL_ASSERT((offset >= -2048) && (offset <= 2046) &&
-                  ((offset & 0x1) == 0));
-      const int32_t target = offset >> 1;
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kT32PcDelta;
+      Location::Offset off = loc->GetLocation() - program_counter;
+      VIXL_ASSERT((off >= -2048) && (off <= 2046) && ((off & 0x1) == 0));
+      const int32_t target = off >> 1;
       return instr | (target & 0x7ff);
     }
   } immop;
@@ -3075,13 +3075,13 @@ void Assembler::b(Condition cond, EncodingSize size, Location* location) {
    public:
     EmitOp() : Location::EmitOperator(T32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kT32PcDelta;
-      Location::Offset offset = location->GetLocation() - pc;
-      VIXL_ASSERT((offset >= -1048576) && (offset <= 1048574) &&
-                  ((offset & 0x1) == 0));
-      const int32_t target = offset >> 1;
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kT32PcDelta;
+      Location::Offset off = loc->GetLocation() - program_counter;
+      VIXL_ASSERT((off >= -1048576) && (off <= 1048574) &&
+                  ((off & 0x1) == 0));
+      const int32_t target = off >> 1;
       return instr | (target & 0x7ff) | ((target & 0x1f800) << 5) |
              ((target & 0x20000) >> 4) | ((target & 0x40000) >> 7) |
             ((target & 0x80000) << 7);
@@ -3104,13 +3104,13 @@ void Assembler::b(Condition cond, EncodingSize size, Location* location) {
    public:
     EmitOp() : Location::EmitOperator(T32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kT32PcDelta;
-      Location::Offset offset = location->GetLocation() - pc;
-      VIXL_ASSERT((offset >= -16777216) && (offset <= 16777214) &&
-                  ((offset & 0x1) == 0));
-      int32_t target = offset >> 1;
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kT32PcDelta;
+      Location::Offset off = loc->GetLocation() - program_counter;
+      VIXL_ASSERT((off >= -16777216) && (off <= 16777214) &&
+                  ((off & 0x1) == 0));
+      int32_t target = off >> 1;
       uint32_t S = target & (1 << 23);
       target ^= ((S >> 1) | (S >> 2)) ^ (3 << 21);
       return instr | (target & 0x7ff) | ((target & 0x1ff800) << 5) |
@@ -3132,13 +3132,13 @@ void Assembler::b(Condition cond, EncodingSize size, Location* location) {
    public:
     EmitOp() : Location::EmitOperator(A32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kA32PcDelta;
-      Location::Offset offset = location->GetLocation() - pc;
-      VIXL_ASSERT((offset >= -33554432) && (offset <= 33554428) &&
-                  ((offset & 0x3) == 0));
-      const int32_t target = offset >> 2;
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kA32PcDelta;
+      Location::Offset off = loc->GetLocation() - program_counter;
+      VIXL_ASSERT((off >= -33554432) && (off <= 33554428) &&
+                  ((off & 0x3) == 0));
+      const int32_t target = off >> 2;
       return instr | (target & 0xffffff);
     }
   } immop;
@@ -3462,13 +3462,13 @@ void Assembler::bl(Condition cond, Location* location) {
    public:
     EmitOp() : Location::EmitOperator(T32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kT32PcDelta;
-      Location::Offset offset = location->GetLocation() - pc;
-      VIXL_ASSERT((offset >= -16777216) && (offset <= 16777214) &&
-                  ((offset & 0x1) == 0));
-      int32_t target = offset >> 1;
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kT32PcDelta;
+      Location::Offset off = loc->GetLocation() - program_counter;
+      VIXL_ASSERT((off >= -16777216) && (off <= 16777214) &&
+                  ((off & 0x1) == 0));
+      int32_t target = off >> 1;
       uint32_t S = target & (1 << 23);
       target ^= ((S >> 1) | (S >> 2)) ^ (3 << 21);
       return instr | (target & 0x7ff) | ((target & 0x1ff800) << 5) |
@@ -3490,13 +3490,13 @@ void Assembler::bl(Condition cond, Location* location) {
    public:
     EmitOp() : Location::EmitOperator(A32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kA32PcDelta;
-      Location::Offset offset = location->GetLocation() - pc;
-      VIXL_ASSERT((offset >= -33554432) && (offset <= 33554428) &&
-                  ((offset & 0x3) == 0));
-      const int32_t target = offset >> 2;
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kA32PcDelta;
+      Location::Offset off = loc->GetLocation() - program_counter;
+      VIXL_ASSERT((off >= -33554432) && (off <= 33554428) &&
+                  ((off & 0x3) == 0));
+      const int32_t target = off >> 2;
       return instr | (target & 0xffffff);
     }
   } immop;
@@ -3549,13 +3549,14 @@ void Assembler::blx(Condition cond, Location* location) {
    public:
     EmitOp() : Location::EmitOperator(T32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kT32PcDelta;
-      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
-      VIXL_ASSERT((offset >= -16777216) && (offset <= 16777212) &&
-                  ((offset & 0x3) == 0));
-      int32_t target = offset >> 2;
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kT32PcDelta;
+      Location::Offset off =
+          loc->GetLocation() - AlignDown(program_counter, 4);
+      VIXL_ASSERT((off >= -16777216) && (off <= 16777212) &&
+                  ((off & 0x3) == 0));
+      int32_t target = off >> 2;
       uint32_t S = target & (1 << 22);
       target ^= ((S >> 1) | (S >> 2)) ^ (3 << 20);
       return instr | ((target & 0x3ff) << 1) | ((target & 0xffc00) << 6) |
@@ -3577,15 +3578,14 @@ void Assembler::blx(Condition cond, Location* location) {
    public:
     EmitOp() : Location::EmitOperator(A32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const
-        VIXL_OVERRIDE {
-      pc += kA32PcDelta;
-      Location::Offset offset =
-          location->GetLocation() - AlignDown(pc, 4);
-      VIXL_ASSERT((offset >= -33554432) && (offset <= 33554430) &&
-                  ((offset & 0x1) == 0));
-      const int32_t target = offset >> 1;
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kA32PcDelta;
+      Location::Offset off =
+          loc->GetLocation() - AlignDown(program_counter, 4);
+      VIXL_ASSERT((off >= -33554432) && (off <= 33554430) &&
+                  ((off & 0x1) == 0));
+      const int32_t target = off >> 1;
       return instr | ((target & 0x1) << 24) | ((target & 0x1fffffe) >> 1);
     }
   } immop;
@@ -3698,13 +3698,12 @@ void Assembler::cbnz(Register rn, Location* location) {
    public:
     EmitOp() : Location::EmitOperator(T32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kT32PcDelta;
-      Location::Offset offset = location->GetLocation() - pc;
-      VIXL_ASSERT((offset >= 0) && (offset <= 126) &&
-                  ((offset & 0x1) == 0));
-      const int32_t target = offset >> 1;
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kT32PcDelta;
+      Location::Offset off = loc->GetLocation() - program_counter;
+      VIXL_ASSERT((off >= 0) && (off <= 126) && ((off & 0x1) == 0));
+      const int32_t target = off >> 1;
       return instr | ((target & 0x1f) << 3) | ((target & 0x20) << 4);
     }
   } immop;
@@ -3748,13 +3747,12 @@ void Assembler::cbz(Register rn, Location* location) {
    public:
     EmitOp() : Location::EmitOperator(T32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kT32PcDelta;
-      Location::Offset offset = location->GetLocation() - pc;
-      VIXL_ASSERT((offset >= 0) && (offset <= 126) &&
-                  ((offset & 0x1) == 0));
-      const int32_t target = offset >> 1;
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kT32PcDelta;
+      Location::Offset off = loc->GetLocation() - program_counter;
+      VIXL_ASSERT((off >= 0) && (off <= 126) && ((off & 0x1) == 0));
+      const int32_t target = off >> 1;
       return instr | ((target & 0x1f) << 3) | ((target & 0x20) << 4);
     }
   } immop;
@@ -4790,7 +4788,7 @@ void Assembler::ldm(Condition cond,
   }
   // LDM{<c>}{<q>} SP!, <registers> ; T1
   if (!size.IsWide() && rn.Is(sp) && write_back.DoesWriteBack() &&
-      ((registers.GetList() & ~0x80ff) == 0)) {
+      registers.IsR0toR7orPC()) {
    EmitT32_16(0xbc00 | (GetRegisterListEncoding(registers, 15, 1) << 8) |
                GetRegisterListEncoding(registers, 0, 8));
     AdvanceIT();
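The old mask test and the new predicate say the same thing: `0x80ff` is PC (bit 15) plus R0-R7 (bits 0-7). A sketch of the equivalence (the standalone function is illustrative; only the predicate name comes from the diff):

    #include <cstdint>
    // A register list is a 16-bit mask, one bit per register (R0..R15).
    // "Only R0-R7 or PC" means no bit outside 0x80ff may be set.
    bool IsR0toR7orPC(uint16_t list) {
      return (list & ~UINT32_C(0x80ff)) == 0;  // bit 15 = PC, bits 0-7 = R0-R7
    }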
@@ -5208,13 +5206,13 @@ void Assembler::ldr(Condition cond,
    public:
     EmitOp() : Location::EmitOperator(T32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kT32PcDelta;
-      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
-      VIXL_ASSERT((offset >= 0) && (offset <= 1020) &&
-                  ((offset & 0x3) == 0));
-      const int32_t target = offset >> 2;
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kT32PcDelta;
+      Location::Offset off =
+          loc->GetLocation() - AlignDown(program_counter, 4);
+      VIXL_ASSERT((off >= 0) && (off <= 1020) && ((off & 0x3) == 0));
+      const int32_t target = off >> 2;
       return instr | (target & 0xff);
     }
   } immop;
@@ -5233,13 +5231,14 @@ void Assembler::ldr(Condition cond,
    public:
     EmitOp() : Location::EmitOperator(T32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kT32PcDelta;
-      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
-      VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
-      uint32_t U = (offset >= 0);
-      int32_t target = abs(offset) | (U << 12);
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kT32PcDelta;
+      Location::Offset off =
+          loc->GetLocation() - AlignDown(program_counter, 4);
+      VIXL_ASSERT((off >= -4095) && (off <= 4095));
+      uint32_t U = (off >= 0);
+      int32_t target = abs(off) | (U << 12);
       return instr | (target & 0xfff) | ((target & 0x1000) << 11);
     }
   } immop;
@@ -5259,13 +5258,14 @@ void Assembler::ldr(Condition cond,
    public:
     EmitOp() : Location::EmitOperator(A32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kA32PcDelta;
-      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
-      VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
-      uint32_t U = (offset >= 0);
-      int32_t target = abs(offset) | (U << 12);
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kA32PcDelta;
+      Location::Offset off =
+          loc->GetLocation() - AlignDown(program_counter, 4);
+      VIXL_ASSERT((off >= -4095) && (off <= 4095));
+      uint32_t U = (off >= 0);
+      int32_t target = abs(off) | (U << 12);
       return instr | (target & 0xfff) | ((target & 0x1000) << 11);
     }
   } immop;
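Literal loads encode a magnitude plus an add/subtract (U) bit rather than a two's-complement offset, which is why these encoders take `abs(off)` and OR in U separately. Sketch of the packing (illustrative standalone helper):

    #include <cstdint>
    #include <cstdlib>
    // imm12 holds |offset|; U selects add (1) or subtract (0) relative to
    // the aligned PC base, and lands at bit 23 of the instruction word.
    uint32_t EncodeLiteralOffset(int32_t off) {
      uint32_t U = (off >= 0);
      int32_t target = abs(off) | (U << 12);
      return (target & 0xfff) | ((target & 0x1000) << 11);  // bit 12 -> bit 23
    }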
@@ -5505,13 +5505,14 @@ void Assembler::ldrb(Condition cond, Register rt, Location* location) {
    public:
     EmitOp() : Location::EmitOperator(T32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kT32PcDelta;
-      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
-      VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
-      uint32_t U = (offset >= 0);
-      int32_t target = abs(offset) | (U << 12);
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kT32PcDelta;
+      Location::Offset off =
+          loc->GetLocation() - AlignDown(program_counter, 4);
+      VIXL_ASSERT((off >= -4095) && (off <= 4095));
+      uint32_t U = (off >= 0);
+      int32_t target = abs(off) | (U << 12);
       return instr | (target & 0xfff) | ((target & 0x1000) << 11);
     }
   } immop;
@@ -5531,13 +5532,14 @@ void Assembler::ldrb(Condition cond, Register rt, Location* location) {
    public:
     EmitOp() : Location::EmitOperator(A32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kA32PcDelta;
-      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
-      VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
-      uint32_t U = (offset >= 0);
-      int32_t target = abs(offset) | (U << 12);
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kA32PcDelta;
+      Location::Offset off =
+          loc->GetLocation() - AlignDown(program_counter, 4);
+      VIXL_ASSERT((off >= -4095) && (off <= 4095));
+      uint32_t U = (off >= 0);
+      int32_t target = abs(off) | (U << 12);
       return instr | (target & 0xfff) | ((target & 0x1000) << 11);
     }
   } immop;
@@ -5747,13 +5749,13 @@ void Assembler::ldrd(Condition cond,
    public:
     EmitOp() : Location::EmitOperator(T32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kT32PcDelta;
-      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
-      VIXL_ASSERT((offset >= -1020) && (offset <= 1020) &&
-                  ((offset & 0x3) == 0));
-      int32_t target = offset >> 2;
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kT32PcDelta;
+      Location::Offset off =
+          loc->GetLocation() - AlignDown(program_counter, 4);
+      VIXL_ASSERT((off >= -1020) && (off <= 1020) && ((off & 0x3) == 0));
+      int32_t target = off >> 2;
       uint32_t U = (target >= 0);
       target = abs(target) | (U << 8);
       return instr | (target & 0xff) | ((target & 0x100) << 15);
@@ -5777,13 +5779,14 @@ void Assembler::ldrd(Condition cond,
    public:
     EmitOp() : Location::EmitOperator(A32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kA32PcDelta;
-      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
-      VIXL_ASSERT((offset >= -255) && (offset <= 255));
-      uint32_t U = (offset >= 0);
-      int32_t target = abs(offset) | (U << 8);
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kA32PcDelta;
+      Location::Offset off =
+          loc->GetLocation() - AlignDown(program_counter, 4);
+      VIXL_ASSERT((off >= -255) && (off <= 255));
+      uint32_t U = (off >= 0);
+      int32_t target = abs(off) | (U << 8);
       return instr | (target & 0xf) | ((target & 0xf0) << 4) |
              ((target & 0x100) << 15);
     }
@@ -6129,13 +6132,14 @@ void Assembler::ldrh(Condition cond, Register rt, Location* location) {
    public:
     EmitOp() : Location::EmitOperator(T32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kT32PcDelta;
-      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
-      VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
-      uint32_t U = (offset >= 0);
-      int32_t target = abs(offset) | (U << 12);
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kT32PcDelta;
+      Location::Offset off =
+          loc->GetLocation() - AlignDown(program_counter, 4);
+      VIXL_ASSERT((off >= -4095) && (off <= 4095));
+      uint32_t U = (off >= 0);
+      int32_t target = abs(off) | (U << 12);
       return instr | (target & 0xfff) | ((target & 0x1000) << 11);
     }
   } immop;
@@ -6155,13 +6159,14 @@ void Assembler::ldrh(Condition cond, Register rt, Location* location) {
    public:
     EmitOp() : Location::EmitOperator(A32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kA32PcDelta;
-      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
-      VIXL_ASSERT((offset >= -255) && (offset <= 255));
-      uint32_t U = (offset >= 0);
-      int32_t target = abs(offset) | (U << 8);
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kA32PcDelta;
+      Location::Offset off =
+          loc->GetLocation() - AlignDown(program_counter, 4);
+      VIXL_ASSERT((off >= -255) && (off <= 255));
+      uint32_t U = (off >= 0);
+      int32_t target = abs(off) | (U << 8);
       return instr | (target & 0xf) | ((target & 0xf0) << 4) |
              ((target & 0x100) << 15);
     }
@@ -6382,13 +6387,14 @@ void Assembler::ldrsb(Condition cond, Register rt, Location* location) {
    public:
     EmitOp() : Location::EmitOperator(T32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kT32PcDelta;
-      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
-      VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
-      uint32_t U = (offset >= 0);
-      int32_t target = abs(offset) | (U << 12);
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kT32PcDelta;
+      Location::Offset off =
+          loc->GetLocation() - AlignDown(program_counter, 4);
+      VIXL_ASSERT((off >= -4095) && (off <= 4095));
+      uint32_t U = (off >= 0);
+      int32_t target = abs(off) | (U << 12);
       return instr | (target & 0xfff) | ((target & 0x1000) << 11);
     }
   } immop;
@@ -6408,13 +6414,14 @@ void Assembler::ldrsb(Condition cond, Register rt, Location* location) {
    public:
     EmitOp() : Location::EmitOperator(A32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kA32PcDelta;
-      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
-      VIXL_ASSERT((offset >= -255) && (offset <= 255));
-      uint32_t U = (offset >= 0);
-      int32_t target = abs(offset) | (U << 8);
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kA32PcDelta;
+      Location::Offset off =
+          loc->GetLocation() - AlignDown(program_counter, 4);
+      VIXL_ASSERT((off >= -255) && (off <= 255));
+      uint32_t U = (off >= 0);
+      int32_t target = abs(off) | (U << 8);
       return instr | (target & 0xf) | ((target & 0xf0) << 4) |
              ((target & 0x100) << 15);
     }
@@ -6635,13 +6642,14 @@ void Assembler::ldrsh(Condition cond, Register rt, Location* location) {
    public:
     EmitOp() : Location::EmitOperator(T32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kT32PcDelta;
-      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
-      VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
-      uint32_t U = (offset >= 0);
-      int32_t target = abs(offset) | (U << 12);
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kT32PcDelta;
+      Location::Offset off =
+          loc->GetLocation() - AlignDown(program_counter, 4);
+      VIXL_ASSERT((off >= -4095) && (off <= 4095));
+      uint32_t U = (off >= 0);
+      int32_t target = abs(off) | (U << 12);
       return instr | (target & 0xfff) | ((target & 0x1000) << 11);
     }
   } immop;
@@ -6661,13 +6669,14 @@ void Assembler::ldrsh(Condition cond, Register rt, Location* location) {
    public:
     EmitOp() : Location::EmitOperator(A32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kA32PcDelta;
-      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
-      VIXL_ASSERT((offset >= -255) && (offset <= 255));
-      uint32_t U = (offset >= 0);
-      int32_t target = abs(offset) | (U << 8);
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kA32PcDelta;
+      Location::Offset off =
+          loc->GetLocation() - AlignDown(program_counter, 4);
+      VIXL_ASSERT((off >= -255) && (off <= 255));
+      uint32_t U = (off >= 0);
+      int32_t target = abs(off) | (U << 8);
       return instr | (target & 0xf) | ((target & 0xf0) << 4) |
              ((target & 0x100) << 15);
     }
@@ -8039,13 +8048,14 @@ void Assembler::pld(Condition cond, Location* location) {
    public:
     EmitOp() : Location::EmitOperator(T32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kT32PcDelta;
-      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
-      VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
-      uint32_t U = (offset >= 0);
-      int32_t target = abs(offset) | (U << 12);
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kT32PcDelta;
+      Location::Offset off =
+          loc->GetLocation() - AlignDown(program_counter, 4);
+      VIXL_ASSERT((off >= -4095) && (off <= 4095));
+      uint32_t U = (off >= 0);
+      int32_t target = abs(off) | (U << 12);
       return instr | (target & 0xfff) | ((target & 0x1000) << 11);
     }
   } immop;
@@ -8062,15 +8072,14 @@ void Assembler::pld(Condition cond, Location* location) {
    public:
     EmitOp() : Location::EmitOperator(A32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const
-        VIXL_OVERRIDE {
-      pc += kA32PcDelta;
-      Location::Offset offset =
-          location->GetLocation() - AlignDown(pc, 4);
-      VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
-      uint32_t U = (offset >= 0);
-      int32_t target = abs(offset) | (U << 12);
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kA32PcDelta;
+      Location::Offset off =
+          loc->GetLocation() - AlignDown(program_counter, 4);
+      VIXL_ASSERT((off >= -4095) && (off <= 4095));
+      uint32_t U = (off >= 0);
+      int32_t target = abs(off) | (U << 12);
       return instr | (target & 0xfff) | ((target & 0x1000) << 11);
     }
   } immop;
@@ -8403,13 +8412,14 @@ void Assembler::pli(Condition cond, Location* location) {
    public:
     EmitOp() : Location::EmitOperator(T32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kT32PcDelta;
-      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
-      VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
-      uint32_t U = (offset >= 0);
-      int32_t target = abs(offset) | (U << 12);
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kT32PcDelta;
+      Location::Offset off =
+          loc->GetLocation() - AlignDown(program_counter, 4);
+      VIXL_ASSERT((off >= -4095) && (off <= 4095));
+      uint32_t U = (off >= 0);
+      int32_t target = abs(off) | (U << 12);
       return instr | (target & 0xfff) | ((target & 0x1000) << 11);
     }
   } immop;
@@ -8426,15 +8436,14 @@ void Assembler::pli(Condition cond, Location* location) {
    public:
     EmitOp() : Location::EmitOperator(A32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const
-        VIXL_OVERRIDE {
-      pc += kA32PcDelta;
-      Location::Offset offset =
-          location->GetLocation() - AlignDown(pc, 4);
-      VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
-      uint32_t U = (offset >= 0);
-      int32_t target = abs(offset) | (U << 12);
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kA32PcDelta;
+      Location::Offset off =
+          loc->GetLocation() - AlignDown(program_counter, 4);
+      VIXL_ASSERT((off >= -4095) && (off <= 4095));
+      uint32_t U = (off >= 0);
+      int32_t target = abs(off) | (U << 12);
       return instr | (target & 0xfff) | ((target & 0x1000) << 11);
     }
   } immop;
@@ -8471,40 +8480,52 @@ bool Assembler::pli_info(Condition cond,
 void Assembler::pop(Condition cond, EncodingSize size, RegisterList registers) {
   VIXL_ASSERT(AllowAssembler());
   CheckIT(cond);
+  if (!registers.IsEmpty() || AllowUnpredictable()) {
     if (IsUsingT32()) {
+      // A branch out of an IT block should be the last instruction in the
+      // block.
+      if (!registers.Includes(pc) || OutsideITBlockAndAlOrLast(cond) ||
+          AllowUnpredictable()) {
         // POP{<c>}{<q>} <registers> ; T1
-        if (!size.IsWide() && ((registers.GetList() & ~0x80ff) == 0)) {
+        if (!size.IsWide() && registers.IsR0toR7orPC()) {
          EmitT32_16(0xbc00 | (GetRegisterListEncoding(registers, 15, 1) << 8) |
                      GetRegisterListEncoding(registers, 0, 8));
          AdvanceIT();
          return;
        }
        // POP{<c>}{<q>} <registers> ; T2
-        if (!size.IsNarrow() && ((registers.GetList() & ~0xdfff) == 0)) {
-          EmitT32_32(0xe8bd0000U |
-                     (GetRegisterListEncoding(registers, 15, 1) << 15) |
-                     (GetRegisterListEncoding(registers, 14, 1) << 14) |
-                     GetRegisterListEncoding(registers, 0, 13));
+        // Alias of: LDM{<c>}{<q>} SP!, <registers> ; T2
+        if (!size.IsNarrow() &&
+            ((!registers.Includes(sp) && (registers.GetCount() > 1) &&
+              !(registers.Includes(pc) && registers.Includes(lr))) ||
+             AllowUnpredictable())) {
+          EmitT32_32(0xe8bd0000U | GetRegisterListEncoding(registers, 0, 16));
           AdvanceIT();
           return;
         }
+      }
     } else {
       // POP{<c>}{<q>} <registers> ; A1
-      if (cond.IsNotNever()) {
+      // Alias of: LDM{<c>}{<q>} SP!, <registers> ; A1
+      if (cond.IsNotNever() &&
+          (!registers.Includes(sp) || AllowUnpredictable())) {
         EmitA32(0x08bd0000U | (cond.GetCondition() << 28) |
                 GetRegisterListEncoding(registers, 0, 16));
         return;
       }
     }
+  }
   Delegate(kPop, &Assembler::pop, cond, size, registers);
 }
 
 void Assembler::pop(Condition cond, EncodingSize size, Register rt) {
   VIXL_ASSERT(AllowAssembler());
   CheckIT(cond);
+  if (!rt.IsSP() || AllowUnpredictable()) {
     if (IsUsingT32()) {
       // POP{<c>}{<q>} <single_register_list> ; T4
-      if (!size.IsNarrow() && ((!rt.IsPC() || OutsideITBlockAndAlOrLast(cond)) ||
-                               AllowUnpredictable())) {
+      // Alias of: LDR{<c>}{<q>} <Rt>, [SP], #4 ; T4
+      if (!size.IsNarrow() && (!rt.IsPC() || OutsideITBlockAndAlOrLast(cond) ||
+                               AllowUnpredictable())) {
        EmitT32_32(0xf85d0b04U | (rt.GetCode() << 12));
        AdvanceIT();
@ -8512,11 +8533,14 @@ void Assembler::pop(Condition cond, EncodingSize size, Register rt) {
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// POP{<c>}{<q>} <single_register_list> ; A1
|
// POP{<c>}{<q>} <single_register_list> ; A1
|
||||||
|
// Alias of: LDR{<c>}{<q>} <Rt>, [SP], #4 ; T1
|
||||||
if (cond.IsNotNever()) {
|
if (cond.IsNotNever()) {
|
||||||
EmitA32(0x049d0004U | (cond.GetCondition() << 28) | (rt.GetCode() << 12));
|
EmitA32(0x049d0004U | (cond.GetCondition() << 28) |
|
||||||
|
(rt.GetCode() << 12));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
Delegate(kPop, &Assembler::pop, cond, size, rt);
|
Delegate(kPop, &Assembler::pop, cond, size, rt);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -8525,30 +8549,39 @@ void Assembler::push(Condition cond,
                      RegisterList registers) {
   VIXL_ASSERT(AllowAssembler());
   CheckIT(cond);
+  if (!registers.IsEmpty() || AllowUnpredictable()) {
     if (IsUsingT32()) {
       // PUSH{<c>}{<q>} <registers> ; T1
-      if (!size.IsWide() && ((registers.GetList() & ~0x40ff) == 0)) {
+      if (!size.IsWide() && registers.IsR0toR7orLR()) {
         EmitT32_16(0xb400 | (GetRegisterListEncoding(registers, 14, 1) << 8) |
                    GetRegisterListEncoding(registers, 0, 8));
         AdvanceIT();
         return;
       }
       // PUSH{<c>}{<q>} <registers> ; T1
-      if (!size.IsNarrow() && ((registers.GetList() & ~0x5fff) == 0)) {
-        EmitT32_32(0xe92d0000U |
-                   (GetRegisterListEncoding(registers, 14, 1) << 14) |
-                   GetRegisterListEncoding(registers, 0, 13));
+      // Alias of: STMDB SP!, <registers> ; T1
+      if (!size.IsNarrow() && !registers.Includes(pc) &&
+          ((!registers.Includes(sp) && (registers.GetCount() > 1)) ||
+           AllowUnpredictable())) {
+        EmitT32_32(0xe92d0000U | GetRegisterListEncoding(registers, 0, 15));
         AdvanceIT();
         return;
       }
     } else {
       // PUSH{<c>}{<q>} <registers> ; A1
-      if (cond.IsNotNever()) {
+      // Alias of: STMDB SP!, <registers> ; A1
+      if (cond.IsNotNever() &&
+          // For A32, sp can appear in the list, but stores an UNKNOWN value if
+          // it is not the lowest-valued register.
+          (!registers.Includes(sp) ||
+           registers.GetFirstAvailableRegister().IsSP() ||
+           AllowUnpredictable())) {
         EmitA32(0x092d0000U | (cond.GetCondition() << 28) |
                 GetRegisterListEncoding(registers, 0, 16));
         return;
       }
     }
+  }
   Delegate(kPush, &Assembler::push, cond, size, registers);
 }

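The T1 forms above pack the register list straight into the instruction word via GetRegisterListEncoding. A minimal sketch of what such a slice helper computes, assuming the list is a plain 16-bit mask with one bit per register (an illustration, not vixl's implementation):

    #include <cstdint>

    // Extract `count` bits of a 16-bit register mask starting at register
    // `first`, ready for insertion into an instruction word.
    static uint32_t RegisterListSlice(uint32_t list, int first, int count) {
      return (list >> first) & ((UINT32_C(1) << count) - 1);
    }

    // T1 PUSH {r0, r4, lr}: bit 8 of the encoding holds LR (a 1-bit slice
    // at r14) and bits 0-7 hold r0-r7.
    // uint32_t list = (1 << 0) | (1 << 4) | (1 << 14);
    // uint32_t encoding = 0xb400 |
    //     (RegisterListSlice(list, 14, 1) << 8) |
    //     RegisterListSlice(list, 0, 8);  // == 0xb511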
@@ -8557,14 +8590,17 @@ void Assembler::push(Condition cond, EncodingSize size, Register rt) {
   CheckIT(cond);
   if (IsUsingT32()) {
     // PUSH{<c>}{<q>} <single_register_list> ; T4
-    if (!size.IsNarrow() && (!rt.IsPC() || AllowUnpredictable())) {
+    // Alias of: STR{<c>}{<q>} <Rt>, [SP, #-4]! ; T4
+    if (!size.IsNarrow() &&
+        ((!rt.IsPC() && !rt.IsSP()) || AllowUnpredictable())) {
       EmitT32_32(0xf84d0d04U | (rt.GetCode() << 12));
       AdvanceIT();
       return;
     }
   } else {
     // PUSH{<c>}{<q>} <single_register_list> ; A1
-    if (cond.IsNotNever() && (!rt.IsPC() || AllowUnpredictable())) {
+    // Alias of: STR{<c>}{<q>} <Rt>, [SP, #-4]! ; A1
+    if (cond.IsNotNever() && (!rt.IsSP() || AllowUnpredictable())) {
       EmitA32(0x052d0004U | (cond.GetCondition() << 28) | (rt.GetCode() << 12));
       return;
     }

@@ -11177,7 +11213,7 @@ void Assembler::stmdb(Condition cond,
   if (IsUsingT32()) {
     // STMDB{<c>}{<q>} SP!, <registers> ; T1
     if (!size.IsWide() && rn.Is(sp) && write_back.DoesWriteBack() &&
-        ((registers.GetList() & ~0x40ff) == 0)) {
+        registers.IsR0toR7orLR()) {
       EmitT32_16(0xb400 | (GetRegisterListEncoding(registers, 14, 1) << 8) |
                  GetRegisterListEncoding(registers, 0, 8));
       AdvanceIT();
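In the single-register forms above only the Rt field varies; the rest of the word is the fixed STR/LDR-with-writeback pattern. A sketch of that arithmetic, assuming rt_code is the 4-bit register number (helper names are hypothetical):

    #include <cstdint>

    // T32 PUSH {rt} is STR rt, [SP, #-4]! (fixed pattern 0xf84d0d04);
    // T32 POP {rt} is LDR rt, [SP], #4 (fixed pattern 0xf85d0b04).
    // Rt occupies bits 15:12 in both.
    static uint32_t EncodePushSingleT32(uint32_t rt_code) {
      return UINT32_C(0xf84d0d04) | (rt_code << 12);
    }
    static uint32_t EncodePopSingleT32(uint32_t rt_code) {
      return UINT32_C(0xf85d0b04) | (rt_code << 12);
    }
    // EncodePushSingleT32(0) == 0xf84d0d04 (push {r0});
    // EncodePopSingleT32(3) == 0xf85d3b04 (pop {r3}).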
@@ -19589,13 +19625,13 @@ void Assembler::vldr(Condition cond,
    public:
     EmitOp() : Location::EmitOperator(T32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kT32PcDelta;
-      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
-      VIXL_ASSERT((offset >= -1020) && (offset <= 1020) &&
-                  ((offset & 0x3) == 0));
-      int32_t target = offset >> 2;
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kT32PcDelta;
+      Location::Offset off =
+          loc->GetLocation() - AlignDown(program_counter, 4);
+      VIXL_ASSERT((off >= -1020) && (off <= 1020) && ((off & 0x3) == 0));
+      int32_t target = off >> 2;
       uint32_t U = (target >= 0);
       target = abs(target) | (U << 8);
       return instr | (target & 0xff) | ((target & 0x100) << 15);

@@ -19619,13 +19655,13 @@ void Assembler::vldr(Condition cond,
    public:
     EmitOp() : Location::EmitOperator(A32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kA32PcDelta;
-      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
-      VIXL_ASSERT((offset >= -1020) && (offset <= 1020) &&
-                  ((offset & 0x3) == 0));
-      int32_t target = offset >> 2;
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kA32PcDelta;
+      Location::Offset off =
+          loc->GetLocation() - AlignDown(program_counter, 4);
+      VIXL_ASSERT((off >= -1020) && (off <= 1020) && ((off & 0x3) == 0));
+      int32_t target = off >> 2;
       uint32_t U = (target >= 0);
       target = abs(target) | (U << 8);
       return instr | (target & 0xff) | ((target & 0x100) << 15);

@@ -19743,13 +19779,13 @@ void Assembler::vldr(Condition cond,
    public:
     EmitOp() : Location::EmitOperator(T32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kT32PcDelta;
-      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
-      VIXL_ASSERT((offset >= -1020) && (offset <= 1020) &&
-                  ((offset & 0x3) == 0));
-      int32_t target = offset >> 2;
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kT32PcDelta;
+      Location::Offset off =
+          loc->GetLocation() - AlignDown(program_counter, 4);
+      VIXL_ASSERT((off >= -1020) && (off <= 1020) && ((off & 0x3) == 0));
+      int32_t target = off >> 2;
       uint32_t U = (target >= 0);
       target = abs(target) | (U << 8);
       return instr | (target & 0xff) | ((target & 0x100) << 15);

@@ -19773,13 +19809,13 @@ void Assembler::vldr(Condition cond,
    public:
     EmitOp() : Location::EmitOperator(A32) {}
     virtual uint32_t Encode(uint32_t instr,
-                            Location::Offset pc,
-                            const Location* location) const VIXL_OVERRIDE {
-      pc += kA32PcDelta;
-      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
-      VIXL_ASSERT((offset >= -1020) && (offset <= 1020) &&
-                  ((offset & 0x3) == 0));
-      int32_t target = offset >> 2;
+                            Location::Offset program_counter,
+                            const Location* loc) const VIXL_OVERRIDE {
+      program_counter += kA32PcDelta;
+      Location::Offset off =
+          loc->GetLocation() - AlignDown(program_counter, 4);
+      VIXL_ASSERT((off >= -1020) && (off <= 1020) && ((off & 0x3) == 0));
+      int32_t target = off >> 2;
       uint32_t U = (target >= 0);
       target = abs(target) | (U << 8);
       return instr | (target & 0xff) | ((target & 0x100) << 15);

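Each EmitOp above turns a label's distance from the aligned PC into a sign-magnitude literal offset: a U (add/subtract) bit plus an 8-bit word offset. A standalone sketch of the same packing, assuming the field layout used in the diff:

    #include <cassert>
    #include <cstdint>
    #include <cstdlib>

    // Pack a byte offset in [-1020, 1020], a multiple of 4, into a
    // VLDR-style literal field: imm8 = |offset| / 4, U at bit 23.
    static uint32_t EncodeLiteralOffset(uint32_t instr, int32_t off) {
      assert(off >= -1020 && off <= 1020 && (off & 0x3) == 0);
      int32_t target = off >> 2;
      uint32_t u = (target >= 0);
      target = abs(target) | (u << 8);  // Fold U in as bit 8 of the magnitude.
      // (target & 0x100) << 15 moves the U bit from position 8 to position 23.
      return instr | (target & 0xff) | ((target & 0x100) << 15);
    }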
@@ -348,7 +348,7 @@ DataTypeValue Dt_U_opc1_opc2_1_Decode(uint32_t value, unsigned* lane) {
     *lane = (value >> 2) & 1;
     return Untyped32;
   }
-  *lane = -1;
+  *lane = ~0U;
   return kDataTypeValueInvalid;
 }

@@ -365,7 +365,7 @@ DataTypeValue Dt_opc1_opc2_1_Decode(uint32_t value, unsigned* lane) {
     *lane = (value >> 2) & 1;
     return Untyped32;
   }
-  *lane = -1;
+  *lane = ~0U;
   return kDataTypeValueInvalid;
 }

@@ -382,7 +382,7 @@ DataTypeValue Dt_imm4_1_Decode(uint32_t value, unsigned* lane) {
     *lane = (value >> 3) & 1;
     return Untyped32;
   }
-  *lane = -1;
+  *lane = ~0U;
   return kDataTypeValueInvalid;
 }

@@ -8288,13 +8288,13 @@ void Disassembler::DecodeT32(uint32_t instr) {
           UnallocatedT32(instr);
           return;
         }
-        unsigned firstcond = (instr >> 20) & 0xf;
+        unsigned first_cond = (instr >> 20) & 0xf;
         unsigned mask = (instr >> 16) & 0xf;
-        bool wasInITBlock = InITBlock();
-        SetIT(Condition(firstcond), mask);
-        it(Condition(firstcond), mask);
-        if (wasInITBlock || (firstcond == 15) ||
-            ((firstcond == al) &&
+        bool was_in_it_block = InITBlock();
+        SetIT(Condition(first_cond), mask);
+        it(Condition(first_cond), mask);
+        if (was_in_it_block || (first_cond == 15) ||
+            ((first_cond == al) &&
             (BitCount(Uint32(mask)) != 1))) {
          UnpredictableT32(instr);
        }

@@ -60977,7 +60977,7 @@ void Disassembler::DecodeA32(uint32_t instr) {
       Condition condition((instr >> 28) & 0xf);
       unsigned rd = (instr >> 12) & 0xf;
       uint32_t imm = ImmediateA32::Decode(instr & 0xfff);
-      Location location(-imm, kA32PcDelta);
+      Location location(UnsignedNegate(imm), kA32PcDelta);
       // ADR{<c>}{<q>} <Rd>, <label> ; A2
       adr(condition, Best, Register(rd), &location);
       break;

@@ -95,10 +95,10 @@ QRegister VRegister::Q() const {


 Register RegisterList::GetFirstAvailableRegister() const {
-  for (uint32_t i = 0; i < kNumberOfRegisters; i++) {
-    if (((list_ >> i) & 1) != 0) return Register(i);
-  }
+  if (list_ == 0) {
     return Register();
+  }
+  return Register(CountTrailingZeros(list_));
 }

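The rewritten GetFirstAvailableRegister replaces a linear scan with a count-trailing-zeros step: the index of the lowest set bit in the list mask is the lowest-numbered register. The equivalence, in a self-contained form (CountTrailingZeros here is a naive stand-in for vixl's helper):

    #include <cstdint>

    static int CountTrailingZeros(uint32_t x) {
      // Precondition: x != 0.
      int n = 0;
      while ((x & 1) == 0) {
        x >>= 1;
        ++n;
      }
      return n;
    }

    // For list = 0x4010 (r4 and r14), the lowest register is r4, and
    // CountTrailingZeros(list) == 4 -- the same answer the old for-loop
    // produced, without iterating over unset bits.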
@@ -651,7 +651,7 @@ bool ImmediateT32::IsImmediateT32(uint32_t imm) {
       (((imm & 0xff00) == 0) || ((imm & 0xff) == 0)))
     return true;
   /* isolate least-significant set bit */
-  uint32_t lsb = imm & -imm;
+  uint32_t lsb = imm & UnsignedNegate(imm);
   /* if imm is less than lsb*256 then it fits, but instead we test imm/256 to
    * avoid overflow (underflow is always a successful case) */
   return ((imm >> 8) < lsb);

@@ -702,7 +702,7 @@ bool ImmediateA32::IsImmediateA32(uint32_t imm) {
    * that the least-significant set bit is always an even bit */
   imm = imm | ((imm >> 1) & 0x55555555);
   /* isolate least-significant set bit (always even) */
-  uint32_t lsb = imm & -imm;
+  uint32_t lsb = imm & UnsignedNegate(imm);
   /* if imm is less than lsb*256 then it fits, but instead we test imm/256 to
    * avoid overflow (underflow is always a successful case) */
   return ((imm >> 8) < lsb);

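Both predicates isolate the least-significant set bit with imm & -imm; the change spells the negation as UnsignedNegate(imm) so no unary minus is applied to an unsigned operand (which some warning levels flag). A sketch of the identity, with UnsignedNegate assumed to be the plain two's-complement form:

    #include <cstdint>

    // Two's-complement negation, written without unary minus on unsigned.
    static uint32_t UnsignedNegate(uint32_t value) { return ~value + 1; }

    // imm & UnsignedNegate(imm) keeps only the lowest set bit:
    // for imm = 0x00000ab0 the result is 0x10.
    static uint32_t LowestSetBit(uint32_t imm) {
      return imm & UnsignedNegate(imm);
    }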
@@ -266,8 +266,8 @@ MemOperand MacroAssembler::MemOperandComputationHelper(

   uint32_t load_store_offset = offset & extra_offset_mask;
   uint32_t add_offset = offset & ~extra_offset_mask;
-  if ((add_offset != 0) &&
-      (IsModifiedImmediate(offset) || IsModifiedImmediate(-offset))) {
+  if ((add_offset != 0) && (IsModifiedImmediate(offset) ||
+                            IsModifiedImmediate(UnsignedNegate(offset)))) {
     load_store_offset = 0;
     add_offset = offset;
   }

@@ -288,7 +288,7 @@ MemOperand MacroAssembler::MemOperandComputationHelper(
   // of ADR -- to get behaviour like loads and stores. This ADR can handle
   // at least as much offset as the load_store_offset so it can replace it.

-  uint32_t sub_pc_offset = (-offset) & 0xfff;
+  uint32_t sub_pc_offset = UnsignedNegate(offset) & 0xfff;
   load_store_offset = (offset + sub_pc_offset) & extra_offset_mask;
   add_offset = (offset + sub_pc_offset) & ~extra_offset_mask;

@@ -599,7 +599,7 @@ void MacroAssembler::Printf(const char* format,
   Vmsr(FPSCR, tmp);
   Pop(tmp);
   Msr(APSR_nzcvqg, tmp);
-  // Restore the regsisters.
+  // Restore the registers.
   if (Has32DRegs()) Vpop(Untyped64, DRegisterList(d16, 16));
   Vpop(Untyped64, DRegisterList(d0, 8));
   Pop(RegisterList(saved_registers_mask));

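MemOperandComputationHelper splits a 32-bit offset into a part the load/store immediate can absorb (load_store_offset) and a remainder added to the base register first (add_offset). A toy version of that split, assuming a 12-bit load/store immediate mask (names are illustrative):

    #include <cstdint>

    struct SplitOffset {
      uint32_t load_store_offset;  // Absorbed by the memory operand.
      uint32_t add_offset;         // Applied to the base register first.
    };

    // extra_offset_mask selects the bits the addressing mode can encode
    // directly, e.g. 0xfff for a 12-bit unsigned immediate.
    static SplitOffset Split(uint32_t offset, uint32_t extra_offset_mask) {
      return {offset & extra_offset_mask, offset & ~extra_offset_mask};
    }

    // Split(0x12345, 0xfff) yields {0x345, 0x12000}: one ADD of 0x12000 to
    // a scratch base, then a load/store with immediate offset 0x345.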
File diff suppressed because it is too large
@@ -24,6 +24,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+#if defined(__aarch64__) && (defined(__ANDROID__) || defined(__linux__))
+#include <sys/auxv.h>
+#define VIXL_USE_LINUX_HWCAP 1
+#endif
+
 #include "../utils-vixl.h"

 #include "cpu-aarch64.h"

@@ -31,6 +36,382 @@
 namespace vixl {
 namespace aarch64 {

+
+const IDRegister::Field AA64PFR0::kFP(16, Field::kSigned);
+const IDRegister::Field AA64PFR0::kAdvSIMD(20, Field::kSigned);
+const IDRegister::Field AA64PFR0::kRAS(28);
+const IDRegister::Field AA64PFR0::kSVE(32);
+const IDRegister::Field AA64PFR0::kDIT(48);
+const IDRegister::Field AA64PFR0::kCSV2(56);
+const IDRegister::Field AA64PFR0::kCSV3(60);
+
+const IDRegister::Field AA64PFR1::kBT(0);
+const IDRegister::Field AA64PFR1::kSSBS(4);
+const IDRegister::Field AA64PFR1::kMTE(8);
+const IDRegister::Field AA64PFR1::kSME(24);
+
+const IDRegister::Field AA64ISAR0::kAES(4);
+const IDRegister::Field AA64ISAR0::kSHA1(8);
+const IDRegister::Field AA64ISAR0::kSHA2(12);
+const IDRegister::Field AA64ISAR0::kCRC32(16);
+const IDRegister::Field AA64ISAR0::kAtomic(20);
+const IDRegister::Field AA64ISAR0::kRDM(28);
+const IDRegister::Field AA64ISAR0::kSHA3(32);
+const IDRegister::Field AA64ISAR0::kSM3(36);
+const IDRegister::Field AA64ISAR0::kSM4(40);
+const IDRegister::Field AA64ISAR0::kDP(44);
+const IDRegister::Field AA64ISAR0::kFHM(48);
+const IDRegister::Field AA64ISAR0::kTS(52);
+const IDRegister::Field AA64ISAR0::kRNDR(60);
+
+const IDRegister::Field AA64ISAR1::kDPB(0);
+const IDRegister::Field AA64ISAR1::kAPA(4);
+const IDRegister::Field AA64ISAR1::kAPI(8);
+const IDRegister::Field AA64ISAR1::kJSCVT(12);
+const IDRegister::Field AA64ISAR1::kFCMA(16);
+const IDRegister::Field AA64ISAR1::kLRCPC(20);
+const IDRegister::Field AA64ISAR1::kGPA(24);
+const IDRegister::Field AA64ISAR1::kGPI(28);
+const IDRegister::Field AA64ISAR1::kFRINTTS(32);
+const IDRegister::Field AA64ISAR1::kSB(36);
+const IDRegister::Field AA64ISAR1::kSPECRES(40);
+const IDRegister::Field AA64ISAR1::kBF16(44);
+const IDRegister::Field AA64ISAR1::kDGH(48);
+const IDRegister::Field AA64ISAR1::kI8MM(52);
+
+const IDRegister::Field AA64ISAR2::kWFXT(0);
+const IDRegister::Field AA64ISAR2::kRPRES(4);
+const IDRegister::Field AA64ISAR2::kMOPS(16);
+const IDRegister::Field AA64ISAR2::kCSSC(52);
+
+const IDRegister::Field AA64MMFR0::kECV(60);
+
+const IDRegister::Field AA64MMFR1::kLO(16);
+const IDRegister::Field AA64MMFR1::kAFP(44);
+
+const IDRegister::Field AA64MMFR2::kAT(32);
+
+const IDRegister::Field AA64ZFR0::kSVEver(0);
+const IDRegister::Field AA64ZFR0::kAES(4);
+const IDRegister::Field AA64ZFR0::kBitPerm(16);
+const IDRegister::Field AA64ZFR0::kBF16(20);
+const IDRegister::Field AA64ZFR0::kSHA3(32);
+const IDRegister::Field AA64ZFR0::kSM4(40);
+const IDRegister::Field AA64ZFR0::kI8MM(44);
+const IDRegister::Field AA64ZFR0::kF32MM(52);
+const IDRegister::Field AA64ZFR0::kF64MM(56);
+
+const IDRegister::Field AA64SMFR0::kSMEf32f32(32, 1);
+const IDRegister::Field AA64SMFR0::kSMEb16f32(34, 1);
+const IDRegister::Field AA64SMFR0::kSMEf16f32(35, 1);
+const IDRegister::Field AA64SMFR0::kSMEi8i32(36);
+const IDRegister::Field AA64SMFR0::kSMEf64f64(48, 1);
+const IDRegister::Field AA64SMFR0::kSMEi16i64(52);
+const IDRegister::Field AA64SMFR0::kSMEfa64(63, 1);
+
+CPUFeatures AA64PFR0::GetCPUFeatures() const {
+  CPUFeatures f;
+  if (Get(kFP) >= 0) f.Combine(CPUFeatures::kFP);
+  if (Get(kFP) >= 1) f.Combine(CPUFeatures::kFPHalf);
+  if (Get(kAdvSIMD) >= 0) f.Combine(CPUFeatures::kNEON);
+  if (Get(kAdvSIMD) >= 1) f.Combine(CPUFeatures::kNEONHalf);
+  if (Get(kRAS) >= 1) f.Combine(CPUFeatures::kRAS);
+  if (Get(kSVE) >= 1) f.Combine(CPUFeatures::kSVE);
+  if (Get(kDIT) >= 1) f.Combine(CPUFeatures::kDIT);
+  if (Get(kCSV2) >= 1) f.Combine(CPUFeatures::kCSV2);
+  if (Get(kCSV2) >= 2) f.Combine(CPUFeatures::kSCXTNUM);
+  if (Get(kCSV3) >= 1) f.Combine(CPUFeatures::kCSV3);
+  return f;
+}
+
+CPUFeatures AA64PFR1::GetCPUFeatures() const {
+  CPUFeatures f;
+  if (Get(kBT) >= 1) f.Combine(CPUFeatures::kBTI);
+  if (Get(kSSBS) >= 1) f.Combine(CPUFeatures::kSSBS);
+  if (Get(kSSBS) >= 2) f.Combine(CPUFeatures::kSSBSControl);
+  if (Get(kMTE) >= 1) f.Combine(CPUFeatures::kMTEInstructions);
+  if (Get(kMTE) >= 2) f.Combine(CPUFeatures::kMTE);
+  if (Get(kMTE) >= 3) f.Combine(CPUFeatures::kMTE3);
+  if (Get(kSME) >= 1) f.Combine(CPUFeatures::kSME);
+  return f;
+}
+
+CPUFeatures AA64ISAR0::GetCPUFeatures() const {
+  CPUFeatures f;
+  if (Get(kAES) >= 1) f.Combine(CPUFeatures::kAES);
+  if (Get(kAES) >= 2) f.Combine(CPUFeatures::kPmull1Q);
+  if (Get(kSHA1) >= 1) f.Combine(CPUFeatures::kSHA1);
+  if (Get(kSHA2) >= 1) f.Combine(CPUFeatures::kSHA2);
+  if (Get(kSHA2) >= 2) f.Combine(CPUFeatures::kSHA512);
+  if (Get(kCRC32) >= 1) f.Combine(CPUFeatures::kCRC32);
+  if (Get(kAtomic) >= 1) f.Combine(CPUFeatures::kAtomics);
+  if (Get(kRDM) >= 1) f.Combine(CPUFeatures::kRDM);
+  if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSHA3);
+  if (Get(kSM3) >= 1) f.Combine(CPUFeatures::kSM3);
+  if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSM4);
+  if (Get(kDP) >= 1) f.Combine(CPUFeatures::kDotProduct);
+  if (Get(kFHM) >= 1) f.Combine(CPUFeatures::kFHM);
+  if (Get(kTS) >= 1) f.Combine(CPUFeatures::kFlagM);
+  if (Get(kTS) >= 2) f.Combine(CPUFeatures::kAXFlag);
+  if (Get(kRNDR) >= 1) f.Combine(CPUFeatures::kRNG);
+  return f;
+}
+
+CPUFeatures AA64ISAR1::GetCPUFeatures() const {
+  CPUFeatures f;
+  if (Get(kDPB) >= 1) f.Combine(CPUFeatures::kDCPoP);
+  if (Get(kDPB) >= 2) f.Combine(CPUFeatures::kDCCVADP);
+  if (Get(kJSCVT) >= 1) f.Combine(CPUFeatures::kJSCVT);
+  if (Get(kFCMA) >= 1) f.Combine(CPUFeatures::kFcma);
+  if (Get(kLRCPC) >= 1) f.Combine(CPUFeatures::kRCpc);
+  if (Get(kLRCPC) >= 2) f.Combine(CPUFeatures::kRCpcImm);
+  if (Get(kFRINTTS) >= 1) f.Combine(CPUFeatures::kFrintToFixedSizedInt);
+  if (Get(kSB) >= 1) f.Combine(CPUFeatures::kSB);
+  if (Get(kSPECRES) >= 1) f.Combine(CPUFeatures::kSPECRES);
+  if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kBF16);
+  if (Get(kBF16) >= 2) f.Combine(CPUFeatures::kEBF16);
+  if (Get(kDGH) >= 1) f.Combine(CPUFeatures::kDGH);
+  if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kI8MM);
+
+  // Only one of these fields should be non-zero, but they have the same
+  // encodings, so merge the logic.
+  int apx = std::max(Get(kAPI), Get(kAPA));
+  if (apx >= 1) {
+    f.Combine(CPUFeatures::kPAuth);
+    // APA (rather than API) indicates QARMA.
+    if (Get(kAPA) >= 1) f.Combine(CPUFeatures::kPAuthQARMA);
+    if (apx == 0b0010) f.Combine(CPUFeatures::kPAuthEnhancedPAC);
+    if (apx >= 0b0011) f.Combine(CPUFeatures::kPAuthEnhancedPAC2);
+    if (apx >= 0b0100) f.Combine(CPUFeatures::kPAuthFPAC);
+    if (apx >= 0b0101) f.Combine(CPUFeatures::kPAuthFPACCombined);
+  }
+
+  if (Get(kGPI) >= 1) f.Combine(CPUFeatures::kPAuthGeneric);
+  if (Get(kGPA) >= 1) {
+    f.Combine(CPUFeatures::kPAuthGeneric, CPUFeatures::kPAuthGenericQARMA);
+  }
+  return f;
+}
+
+CPUFeatures AA64ISAR2::GetCPUFeatures() const {
+  CPUFeatures f;
+  if (Get(kWFXT) >= 2) f.Combine(CPUFeatures::kWFXT);
+  if (Get(kRPRES) >= 1) f.Combine(CPUFeatures::kRPRES);
+  if (Get(kMOPS) >= 1) f.Combine(CPUFeatures::kMOPS);
+  if (Get(kCSSC) >= 1) f.Combine(CPUFeatures::kCSSC);
+  return f;
+}
+
+CPUFeatures AA64MMFR0::GetCPUFeatures() const {
+  CPUFeatures f;
+  if (Get(kECV) >= 1) f.Combine(CPUFeatures::kECV);
+  return f;
+}
+
+CPUFeatures AA64MMFR1::GetCPUFeatures() const {
+  CPUFeatures f;
+  if (Get(kLO) >= 1) f.Combine(CPUFeatures::kLORegions);
+  if (Get(kAFP) >= 1) f.Combine(CPUFeatures::kAFP);
+  return f;
+}
+
+CPUFeatures AA64MMFR2::GetCPUFeatures() const {
+  CPUFeatures f;
+  if (Get(kAT) >= 1) f.Combine(CPUFeatures::kUSCAT);
+  return f;
+}
+
+CPUFeatures AA64ZFR0::GetCPUFeatures() const {
+  // This register is only available with SVE, but reads-as-zero in its absence,
+  // so it's always safe to read it.
+  CPUFeatures f;
+  if (Get(kF64MM) >= 1) f.Combine(CPUFeatures::kSVEF64MM);
+  if (Get(kF32MM) >= 1) f.Combine(CPUFeatures::kSVEF32MM);
+  if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kSVEI8MM);
+  if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSVESM4);
+  if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSVESHA3);
+  if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kSVEBF16);
+  if (Get(kBF16) >= 2) f.Combine(CPUFeatures::kSVE_EBF16);
+  if (Get(kBitPerm) >= 1) f.Combine(CPUFeatures::kSVEBitPerm);
+  if (Get(kAES) >= 1) f.Combine(CPUFeatures::kSVEAES);
+  if (Get(kAES) >= 2) f.Combine(CPUFeatures::kSVEPmull128);
+  if (Get(kSVEver) >= 1) f.Combine(CPUFeatures::kSVE2);
+  return f;
+}
+
+CPUFeatures AA64SMFR0::GetCPUFeatures() const {
+  CPUFeatures f;
+  if (Get(kSMEf32f32) >= 1) f.Combine(CPUFeatures::kSMEf32f32);
+  if (Get(kSMEb16f32) >= 1) f.Combine(CPUFeatures::kSMEb16f32);
+  if (Get(kSMEf16f32) >= 1) f.Combine(CPUFeatures::kSMEf16f32);
+  if (Get(kSMEi8i32) >= 15) f.Combine(CPUFeatures::kSMEi8i32);
+  if (Get(kSMEf64f64) >= 1) f.Combine(CPUFeatures::kSMEf64f64);
+  if (Get(kSMEi16i64) >= 15) f.Combine(CPUFeatures::kSMEi16i64);
+  if (Get(kSMEfa64) >= 1) f.Combine(CPUFeatures::kSMEfa64);
+  return f;
+}
+
+int IDRegister::Get(IDRegister::Field field) const {
+  int msb = field.GetMsb();
+  int lsb = field.GetLsb();
+  VIXL_STATIC_ASSERT(static_cast<size_t>(Field::kMaxWidthInBits) <
+                     (sizeof(int) * kBitsPerByte));
+  switch (field.GetType()) {
+    case Field::kSigned:
+      return static_cast<int>(ExtractSignedBitfield64(msb, lsb, value_));
+    case Field::kUnsigned:
+      return static_cast<int>(ExtractUnsignedBitfield64(msb, lsb, value_));
+  }
+  VIXL_UNREACHABLE();
+  return 0;
+}
+
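IDRegister::Get above extracts an ID field and, for fields like AA64PFR0.FP declared kSigned, sign-extends it so that the "not implemented" value 0b1111 reads as -1 while 0 and 1 read as increasing levels of support. A freestanding sketch of both extractions, with the width fixed at 4 bits as these ID fields are:

    #include <cstdint>

    static int64_t ExtractUnsignedField4(uint64_t reg, int lsb) {
      return (reg >> lsb) & 0xf;
    }

    static int64_t ExtractSignedField4(uint64_t reg, int lsb) {
      int64_t field = ExtractUnsignedField4(reg, lsb);
      // Sign-extend from bit 3: 0b1111 becomes -1 ("not implemented").
      return (field ^ 0x8) - 0x8;
    }

    // With AA64PFR0.FP at lsb 16: a raw value of 0xf decodes to -1 (no FP),
    // 0x0 to 0 (FP present), 0x1 to 1 (FP with half-precision support).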
+CPUFeatures CPU::InferCPUFeaturesFromIDRegisters() {
+  CPUFeatures f;
+#define VIXL_COMBINE_ID_REG(NAME, MRS_ARG) \
+  f.Combine(Read##NAME().GetCPUFeatures());
+  VIXL_AARCH64_ID_REG_LIST(VIXL_COMBINE_ID_REG)
+#undef VIXL_COMBINE_ID_REG
+  return f;
+}
+
+CPUFeatures CPU::InferCPUFeaturesFromOS(
+    CPUFeatures::QueryIDRegistersOption option) {
+  CPUFeatures features;
+
+#ifdef VIXL_USE_LINUX_HWCAP
+  // Map each set bit onto a feature. Ideally, we'd use HWCAP_* macros rather
+  // than explicit bits, but explicit bits allow us to identify features that
+  // the toolchain doesn't know about.
+  static const CPUFeatures::Feature kFeatureBitsLow[] =
+      {// Bits 0-7
+       CPUFeatures::kFP,
+       CPUFeatures::kNEON,
+       CPUFeatures::kNone,  // "EVTSTRM", which VIXL doesn't track.
+       CPUFeatures::kAES,
+       CPUFeatures::kPmull1Q,
+       CPUFeatures::kSHA1,
+       CPUFeatures::kSHA2,
+       CPUFeatures::kCRC32,
+       // Bits 8-15
+       CPUFeatures::kAtomics,
+       CPUFeatures::kFPHalf,
+       CPUFeatures::kNEONHalf,
+       CPUFeatures::kIDRegisterEmulation,
+       CPUFeatures::kRDM,
+       CPUFeatures::kJSCVT,
+       CPUFeatures::kFcma,
+       CPUFeatures::kRCpc,
+       // Bits 16-23
+       CPUFeatures::kDCPoP,
+       CPUFeatures::kSHA3,
+       CPUFeatures::kSM3,
+       CPUFeatures::kSM4,
+       CPUFeatures::kDotProduct,
+       CPUFeatures::kSHA512,
+       CPUFeatures::kSVE,
+       CPUFeatures::kFHM,
+       // Bits 24-31
+       CPUFeatures::kDIT,
+       CPUFeatures::kUSCAT,
+       CPUFeatures::kRCpcImm,
+       CPUFeatures::kFlagM,
+       CPUFeatures::kSSBSControl,
+       CPUFeatures::kSB,
+       CPUFeatures::kPAuth,
+       CPUFeatures::kPAuthGeneric};
+  VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsLow) < 64);
+
+  static const CPUFeatures::Feature kFeatureBitsHigh[] =
+      {// Bits 0-7
+       CPUFeatures::kDCCVADP,
+       CPUFeatures::kSVE2,
+       CPUFeatures::kSVEAES,
+       CPUFeatures::kSVEPmull128,
+       CPUFeatures::kSVEBitPerm,
+       CPUFeatures::kSVESHA3,
+       CPUFeatures::kSVESM4,
+       CPUFeatures::kAXFlag,
+       // Bits 8-15
+       CPUFeatures::kFrintToFixedSizedInt,
+       CPUFeatures::kSVEI8MM,
+       CPUFeatures::kSVEF32MM,
+       CPUFeatures::kSVEF64MM,
+       CPUFeatures::kSVEBF16,
+       CPUFeatures::kI8MM,
+       CPUFeatures::kBF16,
+       CPUFeatures::kDGH,
+       // Bits 16-23
+       CPUFeatures::kRNG,
+       CPUFeatures::kBTI,
+       CPUFeatures::kMTE,
+       CPUFeatures::kECV,
+       CPUFeatures::kAFP,
+       CPUFeatures::kRPRES,
+       CPUFeatures::kMTE3,
+       CPUFeatures::kSME,
+       // Bits 24-31
+       CPUFeatures::kSMEi16i64,
+       CPUFeatures::kSMEf64f64,
+       CPUFeatures::kSMEi8i32,
+       CPUFeatures::kSMEf16f32,
+       CPUFeatures::kSMEb16f32,
+       CPUFeatures::kSMEf32f32,
+       CPUFeatures::kSMEfa64,
+       CPUFeatures::kWFXT,
+       // Bits 32-39
+       CPUFeatures::kEBF16,
+       CPUFeatures::kSVE_EBF16};
+  VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsHigh) < 64);
+
+  auto combine_features = [&features](uint64_t hwcap,
+                                      const CPUFeatures::Feature* feature_array,
+                                      size_t features_size) {
+    for (size_t i = 0; i < features_size; i++) {
+      if (hwcap & (UINT64_C(1) << i)) features.Combine(feature_array[i]);
+    }
+  };
+
+  uint64_t hwcap_low = getauxval(AT_HWCAP);
+  uint64_t hwcap_high = getauxval(AT_HWCAP2);
+
+  combine_features(hwcap_low, kFeatureBitsLow, ArrayLength(kFeatureBitsLow));
+  combine_features(hwcap_high, kFeatureBitsHigh, ArrayLength(kFeatureBitsHigh));
+
+  // MTE support from HWCAP2 signifies FEAT_MTE1 and FEAT_MTE2 support
+  if (features.Has(CPUFeatures::kMTE)) {
+    features.Combine(CPUFeatures::kMTEInstructions);
+  }
+#endif  // VIXL_USE_LINUX_HWCAP
+
+  if ((option == CPUFeatures::kQueryIDRegistersIfAvailable) &&
+      (features.Has(CPUFeatures::kIDRegisterEmulation))) {
+    features.Combine(InferCPUFeaturesFromIDRegisters());
+  }
+  return features;
+}
+
+
+#ifdef __aarch64__
+#define VIXL_READ_ID_REG(NAME, MRS_ARG)        \
+  NAME CPU::Read##NAME() {                     \
+    uint64_t value = 0;                        \
+    __asm__("mrs %0, " MRS_ARG : "=r"(value)); \
+    return NAME(value);                        \
+  }
+#else  // __aarch64__
+#define VIXL_READ_ID_REG(NAME, MRS_ARG) \
+  NAME CPU::Read##NAME() {              \
+    VIXL_UNREACHABLE();                 \
+    return NAME(0);                     \
+  }
+#endif  // __aarch64__
+
+VIXL_AARCH64_ID_REG_LIST(VIXL_READ_ID_REG)
+
+#undef VIXL_READ_ID_REG
+
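InferCPUFeaturesFromOS walks the AT_HWCAP/AT_HWCAP2 bit vectors reported by the Linux kernel and maps each set bit to a feature through the tables above. A minimal standalone probe of the same mechanism (Linux-only; like the diff, it uses explicit bit positions rather than HWCAP_* macros so it also builds against old toolchain headers):

    #include <cstdint>
    #include <cstdio>
    #include <sys/auxv.h>

    int main() {
      const uint64_t hwcap = getauxval(AT_HWCAP);
      // Bit positions as in kFeatureBitsLow above.
      printf("fp:      %d\n", (hwcap >> 0) & 1 ? 1 : 0);
      printf("asimd:   %d\n", (hwcap >> 1) & 1 ? 1 : 0);
      printf("aes:     %d\n", (hwcap >> 3) & 1 ? 1 : 0);
      printf("atomics: %d\n", (hwcap >> 8) & 1 ? 1 : 0);
      printf("sve:     %d\n", (hwcap >> 22) & 1 ? 1 : 0);
      return 0;
    }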
 // Initialise to smallest possible cache size.
 unsigned CPU::dcache_line_size_ = 1;
 unsigned CPU::icache_line_size_ = 1;

@@ -76,7 +457,28 @@ uint32_t CPU::GetCacheType() {
 }


-void CPU::EnsureIAndDCacheCoherency(void *address, size_t length) {
+// Query the SVE vector length. This requires CPUFeatures::kSVE.
+int CPU::ReadSVEVectorLengthInBits() {
+#ifdef __aarch64__
+  uint64_t vl;
+  // To support compilers that don't understand `rdvl`, encode the value
+  // directly and move it manually.
+  __asm__(
+      " .word 0x04bf5100\n"  // rdvl x0, #8
+      " mov %[vl], x0\n"
+      : [vl] "=r"(vl)
+      :
+      : "x0");
+  VIXL_ASSERT(vl <= INT_MAX);
+  return static_cast<int>(vl);
+#else
+  VIXL_UNREACHABLE();
+  return 0;
+#endif
+}
+
+
+void CPU::EnsureIAndDCacheCoherency(void* address, size_t length) {
 #ifdef __aarch64__
   // Implement the cache synchronisation for all targets where AArch64 is the
   // host, even if we're building the simulator for an AAarch64 host. This

@@ -174,5 +576,6 @@ void CPU::EnsureIAndDCacheCoherency(void *address, size_t length) {
 #endif
 }

+
 }  // namespace aarch64
 }  // namespace vixl

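ReadSVEVectorLengthInBits hand-encodes rdvl via .word so the file still assembles with toolchains that predate SVE; rdvl x0, #8 yields eight times the vector length in bytes, i.e. the length in bits. A sketch of quantities derived from that value (these helpers are illustrative, not part of vixl):

    #include <cassert>

    static int SveVectorLengthInBytes(int vl_bits) { return vl_bits / 8; }

    static int SveLaneCount(int vl_bits, int lane_size_in_bits) {
      assert(vl_bits % lane_size_in_bits == 0);
      return vl_bits / lane_size_in_bits;
    }

    // For a 256-bit SVE implementation: 32 bytes, 8 x 32-bit lanes,
    // 4 x 64-bit lanes.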
@@ -24,16 +24,54 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+#include "cpu-features-auditor-aarch64.h"
+
 #include "cpu-features.h"
 #include "globals-vixl.h"
 #include "utils-vixl.h"
-#include "decoder-aarch64.h"

-#include "cpu-features-auditor-aarch64.h"
+#include "decoder-aarch64.h"

 namespace vixl {
 namespace aarch64 {

+
+const CPUFeaturesAuditor::FormToVisitorFnMap*
+CPUFeaturesAuditor::GetFormToVisitorFnMap() {
+  static const FormToVisitorFnMap form_to_visitor = {
+      DEFAULT_FORM_TO_VISITOR_MAP(CPUFeaturesAuditor),
+      SIM_AUD_VISITOR_MAP(CPUFeaturesAuditor),
+      {"fcmla_asimdelem_c_h"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+      {"fcmla_asimdelem_c_s"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+      {"fmlal2_asimdelem_lh"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+      {"fmlal_asimdelem_lh"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+      {"fmla_asimdelem_rh_h"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+      {"fmla_asimdelem_r_sd"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+      {"fmlsl2_asimdelem_lh"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+      {"fmlsl_asimdelem_lh"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+      {"fmls_asimdelem_rh_h"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+      {"fmls_asimdelem_r_sd"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+      {"fmulx_asimdelem_rh_h"_h,
+       &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+      {"fmulx_asimdelem_r_sd"_h,
+       &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+      {"fmul_asimdelem_rh_h"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+      {"fmul_asimdelem_r_sd"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+      {"sdot_asimdelem_d"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+      {"smlal_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+      {"smlsl_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+      {"smull_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+      {"sqdmlal_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+      {"sqdmlsl_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+      {"sqdmull_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+      {"udot_asimdelem_d"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+      {"umlal_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+      {"umlsl_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+      {"umull_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+  };
+  return &form_to_visitor;
+}
+
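GetFormToVisitorFnMap keys visitor member functions by a hash of the instruction's "form" string, so dispatch becomes a single map lookup instead of a switch. A scaled-down model of that mechanism (std::unordered_map standing in for vixl's FormToVisitorFnMap, plain strings standing in for the "_h" hashes; class and method names are hypothetical):

    #include <cstdio>
    #include <string>
    #include <unordered_map>

    class Auditor {
     public:
      using VisitorFn = void (Auditor::*)();
      void VisitNEONByIndexedElement() { puts("indexed-element visitor"); }
      void VisitDefault() { puts("default visitor"); }

      void Visit(const std::string& form) {
        static const std::unordered_map<std::string, VisitorFn> map = {
            {"fmla_asimdelem_r_sd", &Auditor::VisitNEONByIndexedElement},
            {"fmul_asimdelem_r_sd", &Auditor::VisitNEONByIndexedElement},
        };
        auto it = map.find(form);
        (this->*(it != map.end() ? it->second : &Auditor::VisitDefault))();
      }
    };

    // Auditor().Visit("fmla_asimdelem_r_sd") calls the indexed-element
    // visitor; unknown forms fall back to the default path.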
 // Every instruction must update last_instruction_, even if only to clear it,
 // and every instruction must also update seen_ once it has been fully handled.
 // This scope makes that simple, and allows early returns in the decode logic.

@@ -140,6 +178,25 @@ void CPUFeaturesAuditor::VisitAddSubWithCarry(const Instruction* instr) {
   USE(instr);
 }

+void CPUFeaturesAuditor::VisitRotateRightIntoFlags(const Instruction* instr) {
+  RecordInstructionFeaturesScope scope(this);
+  switch (instr->Mask(RotateRightIntoFlagsMask)) {
+    case RMIF:
+      scope.Record(CPUFeatures::kFlagM);
+      return;
+  }
+}
+
+void CPUFeaturesAuditor::VisitEvaluateIntoFlags(const Instruction* instr) {
+  RecordInstructionFeaturesScope scope(this);
+  switch (instr->Mask(EvaluateIntoFlagsMask)) {
+    case SETF8:
+    case SETF16:
+      scope.Record(CPUFeatures::kFlagM);
+      return;
+  }
+}
+
 void CPUFeaturesAuditor::VisitAtomicMemory(const Instruction* instr) {
   RecordInstructionFeaturesScope scope(this);
   switch (instr->Mask(AtomicMemoryMask)) {

@@ -254,6 +311,37 @@ void CPUFeaturesAuditor::VisitDataProcessing2Source(const Instruction* instr) {
   }
 }

+void CPUFeaturesAuditor::VisitLoadStoreRCpcUnscaledOffset(
+    const Instruction* instr) {
+  RecordInstructionFeaturesScope scope(this);
+  switch (instr->Mask(LoadStoreRCpcUnscaledOffsetMask)) {
+    case LDAPURB:
+    case LDAPURSB_w:
+    case LDAPURSB_x:
+    case LDAPURH:
+    case LDAPURSH_w:
+    case LDAPURSH_x:
+    case LDAPUR_w:
+    case LDAPURSW:
+    case LDAPUR_x:
+
+    // These stores don't actually have RCpc semantics but they're included with
+    // the RCpc extensions.
+    case STLURB:
+    case STLURH:
+    case STLUR_w:
+    case STLUR_x:
+      scope.Record(CPUFeatures::kRCpc, CPUFeatures::kRCpcImm);
+      return;
+  }
+}
+
+void CPUFeaturesAuditor::VisitLoadStorePAC(const Instruction* instr) {
+  RecordInstructionFeaturesScope scope(this);
+  USE(instr);
+  scope.Record(CPUFeatures::kPAuth);
+}
+
 void CPUFeaturesAuditor::VisitDataProcessing3Source(const Instruction* instr) {
   RecordInstructionFeaturesScope scope(this);
   USE(instr);
@@ -329,6 +417,16 @@ void CPUFeaturesAuditor::VisitFPDataProcessing1Source(
     case FRINTI_h:
       scope.Record(CPUFeatures::kFPHalf);
       return;
+    case FRINT32X_s:
+    case FRINT32X_d:
+    case FRINT32Z_s:
+    case FRINT32Z_d:
+    case FRINT64X_s:
+    case FRINT64X_d:
+    case FRINT64Z_s:
+    case FRINT64Z_d:
+      scope.Record(CPUFeatures::kFrintToFixedSizedInt);
+      return;
     default:
       // No special CPU features.
       // This category includes some half-precision FCVT instructions that do

@@ -410,8 +508,6 @@ void CPUFeaturesAuditor::VisitFPImmediate(const Instruction* instr) {

 void CPUFeaturesAuditor::VisitFPIntegerConvert(const Instruction* instr) {
   RecordInstructionFeaturesScope scope(this);
-  // All of these instructions require FP.
-  scope.Record(CPUFeatures::kFP);
   switch (instr->Mask(FPIntegerConvertMask)) {
     case FCVTAS_wh:
     case FCVTAS_xh:

@@ -441,17 +537,23 @@ void CPUFeaturesAuditor::VisitFPIntegerConvert(const Instruction* instr) {
     case SCVTF_hx:
     case UCVTF_hw:
     case UCVTF_hx:
+      scope.Record(CPUFeatures::kFP);
       scope.Record(CPUFeatures::kFPHalf);
       return;
+    case FMOV_dx:
+      scope.RecordOneOrBothOf(CPUFeatures::kFP, CPUFeatures::kNEON);
+      return;
     case FMOV_d1_x:
     case FMOV_x_d1:
+      scope.Record(CPUFeatures::kFP);
       scope.Record(CPUFeatures::kNEON);
       return;
     case FJCVTZS:
+      scope.Record(CPUFeatures::kFP);
       scope.Record(CPUFeatures::kJSCVT);
       return;
     default:
-      // No special CPU features.
+      scope.Record(CPUFeatures::kFP);
       return;
   }
 }

@@ -611,6 +713,12 @@ void CPUFeaturesAuditor::VisitNEON2RegMisc(const Instruction* instr) {
     case NEON_FCMLT_zero:
       scope.Record(CPUFeatures::kFP);
       return;
+    case NEON_FRINT32X:
+    case NEON_FRINT32Z:
+    case NEON_FRINT64X:
+    case NEON_FRINT64Z:
+      scope.Record(CPUFeatures::kFP, CPUFeatures::kFrintToFixedSizedInt);
+      return;
     default:
       // No additional features.
       return;

@@ -628,6 +736,12 @@ void CPUFeaturesAuditor::VisitNEON3Different(const Instruction* instr) {
   RecordInstructionFeaturesScope scope(this);
   // All of these instructions require NEON.
   scope.Record(CPUFeatures::kNEON);
+  if (form_hash_ == "pmull_asimddiff_l"_h) {
+    if (instr->GetNEONSize() == 3) {
+      // Source is 1D or 2D, destination is 1Q.
+      scope.Record(CPUFeatures::kPmull1Q);
+    }
+  }
   USE(instr);
 }

@@ -638,6 +752,17 @@ void CPUFeaturesAuditor::VisitNEON3Same(const Instruction* instr) {
   if (instr->Mask(NEON3SameFPFMask) == NEON3SameFPFixed) {
     scope.Record(CPUFeatures::kFP);
   }
+  switch (instr->Mask(NEON3SameFHMMask)) {
+    case NEON_FMLAL:
+    case NEON_FMLAL2:
+    case NEON_FMLSL:
+    case NEON_FMLSL2:
+      scope.Record(CPUFeatures::kFP, CPUFeatures::kNEONHalf, CPUFeatures::kFHM);
+      return;
+    default:
+      // No additional features.
+      return;
+  }
 }

 void CPUFeaturesAuditor::VisitNEON3SameExtra(const Instruction* instr) {

@@ -699,7 +824,18 @@ void CPUFeaturesAuditor::VisitNEONByIndexedElement(const Instruction* instr) {
       scope.Record(CPUFeatures::kRDM);
       return;
     default:
-      // Fall through to check other FP instructions.
+      // Fall through to check other instructions.
+      break;
+  }
+  switch (instr->Mask(NEONByIndexedElementFPLongMask)) {
+    case NEON_FMLAL_H_byelement:
+    case NEON_FMLAL2_H_byelement:
+    case NEON_FMLSL_H_byelement:
+    case NEON_FMLSL2_H_byelement:
+      scope.Record(CPUFeatures::kFP, CPUFeatures::kNEONHalf, CPUFeatures::kFHM);
+      return;
+    default:
+      // Fall through to check other instructions.
       break;
   }
   switch (instr->Mask(NEONByIndexedElementFPMask)) {

@@ -782,7 +918,6 @@ void CPUFeaturesAuditor::VisitNEONModifiedImmediate(const Instruction* instr) {
     scope.Record(CPUFeatures::kFP);
     if (instr->ExtractBit(11)) scope.Record(CPUFeatures::kNEONHalf);
   }
-  USE(instr);
 }

 void CPUFeaturesAuditor::VisitNEONPerm(const Instruction* instr) {
@@ -980,6 +1115,165 @@ void CPUFeaturesAuditor::VisitPCRelAddressing(const Instruction* instr) {
   USE(instr);
 }

+// Most SVE visitors require only SVE.
+#define VIXL_SIMPLE_SVE_VISITOR_LIST(V)                        \
+  V(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets)         \
+  V(SVE32BitGatherLoad_VectorPlusImm)                          \
+  V(SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets)  \
+  V(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets)      \
+  V(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets)       \
+  V(SVE32BitGatherPrefetch_VectorPlusImm)                      \
+  V(SVE32BitScatterStore_ScalarPlus32BitScaledOffsets)         \
+  V(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets)       \
+  V(SVE32BitScatterStore_VectorPlusImm)                        \
+  V(SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets)   \
+  V(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsets)           \
+  V(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets)         \
+  V(SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets) \
+  V(SVE64BitGatherLoad_VectorPlusImm)                          \
+  V(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets)       \
+  V(SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets) \
+  V(SVE64BitGatherPrefetch_VectorPlusImm)                      \
+  V(SVE64BitScatterStore_ScalarPlus64BitScaledOffsets)         \
+  V(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets)       \
+  V(SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets) \
+  V(SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets) \
+  V(SVE64BitScatterStore_VectorPlusImm)                        \
+  V(SVEAddressGeneration)                                      \
+  V(SVEBitwiseLogicalUnpredicated)                             \
+  V(SVEBitwiseShiftUnpredicated)                               \
+  V(SVEFFRInitialise)                                          \
+  V(SVEFFRWriteFromPredicate)                                  \
+  V(SVEFPAccumulatingReduction)                                \
+  V(SVEFPArithmeticUnpredicated)                               \
+  V(SVEFPCompareVectors)                                       \
+  V(SVEFPCompareWithZero)                                      \
+  V(SVEFPComplexAddition)                                      \
+  V(SVEFPComplexMulAdd)                                        \
+  V(SVEFPComplexMulAddIndex)                                   \
+  V(SVEFPFastReduction)                                        \
+  V(SVEFPMulIndex)                                             \
+  V(SVEFPMulAdd)                                               \
+  V(SVEFPMulAddIndex)                                          \
+  V(SVEFPUnaryOpUnpredicated)                                  \
+  V(SVEIncDecByPredicateCount)                                 \
+  V(SVEIndexGeneration)                                        \
+  V(SVEIntArithmeticUnpredicated)                              \
+  V(SVEIntCompareSignedImm)                                    \
+  V(SVEIntCompareUnsignedImm)                                  \
+  V(SVEIntCompareVectors)                                      \
+  V(SVEIntMulAddPredicated)                                    \
+  V(SVEIntMulAddUnpredicated)                                  \
+  V(SVEIntReduction)                                           \
+  V(SVEIntUnaryArithmeticPredicated)                           \
+  V(SVEMovprfx)                                                \
+  V(SVEMulIndex)                                               \
+  V(SVEPermuteVectorExtract)                                   \
+  V(SVEPermuteVectorInterleaving)                              \
+  V(SVEPredicateCount)                                         \
+  V(SVEPredicateLogical)                                       \
+  V(SVEPropagateBreak)                                         \
+  V(SVEStackFrameAdjustment)                                   \
+  V(SVEStackFrameSize)                                         \
+  V(SVEVectorSelect)                                           \
+  V(SVEBitwiseLogical_Predicated)                              \
+  V(SVEBitwiseLogicalWithImm_Unpredicated)                     \
+  V(SVEBitwiseShiftByImm_Predicated)                           \
+  V(SVEBitwiseShiftByVector_Predicated)                        \
+  V(SVEBitwiseShiftByWideElements_Predicated)                  \
+  V(SVEBroadcastBitmaskImm)                                    \
+  V(SVEBroadcastFPImm_Unpredicated)                            \
+  V(SVEBroadcastGeneralRegister)                               \
+  V(SVEBroadcastIndexElement)                                  \
+  V(SVEBroadcastIntImm_Unpredicated)                           \
+  V(SVECompressActiveElements)                                 \
+  V(SVEConditionallyBroadcastElementToVector)                  \
+  V(SVEConditionallyExtractElementToSIMDFPScalar)              \
+  V(SVEConditionallyExtractElementToGeneralRegister)           \
+  V(SVEConditionallyTerminateScalars)                          \
+  V(SVEConstructivePrefix_Unpredicated)                        \
+  V(SVEContiguousFirstFaultLoad_ScalarPlusScalar)              \
+  V(SVEContiguousLoad_ScalarPlusImm)                           \
+  V(SVEContiguousLoad_ScalarPlusScalar)                        \
+  V(SVEContiguousNonFaultLoad_ScalarPlusImm)                   \
+  V(SVEContiguousNonTemporalLoad_ScalarPlusImm)                \
+  V(SVEContiguousNonTemporalLoad_ScalarPlusScalar)             \
+  V(SVEContiguousNonTemporalStore_ScalarPlusImm)               \
+  V(SVEContiguousNonTemporalStore_ScalarPlusScalar)            \
+  V(SVEContiguousPrefetch_ScalarPlusImm)                       \
+  V(SVEContiguousPrefetch_ScalarPlusScalar)                    \
+  V(SVEContiguousStore_ScalarPlusImm)                          \
+  V(SVEContiguousStore_ScalarPlusScalar)                       \
+  V(SVECopySIMDFPScalarRegisterToVector_Predicated)            \
+  V(SVECopyFPImm_Predicated)                                   \
+  V(SVECopyGeneralRegisterToVector_Predicated)                 \
+  V(SVECopyIntImm_Predicated)                                  \
+  V(SVEElementCount)                                           \
+  V(SVEExtractElementToSIMDFPScalarRegister)                   \
+  V(SVEExtractElementToGeneralRegister)                        \
+  V(SVEFPArithmetic_Predicated)                                \
+  V(SVEFPArithmeticWithImm_Predicated)                         \
+  V(SVEFPConvertPrecision)                                     \
+  V(SVEFPConvertToInt)                                         \
+  V(SVEFPExponentialAccelerator)                               \
+  V(SVEFPRoundToIntegralValue)                                 \
+  V(SVEFPTrigMulAddCoefficient)                                \
+  V(SVEFPTrigSelectCoefficient)                                \
+  V(SVEFPUnaryOp)                                              \
+  V(SVEIncDecRegisterByElementCount)                           \
+  V(SVEIncDecVectorByElementCount)                             \
+  V(SVEInsertSIMDFPScalarRegister)                             \
+  V(SVEInsertGeneralRegister)                                  \
+  V(SVEIntAddSubtractImm_Unpredicated)                         \
+  V(SVEIntAddSubtractVectors_Predicated)                       \
+  V(SVEIntCompareScalarCountAndLimit)                          \
+  V(SVEIntConvertToFP)                                         \
+  V(SVEIntDivideVectors_Predicated)                            \
+  V(SVEIntMinMaxImm_Unpredicated)                              \
+  V(SVEIntMinMaxDifference_Predicated)                         \
+  V(SVEIntMulImm_Unpredicated)                                 \
+  V(SVEIntMulVectors_Predicated)                               \
+  V(SVELoadAndBroadcastElement)                                \
+  V(SVELoadAndBroadcastQOWord_ScalarPlusImm)                   \
+  V(SVELoadAndBroadcastQOWord_ScalarPlusScalar)                \
+  V(SVELoadMultipleStructures_ScalarPlusImm)                   \
+  V(SVELoadMultipleStructures_ScalarPlusScalar)                \
+  V(SVELoadPredicateRegister)                                  \
+  V(SVELoadVectorRegister)                                     \
+  V(SVEPartitionBreakCondition)                                \
+  V(SVEPermutePredicateElements)                               \
+  V(SVEPredicateFirstActive)                                   \
+  V(SVEPredicateInitialize)                                    \
+  V(SVEPredicateNextActive)                                    \
+  V(SVEPredicateReadFromFFR_Predicated)                        \
+  V(SVEPredicateReadFromFFR_Unpredicated)                      \
+  V(SVEPredicateTest)                                          \
+  V(SVEPredicateZero)                                          \
+  V(SVEPropagateBreakToNextPartition)                          \
+  V(SVEReversePredicateElements)                               \
+  V(SVEReverseVectorElements)                                  \
+  V(SVEReverseWithinElements)                                  \
+  V(SVESaturatingIncDecRegisterByElementCount)                 \
+  V(SVESaturatingIncDecVectorByElementCount)                   \
+  V(SVEStoreMultipleStructures_ScalarPlusImm)                  \
+  V(SVEStoreMultipleStructures_ScalarPlusScalar)               \
+  V(SVEStorePredicateRegister)                                 \
+  V(SVEStoreVectorRegister)                                    \
+  V(SVETableLookup)                                            \
+  V(SVEUnpackPredicateElements)                                \
+  V(SVEUnpackVectorElements)                                   \
+  V(SVEVectorSplice)
+
+#define VIXL_DEFINE_SIMPLE_SVE_VISITOR(NAME)                       \
+  void CPUFeaturesAuditor::Visit##NAME(const Instruction* instr) { \
+    RecordInstructionFeaturesScope scope(this);                    \
+    scope.Record(CPUFeatures::kSVE);                               \
+    USE(instr);                                                    \
+  }
+VIXL_SIMPLE_SVE_VISITOR_LIST(VIXL_DEFINE_SIMPLE_SVE_VISITOR)
+#undef VIXL_DEFINE_SIMPLE_SVE_VISITOR
+#undef VIXL_SIMPLE_SVE_VISITOR_LIST
+
|
void CPUFeaturesAuditor::VisitSystem(const Instruction* instr) {
|
||||||
RecordInstructionFeaturesScope scope(this);
|
RecordInstructionFeaturesScope scope(this);
|
||||||
if (instr->Mask(SystemHintFMask) == SystemHintFixed) {
|
if (instr->Mask(SystemHintFMask) == SystemHintFixed) {
|
||||||
|
@ -1001,7 +1295,19 @@ void CPUFeaturesAuditor::VisitSystem(const Instruction* instr) {
|
||||||
required.Combine(CPUFeatures::kPAuth);
|
required.Combine(CPUFeatures::kPAuth);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
if (instr->GetImmHint() == ESB) required.Combine(CPUFeatures::kRAS);
|
switch (instr->GetImmHint()) {
|
||||||
|
case ESB:
|
||||||
|
required.Combine(CPUFeatures::kRAS);
|
||||||
|
break;
|
||||||
|
case BTI:
|
||||||
|
case BTI_j:
|
||||||
|
case BTI_c:
|
||||||
|
case BTI_jc:
|
||||||
|
required.Combine(CPUFeatures::kBTI);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1009,6 +1315,52 @@ void CPUFeaturesAuditor::VisitSystem(const Instruction* instr) {
|
||||||
// features are not implemented, so we record the corresponding features
|
// features are not implemented, so we record the corresponding features
|
||||||
// only if they are available.
|
// only if they are available.
|
||||||
if (available_.Has(required)) scope.Record(required);
|
if (available_.Has(required)) scope.Record(required);
|
||||||
|
} else if (instr->Mask(SystemSysMask) == SYS) {
|
||||||
|
switch (instr->GetSysOp()) {
|
||||||
|
// DC instruction variants.
|
||||||
|
case CGVAC:
|
||||||
|
case CGDVAC:
|
||||||
|
case CGVAP:
|
||||||
|
case CGDVAP:
|
||||||
|
case CIGVAC:
|
||||||
|
case CIGDVAC:
|
||||||
|
case GVA:
|
||||||
|
case GZVA:
|
||||||
|
scope.Record(CPUFeatures::kMTE);
|
||||||
|
break;
|
||||||
|
case CVAP:
|
||||||
|
scope.Record(CPUFeatures::kDCPoP);
|
||||||
|
break;
|
||||||
|
case CVADP:
|
||||||
|
scope.Record(CPUFeatures::kDCCVADP);
|
||||||
|
break;
|
||||||
|
case IVAU:
|
||||||
|
case CVAC:
|
||||||
|
case CVAU:
|
||||||
|
case CIVAC:
|
||||||
|
case ZVA:
|
||||||
|
// No special CPU features.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else if (instr->Mask(SystemPStateFMask) == SystemPStateFixed) {
|
||||||
|
switch (instr->Mask(SystemPStateMask)) {
|
||||||
|
case CFINV:
|
||||||
|
scope.Record(CPUFeatures::kFlagM);
|
||||||
|
break;
|
||||||
|
case AXFLAG:
|
||||||
|
case XAFLAG:
|
||||||
|
scope.Record(CPUFeatures::kAXFlag);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else if (instr->Mask(SystemSysRegFMask) == SystemSysRegFixed) {
|
||||||
|
if (instr->Mask(SystemSysRegMask) == MRS) {
|
||||||
|
switch (instr->GetImmSystemRegister()) {
|
||||||
|
case RNDR:
|
||||||
|
case RNDRRS:
|
||||||
|
scope.Record(CPUFeatures::kRNG);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
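A note on the pattern above: hint instructions execute as NOPs on cores without
the corresponding extension, so the auditor first accumulates the implied
features in `required` and records them only when the host set actually
contains them. A minimal standalone sketch of that idiom, assuming CPUFeatures
default-constructs to an empty set (values illustrative):

CPUFeatures available(CPUFeatures::kRAS);  // what this host provides
CPUFeatures required;                      // what the instruction implies
required.Combine(CPUFeatures::kBTI);
// Record only when every required feature is present; a BTI hint on a
// non-BTI core behaves as a plain NOP, so nothing would be recorded.
if (available.Has(required)) {
  // ... record `required` ...
}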
@@ -1049,11 +1401,447 @@ void CPUFeaturesAuditor::VisitUnconditionalBranchToRegister(
 }
 }

+void CPUFeaturesAuditor::VisitReserved(const Instruction* instr) {
+  RecordInstructionFeaturesScope scope(this);
+  USE(instr);
+}
+
 void CPUFeaturesAuditor::VisitUnimplemented(const Instruction* instr) {
   RecordInstructionFeaturesScope scope(this);
   USE(instr);
 }

+void CPUFeaturesAuditor::Visit(Metadata* metadata, const Instruction* instr) {
+  VIXL_ASSERT(metadata->count("form") > 0);
+  const std::string& form = (*metadata)["form"];
+  form_hash_ = Hash(form.c_str());
+  const FormToVisitorFnMap* fv = CPUFeaturesAuditor::GetFormToVisitorFnMap();
+  FormToVisitorFnMap::const_iterator it = fv->find(form_hash_);
+  if (it == fv->end()) {
+    RecordInstructionFeaturesScope scope(this);
+    std::map<uint32_t, const CPUFeatures> features = {
+        {"adclb_z_zzz"_h, CPUFeatures::kSVE2},
+        {"adclt_z_zzz"_h, CPUFeatures::kSVE2},
+        {"addhnb_z_zz"_h, CPUFeatures::kSVE2},
+        {"addhnt_z_zz"_h, CPUFeatures::kSVE2},
+        {"addp_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"bcax_z_zzz"_h, CPUFeatures::kSVE2},
+        {"bdep_z_zz"_h,
+         CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEBitPerm)},
+        {"bext_z_zz"_h,
+         CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEBitPerm)},
+        {"bgrp_z_zz"_h,
+         CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEBitPerm)},
+        {"bsl1n_z_zzz"_h, CPUFeatures::kSVE2},
+        {"bsl2n_z_zzz"_h, CPUFeatures::kSVE2},
+        {"bsl_z_zzz"_h, CPUFeatures::kSVE2},
+        {"cadd_z_zz"_h, CPUFeatures::kSVE2},
+        {"cdot_z_zzz"_h, CPUFeatures::kSVE2},
+        {"cdot_z_zzzi_d"_h, CPUFeatures::kSVE2},
+        {"cdot_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"cmla_z_zzz"_h, CPUFeatures::kSVE2},
+        {"cmla_z_zzzi_h"_h, CPUFeatures::kSVE2},
+        {"cmla_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"eor3_z_zzz"_h, CPUFeatures::kSVE2},
+        {"eorbt_z_zz"_h, CPUFeatures::kSVE2},
+        {"eortb_z_zz"_h, CPUFeatures::kSVE2},
+        {"ext_z_zi_con"_h, CPUFeatures::kSVE2},
+        {"faddp_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"fcvtlt_z_p_z_h2s"_h, CPUFeatures::kSVE2},
+        {"fcvtlt_z_p_z_s2d"_h, CPUFeatures::kSVE2},
+        {"fcvtnt_z_p_z_d2s"_h, CPUFeatures::kSVE2},
+        {"fcvtnt_z_p_z_s2h"_h, CPUFeatures::kSVE2},
+        {"fcvtx_z_p_z_d2s"_h, CPUFeatures::kSVE2},
+        {"fcvtxnt_z_p_z_d2s"_h, CPUFeatures::kSVE2},
+        {"flogb_z_p_z"_h, CPUFeatures::kSVE2},
+        {"fmaxnmp_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"fmaxp_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"fminnmp_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"fminp_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"fmlalb_z_zzz"_h, CPUFeatures::kSVE2},
+        {"fmlalb_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"fmlalt_z_zzz"_h, CPUFeatures::kSVE2},
+        {"fmlalt_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"fmlslb_z_zzz"_h, CPUFeatures::kSVE2},
+        {"fmlslb_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"fmlslt_z_zzz"_h, CPUFeatures::kSVE2},
+        {"fmlslt_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"histcnt_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"histseg_z_zz"_h, CPUFeatures::kSVE2},
+        {"ldnt1b_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+        {"ldnt1b_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
+        {"ldnt1d_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+        {"ldnt1h_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+        {"ldnt1h_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
+        {"ldnt1sb_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+        {"ldnt1sb_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
+        {"ldnt1sh_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+        {"ldnt1sh_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
+        {"ldnt1sw_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+        {"ldnt1w_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+        {"ldnt1w_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
+        {"match_p_p_zz"_h, CPUFeatures::kSVE2},
+        {"mla_z_zzzi_d"_h, CPUFeatures::kSVE2},
+        {"mla_z_zzzi_h"_h, CPUFeatures::kSVE2},
+        {"mla_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"mls_z_zzzi_d"_h, CPUFeatures::kSVE2},
+        {"mls_z_zzzi_h"_h, CPUFeatures::kSVE2},
+        {"mls_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"mul_z_zz"_h, CPUFeatures::kSVE2},
+        {"mul_z_zzi_d"_h, CPUFeatures::kSVE2},
+        {"mul_z_zzi_h"_h, CPUFeatures::kSVE2},
+        {"mul_z_zzi_s"_h, CPUFeatures::kSVE2},
+        {"nbsl_z_zzz"_h, CPUFeatures::kSVE2},
+        {"nmatch_p_p_zz"_h, CPUFeatures::kSVE2},
+        {"pmul_z_zz"_h, CPUFeatures::kSVE2},
+        {"pmullb_z_zz"_h, CPUFeatures::kSVE2},
+        {"pmullt_z_zz"_h, CPUFeatures::kSVE2},
+        {"raddhnb_z_zz"_h, CPUFeatures::kSVE2},
+        {"raddhnt_z_zz"_h, CPUFeatures::kSVE2},
+        {"rshrnb_z_zi"_h, CPUFeatures::kSVE2},
+        {"rshrnt_z_zi"_h, CPUFeatures::kSVE2},
+        {"rsubhnb_z_zz"_h, CPUFeatures::kSVE2},
+        {"rsubhnt_z_zz"_h, CPUFeatures::kSVE2},
+        {"saba_z_zzz"_h, CPUFeatures::kSVE2},
+        {"sabalb_z_zzz"_h, CPUFeatures::kSVE2},
+        {"sabalt_z_zzz"_h, CPUFeatures::kSVE2},
+        {"sabdlb_z_zz"_h, CPUFeatures::kSVE2},
+        {"sabdlt_z_zz"_h, CPUFeatures::kSVE2},
+        {"sadalp_z_p_z"_h, CPUFeatures::kSVE2},
+        {"saddlb_z_zz"_h, CPUFeatures::kSVE2},
+        {"saddlbt_z_zz"_h, CPUFeatures::kSVE2},
+        {"saddlt_z_zz"_h, CPUFeatures::kSVE2},
+        {"saddwb_z_zz"_h, CPUFeatures::kSVE2},
+        {"saddwt_z_zz"_h, CPUFeatures::kSVE2},
+        {"sbclb_z_zzz"_h, CPUFeatures::kSVE2},
+        {"sbclt_z_zzz"_h, CPUFeatures::kSVE2},
+        {"shadd_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"shrnb_z_zi"_h, CPUFeatures::kSVE2},
+        {"shrnt_z_zi"_h, CPUFeatures::kSVE2},
+        {"shsub_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"shsubr_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"sli_z_zzi"_h, CPUFeatures::kSVE2},
+        {"smaxp_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"sminp_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"smlalb_z_zzz"_h, CPUFeatures::kSVE2},
+        {"smlalb_z_zzzi_d"_h, CPUFeatures::kSVE2},
+        {"smlalb_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"smlalt_z_zzz"_h, CPUFeatures::kSVE2},
+        {"smlalt_z_zzzi_d"_h, CPUFeatures::kSVE2},
+        {"smlalt_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"smlslb_z_zzz"_h, CPUFeatures::kSVE2},
+        {"smlslb_z_zzzi_d"_h, CPUFeatures::kSVE2},
+        {"smlslb_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"smlslt_z_zzz"_h, CPUFeatures::kSVE2},
+        {"smlslt_z_zzzi_d"_h, CPUFeatures::kSVE2},
+        {"smlslt_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"smulh_z_zz"_h, CPUFeatures::kSVE2},
+        {"smullb_z_zz"_h, CPUFeatures::kSVE2},
+        {"smullb_z_zzi_d"_h, CPUFeatures::kSVE2},
+        {"smullb_z_zzi_s"_h, CPUFeatures::kSVE2},
+        {"smullt_z_zz"_h, CPUFeatures::kSVE2},
+        {"smullt_z_zzi_d"_h, CPUFeatures::kSVE2},
+        {"smullt_z_zzi_s"_h, CPUFeatures::kSVE2},
+        {"splice_z_p_zz_con"_h, CPUFeatures::kSVE2},
+        {"sqabs_z_p_z"_h, CPUFeatures::kSVE2},
+        {"sqadd_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"sqcadd_z_zz"_h, CPUFeatures::kSVE2},
+        {"sqdmlalb_z_zzz"_h, CPUFeatures::kSVE2},
+        {"sqdmlalb_z_zzzi_d"_h, CPUFeatures::kSVE2},
+        {"sqdmlalb_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"sqdmlalbt_z_zzz"_h, CPUFeatures::kSVE2},
+        {"sqdmlalt_z_zzz"_h, CPUFeatures::kSVE2},
+        {"sqdmlalt_z_zzzi_d"_h, CPUFeatures::kSVE2},
+        {"sqdmlalt_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"sqdmlslb_z_zzz"_h, CPUFeatures::kSVE2},
+        {"sqdmlslb_z_zzzi_d"_h, CPUFeatures::kSVE2},
+        {"sqdmlslb_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"sqdmlslbt_z_zzz"_h, CPUFeatures::kSVE2},
+        {"sqdmlslt_z_zzz"_h, CPUFeatures::kSVE2},
+        {"sqdmlslt_z_zzzi_d"_h, CPUFeatures::kSVE2},
+        {"sqdmlslt_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"sqdmulh_z_zz"_h, CPUFeatures::kSVE2},
+        {"sqdmulh_z_zzi_d"_h, CPUFeatures::kSVE2},
+        {"sqdmulh_z_zzi_h"_h, CPUFeatures::kSVE2},
+        {"sqdmulh_z_zzi_s"_h, CPUFeatures::kSVE2},
+        {"sqdmullb_z_zz"_h, CPUFeatures::kSVE2},
+        {"sqdmullb_z_zzi_d"_h, CPUFeatures::kSVE2},
+        {"sqdmullb_z_zzi_s"_h, CPUFeatures::kSVE2},
+        {"sqdmullt_z_zz"_h, CPUFeatures::kSVE2},
+        {"sqdmullt_z_zzi_d"_h, CPUFeatures::kSVE2},
+        {"sqdmullt_z_zzi_s"_h, CPUFeatures::kSVE2},
+        {"sqneg_z_p_z"_h, CPUFeatures::kSVE2},
+        {"sqrdcmlah_z_zzz"_h, CPUFeatures::kSVE2},
+        {"sqrdcmlah_z_zzzi_h"_h, CPUFeatures::kSVE2},
+        {"sqrdcmlah_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"sqrdmlah_z_zzz"_h, CPUFeatures::kSVE2},
+        {"sqrdmlah_z_zzzi_d"_h, CPUFeatures::kSVE2},
+        {"sqrdmlah_z_zzzi_h"_h, CPUFeatures::kSVE2},
+        {"sqrdmlah_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"sqrdmlsh_z_zzz"_h, CPUFeatures::kSVE2},
+        {"sqrdmlsh_z_zzzi_d"_h, CPUFeatures::kSVE2},
+        {"sqrdmlsh_z_zzzi_h"_h, CPUFeatures::kSVE2},
+        {"sqrdmlsh_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"sqrdmulh_z_zz"_h, CPUFeatures::kSVE2},
+        {"sqrdmulh_z_zzi_d"_h, CPUFeatures::kSVE2},
+        {"sqrdmulh_z_zzi_h"_h, CPUFeatures::kSVE2},
+        {"sqrdmulh_z_zzi_s"_h, CPUFeatures::kSVE2},
+        {"sqrshl_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"sqrshlr_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"sqrshrnb_z_zi"_h, CPUFeatures::kSVE2},
+        {"sqrshrnt_z_zi"_h, CPUFeatures::kSVE2},
+        {"sqrshrunb_z_zi"_h, CPUFeatures::kSVE2},
+        {"sqrshrunt_z_zi"_h, CPUFeatures::kSVE2},
+        {"sqshl_z_p_zi"_h, CPUFeatures::kSVE2},
+        {"sqshl_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"sqshlr_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"sqshlu_z_p_zi"_h, CPUFeatures::kSVE2},
+        {"sqshrnb_z_zi"_h, CPUFeatures::kSVE2},
+        {"sqshrnt_z_zi"_h, CPUFeatures::kSVE2},
+        {"sqshrunb_z_zi"_h, CPUFeatures::kSVE2},
+        {"sqshrunt_z_zi"_h, CPUFeatures::kSVE2},
+        {"sqsub_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"sqsubr_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"sqxtnb_z_zz"_h, CPUFeatures::kSVE2},
+        {"sqxtnt_z_zz"_h, CPUFeatures::kSVE2},
+        {"sqxtunb_z_zz"_h, CPUFeatures::kSVE2},
+        {"sqxtunt_z_zz"_h, CPUFeatures::kSVE2},
+        {"srhadd_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"sri_z_zzi"_h, CPUFeatures::kSVE2},
+        {"srshl_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"srshlr_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"srshr_z_p_zi"_h, CPUFeatures::kSVE2},
+        {"srsra_z_zi"_h, CPUFeatures::kSVE2},
+        {"sshllb_z_zi"_h, CPUFeatures::kSVE2},
+        {"sshllt_z_zi"_h, CPUFeatures::kSVE2},
+        {"ssra_z_zi"_h, CPUFeatures::kSVE2},
+        {"ssublb_z_zz"_h, CPUFeatures::kSVE2},
+        {"ssublbt_z_zz"_h, CPUFeatures::kSVE2},
+        {"ssublt_z_zz"_h, CPUFeatures::kSVE2},
+        {"ssubltb_z_zz"_h, CPUFeatures::kSVE2},
+        {"ssubwb_z_zz"_h, CPUFeatures::kSVE2},
+        {"ssubwt_z_zz"_h, CPUFeatures::kSVE2},
+        {"stnt1b_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+        {"stnt1b_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
+        {"stnt1d_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+        {"stnt1h_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+        {"stnt1h_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
+        {"stnt1w_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+        {"stnt1w_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
+        {"subhnb_z_zz"_h, CPUFeatures::kSVE2},
+        {"subhnt_z_zz"_h, CPUFeatures::kSVE2},
+        {"suqadd_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"tbl_z_zz_2"_h, CPUFeatures::kSVE2},
+        {"tbx_z_zz"_h, CPUFeatures::kSVE2},
+        {"uaba_z_zzz"_h, CPUFeatures::kSVE2},
+        {"uabalb_z_zzz"_h, CPUFeatures::kSVE2},
+        {"uabalt_z_zzz"_h, CPUFeatures::kSVE2},
+        {"uabdlb_z_zz"_h, CPUFeatures::kSVE2},
+        {"uabdlt_z_zz"_h, CPUFeatures::kSVE2},
+        {"uadalp_z_p_z"_h, CPUFeatures::kSVE2},
+        {"uaddlb_z_zz"_h, CPUFeatures::kSVE2},
+        {"uaddlt_z_zz"_h, CPUFeatures::kSVE2},
+        {"uaddwb_z_zz"_h, CPUFeatures::kSVE2},
+        {"uaddwt_z_zz"_h, CPUFeatures::kSVE2},
+        {"uhadd_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"uhsub_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"uhsubr_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"umaxp_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"uminp_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"umlalb_z_zzz"_h, CPUFeatures::kSVE2},
+        {"umlalb_z_zzzi_d"_h, CPUFeatures::kSVE2},
+        {"umlalb_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"umlalt_z_zzz"_h, CPUFeatures::kSVE2},
+        {"umlalt_z_zzzi_d"_h, CPUFeatures::kSVE2},
+        {"umlalt_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"umlslb_z_zzz"_h, CPUFeatures::kSVE2},
+        {"umlslb_z_zzzi_d"_h, CPUFeatures::kSVE2},
+        {"umlslb_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"umlslt_z_zzz"_h, CPUFeatures::kSVE2},
+        {"umlslt_z_zzzi_d"_h, CPUFeatures::kSVE2},
+        {"umlslt_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"umulh_z_zz"_h, CPUFeatures::kSVE2},
+        {"umullb_z_zz"_h, CPUFeatures::kSVE2},
+        {"umullb_z_zzi_d"_h, CPUFeatures::kSVE2},
+        {"umullb_z_zzi_s"_h, CPUFeatures::kSVE2},
+        {"umullt_z_zz"_h, CPUFeatures::kSVE2},
+        {"umullt_z_zzi_d"_h, CPUFeatures::kSVE2},
+        {"umullt_z_zzi_s"_h, CPUFeatures::kSVE2},
+        {"uqadd_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"uqrshl_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"uqrshlr_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"uqrshrnb_z_zi"_h, CPUFeatures::kSVE2},
+        {"uqrshrnt_z_zi"_h, CPUFeatures::kSVE2},
+        {"uqshl_z_p_zi"_h, CPUFeatures::kSVE2},
+        {"uqshl_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"uqshlr_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"uqshrnb_z_zi"_h, CPUFeatures::kSVE2},
+        {"uqshrnt_z_zi"_h, CPUFeatures::kSVE2},
+        {"uqsub_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"uqsubr_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"uqxtnb_z_zz"_h, CPUFeatures::kSVE2},
+        {"uqxtnt_z_zz"_h, CPUFeatures::kSVE2},
+        {"urecpe_z_p_z"_h, CPUFeatures::kSVE2},
+        {"urhadd_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"urshl_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"urshlr_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"urshr_z_p_zi"_h, CPUFeatures::kSVE2},
+        {"ursqrte_z_p_z"_h, CPUFeatures::kSVE2},
+        {"ursra_z_zi"_h, CPUFeatures::kSVE2},
+        {"ushllb_z_zi"_h, CPUFeatures::kSVE2},
+        {"ushllt_z_zi"_h, CPUFeatures::kSVE2},
+        {"usqadd_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"usra_z_zi"_h, CPUFeatures::kSVE2},
+        {"usublb_z_zz"_h, CPUFeatures::kSVE2},
+        {"usublt_z_zz"_h, CPUFeatures::kSVE2},
+        {"usubwb_z_zz"_h, CPUFeatures::kSVE2},
+        {"usubwt_z_zz"_h, CPUFeatures::kSVE2},
+        {"whilege_p_p_rr"_h, CPUFeatures::kSVE2},
+        {"whilegt_p_p_rr"_h, CPUFeatures::kSVE2},
+        {"whilehi_p_p_rr"_h, CPUFeatures::kSVE2},
+        {"whilehs_p_p_rr"_h, CPUFeatures::kSVE2},
+        {"whilerw_p_rr"_h, CPUFeatures::kSVE2},
+        {"whilewr_p_rr"_h, CPUFeatures::kSVE2},
+        {"xar_z_zzi"_h, CPUFeatures::kSVE2},
+        {"smmla_z_zzz"_h,
+         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)},
+        {"ummla_z_zzz"_h,
+         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)},
+        {"usmmla_z_zzz"_h,
+         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)},
+        {"fmmla_z_zzz_s"_h,
+         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF32MM)},
+        {"fmmla_z_zzz_d"_h,
+         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
+        {"smmla_asimdsame2_g"_h,
+         CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)},
+        {"ummla_asimdsame2_g"_h,
+         CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)},
+        {"usmmla_asimdsame2_g"_h,
+         CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)},
+        {"ld1row_z_p_bi_u32"_h,
+         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
+        {"ld1row_z_p_br_contiguous"_h,
+         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
+        {"ld1rod_z_p_bi_u64"_h,
+         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
+        {"ld1rod_z_p_br_contiguous"_h,
+         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
+        {"ld1rob_z_p_bi_u8"_h,
+         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
+        {"ld1rob_z_p_br_contiguous"_h,
+         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
+        {"ld1roh_z_p_bi_u16"_h,
+         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
+        {"ld1roh_z_p_br_contiguous"_h,
+         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
+        {"usdot_asimdsame2_d"_h,
+         CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)},
+        {"sudot_asimdelem_d"_h,
+         CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)},
+        {"usdot_asimdelem_d"_h,
+         CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)},
+        {"usdot_z_zzz_s"_h,
+         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)},
+        {"usdot_z_zzzi_s"_h,
+         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)},
+        {"sudot_z_zzzi_s"_h,
+         CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)},
+        {"addg_64_addsub_immtags"_h, CPUFeatures::kMTE},
+        {"gmi_64g_dp_2src"_h, CPUFeatures::kMTE},
+        {"irg_64i_dp_2src"_h, CPUFeatures::kMTE},
+        {"ldg_64loffset_ldsttags"_h, CPUFeatures::kMTE},
+        {"st2g_64soffset_ldsttags"_h, CPUFeatures::kMTE},
+        {"st2g_64spost_ldsttags"_h, CPUFeatures::kMTE},
+        {"st2g_64spre_ldsttags"_h, CPUFeatures::kMTE},
+        {"stgp_64_ldstpair_off"_h, CPUFeatures::kMTE},
+        {"stgp_64_ldstpair_post"_h, CPUFeatures::kMTE},
+        {"stgp_64_ldstpair_pre"_h, CPUFeatures::kMTE},
+        {"stg_64soffset_ldsttags"_h, CPUFeatures::kMTE},
+        {"stg_64spost_ldsttags"_h, CPUFeatures::kMTE},
+        {"stg_64spre_ldsttags"_h, CPUFeatures::kMTE},
+        {"stz2g_64soffset_ldsttags"_h, CPUFeatures::kMTE},
+        {"stz2g_64spost_ldsttags"_h, CPUFeatures::kMTE},
+        {"stz2g_64spre_ldsttags"_h, CPUFeatures::kMTE},
+        {"stzg_64soffset_ldsttags"_h, CPUFeatures::kMTE},
+        {"stzg_64spost_ldsttags"_h, CPUFeatures::kMTE},
+        {"stzg_64spre_ldsttags"_h, CPUFeatures::kMTE},
+        {"subg_64_addsub_immtags"_h, CPUFeatures::kMTE},
+        {"subps_64s_dp_2src"_h, CPUFeatures::kMTE},
+        {"subp_64s_dp_2src"_h, CPUFeatures::kMTE},
+        {"cpyen_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyern_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyewn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpye_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyfen_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyfern_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyfewn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyfe_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyfmn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyfmrn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyfmwn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyfm_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyfpn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyfprn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyfpwn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyfp_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpymn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpymrn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpymwn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpym_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpypn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyprn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpypwn_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"cpyp_cpy_memcms"_h, CPUFeatures::kMOPS},
+        {"seten_set_memcms"_h, CPUFeatures::kMOPS},
+        {"sete_set_memcms"_h, CPUFeatures::kMOPS},
+        {"setgen_set_memcms"_h,
+         CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)},
+        {"setge_set_memcms"_h,
+         CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)},
+        {"setgmn_set_memcms"_h,
+         CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)},
+        {"setgm_set_memcms"_h,
+         CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)},
+        {"setgpn_set_memcms"_h,
+         CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)},
+        {"setgp_set_memcms"_h,
+         CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)},
+        {"setmn_set_memcms"_h, CPUFeatures::kMOPS},
+        {"setm_set_memcms"_h, CPUFeatures::kMOPS},
+        {"setpn_set_memcms"_h, CPUFeatures::kMOPS},
+        {"setp_set_memcms"_h, CPUFeatures::kMOPS},
+        {"abs_32_dp_1src"_h, CPUFeatures::kCSSC},
+        {"abs_64_dp_1src"_h, CPUFeatures::kCSSC},
+        {"cnt_32_dp_1src"_h, CPUFeatures::kCSSC},
+        {"cnt_64_dp_1src"_h, CPUFeatures::kCSSC},
+        {"ctz_32_dp_1src"_h, CPUFeatures::kCSSC},
+        {"ctz_64_dp_1src"_h, CPUFeatures::kCSSC},
+        {"smax_32_dp_2src"_h, CPUFeatures::kCSSC},
+        {"smax_64_dp_2src"_h, CPUFeatures::kCSSC},
+        {"smin_32_dp_2src"_h, CPUFeatures::kCSSC},
+        {"smin_64_dp_2src"_h, CPUFeatures::kCSSC},
+        {"umax_32_dp_2src"_h, CPUFeatures::kCSSC},
+        {"umax_64_dp_2src"_h, CPUFeatures::kCSSC},
+        {"umin_32_dp_2src"_h, CPUFeatures::kCSSC},
+        {"umin_64_dp_2src"_h, CPUFeatures::kCSSC},
+        {"smax_32_minmax_imm"_h, CPUFeatures::kCSSC},
+        {"smax_64_minmax_imm"_h, CPUFeatures::kCSSC},
+        {"smin_32_minmax_imm"_h, CPUFeatures::kCSSC},
+        {"smin_64_minmax_imm"_h, CPUFeatures::kCSSC},
+        {"umax_32u_minmax_imm"_h, CPUFeatures::kCSSC},
+        {"umax_64u_minmax_imm"_h, CPUFeatures::kCSSC},
+        {"umin_32u_minmax_imm"_h, CPUFeatures::kCSSC},
+        {"umin_64u_minmax_imm"_h, CPUFeatures::kCSSC},
+    };
+
+    if (features.count(form_hash_) > 0) {
+      scope.Record(features[form_hash_]);
+    }
+  } else {
+    (it->second)(this, instr);
+  }
+}
+
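The string keys in the map above rely on a user-defined literal: "form"_h
hashes the instruction form name at compile time, and the same Hash() function
is applied to the decoded form at runtime so the two can be compared as plain
integers. A minimal sketch of the idea, using a hypothetical FNV-1a hash and
literal suffix rather than VIXL's actual Hash implementation:

#include <cstddef>
#include <cstdint>

constexpr uint32_t HashOf(const char* s, uint32_t h = 2166136261u) {
  // FNV-1a: fold each character into the running hash.
  return (*s == '\0')
             ? h
             : HashOf(s + 1, (h ^ static_cast<uint32_t>(*s)) * 16777619u);
}

constexpr uint32_t operator""_example_h(const char* s, std::size_t) {
  return HashOf(s);
}

// The literal and the runtime-callable hash agree, so a runtime form name
// can be matched against compile-time map keys.
static_assert("adclb_z_zzz"_example_h == HashOf("adclb_z_zzz"),
              "compile-time and runtime hashing must agree");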
 } // namespace aarch64
 } // namespace vixl

@@ -0,0 +1,499 @@
+// Copyright 2023, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+
+#include "debugger-aarch64.h"
+
+#include <cerrno>
+#include <cmath>
+#include <cstring>
+#include <errno.h>
+#include <limits>
+#include <unistd.h>
+
+namespace vixl {
+namespace aarch64 {
+
+
+Debugger::Debugger(Simulator* sim)
+    : sim_(sim), input_stream_(&std::cin), ostream_(sim->GetOutputStream()) {
+  // Register all basic debugger commands.
+  RegisterCmd<HelpCmd>();
+  RegisterCmd<BreakCmd>();
+  RegisterCmd<StepCmd>();
+  RegisterCmd<ContinueCmd>();
+  RegisterCmd<PrintCmd>();
+  RegisterCmd<TraceCmd>();
+  RegisterCmd<GdbCmd>();
+}
+
+
+template <class T>
+void Debugger::RegisterCmd() {
+  auto new_command = std::make_unique<T>(sim_);
+
+  // Check that the new command word and alias don't already exist.
+  std::string_view new_cmd_word = new_command->GetCommandWord();
+  std::string_view new_cmd_alias = new_command->GetCommandAlias();
+  for (const auto& cmd : debugger_cmds_) {
+    std::string_view cmd_word = cmd->GetCommandWord();
+    std::string_view cmd_alias = cmd->GetCommandAlias();
+
+    if (new_cmd_word == cmd_word) {
+      VIXL_ABORT_WITH_MSG("Command word matches an existing command word.");
+    } else if (new_cmd_word == cmd_alias) {
+      VIXL_ABORT_WITH_MSG("Command word matches an existing command alias.");
+    }
+
+    if (new_cmd_alias != "") {
+      if (new_cmd_alias == cmd_word) {
+        VIXL_ABORT_WITH_MSG("Command alias matches an existing command word.");
+      } else if (new_cmd_alias == cmd_alias) {
+        VIXL_ABORT_WITH_MSG("Command alias matches an existing command alias.");
+      }
+    }
+  }
+
+  debugger_cmds_.push_back(std::move(new_command));
+}
+
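Because RegisterCmd<T>() only needs the DebuggerCmd interface, the same
mechanism extends to user-defined commands. A hedged sketch of a hypothetical
custom command; RegsCmd and the "regs" word are invented for illustration and
assume Action() is virtual in DebuggerCmd:

class RegsCmd : public DebuggerCmd {
 public:
  explicit RegsCmd(Simulator* sim)
      : DebuggerCmd(sim,
                    "regs",  // command word
                    "r",     // alias
                    "",      // no arguments
                    "Print the general purpose registers.") {}

  DebugReturn Action(const std::vector<std::string>& args) override {
    USE(args);
    sim_->PrintRegisters();
    return DebugContinue;  // Stay inside the interactive debugger.
  }
};

// Registered the same way as the built-ins: RegisterCmd<RegsCmd>();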
+
+bool Debugger::IsAtBreakpoint() const {
+  return IsBreakpoint(reinterpret_cast<uint64_t>(sim_->ReadPc()));
+}
+
+
+void Debugger::Debug() {
+  DebugReturn done = DebugContinue;
+  while (done == DebugContinue) {
+    // Disassemble the next instruction to execute.
+    PrintDisassembler print_disasm = PrintDisassembler(ostream_);
+    print_disasm.Disassemble(sim_->ReadPc());
+
+    // Read the command line.
+    fprintf(ostream_, "sim> ");
+    std::string line;
+    std::getline(*input_stream_, line);
+
+    // Remove all control characters from the command string.
+    line.erase(std::remove_if(line.begin(),
+                              line.end(),
+                              [](char c) { return std::iscntrl(c); }),
+               line.end());
+
+    // Assume input from std::cin has already been output (e.g: by a terminal)
+    // but input from elsewhere (e.g: from a testing input stream) has not.
+    if (input_stream_ != &std::cin) {
+      fprintf(ostream_, "%s\n", line.c_str());
+    }
+
+    // Parse the command into tokens.
+    std::vector<std::string> tokenized_cmd = Tokenize(line);
+    if (!tokenized_cmd.empty()) {
+      done = ExecDebugCommand(tokenized_cmd);
+    }
+  }
+}
+
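Put together, the loop above produces a simple read-eval-print session. An
illustrative transcript with an invented address; the prompt and messages are
the ones defined in this file (the per-step disassembly is omitted):

sim> break 0x7f8a12340000
Breakpoint successfully added at: 0x7f8a12340000
sim> trace
Enabling disassembly, registers and memory write tracing
sim> continue
Continuing...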
+
+std::optional<uint64_t> Debugger::ParseUint64String(std::string_view uint64_str,
+                                                    int base) {
+  // Clear any previous errors.
+  errno = 0;
+
+  // strtoull uses 0 to indicate that no conversion was possible so first
+  // check that the string isn't zero.
+  if (IsZeroUint64String(uint64_str, base)) {
+    return 0;
+  }
+
+  // Cannot use stoi as it might not be possible to use exceptions.
+  char* end;
+  uint64_t value = std::strtoull(uint64_str.data(), &end, base);
+  if (value == 0 || *end != '\0' || errno == ERANGE) {
+    return std::nullopt;
+  }
+
+  return value;
+}
+
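Usage of the parser above; passing base 0 lets strtoull auto-detect the "0x"
prefix, and the zero check keeps a genuine zero distinguishable from a failed
conversion (values illustrative):

// Debugger::ParseUint64String("0x1000", 0)  -> 0x1000
// Debugger::ParseUint64String("0x0000", 0)  -> 0 (via the zero check)
// Debugger::ParseUint64String("12q4", 0)    -> std::nullopt ('q' rejected)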
+
+std::optional<Debugger::RegisterParsedFormat> Debugger::ParseRegString(
+    std::string_view reg_str) {
+  // A register should only have 2 (e.g: X0) or 3 (e.g: X31) characters.
+  if (reg_str.size() < 2 || reg_str.size() > 3) {
+    return std::nullopt;
+  }
+
+  // Check for aliases of registers.
+  if (reg_str == "lr") {
+    return {{'X', kLinkRegCode}};
+  } else if (reg_str == "sp") {
+    return {{'X', kSpRegCode}};
+  }
+
+  unsigned max_reg_num;
+  char reg_prefix = std::toupper(reg_str.front());
+  switch (reg_prefix) {
+    case 'W':
+      VIXL_FALLTHROUGH();
+    case 'X':
+      max_reg_num = kNumberOfRegisters - 1;
+      break;
+    case 'V':
+      max_reg_num = kNumberOfVRegisters - 1;
+      break;
+    case 'Z':
+      max_reg_num = kNumberOfZRegisters - 1;
+      break;
+    case 'P':
+      max_reg_num = kNumberOfPRegisters - 1;
+      break;
+    default:
+      return std::nullopt;
+  }
+
+  std::string_view str_code = reg_str.substr(1, reg_str.size());
+  auto reg_code = ParseUint64String(str_code, 10);
+  if (!reg_code) {
+    return std::nullopt;
+  }
+
+  if (*reg_code > max_reg_num) {
+    return std::nullopt;
+  }
+
+  return {{reg_prefix, *reg_code}};
+}
+
+
+void Debugger::PrintUsage() {
+  for (const auto& cmd : debugger_cmds_) {
+    // Print commands in the following format:
+    //  foo / f
+    //    foo <arg>
+    //    A description of the foo command.
+    //
+
+    std::string_view cmd_word = cmd->GetCommandWord();
+    std::string_view cmd_alias = cmd->GetCommandAlias();
+    if (cmd_alias != "") {
+      fprintf(ostream_, "%s / %s\n", cmd_word.data(), cmd_alias.data());
+    } else {
+      fprintf(ostream_, "%s\n", cmd_word.data());
+    }
+
+    std::string_view args_str = cmd->GetArgsString();
+    if (args_str != "") {
+      fprintf(ostream_, "\t%s %s\n", cmd_word.data(), args_str.data());
+    }
+
+    std::string_view description = cmd->GetDescription();
+    if (description != "") {
+      fprintf(ostream_, "\t%s\n", description.data());
+    }
+  }
+}
+
+
+std::vector<std::string> Debugger::Tokenize(std::string_view input_line,
+                                            char separator) {
+  std::vector<std::string> words;
+
+  if (input_line.empty()) {
+    return words;
+  }
+
+  for (auto separator_pos = input_line.find(separator);
+       separator_pos != input_line.npos;
+       separator_pos = input_line.find(separator)) {
+    // Skip consecutive, repeated separators.
+    if (separator_pos != 0) {
+      words.push_back(std::string{input_line.substr(0, separator_pos)});
+    }
+
+    // Remove characters up to and including the separator.
+    input_line.remove_prefix(separator_pos + 1);
+  }
+
+  // Add the rest of the string to the vector.
+  words.push_back(std::string{input_line});
+
+  return words;
+}
+
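Behaviour of Tokenize() on typical command lines; consecutive separators are
collapsed and the final word is always appended:

// Tokenize("break  0x1234") -> {"break", "0x1234"}
// Tokenize("print x0")      -> {"print", "x0"}
// Tokenize("")              -> {} (empty input short-circuits)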
+
+DebugReturn Debugger::ExecDebugCommand(
+    const std::vector<std::string>& tokenized_cmd) {
+  std::string cmd_word = tokenized_cmd.front();
+  for (const auto& cmd : debugger_cmds_) {
+    if (cmd_word == cmd->GetCommandWord() ||
+        cmd_word == cmd->GetCommandAlias()) {
+      const std::vector<std::string> args(tokenized_cmd.begin() + 1,
+                                          tokenized_cmd.end());
+
+      // Call the handler for the command and pass the arguments.
+      return cmd->Action(args);
+    }
+  }
+
+  fprintf(ostream_, "Error: command '%s' not found\n", cmd_word.c_str());
+  return DebugContinue;
+}
+
+
+bool Debugger::IsZeroUint64String(std::string_view uint64_str, int base) {
+  // Remove any hex prefixes.
+  if (base == 0 || base == 16) {
+    std::string_view prefix = uint64_str.substr(0, 2);
+    if (prefix == "0x" || prefix == "0X") {
+      uint64_str.remove_prefix(2);
+    }
+  }
+
+  if (uint64_str.empty()) {
+    return false;
+  }
+
+  // Check all remaining digits in the string for anything other than zero.
+  for (char c : uint64_str) {
+    if (c != '0') {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+
+DebuggerCmd::DebuggerCmd(Simulator* sim,
+                         std::string cmd_word,
+                         std::string cmd_alias,
+                         std::string args_str,
+                         std::string description)
+    : sim_(sim),
+      ostream_(sim->GetOutputStream()),
+      command_word_(cmd_word),
+      command_alias_(cmd_alias),
+      args_str_(args_str),
+      description_(description) {}
+
+
+DebugReturn HelpCmd::Action(const std::vector<std::string>& args) {
+  USE(args);
+  sim_->GetDebugger()->PrintUsage();
+  return DebugContinue;
+}
+
+
+DebugReturn BreakCmd::Action(const std::vector<std::string>& args) {
+  if (args.size() != 1) {
+    fprintf(ostream_, "Error: Use `break <address>` to set a breakpoint\n");
+    return DebugContinue;
+  }
+
+  std::string arg = args.front();
+  auto break_addr = Debugger::ParseUint64String(arg);
+  if (!break_addr) {
+    fprintf(ostream_, "Error: Use `break <address>` to set a breakpoint\n");
+    return DebugContinue;
+  }
+
+  if (sim_->GetDebugger()->IsBreakpoint(*break_addr)) {
+    sim_->GetDebugger()->RemoveBreakpoint(*break_addr);
+    fprintf(ostream_,
+            "Breakpoint successfully removed at: 0x%" PRIx64 "\n",
+            *break_addr);
+  } else {
+    sim_->GetDebugger()->RegisterBreakpoint(*break_addr);
+    fprintf(ostream_,
+            "Breakpoint successfully added at: 0x%" PRIx64 "\n",
+            *break_addr);
+  }
+
+  return DebugContinue;
+}
+
+
+DebugReturn StepCmd::Action(const std::vector<std::string>& args) {
+  if (args.size() > 1) {
+    fprintf(ostream_,
+            "Error: use `step [number]` to step an optional number of"
+            " instructions\n");
+    return DebugContinue;
+  }
+
+  // Step 1 instruction by default.
+  std::optional<uint64_t> number_of_instructions_to_execute{1};
+
+  if (args.size() == 1) {
+    // Parse the argument to step that number of instructions.
+    std::string arg = args.front();
+    number_of_instructions_to_execute = Debugger::ParseUint64String(arg);
+    if (!number_of_instructions_to_execute) {
+      fprintf(ostream_,
+              "Error: use `step [number]` to step an optional number of"
+              " instructions\n");
+      return DebugContinue;
+    }
+  }
+
+  while (!sim_->IsSimulationFinished() &&
+         *number_of_instructions_to_execute > 0) {
+    sim_->ExecuteInstruction();
+    (*number_of_instructions_to_execute)--;
+
+    // The first instruction has already been printed by Debug() so only
+    // enable instruction tracing after the first instruction has been
+    // executed.
+    sim_->SetTraceParameters(sim_->GetTraceParameters() | LOG_DISASM);
+  }
+
+  // Disable instruction tracing after all instructions have been executed.
+  sim_->SetTraceParameters(sim_->GetTraceParameters() & ~LOG_DISASM);
+
+  if (sim_->IsSimulationFinished()) {
+    fprintf(ostream_,
+            "Debugger at the end of simulation, leaving simulator...\n");
+    return DebugExit;
+  }
+
+  return DebugContinue;
+}
+
+
+DebugReturn ContinueCmd::Action(const std::vector<std::string>& args) {
+  USE(args);
+
+  fprintf(ostream_, "Continuing...\n");
+
+  if (sim_->GetDebugger()->IsAtBreakpoint()) {
+    // This breakpoint has already been hit, so execute it before continuing.
+    sim_->ExecuteInstruction();
+  }
+
+  return DebugExit;
+}
+
+
+DebugReturn PrintCmd::Action(const std::vector<std::string>& args) {
+  if (args.size() != 1) {
+    fprintf(ostream_,
+            "Error: use `print <register|all>` to print the contents of a"
+            " specific register or all registers.\n");
+    return DebugContinue;
+  }
+
+  if (args.front() == "all") {
+    sim_->PrintRegisters();
+    sim_->PrintZRegisters();
+  } else if (args.front() == "system") {
+    sim_->PrintSystemRegisters();
+  } else if (args.front() == "ffr") {
+    sim_->PrintFFR();
+  } else {
+    auto reg = Debugger::ParseRegString(args.front());
+    if (!reg) {
+      fprintf(ostream_,
+              "Error: incorrect register format, use e.g: X0, x0, etc...\n");
+      return DebugContinue;
+    }
+
+    // Ensure the stack pointer is printed instead of the zero register.
+    if ((*reg).second == kSpRegCode) {
+      (*reg).second = kSPRegInternalCode;
+    }
+
+    // Registers are printed in different ways depending on their type.
+    switch ((*reg).first) {
+      case 'W':
+        sim_->PrintRegister(
+            (*reg).second,
+            static_cast<Simulator::PrintRegisterFormat>(
+                Simulator::PrintRegisterFormat::kPrintWReg |
+                Simulator::PrintRegisterFormat::kPrintRegPartial));
+        break;
+      case 'X':
+        sim_->PrintRegister((*reg).second,
+                            Simulator::PrintRegisterFormat::kPrintXReg);
+        break;
+      case 'V':
+        sim_->PrintVRegister((*reg).second);
+        break;
+      case 'Z':
+        sim_->PrintZRegister((*reg).second);
+        break;
+      case 'P':
+        sim_->PrintPRegister((*reg).second);
+        break;
+      default:
+        // ParseRegString should only allow valid register characters.
+        VIXL_UNREACHABLE();
+    }
+  }
+
+  return DebugContinue;
+}
+
+
+DebugReturn TraceCmd::Action(const std::vector<std::string>& args) {
+  if (args.size() != 0) {
+    fprintf(ostream_, "Error: use `trace` to toggle tracing of registers.\n");
+    return DebugContinue;
+  }
+
+  int trace_params = sim_->GetTraceParameters();
+  if ((trace_params & LOG_ALL) != LOG_ALL) {
+    fprintf(ostream_,
+            "Enabling disassembly, registers and memory write tracing\n");
+    sim_->SetTraceParameters(trace_params | LOG_ALL);
+  } else {
+    fprintf(ostream_,
+            "Disabling disassembly, registers and memory write tracing\n");
+    sim_->SetTraceParameters(trace_params & ~LOG_ALL);
+  }
+
+  return DebugContinue;
+}
+
+
+DebugReturn GdbCmd::Action(const std::vector<std::string>& args) {
+  if (args.size() != 0) {
+    fprintf(ostream_,
+            "Error: use `gdb` to enter GDB from the simulator debugger.\n");
+    return DebugContinue;
+  }
+
+  HostBreakpoint();
+  return DebugContinue;
+}
+
+
+} // namespace aarch64
+} // namespace vixl
+
+#endif // VIXL_INCLUDE_SIMULATOR_AARCH64
File diff suppressed because it is too large
File diff suppressed because it is too large

@@ -25,6 +25,7 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 #include "instructions-aarch64.h"

 #include "assembler-aarch64.h"

 namespace vixl {
@@ -35,7 +36,8 @@ static uint64_t RepeatBitsAcrossReg(unsigned reg_size,
                                     unsigned width) {
   VIXL_ASSERT((width == 2) || (width == 4) || (width == 8) || (width == 16) ||
               (width == 32));
-  VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
+  VIXL_ASSERT((reg_size == kBRegSize) || (reg_size == kHRegSize) ||
+              (reg_size == kSRegSize) || (reg_size == kDRegSize));
   uint64_t result = value & ((UINT64_C(1) << width) - 1);
   for (unsigned i = width; i < reg_size; i *= 2) {
     result |= (result << i);
@@ -43,6 +45,442 @@ static uint64_t RepeatBitsAcrossReg(unsigned reg_size,
   return result;
 }

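A quick worked example of the doubling loop in RepeatBitsAcrossReg, with
illustrative inputs (width = 2, value = 0b01, reg_size = 8, a size now
permitted by the widened assertion):

// result = 0b01                         (value masked to `width` bits)
// i = 2:  result |= result << 2  ->  0b0101
// i = 4:  result |= result << 4  ->  0b01010101
// i = 8:  loop exits (i == reg_size), returning 0x55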
bool Instruction::CanTakeSVEMovprfx(const char* form,
|
||||||
|
const Instruction* movprfx) const {
|
||||||
|
return CanTakeSVEMovprfx(Hash(form), movprfx);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Instruction::CanTakeSVEMovprfx(uint32_t form_hash,
|
||||||
|
const Instruction* movprfx) const {
|
||||||
|
bool movprfx_is_predicated = movprfx->Mask(SVEMovprfxMask) == MOVPRFX_z_p_z;
|
||||||
|
bool movprfx_is_unpredicated =
|
||||||
|
movprfx->Mask(SVEConstructivePrefix_UnpredicatedMask) == MOVPRFX_z_z;
|
||||||
|
VIXL_ASSERT(movprfx_is_predicated != movprfx_is_unpredicated);
|
||||||
|
|
||||||
|
int movprfx_zd = movprfx->GetRd();
|
||||||
|
int movprfx_pg = movprfx_is_predicated ? movprfx->GetPgLow8() : -1;
|
||||||
|
VectorFormat movprfx_vform =
|
||||||
|
movprfx_is_predicated ? movprfx->GetSVEVectorFormat() : kFormatUndefined;
|
||||||
|
|
||||||
|
bool pg_matches_low8 = movprfx_pg == GetPgLow8();
|
||||||
|
bool vform_matches = movprfx_vform == GetSVEVectorFormat();
|
||||||
|
bool zd_matches = movprfx_zd == GetRd();
|
||||||
|
bool zd_isnt_zn = movprfx_zd != GetRn();
|
||||||
|
bool zd_isnt_zm = movprfx_zd != GetRm();
|
||||||
|
|
||||||
|
switch (form_hash) {
|
||||||
|
case "cdot_z_zzzi_s"_h:
|
||||||
|
case "sdot_z_zzzi_s"_h:
|
||||||
|
case "sudot_z_zzzi_s"_h:
|
||||||
|
case "udot_z_zzzi_s"_h:
|
||||||
|
case "usdot_z_zzzi_s"_h:
|
||||||
|
return (GetRd() != static_cast<int>(ExtractBits(18, 16))) &&
|
||||||
|
movprfx_is_unpredicated && zd_isnt_zn && zd_matches;
|
||||||
|
|
||||||
|
case "cdot_z_zzzi_d"_h:
|
||||||
|
case "sdot_z_zzzi_d"_h:
|
||||||
|
case "udot_z_zzzi_d"_h:
|
||||||
|
return (GetRd() != static_cast<int>(ExtractBits(19, 16))) &&
|
||||||
|
movprfx_is_unpredicated && zd_isnt_zn && zd_matches;
|
||||||
|
|
||||||
|
case "fmlalb_z_zzzi_s"_h:
|
||||||
|
case "fmlalt_z_zzzi_s"_h:
|
||||||
|
case "fmlslb_z_zzzi_s"_h:
|
||||||
|
case "fmlslt_z_zzzi_s"_h:
|
||||||
|
case "smlalb_z_zzzi_d"_h:
|
||||||
|
case "smlalb_z_zzzi_s"_h:
|
||||||
|
case "smlalt_z_zzzi_d"_h:
|
||||||
|
case "smlalt_z_zzzi_s"_h:
|
||||||
|
case "smlslb_z_zzzi_d"_h:
|
||||||
|
case "smlslb_z_zzzi_s"_h:
|
||||||
|
case "smlslt_z_zzzi_d"_h:
|
||||||
|
case "smlslt_z_zzzi_s"_h:
|
||||||
|
case "sqdmlalb_z_zzzi_d"_h:
|
||||||
|
case "sqdmlalb_z_zzzi_s"_h:
|
||||||
|
case "sqdmlalt_z_zzzi_d"_h:
|
||||||
|
case "sqdmlalt_z_zzzi_s"_h:
|
||||||
|
case "sqdmlslb_z_zzzi_d"_h:
|
||||||
|
case "sqdmlslb_z_zzzi_s"_h:
|
||||||
|
case "sqdmlslt_z_zzzi_d"_h:
|
||||||
|
case "sqdmlslt_z_zzzi_s"_h:
|
||||||
|
case "umlalb_z_zzzi_d"_h:
|
||||||
|
case "umlalb_z_zzzi_s"_h:
|
||||||
|
case "umlalt_z_zzzi_d"_h:
|
||||||
|
case "umlalt_z_zzzi_s"_h:
|
||||||
|
case "umlslb_z_zzzi_d"_h:
|
||||||
|
case "umlslb_z_zzzi_s"_h:
|
||||||
|
case "umlslt_z_zzzi_d"_h:
|
||||||
|
case "umlslt_z_zzzi_s"_h:
|
||||||
|
return (GetRd() != GetSVEMulLongZmAndIndex().first) &&
|
||||||
|
movprfx_is_unpredicated && zd_isnt_zn && zd_matches;
|
||||||
|
|
||||||
|
case "cmla_z_zzzi_h"_h:
|
||||||
|
case "cmla_z_zzzi_s"_h:
|
||||||
|
case "fcmla_z_zzzi_h"_h:
|
||||||
|
case "fcmla_z_zzzi_s"_h:
|
||||||
|
case "fmla_z_zzzi_d"_h:
|
||||||
|
case "fmla_z_zzzi_h"_h:
|
||||||
|
case "fmla_z_zzzi_s"_h:
|
||||||
|
case "fmls_z_zzzi_d"_h:
|
||||||
|
case "fmls_z_zzzi_h"_h:
|
||||||
|
case "fmls_z_zzzi_s"_h:
|
||||||
|
case "mla_z_zzzi_d"_h:
|
||||||
|
case "mla_z_zzzi_h"_h:
|
||||||
|
case "mla_z_zzzi_s"_h:
|
||||||
|
case "mls_z_zzzi_d"_h:
|
||||||
|
case "mls_z_zzzi_h"_h:
|
||||||
|
case "mls_z_zzzi_s"_h:
|
||||||
|
case "sqrdcmlah_z_zzzi_h"_h:
|
||||||
|
case "sqrdcmlah_z_zzzi_s"_h:
|
||||||
|
case "sqrdmlah_z_zzzi_d"_h:
|
||||||
|
case "sqrdmlah_z_zzzi_h"_h:
|
||||||
|
case "sqrdmlah_z_zzzi_s"_h:
|
||||||
|
case "sqrdmlsh_z_zzzi_d"_h:
|
||||||
|
case "sqrdmlsh_z_zzzi_h"_h:
|
||||||
|
case "sqrdmlsh_z_zzzi_s"_h:
|
||||||
|
return (GetRd() != GetSVEMulZmAndIndex().first) &&
|
||||||
|
movprfx_is_unpredicated && zd_isnt_zn && zd_matches;
|
||||||
|
|
||||||
|
case "adclb_z_zzz"_h:
|
||||||
|
case "adclt_z_zzz"_h:
|
||||||
|
case "bcax_z_zzz"_h:
|
||||||
|
case "bsl1n_z_zzz"_h:
|
||||||
|
case "bsl2n_z_zzz"_h:
|
||||||
|
case "bsl_z_zzz"_h:
|
||||||
|
case "cdot_z_zzz"_h:
|
||||||
|
case "cmla_z_zzz"_h:
|
||||||
|
case "eor3_z_zzz"_h:
|
||||||
|
case "eorbt_z_zz"_h:
|
||||||
|
case "eortb_z_zz"_h:
|
||||||
|
case "fmlalb_z_zzz"_h:
|
||||||
|
case "fmlalt_z_zzz"_h:
|
||||||
|
case "fmlslb_z_zzz"_h:
|
||||||
|
case "fmlslt_z_zzz"_h:
|
||||||
|
case "nbsl_z_zzz"_h:
|
||||||
|
case "saba_z_zzz"_h:
|
||||||
|
case "sabalb_z_zzz"_h:
|
||||||
|
case "sabalt_z_zzz"_h:
|
||||||
|
case "sbclb_z_zzz"_h:
|
||||||
|
case "sbclt_z_zzz"_h:
|
||||||
|
case "sdot_z_zzz"_h:
|
||||||
|
case "smlalb_z_zzz"_h:
|
||||||
|
case "smlalt_z_zzz"_h:
|
||||||
|
case "smlslb_z_zzz"_h:
|
||||||
|
case "smlslt_z_zzz"_h:
|
||||||
|
case "sqdmlalb_z_zzz"_h:
|
||||||
|
case "sqdmlalbt_z_zzz"_h:
|
||||||
|
case "sqdmlalt_z_zzz"_h:
|
||||||
|
case "sqdmlslb_z_zzz"_h:
|
||||||
|
case "sqdmlslbt_z_zzz"_h:
|
||||||
|
case "sqdmlslt_z_zzz"_h:
|
||||||
|
case "sqrdcmlah_z_zzz"_h:
|
||||||
|
case "sqrdmlah_z_zzz"_h:
|
||||||
|
case "sqrdmlsh_z_zzz"_h:
|
||||||
|
case "uaba_z_zzz"_h:
|
||||||
|
case "uabalb_z_zzz"_h:
|
||||||
|
case "uabalt_z_zzz"_h:
|
||||||
|
case "udot_z_zzz"_h:
|
||||||
|
case "umlalb_z_zzz"_h:
|
||||||
|
case "umlalt_z_zzz"_h:
|
||||||
|
case "umlslb_z_zzz"_h:
|
||||||
|
case "umlslt_z_zzz"_h:
|
||||||
|
case "usdot_z_zzz_s"_h:
|
||||||
|
case "fmmla_z_zzz_s"_h:
|
||||||
|
case "fmmla_z_zzz_d"_h:
|
||||||
|
case "smmla_z_zzz"_h:
|
||||||
|
case "ummla_z_zzz"_h:
|
||||||
|
case "usmmla_z_zzz"_h:
|
||||||
|
return movprfx_is_unpredicated && zd_isnt_zm && zd_isnt_zn && zd_matches;
|
||||||
|
|
||||||
|
case "addp_z_p_zz"_h:
|
||||||
|
case "cadd_z_zz"_h:
|
||||||
|
case "clasta_z_p_zz"_h:
|
||||||
|
case "clastb_z_p_zz"_h:
|
||||||
|
case "decd_z_zs"_h:
|
||||||
|
case "dech_z_zs"_h:
|
||||||
|
case "decw_z_zs"_h:
|
||||||
|
case "ext_z_zi_des"_h:
|
||||||
|
case "faddp_z_p_zz"_h:
|
||||||
|
case "fmaxnmp_z_p_zz"_h:
|
||||||
|
case "fmaxp_z_p_zz"_h:
|
||||||
|
case "fminnmp_z_p_zz"_h:
|
||||||
|
case "fminp_z_p_zz"_h:
|
||||||
|
case "ftmad_z_zzi"_h:
|
||||||
|
case "incd_z_zs"_h:
|
||||||
|
case "inch_z_zs"_h:
|
||||||
|
case "incw_z_zs"_h:
|
||||||
|
case "insr_z_v"_h:
|
||||||
|
case "smaxp_z_p_zz"_h:
|
||||||
|
case "sminp_z_p_zz"_h:
|
||||||
|
case "splice_z_p_zz_des"_h:
|
||||||
|
case "sqcadd_z_zz"_h:
|
||||||
|
case "sqdecd_z_zs"_h:
|
||||||
|
case "sqdech_z_zs"_h:
|
||||||
|
case "sqdecw_z_zs"_h:
|
||||||
|
case "sqincd_z_zs"_h:
|
||||||
|
case "sqinch_z_zs"_h:
|
||||||
|
case "sqincw_z_zs"_h:
|
||||||
|
case "srsra_z_zi"_h:
|
||||||
|
case "ssra_z_zi"_h:
|
||||||
|
case "umaxp_z_p_zz"_h:
|
||||||
|
case "uminp_z_p_zz"_h:
|
||||||
|
case "uqdecd_z_zs"_h:
|
||||||
|
case "uqdech_z_zs"_h:
|
||||||
|
case "uqdecw_z_zs"_h:
|
||||||
|
case "uqincd_z_zs"_h:
|
||||||
|
case "uqinch_z_zs"_h:
|
||||||
|
case "uqincw_z_zs"_h:
|
||||||
|
case "ursra_z_zi"_h:
|
||||||
|
case "usra_z_zi"_h:
|
||||||
|
case "xar_z_zzi"_h:
|
||||||
|
return movprfx_is_unpredicated && zd_isnt_zn && zd_matches;
|
||||||
|
|
||||||
|
case "add_z_zi"_h:
|
||||||
|
case "and_z_zi"_h:
|
||||||
|
case "decp_z_p_z"_h:
|
||||||
|
case "eor_z_zi"_h:
|
||||||
|
case "incp_z_p_z"_h:
|
||||||
|
case "insr_z_r"_h:
|
||||||
|
case "mul_z_zi"_h:
|
||||||
|
case "orr_z_zi"_h:
|
||||||
|
case "smax_z_zi"_h:
|
||||||
|
case "smin_z_zi"_h:
|
||||||
|
case "sqadd_z_zi"_h:
|
||||||
|
case "sqdecp_z_p_z"_h:
|
||||||
|
case "sqincp_z_p_z"_h:
|
||||||
|
case "sqsub_z_zi"_h:
|
||||||
|
case "sub_z_zi"_h:
|
||||||
|
case "subr_z_zi"_h:
|
||||||
|
case "umax_z_zi"_h:
|
||||||
|
case "umin_z_zi"_h:
|
||||||
|
case "uqadd_z_zi"_h:
|
||||||
|
case "uqdecp_z_p_z"_h:
|
||||||
|
case "uqincp_z_p_z"_h:
|
||||||
|
case "uqsub_z_zi"_h:
|
||||||
|
return movprfx_is_unpredicated && zd_matches;
|
||||||
|
|
||||||
|
case "cpy_z_p_i"_h:
|
||||||
|
if (movprfx_is_predicated) {
|
||||||
|
if (!vform_matches) return false;
|
||||||
|
if (movprfx_pg != GetRx<19, 16>()) return false;
|
||||||
|
}
|
||||||
|
// Only the merging form can take movprfx.
|
||||||
|
if (ExtractBit(14) == 0) return false;
|
||||||
|
return zd_matches;
|
||||||
|
|
||||||
|
case "fcpy_z_p_i"_h:
|
||||||
|
return (movprfx_is_unpredicated ||
|
||||||
|
((movprfx_pg == GetRx<19, 16>()) && vform_matches)) &&
|
||||||
|
zd_matches;
|
||||||
|
|
||||||
|
case "flogb_z_p_z"_h:
|
||||||
|
return (movprfx_is_unpredicated ||
|
||||||
|
((movprfx_vform == GetSVEVectorFormat(17)) && pg_matches_low8)) &&
|
||||||
|
zd_isnt_zn && zd_matches;
|
||||||
|
|
||||||
|
case "asr_z_p_zi"_h:
|
||||||
|
case "asrd_z_p_zi"_h:
|
||||||
|
case "lsl_z_p_zi"_h:
|
||||||
|
case "lsr_z_p_zi"_h:
|
||||||
|
case "sqshl_z_p_zi"_h:
|
||||||
|
case "sqshlu_z_p_zi"_h:
|
||||||
|
case "srshr_z_p_zi"_h:
|
||||||
|
case "uqshl_z_p_zi"_h:
|
||||||
|
case "urshr_z_p_zi"_h:
|
||||||
|
return (movprfx_is_unpredicated ||
|
||||||
|
((movprfx_vform ==
|
||||||
|
SVEFormatFromLaneSizeInBytesLog2(
|
||||||
|
GetSVEImmShiftAndLaneSizeLog2(true).second)) &&
|
||||||
|
pg_matches_low8)) &&
|
||||||
|
zd_matches;
|
||||||
|
|
||||||
|
case "fcvt_z_p_z_d2h"_h:
|
||||||
|
case "fcvt_z_p_z_d2s"_h:
|
||||||
|
case "fcvt_z_p_z_h2d"_h:
|
||||||
|
case "fcvt_z_p_z_s2d"_h:
|
||||||
|
case "fcvtx_z_p_z_d2s"_h:
|
||||||
|
case "fcvtzs_z_p_z_d2w"_h:
|
||||||
|
case "fcvtzs_z_p_z_d2x"_h:
|
||||||
|
case "fcvtzs_z_p_z_fp162x"_h:
|
||||||
|
case "fcvtzs_z_p_z_s2x"_h:
|
||||||
|
case "fcvtzu_z_p_z_d2w"_h:
|
||||||
|
case "fcvtzu_z_p_z_d2x"_h:
|
||||||
|
case "fcvtzu_z_p_z_fp162x"_h:
|
    case "fcvtzu_z_p_z_s2x"_h:
    case "scvtf_z_p_z_w2d"_h:
    case "scvtf_z_p_z_x2d"_h:
    case "scvtf_z_p_z_x2fp16"_h:
    case "scvtf_z_p_z_x2s"_h:
    case "ucvtf_z_p_z_w2d"_h:
    case "ucvtf_z_p_z_x2d"_h:
    case "ucvtf_z_p_z_x2fp16"_h:
    case "ucvtf_z_p_z_x2s"_h:
      return (movprfx_is_unpredicated ||
              ((movprfx_vform == kFormatVnD) && pg_matches_low8)) &&
             zd_isnt_zn && zd_matches;

    case "fcvtzs_z_p_z_fp162h"_h:
    case "fcvtzu_z_p_z_fp162h"_h:
    case "scvtf_z_p_z_h2fp16"_h:
    case "ucvtf_z_p_z_h2fp16"_h:
      return (movprfx_is_unpredicated ||
              ((movprfx_vform == kFormatVnH) && pg_matches_low8)) &&
             zd_isnt_zn && zd_matches;

    case "fcvt_z_p_z_h2s"_h:
    case "fcvt_z_p_z_s2h"_h:
    case "fcvtzs_z_p_z_fp162w"_h:
    case "fcvtzs_z_p_z_s2w"_h:
    case "fcvtzu_z_p_z_fp162w"_h:
    case "fcvtzu_z_p_z_s2w"_h:
    case "scvtf_z_p_z_w2fp16"_h:
    case "scvtf_z_p_z_w2s"_h:
    case "ucvtf_z_p_z_w2fp16"_h:
    case "ucvtf_z_p_z_w2s"_h:
      return (movprfx_is_unpredicated ||
              ((movprfx_vform == kFormatVnS) && pg_matches_low8)) &&
             zd_isnt_zn && zd_matches;

    case "fcmla_z_p_zzz"_h:
    case "fmad_z_p_zzz"_h:
    case "fmla_z_p_zzz"_h:
    case "fmls_z_p_zzz"_h:
    case "fmsb_z_p_zzz"_h:
    case "fnmad_z_p_zzz"_h:
    case "fnmla_z_p_zzz"_h:
    case "fnmls_z_p_zzz"_h:
    case "fnmsb_z_p_zzz"_h:
    case "mad_z_p_zzz"_h:
    case "mla_z_p_zzz"_h:
    case "mls_z_p_zzz"_h:
    case "msb_z_p_zzz"_h:
      return (movprfx_is_unpredicated || (pg_matches_low8 && vform_matches)) &&
             zd_isnt_zm && zd_isnt_zn && zd_matches;

    case "abs_z_p_z"_h:
    case "add_z_p_zz"_h:
    case "and_z_p_zz"_h:
    case "asr_z_p_zw"_h:
    case "asr_z_p_zz"_h:
    case "asrr_z_p_zz"_h:
    case "bic_z_p_zz"_h:
    case "cls_z_p_z"_h:
    case "clz_z_p_z"_h:
    case "cnot_z_p_z"_h:
    case "cnt_z_p_z"_h:
    case "cpy_z_p_v"_h:
    case "eor_z_p_zz"_h:
    case "fabd_z_p_zz"_h:
    case "fabs_z_p_z"_h:
    case "fadd_z_p_zz"_h:
    case "fcadd_z_p_zz"_h:
    case "fdiv_z_p_zz"_h:
    case "fdivr_z_p_zz"_h:
    case "fmax_z_p_zz"_h:
    case "fmaxnm_z_p_zz"_h:
    case "fmin_z_p_zz"_h:
    case "fminnm_z_p_zz"_h:
    case "fmul_z_p_zz"_h:
    case "fmulx_z_p_zz"_h:
    case "fneg_z_p_z"_h:
    case "frecpx_z_p_z"_h:
    case "frinta_z_p_z"_h:
    case "frinti_z_p_z"_h:
    case "frintm_z_p_z"_h:
    case "frintn_z_p_z"_h:
    case "frintp_z_p_z"_h:
    case "frintx_z_p_z"_h:
    case "frintz_z_p_z"_h:
    case "fscale_z_p_zz"_h:
    case "fsqrt_z_p_z"_h:
    case "fsub_z_p_zz"_h:
    case "fsubr_z_p_zz"_h:
    case "lsl_z_p_zw"_h:
    case "lsl_z_p_zz"_h:
    case "lslr_z_p_zz"_h:
    case "lsr_z_p_zw"_h:
    case "lsr_z_p_zz"_h:
    case "lsrr_z_p_zz"_h:
    case "mul_z_p_zz"_h:
    case "neg_z_p_z"_h:
    case "not_z_p_z"_h:
    case "orr_z_p_zz"_h:
    case "rbit_z_p_z"_h:
    case "revb_z_z"_h:
    case "revh_z_z"_h:
    case "revw_z_z"_h:
    case "sabd_z_p_zz"_h:
    case "sadalp_z_p_z"_h:
    case "sdiv_z_p_zz"_h:
    case "sdivr_z_p_zz"_h:
    case "shadd_z_p_zz"_h:
    case "shsub_z_p_zz"_h:
    case "shsubr_z_p_zz"_h:
    case "smax_z_p_zz"_h:
    case "smin_z_p_zz"_h:
    case "smulh_z_p_zz"_h:
    case "sqabs_z_p_z"_h:
    case "sqadd_z_p_zz"_h:
    case "sqneg_z_p_z"_h:
    case "sqrshl_z_p_zz"_h:
    case "sqrshlr_z_p_zz"_h:
    case "sqshl_z_p_zz"_h:
    case "sqshlr_z_p_zz"_h:
    case "sqsub_z_p_zz"_h:
    case "sqsubr_z_p_zz"_h:
    case "srhadd_z_p_zz"_h:
    case "srshl_z_p_zz"_h:
    case "srshlr_z_p_zz"_h:
    case "sub_z_p_zz"_h:
    case "subr_z_p_zz"_h:
    case "suqadd_z_p_zz"_h:
    case "sxtb_z_p_z"_h:
    case "sxth_z_p_z"_h:
    case "sxtw_z_p_z"_h:
    case "uabd_z_p_zz"_h:
    case "uadalp_z_p_z"_h:
    case "udiv_z_p_zz"_h:
    case "udivr_z_p_zz"_h:
    case "uhadd_z_p_zz"_h:
    case "uhsub_z_p_zz"_h:
    case "uhsubr_z_p_zz"_h:
    case "umax_z_p_zz"_h:
    case "umin_z_p_zz"_h:
    case "umulh_z_p_zz"_h:
    case "uqadd_z_p_zz"_h:
    case "uqrshl_z_p_zz"_h:
    case "uqrshlr_z_p_zz"_h:
    case "uqshl_z_p_zz"_h:
    case "uqshlr_z_p_zz"_h:
    case "uqsub_z_p_zz"_h:
    case "uqsubr_z_p_zz"_h:
    case "urecpe_z_p_z"_h:
    case "urhadd_z_p_zz"_h:
    case "urshl_z_p_zz"_h:
    case "urshlr_z_p_zz"_h:
    case "ursqrte_z_p_z"_h:
    case "usqadd_z_p_zz"_h:
    case "uxtb_z_p_z"_h:
    case "uxth_z_p_z"_h:
    case "uxtw_z_p_z"_h:
      return (movprfx_is_unpredicated || (pg_matches_low8 && vform_matches)) &&
             zd_isnt_zn && zd_matches;

    case "cpy_z_p_r"_h:
    case "fadd_z_p_zs"_h:
    case "fmax_z_p_zs"_h:
    case "fmaxnm_z_p_zs"_h:
    case "fmin_z_p_zs"_h:
    case "fminnm_z_p_zs"_h:
    case "fmul_z_p_zs"_h:
    case "fsub_z_p_zs"_h:
    case "fsubr_z_p_zs"_h:
      return (movprfx_is_unpredicated || (pg_matches_low8 && vform_matches)) &&
             zd_matches;

    default:
      return false;
  }
}  // NOLINT(readability/fn_size)
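A worked instance of the clauses above, for illustration only (assembly syntax approximate, not text from the commit): a predicated MOVPRFX is accepted by the widening conversions only when it uses the destination lane size named in the clause and the same low-8 governing predicate, and the prefixed destination must not alias the conversion source.

  // Accepted pairing for "scvtf_z_p_z_w2d" under the kFormatVnD clause:
  //   movprfx z0.d, p0/m, z5.d   // movprfx_vform == kFormatVnD, pg matches
  //   scvtf   z0.d, p0/m, z1.s   // zd (z0) != zn (z1), zd matches the prefix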

bool Instruction::IsLoad() const {
  if (Mask(LoadStoreAnyFMask) != LoadStoreAnyFixed) {
@@ -103,6 +541,68 @@ bool Instruction::IsStore() const {
}


std::pair<int, int> Instruction::GetSVEPermuteIndexAndLaneSizeLog2() const {
  uint32_t imm_2 = ExtractBits<0x00C00000>();
  uint32_t tsz_5 = ExtractBits<0x001F0000>();
  uint32_t imm_7 = (imm_2 << 5) | tsz_5;
  int lane_size_in_byte_log_2 = std::min(CountTrailingZeros(tsz_5), 5);
  int index = ExtractUnsignedBitfield32(6, lane_size_in_byte_log_2 + 1, imm_7);
  return std::make_pair(index, lane_size_in_byte_log_2);
}
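A minimal sketch of the same field extraction with plain integer arithmetic (illustrative; the ExtractBits<> and ExtractUnsignedBitfield32 helpers are modeled by hand, and tsz is assumed non-zero as in valid encodings):

#include <algorithm>
#include <cstdint>

// The lowest set bit of tsz selects the lane size; the bits of the compound
// imm2:tsz field above it hold the element index.
int DecodePermuteIndex(uint32_t imm_2, uint32_t tsz_5, int* lane_log2) {
  uint32_t imm_7 = (imm_2 << 5) | tsz_5;
  *lane_log2 = std::min(__builtin_ctz(tsz_5), 5);
  return (imm_7 >> (*lane_log2 + 1)) & (0x7F >> (*lane_log2 + 1));
}
// e.g. imm_2 = 0b11, tsz_5 = 0b00010: imm_7 = 0b1100010, half-word lanes
// (lane_log2 == 1), index = imm_7<6:2> = 0b11000 = 24.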

// Get the register and index for SVE indexed multiplies encoded in the forms:
//  .h : Zm = <18:16>, index = <22><20:19>
//  .s : Zm = <18:16>, index = <20:19>
//  .d : Zm = <19:16>, index = <20>
std::pair<int, int> Instruction::GetSVEMulZmAndIndex() const {
  int reg_code = GetRmLow16();
  int index = ExtractBits(20, 19);

  // For .h, index uses bit zero of the size field, so kFormatVnB below implies
  // half-word lane, with most-significant bit of the index zero.
  switch (GetSVEVectorFormat()) {
    case kFormatVnD:
      index >>= 1;  // Only bit 20 in the index for D lanes.
      break;
    case kFormatVnH:
      index += 4;  // Bit 22 is the top bit of index.
      VIXL_FALLTHROUGH();
    case kFormatVnB:
    case kFormatVnS:
      reg_code &= 7;  // Three bits used for the register.
      break;
    default:
      VIXL_UNIMPLEMENTED();
      break;
  }
  return std::make_pair(reg_code, index);
}
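For example, an illustrative reading of the table above (not text from the commit):

// size<23:22> = 01, bits<20:19> = 0b11  ->  kFormatVnH arm:
//   index = 4 + 0b11 = 7, Zm = Rm & 7
// size<23:22> = 00, same bits           ->  kFormatVnB arm:
//   still a half-word access, but the index MSB (bit 22) is zero.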

// Get the register and index for SVE indexed long multiplies encoded in the
// forms:
//  .h : Zm = <18:16>, index = <20:19><11>
//  .s : Zm = <19:16>, index = <20><11>
std::pair<int, int> Instruction::GetSVEMulLongZmAndIndex() const {
  int reg_code = GetRmLow16();
  int index = ExtractBit(11);

  // For long multiplies, the SVE size field <23:22> encodes the destination
  // element size. The source element size is half the width.
  switch (GetSVEVectorFormat()) {
    case kFormatVnS:
      reg_code &= 7;
      index |= ExtractBits(20, 19) << 1;
      break;
    case kFormatVnD:
      index |= ExtractBit(20) << 1;
      break;
    default:
      VIXL_UNIMPLEMENTED();
      break;
  }
  return std::make_pair(reg_code, index);
}
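And a matching worked example for the long form (illustrative): an .s-form encoding with bits<20:19> = 0b10 and bit<11> = 1 decodes as index = (0b10 << 1) | 1 = 5 with Zm = Rm & 7, while the .d form contributes only bit<20>, so its index is at most two bits.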

// Logical immediates can't encode zero, so a return value of zero is used to
// indicate a failure case. Specifically, where the constraints on imm_s are
// not met.
@@ -111,7 +611,114 @@ uint64_t Instruction::GetImmLogical() const {
  int32_t n = GetBitN();
  int32_t imm_s = GetImmSetBits();
  int32_t imm_r = GetImmRotate();
  return DecodeImmBitMask(n, imm_s, imm_r, reg_size);
}


// Logical immediates can't encode zero, so a return value of zero is used to
// indicate a failure case. Specifically, where the constraints on imm_s are
// not met.
uint64_t Instruction::GetSVEImmLogical() const {
  int n = GetSVEBitN();
  int imm_s = GetSVEImmSetBits();
  int imm_r = GetSVEImmRotate();
  int lane_size_in_bytes_log2 = GetSVEBitwiseImmLaneSizeInBytesLog2();
  switch (lane_size_in_bytes_log2) {
    case kDRegSizeInBytesLog2:
    case kSRegSizeInBytesLog2:
    case kHRegSizeInBytesLog2:
    case kBRegSizeInBytesLog2: {
      int lane_size_in_bits = 1 << (lane_size_in_bytes_log2 + 3);
      return DecodeImmBitMask(n, imm_s, imm_r, lane_size_in_bits);
    }
    default:
      return 0;
  }
}

std::pair<int, int> Instruction::GetSVEImmShiftAndLaneSizeLog2(
    bool is_predicated) const {
  Instr tsize =
      is_predicated ? ExtractBits<0x00C00300>() : ExtractBits<0x00D80000>();
  Instr imm_3 =
      is_predicated ? ExtractBits<0x000000E0>() : ExtractBits<0x00070000>();
  if (tsize == 0) {
    // A zero `tsize` field is an undefined encoding, so return the
    // convenience value kWMinInt to indicate a failure case.
    return std::make_pair(kWMinInt, kWMinInt);
  }

  int lane_size_in_bytes_log_2 = 32 - CountLeadingZeros(tsize, 32) - 1;
  int esize = (1 << lane_size_in_bytes_log_2) * kBitsPerByte;
  int shift = (2 * esize) - ((tsize << 3) | imm_3);
  return std::make_pair(shift, lane_size_in_bytes_log_2);
}
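A hand-worked instance of the unpredicated path (illustrative; the 61 below is the already-extracted tsize:imm3 compound, assumed valid):

#include <cstdint>

// For a right shift by 3 on .s lanes the encoder stores
// tsize:imm3 = 2 * esize - shift = 64 - 3 = 61 = 0b0111101.
int DecodeSveShiftExample() {
  uint32_t tsize = 61 >> 3;                    // 0b0111
  uint32_t imm3 = 61 & 7;                      // 0b101
  int lane_log2 = 31 - __builtin_clz(tsize);   // 2 -> S lanes
  int esize = (1 << lane_log2) * 8;            // 32 bits
  return (2 * esize) - ((tsize << 3) | imm3);  // 64 - 61 = 3
}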

int Instruction::GetSVEMsizeFromDtype(bool is_signed, int dtype_h_lsb) const {
  Instr dtype_h = ExtractBits(dtype_h_lsb + 1, dtype_h_lsb);
  if (is_signed) {
    dtype_h = dtype_h ^ 0x3;
  }
  return dtype_h;
}


int Instruction::GetSVEEsizeFromDtype(bool is_signed, int dtype_l_lsb) const {
  Instr dtype_l = ExtractBits(dtype_l_lsb + 1, dtype_l_lsb);
  if (is_signed) {
    dtype_l = dtype_l ^ 0x3;
  }
  return dtype_l;
}
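Illustrative note (not from the commit): both helpers complement the two extracted bits for signed accesses, so a field of 0b01 becomes 0b10 when is_signed is true, i.e. (0b01 ^ 0x3) == 0b10, while the unsigned path returns the raw bits.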

int Instruction::GetSVEBitwiseImmLaneSizeInBytesLog2() const {
  int n = GetSVEBitN();
  int imm_s = GetSVEImmSetBits();
  unsigned type_bitset =
      (n << SVEImmSetBits_width) | (~imm_s & GetUintMask(SVEImmSetBits_width));

  // A lane size is constructed from the n and imm_s bits according to
  // the following table:
  //
  //    N   imms   size
  //    0  0xxxxx   32
  //    0  10xxxx   16
  //    0  110xxx    8
  //    0  1110xx    8
  //    0  11110x    8
  //    1  xxxxxx   64

  if (type_bitset == 0) {
    // Bail out early since `HighestSetBitPosition` doesn't accept zero
    // value input.
    return -1;
  }

  switch (HighestSetBitPosition(type_bitset)) {
    case 6:
      return kDRegSizeInBytesLog2;
    case 5:
      return kSRegSizeInBytesLog2;
    case 4:
      return kHRegSizeInBytesLog2;
    case 3:
    case 2:
    case 1:
      return kBRegSizeInBytesLog2;
    default:
      // RESERVED encoding.
      return -1;
  }
}
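A compact sketch of the table lookup above, assuming SVEImmSetBits_width is 6 (illustrative, not the library's code):

#include <cstdint>

int LaneSizeLog2FromNImms(unsigned n, unsigned imm_s) {
  unsigned type_bitset = (n << 6) | (~imm_s & 0x3F);
  if (type_bitset == 0) return -1;            // n == 0 with all-ones imm_s
  int top = 31 - __builtin_clz(type_bitset);  // highest set bit position
  if (top == 6) return 3;                     // 64-bit lanes (D)
  if (top == 5) return 2;                     // 32-bit lanes (S)
  if (top == 4) return 1;                     // 16-bit lanes (H)
  if (top >= 1) return 0;                     // 8-bit lanes (B)
  return -1;                                  // RESERVED encoding
}
// e.g. n = 0, imm_s = 0b101110: type_bitset = 0b010001 -> H lanes.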

int Instruction::GetSVEExtractImmediate() const {
  const int imm8h_mask = 0x001F0000;
  const int imm8l_mask = 0x00001C00;
  return ExtractBits<imm8h_mask | imm8l_mask>();
}


uint64_t Instruction::DecodeImmBitMask(int32_t n,
                                       int32_t imm_s,
                                       int32_t imm_r,
                                       int32_t size) const {
  // An integer is constructed from the n, imm_s and imm_r bits according to
  // the following table:
  //
@@ -146,7 +753,7 @@ uint64_t Instruction::GetImmLogical() const {
    return 0;
  }
  uint64_t bits = (UINT64_C(1) << ((imm_s & mask) + 1)) - 1;
-  return RepeatBitsAcrossReg(reg_size,
  return RepeatBitsAcrossReg(size,
                             RotateRight(bits, imm_r & mask, width),
                             width);
}
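Worked example for the bitmask decoder (illustrative): n = 0, imm_s = 0b000011, imm_r = 1 selects 32-bit elements with (imm_s & 31) + 1 = 4 set bits, so bits = 0xF and RotateRight(0xF, 1, 32) = 0x80000007; repeated across a 64-bit register this decodes to 0x8000000780000007.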

@@ -397,8 +1004,6 @@ void Instruction::SetImmLLiteral(const Instruction* source) {


VectorFormat VectorFormatHalfWidth(VectorFormat vform) {
-  VIXL_ASSERT(vform == kFormat8H || vform == kFormat4S || vform == kFormat2D ||
-              vform == kFormatH || vform == kFormatS || vform == kFormatD);
  switch (vform) {
    case kFormat8H:
      return kFormat8B;
@@ -406,12 +1011,20 @@ VectorFormat VectorFormatHalfWidth(VectorFormat vform) {
      return kFormat4H;
    case kFormat2D:
      return kFormat2S;
    case kFormat1Q:
      return kFormat1D;
    case kFormatH:
      return kFormatB;
    case kFormatS:
      return kFormatH;
    case kFormatD:
      return kFormatS;
    case kFormatVnH:
      return kFormatVnB;
    case kFormatVnS:
      return kFormatVnH;
    case kFormatVnD:
      return kFormatVnS;
    default:
      VIXL_UNREACHABLE();
      return kFormatUndefined;
@@ -420,8 +1033,6 @@ VectorFormat VectorFormatHalfWidth(VectorFormat vform) {


VectorFormat VectorFormatDoubleWidth(VectorFormat vform) {
-  VIXL_ASSERT(vform == kFormat8B || vform == kFormat4H || vform == kFormat2S ||
-              vform == kFormatB || vform == kFormatH || vform == kFormatS);
  switch (vform) {
    case kFormat8B:
      return kFormat8H;
@@ -435,6 +1046,12 @@ VectorFormat VectorFormatDoubleWidth(VectorFormat vform) {
      return kFormatS;
    case kFormatS:
      return kFormatD;
    case kFormatVnB:
      return kFormatVnH;
    case kFormatVnH:
      return kFormatVnS;
    case kFormatVnS:
      return kFormatVnD;
    default:
      VIXL_UNREACHABLE();
      return kFormatUndefined;
@@ -480,6 +1097,14 @@ VectorFormat VectorFormatHalfWidthDoubleLanes(VectorFormat vform) {
      return kFormat2S;
    case kFormat2D:
      return kFormat4S;
    case kFormat1Q:
      return kFormat2D;
    case kFormatVnH:
      return kFormatVnB;
    case kFormatVnS:
      return kFormatVnH;
    case kFormatVnD:
      return kFormatVnS;
    default:
      VIXL_UNREACHABLE();
      return kFormatUndefined;
@@ -518,8 +1143,8 @@ VectorFormat VectorFormatHalfLanes(VectorFormat vform) {
}


-VectorFormat ScalarFormatFromLaneSize(int laneSize) {
-  switch (laneSize) {
VectorFormat ScalarFormatFromLaneSize(int lane_size_in_bits) {
  switch (lane_size_in_bits) {
    case 8:
      return kFormatB;
    case 16:
@@ -535,6 +1160,70 @@ VectorFormat ScalarFormatFromLaneSize(int laneSize) {
}


bool IsSVEFormat(VectorFormat vform) {
  switch (vform) {
    case kFormatVnB:
    case kFormatVnH:
    case kFormatVnS:
    case kFormatVnD:
    case kFormatVnQ:
    case kFormatVnO:
      return true;
    default:
      return false;
  }
}


VectorFormat SVEFormatFromLaneSizeInBytes(int lane_size_in_bytes) {
  switch (lane_size_in_bytes) {
    case 1:
      return kFormatVnB;
    case 2:
      return kFormatVnH;
    case 4:
      return kFormatVnS;
    case 8:
      return kFormatVnD;
    case 16:
      return kFormatVnQ;
    default:
      VIXL_UNREACHABLE();
      return kFormatUndefined;
  }
}


VectorFormat SVEFormatFromLaneSizeInBits(int lane_size_in_bits) {
  switch (lane_size_in_bits) {
    case 8:
    case 16:
    case 32:
    case 64:
    case 128:
      return SVEFormatFromLaneSizeInBytes(lane_size_in_bits / kBitsPerByte);
    default:
      VIXL_UNREACHABLE();
      return kFormatUndefined;
  }
}


VectorFormat SVEFormatFromLaneSizeInBytesLog2(int lane_size_in_bytes_log2) {
  switch (lane_size_in_bytes_log2) {
    case 0:
    case 1:
    case 2:
    case 3:
    case 4:
      return SVEFormatFromLaneSizeInBytes(1 << lane_size_in_bytes_log2);
    default:
      VIXL_UNREACHABLE();
      return kFormatUndefined;
  }
}


VectorFormat ScalarFormatFromFormat(VectorFormat vform) {
  return ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
}
@@ -542,6 +1231,7 @@ VectorFormat ScalarFormatFromFormat(VectorFormat vform) {


unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) {
  VIXL_ASSERT(vform != kFormatUndefined);
  VIXL_ASSERT(!IsSVEFormat(vform));
  switch (vform) {
    case kFormatB:
      return kBRegSize;
@@ -551,14 +1241,20 @@ unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) {
    case kFormat2H:
      return kSRegSize;
    case kFormatD:
-      return kDRegSize;
    case kFormat8B:
    case kFormat4H:
    case kFormat2S:
    case kFormat1D:
      return kDRegSize;
-    default:
    case kFormat16B:
    case kFormat8H:
    case kFormat4S:
    case kFormat2D:
    case kFormat1Q:
      return kQRegSize;
    default:
      VIXL_UNREACHABLE();
      return 0;
  }
}

@@ -574,20 +1270,29 @@ unsigned LaneSizeInBitsFromFormat(VectorFormat vform) {
    case kFormatB:
    case kFormat8B:
    case kFormat16B:
    case kFormatVnB:
      return 8;
    case kFormatH:
    case kFormat2H:
    case kFormat4H:
    case kFormat8H:
    case kFormatVnH:
      return 16;
    case kFormatS:
    case kFormat2S:
    case kFormat4S:
    case kFormatVnS:
      return 32;
    case kFormatD:
    case kFormat1D:
    case kFormat2D:
    case kFormatVnD:
      return 64;
    case kFormat1Q:
    case kFormatVnQ:
      return 128;
    case kFormatVnO:
      return 256;
    default:
      VIXL_UNREACHABLE();
      return 0;
@@ -606,20 +1311,26 @@ int LaneSizeInBytesLog2FromFormat(VectorFormat vform) {
    case kFormatB:
    case kFormat8B:
    case kFormat16B:
    case kFormatVnB:
      return 0;
    case kFormatH:
    case kFormat2H:
    case kFormat4H:
    case kFormat8H:
    case kFormatVnH:
      return 1;
    case kFormatS:
    case kFormat2S:
    case kFormat4S:
    case kFormatVnS:
      return 2;
    case kFormatD:
    case kFormat1D:
    case kFormat2D:
    case kFormatVnD:
      return 3;
    case kFormatVnQ:
      return 4;
    default:
      VIXL_UNREACHABLE();
      return 0;
@@ -643,6 +1354,7 @@ int LaneCountFromFormat(VectorFormat vform) {
    case kFormat2D:
      return 2;
    case kFormat1D:
    case kFormat1Q:
    case kFormatB:
    case kFormatH:
    case kFormatS:
@@ -697,17 +1409,19 @@ bool IsVectorFormat(VectorFormat vform) {


int64_t MaxIntFromFormat(VectorFormat vform) {
-  return INT64_MAX >> (64 - LaneSizeInBitsFromFormat(vform));
  int lane_size = LaneSizeInBitsFromFormat(vform);
  return static_cast<int64_t>(GetUintMask(lane_size) >> 1);
}


int64_t MinIntFromFormat(VectorFormat vform) {
-  return INT64_MIN >> (64 - LaneSizeInBitsFromFormat(vform));
  return -MaxIntFromFormat(vform) - 1;
}


uint64_t MaxUintFromFormat(VectorFormat vform) {
-  return UINT64_MAX >> (64 - LaneSizeInBitsFromFormat(vform));
  return GetUintMask(LaneSizeInBitsFromFormat(vform));
}
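Equivalence check for the three rewrites above (illustrative): for 16-bit lanes GetUintMask(16) = 0xFFFF, so MaxUint = 0xFFFF, MaxInt = 0xFFFF >> 1 = 0x7FFF and MinInt = -0x7FFF - 1 = -0x8000, matching the old shift-based expressions; note the old form would have shifted by a negative count (64 - 128) for the new 128-bit lane sizes.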

} // namespace aarch64
} // namespace vixl

@@ -1,916 +0,0 @@
-// Copyright 2014, VIXL authors
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// * Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-// * Neither the name of ARM Limited nor the names of its contributors may be
-//   used to endorse or promote products derived from this software without
-//   specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include "instrument-aarch64.h"
-
-namespace vixl {
-namespace aarch64 {
-
-Counter::Counter(const char* name, CounterType type)
-    : count_(0), enabled_(false), type_(type) {
-  VIXL_ASSERT(name != NULL);
-  strncpy(name_, name, kCounterNameMaxLength);
-  // Make sure `name_` is always NULL-terminated, even if the source's length
-  // is higher.
-  name_[kCounterNameMaxLength - 1] = '\0';
-}
-
-void Counter::Enable() { enabled_ = true; }
-
-void Counter::Disable() { enabled_ = false; }
-
-bool Counter::IsEnabled() { return enabled_; }
-
-void Counter::Increment() {
-  if (enabled_) {
-    count_++;
-  }
-}
-
-uint64_t Counter::GetCount() {
-  uint64_t result = count_;
-  if (type_ == Gauge) {
-    // If the counter is a Gauge, reset the count after reading.
-    count_ = 0;
-  }
-  return result;
-}
-
-const char* Counter::GetName() { return name_; }
-
-CounterType Counter::GetType() { return type_; }
-
-struct CounterDescriptor {
-  const char* name;
-  CounterType type;
-};
-
-static const CounterDescriptor kCounterList[] =
-    {{"Instruction", Cumulative},
-
-     {"Move Immediate", Gauge},
-     {"Add/Sub DP", Gauge},
-     {"Logical DP", Gauge},
-     {"Other Int DP", Gauge},
-     {"FP DP", Gauge},
-
-     {"Conditional Select", Gauge},
-     {"Conditional Compare", Gauge},
-
-     {"Unconditional Branch", Gauge},
-     {"Compare and Branch", Gauge},
-     {"Test and Branch", Gauge},
-     {"Conditional Branch", Gauge},
-
-     {"Load Integer", Gauge},
-     {"Load FP", Gauge},
-     {"Load Pair", Gauge},
-     {"Load Literal", Gauge},
-
-     {"Store Integer", Gauge},
-     {"Store FP", Gauge},
-     {"Store Pair", Gauge},
-
-     {"PC Addressing", Gauge},
-     {"Other", Gauge},
-     {"NEON", Gauge},
-     {"Crypto", Gauge}};
-
-Instrument::Instrument(const char* datafile, uint64_t sample_period)
-    : output_stream_(stdout), sample_period_(sample_period) {
-  // Set up the output stream. If datafile is non-NULL, use that file. If it
-  // can't be opened, or datafile is NULL, use stdout.
-  if (datafile != NULL) {
-    output_stream_ = fopen(datafile, "w");
-    if (output_stream_ == NULL) {
-      printf("Can't open output file %s. Using stdout.\n", datafile);
-      output_stream_ = stdout;
-    }
-  }
-
-  static const int num_counters =
-      sizeof(kCounterList) / sizeof(CounterDescriptor);
-
-  // Dump an instrumentation description comment at the top of the file.
-  fprintf(output_stream_, "# counters=%d\n", num_counters);
-  fprintf(output_stream_, "# sample_period=%" PRIu64 "\n", sample_period_);
-
-  // Construct Counter objects from counter description array.
-  for (int i = 0; i < num_counters; i++) {
-    Counter* counter = new Counter(kCounterList[i].name, kCounterList[i].type);
-    counters_.push_back(counter);
-  }
-
-  DumpCounterNames();
-}
-
-Instrument::~Instrument() {
-  // Dump any remaining instruction data to the output file.
-  DumpCounters();
-
-  // Free all the counter objects.
-  std::list<Counter*>::iterator it;
-  for (it = counters_.begin(); it != counters_.end(); it++) {
-    delete *it;
-  }
-
-  if (output_stream_ != stdout) {
-    fclose(output_stream_);
-  }
-}
-
-void Instrument::Update() {
-  // Increment the instruction counter, and dump all counters if a sample
-  // period has elapsed.
-  static Counter* counter = GetCounter("Instruction");
-  VIXL_ASSERT(counter->GetType() == Cumulative);
-  counter->Increment();
-
-  if ((sample_period_ != 0) && counter->IsEnabled() &&
-      (counter->GetCount() % sample_period_) == 0) {
-    DumpCounters();
-  }
-}
-
-void Instrument::DumpCounters() {
-  // Iterate through the counter objects, dumping their values to the output
-  // stream.
-  std::list<Counter*>::const_iterator it;
-  for (it = counters_.begin(); it != counters_.end(); it++) {
-    fprintf(output_stream_, "%" PRIu64 ",", (*it)->GetCount());
-  }
-  fprintf(output_stream_, "\n");
-  fflush(output_stream_);
-}
-
-void Instrument::DumpCounterNames() {
-  // Iterate through the counter objects, dumping the counter names to the
-  // output stream.
-  std::list<Counter*>::const_iterator it;
-  for (it = counters_.begin(); it != counters_.end(); it++) {
-    fprintf(output_stream_, "%s,", (*it)->GetName());
-  }
-  fprintf(output_stream_, "\n");
-  fflush(output_stream_);
-}
-
-void Instrument::HandleInstrumentationEvent(unsigned event) {
-  switch (event) {
-    case InstrumentStateEnable:
-      Enable();
-      break;
-    case InstrumentStateDisable:
-      Disable();
-      break;
-    default:
-      DumpEventMarker(event);
-  }
-}
-
-void Instrument::DumpEventMarker(unsigned marker) {
-  // Dump an event marker to the output stream as a specially formatted comment
-  // line.
-  static Counter* counter = GetCounter("Instruction");
-
-  fprintf(output_stream_,
-          "# %c%c @ %" PRId64 "\n",
-          marker & 0xff,
-          (marker >> 8) & 0xff,
-          counter->GetCount());
-}
-
-Counter* Instrument::GetCounter(const char* name) {
-  // Get a Counter object by name from the counter list.
-  std::list<Counter*>::const_iterator it;
-  for (it = counters_.begin(); it != counters_.end(); it++) {
-    if (strcmp((*it)->GetName(), name) == 0) {
-      return *it;
-    }
-  }
-
-  // A Counter by that name does not exist: print an error message to stderr
-  // and the output file, and exit.
-  static const char* error_message =
-      "# Error: Unknown counter \"%s\". Exiting.\n";
-  fprintf(stderr, error_message, name);
-  fprintf(output_stream_, error_message, name);
-  exit(1);
-}
-
-void Instrument::Enable() {
-  std::list<Counter*>::iterator it;
-  for (it = counters_.begin(); it != counters_.end(); it++) {
-    (*it)->Enable();
-  }
-}
-
-void Instrument::Disable() {
-  std::list<Counter*>::iterator it;
-  for (it = counters_.begin(); it != counters_.end(); it++) {
-    (*it)->Disable();
-  }
-}
-
-void Instrument::VisitPCRelAddressing(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("PC Addressing");
-  counter->Increment();
-}
-
-void Instrument::VisitAddSubImmediate(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Add/Sub DP");
-  counter->Increment();
-}
-
-void Instrument::VisitLogicalImmediate(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Logical DP");
-  counter->Increment();
-}
-
-void Instrument::VisitMoveWideImmediate(const Instruction* instr) {
-  Update();
-  static Counter* counter = GetCounter("Move Immediate");
-
-  if (instr->IsMovn() && (instr->GetRd() == kZeroRegCode)) {
-    unsigned imm = instr->GetImmMoveWide();
-    HandleInstrumentationEvent(imm);
-  } else {
-    counter->Increment();
-  }
-}
-
-void Instrument::VisitBitfield(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Other Int DP");
-  counter->Increment();
-}
-
-void Instrument::VisitExtract(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Other Int DP");
-  counter->Increment();
-}
-
-void Instrument::VisitUnconditionalBranch(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Unconditional Branch");
-  counter->Increment();
-}
-
-void Instrument::VisitUnconditionalBranchToRegister(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Unconditional Branch");
-  counter->Increment();
-}
-
-void Instrument::VisitCompareBranch(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Compare and Branch");
-  counter->Increment();
-}
-
-void Instrument::VisitTestBranch(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Test and Branch");
-  counter->Increment();
-}
-
-void Instrument::VisitConditionalBranch(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Conditional Branch");
-  counter->Increment();
-}
-
-void Instrument::VisitSystem(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Other");
-  counter->Increment();
-}
-
-void Instrument::VisitException(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Other");
-  counter->Increment();
-}
-
-void Instrument::InstrumentLoadStorePair(const Instruction* instr) {
-  static Counter* load_pair_counter = GetCounter("Load Pair");
-  static Counter* store_pair_counter = GetCounter("Store Pair");
-
-  if (instr->Mask(LoadStorePairLBit) != 0) {
-    load_pair_counter->Increment();
-  } else {
-    store_pair_counter->Increment();
-  }
-}
-
-void Instrument::VisitLoadStorePairPostIndex(const Instruction* instr) {
-  Update();
-  InstrumentLoadStorePair(instr);
-}
-
-void Instrument::VisitLoadStorePairOffset(const Instruction* instr) {
-  Update();
-  InstrumentLoadStorePair(instr);
-}
-
-void Instrument::VisitLoadStorePairPreIndex(const Instruction* instr) {
-  Update();
-  InstrumentLoadStorePair(instr);
-}
-
-void Instrument::VisitLoadStorePairNonTemporal(const Instruction* instr) {
-  Update();
-  InstrumentLoadStorePair(instr);
-}
-
-void Instrument::VisitLoadStoreExclusive(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Other");
-  counter->Increment();
-}
-
-void Instrument::VisitAtomicMemory(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Other");
-  counter->Increment();
-}
-
-void Instrument::VisitLoadLiteral(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Load Literal");
-  counter->Increment();
-}
-
-void Instrument::InstrumentLoadStore(const Instruction* instr) {
-  static Counter* load_int_counter = GetCounter("Load Integer");
-  static Counter* store_int_counter = GetCounter("Store Integer");
-  static Counter* load_fp_counter = GetCounter("Load FP");
-  static Counter* store_fp_counter = GetCounter("Store FP");
-
-  switch (instr->Mask(LoadStoreMask)) {
-    case STRB_w:
-    case STRH_w:
-    case STR_w:
-      VIXL_FALLTHROUGH();
-    case STR_x:
-      store_int_counter->Increment();
-      break;
-    case STR_s:
-      VIXL_FALLTHROUGH();
-    case STR_d:
-      store_fp_counter->Increment();
-      break;
-    case LDRB_w:
-    case LDRH_w:
-    case LDR_w:
-    case LDR_x:
-    case LDRSB_x:
-    case LDRSH_x:
-    case LDRSW_x:
-    case LDRSB_w:
-      VIXL_FALLTHROUGH();
-    case LDRSH_w:
-      load_int_counter->Increment();
-      break;
-    case LDR_s:
-      VIXL_FALLTHROUGH();
-    case LDR_d:
-      load_fp_counter->Increment();
-      break;
-  }
-}
-
-void Instrument::VisitLoadStoreUnscaledOffset(const Instruction* instr) {
-  Update();
-  InstrumentLoadStore(instr);
-}
-
-void Instrument::VisitLoadStorePostIndex(const Instruction* instr) {
-  USE(instr);
-  Update();
-  InstrumentLoadStore(instr);
-}
-
-void Instrument::VisitLoadStorePreIndex(const Instruction* instr) {
-  Update();
-  InstrumentLoadStore(instr);
-}
-
-void Instrument::VisitLoadStoreRegisterOffset(const Instruction* instr) {
-  Update();
-  InstrumentLoadStore(instr);
-}
-
-void Instrument::VisitLoadStoreUnsignedOffset(const Instruction* instr) {
-  Update();
-  InstrumentLoadStore(instr);
-}
-
-void Instrument::VisitLogicalShifted(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Logical DP");
-  counter->Increment();
-}
-
-void Instrument::VisitAddSubShifted(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Add/Sub DP");
-  counter->Increment();
-}
-
-void Instrument::VisitAddSubExtended(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Add/Sub DP");
-  counter->Increment();
-}
-
-void Instrument::VisitAddSubWithCarry(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Add/Sub DP");
-  counter->Increment();
-}
-
-void Instrument::VisitConditionalCompareRegister(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Conditional Compare");
-  counter->Increment();
-}
-
-void Instrument::VisitConditionalCompareImmediate(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Conditional Compare");
-  counter->Increment();
-}
-
-void Instrument::VisitConditionalSelect(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Conditional Select");
-  counter->Increment();
-}
-
-void Instrument::VisitDataProcessing1Source(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Other Int DP");
-  counter->Increment();
-}
-
-void Instrument::VisitDataProcessing2Source(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Other Int DP");
-  counter->Increment();
-}
-
-void Instrument::VisitDataProcessing3Source(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Other Int DP");
-  counter->Increment();
-}
-
-void Instrument::VisitFPCompare(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("FP DP");
-  counter->Increment();
-}
-
-void Instrument::VisitFPConditionalCompare(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Conditional Compare");
-  counter->Increment();
-}
-
-void Instrument::VisitFPConditionalSelect(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Conditional Select");
-  counter->Increment();
-}
-
-void Instrument::VisitFPImmediate(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("FP DP");
-  counter->Increment();
-}
-
-void Instrument::VisitFPDataProcessing1Source(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("FP DP");
-  counter->Increment();
-}
-
-void Instrument::VisitFPDataProcessing2Source(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("FP DP");
-  counter->Increment();
-}
-
-void Instrument::VisitFPDataProcessing3Source(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("FP DP");
-  counter->Increment();
-}
-
-void Instrument::VisitFPIntegerConvert(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("FP DP");
-  counter->Increment();
-}
-
-void Instrument::VisitFPFixedPointConvert(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("FP DP");
-  counter->Increment();
-}
-
-void Instrument::VisitCrypto2RegSHA(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Crypto");
-  counter->Increment();
-}
-
-void Instrument::VisitCrypto3RegSHA(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Crypto");
-  counter->Increment();
-}
-
-void Instrument::VisitCryptoAES(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Crypto");
-  counter->Increment();
-}
-
-void Instrument::VisitNEON2RegMisc(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEON2RegMiscFP16(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEON3Same(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEON3SameFP16(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEON3SameExtra(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEON3Different(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEONAcrossLanes(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEONByIndexedElement(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEONCopy(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEONExtract(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEONLoadStoreMultiStruct(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEONLoadStoreMultiStructPostIndex(
-    const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEONLoadStoreSingleStruct(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEONLoadStoreSingleStructPostIndex(
-    const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEONModifiedImmediate(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEONScalar2RegMisc(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEONScalar2RegMiscFP16(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEONScalar3Diff(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEONScalar3Same(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEONScalar3SameFP16(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEONScalar3SameExtra(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEONScalarByIndexedElement(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEONScalarCopy(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEONScalarPairwise(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEONScalarShiftImmediate(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEONShiftImmediate(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEONTable(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitNEONPerm(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("NEON");
-  counter->Increment();
-}
-
-void Instrument::VisitUnallocated(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Other");
-  counter->Increment();
-}
-
-void Instrument::VisitUnimplemented(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Other");
-  counter->Increment();
-}
-
-} // namespace aarch64
-} // namespace vixl
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large

@@ -30,32 +30,32 @@ namespace vixl {
namespace aarch64 {

// CPURegList utilities.
-CPURegister CPURegList::PopLowestIndex() {
-  if (IsEmpty()) {
-    return NoCPUReg;
-  }
-  int index = CountTrailingZeros(list_);
-  VIXL_ASSERT((1 << index) & list_);
CPURegister CPURegList::PopLowestIndex(RegList mask) {
  RegList list = list_ & mask;
  if (list == 0) return NoCPUReg;
  int index = CountTrailingZeros(list);
  VIXL_ASSERT(((static_cast<RegList>(1) << index) & list) != 0);
  Remove(index);
  return CPURegister(index, size_, type_);
}


-CPURegister CPURegList::PopHighestIndex() {
-  VIXL_ASSERT(IsValid());
-  if (IsEmpty()) {
-    return NoCPUReg;
-  }
-  int index = CountLeadingZeros(list_);
CPURegister CPURegList::PopHighestIndex(RegList mask) {
  RegList list = list_ & mask;
  if (list == 0) return NoCPUReg;
  int index = CountLeadingZeros(list);
  index = kRegListSizeInBits - 1 - index;
  VIXL_ASSERT(((static_cast<RegList>(1) << index) & list) != 0);
  Remove(index);
  return CPURegister(index, size_, type_);
}
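A usage sketch for the new mask parameter (illustrative; it assumes the header gives mask an all-ones default so existing call sites keep working, and the helper name below is hypothetical):

// Pop the lowest-numbered register, but prefer x8-x15 when one is free.
CPURegister PopPreferLow8To15(CPURegList* available) {
  const RegList kX8To15 = 0x0000ff00;  // bits 8..15
  CPURegister reg = available->PopLowestIndex(kX8To15);
  if (reg.IsNone()) reg = available->PopLowestIndex(~static_cast<RegList>(0));
  return reg;
}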


bool CPURegList::IsValid() const {
-  if ((type_ == CPURegister::kRegister) || (type_ == CPURegister::kVRegister)) {
  if (type_ == CPURegister::kNoRegister) {
    // We can't use IsEmpty here because that asserts IsValid().
    return list_ == 0;
  } else {
    bool is_valid = true;
    // Try to create a CPURegister for each element in the list.
    for (int i = 0; i < kRegListSizeInBits; i++) {
@@ -64,11 +64,6 @@ bool CPURegList::IsValid() const {
      }
    }
    return is_valid;
-  } else if (type_ == CPURegister::kNoRegister) {
-    // We can't use IsEmpty here because that asserts IsValid().
-    return list_ == 0;
-  } else {
-    return false;
  }
}

@@ -149,145 +144,6 @@ const CPURegList kCalleeSavedV = CPURegList::GetCalleeSavedV();
const CPURegList kCallerSaved = CPURegList::GetCallerSaved();
const CPURegList kCallerSavedV = CPURegList::GetCallerSavedV();

-
-// Registers.
-#define WREG(n) w##n,
-const Register Register::wregisters[] = {AARCH64_REGISTER_CODE_LIST(WREG)};
-#undef WREG
-
-#define XREG(n) x##n,
-const Register Register::xregisters[] = {AARCH64_REGISTER_CODE_LIST(XREG)};
-#undef XREG
-
-#define BREG(n) b##n,
-const VRegister VRegister::bregisters[] = {AARCH64_REGISTER_CODE_LIST(BREG)};
-#undef BREG
-
-#define HREG(n) h##n,
-const VRegister VRegister::hregisters[] = {AARCH64_REGISTER_CODE_LIST(HREG)};
-#undef HREG
-
-#define SREG(n) s##n,
-const VRegister VRegister::sregisters[] = {AARCH64_REGISTER_CODE_LIST(SREG)};
-#undef SREG
-
-#define DREG(n) d##n,
-const VRegister VRegister::dregisters[] = {AARCH64_REGISTER_CODE_LIST(DREG)};
-#undef DREG
-
-#define QREG(n) q##n,
-const VRegister VRegister::qregisters[] = {AARCH64_REGISTER_CODE_LIST(QREG)};
-#undef QREG
-
-#define VREG(n) v##n,
-const VRegister VRegister::vregisters[] = {AARCH64_REGISTER_CODE_LIST(VREG)};
-#undef VREG
-
-
-const Register& Register::GetWRegFromCode(unsigned code) {
-  if (code == kSPRegInternalCode) {
-    return wsp;
-  } else {
-    VIXL_ASSERT(code < kNumberOfRegisters);
-    return wregisters[code];
-  }
-}
-
-const Register& Register::GetXRegFromCode(unsigned code) {
-  if (code == kSPRegInternalCode) {
-    return sp;
-  } else {
-    VIXL_ASSERT(code < kNumberOfRegisters);
-    return xregisters[code];
-  }
-}
-
-const VRegister& VRegister::GetBRegFromCode(unsigned code) {
-  VIXL_ASSERT(code < kNumberOfVRegisters);
-  return bregisters[code];
-}
-
-const VRegister& VRegister::GetHRegFromCode(unsigned code) {
-  VIXL_ASSERT(code < kNumberOfVRegisters);
-  return hregisters[code];
-}
-
-const VRegister& VRegister::GetSRegFromCode(unsigned code) {
-  VIXL_ASSERT(code < kNumberOfVRegisters);
-  return sregisters[code];
-}
-
-const VRegister& VRegister::GetDRegFromCode(unsigned code) {
-  VIXL_ASSERT(code < kNumberOfVRegisters);
-  return dregisters[code];
-}
-
-const VRegister& VRegister::GetQRegFromCode(unsigned code) {
-  VIXL_ASSERT(code < kNumberOfVRegisters);
-  return qregisters[code];
-}
-
-const VRegister& VRegister::GetVRegFromCode(unsigned code) {
-  VIXL_ASSERT(code < kNumberOfVRegisters);
-  return vregisters[code];
-}
-
-const Register& CPURegister::W() const {
-  VIXL_ASSERT(IsValidRegister());
-  return Register::GetWRegFromCode(code_);
-}
-
-const Register& CPURegister::X() const {
-  VIXL_ASSERT(IsValidRegister());
-  return Register::GetXRegFromCode(code_);
-}
-
-const VRegister& CPURegister::B() const {
-  VIXL_ASSERT(IsValidVRegister());
-  return VRegister::GetBRegFromCode(code_);
-}
-
-const VRegister& CPURegister::H() const {
-  VIXL_ASSERT(IsValidVRegister());
-  return VRegister::GetHRegFromCode(code_);
-}
-
-const VRegister& CPURegister::S() const {
-  VIXL_ASSERT(IsValidVRegister());
-  return VRegister::GetSRegFromCode(code_);
-}
-
-const VRegister& CPURegister::D() const {
-  VIXL_ASSERT(IsValidVRegister());
-  return VRegister::GetDRegFromCode(code_);
-}
-
-const VRegister& CPURegister::Q() const {
-  VIXL_ASSERT(IsValidVRegister());
-  return VRegister::GetQRegFromCode(code_);
-}
-
-const VRegister& CPURegister::V() const {
-  VIXL_ASSERT(IsValidVRegister());
-  return VRegister::GetVRegFromCode(code_);
-}
-

// Operand.
Operand::Operand(int64_t immediate)
    : immediate_(immediate),
@@ -296,6 +152,12 @@ Operand::Operand(int64_t immediate)
      extend_(NO_EXTEND),
      shift_amount_(0) {}

Operand::Operand(IntegerOperand immediate)
    : immediate_(immediate.AsIntN(64)),
      reg_(NoReg),
      shift_(NO_SHIFT),
      extend_(NO_EXTEND),
      shift_amount_(0) {}

Operand::Operand(Register reg, Shift shift, unsigned shift_amount)
    : reg_(reg),
@@ -471,6 +333,24 @@ MemOperand::MemOperand(Register base, const Operand& offset, AddrMode addrmode)
}


bool MemOperand::IsPlainRegister() const {
  return IsImmediateOffset() && (GetOffset() == 0);
}
|
||||||
|
|
||||||
|
|
||||||
|
bool MemOperand::IsEquivalentToPlainRegister() const {
|
||||||
|
if (regoffset_.Is(NoReg)) {
|
||||||
|
// Immediate offset, pre-index or post-index.
|
||||||
|
return GetOffset() == 0;
|
||||||
|
} else if (GetRegisterOffset().IsZero()) {
|
||||||
|
// Zero register offset, pre-index or post-index.
|
||||||
|
// We can ignore shift and extend options because they all result in zero.
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
bool MemOperand::IsImmediateOffset() const {
|
bool MemOperand::IsImmediateOffset() const {
|
||||||
return (addrmode_ == Offset) && regoffset_.Is(NoReg);
|
return (addrmode_ == Offset) && regoffset_.Is(NoReg);
|
||||||
}
|
}
|
||||||
|
@ -480,12 +360,16 @@ bool MemOperand::IsRegisterOffset() const {
|
||||||
return (addrmode_ == Offset) && !regoffset_.Is(NoReg);
|
return (addrmode_ == Offset) && !regoffset_.Is(NoReg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool MemOperand::IsPreIndex() const { return addrmode_ == PreIndex; }
|
bool MemOperand::IsPreIndex() const { return addrmode_ == PreIndex; }
|
||||||
|
|
||||||
|
|
||||||
bool MemOperand::IsPostIndex() const { return addrmode_ == PostIndex; }
|
bool MemOperand::IsPostIndex() const { return addrmode_ == PostIndex; }
|
||||||
|
|
||||||
|
bool MemOperand::IsImmediatePreIndex() const {
|
||||||
|
return IsPreIndex() && regoffset_.Is(NoReg);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool MemOperand::IsImmediatePostIndex() const {
|
||||||
|
return IsPostIndex() && regoffset_.Is(NoReg);
|
||||||
|
}
|
||||||
|
|
||||||
void MemOperand::AddOffset(int64_t offset) {
|
void MemOperand::AddOffset(int64_t offset) {
|
||||||
VIXL_ASSERT(IsImmediateOffset());
|
VIXL_ASSERT(IsImmediateOffset());
|
||||||
|
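The two predicates added above are easy to conflate, so here is a minimal usage sketch. It is illustrative only and not part of this commit; it assumes the vixl aarch64 headers are on the include path.

#include "aarch64/operands-aarch64.h"

using namespace vixl::aarch64;

void PlainRegisterForms() {
  // [x0] with an implicit zero immediate offset is a plain-register form.
  MemOperand plain(x0);
  VIXL_ASSERT(plain.IsPlainRegister());

  // [x0, #0]! writes back the (unchanged) base, so it is not a plain
  // register form, but it addresses the same location as [x0].
  MemOperand pre(x0, 0, PreIndex);
  VIXL_ASSERT(!pre.IsPlainRegister());
  VIXL_ASSERT(pre.IsEquivalentToPlainRegister());
}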
@@ -493,6 +377,63 @@ void MemOperand::AddOffset(int64_t offset) {
 }
 
+bool SVEMemOperand::IsValid() const {
+#ifdef VIXL_DEBUG
+  {
+    // It should not be possible for an SVEMemOperand to match multiple types.
+    int count = 0;
+    if (IsScalarPlusImmediate()) count++;
+    if (IsScalarPlusScalar()) count++;
+    if (IsScalarPlusVector()) count++;
+    if (IsVectorPlusImmediate()) count++;
+    if (IsVectorPlusScalar()) count++;
+    if (IsVectorPlusVector()) count++;
+    VIXL_ASSERT(count <= 1);
+  }
+#endif
+
+  // We can't have a register _and_ an immediate offset.
+  if ((offset_ != 0) && (!regoffset_.IsNone())) return false;
+
+  if (shift_amount_ != 0) {
+    // Only shift and extend modifiers can take a shift amount.
+    switch (mod_) {
+      case NO_SVE_OFFSET_MODIFIER:
+      case SVE_MUL_VL:
+        return false;
+      case SVE_LSL:
+      case SVE_UXTW:
+      case SVE_SXTW:
+        // Fall through.
+        break;
+    }
+  }
+
+  return IsScalarPlusImmediate() || IsScalarPlusScalar() ||
+         IsScalarPlusVector() || IsVectorPlusImmediate() ||
+         IsVectorPlusScalar() || IsVectorPlusVector();
+}
+
+bool SVEMemOperand::IsEquivalentToScalar() const {
+  if (IsScalarPlusImmediate()) {
+    return GetImmediateOffset() == 0;
+  }
+  if (IsScalarPlusScalar()) {
+    // We can ignore the shift because it will still result in zero.
+    return GetScalarOffset().IsZero();
+  }
+  // Forms involving vectors are never equivalent to a single scalar.
+  return false;
+}
+
+bool SVEMemOperand::IsPlainRegister() const {
+  if (IsScalarPlusImmediate()) {
+    return GetImmediateOffset() == 0;
+  }
+  return false;
+}
+
 GenericOperand::GenericOperand(const CPURegister& reg)
     : cpu_register_(reg), mem_op_size_(0) {
   if (reg.IsQ()) {
@@ -524,5 +465,5 @@ bool GenericOperand::Equals(const GenericOperand& other) const {
   }
   return false;
 }
-}
-}  // namespace vixl::aarch64
+}  // namespace aarch64
+}  // namespace vixl
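For context, the SVEMemOperand forms validated above correspond to SVE addressing modes like the following. This is a sketch under the assumption that the constructor overloads match the modifier enums shown in the diff (SVE_MUL_VL, SVE_LSL); it is not part of the commit.

#include "aarch64/operands-aarch64.h"

using namespace vixl::aarch64;

void SveAddressingForms() {
  // Scalar-plus-immediate: [x0, #2, MUL VL].
  SVEMemOperand scalar_imm(x0, 2, SVE_MUL_VL);
  VIXL_ASSERT(scalar_imm.IsScalarPlusImmediate() && scalar_imm.IsValid());

  // Scalar-plus-scalar: [x0, x1].
  SVEMemOperand scalar_scalar(x0, x1);
  VIXL_ASSERT(scalar_scalar.IsScalarPlusScalar() && scalar_scalar.IsValid());

  // [x0] with no offset is both a plain register form and equivalent to a
  // bare scalar base.
  SVEMemOperand plain(x0);
  VIXL_ASSERT(plain.IsPlainRegister() && plain.IsEquivalentToScalar());
}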
@@ -26,10 +26,10 @@
 
 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
 
-#include "simulator-aarch64.h"
-
 #include "utils-vixl.h"
 
+#include "simulator-aarch64.h"
+
 namespace vixl {
 namespace aarch64 {
 
@@ -0,0 +1,322 @@
+// Copyright 2019, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "registers-aarch64.h"
+
+#include <sstream>
+#include <string>
+
+namespace vixl {
+namespace aarch64 {
+
+std::string CPURegister::GetArchitecturalName() const {
+  std::ostringstream name;
+  if (IsZRegister()) {
+    name << 'z' << GetCode();
+    if (HasLaneSize()) {
+      name << '.' << GetLaneSizeSymbol();
+    }
+  } else if (IsPRegister()) {
+    name << 'p' << GetCode();
+    if (HasLaneSize()) {
+      name << '.' << GetLaneSizeSymbol();
+    }
+    switch (qualifiers_) {
+      case kNoQualifiers:
+        break;
+      case kMerging:
+        name << "/m";
+        break;
+      case kZeroing:
+        name << "/z";
+        break;
+    }
+  } else {
+    VIXL_UNIMPLEMENTED();
+  }
+  return name.str();
+}
+
+unsigned CPURegister::GetMaxCodeFor(CPURegister::RegisterBank bank) {
+  switch (bank) {
+    case kNoRegisterBank:
+      return 0;
+    case kRRegisterBank:
+      return Register::GetMaxCode();
+    case kVRegisterBank:
+#ifdef VIXL_HAS_CONSTEXPR
+      VIXL_STATIC_ASSERT(VRegister::GetMaxCode() == ZRegister::GetMaxCode());
+#else
+      VIXL_ASSERT(VRegister::GetMaxCode() == ZRegister::GetMaxCode());
+#endif
+      return VRegister::GetMaxCode();
+    case kPRegisterBank:
+      return PRegister::GetMaxCode();
+  }
+  VIXL_UNREACHABLE();
+  return 0;
+}
+
+bool CPURegister::IsValidRegister() const {
+  return ((code_ < kNumberOfRegisters) || (code_ == kSPRegInternalCode)) &&
+         (bank_ == kRRegisterBank) &&
+         ((size_ == kEncodedWRegSize) || (size_ == kEncodedXRegSize)) &&
+         (qualifiers_ == kNoQualifiers) && (lane_size_ == size_);
+}
+
+bool CPURegister::IsValidVRegister() const {
+  VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize);
+  return (code_ < kNumberOfVRegisters) && (bank_ == kVRegisterBank) &&
+         ((size_ >= kEncodedBRegSize) && (size_ <= kEncodedQRegSize)) &&
+         (qualifiers_ == kNoQualifiers) &&
+         (lane_size_ != kEncodedUnknownSize) && (lane_size_ <= size_);
+}
+
+bool CPURegister::IsValidFPRegister() const {
+  return IsValidVRegister() && IsFPRegister();
+}
+
+bool CPURegister::IsValidZRegister() const {
+  VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize);
+  // Z registers are valid with or without a lane size, so we don't need to
+  // check lane_size_.
+  return (code_ < kNumberOfZRegisters) && (bank_ == kVRegisterBank) &&
+         (size_ == kEncodedUnknownSize) && (qualifiers_ == kNoQualifiers);
+}
+
+bool CPURegister::IsValidPRegister() const {
+  VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize);
+  // P registers are valid with or without a lane size, so we don't need to
+  // check lane_size_.
+  return (code_ < kNumberOfPRegisters) && (bank_ == kPRegisterBank) &&
+         (size_ == kEncodedUnknownSize) &&
+         ((qualifiers_ == kNoQualifiers) || (qualifiers_ == kMerging) ||
+          (qualifiers_ == kZeroing));
+}
+
+bool CPURegister::IsValid() const {
+  return IsValidRegister() || IsValidVRegister() || IsValidZRegister() ||
+         IsValidPRegister();
+}
+
+// Most coercions simply invoke the necessary constructor.
+#define VIXL_CPUREG_COERCION_LIST(U) \
+  U(Register, W, R)                  \
+  U(Register, X, R)                  \
+  U(VRegister, B, V)                 \
+  U(VRegister, H, V)                 \
+  U(VRegister, S, V)                 \
+  U(VRegister, D, V)                 \
+  U(VRegister, Q, V)                 \
+  U(VRegister, V, V)                 \
+  U(ZRegister, Z, V)                 \
+  U(PRegister, P, P)
+#define VIXL_DEFINE_CPUREG_COERCION(RET_TYPE, CTOR_TYPE, BANK) \
+  RET_TYPE CPURegister::CTOR_TYPE() const {                    \
+    VIXL_ASSERT(GetBank() == k##BANK##RegisterBank);           \
+    return CTOR_TYPE##Register(GetCode());                     \
+  }
+VIXL_CPUREG_COERCION_LIST(VIXL_DEFINE_CPUREG_COERCION)
+#undef VIXL_CPUREG_COERCION_LIST
+#undef VIXL_DEFINE_CPUREG_COERCION
+
+// NEON lane-format coercions always return VRegisters.
+#define VIXL_CPUREG_NEON_COERCION_LIST(V) \
+  V(8, B)                                 \
+  V(16, B)                                \
+  V(2, H)                                 \
+  V(4, H)                                 \
+  V(8, H)                                 \
+  V(2, S)                                 \
+  V(4, S)                                 \
+  V(1, D)                                 \
+  V(2, D)                                 \
+  V(1, Q)
+#define VIXL_DEFINE_CPUREG_NEON_COERCION(LANES, LANE_TYPE)             \
+  VRegister VRegister::V##LANES##LANE_TYPE() const {                   \
+    VIXL_ASSERT(IsVRegister());                                        \
+    return VRegister(GetCode(), LANES * k##LANE_TYPE##RegSize, LANES); \
+  }
+VIXL_CPUREG_NEON_COERCION_LIST(VIXL_DEFINE_CPUREG_NEON_COERCION)
+#undef VIXL_CPUREG_NEON_COERCION_LIST
+#undef VIXL_DEFINE_CPUREG_NEON_COERCION
+
+// Semantic type coercion for sdot and udot.
+// TODO: Use the qualifiers_ field to distinguish this from ::S().
+VRegister VRegister::S4B() const {
+  VIXL_ASSERT(IsVRegister());
+  return SRegister(GetCode());
+}
+
+bool AreAliased(const CPURegister& reg1,
+                const CPURegister& reg2,
+                const CPURegister& reg3,
+                const CPURegister& reg4,
+                const CPURegister& reg5,
+                const CPURegister& reg6,
+                const CPURegister& reg7,
+                const CPURegister& reg8) {
+  int number_of_valid_regs = 0;
+  int number_of_valid_vregs = 0;
+  int number_of_valid_pregs = 0;
+
+  RegList unique_regs = 0;
+  RegList unique_vregs = 0;
+  RegList unique_pregs = 0;
+
+  const CPURegister regs[] = {reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8};
+
+  for (size_t i = 0; i < ArrayLength(regs); i++) {
+    switch (regs[i].GetBank()) {
+      case CPURegister::kRRegisterBank:
+        number_of_valid_regs++;
+        unique_regs |= regs[i].GetBit();
+        break;
+      case CPURegister::kVRegisterBank:
+        number_of_valid_vregs++;
+        unique_vregs |= regs[i].GetBit();
+        break;
+      case CPURegister::kPRegisterBank:
+        number_of_valid_pregs++;
+        unique_pregs |= regs[i].GetBit();
+        break;
+      case CPURegister::kNoRegisterBank:
+        VIXL_ASSERT(regs[i].IsNone());
+        break;
+    }
+  }
+
+  int number_of_unique_regs = CountSetBits(unique_regs);
+  int number_of_unique_vregs = CountSetBits(unique_vregs);
+  int number_of_unique_pregs = CountSetBits(unique_pregs);
+
+  VIXL_ASSERT(number_of_valid_regs >= number_of_unique_regs);
+  VIXL_ASSERT(number_of_valid_vregs >= number_of_unique_vregs);
+  VIXL_ASSERT(number_of_valid_pregs >= number_of_unique_pregs);
+
+  return (number_of_valid_regs != number_of_unique_regs) ||
+         (number_of_valid_vregs != number_of_unique_vregs) ||
+         (number_of_valid_pregs != number_of_unique_pregs);
+}
+
+bool AreSameSizeAndType(const CPURegister& reg1,
+                        const CPURegister& reg2,
+                        const CPURegister& reg3,
+                        const CPURegister& reg4,
+                        const CPURegister& reg5,
+                        const CPURegister& reg6,
+                        const CPURegister& reg7,
+                        const CPURegister& reg8) {
+  VIXL_ASSERT(reg1.IsValid());
+  bool match = true;
+  match &= !reg2.IsValid() || reg2.IsSameSizeAndType(reg1);
+  match &= !reg3.IsValid() || reg3.IsSameSizeAndType(reg1);
+  match &= !reg4.IsValid() || reg4.IsSameSizeAndType(reg1);
+  match &= !reg5.IsValid() || reg5.IsSameSizeAndType(reg1);
+  match &= !reg6.IsValid() || reg6.IsSameSizeAndType(reg1);
+  match &= !reg7.IsValid() || reg7.IsSameSizeAndType(reg1);
+  match &= !reg8.IsValid() || reg8.IsSameSizeAndType(reg1);
+  return match;
+}
+
+bool AreEven(const CPURegister& reg1,
+             const CPURegister& reg2,
+             const CPURegister& reg3,
+             const CPURegister& reg4,
+             const CPURegister& reg5,
+             const CPURegister& reg6,
+             const CPURegister& reg7,
+             const CPURegister& reg8) {
+  VIXL_ASSERT(reg1.IsValid());
+  bool even = (reg1.GetCode() % 2) == 0;
+  even &= !reg2.IsValid() || ((reg2.GetCode() % 2) == 0);
+  even &= !reg3.IsValid() || ((reg3.GetCode() % 2) == 0);
+  even &= !reg4.IsValid() || ((reg4.GetCode() % 2) == 0);
+  even &= !reg5.IsValid() || ((reg5.GetCode() % 2) == 0);
+  even &= !reg6.IsValid() || ((reg6.GetCode() % 2) == 0);
+  even &= !reg7.IsValid() || ((reg7.GetCode() % 2) == 0);
+  even &= !reg8.IsValid() || ((reg8.GetCode() % 2) == 0);
+  return even;
+}
+
+bool AreConsecutive(const CPURegister& reg1,
+                    const CPURegister& reg2,
+                    const CPURegister& reg3,
+                    const CPURegister& reg4) {
+  VIXL_ASSERT(reg1.IsValid());
+
+  if (!reg2.IsValid()) {
+    return true;
+  } else if (reg2.GetCode() !=
+             ((reg1.GetCode() + 1) % (reg1.GetMaxCode() + 1))) {
+    return false;
+  }
+
+  if (!reg3.IsValid()) {
+    return true;
+  } else if (reg3.GetCode() !=
+             ((reg2.GetCode() + 1) % (reg1.GetMaxCode() + 1))) {
+    return false;
+  }
+
+  if (!reg4.IsValid()) {
+    return true;
+  } else if (reg4.GetCode() !=
+             ((reg3.GetCode() + 1) % (reg1.GetMaxCode() + 1))) {
+    return false;
+  }
+
+  return true;
+}
+
+bool AreSameFormat(const CPURegister& reg1,
+                   const CPURegister& reg2,
+                   const CPURegister& reg3,
+                   const CPURegister& reg4) {
+  VIXL_ASSERT(reg1.IsValid());
+  bool match = true;
+  match &= !reg2.IsValid() || reg2.IsSameFormat(reg1);
+  match &= !reg3.IsValid() || reg3.IsSameFormat(reg1);
+  match &= !reg4.IsValid() || reg4.IsSameFormat(reg1);
+  return match;
+}
+
+bool AreSameLaneSize(const CPURegister& reg1,
+                     const CPURegister& reg2,
+                     const CPURegister& reg3,
+                     const CPURegister& reg4) {
+  VIXL_ASSERT(reg1.IsValid());
+  bool match = true;
+  match &=
+      !reg2.IsValid() || (reg2.GetLaneSizeInBits() == reg1.GetLaneSizeInBits());
+  match &=
+      !reg3.IsValid() || (reg3.GetLaneSizeInBits() == reg1.GetLaneSizeInBits());
+  match &=
+      !reg4.IsValid() || (reg4.GetLaneSizeInBits() == reg1.GetLaneSizeInBits());
+  return match;
+}
+}  // namespace aarch64
+}  // namespace vixl
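A short sketch of what the coercion and aliasing helpers in the new file above provide. This is illustrative only (not part of the commit) and assumes the predefined register names from the vixl aarch64 headers.

#include "aarch64/registers-aarch64.h"

using namespace vixl::aarch64;

void RegisterViews() {
  // W() and X() re-view the same register code within the R bank.
  CPURegister reg = x3;
  VIXL_ASSERT(reg.W().Is(w3) && reg.X().Is(x3));

  // Aliasing is tracked per bank, so x0 and v0 never alias even though
  // they share code 0, while x0 and w0 do.
  VIXL_ASSERT(AreAliased(x0, w0));
  VIXL_ASSERT(!AreAliased(x0, v0));
}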
File diff suppressed because it is too large
@@ -24,51 +24,17 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#ifdef VIXL_CODE_BUFFER_MMAP
-extern "C" {
-#include <sys/mman.h>
-}
-#endif
-
 #include "code-buffer-vixl.h"
 #include "utils-vixl.h"
 
 namespace vixl {
 
-CodeBuffer::CodeBuffer(size_t capacity)
-    : buffer_(NULL),
-      managed_(true),
-      cursor_(NULL),
-      dirty_(false),
-      capacity_(capacity) {
-  if (capacity_ == 0) {
-    return;
-  }
-#ifdef VIXL_CODE_BUFFER_MALLOC
-  buffer_ = reinterpret_cast<byte*>(malloc(capacity_));
-#elif defined(VIXL_CODE_BUFFER_MMAP)
-  buffer_ = reinterpret_cast<byte*>(mmap(NULL,
-                                         capacity,
-                                         PROT_READ | PROT_WRITE,
-                                         MAP_PRIVATE | MAP_ANONYMOUS,
-                                         -1,
-                                         0));
-#else
-#error Unknown code buffer allocator.
-#endif
-  VIXL_CHECK(buffer_ != NULL);
-  // Aarch64 instructions must be word aligned, we assert the default allocator
-  // always returns word align memory.
-  VIXL_ASSERT(IsWordAligned(buffer_));
-
-  cursor_ = buffer_;
+CodeBuffer::CodeBuffer() : buffer_(nullptr), cursor_(nullptr), dirty_(false), capacity_(0)
+{
 }
 
 CodeBuffer::CodeBuffer(byte* buffer, size_t capacity)
     : buffer_(reinterpret_cast<byte*>(buffer)),
-      managed_(false),
       cursor_(reinterpret_cast<byte*>(buffer)),
       dirty_(false),
       capacity_(capacity) {
@@ -76,42 +42,18 @@ CodeBuffer::CodeBuffer(byte* buffer, size_t capacity)
 }
 
-CodeBuffer::~CodeBuffer() {
+CodeBuffer::~CodeBuffer() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
   VIXL_ASSERT(!IsDirty());
-  if (managed_) {
-#ifdef VIXL_CODE_BUFFER_MALLOC
-    free(buffer_);
-#elif defined(VIXL_CODE_BUFFER_MMAP)
-    munmap(buffer_, capacity_);
-#else
-#error Unknown code buffer allocator.
-#endif
-  }
 }
 
-#ifdef VIXL_CODE_BUFFER_MMAP
-void CodeBuffer::SetExecutable() {
-  int ret = mprotect(buffer_, capacity_, PROT_READ | PROT_EXEC);
-  VIXL_CHECK(ret == 0);
-}
-#endif
-
-#ifdef VIXL_CODE_BUFFER_MMAP
-void CodeBuffer::SetWritable() {
-  int ret = mprotect(buffer_, capacity_, PROT_READ | PROT_WRITE);
-  VIXL_CHECK(ret == 0);
-}
-#endif
-
 void CodeBuffer::EmitString(const char* string) {
-  VIXL_ASSERT(HasSpaceFor(strlen(string) + 1));
+  const auto len = strlen(string) + 1;
+  VIXL_ASSERT(HasSpaceFor(len));
   char* dst = reinterpret_cast<char*>(cursor_);
   dirty_ = true;
-  char* null_char = strcpy(dst, string);
-  cursor_ = reinterpret_cast<byte*>(null_char) + 1;
+  memcpy(dst, string, len);
+  cursor_ = reinterpret_cast<byte*>(dst + len);
 }
 
@@ -139,48 +81,22 @@ void CodeBuffer::Align() {
 }
 
 void CodeBuffer::EmitZeroedBytes(int n) {
-  EnsureSpaceFor(n);
+  VIXL_ASSERT(HasSpaceFor(n));
   dirty_ = true;
   memset(cursor_, 0, n);
   cursor_ += n;
 }
 
 void CodeBuffer::Reset() {
-#ifdef VIXL_DEBUG
-  if (managed_) {
-    // Fill with zeros (there is no useful value common to A32 and T32).
-    memset(buffer_, 0, capacity_);
-  }
-#endif
   cursor_ = buffer_;
   SetClean();
 }
 
-void CodeBuffer::Reset(byte* buffer, size_t capacity, bool managed) {
+void CodeBuffer::Reset(byte* buffer, size_t capacity) {
   buffer_ = buffer;
   cursor_ = buffer;
   capacity_ = capacity;
-  managed_ = managed;
   SetClean();
 }
 
-void CodeBuffer::Grow(size_t new_capacity) {
-  VIXL_ASSERT(managed_);
-  VIXL_ASSERT(new_capacity > capacity_);
-  ptrdiff_t cursor_offset = GetCursorOffset();
-#ifdef VIXL_CODE_BUFFER_MALLOC
-  buffer_ = static_cast<byte*>(realloc(buffer_, new_capacity));
-  VIXL_CHECK(buffer_ != NULL);
-#elif defined(VIXL_CODE_BUFFER_MMAP)
-  buffer_ = static_cast<byte*>(
-      mremap(buffer_, capacity_, new_capacity, MREMAP_MAYMOVE));
-  VIXL_CHECK(buffer_ != MAP_FAILED);
-#else
-#error Unknown code buffer allocator.
-#endif
-
-  cursor_ = buffer_ + cursor_offset;
-  capacity_ = new_capacity;
-}
-
 }  // namespace vixl
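With the managed, mmap, and Grow() paths stripped out above, a CodeBuffer only ever wraps caller-owned memory. A sketch of the one remaining usage pattern (buffer size and names are illustrative, not from this commit):

#include <cstddef>
#include "code-buffer-vixl.h"

void WrapJitArena(vixl::byte* arena, size_t size) {
  vixl::CodeBuffer buffer(arena, size);  // never allocates or frees
  buffer.EmitString("example");          // marks the buffer dirty
  buffer.SetClean();                     // required before destruction,
                                         // since ~CodeBuffer asserts !IsDirty()
}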
@@ -26,11 +26,14 @@
 
 #include "compiler-intrinsics-vixl.h"
 
+#include "utils-vixl.h"
+
 namespace vixl {
 
 int CountLeadingSignBitsFallBack(int64_t value, int width) {
   VIXL_ASSERT(IsPowerOf2(width) && (width <= 64));
+  if (width < 64) VIXL_ASSERT(IsIntN(width, value));
   if (value >= 0) {
     return CountLeadingZeros(value, width) - 1;
   } else {
@@ -24,119 +24,71 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include "cpu-features.h"
+
 #include <ostream>
 
-#include "cpu-features.h"
 #include "globals-vixl.h"
 #include "utils-vixl.h"
 
+#if defined(__aarch64__) && defined(VIXL_INCLUDE_TARGET_AARCH64)
+#include "aarch64/cpu-aarch64.h"
+#define VIXL_USE_AARCH64_CPU_HELPERS
+#endif
+
 namespace vixl {
 
-static uint64_t MakeFeatureMask(CPUFeatures::Feature feature) {
-  if (feature == CPUFeatures::kNone) {
-    return 0;
-  } else {
-    // Check that the shift is well-defined, and that the feature is valid.
-    VIXL_STATIC_ASSERT(CPUFeatures::kNumberOfFeatures <=
-                       (sizeof(uint64_t) * 8));
-    VIXL_ASSERT(feature < CPUFeatures::kNumberOfFeatures);
-    return UINT64_C(1) << feature;
-  }
-}
-
-CPUFeatures::CPUFeatures(Feature feature0,
-                         Feature feature1,
-                         Feature feature2,
-                         Feature feature3)
-    : features_(0) {
-  Combine(feature0, feature1, feature2, feature3);
-}
-
 CPUFeatures CPUFeatures::All() {
   CPUFeatures all;
-  // Check that the shift is well-defined.
-  VIXL_STATIC_ASSERT(CPUFeatures::kNumberOfFeatures < (sizeof(uint64_t) * 8));
-  all.features_ = (UINT64_C(1) << kNumberOfFeatures) - 1;
+  all.features_.set();
   return all;
 }
 
-CPUFeatures CPUFeatures::InferFromOS() {
-  // TODO: Actually infer features from the OS.
+CPUFeatures CPUFeatures::InferFromIDRegisters() {
+  // This function assumes that kIDRegisterEmulation is available.
+  CPUFeatures features(CPUFeatures::kIDRegisterEmulation);
+#ifdef VIXL_USE_AARCH64_CPU_HELPERS
+  // Note that the Linux kernel filters these values during emulation, so the
+  // results may not exactly match the expected hardware support.
+  features.Combine(aarch64::CPU::InferCPUFeaturesFromIDRegisters());
+#endif
+  return features;
+}
+
+CPUFeatures CPUFeatures::InferFromOS(QueryIDRegistersOption option) {
+#ifdef VIXL_USE_AARCH64_CPU_HELPERS
+  return aarch64::CPU::InferCPUFeaturesFromOS(option);
+#else
+  USE(option);
   return CPUFeatures();
+#endif
 }
 
 void CPUFeatures::Combine(const CPUFeatures& other) {
   features_ |= other.features_;
 }
 
-void CPUFeatures::Combine(Feature feature0,
-                          Feature feature1,
-                          Feature feature2,
-                          Feature feature3) {
-  features_ |= MakeFeatureMask(feature0);
-  features_ |= MakeFeatureMask(feature1);
-  features_ |= MakeFeatureMask(feature2);
-  features_ |= MakeFeatureMask(feature3);
+void CPUFeatures::Combine(Feature feature) {
+  if (feature != CPUFeatures::kNone) features_.set(feature);
 }
 
 void CPUFeatures::Remove(const CPUFeatures& other) {
   features_ &= ~other.features_;
 }
 
-void CPUFeatures::Remove(Feature feature0,
-                         Feature feature1,
-                         Feature feature2,
-                         Feature feature3) {
-  features_ &= ~MakeFeatureMask(feature0);
-  features_ &= ~MakeFeatureMask(feature1);
-  features_ &= ~MakeFeatureMask(feature2);
-  features_ &= ~MakeFeatureMask(feature3);
-}
-
-CPUFeatures CPUFeatures::With(const CPUFeatures& other) const {
-  CPUFeatures f(*this);
-  f.Combine(other);
-  return f;
-}
-
-CPUFeatures CPUFeatures::With(Feature feature0,
-                              Feature feature1,
-                              Feature feature2,
-                              Feature feature3) const {
-  CPUFeatures f(*this);
-  f.Combine(feature0, feature1, feature2, feature3);
-  return f;
-}
-
-CPUFeatures CPUFeatures::Without(const CPUFeatures& other) const {
-  CPUFeatures f(*this);
-  f.Remove(other);
-  return f;
-}
-
-CPUFeatures CPUFeatures::Without(Feature feature0,
-                                 Feature feature1,
-                                 Feature feature2,
-                                 Feature feature3) const {
-  CPUFeatures f(*this);
-  f.Remove(feature0, feature1, feature2, feature3);
-  return f;
+void CPUFeatures::Remove(Feature feature) {
+  if (feature != CPUFeatures::kNone) features_.reset(feature);
 }
 
 bool CPUFeatures::Has(const CPUFeatures& other) const {
   return (features_ & other.features_) == other.features_;
 }
 
-bool CPUFeatures::Has(Feature feature0,
-                      Feature feature1,
-                      Feature feature2,
-                      Feature feature3) const {
-  uint64_t mask = MakeFeatureMask(feature0) | MakeFeatureMask(feature1) |
-                  MakeFeatureMask(feature2) | MakeFeatureMask(feature3);
-  return (features_ & mask) == mask;
+bool CPUFeatures::Has(Feature feature) const {
+  return (feature == CPUFeatures::kNone) || features_[feature];
 }
 
-size_t CPUFeatures::Count() const { return CountSetBits(features_); }
+size_t CPUFeatures::Count() const { return features_.count(); }
 
 std::ostream& operator<<(std::ostream& os, CPUFeatures::Feature feature) {
   // clang-format off
@@ -157,12 +109,9 @@ VIXL_CPU_FEATURE_LIST(VIXL_FORMAT_FEATURE)
 }
 
 CPUFeatures::const_iterator CPUFeatures::begin() const {
-  if (features_ == 0) return const_iterator(this, kNone);
-
-  int feature_number = CountTrailingZeros(features_);
-  vixl::CPUFeatures::Feature feature =
-      static_cast<CPUFeatures::Feature>(feature_number);
-  return const_iterator(this, feature);
+  // For iterators in general, it's undefined to increment `end()`, but here we
+  // control the implementation and it is safe to do this.
+  return ++end();
 }
 
 CPUFeatures::const_iterator CPUFeatures::end() const {
@@ -170,11 +119,11 @@ CPUFeatures::const_iterator CPUFeatures::end() const {
 }
 
 std::ostream& operator<<(std::ostream& os, const CPUFeatures& features) {
-  CPUFeatures::const_iterator it = features.begin();
-  while (it != features.end()) {
-    os << *it;
-    ++it;
-    if (it != features.end()) os << ", ";
+  bool need_separator = false;
+  for (CPUFeatures::Feature feature : features) {
+    if (need_separator) os << ", ";
+    need_separator = true;
+    os << feature;
   }
   return os;
 }
@@ -185,7 +134,7 @@ bool CPUFeaturesConstIterator::operator==(
   return (cpu_features_ == other.cpu_features_) && (feature_ == other.feature_);
 }
 
-CPUFeatures::Feature CPUFeaturesConstIterator::operator++() {  // Prefix
+CPUFeaturesConstIterator& CPUFeaturesConstIterator::operator++() {  // Prefix
   VIXL_ASSERT(IsValid());
   do {
     // Find the next feature. The order is unspecified.
@@ -199,11 +148,11 @@ CPUFeatures::Feature CPUFeaturesConstIterator::operator++() {  // Prefix
     // cpu_features_->Has(kNone) is always true, so this will terminate even if
     // the features list is empty.
   } while (!cpu_features_->Has(feature_));
-  return feature_;
+  return *this;
 }
 
-CPUFeatures::Feature CPUFeaturesConstIterator::operator++(int) {  // Postfix
-  CPUFeatures::Feature result = feature_;
+CPUFeaturesConstIterator CPUFeaturesConstIterator::operator++(int) {  // Postfix
+  CPUFeaturesConstIterator result = *this;
  ++(*this);
  return result;
 }
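The bitset-backed CPUFeatures above keeps the same single-feature API. A short usage sketch (feature choice and the printed output are illustrative, not from this commit):

#include <iostream>
#include "cpu-features.h"

using vixl::CPUFeatures;

void FeatureExample() {
  CPUFeatures features;
  features.Combine(CPUFeatures::kFP);
  features.Combine(CPUFeatures::kNEON);
  features.Combine(CPUFeatures::kNone);  // a no-op by design
  VIXL_ASSERT(features.Has(CPUFeatures::kFP));
  VIXL_ASSERT(features.Count() == 2);
  // Iteration uses the rewritten const_iterator; begin() is just ++end().
  std::cout << features << std::endl;  // e.g. "FP, NEON"
}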
@@ -24,10 +24,10 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#include <cstdio>
-
 #include "utils-vixl.h"
 
+#include <cstdio>
+
 namespace vixl {
 
 // The default NaN values (for FPCR.DN=1).
@@ -391,7 +391,7 @@ float FPToFloat(double value,
   }
 
   VIXL_UNREACHABLE();
-  return value;
+  return static_cast<float>(value);
 }
 
 // TODO: We should consider implementing a full FPToDouble(Float16)
@@ -1,7 +1,6 @@
 <?xml version="1.0" encoding="utf-8"?>
 <Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
   <Import Project="..\msvc\vsprops\Configurations.props" />
-
   <ItemGroup>
     <ClInclude Include="include\vixl\aarch32\assembler-aarch32.h">
       <ExcludedFromBuild Condition="'$(Platform)'!='ARM'">true</ExcludedFromBuild>
@@ -42,21 +41,27 @@
     <ClInclude Include="include\vixl\aarch64\decoder-aarch64.h">
       <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
     </ClInclude>
+    <ClInclude Include="include\vixl\aarch64\decoder-constants-aarch64.h">
+      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
+    </ClInclude>
+    <ClInclude Include="include\vixl\aarch64\decoder-visitor-map-aarch64.h">
+      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
+    </ClInclude>
     <ClInclude Include="include\vixl\aarch64\disasm-aarch64.h">
       <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
     </ClInclude>
     <ClInclude Include="include\vixl\aarch64\instructions-aarch64.h">
       <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
     </ClInclude>
-    <ClInclude Include="include\vixl\aarch64\instrument-aarch64.h">
-      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
-    </ClInclude>
     <ClInclude Include="include\vixl\aarch64\macro-assembler-aarch64.h">
       <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
     </ClInclude>
     <ClInclude Include="include\vixl\aarch64\operands-aarch64.h">
       <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
     </ClInclude>
+    <ClInclude Include="include\vixl\aarch64\registers-aarch64.h">
+      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
+    </ClInclude>
     <ClInclude Include="include\vixl\aarch64\simulator-aarch64.h">
       <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
     </ClInclude>
@@ -101,6 +106,9 @@
     <ClCompile Include="src\aarch64\assembler-aarch64.cc">
       <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
     </ClCompile>
+    <ClCompile Include="src\aarch64\assembler-sve-aarch64.cc">
+      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
+    </ClCompile>
     <ClCompile Include="src\aarch64\cpu-aarch64.cc">
       <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
     </ClCompile>
@@ -116,22 +124,22 @@
     <ClCompile Include="src\aarch64\instructions-aarch64.cc">
       <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
     </ClCompile>
-    <ClCompile Include="src\aarch64\instrument-aarch64.cc">
-      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
-    </ClCompile>
     <ClCompile Include="src\aarch64\logic-aarch64.cc">
       <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
     </ClCompile>
     <ClCompile Include="src\aarch64\macro-assembler-aarch64.cc">
       <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
     </ClCompile>
+    <ClCompile Include="src\aarch64\macro-assembler-sve-aarch64.cc">
+      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
+    </ClCompile>
     <ClCompile Include="src\aarch64\operands-aarch64.cc">
       <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
     </ClCompile>
     <ClCompile Include="src\aarch64\pointer-auth-aarch64.cc">
       <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
     </ClCompile>
-    <ClCompile Include="src\aarch64\simulator-aarch64.cc">
+    <ClCompile Include="src\aarch64\registers-aarch64.cc">
       <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
     </ClCompile>
     <ClCompile Include="src\code-buffer-vixl.cc" />
@@ -139,18 +147,14 @@
     <ClCompile Include="src\cpu-features.cc" />
     <ClCompile Include="src\utils-vixl.cc" />
   </ItemGroup>
-
   <PropertyGroup Label="Globals">
     <ProjectGuid>{8906836E-F06E-46E8-B11A-74E5E8C7B8FB}</ProjectGuid>
   </PropertyGroup>
-
   <Import Project="..\msvc\vsprops\StaticLibrary.props" />
-
   <PropertyGroup Condition="'$(Platform)'=='ARM64'">
     <VixlPreprocessorDefinitions>VIXL_INCLUDE_TARGET_AARCH64;VIXL_CODE_BUFFER_MALLOC</VixlPreprocessorDefinitions>
     <VixlIncludeDirectories>$(ProjectDir)include\vixl\aarch64</VixlIncludeDirectories>
   </PropertyGroup>
-
   <ItemDefinitionGroup>
     <ClCompile>
       <WarningLevel>TurnOffAllWarnings</WarningLevel>
@@ -159,6 +163,5 @@
       <AdditionalOptions Condition="$(Configuration.Contains(Clang))"> -Wno-deprecated-enum-enum-conversion %(AdditionalOptions)</AdditionalOptions>
     </ClCompile>
   </ItemDefinitionGroup>
-
   <Import Project="..\msvc\vsprops\Targets.props" />
 </Project>
@@ -20,45 +20,6 @@
     <ClInclude Include="include\vixl\pool-manager.h" />
     <ClInclude Include="include\vixl\pool-manager-impl.h" />
     <ClInclude Include="include\vixl\utils-vixl.h" />
-    <ClInclude Include="include\vixl\aarch64\assembler-aarch64.h">
-      <Filter>aarch64</Filter>
-    </ClInclude>
-    <ClInclude Include="include\vixl\aarch64\constants-aarch64.h">
-      <Filter>aarch64</Filter>
-    </ClInclude>
-    <ClInclude Include="include\vixl\aarch64\cpu-aarch64.h">
-      <Filter>aarch64</Filter>
-    </ClInclude>
-    <ClInclude Include="include\vixl\aarch64\cpu-features-auditor-aarch64.h">
-      <Filter>aarch64</Filter>
-    </ClInclude>
-    <ClInclude Include="include\vixl\aarch64\decoder-aarch64.h">
-      <Filter>aarch64</Filter>
-    </ClInclude>
-    <ClInclude Include="include\vixl\aarch64\disasm-aarch64.h">
-      <Filter>aarch64</Filter>
-    </ClInclude>
-    <ClInclude Include="include\vixl\aarch64\instructions-aarch64.h">
-      <Filter>aarch64</Filter>
-    </ClInclude>
-    <ClInclude Include="include\vixl\aarch64\instrument-aarch64.h">
-      <Filter>aarch64</Filter>
-    </ClInclude>
-    <ClInclude Include="include\vixl\aarch64\macro-assembler-aarch64.h">
-      <Filter>aarch64</Filter>
-    </ClInclude>
-    <ClInclude Include="include\vixl\aarch64\operands-aarch64.h">
-      <Filter>aarch64</Filter>
-    </ClInclude>
-    <ClInclude Include="include\vixl\aarch64\simulator-aarch64.h">
-      <Filter>aarch64</Filter>
-    </ClInclude>
-    <ClInclude Include="include\vixl\aarch64\simulator-constants-aarch64.h">
-      <Filter>aarch64</Filter>
-    </ClInclude>
-    <ClInclude Include="include\vixl\aarch64\abi-aarch64.h">
-      <Filter>aarch64</Filter>
-    </ClInclude>
     <ClInclude Include="include\vixl\aarch32\assembler-aarch32.h">
       <Filter>aarch32</Filter>
     </ClInclude>
@@ -81,6 +42,51 @@
       <Filter>aarch32</Filter>
     </ClInclude>
     <ClInclude Include="include\vixl\code-buffer-vixl.h" />
+    <ClInclude Include="include\vixl\aarch64\constants-aarch64.h">
+      <Filter>aarch64</Filter>
+    </ClInclude>
+    <ClInclude Include="include\vixl\aarch64\cpu-aarch64.h">
+      <Filter>aarch64</Filter>
+    </ClInclude>
+    <ClInclude Include="include\vixl\aarch64\cpu-features-auditor-aarch64.h">
+      <Filter>aarch64</Filter>
+    </ClInclude>
+    <ClInclude Include="include\vixl\aarch64\decoder-aarch64.h">
+      <Filter>aarch64</Filter>
+    </ClInclude>
+    <ClInclude Include="include\vixl\aarch64\decoder-constants-aarch64.h">
+      <Filter>aarch64</Filter>
+    </ClInclude>
+    <ClInclude Include="include\vixl\aarch64\decoder-visitor-map-aarch64.h">
+      <Filter>aarch64</Filter>
+    </ClInclude>
+    <ClInclude Include="include\vixl\aarch64\disasm-aarch64.h">
+      <Filter>aarch64</Filter>
+    </ClInclude>
+    <ClInclude Include="include\vixl\aarch64\instructions-aarch64.h">
+      <Filter>aarch64</Filter>
+    </ClInclude>
+    <ClInclude Include="include\vixl\aarch64\macro-assembler-aarch64.h">
+      <Filter>aarch64</Filter>
+    </ClInclude>
+    <ClInclude Include="include\vixl\aarch64\operands-aarch64.h">
+      <Filter>aarch64</Filter>
+    </ClInclude>
+    <ClInclude Include="include\vixl\aarch64\registers-aarch64.h">
+      <Filter>aarch64</Filter>
+    </ClInclude>
+    <ClInclude Include="include\vixl\aarch64\simulator-aarch64.h">
+      <Filter>aarch64</Filter>
+    </ClInclude>
+    <ClInclude Include="include\vixl\aarch64\simulator-constants-aarch64.h">
+      <Filter>aarch64</Filter>
+    </ClInclude>
+    <ClInclude Include="include\vixl\aarch64\abi-aarch64.h">
+      <Filter>aarch64</Filter>
+    </ClInclude>
+    <ClInclude Include="include\vixl\aarch64\assembler-aarch64.h">
+      <Filter>aarch64</Filter>
+    </ClInclude>
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="src\aarch32\disasm-aarch32.cc">
@@ -104,6 +110,19 @@
     <ClCompile Include="src\aarch32\constants-aarch32.cc">
       <Filter>aarch32</Filter>
     </ClCompile>
+    <ClCompile Include="src\compiler-intrinsics-vixl.cc" />
+    <ClCompile Include="src\cpu-features.cc" />
+    <ClCompile Include="src\utils-vixl.cc" />
+    <ClCompile Include="src\code-buffer-vixl.cc" />
+    <ClCompile Include="src\aarch64\assembler-sve-aarch64.cc">
+      <Filter>aarch64</Filter>
+    </ClCompile>
+    <ClCompile Include="src\aarch64\cpu-aarch64.cc">
+      <Filter>aarch64</Filter>
+    </ClCompile>
+    <ClCompile Include="src\aarch64\cpu-features-auditor-aarch64.cc">
+      <Filter>aarch64</Filter>
+    </ClCompile>
     <ClCompile Include="src\aarch64\decoder-aarch64.cc">
       <Filter>aarch64</Filter>
     </ClCompile>
@@ -113,36 +132,26 @@
     <ClCompile Include="src\aarch64\instructions-aarch64.cc">
       <Filter>aarch64</Filter>
     </ClCompile>
-    <ClCompile Include="src\aarch64\instrument-aarch64.cc">
-      <Filter>aarch64</Filter>
-    </ClCompile>
     <ClCompile Include="src\aarch64\logic-aarch64.cc">
       <Filter>aarch64</Filter>
     </ClCompile>
     <ClCompile Include="src\aarch64\macro-assembler-aarch64.cc">
       <Filter>aarch64</Filter>
     </ClCompile>
+    <ClCompile Include="src\aarch64\macro-assembler-sve-aarch64.cc">
+      <Filter>aarch64</Filter>
+    </ClCompile>
     <ClCompile Include="src\aarch64\operands-aarch64.cc">
      <Filter>aarch64</Filter>
     </ClCompile>
     <ClCompile Include="src\aarch64\pointer-auth-aarch64.cc">
       <Filter>aarch64</Filter>
     </ClCompile>
-    <ClCompile Include="src\aarch64\simulator-aarch64.cc">
+    <ClCompile Include="src\aarch64\registers-aarch64.cc">
       <Filter>aarch64</Filter>
     </ClCompile>
     <ClCompile Include="src\aarch64\assembler-aarch64.cc">
       <Filter>aarch64</Filter>
     </ClCompile>
-    <ClCompile Include="src\aarch64\cpu-aarch64.cc">
-      <Filter>aarch64</Filter>
-    </ClCompile>
-    <ClCompile Include="src\aarch64\cpu-features-auditor-aarch64.cc">
-      <Filter>aarch64</Filter>
-    </ClCompile>
-    <ClCompile Include="src\compiler-intrinsics-vixl.cc" />
-    <ClCompile Include="src\cpu-features.cc" />
-    <ClCompile Include="src\utils-vixl.cc" />
-    <ClCompile Include="src\code-buffer-vixl.cc" />
   </ItemGroup>
 </Project>
@ -63,10 +63,10 @@ void CPU::NewRec::AArch64Compiler::Reset(CodeCache::Block* block, u8* code_buffe
|
||||||
armAsm = &m_emitter;
|
armAsm = &m_emitter;
|
||||||
|
|
||||||
#ifdef VIXL_DEBUG
|
#ifdef VIXL_DEBUG
|
||||||
m_emitter_check = std::make_unique<vixl::CodeBufferCheckScope>(m_emitter.get(), code_buffer_space,
|
m_emitter_check = std::make_unique<vixl::CodeBufferCheckScope>(&m_emitter, code_buffer_space,
|
||||||
vixl::CodeBufferCheckScope::kDontReserveBufferSpace);
|
vixl::CodeBufferCheckScope::kDontReserveBufferSpace);
|
||||||
m_far_emitter_check = std::make_unique<vixl::CodeBufferCheckScope>(
|
m_far_emitter_check = std::make_unique<vixl::CodeBufferCheckScope>(
|
||||||
m_far_emitter.get(), far_code_space, vixl::CodeBufferCheckScope::kDontReserveBufferSpace);
|
&m_far_emitter, far_code_space, vixl::CodeBufferCheckScope::kDontReserveBufferSpace);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Need to wipe it out so it's correct when toggling fastmem.
|
// Need to wipe it out so it's correct when toggling fastmem.
|
||||||
|
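Note: the hunk above now passes &m_emitter / &m_far_emitter rather than .get(), which suggests the emitters changed from heap-allocated unique_ptrs to plain vixl::aarch64::Assembler members (the header diff further down declares them as plain members). A minimal sketch of the debug-only guard under that assumption; names follow the diff:

#include "vixl/aarch64/macro-assembler-aarch64.h"

void EmitWithGuard(vixl::aarch64::Assembler& m_emitter, size_t code_buffer_space)
{
#ifdef VIXL_DEBUG
  // Asserts at scope exit that at most code_buffer_space bytes were emitted;
  // kDontReserveBufferSpace skips growing the buffer up front.
  vixl::CodeBufferCheckScope check(&m_emitter, code_buffer_space,
                                   vixl::CodeBufferCheckScope::kDontReserveBufferSpace);
#endif
  // ... emit instructions through m_emitter here ...
}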
@@ -162,7 +162,7 @@ void CPU::NewRec::AArch64Compiler::SwitchToNearCode(bool emit_jump, vixl::aarch6
   armAsm = &m_emitter;
 }
 
-void CPU::NewRec::AArch64Compiler::EmitMov(const vixl::aarch64::WRegister& dst, u32 val)
+void CPU::NewRec::AArch64Compiler::EmitMov(const vixl::aarch64::Register& dst, u32 val)
 {
   armEmitMov(armAsm, dst, val);
 }
@@ -495,38 +495,39 @@ vixl::aarch64::MemOperand CPU::NewRec::AArch64Compiler::MipsPtr(Reg r) const
   return PTR(&g_state.regs.r[static_cast<u32>(r)]);
 }
 
-vixl::aarch64::WRegister CPU::NewRec::AArch64Compiler::CFGetRegD(CompileFlags cf) const
+vixl::aarch64::Register CPU::NewRec::AArch64Compiler::CFGetRegD(CompileFlags cf) const
 {
   DebugAssert(cf.valid_host_d);
   return WRegister(cf.host_d);
 }
 
-vixl::aarch64::WRegister CPU::NewRec::AArch64Compiler::CFGetRegS(CompileFlags cf) const
+vixl::aarch64::Register CPU::NewRec::AArch64Compiler::CFGetRegS(CompileFlags cf) const
 {
   DebugAssert(cf.valid_host_s);
   return WRegister(cf.host_s);
 }
 
-vixl::aarch64::WRegister CPU::NewRec::AArch64Compiler::CFGetRegT(CompileFlags cf) const
+vixl::aarch64::Register CPU::NewRec::AArch64Compiler::CFGetRegT(CompileFlags cf) const
 {
   DebugAssert(cf.valid_host_t);
   return WRegister(cf.host_t);
 }
 
-vixl::aarch64::WRegister CPU::NewRec::AArch64Compiler::CFGetRegLO(CompileFlags cf) const
+vixl::aarch64::Register CPU::NewRec::AArch64Compiler::CFGetRegLO(CompileFlags cf) const
 {
   DebugAssert(cf.valid_host_lo);
   return WRegister(cf.host_lo);
 }
 
-vixl::aarch64::WRegister CPU::NewRec::AArch64Compiler::CFGetRegHI(CompileFlags cf) const
+vixl::aarch64::Register CPU::NewRec::AArch64Compiler::CFGetRegHI(CompileFlags cf) const
 {
   DebugAssert(cf.valid_host_hi);
   return WRegister(cf.host_hi);
 }
 
-void CPU::NewRec::AArch64Compiler::MoveSToReg(const vixl::aarch64::WRegister& dst, CompileFlags cf)
+void CPU::NewRec::AArch64Compiler::MoveSToReg(const vixl::aarch64::Register& dst, CompileFlags cf)
 {
+  DebugAssert(dst.IsW());
   if (cf.valid_host_s)
   {
     if (cf.host_s != dst.GetCode())
@@ -547,8 +548,9 @@ void CPU::NewRec::AArch64Compiler::MoveSToReg(const vixl::aarch64::WRegister& ds
   }
 }
 
-void CPU::NewRec::AArch64Compiler::MoveTToReg(const vixl::aarch64::WRegister& dst, CompileFlags cf)
+void CPU::NewRec::AArch64Compiler::MoveTToReg(const vixl::aarch64::Register& dst, CompileFlags cf)
 {
+  DebugAssert(dst.IsW());
   if (cf.valid_host_t)
   {
     if (cf.host_t != dst.GetCode())
@@ -569,9 +571,9 @@ void CPU::NewRec::AArch64Compiler::MoveTToReg(const vixl::aarch64::WRegister& ds
   }
 }
 
-void CPU::NewRec::AArch64Compiler::MoveMIPSRegToReg(const vixl::aarch64::WRegister& dst, Reg reg)
+void CPU::NewRec::AArch64Compiler::MoveMIPSRegToReg(const vixl::aarch64::Register& dst, Reg reg)
 {
-  DebugAssert(reg < Reg::count);
+  DebugAssert(reg < Reg::count && dst.IsW());
   if (const std::optional<u32> hreg = CheckHostReg(0, Compiler::HR_TYPE_CPU_REG, reg))
     armAsm->mov(dst, WRegister(hreg.value()));
   else if (HasConstantReg(reg))
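Note: the hunks above migrate signatures from vixl::aarch64::WRegister to the width-agnostic vixl::aarch64::Register, moving the 32-bit guarantee from the type system to runtime DebugAssert(...IsW()) checks. A short, illustrative sketch (not from the patch) of how the two vixl types relate:

// WRegister is a 32-bit view that converts to the width-agnostic Register;
// IsW() queries the width and X() gives the 64-bit view of the same GPR.
#include "vixl/aarch64/registers-aarch64.h"

vixl::aarch64::Register PickHostReg(unsigned code)
{
  using namespace vixl::aarch64;
  const Register r = WRegister(code); // 32-bit view of general register `code`
  VIXL_ASSERT(r.IsW());               // mirrors the DebugAssert(dst.IsW()) pattern
  return r;                           // callers can still widen via r.X()
}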
@@ -712,8 +714,9 @@ void CPU::NewRec::AArch64Compiler::Compile_Fallback()
   m_load_delay_dirty = EMULATE_LOAD_DELAYS;
 }
 
-void CPU::NewRec::AArch64Compiler::CheckBranchTarget(const vixl::aarch64::WRegister& pcreg)
+void CPU::NewRec::AArch64Compiler::CheckBranchTarget(const vixl::aarch64::Register& pcreg)
 {
+  DebugAssert(pcreg.IsW());
   if (!g_settings.cpu_recompiler_memory_exceptions)
     return;
 
@@ -729,7 +732,7 @@ void CPU::NewRec::AArch64Compiler::CheckBranchTarget(const vixl::aarch64::WRegis
 
 void CPU::NewRec::AArch64Compiler::Compile_jr(CompileFlags cf)
 {
-  const WRegister pcreg = CFGetRegS(cf);
+  const Register pcreg = CFGetRegS(cf);
   CheckBranchTarget(pcreg);
 
   armAsm->str(pcreg, PTR(&g_state.pc));
@@ -740,7 +743,7 @@ void CPU::NewRec::AArch64Compiler::Compile_jr(CompileFlags cf)
 
 void CPU::NewRec::AArch64Compiler::Compile_jalr(CompileFlags cf)
 {
-  const WRegister pcreg = CFGetRegS(cf);
+  const Register pcreg = CFGetRegS(cf);
   if (MipsD() != Reg::zero)
     SetConstantReg(MipsD(), GetBranchReturnAddress(cf));
 
@@ -765,7 +768,7 @@ void CPU::NewRec::AArch64Compiler::Compile_bxx(CompileFlags cf, BranchCondition
   DebugAssert(cond == BranchCondition::Equal || cond == BranchCondition::NotEqual || cf.MipsT() == Reg::zero);
 
   Label taken;
-  const WRegister rs = CFGetRegS(cf);
+  const Register rs = CFGetRegS(cf);
   switch (cond)
   {
     case BranchCondition::Equal:
@@ -834,8 +837,8 @@ void CPU::NewRec::AArch64Compiler::Compile_bxx(CompileFlags cf, BranchCondition
 
 void CPU::NewRec::AArch64Compiler::Compile_addi(CompileFlags cf, bool overflow)
 {
-  const WRegister rs = CFGetRegS(cf);
-  const WRegister rt = CFGetRegT(cf);
+  const Register rs = CFGetRegS(cf);
+  const Register rt = CFGetRegT(cf);
   if (const u32 imm = inst->i.imm_sext32(); imm != 0)
   {
     if (!overflow)
@@ -882,7 +885,7 @@ void CPU::NewRec::AArch64Compiler::Compile_slti(CompileFlags cf, bool sign)
 
 void CPU::NewRec::AArch64Compiler::Compile_andi(CompileFlags cf)
 {
-  const WRegister rt = CFGetRegT(cf);
+  const Register rt = CFGetRegT(cf);
   if (const u32 imm = inst->i.imm_zext32(); imm != 0)
     armAsm->and_(rt, CFGetRegS(cf), armCheckLogicalConstant(imm));
   else
@@ -891,8 +894,8 @@ void CPU::NewRec::AArch64Compiler::Compile_andi(CompileFlags cf)
 
 void CPU::NewRec::AArch64Compiler::Compile_ori(CompileFlags cf)
 {
-  const WRegister rt = CFGetRegT(cf);
-  const WRegister rs = CFGetRegS(cf);
+  const Register rt = CFGetRegT(cf);
+  const Register rs = CFGetRegS(cf);
   if (const u32 imm = inst->i.imm_zext32(); imm != 0)
     armAsm->orr(rt, rs, armCheckLogicalConstant(imm));
   else if (rt.GetCode() != rs.GetCode())
@@ -901,8 +904,8 @@ void CPU::NewRec::AArch64Compiler::Compile_ori(CompileFlags cf)
 
 void CPU::NewRec::AArch64Compiler::Compile_xori(CompileFlags cf)
 {
-  const WRegister rt = CFGetRegT(cf);
-  const WRegister rs = CFGetRegS(cf);
+  const Register rt = CFGetRegT(cf);
+  const Register rs = CFGetRegS(cf);
   if (const u32 imm = inst->i.imm_zext32(); imm != 0)
     armAsm->eor(rt, rs, armCheckLogicalConstant(imm));
   else if (rt.GetCode() != rs.GetCode())
@@ -914,8 +917,8 @@ void CPU::NewRec::AArch64Compiler::Compile_shift(CompileFlags cf,
                                                  const vixl::aarch64::Register&,
                                                  unsigned))
 {
-  const WRegister rd = CFGetRegD(cf);
-  const WRegister rt = CFGetRegT(cf);
+  const Register rd = CFGetRegD(cf);
+  const Register rt = CFGetRegT(cf);
   if (inst->r.shamt > 0)
     (armAsm->*op)(rd, rt, inst->r.shamt);
   else if (rd.GetCode() != rt.GetCode())
@@ -943,12 +946,12 @@ void CPU::NewRec::AArch64Compiler::Compile_variable_shift(
                                                           const vixl::aarch64::Register&),
   void (vixl::aarch64::Assembler::*op_const)(const vixl::aarch64::Register&, const vixl::aarch64::Register&, unsigned))
 {
-  const WRegister rd = CFGetRegD(cf);
+  const Register rd = CFGetRegD(cf);
 
   AssertRegOrConstS(cf);
   AssertRegOrConstT(cf);
 
-  const WRegister rt = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
+  const Register rt = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
   if (!cf.valid_host_t)
     MoveTToReg(rt, cf);
 
@@ -982,17 +985,17 @@ void CPU::NewRec::AArch64Compiler::Compile_srav(CompileFlags cf)
 
 void CPU::NewRec::AArch64Compiler::Compile_mult(CompileFlags cf, bool sign)
 {
-  const WRegister rs = cf.valid_host_s ? CFGetRegS(cf) : RWARG1;
+  const Register rs = cf.valid_host_s ? CFGetRegS(cf) : RWARG1;
   if (!cf.valid_host_s)
     MoveSToReg(rs, cf);
 
-  const WRegister rt = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
+  const Register rt = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
   if (!cf.valid_host_t)
     MoveTToReg(rt, cf);
 
   // TODO: if lo/hi gets killed, we can use a 32-bit multiply
-  const WRegister lo = CFGetRegLO(cf);
-  const WRegister hi = CFGetRegHI(cf);
+  const Register lo = CFGetRegLO(cf);
+  const Register hi = CFGetRegHI(cf);
 
   (sign) ? armAsm->smull(lo.X(), rs, rt) : armAsm->umull(lo.X(), rs, rt);
   armAsm->lsr(hi.X(), lo.X(), 32);
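The smull/umull + lsr pair above computes the full 32x32 -> 64-bit product once and splits it between the MIPS LO and HI registers. The same arithmetic in plain C++ (illustrative, not part of the patch):

#include <cstdint>

void MultSemantics(uint32_t rs, uint32_t rt, bool sign, uint32_t& lo, uint32_t& hi)
{
  // smull sign-extends both 32-bit operands; umull zero-extends them.
  const uint64_t product =
    sign ? static_cast<uint64_t>(static_cast<int64_t>(static_cast<int32_t>(rs)) * static_cast<int32_t>(rt)) :
           static_cast<uint64_t>(rs) * static_cast<uint64_t>(rt);
  lo = static_cast<uint32_t>(product);       // low half -> LO
  hi = static_cast<uint32_t>(product >> 32); // lsr #32 -> HI
}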
@@ -1010,16 +1013,16 @@ void CPU::NewRec::AArch64Compiler::Compile_multu(CompileFlags cf)
 
 void CPU::NewRec::AArch64Compiler::Compile_div(CompileFlags cf)
 {
-  const WRegister rs = cf.valid_host_s ? CFGetRegS(cf) : RWARG1;
+  const Register rs = cf.valid_host_s ? CFGetRegS(cf) : RWARG1;
   if (!cf.valid_host_s)
     MoveSToReg(rs, cf);
 
-  const WRegister rt = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
+  const Register rt = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
   if (!cf.valid_host_t)
     MoveTToReg(rt, cf);
 
-  const WRegister rlo = CFGetRegLO(cf);
-  const WRegister rhi = CFGetRegHI(cf);
+  const Register rlo = CFGetRegLO(cf);
+  const Register rhi = CFGetRegHI(cf);
 
   // TODO: This could be slightly more optimal
   Label done;
@@ -1055,16 +1058,16 @@ void CPU::NewRec::AArch64Compiler::Compile_div(CompileFlags cf)
 
 void CPU::NewRec::AArch64Compiler::Compile_divu(CompileFlags cf)
 {
-  const WRegister rs = cf.valid_host_s ? CFGetRegS(cf) : RWARG1;
+  const Register rs = cf.valid_host_s ? CFGetRegS(cf) : RWARG1;
   if (!cf.valid_host_s)
     MoveSToReg(rs, cf);
 
-  const WRegister rt = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
+  const Register rt = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
   if (!cf.valid_host_t)
     MoveTToReg(rt, cf);
 
-  const WRegister rlo = CFGetRegLO(cf);
-  const WRegister rhi = CFGetRegHI(cf);
+  const Register rlo = CFGetRegLO(cf);
+  const Register rhi = CFGetRegHI(cf);
 
   Label done;
   Label not_divide_by_zero;
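Why the done / not_divide_by_zero labels exist: AArch64's sdiv/udiv simply return 0 on division by zero, while the R3000 leaves defined LO/HI values, so the recompiler branches around the hardware divide for the special cases. The unsigned case in plain C++ (a sketch of the commonly documented PSX behaviour, not the patch itself):

#include <cstdint>

void DivuSemantics(uint32_t rs, uint32_t rt, uint32_t& lo, uint32_t& hi)
{
  if (rt == 0)
  {
    lo = 0xFFFFFFFFu; // quotient forced to all ones
    hi = rs;          // remainder is the dividend
    return;
  }
  lo = rs / rt;
  hi = rs % rt;
}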
@@ -1083,8 +1086,9 @@ void CPU::NewRec::AArch64Compiler::Compile_divu(CompileFlags cf)
   armAsm->bind(&done);
 }
 
-void CPU::NewRec::AArch64Compiler::TestOverflow(const vixl::aarch64::WRegister& result)
+void CPU::NewRec::AArch64Compiler::TestOverflow(const vixl::aarch64::Register& result)
 {
+  DebugAssert(result.IsW());
   SwitchToFarCode(true, vs);
 
   BackupHostState();
@@ -1108,14 +1112,14 @@ void CPU::NewRec::AArch64Compiler::Compile_dst_op(CompileFlags cf,
   AssertRegOrConstS(cf);
   AssertRegOrConstT(cf);
 
-  const WRegister rd = CFGetRegD(cf);
+  const Register rd = CFGetRegD(cf);
   if (cf.valid_host_s && cf.valid_host_t)
   {
     (armAsm->*op)(rd, CFGetRegS(cf), CFGetRegT(cf));
   }
   else if (commutative && (cf.const_s || cf.const_t))
   {
-    const WRegister src = cf.const_s ? CFGetRegT(cf) : CFGetRegS(cf);
+    const Register src = cf.const_s ? CFGetRegT(cf) : CFGetRegS(cf);
     if (const u32 cv = GetConstantRegU32(cf.const_s ? cf.MipsS() : cf.MipsT()); cv != 0)
     {
       (armAsm->*op)(rd, src, logical ? armCheckLogicalConstant(cv) : armCheckAddSubConstant(cv));
@@ -1135,7 +1139,7 @@ void CPU::NewRec::AArch64Compiler::Compile_dst_op(CompileFlags cf,
   }
   else if (cf.const_t)
   {
-    const WRegister rs = CFGetRegS(cf);
+    const Register rs = CFGetRegS(cf);
     if (const u32 cv = GetConstantRegU32(cf.const_s ? cf.MipsS() : cf.MipsT()); cv != 0)
     {
       (armAsm->*op)(rd, rs, logical ? armCheckLogicalConstant(cv) : armCheckAddSubConstant(cv));
@@ -1184,7 +1188,7 @@ void CPU::NewRec::AArch64Compiler::Compile_and(CompileFlags cf)
   AssertRegOrConstT(cf);
 
   // special cases - and with self -> self, and with 0 -> 0
-  const WRegister regd = CFGetRegD(cf);
+  const Register regd = CFGetRegD(cf);
   if (cf.MipsS() == cf.MipsT())
   {
     armAsm->mov(regd, CFGetRegS(cf));
@@ -1205,7 +1209,7 @@ void CPU::NewRec::AArch64Compiler::Compile_or(CompileFlags cf)
   AssertRegOrConstT(cf);
 
   // or/nor with 0 -> no effect
-  const WRegister regd = CFGetRegD(cf);
+  const Register regd = CFGetRegD(cf);
   if (HasConstantRegValue(cf.MipsS(), 0) || HasConstantRegValue(cf.MipsT(), 0) || cf.MipsS() == cf.MipsT())
   {
     cf.const_s ? MoveTToReg(regd, cf) : MoveSToReg(regd, cf);
@@ -1220,7 +1224,7 @@ void CPU::NewRec::AArch64Compiler::Compile_xor(CompileFlags cf)
   AssertRegOrConstS(cf);
   AssertRegOrConstT(cf);
 
-  const WRegister regd = CFGetRegD(cf);
+  const Register regd = CFGetRegD(cf);
   if (cf.MipsS() == cf.MipsT())
   {
     // xor with self -> zero
@@ -1276,16 +1280,16 @@ void CPU::NewRec::AArch64Compiler::Compile_slt(CompileFlags cf, bool sign)
   armAsm->cset(CFGetRegD(cf), sign ? lt : lo);
 }
 
-vixl::aarch64::WRegister
+vixl::aarch64::Register
 CPU::NewRec::AArch64Compiler::ComputeLoadStoreAddressArg(CompileFlags cf,
                                                          const std::optional<VirtualMemoryAddress>& address,
-                                                         const std::optional<const vixl::aarch64::WRegister>& reg)
+                                                         const std::optional<const vixl::aarch64::Register>& reg)
 {
   const u32 imm = inst->i.imm_sext32();
   if (cf.valid_host_s && imm == 0 && !reg.has_value())
     return CFGetRegS(cf);
 
-  const WRegister dst = reg.has_value() ? reg.value() : RWARG1;
+  const Register dst = reg.has_value() ? reg.value() : RWARG1;
   if (address.has_value())
   {
     EmitMov(dst, address.value());
@@ -1294,7 +1298,7 @@ CPU::NewRec::AArch64Compiler::ComputeLoadStoreAddressArg(CompileFlags cf,
   {
     if (cf.valid_host_s)
     {
-      if (const WRegister src = CFGetRegS(cf); src.GetCode() != dst.GetCode())
+      if (const Register src = CFGetRegS(cf); src.GetCode() != dst.GetCode())
        armAsm->mov(dst, CFGetRegS(cf));
     }
     else
@@ -1319,15 +1323,16 @@ CPU::NewRec::AArch64Compiler::ComputeLoadStoreAddressArg(CompileFlags cf,
 }
 
 template<typename RegAllocFn>
-vixl::aarch64::WRegister CPU::NewRec::AArch64Compiler::GenerateLoad(const vixl::aarch64::WRegister& addr_reg,
+vixl::aarch64::Register CPU::NewRec::AArch64Compiler::GenerateLoad(const vixl::aarch64::Register& addr_reg,
                                                                    MemoryAccessSize size, bool sign, bool use_fastmem,
                                                                    const RegAllocFn& dst_reg_alloc)
 {
+  DebugAssert(addr_reg.IsW());
   if (use_fastmem)
   {
     m_cycles += Bus::RAM_READ_TICKS;
 
-    const WRegister dst = dst_reg_alloc();
+    const Register dst = dst_reg_alloc();
 
     if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
     {
@@ -1410,7 +1415,7 @@ vixl::aarch64::WRegister CPU::NewRec::AArch64Compiler::GenerateLoad(const vixl::
     SwitchToNearCode(false);
   }
 
-  const WRegister dst_reg = dst_reg_alloc();
+  const Register dst_reg = dst_reg_alloc();
   switch (size)
   {
     case MemoryAccessSize::Byte:
@@ -1434,10 +1439,11 @@ vixl::aarch64::WRegister CPU::NewRec::AArch64Compiler::GenerateLoad(const vixl::
   return dst_reg;
 }
 
-void CPU::NewRec::AArch64Compiler::GenerateStore(const vixl::aarch64::WRegister& addr_reg,
-                                                 const vixl::aarch64::WRegister& value_reg, MemoryAccessSize size,
+void CPU::NewRec::AArch64Compiler::GenerateStore(const vixl::aarch64::Register& addr_reg,
+                                                 const vixl::aarch64::Register& value_reg, MemoryAccessSize size,
                                                  bool use_fastmem)
 {
+  DebugAssert(addr_reg.IsW() && value_reg.IsW());
   if (use_fastmem)
   {
     if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
@@ -1529,8 +1535,8 @@ void CPU::NewRec::AArch64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize
     g_settings.gpu_pgxp_enable ? std::optional<WRegister>(WRegister(AllocateTempHostReg(HR_CALLEE_SAVED))) :
                                  std::optional<WRegister>();
   FlushForLoadStore(address, false, use_fastmem);
-  const WRegister addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
-  const WRegister data = GenerateLoad(addr, size, sign, use_fastmem, [this, cf]() {
+  const Register addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
+  const Register data = GenerateLoad(addr, size, sign, use_fastmem, [this, cf]() -> Register {
     if (cf.MipsT() == Reg::zero)
       return RWRET;
 
@@ -1556,7 +1562,7 @@ void CPU::NewRec::AArch64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
 {
   DebugAssert(size == MemoryAccessSize::Word && !sign);
 
-  const WRegister addr = WRegister(AllocateTempHostReg(HR_CALLEE_SAVED));
+  const Register addr = WRegister(AllocateTempHostReg(HR_CALLEE_SAVED));
   FlushForLoadStore(address, false, use_fastmem);
 
   // TODO: if address is constant, this can be simplified..
@@ -1579,7 +1585,7 @@ void CPU::NewRec::AArch64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
   // lwl/lwr from a load-delayed value takes the new value, but it itself, is load delayed, so the original value is
   // never written back. NOTE: can't trust T in cf because of the flush
   const Reg rt = inst->r.rt;
-  WRegister value;
+  Register value;
   if (m_load_delay_register == rt)
   {
     const u32 existing_ld_rt = (m_load_delay_value_register == NUM_HOST_REGS) ?
@@ -1654,8 +1660,8 @@ void CPU::NewRec::AArch64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz
     g_settings.gpu_pgxp_enable ? std::optional<WRegister>(WRegister(AllocateTempHostReg(HR_CALLEE_SAVED))) :
                                  std::optional<WRegister>();
   FlushForLoadStore(address, false, use_fastmem);
-  const WRegister addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
-  const WRegister value = GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, [this, action = action]() {
+  const Register addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
+  const Register value = GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, [this, action = action]() {
     return (action == GTERegisterAccessAction::CallHandler && g_settings.gpu_pgxp_enable) ?
              WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)) :
              RWRET;
@@ -1741,8 +1747,8 @@ void CPU::NewRec::AArch64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize
     g_settings.gpu_pgxp_enable ? std::optional<WRegister>(WRegister(AllocateTempHostReg(HR_CALLEE_SAVED))) :
                                  std::optional<WRegister>();
   FlushForLoadStore(address, true, use_fastmem);
-  const WRegister addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
-  const WRegister data = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
+  const Register addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
+  const Register data = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
   if (!cf.valid_host_t)
     MoveTToReg(RWARG2, cf);
 
@@ -1766,8 +1772,8 @@ void CPU::NewRec::AArch64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
 
   // TODO: this can take over rt's value if it's no longer needed
   // NOTE: can't trust T in cf because of the alloc
-  const WRegister addr = WRegister(AllocateTempHostReg(HR_CALLEE_SAVED));
-  const WRegister value = g_settings.gpu_pgxp_enable ? WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)) : RWARG2;
+  const Register addr = WRegister(AllocateTempHostReg(HR_CALLEE_SAVED));
+  const Register value = g_settings.gpu_pgxp_enable ? WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)) : RWARG2;
   if (g_settings.gpu_pgxp_enable)
     MoveMIPSRegToReg(value, inst->r.rt);
 
@@ -1838,10 +1844,10 @@ void CPU::NewRec::AArch64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSiz
 {
   const u32 index = static_cast<u32>(inst->r.rt.GetValue());
   const auto [ptr, action] = GetGTERegisterPointer(index, false);
-  const WRegister addr = (g_settings.gpu_pgxp_enable || action == GTERegisterAccessAction::CallHandler) ?
+  const Register addr = (g_settings.gpu_pgxp_enable || action == GTERegisterAccessAction::CallHandler) ?
                            WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)) :
                            RWARG1;
-  const WRegister data = g_settings.gpu_pgxp_enable ? WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)) : RWARG2;
+  const Register data = g_settings.gpu_pgxp_enable ? WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)) : RWARG2;
   FlushForLoadStore(address, true, use_fastmem);
   ComputeLoadStoreAddressArg(cf, address, addr);
 
@@ -1912,10 +1918,10 @@ void CPU::NewRec::AArch64Compiler::Compile_mtc0(CompileFlags cf)
 
   // for some registers, we need to test certain bits
   const bool needs_bit_test = (reg == Cop0Reg::SR);
-  const WRegister new_value = RWARG1;
-  const WRegister old_value = RWARG2;
-  const WRegister changed_bits = RWARG3;
-  const WRegister mask_reg = RWSCRATCH;
+  const Register new_value = RWARG1;
+  const Register old_value = RWARG2;
+  const Register changed_bits = RWARG3;
+  const Register mask_reg = RWSCRATCH;
 
   // Load old value
   armAsm->ldr(old_value, PTR(ptr));
@@ -1975,8 +1981,10 @@ void CPU::NewRec::AArch64Compiler::Compile_rfe(CompileFlags cf)
   TestInterrupts(RWARG1);
 }
 
-void CPU::NewRec::AArch64Compiler::TestInterrupts(const vixl::aarch64::WRegister& sr)
+void CPU::NewRec::AArch64Compiler::TestInterrupts(const vixl::aarch64::Register& sr)
 {
+  DebugAssert(sr.IsW());
+
   // if Iec == 0 then goto no_interrupt
   Label no_interrupt;
   armAsm->tbz(sr, 0, &no_interrupt);
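The Iec (current interrupt enable) flag sits in bit 0 of COP0 SR, and tbz (test bit, branch if zero) skips the whole dispatch when it is clear. The fast-path test in plain C++ (illustrative):

#include <cstdint>

// armAsm->tbz(sr, 0, &no_interrupt) branches past the dispatch exactly when
// this predicate is false.
bool InterruptsCurrentlyEnabled(uint32_t sr)
{
  return (sr & 1u) != 0; // SR.IEc
}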
@@ -43,7 +43,7 @@ protected:
 
   void Compile_Fallback() override;
 
-  void CheckBranchTarget(const vixl::aarch64::WRegister& pcreg);
+  void CheckBranchTarget(const vixl::aarch64::Register& pcreg);
   void Compile_jr(CompileFlags cf) override;
   void Compile_jalr(CompileFlags cf) override;
   void Compile_bxx(CompileFlags cf, BranchCondition cond) override;
@@ -77,7 +77,7 @@ protected:
   void Compile_multu(CompileFlags cf) override;
   void Compile_div(CompileFlags cf) override;
   void Compile_divu(CompileFlags cf) override;
-  void TestOverflow(const vixl::aarch64::WRegister& result);
+  void TestOverflow(const vixl::aarch64::Register& result);
   void Compile_dst_op(CompileFlags cf,
                       void (vixl::aarch64::Assembler::*op)(const vixl::aarch64::Register&,
                                                            const vixl::aarch64::Register&,
@@ -95,13 +95,13 @@ protected:
   void Compile_slt(CompileFlags cf) override;
   void Compile_sltu(CompileFlags cf) override;
 
-  vixl::aarch64::WRegister
+  vixl::aarch64::Register
   ComputeLoadStoreAddressArg(CompileFlags cf, const std::optional<VirtualMemoryAddress>& address,
-                             const std::optional<const vixl::aarch64::WRegister>& reg = std::nullopt);
+                             const std::optional<const vixl::aarch64::Register>& reg = std::nullopt);
   template<typename RegAllocFn>
-  vixl::aarch64::WRegister GenerateLoad(const vixl::aarch64::WRegister& addr_reg, MemoryAccessSize size, bool sign,
+  vixl::aarch64::Register GenerateLoad(const vixl::aarch64::Register& addr_reg, MemoryAccessSize size, bool sign,
                                        bool use_fastmem, const RegAllocFn& dst_reg_alloc);
-  void GenerateStore(const vixl::aarch64::WRegister& addr_reg, const vixl::aarch64::WRegister& value_reg,
+  void GenerateStore(const vixl::aarch64::Register& addr_reg, const vixl::aarch64::Register& value_reg,
                      MemoryAccessSize size, bool use_fastmem);
   void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                    const std::optional<VirtualMemoryAddress>& address) override;
@@ -116,7 +116,7 @@ protected:
   void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                     const std::optional<VirtualMemoryAddress>& address) override;
 
-  void TestInterrupts(const vixl::aarch64::WRegister& sr);
+  void TestInterrupts(const vixl::aarch64::Register& sr);
   void Compile_mtc0(CompileFlags cf) override;
   void Compile_rfe(CompileFlags cf) override;
 
@@ -128,7 +128,7 @@ protected:
                Reg arg3reg = Reg::count) override;
 
 private:
-  void EmitMov(const vixl::aarch64::WRegister& dst, u32 val);
+  void EmitMov(const vixl::aarch64::Register& dst, u32 val);
   void EmitCall(const void* ptr, bool force_inline = false);
 
   vixl::aarch64::Operand armCheckAddSubConstant(s32 val);
@@ -144,15 +144,15 @@ private:
   void AssertRegOrConstS(CompileFlags cf) const;
   void AssertRegOrConstT(CompileFlags cf) const;
   vixl::aarch64::MemOperand MipsPtr(Reg r) const;
-  vixl::aarch64::WRegister CFGetRegD(CompileFlags cf) const;
-  vixl::aarch64::WRegister CFGetRegS(CompileFlags cf) const;
-  vixl::aarch64::WRegister CFGetRegT(CompileFlags cf) const;
-  vixl::aarch64::WRegister CFGetRegLO(CompileFlags cf) const;
-  vixl::aarch64::WRegister CFGetRegHI(CompileFlags cf) const;
+  vixl::aarch64::Register CFGetRegD(CompileFlags cf) const;
+  vixl::aarch64::Register CFGetRegS(CompileFlags cf) const;
+  vixl::aarch64::Register CFGetRegT(CompileFlags cf) const;
+  vixl::aarch64::Register CFGetRegLO(CompileFlags cf) const;
+  vixl::aarch64::Register CFGetRegHI(CompileFlags cf) const;
 
-  void MoveSToReg(const vixl::aarch64::WRegister& dst, CompileFlags cf);
-  void MoveTToReg(const vixl::aarch64::WRegister& dst, CompileFlags cf);
-  void MoveMIPSRegToReg(const vixl::aarch64::WRegister& dst, Reg reg);
+  void MoveSToReg(const vixl::aarch64::Register& dst, CompileFlags cf);
+  void MoveTToReg(const vixl::aarch64::Register& dst, CompileFlags cf);
+  void MoveMIPSRegToReg(const vixl::aarch64::Register& dst, Reg reg);
 
   vixl::aarch64::Assembler m_emitter;
   vixl::aarch64::Assembler m_far_emitter;
@@ -145,8 +145,10 @@ s64 CPU::Recompiler::armGetPCDisplacement(const void* current, const void* targe
   return static_cast<s64>((reinterpret_cast<ptrdiff_t>(target) - reinterpret_cast<ptrdiff_t>(current)) >> 2);
 }
 
-void CPU::Recompiler::armMoveAddressToReg(a64::Assembler* armAsm, const a64::XRegister& reg, const void* addr)
+void CPU::Recompiler::armMoveAddressToReg(a64::Assembler* armAsm, const a64::Register& reg, const void* addr)
 {
+  DebugAssert(reg.IsX());
+
   const void* cur = armAsm->GetCursorAddress<const void*>();
   const void* current_code_ptr_page =
     reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(cur) & ~static_cast<uintptr_t>(0xFFF));
@@ -259,8 +261,13 @@ u8* CPU::Recompiler::armGetJumpTrampoline(const void* target)
 
   u8* start = s_trampoline_start_ptr + offset;
   a64::Assembler armAsm(start, TRAMPOLINE_AREA_SIZE - offset);
+#ifdef VIXL_DEBUG
+  vixl::CodeBufferCheckScope armAsmCheck(&armAsm, TRAMPOLINE_AREA_SIZE - offset,
+                                         vixl::CodeBufferCheckScope::kDontReserveBufferSpace);
+#endif
   armMoveAddressToReg(&armAsm, RXSCRATCH, target);
   armAsm.br(RXSCRATCH);
+  armAsm.FinalizeCode();
 
   const u32 size = static_cast<u32>(armAsm.GetSizeOfCodeGenerated());
   DebugAssert(size < 20);
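The trampoline hunk above wraps emission in a debug-only CodeBufferCheckScope and adds the FinalizeCode() call that vixl expects before generated code is used. A minimal sketch of the same emit pattern using vixl's MacroAssembler (the patch itself drives the raw Assembler through its own armMoveAddressToReg helper; the use of x16/IP0 as scratch here is our assumption):

#include "vixl/aarch64/macro-assembler-aarch64.h"

void EmitJumpTrampoline(vixl::aarch64::MacroAssembler* masm, const void* target)
{
  using namespace vixl::aarch64;
  masm->Mov(x16, reinterpret_cast<uint64_t>(target)); // materialize the target address
  masm->Br(x16);                                      // indirect branch through scratch
  masm->FinalizeCode();                               // validate/flush the buffer
}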
@@ -123,7 +123,7 @@ constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128;
 
 bool armIsCallerSavedRegister(u32 id);
 s64 armGetPCDisplacement(const void* current, const void* target);
-void armMoveAddressToReg(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::XRegister& reg, const void* addr);
+void armMoveAddressToReg(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr);
 void armEmitMov(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& rd, u64 imm);
 void armEmitJmp(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline);
 void armEmitCall(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline);