Dep: Update vixl to 662828c

parent d45e218da7
commit f0c2832d03
@@ -59,26 +59,30 @@ if(CPU_ARCH_ARM64)
   include/vixl/aarch64/constants-aarch64.h
   include/vixl/aarch64/cpu-aarch64.h
   include/vixl/aarch64/cpu-features-auditor-aarch64.h
   include/vixl/aarch64/debugger-aarch64.h
   include/vixl/aarch64/decoder-aarch64.h
   include/vixl/aarch64/decoder-constants-aarch64.h
   include/vixl/aarch64/decoder-visitor-map-aarch64.h
   include/vixl/aarch64/disasm-aarch64.h
   include/vixl/aarch64/instructions-aarch64.h
   include/vixl/aarch64/instrument-aarch64.h
   include/vixl/aarch64/macro-assembler-aarch64.h
   include/vixl/aarch64/operands-aarch64.h
   include/vixl/aarch64/registers-aarch64.h
   include/vixl/aarch64/simulator-aarch64.h
   include/vixl/aarch64/simulator-constants-aarch64.h
   src/aarch64/assembler-aarch64.cc
   src/aarch64/assembler-sve-aarch64.cc
   src/aarch64/cpu-aarch64.cc
   src/aarch64/cpu-features-auditor-aarch64.cc
   src/aarch64/decoder-aarch64.cc
   src/aarch64/disasm-aarch64.cc
   src/aarch64/instructions-aarch64.cc
   src/aarch64/instrument-aarch64.cc
   src/aarch64/logic-aarch64.cc
   src/aarch64/macro-assembler-aarch64.cc
   src/aarch64/macro-assembler-sve-aarch64.cc
   src/aarch64/operands-aarch64.cc
   src/aarch64/pointer-auth-aarch64.cc
   src/aarch64/simulator-aarch64.cc
   src/aarch64/registers-aarch64.cc
 )
 target_include_directories(vixl PRIVATE
   ${CMAKE_CURRENT_SOURCE_DIR}/include/vixl/aarch64
@@ -27,10 +27,10 @@
 #ifndef VIXL_AARCH32_ASSEMBLER_AARCH32_H_
 #define VIXL_AARCH32_ASSEMBLER_AARCH32_H_
 
-#include "../assembler-base-vixl.h"
+#include "assembler-base-vixl.h"
 
-#include "instructions-aarch32.h"
-#include "location-aarch32.h"
+#include "aarch32/instructions-aarch32.h"
+#include "aarch32/location-aarch32.h"
 
 namespace vixl {
 namespace aarch32 {
@@ -32,7 +32,7 @@ extern "C" {
 #include <stdint.h>
 }
 
-#include "../globals-vixl.h"
+#include "globals-vixl.h"
 
 
 namespace vixl {
@@ -33,8 +33,14 @@ extern "C" {
 
 #include <iomanip>
 
-#include "constants-aarch32.h"
-#include "operands-aarch32.h"
+#include "aarch32/constants-aarch32.h"
+#include "aarch32/operands-aarch32.h"
+
+// Microsoft Visual C++ defines a `mvn` macro that conflicts with our own
+// definition.
+#if defined(_MSC_VER) && defined(mvn)
+#undef mvn
+#endif
 
 namespace vixl {
 namespace aarch32 {
@@ -34,13 +34,14 @@ extern "C" {
 #include <algorithm>
 #include <ostream>
 
-#include "../code-buffer-vixl.h"
-#include "../utils-vixl.h"
+#include "code-buffer-vixl.h"
+#include "utils-vixl.h"
+#include "aarch32/constants-aarch32.h"
 
-#include "constants-aarch32.h"
-
-#ifdef __arm__
+#if defined(__arm__) && !defined(__SOFTFP__)
 #define HARDFLOAT __attribute__((noinline, pcs("aapcs-vfp")))
+#elif defined(_MSC_VER)
+#define HARDFLOAT __declspec(noinline)
+#else
 #define HARDFLOAT __attribute__((noinline))
 #endif
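The new branch chain gives MSVC a usable fallback: GCC/Clang hard-float ARM builds keep the AAPCS-VFP calling convention, while soft-float builds and other hosts fall back to plain noinline. A hedged example of applying the macro (the function below is hypothetical, not part of this diff):

// Hypothetical test helper: on a hard-float ARM build this keeps float
// arguments in VFP registers via pcs("aapcs-vfp"); on MSVC it now expands
// to __declspec(noinline), since GCC-style attributes are unavailable there.
HARDFLOAT float AddFloats(float a, float b) { return a + b; }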
@@ -492,6 +493,8 @@ class RegisterList {
   }
   Register GetFirstAvailableRegister() const;
   bool IsEmpty() const { return list_ == 0; }
   bool IsSingleRegister() const { return IsPowerOf2(list_); }
   int GetCount() const { return CountSetBits(list_); }
   static RegisterList Union(const RegisterList& list_1,
                             const RegisterList& list_2) {
     return RegisterList(list_1.list_ | list_2.list_);
@@ -1039,7 +1042,9 @@ class Sign {
   const char* GetName() const { return (IsPlus() ? "" : "-"); }
   bool IsPlus() const { return sign_ == plus; }
   bool IsMinus() const { return sign_ == minus; }
-  int32_t ApplyTo(uint32_t value) { return IsPlus() ? value : -value; }
+  int32_t ApplyTo(uint32_t value) {
+    return IsPlus() ? value : UnsignedNegate(value);
+  }
 
  private:
   SignType sign_;
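The old expression applied unary minus to an unsigned value; UnsignedNegate makes the wrap-around explicit. A minimal sketch of the idea, assuming vixl's helper behaves as its name suggests (the shipped version lives elsewhere in the library):

#include <cstdint>

// Sketch of an UnsignedNegate-style helper (an assumption, not copied from
// vixl): two's-complement negation written without applying unary minus to
// an unsigned operand, which MSVC flags as warning C4146. Unsigned
// arithmetic wraps modulo 2^32, so the result is well defined.
inline uint32_t UnsignedNegate(uint32_t value) { return ~value + 1; }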
@@ -36,9 +36,9 @@ extern "C" {
 #include <iomanip>
 #include <list>
 
-#include "../invalset-vixl.h"
-#include "../pool-manager.h"
-#include "../utils-vixl.h"
+#include "invalset-vixl.h"
+#include "pool-manager.h"
+#include "utils-vixl.h"
 
 #include "constants-aarch32.h"
 #include "instructions-aarch32.h"
@@ -58,12 +58,12 @@ class Location : public LocationBase<int32_t> {
   // with the assembler methods for generating instructions, but will never
   // be handled by the pool manager.
   Location()
-      : LocationBase<int32_t>(kRawLocation, 1 /* dummy size*/),
+      : LocationBase<int32_t>(kRawLocation, 1 /* placeholder size*/),
         referenced_(false) {}
 
   typedef int32_t Offset;
 
-  ~Location() {
+  ~Location() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
 #ifdef VIXL_DEBUG
     if (IsReferenced() && !IsBound()) {
       VIXL_ABORT_WITH_MSG("Location, label or literal used but not bound.\n");
@@ -217,7 +217,7 @@ class Location : public LocationBase<int32_t> {
 
  protected:
   // Types passed to LocationBase. Must be distinct for unbound Locations (not
-  // relevant for bound locations, as they don't have a correspoding
+  // relevant for bound locations, as they don't have a corresponding
   // PoolObject).
   static const int kRawLocation = 0;  // Will not be used by the pool manager.
   static const int kVeneerType = 1;
@@ -28,15 +28,15 @@
 #ifndef VIXL_AARCH32_MACRO_ASSEMBLER_AARCH32_H_
 #define VIXL_AARCH32_MACRO_ASSEMBLER_AARCH32_H_
 
-#include "../code-generation-scopes-vixl.h"
-#include "../macro-assembler-interface.h"
-#include "../pool-manager-impl.h"
-#include "../pool-manager.h"
-#include "../utils-vixl.h"
+#include "code-generation-scopes-vixl.h"
+#include "macro-assembler-interface.h"
+#include "pool-manager-impl.h"
+#include "pool-manager.h"
+#include "utils-vixl.h"
 
-#include "assembler-aarch32.h"
-#include "instructions-aarch32.h"
-#include "operands-aarch32.h"
+#include "aarch32/assembler-aarch32.h"
+#include "aarch32/instructions-aarch32.h"
+#include "aarch32/operands-aarch32.h"
 
 namespace vixl {
@@ -268,7 +268,8 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
         generate_simulator_code_(VIXL_AARCH32_GENERATE_SIMULATOR_CODE),
         pool_end_(NULL) {
 #ifdef VIXL_DEBUG
-    SetAllowMacroInstructions(true);
+    SetAllowMacroInstructions(  // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall)
+        true);
 #else
     USE(allow_macro_instructions_);
 #endif
@@ -283,7 +284,8 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
         generate_simulator_code_(VIXL_AARCH32_GENERATE_SIMULATOR_CODE),
         pool_end_(NULL) {
 #ifdef VIXL_DEBUG
-    SetAllowMacroInstructions(true);
+    SetAllowMacroInstructions(  // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall)
+        true);
 #endif
   }
   MacroAssembler(byte* buffer, size_t size, InstructionSet isa = kDefaultISA)
@@ -296,7 +298,8 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
         generate_simulator_code_(VIXL_AARCH32_GENERATE_SIMULATOR_CODE),
         pool_end_(NULL) {
 #ifdef VIXL_DEBUG
-    SetAllowMacroInstructions(true);
+    SetAllowMacroInstructions(  // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall)
+        true);
 #endif
   }
 
@@ -399,13 +402,13 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
       VIXL_ASSERT(GetBuffer()->Is32bitAligned());
     }
     // If we need to add padding, check if we have to emit the pool.
-    const int32_t pc = GetCursorOffset();
-    if (label->Needs16BitPadding(pc)) {
+    const int32_t cursor = GetCursorOffset();
+    if (label->Needs16BitPadding(cursor)) {
       const int kPaddingBytes = 2;
-      if (pool_manager_.MustEmit(pc, kPaddingBytes)) {
-        int32_t new_pc = pool_manager_.Emit(this, pc, kPaddingBytes);
-        USE(new_pc);
-        VIXL_ASSERT(new_pc == GetCursorOffset());
+      if (pool_manager_.MustEmit(cursor, kPaddingBytes)) {
+        int32_t new_cursor = pool_manager_.Emit(this, cursor, kPaddingBytes);
+        USE(new_cursor);
+        VIXL_ASSERT(new_cursor == GetCursorOffset());
       }
     }
     pool_manager_.Bind(this, label, GetCursorOffset());
@@ -427,30 +430,30 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
                                    Location* location,
                                    Condition* cond = NULL) {
     int size = info->size;
-    int32_t pc = GetCursorOffset();
+    int32_t cursor = GetCursorOffset();
     // If we need to emit a branch over the instruction, take this into account.
     if ((cond != NULL) && NeedBranch(cond)) {
       size += kBranchSize;
-      pc += kBranchSize;
+      cursor += kBranchSize;
     }
-    int32_t from = pc;
+    int32_t from = cursor;
     from += IsUsingT32() ? kT32PcDelta : kA32PcDelta;
     if (info->pc_needs_aligning) from = AlignDown(from, 4);
     int32_t min = from + info->min_offset;
     int32_t max = from + info->max_offset;
-    ForwardReference<int32_t> temp_ref(pc,
+    ForwardReference<int32_t> temp_ref(cursor,
                                        info->size,
                                        min,
                                        max,
                                        info->alignment);
     if (pool_manager_.MustEmit(GetCursorOffset(), size, &temp_ref, location)) {
-      int32_t new_pc = pool_manager_.Emit(this,
+      int32_t new_cursor = pool_manager_.Emit(this,
                                           GetCursorOffset(),
                                           info->size,
                                           &temp_ref,
                                           location);
-      USE(new_pc);
-      VIXL_ASSERT(new_pc == GetCursorOffset());
+      USE(new_cursor);
+      VIXL_ASSERT(new_cursor == GetCursorOffset());
     }
   }
 
@@ -461,13 +464,13 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
     // into account, as well as potential 16-bit padding needed to reach the
    // minimum accessible location.
     int alignment = literal->GetMaxAlignment();
-    int32_t pc = GetCursorOffset();
-    int total_size = AlignUp(pc, alignment) - pc + literal->GetSize();
-    if (literal->Needs16BitPadding(pc)) total_size += 2;
-    if (pool_manager_.MustEmit(pc, total_size)) {
-      int32_t new_pc = pool_manager_.Emit(this, pc, total_size);
-      USE(new_pc);
-      VIXL_ASSERT(new_pc == GetCursorOffset());
+    int32_t cursor = GetCursorOffset();
+    int total_size = AlignUp(cursor, alignment) - cursor + literal->GetSize();
+    if (literal->Needs16BitPadding(cursor)) total_size += 2;
+    if (pool_manager_.MustEmit(cursor, total_size)) {
+      int32_t new_cursor = pool_manager_.Emit(this, cursor, total_size);
+      USE(new_cursor);
+      VIXL_ASSERT(new_cursor == GetCursorOffset());
     }
     pool_manager_.Bind(this, literal, GetCursorOffset());
     literal->EmitPoolObject(this);
@@ -2894,8 +2897,13 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
     VIXL_ASSERT(OutsideITBlock());
     MacroEmissionCheckScope guard(this);
     ITScope it_scope(this, &cond, guard);
+    if (registers.IsSingleRegister() &&
+        (!IsUsingT32() || !registers.IsR0toR7orPC())) {
+      pop(cond, registers.GetFirstAvailableRegister());
+    } else if (!registers.IsEmpty()) {
+      pop(cond, registers);
+    }
   }
   void Pop(RegisterList registers) { Pop(al, registers); }
 
   void Pop(Condition cond, Register rt) {
@@ -2914,8 +2922,13 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
     VIXL_ASSERT(OutsideITBlock());
     MacroEmissionCheckScope guard(this);
     ITScope it_scope(this, &cond, guard);
+    if (registers.IsSingleRegister() && !registers.Includes(sp) &&
+        (!IsUsingT32() || !registers.IsR0toR7orLR())) {
+      push(cond, registers.GetFirstAvailableRegister());
+    } else if (!registers.IsEmpty()) {
+      push(cond, registers);
+    }
   }
   void Push(RegisterList registers) { Push(al, registers); }
 
   void Push(Condition cond, Register rt) {
@@ -2924,8 +2937,13 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
     VIXL_ASSERT(OutsideITBlock());
     MacroEmissionCheckScope guard(this);
     ITScope it_scope(this, &cond, guard);
+    if (IsUsingA32() && rt.IsSP()) {
+      // Only the A32 multiple-register form can push sp.
+      push(cond, RegisterList(rt));
+    } else {
+      push(cond, rt);
+    }
   }
   void Push(Register rt) { Push(al, rt); }
 
   void Qadd(Condition cond, Register rd, Register rm, Register rn) {
@@ -11170,10 +11188,11 @@ class UseScratchRegisterScope {
   uint32_t old_available_;      // kRRegister
   uint64_t old_available_vfp_;  // kVRegister
 
-  VIXL_DEBUG_NO_RETURN UseScratchRegisterScope(const UseScratchRegisterScope&) {
+  VIXL_NO_RETURN_IN_DEBUG_MODE UseScratchRegisterScope(
+      const UseScratchRegisterScope&) {
     VIXL_UNREACHABLE();
   }
-  VIXL_DEBUG_NO_RETURN void operator=(const UseScratchRegisterScope&) {
+  VIXL_NO_RETURN_IN_DEBUG_MODE void operator=(const UseScratchRegisterScope&) {
     VIXL_UNREACHABLE();
   }
 };
@@ -28,7 +28,7 @@
 #ifndef VIXL_AARCH32_OPERANDS_AARCH32_H_
 #define VIXL_AARCH32_OPERANDS_AARCH32_H_
 
-#include "instructions-aarch32.h"
+#include "aarch32/instructions-aarch32.h"
 
 namespace vixl {
 namespace aarch32 {
@@ -54,28 +54,16 @@ class Operand {
   // This is allowed to be an implicit constructor because Operand is
   // a wrapper class that doesn't normally perform any type conversion.
   Operand(uint32_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoReg),
-        shift_(LSL),
-        amount_(0),
-        rs_(NoReg) {}
+      : imm_(immediate), rm_(NoReg), shift_(LSL), amount_(0), rs_(NoReg) {}
   Operand(int32_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoReg),
-        shift_(LSL),
-        amount_(0),
-        rs_(NoReg) {}
+      : imm_(immediate), rm_(NoReg), shift_(LSL), amount_(0), rs_(NoReg) {}
 
   // rm
   // where rm is the base register
   // This is allowed to be an implicit constructor because Operand is
   // a wrapper class that doesn't normally perform any type conversion.
   Operand(Register rm)  // NOLINT(runtime/explicit)
-      : imm_(0),
-        rm_(rm),
-        shift_(LSL),
-        amount_(0),
-        rs_(NoReg) {
+      : imm_(0), rm_(rm), shift_(LSL), amount_(0), rs_(NoReg) {
     VIXL_ASSERT(rm_.IsValid());
   }
 
@@ -202,7 +190,7 @@ class Operand {
   }
 
  private:
-  // Forbid implicitely creating operands around types that cannot be encoded
+  // Forbid implicitly creating operands around types that cannot be encoded
   // into a uint32_t without loss.
 #if __cplusplus >= 201103L
   Operand(int64_t) = delete;  // NOLINT(runtime/explicit)
@@ -245,22 +233,18 @@ class NeonImmediate {
   // This is allowed to be an implicit constructor because NeonImmediate is
   // a wrapper class that doesn't normally perform any type conversion.
   NeonImmediate(uint32_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        immediate_type_(I32) {}
+      : imm_(immediate), immediate_type_(I32) {}
   NeonImmediate(int immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        immediate_type_(I32) {}
+      : imm_(immediate), immediate_type_(I32) {}
 
   // { #<immediate> }
   // where <immediate> is a 64 bit number
   // This is allowed to be an implicit constructor because NeonImmediate is
   // a wrapper class that doesn't normally perform any type conversion.
   NeonImmediate(int64_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        immediate_type_(I64) {}
+      : imm_(immediate), immediate_type_(I64) {}
   NeonImmediate(uint64_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        immediate_type_(I64) {}
+      : imm_(immediate), immediate_type_(I64) {}
 
   // { #<immediate> }
   // where <immediate> is a non zero floating point number which can be encoded
@@ -268,11 +252,9 @@ class NeonImmediate {
   // This is allowed to be an implicit constructor because NeonImmediate is
   // a wrapper class that doesn't normally perform any type conversion.
   NeonImmediate(float immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        immediate_type_(F32) {}
+      : imm_(immediate), immediate_type_(F32) {}
   NeonImmediate(double immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        immediate_type_(F64) {}
+      : imm_(immediate), immediate_type_(F64) {}
 
   NeonImmediate(const NeonImmediate& src)
       : imm_(src.imm_), immediate_type_(src.immediate_type_) {}
@@ -311,7 +293,7 @@ class NeonImmediate {
 
   bool IsInteger32() const { return immediate_type_.Is(I32); }
   bool IsInteger64() const { return immediate_type_.Is(I64); }
-  bool IsInteger() const { return IsInteger32() | IsInteger64(); }
+  bool IsInteger() const { return IsInteger32() || IsInteger64(); }
   bool IsFloat() const { return immediate_type_.Is(F32); }
   bool IsDouble() const { return immediate_type_.Is(F64); }
   bool IsFloatZero() const {
@@ -374,29 +356,21 @@ std::ostream& operator<<(std::ostream& os, const NeonImmediate& operand);
 class NeonOperand {
  public:
   NeonOperand(int32_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoDReg) {}
+      : imm_(immediate), rm_(NoDReg) {}
   NeonOperand(uint32_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoDReg) {}
+      : imm_(immediate), rm_(NoDReg) {}
   NeonOperand(int64_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoDReg) {}
+      : imm_(immediate), rm_(NoDReg) {}
   NeonOperand(uint64_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoDReg) {}
+      : imm_(immediate), rm_(NoDReg) {}
   NeonOperand(float immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoDReg) {}
+      : imm_(immediate), rm_(NoDReg) {}
   NeonOperand(double immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoDReg) {}
+      : imm_(immediate), rm_(NoDReg) {}
   NeonOperand(const NeonImmediate& imm)  // NOLINT(runtime/explicit)
-      : imm_(imm),
-        rm_(NoDReg) {}
+      : imm_(imm), rm_(NoDReg) {}
   NeonOperand(const VRegister& rm)  // NOLINT(runtime/explicit)
-      : imm_(0),
-        rm_(rm) {
+      : imm_(0), rm_(rm) {
     VIXL_ASSERT(rm_.IsValid());
   }
 
@@ -641,7 +615,7 @@ class ImmediateVorn : public ImmediateVorr {
 // - a shifted index register <Rm>, <shift> #<amount>
 //
 // The index register may have an associated {+/-} sign,
-// which if ommitted, defaults to + .
+// which if omitted, defaults to + .
 //
 // We have two constructors for the offset:
 //
@@ -105,7 +105,7 @@ class ABI {
 
     // Stage C.1
     if (is_floating_point_type && (NSRN_ < 8)) {
-      return GenericOperand(FPRegister(NSRN_++, size * kBitsPerByte));
+      return GenericOperand(VRegister(NSRN_++, size * kBitsPerByte));
     }
     // Stages C.2, C.3, and C.4: Unsupported. Caught by the assertions above.
     // Stages C.5 and C.6
@@ -159,8 +159,8 @@ template <>
 inline GenericOperand ABI::GetReturnGenericOperand<void>() const {
   return GenericOperand();
 }
-}
-}  // namespace vixl::aarch64
+}  // namespace aarch64
+}  // namespace vixl
 
 #endif  // VIXL_AARCH64_ABI_AARCH64_H_
File diff suppressed because it is too large

File diff suppressed because it is too large
@@ -27,13 +27,219 @@
 #ifndef VIXL_CPU_AARCH64_H
 #define VIXL_CPU_AARCH64_H
 
 #include "../cpu-features.h"
 #include "../globals-vixl.h"
 
 #include "instructions-aarch64.h"
+#include "simulator-aarch64.h"
+
+#ifndef VIXL_INCLUDE_TARGET_AARCH64
+// The supporting .cc file is only compiled when the A64 target is selected.
+// Throw an explicit error now to avoid a harder-to-debug linker error later.
+//
+// These helpers _could_ work on any AArch64 host, even when generating AArch32
+// code, but we don't support this because the available features may differ
+// between AArch32 and AArch64 on the same platform, so basing AArch32 code
+// generation on aarch64::CPU features is probably broken.
+#error cpu-aarch64.h requires VIXL_INCLUDE_TARGET_AARCH64 (scons target=a64).
+#endif
 
 namespace vixl {
 namespace aarch64 {
 
+// A CPU ID register, for use with CPUFeatures::kIDRegisterEmulation. Fields
+// specific to each register are described in relevant subclasses.
+class IDRegister {
+ protected:
+  explicit IDRegister(uint64_t value = 0) : value_(value) {}
+
+  class Field {
+   public:
+    enum Type { kUnsigned, kSigned };
+
+    static const int kMaxWidthInBits = 4;
+
+    // This needs to be constexpr so that fields have "constant initialisation".
+    // This avoids initialisation order problems when these values are used to
+    // (dynamically) initialise static variables, etc.
+    explicit constexpr Field(int lsb,
+                             int bitWidth = kMaxWidthInBits,
+                             Type type = kUnsigned)
+        : lsb_(lsb), bitWidth_(bitWidth), type_(type) {}
+
+    int GetWidthInBits() const { return bitWidth_; }
+    int GetLsb() const { return lsb_; }
+    int GetMsb() const { return lsb_ + GetWidthInBits() - 1; }
+    Type GetType() const { return type_; }
+
+   private:
+    int lsb_;
+    int bitWidth_;
+    Type type_;
+  };
+
+ public:
+  // Extract the specified field, performing sign-extension for signed fields.
+  // This allows us to implement the 'value >= number' detection mechanism
+  // recommended by the Arm ARM, for both signed and unsigned fields.
+  int Get(Field field) const;
+
+ private:
+  uint64_t value_;
+};
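Get(Field) is only declared here; a sketch of the sign-aware extraction it implies follows. This is an assumption about the .cc side, written free-standing for clarity:

#include <cstdint>

// Assumed behaviour of IDRegister::Get: unsigned fields zero-extend and
// signed fields sign-extend, so callers can use the 'value >= n' feature
// checks the Arm ARM recommends for both field types.
int GetFieldValue(uint64_t reg, int lsb, int width, bool is_signed) {
  uint64_t bits = (reg >> lsb) & ((uint64_t{1} << width) - 1);
  if (is_signed && (((bits >> (width - 1)) & 1) != 0)) {
    bits |= ~uint64_t{0} << width;  // Propagate the sign bit upwards.
  }
  return static_cast<int>(static_cast<int64_t>(bits));
}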
+
+class AA64PFR0 : public IDRegister {
+ public:
+  explicit AA64PFR0(uint64_t value) : IDRegister(value) {}
+
+  CPUFeatures GetCPUFeatures() const;
+
+ private:
+  static const Field kFP;
+  static const Field kAdvSIMD;
+  static const Field kRAS;
+  static const Field kSVE;
+  static const Field kDIT;
+  static const Field kCSV2;
+  static const Field kCSV3;
+};
+
+class AA64PFR1 : public IDRegister {
+ public:
+  explicit AA64PFR1(uint64_t value) : IDRegister(value) {}
+
+  CPUFeatures GetCPUFeatures() const;
+
+ private:
+  static const Field kBT;
+  static const Field kSSBS;
+  static const Field kMTE;
+  static const Field kSME;
+};
+
+class AA64ISAR0 : public IDRegister {
+ public:
+  explicit AA64ISAR0(uint64_t value) : IDRegister(value) {}
+
+  CPUFeatures GetCPUFeatures() const;
+
+ private:
+  static const Field kAES;
+  static const Field kSHA1;
+  static const Field kSHA2;
+  static const Field kCRC32;
+  static const Field kAtomic;
+  static const Field kRDM;
+  static const Field kSHA3;
+  static const Field kSM3;
+  static const Field kSM4;
+  static const Field kDP;
+  static const Field kFHM;
+  static const Field kTS;
+  static const Field kRNDR;
+};
+
+class AA64ISAR1 : public IDRegister {
+ public:
+  explicit AA64ISAR1(uint64_t value) : IDRegister(value) {}
+
+  CPUFeatures GetCPUFeatures() const;
+
+ private:
+  static const Field kDPB;
+  static const Field kAPA;
+  static const Field kAPI;
+  static const Field kJSCVT;
+  static const Field kFCMA;
+  static const Field kLRCPC;
+  static const Field kGPA;
+  static const Field kGPI;
+  static const Field kFRINTTS;
+  static const Field kSB;
+  static const Field kSPECRES;
+  static const Field kBF16;
+  static const Field kDGH;
+  static const Field kI8MM;
+};
+
+class AA64ISAR2 : public IDRegister {
+ public:
+  explicit AA64ISAR2(uint64_t value) : IDRegister(value) {}
+
+  CPUFeatures GetCPUFeatures() const;
+
+ private:
+  static const Field kWFXT;
+  static const Field kRPRES;
+  static const Field kMOPS;
+  static const Field kCSSC;
+};
+
+class AA64MMFR0 : public IDRegister {
+ public:
+  explicit AA64MMFR0(uint64_t value) : IDRegister(value) {}
+
+  CPUFeatures GetCPUFeatures() const;
+
+ private:
+  static const Field kECV;
+};
+
+class AA64MMFR1 : public IDRegister {
+ public:
+  explicit AA64MMFR1(uint64_t value) : IDRegister(value) {}
+
+  CPUFeatures GetCPUFeatures() const;
+
+ private:
+  static const Field kLO;
+  static const Field kAFP;
+};
+
+class AA64MMFR2 : public IDRegister {
+ public:
+  explicit AA64MMFR2(uint64_t value) : IDRegister(value) {}
+
+  CPUFeatures GetCPUFeatures() const;
+
+ private:
+  static const Field kAT;
+};
+
+class AA64ZFR0 : public IDRegister {
+ public:
+  explicit AA64ZFR0(uint64_t value) : IDRegister(value) {}
+
+  CPUFeatures GetCPUFeatures() const;
+
+ private:
+  static const Field kSVEver;
+  static const Field kAES;
+  static const Field kBitPerm;
+  static const Field kBF16;
+  static const Field kSHA3;
+  static const Field kSM4;
+  static const Field kI8MM;
+  static const Field kF32MM;
+  static const Field kF64MM;
+};
+
+class AA64SMFR0 : public IDRegister {
+ public:
+  explicit AA64SMFR0(uint64_t value) : IDRegister(value) {}
+
+  CPUFeatures GetCPUFeatures() const;
+
+ private:
+  static const Field kSMEf32f32;
+  static const Field kSMEb16f32;
+  static const Field kSMEf16f32;
+  static const Field kSMEi8i32;
+  static const Field kSMEf64f64;
+  static const Field kSMEi16i64;
+  static const Field kSMEfa64;
+};
+
 class CPU {
  public:
   // Initialise CPU support.
@@ -45,6 +251,25 @@ class CPU {
   // safely run.
   static void EnsureIAndDCacheCoherency(void *address, size_t length);
 
+  // Read and interpret the ID registers. This requires
+  // CPUFeatures::kIDRegisterEmulation, and therefore cannot be called on
+  // non-AArch64 platforms.
+  static CPUFeatures InferCPUFeaturesFromIDRegisters();
+
+  // Read and interpret CPUFeatures reported by the OS. Failed queries (or
+  // unsupported platforms) return an empty list. Note that this is
+  // indistinguishable from a successful query on a platform that advertises no
+  // features.
+  //
+  // Non-AArch64 hosts are considered to be unsupported platforms, and this
+  // function returns an empty list.
+  static CPUFeatures InferCPUFeaturesFromOS(
+      CPUFeatures::QueryIDRegistersOption option =
+          CPUFeatures::kQueryIDRegistersIfAvailable);
+
+  // Query the SVE vector length. This requires CPUFeatures::kSVE.
+  static int ReadSVEVectorLengthInBits();
+
   // Handle tagged pointers.
   template <typename T>
   static T SetPointerTag(T pointer, uint64_t tag) {
@@ -72,6 +297,27 @@ class CPU {
   }
 
  private:
+#define VIXL_AARCH64_ID_REG_LIST(V) \
+  V(AA64PFR0, "ID_AA64PFR0_EL1") \
+  V(AA64PFR1, "ID_AA64PFR1_EL1") \
+  V(AA64ISAR0, "ID_AA64ISAR0_EL1") \
+  V(AA64ISAR1, "ID_AA64ISAR1_EL1") \
+  V(AA64MMFR0, "ID_AA64MMFR0_EL1") \
+  V(AA64MMFR1, "ID_AA64MMFR1_EL1") \
+  /* These registers are RES0 in the baseline Arm8.0. We can always safely */ \
+  /* read them, but some compilers don't accept the symbolic names. */ \
+  V(AA64SMFR0, "S3_0_C0_C4_5") \
+  V(AA64ISAR2, "S3_0_C0_C6_2") \
+  V(AA64MMFR2, "S3_0_C0_C7_2") \
+  V(AA64ZFR0, "S3_0_C0_C4_4")
+
+#define VIXL_READ_ID_REG(NAME, MRS_ARG) static NAME Read##NAME();
+  // On native AArch64 platforms, read the named CPU ID registers. These require
+  // CPUFeatures::kIDRegisterEmulation, and should not be called on non-AArch64
+  // platforms.
+  VIXL_AARCH64_ID_REG_LIST(VIXL_READ_ID_REG)
+#undef VIXL_READ_ID_REG
 
   // Return the content of the cache type register.
   static uint32_t GetCacheType();
@@ -27,10 +27,14 @@
 #ifndef VIXL_AARCH64_CPU_FEATURES_AUDITOR_AARCH64_H_
 #define VIXL_AARCH64_CPU_FEATURES_AUDITOR_AARCH64_H_
 
+#include <functional>
 #include <iostream>
+#include <unordered_map>
 
 #include "../cpu-features.h"
 
 #include "decoder-aarch64.h"
+#include "decoder-visitor-map-aarch64.h"
 
 namespace vixl {
 namespace aarch64 {
@@ -100,15 +104,16 @@ class CPUFeaturesAuditor : public DecoderVisitor {
     SetAvailableFeatures(available);
   }
 
-// Declare all Visitor functions.
-#define DECLARE(A) \
-  virtual void Visit##A(const Instruction* instr) VIXL_OVERRIDE;
-  VISITOR_LIST(DECLARE)
-#undef DECLARE
+  virtual void Visit(Metadata* metadata,
+                     const Instruction* instr) VIXL_OVERRIDE;
 
  private:
   class RecordInstructionFeaturesScope;
 
+#define DECLARE(A) virtual void Visit##A(const Instruction* instr);
+  VISITOR_LIST(DECLARE)
+#undef DECLARE
+
   void LoadStoreHelper(const Instruction* instr);
   void LoadStorePairHelper(const Instruction* instr);
@@ -117,6 +122,12 @@ class CPUFeaturesAuditor : public DecoderVisitor {
   CPUFeatures available_;
 
   Decoder* decoder_;
 
+  using FormToVisitorFnMap = std::unordered_map<
+      uint32_t,
+      std::function<void(CPUFeaturesAuditor*, const Instruction*)>>;
+  static const FormToVisitorFnMap* GetFormToVisitorFnMap();
+  uint32_t form_hash_;
 };
 
 }  // namespace aarch64
@@ -0,0 +1,276 @@
+// Copyright 2023, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_AARCH64_DEBUGGER_AARCH64_H_
+#define VIXL_AARCH64_DEBUGGER_AARCH64_H_
+
+#include <optional>
+#include <unordered_set>
+#include <vector>
+
+#include "../globals-vixl.h"
+#include "../utils-vixl.h"
+#include "../cpu-features.h"
+
+#include "abi-aarch64.h"
+#include "cpu-features-auditor-aarch64.h"
+#include "disasm-aarch64.h"
+#include "instructions-aarch64.h"
+#include "simulator-aarch64.h"
+#include "simulator-constants-aarch64.h"
+
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+
+namespace vixl {
+namespace aarch64 {
+
+class Simulator;
+
+enum DebugReturn { DebugContinue, DebugExit };
+
+
+// A debugger command that performs some action when used by the simulator
+// debugger.
+class DebuggerCmd {
+ public:
+  DebuggerCmd(Simulator* sim,
+              std::string cmd_word,
+              std::string cmd_alias,
+              std::string usage,
+              std::string description);
+  virtual ~DebuggerCmd() {}
+
+  // Perform some action based on the arguments passed in. Returns true if the
+  // debugger should exit after the action, false otherwise.
+  virtual DebugReturn Action(const std::vector<std::string>& args) = 0;
+
+  // Return the command word.
+  std::string_view GetCommandWord() { return command_word_; }
+  // Return the alias for this command. Returns an empty string if this command
+  // has no alias.
+  std::string_view GetCommandAlias() { return command_alias_; }
+  // Return this commands usage.
+  std::string_view GetArgsString() { return args_str_; }
+  // Return this commands description.
+  std::string_view GetDescription() { return description_; }
+
+ protected:
+  // Simulator which this command will be performed on.
+  Simulator* sim_;
+  // Stream to output the result of the command to.
+  FILE* ostream_;
+  // Command word that, when given to the interactive debugger, calls Action.
+  std::string command_word_;
+  // Optional alias for the command_word.
+  std::string command_alias_;
+  // Optional string showing the arguments that can be passed to the command.
+  std::string args_str_;
+  // Optional description of the command.
+  std::string description_;
+};
+
+
+//
+// Base debugger command handlers:
+//
+
+
+class HelpCmd : public DebuggerCmd {
+ public:
+  HelpCmd(Simulator* sim)
+      : DebuggerCmd(sim, "help", "h", "", "Display this help message.") {}
+
+  DebugReturn Action(const std::vector<std::string>& args) override;
+};
+
+
+class BreakCmd : public DebuggerCmd {
+ public:
+  BreakCmd(Simulator* sim)
+      : DebuggerCmd(sim,
+                    "break",
+                    "b",
+                    "<address>",
+                    "Set or remove a breakpoint.") {}
+
+  DebugReturn Action(const std::vector<std::string>& args) override;
+};
+
+
+class StepCmd : public DebuggerCmd {
+ public:
+  StepCmd(Simulator* sim)
+      : DebuggerCmd(sim,
+                    "step",
+                    "s",
+                    "[<n>]",
+                    "Step n instructions, default step 1 instruction.") {}
+
+  DebugReturn Action(const std::vector<std::string>& args) override;
+};
+
+
+class ContinueCmd : public DebuggerCmd {
+ public:
+  ContinueCmd(Simulator* sim)
+      : DebuggerCmd(sim,
+                    "continue",
+                    "c",
+                    "",
+                    "Exit the debugger and continue executing instructions.") {}
+
+  DebugReturn Action(const std::vector<std::string>& args) override;
+};
+
+
+class PrintCmd : public DebuggerCmd {
+ public:
+  PrintCmd(Simulator* sim)
+      : DebuggerCmd(sim,
+                    "print",
+                    "p",
+                    "<register|all|system>",
+                    "Print the contents of a register, all registers or all"
+                    " system registers.") {}
+
+  DebugReturn Action(const std::vector<std::string>& args) override;
+};
+
+
+class TraceCmd : public DebuggerCmd {
+ public:
+  TraceCmd(Simulator* sim)
+      : DebuggerCmd(sim,
+                    "trace",
+                    "t",
+                    "",
+                    "Start/stop memory and register tracing.") {}
+
+  DebugReturn Action(const std::vector<std::string>& args) override;
+};
+
+
+class GdbCmd : public DebuggerCmd {
+ public:
+  GdbCmd(Simulator* sim)
+      : DebuggerCmd(sim,
+                    "gdb",
+                    "g",
+                    "",
+                    "Enter an already running instance of gdb.") {}
+
+  DebugReturn Action(const std::vector<std::string>& args) override;
+};
+
+
+// A debugger for the Simulator which takes input from the user in order to
+// control the running of the Simulator.
+class Debugger {
+ public:
+  // A pair consisting of a register character (e.g: W, X, V) and a register
+  // code (e.g: 0, 1 ...31) which represents a single parsed register.
+  //
+  // Note: the register character is guaranteed to be upper case.
+  using RegisterParsedFormat = std::pair<char, unsigned>;
+
+  Debugger(Simulator* sim);
+
+  // Set the input stream, from which commands are read, to a custom stream.
+  void SetInputStream(std::istream* stream) { input_stream_ = stream; }
+
+  // Register a new command for the debugger.
+  template <class T>
+  void RegisterCmd();
+
+  // Set a breakpoint at the given address.
+  void RegisterBreakpoint(uint64_t addr) { breakpoints_.insert(addr); }
+  // Remove a breakpoint at the given address.
+  void RemoveBreakpoint(uint64_t addr) { breakpoints_.erase(addr); }
+  // Return true if the address is the location of a breakpoint.
+  bool IsBreakpoint(uint64_t addr) const {
+    return (breakpoints_.find(addr) != breakpoints_.end());
+  }
+  // Return true if the simulator pc is a breakpoint.
+  bool IsAtBreakpoint() const;
+
+  // Main loop for the debugger. Keep prompting for user inputted debugger
+  // commands and try to execute them until a command is given that exits the
+  // interactive debugger.
+  void Debug();
+
+  // Get an unsigned integer value from a string and return it in 'value'.
+  // Base is used to determine the numeric base of the number to be read,
+  // i.e: 8 for octal, 10 for decimal, 16 for hexadecimal and 0 for
+  // auto-detect. Return true if an integer value was found, false otherwise.
+  static std::optional<uint64_t> ParseUint64String(std::string_view uint64_str,
+                                                   int base = 0);
+
+  // Get a register from a string and return it in 'reg'. Return true if a
+  // valid register character and code (e.g: W0, X29, V31) was found, false
+  // otherwise.
+  static std::optional<RegisterParsedFormat> ParseRegString(
+      std::string_view reg_str);
+
+  // Print the usage of each debugger command.
+  void PrintUsage();
+
+ private:
+  // Split a string based on the separator given (a single space character by
+  // default) and return as a std::vector of strings.
+  static std::vector<std::string> Tokenize(std::string_view input_line,
+                                           char separator = ' ');
+
+  // Try to execute a single debugger command.
+  DebugReturn ExecDebugCommand(const std::vector<std::string>& tokenized_cmd);
+
+  // Return true if the string is zero, i.e: all characters in the string
+  // (other than prefixes) are zero.
+  static bool IsZeroUint64String(std::string_view uint64_str, int base);
+
+  // The simulator that this debugger acts on.
+  Simulator* sim_;
+
+  // A vector of all commands recognised by the debugger.
+  std::vector<std::unique_ptr<DebuggerCmd>> debugger_cmds_;
+
+  // Input stream from which commands are read. Default is std::cin.
+  std::istream* input_stream_;
+
+  // Output stream from the simulator.
+  FILE* ostream_;
+
+  // A list of all instruction addresses that, when executed by the
+  // simulator, will start the interactive debugger if it hasn't already.
+  std::unordered_set<uint64_t> breakpoints_;
+};
+
+
+}  // namespace aarch64
+}  // namespace vixl
+
+#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
+
+#endif  // VIXL_AARCH64_DEBUGGER_AARCH64_H_
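Taken together, the declarations above suggest the following usage. This sketch is built only from the signatures in this header; the exact call sequence is an assumption:

#include <sstream>

// Drive the interactive debugger from a scripted stream instead of std::cin.
void RunDebuggerSketch(vixl::aarch64::Simulator* sim) {
  vixl::aarch64::Debugger dbg(sim);
  dbg.RegisterCmd<vixl::aarch64::HelpCmd>();  // T must be a DebuggerCmd.
  dbg.RegisterBreakpoint(0x1000);  // Break when execution reaches 0x1000.
  std::istringstream script("help\ncontinue\n");
  dbg.SetInputStream(&script);
  dbg.Debug();  // Prompt loop; returns when a command exits the debugger.
}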
@@ -1,4 +1,4 @@
-// Copyright 2014, VIXL authors
+// Copyright 2019, VIXL authors
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -28,14 +28,14 @@
 #define VIXL_AARCH64_DECODER_AARCH64_H_
 
 #include <list>
+#include <map>
+#include <string>
 
 #include "../globals-vixl.h"
 
 #include "instructions-aarch64.h"
 
 // List macro containing all visitors needed by the decoder class.
 
 #define VISITOR_LIST_THAT_RETURN(V) \
   V(AddSubExtended) \
   V(AddSubImmediate) \
@@ -54,6 +54,7 @@
   V(DataProcessing1Source) \
   V(DataProcessing2Source) \
   V(DataProcessing3Source) \
+  V(EvaluateIntoFlags) \
   V(Exception) \
   V(Extract) \
   V(FPCompare) \
@@ -67,12 +68,14 @@
   V(FPIntegerConvert) \
   V(LoadLiteral) \
   V(LoadStoreExclusive) \
+  V(LoadStorePAC) \
   V(LoadStorePairNonTemporal) \
   V(LoadStorePairOffset) \
   V(LoadStorePairPostIndex) \
   V(LoadStorePairPreIndex) \
   V(LoadStorePostIndex) \
   V(LoadStorePreIndex) \
+  V(LoadStoreRCpcUnscaledOffset) \
   V(LoadStoreRegisterOffset) \
   V(LoadStoreUnscaledOffset) \
   V(LoadStoreUnsignedOffset) \
|
@ -108,15 +111,162 @@
|
|||
V(NEONShiftImmediate) \
|
||||
V(NEONTable) \
|
||||
V(PCRelAddressing) \
|
||||
V(RotateRightIntoFlags) \
|
||||
V(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets) \
|
||||
V(SVE32BitGatherLoad_VectorPlusImm) \
|
||||
V(SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets) \
|
||||
V(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets) \
|
||||
V(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets) \
|
||||
V(SVE32BitGatherPrefetch_VectorPlusImm) \
|
||||
V(SVE32BitScatterStore_ScalarPlus32BitScaledOffsets) \
|
||||
V(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets) \
|
||||
V(SVE32BitScatterStore_VectorPlusImm) \
|
||||
V(SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets) \
|
||||
V(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsets) \
|
||||
V(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets) \
|
||||
V(SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets) \
|
||||
V(SVE64BitGatherLoad_VectorPlusImm) \
|
||||
V(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets) \
|
||||
V(SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets) \
|
||||
V(SVE64BitGatherPrefetch_VectorPlusImm) \
|
||||
V(SVE64BitScatterStore_ScalarPlus64BitScaledOffsets) \
|
||||
V(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets) \
|
||||
V(SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets) \
|
||||
V(SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets) \
|
||||
V(SVE64BitScatterStore_VectorPlusImm) \
|
||||
V(SVEAddressGeneration) \
|
||||
V(SVEBitwiseLogicalUnpredicated) \
|
||||
V(SVEBitwiseShiftUnpredicated) \
|
||||
V(SVEFFRInitialise) \
|
||||
V(SVEFFRWriteFromPredicate) \
|
||||
V(SVEFPAccumulatingReduction) \
|
||||
V(SVEFPArithmeticUnpredicated) \
|
||||
V(SVEFPCompareVectors) \
|
||||
V(SVEFPCompareWithZero) \
|
||||
V(SVEFPComplexAddition) \
|
||||
V(SVEFPComplexMulAdd) \
|
||||
V(SVEFPComplexMulAddIndex) \
|
||||
V(SVEFPFastReduction) \
|
||||
V(SVEFPMulIndex) \
|
||||
V(SVEFPMulAdd) \
|
||||
V(SVEFPMulAddIndex) \
|
||||
V(SVEFPUnaryOpUnpredicated) \
|
||||
V(SVEIncDecByPredicateCount) \
|
||||
V(SVEIndexGeneration) \
|
||||
V(SVEIntArithmeticUnpredicated) \
|
||||
V(SVEIntCompareSignedImm) \
|
||||
V(SVEIntCompareUnsignedImm) \
|
||||
V(SVEIntCompareVectors) \
|
||||
V(SVEIntMulAddPredicated) \
|
||||
V(SVEIntMulAddUnpredicated) \
|
||||
V(SVEIntReduction) \
|
||||
V(SVEIntUnaryArithmeticPredicated) \
|
||||
V(SVEMovprfx) \
|
||||
V(SVEMulIndex) \
|
||||
V(SVEPermuteVectorExtract) \
|
||||
V(SVEPermuteVectorInterleaving) \
|
||||
V(SVEPredicateCount) \
|
||||
V(SVEPredicateLogical) \
|
||||
V(SVEPropagateBreak) \
|
||||
V(SVEStackFrameAdjustment) \
|
||||
V(SVEStackFrameSize) \
|
||||
V(SVEVectorSelect) \
|
||||
V(SVEBitwiseLogical_Predicated) \
|
||||
V(SVEBitwiseLogicalWithImm_Unpredicated) \
|
||||
V(SVEBitwiseShiftByImm_Predicated) \
|
||||
V(SVEBitwiseShiftByVector_Predicated) \
|
||||
V(SVEBitwiseShiftByWideElements_Predicated) \
|
||||
V(SVEBroadcastBitmaskImm) \
|
||||
V(SVEBroadcastFPImm_Unpredicated) \
|
||||
V(SVEBroadcastGeneralRegister) \
|
||||
V(SVEBroadcastIndexElement) \
|
||||
V(SVEBroadcastIntImm_Unpredicated) \
|
||||
V(SVECompressActiveElements) \
|
||||
V(SVEConditionallyBroadcastElementToVector) \
|
||||
V(SVEConditionallyExtractElementToSIMDFPScalar) \
|
||||
V(SVEConditionallyExtractElementToGeneralRegister) \
|
||||
V(SVEConditionallyTerminateScalars) \
|
||||
V(SVEConstructivePrefix_Unpredicated) \
|
||||
V(SVEContiguousFirstFaultLoad_ScalarPlusScalar) \
|
||||
V(SVEContiguousLoad_ScalarPlusImm) \
|
||||
V(SVEContiguousLoad_ScalarPlusScalar) \
|
||||
V(SVEContiguousNonFaultLoad_ScalarPlusImm) \
|
||||
V(SVEContiguousNonTemporalLoad_ScalarPlusImm) \
|
||||
V(SVEContiguousNonTemporalLoad_ScalarPlusScalar) \
|
||||
V(SVEContiguousNonTemporalStore_ScalarPlusImm) \
|
||||
V(SVEContiguousNonTemporalStore_ScalarPlusScalar) \
|
||||
V(SVEContiguousPrefetch_ScalarPlusImm) \
|
||||
V(SVEContiguousPrefetch_ScalarPlusScalar) \
|
||||
V(SVEContiguousStore_ScalarPlusImm) \
|
||||
V(SVEContiguousStore_ScalarPlusScalar) \
|
||||
V(SVECopySIMDFPScalarRegisterToVector_Predicated) \
|
||||
V(SVECopyFPImm_Predicated) \
|
||||
V(SVECopyGeneralRegisterToVector_Predicated) \
|
||||
V(SVECopyIntImm_Predicated) \
|
||||
V(SVEElementCount) \
|
||||
V(SVEExtractElementToSIMDFPScalarRegister) \
|
||||
V(SVEExtractElementToGeneralRegister) \
|
||||
V(SVEFPArithmetic_Predicated) \
|
||||
V(SVEFPArithmeticWithImm_Predicated) \
|
||||
V(SVEFPConvertPrecision) \
|
||||
V(SVEFPConvertToInt) \
|
||||
V(SVEFPExponentialAccelerator) \
|
||||
V(SVEFPRoundToIntegralValue) \
|
||||
V(SVEFPTrigMulAddCoefficient) \
|
||||
V(SVEFPTrigSelectCoefficient) \
|
||||
V(SVEFPUnaryOp) \
|
||||
V(SVEIncDecRegisterByElementCount) \
|
||||
V(SVEIncDecVectorByElementCount) \
|
||||
V(SVEInsertSIMDFPScalarRegister) \
|
||||
V(SVEInsertGeneralRegister) \
|
||||
V(SVEIntAddSubtractImm_Unpredicated) \
|
||||
V(SVEIntAddSubtractVectors_Predicated) \
|
||||
V(SVEIntCompareScalarCountAndLimit) \
|
||||
V(SVEIntConvertToFP) \
|
||||
V(SVEIntDivideVectors_Predicated) \
|
||||
V(SVEIntMinMaxImm_Unpredicated) \
|
||||
V(SVEIntMinMaxDifference_Predicated) \
|
||||
V(SVEIntMulImm_Unpredicated) \
|
||||
V(SVEIntMulVectors_Predicated) \
|
||||
V(SVELoadAndBroadcastElement) \
|
||||
V(SVELoadAndBroadcastQOWord_ScalarPlusImm) \
|
||||
V(SVELoadAndBroadcastQOWord_ScalarPlusScalar) \
|
||||
V(SVELoadMultipleStructures_ScalarPlusImm) \
|
||||
V(SVELoadMultipleStructures_ScalarPlusScalar) \
|
||||
V(SVELoadPredicateRegister) \
|
||||
V(SVELoadVectorRegister) \
|
||||
V(SVEPartitionBreakCondition) \
|
||||
V(SVEPermutePredicateElements) \
|
||||
V(SVEPredicateFirstActive) \
|
||||
V(SVEPredicateInitialize) \
|
||||
V(SVEPredicateNextActive) \
|
||||
V(SVEPredicateReadFromFFR_Predicated) \
|
||||
V(SVEPredicateReadFromFFR_Unpredicated) \
|
||||
V(SVEPredicateTest) \
|
||||
V(SVEPredicateZero) \
|
||||
V(SVEPropagateBreakToNextPartition) \
|
||||
V(SVEReversePredicateElements) \
|
||||
V(SVEReverseVectorElements) \
|
||||
V(SVEReverseWithinElements) \
|
||||
V(SVESaturatingIncDecRegisterByElementCount) \
|
||||
V(SVESaturatingIncDecVectorByElementCount) \
|
||||
V(SVEStoreMultipleStructures_ScalarPlusImm) \
|
||||
V(SVEStoreMultipleStructures_ScalarPlusScalar) \
|
||||
V(SVEStorePredicateRegister) \
|
||||
V(SVEStoreVectorRegister) \
|
||||
V(SVETableLookup) \
|
||||
V(SVEUnpackPredicateElements) \
|
||||
V(SVEUnpackVectorElements) \
|
||||
V(SVEVectorSplice) \
|
||||
V(System) \
|
||||
V(TestBranch) \
|
||||
V(UnconditionalBranch) \
|
||||
V(UnconditionalBranchToRegister)
|
||||
|
||||
#define VISITOR_LIST_THAT_DONT_RETURN(V) \
|
||||
V(Unallocated) \
|
||||
V(UnconditionalBranch) \
|
||||
V(UnconditionalBranchToRegister) \
|
||||
V(Unimplemented)
|
||||
|
||||
#define VISITOR_LIST_THAT_DONT_RETURN(V) V(Reserved)
|
||||
|
||||
#define VISITOR_LIST(V) \
|
||||
VISITOR_LIST_THAT_RETURN(V) \
|
||||
VISITOR_LIST_THAT_DONT_RETURN(V)
|
||||
|
@@ -124,8 +274,12 @@
 namespace vixl {
 namespace aarch64 {
 
-// The Visitor interface. Disassembler and simulator (and other tools)
-// must provide implementations for all of these functions.
+using Metadata = std::map<std::string, std::string>;
+
+// The Visitor interface consists only of the Visit() method. User classes
+// that inherit from this one must provide an implementation of the method.
+// Information about the instruction encountered by the Decoder is available
+// via the metadata pointer.
 class DecoderVisitor {
  public:
   enum VisitorConstness { kConstVisitor, kNonConstVisitor };
@@ -134,9 +288,7 @@ class DecoderVisitor {
 
   virtual ~DecoderVisitor() {}
 
-#define DECLARE(A) virtual void Visit##A(const Instruction* instr) = 0;
-  VISITOR_LIST(DECLARE)
-#undef DECLARE
+  virtual void Visit(Metadata* metadata, const Instruction* instr) = 0;
 
   bool IsConstVisitor() const { return constness_ == kConstVisitor; }
   Instruction* MutableInstruction(const Instruction* instr) {
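Every per-form Visit##A override collapses into this one Visit method. A minimal sketch of a client visitor under the new interface follows; it assumes the base class default-constructs as a const visitor, and the metadata key used is an assumption about how the decoder labels instructions:

#include <cstdint>
#include <cstdio>

using vixl::aarch64::DecoderVisitor;
using vixl::aarch64::Instruction;
using vixl::aarch64::Metadata;

// Count decoded instructions and print the label the decoder attached to
// each one. The "visitor" key is an assumption, not taken from this diff.
class CountingVisitor : public DecoderVisitor {
 public:
  void Visit(Metadata* metadata, const Instruction* instr) override {
    count_++;
    std::printf("%p: %s\n", static_cast<const void*>(instr),
                (*metadata)["visitor"].c_str());
  }
  uint64_t GetCount() const { return count_; }

 private:
  uint64_t count_ = 0;
};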
@@ -148,22 +300,22 @@ class DecoderVisitor {
   const VisitorConstness constness_;
 };
 
+class DecodeNode;
+class CompiledDecodeNode;
+
+// The instruction decoder is constructed from a graph of decode nodes. At each
+// node, a number of bits are sampled from the instruction being decoded. The
+// resulting value is used to look up the next node in the graph, which then
+// samples other bits, and moves to other decode nodes. Eventually, a visitor
+// node is reached, and the corresponding visitor function is called, which
+// handles the instruction.
 class Decoder {
  public:
-  Decoder() {}
+  Decoder() { ConstructDecodeGraph(); }
 
   // Top-level wrappers around the actual decoding function.
-  void Decode(const Instruction* instr) {
-    std::list<DecoderVisitor*>::iterator it;
-    for (it = visitors_.begin(); it != visitors_.end(); it++) {
-      VIXL_ASSERT((*it)->IsConstVisitor());
-    }
-    DecodeInstruction(instr);
-  }
-  void Decode(Instruction* instr) {
-    DecodeInstruction(const_cast<const Instruction*>(instr));
-  }
+  void Decode(const Instruction* instr);
+  void Decode(Instruction* instr);
 
   // Decode all instructions from start (inclusive) to end (exclusive).
   template <typename T>
@ -212,76 +364,329 @@ class Decoder {
|
|||
// of visitors stored by the decoder.
|
||||
void RemoveVisitor(DecoderVisitor* visitor);
|
||||
|
||||
#define DECLARE(A) void Visit##A(const Instruction* instr);
|
||||
VISITOR_LIST(DECLARE)
|
||||
#undef DECLARE
|
||||
|
||||
void VisitNamedInstruction(const Instruction* instr, const std::string& name);
|
||||
|
||||
std::list<DecoderVisitor*>* visitors() { return &visitors_; }
|
||||
|
||||
// Get a DecodeNode by name from the Decoder's map.
|
||||
DecodeNode* GetDecodeNode(std::string name);
|
||||
|
||||
private:
|
||||
// Decodes an instruction and calls the visitor functions registered with the
|
||||
// Decoder class.
|
||||
void DecodeInstruction(const Instruction* instr);
|
||||
|
||||
// Decode the PC relative addressing instruction, and call the corresponding
|
||||
// visitors.
|
||||
  // On entry, instruction bits 27:24 = 0x0.
  void DecodePCRelAddressing(const Instruction* instr);
  // Add an initialised DecodeNode to the decode_node_ map.
  void AddDecodeNode(const DecodeNode& node);

  // Decode the add/subtract immediate instruction, and call the corresponding
  // visitors.
  // On entry, instruction bits 27:24 = 0x1.
  void DecodeAddSubImmediate(const Instruction* instr);

  // Decode the branch, system command, and exception generation parts of
  // the instruction tree, and call the corresponding visitors.
  // On entry, instruction bits 27:24 = {0x4, 0x5, 0x6, 0x7}.
  void DecodeBranchSystemException(const Instruction* instr);

  // Decode the load and store parts of the instruction tree, and call
  // the corresponding visitors.
  // On entry, instruction bits 27:24 = {0x8, 0x9, 0xC, 0xD}.
  void DecodeLoadStore(const Instruction* instr);

  // Decode the logical immediate and move wide immediate parts of the
  // instruction tree, and call the corresponding visitors.
  // On entry, instruction bits 27:24 = 0x2.
  void DecodeLogical(const Instruction* instr);

  // Decode the bitfield and extraction parts of the instruction tree,
  // and call the corresponding visitors.
  // On entry, instruction bits 27:24 = 0x3.
  void DecodeBitfieldExtract(const Instruction* instr);

  // Decode the data processing parts of the instruction tree, and call the
  // corresponding visitors.
  // On entry, instruction bits 27:24 = {0x1, 0xA, 0xB}.
  void DecodeDataProcessing(const Instruction* instr);

  // Decode the floating point parts of the instruction tree, and call the
  // corresponding visitors.
  // On entry, instruction bits 27:24 = {0xE, 0xF}.
  void DecodeFP(const Instruction* instr);

  // Decode the Advanced SIMD (NEON) load/store part of the instruction tree,
  // and call the corresponding visitors.
  // On entry, instruction bits 29:25 = 0x6.
  void DecodeNEONLoadStore(const Instruction* instr);

  // Decode the Advanced SIMD (NEON) vector data processing part of the
  // instruction tree, and call the corresponding visitors.
  // On entry, instruction bits 28:25 = 0x7.
  void DecodeNEONVectorDataProcessing(const Instruction* instr);

  // Decode the Advanced SIMD (NEON) scalar data processing part of the
  // instruction tree, and call the corresponding visitors.
  // On entry, instruction bits 28:25 = 0xF.
  void DecodeNEONScalarDataProcessing(const Instruction* instr);

 private:
  // Visitors are registered in a list.
  std::list<DecoderVisitor*> visitors_;

  // Compile the dynamically generated decode graph based on the static
  // information in kDecodeMapping and kVisitorNodes.
  void ConstructDecodeGraph();

  // Root node for the compiled decoder graph, stored here to avoid a map
  // lookup for every instruction decoded.
  CompiledDecodeNode* compiled_decoder_root_;

  // Map of node names to DecodeNodes.
  std::map<std::string, DecodeNode> decode_nodes_;
};

typedef void (Decoder::*DecodeFnPtr)(const Instruction*);
typedef uint32_t (Instruction::*BitExtractFn)(void) const;

// A Visitor node maps the name of a visitor to the function that handles it.
struct VisitorNode {
  const char* name;
  const DecodeFnPtr visitor_fn;
};

// DecodePattern and DecodeMapping represent the input data to the decoder
// compilation stage. After compilation, the decoder is embodied in the graph
// of CompiledDecodeNodes pointed to by compiled_decoder_root_.

// A DecodePattern maps a pattern of set/unset/don't care (1, 0, x) bits
// encoded as uint32_t to its handler.
// The encoding uses two bits per symbol: 0 => 0b00, 1 => 0b01, x => 0b10.
// 0b11 marks the edge of the most-significant bits of the pattern, which is
// required to determine the length. For example, the pattern "1x01"_b is
// encoded in a uint32_t as 0b11_01_10_00_01.
struct DecodePattern {
  uint32_t pattern;
  const char* handler;
};
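
// A standalone sketch (editor's illustration, not part of the VIXL source) of
// the two-bit pattern encoding described above. EncodePatternSketch is a
// hypothetical helper used only for this example; building "1x01" symbol by
// symbol, MSB first, yields 0b11_01_10_00_01, i.e. 0x361.
static uint32_t EncodePatternSketch(const char* pattern) {
  uint32_t encoded = 3;  // Start with the 0b11 end-of-pattern marker.
  for (const char* c = pattern; *c != '\0'; c++) {
    // 0 => 0b00, 1 => 0b01, x => 0b10, as in the comment above.
    uint32_t symbol = (*c == '0') ? 0 : ((*c == '1') ? 1 : 2);
    encoded = (encoded << 2) | symbol;
  }
  return encoded;  // EncodePatternSketch("1x01") == 0x361.
}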

// A DecodeMapping consists of the name of a handler, the bits sampled in the
// instruction by that handler, and a mapping from the pattern that those
// sampled bits match to the corresponding name of a node.
struct DecodeMapping {
  const char* name;
  const std::vector<uint8_t> sampled_bits;
  const std::vector<DecodePattern> mapping;
};

// For speed, before nodes can be used for decoding instructions, they must
// be compiled. This converts the mapping "bit pattern strings to decoder name
// string" stored in DecodeNodes to an array look up for the pointer to the next
// node, stored in CompiledDecodeNodes. Compilation may also apply other
// optimisations for simple decode patterns.
class CompiledDecodeNode {
 public:
  // Constructor for decode node, containing a decode table and pointer to a
  // function that extracts the bits to be sampled.
  CompiledDecodeNode(BitExtractFn bit_extract_fn, size_t decode_table_size)
      : bit_extract_fn_(bit_extract_fn),
        instruction_name_("node"),
        decode_table_size_(decode_table_size),
        decoder_(NULL) {
    decode_table_ = new CompiledDecodeNode*[decode_table_size_];
    memset(decode_table_, 0, decode_table_size_ * sizeof(decode_table_[0]));
  }

  // Constructor for wrappers around visitor functions. These require no
  // decoding, so no bit extraction function or decode table is assigned.
  explicit CompiledDecodeNode(std::string iname, Decoder* decoder)
      : bit_extract_fn_(NULL),
        instruction_name_(iname),
        decode_table_(NULL),
        decode_table_size_(0),
        decoder_(decoder) {}

  ~CompiledDecodeNode() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
    // Free the decode table, if this is a compiled, non-leaf node.
    if (decode_table_ != NULL) {
      VIXL_ASSERT(!IsLeafNode());
      delete[] decode_table_;
    }
  }

  // Decode the instruction by either sampling the bits using the bit extract
  // function to find the next node, or, if we're at a leaf, calling the
  // visitor function.
  void Decode(const Instruction* instr) const;

  // A leaf node is a wrapper for a visitor function.
  bool IsLeafNode() const {
    VIXL_ASSERT(((instruction_name_ == "node") && (bit_extract_fn_ != NULL)) ||
                ((instruction_name_ != "node") && (bit_extract_fn_ == NULL)));
    return instruction_name_ != "node";
  }

  // Get a pointer to the next node required in the decode process, based on
  // the bits sampled by the current node.
  CompiledDecodeNode* GetNodeForBits(uint32_t bits) const {
    VIXL_ASSERT(bits < decode_table_size_);
    return decode_table_[bits];
  }

  // Set the next node in the decode process for the pattern of sampled bits
  // in the current node.
  void SetNodeForBits(uint32_t bits, CompiledDecodeNode* n) {
    VIXL_ASSERT(bits < decode_table_size_);
    VIXL_ASSERT(n != NULL);
    decode_table_[bits] = n;
  }

 private:
  // Pointer to an instantiated template function for extracting the bits
  // sampled by this node. Set to NULL for leaf nodes.
  const BitExtractFn bit_extract_fn_;

  // Name of the instruction form that this node handles. Set only for leaf
  // nodes, where no extra decoding is required; otherwise "node".
  std::string instruction_name_;

  // Mapping table from instruction bits to next decode stage.
  CompiledDecodeNode** decode_table_;
  const size_t decode_table_size_;

  // Pointer to the decoder containing this node, used to call its visitor
  // function for leaf nodes. Set to NULL for non-leaf nodes.
  Decoder* decoder_;
};

class DecodeNode {
 public:
  // Default constructor needed for map initialisation.
  DecodeNode()
      : sampled_bits_(DecodeNode::kEmptySampledBits),
        pattern_table_(DecodeNode::kEmptyPatternTable),
        compiled_node_(NULL) {}

  // Constructor for DecodeNode wrappers around visitor functions. These are
  // marked as "compiled", as there is no decoding left to do.
  explicit DecodeNode(const std::string& iname, Decoder* decoder)
      : name_(iname),
        sampled_bits_(DecodeNode::kEmptySampledBits),
        instruction_name_(iname),
        pattern_table_(DecodeNode::kEmptyPatternTable),
        decoder_(decoder),
        compiled_node_(NULL) {}

  // Constructor for DecodeNodes that map bit patterns to other DecodeNodes.
  explicit DecodeNode(const DecodeMapping& map, Decoder* decoder = NULL)
      : name_(map.name),
        sampled_bits_(map.sampled_bits),
        instruction_name_("node"),
        pattern_table_(map.mapping),
        decoder_(decoder),
        compiled_node_(NULL) {
    // With the current two bits per symbol encoding scheme, the maximum
    // pattern length is (32 - 2) / 2 = 15 bits.
    VIXL_CHECK(GetPatternLength(map.mapping[0].pattern) <= 15);
    for (const DecodePattern& p : map.mapping) {
      VIXL_CHECK(GetPatternLength(p.pattern) == map.sampled_bits.size());
    }
  }

  ~DecodeNode() {
    // Delete the compiled version of this node, if one was created.
    if (compiled_node_ != NULL) {
      delete compiled_node_;
    }
  }

  // Get the bits sampled from the instruction by this node.
  const std::vector<uint8_t>& GetSampledBits() const { return sampled_bits_; }

  // Get the number of bits sampled from the instruction by this node.
  size_t GetSampledBitsCount() const { return sampled_bits_.size(); }

  // A leaf node is a DecodeNode that wraps the visitor function for the
  // identified instruction class.
  bool IsLeafNode() const { return instruction_name_ != "node"; }

  std::string GetName() const { return name_; }

  // Create a CompiledDecodeNode of specified table size that uses
  // bit_extract_fn to sample bits from the instruction.
  void CreateCompiledNode(BitExtractFn bit_extract_fn, size_t table_size) {
    VIXL_ASSERT(bit_extract_fn != NULL);
    VIXL_ASSERT(table_size > 0);
    compiled_node_ = new CompiledDecodeNode(bit_extract_fn, table_size);
  }

  // Create a CompiledDecodeNode wrapping a visitor function. No decoding is
  // required for this node; the visitor function is called instead.
  void CreateVisitorNode() {
    compiled_node_ = new CompiledDecodeNode(instruction_name_, decoder_);
  }

  // Find and compile the DecodeNode named "name", and set it as the node for
  // the pattern "bits".
  void CompileNodeForBits(Decoder* decoder, std::string name, uint32_t bits);

  // Get a pointer to an instruction method that extracts the instruction bits
  // specified by the mask argument, and returns those sampled bits as a
  // contiguous sequence, suitable for indexing an array.
  // For example, a mask of 0b1010 returns a function that, given an
  // instruction 0bXYZW, will return 0bXZ.
  BitExtractFn GetBitExtractFunction(uint32_t mask) {
    return GetBitExtractFunctionHelper(mask, 0);
  }

  // Get a pointer to an Instruction method that applies a mask to the
  // instruction bits, and tests if the result is equal to value. The returned
  // function gives a 1 result if (inst & mask == value), 0 otherwise.
  BitExtractFn GetBitExtractFunction(uint32_t mask, uint32_t value) {
    return GetBitExtractFunctionHelper(value, mask);
  }

  // Compile this DecodeNode into a new CompiledDecodeNode and return a
  // pointer to it. This pointer is also stored inside the DecodeNode itself.
  // Destroying a DecodeNode frees its associated CompiledDecodeNode.
  CompiledDecodeNode* Compile(Decoder* decoder);

  // Get a pointer to the CompiledDecodeNode associated with this DecodeNode.
  // Returns NULL if the node has not been compiled yet.
  CompiledDecodeNode* GetCompiledNode() const { return compiled_node_; }
  bool IsCompiled() const { return GetCompiledNode() != NULL; }

  enum class PatternSymbol { kSymbol0 = 0, kSymbol1 = 1, kSymbolX = 2 };
  static const uint32_t kEndOfPattern = 3;
  static const uint32_t kPatternSymbolMask = 3;

  size_t GetPatternLength(uint32_t pattern) const {
    uint32_t hsb = HighestSetBitPosition(pattern);
    // The pattern length is signified by two set bits in a two bit-aligned
    // position. Ensure that the pattern has a highest set bit, that it's at
    // an odd bit position, and that the bit to the right of the hsb is also
    // set.
    VIXL_ASSERT(((hsb % 2) == 1) && (pattern >> (hsb - 1)) == kEndOfPattern);
    return hsb / 2;
  }

  bool PatternContainsSymbol(uint32_t pattern, PatternSymbol symbol) const {
    while ((pattern & kPatternSymbolMask) != kEndOfPattern) {
      if (static_cast<PatternSymbol>(pattern & kPatternSymbolMask) == symbol)
        return true;
      pattern >>= 2;
    }
    return false;
  }

  PatternSymbol GetSymbolAt(uint32_t pattern, size_t pos) const {
    size_t len = GetPatternLength(pattern);
    VIXL_ASSERT((pos < 15) && (pos < len));
    uint32_t shift = static_cast<uint32_t>(2 * (len - pos - 1));
    uint32_t sym = (pattern >> shift) & kPatternSymbolMask;
    return static_cast<PatternSymbol>(sym);
  }

 private:
  // Generate a mask and value pair from a pattern constructed from 0, 1 and x
  // (don't care) 2-bit symbols.
  // For example "10x1"_b should return mask = 0b1101, value = 0b1001.
  typedef std::pair<Instr, Instr> MaskValuePair;
  MaskValuePair GenerateMaskValuePair(uint32_t pattern) const;
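
  // An illustrative sketch (editor's addition, not the private helper above)
  // of the mask/value derivation, written over a plain symbol string rather
  // than the packed two-bit encoding: '0' and '1' positions contribute to
  // the mask, while 'x' (don't care) positions are skipped.
  static std::pair<uint32_t, uint32_t> MaskValueSketch(const char* pattern) {
    uint32_t mask = 0;
    uint32_t value = 0;
    for (const char* c = pattern; *c != '\0'; c++) {
      mask <<= 1;
      value <<= 1;
      if (*c != 'x') {  // 'x' contributes no mask bit.
        mask |= 1;
        if (*c == '1') value |= 1;
      }
    }
    return std::make_pair(mask, value);  // "10x1" => {0b1101, 0b1001}.
  }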

  // Generate a pattern ordered by the bit positions sampled by this node.
  // The symbol corresponding to the lowest sample position is placed in the
  // least-significant bits of the result pattern.
  // For example, a pattern of "1x0"_b expected when sampling bits 31, 1 and 30
  // returns the pattern "x01"_b; bit 1 should be 'x', bit 30 '0' and bit 31
  // '1'.
  // This output makes comparisons easier between the pattern and bits sampled
  // from an instruction using the fast "compress" algorithm. See
  // Instruction::Compress().
  uint32_t GenerateOrderedPattern(uint32_t pattern) const;

  // Generate a mask with a bit set at each sample position.
  uint32_t GenerateSampledBitsMask() const;

  // Try to compile a more optimised decode operation for this node, returning
  // true if successful.
  bool TryCompileOptimisedDecodeTable(Decoder* decoder);

  // Helper function that returns a bit extracting function. If y is zero,
  // x is a bit extraction mask. Otherwise, y is the mask, and x is the value
  // to match after masking.
  BitExtractFn GetBitExtractFunctionHelper(uint32_t x, uint32_t y);

  // Name of this decoder node, used to construct edges in the decode graph.
  std::string name_;

  // Vector of bits sampled from an instruction to determine which node to
  // look up next in the decode process.
  const std::vector<uint8_t>& sampled_bits_;
  static const std::vector<uint8_t> kEmptySampledBits;

  // For leaf nodes, this is the name of the instruction form that the node
  // represents. For other nodes, this is always set to "node".
  std::string instruction_name_;

  // Source mapping from bit pattern to name of next decode stage.
  const std::vector<DecodePattern>& pattern_table_;
  static const std::vector<DecodePattern> kEmptyPatternTable;

  // Pointer to the decoder containing this node, used to call its visitor
  // function for leaf nodes.
  Decoder* decoder_;

  // Pointer to the compiled version of this node. If this node hasn't been
  // compiled yet, this pointer is NULL.
  CompiledDecodeNode* compiled_node_;
};

}  // namespace aarch64
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -27,11 +27,16 @@
#ifndef VIXL_AARCH64_DISASM_AARCH64_H
#define VIXL_AARCH64_DISASM_AARCH64_H

#include <functional>
#include <unordered_map>
#include <utility>

#include "../globals-vixl.h"
#include "../utils-vixl.h"

#include "cpu-features-auditor-aarch64.h"
#include "decoder-aarch64.h"
#include "decoder-visitor-map-aarch64.h"
#include "instructions-aarch64.h"
#include "operands-aarch64.h"

@ -46,10 +51,8 @@ class Disassembler : public DecoderVisitor {
  char* GetOutput();

  // Declare all Visitor functions.
#define DECLARE(A) \
  virtual void Visit##A(const Instruction* instr) VIXL_OVERRIDE;
  VISITOR_LIST(DECLARE)
#undef DECLARE
  virtual void Visit(Metadata* metadata,
                     const Instruction* instr) VIXL_OVERRIDE;

 protected:
  virtual void ProcessOutput(const Instruction* instr);

@ -110,12 +113,145 @@ class Disassembler : public DecoderVisitor {
  int64_t CodeRelativeAddress(const void* instr);

 private:
#define DECLARE(A) virtual void Visit##A(const Instruction* instr);
  VISITOR_LIST(DECLARE)
#undef DECLARE

  using FormToVisitorFnMap = std::unordered_map<
      uint32_t,
      std::function<void(Disassembler*, const Instruction*)>>;
  static const FormToVisitorFnMap* GetFormToVisitorFnMap();

  std::string mnemonic_;
  uint32_t form_hash_;

  void SetMnemonicFromForm(const std::string& form) {
    if (form != "unallocated") {
      VIXL_ASSERT(form.find_first_of('_') != std::string::npos);
      mnemonic_ = form.substr(0, form.find_first_of('_'));
    }
  }

  void Disassemble_PdT_PgZ_ZnT_ZmT(const Instruction* instr);
  void Disassemble_ZdB_Zn1B_Zn2B_imm(const Instruction* instr);
  void Disassemble_ZdB_ZnB_ZmB(const Instruction* instr);
  void Disassemble_ZdD_PgM_ZnS(const Instruction* instr);
  void Disassemble_ZdD_ZnD_ZmD(const Instruction* instr);
  void Disassemble_ZdD_ZnD_ZmD_imm(const Instruction* instr);
  void Disassemble_ZdD_ZnS_ZmS_imm(const Instruction* instr);
  void Disassemble_ZdH_PgM_ZnS(const Instruction* instr);
  void Disassemble_ZdH_ZnH_ZmH_imm(const Instruction* instr);
  void Disassemble_ZdS_PgM_ZnD(const Instruction* instr);
  void Disassemble_ZdS_PgM_ZnH(const Instruction* instr);
  void Disassemble_ZdS_PgM_ZnS(const Instruction* instr);
  void Disassemble_ZdS_ZnH_ZmH_imm(const Instruction* instr);
  void Disassemble_ZdS_ZnS_ZmS(const Instruction* instr);
  void Disassemble_ZdS_ZnS_ZmS_imm(const Instruction* instr);
  void Disassemble_ZdT_PgM_ZnT(const Instruction* instr);
  void Disassemble_ZdT_PgZ_ZnT_ZmT(const Instruction* instr);
  void Disassemble_ZdT_Pg_Zn1T_Zn2T(const Instruction* instr);
  void Disassemble_ZdT_Zn1T_Zn2T_ZmT(const Instruction* instr);
  void Disassemble_ZdT_ZnT_ZmT(const Instruction* instr);
  void Disassemble_ZdT_ZnT_ZmTb(const Instruction* instr);
  void Disassemble_ZdT_ZnTb(const Instruction* instr);
  void Disassemble_ZdT_ZnTb_ZmTb(const Instruction* instr);
  void Disassemble_ZdaD_ZnD_ZmD_imm(const Instruction* instr);
  void Disassemble_ZdaD_ZnH_ZmH_imm_const(const Instruction* instr);
  void Disassemble_ZdaD_ZnS_ZmS_imm(const Instruction* instr);
  void Disassemble_ZdaH_ZnH_ZmH_imm(const Instruction* instr);
  void Disassemble_ZdaH_ZnH_ZmH_imm_const(const Instruction* instr);
  void Disassemble_ZdaS_ZnB_ZmB_imm_const(const Instruction* instr);
  void Disassemble_ZdaS_ZnH_ZmH(const Instruction* instr);
  void Disassemble_ZdaS_ZnH_ZmH_imm(const Instruction* instr);
  void Disassemble_ZdaS_ZnS_ZmS_imm(const Instruction* instr);
  void Disassemble_ZdaS_ZnS_ZmS_imm_const(const Instruction* instr);
  void Disassemble_ZdaT_PgM_ZnTb(const Instruction* instr);
  void Disassemble_ZdaT_ZnT_ZmT(const Instruction* instr);
  void Disassemble_ZdaT_ZnT_ZmT_const(const Instruction* instr);
  void Disassemble_ZdaT_ZnT_const(const Instruction* instr);
  void Disassemble_ZdaT_ZnTb_ZmTb(const Instruction* instr);
  void Disassemble_ZdaT_ZnTb_ZmTb_const(const Instruction* instr);
  void Disassemble_ZdnB_ZdnB(const Instruction* instr);
  void Disassemble_ZdnB_ZdnB_ZmB(const Instruction* instr);
  void Disassemble_ZdnS_ZdnS_ZmS(const Instruction* instr);
  void Disassemble_ZdnT_PgM_ZdnT_ZmT(const Instruction* instr);
  void Disassemble_ZdnT_PgM_ZdnT_const(const Instruction* instr);
  void Disassemble_ZdnT_ZdnT_ZmT_const(const Instruction* instr);
  void Disassemble_ZtD_PgZ_ZnD_Xm(const Instruction* instr);
  void Disassemble_ZtD_Pg_ZnD_Xm(const Instruction* instr);
  void Disassemble_ZtS_PgZ_ZnS_Xm(const Instruction* instr);
  void Disassemble_ZtS_Pg_ZnS_Xm(const Instruction* instr);
  void Disassemble_ZdaS_ZnB_ZmB(const Instruction* instr);
  void Disassemble_Vd4S_Vn16B_Vm16B(const Instruction* instr);

  void DisassembleCpy(const Instruction* instr);
  void DisassembleSet(const Instruction* instr);
  void DisassembleMinMaxImm(const Instruction* instr);

  void DisassembleSVEShiftLeftImm(const Instruction* instr);
  void DisassembleSVEShiftRightImm(const Instruction* instr);
  void DisassembleSVEAddSubCarry(const Instruction* instr);
  void DisassembleSVEAddSubHigh(const Instruction* instr);
  void DisassembleSVEComplexIntAddition(const Instruction* instr);
  void DisassembleSVEBitwiseTernary(const Instruction* instr);
  void DisassembleSVEFlogb(const Instruction* instr);
  void DisassembleSVEFPPair(const Instruction* instr);

  void DisassembleNoArgs(const Instruction* instr);

  void DisassembleNEONMulByElementLong(const Instruction* instr);
  void DisassembleNEONDotProdByElement(const Instruction* instr);
  void DisassembleNEONFPMulByElement(const Instruction* instr);
  void DisassembleNEONHalfFPMulByElement(const Instruction* instr);
  void DisassembleNEONFPMulByElementLong(const Instruction* instr);
  void DisassembleNEONComplexMulByElement(const Instruction* instr);
  void DisassembleNEON2RegLogical(const Instruction* instr);
  void DisassembleNEON2RegExtract(const Instruction* instr);
  void DisassembleNEON2RegAddlp(const Instruction* instr);
  void DisassembleNEON2RegCompare(const Instruction* instr);
  void DisassembleNEON2RegFPCompare(const Instruction* instr);
  void DisassembleNEON2RegFPConvert(const Instruction* instr);
  void DisassembleNEON2RegFP(const Instruction* instr);
  void DisassembleNEON3SameLogical(const Instruction* instr);
  void DisassembleNEON3SameFHM(const Instruction* instr);
  void DisassembleNEON3SameNoD(const Instruction* instr);
  void DisassembleNEONShiftLeftLongImm(const Instruction* instr);
  void DisassembleNEONShiftRightImm(const Instruction* instr);
  void DisassembleNEONShiftRightNarrowImm(const Instruction* instr);
  void DisassembleNEONScalarSatMulLongIndex(const Instruction* instr);
  void DisassembleNEONFPScalarMulIndex(const Instruction* instr);
  void DisassembleNEONFPScalar3Same(const Instruction* instr);
  void DisassembleNEONScalar3SameOnlyD(const Instruction* instr);
  void DisassembleNEONFPAcrossLanes(const Instruction* instr);
  void DisassembleNEONFP16AcrossLanes(const Instruction* instr);
  void DisassembleNEONScalarShiftImmOnlyD(const Instruction* instr);
  void DisassembleNEONScalarShiftRightNarrowImm(const Instruction* instr);
  void DisassembleNEONScalar2RegMiscOnlyD(const Instruction* instr);
  void DisassembleNEONFPScalar2RegMisc(const Instruction* instr);
  void DisassembleNEONPolynomialMul(const Instruction* instr);

  void DisassembleMTELoadTag(const Instruction* instr);
  void DisassembleMTEStoreTag(const Instruction* instr);
  void DisassembleMTEStoreTagPair(const Instruction* instr);

  void Disassemble_XdSP_XnSP_Xm(const Instruction* instr);
  void Disassemble_XdSP_XnSP_uimm6_uimm4(const Instruction* instr);
  void Disassemble_Xd_XnSP_Xm(const Instruction* instr);
  void Disassemble_Xd_XnSP_XmSP(const Instruction* instr);

  void Format(const Instruction* instr,
              const char* mnemonic,
              const char* format);
              const char* format0,
              const char* format1 = NULL);
  void FormatWithDecodedMnemonic(const Instruction* instr,
                                 const char* format0,
                                 const char* format1 = NULL);

  void Substitute(const Instruction* instr, const char* string);
  int SubstituteField(const Instruction* instr, const char* format);
  int SubstituteRegisterField(const Instruction* instr, const char* format);
  int SubstitutePredicateRegisterField(const Instruction* instr,
                                       const char* format);
  int SubstituteImmediateField(const Instruction* instr, const char* format);
  int SubstituteLiteralField(const Instruction* instr, const char* format);
  int SubstituteBitfieldImmediateField(const Instruction* instr,

@ -130,6 +266,14 @@ class Disassembler : public DecoderVisitor {
  int SubstituteBarrierField(const Instruction* instr, const char* format);
  int SubstituteSysOpField(const Instruction* instr, const char* format);
  int SubstituteCrField(const Instruction* instr, const char* format);
  int SubstituteIntField(const Instruction* instr, const char* format);
  int SubstituteSVESize(const Instruction* instr, const char* format);
  int SubstituteTernary(const Instruction* instr, const char* format);

  std::pair<unsigned, unsigned> GetRegNumForField(const Instruction* instr,
                                                  char reg_prefix,
                                                  const char* field);

  bool RdIsZROrSP(const Instruction* instr) const {
    return (instr->GetRd() == kZeroRegCode);
  }

@ -173,6 +317,7 @@ class PrintDisassembler : public Disassembler {
      : cpu_features_auditor_(NULL),
        cpu_features_prefix_("// Needs: "),
        cpu_features_suffix_(""),
        signed_addresses_(false),
        stream_(stream) {}

  // Convenience helpers for quick disassembly, without having to manually

@ -201,12 +346,23 @@ class PrintDisassembler : public Disassembler {
    cpu_features_suffix_ = suffix;
  }

  // By default, addresses are printed as simple, unsigned 64-bit hex values.
  //
  // With `PrintSignedAddresses(true)`:
  //  - negative addresses are printed as "-0x1234...",
  //  - positive addresses have a leading space, like " 0x1234...", to maintain
  //    alignment.
  //
  // This is most useful in combination with Disassembler::MapCodeAddress(...).
  void PrintSignedAddresses(bool s) { signed_addresses_ = s; }
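
  // A hedged usage sketch (editor's addition): pairing MapCodeAddress with
  // signed addresses, so instructions below the mapped origin disassemble
  // with negative offsets. DisassembleBuffer is the convenience helper
  // referred to above; exact signatures may differ between VIXL revisions.
  //
  //   PrintDisassembler disasm(stdout);
  //   disasm.MapCodeAddress(0, code_start);  // Treat code_start as address 0.
  //   disasm.PrintSignedAddresses(true);     // Earlier code prints as "-0x...".
  //   disasm.DisassembleBuffer(code_start, size_in_bytes);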

 protected:
  virtual void ProcessOutput(const Instruction* instr) VIXL_OVERRIDE;

  CPUFeaturesAuditor* cpu_features_auditor_;
  const char* cpu_features_prefix_;
  const char* cpu_features_suffix_;
  bool signed_addresses_;

 private:
  FILE* stream_;

@ -32,6 +32,11 @@

#include "constants-aarch64.h"

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-enum-enum-conversion"
#endif

namespace vixl {
namespace aarch64 {
// ISA constants. --------------------------------------------------------------

@ -81,6 +86,7 @@ const uint64_t kXRegMask = UINT64_C(0xffffffffffffffff);
const uint64_t kHRegMask = UINT64_C(0xffff);
const uint64_t kSRegMask = UINT64_C(0xffffffff);
const uint64_t kDRegMask = UINT64_C(0xffffffffffffffff);
const uint64_t kHSignMask = UINT64_C(0x8000);
const uint64_t kSSignMask = UINT64_C(0x80000000);
const uint64_t kDSignMask = UINT64_C(0x8000000000000000);
const uint64_t kWSignMask = UINT64_C(0x80000000);

@ -106,6 +112,8 @@ const unsigned kZeroRegCode = 31;
const unsigned kSPRegInternalCode = 63;
const unsigned kRegCodeMask = 0x1f;

const unsigned kAtomicAccessGranule = 16;

const unsigned kAddressTagOffset = 56;
const unsigned kAddressTagWidth = 8;
const uint64_t kAddressTagMask = ((UINT64_C(1) << kAddressTagWidth) - 1)

@ -114,21 +122,49 @@ VIXL_STATIC_ASSERT(kAddressTagMask == UINT64_C(0xff00000000000000));

const uint64_t kTTBRMask = UINT64_C(1) << 55;

// We can't define a static kZRegSize because the size depends on the
// implementation. However, it is sometimes useful to know the minimum and
// maximum possible sizes.
const unsigned kZRegMinSize = 128;
const unsigned kZRegMinSizeLog2 = 7;
const unsigned kZRegMinSizeInBytes = kZRegMinSize / 8;
const unsigned kZRegMinSizeInBytesLog2 = kZRegMinSizeLog2 - 3;
const unsigned kZRegMaxSize = 2048;
const unsigned kZRegMaxSizeLog2 = 11;
const unsigned kZRegMaxSizeInBytes = kZRegMaxSize / 8;
const unsigned kZRegMaxSizeInBytesLog2 = kZRegMaxSizeLog2 - 3;

// The P register size depends on the Z register size.
const unsigned kZRegBitsPerPRegBit = kBitsPerByte;
const unsigned kZRegBitsPerPRegBitLog2 = 3;
const unsigned kPRegMinSize = kZRegMinSize / kZRegBitsPerPRegBit;
const unsigned kPRegMinSizeLog2 = kZRegMinSizeLog2 - 3;
const unsigned kPRegMinSizeInBytes = kPRegMinSize / 8;
const unsigned kPRegMinSizeInBytesLog2 = kPRegMinSizeLog2 - 3;
const unsigned kPRegMaxSize = kZRegMaxSize / kZRegBitsPerPRegBit;
const unsigned kPRegMaxSizeLog2 = kZRegMaxSizeLog2 - 3;
const unsigned kPRegMaxSizeInBytes = kPRegMaxSize / 8;
const unsigned kPRegMaxSizeInBytesLog2 = kPRegMaxSizeLog2 - 3;
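
// A quick illustrative check (editor's addition) of the Z/P relationship
// above: predicate registers hold one bit per Z-register byte, so a 512-bit
// Z vector implies a 64-bit P register.
VIXL_STATIC_ASSERT((512 / kZRegBitsPerPRegBit) == 64);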

const unsigned kMTETagGranuleInBytes = 16;
const unsigned kMTETagGranuleInBytesLog2 = 4;
const unsigned kMTETagWidth = 4;

// Make these moved float constants backwards compatible
// with explicit vixl::aarch64:: namespace references.
using vixl::kDoubleMantissaBits;
using vixl::kDoubleExponentBits;
using vixl::kFloatMantissaBits;
using vixl::kFloatExponentBits;
using vixl::kFloat16MantissaBits;
using vixl::kDoubleMantissaBits;
using vixl::kFloat16ExponentBits;
using vixl::kFloat16MantissaBits;
using vixl::kFloatExponentBits;
using vixl::kFloatMantissaBits;

using vixl::kFP16PositiveInfinity;
using vixl::kFP16NegativeInfinity;
using vixl::kFP32PositiveInfinity;
using vixl::kFP16PositiveInfinity;
using vixl::kFP32NegativeInfinity;
using vixl::kFP64PositiveInfinity;
using vixl::kFP32PositiveInfinity;
using vixl::kFP64NegativeInfinity;
using vixl::kFP64PositiveInfinity;

using vixl::kFP16DefaultNaN;
using vixl::kFP32DefaultNaN;

@ -149,6 +185,49 @@ enum AddrMode { Offset, PreIndex, PostIndex };
enum Reg31Mode { Reg31IsStackPointer, Reg31IsZeroRegister };

enum VectorFormat {
  kFormatUndefined = 0xffffffff,
  kFormat8B = NEON_8B,
  kFormat16B = NEON_16B,
  kFormat4H = NEON_4H,
  kFormat8H = NEON_8H,
  kFormat2S = NEON_2S,
  kFormat4S = NEON_4S,
  kFormat1D = NEON_1D,
  kFormat2D = NEON_2D,

  // Scalar formats. We add the scalar bit to distinguish between scalar and
  // vector enumerations; the bit is always set in the encoding of scalar ops
  // and always clear for vector ops. Although kFormatD and kFormat1D appear
  // to be the same, their meaning is subtly different. The first is a scalar
  // operation, the second a vector operation that only affects one lane.
  kFormatB = NEON_B | NEONScalar,
  kFormatH = NEON_H | NEONScalar,
  kFormatS = NEON_S | NEONScalar,
  kFormatD = NEON_D | NEONScalar,

  // An artificial value, used to distinguish from NEON format category.
  kFormatSVE = 0x0000fffd,
  // Artificial values. Q and O lane sizes aren't encoded in the usual size
  // field.
  kFormatSVEQ = 0x00080000,
  kFormatSVEO = 0x00040000,

  // Vector element width of SVE register with the unknown lane count since
  // the vector length is implementation dependent.
  kFormatVnB = SVE_B | kFormatSVE,
  kFormatVnH = SVE_H | kFormatSVE,
  kFormatVnS = SVE_S | kFormatSVE,
  kFormatVnD = SVE_D | kFormatSVE,
  kFormatVnQ = kFormatSVEQ | kFormatSVE,
  kFormatVnO = kFormatSVEO | kFormatSVE,

  // Artificial values, used by simulator trace tests and a few oddball
  // instructions (such as FMLAL).
  kFormat2H = 0xfffffffe,
  kFormat1Q = 0xfffffffd
};

// Instructions. ---------------------------------------------------------------

class Instruction {

@ -176,6 +255,47 @@ class Instruction {
    return ExtractBits(msb, lsb);
  }

  // Compress bit extraction operation from Hacker's Delight.
  // https://github.com/hcs0/Hackers-Delight/blob/master/compress.c.txt
  uint32_t Compress(uint32_t mask) const {
    uint32_t mk, mp, mv, t;
    uint32_t x = GetInstructionBits() & mask;  // Clear irrelevant bits.
    mk = ~mask << 1;                           // We will count 0's to right.
    for (int i = 0; i < 5; i++) {
      mp = mk ^ (mk << 1);  // Parallel suffix.
      mp = mp ^ (mp << 2);
      mp = mp ^ (mp << 4);
      mp = mp ^ (mp << 8);
      mp = mp ^ (mp << 16);
      mv = mp & mask;                         // Bits to move.
      mask = (mask ^ mv) | (mv >> (1 << i));  // Compress mask.
      t = x & mv;
      x = (x ^ t) | (t >> (1 << i));  // Compress x.
      mk = mk & ~mp;
    }
    return x;
  }
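
  // Worked example (editor's addition, illustrative only): with mask =
  // 0b1010, an instruction word 0bXYZW is reduced to 0bXZ. Concretely, for
  // instruction bits 0b1110: bit 3 ('X') = 1 and bit 1 ('Z') = 1, so
  // Compress(0b1010) returns 0b11; the don't-care bits Y and W are
  // discarded. This is the packed-index behaviour the bit extraction
  // functions above rely on.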

  template <uint32_t M>
  uint32_t ExtractBits() const {
    return Compress(M);
  }

  uint32_t ExtractBitsAbsent() const {
    VIXL_UNREACHABLE();
    return 0;
  }

  template <uint32_t M, uint32_t V>
  uint32_t IsMaskedValue() const {
    return (Mask(M) == V) ? 1 : 0;
  }

  uint32_t IsMaskedValueAbsent() const {
    VIXL_UNREACHABLE();
    return 0;
  }

  int32_t ExtractSignedBits(int msb, int lsb) const {
    int32_t bits = *(reinterpret_cast<const int32_t*>(this));
    return ExtractSignedBitfield32(msb, lsb, bits);

@ -196,6 +316,34 @@ class Instruction {
  INSTRUCTION_FIELDS_LIST(DEFINE_GETTER)
#undef DEFINE_GETTER

  template <int msb, int lsb>
  int32_t GetRx() const {
    // We don't have any register fields wider than five bits, so the result
    // will always fit into an int32_t.
    VIXL_ASSERT((msb - lsb + 1) <= 5);
    return this->ExtractBits(msb, lsb);
  }

  VectorFormat GetSVEVectorFormat(int field_lsb = 22) const {
    VIXL_ASSERT((field_lsb >= 0) && (field_lsb <= 30));
    uint32_t instr = ExtractUnsignedBitfield32(field_lsb + 1,
                                               field_lsb,
                                               GetInstructionBits())
                     << 22;
    switch (instr & SVESizeFieldMask) {
      case SVE_B:
        return kFormatVnB;
      case SVE_H:
        return kFormatVnH;
      case SVE_S:
        return kFormatVnS;
      case SVE_D:
        return kFormatVnD;
    }
    VIXL_UNREACHABLE();
    return kFormatUndefined;
  }

  // ImmPCRel is a compound field (not present in INSTRUCTION_FIELDS_LIST),
  // formed from ImmPCRelLo and ImmPCRelHi.
  int GetImmPCRel() const {

@ -207,10 +355,40 @@ class Instruction {
  }
  VIXL_DEPRECATED("GetImmPCRel", int ImmPCRel() const) { return GetImmPCRel(); }

  // ImmLSPAC is a compound field (not present in INSTRUCTION_FIELDS_LIST),
  // formed from ImmLSPACLo and ImmLSPACHi.
  int GetImmLSPAC() const {
    uint32_t hi = static_cast<uint32_t>(GetImmLSPACHi());
    uint32_t lo = GetImmLSPACLo();
    uint32_t offset = (hi << ImmLSPACLo_width) | lo;
    int width = ImmLSPACLo_width + ImmLSPACHi_width;
    return ExtractSignedBitfield32(width - 1, 0, offset) << 3;
  }
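
  // Worked example (editor's addition, assuming the usual 10-bit
  // ImmLSPACHi:ImmLSPACLo field): an all-ones encoding sign-extends to -1
  // and, after the << 3 scaling above, GetImmLSPAC() returns -8. PAC
  // load/store offsets are therefore multiples of 8.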

  uint64_t GetImmLogical() const;
  VIXL_DEPRECATED("GetImmLogical", uint64_t ImmLogical() const) {
    return GetImmLogical();
  }
  uint64_t GetSVEImmLogical() const;
  int GetSVEBitwiseImmLaneSizeInBytesLog2() const;
  uint64_t DecodeImmBitMask(int32_t n,
                            int32_t imm_s,
                            int32_t imm_r,
                            int32_t size) const;

  std::pair<int, int> GetSVEPermuteIndexAndLaneSizeLog2() const;

  std::pair<int, int> GetSVEMulZmAndIndex() const;
  std::pair<int, int> GetSVEMulLongZmAndIndex() const;

  std::pair<int, int> GetSVEImmShiftAndLaneSizeLog2(bool is_predicated) const;

  int GetSVEExtractImmediate() const;

  int GetSVEMsizeFromDtype(bool is_signed, int dtype_h_lsb = 23) const;

  int GetSVEEsizeFromDtype(bool is_signed, int dtype_l_lsb = 21) const;

  unsigned GetImmNEONabcdefgh() const;
  VIXL_DEPRECATED("GetImmNEONabcdefgh", unsigned ImmNEONabcdefgh() const) {

@ -237,6 +415,16 @@ class Instruction {
    return GetImmNEONFP64();
  }

  Float16 GetSVEImmFP16() const { return Imm8ToFloat16(ExtractBits(12, 5)); }

  float GetSVEImmFP32() const { return Imm8ToFP32(ExtractBits(12, 5)); }

  double GetSVEImmFP64() const { return Imm8ToFP64(ExtractBits(12, 5)); }

  static Float16 Imm8ToFloat16(uint32_t imm8);
  static float Imm8ToFP32(uint32_t imm8);
  static double Imm8ToFP64(uint32_t imm8);

  unsigned GetSizeLS() const {
    return CalcLSDataSize(static_cast<LoadStoreOp>(Mask(LoadStoreMask)));
  }

@ -299,6 +487,10 @@ class Instruction {
    return Mask(LoadStoreAnyFMask) == LoadStoreAnyFixed;
  }

  // True if `this` is valid immediately after the provided movprfx instruction.
  bool CanTakeSVEMovprfx(uint32_t form_hash, Instruction const* movprfx) const;
  bool CanTakeSVEMovprfx(const char* form, Instruction const* movprfx) const;

  bool IsLoad() const;
  bool IsStore() const;

@ -312,6 +504,83 @@ class Instruction {
           (Mask(MoveWideImmediateMask) == MOVN_w);
  }

  bool IsException() const { return Mask(ExceptionFMask) == ExceptionFixed; }

  bool IsPAuth() const { return Mask(SystemPAuthFMask) == SystemPAuthFixed; }

  bool IsBti() const {
    if (Mask(SystemHintFMask) == SystemHintFixed) {
      int imm_hint = GetImmHint();
      switch (imm_hint) {
        case BTI:
        case BTI_c:
        case BTI_j:
        case BTI_jc:
          return true;
      }
    }
    return false;
  }

  bool IsMOPSPrologueOf(const Instruction* instr, uint32_t mops_type) const {
    VIXL_ASSERT((mops_type == "set"_h) || (mops_type == "setg"_h) ||
                (mops_type == "cpy"_h));
    const int op_lsb = (mops_type == "cpy"_h) ? 22 : 14;
    return GetInstructionBits() == instr->Mask(~(0x3U << op_lsb));
  }

  bool IsMOPSMainOf(const Instruction* instr, uint32_t mops_type) const {
    VIXL_ASSERT((mops_type == "set"_h) || (mops_type == "setg"_h) ||
                (mops_type == "cpy"_h));
    const int op_lsb = (mops_type == "cpy"_h) ? 22 : 14;
    return GetInstructionBits() ==
           (instr->Mask(~(0x3U << op_lsb)) | (0x1 << op_lsb));
  }

  bool IsMOPSEpilogueOf(const Instruction* instr, uint32_t mops_type) const {
    VIXL_ASSERT((mops_type == "set"_h) || (mops_type == "setg"_h) ||
                (mops_type == "cpy"_h));
    const int op_lsb = (mops_type == "cpy"_h) ? 22 : 14;
    return GetInstructionBits() ==
           (instr->Mask(~(0x3U << op_lsb)) | (0x2 << op_lsb));
  }

  template <uint32_t mops_type>
  bool IsConsistentMOPSTriplet() const {
    VIXL_STATIC_ASSERT((mops_type == "set"_h) || (mops_type == "setg"_h) ||
                       (mops_type == "cpy"_h));

    int64_t isize = static_cast<int64_t>(kInstructionSize);
    const Instruction* prev2 = GetInstructionAtOffset(-2 * isize);
    const Instruction* prev1 = GetInstructionAtOffset(-1 * isize);
    const Instruction* next1 = GetInstructionAtOffset(1 * isize);
    const Instruction* next2 = GetInstructionAtOffset(2 * isize);

    // Use the encoding of the current instruction to determine the expected
    // adjacent instructions. NB. this doesn't check if the nearby instructions
    // are MOPS-type, but checks that they form a consistent triplet if they
    // are. For example, 'mov x0, #0; mov x0, #512; mov x0, #1024' is a
    // consistent triplet, but they are not MOPS instructions.
    const int op_lsb = (mops_type == "cpy"_h) ? 22 : 14;
    const uint32_t kMOPSOpfield = 0x3 << op_lsb;
    const uint32_t kMOPSPrologue = 0;
    const uint32_t kMOPSMain = 0x1 << op_lsb;
    const uint32_t kMOPSEpilogue = 0x2 << op_lsb;
    switch (Mask(kMOPSOpfield)) {
      case kMOPSPrologue:
        return next1->IsMOPSMainOf(this, mops_type) &&
               next2->IsMOPSEpilogueOf(this, mops_type);
      case kMOPSMain:
        return prev1->IsMOPSPrologueOf(this, mops_type) &&
               next1->IsMOPSEpilogueOf(this, mops_type);
      case kMOPSEpilogue:
        return prev2->IsMOPSPrologueOf(this, mops_type) &&
               prev1->IsMOPSMainOf(this, mops_type);
      default:
        VIXL_ABORT_WITH_MSG("Undefined MOPS operation\n");
    }
  }

  static int GetImmBranchRangeBitwidth(ImmBranchType branch_type);
  VIXL_DEPRECATED(
      "GetImmBranchRangeBitwidth",

@ -496,40 +765,12 @@ class Instruction {
 private:
  int GetImmBranch() const;

  static Float16 Imm8ToFloat16(uint32_t imm8);
  static float Imm8ToFP32(uint32_t imm8);
  static double Imm8ToFP64(uint32_t imm8);

  void SetPCRelImmTarget(const Instruction* target);
  void SetBranchImmTarget(const Instruction* target);
};

// Functions for handling NEON vector format information.
enum VectorFormat {
  kFormatUndefined = 0xffffffff,
  kFormat8B = NEON_8B,
  kFormat16B = NEON_16B,
  kFormat4H = NEON_4H,
  kFormat8H = NEON_8H,
  kFormat2S = NEON_2S,
  kFormat4S = NEON_4S,
  kFormat1D = NEON_1D,
  kFormat2D = NEON_2D,

  // Scalar formats. We add the scalar bit to distinguish between scalar and
  // vector enumerations; the bit is always set in the encoding of scalar ops
  // and always clear for vector ops. Although kFormatD and kFormat1D appear
  // to be the same, their meaning is subtly different. The first is a scalar
  // operation, the second a vector operation that only affects one lane.
  kFormatB = NEON_B | NEONScalar,
  kFormatH = NEON_H | NEONScalar,
  kFormatS = NEON_S | NEONScalar,
  kFormatD = NEON_D | NEONScalar,

  // A value invented solely for FP16 scalar pairwise simulator trace tests.
  kFormat2H = 0xfffffffe
};
// Functions for handling NEON and SVE vector format information.

const int kMaxLanesPerVector = 16;

@ -537,12 +778,16 @@ VectorFormat VectorFormatHalfWidth(VectorFormat vform);
VectorFormat VectorFormatDoubleWidth(VectorFormat vform);
VectorFormat VectorFormatDoubleLanes(VectorFormat vform);
VectorFormat VectorFormatHalfLanes(VectorFormat vform);
VectorFormat ScalarFormatFromLaneSize(int lanesize);
VectorFormat ScalarFormatFromLaneSize(int lane_size_in_bits);
VectorFormat VectorFormatHalfWidthDoubleLanes(VectorFormat vform);
VectorFormat VectorFormatFillQ(VectorFormat vform);
VectorFormat ScalarFormatFromFormat(VectorFormat vform);
VectorFormat SVEFormatFromLaneSizeInBits(int lane_size_in_bits);
VectorFormat SVEFormatFromLaneSizeInBytes(int lane_size_in_bytes);
VectorFormat SVEFormatFromLaneSizeInBytesLog2(int lane_size_in_bytes_log_2);
unsigned RegisterSizeInBitsFromFormat(VectorFormat vform);
unsigned RegisterSizeInBytesFromFormat(VectorFormat vform);
bool IsSVEFormat(VectorFormat vform);
// TODO: Make the return types of these functions consistent.
unsigned LaneSizeInBitsFromFormat(VectorFormat vform);
int LaneSizeInBytesFromFormat(VectorFormat vform);

@ -588,7 +833,7 @@ class NEONFormatDecoder {
  enum SubstitutionMode { kPlaceholder, kFormat };

  // Construct a format decoder with increasingly specific format maps for each
  // subsitution. If no format map is specified, the default is the integer
  // substitution. If no format map is specified, the default is the integer
  // format map.
  explicit NEONFormatDecoder(const Instruction* instr) {
    instrbits_ = instr->GetInstructionBits();

@ -639,18 +884,26 @@ class NEONFormatDecoder {
                     SubstitutionMode mode0 = kFormat,
                     SubstitutionMode mode1 = kFormat,
                     SubstitutionMode mode2 = kFormat) {
    const char* subst0 = GetSubstitute(0, mode0);
    const char* subst1 = GetSubstitute(1, mode1);
    const char* subst2 = GetSubstitute(2, mode2);

    if ((subst0 == NULL) || (subst1 == NULL) || (subst2 == NULL)) {
      return NULL;
    }

    snprintf(form_buffer_,
             sizeof(form_buffer_),
             string,
             GetSubstitute(0, mode0),
             GetSubstitute(1, mode1),
             GetSubstitute(2, mode2));
             subst0,
             subst1,
             subst2);
    return form_buffer_;
  }

  // Append a "2" to a mnemonic string based of the state of the Q bit.
  // Append a "2" to a mnemonic string based on the state of the Q bit.
  const char* Mnemonic(const char* mnemonic) {
    if ((instrbits_ & NEON_Q) != 0) {
    if ((mnemonic != NULL) && (instrbits_ & NEON_Q) != 0) {
      snprintf(mne_buffer_, sizeof(mne_buffer_), "%s2", mnemonic);
      return mne_buffer_;
    }

@ -745,6 +998,33 @@ class NEONFormatDecoder {
    return &map;
  }

  // The shift immediate map uses between two and five bits to encode the NEON
  // vector format:
  //   00010->8B, 00011->16B, 001x0->4H, 001x1->8H,
  //   01xx0->2S, 01xx1->4S, 1xxx1->2D, all others undefined.
  static const NEONFormatMap* ShiftImmFormatMap() {
    static const NEONFormatMap map = {{22, 21, 20, 19, 30},
                                      {NF_UNDEF, NF_UNDEF, NF_8B,    NF_16B,
                                       NF_4H,    NF_8H,    NF_4H,    NF_8H,
                                       NF_2S,    NF_4S,    NF_2S,    NF_4S,
                                       NF_2S,    NF_4S,    NF_2S,    NF_4S,
                                       NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D,
                                       NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D,
                                       NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D,
                                       NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D}};
    return &map;
  }

  // The shift long/narrow immediate map uses between two and four bits to
  // encode the NEON vector format:
  //   0001->8H, 001x->4S, 01xx->2D, all others undefined.
  static const NEONFormatMap* ShiftLongNarrowImmFormatMap() {
    static const NEONFormatMap map =
        {{22, 21, 20, 19},
         {NF_UNDEF, NF_8H, NF_4S, NF_4S, NF_2D, NF_2D, NF_2D, NF_2D}};
    return &map;
  }

  // The scalar format map uses two bits (size<1:0>) to encode the NEON scalar
  // formats: NF_B, NF_H, NF_S, NF_D.
  static const NEONFormatMap* ScalarFormatMap() {

@ -818,7 +1098,7 @@ class NEONFormatDecoder {
  static const char* NEONFormatAsString(NEONFormat format) {
    // clang-format off
    static const char* formats[] = {
      "undefined",
      NULL,
      "8b", "16b", "4h", "8h", "2s", "4s", "1d", "2d",
      "b", "h", "s", "d"
    };

@ -833,9 +1113,9 @@ class NEONFormatDecoder {
                (format == NF_D) || (format == NF_UNDEF));
    // clang-format off
    static const char* formats[] = {
      "undefined",
      "undefined", "undefined", "undefined", "undefined",
      "undefined", "undefined", "undefined", "undefined",
      NULL,
      NULL, NULL, NULL, NULL,
      NULL, NULL, NULL, NULL,
      "'B", "'H", "'S", "'D"
    };
    // clang-format on

@ -862,4 +1142,8 @@ class NEONFormatDecoder {
}  // namespace aarch64
}  // namespace vixl

#ifdef __clang__
#pragma clang diagnostic pop
#endif

#endif  // VIXL_AARCH64_INSTRUCTIONS_AARCH64_H_
@ -1,117 +0,0 @@
// Copyright 2014, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef VIXL_AARCH64_INSTRUMENT_AARCH64_H_
#define VIXL_AARCH64_INSTRUMENT_AARCH64_H_

#include "../globals-vixl.h"
#include "../utils-vixl.h"

#include "constants-aarch64.h"
#include "decoder-aarch64.h"
#include "instrument-aarch64.h"

namespace vixl {
namespace aarch64 {

const int kCounterNameMaxLength = 256;
const uint64_t kDefaultInstrumentationSamplingPeriod = 1 << 22;

enum InstrumentState { InstrumentStateDisable = 0, InstrumentStateEnable = 1 };

enum CounterType {
  Gauge = 0,      // Gauge counters reset themselves after reading.
  Cumulative = 1  // Cumulative counters keep their value after reading.
};

class Counter {
 public:
  explicit Counter(const char* name, CounterType type = Gauge);

  void Increment();
  void Enable();
  void Disable();
  bool IsEnabled();
  uint64_t GetCount();
  VIXL_DEPRECATED("GetCount", uint64_t count()) { return GetCount(); }

  const char* GetName();
  VIXL_DEPRECATED("GetName", const char* name()) { return GetName(); }

  CounterType GetType();
  VIXL_DEPRECATED("GetType", CounterType type()) { return GetType(); }

 private:
  char name_[kCounterNameMaxLength];
  uint64_t count_;
  bool enabled_;
  CounterType type_;
};

class Instrument : public DecoderVisitor {
 public:
  explicit Instrument(
      const char* datafile = NULL,
      uint64_t sample_period = kDefaultInstrumentationSamplingPeriod);
  ~Instrument();

  void Enable();
  void Disable();

  // Declare all Visitor functions.
#define DECLARE(A) void Visit##A(const Instruction* instr) VIXL_OVERRIDE;
  VISITOR_LIST(DECLARE)
#undef DECLARE

 private:
  void Update();
  void DumpCounters();
  void DumpCounterNames();
  void DumpEventMarker(unsigned marker);
  void HandleInstrumentationEvent(unsigned event);
  Counter* GetCounter(const char* name);

  void InstrumentLoadStore(const Instruction* instr);
  void InstrumentLoadStorePair(const Instruction* instr);

  std::list<Counter*> counters_;

  FILE* output_stream_;

  // Counter information is dumped every sample_period_ instructions decoded.
  // For a sample_period_ = 0 a final counter value is only produced when the
  // Instrumentation class is destroyed.
  uint64_t sample_period_;
};

}  // namespace aarch64
}  // namespace vixl

#endif  // VIXL_AARCH64_INSTRUMENT_AARCH64_H_
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -0,0 +1,902 @@
// Copyright 2019, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef VIXL_AARCH64_REGISTERS_AARCH64_H_
#define VIXL_AARCH64_REGISTERS_AARCH64_H_

#include <string>

#include "instructions-aarch64.h"

namespace vixl {
namespace aarch64 {

// An integer type capable of representing a homogeneous, non-overlapping set of
// registers as a bitmask of their codes.
typedef uint64_t RegList;
static const int kRegListSizeInBits = sizeof(RegList) * 8;

class Register;
class WRegister;
class XRegister;

class VRegister;
class BRegister;
class HRegister;
class SRegister;
class DRegister;
class QRegister;

class ZRegister;

class PRegister;
class PRegisterWithLaneSize;
class PRegisterM;
class PRegisterZ;

// A container for any single register supported by the processor. Selected
// qualifications are also supported. Basic registers can be constructed
// directly as CPURegister objects. Other variants should be constructed as one
// of the derived classes.
//
// CPURegister aims to support any getter that would also be available to more
// specialised register types. However, using the equivalent functions on the
// specialised register types can avoid run-time checks, and should therefore be
// preferred where run-time polymorphism isn't required.
//
// Type-specific modifiers are typically implemented only on the derived
// classes.
//
// The encoding is such that CPURegister objects are cheap to pass by value.
class CPURegister {
 public:
  enum RegisterBank : uint8_t {
    kNoRegisterBank = 0,
    kRRegisterBank,
    kVRegisterBank,
    kPRegisterBank
  };
  enum RegisterType {
    kNoRegister,
    kRegister,
    kVRegister,
    kZRegister,
    kPRegister
  };

  static const unsigned kUnknownSize = 0;

  VIXL_CONSTEXPR CPURegister()
      : code_(0),
        bank_(kNoRegisterBank),
        size_(kEncodedUnknownSize),
        qualifiers_(kNoQualifiers),
        lane_size_(kEncodedUnknownSize) {}

  CPURegister(int code, int size_in_bits, RegisterType type)
      : code_(code),
        bank_(GetBankFor(type)),
        size_(EncodeSizeInBits(size_in_bits)),
        qualifiers_(kNoQualifiers),
        lane_size_(EncodeSizeInBits(size_in_bits)) {
    VIXL_ASSERT(IsValid());
  }

  // Basic accessors.

  // TODO: Make this return 'int'.
  unsigned GetCode() const { return code_; }

  RegisterBank GetBank() const { return bank_; }

  // For scalar registers, the lane size matches the register size, and is
  // always known.
  bool HasSize() const { return size_ != kEncodedUnknownSize; }
  bool HasLaneSize() const { return lane_size_ != kEncodedUnknownSize; }

  RegList GetBit() const {
    if (IsNone()) return 0;
    VIXL_ASSERT(code_ < kRegListSizeInBits);
    return static_cast<RegList>(1) << code_;
  }
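
  // Illustrative note (editor's addition): GetBit() positions each register
  // in a RegList by its code, so accumulating, say, x0 and x2 produces the
  // bitmask 0b101:
  //
  //   RegList list = x0.GetBit() | x2.GetBit();  // 0b101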

  // Return the architectural name for this register.
  // TODO: This is temporary. Ultimately, we should move the
  // Simulator::*RegNameForCode helpers out of the simulator, and provide an
  // independent way to obtain the name of a register.
  std::string GetArchitecturalName() const;

  // Return the highest valid register code for this type, to allow generic
  // loops to be written. This excludes kSPRegInternalCode, since it is not
  // contiguous, and sp usually requires special handling anyway.
  unsigned GetMaxCode() const { return GetMaxCodeFor(GetBank()); }

  // Registers without a known size report kUnknownSize.
  int GetSizeInBits() const { return DecodeSizeInBits(size_); }
  int GetSizeInBytes() const { return DecodeSizeInBytes(size_); }
  // TODO: Make these return 'int'.
  unsigned GetLaneSizeInBits() const { return DecodeSizeInBits(lane_size_); }
  unsigned GetLaneSizeInBytes() const { return DecodeSizeInBytes(lane_size_); }
  unsigned GetLaneSizeInBytesLog2() const {
    VIXL_ASSERT(HasLaneSize());
    return DecodeSizeInBytesLog2(lane_size_);
  }

  int GetLanes() const {
    if (HasSize() && HasLaneSize()) {
      // Take advantage of the size encoding to calculate this efficiently.
      VIXL_STATIC_ASSERT(kEncodedHRegSize == (kEncodedBRegSize + 1));
      VIXL_STATIC_ASSERT(kEncodedSRegSize == (kEncodedHRegSize + 1));
      VIXL_STATIC_ASSERT(kEncodedDRegSize == (kEncodedSRegSize + 1));
      VIXL_STATIC_ASSERT(kEncodedQRegSize == (kEncodedDRegSize + 1));
      int log2_delta = static_cast<int>(size_) - static_cast<int>(lane_size_);
      VIXL_ASSERT(log2_delta >= 0);
      return 1 << log2_delta;
    }
    return kUnknownSize;
  }
|
||||
|
||||
bool Is8Bits() const { return size_ == kEncodedBRegSize; }
|
||||
bool Is16Bits() const { return size_ == kEncodedHRegSize; }
|
||||
bool Is32Bits() const { return size_ == kEncodedSRegSize; }
|
||||
bool Is64Bits() const { return size_ == kEncodedDRegSize; }
|
||||
bool Is128Bits() const { return size_ == kEncodedQRegSize; }
|
||||
|
||||
bool IsLaneSizeB() const { return lane_size_ == kEncodedBRegSize; }
|
||||
bool IsLaneSizeH() const { return lane_size_ == kEncodedHRegSize; }
|
||||
bool IsLaneSizeS() const { return lane_size_ == kEncodedSRegSize; }
|
||||
bool IsLaneSizeD() const { return lane_size_ == kEncodedDRegSize; }
|
||||
bool IsLaneSizeQ() const { return lane_size_ == kEncodedQRegSize; }
|
||||
|
||||
// If Is<Foo>Register(), then it is valid to convert the CPURegister to some
|
||||
// <Foo>Register<Bar> type.
|
||||
//
|
||||
// If... ... then it is safe to construct ...
|
||||
// r.IsRegister() -> Register(r)
|
||||
// r.IsVRegister() -> VRegister(r)
|
||||
// r.IsZRegister() -> ZRegister(r)
|
||||
// r.IsPRegister() -> PRegister(r)
|
||||
//
|
||||
// r.IsPRegister() && HasLaneSize() -> PRegisterWithLaneSize(r)
|
||||
// r.IsPRegister() && IsMerging() -> PRegisterM(r)
|
||||
// r.IsPRegister() && IsZeroing() -> PRegisterZ(r)
|
||||
bool IsRegister() const { return GetType() == kRegister; }
|
||||
bool IsVRegister() const { return GetType() == kVRegister; }
|
||||
bool IsZRegister() const { return GetType() == kZRegister; }
|
||||
bool IsPRegister() const { return GetType() == kPRegister; }
|
||||
|
||||
bool IsNone() const { return GetType() == kNoRegister; }
|
||||
|
||||
// `GetType() == kNoRegister` implies IsNone(), and vice-versa.
|
||||
// `GetType() == k<Foo>Register` implies Is<Foo>Register(), and vice-versa.
|
||||
RegisterType GetType() const {
|
||||
switch (bank_) {
|
||||
case kNoRegisterBank:
|
||||
return kNoRegister;
|
||||
case kRRegisterBank:
|
||||
return kRegister;
|
||||
case kVRegisterBank:
|
||||
return HasSize() ? kVRegister : kZRegister;
|
||||
case kPRegisterBank:
|
||||
return kPRegister;
|
||||
}
|
||||
VIXL_UNREACHABLE();
|
||||
return kNoRegister;
|
||||
}
|
||||
|
||||
// IsFPRegister() is true for scalar FP types (and therefore implies
|
||||
// IsVRegister()). There is no corresponding FPRegister type.
|
||||
bool IsFPRegister() const { return Is1H() || Is1S() || Is1D(); }
|
||||
|
||||
// TODO: These are stricter forms of the helpers above. We should make the
|
||||
// basic helpers strict, and remove these.
|
||||
bool IsValidRegister() const;
|
||||
bool IsValidVRegister() const;
|
||||
bool IsValidFPRegister() const;
|
||||
bool IsValidZRegister() const;
|
||||
bool IsValidPRegister() const;
|
||||
|
||||
bool IsValid() const;
|
||||
bool IsValidOrNone() const { return IsNone() || IsValid(); }
|
||||
|
||||
bool IsVector() const { return HasLaneSize() && (size_ != lane_size_); }
|
||||
bool IsScalar() const { return HasLaneSize() && (size_ == lane_size_); }
|
||||
|
||||
bool IsSameType(const CPURegister& other) const {
|
||||
return GetType() == other.GetType();
|
||||
}
|
||||
|
||||
bool IsSameBank(const CPURegister& other) const {
|
||||
return GetBank() == other.GetBank();
|
||||
}
|
||||
|
||||
// Two registers with unknown size are considered to have the same size if
|
||||
// they also have the same type. For example, all Z registers have the same
|
||||
// size, even though we don't know what that is.
|
||||
bool IsSameSizeAndType(const CPURegister& other) const {
|
||||
return IsSameType(other) && (size_ == other.size_);
|
||||
}
|
||||
|
||||
bool IsSameFormat(const CPURegister& other) const {
|
||||
return IsSameSizeAndType(other) && (lane_size_ == other.lane_size_);
|
||||
}
|
||||
|
||||
// Note that NoReg aliases itself, so that 'Is' implies 'Aliases'.
|
||||
bool Aliases(const CPURegister& other) const {
|
||||
return IsSameBank(other) && (code_ == other.code_);
|
||||
}
|
||||
|
||||
bool Is(const CPURegister& other) const {
|
||||
if (IsRegister() || IsVRegister()) {
|
||||
// For core (W, X) and FP/NEON registers, we only consider the code, size
|
||||
// and type. This is legacy behaviour.
|
||||
// TODO: We should probably check every field for all registers.
|
||||
return Aliases(other) && (size_ == other.size_);
|
||||
} else {
|
||||
// For Z and P registers, we require all fields to match exactly.
|
||||
VIXL_ASSERT(IsNone() || IsZRegister() || IsPRegister());
|
||||
return (code_ == other.code_) && (bank_ == other.bank_) &&
|
||||
(size_ == other.size_) && (qualifiers_ == other.qualifiers_) &&
|
||||
(lane_size_ == other.lane_size_);
|
||||
}
|
||||
}
|
||||
|
||||
  // Conversions to specific register types. The result is a register that
  // aliases the original CPURegister. That is, the original register bank
  // (`GetBank()`) is checked and the code (`GetCode()`) preserved, but all
  // other properties are ignored.
  //
  // Typical usage:
  //
  //     if (reg.GetBank() == kVRegisterBank) {
  //       DRegister d = reg.D();
  //       ...
  //     }
  //
  // These could all return types with compile-time guarantees (like XRegister),
  // but this breaks backwards-compatibility quite severely, particularly with
  // code like `cond ? reg.W() : reg.X()`, which would have indeterminate type.

  // Core registers, like "w0".
  Register W() const;
  Register X() const;
  // FP/NEON registers, like "b0".
  VRegister B() const;
  VRegister H() const;
  VRegister S() const;
  VRegister D() const;
  VRegister Q() const;
  VRegister V() const;
  // SVE registers, like "z0".
  ZRegister Z() const;
  PRegister P() const;

  // Utilities for kRegister types.

  bool IsZero() const { return IsRegister() && (code_ == kZeroRegCode); }
  bool IsSP() const { return IsRegister() && (code_ == kSPRegInternalCode); }
  bool IsW() const { return IsRegister() && Is32Bits(); }
  bool IsX() const { return IsRegister() && Is64Bits(); }

  // Utilities for FP/NEON kVRegister types.

  // These helpers ensure that the size and type of the register are as
  // described. They do not consider the number of lanes that make up a vector.
  // So, for example, Is8B() implies IsD(), and Is1D() implies IsD(), but IsD()
  // does not imply Is1D() or Is8B().
  // Check the number of lanes, i.e. the format of the vector, using methods
  // such as Is8B(), Is1D(), etc.
  bool IsB() const { return IsVRegister() && Is8Bits(); }
  bool IsH() const { return IsVRegister() && Is16Bits(); }
  bool IsS() const { return IsVRegister() && Is32Bits(); }
  bool IsD() const { return IsVRegister() && Is64Bits(); }
  bool IsQ() const { return IsVRegister() && Is128Bits(); }

  // As above, but also check that the register has exactly one lane. For
  // example, reg.Is1D() implies DRegister(reg).IsValid(), but reg.IsD() does
  // not.
  bool Is1B() const { return IsB() && IsScalar(); }
  bool Is1H() const { return IsH() && IsScalar(); }
  bool Is1S() const { return IsS() && IsScalar(); }
  bool Is1D() const { return IsD() && IsScalar(); }
  bool Is1Q() const { return IsQ() && IsScalar(); }

  // Check the specific NEON format.
  bool Is8B() const { return IsD() && IsLaneSizeB(); }
  bool Is16B() const { return IsQ() && IsLaneSizeB(); }
  bool Is2H() const { return IsS() && IsLaneSizeH(); }
  bool Is4H() const { return IsD() && IsLaneSizeH(); }
  bool Is8H() const { return IsQ() && IsLaneSizeH(); }
  bool Is2S() const { return IsD() && IsLaneSizeS(); }
  bool Is4S() const { return IsQ() && IsLaneSizeS(); }
  bool Is2D() const { return IsQ() && IsLaneSizeD(); }

  // A semantic alias for sdot and udot (indexed and by element) instructions.
  // The current CPURegister implementation cannot tell this from Is1S(), but
  // it might do later.
  // TODO: Do this with the qualifiers_ field.
  bool Is1S4B() const { return Is1S(); }

  // Utilities for SVE registers.

  bool IsUnqualified() const { return qualifiers_ == kNoQualifiers; }
  bool IsMerging() const { return IsPRegister() && (qualifiers_ == kMerging); }
  bool IsZeroing() const { return IsPRegister() && (qualifiers_ == kZeroing); }

  // SVE types have unknown sizes, but within known bounds.

  int GetMaxSizeInBytes() const {
    switch (GetType()) {
      case kZRegister:
        return kZRegMaxSizeInBytes;
      case kPRegister:
        return kPRegMaxSizeInBytes;
      default:
        VIXL_ASSERT(HasSize());
        return GetSizeInBytes();
    }
  }

  int GetMinSizeInBytes() const {
    switch (GetType()) {
      case kZRegister:
        return kZRegMinSizeInBytes;
      case kPRegister:
        return kPRegMinSizeInBytes;
      default:
        VIXL_ASSERT(HasSize());
        return GetSizeInBytes();
    }
  }

  int GetMaxSizeInBits() const { return GetMaxSizeInBytes() * kBitsPerByte; }
  int GetMinSizeInBits() const { return GetMinSizeInBytes() * kBitsPerByte; }

  static RegisterBank GetBankFor(RegisterType type) {
    switch (type) {
      case kNoRegister:
        return kNoRegisterBank;
      case kRegister:
        return kRRegisterBank;
      case kVRegister:
      case kZRegister:
        return kVRegisterBank;
      case kPRegister:
        return kPRegisterBank;
    }
    VIXL_UNREACHABLE();
    return kNoRegisterBank;
  }

  static unsigned GetMaxCodeFor(CPURegister::RegisterType type) {
    return GetMaxCodeFor(GetBankFor(type));
  }

 protected:
  enum EncodedSize : uint8_t {
    // Ensure that kUnknownSize (and therefore kNoRegister) is encoded as zero.
    kEncodedUnknownSize = 0,

    // The implementation assumes that the remaining sizes are encoded as
    // `log2(size) + c`, so the following names must remain in sequence.
    kEncodedBRegSize,
    kEncodedHRegSize,
    kEncodedSRegSize,
    kEncodedDRegSize,
    kEncodedQRegSize,

    kEncodedWRegSize = kEncodedSRegSize,
    kEncodedXRegSize = kEncodedDRegSize
  };
  VIXL_STATIC_ASSERT(kSRegSize == kWRegSize);
  VIXL_STATIC_ASSERT(kDRegSize == kXRegSize);

  char GetLaneSizeSymbol() const {
    switch (lane_size_) {
      case kEncodedBRegSize:
        return 'B';
      case kEncodedHRegSize:
        return 'H';
      case kEncodedSRegSize:
        return 'S';
      case kEncodedDRegSize:
        return 'D';
      case kEncodedQRegSize:
        return 'Q';
      case kEncodedUnknownSize:
        break;
    }
    VIXL_UNREACHABLE();
    return '?';
  }

  static EncodedSize EncodeSizeInBits(int size_in_bits) {
    switch (size_in_bits) {
      case kUnknownSize:
        return kEncodedUnknownSize;
      case kBRegSize:
        return kEncodedBRegSize;
      case kHRegSize:
        return kEncodedHRegSize;
      case kSRegSize:
        return kEncodedSRegSize;
      case kDRegSize:
        return kEncodedDRegSize;
      case kQRegSize:
        return kEncodedQRegSize;
    }
    VIXL_UNREACHABLE();
    return kEncodedUnknownSize;
  }

  static int DecodeSizeInBytesLog2(EncodedSize encoded_size) {
    switch (encoded_size) {
      case kEncodedUnknownSize:
        // Log2 of B-sized lane in bytes is 0, so we can't just return 0 here.
        VIXL_UNREACHABLE();
        return -1;
      case kEncodedBRegSize:
        return kBRegSizeInBytesLog2;
      case kEncodedHRegSize:
        return kHRegSizeInBytesLog2;
      case kEncodedSRegSize:
        return kSRegSizeInBytesLog2;
      case kEncodedDRegSize:
        return kDRegSizeInBytesLog2;
      case kEncodedQRegSize:
        return kQRegSizeInBytesLog2;
    }
    VIXL_UNREACHABLE();
    return kUnknownSize;
  }

  static int DecodeSizeInBytes(EncodedSize encoded_size) {
    if (encoded_size == kEncodedUnknownSize) {
      return kUnknownSize;
    }
    return 1 << DecodeSizeInBytesLog2(encoded_size);
  }

  static int DecodeSizeInBits(EncodedSize encoded_size) {
    VIXL_STATIC_ASSERT(kUnknownSize == 0);
    return DecodeSizeInBytes(encoded_size) * kBitsPerByte;
  }

  static unsigned GetMaxCodeFor(CPURegister::RegisterBank bank);

  enum Qualifiers : uint8_t {
    kNoQualifiers = 0,
    // Used by P registers.
    kMerging,
    kZeroing
  };

  // An unchecked constructor, for use by derived classes.
  CPURegister(int code,
              EncodedSize size,
              RegisterBank bank,
              EncodedSize lane_size,
              Qualifiers qualifiers = kNoQualifiers)
      : code_(code),
        bank_(bank),
        size_(size),
        qualifiers_(qualifiers),
        lane_size_(lane_size) {}

  // TODO: Check that access to these fields is reasonably efficient.
  uint8_t code_;
  RegisterBank bank_;
  EncodedSize size_;
  Qualifiers qualifiers_;
  EncodedSize lane_size_;
};
// Ensure that CPURegisters can fit in a single (64-bit) register. This is a
// proxy for being "cheap to pass by value", which is hard to check directly.
VIXL_STATIC_ASSERT(sizeof(CPURegister) <= sizeof(uint64_t));

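For orientation, a minimal sketch of the run-time API above; w0, d0 and friends are the helper constants defined later in this header, and PrintKind is an illustrative name, not part of VIXL:

#include <cstdio>

void PrintKind(const vixl::aarch64::CPURegister& reg) {
  using namespace vixl::aarch64;
  if (reg.IsRegister()) {
    Register r(reg);  // Safe: IsRegister() guarantees this conversion.
    std::printf("core register, %d bits\n", r.GetSizeInBits());
  } else if (reg.IsVRegister()) {
    VRegister v(reg);  // FP/NEON; the lane count derives from the size encoding.
    std::printf("FP/NEON register, %d lane(s)\n", v.GetLanes());
  }
}
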
// TODO: Add constexpr constructors.
#define VIXL_DECLARE_REGISTER_COMMON(NAME, REGISTER_TYPE, PARENT_TYPE) \
  VIXL_CONSTEXPR NAME() : PARENT_TYPE() {}                             \
                                                                       \
  explicit NAME(CPURegister other) : PARENT_TYPE(other) {              \
    VIXL_ASSERT(IsValid());                                            \
  }                                                                    \
                                                                       \
  VIXL_CONSTEXPR static unsigned GetMaxCode() {                        \
    return kNumberOf##REGISTER_TYPE##s - 1;                            \
  }

// Any W or X register, including the zero register and the stack pointer.
class Register : public CPURegister {
 public:
  VIXL_DECLARE_REGISTER_COMMON(Register, Register, CPURegister)

  Register(int code, int size_in_bits)
      : CPURegister(code, size_in_bits, kRegister) {
    VIXL_ASSERT(IsValidRegister());
  }

  bool IsValid() const { return IsValidRegister(); }
};

// Any FP or NEON V register, including vector (V.<T>) and scalar forms
// (B, H, S, D, Q).
class VRegister : public CPURegister {
 public:
  VIXL_DECLARE_REGISTER_COMMON(VRegister, VRegister, CPURegister)

  // For historical reasons, VRegister(0) returns v0.1Q (or equivalently, q0).
  explicit VRegister(int code, int size_in_bits = kQRegSize, int lanes = 1)
      : CPURegister(code,
                    EncodeSizeInBits(size_in_bits),
                    kVRegisterBank,
                    EncodeLaneSizeInBits(size_in_bits, lanes)) {
    VIXL_ASSERT(IsValidVRegister());
  }

  VRegister(int code, VectorFormat format)
      : CPURegister(code,
                    EncodeSizeInBits(RegisterSizeInBitsFromFormat(format)),
                    kVRegisterBank,
                    EncodeSizeInBits(LaneSizeInBitsFromFormat(format)),
                    kNoQualifiers) {
    VIXL_ASSERT(IsValid());
  }

  VRegister V8B() const;
  VRegister V16B() const;
  VRegister V2H() const;
  VRegister V4H() const;
  VRegister V8H() const;
  VRegister V2S() const;
  VRegister V4S() const;
  VRegister V1D() const;
  VRegister V2D() const;
  VRegister V1Q() const;
  VRegister S4B() const;

  bool IsValid() const { return IsValidVRegister(); }

 protected:
  static EncodedSize EncodeLaneSizeInBits(int size_in_bits, int lanes) {
    VIXL_ASSERT(lanes >= 1);
    VIXL_ASSERT((size_in_bits % lanes) == 0);
    return EncodeSizeInBits(size_in_bits / lanes);
  }
};

// Any SVE Z register, with or without a lane size specifier.
class ZRegister : public CPURegister {
 public:
  VIXL_DECLARE_REGISTER_COMMON(ZRegister, ZRegister, CPURegister)

  explicit ZRegister(int code, int lane_size_in_bits = kUnknownSize)
      : CPURegister(code,
                    kEncodedUnknownSize,
                    kVRegisterBank,
                    EncodeSizeInBits(lane_size_in_bits)) {
    VIXL_ASSERT(IsValid());
  }

  ZRegister(int code, VectorFormat format)
      : CPURegister(code,
                    kEncodedUnknownSize,
                    kVRegisterBank,
                    EncodeSizeInBits(LaneSizeInBitsFromFormat(format)),
                    kNoQualifiers) {
    VIXL_ASSERT(IsValid());
  }

  // Return a Z register with a known lane size (like "z0.B").
  ZRegister VnB() const { return ZRegister(GetCode(), kBRegSize); }
  ZRegister VnH() const { return ZRegister(GetCode(), kHRegSize); }
  ZRegister VnS() const { return ZRegister(GetCode(), kSRegSize); }
  ZRegister VnD() const { return ZRegister(GetCode(), kDRegSize); }
  ZRegister VnQ() const { return ZRegister(GetCode(), kQRegSize); }

  template <typename T>
  ZRegister WithLaneSize(T format) const {
    return ZRegister(GetCode(), format);
  }

  ZRegister WithSameLaneSizeAs(const CPURegister& other) const {
    VIXL_ASSERT(other.HasLaneSize());
    return this->WithLaneSize(other.GetLaneSizeInBits());
  }

  bool IsValid() const { return IsValidZRegister(); }
};

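A short sketch of the Z-register lane-size helpers above; z0 and z1 are the constants defined later in this header, and the function name is illustrative:

inline void ZRegisterLaneSketch() {
  using namespace vixl::aarch64;
  ZRegister zb = z0.VnB();                      // "z0.b": byte-sized lanes.
  ZRegister zsame = z1.WithSameLaneSizeAs(zb);  // "z1.b": copies the lane size.
  VIXL_ASSERT(zb.GetLaneSizeInBits() == 8);
  VIXL_ASSERT(zsame.GetCode() == 1);
}
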
// Any SVE P register, with or without a qualifier or lane size specifier.
class PRegister : public CPURegister {
 public:
  VIXL_DECLARE_REGISTER_COMMON(PRegister, PRegister, CPURegister)

  explicit PRegister(int code) : CPURegister(code, kUnknownSize, kPRegister) {
    VIXL_ASSERT(IsValid());
  }

  bool IsValid() const {
    return IsValidPRegister() && !HasLaneSize() && IsUnqualified();
  }

  // Return a P register with a known lane size (like "p0.B").
  PRegisterWithLaneSize VnB() const;
  PRegisterWithLaneSize VnH() const;
  PRegisterWithLaneSize VnS() const;
  PRegisterWithLaneSize VnD() const;

  template <typename T>
  PRegisterWithLaneSize WithLaneSize(T format) const;

  PRegisterWithLaneSize WithSameLaneSizeAs(const CPURegister& other) const;

  // SVE predicates are specified (in normal assembly) with a "/z" (zeroing) or
  // "/m" (merging) suffix. These methods are VIXL's equivalents.
  PRegisterZ Zeroing() const;
  PRegisterM Merging() const;

 protected:
  // Unchecked constructors, for use by derived classes.
  PRegister(int code, EncodedSize encoded_lane_size)
      : CPURegister(code,
                    kEncodedUnknownSize,
                    kPRegisterBank,
                    encoded_lane_size,
                    kNoQualifiers) {}

  PRegister(int code, Qualifiers qualifiers)
      : CPURegister(code,
                    kEncodedUnknownSize,
                    kPRegisterBank,
                    kEncodedUnknownSize,
                    qualifiers) {}
};

// Any SVE P register with a known lane size (like "p0.B").
class PRegisterWithLaneSize : public PRegister {
 public:
  VIXL_DECLARE_REGISTER_COMMON(PRegisterWithLaneSize, PRegister, PRegister)

  PRegisterWithLaneSize(int code, int lane_size_in_bits)
      : PRegister(code, EncodeSizeInBits(lane_size_in_bits)) {
    VIXL_ASSERT(IsValid());
  }

  PRegisterWithLaneSize(int code, VectorFormat format)
      : PRegister(code, EncodeSizeInBits(LaneSizeInBitsFromFormat(format))) {
    VIXL_ASSERT(IsValid());
  }

  bool IsValid() const {
    return IsValidPRegister() && HasLaneSize() && IsUnqualified();
  }

  // Overload lane size accessors so we can assert `HasLaneSize()`. This allows
  // tools such as clang-tidy to prove that the result of GetLaneSize* is
  // non-zero.

  // TODO: Make these return 'int'.
  unsigned GetLaneSizeInBits() const {
    VIXL_ASSERT(HasLaneSize());
    return PRegister::GetLaneSizeInBits();
  }

  unsigned GetLaneSizeInBytes() const {
    VIXL_ASSERT(HasLaneSize());
    return PRegister::GetLaneSizeInBytes();
  }
};

// Any SVE P register with the zeroing qualifier (like "p0/z").
class PRegisterZ : public PRegister {
 public:
  VIXL_DECLARE_REGISTER_COMMON(PRegisterZ, PRegister, PRegister)

  explicit PRegisterZ(int code) : PRegister(code, kZeroing) {
    VIXL_ASSERT(IsValid());
  }

  bool IsValid() const {
    return IsValidPRegister() && !HasLaneSize() && IsZeroing();
  }
};

// Any SVE P register with the merging qualifier (like "p0/m").
class PRegisterM : public PRegister {
 public:
  VIXL_DECLARE_REGISTER_COMMON(PRegisterM, PRegister, PRegister)

  explicit PRegisterM(int code) : PRegister(code, kMerging) {
    VIXL_ASSERT(IsValid());
  }

  bool IsValid() const {
    return IsValidPRegister() && !HasLaneSize() && IsMerging();
  }
};

inline PRegisterWithLaneSize PRegister::VnB() const {
  return PRegisterWithLaneSize(GetCode(), kBRegSize);
}
inline PRegisterWithLaneSize PRegister::VnH() const {
  return PRegisterWithLaneSize(GetCode(), kHRegSize);
}
inline PRegisterWithLaneSize PRegister::VnS() const {
  return PRegisterWithLaneSize(GetCode(), kSRegSize);
}
inline PRegisterWithLaneSize PRegister::VnD() const {
  return PRegisterWithLaneSize(GetCode(), kDRegSize);
}

template <typename T>
inline PRegisterWithLaneSize PRegister::WithLaneSize(T format) const {
  return PRegisterWithLaneSize(GetCode(), format);
}

inline PRegisterWithLaneSize PRegister::WithSameLaneSizeAs(
    const CPURegister& other) const {
  VIXL_ASSERT(other.HasLaneSize());
  return this->WithLaneSize(other.GetLaneSizeInBits());
}

inline PRegisterZ PRegister::Zeroing() const { return PRegisterZ(GetCode()); }
inline PRegisterM PRegister::Merging() const { return PRegisterM(GetCode()); }

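The qualifier helpers mirror SVE assembly syntax; a brief sketch (p0-p2 are the predicate constants defined below, and the function name is illustrative):

inline void PredicateQualifierSketch() {
  using namespace vixl::aarch64;
  PRegisterWithLaneSize pb = p0.VnB();  // "p0.b": governs byte-sized lanes.
  PRegisterM pm = p1.Merging();         // "p1/m": inactive lanes keep their value.
  PRegisterZ pz = p2.Zeroing();         // "p2/z": inactive lanes are zeroed.
  VIXL_ASSERT(pb.HasLaneSize() && pm.IsMerging() && pz.IsZeroing());
}
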
#define VIXL_REGISTER_WITH_SIZE_LIST(V) \
  V(WRegister, kWRegSize, Register)     \
  V(XRegister, kXRegSize, Register)     \
  V(QRegister, kQRegSize, VRegister)    \
  V(DRegister, kDRegSize, VRegister)    \
  V(SRegister, kSRegSize, VRegister)    \
  V(HRegister, kHRegSize, VRegister)    \
  V(BRegister, kBRegSize, VRegister)

#define VIXL_DEFINE_REGISTER_WITH_SIZE(NAME, SIZE, PARENT)           \
  class NAME : public PARENT {                                       \
   public:                                                           \
    VIXL_CONSTEXPR NAME() : PARENT() {}                              \
    explicit NAME(int code) : PARENT(code, SIZE) {}                  \
                                                                     \
    explicit NAME(PARENT other) : PARENT(other) {                    \
      VIXL_ASSERT(GetSizeInBits() == SIZE);                          \
    }                                                                \
                                                                     \
    PARENT As##PARENT() const { return *this; }                      \
                                                                     \
    VIXL_CONSTEXPR int GetSizeInBits() const { return SIZE; }        \
                                                                     \
    bool IsValid() const {                                           \
      return PARENT::IsValid() && (PARENT::GetSizeInBits() == SIZE); \
    }                                                                \
  };

VIXL_REGISTER_WITH_SIZE_LIST(VIXL_DEFINE_REGISTER_WITH_SIZE)

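These generated types carry their size in the type itself; a minimal sketch (the function name is illustrative):

inline void SizedRegisterSketch() {
  using namespace vixl::aarch64;
  XRegister x(0);                // Always 64 bits; checked at construction.
  Register r = x.AsRegister();   // Decays back to the generic Register type.
  VIXL_ASSERT(r.IsX() && (x.GetSizeInBits() == 64));
}
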
// No*Reg is used to provide default values for unused arguments, error cases
// and so on. Note that these (and the default constructors) all compare equal
// (using the Is() method).
const Register NoReg;
const VRegister NoVReg;
const CPURegister NoCPUReg;
const ZRegister NoZReg;

// TODO: Ideally, these would use specialised register types (like XRegister and
// so on). However, doing so throws up template overloading problems elsewhere.
#define VIXL_DEFINE_REGISTERS(N)       \
  const Register w##N = WRegister(N);  \
  const Register x##N = XRegister(N);  \
  const VRegister b##N = BRegister(N); \
  const VRegister h##N = HRegister(N); \
  const VRegister s##N = SRegister(N); \
  const VRegister d##N = DRegister(N); \
  const VRegister q##N = QRegister(N); \
  const VRegister v##N(N);             \
  const ZRegister z##N(N);
AARCH64_REGISTER_CODE_LIST(VIXL_DEFINE_REGISTERS)
#undef VIXL_DEFINE_REGISTERS

#define VIXL_DEFINE_P_REGISTERS(N) const PRegister p##N(N);
AARCH64_P_REGISTER_CODE_LIST(VIXL_DEFINE_P_REGISTERS)
#undef VIXL_DEFINE_P_REGISTERS

// VIXL represents 'sp' with a unique code, to tell it apart from 'xzr'.
const Register wsp = WRegister(kSPRegInternalCode);
const Register sp = XRegister(kSPRegInternalCode);

// Standard aliases.
const Register ip0 = x16;
const Register ip1 = x17;
const Register lr = x30;
const Register xzr = x31;
const Register wzr = w31;

// AreAliased returns true if any of the named registers overlap. Arguments
// set to NoReg are ignored. The system stack pointer may be specified.
bool AreAliased(const CPURegister& reg1,
                const CPURegister& reg2,
                const CPURegister& reg3 = NoReg,
                const CPURegister& reg4 = NoReg,
                const CPURegister& reg5 = NoReg,
                const CPURegister& reg6 = NoReg,
                const CPURegister& reg7 = NoReg,
                const CPURegister& reg8 = NoReg);

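For intuition about aliasing (the helper constants defined above are in scope; the function name is illustrative):

inline void AliasSketch() {
  using namespace vixl::aarch64;
  VIXL_ASSERT(AreAliased(w0, x0));   // w0 is the low half of x0.
  VIXL_ASSERT(AreAliased(d0, v0));   // d0 is the low half of the NEON v0.
  VIXL_ASSERT(!AreAliased(x0, x1));  // Distinct codes in the same bank.
}
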
// AreSameSizeAndType returns true if all of the specified registers have the
// same size, and are of the same type. The system stack pointer may be
// specified. Arguments set to NoReg are ignored, as are any subsequent
// arguments. At least one argument (reg1) must be valid (not NoCPUReg).
bool AreSameSizeAndType(const CPURegister& reg1,
                        const CPURegister& reg2,
                        const CPURegister& reg3 = NoCPUReg,
                        const CPURegister& reg4 = NoCPUReg,
                        const CPURegister& reg5 = NoCPUReg,
                        const CPURegister& reg6 = NoCPUReg,
                        const CPURegister& reg7 = NoCPUReg,
                        const CPURegister& reg8 = NoCPUReg);

// AreEven returns true if all of the specified registers have even register
// indices. Arguments set to NoReg are ignored, as are any subsequent
// arguments. At least one argument (reg1) must be valid (not NoCPUReg).
bool AreEven(const CPURegister& reg1,
             const CPURegister& reg2,
             const CPURegister& reg3 = NoReg,
             const CPURegister& reg4 = NoReg,
             const CPURegister& reg5 = NoReg,
             const CPURegister& reg6 = NoReg,
             const CPURegister& reg7 = NoReg,
             const CPURegister& reg8 = NoReg);

// AreConsecutive returns true if all of the specified registers are
// consecutive in the register file. Arguments set to NoReg are ignored, as are
// any subsequent arguments. At least one argument (reg1) must be valid
// (not NoCPUReg).
bool AreConsecutive(const CPURegister& reg1,
                    const CPURegister& reg2,
                    const CPURegister& reg3 = NoCPUReg,
                    const CPURegister& reg4 = NoCPUReg);

// AreSameFormat returns true if all of the specified registers have the same
// vector format. Arguments set to NoReg are ignored, as are any subsequent
// arguments. At least one argument (reg1) must be valid (not NoVReg).
bool AreSameFormat(const CPURegister& reg1,
                   const CPURegister& reg2,
                   const CPURegister& reg3 = NoCPUReg,
                   const CPURegister& reg4 = NoCPUReg);

// AreSameLaneSize returns true if all of the specified registers have the same
// element lane size, B, H, S or D. It doesn't compare the type of registers.
// Arguments set to NoReg are ignored, as are any subsequent arguments.
// At least one argument (reg1) must be valid (not NoVReg).
// TODO: Remove this, and replace its uses with AreSameFormat.
bool AreSameLaneSize(const CPURegister& reg1,
                     const CPURegister& reg2,
                     const CPURegister& reg3 = NoCPUReg,
                     const CPURegister& reg4 = NoCPUReg);
}  // namespace aarch64
}  // namespace vixl

#endif  // VIXL_AARCH64_REGISTERS_AARCH64_H_
File diff suppressed because it is too large
@ -56,6 +56,8 @@ enum DebugHltOpcode {
  kDisableCPUFeaturesOpcode,
  kSaveCPUFeaturesOpcode,
  kRestoreCPUFeaturesOpcode,
  kMTEActive,
  kMTEInactive,
  // Aliases.
  kDebugHltFirstOpcode = kUnreachableOpcode,
  kDebugHltLastOpcode = kLogOpcode

@ -88,7 +90,7 @@ VIXL_DEPRECATED("DebugHltOpcode", typedef DebugHltOpcode DebugHltOpcodes);
// call):
// x0: The format string
// x1-x7: Optional arguments, if type == CPURegister::kRegister
// d0-d7: Optional arguments, if type == CPURegister::kFPRegister
// d0-d7: Optional arguments, if type == CPURegister::kVRegister
const unsigned kPrintfArgCountOffset = 1 * kInstructionSize;
const unsigned kPrintfArgPatternListOffset = 2 * kInstructionSize;
const unsigned kPrintfLength = 3 * kInstructionSize;

@ -121,7 +123,7 @@ const unsigned kTraceLength = 3 * kInstructionSize;
enum TraceParameters {
  LOG_DISASM = 1 << 0,   // Log disassembly.
  LOG_REGS = 1 << 1,     // Log general purpose registers.
  LOG_VREGS = 1 << 2,    // Log NEON and floating-point registers.
  LOG_VREGS = 1 << 2,    // Log SVE, NEON and floating-point registers.
  LOG_SYSREGS = 1 << 3,  // Log the flags and system registers.
  LOG_WRITE = 1 << 4,    // Log writes to memory.
  LOG_BRANCH = 1 << 5,   // Log taken branches.

@ -29,6 +29,12 @@

#include "code-buffer-vixl.h"

// Microsoft Visual C++ defines a `mvn` macro that conflicts with our own
// definition.
#if defined(_MSC_VER) && defined(mvn)
#undef mvn
#endif

namespace vixl {

class CodeBufferCheckScope;

@ -37,9 +43,8 @@ namespace internal {

class AssemblerBase {
 public:
  AssemblerBase() : allow_assembler_(false) {}
  explicit AssemblerBase(size_t capacity)
      : buffer_(capacity), allow_assembler_(false) {}
  AssemblerBase()
      : allow_assembler_(false) {}
  AssemblerBase(byte* buffer, size_t capacity)
      : buffer_(buffer, capacity), allow_assembler_(false) {}

@ -36,24 +36,12 @@ namespace vixl {

class CodeBuffer {
 public:
  static const size_t kDefaultCapacity = 4 * KBytes;

  explicit CodeBuffer(size_t capacity = kDefaultCapacity);
  CodeBuffer();
  CodeBuffer(byte* buffer, size_t capacity);
  ~CodeBuffer();
  ~CodeBuffer() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION;

  void Reset();
  void Reset(byte* buffer, size_t capacity, bool managed = false);

#ifdef VIXL_CODE_BUFFER_MMAP
  void SetExecutable();
  void SetWritable();
#else
  // These require page-aligned memory blocks, which we can only guarantee with
  // mmap.
  VIXL_NO_RETURN_IN_DEBUG_MODE void SetExecutable() { VIXL_UNIMPLEMENTED(); }
  VIXL_NO_RETURN_IN_DEBUG_MODE void SetWritable() { VIXL_UNIMPLEMENTED(); }
#endif
  void Reset(byte* buffer, size_t capacity);

  ptrdiff_t GetOffsetFrom(ptrdiff_t offset) const {
    ptrdiff_t cursor_offset = cursor_ - buffer_;

@ -128,8 +116,9 @@ class CodeBuffer {
  void Emit(T value) {
    VIXL_ASSERT(HasSpaceFor(sizeof(value)));
    dirty_ = true;
    memcpy(cursor_, &value, sizeof(value));
    cursor_ += sizeof(value);
    byte* c = cursor_;
    memcpy(c, &value, sizeof(value));
    cursor_ = c + sizeof(value);
  }

  void UpdateData(size_t offset, const void* data, size_t size);

@ -149,10 +138,6 @@ class CodeBuffer {
    return GetCapacity();
  }

  bool IsManaged() const { return managed_; }

  void Grow(size_t new_capacity);

  bool IsDirty() const { return dirty_; }

  void SetClean() { dirty_ = false; }

@ -161,24 +146,9 @@ class CodeBuffer {
    return GetRemainingBytes() >= amount;
  }

  void EnsureSpaceFor(size_t amount, bool* has_grown) {
    bool is_full = !HasSpaceFor(amount);
    if (is_full) Grow(capacity_ * 2 + amount);
    VIXL_ASSERT(has_grown != NULL);
    *has_grown = is_full;
  }
  void EnsureSpaceFor(size_t amount) {
    bool dummy;
    EnsureSpaceFor(amount, &dummy);
  }

 private:
  // Backing store of the buffer.
  byte* buffer_;
  // If true the backing store is allocated and deallocated by the buffer. The
  // backing store can then grow on demand. If false the backing store is
  // provided by the user and cannot be resized internally.
  bool managed_;
  // Pointer to the next location to be written.
  byte* cursor_;
  // True if there has been any write since the buffer was created or cleaned.

@ -68,14 +68,19 @@ class CodeBufferCheckScope {
                       size_t size,
                       BufferSpacePolicy check_policy = kReserveBufferSpace,
                       SizePolicy size_policy = kMaximumSize)
      : assembler_(NULL), initialised_(false) {
      : CodeBufferCheckScope() {
    Open(assembler, size, check_policy, size_policy);
  }

  // This constructor does not implicitly initialise the scope. Instead, the
  // user is required to explicitly call the `Open` function before using the
  // scope.
  CodeBufferCheckScope() : assembler_(NULL), initialised_(false) {
  CodeBufferCheckScope()
      : assembler_(NULL),
        assert_policy_(kMaximumSize),
        limit_(0),
        previous_allow_assembler_(false),
        initialised_(false) {
    // Nothing to do.
  }

@ -90,7 +95,7 @@ class CodeBufferCheckScope {
    VIXL_ASSERT(assembler != NULL);
    assembler_ = assembler;
    if (check_policy == kReserveBufferSpace) {
      assembler->GetBuffer()->EnsureSpaceFor(size);
      VIXL_ASSERT(assembler->GetBuffer()->HasSpaceFor(size));
    }
#ifdef VIXL_DEBUG
    limit_ = assembler_->GetSizeOfCodeGenerated() + size;

@ -152,14 +157,15 @@ class EmissionCheckScope : public CodeBufferCheckScope {
  // constructed.
  EmissionCheckScope(MacroAssemblerInterface* masm,
                     size_t size,
                     SizePolicy size_policy = kMaximumSize) {
                     SizePolicy size_policy = kMaximumSize)
      : EmissionCheckScope() {
    Open(masm, size, size_policy);
  }

  // This constructor does not implicitly initialise the scope. Instead, the
  // user is required to explicitly call the `Open` function before using the
  // scope.
  EmissionCheckScope() {}
  EmissionCheckScope() : masm_(nullptr), pool_policy_(kBlockPools) {}

  virtual ~EmissionCheckScope() { Close(); }

@ -250,14 +256,15 @@ class ExactAssemblyScope : public EmissionCheckScope {
  // constructed.
  ExactAssemblyScope(MacroAssemblerInterface* masm,
                     size_t size,
                     SizePolicy size_policy = kExactSize) {
                     SizePolicy size_policy = kExactSize)
      : ExactAssemblyScope() {
    Open(masm, size, size_policy);
  }

  // This constructor does not implicitly initialise the scope. Instead, the
  // user is required to explicitly call the `Open` function before using the
  // scope.
  ExactAssemblyScope() {}
  ExactAssemblyScope() : previous_allow_macro_assembler_(false) {}

  virtual ~ExactAssemblyScope() { Close(); }

@ -28,6 +28,8 @@
#ifndef VIXL_COMPILER_INTRINSICS_H
#define VIXL_COMPILER_INTRINSICS_H

#include <limits.h>

#include "globals-vixl.h"

namespace vixl {

@ -104,16 +106,23 @@ int CountTrailingZerosFallBack(uint64_t value, int width);
// TODO: The implementations could be improved for sizes different from 32bit
// and 64bit: we could mask the values and call the appropriate builtin.

// Return the number of leading bits that match the topmost (sign) bit,
// excluding the topmost bit itself.
template <typename V>
inline int CountLeadingSignBits(V value, int width = (sizeof(V) * 8)) {
  VIXL_ASSERT(IsPowerOf2(width) && (width <= 64));
#if COMPILER_HAS_BUILTIN_CLRSB
  if (width == 32) {
    return __builtin_clrsb(value);
  } else if (width == 64) {
    return __builtin_clrsbll(value);
  }
#endif
  VIXL_ASSERT((LLONG_MIN <= value) && (value <= LLONG_MAX));
  int ll_width =
      sizeof(long long) * kBitsPerByte;  // NOLINT(google-runtime-int)
  int result = __builtin_clrsbll(value) - (ll_width - width);
  // Check that the value fits in the specified width.
  VIXL_ASSERT(result >= 0);
  return result;
#else
  VIXL_ASSERT((INT64_MIN <= value) && (value <= INT64_MAX));
  return CountLeadingSignBitsFallBack(value, width);
#endif
}

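A few expected values, for intuition; this matches GCC's __builtin_clrsb convention, where the sign bit itself is not counted:

// For 64-bit inputs:
//   CountLeadingSignBits(INT64_C(0))  == 63  (every other bit matches the sign bit)
//   CountLeadingSignBits(INT64_C(-1)) == 63
//   CountLeadingSignBits(INT64_C(1))  == 62
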
@ -27,6 +27,7 @@
#ifndef VIXL_CPU_FEATURES_H
#define VIXL_CPU_FEATURES_H

#include <bitset>
#include <ostream>

#include "globals-vixl.h"

@ -34,16 +35,65 @@
namespace vixl {


// VIXL aims to handle and detect all architectural features that are likely to
// influence code-generation decisions at EL0 (user-space).
//
// - There may be multiple VIXL feature flags for a given architectural
//   extension. This occurs where the extension allows components to be
//   implemented independently, or where kernel support is needed, and is likely
//   to be fragmented.
//
//   For example, Pointer Authentication (kPAuth*) has a separate feature flag
//   for access to PACGA, and to indicate that the QARMA algorithm is
//   implemented.
//
// - Conversely, some extensions have configuration options that do not affect
//   EL0, so these are presented as a single VIXL feature.
//
//   For example, the RAS extension (kRAS) has several variants, but the only
//   feature relevant to VIXL is the addition of the ESB instruction so we only
//   need a single flag.
//
// - VIXL offers separate flags for separate features even if they're
//   architecturally linked.
//
//   For example, the architecture requires kFPHalf and kNEONHalf to be equal,
//   but they have separate hardware ID register fields so VIXL presents them as
//   separate features.
//
// - VIXL can detect every feature for which it can generate code.
//
// - VIXL can detect some features for which it cannot generate code.
//
// The CPUFeatures::Feature enum, derived from the macro list below, is
// frequently extended. New features may be added to the list at any point, and
// no assumptions should be made about the numerical values assigned to each
// enum constant. The symbolic names can be considered to be stable.
//
// The debug descriptions are used only for debug output. The 'cpuinfo' strings
// are informative; VIXL does not use /proc/cpuinfo for feature detection.

// clang-format off
#define VIXL_CPU_FEATURE_LIST(V) \
  /* If set, the OS traps and emulates MRS accesses to relevant (EL1) ID_* */ \
  /* registers, so that the detailed feature registers can be read */ \
  /* directly. */ \
  \
  /* Constant name        Debug description        Linux 'cpuinfo' string. */ \
  V(kIDRegisterEmulation, "ID register emulation", "cpuid") \
  \
  V(kFP, "FP", "fp") \
  V(kNEON, "NEON", "asimd") \
  V(kCRC32, "CRC32", "crc32") \
  V(kDGH, "DGH", "dgh") \
  /* Speculation control features. */ \
  V(kCSV2, "CSV2", NULL) \
  V(kSCXTNUM, "SCXTNUM", NULL) \
  V(kCSV3, "CSV3", NULL) \
  V(kSB, "SB", "sb") \
  V(kSPECRES, "SPECRES", NULL) \
  V(kSSBS, "SSBS", NULL) \
  V(kSSBSControl, "SSBS (PSTATE control)", "ssbs") \
  /* Cryptographic support instructions. */ \
  V(kAES, "AES", "aes") \
  V(kSHA1, "SHA1", "sha1") \

@ -56,34 +106,102 @@ namespace vixl {
  V(kLORegions, "LORegions", NULL) \
  /* Rounding doubling multiply add/subtract: SQRDMLAH and SQRDMLSH. */ \
  V(kRDM, "RDM", "asimdrdm") \
  /* Scalable Vector Extension. */ \
  V(kSVE, "SVE", "sve") \
  V(kSVEF64MM, "SVE F64MM", "svef64mm") \
  V(kSVEF32MM, "SVE F32MM", "svef32mm") \
  V(kSVEI8MM, "SVE I8MM", "svei8imm") \
  V(kSVEBF16, "SVE BFloat16", "svebf16") \
  /* SDOT and UDOT support (in NEON). */ \
  V(kDotProduct, "DotProduct", "asimddp") \
  /* Int8 matrix multiplication (in NEON). */ \
  V(kI8MM, "NEON I8MM", "i8mm") \
  /* Half-precision (FP16) support for FP and NEON, respectively. */ \
  V(kFPHalf, "FPHalf", "fphp") \
  V(kNEONHalf, "NEONHalf", "asimdhp") \
  /* BFloat16 support (in both FP and NEON.) */ \
  V(kBF16, "FP/NEON BFloat 16", "bf16") \
  /* The RAS extension, including the ESB instruction. */ \
  V(kRAS, "RAS", NULL) \
  /* Data cache clean to the point of persistence: DC CVAP. */ \
  V(kDCPoP, "DCPoP", "dcpop") \
  /* Data cache clean to the point of deep persistence: DC CVADP. */ \
  V(kDCCVADP, "DCCVADP", "dcpodp") \
  /* Cryptographic support instructions. */ \
  V(kSHA3, "SHA3", "sha3") \
  V(kSHA512, "SHA512", "sha512") \
  V(kSM3, "SM3", "sm3") \
  V(kSM4, "SM4", "sm4") \
  /* Pointer authentication for addresses. */ \
  V(kPAuth, "PAuth", NULL) \
  V(kPAuth, "PAuth", "paca") \
  /* Pointer authentication for addresses uses QARMA. */ \
  V(kPAuthQARMA, "PAuthQARMA", NULL) \
  /* Generic authentication (using the PACGA instruction). */ \
  V(kPAuthGeneric, "PAuthGeneric", NULL) \
  V(kPAuthGeneric, "PAuthGeneric", "pacg") \
  /* Generic authentication uses QARMA. */ \
  V(kPAuthGenericQARMA, "PAuthGenericQARMA", NULL) \
  /* JavaScript-style FP <-> integer conversion instruction: FJCVTZS. */ \
  /* JavaScript-style FP -> integer conversion instruction: FJCVTZS. */ \
  V(kJSCVT, "JSCVT", "jscvt") \
  /* Complex number support for NEON: FCMLA and FCADD. */ \
  V(kFcma, "Fcma", "fcma") \
  /* RCpc-based model (for weaker release consistency): LDAPR and variants. */ \
  V(kRCpc, "RCpc", "lrcpc") \
  /* Complex number support for NEON: FCMLA and FCADD. */ \
  V(kFcma, "Fcma", "fcma")
  V(kRCpcImm, "RCpc (imm)", "ilrcpc") \
  /* Flag manipulation instructions: SETF{8,16}, CFINV, RMIF. */ \
  V(kFlagM, "FlagM", "flagm") \
  /* Unaligned single-copy atomicity. */ \
  V(kUSCAT, "USCAT", "uscat") \
  /* FP16 fused multiply-add or -subtract long: FMLAL{2}, FMLSL{2}. */ \
  V(kFHM, "FHM", "asimdfhm") \
  /* Data-independent timing (for selected instructions). */ \
  V(kDIT, "DIT", "dit") \
  /* Branch target identification. */ \
  V(kBTI, "BTI", "bti") \
  /* Flag manipulation instructions: {AX,XA}FLAG */ \
  V(kAXFlag, "AXFlag", "flagm2") \
  /* Random number generation extension. */ \
  V(kRNG, "RNG", "rng") \
  /* Floating-point round to {32,64}-bit integer. */ \
  V(kFrintToFixedSizedInt,"Frint (bounded)", "frint") \
  /* Memory Tagging Extension. */ \
  V(kMTEInstructions, "MTE (EL0 instructions)", NULL) \
  V(kMTE, "MTE", NULL) \
  V(kMTE3, "MTE (asymmetric)", "mte3") \
  /* PAuth extensions. */ \
  V(kPAuthEnhancedPAC, "PAuth EnhancedPAC", NULL) \
  V(kPAuthEnhancedPAC2, "PAuth EnhancedPAC2", NULL) \
  V(kPAuthFPAC, "PAuth FPAC", NULL) \
  V(kPAuthFPACCombined, "PAuth FPACCombined", NULL) \
  /* Scalable Vector Extension 2. */ \
  V(kSVE2, "SVE2", "sve2") \
  V(kSVESM4, "SVE SM4", "svesm4") \
  V(kSVESHA3, "SVE SHA3", "svesha3") \
  V(kSVEBitPerm, "SVE BitPerm", "svebitperm") \
  V(kSVEAES, "SVE AES", "sveaes") \
  V(kSVEPmull128, "SVE Pmull128", "svepmull") \
  /* Alternate floating-point behavior */ \
  V(kAFP, "AFP", "afp") \
  /* Enhanced Counter Virtualization */ \
  V(kECV, "ECV", "ecv") \
  /* Increased precision of Reciprocal Estimate and Square Root Estimate */ \
  V(kRPRES, "RPRES", "rpres") \
  /* Memory operation instructions, for memcpy, memset */ \
  V(kMOPS, "Memory ops", NULL) \
  /* Scalable Matrix Extension (SME) */ \
  V(kSME, "SME", "sme") \
  V(kSMEi16i64, "SME (i16i64)", "smei16i64") \
  V(kSMEf64f64, "SME (f64f64)", "smef64f64") \
  V(kSMEi8i32, "SME (i8i32)", "smei8i32") \
  V(kSMEf16f32, "SME (f16f32)", "smef16f32") \
  V(kSMEb16f32, "SME (b16f32)", "smeb16f32") \
  V(kSMEf32f32, "SME (f32f32)", "smef32f32") \
  V(kSMEfa64, "SME (fa64)", "smefa64") \
  /* WFET and WFIT instruction support */ \
  V(kWFXT, "WFXT", "wfxt") \
  /* Extended BFloat16 instructions */ \
  V(kEBF16, "EBF16", "ebf16") \
  V(kSVE_EBF16, "EBF16 (SVE)", "sveebf16") \
  V(kCSSC, "CSSC", "cssc")
// clang-format on

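For readers new to the pattern, a hypothetical sketch of how an X-macro list like this is typically expanded into the Feature enum (the names below are illustrative; the actual declaration lives further down this header):

// #define VIXL_DECLARE_FEATURE(SYMBOL, NAME, CPUINFO) SYMBOL,
// enum Feature {
//   kNone = -1,
//   VIXL_CPU_FEATURE_LIST(VIXL_DECLARE_FEATURE)  // kIDRegisterEmulation, kFP, ...
//   kNumberOfFeatures
// };
// #undef VIXL_DECLARE_FEATURE
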
@ -176,13 +294,13 @@ class CPUFeatures {
  // clang-format on

  // By default, construct with no features enabled.
  CPUFeatures() : features_(0) {}
  CPUFeatures() : features_{} {}

  // Construct with some features already enabled.
  CPUFeatures(Feature feature0,
              Feature feature1 = kNone,
              Feature feature2 = kNone,
              Feature feature3 = kNone);
  template <typename T, typename... U>
  CPUFeatures(T first, U... others) : features_{} {
    Combine(first, others...);
  }

  // Construct with all features enabled. This can be used to disable feature
  // checking: `Has(...)` returns true regardless of the argument.

@ -198,51 +316,80 @@ class CPUFeatures {
    return CPUFeatures(kFP, kNEON, kCRC32);
  }

  // Construct a new CPUFeatures object using ID registers. This assumes that
  // kIDRegisterEmulation is present.
  static CPUFeatures InferFromIDRegisters();

  enum QueryIDRegistersOption {
    kDontQueryIDRegisters,
    kQueryIDRegistersIfAvailable
  };

  // Construct a new CPUFeatures object based on what the OS reports.
  static CPUFeatures InferFromOS();
  static CPUFeatures InferFromOS(
      QueryIDRegistersOption option = kQueryIDRegistersIfAvailable);

  // Combine another CPUFeatures object into this one. Features that already
  // exist in this set are left unchanged.
  void Combine(const CPUFeatures& other);

  // Combine specific features into this set. Features that already exist in
  // this set are left unchanged.
  void Combine(Feature feature0,
               Feature feature1 = kNone,
               Feature feature2 = kNone,
               Feature feature3 = kNone);
  // Combine a specific feature into this set. If it already exists in the set,
  // the set is left unchanged.
  void Combine(Feature feature);

  // Combine multiple features (or feature sets) into this set.
  template <typename T, typename... U>
  void Combine(T first, U... others) {
    Combine(first);
    Combine(others...);
  }

  // Remove features in another CPUFeatures object from this one.
  void Remove(const CPUFeatures& other);

  // Remove specific features from this set.
  void Remove(Feature feature0,
              Feature feature1 = kNone,
              Feature feature2 = kNone,
              Feature feature3 = kNone);
  // Remove a specific feature from this set. This has no effect if the feature
  // doesn't exist in the set.
  void Remove(Feature feature0);

  // Chaining helpers for convenient construction.
  CPUFeatures With(const CPUFeatures& other) const;
  CPUFeatures With(Feature feature0,
                   Feature feature1 = kNone,
                   Feature feature2 = kNone,
                   Feature feature3 = kNone) const;
  CPUFeatures Without(const CPUFeatures& other) const;
  CPUFeatures Without(Feature feature0,
                      Feature feature1 = kNone,
                      Feature feature2 = kNone,
                      Feature feature3 = kNone) const;
  // Remove multiple features (or feature sets) from this set.
  template <typename T, typename... U>
  void Remove(T first, U... others) {
    Remove(first);
    Remove(others...);
  }

  // Query features.
  // Note that an empty query (like `Has(kNone)`) always returns true.
  // Chaining helpers for convenient construction by combining other CPUFeatures
  // or individual Features.
  template <typename... T>
  CPUFeatures With(T... others) const {
    CPUFeatures f(*this);
    f.Combine(others...);
    return f;
  }

  template <typename... T>
  CPUFeatures Without(T... others) const {
    CPUFeatures f(*this);
    f.Remove(others...);
    return f;
  }

  // Test whether the `other` feature set is equal to or a subset of this one.
  bool Has(const CPUFeatures& other) const;
  bool Has(Feature feature0,
           Feature feature1 = kNone,
           Feature feature2 = kNone,
           Feature feature3 = kNone) const;

  // Test whether a single feature exists in this set.
  // Note that `Has(kNone)` always returns true.
  bool Has(Feature feature) const;

  // Test whether all of the specified features exist in this set.
  template <typename T, typename... U>
  bool Has(T first, U... others) const {
    return Has(first) && Has(others...);
  }

  // Return the number of enabled features.
  size_t Count() const;
  bool HasNoFeatures() const { return Count() == 0; }

  // Check for equivalence.
  bool operator==(const CPUFeatures& other) const {

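A brief sketch of the variadic Combine/Has/Without API above (the function name is illustrative):

inline void FeatureSetSketch() {
  using vixl::CPUFeatures;
  CPUFeatures f(CPUFeatures::kFP, CPUFeatures::kNEON);
  f.Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2);
  VIXL_ASSERT(f.Has(CPUFeatures::kFP, CPUFeatures::kSVE2));
  CPUFeatures without_sve2 = f.Without(CPUFeatures::kSVE2);  // `f` is unchanged.
  VIXL_ASSERT(!without_sve2.Has(CPUFeatures::kSVE2));
}
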
@ -256,9 +403,8 @@ class CPUFeatures {
  const_iterator end() const;

 private:
  // Each bit represents a feature. This field will be replaced as needed if
  // features are added.
  uint64_t features_;
  // Each bit represents a feature. This set will be extended as needed.
  std::bitset<kNumberOfFeatures> features_;

  friend std::ostream& operator<<(std::ostream& os,
                                  const vixl::CPUFeatures& features);

@ -281,8 +427,8 @@ class CPUFeaturesConstIterator {
  bool operator!=(const CPUFeaturesConstIterator& other) const {
    return !(*this == other);
  }
  CPUFeatures::Feature operator++();
  CPUFeatures::Feature operator++(int);
  CPUFeaturesConstIterator& operator++();
  CPUFeaturesConstIterator operator++(int);

  CPUFeatures::Feature operator*() const {
    VIXL_ASSERT(IsValid());

@ -301,8 +447,10 @@ class CPUFeaturesConstIterator {
  CPUFeatures::Feature feature_;

  bool IsValid() const {
    return ((cpu_features_ == NULL) && (feature_ == CPUFeatures::kNone)) ||
           cpu_features_->Has(feature_);
    if (cpu_features_ == NULL) {
      return feature_ == CPUFeatures::kNone;
    }
    return cpu_features_->Has(feature_);
  }
};

@ -325,21 +473,17 @@ class CPUFeaturesScope {
  // Start a CPUFeaturesScope on any object that implements
  // `CPUFeatures* GetCPUFeatures()`.
  template <typename T>
  explicit CPUFeaturesScope(T* cpu_features_wrapper,
                            CPUFeatures::Feature feature0 = CPUFeatures::kNone,
                            CPUFeatures::Feature feature1 = CPUFeatures::kNone,
                            CPUFeatures::Feature feature2 = CPUFeatures::kNone,
                            CPUFeatures::Feature feature3 = CPUFeatures::kNone)
  explicit CPUFeaturesScope(T* cpu_features_wrapper)
      : cpu_features_(cpu_features_wrapper->GetCPUFeatures()),
        old_features_(*cpu_features_) {
    cpu_features_->Combine(feature0, feature1, feature2, feature3);
  }
        old_features_(*cpu_features_) {}

  template <typename T>
  CPUFeaturesScope(T* cpu_features_wrapper, const CPUFeatures& other)
  // Start a CPUFeaturesScope on any object that implements
  // `CPUFeatures* GetCPUFeatures()`, with the specified features enabled.
  template <typename T, typename U, typename... V>
  CPUFeaturesScope(T* cpu_features_wrapper, U first, V... features)
      : cpu_features_(cpu_features_wrapper->GetCPUFeatures()),
        old_features_(*cpu_features_) {
    cpu_features_->Combine(other);
    cpu_features_->Combine(first, features...);
  }

  ~CPUFeaturesScope() { *cpu_features_ = old_features_; }

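The scope saves the wrapped feature set on entry and restores it on exit; a minimal sketch, assuming `masm` is any object implementing GetCPUFeatures() (such as a MacroAssembler*):

// {
//   CPUFeaturesScope scope(masm, CPUFeatures::kSVE, CPUFeatures::kSVE2);
//   // SVE and SVE2 instructions can be emitted here.
// }  // The previous feature set is restored when the scope closes.
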
@ -27,6 +27,10 @@
#ifndef VIXL_GLOBALS_H
#define VIXL_GLOBALS_H

#if __cplusplus < 201703L
#error VIXL requires C++17
#endif

// Get standard C99 macros for integer types.
#ifndef __STDC_CONSTANT_MACROS
#define __STDC_CONSTANT_MACROS

@ -66,7 +70,8 @@ typedef uint8_t byte;
const int KBytes = 1024;
const int MBytes = 1024 * KBytes;

const int kBitsPerByte = 8;
const int kBitsPerByteLog2 = 3;
const int kBitsPerByte = 1 << kBitsPerByteLog2;

template <int SizeInBits>
struct Unsigned;

@ -153,7 +158,7 @@ struct Unsigned<64> {
#endif
// This is not as powerful as template based assertions, but it is simple.
// It assumes that the descriptions are unique. If this starts being a problem,
// we can switch to a different implemention.
// we can switch to a different implementation.
#define VIXL_CONCAT(a, b) a##b
#if __cplusplus >= 201103L
#define VIXL_STATIC_ASSERT_LINE(line_unused, condition, message) \

@ -187,8 +192,7 @@ inline void USE(const T1&, const T2&, const T3&, const T4&) {}

#define VIXL_ALIGNMENT_EXCEPTION()                \
  do {                                            \
    fprintf(stderr, "ALIGNMENT EXCEPTION\t");     \
    VIXL_ABORT();                                 \
    VIXL_ABORT_WITH_MSG("ALIGNMENT EXCEPTION\t"); \
  } while (0)

// The clang::fallthrough attribute is used along with the Wimplicit-fallthrough

@ -203,7 +207,7 @@ inline void USE(const T1&, const T2&, const T3&, const T4&) {}
#if __has_warning("-Wimplicit-fallthrough") && __cplusplus >= 201103L
#define VIXL_FALLTHROUGH() [[clang::fallthrough]]
// Fallthrough annotation for GCC >= 7.
#elif __GNUC__ >= 7
#elif defined(__GNUC__) && __GNUC__ >= 7
#define VIXL_FALLTHROUGH() __attribute__((fallthrough))
#else
#define VIXL_FALLTHROUGH() \

@ -211,6 +215,18 @@ inline void USE(const T1&, const T2&, const T3&, const T4&) {}
  } while (0)
#endif

// Evaluate 'init' to an std::optional and return if it's empty. If 'init' is
// not empty then define a variable 'name' with the value inside the
// std::optional.
#define VIXL_DEFINE_OR_RETURN(name, init) \
  auto opt##name = init;                  \
  if (!opt##name) return;                 \
  auto name = *opt##name;
#define VIXL_DEFINE_OR_RETURN_FALSE(name, init) \
  auto opt##name = init;                        \
  if (!opt##name) return false;                 \
  auto name = *opt##name;

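A hypothetical usage sketch for the macros above (MaybeGetOperand and Use are illustrative names, not part of VIXL):

// std::optional<int> MaybeGetOperand();
// void Visit() {
//   VIXL_DEFINE_OR_RETURN(operand, MaybeGetOperand());  // Early return if empty.
//   Use(operand);  // Here `operand` is the unwrapped int.
// }
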
#if __cplusplus >= 201103L
#define VIXL_NO_RETURN [[noreturn]]
#else

@ -224,17 +240,19 @@ inline void USE(const T1&, const T2&, const T3&, const T4&) {}

#if __cplusplus >= 201103L
#define VIXL_OVERRIDE override
#define VIXL_CONSTEXPR constexpr
#define VIXL_HAS_CONSTEXPR 1
#else
#define VIXL_OVERRIDE
#define VIXL_CONSTEXPR
#endif

// Some functions might only be marked as "noreturn" for the DEBUG build. This
// macro should be used for such cases (for more details see what
// VIXL_UNREACHABLE expands to).
#ifdef VIXL_DEBUG
#define VIXL_DEBUG_NO_RETURN VIXL_NO_RETURN
// With VIXL_NEGATIVE_TESTING on, VIXL_ASSERT and VIXL_CHECK will throw
// exceptions but C++11 marks destructors as noexcept(true) by default.
#if defined(VIXL_NEGATIVE_TESTING) && __cplusplus >= 201103L
#define VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION noexcept(false)
#else
#define VIXL_DEBUG_NO_RETURN
#define VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION
#endif

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64

@ -269,16 +287,24 @@ inline void USE(const T1&, const T2&, const T3&, const T4&) {}

// Target Architecture/ISA
#ifdef VIXL_INCLUDE_TARGET_A64
#ifndef VIXL_INCLUDE_TARGET_AARCH64
#define VIXL_INCLUDE_TARGET_AARCH64
#endif
#endif

#if defined(VIXL_INCLUDE_TARGET_A32) && defined(VIXL_INCLUDE_TARGET_T32)
#ifndef VIXL_INCLUDE_TARGET_AARCH32
#define VIXL_INCLUDE_TARGET_AARCH32
#endif
#elif defined(VIXL_INCLUDE_TARGET_A32)
#ifndef VIXL_INCLUDE_TARGET_A32_ONLY
#define VIXL_INCLUDE_TARGET_A32_ONLY
#endif
#else
#ifndef VIXL_INCLUDE_TARGET_T32_ONLY
#define VIXL_INCLUDE_TARGET_T32_ONLY
#endif
#endif


#endif  // VIXL_GLOBALS_H

@ -27,9 +27,8 @@
|
|||
#ifndef VIXL_INVALSET_H_
|
||||
#define VIXL_INVALSET_H_
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
#include "globals-vixl.h"
|
||||
|
@ -91,7 +90,7 @@ template <TEMPLATE_INVALSET_P_DECL>
|
|||
class InvalSet {
|
||||
public:
|
||||
InvalSet();
|
||||
~InvalSet();
|
||||
~InvalSet() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION;
|
||||
|
||||
static const size_t kNPreallocatedElements = N_PREALLOCATED_ELEMENTS;
|
||||
static const KeyType kInvalidKey = INVALID_KEY;
|
||||
|
@ -112,7 +111,7 @@ class InvalSet {
|
|||
size_t size() const;
|
||||
|
||||
// Returns true if no elements are stored in the set.
|
||||
// Note that this does not mean the the backing storage is empty: it can still
|
||||
// Note that this does not mean the backing storage is empty: it can still
|
||||
// contain invalid elements.
|
||||
bool empty() const;
|
||||
|
||||
|
@ -244,8 +243,13 @@ class InvalSet {
|
|||
|
||||
|
||||
template <class S>
|
||||
class InvalSetIterator/* : public std::iterator<std::forward_iterator_tag,
|
||||
typename S::_ElementType> */{
|
||||
class InvalSetIterator {
|
||||
using iterator_category = std::forward_iterator_tag;
|
||||
using value_type = typename S::_ElementType;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using pointer = S*;
|
||||
using reference = S&;
|
||||
|
||||
private:
|
||||
// Redefine types to mirror the associated set types.
|
||||
typedef typename S::_ElementType ElementType;
|
||||
|
@ -323,7 +327,8 @@ InvalSet<TEMPLATE_INVALSET_P_DEF>::InvalSet()
|
|||
|
||||
|
||||
template <TEMPLATE_INVALSET_P_DECL>
|
||||
InvalSet<TEMPLATE_INVALSET_P_DEF>::~InvalSet() {
|
||||
InvalSet<TEMPLATE_INVALSET_P_DEF>::~InvalSet()
|
||||
VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
|
||||
VIXL_ASSERT(monitor_ == 0);
|
||||
delete vector_;
|
||||
}
|
||||
|
@ -841,9 +846,7 @@ InvalSetIterator<S>::InvalSetIterator(const InvalSetIterator<S>& other)
|
|||
#if __cplusplus >= 201103L
|
||||
template <class S>
|
||||
InvalSetIterator<S>::InvalSetIterator(InvalSetIterator<S>&& other) noexcept
|
||||
: using_vector_(false),
|
||||
index_(0),
|
||||
inval_set_(NULL) {
|
||||
: using_vector_(false), index_(0), inval_set_(NULL) {
|
||||
swap(*this, other);
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -35,7 +35,7 @@ class MacroAssemblerInterface {
|
|||
public:
|
||||
virtual internal::AssemblerBase* AsAssemblerBase() = 0;
|
||||
|
||||
virtual ~MacroAssemblerInterface() {}
|
||||
virtual ~MacroAssemblerInterface() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {}
|
||||
|
||||
virtual bool AllowMacroInstructions() const = 0;
|
||||
virtual bool ArePoolsBlocked() const = 0;
|
||||
|
|
|
@ -27,10 +27,10 @@
|
|||
#ifndef VIXL_POOL_MANAGER_IMPL_H_
|
||||
#define VIXL_POOL_MANAGER_IMPL_H_
|
||||
|
||||
#include "pool-manager.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "assembler-base-vixl.h"
|
||||
#include "pool-manager.h"
|
||||
|
||||
namespace vixl {
|
||||
|
||||
|
@ -264,14 +264,14 @@ bool PoolManager<T>::MustEmit(T pc,
|
|||
if (checkpoint < temp.min_location_) return true;
|
||||
}
|
||||
|
||||
bool tempNotPlacedYet = true;
|
||||
bool temp_not_placed_yet = true;
|
||||
for (int i = static_cast<int>(objects_.size()) - 1; i >= 0; --i) {
|
||||
const PoolObject<T>& current = objects_[i];
|
||||
if (tempNotPlacedYet && PoolObjectLessThan(current, temp)) {
|
||||
if (temp_not_placed_yet && PoolObjectLessThan(current, temp)) {
|
||||
checkpoint = UpdateCheckpointForObject(checkpoint, &temp);
|
||||
if (checkpoint < temp.min_location_) return true;
|
||||
if (CheckFuturePC(pc, checkpoint)) return true;
|
||||
tempNotPlacedYet = false;
|
||||
temp_not_placed_yet = false;
|
||||
}
|
||||
if (current.label_base_ == label_base) continue;
|
||||
checkpoint = UpdateCheckpointForObject(checkpoint, ¤t);
|
||||
|
@ -279,7 +279,7 @@ bool PoolManager<T>::MustEmit(T pc,
|
|||
if (CheckFuturePC(pc, checkpoint)) return true;
|
||||
}
|
||||
// temp is the object with the smallest max_location_.
|
||||
if (tempNotPlacedYet) {
|
||||
if (temp_not_placed_yet) {
|
||||
checkpoint = UpdateCheckpointForObject(checkpoint, &temp);
|
||||
if (checkpoint < temp.min_location_) return true;
|
||||
}
|
||||
|
@ -487,7 +487,7 @@ void PoolManager<T>::Release(T pc) {
|
|||
}
|
||||
|
||||
template <typename T>
|
||||
PoolManager<T>::~PoolManager<T>() {
|
||||
PoolManager<T>::~PoolManager<T>() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
|
||||
#ifdef VIXL_DEBUG
|
||||
// Check for unbound objects.
|
||||
for (objects_iter iter = objects_.begin(); iter != objects_.end(); ++iter) {
|
||||
|
@ -517,6 +517,6 @@ int PoolManager<T>::GetPoolSizeForTest() const {
|
|||
}
|
||||
return size;
|
||||
}
|
||||
}
|
||||
} // namespace vixl
|
||||
|
||||
#endif // VIXL_POOL_MANAGER_IMPL_H_
|
||||
|
|
|
@ -27,11 +27,10 @@
|
|||
#ifndef VIXL_POOL_MANAGER_H_
|
||||
#define VIXL_POOL_MANAGER_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <cstddef>
|
||||
#include <limits>
|
||||
#include <map>
|
||||
#include <stdint.h>
|
||||
#include <vector>
|
||||
|
||||
#include "globals-vixl.h"
|
||||
|
@ -142,7 +141,7 @@ class LocationBase {
|
|||
is_bound_(true),
|
||||
location_(location) {}
|
||||
|
||||
virtual ~LocationBase() {}
|
||||
virtual ~LocationBase() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {}
|
||||
|
||||
// The PoolManager should assume ownership of some objects, and delete them
|
||||
// after they have been placed. This can happen for example for literals that
|
||||
|
@ -369,8 +368,8 @@ class ForwardReference {
|
|||
|
||||
// Specify the possible locations where the object could be stored. AArch32's
|
||||
// PC offset, and T32's PC alignment calculations should be applied by the
|
||||
// Assembler, not here. The PoolManager deals only with simple locationes.
|
||||
// Including min_object_adddress_ is necessary to handle AArch32 some
|
||||
// Assembler, not here. The PoolManager deals only with simple locations.
|
||||
// Including min_object_address_ is necessary to handle AArch32 some
|
||||
// instructions which have a minimum offset of 0, but also have the implicit
|
||||
// PC offset.
|
||||
// Note that this structure cannot handle sparse ranges, such as A32's ADR,
|
||||
|
@ -397,7 +396,7 @@ class PoolManager {
|
|||
max_pool_size_(0),
|
||||
monitor_(0) {}
|
||||
|
||||
~PoolManager();
|
||||
~PoolManager() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION;
|
||||
|
||||
// Check if we will need to emit the pool at location 'pc', when planning to
|
||||
// generate a certain number of bytes. This optionally takes a
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
#include "compiler-intrinsics-vixl.h"
|
||||
|
@ -67,29 +68,40 @@ namespace vixl {
|
|||
#endif
|
||||
|
||||
template <typename T, size_t n>
|
||||
size_t ArrayLength(const T (&)[n]) {
|
||||
constexpr size_t ArrayLength(const T (&)[n]) {
|
||||
return n;
|
||||
}
|
||||
|
||||
inline uint64_t GetUintMask(unsigned bits) {
|
||||
VIXL_ASSERT(bits <= 64);
|
||||
uint64_t base = (bits >= 64) ? 0 : (UINT64_C(1) << bits);
|
||||
return base - 1;
|
||||
}
|
||||
|
||||
inline uint64_t GetSignMask(unsigned bits) {
|
||||
VIXL_ASSERT(bits <= 64);
|
||||
return UINT64_C(1) << (bits - 1);
|
||||
}
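A few spot checks of the intended semantics, assumed from the definitions above:

#include <cassert>
void MaskExamples() {
  assert(GetUintMask(8) == 0xff);                           // low 8 bits set
  assert(GetUintMask(64) == UINT64_C(0xffffffffffffffff));  // bits >= 64 branch
  assert(GetSignMask(8) == 0x80);                           // sign bit of an 8-bit value
}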

// Check number width.
// TODO: Refactor these using templates.
inline bool IsIntN(unsigned n, uint32_t x) {
VIXL_ASSERT((0 < n) && (n < 32));
uint32_t limit = UINT32_C(1) << (n - 1);
return x < limit;
VIXL_ASSERT((0 < n) && (n <= 32));
return x <= static_cast<uint32_t>(INT32_MAX >> (32 - n));
}
inline bool IsIntN(unsigned n, int32_t x) {
VIXL_ASSERT((0 < n) && (n < 32));
VIXL_ASSERT((0 < n) && (n <= 32));
if (n == 32) return true;
int32_t limit = INT32_C(1) << (n - 1);
return (-limit <= x) && (x < limit);
}
inline bool IsIntN(unsigned n, uint64_t x) {
VIXL_ASSERT((0 < n) && (n < 64));
uint64_t limit = UINT64_C(1) << (n - 1);
return x < limit;
VIXL_ASSERT((0 < n) && (n <= 64));
return x <= static_cast<uint64_t>(INT64_MAX >> (64 - n));
}
inline bool IsIntN(unsigned n, int64_t x) {
VIXL_ASSERT((0 < n) && (n < 64));
VIXL_ASSERT((0 < n) && (n <= 64));
if (n == 64) return true;
int64_t limit = INT64_C(1) << (n - 1);
return (-limit <= x) && (x < limit);
}
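Worked examples of the widened contract (n may now equal the full type width), as a sketch:

#include <cassert>
#include <cstdint>
void IntWidthExamples() {
  assert(IsIntN(8, INT64_C(127)));   // INT8_MAX fits in 8 bits
  assert(!IsIntN(8, INT64_C(128)));  // one past INT8_MAX does not
  assert(IsIntN(8, INT64_C(-128)));  // INT8_MIN still fits
  assert(IsIntN(64, INT64_MIN));     // n == 64 is now accepted
}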
@@ -98,7 +110,8 @@ VIXL_DEPRECATED("IsIntN", inline bool is_intn(unsigned n, int64_t x)) {
}

inline bool IsUintN(unsigned n, uint32_t x) {
VIXL_ASSERT((0 < n) && (n < 32));
VIXL_ASSERT((0 < n) && (n <= 32));
if (n >= 32) return true;
return !(x >> n);
}
inline bool IsUintN(unsigned n, int32_t x) {
@@ -107,7 +120,8 @@ inline bool IsUintN(unsigned n, int32_t x) {
return !(static_cast<uint32_t>(x) >> n);
}
inline bool IsUintN(unsigned n, uint64_t x) {
VIXL_ASSERT((0 < n) && (n < 64));
VIXL_ASSERT((0 < n) && (n <= 64));
if (n >= 64) return true;
return !(x >> n);
}
inline bool IsUintN(unsigned n, int64_t x) {
@@ -183,14 +197,14 @@ inline uint64_t ExtractUnsignedBitfield64(int msb, int lsb, uint64_t x) {
}


inline uint32_t ExtractUnsignedBitfield32(int msb, int lsb, uint32_t x) {
inline uint32_t ExtractUnsignedBitfield32(int msb, int lsb, uint64_t x) {
VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
(msb >= lsb));
return TruncateToUint32(ExtractUnsignedBitfield64(msb, lsb, x));
}


inline int64_t ExtractSignedBitfield64(int msb, int lsb, int64_t x) {
inline int64_t ExtractSignedBitfield64(int msb, int lsb, uint64_t x) {
VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
(msb >= lsb));
uint64_t temp = ExtractUnsignedBitfield64(msb, lsb, x);
@@ -203,8 +217,7 @@ inline int64_t ExtractSignedBitfield64(int msb, int lsb, int64_t x) {
return result;
}


inline int32_t ExtractSignedBitfield32(int msb, int lsb, int32_t x) {
inline int32_t ExtractSignedBitfield32(int msb, int lsb, uint64_t x) {
VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
(msb >= lsb));
uint32_t temp = TruncateToUint32(ExtractSignedBitfield64(msb, lsb, x));
@@ -213,7 +226,6 @@ inline int32_t ExtractSignedBitfield32(int msb, int lsb, int32_t x) {
return result;
}


inline uint64_t RotateRight(uint64_t value,
unsigned int rotate,
unsigned int width) {
@@ -271,6 +283,39 @@ VIXL_DEPRECATED("RawbitsToDouble",
return RawbitsToDouble(bits);
}

// Some compilers dislike negating unsigned integers,
// so we provide an equivalent.
template <typename T>
T UnsignedNegate(T value) {
VIXL_STATIC_ASSERT(std::is_unsigned<T>::value);
return ~value + 1;
}
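For example, MSVC warns (C4146) when unary minus is applied to an unsigned type; the helper computes the same two's-complement result explicitly. A quick check, as a sketch:

#include <cassert>
#include <cstdint>
void NegateExamples() {
  assert(UnsignedNegate(uint32_t{1}) == 0xffffffffu);
  assert(UnsignedNegate(uint64_t{0}) == 0);
}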

// An absolute operation for signed integers that is defined for results outside
// the representable range. Specifically, Abs(MIN_INT) is MIN_INT.
template <typename T>
T Abs(T val) {
// TODO: this static assertion is for signed integer inputs, as that's the
// only type tested. However, the code should work for all numeric inputs.
// Remove the assertion and this comment when more tests are available.
VIXL_STATIC_ASSERT(std::is_signed<T>::value && std::is_integral<T>::value);
return ((val >= -std::numeric_limits<T>::max()) && (val < 0)) ? -val : val;
}
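The edge case called out in the comment, as a sketch:

#include <cassert>
#include <climits>
void AbsExamples() {
  assert(Abs(-5) == 5);
  // -INT_MIN is not representable, so INT_MIN is returned unchanged.
  assert(Abs(INT_MIN) == INT_MIN);
}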

// Convert unsigned to signed numbers in a well-defined way (using two's
// complement representations).
inline int64_t RawbitsToInt64(uint64_t bits) {
return (bits >= UINT64_C(0x8000000000000000))
? (-static_cast<int64_t>(UnsignedNegate(bits) - 1) - 1)
: static_cast<int64_t>(bits);
}

inline int32_t RawbitsToInt32(uint32_t bits) {
return (bits >= UINT64_C(0x80000000))
? (-static_cast<int32_t>(UnsignedNegate(bits) - 1) - 1)
: static_cast<int32_t>(bits);
}
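Spot checks of the two's-complement reinterpretation, assumed from the definitions above:

#include <cassert>
#include <cstdint>
void RawbitsExamples() {
  assert(RawbitsToInt32(UINT32_C(0x7fffffff)) == INT32_MAX);
  assert(RawbitsToInt32(UINT32_C(0x80000000)) == INT32_MIN);
  assert(RawbitsToInt64(UINT64_C(0xffffffffffffffff)) == INT64_C(-1));
}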

namespace internal {

// Internal simulation class used solely by the simulator to
@@ -294,7 +339,7 @@ class SimFloat16 : public Float16 {
bool operator>(SimFloat16 rhs) const;
bool operator==(SimFloat16 rhs) const;
bool operator!=(SimFloat16 rhs) const;
// This is necessary for conversions peformed in (macro asm) Fmov.
// This is necessary for conversions performed in (macro asm) Fmov.
bool operator==(double rhs) const;
operator double() const;
};
@@ -365,6 +410,10 @@ VIXL_DEPRECATED("Float16Classify", inline int float16classify(uint16_t value)) {

bool IsZero(Float16 value);

inline bool IsPositiveZero(double value) {
return (value == 0.0) && (copysign(1.0, value) > 0.0);
}

inline bool IsNaN(float value) { return std::isnan(value); }

inline bool IsNaN(double value) { return std::isnan(value); }
@@ -447,7 +496,9 @@ inline float FusedMultiplyAdd(float op1, float op2, float a) {
}


inline uint64_t LowestSetBit(uint64_t value) { return value & static_cast<uint64_t>(-static_cast<int64_t>(value)); }
inline uint64_t LowestSetBit(uint64_t value) {
return value & UnsignedNegate(value);
}
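value & UnsignedNegate(value) isolates the lowest set bit; for example:

#include <cassert>
void LowestSetBitExample() {
  assert(LowestSetBit(0b1100) == 0b100);  // only the lowest 1-bit survives
  assert(LowestSetBit(0) == 0);           // no set bit at all
}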


template <typename T>
@@ -484,11 +535,11 @@ T ReverseBits(T value) {


template <typename T>
inline T SignExtend(T val, int bitSize) {
VIXL_ASSERT(bitSize > 0);
T mask = (T(2) << (bitSize - 1)) - T(1);
inline T SignExtend(T val, int size_in_bits) {
VIXL_ASSERT(size_in_bits > 0);
T mask = (T(2) << (size_in_bits - 1)) - T(1);
val &= mask;
T sign_bits = -((val >> (bitSize - 1)) << bitSize);
T sign_bits = -((val >> (size_in_bits - 1)) << size_in_bits);
val |= sign_bits;
return val;
}
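A worked example of the renamed helper (same behaviour, clearer parameter name):

#include <cassert>
#include <cstdint>
void SignExtendExamples() {
  // 0xff viewed as an 8-bit value has its sign bit set, so it extends to -1.
  assert(SignExtend(int32_t{0xff}, 8) == -1);
  assert(SignExtend(int32_t{0x7f}, 8) == 0x7f);  // sign bit clear: unchanged
}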
@@ -801,7 +852,7 @@ class Uint32 {
}
int32_t GetSigned() const { return data_; }
Uint32 operator~() const { return Uint32(~data_); }
Uint32 operator-() const { return Uint32(static_cast<uint32_t>(-static_cast<int32_t>(data_))); }
Uint32 operator-() const { return Uint32(UnsignedNegate(data_)); }
bool operator==(Uint32 value) const { return data_ == value.data_; }
bool operator!=(Uint32 value) const { return data_ != value.data_; }
bool operator>(Uint32 value) const { return data_ > value.data_; }
@@ -869,7 +920,7 @@ class Uint64 {
Uint32 GetHigh32() const { return Uint32(data_ >> 32); }
Uint32 GetLow32() const { return Uint32(data_ & 0xffffffff); }
Uint64 operator~() const { return Uint64(~data_); }
Uint64 operator-() const { return Uint64(static_cast<uint64_t>(-static_cast<int64_t>(data_))); }
Uint64 operator-() const { return Uint64(UnsignedNegate(data_)); }
bool operator==(Uint64 value) const { return data_ == value.data_; }
bool operator!=(Uint64 value) const { return data_ != value.data_; }
Uint64 operator+(Uint64 value) const { return Uint64(data_ + value.data_); }
@@ -974,6 +1025,42 @@ Uint64::Uint64(Uint128 data) : data_(data.ToUint64().Get()) {}

Int64 BitCount(Uint32 value);

// The algorithm used is adapted from the one described in section 8.2 of
// Hacker's Delight, by Henry S. Warren, Jr.
template <unsigned N, typename T>
int64_t MultiplyHigh(T u, T v) {
uint64_t u0, v0, w0, u1, v1, w1, w2, t;
VIXL_STATIC_ASSERT((N == 8) || (N == 16) || (N == 32) || (N == 64));
uint64_t sign_mask = UINT64_C(1) << (N - 1);
uint64_t sign_ext = 0;
unsigned half_bits = N / 2;
uint64_t half_mask = GetUintMask(half_bits);
if (std::numeric_limits<T>::is_signed) {
sign_ext = UINT64_C(0xffffffffffffffff) << half_bits;
}

VIXL_ASSERT(sizeof(u) == sizeof(uint64_t));
VIXL_ASSERT(sizeof(u) == sizeof(u0));

u0 = u & half_mask;
u1 = u >> half_bits | (((u & sign_mask) != 0) ? sign_ext : 0);
v0 = v & half_mask;
v1 = v >> half_bits | (((v & sign_mask) != 0) ? sign_ext : 0);

w0 = u0 * v0;
t = u1 * v0 + (w0 >> half_bits);

w1 = t & half_mask;
w2 = t >> half_bits | (((t & sign_mask) != 0) ? sign_ext : 0);
w1 = u0 * v1 + w1;
w1 = w1 >> half_bits | (((w1 & sign_mask) != 0) ? sign_ext : 0);

uint64_t value = u1 * v1 + w2 + w1;
int64_t result;
memcpy(&result, &value, sizeof(result));
return result;
}
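Sanity checks for the high-half multiply, as a sketch; both operands must match the 64-bit instantiation because of the sizeof asserts above:

#include <cassert>
#include <cstdint>
void MultiplyHighExamples() {
  // (2^32) * (2^32) == 2^64, so the high 64 bits of the product are 1.
  uint64_t u = UINT64_C(1) << 32;
  assert(internal::MultiplyHigh<64>(u, u) == 1);
  // Signed: (-1) * (-1) == 1, whose high half is 0.
  assert(internal::MultiplyHigh<64>(INT64_C(-1), INT64_C(-1)) == 0);
}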

}  // namespace internal

// The default NaN values (for FPCR.DN=1).
@@ -1139,7 +1226,7 @@ T FPRound(int64_t sign,
// For subnormal outputs, the shift must be adjusted by the exponent. The +1
// is necessary because the exponent of a subnormal value (encoded as 0) is
// the same as the exponent of the smallest normal value (encoded as 1).
shift += -exponent + 1;
shift += static_cast<int>(-exponent + 1);

// Handle inputs that would produce a zero output.
//
@@ -1238,9 +1325,8 @@ inline Float16 FPRoundToFloat16(int64_t sign,
uint64_t mantissa,
FPRounding round_mode) {
return RawbitsToFloat16(
FPRound<uint16_t,
kFloat16ExponentBits,
kFloat16MantissaBits>(sign, exponent, mantissa, round_mode));
FPRound<uint16_t, kFloat16ExponentBits, kFloat16MantissaBits>(
sign, exponent, mantissa, round_mode));
}


@@ -1276,6 +1362,81 @@ Float16 FPToFloat16(double value,
FPRounding round_mode,
UseDefaultNaN DN,
bool* exception = NULL);

// Like static_cast<T>(value), but with specialisations for the Float16 type.
template <typename T, typename F>
T StaticCastFPTo(F value) {
return static_cast<T>(value);
}

template <>
inline float StaticCastFPTo<float, Float16>(Float16 value) {
return FPToFloat(value, kIgnoreDefaultNaN);
}

template <>
inline double StaticCastFPTo<double, Float16>(Float16 value) {
return FPToDouble(value, kIgnoreDefaultNaN);
}

template <>
inline Float16 StaticCastFPTo<Float16, float>(float value) {
return FPToFloat16(value, FPTieEven, kIgnoreDefaultNaN);
}

template <>
inline Float16 StaticCastFPTo<Float16, double>(double value) {
return FPToFloat16(value, FPTieEven, kIgnoreDefaultNaN);
}

template <typename T>
uint64_t FPToRawbitsWithSize(unsigned size_in_bits, T value) {
switch (size_in_bits) {
case 16:
return Float16ToRawbits(StaticCastFPTo<Float16>(value));
case 32:
return FloatToRawbits(StaticCastFPTo<float>(value));
case 64:
return DoubleToRawbits(StaticCastFPTo<double>(value));
}
VIXL_UNREACHABLE();
return 0;
}

template <typename T>
T RawbitsWithSizeToFP(unsigned size_in_bits, uint64_t value) {
VIXL_ASSERT(IsUintN(size_in_bits, value));
switch (size_in_bits) {
case 16:
return StaticCastFPTo<T>(RawbitsToFloat16(static_cast<uint16_t>(value)));
case 32:
return StaticCastFPTo<T>(RawbitsToFloat(static_cast<uint32_t>(value)));
case 64:
return StaticCastFPTo<T>(RawbitsToDouble(value));
}
VIXL_UNREACHABLE();
return 0;
}
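A round-trip sketch through the size-dispatched helpers; IEEE-754 single-precision 1.0f encodes as 0x3f800000:

#include <cassert>
#include <cstdint>
void RawbitsRoundTrip() {
  uint64_t raw = FPToRawbitsWithSize(32, 1.0f);
  assert(raw == 0x3f800000);
  float back = RawbitsWithSizeToFP<float>(32, raw);
  assert(back == 1.0f);
}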

// Jenkins one-at-a-time hash, based on
// https://en.wikipedia.org/wiki/Jenkins_hash_function citing
// https://www.drdobbs.com/database/algorithm-alley/184410284.
constexpr uint32_t Hash(const char* str, uint32_t hash = 0) {
if (*str == '\0') {
hash += hash << 3;
hash ^= hash >> 11;
hash += hash << 15;
return hash;
} else {
hash += *str;
hash += hash << 10;
hash ^= hash >> 6;
return Hash(str + 1, hash);
}
}

constexpr uint32_t operator"" _h(const char* x, size_t) { return Hash(x); }
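Because Hash and the literal are constexpr, they enable compile-time string dispatch; a hypothetical mnemonic switch, as a sketch (HandleAdd/HandleSub are placeholders):

// Hypothetical dispatcher over mnemonic hashes.
void HandleAdd();
void HandleSub();
void Dispatch(const char* mnemonic) {
  switch (Hash(mnemonic)) {
    case "add"_h: HandleAdd(); break;
    case "sub"_h: HandleSub(); break;
    default: break;
  }
}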

}  // namespace vixl

#endif  // VIXL_UTILS_H
@@ -2557,13 +2557,13 @@ void Assembler::adr(Condition cond,
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kT32PcDelta;
Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
VIXL_ASSERT((offset >= 0) && (offset <= 1020) &&
((offset & 0x3) == 0));
const int32_t target = offset >> 2;
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kT32PcDelta;
Location::Offset off =
loc->GetLocation() - AlignDown(program_counter, 4);
VIXL_ASSERT((off >= 0) && (off <= 1020) && ((off & 0x3) == 0));
const int32_t target = off >> 2;
return instr | (target & 0xff);
}
} immop;
@@ -2588,15 +2588,16 @@ void Assembler::adr(Condition cond,
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kT32PcDelta;
Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kT32PcDelta;
Location::Offset off =
loc->GetLocation() - AlignDown(program_counter, 4);
int32_t target;
if ((offset >= 0) && (offset <= 4095)) {
target = offset;
if ((off >= 0) && (off <= 4095)) {
target = off;
} else {
target = -offset;
target = -off;
VIXL_ASSERT((target >= 0) && (target <= 4095));
// Emit the T2 encoding.
instr |= 0x00a00000;
@@ -2622,19 +2623,20 @@ void Assembler::adr(Condition cond,
public:
EmitOp() : Location::EmitOperator(A32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kA32PcDelta;
Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kA32PcDelta;
Location::Offset off =
loc->GetLocation() - AlignDown(program_counter, 4);
int32_t target;
ImmediateA32 positive_immediate_a32(offset);
if (positive_immediate_a32.IsValid()) {
target = positive_immediate_a32.GetEncodingValue();
ImmediateA32 pos_imm_a32(off);
if (pos_imm_a32.IsValid()) {
target = pos_imm_a32.GetEncodingValue();
} else {
ImmediateA32 negative_immediate_a32(-offset);
VIXL_ASSERT(negative_immediate_a32.IsValid());
ImmediateA32 neg_imm_a32(-off);
VIXL_ASSERT(neg_imm_a32.IsValid());
// Emit the A2 encoding.
target = negative_immediate_a32.GetEncodingValue();
target = neg_imm_a32.GetEncodingValue();
instr = (instr & ~0x00f00000) | 0x00400000;
}
return instr | (target & 0xfff);
@@ -3024,13 +3026,12 @@ void Assembler::b(Condition cond, EncodingSize size, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kT32PcDelta;
Location::Offset offset = location->GetLocation() - pc;
VIXL_ASSERT((offset >= -256) && (offset <= 254) &&
((offset & 0x1) == 0));
const int32_t target = offset >> 1;
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kT32PcDelta;
Location::Offset off = loc->GetLocation() - program_counter;
VIXL_ASSERT((off >= -256) && (off <= 254) && ((off & 0x1) == 0));
const int32_t target = off >> 1;
return instr | (target & 0xff);
}
} immop;
@@ -3051,13 +3052,12 @@ void Assembler::b(Condition cond, EncodingSize size, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kT32PcDelta;
Location::Offset offset = location->GetLocation() - pc;
VIXL_ASSERT((offset >= -2048) && (offset <= 2046) &&
((offset & 0x1) == 0));
const int32_t target = offset >> 1;
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kT32PcDelta;
Location::Offset off = loc->GetLocation() - program_counter;
VIXL_ASSERT((off >= -2048) && (off <= 2046) && ((off & 0x1) == 0));
const int32_t target = off >> 1;
return instr | (target & 0x7ff);
}
} immop;
@@ -3075,13 +3075,13 @@ void Assembler::b(Condition cond, EncodingSize size, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kT32PcDelta;
Location::Offset offset = location->GetLocation() - pc;
VIXL_ASSERT((offset >= -1048576) && (offset <= 1048574) &&
((offset & 0x1) == 0));
const int32_t target = offset >> 1;
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kT32PcDelta;
Location::Offset off = loc->GetLocation() - program_counter;
VIXL_ASSERT((off >= -1048576) && (off <= 1048574) &&
((off & 0x1) == 0));
const int32_t target = off >> 1;
return instr | (target & 0x7ff) | ((target & 0x1f800) << 5) |
((target & 0x20000) >> 4) | ((target & 0x40000) >> 7) |
((target & 0x80000) << 7);
@@ -3104,13 +3104,13 @@ void Assembler::b(Condition cond, EncodingSize size, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kT32PcDelta;
Location::Offset offset = location->GetLocation() - pc;
VIXL_ASSERT((offset >= -16777216) && (offset <= 16777214) &&
((offset & 0x1) == 0));
int32_t target = offset >> 1;
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kT32PcDelta;
Location::Offset off = loc->GetLocation() - program_counter;
VIXL_ASSERT((off >= -16777216) && (off <= 16777214) &&
((off & 0x1) == 0));
int32_t target = off >> 1;
uint32_t S = target & (1 << 23);
target ^= ((S >> 1) | (S >> 2)) ^ (3 << 21);
return instr | (target & 0x7ff) | ((target & 0x1ff800) << 5) |
@@ -3132,13 +3132,13 @@ void Assembler::b(Condition cond, EncodingSize size, Location* location) {
public:
EmitOp() : Location::EmitOperator(A32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kA32PcDelta;
Location::Offset offset = location->GetLocation() - pc;
VIXL_ASSERT((offset >= -33554432) && (offset <= 33554428) &&
((offset & 0x3) == 0));
const int32_t target = offset >> 2;
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kA32PcDelta;
Location::Offset off = loc->GetLocation() - program_counter;
VIXL_ASSERT((off >= -33554432) && (off <= 33554428) &&
((off & 0x3) == 0));
const int32_t target = off >> 2;
return instr | (target & 0xffffff);
}
} immop;
@@ -3462,13 +3462,13 @@ void Assembler::bl(Condition cond, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kT32PcDelta;
Location::Offset offset = location->GetLocation() - pc;
VIXL_ASSERT((offset >= -16777216) && (offset <= 16777214) &&
((offset & 0x1) == 0));
int32_t target = offset >> 1;
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kT32PcDelta;
Location::Offset off = loc->GetLocation() - program_counter;
VIXL_ASSERT((off >= -16777216) && (off <= 16777214) &&
((off & 0x1) == 0));
int32_t target = off >> 1;
uint32_t S = target & (1 << 23);
target ^= ((S >> 1) | (S >> 2)) ^ (3 << 21);
return instr | (target & 0x7ff) | ((target & 0x1ff800) << 5) |
@@ -3490,13 +3490,13 @@ void Assembler::bl(Condition cond, Location* location) {
public:
EmitOp() : Location::EmitOperator(A32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kA32PcDelta;
Location::Offset offset = location->GetLocation() - pc;
VIXL_ASSERT((offset >= -33554432) && (offset <= 33554428) &&
((offset & 0x3) == 0));
const int32_t target = offset >> 2;
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kA32PcDelta;
Location::Offset off = loc->GetLocation() - program_counter;
VIXL_ASSERT((off >= -33554432) && (off <= 33554428) &&
((off & 0x3) == 0));
const int32_t target = off >> 2;
return instr | (target & 0xffffff);
}
} immop;
@@ -3549,13 +3549,14 @@ void Assembler::blx(Condition cond, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kT32PcDelta;
Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
VIXL_ASSERT((offset >= -16777216) && (offset <= 16777212) &&
((offset & 0x3) == 0));
int32_t target = offset >> 2;
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kT32PcDelta;
Location::Offset off =
loc->GetLocation() - AlignDown(program_counter, 4);
VIXL_ASSERT((off >= -16777216) && (off <= 16777212) &&
((off & 0x3) == 0));
int32_t target = off >> 2;
uint32_t S = target & (1 << 22);
target ^= ((S >> 1) | (S >> 2)) ^ (3 << 20);
return instr | ((target & 0x3ff) << 1) | ((target & 0xffc00) << 6) |
@@ -3577,15 +3578,14 @@ void Assembler::blx(Condition cond, Location* location) {
public:
EmitOp() : Location::EmitOperator(A32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const
VIXL_OVERRIDE {
pc += kA32PcDelta;
Location::Offset offset =
location->GetLocation() - AlignDown(pc, 4);
VIXL_ASSERT((offset >= -33554432) && (offset <= 33554430) &&
((offset & 0x1) == 0));
const int32_t target = offset >> 1;
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kA32PcDelta;
Location::Offset off =
loc->GetLocation() - AlignDown(program_counter, 4);
VIXL_ASSERT((off >= -33554432) && (off <= 33554430) &&
((off & 0x1) == 0));
const int32_t target = off >> 1;
return instr | ((target & 0x1) << 24) | ((target & 0x1fffffe) >> 1);
}
} immop;
@@ -3698,13 +3698,12 @@ void Assembler::cbnz(Register rn, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kT32PcDelta;
Location::Offset offset = location->GetLocation() - pc;
VIXL_ASSERT((offset >= 0) && (offset <= 126) &&
((offset & 0x1) == 0));
const int32_t target = offset >> 1;
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kT32PcDelta;
Location::Offset off = loc->GetLocation() - program_counter;
VIXL_ASSERT((off >= 0) && (off <= 126) && ((off & 0x1) == 0));
const int32_t target = off >> 1;
return instr | ((target & 0x1f) << 3) | ((target & 0x20) << 4);
}
} immop;
@@ -3748,13 +3747,12 @@ void Assembler::cbz(Register rn, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kT32PcDelta;
Location::Offset offset = location->GetLocation() - pc;
VIXL_ASSERT((offset >= 0) && (offset <= 126) &&
((offset & 0x1) == 0));
const int32_t target = offset >> 1;
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kT32PcDelta;
Location::Offset off = loc->GetLocation() - program_counter;
VIXL_ASSERT((off >= 0) && (off <= 126) && ((off & 0x1) == 0));
const int32_t target = off >> 1;
return instr | ((target & 0x1f) << 3) | ((target & 0x20) << 4);
}
} immop;
@@ -4790,7 +4788,7 @@ void Assembler::ldm(Condition cond,
}
// LDM{<c>}{<q>} SP!, <registers> ; T1
if (!size.IsWide() && rn.Is(sp) && write_back.DoesWriteBack() &&
((registers.GetList() & ~0x80ff) == 0)) {
registers.IsR0toR7orPC()) {
EmitT32_16(0xbc00 | (GetRegisterListEncoding(registers, 15, 1) << 8) |
GetRegisterListEncoding(registers, 0, 8));
AdvanceIT();
@@ -5208,13 +5206,13 @@ void Assembler::ldr(Condition cond,
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kT32PcDelta;
Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
VIXL_ASSERT((offset >= 0) && (offset <= 1020) &&
((offset & 0x3) == 0));
const int32_t target = offset >> 2;
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kT32PcDelta;
Location::Offset off =
loc->GetLocation() - AlignDown(program_counter, 4);
VIXL_ASSERT((off >= 0) && (off <= 1020) && ((off & 0x3) == 0));
const int32_t target = off >> 2;
return instr | (target & 0xff);
}
} immop;
@@ -5233,13 +5231,14 @@ void Assembler::ldr(Condition cond,
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kT32PcDelta;
Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
uint32_t U = (offset >= 0);
int32_t target = abs(offset) | (U << 12);
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kT32PcDelta;
Location::Offset off =
loc->GetLocation() - AlignDown(program_counter, 4);
VIXL_ASSERT((off >= -4095) && (off <= 4095));
uint32_t U = (off >= 0);
int32_t target = abs(off) | (U << 12);
return instr | (target & 0xfff) | ((target & 0x1000) << 11);
}
} immop;
@@ -5259,13 +5258,14 @@ void Assembler::ldr(Condition cond,
public:
EmitOp() : Location::EmitOperator(A32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kA32PcDelta;
Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
uint32_t U = (offset >= 0);
int32_t target = abs(offset) | (U << 12);
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kA32PcDelta;
Location::Offset off =
loc->GetLocation() - AlignDown(program_counter, 4);
VIXL_ASSERT((off >= -4095) && (off <= 4095));
uint32_t U = (off >= 0);
int32_t target = abs(off) | (U << 12);
return instr | (target & 0xfff) | ((target & 0x1000) << 11);
}
} immop;
@@ -5505,13 +5505,14 @@ void Assembler::ldrb(Condition cond, Register rt, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kT32PcDelta;
Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
uint32_t U = (offset >= 0);
int32_t target = abs(offset) | (U << 12);
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kT32PcDelta;
Location::Offset off =
loc->GetLocation() - AlignDown(program_counter, 4);
VIXL_ASSERT((off >= -4095) && (off <= 4095));
uint32_t U = (off >= 0);
int32_t target = abs(off) | (U << 12);
return instr | (target & 0xfff) | ((target & 0x1000) << 11);
}
} immop;
@@ -5531,13 +5532,14 @@ void Assembler::ldrb(Condition cond, Register rt, Location* location) {
public:
EmitOp() : Location::EmitOperator(A32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kA32PcDelta;
Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
uint32_t U = (offset >= 0);
int32_t target = abs(offset) | (U << 12);
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kA32PcDelta;
Location::Offset off =
loc->GetLocation() - AlignDown(program_counter, 4);
VIXL_ASSERT((off >= -4095) && (off <= 4095));
uint32_t U = (off >= 0);
int32_t target = abs(off) | (U << 12);
return instr | (target & 0xfff) | ((target & 0x1000) << 11);
}
} immop;
@@ -5747,13 +5749,13 @@ void Assembler::ldrd(Condition cond,
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kT32PcDelta;
Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
VIXL_ASSERT((offset >= -1020) && (offset <= 1020) &&
((offset & 0x3) == 0));
int32_t target = offset >> 2;
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kT32PcDelta;
Location::Offset off =
loc->GetLocation() - AlignDown(program_counter, 4);
VIXL_ASSERT((off >= -1020) && (off <= 1020) && ((off & 0x3) == 0));
int32_t target = off >> 2;
uint32_t U = (target >= 0);
target = abs(target) | (U << 8);
return instr | (target & 0xff) | ((target & 0x100) << 15);
@@ -5777,13 +5779,14 @@ void Assembler::ldrd(Condition cond,
public:
EmitOp() : Location::EmitOperator(A32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kA32PcDelta;
Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
VIXL_ASSERT((offset >= -255) && (offset <= 255));
uint32_t U = (offset >= 0);
int32_t target = abs(offset) | (U << 8);
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kA32PcDelta;
Location::Offset off =
loc->GetLocation() - AlignDown(program_counter, 4);
VIXL_ASSERT((off >= -255) && (off <= 255));
uint32_t U = (off >= 0);
int32_t target = abs(off) | (U << 8);
return instr | (target & 0xf) | ((target & 0xf0) << 4) |
((target & 0x100) << 15);
}
@@ -6129,13 +6132,14 @@ void Assembler::ldrh(Condition cond, Register rt, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kT32PcDelta;
Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
uint32_t U = (offset >= 0);
int32_t target = abs(offset) | (U << 12);
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kT32PcDelta;
Location::Offset off =
loc->GetLocation() - AlignDown(program_counter, 4);
VIXL_ASSERT((off >= -4095) && (off <= 4095));
uint32_t U = (off >= 0);
int32_t target = abs(off) | (U << 12);
return instr | (target & 0xfff) | ((target & 0x1000) << 11);
}
} immop;
@@ -6155,13 +6159,14 @@ void Assembler::ldrh(Condition cond, Register rt, Location* location) {
public:
EmitOp() : Location::EmitOperator(A32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kA32PcDelta;
Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
VIXL_ASSERT((offset >= -255) && (offset <= 255));
uint32_t U = (offset >= 0);
int32_t target = abs(offset) | (U << 8);
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kA32PcDelta;
Location::Offset off =
loc->GetLocation() - AlignDown(program_counter, 4);
VIXL_ASSERT((off >= -255) && (off <= 255));
uint32_t U = (off >= 0);
int32_t target = abs(off) | (U << 8);
return instr | (target & 0xf) | ((target & 0xf0) << 4) |
((target & 0x100) << 15);
}
@@ -6382,13 +6387,14 @@ void Assembler::ldrsb(Condition cond, Register rt, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kT32PcDelta;
Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
uint32_t U = (offset >= 0);
int32_t target = abs(offset) | (U << 12);
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kT32PcDelta;
Location::Offset off =
loc->GetLocation() - AlignDown(program_counter, 4);
VIXL_ASSERT((off >= -4095) && (off <= 4095));
uint32_t U = (off >= 0);
int32_t target = abs(off) | (U << 12);
return instr | (target & 0xfff) | ((target & 0x1000) << 11);
}
} immop;
@@ -6408,13 +6414,14 @@ void Assembler::ldrsb(Condition cond, Register rt, Location* location) {
public:
EmitOp() : Location::EmitOperator(A32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kA32PcDelta;
Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
VIXL_ASSERT((offset >= -255) && (offset <= 255));
uint32_t U = (offset >= 0);
int32_t target = abs(offset) | (U << 8);
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kA32PcDelta;
Location::Offset off =
loc->GetLocation() - AlignDown(program_counter, 4);
VIXL_ASSERT((off >= -255) && (off <= 255));
uint32_t U = (off >= 0);
int32_t target = abs(off) | (U << 8);
return instr | (target & 0xf) | ((target & 0xf0) << 4) |
((target & 0x100) << 15);
}
@@ -6635,13 +6642,14 @@ void Assembler::ldrsh(Condition cond, Register rt, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kT32PcDelta;
Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
uint32_t U = (offset >= 0);
int32_t target = abs(offset) | (U << 12);
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kT32PcDelta;
Location::Offset off =
loc->GetLocation() - AlignDown(program_counter, 4);
VIXL_ASSERT((off >= -4095) && (off <= 4095));
uint32_t U = (off >= 0);
int32_t target = abs(off) | (U << 12);
return instr | (target & 0xfff) | ((target & 0x1000) << 11);
}
} immop;
@@ -6661,13 +6669,14 @@ void Assembler::ldrsh(Condition cond, Register rt, Location* location) {
public:
EmitOp() : Location::EmitOperator(A32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kA32PcDelta;
Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
VIXL_ASSERT((offset >= -255) && (offset <= 255));
uint32_t U = (offset >= 0);
int32_t target = abs(offset) | (U << 8);
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kA32PcDelta;
Location::Offset off =
loc->GetLocation() - AlignDown(program_counter, 4);
VIXL_ASSERT((off >= -255) && (off <= 255));
uint32_t U = (off >= 0);
int32_t target = abs(off) | (U << 8);
return instr | (target & 0xf) | ((target & 0xf0) << 4) |
((target & 0x100) << 15);
}
@@ -8039,13 +8048,14 @@ void Assembler::pld(Condition cond, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const VIXL_OVERRIDE {
pc += kT32PcDelta;
Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
uint32_t U = (offset >= 0);
int32_t target = abs(offset) | (U << 12);
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kT32PcDelta;
Location::Offset off =
loc->GetLocation() - AlignDown(program_counter, 4);
VIXL_ASSERT((off >= -4095) && (off <= 4095));
uint32_t U = (off >= 0);
int32_t target = abs(off) | (U << 12);
return instr | (target & 0xfff) | ((target & 0x1000) << 11);
}
} immop;
@@ -8062,15 +8072,14 @@ void Assembler::pld(Condition cond, Location* location) {
public:
EmitOp() : Location::EmitOperator(A32) {}
virtual uint32_t Encode(uint32_t instr,
Location::Offset pc,
const Location* location) const
VIXL_OVERRIDE {
pc += kA32PcDelta;
Location::Offset offset =
location->GetLocation() - AlignDown(pc, 4);
VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
uint32_t U = (offset >= 0);
int32_t target = abs(offset) | (U << 12);
Location::Offset program_counter,
const Location* loc) const VIXL_OVERRIDE {
program_counter += kA32PcDelta;
Location::Offset off =
loc->GetLocation() - AlignDown(program_counter, 4);
VIXL_ASSERT((off >= -4095) && (off <= 4095));
uint32_t U = (off >= 0);
int32_t target = abs(off) | (U << 12);
return instr | (target & 0xfff) | ((target & 0x1000) << 11);
}
} immop;
@ -8403,13 +8412,14 @@ void Assembler::pli(Condition cond, Location* location) {
|
|||
public:
|
||||
EmitOp() : Location::EmitOperator(T32) {}
|
||||
virtual uint32_t Encode(uint32_t instr,
|
||||
Location::Offset pc,
|
||||
const Location* location) const VIXL_OVERRIDE {
|
||||
pc += kT32PcDelta;
|
||||
Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
|
||||
VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
|
||||
uint32_t U = (offset >= 0);
|
||||
int32_t target = abs(offset) | (U << 12);
|
||||
Location::Offset program_counter,
|
||||
const Location* loc) const VIXL_OVERRIDE {
|
||||
program_counter += kT32PcDelta;
|
||||
Location::Offset off =
|
||||
loc->GetLocation() - AlignDown(program_counter, 4);
|
||||
VIXL_ASSERT((off >= -4095) && (off <= 4095));
|
||||
uint32_t U = (off >= 0);
|
||||
int32_t target = abs(off) | (U << 12);
|
||||
return instr | (target & 0xfff) | ((target & 0x1000) << 11);
|
||||
}
|
||||
} immop;
|
||||
|
@ -8426,15 +8436,14 @@ void Assembler::pli(Condition cond, Location* location) {
|
|||
public:
|
||||
EmitOp() : Location::EmitOperator(A32) {}
|
||||
virtual uint32_t Encode(uint32_t instr,
|
||||
Location::Offset pc,
|
||||
const Location* location) const
|
||||
VIXL_OVERRIDE {
|
||||
pc += kA32PcDelta;
|
||||
Location::Offset offset =
|
||||
location->GetLocation() - AlignDown(pc, 4);
|
||||
VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
|
||||
uint32_t U = (offset >= 0);
|
||||
int32_t target = abs(offset) | (U << 12);
|
||||
Location::Offset program_counter,
|
||||
const Location* loc) const VIXL_OVERRIDE {
|
||||
program_counter += kA32PcDelta;
|
||||
Location::Offset off =
|
||||
loc->GetLocation() - AlignDown(program_counter, 4);
|
||||
VIXL_ASSERT((off >= -4095) && (off <= 4095));
|
||||
uint32_t U = (off >= 0);
|
||||
int32_t target = abs(off) | (U << 12);
|
||||
return instr | (target & 0xfff) | ((target & 0x1000) << 11);
|
||||
}
|
||||
} immop;
|
||||
|
@ -8471,40 +8480,52 @@ bool Assembler::pli_info(Condition cond,
|
|||
void Assembler::pop(Condition cond, EncodingSize size, RegisterList registers) {
|
||||
VIXL_ASSERT(AllowAssembler());
|
||||
CheckIT(cond);
|
||||
if (!registers.IsEmpty() || AllowUnpredictable()) {
|
||||
if (IsUsingT32()) {
|
||||
// A branch out of an IT block should be the last instruction in the
|
||||
// block.
|
||||
if (!registers.Includes(pc) || OutsideITBlockAndAlOrLast(cond) ||
|
||||
AllowUnpredictable()) {
|
||||
// POP{<c>}{<q>} <registers> ; T1
|
||||
if (!size.IsWide() && ((registers.GetList() & ~0x80ff) == 0)) {
|
||||
if (!size.IsWide() && registers.IsR0toR7orPC()) {
|
||||
EmitT32_16(0xbc00 | (GetRegisterListEncoding(registers, 15, 1) << 8) |
|
||||
GetRegisterListEncoding(registers, 0, 8));
|
||||
AdvanceIT();
|
||||
return;
|
||||
}
|
||||
// POP{<c>}{<q>} <registers> ; T2
|
||||
if (!size.IsNarrow() && ((registers.GetList() & ~0xdfff) == 0)) {
|
||||
EmitT32_32(0xe8bd0000U |
|
||||
(GetRegisterListEncoding(registers, 15, 1) << 15) |
|
||||
(GetRegisterListEncoding(registers, 14, 1) << 14) |
|
||||
GetRegisterListEncoding(registers, 0, 13));
|
||||
// Alias of: LDM{<c>}{<q>} SP!, <registers> ; T2
|
||||
if (!size.IsNarrow() &&
|
||||
((!registers.Includes(sp) && (registers.GetCount() > 1) &&
|
||||
!(registers.Includes(pc) && registers.Includes(lr))) ||
|
||||
AllowUnpredictable())) {
|
||||
EmitT32_32(0xe8bd0000U | GetRegisterListEncoding(registers, 0, 16));
|
||||
AdvanceIT();
|
||||
return;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// POP{<c>}{<q>} <registers> ; A1
|
||||
if (cond.IsNotNever()) {
|
||||
// Alias of: LDM{<c>}{<q>} SP!, <registers> ; A1
|
||||
if (cond.IsNotNever() &&
|
||||
(!registers.Includes(sp) || AllowUnpredictable())) {
|
||||
EmitA32(0x08bd0000U | (cond.GetCondition() << 28) |
|
||||
GetRegisterListEncoding(registers, 0, 16));
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
Delegate(kPop, &Assembler::pop, cond, size, registers);
|
||||
}
|
||||
|
||||
void Assembler::pop(Condition cond, EncodingSize size, Register rt) {
|
||||
VIXL_ASSERT(AllowAssembler());
|
||||
CheckIT(cond);
|
||||
if (!rt.IsSP() || AllowUnpredictable()) {
|
||||
if (IsUsingT32()) {
|
||||
// POP{<c>}{<q>} <single_register_list> ; T4
|
||||
if (!size.IsNarrow() && ((!rt.IsPC() || OutsideITBlockAndAlOrLast(cond)) ||
|
||||
// Alias of: LDR{<c>}{<q>} <Rt>, [SP], #4 ; T4
|
||||
if (!size.IsNarrow() && (!rt.IsPC() || OutsideITBlockAndAlOrLast(cond) ||
|
||||
AllowUnpredictable())) {
|
||||
EmitT32_32(0xf85d0b04U | (rt.GetCode() << 12));
|
||||
AdvanceIT();
|
||||
|
@ -8512,11 +8533,14 @@ void Assembler::pop(Condition cond, EncodingSize size, Register rt) {
      }
    } else {
      // POP{<c>}{<q>} <single_register_list> ; A1
      // Alias of: LDR{<c>}{<q>} <Rt>, [SP], #4 ; T1
      if (cond.IsNotNever()) {
        EmitA32(0x049d0004U | (cond.GetCondition() << 28) | (rt.GetCode() << 12));
        EmitA32(0x049d0004U | (cond.GetCondition() << 28) |
                (rt.GetCode() << 12));
        return;
      }
    }
  }
  Delegate(kPop, &Assembler::pop, cond, size, rt);
}

@ -8525,30 +8549,39 @@ void Assembler::push(Condition cond,
                     RegisterList registers) {
  VIXL_ASSERT(AllowAssembler());
  CheckIT(cond);
  if (!registers.IsEmpty() || AllowUnpredictable()) {
    if (IsUsingT32()) {
      // PUSH{<c>}{<q>} <registers> ; T1
      if (!size.IsWide() && ((registers.GetList() & ~0x40ff) == 0)) {
      if (!size.IsWide() && registers.IsR0toR7orLR()) {
        EmitT32_16(0xb400 | (GetRegisterListEncoding(registers, 14, 1) << 8) |
                   GetRegisterListEncoding(registers, 0, 8));
        AdvanceIT();
        return;
      }
      // PUSH{<c>}{<q>} <registers> ; T1
      if (!size.IsNarrow() && ((registers.GetList() & ~0x5fff) == 0)) {
        EmitT32_32(0xe92d0000U |
                   (GetRegisterListEncoding(registers, 14, 1) << 14) |
                   GetRegisterListEncoding(registers, 0, 13));
      // Alias of: STMDB SP!, <registers> ; T1
      if (!size.IsNarrow() && !registers.Includes(pc) &&
          ((!registers.Includes(sp) && (registers.GetCount() > 1)) ||
           AllowUnpredictable())) {
        EmitT32_32(0xe92d0000U | GetRegisterListEncoding(registers, 0, 15));
        AdvanceIT();
        return;
      }
    } else {
      // PUSH{<c>}{<q>} <registers> ; A1
      if (cond.IsNotNever()) {
      // Alias of: STMDB SP!, <registers> ; A1
      if (cond.IsNotNever() &&
          // For A32, sp can appear in the list, but stores an UNKNOWN value if
          // it is not the lowest-valued register.
          (!registers.Includes(sp) ||
           registers.GetFirstAvailableRegister().IsSP() ||
           AllowUnpredictable())) {
        EmitA32(0x092d0000U | (cond.GetCondition() << 28) |
                GetRegisterListEncoding(registers, 0, 16));
        return;
      }
    }
  }
  Delegate(kPush, &Assembler::push, cond, size, registers);
}

@ -8557,14 +8590,17 @@ void Assembler::push(Condition cond, EncodingSize size, Register rt) {
  CheckIT(cond);
  if (IsUsingT32()) {
    // PUSH{<c>}{<q>} <single_register_list> ; T4
    if (!size.IsNarrow() && (!rt.IsPC() || AllowUnpredictable())) {
    // Alias of: STR{<c>}{<q>} <Rt>, [SP, #4]! ; T4
    if (!size.IsNarrow() &&
        ((!rt.IsPC() && !rt.IsSP()) || AllowUnpredictable())) {
      EmitT32_32(0xf84d0d04U | (rt.GetCode() << 12));
      AdvanceIT();
      return;
    }
  } else {
    // PUSH{<c>}{<q>} <single_register_list> ; A1
    if (cond.IsNotNever() && (!rt.IsPC() || AllowUnpredictable())) {
    // Alias of: STR{<c>}{<q>} <Rt>, [SP, #4]! ; A1
    if (cond.IsNotNever() && (!rt.IsSP() || AllowUnpredictable())) {
      EmitA32(0x052d0004U | (cond.GetCondition() << 28) | (rt.GetCode() << 12));
      return;
    }

@ -11177,7 +11213,7 @@ void Assembler::stmdb(Condition cond,
  if (IsUsingT32()) {
    // STMDB{<c>}{<q>} SP!, <registers> ; T1
    if (!size.IsWide() && rn.Is(sp) && write_back.DoesWriteBack() &&
        ((registers.GetList() & ~0x40ff) == 0)) {
        registers.IsR0toR7orLR()) {
      EmitT32_16(0xb400 | (GetRegisterListEncoding(registers, 14, 1) << 8) |
                 GetRegisterListEncoding(registers, 0, 8));
      AdvanceIT();

@ -19589,13 +19625,13 @@ void Assembler::vldr(Condition cond,
   public:
    EmitOp() : Location::EmitOperator(T32) {}
    virtual uint32_t Encode(uint32_t instr,
                            Location::Offset pc,
                            const Location* location) const VIXL_OVERRIDE {
      pc += kT32PcDelta;
      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
      VIXL_ASSERT((offset >= -1020) && (offset <= 1020) &&
                  ((offset & 0x3) == 0));
      int32_t target = offset >> 2;
                            Location::Offset program_counter,
                            const Location* loc) const VIXL_OVERRIDE {
      program_counter += kT32PcDelta;
      Location::Offset off =
          loc->GetLocation() - AlignDown(program_counter, 4);
      VIXL_ASSERT((off >= -1020) && (off <= 1020) && ((off & 0x3) == 0));
      int32_t target = off >> 2;
      uint32_t U = (target >= 0);
      target = abs(target) | (U << 8);
      return instr | (target & 0xff) | ((target & 0x100) << 15);

@ -19619,13 +19655,13 @@ void Assembler::vldr(Condition cond,
   public:
    EmitOp() : Location::EmitOperator(A32) {}
    virtual uint32_t Encode(uint32_t instr,
                            Location::Offset pc,
                            const Location* location) const VIXL_OVERRIDE {
      pc += kA32PcDelta;
      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
      VIXL_ASSERT((offset >= -1020) && (offset <= 1020) &&
                  ((offset & 0x3) == 0));
      int32_t target = offset >> 2;
                            Location::Offset program_counter,
                            const Location* loc) const VIXL_OVERRIDE {
      program_counter += kA32PcDelta;
      Location::Offset off =
          loc->GetLocation() - AlignDown(program_counter, 4);
      VIXL_ASSERT((off >= -1020) && (off <= 1020) && ((off & 0x3) == 0));
      int32_t target = off >> 2;
      uint32_t U = (target >= 0);
      target = abs(target) | (U << 8);
      return instr | (target & 0xff) | ((target & 0x100) << 15);

@ -19743,13 +19779,13 @@ void Assembler::vldr(Condition cond,
   public:
    EmitOp() : Location::EmitOperator(T32) {}
    virtual uint32_t Encode(uint32_t instr,
                            Location::Offset pc,
                            const Location* location) const VIXL_OVERRIDE {
      pc += kT32PcDelta;
      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
      VIXL_ASSERT((offset >= -1020) && (offset <= 1020) &&
                  ((offset & 0x3) == 0));
      int32_t target = offset >> 2;
                            Location::Offset program_counter,
                            const Location* loc) const VIXL_OVERRIDE {
      program_counter += kT32PcDelta;
      Location::Offset off =
          loc->GetLocation() - AlignDown(program_counter, 4);
      VIXL_ASSERT((off >= -1020) && (off <= 1020) && ((off & 0x3) == 0));
      int32_t target = off >> 2;
      uint32_t U = (target >= 0);
      target = abs(target) | (U << 8);
      return instr | (target & 0xff) | ((target & 0x100) << 15);

@ -19773,13 +19809,13 @@ void Assembler::vldr(Condition cond,
   public:
    EmitOp() : Location::EmitOperator(A32) {}
    virtual uint32_t Encode(uint32_t instr,
                            Location::Offset pc,
                            const Location* location) const VIXL_OVERRIDE {
      pc += kA32PcDelta;
      Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
      VIXL_ASSERT((offset >= -1020) && (offset <= 1020) &&
                  ((offset & 0x3) == 0));
      int32_t target = offset >> 2;
                            Location::Offset program_counter,
                            const Location* loc) const VIXL_OVERRIDE {
      program_counter += kA32PcDelta;
      Location::Offset off =
          loc->GetLocation() - AlignDown(program_counter, 4);
      VIXL_ASSERT((off >= -1020) && (off <= 1020) && ((off & 0x3) == 0));
      int32_t target = off >> 2;
      uint32_t U = (target >= 0);
      target = abs(target) | (U << 8);
      return instr | (target & 0xff) | ((target & 0x100) << 15);
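The four Encode overloads above all share the same offset packing: a word-aligned byte offset in [-1020, 1020] is reduced to an 8-bit word count, with the sign carried by the U (add/subtract) bit at instruction bit 23. A standalone sketch of just that packing (PackVldrOffset is a hypothetical name):

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>

uint32_t PackVldrOffset(uint32_t instr, int32_t offset) {
  assert((offset >= -1020) && (offset <= 1020) && ((offset & 0x3) == 0));
  int32_t target = offset >> 2;               // word count
  uint32_t U = (target >= 0);                 // 1 = add, 0 = subtract
  target = std::abs(target) | (U << 8);       // fold U in at bit 8 for now
  return instr | (target & 0xff) | ((target & 0x100) << 15);  // U -> bit 23
}

int main() {
  // An offset of -8 bytes packs as imm8 = 2 with U = 0.
  std::printf("0x%08x\n", PackVldrOffset(0, -8));
  return 0;
}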
@ -348,7 +348,7 @@ DataTypeValue Dt_U_opc1_opc2_1_Decode(uint32_t value, unsigned* lane) {
    *lane = (value >> 2) & 1;
    return Untyped32;
  }
  *lane = -1;
  *lane = ~0U;
  return kDataTypeValueInvalid;
}

@ -365,7 +365,7 @@ DataTypeValue Dt_opc1_opc2_1_Decode(uint32_t value, unsigned* lane) {
    *lane = (value >> 2) & 1;
    return Untyped32;
  }
  *lane = -1;
  *lane = ~0U;
  return kDataTypeValueInvalid;
}

@ -382,7 +382,7 @@ DataTypeValue Dt_imm4_1_Decode(uint32_t value, unsigned* lane) {
    *lane = (value >> 3) & 1;
    return Untyped32;
  }
  *lane = -1;
  *lane = ~0U;
  return kDataTypeValueInvalid;
}

@ -8288,13 +8288,13 @@ void Disassembler::DecodeT32(uint32_t instr) {
      UnallocatedT32(instr);
      return;
    }
    unsigned firstcond = (instr >> 20) & 0xf;
    unsigned first_cond = (instr >> 20) & 0xf;
    unsigned mask = (instr >> 16) & 0xf;
    bool wasInITBlock = InITBlock();
    SetIT(Condition(firstcond), mask);
    it(Condition(firstcond), mask);
    if (wasInITBlock || (firstcond == 15) ||
        ((firstcond == al) &&
    bool was_in_it_block = InITBlock();
    SetIT(Condition(first_cond), mask);
    it(Condition(first_cond), mask);
    if (was_in_it_block || (first_cond == 15) ||
        ((first_cond == al) &&
         (BitCount(Uint32(mask)) != 1))) {
      UnpredictableT32(instr);
    }

@ -60977,7 +60977,7 @@ void Disassembler::DecodeA32(uint32_t instr) {
      Condition condition((instr >> 28) & 0xf);
      unsigned rd = (instr >> 12) & 0xf;
      uint32_t imm = ImmediateA32::Decode(instr & 0xfff);
      Location location(-imm, kA32PcDelta);
      Location location(UnsignedNegate(imm), kA32PcDelta);
      // ADR{<c>}{<q>} <Rd>, <label> ; A2
      adr(condition, Best, Register(rd), &location);
      break;

@ -95,11 +95,11 @@ QRegister VRegister::Q() const {


Register RegisterList::GetFirstAvailableRegister() const {
  for (uint32_t i = 0; i < kNumberOfRegisters; i++) {
    if (((list_ >> i) & 1) != 0) return Register(i);
  }
  if (list_ == 0) {
    return Register();
  }
  return Register(CountTrailingZeros(list_));
}


std::ostream& PrintRegisterList(std::ostream& os,  // NOLINT(runtime/references)
@ -651,7 +651,7 @@ bool ImmediateT32::IsImmediateT32(uint32_t imm) {
      (((imm & 0xff00) == 0) || ((imm & 0xff) == 0)))
    return true;
  /* isolate least-significant set bit */
  uint32_t lsb = imm & -imm;
  uint32_t lsb = imm & UnsignedNegate(imm);
  /* if imm is less than lsb*256 then it fits, but instead we test imm/256 to
   * avoid overflow (underflow is always a successful case) */
  return ((imm >> 8) < lsb);

@ -702,7 +702,7 @@ bool ImmediateA32::IsImmediateA32(uint32_t imm) {
   * that the least-significant set bit is always an even bit */
  imm = imm | ((imm >> 1) & 0x55555555);
  /* isolate least-significant set bit (always even) */
  uint32_t lsb = imm & -imm;
  uint32_t lsb = imm & UnsignedNegate(imm);
  /* if imm is less than lsb*256 then it fits, but instead we test imm/256 to
   * avoid overflow (underflow is always a successful case) */
  return ((imm >> 8) < lsb);
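Both predicates above rely on the same two's-complement identity: `imm & UnsignedNegate(imm)` (formerly `imm & -imm`) isolates the least-significant set bit, and `(imm >> 8) < lsb` asks whether imm < lsb * 256 without overflowing. A small standalone check (FitsInEightBitWindow is a hypothetical name; UnsignedNegate is a stand-in assumed equivalent to ~x + 1):

#include <cstdint>
#include <cstdio>

uint32_t UnsignedNegate(uint32_t x) { return ~x + 1; }

// True when all set bits of imm fit in an 8-bit window starting at its
// lowest set bit, i.e. imm < lsb * 256.
bool FitsInEightBitWindow(uint32_t imm) {
  uint32_t lsb = imm & UnsignedNegate(imm);  // e.g. 0x5000 -> 0x1000
  return (imm >> 8) < lsb;  // overflow-safe form of imm < lsb * 256
}

int main() {
  std::printf("%d %d\n", FitsInEightBitWindow(0xff000000u),  // 1: 8 bits wide
              FitsInEightBitWindow(0x1010u));                // 0: 9 bits wide
  return 0;
}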
@ -266,8 +266,8 @@ MemOperand MacroAssembler::MemOperandComputationHelper(
  uint32_t load_store_offset = offset & extra_offset_mask;
  uint32_t add_offset = offset & ~extra_offset_mask;
  if ((add_offset != 0) &&
      (IsModifiedImmediate(offset) || IsModifiedImmediate(-offset))) {
  if ((add_offset != 0) && (IsModifiedImmediate(offset) ||
                            IsModifiedImmediate(UnsignedNegate(offset)))) {
    load_store_offset = 0;
    add_offset = offset;
  }

@ -288,7 +288,7 @@ MemOperand MacroAssembler::MemOperandComputationHelper(
  // of ADR -- to get behaviour like loads and stores. This ADR can handle
  // at least as much offset as the load_store_offset so it can replace it.

  uint32_t sub_pc_offset = (-offset) & 0xfff;
  uint32_t sub_pc_offset = UnsignedNegate(offset) & 0xfff;
  load_store_offset = (offset + sub_pc_offset) & extra_offset_mask;
  add_offset = (offset + sub_pc_offset) & ~extra_offset_mask;
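To make the split above concrete, here is a hedged standalone illustration with a made-up 12-bit mask: the low bits ride on the load/store immediate, while the rest are added to the base register first.

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t offset = 0x12345;
  uint32_t extra_offset_mask = 0xfff;  // e.g. a 12-bit immediate field
  uint32_t load_store_offset = offset & extra_offset_mask;  // 0x345
  uint32_t add_offset = offset & ~extra_offset_mask;        // 0x12000
  std::printf("add 0x%x to the base, then access at [base, #0x%x]\n",
              add_offset, load_store_offset);
  return 0;
}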
@ -599,7 +599,7 @@ void MacroAssembler::Printf(const char* format,
  Vmsr(FPSCR, tmp);
  Pop(tmp);
  Msr(APSR_nzcvqg, tmp);
  // Restore the regsisters.
  // Restore the registers.
  if (Has32DRegs()) Vpop(Untyped64, DRegisterList(d16, 16));
  Vpop(Untyped64, DRegisterList(d0, 8));
  Pop(RegisterList(saved_registers_mask));

File diff suppressed because it is too large
File diff suppressed because it is too large
@ -24,6 +24,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#if defined(__aarch64__) && (defined(__ANDROID__) || defined(__linux__))
#include <sys/auxv.h>
#define VIXL_USE_LINUX_HWCAP 1
#endif

#include "../utils-vixl.h"

#include "cpu-aarch64.h"
@ -31,6 +36,382 @@
namespace vixl {
namespace aarch64 {


const IDRegister::Field AA64PFR0::kFP(16, Field::kSigned);
const IDRegister::Field AA64PFR0::kAdvSIMD(20, Field::kSigned);
const IDRegister::Field AA64PFR0::kRAS(28);
const IDRegister::Field AA64PFR0::kSVE(32);
const IDRegister::Field AA64PFR0::kDIT(48);
const IDRegister::Field AA64PFR0::kCSV2(56);
const IDRegister::Field AA64PFR0::kCSV3(60);

const IDRegister::Field AA64PFR1::kBT(0);
const IDRegister::Field AA64PFR1::kSSBS(4);
const IDRegister::Field AA64PFR1::kMTE(8);
const IDRegister::Field AA64PFR1::kSME(24);

const IDRegister::Field AA64ISAR0::kAES(4);
const IDRegister::Field AA64ISAR0::kSHA1(8);
const IDRegister::Field AA64ISAR0::kSHA2(12);
const IDRegister::Field AA64ISAR0::kCRC32(16);
const IDRegister::Field AA64ISAR0::kAtomic(20);
const IDRegister::Field AA64ISAR0::kRDM(28);
const IDRegister::Field AA64ISAR0::kSHA3(32);
const IDRegister::Field AA64ISAR0::kSM3(36);
const IDRegister::Field AA64ISAR0::kSM4(40);
const IDRegister::Field AA64ISAR0::kDP(44);
const IDRegister::Field AA64ISAR0::kFHM(48);
const IDRegister::Field AA64ISAR0::kTS(52);
const IDRegister::Field AA64ISAR0::kRNDR(60);

const IDRegister::Field AA64ISAR1::kDPB(0);
const IDRegister::Field AA64ISAR1::kAPA(4);
const IDRegister::Field AA64ISAR1::kAPI(8);
const IDRegister::Field AA64ISAR1::kJSCVT(12);
const IDRegister::Field AA64ISAR1::kFCMA(16);
const IDRegister::Field AA64ISAR1::kLRCPC(20);
const IDRegister::Field AA64ISAR1::kGPA(24);
const IDRegister::Field AA64ISAR1::kGPI(28);
const IDRegister::Field AA64ISAR1::kFRINTTS(32);
const IDRegister::Field AA64ISAR1::kSB(36);
const IDRegister::Field AA64ISAR1::kSPECRES(40);
const IDRegister::Field AA64ISAR1::kBF16(44);
const IDRegister::Field AA64ISAR1::kDGH(48);
const IDRegister::Field AA64ISAR1::kI8MM(52);

const IDRegister::Field AA64ISAR2::kWFXT(0);
const IDRegister::Field AA64ISAR2::kRPRES(4);
const IDRegister::Field AA64ISAR2::kMOPS(16);
const IDRegister::Field AA64ISAR2::kCSSC(52);

const IDRegister::Field AA64MMFR0::kECV(60);

const IDRegister::Field AA64MMFR1::kLO(16);
const IDRegister::Field AA64MMFR1::kAFP(44);

const IDRegister::Field AA64MMFR2::kAT(32);

const IDRegister::Field AA64ZFR0::kSVEver(0);
const IDRegister::Field AA64ZFR0::kAES(4);
const IDRegister::Field AA64ZFR0::kBitPerm(16);
const IDRegister::Field AA64ZFR0::kBF16(20);
const IDRegister::Field AA64ZFR0::kSHA3(32);
const IDRegister::Field AA64ZFR0::kSM4(40);
const IDRegister::Field AA64ZFR0::kI8MM(44);
const IDRegister::Field AA64ZFR0::kF32MM(52);
const IDRegister::Field AA64ZFR0::kF64MM(56);

const IDRegister::Field AA64SMFR0::kSMEf32f32(32, 1);
const IDRegister::Field AA64SMFR0::kSMEb16f32(34, 1);
const IDRegister::Field AA64SMFR0::kSMEf16f32(35, 1);
const IDRegister::Field AA64SMFR0::kSMEi8i32(36);
const IDRegister::Field AA64SMFR0::kSMEf64f64(48, 1);
const IDRegister::Field AA64SMFR0::kSMEi16i64(52);
const IDRegister::Field AA64SMFR0::kSMEfa64(63, 1);

CPUFeatures AA64PFR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kFP) >= 0) f.Combine(CPUFeatures::kFP);
  if (Get(kFP) >= 1) f.Combine(CPUFeatures::kFPHalf);
  if (Get(kAdvSIMD) >= 0) f.Combine(CPUFeatures::kNEON);
  if (Get(kAdvSIMD) >= 1) f.Combine(CPUFeatures::kNEONHalf);
  if (Get(kRAS) >= 1) f.Combine(CPUFeatures::kRAS);
  if (Get(kSVE) >= 1) f.Combine(CPUFeatures::kSVE);
  if (Get(kDIT) >= 1) f.Combine(CPUFeatures::kDIT);
  if (Get(kCSV2) >= 1) f.Combine(CPUFeatures::kCSV2);
  if (Get(kCSV2) >= 2) f.Combine(CPUFeatures::kSCXTNUM);
  if (Get(kCSV3) >= 1) f.Combine(CPUFeatures::kCSV3);
  return f;
}

CPUFeatures AA64PFR1::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kBT) >= 1) f.Combine(CPUFeatures::kBTI);
  if (Get(kSSBS) >= 1) f.Combine(CPUFeatures::kSSBS);
  if (Get(kSSBS) >= 2) f.Combine(CPUFeatures::kSSBSControl);
  if (Get(kMTE) >= 1) f.Combine(CPUFeatures::kMTEInstructions);
  if (Get(kMTE) >= 2) f.Combine(CPUFeatures::kMTE);
  if (Get(kMTE) >= 3) f.Combine(CPUFeatures::kMTE3);
  if (Get(kSME) >= 1) f.Combine(CPUFeatures::kSME);
  return f;
}

CPUFeatures AA64ISAR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kAES) >= 1) f.Combine(CPUFeatures::kAES);
  if (Get(kAES) >= 2) f.Combine(CPUFeatures::kPmull1Q);
  if (Get(kSHA1) >= 1) f.Combine(CPUFeatures::kSHA1);
  if (Get(kSHA2) >= 1) f.Combine(CPUFeatures::kSHA2);
  if (Get(kSHA2) >= 2) f.Combine(CPUFeatures::kSHA512);
  if (Get(kCRC32) >= 1) f.Combine(CPUFeatures::kCRC32);
  if (Get(kAtomic) >= 1) f.Combine(CPUFeatures::kAtomics);
  if (Get(kRDM) >= 1) f.Combine(CPUFeatures::kRDM);
  if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSHA3);
  if (Get(kSM3) >= 1) f.Combine(CPUFeatures::kSM3);
  if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSM4);
  if (Get(kDP) >= 1) f.Combine(CPUFeatures::kDotProduct);
  if (Get(kFHM) >= 1) f.Combine(CPUFeatures::kFHM);
  if (Get(kTS) >= 1) f.Combine(CPUFeatures::kFlagM);
  if (Get(kTS) >= 2) f.Combine(CPUFeatures::kAXFlag);
  if (Get(kRNDR) >= 1) f.Combine(CPUFeatures::kRNG);
  return f;
}

CPUFeatures AA64ISAR1::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kDPB) >= 1) f.Combine(CPUFeatures::kDCPoP);
  if (Get(kDPB) >= 2) f.Combine(CPUFeatures::kDCCVADP);
  if (Get(kJSCVT) >= 1) f.Combine(CPUFeatures::kJSCVT);
  if (Get(kFCMA) >= 1) f.Combine(CPUFeatures::kFcma);
  if (Get(kLRCPC) >= 1) f.Combine(CPUFeatures::kRCpc);
  if (Get(kLRCPC) >= 2) f.Combine(CPUFeatures::kRCpcImm);
  if (Get(kFRINTTS) >= 1) f.Combine(CPUFeatures::kFrintToFixedSizedInt);
  if (Get(kSB) >= 1) f.Combine(CPUFeatures::kSB);
  if (Get(kSPECRES) >= 1) f.Combine(CPUFeatures::kSPECRES);
  if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kBF16);
  if (Get(kBF16) >= 2) f.Combine(CPUFeatures::kEBF16);
  if (Get(kDGH) >= 1) f.Combine(CPUFeatures::kDGH);
  if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kI8MM);

  // Only one of these fields should be non-zero, but they have the same
  // encodings, so merge the logic.
  int apx = std::max(Get(kAPI), Get(kAPA));
  if (apx >= 1) {
    f.Combine(CPUFeatures::kPAuth);
    // APA (rather than API) indicates QARMA.
    if (Get(kAPA) >= 1) f.Combine(CPUFeatures::kPAuthQARMA);
    if (apx == 0b0010) f.Combine(CPUFeatures::kPAuthEnhancedPAC);
    if (apx >= 0b0011) f.Combine(CPUFeatures::kPAuthEnhancedPAC2);
    if (apx >= 0b0100) f.Combine(CPUFeatures::kPAuthFPAC);
    if (apx >= 0b0101) f.Combine(CPUFeatures::kPAuthFPACCombined);
  }

  if (Get(kGPI) >= 1) f.Combine(CPUFeatures::kPAuthGeneric);
  if (Get(kGPA) >= 1) {
    f.Combine(CPUFeatures::kPAuthGeneric, CPUFeatures::kPAuthGenericQARMA);
  }
  return f;
}

CPUFeatures AA64ISAR2::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kWFXT) >= 2) f.Combine(CPUFeatures::kWFXT);
  if (Get(kRPRES) >= 1) f.Combine(CPUFeatures::kRPRES);
  if (Get(kMOPS) >= 1) f.Combine(CPUFeatures::kMOPS);
  if (Get(kCSSC) >= 1) f.Combine(CPUFeatures::kCSSC);
  return f;
}

CPUFeatures AA64MMFR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kECV) >= 1) f.Combine(CPUFeatures::kECV);
  return f;
}

CPUFeatures AA64MMFR1::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kLO) >= 1) f.Combine(CPUFeatures::kLORegions);
  if (Get(kAFP) >= 1) f.Combine(CPUFeatures::kAFP);
  return f;
}

CPUFeatures AA64MMFR2::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kAT) >= 1) f.Combine(CPUFeatures::kUSCAT);
  return f;
}

CPUFeatures AA64ZFR0::GetCPUFeatures() const {
  // This register is only available with SVE, but reads-as-zero in its absence,
  // so it's always safe to read it.
  CPUFeatures f;
  if (Get(kF64MM) >= 1) f.Combine(CPUFeatures::kSVEF64MM);
  if (Get(kF32MM) >= 1) f.Combine(CPUFeatures::kSVEF32MM);
  if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kSVEI8MM);
  if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSVESM4);
  if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSVESHA3);
  if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kSVEBF16);
  if (Get(kBF16) >= 2) f.Combine(CPUFeatures::kSVE_EBF16);
  if (Get(kBitPerm) >= 1) f.Combine(CPUFeatures::kSVEBitPerm);
  if (Get(kAES) >= 1) f.Combine(CPUFeatures::kSVEAES);
  if (Get(kAES) >= 2) f.Combine(CPUFeatures::kSVEPmull128);
  if (Get(kSVEver) >= 1) f.Combine(CPUFeatures::kSVE2);
  return f;
}

CPUFeatures AA64SMFR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kSMEf32f32) >= 1) f.Combine(CPUFeatures::kSMEf32f32);
  if (Get(kSMEb16f32) >= 1) f.Combine(CPUFeatures::kSMEb16f32);
  if (Get(kSMEf16f32) >= 1) f.Combine(CPUFeatures::kSMEf16f32);
  if (Get(kSMEi8i32) >= 15) f.Combine(CPUFeatures::kSMEi8i32);
  if (Get(kSMEf64f64) >= 1) f.Combine(CPUFeatures::kSMEf64f64);
  if (Get(kSMEi16i64) >= 15) f.Combine(CPUFeatures::kSMEi16i64);
  if (Get(kSMEfa64) >= 1) f.Combine(CPUFeatures::kSMEfa64);
  return f;
}
int IDRegister::Get(IDRegister::Field field) const {
  int msb = field.GetMsb();
  int lsb = field.GetLsb();
  VIXL_STATIC_ASSERT(static_cast<size_t>(Field::kMaxWidthInBits) <
                     (sizeof(int) * kBitsPerByte));
  switch (field.GetType()) {
    case Field::kSigned:
      return static_cast<int>(ExtractSignedBitfield64(msb, lsb, value_));
    case Field::kUnsigned:
      return static_cast<int>(ExtractUnsignedBitfield64(msb, lsb, value_));
  }
  VIXL_UNREACHABLE();
  return 0;
}
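A self-contained model of the extraction performed by Get() above (ExtractSigned and ExtractUnsigned are stand-ins for vixl's ExtractSignedBitfield64 and ExtractUnsignedBitfield64): signed fields sign-extend, which is how a field such as AA64PFR0.FP reads as -1, meaning "not implemented", when it holds 0b1111.

#include <cstdint>
#include <cstdio>

int64_t ExtractSigned(int msb, int lsb, uint64_t value) {
  int shift = 63 - msb;
  // Shift the field up to the sign bit, then arithmetic-shift it back down.
  return static_cast<int64_t>(value << shift) >> (shift + lsb);
}

uint64_t ExtractUnsigned(int msb, int lsb, uint64_t value) {
  return (value >> lsb) & ((UINT64_C(1) << (msb - lsb + 1)) - 1);
}

int main() {
  uint64_t aa64pfr0 = UINT64_C(0xf) << 16;  // FP field (bits 19:16) = 0b1111
  // Signed read gives -1 (FP not implemented); an unsigned read would give 15.
  std::printf("%lld\n", static_cast<long long>(ExtractSigned(19, 16, aa64pfr0)));
  return 0;
}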
CPUFeatures CPU::InferCPUFeaturesFromIDRegisters() {
  CPUFeatures f;
#define VIXL_COMBINE_ID_REG(NAME, MRS_ARG) \
  f.Combine(Read##NAME().GetCPUFeatures());
  VIXL_AARCH64_ID_REG_LIST(VIXL_COMBINE_ID_REG)
#undef VIXL_COMBINE_ID_REG
  return f;
}

CPUFeatures CPU::InferCPUFeaturesFromOS(
    CPUFeatures::QueryIDRegistersOption option) {
  CPUFeatures features;

#ifdef VIXL_USE_LINUX_HWCAP
  // Map each set bit onto a feature. Ideally, we'd use HWCAP_* macros rather
  // than explicit bits, but explicit bits allow us to identify features that
  // the toolchain doesn't know about.
  static const CPUFeatures::Feature kFeatureBitsLow[] =
      {// Bits 0-7
       CPUFeatures::kFP,
       CPUFeatures::kNEON,
       CPUFeatures::kNone,  // "EVTSTRM", which VIXL doesn't track.
       CPUFeatures::kAES,
       CPUFeatures::kPmull1Q,
       CPUFeatures::kSHA1,
       CPUFeatures::kSHA2,
       CPUFeatures::kCRC32,
       // Bits 8-15
       CPUFeatures::kAtomics,
       CPUFeatures::kFPHalf,
       CPUFeatures::kNEONHalf,
       CPUFeatures::kIDRegisterEmulation,
       CPUFeatures::kRDM,
       CPUFeatures::kJSCVT,
       CPUFeatures::kFcma,
       CPUFeatures::kRCpc,
       // Bits 16-23
       CPUFeatures::kDCPoP,
       CPUFeatures::kSHA3,
       CPUFeatures::kSM3,
       CPUFeatures::kSM4,
       CPUFeatures::kDotProduct,
       CPUFeatures::kSHA512,
       CPUFeatures::kSVE,
       CPUFeatures::kFHM,
       // Bits 24-31
       CPUFeatures::kDIT,
       CPUFeatures::kUSCAT,
       CPUFeatures::kRCpcImm,
       CPUFeatures::kFlagM,
       CPUFeatures::kSSBSControl,
       CPUFeatures::kSB,
       CPUFeatures::kPAuth,
       CPUFeatures::kPAuthGeneric};
  VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsLow) < 64);

  static const CPUFeatures::Feature kFeatureBitsHigh[] =
      {// Bits 0-7
       CPUFeatures::kDCCVADP,
       CPUFeatures::kSVE2,
       CPUFeatures::kSVEAES,
       CPUFeatures::kSVEPmull128,
       CPUFeatures::kSVEBitPerm,
       CPUFeatures::kSVESHA3,
       CPUFeatures::kSVESM4,
       CPUFeatures::kAXFlag,
       // Bits 8-15
       CPUFeatures::kFrintToFixedSizedInt,
       CPUFeatures::kSVEI8MM,
       CPUFeatures::kSVEF32MM,
       CPUFeatures::kSVEF64MM,
       CPUFeatures::kSVEBF16,
       CPUFeatures::kI8MM,
       CPUFeatures::kBF16,
       CPUFeatures::kDGH,
       // Bits 16-23
       CPUFeatures::kRNG,
       CPUFeatures::kBTI,
       CPUFeatures::kMTE,
       CPUFeatures::kECV,
       CPUFeatures::kAFP,
       CPUFeatures::kRPRES,
       CPUFeatures::kMTE3,
       CPUFeatures::kSME,
       // Bits 24-31
       CPUFeatures::kSMEi16i64,
       CPUFeatures::kSMEf64f64,
       CPUFeatures::kSMEi8i32,
       CPUFeatures::kSMEf16f32,
       CPUFeatures::kSMEb16f32,
       CPUFeatures::kSMEf32f32,
       CPUFeatures::kSMEfa64,
       CPUFeatures::kWFXT,
       // Bits 32-39
       CPUFeatures::kEBF16,
       CPUFeatures::kSVE_EBF16};
  VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsHigh) < 64);

  auto combine_features = [&features](uint64_t hwcap,
                                      const CPUFeatures::Feature* feature_array,
                                      size_t features_size) {
    for (size_t i = 0; i < features_size; i++) {
      if (hwcap & (UINT64_C(1) << i)) features.Combine(feature_array[i]);
    }
  };

  uint64_t hwcap_low = getauxval(AT_HWCAP);
  uint64_t hwcap_high = getauxval(AT_HWCAP2);

  combine_features(hwcap_low, kFeatureBitsLow, ArrayLength(kFeatureBitsLow));
  combine_features(hwcap_high, kFeatureBitsHigh, ArrayLength(kFeatureBitsHigh));

  // MTE support from HWCAP2 signifies FEAT_MTE1 and FEAT_MTE2 support
  if (features.Has(CPUFeatures::kMTE)) {
    features.Combine(CPUFeatures::kMTEInstructions);
  }
#endif  // VIXL_USE_LINUX_HWCAP

  if ((option == CPUFeatures::kQueryIDRegistersIfAvailable) &&
      (features.Has(CPUFeatures::kIDRegisterEmulation))) {
    features.Combine(InferCPUFeaturesFromIDRegisters());
  }
  return features;
}
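A hedged, standalone sketch of the auxv path used above: each set bit of AT_HWCAP indexes a position-ordered table. The bit names here follow the kernel's aarch64 hwcap ABI; only the first eight bits are shown, and the program only makes sense on Linux/aarch64.

#include <sys/auxv.h>

#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
  // Names of the first eight HWCAP bits in the kernel's aarch64 ABI.
  const char* kLowBitNames[] = {"fp",    "asimd", "evtstrm", "aes",
                                "pmull", "sha1",  "sha2",    "crc32"};
  uint64_t hwcap = getauxval(AT_HWCAP);
  for (std::size_t i = 0; i < sizeof(kLowBitNames) / sizeof(kLowBitNames[0]);
       i++) {
    if (hwcap & (UINT64_C(1) << i)) std::printf("%s\n", kLowBitNames[i]);
  }
  return 0;
}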
#ifdef __aarch64__
#define VIXL_READ_ID_REG(NAME, MRS_ARG)        \
  NAME CPU::Read##NAME() {                     \
    uint64_t value = 0;                        \
    __asm__("mrs %0, " MRS_ARG : "=r"(value)); \
    return NAME(value);                        \
  }
#else  // __aarch64__
#define VIXL_READ_ID_REG(NAME, MRS_ARG) \
  NAME CPU::Read##NAME() {              \
    VIXL_UNREACHABLE();                 \
    return NAME(0);                     \
  }
#endif  // __aarch64__

VIXL_AARCH64_ID_REG_LIST(VIXL_READ_ID_REG)

#undef VIXL_READ_ID_REG


// Initialise to smallest possible cache size.
unsigned CPU::dcache_line_size_ = 1;
unsigned CPU::icache_line_size_ = 1;
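For reference, the list macro above presumably pairs each ID register class with its MRS operand string, so expanding VIXL_READ_ID_REG for an assumed entry such as (AA64PFR0, "ID_AA64PFR0_EL1") would produce:

//   AA64PFR0 CPU::ReadAA64PFR0() {
//     uint64_t value = 0;
//     __asm__("mrs %0, " "ID_AA64PFR0_EL1" : "=r"(value));
//     return AA64PFR0(value);
//   }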
@ -76,6 +457,27 @@ uint32_t CPU::GetCacheType() {
}


// Query the SVE vector length. This requires CPUFeatures::kSVE.
int CPU::ReadSVEVectorLengthInBits() {
#ifdef __aarch64__
  uint64_t vl;
  // To support compilers that don't understand `rdvl`, encode the value
  // directly and move it manually.
  __asm__(
      " .word 0x04bf5100\n"  // rdvl x0, #8
      " mov %[vl], x0\n"
      : [vl] "=r"(vl)
      :
      : "x0");
  VIXL_ASSERT(vl <= INT_MAX);
  return static_cast<int>(vl);
#else
  VIXL_UNREACHABLE();
  return 0;
#endif
}
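A note on the `.word` above: 0x04bf5100 encodes `rdvl x0, #8`, and RDVL multiplies its immediate by the vector length in bytes, so the function returns the length in bits. A trivial usage sketch with an assumed result (the 512 below is a made-up example value, not queried from hardware):

#include <cstdio>

int main() {
  int vl_bits = 512;  // pretend CPU::ReadSVEVectorLengthInBits() returned this
  std::printf("%d x 32-bit lanes, %d x 64-bit lanes per Z register\n",
              vl_bits / 32, vl_bits / 64);
  return 0;
}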
void CPU::EnsureIAndDCacheCoherency(void* address, size_t length) {
#ifdef __aarch64__
  // Implement the cache synchronisation for all targets where AArch64 is the
@ -174,5 +576,6 @@ void CPU::EnsureIAndDCacheCoherency(void *address, size_t length) {
#endif
}


}  // namespace aarch64
}  // namespace vixl
@ -24,16 +24,54 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "cpu-features-auditor-aarch64.h"

#include "cpu-features.h"
#include "globals-vixl.h"
#include "utils-vixl.h"
#include "decoder-aarch64.h"

#include "cpu-features-auditor-aarch64.h"
#include "decoder-aarch64.h"

namespace vixl {
namespace aarch64 {


const CPUFeaturesAuditor::FormToVisitorFnMap*
CPUFeaturesAuditor::GetFormToVisitorFnMap() {
  static const FormToVisitorFnMap form_to_visitor = {
      DEFAULT_FORM_TO_VISITOR_MAP(CPUFeaturesAuditor),
      SIM_AUD_VISITOR_MAP(CPUFeaturesAuditor),
      {"fcmla_asimdelem_c_h"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
      {"fcmla_asimdelem_c_s"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
      {"fmlal2_asimdelem_lh"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
      {"fmlal_asimdelem_lh"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
      {"fmla_asimdelem_rh_h"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
      {"fmla_asimdelem_r_sd"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
      {"fmlsl2_asimdelem_lh"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
      {"fmlsl_asimdelem_lh"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
      {"fmls_asimdelem_rh_h"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
      {"fmls_asimdelem_r_sd"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
      {"fmulx_asimdelem_rh_h"_h,
       &CPUFeaturesAuditor::VisitNEONByIndexedElement},
      {"fmulx_asimdelem_r_sd"_h,
       &CPUFeaturesAuditor::VisitNEONByIndexedElement},
      {"fmul_asimdelem_rh_h"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
      {"fmul_asimdelem_r_sd"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
      {"sdot_asimdelem_d"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
      {"smlal_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
      {"smlsl_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
      {"smull_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
      {"sqdmlal_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
      {"sqdmlsl_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
      {"sqdmull_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
      {"udot_asimdelem_d"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
      {"umlal_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
      {"umlsl_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
      {"umull_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
  };
  return &form_to_visitor;
}

// Every instruction must update last_instruction_, even if only to clear it,
// and every instruction must also update seen_ once it has been fully handled.
// This scope makes that simple, and allows early returns in the decode logic.
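A reduced model of the dispatch pattern in GetFormToVisitorFnMap above, not vixl's actual types: decoded form names are hashed to 32-bit keys by a user-defined literal, and visitors are found by integer lookup. The FNV-1a hash below is a stand-in for vixl's own Hash.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <unordered_map>

// FNV-1a, standing in for vixl's string hash.
constexpr uint32_t Fnv1a(const char* s) {
  uint32_t h = 2166136261u;
  while (*s != '\0') h = (h ^ static_cast<uint32_t>(*s++)) * 16777619u;
  return h;
}

constexpr uint32_t operator""_h(const char* s, std::size_t) { return Fnv1a(s); }

int main() {
  std::unordered_map<uint32_t, const char*> dispatch = {
      {"fmla_asimdelem_r_sd"_h, "VisitNEONByIndexedElement"}};
  // Form names are hashed once and dispatched by integer key.
  std::printf("%s\n", dispatch.at("fmla_asimdelem_r_sd"_h));
  return 0;
}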
@ -140,6 +178,25 @@ void CPUFeaturesAuditor::VisitAddSubWithCarry(const Instruction* instr) {
  USE(instr);
}

void CPUFeaturesAuditor::VisitRotateRightIntoFlags(const Instruction* instr) {
  RecordInstructionFeaturesScope scope(this);
  switch (instr->Mask(RotateRightIntoFlagsMask)) {
    case RMIF:
      scope.Record(CPUFeatures::kFlagM);
      return;
  }
}

void CPUFeaturesAuditor::VisitEvaluateIntoFlags(const Instruction* instr) {
  RecordInstructionFeaturesScope scope(this);
  switch (instr->Mask(EvaluateIntoFlagsMask)) {
    case SETF8:
    case SETF16:
      scope.Record(CPUFeatures::kFlagM);
      return;
  }
}

void CPUFeaturesAuditor::VisitAtomicMemory(const Instruction* instr) {
  RecordInstructionFeaturesScope scope(this);
  switch (instr->Mask(AtomicMemoryMask)) {

@ -254,6 +311,37 @@ void CPUFeaturesAuditor::VisitDataProcessing2Source(const Instruction* instr) {
  }
}

void CPUFeaturesAuditor::VisitLoadStoreRCpcUnscaledOffset(
    const Instruction* instr) {
  RecordInstructionFeaturesScope scope(this);
  switch (instr->Mask(LoadStoreRCpcUnscaledOffsetMask)) {
    case LDAPURB:
    case LDAPURSB_w:
    case LDAPURSB_x:
    case LDAPURH:
    case LDAPURSH_w:
    case LDAPURSH_x:
    case LDAPUR_w:
    case LDAPURSW:
    case LDAPUR_x:

    // These stores don't actually have RCpc semantics but they're included with
    // the RCpc extensions.
    case STLURB:
    case STLURH:
    case STLUR_w:
    case STLUR_x:
      scope.Record(CPUFeatures::kRCpc, CPUFeatures::kRCpcImm);
      return;
  }
}

void CPUFeaturesAuditor::VisitLoadStorePAC(const Instruction* instr) {
  RecordInstructionFeaturesScope scope(this);
  USE(instr);
  scope.Record(CPUFeatures::kPAuth);
}

void CPUFeaturesAuditor::VisitDataProcessing3Source(const Instruction* instr) {
  RecordInstructionFeaturesScope scope(this);
  USE(instr);

@ -329,6 +417,16 @@ void CPUFeaturesAuditor::VisitFPDataProcessing1Source(
    case FRINTI_h:
      scope.Record(CPUFeatures::kFPHalf);
      return;
    case FRINT32X_s:
    case FRINT32X_d:
    case FRINT32Z_s:
    case FRINT32Z_d:
    case FRINT64X_s:
    case FRINT64X_d:
    case FRINT64Z_s:
    case FRINT64Z_d:
      scope.Record(CPUFeatures::kFrintToFixedSizedInt);
      return;
    default:
      // No special CPU features.
      // This category includes some half-precision FCVT instructions that do

@ -410,8 +508,6 @@ void CPUFeaturesAuditor::VisitFPImmediate(const Instruction* instr) {

void CPUFeaturesAuditor::VisitFPIntegerConvert(const Instruction* instr) {
  RecordInstructionFeaturesScope scope(this);
  // All of these instructions require FP.
  scope.Record(CPUFeatures::kFP);
  switch (instr->Mask(FPIntegerConvertMask)) {
    case FCVTAS_wh:
    case FCVTAS_xh:

@ -441,17 +537,23 @@ void CPUFeaturesAuditor::VisitFPIntegerConvert(const Instruction* instr) {
    case SCVTF_hx:
    case UCVTF_hw:
    case UCVTF_hx:
      scope.Record(CPUFeatures::kFP);
      scope.Record(CPUFeatures::kFPHalf);
      return;
    case FMOV_dx:
      scope.RecordOneOrBothOf(CPUFeatures::kFP, CPUFeatures::kNEON);
      return;
    case FMOV_d1_x:
    case FMOV_x_d1:
      scope.Record(CPUFeatures::kFP);
      scope.Record(CPUFeatures::kNEON);
      return;
    case FJCVTZS:
      scope.Record(CPUFeatures::kFP);
      scope.Record(CPUFeatures::kJSCVT);
      return;
    default:
      // No special CPU features.
      scope.Record(CPUFeatures::kFP);
      return;
  }
}

@ -611,6 +713,12 @@ void CPUFeaturesAuditor::VisitNEON2RegMisc(const Instruction* instr) {
    case NEON_FCMLT_zero:
      scope.Record(CPUFeatures::kFP);
      return;
    case NEON_FRINT32X:
    case NEON_FRINT32Z:
    case NEON_FRINT64X:
    case NEON_FRINT64Z:
      scope.Record(CPUFeatures::kFP, CPUFeatures::kFrintToFixedSizedInt);
      return;
    default:
      // No additional features.
      return;

@ -628,6 +736,12 @@ void CPUFeaturesAuditor::VisitNEON3Different(const Instruction* instr) {
  RecordInstructionFeaturesScope scope(this);
  // All of these instructions require NEON.
  scope.Record(CPUFeatures::kNEON);
  if (form_hash_ == "pmull_asimddiff_l"_h) {
    if (instr->GetNEONSize() == 3) {
      // Source is 1D or 2D, destination is 1Q.
      scope.Record(CPUFeatures::kPmull1Q);
    }
  }
  USE(instr);
}

@ -638,6 +752,17 @@ void CPUFeaturesAuditor::VisitNEON3Same(const Instruction* instr) {
  if (instr->Mask(NEON3SameFPFMask) == NEON3SameFPFixed) {
    scope.Record(CPUFeatures::kFP);
  }
  switch (instr->Mask(NEON3SameFHMMask)) {
    case NEON_FMLAL:
    case NEON_FMLAL2:
    case NEON_FMLSL:
    case NEON_FMLSL2:
      scope.Record(CPUFeatures::kFP, CPUFeatures::kNEONHalf, CPUFeatures::kFHM);
      return;
    default:
      // No additional features.
      return;
  }
}

void CPUFeaturesAuditor::VisitNEON3SameExtra(const Instruction* instr) {

@ -699,7 +824,18 @@ void CPUFeaturesAuditor::VisitNEONByIndexedElement(const Instruction* instr) {
      scope.Record(CPUFeatures::kRDM);
      return;
    default:
      // Fall through to check other FP instructions.
      // Fall through to check other instructions.
      break;
  }
  switch (instr->Mask(NEONByIndexedElementFPLongMask)) {
    case NEON_FMLAL_H_byelement:
    case NEON_FMLAL2_H_byelement:
    case NEON_FMLSL_H_byelement:
    case NEON_FMLSL2_H_byelement:
      scope.Record(CPUFeatures::kFP, CPUFeatures::kNEONHalf, CPUFeatures::kFHM);
      return;
    default:
      // Fall through to check other instructions.
      break;
  }
  switch (instr->Mask(NEONByIndexedElementFPMask)) {

@ -782,7 +918,6 @@ void CPUFeaturesAuditor::VisitNEONModifiedImmediate(const Instruction* instr) {
    scope.Record(CPUFeatures::kFP);
    if (instr->ExtractBit(11)) scope.Record(CPUFeatures::kNEONHalf);
  }
  USE(instr);
}

void CPUFeaturesAuditor::VisitNEONPerm(const Instruction* instr) {
@ -980,6 +1115,165 @@ void CPUFeaturesAuditor::VisitPCRelAddressing(const Instruction* instr) {
  USE(instr);
}

// Most SVE visitors require only SVE.
#define VIXL_SIMPLE_SVE_VISITOR_LIST(V) \
  V(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets) \
  V(SVE32BitGatherLoad_VectorPlusImm) \
  V(SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets) \
  V(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets) \
  V(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets) \
  V(SVE32BitGatherPrefetch_VectorPlusImm) \
  V(SVE32BitScatterStore_ScalarPlus32BitScaledOffsets) \
  V(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets) \
  V(SVE32BitScatterStore_VectorPlusImm) \
  V(SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets) \
  V(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsets) \
  V(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets) \
  V(SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets) \
  V(SVE64BitGatherLoad_VectorPlusImm) \
  V(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets) \
  V(SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets) \
  V(SVE64BitGatherPrefetch_VectorPlusImm) \
  V(SVE64BitScatterStore_ScalarPlus64BitScaledOffsets) \
  V(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets) \
  V(SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets) \
  V(SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets) \
  V(SVE64BitScatterStore_VectorPlusImm) \
  V(SVEAddressGeneration) \
  V(SVEBitwiseLogicalUnpredicated) \
  V(SVEBitwiseShiftUnpredicated) \
  V(SVEFFRInitialise) \
  V(SVEFFRWriteFromPredicate) \
  V(SVEFPAccumulatingReduction) \
  V(SVEFPArithmeticUnpredicated) \
  V(SVEFPCompareVectors) \
  V(SVEFPCompareWithZero) \
  V(SVEFPComplexAddition) \
  V(SVEFPComplexMulAdd) \
  V(SVEFPComplexMulAddIndex) \
  V(SVEFPFastReduction) \
  V(SVEFPMulIndex) \
  V(SVEFPMulAdd) \
  V(SVEFPMulAddIndex) \
  V(SVEFPUnaryOpUnpredicated) \
  V(SVEIncDecByPredicateCount) \
  V(SVEIndexGeneration) \
  V(SVEIntArithmeticUnpredicated) \
  V(SVEIntCompareSignedImm) \
  V(SVEIntCompareUnsignedImm) \
  V(SVEIntCompareVectors) \
  V(SVEIntMulAddPredicated) \
  V(SVEIntMulAddUnpredicated) \
  V(SVEIntReduction) \
  V(SVEIntUnaryArithmeticPredicated) \
  V(SVEMovprfx) \
  V(SVEMulIndex) \
  V(SVEPermuteVectorExtract) \
  V(SVEPermuteVectorInterleaving) \
  V(SVEPredicateCount) \
  V(SVEPredicateLogical) \
  V(SVEPropagateBreak) \
  V(SVEStackFrameAdjustment) \
  V(SVEStackFrameSize) \
  V(SVEVectorSelect) \
  V(SVEBitwiseLogical_Predicated) \
  V(SVEBitwiseLogicalWithImm_Unpredicated) \
  V(SVEBitwiseShiftByImm_Predicated) \
  V(SVEBitwiseShiftByVector_Predicated) \
  V(SVEBitwiseShiftByWideElements_Predicated) \
  V(SVEBroadcastBitmaskImm) \
  V(SVEBroadcastFPImm_Unpredicated) \
  V(SVEBroadcastGeneralRegister) \
  V(SVEBroadcastIndexElement) \
  V(SVEBroadcastIntImm_Unpredicated) \
  V(SVECompressActiveElements) \
  V(SVEConditionallyBroadcastElementToVector) \
  V(SVEConditionallyExtractElementToSIMDFPScalar) \
  V(SVEConditionallyExtractElementToGeneralRegister) \
  V(SVEConditionallyTerminateScalars) \
  V(SVEConstructivePrefix_Unpredicated) \
  V(SVEContiguousFirstFaultLoad_ScalarPlusScalar) \
  V(SVEContiguousLoad_ScalarPlusImm) \
  V(SVEContiguousLoad_ScalarPlusScalar) \
  V(SVEContiguousNonFaultLoad_ScalarPlusImm) \
  V(SVEContiguousNonTemporalLoad_ScalarPlusImm) \
  V(SVEContiguousNonTemporalLoad_ScalarPlusScalar) \
  V(SVEContiguousNonTemporalStore_ScalarPlusImm) \
  V(SVEContiguousNonTemporalStore_ScalarPlusScalar) \
  V(SVEContiguousPrefetch_ScalarPlusImm) \
  V(SVEContiguousPrefetch_ScalarPlusScalar) \
  V(SVEContiguousStore_ScalarPlusImm) \
  V(SVEContiguousStore_ScalarPlusScalar) \
  V(SVECopySIMDFPScalarRegisterToVector_Predicated) \
  V(SVECopyFPImm_Predicated) \
  V(SVECopyGeneralRegisterToVector_Predicated) \
  V(SVECopyIntImm_Predicated) \
  V(SVEElementCount) \
  V(SVEExtractElementToSIMDFPScalarRegister) \
  V(SVEExtractElementToGeneralRegister) \
  V(SVEFPArithmetic_Predicated) \
  V(SVEFPArithmeticWithImm_Predicated) \
  V(SVEFPConvertPrecision) \
  V(SVEFPConvertToInt) \
  V(SVEFPExponentialAccelerator) \
  V(SVEFPRoundToIntegralValue) \
  V(SVEFPTrigMulAddCoefficient) \
  V(SVEFPTrigSelectCoefficient) \
  V(SVEFPUnaryOp) \
  V(SVEIncDecRegisterByElementCount) \
  V(SVEIncDecVectorByElementCount) \
  V(SVEInsertSIMDFPScalarRegister) \
  V(SVEInsertGeneralRegister) \
  V(SVEIntAddSubtractImm_Unpredicated) \
  V(SVEIntAddSubtractVectors_Predicated) \
  V(SVEIntCompareScalarCountAndLimit) \
  V(SVEIntConvertToFP) \
  V(SVEIntDivideVectors_Predicated) \
  V(SVEIntMinMaxImm_Unpredicated) \
  V(SVEIntMinMaxDifference_Predicated) \
  V(SVEIntMulImm_Unpredicated) \
  V(SVEIntMulVectors_Predicated) \
  V(SVELoadAndBroadcastElement) \
  V(SVELoadAndBroadcastQOWord_ScalarPlusImm) \
  V(SVELoadAndBroadcastQOWord_ScalarPlusScalar) \
  V(SVELoadMultipleStructures_ScalarPlusImm) \
  V(SVELoadMultipleStructures_ScalarPlusScalar) \
  V(SVELoadPredicateRegister) \
  V(SVELoadVectorRegister) \
  V(SVEPartitionBreakCondition) \
  V(SVEPermutePredicateElements) \
  V(SVEPredicateFirstActive) \
  V(SVEPredicateInitialize) \
  V(SVEPredicateNextActive) \
  V(SVEPredicateReadFromFFR_Predicated) \
  V(SVEPredicateReadFromFFR_Unpredicated) \
  V(SVEPredicateTest) \
  V(SVEPredicateZero) \
  V(SVEPropagateBreakToNextPartition) \
  V(SVEReversePredicateElements) \
  V(SVEReverseVectorElements) \
  V(SVEReverseWithinElements) \
  V(SVESaturatingIncDecRegisterByElementCount) \
  V(SVESaturatingIncDecVectorByElementCount) \
  V(SVEStoreMultipleStructures_ScalarPlusImm) \
  V(SVEStoreMultipleStructures_ScalarPlusScalar) \
  V(SVEStorePredicateRegister) \
  V(SVEStoreVectorRegister) \
  V(SVETableLookup) \
  V(SVEUnpackPredicateElements) \
  V(SVEUnpackVectorElements) \
  V(SVEVectorSplice)

#define VIXL_DEFINE_SIMPLE_SVE_VISITOR(NAME) \
  void CPUFeaturesAuditor::Visit##NAME(const Instruction* instr) { \
    RecordInstructionFeaturesScope scope(this); \
    scope.Record(CPUFeatures::kSVE); \
    USE(instr); \
  }
VIXL_SIMPLE_SVE_VISITOR_LIST(VIXL_DEFINE_SIMPLE_SVE_VISITOR)
#undef VIXL_DEFINE_SIMPLE_SVE_VISITOR
#undef VIXL_SIMPLE_SVE_VISITOR_LIST
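Expanding the X-macro pair above for a single entry, say V(SVEMovprfx), yields a visitor that records only the base SVE feature:

//   void CPUFeaturesAuditor::VisitSVEMovprfx(const Instruction* instr) {
//     RecordInstructionFeaturesScope scope(this);
//     scope.Record(CPUFeatures::kSVE);
//     USE(instr);
//   }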
void CPUFeaturesAuditor::VisitSystem(const Instruction* instr) {
  RecordInstructionFeaturesScope scope(this);
  if (instr->Mask(SystemHintFMask) == SystemHintFixed) {
@ -1001,7 +1295,19 @@ void CPUFeaturesAuditor::VisitSystem(const Instruction* instr) {
        required.Combine(CPUFeatures::kPAuth);
        break;
      default:
        if (instr->GetImmHint() == ESB) required.Combine(CPUFeatures::kRAS);
        switch (instr->GetImmHint()) {
          case ESB:
            required.Combine(CPUFeatures::kRAS);
            break;
          case BTI:
          case BTI_j:
          case BTI_c:
          case BTI_jc:
            required.Combine(CPUFeatures::kBTI);
            break;
          default:
            break;
        }
        break;
    }
@ -1009,6 +1315,52 @@ void CPUFeaturesAuditor::VisitSystem(const Instruction* instr) {
    // features are not implemented, so we record the corresponding features
    // only if they are available.
    if (available_.Has(required)) scope.Record(required);
  } else if (instr->Mask(SystemSysMask) == SYS) {
    switch (instr->GetSysOp()) {
      // DC instruction variants.
      case CGVAC:
      case CGDVAC:
      case CGVAP:
      case CGDVAP:
      case CIGVAC:
      case CIGDVAC:
      case GVA:
      case GZVA:
        scope.Record(CPUFeatures::kMTE);
        break;
      case CVAP:
        scope.Record(CPUFeatures::kDCPoP);
        break;
      case CVADP:
        scope.Record(CPUFeatures::kDCCVADP);
        break;
      case IVAU:
      case CVAC:
      case CVAU:
      case CIVAC:
      case ZVA:
        // No special CPU features.
        break;
    }
  } else if (instr->Mask(SystemPStateFMask) == SystemPStateFixed) {
    switch (instr->Mask(SystemPStateMask)) {
      case CFINV:
        scope.Record(CPUFeatures::kFlagM);
        break;
      case AXFLAG:
      case XAFLAG:
        scope.Record(CPUFeatures::kAXFlag);
        break;
    }
  } else if (instr->Mask(SystemSysRegFMask) == SystemSysRegFixed) {
    if (instr->Mask(SystemSysRegMask) == MRS) {
      switch (instr->GetImmSystemRegister()) {
        case RNDR:
        case RNDRRS:
          scope.Record(CPUFeatures::kRNG);
          break;
      }
    }
  }
}
@ -1049,11 +1401,447 @@ void CPUFeaturesAuditor::VisitUnconditionalBranchToRegister(
  }
}

void CPUFeaturesAuditor::VisitReserved(const Instruction* instr) {
  RecordInstructionFeaturesScope scope(this);
  USE(instr);
}

void CPUFeaturesAuditor::VisitUnimplemented(const Instruction* instr) {
  RecordInstructionFeaturesScope scope(this);
  USE(instr);
}

void CPUFeaturesAuditor::Visit(Metadata* metadata, const Instruction* instr) {
  VIXL_ASSERT(metadata->count("form") > 0);
  const std::string& form = (*metadata)["form"];
  form_hash_ = Hash(form.c_str());
  const FormToVisitorFnMap* fv = CPUFeaturesAuditor::GetFormToVisitorFnMap();
  FormToVisitorFnMap::const_iterator it = fv->find(form_hash_);
  if (it == fv->end()) {
    RecordInstructionFeaturesScope scope(this);
    std::map<uint32_t, const CPUFeatures> features = {
        {"adclb_z_zzz"_h, CPUFeatures::kSVE2},
        {"adclt_z_zzz"_h, CPUFeatures::kSVE2},
        {"addhnb_z_zz"_h, CPUFeatures::kSVE2},
        {"addhnt_z_zz"_h, CPUFeatures::kSVE2},
        {"addp_z_p_zz"_h, CPUFeatures::kSVE2},
        {"bcax_z_zzz"_h, CPUFeatures::kSVE2},
        {"bdep_z_zz"_h,
         CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEBitPerm)},
        {"bext_z_zz"_h,
         CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEBitPerm)},
        {"bgrp_z_zz"_h,
         CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEBitPerm)},
        {"bsl1n_z_zzz"_h, CPUFeatures::kSVE2},
        {"bsl2n_z_zzz"_h, CPUFeatures::kSVE2},
        {"bsl_z_zzz"_h, CPUFeatures::kSVE2},
        {"cadd_z_zz"_h, CPUFeatures::kSVE2},
        {"cdot_z_zzz"_h, CPUFeatures::kSVE2},
        {"cdot_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"cdot_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"cmla_z_zzz"_h, CPUFeatures::kSVE2},
        {"cmla_z_zzzi_h"_h, CPUFeatures::kSVE2},
        {"cmla_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"eor3_z_zzz"_h, CPUFeatures::kSVE2},
        {"eorbt_z_zz"_h, CPUFeatures::kSVE2},
        {"eortb_z_zz"_h, CPUFeatures::kSVE2},
        {"ext_z_zi_con"_h, CPUFeatures::kSVE2},
        {"faddp_z_p_zz"_h, CPUFeatures::kSVE2},
        {"fcvtlt_z_p_z_h2s"_h, CPUFeatures::kSVE2},
        {"fcvtlt_z_p_z_s2d"_h, CPUFeatures::kSVE2},
        {"fcvtnt_z_p_z_d2s"_h, CPUFeatures::kSVE2},
        {"fcvtnt_z_p_z_s2h"_h, CPUFeatures::kSVE2},
        {"fcvtx_z_p_z_d2s"_h, CPUFeatures::kSVE2},
        {"fcvtxnt_z_p_z_d2s"_h, CPUFeatures::kSVE2},
        {"flogb_z_p_z"_h, CPUFeatures::kSVE2},
        {"fmaxnmp_z_p_zz"_h, CPUFeatures::kSVE2},
        {"fmaxp_z_p_zz"_h, CPUFeatures::kSVE2},
        {"fminnmp_z_p_zz"_h, CPUFeatures::kSVE2},
        {"fminp_z_p_zz"_h, CPUFeatures::kSVE2},
        {"fmlalb_z_zzz"_h, CPUFeatures::kSVE2},
        {"fmlalb_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"fmlalt_z_zzz"_h, CPUFeatures::kSVE2},
        {"fmlalt_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"fmlslb_z_zzz"_h, CPUFeatures::kSVE2},
        {"fmlslb_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"fmlslt_z_zzz"_h, CPUFeatures::kSVE2},
        {"fmlslt_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"histcnt_z_p_zz"_h, CPUFeatures::kSVE2},
        {"histseg_z_zz"_h, CPUFeatures::kSVE2},
        {"ldnt1b_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
        {"ldnt1b_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
        {"ldnt1d_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
        {"ldnt1h_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
        {"ldnt1h_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
        {"ldnt1sb_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
        {"ldnt1sb_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
        {"ldnt1sh_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
        {"ldnt1sh_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
        {"ldnt1sw_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
        {"ldnt1w_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
        {"ldnt1w_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
        {"match_p_p_zz"_h, CPUFeatures::kSVE2},
        {"mla_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"mla_z_zzzi_h"_h, CPUFeatures::kSVE2},
        {"mla_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"mls_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"mls_z_zzzi_h"_h, CPUFeatures::kSVE2},
        {"mls_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"mul_z_zz"_h, CPUFeatures::kSVE2},
        {"mul_z_zzi_d"_h, CPUFeatures::kSVE2},
        {"mul_z_zzi_h"_h, CPUFeatures::kSVE2},
        {"mul_z_zzi_s"_h, CPUFeatures::kSVE2},
        {"nbsl_z_zzz"_h, CPUFeatures::kSVE2},
        {"nmatch_p_p_zz"_h, CPUFeatures::kSVE2},
        {"pmul_z_zz"_h, CPUFeatures::kSVE2},
        {"pmullb_z_zz"_h, CPUFeatures::kSVE2},
        {"pmullt_z_zz"_h, CPUFeatures::kSVE2},
        {"raddhnb_z_zz"_h, CPUFeatures::kSVE2},
        {"raddhnt_z_zz"_h, CPUFeatures::kSVE2},
        {"rshrnb_z_zi"_h, CPUFeatures::kSVE2},
        {"rshrnt_z_zi"_h, CPUFeatures::kSVE2},
        {"rsubhnb_z_zz"_h, CPUFeatures::kSVE2},
        {"rsubhnt_z_zz"_h, CPUFeatures::kSVE2},
        {"saba_z_zzz"_h, CPUFeatures::kSVE2},
        {"sabalb_z_zzz"_h, CPUFeatures::kSVE2},
        {"sabalt_z_zzz"_h, CPUFeatures::kSVE2},
        {"sabdlb_z_zz"_h, CPUFeatures::kSVE2},
        {"sabdlt_z_zz"_h, CPUFeatures::kSVE2},
        {"sadalp_z_p_z"_h, CPUFeatures::kSVE2},
        {"saddlb_z_zz"_h, CPUFeatures::kSVE2},
        {"saddlbt_z_zz"_h, CPUFeatures::kSVE2},
        {"saddlt_z_zz"_h, CPUFeatures::kSVE2},
        {"saddwb_z_zz"_h, CPUFeatures::kSVE2},
        {"saddwt_z_zz"_h, CPUFeatures::kSVE2},
        {"sbclb_z_zzz"_h, CPUFeatures::kSVE2},
        {"sbclt_z_zzz"_h, CPUFeatures::kSVE2},
        {"shadd_z_p_zz"_h, CPUFeatures::kSVE2},
        {"shrnb_z_zi"_h, CPUFeatures::kSVE2},
        {"shrnt_z_zi"_h, CPUFeatures::kSVE2},
        {"shsub_z_p_zz"_h, CPUFeatures::kSVE2},
        {"shsubr_z_p_zz"_h, CPUFeatures::kSVE2},
        {"sli_z_zzi"_h, CPUFeatures::kSVE2},
        {"smaxp_z_p_zz"_h, CPUFeatures::kSVE2},
        {"sminp_z_p_zz"_h, CPUFeatures::kSVE2},
        {"smlalb_z_zzz"_h, CPUFeatures::kSVE2},
        {"smlalb_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"smlalb_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"smlalt_z_zzz"_h, CPUFeatures::kSVE2},
        {"smlalt_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"smlalt_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"smlslb_z_zzz"_h, CPUFeatures::kSVE2},
        {"smlslb_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"smlslb_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"smlslt_z_zzz"_h, CPUFeatures::kSVE2},
        {"smlslt_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"smlslt_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"smulh_z_zz"_h, CPUFeatures::kSVE2},
        {"smullb_z_zz"_h, CPUFeatures::kSVE2},
        {"smullb_z_zzi_d"_h, CPUFeatures::kSVE2},
        {"smullb_z_zzi_s"_h, CPUFeatures::kSVE2},
        {"smullt_z_zz"_h, CPUFeatures::kSVE2},
        {"smullt_z_zzi_d"_h, CPUFeatures::kSVE2},
        {"smullt_z_zzi_s"_h, CPUFeatures::kSVE2},
        {"splice_z_p_zz_con"_h, CPUFeatures::kSVE2},
        {"sqabs_z_p_z"_h, CPUFeatures::kSVE2},
        {"sqadd_z_p_zz"_h, CPUFeatures::kSVE2},
        {"sqcadd_z_zz"_h, CPUFeatures::kSVE2},
        {"sqdmlalb_z_zzz"_h, CPUFeatures::kSVE2},
        {"sqdmlalb_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"sqdmlalb_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"sqdmlalbt_z_zzz"_h, CPUFeatures::kSVE2},
        {"sqdmlalt_z_zzz"_h, CPUFeatures::kSVE2},
        {"sqdmlalt_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"sqdmlalt_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"sqdmlslb_z_zzz"_h, CPUFeatures::kSVE2},
        {"sqdmlslb_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"sqdmlslb_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"sqdmlslbt_z_zzz"_h, CPUFeatures::kSVE2},
        {"sqdmlslt_z_zzz"_h, CPUFeatures::kSVE2},
        {"sqdmlslt_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"sqdmlslt_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"sqdmulh_z_zz"_h, CPUFeatures::kSVE2},
        {"sqdmulh_z_zzi_d"_h, CPUFeatures::kSVE2},
        {"sqdmulh_z_zzi_h"_h, CPUFeatures::kSVE2},
        {"sqdmulh_z_zzi_s"_h, CPUFeatures::kSVE2},
        {"sqdmullb_z_zz"_h, CPUFeatures::kSVE2},
        {"sqdmullb_z_zzi_d"_h, CPUFeatures::kSVE2},
        {"sqdmullb_z_zzi_s"_h, CPUFeatures::kSVE2},
        {"sqdmullt_z_zz"_h, CPUFeatures::kSVE2},
        {"sqdmullt_z_zzi_d"_h, CPUFeatures::kSVE2},
        {"sqdmullt_z_zzi_s"_h, CPUFeatures::kSVE2},
        {"sqneg_z_p_z"_h, CPUFeatures::kSVE2},
        {"sqrdcmlah_z_zzz"_h, CPUFeatures::kSVE2},
        {"sqrdcmlah_z_zzzi_h"_h, CPUFeatures::kSVE2},
        {"sqrdcmlah_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"sqrdmlah_z_zzz"_h, CPUFeatures::kSVE2},
        {"sqrdmlah_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"sqrdmlah_z_zzzi_h"_h, CPUFeatures::kSVE2},
        {"sqrdmlah_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"sqrdmlsh_z_zzz"_h, CPUFeatures::kSVE2},
        {"sqrdmlsh_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"sqrdmlsh_z_zzzi_h"_h, CPUFeatures::kSVE2},
        {"sqrdmlsh_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"sqrdmulh_z_zz"_h, CPUFeatures::kSVE2},
        {"sqrdmulh_z_zzi_d"_h, CPUFeatures::kSVE2},
        {"sqrdmulh_z_zzi_h"_h, CPUFeatures::kSVE2},
        {"sqrdmulh_z_zzi_s"_h, CPUFeatures::kSVE2},
        {"sqrshl_z_p_zz"_h, CPUFeatures::kSVE2},
        {"sqrshlr_z_p_zz"_h, CPUFeatures::kSVE2},
        {"sqrshrnb_z_zi"_h, CPUFeatures::kSVE2},
        {"sqrshrnt_z_zi"_h, CPUFeatures::kSVE2},
        {"sqrshrunb_z_zi"_h, CPUFeatures::kSVE2},
        {"sqrshrunt_z_zi"_h, CPUFeatures::kSVE2},
        {"sqshl_z_p_zi"_h, CPUFeatures::kSVE2},
        {"sqshl_z_p_zz"_h, CPUFeatures::kSVE2},
        {"sqshlr_z_p_zz"_h, CPUFeatures::kSVE2},
        {"sqshlu_z_p_zi"_h, CPUFeatures::kSVE2},
        {"sqshrnb_z_zi"_h, CPUFeatures::kSVE2},
        {"sqshrnt_z_zi"_h, CPUFeatures::kSVE2},
        {"sqshrunb_z_zi"_h, CPUFeatures::kSVE2},
        {"sqshrunt_z_zi"_h, CPUFeatures::kSVE2},
        {"sqsub_z_p_zz"_h, CPUFeatures::kSVE2},
        {"sqsubr_z_p_zz"_h, CPUFeatures::kSVE2},
        {"sqxtnb_z_zz"_h, CPUFeatures::kSVE2},
        {"sqxtnt_z_zz"_h, CPUFeatures::kSVE2},
        {"sqxtunb_z_zz"_h, CPUFeatures::kSVE2},
        {"sqxtunt_z_zz"_h, CPUFeatures::kSVE2},
        {"srhadd_z_p_zz"_h, CPUFeatures::kSVE2},
        {"sri_z_zzi"_h, CPUFeatures::kSVE2},
        {"srshl_z_p_zz"_h, CPUFeatures::kSVE2},
        {"srshlr_z_p_zz"_h, CPUFeatures::kSVE2},
        {"srshr_z_p_zi"_h, CPUFeatures::kSVE2},
        {"srsra_z_zi"_h, CPUFeatures::kSVE2},
        {"sshllb_z_zi"_h, CPUFeatures::kSVE2},
        {"sshllt_z_zi"_h, CPUFeatures::kSVE2},
        {"ssra_z_zi"_h, CPUFeatures::kSVE2},
        {"ssublb_z_zz"_h, CPUFeatures::kSVE2},
        {"ssublbt_z_zz"_h, CPUFeatures::kSVE2},
        {"ssublt_z_zz"_h, CPUFeatures::kSVE2},
        {"ssubltb_z_zz"_h, CPUFeatures::kSVE2},
        {"ssubwb_z_zz"_h, CPUFeatures::kSVE2},
        {"ssubwt_z_zz"_h, CPUFeatures::kSVE2},
        {"stnt1b_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
        {"stnt1b_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
        {"stnt1d_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
        {"stnt1h_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
        {"stnt1h_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
        {"stnt1w_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
        {"stnt1w_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
        {"subhnb_z_zz"_h, CPUFeatures::kSVE2},
        {"subhnt_z_zz"_h, CPUFeatures::kSVE2},
        {"suqadd_z_p_zz"_h, CPUFeatures::kSVE2},
        {"tbl_z_zz_2"_h, CPUFeatures::kSVE2},
        {"tbx_z_zz"_h, CPUFeatures::kSVE2},
        {"uaba_z_zzz"_h, CPUFeatures::kSVE2},
        {"uabalb_z_zzz"_h, CPUFeatures::kSVE2},
        {"uabalt_z_zzz"_h, CPUFeatures::kSVE2},
        {"uabdlb_z_zz"_h, CPUFeatures::kSVE2},
        {"uabdlt_z_zz"_h, CPUFeatures::kSVE2},
        {"uadalp_z_p_z"_h, CPUFeatures::kSVE2},
        {"uaddlb_z_zz"_h, CPUFeatures::kSVE2},
        {"uaddlt_z_zz"_h, CPUFeatures::kSVE2},
        {"uaddwb_z_zz"_h, CPUFeatures::kSVE2},
        {"uaddwt_z_zz"_h, CPUFeatures::kSVE2},
        {"uhadd_z_p_zz"_h, CPUFeatures::kSVE2},
        {"uhsub_z_p_zz"_h, CPUFeatures::kSVE2},
        {"uhsubr_z_p_zz"_h, CPUFeatures::kSVE2},
        {"umaxp_z_p_zz"_h, CPUFeatures::kSVE2},
        {"uminp_z_p_zz"_h, CPUFeatures::kSVE2},
        {"umlalb_z_zzz"_h, CPUFeatures::kSVE2},
        {"umlalb_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"umlalb_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"umlalt_z_zzz"_h, CPUFeatures::kSVE2},
        {"umlalt_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"umlalt_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"umlslb_z_zzz"_h, CPUFeatures::kSVE2},
        {"umlslb_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"umlslb_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"umlslt_z_zzz"_h, CPUFeatures::kSVE2},
        {"umlslt_z_zzzi_d"_h, CPUFeatures::kSVE2},
        {"umlslt_z_zzzi_s"_h, CPUFeatures::kSVE2},
        {"umulh_z_zz"_h, CPUFeatures::kSVE2},
        {"umullb_z_zz"_h, CPUFeatures::kSVE2},
        {"umullb_z_zzi_d"_h, CPUFeatures::kSVE2},
        {"umullb_z_zzi_s"_h, CPUFeatures::kSVE2},
        {"umullt_z_zz"_h, CPUFeatures::kSVE2},
        {"umullt_z_zzi_d"_h, CPUFeatures::kSVE2},
        {"umullt_z_zzi_s"_h, CPUFeatures::kSVE2},
        {"uqadd_z_p_zz"_h, CPUFeatures::kSVE2},
        {"uqrshl_z_p_zz"_h, CPUFeatures::kSVE2},
        {"uqrshlr_z_p_zz"_h, CPUFeatures::kSVE2},
        {"uqrshrnb_z_zi"_h, CPUFeatures::kSVE2},
        {"uqrshrnt_z_zi"_h, CPUFeatures::kSVE2},
        {"uqshl_z_p_zi"_h, CPUFeatures::kSVE2},
        {"uqshl_z_p_zz"_h, CPUFeatures::kSVE2},
        {"uqshlr_z_p_zz"_h, CPUFeatures::kSVE2},
        {"uqshrnb_z_zi"_h, CPUFeatures::kSVE2},
|
||||
{"uqshrnt_z_zi"_h, CPUFeatures::kSVE2},
|
||||
{"uqsub_z_p_zz"_h, CPUFeatures::kSVE2},
|
||||
{"uqsubr_z_p_zz"_h, CPUFeatures::kSVE2},
|
||||
{"uqxtnb_z_zz"_h, CPUFeatures::kSVE2},
|
||||
{"uqxtnt_z_zz"_h, CPUFeatures::kSVE2},
|
||||
{"urecpe_z_p_z"_h, CPUFeatures::kSVE2},
|
||||
{"urhadd_z_p_zz"_h, CPUFeatures::kSVE2},
|
||||
{"urshl_z_p_zz"_h, CPUFeatures::kSVE2},
|
||||
{"urshlr_z_p_zz"_h, CPUFeatures::kSVE2},
|
||||
{"urshr_z_p_zi"_h, CPUFeatures::kSVE2},
|
||||
{"ursqrte_z_p_z"_h, CPUFeatures::kSVE2},
|
||||
{"ursra_z_zi"_h, CPUFeatures::kSVE2},
|
||||
{"ushllb_z_zi"_h, CPUFeatures::kSVE2},
|
||||
{"ushllt_z_zi"_h, CPUFeatures::kSVE2},
|
||||
{"usqadd_z_p_zz"_h, CPUFeatures::kSVE2},
|
||||
{"usra_z_zi"_h, CPUFeatures::kSVE2},
|
||||
{"usublb_z_zz"_h, CPUFeatures::kSVE2},
|
||||
{"usublt_z_zz"_h, CPUFeatures::kSVE2},
|
||||
{"usubwb_z_zz"_h, CPUFeatures::kSVE2},
|
||||
{"usubwt_z_zz"_h, CPUFeatures::kSVE2},
|
||||
{"whilege_p_p_rr"_h, CPUFeatures::kSVE2},
|
||||
{"whilegt_p_p_rr"_h, CPUFeatures::kSVE2},
|
||||
{"whilehi_p_p_rr"_h, CPUFeatures::kSVE2},
|
||||
{"whilehs_p_p_rr"_h, CPUFeatures::kSVE2},
|
||||
{"whilerw_p_rr"_h, CPUFeatures::kSVE2},
|
||||
{"whilewr_p_rr"_h, CPUFeatures::kSVE2},
|
||||
{"xar_z_zzi"_h, CPUFeatures::kSVE2},
|
||||
{"smmla_z_zzz"_h,
|
||||
CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)},
|
||||
{"ummla_z_zzz"_h,
|
||||
CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)},
|
||||
{"usmmla_z_zzz"_h,
|
||||
CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)},
|
||||
{"fmmla_z_zzz_s"_h,
|
||||
CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF32MM)},
|
||||
{"fmmla_z_zzz_d"_h,
|
||||
CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
|
||||
{"smmla_asimdsame2_g"_h,
|
||||
CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)},
|
||||
{"ummla_asimdsame2_g"_h,
|
||||
CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)},
|
||||
{"usmmla_asimdsame2_g"_h,
|
||||
CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)},
|
||||
{"ld1row_z_p_bi_u32"_h,
|
||||
CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
|
||||
{"ld1row_z_p_br_contiguous"_h,
|
||||
CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
|
||||
{"ld1rod_z_p_bi_u64"_h,
|
||||
CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
|
||||
{"ld1rod_z_p_br_contiguous"_h,
|
||||
CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
|
||||
{"ld1rob_z_p_bi_u8"_h,
|
||||
CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
|
||||
{"ld1rob_z_p_br_contiguous"_h,
|
||||
CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
|
||||
{"ld1roh_z_p_bi_u16"_h,
|
||||
CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
|
||||
{"ld1roh_z_p_br_contiguous"_h,
|
||||
CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
|
||||
{"usdot_asimdsame2_d"_h,
|
||||
CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)},
|
||||
{"sudot_asimdelem_d"_h,
|
||||
CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)},
|
||||
{"usdot_asimdelem_d"_h,
|
||||
CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)},
|
||||
{"usdot_z_zzz_s"_h,
|
||||
CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)},
|
||||
{"usdot_z_zzzi_s"_h,
|
||||
CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)},
|
||||
{"sudot_z_zzzi_s"_h,
|
||||
CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)},
|
||||
{"addg_64_addsub_immtags"_h, CPUFeatures::kMTE},
|
||||
{"gmi_64g_dp_2src"_h, CPUFeatures::kMTE},
|
||||
{"irg_64i_dp_2src"_h, CPUFeatures::kMTE},
|
||||
{"ldg_64loffset_ldsttags"_h, CPUFeatures::kMTE},
|
||||
{"st2g_64soffset_ldsttags"_h, CPUFeatures::kMTE},
|
||||
{"st2g_64spost_ldsttags"_h, CPUFeatures::kMTE},
|
||||
{"st2g_64spre_ldsttags"_h, CPUFeatures::kMTE},
|
||||
{"stgp_64_ldstpair_off"_h, CPUFeatures::kMTE},
|
||||
{"stgp_64_ldstpair_post"_h, CPUFeatures::kMTE},
|
||||
{"stgp_64_ldstpair_pre"_h, CPUFeatures::kMTE},
|
||||
{"stg_64soffset_ldsttags"_h, CPUFeatures::kMTE},
|
||||
{"stg_64spost_ldsttags"_h, CPUFeatures::kMTE},
|
||||
{"stg_64spre_ldsttags"_h, CPUFeatures::kMTE},
|
||||
{"stz2g_64soffset_ldsttags"_h, CPUFeatures::kMTE},
|
||||
{"stz2g_64spost_ldsttags"_h, CPUFeatures::kMTE},
|
||||
{"stz2g_64spre_ldsttags"_h, CPUFeatures::kMTE},
|
||||
{"stzg_64soffset_ldsttags"_h, CPUFeatures::kMTE},
|
||||
{"stzg_64spost_ldsttags"_h, CPUFeatures::kMTE},
|
||||
{"stzg_64spre_ldsttags"_h, CPUFeatures::kMTE},
|
||||
{"subg_64_addsub_immtags"_h, CPUFeatures::kMTE},
|
||||
{"subps_64s_dp_2src"_h, CPUFeatures::kMTE},
|
||||
{"subp_64s_dp_2src"_h, CPUFeatures::kMTE},
|
||||
{"cpyen_cpy_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"cpyern_cpy_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"cpyewn_cpy_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"cpye_cpy_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"cpyfen_cpy_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"cpyfern_cpy_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"cpyfewn_cpy_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"cpyfe_cpy_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"cpyfmn_cpy_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"cpyfmrn_cpy_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"cpyfmwn_cpy_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"cpyfm_cpy_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"cpyfpn_cpy_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"cpyfprn_cpy_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"cpyfpwn_cpy_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"cpyfp_cpy_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"cpymn_cpy_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"cpymrn_cpy_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"cpymwn_cpy_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"cpym_cpy_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"cpypn_cpy_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"cpyprn_cpy_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"cpypwn_cpy_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"cpyp_cpy_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"seten_set_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"sete_set_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"setgen_set_memcms"_h,
|
||||
CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)},
|
||||
{"setge_set_memcms"_h,
|
||||
CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)},
|
||||
{"setgmn_set_memcms"_h,
|
||||
CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)},
|
||||
{"setgm_set_memcms"_h,
|
||||
CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)},
|
||||
{"setgpn_set_memcms"_h,
|
||||
CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)},
|
||||
{"setgp_set_memcms"_h,
|
||||
CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)},
|
||||
{"setmn_set_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"setm_set_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"setpn_set_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"setp_set_memcms"_h, CPUFeatures::kMOPS},
|
||||
{"abs_32_dp_1src"_h, CPUFeatures::kCSSC},
|
||||
{"abs_64_dp_1src"_h, CPUFeatures::kCSSC},
|
||||
{"cnt_32_dp_1src"_h, CPUFeatures::kCSSC},
|
||||
{"cnt_64_dp_1src"_h, CPUFeatures::kCSSC},
|
||||
{"ctz_32_dp_1src"_h, CPUFeatures::kCSSC},
|
||||
{"ctz_64_dp_1src"_h, CPUFeatures::kCSSC},
|
||||
{"smax_32_dp_2src"_h, CPUFeatures::kCSSC},
|
||||
{"smax_64_dp_2src"_h, CPUFeatures::kCSSC},
|
||||
{"smin_32_dp_2src"_h, CPUFeatures::kCSSC},
|
||||
{"smin_64_dp_2src"_h, CPUFeatures::kCSSC},
|
||||
{"umax_32_dp_2src"_h, CPUFeatures::kCSSC},
|
||||
{"umax_64_dp_2src"_h, CPUFeatures::kCSSC},
|
||||
{"umin_32_dp_2src"_h, CPUFeatures::kCSSC},
|
||||
{"umin_64_dp_2src"_h, CPUFeatures::kCSSC},
|
||||
{"smax_32_minmax_imm"_h, CPUFeatures::kCSSC},
|
||||
{"smax_64_minmax_imm"_h, CPUFeatures::kCSSC},
|
||||
{"smin_32_minmax_imm"_h, CPUFeatures::kCSSC},
|
||||
{"smin_64_minmax_imm"_h, CPUFeatures::kCSSC},
|
||||
{"umax_32u_minmax_imm"_h, CPUFeatures::kCSSC},
|
||||
{"umax_64u_minmax_imm"_h, CPUFeatures::kCSSC},
|
||||
{"umin_32u_minmax_imm"_h, CPUFeatures::kCSSC},
|
||||
{"umin_64u_minmax_imm"_h, CPUFeatures::kCSSC},
|
||||
};
|
||||
|
||||
if (features.count(form_hash_) > 0) {
|
||||
scope.Record(features[form_hash_]);
|
||||
}
|
||||
} else {
|
||||
(it->second)(this, instr);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace aarch64
|
||||
} // namespace vixl
|
||||
|
|
|
@@ -0,0 +1,499 @@
// Copyright 2023, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64

#include "debugger-aarch64.h"

#include <cerrno>
#include <cmath>
#include <cstring>
#include <errno.h>
#include <limits>
#include <unistd.h>

namespace vixl {
namespace aarch64 {


Debugger::Debugger(Simulator* sim)
    : sim_(sim), input_stream_(&std::cin), ostream_(sim->GetOutputStream()) {
  // Register all basic debugger commands.
  RegisterCmd<HelpCmd>();
  RegisterCmd<BreakCmd>();
  RegisterCmd<StepCmd>();
  RegisterCmd<ContinueCmd>();
  RegisterCmd<PrintCmd>();
  RegisterCmd<TraceCmd>();
  RegisterCmd<GdbCmd>();
}


template <class T>
void Debugger::RegisterCmd() {
  auto new_command = std::make_unique<T>(sim_);

  // Check that the new command word and alias don't already exist.
  std::string_view new_cmd_word = new_command->GetCommandWord();
  std::string_view new_cmd_alias = new_command->GetCommandAlias();
  for (const auto& cmd : debugger_cmds_) {
    std::string_view cmd_word = cmd->GetCommandWord();
    std::string_view cmd_alias = cmd->GetCommandAlias();

    if (new_cmd_word == cmd_word) {
      VIXL_ABORT_WITH_MSG("Command word matches an existing command word.");
    } else if (new_cmd_word == cmd_alias) {
      VIXL_ABORT_WITH_MSG("Command word matches an existing command alias.");
    }

    if (new_cmd_alias != "") {
      if (new_cmd_alias == cmd_word) {
        VIXL_ABORT_WITH_MSG("Command alias matches an existing command word.");
      } else if (new_cmd_alias == cmd_alias) {
        VIXL_ABORT_WITH_MSG("Command alias matches an existing command alias.");
      }
    }
  }

  debugger_cmds_.push_back(std::move(new_command));
}
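
// An illustrative sketch (not part of the upstream diff): any further command
// type would be registered the same way as the built-ins above, and
// RegisterCmd aborts on a word or alias collision, e.g.
//
//   RegisterCmd<HelpCmd>();  // would abort: "help"'s word is already taken.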


bool Debugger::IsAtBreakpoint() const {
  return IsBreakpoint(reinterpret_cast<uint64_t>(sim_->ReadPc()));
}


void Debugger::Debug() {
  DebugReturn done = DebugContinue;
  while (done == DebugContinue) {
    // Disassemble the next instruction to execute.
    PrintDisassembler print_disasm = PrintDisassembler(ostream_);
    print_disasm.Disassemble(sim_->ReadPc());

    // Read the command line.
    fprintf(ostream_, "sim> ");
    std::string line;
    std::getline(*input_stream_, line);

    // Remove all control characters from the command string.
    line.erase(std::remove_if(line.begin(),
                              line.end(),
                              [](char c) { return std::iscntrl(c); }),
               line.end());

    // Assume input from std::cin has already been output (e.g: by a terminal)
    // but input from elsewhere (e.g: from a testing input stream) has not.
    if (input_stream_ != &std::cin) {
      fprintf(ostream_, "%s\n", line.c_str());
    }

    // Parse the command into tokens.
    std::vector<std::string> tokenized_cmd = Tokenize(line);
    if (!tokenized_cmd.empty()) {
      done = ExecDebugCommand(tokenized_cmd);
    }
  }
}


std::optional<uint64_t> Debugger::ParseUint64String(std::string_view uint64_str,
                                                    int base) {
  // Clear any previous errors.
  errno = 0;

  // strtoull uses 0 to indicate that no conversion was possible so first
  // check that the string isn't zero.
  if (IsZeroUint64String(uint64_str, base)) {
    return 0;
  }

  // Cannot use stoi as it might not be possible to use exceptions.
  char* end;
  uint64_t value = std::strtoull(uint64_str.data(), &end, base);
  if (value == 0 || *end != '\0' || errno == ERANGE) {
    return std::nullopt;
  }

  return value;
}
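
// Illustrative behaviour (not from the vixl sources): the std::optional
// return distinguishes a genuine zero from a failed conversion, which a bare
// strtoull cannot do on its own:
//
//   Debugger::ParseUint64String("0x1f", 16);  // -> 31
//   Debugger::ParseUint64String("0", 16);     // -> 0 (a valid zero)
//   Debugger::ParseUint64String("zzz", 16);   // -> std::nullopt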


std::optional<Debugger::RegisterParsedFormat> Debugger::ParseRegString(
    std::string_view reg_str) {
  // A register should only have 2 (e.g: X0) or 3 (e.g: X31) characters.
  if (reg_str.size() < 2 || reg_str.size() > 3) {
    return std::nullopt;
  }

  // Check for aliases of registers.
  if (reg_str == "lr") {
    return {{'X', kLinkRegCode}};
  } else if (reg_str == "sp") {
    return {{'X', kSpRegCode}};
  }

  unsigned max_reg_num;
  char reg_prefix = std::toupper(reg_str.front());
  switch (reg_prefix) {
    case 'W':
      VIXL_FALLTHROUGH();
    case 'X':
      max_reg_num = kNumberOfRegisters - 1;
      break;
    case 'V':
      max_reg_num = kNumberOfVRegisters - 1;
      break;
    case 'Z':
      max_reg_num = kNumberOfZRegisters - 1;
      break;
    case 'P':
      max_reg_num = kNumberOfPRegisters - 1;
      break;
    default:
      return std::nullopt;
  }

  std::string_view str_code = reg_str.substr(1, reg_str.size());
  auto reg_code = ParseUint64String(str_code, 10);
  if (!reg_code) {
    return std::nullopt;
  }

  if (*reg_code > max_reg_num) {
    return std::nullopt;
  }

  return {{reg_prefix, *reg_code}};
}


void Debugger::PrintUsage() {
  for (const auto& cmd : debugger_cmds_) {
    // Print commands in the following format:
    //  foo / f
    //      foo <arg>
    //      A description of the foo command.
    //

    std::string_view cmd_word = cmd->GetCommandWord();
    std::string_view cmd_alias = cmd->GetCommandAlias();
    if (cmd_alias != "") {
      fprintf(ostream_, "%s / %s\n", cmd_word.data(), cmd_alias.data());
    } else {
      fprintf(ostream_, "%s\n", cmd_word.data());
    }

    std::string_view args_str = cmd->GetArgsString();
    if (args_str != "") {
      fprintf(ostream_, "\t%s %s\n", cmd_word.data(), args_str.data());
    }

    std::string_view description = cmd->GetDescription();
    if (description != "") {
      fprintf(ostream_, "\t%s\n", description.data());
    }
  }
}


std::vector<std::string> Debugger::Tokenize(std::string_view input_line,
                                            char separator) {
  std::vector<std::string> words;

  if (input_line.empty()) {
    return words;
  }

  for (auto separator_pos = input_line.find(separator);
       separator_pos != input_line.npos;
       separator_pos = input_line.find(separator)) {
    // Skip consecutive, repeated separators.
    if (separator_pos != 0) {
      words.push_back(std::string{input_line.substr(0, separator_pos)});
    }

    // Remove characters up to and including the separator.
    input_line.remove_prefix(separator_pos + 1);
  }

  // Add the rest of the string to the vector.
  words.push_back(std::string{input_line});

  return words;
}
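
// Illustrative behaviour (not from the vixl sources), assuming the header
// defaults `separator` to a space: repeated separators collapse, so
// Tokenize("print   x0") yields {"print", "x0"}.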


DebugReturn Debugger::ExecDebugCommand(
    const std::vector<std::string>& tokenized_cmd) {
  std::string cmd_word = tokenized_cmd.front();
  for (const auto& cmd : debugger_cmds_) {
    if (cmd_word == cmd->GetCommandWord() ||
        cmd_word == cmd->GetCommandAlias()) {
      const std::vector<std::string> args(tokenized_cmd.begin() + 1,
                                          tokenized_cmd.end());

      // Call the handler for the command and pass the arguments.
      return cmd->Action(args);
    }
  }

  fprintf(ostream_, "Error: command '%s' not found\n", cmd_word.c_str());
  return DebugContinue;
}


bool Debugger::IsZeroUint64String(std::string_view uint64_str, int base) {
  // Remove any hex prefixes.
  if (base == 0 || base == 16) {
    std::string_view prefix = uint64_str.substr(0, 2);
    if (prefix == "0x" || prefix == "0X") {
      uint64_str.remove_prefix(2);
    }
  }

  if (uint64_str.empty()) {
    return false;
  }

  // Check all remaining digits in the string for anything other than zero.
  for (char c : uint64_str) {
    if (c != '0') {
      return false;
    }
  }

  return true;
}
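
// Illustrative behaviour (not from the vixl sources): "000" (base 10) and
// "0x0000" (base 16) count as zero; an empty string and a bare "0x" do not.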


DebuggerCmd::DebuggerCmd(Simulator* sim,
                         std::string cmd_word,
                         std::string cmd_alias,
                         std::string args_str,
                         std::string description)
    : sim_(sim),
      ostream_(sim->GetOutputStream()),
      command_word_(cmd_word),
      command_alias_(cmd_alias),
      args_str_(args_str),
      description_(description) {}


DebugReturn HelpCmd::Action(const std::vector<std::string>& args) {
  USE(args);
  sim_->GetDebugger()->PrintUsage();
  return DebugContinue;
}


DebugReturn BreakCmd::Action(const std::vector<std::string>& args) {
  if (args.size() != 1) {
    fprintf(ostream_, "Error: Use `break <address>` to set a breakpoint\n");
    return DebugContinue;
  }

  std::string arg = args.front();
  auto break_addr = Debugger::ParseUint64String(arg);
  if (!break_addr) {
    fprintf(ostream_, "Error: Use `break <address>` to set a breakpoint\n");
    return DebugContinue;
  }

  if (sim_->GetDebugger()->IsBreakpoint(*break_addr)) {
    sim_->GetDebugger()->RemoveBreakpoint(*break_addr);
    fprintf(ostream_,
            "Breakpoint successfully removed at: 0x%" PRIx64 "\n",
            *break_addr);
  } else {
    sim_->GetDebugger()->RegisterBreakpoint(*break_addr);
    fprintf(ostream_,
            "Breakpoint successfully added at: 0x%" PRIx64 "\n",
            *break_addr);
  }

  return DebugContinue;
}


DebugReturn StepCmd::Action(const std::vector<std::string>& args) {
  if (args.size() > 1) {
    fprintf(ostream_,
            "Error: use `step [number]` to step an optional number of"
            " instructions\n");
    return DebugContinue;
  }

  // Step 1 instruction by default.
  std::optional<uint64_t> number_of_instructions_to_execute{1};

  if (args.size() == 1) {
    // Parse the argument to step that number of instructions.
    std::string arg = args.front();
    number_of_instructions_to_execute = Debugger::ParseUint64String(arg);
    if (!number_of_instructions_to_execute) {
      fprintf(ostream_,
              "Error: use `step [number]` to step an optional number of"
              " instructions\n");
      return DebugContinue;
    }
  }

  while (!sim_->IsSimulationFinished() &&
         *number_of_instructions_to_execute > 0) {
    sim_->ExecuteInstruction();
    (*number_of_instructions_to_execute)--;

    // The first instruction has already been printed by Debug() so only
    // enable instruction tracing after the first instruction has been
    // executed.
    sim_->SetTraceParameters(sim_->GetTraceParameters() | LOG_DISASM);
  }

  // Disable instruction tracing after all instructions have been executed.
  sim_->SetTraceParameters(sim_->GetTraceParameters() & ~LOG_DISASM);

  if (sim_->IsSimulationFinished()) {
    fprintf(ostream_,
            "Debugger at the end of simulation, leaving simulator...\n");
    return DebugExit;
  }

  return DebugContinue;
}


DebugReturn ContinueCmd::Action(const std::vector<std::string>& args) {
  USE(args);

  fprintf(ostream_, "Continuing...\n");

  if (sim_->GetDebugger()->IsAtBreakpoint()) {
    // This breakpoint has already been hit, so execute it before continuing.
    sim_->ExecuteInstruction();
  }

  return DebugExit;
}


DebugReturn PrintCmd::Action(const std::vector<std::string>& args) {
  if (args.size() != 1) {
    fprintf(ostream_,
            "Error: use `print <register|all>` to print the contents of a"
            " specific register or all registers.\n");
    return DebugContinue;
  }

  if (args.front() == "all") {
    sim_->PrintRegisters();
    sim_->PrintZRegisters();
  } else if (args.front() == "system") {
    sim_->PrintSystemRegisters();
  } else if (args.front() == "ffr") {
    sim_->PrintFFR();
  } else {
    auto reg = Debugger::ParseRegString(args.front());
    if (!reg) {
      fprintf(ostream_,
              "Error: incorrect register format, use e.g: X0, x0, etc...\n");
      return DebugContinue;
    }

    // Ensure the stack pointer is printed instead of the zero register.
    if ((*reg).second == kSpRegCode) {
      (*reg).second = kSPRegInternalCode;
    }

    // Registers are printed in different ways depending on their type.
    switch ((*reg).first) {
      case 'W':
        sim_->PrintRegister(
            (*reg).second,
            static_cast<Simulator::PrintRegisterFormat>(
                Simulator::PrintRegisterFormat::kPrintWReg |
                Simulator::PrintRegisterFormat::kPrintRegPartial));
        break;
      case 'X':
        sim_->PrintRegister((*reg).second,
                            Simulator::PrintRegisterFormat::kPrintXReg);
        break;
      case 'V':
        sim_->PrintVRegister((*reg).second);
        break;
      case 'Z':
        sim_->PrintZRegister((*reg).second);
        break;
      case 'P':
        sim_->PrintPRegister((*reg).second);
        break;
      default:
        // ParseRegString should only allow valid register characters.
        VIXL_UNREACHABLE();
    }
  }

  return DebugContinue;
}


DebugReturn TraceCmd::Action(const std::vector<std::string>& args) {
  if (args.size() != 0) {
    fprintf(ostream_, "Error: use `trace` to toggle tracing of registers.\n");
    return DebugContinue;
  }

  int trace_params = sim_->GetTraceParameters();
  if ((trace_params & LOG_ALL) != LOG_ALL) {
    fprintf(ostream_,
            "Enabling disassembly, registers and memory write tracing\n");
    sim_->SetTraceParameters(trace_params | LOG_ALL);
  } else {
    fprintf(ostream_,
            "Disabling disassembly, registers and memory write tracing\n");
    sim_->SetTraceParameters(trace_params & ~LOG_ALL);
  }

  return DebugContinue;
}


DebugReturn GdbCmd::Action(const std::vector<std::string>& args) {
  if (args.size() != 0) {
    fprintf(ostream_,
            "Error: use `gdb` to enter GDB from the simulator debugger.\n");
    return DebugContinue;
  }

  HostBreakpoint();
  return DebugContinue;
}


}  // namespace aarch64
}  // namespace vixl

#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
File diff suppressed because it is too large
File diff suppressed because it is too large

@@ -25,6 +25,7 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "instructions-aarch64.h"

#include "assembler-aarch64.h"

namespace vixl {

@@ -35,7 +36,8 @@ static uint64_t RepeatBitsAcrossReg(unsigned reg_size,
                                    unsigned width) {
  VIXL_ASSERT((width == 2) || (width == 4) || (width == 8) || (width == 16) ||
              (width == 32));
  VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
  VIXL_ASSERT((reg_size == kBRegSize) || (reg_size == kHRegSize) ||
              (reg_size == kSRegSize) || (reg_size == kDRegSize));
  uint64_t result = value & ((UINT64_C(1) << width) - 1);
  for (unsigned i = width; i < reg_size; i *= 2) {
    result |= (result << i);

@@ -43,6 +45,442 @@ static uint64_t RepeatBitsAcrossReg(unsigned reg_size,
  return result;
}

bool Instruction::CanTakeSVEMovprfx(const char* form,
                                    const Instruction* movprfx) const {
  return CanTakeSVEMovprfx(Hash(form), movprfx);
}

bool Instruction::CanTakeSVEMovprfx(uint32_t form_hash,
                                    const Instruction* movprfx) const {
  bool movprfx_is_predicated = movprfx->Mask(SVEMovprfxMask) == MOVPRFX_z_p_z;
  bool movprfx_is_unpredicated =
      movprfx->Mask(SVEConstructivePrefix_UnpredicatedMask) == MOVPRFX_z_z;
  VIXL_ASSERT(movprfx_is_predicated != movprfx_is_unpredicated);

  int movprfx_zd = movprfx->GetRd();
  int movprfx_pg = movprfx_is_predicated ? movprfx->GetPgLow8() : -1;
  VectorFormat movprfx_vform =
      movprfx_is_predicated ? movprfx->GetSVEVectorFormat() : kFormatUndefined;

  bool pg_matches_low8 = movprfx_pg == GetPgLow8();
  bool vform_matches = movprfx_vform == GetSVEVectorFormat();
  bool zd_matches = movprfx_zd == GetRd();
  bool zd_isnt_zn = movprfx_zd != GetRn();
  bool zd_isnt_zm = movprfx_zd != GetRm();

  switch (form_hash) {
    case "cdot_z_zzzi_s"_h:
    case "sdot_z_zzzi_s"_h:
    case "sudot_z_zzzi_s"_h:
    case "udot_z_zzzi_s"_h:
    case "usdot_z_zzzi_s"_h:
      return (GetRd() != static_cast<int>(ExtractBits(18, 16))) &&
             movprfx_is_unpredicated && zd_isnt_zn && zd_matches;

    case "cdot_z_zzzi_d"_h:
    case "sdot_z_zzzi_d"_h:
    case "udot_z_zzzi_d"_h:
      return (GetRd() != static_cast<int>(ExtractBits(19, 16))) &&
             movprfx_is_unpredicated && zd_isnt_zn && zd_matches;

    case "fmlalb_z_zzzi_s"_h:
    case "fmlalt_z_zzzi_s"_h:
    case "fmlslb_z_zzzi_s"_h:
    case "fmlslt_z_zzzi_s"_h:
    case "smlalb_z_zzzi_d"_h:
    case "smlalb_z_zzzi_s"_h:
    case "smlalt_z_zzzi_d"_h:
    case "smlalt_z_zzzi_s"_h:
    case "smlslb_z_zzzi_d"_h:
    case "smlslb_z_zzzi_s"_h:
    case "smlslt_z_zzzi_d"_h:
    case "smlslt_z_zzzi_s"_h:
    case "sqdmlalb_z_zzzi_d"_h:
    case "sqdmlalb_z_zzzi_s"_h:
    case "sqdmlalt_z_zzzi_d"_h:
    case "sqdmlalt_z_zzzi_s"_h:
    case "sqdmlslb_z_zzzi_d"_h:
    case "sqdmlslb_z_zzzi_s"_h:
    case "sqdmlslt_z_zzzi_d"_h:
    case "sqdmlslt_z_zzzi_s"_h:
    case "umlalb_z_zzzi_d"_h:
    case "umlalb_z_zzzi_s"_h:
    case "umlalt_z_zzzi_d"_h:
    case "umlalt_z_zzzi_s"_h:
    case "umlslb_z_zzzi_d"_h:
    case "umlslb_z_zzzi_s"_h:
    case "umlslt_z_zzzi_d"_h:
    case "umlslt_z_zzzi_s"_h:
      return (GetRd() != GetSVEMulLongZmAndIndex().first) &&
             movprfx_is_unpredicated && zd_isnt_zn && zd_matches;

    case "cmla_z_zzzi_h"_h:
    case "cmla_z_zzzi_s"_h:
    case "fcmla_z_zzzi_h"_h:
    case "fcmla_z_zzzi_s"_h:
    case "fmla_z_zzzi_d"_h:
    case "fmla_z_zzzi_h"_h:
    case "fmla_z_zzzi_s"_h:
    case "fmls_z_zzzi_d"_h:
    case "fmls_z_zzzi_h"_h:
    case "fmls_z_zzzi_s"_h:
    case "mla_z_zzzi_d"_h:
    case "mla_z_zzzi_h"_h:
    case "mla_z_zzzi_s"_h:
    case "mls_z_zzzi_d"_h:
    case "mls_z_zzzi_h"_h:
    case "mls_z_zzzi_s"_h:
    case "sqrdcmlah_z_zzzi_h"_h:
    case "sqrdcmlah_z_zzzi_s"_h:
    case "sqrdmlah_z_zzzi_d"_h:
    case "sqrdmlah_z_zzzi_h"_h:
    case "sqrdmlah_z_zzzi_s"_h:
    case "sqrdmlsh_z_zzzi_d"_h:
    case "sqrdmlsh_z_zzzi_h"_h:
    case "sqrdmlsh_z_zzzi_s"_h:
      return (GetRd() != GetSVEMulZmAndIndex().first) &&
             movprfx_is_unpredicated && zd_isnt_zn && zd_matches;

    case "adclb_z_zzz"_h:
    case "adclt_z_zzz"_h:
    case "bcax_z_zzz"_h:
    case "bsl1n_z_zzz"_h:
    case "bsl2n_z_zzz"_h:
    case "bsl_z_zzz"_h:
    case "cdot_z_zzz"_h:
    case "cmla_z_zzz"_h:
    case "eor3_z_zzz"_h:
    case "eorbt_z_zz"_h:
    case "eortb_z_zz"_h:
    case "fmlalb_z_zzz"_h:
    case "fmlalt_z_zzz"_h:
    case "fmlslb_z_zzz"_h:
    case "fmlslt_z_zzz"_h:
    case "nbsl_z_zzz"_h:
    case "saba_z_zzz"_h:
    case "sabalb_z_zzz"_h:
    case "sabalt_z_zzz"_h:
    case "sbclb_z_zzz"_h:
    case "sbclt_z_zzz"_h:
    case "sdot_z_zzz"_h:
    case "smlalb_z_zzz"_h:
    case "smlalt_z_zzz"_h:
    case "smlslb_z_zzz"_h:
    case "smlslt_z_zzz"_h:
    case "sqdmlalb_z_zzz"_h:
    case "sqdmlalbt_z_zzz"_h:
    case "sqdmlalt_z_zzz"_h:
    case "sqdmlslb_z_zzz"_h:
    case "sqdmlslbt_z_zzz"_h:
    case "sqdmlslt_z_zzz"_h:
    case "sqrdcmlah_z_zzz"_h:
    case "sqrdmlah_z_zzz"_h:
    case "sqrdmlsh_z_zzz"_h:
    case "uaba_z_zzz"_h:
    case "uabalb_z_zzz"_h:
    case "uabalt_z_zzz"_h:
    case "udot_z_zzz"_h:
    case "umlalb_z_zzz"_h:
    case "umlalt_z_zzz"_h:
    case "umlslb_z_zzz"_h:
    case "umlslt_z_zzz"_h:
    case "usdot_z_zzz_s"_h:
    case "fmmla_z_zzz_s"_h:
    case "fmmla_z_zzz_d"_h:
    case "smmla_z_zzz"_h:
    case "ummla_z_zzz"_h:
    case "usmmla_z_zzz"_h:
      return movprfx_is_unpredicated && zd_isnt_zm && zd_isnt_zn && zd_matches;

    case "addp_z_p_zz"_h:
    case "cadd_z_zz"_h:
    case "clasta_z_p_zz"_h:
    case "clastb_z_p_zz"_h:
    case "decd_z_zs"_h:
    case "dech_z_zs"_h:
    case "decw_z_zs"_h:
    case "ext_z_zi_des"_h:
    case "faddp_z_p_zz"_h:
    case "fmaxnmp_z_p_zz"_h:
    case "fmaxp_z_p_zz"_h:
    case "fminnmp_z_p_zz"_h:
    case "fminp_z_p_zz"_h:
    case "ftmad_z_zzi"_h:
    case "incd_z_zs"_h:
    case "inch_z_zs"_h:
    case "incw_z_zs"_h:
    case "insr_z_v"_h:
    case "smaxp_z_p_zz"_h:
    case "sminp_z_p_zz"_h:
    case "splice_z_p_zz_des"_h:
    case "sqcadd_z_zz"_h:
    case "sqdecd_z_zs"_h:
    case "sqdech_z_zs"_h:
    case "sqdecw_z_zs"_h:
    case "sqincd_z_zs"_h:
    case "sqinch_z_zs"_h:
    case "sqincw_z_zs"_h:
    case "srsra_z_zi"_h:
    case "ssra_z_zi"_h:
    case "umaxp_z_p_zz"_h:
    case "uminp_z_p_zz"_h:
    case "uqdecd_z_zs"_h:
    case "uqdech_z_zs"_h:
    case "uqdecw_z_zs"_h:
    case "uqincd_z_zs"_h:
    case "uqinch_z_zs"_h:
    case "uqincw_z_zs"_h:
    case "ursra_z_zi"_h:
    case "usra_z_zi"_h:
    case "xar_z_zzi"_h:
      return movprfx_is_unpredicated && zd_isnt_zn && zd_matches;

    case "add_z_zi"_h:
    case "and_z_zi"_h:
    case "decp_z_p_z"_h:
    case "eor_z_zi"_h:
    case "incp_z_p_z"_h:
    case "insr_z_r"_h:
    case "mul_z_zi"_h:
    case "orr_z_zi"_h:
    case "smax_z_zi"_h:
    case "smin_z_zi"_h:
    case "sqadd_z_zi"_h:
    case "sqdecp_z_p_z"_h:
    case "sqincp_z_p_z"_h:
    case "sqsub_z_zi"_h:
    case "sub_z_zi"_h:
    case "subr_z_zi"_h:
    case "umax_z_zi"_h:
    case "umin_z_zi"_h:
    case "uqadd_z_zi"_h:
    case "uqdecp_z_p_z"_h:
    case "uqincp_z_p_z"_h:
    case "uqsub_z_zi"_h:
      return movprfx_is_unpredicated && zd_matches;

    case "cpy_z_p_i"_h:
      if (movprfx_is_predicated) {
        if (!vform_matches) return false;
        if (movprfx_pg != GetRx<19, 16>()) return false;
      }
      // Only the merging form can take movprfx.
      if (ExtractBit(14) == 0) return false;
      return zd_matches;

    case "fcpy_z_p_i"_h:
      return (movprfx_is_unpredicated ||
              ((movprfx_pg == GetRx<19, 16>()) && vform_matches)) &&
             zd_matches;

    case "flogb_z_p_z"_h:
      return (movprfx_is_unpredicated ||
              ((movprfx_vform == GetSVEVectorFormat(17)) && pg_matches_low8)) &&
             zd_isnt_zn && zd_matches;

    case "asr_z_p_zi"_h:
    case "asrd_z_p_zi"_h:
    case "lsl_z_p_zi"_h:
    case "lsr_z_p_zi"_h:
    case "sqshl_z_p_zi"_h:
    case "sqshlu_z_p_zi"_h:
    case "srshr_z_p_zi"_h:
    case "uqshl_z_p_zi"_h:
    case "urshr_z_p_zi"_h:
      return (movprfx_is_unpredicated ||
              ((movprfx_vform ==
                SVEFormatFromLaneSizeInBytesLog2(
                    GetSVEImmShiftAndLaneSizeLog2(true).second)) &&
               pg_matches_low8)) &&
             zd_matches;

    case "fcvt_z_p_z_d2h"_h:
    case "fcvt_z_p_z_d2s"_h:
    case "fcvt_z_p_z_h2d"_h:
    case "fcvt_z_p_z_s2d"_h:
    case "fcvtx_z_p_z_d2s"_h:
    case "fcvtzs_z_p_z_d2w"_h:
    case "fcvtzs_z_p_z_d2x"_h:
    case "fcvtzs_z_p_z_fp162x"_h:
    case "fcvtzs_z_p_z_s2x"_h:
    case "fcvtzu_z_p_z_d2w"_h:
    case "fcvtzu_z_p_z_d2x"_h:
    case "fcvtzu_z_p_z_fp162x"_h:
    case "fcvtzu_z_p_z_s2x"_h:
    case "scvtf_z_p_z_w2d"_h:
    case "scvtf_z_p_z_x2d"_h:
    case "scvtf_z_p_z_x2fp16"_h:
    case "scvtf_z_p_z_x2s"_h:
    case "ucvtf_z_p_z_w2d"_h:
    case "ucvtf_z_p_z_x2d"_h:
    case "ucvtf_z_p_z_x2fp16"_h:
    case "ucvtf_z_p_z_x2s"_h:
      return (movprfx_is_unpredicated ||
              ((movprfx_vform == kFormatVnD) && pg_matches_low8)) &&
             zd_isnt_zn && zd_matches;

    case "fcvtzs_z_p_z_fp162h"_h:
    case "fcvtzu_z_p_z_fp162h"_h:
    case "scvtf_z_p_z_h2fp16"_h:
    case "ucvtf_z_p_z_h2fp16"_h:
      return (movprfx_is_unpredicated ||
              ((movprfx_vform == kFormatVnH) && pg_matches_low8)) &&
             zd_isnt_zn && zd_matches;

    case "fcvt_z_p_z_h2s"_h:
    case "fcvt_z_p_z_s2h"_h:
    case "fcvtzs_z_p_z_fp162w"_h:
    case "fcvtzs_z_p_z_s2w"_h:
    case "fcvtzu_z_p_z_fp162w"_h:
    case "fcvtzu_z_p_z_s2w"_h:
    case "scvtf_z_p_z_w2fp16"_h:
    case "scvtf_z_p_z_w2s"_h:
    case "ucvtf_z_p_z_w2fp16"_h:
    case "ucvtf_z_p_z_w2s"_h:
      return (movprfx_is_unpredicated ||
              ((movprfx_vform == kFormatVnS) && pg_matches_low8)) &&
             zd_isnt_zn && zd_matches;

    case "fcmla_z_p_zzz"_h:
    case "fmad_z_p_zzz"_h:
    case "fmla_z_p_zzz"_h:
    case "fmls_z_p_zzz"_h:
    case "fmsb_z_p_zzz"_h:
    case "fnmad_z_p_zzz"_h:
    case "fnmla_z_p_zzz"_h:
    case "fnmls_z_p_zzz"_h:
    case "fnmsb_z_p_zzz"_h:
    case "mad_z_p_zzz"_h:
    case "mla_z_p_zzz"_h:
    case "mls_z_p_zzz"_h:
    case "msb_z_p_zzz"_h:
      return (movprfx_is_unpredicated || (pg_matches_low8 && vform_matches)) &&
             zd_isnt_zm && zd_isnt_zn && zd_matches;

    case "abs_z_p_z"_h:
    case "add_z_p_zz"_h:
    case "and_z_p_zz"_h:
    case "asr_z_p_zw"_h:
    case "asr_z_p_zz"_h:
    case "asrr_z_p_zz"_h:
    case "bic_z_p_zz"_h:
    case "cls_z_p_z"_h:
    case "clz_z_p_z"_h:
    case "cnot_z_p_z"_h:
    case "cnt_z_p_z"_h:
    case "cpy_z_p_v"_h:
    case "eor_z_p_zz"_h:
    case "fabd_z_p_zz"_h:
    case "fabs_z_p_z"_h:
    case "fadd_z_p_zz"_h:
    case "fcadd_z_p_zz"_h:
    case "fdiv_z_p_zz"_h:
    case "fdivr_z_p_zz"_h:
    case "fmax_z_p_zz"_h:
    case "fmaxnm_z_p_zz"_h:
    case "fmin_z_p_zz"_h:
    case "fminnm_z_p_zz"_h:
    case "fmul_z_p_zz"_h:
    case "fmulx_z_p_zz"_h:
    case "fneg_z_p_z"_h:
    case "frecpx_z_p_z"_h:
    case "frinta_z_p_z"_h:
    case "frinti_z_p_z"_h:
    case "frintm_z_p_z"_h:
    case "frintn_z_p_z"_h:
    case "frintp_z_p_z"_h:
    case "frintx_z_p_z"_h:
    case "frintz_z_p_z"_h:
    case "fscale_z_p_zz"_h:
    case "fsqrt_z_p_z"_h:
    case "fsub_z_p_zz"_h:
    case "fsubr_z_p_zz"_h:
    case "lsl_z_p_zw"_h:
    case "lsl_z_p_zz"_h:
    case "lslr_z_p_zz"_h:
    case "lsr_z_p_zw"_h:
    case "lsr_z_p_zz"_h:
    case "lsrr_z_p_zz"_h:
    case "mul_z_p_zz"_h:
    case "neg_z_p_z"_h:
    case "not_z_p_z"_h:
    case "orr_z_p_zz"_h:
    case "rbit_z_p_z"_h:
    case "revb_z_z"_h:
    case "revh_z_z"_h:
    case "revw_z_z"_h:
    case "sabd_z_p_zz"_h:
    case "sadalp_z_p_z"_h:
    case "sdiv_z_p_zz"_h:
    case "sdivr_z_p_zz"_h:
    case "shadd_z_p_zz"_h:
    case "shsub_z_p_zz"_h:
    case "shsubr_z_p_zz"_h:
    case "smax_z_p_zz"_h:
    case "smin_z_p_zz"_h:
    case "smulh_z_p_zz"_h:
    case "sqabs_z_p_z"_h:
    case "sqadd_z_p_zz"_h:
    case "sqneg_z_p_z"_h:
    case "sqrshl_z_p_zz"_h:
    case "sqrshlr_z_p_zz"_h:
    case "sqshl_z_p_zz"_h:
    case "sqshlr_z_p_zz"_h:
    case "sqsub_z_p_zz"_h:
    case "sqsubr_z_p_zz"_h:
    case "srhadd_z_p_zz"_h:
    case "srshl_z_p_zz"_h:
    case "srshlr_z_p_zz"_h:
    case "sub_z_p_zz"_h:
    case "subr_z_p_zz"_h:
    case "suqadd_z_p_zz"_h:
    case "sxtb_z_p_z"_h:
    case "sxth_z_p_z"_h:
    case "sxtw_z_p_z"_h:
    case "uabd_z_p_zz"_h:
    case "uadalp_z_p_z"_h:
    case "udiv_z_p_zz"_h:
    case "udivr_z_p_zz"_h:
    case "uhadd_z_p_zz"_h:
    case "uhsub_z_p_zz"_h:
    case "uhsubr_z_p_zz"_h:
    case "umax_z_p_zz"_h:
    case "umin_z_p_zz"_h:
    case "umulh_z_p_zz"_h:
    case "uqadd_z_p_zz"_h:
    case "uqrshl_z_p_zz"_h:
    case "uqrshlr_z_p_zz"_h:
    case "uqshl_z_p_zz"_h:
    case "uqshlr_z_p_zz"_h:
    case "uqsub_z_p_zz"_h:
    case "uqsubr_z_p_zz"_h:
    case "urecpe_z_p_z"_h:
    case "urhadd_z_p_zz"_h:
    case "urshl_z_p_zz"_h:
    case "urshlr_z_p_zz"_h:
    case "ursqrte_z_p_z"_h:
    case "usqadd_z_p_zz"_h:
    case "uxtb_z_p_z"_h:
    case "uxth_z_p_z"_h:
    case "uxtw_z_p_z"_h:
      return (movprfx_is_unpredicated || (pg_matches_low8 && vform_matches)) &&
             zd_isnt_zn && zd_matches;

    case "cpy_z_p_r"_h:
    case "fadd_z_p_zs"_h:
    case "fmax_z_p_zs"_h:
    case "fmaxnm_z_p_zs"_h:
    case "fmin_z_p_zs"_h:
    case "fminnm_z_p_zs"_h:
    case "fmul_z_p_zs"_h:
    case "fsub_z_p_zs"_h:
    case "fsubr_z_p_zs"_h:
      return (movprfx_is_unpredicated || (pg_matches_low8 && vform_matches)) &&
             zd_matches;
    default:
      return false;
  }
}  // NOLINT(readability/fn_size)

bool Instruction::IsLoad() const {
  if (Mask(LoadStoreAnyFMask) != LoadStoreAnyFixed) {

@@ -103,6 +541,68 @@ bool Instruction::IsStore() const {
}


std::pair<int, int> Instruction::GetSVEPermuteIndexAndLaneSizeLog2() const {
  uint32_t imm_2 = ExtractBits<0x00C00000>();
  uint32_t tsz_5 = ExtractBits<0x001F0000>();
  uint32_t imm_7 = (imm_2 << 5) | tsz_5;
  int lane_size_in_byte_log_2 = std::min(CountTrailingZeros(tsz_5), 5);
  int index = ExtractUnsignedBitfield32(6, lane_size_in_byte_log_2 + 1, imm_7);
  return std::make_pair(index, lane_size_in_byte_log_2);
}
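
// Worked example (illustrative, not from the vixl sources): tsz_5 = 0b00100
// has two trailing zeros, so the lane size is S (log2 = 2); with imm_2 = 0b10,
// imm_7 = 0b1000100 and the index is bits <6:3> of imm_7, i.e. 0b1000 = 8.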

// Get the register and index for SVE indexed multiplies encoded in the forms:
//  .h : Zm = <18:16>, index = <22><20:19>
//  .s : Zm = <18:16>, index = <20:19>
//  .d : Zm = <19:16>, index = <20>
std::pair<int, int> Instruction::GetSVEMulZmAndIndex() const {
  int reg_code = GetRmLow16();
  int index = ExtractBits(20, 19);

  // For .h, index uses bit zero of the size field, so kFormatVnB below implies
  // half-word lane, with most-significant bit of the index zero.
  switch (GetSVEVectorFormat()) {
    case kFormatVnD:
      index >>= 1;  // Only bit 20 in the index for D lanes.
      break;
    case kFormatVnH:
      index += 4;  // Bit 22 is the top bit of index.
      VIXL_FALLTHROUGH();
    case kFormatVnB:
    case kFormatVnS:
      reg_code &= 7;  // Three bits used for the register.
      break;
    default:
      VIXL_UNIMPLEMENTED();
      break;
  }
  return std::make_pair(reg_code, index);
}
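
// Worked example (illustrative, not from the vixl sources): in an .h form, a
// set bit 22 makes the size field decode as kFormatVnH, so the index is
// <20:19> plus 4; with <20:19> = 0b01 that gives index 5, and only bits
// <18:16> select Zm.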

// Get the register and index for SVE indexed long multiplies encoded in the
// forms:
//  .h : Zm = <18:16>, index = <20:19><11>
//  .s : Zm = <19:16>, index = <20><11>
std::pair<int, int> Instruction::GetSVEMulLongZmAndIndex() const {
  int reg_code = GetRmLow16();
  int index = ExtractBit(11);

  // For long multiplies, the SVE size field <23:22> encodes the destination
  // element size. The source element size is half the width.
  switch (GetSVEVectorFormat()) {
    case kFormatVnS:
      reg_code &= 7;
      index |= ExtractBits(20, 19) << 1;
      break;
    case kFormatVnD:
      index |= ExtractBit(20) << 1;
      break;
    default:
      VIXL_UNIMPLEMENTED();
      break;
  }
  return std::make_pair(reg_code, index);
}
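
// Worked example (illustrative, not from the vixl sources): for the .h source
// form (destination format kFormatVnS) with <20:19> = 0b10 and bit 11 set,
// the index is (0b10 << 1) | 1 = 5, and Zm keeps only three register bits.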

// Logical immediates can't encode zero, so a return value of zero is used to
// indicate a failure case. Specifically, where the constraints on imm_s are
// not met.

@@ -111,7 +611,114 @@ uint64_t Instruction::GetImmLogical() const {
  int32_t n = GetBitN();
  int32_t imm_s = GetImmSetBits();
  int32_t imm_r = GetImmRotate();
  return DecodeImmBitMask(n, imm_s, imm_r, reg_size);
}

// Logical immediates can't encode zero, so a return value of zero is used to
// indicate a failure case. Specifically, where the constraints on imm_s are
// not met.
uint64_t Instruction::GetSVEImmLogical() const {
  int n = GetSVEBitN();
  int imm_s = GetSVEImmSetBits();
  int imm_r = GetSVEImmRotate();
  int lane_size_in_bytes_log2 = GetSVEBitwiseImmLaneSizeInBytesLog2();
  switch (lane_size_in_bytes_log2) {
    case kDRegSizeInBytesLog2:
    case kSRegSizeInBytesLog2:
    case kHRegSizeInBytesLog2:
    case kBRegSizeInBytesLog2: {
      int lane_size_in_bits = 1 << (lane_size_in_bytes_log2 + 3);
      return DecodeImmBitMask(n, imm_s, imm_r, lane_size_in_bits);
    }
    default:
      return 0;
  }
}

std::pair<int, int> Instruction::GetSVEImmShiftAndLaneSizeLog2(
    bool is_predicated) const {
  Instr tsize =
      is_predicated ? ExtractBits<0x00C00300>() : ExtractBits<0x00D80000>();
  Instr imm_3 =
      is_predicated ? ExtractBits<0x000000E0>() : ExtractBits<0x00070000>();
  if (tsize == 0) {
    // The `tsize` bit field is undefined when zero, so return the convenience
    // value kWMinInt to indicate a failure case.
    return std::make_pair(kWMinInt, kWMinInt);
  }

  int lane_size_in_bytes_log_2 = 32 - CountLeadingZeros(tsize, 32) - 1;
  int esize = (1 << lane_size_in_bytes_log_2) * kBitsPerByte;
  int shift = (2 * esize) - ((tsize << 3) | imm_3);
  return std::make_pair(shift, lane_size_in_bytes_log_2);
}
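
// Worked example (illustrative, not from the vixl sources): tsize = 0b0001
// gives lane size log2 0, so esize = 8; with imm_3 = 0b101 the shift is
// (2 * 8) - ((0b0001 << 3) | 0b101) = 16 - 13 = 3.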

int Instruction::GetSVEMsizeFromDtype(bool is_signed, int dtype_h_lsb) const {
  Instr dtype_h = ExtractBits(dtype_h_lsb + 1, dtype_h_lsb);
  if (is_signed) {
    dtype_h = dtype_h ^ 0x3;
  }
  return dtype_h;
}

int Instruction::GetSVEEsizeFromDtype(bool is_signed, int dtype_l_lsb) const {
  Instr dtype_l = ExtractBits(dtype_l_lsb + 1, dtype_l_lsb);
  if (is_signed) {
    dtype_l = dtype_l ^ 0x3;
  }
  return dtype_l;
}

int Instruction::GetSVEBitwiseImmLaneSizeInBytesLog2() const {
  int n = GetSVEBitN();
  int imm_s = GetSVEImmSetBits();
  unsigned type_bitset =
      (n << SVEImmSetBits_width) | (~imm_s & GetUintMask(SVEImmSetBits_width));

  // A lane size is constructed from the n and imm_s bits according to
  // the following table:
  //
  //    N   imms   size
  //    0  0xxxxx   32
  //    0  10xxxx   16
  //    0  110xxx    8
  //    0  1110xx    8
  //    0  11110x    8
  //    1  xxxxxx   64

  if (type_bitset == 0) {
    // Bail out early since `HighestSetBitPosition` doesn't accept zero
    // value input.
    return -1;
  }

  switch (HighestSetBitPosition(type_bitset)) {
    case 6:
      return kDRegSizeInBytesLog2;
    case 5:
      return kSRegSizeInBytesLog2;
    case 4:
      return kHRegSizeInBytesLog2;
    case 3:
    case 2:
    case 1:
      return kBRegSizeInBytesLog2;
    default:
      // RESERVED encoding.
      return -1;
  }
}
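
// Worked example (illustrative, not from the vixl sources): n = 0 with
// imm_s = 0b101101 matches the "0 10xxxx" row; type_bitset becomes
// ~0b101101 & 0x3f = 0b010010, whose highest set bit is 4, so the lanes are
// 16-bit (kHRegSizeInBytesLog2).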

int Instruction::GetSVEExtractImmediate() const {
  const int imm8h_mask = 0x001F0000;
  const int imm8l_mask = 0x00001C00;
  return ExtractBits<imm8h_mask | imm8l_mask>();
}

uint64_t Instruction::DecodeImmBitMask(int32_t n,
                                       int32_t imm_s,
                                       int32_t imm_r,
                                       int32_t size) const {
  // An integer is constructed from the n, imm_s and imm_r bits according to
  // the following table:
  //

@@ -146,7 +753,7 @@ uint64_t Instruction::GetImmLogical() const {
    return 0;
  }
  uint64_t bits = (UINT64_C(1) << ((imm_s & mask) + 1)) - 1;
  return RepeatBitsAcrossReg(reg_size,
  return RepeatBitsAcrossReg(size,
                             RotateRight(bits, imm_r & mask, width),
                             width);
}
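
// Worked example (illustrative, not from the vixl sources): a 64-bit logical
// immediate with n = 1, imm_s = 0b000011 and imm_r = 0 gives
// bits = (1 << 4) - 1 = 0xf; with no rotation and a 64-bit repeat width the
// decoded value is 0xf.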

@@ -397,8 +1004,6 @@ void Instruction::SetImmLLiteral(const Instruction* source) {


VectorFormat VectorFormatHalfWidth(VectorFormat vform) {
  VIXL_ASSERT(vform == kFormat8H || vform == kFormat4S || vform == kFormat2D ||
              vform == kFormatH || vform == kFormatS || vform == kFormatD);
  switch (vform) {
    case kFormat8H:
      return kFormat8B;

@@ -406,12 +1011,20 @@ VectorFormat VectorFormatHalfWidth(VectorFormat vform) {
      return kFormat4H;
    case kFormat2D:
      return kFormat2S;
    case kFormat1Q:
      return kFormat1D;
    case kFormatH:
      return kFormatB;
    case kFormatS:
      return kFormatH;
    case kFormatD:
      return kFormatS;
    case kFormatVnH:
      return kFormatVnB;
    case kFormatVnS:
      return kFormatVnH;
    case kFormatVnD:
      return kFormatVnS;
    default:
      VIXL_UNREACHABLE();
      return kFormatUndefined;

@@ -420,8 +1033,6 @@ VectorFormat VectorFormatHalfWidth(VectorFormat vform) {


VectorFormat VectorFormatDoubleWidth(VectorFormat vform) {
  VIXL_ASSERT(vform == kFormat8B || vform == kFormat4H || vform == kFormat2S ||
              vform == kFormatB || vform == kFormatH || vform == kFormatS);
  switch (vform) {
    case kFormat8B:
      return kFormat8H;

@@ -435,6 +1046,12 @@ VectorFormat VectorFormatDoubleWidth(VectorFormat vform) {
      return kFormatS;
    case kFormatS:
      return kFormatD;
    case kFormatVnB:
      return kFormatVnH;
    case kFormatVnH:
      return kFormatVnS;
    case kFormatVnS:
      return kFormatVnD;
    default:
      VIXL_UNREACHABLE();
      return kFormatUndefined;

@@ -480,6 +1097,14 @@ VectorFormat VectorFormatHalfWidthDoubleLanes(VectorFormat vform) {
      return kFormat2S;
    case kFormat2D:
      return kFormat4S;
    case kFormat1Q:
      return kFormat2D;
    case kFormatVnH:
      return kFormatVnB;
    case kFormatVnS:
      return kFormatVnH;
    case kFormatVnD:
      return kFormatVnS;
    default:
      VIXL_UNREACHABLE();
      return kFormatUndefined;

@@ -518,8 +1143,8 @@ VectorFormat VectorFormatHalfLanes(VectorFormat vform) {
}


VectorFormat ScalarFormatFromLaneSize(int laneSize) {
  switch (laneSize) {
VectorFormat ScalarFormatFromLaneSize(int lane_size_in_bits) {
  switch (lane_size_in_bits) {
    case 8:
      return kFormatB;
    case 16:

@@ -535,6 +1160,70 @@ VectorFormat ScalarFormatFromLaneSize(int laneSize) {
}


bool IsSVEFormat(VectorFormat vform) {
  switch (vform) {
    case kFormatVnB:
    case kFormatVnH:
    case kFormatVnS:
    case kFormatVnD:
    case kFormatVnQ:
    case kFormatVnO:
      return true;
    default:
      return false;
  }
}


VectorFormat SVEFormatFromLaneSizeInBytes(int lane_size_in_bytes) {
  switch (lane_size_in_bytes) {
    case 1:
      return kFormatVnB;
    case 2:
      return kFormatVnH;
    case 4:
      return kFormatVnS;
    case 8:
      return kFormatVnD;
    case 16:
      return kFormatVnQ;
    default:
      VIXL_UNREACHABLE();
      return kFormatUndefined;
  }
}


VectorFormat SVEFormatFromLaneSizeInBits(int lane_size_in_bits) {
  switch (lane_size_in_bits) {
    case 8:
    case 16:
    case 32:
    case 64:
    case 128:
      return SVEFormatFromLaneSizeInBytes(lane_size_in_bits / kBitsPerByte);
    default:
      VIXL_UNREACHABLE();
      return kFormatUndefined;
  }
}


VectorFormat SVEFormatFromLaneSizeInBytesLog2(int lane_size_in_bytes_log2) {
  switch (lane_size_in_bytes_log2) {
    case 0:
    case 1:
    case 2:
    case 3:
    case 4:
      return SVEFormatFromLaneSizeInBytes(1 << lane_size_in_bytes_log2);
    default:
      VIXL_UNREACHABLE();
      return kFormatUndefined;
  }
}


VectorFormat ScalarFormatFromFormat(VectorFormat vform) {
  return ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
}

@@ -542,6 +1231,7 @@ VectorFormat ScalarFormatFromFormat(VectorFormat vform) {

unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) {
  VIXL_ASSERT(vform != kFormatUndefined);
  VIXL_ASSERT(!IsSVEFormat(vform));
  switch (vform) {
    case kFormatB:
      return kBRegSize;

@@ -551,14 +1241,20 @@ unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) {
    case kFormat2H:
      return kSRegSize;
    case kFormatD:
      return kDRegSize;
    case kFormat8B:
    case kFormat4H:
    case kFormat2S:
    case kFormat1D:
      return kDRegSize;
    default:
    case kFormat16B:
    case kFormat8H:
    case kFormat4S:
    case kFormat2D:
    case kFormat1Q:
      return kQRegSize;
    default:
      VIXL_UNREACHABLE();
      return 0;
  }
}

@@ -574,20 +1270,29 @@ unsigned LaneSizeInBitsFromFormat(VectorFormat vform) {
    case kFormatB:
    case kFormat8B:
    case kFormat16B:
    case kFormatVnB:
      return 8;
    case kFormatH:
    case kFormat2H:
    case kFormat4H:
    case kFormat8H:
    case kFormatVnH:
      return 16;
    case kFormatS:
    case kFormat2S:
    case kFormat4S:
    case kFormatVnS:
      return 32;
    case kFormatD:
    case kFormat1D:
    case kFormat2D:
    case kFormatVnD:
      return 64;
    case kFormat1Q:
    case kFormatVnQ:
      return 128;
    case kFormatVnO:
      return 256;
    default:
      VIXL_UNREACHABLE();
      return 0;

@@ -606,20 +1311,26 @@ int LaneSizeInBytesLog2FromFormat(VectorFormat vform) {
    case kFormatB:
    case kFormat8B:
    case kFormat16B:
    case kFormatVnB:
      return 0;
    case kFormatH:
    case kFormat2H:
    case kFormat4H:
    case kFormat8H:
    case kFormatVnH:
      return 1;
    case kFormatS:
    case kFormat2S:
    case kFormat4S:
    case kFormatVnS:
      return 2;
    case kFormatD:
    case kFormat1D:
    case kFormat2D:
    case kFormatVnD:
      return 3;
    case kFormatVnQ:
      return 4;
    default:
      VIXL_UNREACHABLE();
      return 0;

@@ -643,6 +1354,7 @@ int LaneCountFromFormat(VectorFormat vform) {
    case kFormat2D:
      return 2;
    case kFormat1D:
    case kFormat1Q:
    case kFormatB:
    case kFormatH:
    case kFormatS:

@@ -697,17 +1409,19 @@ bool IsVectorFormat(VectorFormat vform) {


int64_t MaxIntFromFormat(VectorFormat vform) {
  return INT64_MAX >> (64 - LaneSizeInBitsFromFormat(vform));
  int lane_size = LaneSizeInBitsFromFormat(vform);
  return static_cast<int64_t>(GetUintMask(lane_size) >> 1);
}


int64_t MinIntFromFormat(VectorFormat vform) {
  return INT64_MIN >> (64 - LaneSizeInBitsFromFormat(vform));
  return -MaxIntFromFormat(vform) - 1;
}


uint64_t MaxUintFromFormat(VectorFormat vform) {
  return UINT64_MAX >> (64 - LaneSizeInBitsFromFormat(vform));
  return GetUintMask(LaneSizeInBitsFromFormat(vform));
}
|
||||
|
||||
} // namespace aarch64
|
||||
} // namespace vixl
|
||||
|
|
|
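The lane-bound hunk above replaces width-dependent shifts of INT64_MAX/INT64_MIN with mask arithmetic built on GetUintMask. A minimal standalone sketch (not VIXL code; GetUintMask is re-implemented here on the assumption that it returns a value with the low n bits set) checks that the two formulations agree:

#include <cassert>
#include <cstdint>

// Assumed behaviour of VIXL's GetUintMask: the low `bits` bits set.
static uint64_t GetUintMaskSketch(int bits) {
  return (bits == 64) ? ~UINT64_C(0) : ((UINT64_C(1) << bits) - 1);
}

int main() {
  for (int lane_size : {8, 16, 32, 64}) {
    uint64_t max_uint = GetUintMaskSketch(lane_size);
    int64_t max_int = static_cast<int64_t>(max_uint >> 1);
    int64_t min_int = -max_int - 1;
    // The new mask-based bounds match the old shift-based expressions.
    assert(max_uint == (UINT64_MAX >> (64 - lane_size)));
    assert(max_int == (INT64_MAX >> (64 - lane_size)));
    assert(min_int == (INT64_MIN >> (64 - lane_size)));
  }
  return 0;
}

One plausible motivation for the rewrite is that right-shifting a negative value was only implementation-defined before C++20, so deriving the signed bounds from an unsigned mask sidesteps that.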
@@ -1,916 +0,0 @@
// Copyright 2014, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "instrument-aarch64.h"

namespace vixl {
namespace aarch64 {

Counter::Counter(const char* name, CounterType type)
    : count_(0), enabled_(false), type_(type) {
  VIXL_ASSERT(name != NULL);
  strncpy(name_, name, kCounterNameMaxLength);
  // Make sure `name_` is always NULL-terminated, even if the source's length is
  // higher.
  name_[kCounterNameMaxLength - 1] = '\0';
}


void Counter::Enable() { enabled_ = true; }


void Counter::Disable() { enabled_ = false; }


bool Counter::IsEnabled() { return enabled_; }


void Counter::Increment() {
  if (enabled_) {
    count_++;
  }
}


uint64_t Counter::GetCount() {
  uint64_t result = count_;
  if (type_ == Gauge) {
    // If the counter is a Gauge, reset the count after reading.
    count_ = 0;
  }
  return result;
}


const char* Counter::GetName() { return name_; }


CounterType Counter::GetType() { return type_; }


struct CounterDescriptor {
  const char* name;
  CounterType type;
};


static const CounterDescriptor kCounterList[] =
    {{"Instruction", Cumulative},

     {"Move Immediate", Gauge},
     {"Add/Sub DP", Gauge},
     {"Logical DP", Gauge},
     {"Other Int DP", Gauge},
     {"FP DP", Gauge},

     {"Conditional Select", Gauge},
     {"Conditional Compare", Gauge},

     {"Unconditional Branch", Gauge},
     {"Compare and Branch", Gauge},
     {"Test and Branch", Gauge},
     {"Conditional Branch", Gauge},

     {"Load Integer", Gauge},
     {"Load FP", Gauge},
     {"Load Pair", Gauge},
     {"Load Literal", Gauge},

     {"Store Integer", Gauge},
     {"Store FP", Gauge},
     {"Store Pair", Gauge},

     {"PC Addressing", Gauge},
     {"Other", Gauge},
     {"NEON", Gauge},
     {"Crypto", Gauge}};


Instrument::Instrument(const char* datafile, uint64_t sample_period)
    : output_stream_(stdout), sample_period_(sample_period) {
  // Set up the output stream. If datafile is non-NULL, use that file. If it
  // can't be opened, or datafile is NULL, use stdout.
  if (datafile != NULL) {
    output_stream_ = fopen(datafile, "w");
    if (output_stream_ == NULL) {
      printf("Can't open output file %s. Using stdout.\n", datafile);
      output_stream_ = stdout;
    }
  }

  static const int num_counters =
      sizeof(kCounterList) / sizeof(CounterDescriptor);

  // Dump an instrumentation description comment at the top of the file.
  fprintf(output_stream_, "# counters=%d\n", num_counters);
  fprintf(output_stream_, "# sample_period=%" PRIu64 "\n", sample_period_);

  // Construct Counter objects from counter description array.
  for (int i = 0; i < num_counters; i++) {
    Counter* counter = new Counter(kCounterList[i].name, kCounterList[i].type);
    counters_.push_back(counter);
  }

  DumpCounterNames();
}


Instrument::~Instrument() {
  // Dump any remaining instruction data to the output file.
  DumpCounters();

  // Free all the counter objects.
  std::list<Counter*>::iterator it;
  for (it = counters_.begin(); it != counters_.end(); it++) {
    delete *it;
  }

  if (output_stream_ != stdout) {
    fclose(output_stream_);
  }
}


void Instrument::Update() {
  // Increment the instruction counter, and dump all counters if a sample period
  // has elapsed.
  static Counter* counter = GetCounter("Instruction");
  VIXL_ASSERT(counter->GetType() == Cumulative);
  counter->Increment();

  if ((sample_period_ != 0) && counter->IsEnabled() &&
      (counter->GetCount() % sample_period_) == 0) {
    DumpCounters();
  }
}


void Instrument::DumpCounters() {
  // Iterate through the counter objects, dumping their values to the output
  // stream.
  std::list<Counter*>::const_iterator it;
  for (it = counters_.begin(); it != counters_.end(); it++) {
    fprintf(output_stream_, "%" PRIu64 ",", (*it)->GetCount());
  }
  fprintf(output_stream_, "\n");
  fflush(output_stream_);
}


void Instrument::DumpCounterNames() {
  // Iterate through the counter objects, dumping the counter names to the
  // output stream.
  std::list<Counter*>::const_iterator it;
  for (it = counters_.begin(); it != counters_.end(); it++) {
    fprintf(output_stream_, "%s,", (*it)->GetName());
  }
  fprintf(output_stream_, "\n");
  fflush(output_stream_);
}


void Instrument::HandleInstrumentationEvent(unsigned event) {
  switch (event) {
    case InstrumentStateEnable:
      Enable();
      break;
    case InstrumentStateDisable:
      Disable();
      break;
    default:
      DumpEventMarker(event);
  }
}


void Instrument::DumpEventMarker(unsigned marker) {
  // Dump an event marker to the output stream as a specially formatted comment
  // line.
  static Counter* counter = GetCounter("Instruction");

  fprintf(output_stream_,
          "# %c%c @ %" PRId64 "\n",
          marker & 0xff,
          (marker >> 8) & 0xff,
          counter->GetCount());
}


Counter* Instrument::GetCounter(const char* name) {
  // Get a Counter object by name from the counter list.
  std::list<Counter*>::const_iterator it;
  for (it = counters_.begin(); it != counters_.end(); it++) {
    if (strcmp((*it)->GetName(), name) == 0) {
      return *it;
    }
  }

  // A Counter by that name does not exist: print an error message to stderr
  // and the output file, and exit.
  static const char* error_message =
      "# Error: Unknown counter \"%s\". Exiting.\n";
  fprintf(stderr, error_message, name);
  fprintf(output_stream_, error_message, name);
  exit(1);
}


void Instrument::Enable() {
  std::list<Counter*>::iterator it;
  for (it = counters_.begin(); it != counters_.end(); it++) {
    (*it)->Enable();
  }
}


void Instrument::Disable() {
  std::list<Counter*>::iterator it;
  for (it = counters_.begin(); it != counters_.end(); it++) {
    (*it)->Disable();
  }
}


void Instrument::VisitPCRelAddressing(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("PC Addressing");
  counter->Increment();
}


void Instrument::VisitAddSubImmediate(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Add/Sub DP");
  counter->Increment();
}


void Instrument::VisitLogicalImmediate(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Logical DP");
  counter->Increment();
}


void Instrument::VisitMoveWideImmediate(const Instruction* instr) {
  Update();
  static Counter* counter = GetCounter("Move Immediate");

  if (instr->IsMovn() && (instr->GetRd() == kZeroRegCode)) {
    unsigned imm = instr->GetImmMoveWide();
    HandleInstrumentationEvent(imm);
  } else {
    counter->Increment();
  }
}


void Instrument::VisitBitfield(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Other Int DP");
  counter->Increment();
}


void Instrument::VisitExtract(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Other Int DP");
  counter->Increment();
}


void Instrument::VisitUnconditionalBranch(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Unconditional Branch");
  counter->Increment();
}


void Instrument::VisitUnconditionalBranchToRegister(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Unconditional Branch");
  counter->Increment();
}


void Instrument::VisitCompareBranch(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Compare and Branch");
  counter->Increment();
}


void Instrument::VisitTestBranch(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Test and Branch");
  counter->Increment();
}


void Instrument::VisitConditionalBranch(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Conditional Branch");
  counter->Increment();
}


void Instrument::VisitSystem(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Other");
  counter->Increment();
}


void Instrument::VisitException(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Other");
  counter->Increment();
}


void Instrument::InstrumentLoadStorePair(const Instruction* instr) {
  static Counter* load_pair_counter = GetCounter("Load Pair");
  static Counter* store_pair_counter = GetCounter("Store Pair");

  if (instr->Mask(LoadStorePairLBit) != 0) {
    load_pair_counter->Increment();
  } else {
    store_pair_counter->Increment();
  }
}


void Instrument::VisitLoadStorePairPostIndex(const Instruction* instr) {
  Update();
  InstrumentLoadStorePair(instr);
}


void Instrument::VisitLoadStorePairOffset(const Instruction* instr) {
  Update();
  InstrumentLoadStorePair(instr);
}


void Instrument::VisitLoadStorePairPreIndex(const Instruction* instr) {
  Update();
  InstrumentLoadStorePair(instr);
}


void Instrument::VisitLoadStorePairNonTemporal(const Instruction* instr) {
  Update();
  InstrumentLoadStorePair(instr);
}


void Instrument::VisitLoadStoreExclusive(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Other");
  counter->Increment();
}


void Instrument::VisitAtomicMemory(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Other");
  counter->Increment();
}


void Instrument::VisitLoadLiteral(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Load Literal");
  counter->Increment();
}


void Instrument::InstrumentLoadStore(const Instruction* instr) {
  static Counter* load_int_counter = GetCounter("Load Integer");
  static Counter* store_int_counter = GetCounter("Store Integer");
  static Counter* load_fp_counter = GetCounter("Load FP");
  static Counter* store_fp_counter = GetCounter("Store FP");

  switch (instr->Mask(LoadStoreMask)) {
    case STRB_w:
    case STRH_w:
    case STR_w:
      VIXL_FALLTHROUGH();
    case STR_x:
      store_int_counter->Increment();
      break;
    case STR_s:
      VIXL_FALLTHROUGH();
    case STR_d:
      store_fp_counter->Increment();
      break;
    case LDRB_w:
    case LDRH_w:
    case LDR_w:
    case LDR_x:
    case LDRSB_x:
    case LDRSH_x:
    case LDRSW_x:
    case LDRSB_w:
      VIXL_FALLTHROUGH();
    case LDRSH_w:
      load_int_counter->Increment();
      break;
    case LDR_s:
      VIXL_FALLTHROUGH();
    case LDR_d:
      load_fp_counter->Increment();
      break;
  }
}


void Instrument::VisitLoadStoreUnscaledOffset(const Instruction* instr) {
  Update();
  InstrumentLoadStore(instr);
}


void Instrument::VisitLoadStorePostIndex(const Instruction* instr) {
  USE(instr);
  Update();
  InstrumentLoadStore(instr);
}


void Instrument::VisitLoadStorePreIndex(const Instruction* instr) {
  Update();
  InstrumentLoadStore(instr);
}


void Instrument::VisitLoadStoreRegisterOffset(const Instruction* instr) {
  Update();
  InstrumentLoadStore(instr);
}


void Instrument::VisitLoadStoreUnsignedOffset(const Instruction* instr) {
  Update();
  InstrumentLoadStore(instr);
}


void Instrument::VisitLogicalShifted(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Logical DP");
  counter->Increment();
}


void Instrument::VisitAddSubShifted(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Add/Sub DP");
  counter->Increment();
}


void Instrument::VisitAddSubExtended(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Add/Sub DP");
  counter->Increment();
}


void Instrument::VisitAddSubWithCarry(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Add/Sub DP");
  counter->Increment();
}


void Instrument::VisitConditionalCompareRegister(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Conditional Compare");
  counter->Increment();
}


void Instrument::VisitConditionalCompareImmediate(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Conditional Compare");
  counter->Increment();
}


void Instrument::VisitConditionalSelect(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Conditional Select");
  counter->Increment();
}


void Instrument::VisitDataProcessing1Source(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Other Int DP");
  counter->Increment();
}


void Instrument::VisitDataProcessing2Source(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Other Int DP");
  counter->Increment();
}


void Instrument::VisitDataProcessing3Source(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Other Int DP");
  counter->Increment();
}


void Instrument::VisitFPCompare(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("FP DP");
  counter->Increment();
}


void Instrument::VisitFPConditionalCompare(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Conditional Compare");
  counter->Increment();
}


void Instrument::VisitFPConditionalSelect(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Conditional Select");
  counter->Increment();
}


void Instrument::VisitFPImmediate(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("FP DP");
  counter->Increment();
}


void Instrument::VisitFPDataProcessing1Source(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("FP DP");
  counter->Increment();
}


void Instrument::VisitFPDataProcessing2Source(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("FP DP");
  counter->Increment();
}


void Instrument::VisitFPDataProcessing3Source(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("FP DP");
  counter->Increment();
}


void Instrument::VisitFPIntegerConvert(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("FP DP");
  counter->Increment();
}


void Instrument::VisitFPFixedPointConvert(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("FP DP");
  counter->Increment();
}


void Instrument::VisitCrypto2RegSHA(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Crypto");
  counter->Increment();
}


void Instrument::VisitCrypto3RegSHA(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Crypto");
  counter->Increment();
}


void Instrument::VisitCryptoAES(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Crypto");
  counter->Increment();
}


void Instrument::VisitNEON2RegMisc(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEON2RegMiscFP16(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEON3Same(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEON3SameFP16(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEON3SameExtra(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEON3Different(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEONAcrossLanes(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEONByIndexedElement(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEONCopy(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEONExtract(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEONLoadStoreMultiStruct(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEONLoadStoreMultiStructPostIndex(
    const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEONLoadStoreSingleStruct(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEONLoadStoreSingleStructPostIndex(
    const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEONModifiedImmediate(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEONScalar2RegMisc(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEONScalar2RegMiscFP16(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEONScalar3Diff(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEONScalar3Same(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEONScalar3SameFP16(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEONScalar3SameExtra(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEONScalarByIndexedElement(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEONScalarCopy(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEONScalarPairwise(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEONScalarShiftImmediate(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEONShiftImmediate(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEONTable(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitNEONPerm(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("NEON");
  counter->Increment();
}


void Instrument::VisitUnallocated(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Other");
  counter->Increment();
}


void Instrument::VisitUnimplemented(const Instruction* instr) {
  USE(instr);
  Update();
  static Counter* counter = GetCounter("Other");
  counter->Increment();
}


}  // namespace aarch64
}  // namespace vixl
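For readers tracking what this deletion removes: the instrumentation kept one counter per instruction class, with two reset behaviours taken from the removed Counter::GetCount. A small model (not VIXL code, just the semantics of the deleted class) of the Gauge/Cumulative distinction:

#include <cassert>
#include <cstdint>

enum CounterTypeModel { Gauge, Cumulative };

struct CounterModel {
  uint64_t count = 0;
  CounterTypeModel type;
  explicit CounterModel(CounterTypeModel t) : type(t) {}
  void Increment() { count++; }
  uint64_t GetCount() {
    uint64_t result = count;
    if (type == Gauge) count = 0;  // Gauges reset each time they are read.
    return result;
  }
};

int main() {
  CounterModel neon(Gauge), instructions(Cumulative);
  for (int i = 0; i < 5; i++) {
    neon.Increment();
    instructions.Increment();
  }
  // A Gauge reports per-sample activity; a Cumulative counter keeps totals.
  assert(neon.GetCount() == 5 && neon.GetCount() == 0);
  assert(instructions.GetCount() == 5 && instructions.GetCount() == 5);
  return 0;
}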
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -30,32 +30,32 @@ namespace vixl {
namespace aarch64 {

// CPURegList utilities.
CPURegister CPURegList::PopLowestIndex() {
  if (IsEmpty()) {
    return NoCPUReg;
  }
  int index = CountTrailingZeros(list_);
  VIXL_ASSERT((1 << index) & list_);
CPURegister CPURegList::PopLowestIndex(RegList mask) {
  RegList list = list_ & mask;
  if (list == 0) return NoCPUReg;
  int index = CountTrailingZeros(list);
  VIXL_ASSERT(((static_cast<RegList>(1) << index) & list) != 0);
  Remove(index);
  return CPURegister(index, size_, type_);
}


CPURegister CPURegList::PopHighestIndex() {
  VIXL_ASSERT(IsValid());
  if (IsEmpty()) {
    return NoCPUReg;
  }
  int index = CountLeadingZeros(list_);
CPURegister CPURegList::PopHighestIndex(RegList mask) {
  RegList list = list_ & mask;
  if (list == 0) return NoCPUReg;
  int index = CountLeadingZeros(list);
  index = kRegListSizeInBits - 1 - index;
  VIXL_ASSERT((1 << index) & list_);
  VIXL_ASSERT(((static_cast<RegList>(1) << index) & list) != 0);
  Remove(index);
  return CPURegister(index, size_, type_);
}


bool CPURegList::IsValid() const {
  if ((type_ == CPURegister::kRegister) || (type_ == CPURegister::kVRegister)) {
  if (type_ == CPURegister::kNoRegister) {
    // We can't use IsEmpty here because that asserts IsValid().
    return list_ == 0;
  } else {
    bool is_valid = true;
    // Try to create a CPURegister for each element in the list.
    for (int i = 0; i < kRegListSizeInBits; i++) {
@@ -64,11 +64,6 @@ bool CPURegList::IsValid() const {
    }
  }
  return is_valid;
  } else if (type_ == CPURegister::kNoRegister) {
    // We can't use IsEmpty here because that asserts IsValid().
    return list_ == 0;
  } else {
    return false;
  }
}

@@ -149,145 +144,6 @@ const CPURegList kCalleeSavedV = CPURegList::GetCalleeSavedV();
const CPURegList kCallerSaved = CPURegList::GetCallerSaved();
const CPURegList kCallerSavedV = CPURegList::GetCallerSavedV();


// Registers.
#define WREG(n) w##n,
const Register Register::wregisters[] = {AARCH64_REGISTER_CODE_LIST(WREG)};
#undef WREG

#define XREG(n) x##n,
const Register Register::xregisters[] = {AARCH64_REGISTER_CODE_LIST(XREG)};
#undef XREG

#define BREG(n) b##n,
const VRegister VRegister::bregisters[] = {AARCH64_REGISTER_CODE_LIST(BREG)};
#undef BREG

#define HREG(n) h##n,
const VRegister VRegister::hregisters[] = {AARCH64_REGISTER_CODE_LIST(HREG)};
#undef HREG

#define SREG(n) s##n,
const VRegister VRegister::sregisters[] = {AARCH64_REGISTER_CODE_LIST(SREG)};
#undef SREG

#define DREG(n) d##n,
const VRegister VRegister::dregisters[] = {AARCH64_REGISTER_CODE_LIST(DREG)};
#undef DREG

#define QREG(n) q##n,
const VRegister VRegister::qregisters[] = {AARCH64_REGISTER_CODE_LIST(QREG)};
#undef QREG

#define VREG(n) v##n,
const VRegister VRegister::vregisters[] = {AARCH64_REGISTER_CODE_LIST(VREG)};
#undef VREG


const Register& Register::GetWRegFromCode(unsigned code) {
  if (code == kSPRegInternalCode) {
    return wsp;
  } else {
    VIXL_ASSERT(code < kNumberOfRegisters);
    return wregisters[code];
  }
}


const Register& Register::GetXRegFromCode(unsigned code) {
  if (code == kSPRegInternalCode) {
    return sp;
  } else {
    VIXL_ASSERT(code < kNumberOfRegisters);
    return xregisters[code];
  }
}


const VRegister& VRegister::GetBRegFromCode(unsigned code) {
  VIXL_ASSERT(code < kNumberOfVRegisters);
  return bregisters[code];
}


const VRegister& VRegister::GetHRegFromCode(unsigned code) {
  VIXL_ASSERT(code < kNumberOfVRegisters);
  return hregisters[code];
}


const VRegister& VRegister::GetSRegFromCode(unsigned code) {
  VIXL_ASSERT(code < kNumberOfVRegisters);
  return sregisters[code];
}


const VRegister& VRegister::GetDRegFromCode(unsigned code) {
  VIXL_ASSERT(code < kNumberOfVRegisters);
  return dregisters[code];
}


const VRegister& VRegister::GetQRegFromCode(unsigned code) {
  VIXL_ASSERT(code < kNumberOfVRegisters);
  return qregisters[code];
}


const VRegister& VRegister::GetVRegFromCode(unsigned code) {
  VIXL_ASSERT(code < kNumberOfVRegisters);
  return vregisters[code];
}


const Register& CPURegister::W() const {
  VIXL_ASSERT(IsValidRegister());
  return Register::GetWRegFromCode(code_);
}


const Register& CPURegister::X() const {
  VIXL_ASSERT(IsValidRegister());
  return Register::GetXRegFromCode(code_);
}


const VRegister& CPURegister::B() const {
  VIXL_ASSERT(IsValidVRegister());
  return VRegister::GetBRegFromCode(code_);
}


const VRegister& CPURegister::H() const {
  VIXL_ASSERT(IsValidVRegister());
  return VRegister::GetHRegFromCode(code_);
}


const VRegister& CPURegister::S() const {
  VIXL_ASSERT(IsValidVRegister());
  return VRegister::GetSRegFromCode(code_);
}


const VRegister& CPURegister::D() const {
  VIXL_ASSERT(IsValidVRegister());
  return VRegister::GetDRegFromCode(code_);
}


const VRegister& CPURegister::Q() const {
  VIXL_ASSERT(IsValidVRegister());
  return VRegister::GetQRegFromCode(code_);
}


const VRegister& CPURegister::V() const {
  VIXL_ASSERT(IsValidVRegister());
  return VRegister::GetVRegFromCode(code_);
}


// Operand.
Operand::Operand(int64_t immediate)
    : immediate_(immediate),
@@ -296,6 +152,12 @@ Operand::Operand(int64_t immediate)
      extend_(NO_EXTEND),
      shift_amount_(0) {}

Operand::Operand(IntegerOperand immediate)
    : immediate_(immediate.AsIntN(64)),
      reg_(NoReg),
      shift_(NO_SHIFT),
      extend_(NO_EXTEND),
      shift_amount_(0) {}

Operand::Operand(Register reg, Shift shift, unsigned shift_amount)
    : reg_(reg),
@@ -471,6 +333,24 @@ MemOperand::MemOperand(Register base, const Operand& offset, AddrMode addrmode)
}


bool MemOperand::IsPlainRegister() const {
  return IsImmediateOffset() && (GetOffset() == 0);
}


bool MemOperand::IsEquivalentToPlainRegister() const {
  if (regoffset_.Is(NoReg)) {
    // Immediate offset, pre-index or post-index.
    return GetOffset() == 0;
  } else if (GetRegisterOffset().IsZero()) {
    // Zero register offset, pre-index or post-index.
    // We can ignore shift and extend options because they all result in zero.
    return true;
  }
  return false;
}


bool MemOperand::IsImmediateOffset() const {
  return (addrmode_ == Offset) && regoffset_.Is(NoReg);
}
@@ -480,12 +360,16 @@ bool MemOperand::IsRegisterOffset() const {
  return (addrmode_ == Offset) && !regoffset_.Is(NoReg);
}


bool MemOperand::IsPreIndex() const { return addrmode_ == PreIndex; }


bool MemOperand::IsPostIndex() const { return addrmode_ == PostIndex; }

bool MemOperand::IsImmediatePreIndex() const {
  return IsPreIndex() && regoffset_.Is(NoReg);
}

bool MemOperand::IsImmediatePostIndex() const {
  return IsPostIndex() && regoffset_.Is(NoReg);
}

void MemOperand::AddOffset(int64_t offset) {
  VIXL_ASSERT(IsImmediateOffset());
@@ -493,6 +377,63 @@ void MemOperand::AddOffset(int64_t offset) {
}


bool SVEMemOperand::IsValid() const {
#ifdef VIXL_DEBUG
  {
    // It should not be possible for an SVEMemOperand to match multiple types.
    int count = 0;
    if (IsScalarPlusImmediate()) count++;
    if (IsScalarPlusScalar()) count++;
    if (IsScalarPlusVector()) count++;
    if (IsVectorPlusImmediate()) count++;
    if (IsVectorPlusScalar()) count++;
    if (IsVectorPlusVector()) count++;
    VIXL_ASSERT(count <= 1);
  }
#endif

  // We can't have a register _and_ an immediate offset.
  if ((offset_ != 0) && (!regoffset_.IsNone())) return false;

  if (shift_amount_ != 0) {
    // Only shift and extend modifiers can take a shift amount.
    switch (mod_) {
      case NO_SVE_OFFSET_MODIFIER:
      case SVE_MUL_VL:
        return false;
      case SVE_LSL:
      case SVE_UXTW:
      case SVE_SXTW:
        // Fall through.
        break;
    }
  }

  return IsScalarPlusImmediate() || IsScalarPlusScalar() ||
         IsScalarPlusVector() || IsVectorPlusImmediate() ||
         IsVectorPlusScalar() || IsVectorPlusVector();
}


bool SVEMemOperand::IsEquivalentToScalar() const {
  if (IsScalarPlusImmediate()) {
    return GetImmediateOffset() == 0;
  }
  if (IsScalarPlusScalar()) {
    // We can ignore the shift because it will still result in zero.
    return GetScalarOffset().IsZero();
  }
  // Forms involving vectors are never equivalent to a single scalar.
  return false;
}

bool SVEMemOperand::IsPlainRegister() const {
  if (IsScalarPlusImmediate()) {
    return GetImmediateOffset() == 0;
  }
  return false;
}

GenericOperand::GenericOperand(const CPURegister& reg)
    : cpu_register_(reg), mem_op_size_(0) {
  if (reg.IsQ()) {
@@ -524,5 +465,5 @@ bool GenericOperand::Equals(const GenericOperand& other) const {
  }
  return false;
}
}
}  // namespace vixl::aarch64
}  // namespace aarch64
}  // namespace vixl
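The hunk above gives PopLowestIndex and PopHighestIndex an optional RegList mask, so callers can pop only from a filtered subset. A standalone sketch (not VIXL itself; __builtin_ctzll stands in for VIXL's CountTrailingZeros, and -1 stands in for NoCPUReg) of the mask-filtered pop:

#include <cassert>
#include <cstdint>

using RegList = uint64_t;  // One bit per register code, as in VIXL.

int PopLowestIndexSketch(RegList* list, RegList mask) {
  RegList candidates = *list & mask;  // Ignore registers outside the mask.
  if (candidates == 0) return -1;
  int index = __builtin_ctzll(candidates);
  *list &= ~(static_cast<RegList>(1) << index);  // Remove the popped register.
  return index;
}

int main() {
  RegList list = (RegList{1} << 3) | (RegList{1} << 7) | (RegList{1} << 12);
  // Only consider registers 7 and 12; register 3 is filtered out by the mask.
  RegList mask = (RegList{1} << 7) | (RegList{1} << 12);
  assert(PopLowestIndexSketch(&list, mask) == 7);
  assert(list == ((RegList{1} << 3) | (RegList{1} << 12)));
  return 0;
}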
@@ -26,10 +26,10 @@

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64

#include "simulator-aarch64.h"

#include "utils-vixl.h"

#include "simulator-aarch64.h"

namespace vixl {
namespace aarch64 {
@@ -0,0 +1,322 @@
// Copyright 2019, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "registers-aarch64.h"

#include <sstream>
#include <string>

namespace vixl {
namespace aarch64 {

std::string CPURegister::GetArchitecturalName() const {
  std::ostringstream name;
  if (IsZRegister()) {
    name << 'z' << GetCode();
    if (HasLaneSize()) {
      name << '.' << GetLaneSizeSymbol();
    }
  } else if (IsPRegister()) {
    name << 'p' << GetCode();
    if (HasLaneSize()) {
      name << '.' << GetLaneSizeSymbol();
    }
    switch (qualifiers_) {
      case kNoQualifiers:
        break;
      case kMerging:
        name << "/m";
        break;
      case kZeroing:
        name << "/z";
        break;
    }
  } else {
    VIXL_UNIMPLEMENTED();
  }
  return name.str();
}

unsigned CPURegister::GetMaxCodeFor(CPURegister::RegisterBank bank) {
  switch (bank) {
    case kNoRegisterBank:
      return 0;
    case kRRegisterBank:
      return Register::GetMaxCode();
    case kVRegisterBank:
#ifdef VIXL_HAS_CONSTEXPR
      VIXL_STATIC_ASSERT(VRegister::GetMaxCode() == ZRegister::GetMaxCode());
#else
      VIXL_ASSERT(VRegister::GetMaxCode() == ZRegister::GetMaxCode());
#endif
      return VRegister::GetMaxCode();
    case kPRegisterBank:
      return PRegister::GetMaxCode();
  }
  VIXL_UNREACHABLE();
  return 0;
}

bool CPURegister::IsValidRegister() const {
  return ((code_ < kNumberOfRegisters) || (code_ == kSPRegInternalCode)) &&
         (bank_ == kRRegisterBank) &&
         ((size_ == kEncodedWRegSize) || (size_ == kEncodedXRegSize)) &&
         (qualifiers_ == kNoQualifiers) && (lane_size_ == size_);
}

bool CPURegister::IsValidVRegister() const {
  VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize);
  return (code_ < kNumberOfVRegisters) && (bank_ == kVRegisterBank) &&
         ((size_ >= kEncodedBRegSize) && (size_ <= kEncodedQRegSize)) &&
         (qualifiers_ == kNoQualifiers) &&
         (lane_size_ != kEncodedUnknownSize) && (lane_size_ <= size_);
}

bool CPURegister::IsValidFPRegister() const {
  return IsValidVRegister() && IsFPRegister();
}

bool CPURegister::IsValidZRegister() const {
  VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize);
  // Z registers are valid with or without a lane size, so we don't need to
  // check lane_size_.
  return (code_ < kNumberOfZRegisters) && (bank_ == kVRegisterBank) &&
         (size_ == kEncodedUnknownSize) && (qualifiers_ == kNoQualifiers);
}

bool CPURegister::IsValidPRegister() const {
  VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize);
  // P registers are valid with or without a lane size, so we don't need to
  // check lane_size_.
  return (code_ < kNumberOfPRegisters) && (bank_ == kPRegisterBank) &&
         (size_ == kEncodedUnknownSize) &&
         ((qualifiers_ == kNoQualifiers) || (qualifiers_ == kMerging) ||
          (qualifiers_ == kZeroing));
}

bool CPURegister::IsValid() const {
  return IsValidRegister() || IsValidVRegister() || IsValidZRegister() ||
         IsValidPRegister();
}

// Most coercions simply invoke the necessary constructor.
#define VIXL_CPUREG_COERCION_LIST(U) \
  U(Register, W, R)                  \
  U(Register, X, R)                  \
  U(VRegister, B, V)                 \
  U(VRegister, H, V)                 \
  U(VRegister, S, V)                 \
  U(VRegister, D, V)                 \
  U(VRegister, Q, V)                 \
  U(VRegister, V, V)                 \
  U(ZRegister, Z, V)                 \
  U(PRegister, P, P)
#define VIXL_DEFINE_CPUREG_COERCION(RET_TYPE, CTOR_TYPE, BANK) \
  RET_TYPE CPURegister::CTOR_TYPE() const {                    \
    VIXL_ASSERT(GetBank() == k##BANK##RegisterBank);           \
    return CTOR_TYPE##Register(GetCode());                     \
  }
VIXL_CPUREG_COERCION_LIST(VIXL_DEFINE_CPUREG_COERCION)
#undef VIXL_CPUREG_COERCION_LIST
#undef VIXL_DEFINE_CPUREG_COERCION

// NEON lane-format coercions always return VRegisters.
#define VIXL_CPUREG_NEON_COERCION_LIST(V) \
  V(8, B)                                 \
  V(16, B)                                \
  V(2, H)                                 \
  V(4, H)                                 \
  V(8, H)                                 \
  V(2, S)                                 \
  V(4, S)                                 \
  V(1, D)                                 \
  V(2, D)                                 \
  V(1, Q)
#define VIXL_DEFINE_CPUREG_NEON_COERCION(LANES, LANE_TYPE)             \
  VRegister VRegister::V##LANES##LANE_TYPE() const {                   \
    VIXL_ASSERT(IsVRegister());                                        \
    return VRegister(GetCode(), LANES * k##LANE_TYPE##RegSize, LANES); \
  }
VIXL_CPUREG_NEON_COERCION_LIST(VIXL_DEFINE_CPUREG_NEON_COERCION)
#undef VIXL_CPUREG_NEON_COERCION_LIST
#undef VIXL_DEFINE_CPUREG_NEON_COERCION

// Semantic type coercion for sdot and udot.
// TODO: Use the qualifiers_ field to distinguish this from ::S().
VRegister VRegister::S4B() const {
  VIXL_ASSERT(IsVRegister());
  return SRegister(GetCode());
}

bool AreAliased(const CPURegister& reg1,
                const CPURegister& reg2,
                const CPURegister& reg3,
                const CPURegister& reg4,
                const CPURegister& reg5,
                const CPURegister& reg6,
                const CPURegister& reg7,
                const CPURegister& reg8) {
  int number_of_valid_regs = 0;
  int number_of_valid_vregs = 0;
  int number_of_valid_pregs = 0;

  RegList unique_regs = 0;
  RegList unique_vregs = 0;
  RegList unique_pregs = 0;

  const CPURegister regs[] = {reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8};

  for (size_t i = 0; i < ArrayLength(regs); i++) {
    switch (regs[i].GetBank()) {
      case CPURegister::kRRegisterBank:
        number_of_valid_regs++;
        unique_regs |= regs[i].GetBit();
        break;
      case CPURegister::kVRegisterBank:
        number_of_valid_vregs++;
        unique_vregs |= regs[i].GetBit();
        break;
      case CPURegister::kPRegisterBank:
        number_of_valid_pregs++;
        unique_pregs |= regs[i].GetBit();
        break;
      case CPURegister::kNoRegisterBank:
        VIXL_ASSERT(regs[i].IsNone());
        break;
    }
  }

  int number_of_unique_regs = CountSetBits(unique_regs);
  int number_of_unique_vregs = CountSetBits(unique_vregs);
  int number_of_unique_pregs = CountSetBits(unique_pregs);

  VIXL_ASSERT(number_of_valid_regs >= number_of_unique_regs);
  VIXL_ASSERT(number_of_valid_vregs >= number_of_unique_vregs);
  VIXL_ASSERT(number_of_valid_pregs >= number_of_unique_pregs);

  return (number_of_valid_regs != number_of_unique_regs) ||
         (number_of_valid_vregs != number_of_unique_vregs) ||
         (number_of_valid_pregs != number_of_unique_pregs);
}

bool AreSameSizeAndType(const CPURegister& reg1,
                        const CPURegister& reg2,
                        const CPURegister& reg3,
                        const CPURegister& reg4,
                        const CPURegister& reg5,
                        const CPURegister& reg6,
                        const CPURegister& reg7,
                        const CPURegister& reg8) {
  VIXL_ASSERT(reg1.IsValid());
  bool match = true;
  match &= !reg2.IsValid() || reg2.IsSameSizeAndType(reg1);
  match &= !reg3.IsValid() || reg3.IsSameSizeAndType(reg1);
  match &= !reg4.IsValid() || reg4.IsSameSizeAndType(reg1);
  match &= !reg5.IsValid() || reg5.IsSameSizeAndType(reg1);
  match &= !reg6.IsValid() || reg6.IsSameSizeAndType(reg1);
  match &= !reg7.IsValid() || reg7.IsSameSizeAndType(reg1);
  match &= !reg8.IsValid() || reg8.IsSameSizeAndType(reg1);
  return match;
}

bool AreEven(const CPURegister& reg1,
             const CPURegister& reg2,
             const CPURegister& reg3,
             const CPURegister& reg4,
             const CPURegister& reg5,
             const CPURegister& reg6,
             const CPURegister& reg7,
             const CPURegister& reg8) {
  VIXL_ASSERT(reg1.IsValid());
  bool even = (reg1.GetCode() % 2) == 0;
  even &= !reg2.IsValid() || ((reg2.GetCode() % 2) == 0);
  even &= !reg3.IsValid() || ((reg3.GetCode() % 2) == 0);
  even &= !reg4.IsValid() || ((reg4.GetCode() % 2) == 0);
  even &= !reg5.IsValid() || ((reg5.GetCode() % 2) == 0);
  even &= !reg6.IsValid() || ((reg6.GetCode() % 2) == 0);
  even &= !reg7.IsValid() || ((reg7.GetCode() % 2) == 0);
  even &= !reg8.IsValid() || ((reg8.GetCode() % 2) == 0);
  return even;
}

bool AreConsecutive(const CPURegister& reg1,
                    const CPURegister& reg2,
                    const CPURegister& reg3,
                    const CPURegister& reg4) {
  VIXL_ASSERT(reg1.IsValid());

  if (!reg2.IsValid()) {
    return true;
  } else if (reg2.GetCode() !=
             ((reg1.GetCode() + 1) % (reg1.GetMaxCode() + 1))) {
    return false;
  }

  if (!reg3.IsValid()) {
    return true;
  } else if (reg3.GetCode() !=
             ((reg2.GetCode() + 1) % (reg1.GetMaxCode() + 1))) {
    return false;
  }

  if (!reg4.IsValid()) {
    return true;
  } else if (reg4.GetCode() !=
             ((reg3.GetCode() + 1) % (reg1.GetMaxCode() + 1))) {
    return false;
  }

  return true;
}

bool AreSameFormat(const CPURegister& reg1,
                   const CPURegister& reg2,
                   const CPURegister& reg3,
                   const CPURegister& reg4) {
  VIXL_ASSERT(reg1.IsValid());
  bool match = true;
  match &= !reg2.IsValid() || reg2.IsSameFormat(reg1);
  match &= !reg3.IsValid() || reg3.IsSameFormat(reg1);
  match &= !reg4.IsValid() || reg4.IsSameFormat(reg1);
  return match;
}

bool AreSameLaneSize(const CPURegister& reg1,
                     const CPURegister& reg2,
                     const CPURegister& reg3,
                     const CPURegister& reg4) {
  VIXL_ASSERT(reg1.IsValid());
  bool match = true;
  match &=
      !reg2.IsValid() || (reg2.GetLaneSizeInBits() == reg1.GetLaneSizeInBits());
  match &=
      !reg3.IsValid() || (reg3.GetLaneSizeInBits() == reg1.GetLaneSizeInBits());
  match &=
      !reg4.IsValid() || (reg4.GetLaneSizeInBits() == reg1.GetLaneSizeInBits());
  return match;
}
}  // namespace aarch64
}  // namespace vixl
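One detail of the new AreConsecutive worth calling out: the next code is computed modulo (max_code + 1), so register numbering wraps around. A tiny sketch (not VIXL code) of just that wrap-around rule on the codes themselves:

#include <cassert>

// Returns true if code2 directly follows code1, wrapping past max_code.
bool AreConsecutiveCodes(int code1, int code2, int max_code) {
  return code2 == ((code1 + 1) % (max_code + 1));
}

int main() {
  const int kMaxVRegCode = 31;  // v0..v31 on AArch64.
  assert(AreConsecutiveCodes(4, 5, kMaxVRegCode));
  assert(AreConsecutiveCodes(31, 0, kMaxVRegCode));  // v31 wraps to v0.
  assert(!AreConsecutiveCodes(31, 1, kMaxVRegCode));
  return 0;
}

This matters for instructions such as NEON table lookups, whose register lists are allowed to wrap from the highest register back to v0.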
File diff suppressed because it is too large
@@ -24,51 +24,17 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifdef VIXL_CODE_BUFFER_MMAP
extern "C" {
#include <sys/mman.h>
}
#endif

#include "code-buffer-vixl.h"
#include "utils-vixl.h"

namespace vixl {


CodeBuffer::CodeBuffer(size_t capacity)
    : buffer_(NULL),
      managed_(true),
      cursor_(NULL),
      dirty_(false),
      capacity_(capacity) {
  if (capacity_ == 0) {
    return;
CodeBuffer::CodeBuffer() : buffer_(nullptr), cursor_(nullptr), dirty_(false), capacity_(0)
{
}
#ifdef VIXL_CODE_BUFFER_MALLOC
  buffer_ = reinterpret_cast<byte*>(malloc(capacity_));
#elif defined(VIXL_CODE_BUFFER_MMAP)
  buffer_ = reinterpret_cast<byte*>(mmap(NULL,
                                         capacity,
                                         PROT_READ | PROT_WRITE,
                                         MAP_PRIVATE | MAP_ANONYMOUS,
                                         -1,
                                         0));
#else
#error Unknown code buffer allocator.
#endif
  VIXL_CHECK(buffer_ != NULL);
  // AArch64 instructions must be word-aligned; we assert that the default
  // allocator always returns word-aligned memory.
  VIXL_ASSERT(IsWordAligned(buffer_));

  cursor_ = buffer_;
}


CodeBuffer::CodeBuffer(byte* buffer, size_t capacity)
    : buffer_(reinterpret_cast<byte*>(buffer)),
      managed_(false),
      cursor_(reinterpret_cast<byte*>(buffer)),
      dirty_(false),
      capacity_(capacity) {
@@ -76,42 +42,18 @@ CodeBuffer::CodeBuffer(byte* buffer, size_t capacity)
}


CodeBuffer::~CodeBuffer() {
CodeBuffer::~CodeBuffer() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
  VIXL_ASSERT(!IsDirty());
  if (managed_) {
#ifdef VIXL_CODE_BUFFER_MALLOC
    free(buffer_);
#elif defined(VIXL_CODE_BUFFER_MMAP)
    munmap(buffer_, capacity_);
#else
#error Unknown code buffer allocator.
#endif
  }
}


#ifdef VIXL_CODE_BUFFER_MMAP
void CodeBuffer::SetExecutable() {
  int ret = mprotect(buffer_, capacity_, PROT_READ | PROT_EXEC);
  VIXL_CHECK(ret == 0);
}
#endif


#ifdef VIXL_CODE_BUFFER_MMAP
void CodeBuffer::SetWritable() {
  int ret = mprotect(buffer_, capacity_, PROT_READ | PROT_WRITE);
  VIXL_CHECK(ret == 0);
}
#endif


void CodeBuffer::EmitString(const char* string) {
  VIXL_ASSERT(HasSpaceFor(strlen(string) + 1));
  const auto len = strlen(string) + 1;
  VIXL_ASSERT(HasSpaceFor(len));
  char* dst = reinterpret_cast<char*>(cursor_);
  dirty_ = true;
  char* null_char = strcpy(dst, string);
  cursor_ = reinterpret_cast<byte*>(null_char) + 1;
  memcpy(dst, string, len);
  cursor_ = reinterpret_cast<byte*>(dst + len);
}
@@ -139,48 +81,22 @@ void CodeBuffer::Align() {
}

void CodeBuffer::EmitZeroedBytes(int n) {
  EnsureSpaceFor(n);
  VIXL_ASSERT(HasSpaceFor(n));
  dirty_ = true;
  memset(cursor_, 0, n);
  cursor_ += n;
}

void CodeBuffer::Reset() {
#ifdef VIXL_DEBUG
  if (managed_) {
    // Fill with zeros (there is no useful value common to A32 and T32).
    memset(buffer_, 0, capacity_);
  }
#endif
  cursor_ = buffer_;
  SetClean();
}

void CodeBuffer::Reset(byte* buffer, size_t capacity, bool managed) {
void CodeBuffer::Reset(byte* buffer, size_t capacity) {
  buffer_ = buffer;
  cursor_ = buffer;
  capacity_ = capacity;
  managed_ = managed;
  SetClean();
}

void CodeBuffer::Grow(size_t new_capacity) {
  VIXL_ASSERT(managed_);
  VIXL_ASSERT(new_capacity > capacity_);
  ptrdiff_t cursor_offset = GetCursorOffset();
#ifdef VIXL_CODE_BUFFER_MALLOC
  buffer_ = static_cast<byte*>(realloc(buffer_, new_capacity));
  VIXL_CHECK(buffer_ != NULL);
#elif defined(VIXL_CODE_BUFFER_MMAP)
  buffer_ = static_cast<byte*>(
      mremap(buffer_, capacity_, new_capacity, MREMAP_MAYMOVE));
  VIXL_CHECK(buffer_ != MAP_FAILED);
#else
#error Unknown code buffer allocator.
#endif

  cursor_ = buffer_ + cursor_offset;
  capacity_ = new_capacity;
}


}  // namespace vixl
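The EmitString change above computes strlen once and uses memcpy (which also copies the NUL terminator) instead of strcpy plus pointer arithmetic on its return value. A minimal sketch (not the VIXL class; buffer management is reduced to a bare cursor) of the new copy logic:

#include <cassert>
#include <cstring>

typedef unsigned char byte;

// Copies the string, including its NUL terminator, and returns the new cursor.
byte* EmitStringSketch(byte* cursor, const char* string) {
  const size_t len = strlen(string) + 1;
  memcpy(cursor, string, len);
  return cursor + len;
}

int main() {
  byte buffer[32];
  byte* end = EmitStringSketch(buffer, "vixl");
  assert(end == buffer + 5);  // Four characters plus the terminator.
  assert(strcmp(reinterpret_cast<char*>(buffer), "vixl") == 0);
  return 0;
}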
@@ -26,11 +26,14 @@

#include "compiler-intrinsics-vixl.h"

#include "utils-vixl.h"

namespace vixl {


int CountLeadingSignBitsFallBack(int64_t value, int width) {
  VIXL_ASSERT(IsPowerOf2(width) && (width <= 64));
  if (width < 64) VIXL_ASSERT(IsIntN(width, value));
  if (value >= 0) {
    return CountLeadingZeros(value, width) - 1;
  } else {
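The non-negative branch above returns the leading-zero count minus one (the sign bit itself is not counted). The else branch falls outside this hunk, but one common way to handle it is to complement the value, since ~value has the same number of leading sign bits as value. A naive sketch (not VIXL's implementation; a loop stands in for CountLeadingZeros) under that assumption:

#include <cassert>
#include <cstdint>

int CountLeadingSignBitsSketch(int64_t value, int width) {
  // Fold negatives onto the non-negative case via complement (assumption).
  uint64_t v = (value >= 0) ? static_cast<uint64_t>(value)
                            : ~static_cast<uint64_t>(value);
  int leading_zeros = 0;
  for (int bit = width - 1; bit >= 0; bit--) {
    if ((v >> bit) & 1) break;
    leading_zeros++;
  }
  return leading_zeros - 1;  // Exclude the sign bit itself.
}

int main() {
  assert(CountLeadingSignBitsSketch(1, 32) == 30);   // 0...01
  assert(CountLeadingSignBitsSketch(-1, 32) == 31);  // All ones.
  assert(CountLeadingSignBitsSketch(-2, 32) == 30);  // 1...10
  return 0;
}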
@@ -24,119 +24,71 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "cpu-features.h"

#include <ostream>

#include "cpu-features.h"
#include "globals-vixl.h"
#include "utils-vixl.h"

#if defined(__aarch64__) && defined(VIXL_INCLUDE_TARGET_AARCH64)
#include "aarch64/cpu-aarch64.h"
#define VIXL_USE_AARCH64_CPU_HELPERS
#endif

namespace vixl {

static uint64_t MakeFeatureMask(CPUFeatures::Feature feature) {
  if (feature == CPUFeatures::kNone) {
    return 0;
  } else {
    // Check that the shift is well-defined, and that the feature is valid.
    VIXL_STATIC_ASSERT(CPUFeatures::kNumberOfFeatures <=
                       (sizeof(uint64_t) * 8));
    VIXL_ASSERT(feature < CPUFeatures::kNumberOfFeatures);
    return UINT64_C(1) << feature;
  }
}

CPUFeatures::CPUFeatures(Feature feature0,
                         Feature feature1,
                         Feature feature2,
                         Feature feature3)
    : features_(0) {
  Combine(feature0, feature1, feature2, feature3);
}

CPUFeatures CPUFeatures::All() {
  CPUFeatures all;
  // Check that the shift is well-defined.
  VIXL_STATIC_ASSERT(CPUFeatures::kNumberOfFeatures < (sizeof(uint64_t) * 8));
  all.features_ = (UINT64_C(1) << kNumberOfFeatures) - 1;
  all.features_.set();
  return all;
}

CPUFeatures CPUFeatures::InferFromOS() {
  // TODO: Actually infer features from the OS.
CPUFeatures CPUFeatures::InferFromIDRegisters() {
  // This function assumes that kIDRegisterEmulation is available.
  CPUFeatures features(CPUFeatures::kIDRegisterEmulation);
#ifdef VIXL_USE_AARCH64_CPU_HELPERS
  // Note that the Linux kernel filters these values during emulation, so the
  // results may not exactly match the expected hardware support.
  features.Combine(aarch64::CPU::InferCPUFeaturesFromIDRegisters());
#endif
  return features;
}

CPUFeatures CPUFeatures::InferFromOS(QueryIDRegistersOption option) {
#ifdef VIXL_USE_AARCH64_CPU_HELPERS
  return aarch64::CPU::InferCPUFeaturesFromOS(option);
#else
  USE(option);
  return CPUFeatures();
#endif
}

void CPUFeatures::Combine(const CPUFeatures& other) {
  features_ |= other.features_;
}

void CPUFeatures::Combine(Feature feature0,
                          Feature feature1,
                          Feature feature2,
                          Feature feature3) {
  features_ |= MakeFeatureMask(feature0);
  features_ |= MakeFeatureMask(feature1);
  features_ |= MakeFeatureMask(feature2);
  features_ |= MakeFeatureMask(feature3);
void CPUFeatures::Combine(Feature feature) {
  if (feature != CPUFeatures::kNone) features_.set(feature);
}

void CPUFeatures::Remove(const CPUFeatures& other) {
  features_ &= ~other.features_;
}

void CPUFeatures::Remove(Feature feature0,
                         Feature feature1,
                         Feature feature2,
                         Feature feature3) {
  features_ &= ~MakeFeatureMask(feature0);
  features_ &= ~MakeFeatureMask(feature1);
  features_ &= ~MakeFeatureMask(feature2);
  features_ &= ~MakeFeatureMask(feature3);
}

CPUFeatures CPUFeatures::With(const CPUFeatures& other) const {
  CPUFeatures f(*this);
  f.Combine(other);
  return f;
}

CPUFeatures CPUFeatures::With(Feature feature0,
                              Feature feature1,
                              Feature feature2,
                              Feature feature3) const {
  CPUFeatures f(*this);
  f.Combine(feature0, feature1, feature2, feature3);
  return f;
}

CPUFeatures CPUFeatures::Without(const CPUFeatures& other) const {
  CPUFeatures f(*this);
  f.Remove(other);
  return f;
}

CPUFeatures CPUFeatures::Without(Feature feature0,
                                 Feature feature1,
                                 Feature feature2,
                                 Feature feature3) const {
  CPUFeatures f(*this);
  f.Remove(feature0, feature1, feature2, feature3);
  return f;
void CPUFeatures::Remove(Feature feature) {
  if (feature != CPUFeatures::kNone) features_.reset(feature);
}

bool CPUFeatures::Has(const CPUFeatures& other) const {
  return (features_ & other.features_) == other.features_;
}

bool CPUFeatures::Has(Feature feature0,
                      Feature feature1,
                      Feature feature2,
                      Feature feature3) const {
  uint64_t mask = MakeFeatureMask(feature0) | MakeFeatureMask(feature1) |
                  MakeFeatureMask(feature2) | MakeFeatureMask(feature3);
  return (features_ & mask) == mask;
bool CPUFeatures::Has(Feature feature) const {
  return (feature == CPUFeatures::kNone) || features_[feature];
}

size_t CPUFeatures::Count() const { return CountSetBits(features_); }
size_t CPUFeatures::Count() const { return features_.count(); }

std::ostream& operator<<(std::ostream& os, CPUFeatures::Feature feature) {
  // clang-format off

@@ -157,12 +109,9 @@ VIXL_CPU_FEATURE_LIST(VIXL_FORMAT_FEATURE)
}

CPUFeatures::const_iterator CPUFeatures::begin() const {
  if (features_ == 0) return const_iterator(this, kNone);

  int feature_number = CountTrailingZeros(features_);
  vixl::CPUFeatures::Feature feature =
      static_cast<CPUFeatures::Feature>(feature_number);
  return const_iterator(this, feature);
  // For iterators in general, it's undefined to increment `end()`, but here we
  // control the implementation and it is safe to do this.
  return ++end();
}

CPUFeatures::const_iterator CPUFeatures::end() const {

@@ -170,11 +119,11 @@ CPUFeatures::const_iterator CPUFeatures::end() const {
}

std::ostream& operator<<(std::ostream& os, const CPUFeatures& features) {
  CPUFeatures::const_iterator it = features.begin();
  while (it != features.end()) {
    os << *it;
    ++it;
    if (it != features.end()) os << ", ";
  bool need_separator = false;
  for (CPUFeatures::Feature feature : features) {
    if (need_separator) os << ", ";
    need_separator = true;
    os << feature;
  }
  return os;
}

@@ -185,7 +134,7 @@ bool CPUFeaturesConstIterator::operator==(
  return (cpu_features_ == other.cpu_features_) && (feature_ == other.feature_);
}

CPUFeatures::Feature CPUFeaturesConstIterator::operator++() {  // Prefix
CPUFeaturesConstIterator& CPUFeaturesConstIterator::operator++() {  // Prefix
  VIXL_ASSERT(IsValid());
  do {
    // Find the next feature. The order is unspecified.

@@ -199,11 +148,11 @@ CPUFeatures::Feature CPUFeaturesConstIterator::operator++() {  // Prefix
    // cpu_features_->Has(kNone) is always true, so this will terminate even if
    // the features list is empty.
  } while (!cpu_features_->Has(feature_));
  return feature_;
  return *this;
}

CPUFeatures::Feature CPUFeaturesConstIterator::operator++(int) {  // Postfix
  CPUFeatures::Feature result = feature_;
CPUFeaturesConstIterator CPUFeaturesConstIterator::operator++(int) {  // Postfix
  CPUFeaturesConstIterator result = *this;
  ++(*this);
  return result;
}
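The refactor above replaces the hand-rolled uint64_t mask (MakeFeatureMask and friends) with what appears to be a std::bitset-backed store: features_.set(), features_.reset(), features_[feature] and features_.count() are all std::bitset operations. A reduced sketch of the pattern, with hypothetical feature names:

    #include <bitset>
    #include <cstddef>

    enum Feature { kNone = -1, kFP, kNEON, kCRC32, kNumberOfFeatures };

    // A bitset removes the shift-overflow checks and the 64-feature cap
    // that the manual mask arithmetic needed.
    class FeatureSet {
     public:
      void Combine(Feature f) {
        if (f != kNone) bits_.set(f);  // replaces features_ |= mask
      }
      void Remove(Feature f) {
        if (f != kNone) bits_.reset(f);  // replaces features_ &= ~mask
      }
      bool Has(Feature f) const {
        return (f == kNone) || bits_[f];  // replaces a mask test
      }
      size_t Count() const { return bits_.count(); }  // popcount for free

     private:
      std::bitset<kNumberOfFeatures> bits_;
    };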
@@ -24,10 +24,10 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <cstdio>

#include "utils-vixl.h"

#include <cstdio>

namespace vixl {

// The default NaN values (for FPCR.DN=1).

@@ -391,7 +391,7 @@ float FPToFloat(double value,
  }

  VIXL_UNREACHABLE();
  return value;
  return static_cast<float>(value);
}

// TODO: We should consider implementing a full FPToDouble(Float16)
@@ -1,7 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <Import Project="..\msvc\vsprops\Configurations.props" />

  <ItemGroup>
    <ClInclude Include="include\vixl\aarch32\assembler-aarch32.h">
      <ExcludedFromBuild Condition="'$(Platform)'!='ARM'">true</ExcludedFromBuild>

@@ -42,21 +41,27 @@
    <ClInclude Include="include\vixl\aarch64\decoder-aarch64.h">
      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
    </ClInclude>
    <ClInclude Include="include\vixl\aarch64\decoder-constants-aarch64.h">
      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
    </ClInclude>
    <ClInclude Include="include\vixl\aarch64\decoder-visitor-map-aarch64.h">
      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
    </ClInclude>
    <ClInclude Include="include\vixl\aarch64\disasm-aarch64.h">
      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
    </ClInclude>
    <ClInclude Include="include\vixl\aarch64\instructions-aarch64.h">
      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
    </ClInclude>
    <ClInclude Include="include\vixl\aarch64\instrument-aarch64.h">
      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
    </ClInclude>
    <ClInclude Include="include\vixl\aarch64\macro-assembler-aarch64.h">
      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
    </ClInclude>
    <ClInclude Include="include\vixl\aarch64\operands-aarch64.h">
      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
    </ClInclude>
    <ClInclude Include="include\vixl\aarch64\registers-aarch64.h">
      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
    </ClInclude>
    <ClInclude Include="include\vixl\aarch64\simulator-aarch64.h">
      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
    </ClInclude>

@@ -101,6 +106,9 @@
    <ClCompile Include="src\aarch64\assembler-aarch64.cc">
      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
    </ClCompile>
    <ClCompile Include="src\aarch64\assembler-sve-aarch64.cc">
      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
    </ClCompile>
    <ClCompile Include="src\aarch64\cpu-aarch64.cc">
      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
    </ClCompile>

@@ -116,22 +124,22 @@
    <ClCompile Include="src\aarch64\instructions-aarch64.cc">
      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
    </ClCompile>
    <ClCompile Include="src\aarch64\instrument-aarch64.cc">
      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
    </ClCompile>
    <ClCompile Include="src\aarch64\logic-aarch64.cc">
      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
    </ClCompile>
    <ClCompile Include="src\aarch64\macro-assembler-aarch64.cc">
      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
    </ClCompile>
    <ClCompile Include="src\aarch64\macro-assembler-sve-aarch64.cc">
      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
    </ClCompile>
    <ClCompile Include="src\aarch64\operands-aarch64.cc">
      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
    </ClCompile>
    <ClCompile Include="src\aarch64\pointer-auth-aarch64.cc">
      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
    </ClCompile>
    <ClCompile Include="src\aarch64\simulator-aarch64.cc">
    <ClCompile Include="src\aarch64\registers-aarch64.cc">
      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
    </ClCompile>
    <ClCompile Include="src\code-buffer-vixl.cc" />

@@ -139,18 +147,14 @@
    <ClCompile Include="src\cpu-features.cc" />
    <ClCompile Include="src\utils-vixl.cc" />
  </ItemGroup>

  <PropertyGroup Label="Globals">
    <ProjectGuid>{8906836E-F06E-46E8-B11A-74E5E8C7B8FB}</ProjectGuid>
  </PropertyGroup>

  <Import Project="..\msvc\vsprops\StaticLibrary.props" />

  <PropertyGroup Condition="'$(Platform)'=='ARM64'">
    <VixlPreprocessorDefinitions>VIXL_INCLUDE_TARGET_AARCH64;VIXL_CODE_BUFFER_MALLOC</VixlPreprocessorDefinitions>
    <VixlIncludeDirectories>$(ProjectDir)include\vixl\aarch64</VixlIncludeDirectories>
  </PropertyGroup>

  <ItemDefinitionGroup>
    <ClCompile>
      <WarningLevel>TurnOffAllWarnings</WarningLevel>

@@ -159,6 +163,5 @@
      <AdditionalOptions Condition="$(Configuration.Contains(Clang))"> -Wno-deprecated-enum-enum-conversion %(AdditionalOptions)</AdditionalOptions>
    </ClCompile>
  </ItemDefinitionGroup>

  <Import Project="..\msvc\vsprops\Targets.props" />
</Project>

@@ -20,45 +20,6 @@
  <ClInclude Include="include\vixl\pool-manager.h" />
  <ClInclude Include="include\vixl\pool-manager-impl.h" />
  <ClInclude Include="include\vixl\utils-vixl.h" />
  <ClInclude Include="include\vixl\aarch64\assembler-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch64\constants-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch64\cpu-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch64\cpu-features-auditor-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch64\decoder-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch64\disasm-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch64\instructions-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch64\instrument-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch64\macro-assembler-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch64\operands-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch64\simulator-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch64\simulator-constants-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch64\abi-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch32\assembler-aarch32.h">
    <Filter>aarch32</Filter>
  </ClInclude>

@@ -81,6 +42,51 @@
    <Filter>aarch32</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\code-buffer-vixl.h" />
  <ClInclude Include="include\vixl\aarch64\constants-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch64\cpu-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch64\cpu-features-auditor-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch64\decoder-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch64\decoder-constants-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch64\decoder-visitor-map-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch64\disasm-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch64\instructions-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch64\macro-assembler-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch64\operands-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch64\registers-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch64\simulator-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch64\simulator-constants-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch64\abi-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
  <ClInclude Include="include\vixl\aarch64\assembler-aarch64.h">
    <Filter>aarch64</Filter>
  </ClInclude>
</ItemGroup>
<ItemGroup>
  <ClCompile Include="src\aarch32\disasm-aarch32.cc">

@@ -104,6 +110,19 @@
  <ClCompile Include="src\aarch32\constants-aarch32.cc">
    <Filter>aarch32</Filter>
  </ClCompile>
  <ClCompile Include="src\compiler-intrinsics-vixl.cc" />
  <ClCompile Include="src\cpu-features.cc" />
  <ClCompile Include="src\utils-vixl.cc" />
  <ClCompile Include="src\code-buffer-vixl.cc" />
  <ClCompile Include="src\aarch64\assembler-sve-aarch64.cc">
    <Filter>aarch64</Filter>
  </ClCompile>
  <ClCompile Include="src\aarch64\cpu-aarch64.cc">
    <Filter>aarch64</Filter>
  </ClCompile>
  <ClCompile Include="src\aarch64\cpu-features-auditor-aarch64.cc">
    <Filter>aarch64</Filter>
  </ClCompile>
  <ClCompile Include="src\aarch64\decoder-aarch64.cc">
    <Filter>aarch64</Filter>
  </ClCompile>

@@ -113,36 +132,26 @@
  <ClCompile Include="src\aarch64\instructions-aarch64.cc">
    <Filter>aarch64</Filter>
  </ClCompile>
  <ClCompile Include="src\aarch64\instrument-aarch64.cc">
    <Filter>aarch64</Filter>
  </ClCompile>
  <ClCompile Include="src\aarch64\logic-aarch64.cc">
    <Filter>aarch64</Filter>
  </ClCompile>
  <ClCompile Include="src\aarch64\macro-assembler-aarch64.cc">
    <Filter>aarch64</Filter>
  </ClCompile>
  <ClCompile Include="src\aarch64\macro-assembler-sve-aarch64.cc">
    <Filter>aarch64</Filter>
  </ClCompile>
  <ClCompile Include="src\aarch64\operands-aarch64.cc">
    <Filter>aarch64</Filter>
  </ClCompile>
  <ClCompile Include="src\aarch64\pointer-auth-aarch64.cc">
    <Filter>aarch64</Filter>
  </ClCompile>
  <ClCompile Include="src\aarch64\simulator-aarch64.cc">
  <ClCompile Include="src\aarch64\registers-aarch64.cc">
    <Filter>aarch64</Filter>
  </ClCompile>
  <ClCompile Include="src\aarch64\assembler-aarch64.cc">
    <Filter>aarch64</Filter>
  </ClCompile>
  <ClCompile Include="src\aarch64\cpu-aarch64.cc">
    <Filter>aarch64</Filter>
  </ClCompile>
  <ClCompile Include="src\aarch64\cpu-features-auditor-aarch64.cc">
    <Filter>aarch64</Filter>
  </ClCompile>
  <ClCompile Include="src\compiler-intrinsics-vixl.cc" />
  <ClCompile Include="src\cpu-features.cc" />
  <ClCompile Include="src\utils-vixl.cc" />
  <ClCompile Include="src\code-buffer-vixl.cc" />
</ItemGroup>
</Project>
@@ -63,10 +63,10 @@ void CPU::NewRec::AArch64Compiler::Reset(CodeCache::Block* block, u8* code_buffe
  armAsm = &m_emitter;

#ifdef VIXL_DEBUG
  m_emitter_check = std::make_unique<vixl::CodeBufferCheckScope>(m_emitter.get(), code_buffer_space,
  m_emitter_check = std::make_unique<vixl::CodeBufferCheckScope>(&m_emitter, code_buffer_space,
                                                                 vixl::CodeBufferCheckScope::kDontReserveBufferSpace);
  m_far_emitter_check = std::make_unique<vixl::CodeBufferCheckScope>(
    m_far_emitter.get(), far_code_space, vixl::CodeBufferCheckScope::kDontReserveBufferSpace);
    &m_far_emitter, far_code_space, vixl::CodeBufferCheckScope::kDontReserveBufferSpace);
#endif

  // Need to wipe it out so it's correct when toggling fastmem.
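The hunk above switches from m_emitter.get() to &m_emitter, which suggests the assemblers became direct members rather than unique_ptrs. The guard itself is vixl's CodeBufferCheckScope; a simplified sketch of how that debug-only pattern is typically used (the helper, buffer sizes and the emit body are placeholders, not this codebase's code):

    #include "aarch64/macro-assembler-aarch64.h"

    // In debug builds, assert that code generation stays within the space
    // the caller reserved; kDontReserveBufferSpace means the scope only
    // checks, it does not grow or reserve the buffer itself.
    void EmitGuarded(vixl::aarch64::Assembler* armAsm, size_t buffer_space) {
    #ifdef VIXL_DEBUG
      vixl::CodeBufferCheckScope check(
          armAsm, buffer_space, vixl::CodeBufferCheckScope::kDontReserveBufferSpace);
    #endif
      // ... emit instructions through armAsm here; the check fires on scope
      // destruction if more than buffer_space bytes were emitted ...
    }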
@@ -162,7 +162,7 @@ void CPU::NewRec::AArch64Compiler::SwitchToNearCode(bool emit_jump, vixl::aarch6
  armAsm = &m_emitter;
}

void CPU::NewRec::AArch64Compiler::EmitMov(const vixl::aarch64::WRegister& dst, u32 val)
void CPU::NewRec::AArch64Compiler::EmitMov(const vixl::aarch64::Register& dst, u32 val)
{
  armEmitMov(armAsm, dst, val);
}

@@ -495,38 +495,39 @@ vixl::aarch64::MemOperand CPU::NewRec::AArch64Compiler::MipsPtr(Reg r) const
  return PTR(&g_state.regs.r[static_cast<u32>(r)]);
}

vixl::aarch64::WRegister CPU::NewRec::AArch64Compiler::CFGetRegD(CompileFlags cf) const
vixl::aarch64::Register CPU::NewRec::AArch64Compiler::CFGetRegD(CompileFlags cf) const
{
  DebugAssert(cf.valid_host_d);
  return WRegister(cf.host_d);
}

vixl::aarch64::WRegister CPU::NewRec::AArch64Compiler::CFGetRegS(CompileFlags cf) const
vixl::aarch64::Register CPU::NewRec::AArch64Compiler::CFGetRegS(CompileFlags cf) const
{
  DebugAssert(cf.valid_host_s);
  return WRegister(cf.host_s);
}

vixl::aarch64::WRegister CPU::NewRec::AArch64Compiler::CFGetRegT(CompileFlags cf) const
vixl::aarch64::Register CPU::NewRec::AArch64Compiler::CFGetRegT(CompileFlags cf) const
{
  DebugAssert(cf.valid_host_t);
  return WRegister(cf.host_t);
}

vixl::aarch64::WRegister CPU::NewRec::AArch64Compiler::CFGetRegLO(CompileFlags cf) const
vixl::aarch64::Register CPU::NewRec::AArch64Compiler::CFGetRegLO(CompileFlags cf) const
{
  DebugAssert(cf.valid_host_lo);
  return WRegister(cf.host_lo);
}

vixl::aarch64::WRegister CPU::NewRec::AArch64Compiler::CFGetRegHI(CompileFlags cf) const
vixl::aarch64::Register CPU::NewRec::AArch64Compiler::CFGetRegHI(CompileFlags cf) const
{
  DebugAssert(cf.valid_host_hi);
  return WRegister(cf.host_hi);
}

void CPU::NewRec::AArch64Compiler::MoveSToReg(const vixl::aarch64::WRegister& dst, CompileFlags cf)
void CPU::NewRec::AArch64Compiler::MoveSToReg(const vixl::aarch64::Register& dst, CompileFlags cf)
{
  DebugAssert(dst.IsW());
  if (cf.valid_host_s)
  {
    if (cf.host_s != dst.GetCode())

@@ -547,8 +548,9 @@ void CPU::NewRec::AArch64Compiler::MoveSToReg(const vixl::aarch64::WRegister& ds
  }
}

void CPU::NewRec::AArch64Compiler::MoveTToReg(const vixl::aarch64::WRegister& dst, CompileFlags cf)
void CPU::NewRec::AArch64Compiler::MoveTToReg(const vixl::aarch64::Register& dst, CompileFlags cf)
{
  DebugAssert(dst.IsW());
  if (cf.valid_host_t)
  {
    if (cf.host_t != dst.GetCode())

@@ -569,9 +571,9 @@ void CPU::NewRec::AArch64Compiler::MoveTToReg(const vixl::aarch64::WRegister& ds
  }
}

void CPU::NewRec::AArch64Compiler::MoveMIPSRegToReg(const vixl::aarch64::WRegister& dst, Reg reg)
void CPU::NewRec::AArch64Compiler::MoveMIPSRegToReg(const vixl::aarch64::Register& dst, Reg reg)
{
  DebugAssert(reg < Reg::count);
  DebugAssert(reg < Reg::count && dst.IsW());
  if (const std::optional<u32> hreg = CheckHostReg(0, Compiler::HR_TYPE_CPU_REG, reg))
    armAsm->mov(dst, WRegister(hreg.value()));
  else if (HasConstantReg(reg))

@@ -712,8 +714,9 @@ void CPU::NewRec::AArch64Compiler::Compile_Fallback()
  m_load_delay_dirty = EMULATE_LOAD_DELAYS;
}

void CPU::NewRec::AArch64Compiler::CheckBranchTarget(const vixl::aarch64::WRegister& pcreg)
void CPU::NewRec::AArch64Compiler::CheckBranchTarget(const vixl::aarch64::Register& pcreg)
{
  DebugAssert(pcreg.IsW());
  if (!g_settings.cpu_recompiler_memory_exceptions)
    return;

@@ -729,7 +732,7 @@ void CPU::NewRec::AArch64Compiler::CheckBranchTarget(const vixl::aarch64::WRegis

void CPU::NewRec::AArch64Compiler::Compile_jr(CompileFlags cf)
{
  const WRegister pcreg = CFGetRegS(cf);
  const Register pcreg = CFGetRegS(cf);
  CheckBranchTarget(pcreg);

  armAsm->str(pcreg, PTR(&g_state.pc));

@@ -740,7 +743,7 @@ void CPU::NewRec::AArch64Compiler::Compile_jr(CompileFlags cf)

void CPU::NewRec::AArch64Compiler::Compile_jalr(CompileFlags cf)
{
  const WRegister pcreg = CFGetRegS(cf);
  const Register pcreg = CFGetRegS(cf);
  if (MipsD() != Reg::zero)
    SetConstantReg(MipsD(), GetBranchReturnAddress(cf));

@@ -765,7 +768,7 @@ void CPU::NewRec::AArch64Compiler::Compile_bxx(CompileFlags cf, BranchCondition
  DebugAssert(cond == BranchCondition::Equal || cond == BranchCondition::NotEqual || cf.MipsT() == Reg::zero);

  Label taken;
  const WRegister rs = CFGetRegS(cf);
  const Register rs = CFGetRegS(cf);
  switch (cond)
  {
    case BranchCondition::Equal:

@@ -834,8 +837,8 @@ void CPU::NewRec::AArch64Compiler::Compile_bxx(CompileFlags cf, BranchCondition

void CPU::NewRec::AArch64Compiler::Compile_addi(CompileFlags cf, bool overflow)
{
  const WRegister rs = CFGetRegS(cf);
  const WRegister rt = CFGetRegT(cf);
  const Register rs = CFGetRegS(cf);
  const Register rt = CFGetRegT(cf);
  if (const u32 imm = inst->i.imm_sext32(); imm != 0)
  {
    if (!overflow)

@@ -882,7 +885,7 @@ void CPU::NewRec::AArch64Compiler::Compile_slti(CompileFlags cf, bool sign)

void CPU::NewRec::AArch64Compiler::Compile_andi(CompileFlags cf)
{
  const WRegister rt = CFGetRegT(cf);
  const Register rt = CFGetRegT(cf);
  if (const u32 imm = inst->i.imm_zext32(); imm != 0)
    armAsm->and_(rt, CFGetRegS(cf), armCheckLogicalConstant(imm));
  else

@@ -891,8 +894,8 @@ void CPU::NewRec::AArch64Compiler::Compile_andi(CompileFlags cf)

void CPU::NewRec::AArch64Compiler::Compile_ori(CompileFlags cf)
{
  const WRegister rt = CFGetRegT(cf);
  const WRegister rs = CFGetRegS(cf);
  const Register rt = CFGetRegT(cf);
  const Register rs = CFGetRegS(cf);
  if (const u32 imm = inst->i.imm_zext32(); imm != 0)
    armAsm->orr(rt, rs, armCheckLogicalConstant(imm));
  else if (rt.GetCode() != rs.GetCode())

@@ -901,8 +904,8 @@ void CPU::NewRec::AArch64Compiler::Compile_ori(CompileFlags cf)

void CPU::NewRec::AArch64Compiler::Compile_xori(CompileFlags cf)
{
  const WRegister rt = CFGetRegT(cf);
  const WRegister rs = CFGetRegS(cf);
  const Register rt = CFGetRegT(cf);
  const Register rs = CFGetRegS(cf);
  if (const u32 imm = inst->i.imm_zext32(); imm != 0)
    armAsm->eor(rt, rs, armCheckLogicalConstant(imm));
  else if (rt.GetCode() != rs.GetCode())

@@ -914,8 +917,8 @@ void CPU::NewRec::AArch64Compiler::Compile_shift(CompileFlags cf,
                                                 const vixl::aarch64::Register&,
                                                 unsigned))
{
  const WRegister rd = CFGetRegD(cf);
  const WRegister rt = CFGetRegT(cf);
  const Register rd = CFGetRegD(cf);
  const Register rt = CFGetRegT(cf);
  if (inst->r.shamt > 0)
    (armAsm->*op)(rd, rt, inst->r.shamt);
  else if (rd.GetCode() != rt.GetCode())

@@ -943,12 +946,12 @@ void CPU::NewRec::AArch64Compiler::Compile_variable_shift(
                                                          const vixl::aarch64::Register&),
  void (vixl::aarch64::Assembler::*op_const)(const vixl::aarch64::Register&, const vixl::aarch64::Register&, unsigned))
{
  const WRegister rd = CFGetRegD(cf);
  const Register rd = CFGetRegD(cf);

  AssertRegOrConstS(cf);
  AssertRegOrConstT(cf);

  const WRegister rt = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
  const Register rt = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
  if (!cf.valid_host_t)
    MoveTToReg(rt, cf);

@@ -982,17 +985,17 @@ void CPU::NewRec::AArch64Compiler::Compile_srav(CompileFlags cf)

void CPU::NewRec::AArch64Compiler::Compile_mult(CompileFlags cf, bool sign)
{
  const WRegister rs = cf.valid_host_s ? CFGetRegS(cf) : RWARG1;
  const Register rs = cf.valid_host_s ? CFGetRegS(cf) : RWARG1;
  if (!cf.valid_host_s)
    MoveSToReg(rs, cf);

  const WRegister rt = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
  const Register rt = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
  if (!cf.valid_host_t)
    MoveTToReg(rt, cf);

  // TODO: if lo/hi gets killed, we can use a 32-bit multiply
  const WRegister lo = CFGetRegLO(cf);
  const WRegister hi = CFGetRegHI(cf);
  const Register lo = CFGetRegLO(cf);
  const Register hi = CFGetRegHI(cf);

  (sign) ? armAsm->smull(lo.X(), rs, rt) : armAsm->umull(lo.X(), rs, rt);
  armAsm->lsr(hi.X(), lo.X(), 32);
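Side note on the mult lowering that closes the hunk above: a single smull/umull into an X register yields both MIPS results, with HI recovered by a 32-bit shift. The same arithmetic in plain C++ (illustration only, names are mine):

    #include <cstdint>

    // MIPS mult/multu put the 64-bit product in HI:LO. This mirrors
    // smull/umull(lo.X(), rs, rt) followed by lsr(hi.X(), lo.X(), 32).
    void Mult32(uint32_t rs, uint32_t rt, bool sign, uint32_t* lo, uint32_t* hi) {
      const uint64_t product =
          sign ? static_cast<uint64_t>(
                     static_cast<int64_t>(static_cast<int32_t>(rs)) *
                     static_cast<int32_t>(rt))
               : static_cast<uint64_t>(rs) * rt;
      *lo = static_cast<uint32_t>(product);
      *hi = static_cast<uint32_t>(product >> 32);
    }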
@@ -1010,16 +1013,16 @@ void CPU::NewRec::AArch64Compiler::Compile_multu(CompileFlags cf)

void CPU::NewRec::AArch64Compiler::Compile_div(CompileFlags cf)
{
  const WRegister rs = cf.valid_host_s ? CFGetRegS(cf) : RWARG1;
  const Register rs = cf.valid_host_s ? CFGetRegS(cf) : RWARG1;
  if (!cf.valid_host_s)
    MoveSToReg(rs, cf);

  const WRegister rt = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
  const Register rt = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
  if (!cf.valid_host_t)
    MoveTToReg(rt, cf);

  const WRegister rlo = CFGetRegLO(cf);
  const WRegister rhi = CFGetRegHI(cf);
  const Register rlo = CFGetRegLO(cf);
  const Register rhi = CFGetRegHI(cf);

  // TODO: This could be slightly more optimal
  Label done;

@@ -1055,16 +1058,16 @@ void CPU::NewRec::AArch64Compiler::Compile_div(CompileFlags cf)

void CPU::NewRec::AArch64Compiler::Compile_divu(CompileFlags cf)
{
  const WRegister rs = cf.valid_host_s ? CFGetRegS(cf) : RWARG1;
  const Register rs = cf.valid_host_s ? CFGetRegS(cf) : RWARG1;
  if (!cf.valid_host_s)
    MoveSToReg(rs, cf);

  const WRegister rt = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
  const Register rt = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
  if (!cf.valid_host_t)
    MoveTToReg(rt, cf);

  const WRegister rlo = CFGetRegLO(cf);
  const WRegister rhi = CFGetRegHI(cf);
  const Register rlo = CFGetRegLO(cf);
  const Register rhi = CFGetRegHI(cf);

  Label done;
  Label not_divide_by_zero;

@@ -1083,8 +1086,9 @@ void CPU::NewRec::AArch64Compiler::Compile_divu(CompileFlags cf)
  armAsm->bind(&done);
}

void CPU::NewRec::AArch64Compiler::TestOverflow(const vixl::aarch64::WRegister& result)
void CPU::NewRec::AArch64Compiler::TestOverflow(const vixl::aarch64::Register& result)
{
  DebugAssert(result.IsW());
  SwitchToFarCode(true, vs);

  BackupHostState();

@@ -1108,14 +1112,14 @@ void CPU::NewRec::AArch64Compiler::Compile_dst_op(CompileFlags cf,
  AssertRegOrConstS(cf);
  AssertRegOrConstT(cf);

  const WRegister rd = CFGetRegD(cf);
  const Register rd = CFGetRegD(cf);
  if (cf.valid_host_s && cf.valid_host_t)
  {
    (armAsm->*op)(rd, CFGetRegS(cf), CFGetRegT(cf));
  }
  else if (commutative && (cf.const_s || cf.const_t))
  {
    const WRegister src = cf.const_s ? CFGetRegT(cf) : CFGetRegS(cf);
    const Register src = cf.const_s ? CFGetRegT(cf) : CFGetRegS(cf);
    if (const u32 cv = GetConstantRegU32(cf.const_s ? cf.MipsS() : cf.MipsT()); cv != 0)
    {
      (armAsm->*op)(rd, src, logical ? armCheckLogicalConstant(cv) : armCheckAddSubConstant(cv));

@@ -1135,7 +1139,7 @@ void CPU::NewRec::AArch64Compiler::Compile_dst_op(CompileFlags cf,
  }
  else if (cf.const_t)
  {
    const WRegister rs = CFGetRegS(cf);
    const Register rs = CFGetRegS(cf);
    if (const u32 cv = GetConstantRegU32(cf.const_s ? cf.MipsS() : cf.MipsT()); cv != 0)
    {
      (armAsm->*op)(rd, rs, logical ? armCheckLogicalConstant(cv) : armCheckAddSubConstant(cv));

@@ -1184,7 +1188,7 @@ void CPU::NewRec::AArch64Compiler::Compile_and(CompileFlags cf)
  AssertRegOrConstT(cf);

  // special cases - and with self -> self, and with 0 -> 0
  const WRegister regd = CFGetRegD(cf);
  const Register regd = CFGetRegD(cf);
  if (cf.MipsS() == cf.MipsT())
  {
    armAsm->mov(regd, CFGetRegS(cf));

@@ -1205,7 +1209,7 @@ void CPU::NewRec::AArch64Compiler::Compile_or(CompileFlags cf)
  AssertRegOrConstT(cf);

  // or/nor with 0 -> no effect
  const WRegister regd = CFGetRegD(cf);
  const Register regd = CFGetRegD(cf);
  if (HasConstantRegValue(cf.MipsS(), 0) || HasConstantRegValue(cf.MipsT(), 0) || cf.MipsS() == cf.MipsT())
  {
    cf.const_s ? MoveTToReg(regd, cf) : MoveSToReg(regd, cf);

@@ -1220,7 +1224,7 @@ void CPU::NewRec::AArch64Compiler::Compile_xor(CompileFlags cf)
  AssertRegOrConstS(cf);
  AssertRegOrConstT(cf);

  const WRegister regd = CFGetRegD(cf);
  const Register regd = CFGetRegD(cf);
  if (cf.MipsS() == cf.MipsT())
  {
    // xor with self -> zero

@@ -1276,16 +1280,16 @@ void CPU::NewRec::AArch64Compiler::Compile_slt(CompileFlags cf, bool sign)
  armAsm->cset(CFGetRegD(cf), sign ? lt : lo);
}

vixl::aarch64::WRegister
vixl::aarch64::Register
CPU::NewRec::AArch64Compiler::ComputeLoadStoreAddressArg(CompileFlags cf,
                                                         const std::optional<VirtualMemoryAddress>& address,
                                                         const std::optional<const vixl::aarch64::WRegister>& reg)
                                                         const std::optional<const vixl::aarch64::Register>& reg)
{
  const u32 imm = inst->i.imm_sext32();
  if (cf.valid_host_s && imm == 0 && !reg.has_value())
    return CFGetRegS(cf);

  const WRegister dst = reg.has_value() ? reg.value() : RWARG1;
  const Register dst = reg.has_value() ? reg.value() : RWARG1;
  if (address.has_value())
  {
    EmitMov(dst, address.value());

@@ -1294,7 +1298,7 @@ CPU::NewRec::AArch64Compiler::ComputeLoadStoreAddressArg(CompileFlags cf,
  {
    if (cf.valid_host_s)
    {
      if (const WRegister src = CFGetRegS(cf); src.GetCode() != dst.GetCode())
      if (const Register src = CFGetRegS(cf); src.GetCode() != dst.GetCode())
        armAsm->mov(dst, CFGetRegS(cf));
    }
    else

@@ -1319,15 +1323,16 @@ CPU::NewRec::AArch64Compiler::ComputeLoadStoreAddressArg(CompileFlags cf,
}

template<typename RegAllocFn>
vixl::aarch64::WRegister CPU::NewRec::AArch64Compiler::GenerateLoad(const vixl::aarch64::WRegister& addr_reg,
vixl::aarch64::Register CPU::NewRec::AArch64Compiler::GenerateLoad(const vixl::aarch64::Register& addr_reg,
                                                                   MemoryAccessSize size, bool sign, bool use_fastmem,
                                                                   const RegAllocFn& dst_reg_alloc)
{
  DebugAssert(addr_reg.IsW());
  if (use_fastmem)
  {
    m_cycles += Bus::RAM_READ_TICKS;

    const WRegister dst = dst_reg_alloc();
    const Register dst = dst_reg_alloc();

    if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
    {

@@ -1410,7 +1415,7 @@ vixl::aarch64::WRegister CPU::NewRec::AArch64Compiler::GenerateLoad(const vixl::
    SwitchToNearCode(false);
  }

  const WRegister dst_reg = dst_reg_alloc();
  const Register dst_reg = dst_reg_alloc();
  switch (size)
  {
    case MemoryAccessSize::Byte:

@@ -1434,10 +1439,11 @@ vixl::aarch64::WRegister CPU::NewRec::AArch64Compiler::GenerateLoad(const vixl::
  return dst_reg;
}

void CPU::NewRec::AArch64Compiler::GenerateStore(const vixl::aarch64::WRegister& addr_reg,
                                                 const vixl::aarch64::WRegister& value_reg, MemoryAccessSize size,
void CPU::NewRec::AArch64Compiler::GenerateStore(const vixl::aarch64::Register& addr_reg,
                                                 const vixl::aarch64::Register& value_reg, MemoryAccessSize size,
                                                 bool use_fastmem)
{
  DebugAssert(addr_reg.IsW() && value_reg.IsW());
  if (use_fastmem)
  {
    if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)

@@ -1529,8 +1535,8 @@ void CPU::NewRec::AArch64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize
    g_settings.gpu_pgxp_enable ? std::optional<WRegister>(WRegister(AllocateTempHostReg(HR_CALLEE_SAVED))) :
                                 std::optional<WRegister>();
  FlushForLoadStore(address, false, use_fastmem);
  const WRegister addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
  const WRegister data = GenerateLoad(addr, size, sign, use_fastmem, [this, cf]() {
  const Register addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
  const Register data = GenerateLoad(addr, size, sign, use_fastmem, [this, cf]() -> Register {
    if (cf.MipsT() == Reg::zero)
      return RWRET;

@@ -1556,7 +1562,7 @@ void CPU::NewRec::AArch64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
{
  DebugAssert(size == MemoryAccessSize::Word && !sign);

  const WRegister addr = WRegister(AllocateTempHostReg(HR_CALLEE_SAVED));
  const Register addr = WRegister(AllocateTempHostReg(HR_CALLEE_SAVED));
  FlushForLoadStore(address, false, use_fastmem);

  // TODO: if address is constant, this can be simplified..

@@ -1579,7 +1585,7 @@ void CPU::NewRec::AArch64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
  // lwl/lwr from a load-delayed value takes the new value, but it itself, is load delayed, so the original value is
  // never written back. NOTE: can't trust T in cf because of the flush
  const Reg rt = inst->r.rt;
  WRegister value;
  Register value;
  if (m_load_delay_register == rt)
  {
    const u32 existing_ld_rt = (m_load_delay_value_register == NUM_HOST_REGS) ?

@@ -1654,8 +1660,8 @@ void CPU::NewRec::AArch64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz
    g_settings.gpu_pgxp_enable ? std::optional<WRegister>(WRegister(AllocateTempHostReg(HR_CALLEE_SAVED))) :
                                 std::optional<WRegister>();
  FlushForLoadStore(address, false, use_fastmem);
  const WRegister addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
  const WRegister value = GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, [this, action = action]() {
  const Register addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
  const Register value = GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, [this, action = action]() {
    return (action == GTERegisterAccessAction::CallHandler && g_settings.gpu_pgxp_enable) ?
             WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)) :
             RWRET;

@@ -1741,8 +1747,8 @@ void CPU::NewRec::AArch64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize
    g_settings.gpu_pgxp_enable ? std::optional<WRegister>(WRegister(AllocateTempHostReg(HR_CALLEE_SAVED))) :
                                 std::optional<WRegister>();
  FlushForLoadStore(address, true, use_fastmem);
  const WRegister addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
  const WRegister data = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
  const Register addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
  const Register data = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
  if (!cf.valid_host_t)
    MoveTToReg(RWARG2, cf);

@@ -1766,8 +1772,8 @@ void CPU::NewRec::AArch64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize

  // TODO: this can take over rt's value if it's no longer needed
  // NOTE: can't trust T in cf because of the alloc
  const WRegister addr = WRegister(AllocateTempHostReg(HR_CALLEE_SAVED));
  const WRegister value = g_settings.gpu_pgxp_enable ? WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)) : RWARG2;
  const Register addr = WRegister(AllocateTempHostReg(HR_CALLEE_SAVED));
  const Register value = g_settings.gpu_pgxp_enable ? WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)) : RWARG2;
  if (g_settings.gpu_pgxp_enable)
    MoveMIPSRegToReg(value, inst->r.rt);

@@ -1838,10 +1844,10 @@ void CPU::NewRec::AArch64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSiz
{
  const u32 index = static_cast<u32>(inst->r.rt.GetValue());
  const auto [ptr, action] = GetGTERegisterPointer(index, false);
  const WRegister addr = (g_settings.gpu_pgxp_enable || action == GTERegisterAccessAction::CallHandler) ?
  const Register addr = (g_settings.gpu_pgxp_enable || action == GTERegisterAccessAction::CallHandler) ?
                           WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)) :
                           RWARG1;
  const WRegister data = g_settings.gpu_pgxp_enable ? WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)) : RWARG2;
  const Register data = g_settings.gpu_pgxp_enable ? WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)) : RWARG2;
  FlushForLoadStore(address, true, use_fastmem);
  ComputeLoadStoreAddressArg(cf, address, addr);

@@ -1912,10 +1918,10 @@ void CPU::NewRec::AArch64Compiler::Compile_mtc0(CompileFlags cf)

  // for some registers, we need to test certain bits
  const bool needs_bit_test = (reg == Cop0Reg::SR);
  const WRegister new_value = RWARG1;
  const WRegister old_value = RWARG2;
  const WRegister changed_bits = RWARG3;
  const WRegister mask_reg = RWSCRATCH;
  const Register new_value = RWARG1;
  const Register old_value = RWARG2;
  const Register changed_bits = RWARG3;
  const Register mask_reg = RWSCRATCH;

  // Load old value
  armAsm->ldr(old_value, PTR(ptr));

@@ -1975,8 +1981,10 @@ void CPU::NewRec::AArch64Compiler::Compile_rfe(CompileFlags cf)
  TestInterrupts(RWARG1);
}

void CPU::NewRec::AArch64Compiler::TestInterrupts(const vixl::aarch64::WRegister& sr)
void CPU::NewRec::AArch64Compiler::TestInterrupts(const vixl::aarch64::Register& sr)
{
  DebugAssert(sr.IsW());

  // if Iec == 0 then goto no_interrupt
  Label no_interrupt;
  armAsm->tbz(sr, 0, &no_interrupt);
@@ -43,7 +43,7 @@ protected:

  void Compile_Fallback() override;

  void CheckBranchTarget(const vixl::aarch64::WRegister& pcreg);
  void CheckBranchTarget(const vixl::aarch64::Register& pcreg);
  void Compile_jr(CompileFlags cf) override;
  void Compile_jalr(CompileFlags cf) override;
  void Compile_bxx(CompileFlags cf, BranchCondition cond) override;

@@ -77,7 +77,7 @@ protected:
  void Compile_multu(CompileFlags cf) override;
  void Compile_div(CompileFlags cf) override;
  void Compile_divu(CompileFlags cf) override;
  void TestOverflow(const vixl::aarch64::WRegister& result);
  void TestOverflow(const vixl::aarch64::Register& result);
  void Compile_dst_op(CompileFlags cf,
                      void (vixl::aarch64::Assembler::*op)(const vixl::aarch64::Register&,
                                                           const vixl::aarch64::Register&,

@@ -95,13 +95,13 @@ protected:
  void Compile_slt(CompileFlags cf) override;
  void Compile_sltu(CompileFlags cf) override;

  vixl::aarch64::WRegister
  vixl::aarch64::Register
  ComputeLoadStoreAddressArg(CompileFlags cf, const std::optional<VirtualMemoryAddress>& address,
                             const std::optional<const vixl::aarch64::WRegister>& reg = std::nullopt);
                             const std::optional<const vixl::aarch64::Register>& reg = std::nullopt);
  template<typename RegAllocFn>
  vixl::aarch64::WRegister GenerateLoad(const vixl::aarch64::WRegister& addr_reg, MemoryAccessSize size, bool sign,
  vixl::aarch64::Register GenerateLoad(const vixl::aarch64::Register& addr_reg, MemoryAccessSize size, bool sign,
                                       bool use_fastmem, const RegAllocFn& dst_reg_alloc);
  void GenerateStore(const vixl::aarch64::WRegister& addr_reg, const vixl::aarch64::WRegister& value_reg,
  void GenerateStore(const vixl::aarch64::Register& addr_reg, const vixl::aarch64::Register& value_reg,
                     MemoryAccessSize size, bool use_fastmem);
  void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                   const std::optional<VirtualMemoryAddress>& address) override;

@@ -116,7 +116,7 @@ protected:
  void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                    const std::optional<VirtualMemoryAddress>& address) override;

  void TestInterrupts(const vixl::aarch64::WRegister& sr);
  void TestInterrupts(const vixl::aarch64::Register& sr);
  void Compile_mtc0(CompileFlags cf) override;
  void Compile_rfe(CompileFlags cf) override;

@@ -128,7 +128,7 @@ protected:
                  Reg arg3reg = Reg::count) override;

private:
  void EmitMov(const vixl::aarch64::WRegister& dst, u32 val);
  void EmitMov(const vixl::aarch64::Register& dst, u32 val);
  void EmitCall(const void* ptr, bool force_inline = false);

  vixl::aarch64::Operand armCheckAddSubConstant(s32 val);

@@ -144,15 +144,15 @@ private:
  void AssertRegOrConstS(CompileFlags cf) const;
  void AssertRegOrConstT(CompileFlags cf) const;
  vixl::aarch64::MemOperand MipsPtr(Reg r) const;
  vixl::aarch64::WRegister CFGetRegD(CompileFlags cf) const;
  vixl::aarch64::WRegister CFGetRegS(CompileFlags cf) const;
  vixl::aarch64::WRegister CFGetRegT(CompileFlags cf) const;
  vixl::aarch64::WRegister CFGetRegLO(CompileFlags cf) const;
  vixl::aarch64::WRegister CFGetRegHI(CompileFlags cf) const;
  vixl::aarch64::Register CFGetRegD(CompileFlags cf) const;
  vixl::aarch64::Register CFGetRegS(CompileFlags cf) const;
  vixl::aarch64::Register CFGetRegT(CompileFlags cf) const;
  vixl::aarch64::Register CFGetRegLO(CompileFlags cf) const;
  vixl::aarch64::Register CFGetRegHI(CompileFlags cf) const;

  void MoveSToReg(const vixl::aarch64::WRegister& dst, CompileFlags cf);
  void MoveTToReg(const vixl::aarch64::WRegister& dst, CompileFlags cf);
  void MoveMIPSRegToReg(const vixl::aarch64::WRegister& dst, Reg reg);
  void MoveSToReg(const vixl::aarch64::Register& dst, CompileFlags cf);
  void MoveTToReg(const vixl::aarch64::Register& dst, CompileFlags cf);
  void MoveMIPSRegToReg(const vixl::aarch64::Register& dst, Reg reg);

  vixl::aarch64::Assembler m_emitter;
  vixl::aarch64::Assembler m_far_emitter;
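The WRegister-to-Register change that runs through both files trades a compile-time width guarantee for the runtime DebugAssert(reg.IsW()) checks added above. A hedged illustration of the difference (the helper is hypothetical; Register and IsW() are the vixl API as used in this diff):

    #include "aarch64/registers-aarch64.h"
    #include "globals-vixl.h"

    // With WRegister, passing a 64-bit register is a compile error. With the
    // generic Register, width is a runtime property, so helpers that need a
    // 32-bit register assert it instead.
    void Use32BitReg(const vixl::aarch64::Register& reg) {
      VIXL_ASSERT(reg.IsW());  // replaces the guarantee the old signature gave
      // ... emit 32-bit operations on reg ...
    }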
@@ -145,8 +145,10 @@ s64 CPU::Recompiler::armGetPCDisplacement(const void* current, const void* targe
  return static_cast<s64>((reinterpret_cast<ptrdiff_t>(target) - reinterpret_cast<ptrdiff_t>(current)) >> 2);
}

void CPU::Recompiler::armMoveAddressToReg(a64::Assembler* armAsm, const a64::XRegister& reg, const void* addr)
void CPU::Recompiler::armMoveAddressToReg(a64::Assembler* armAsm, const a64::Register& reg, const void* addr)
{
  DebugAssert(reg.IsX());

  const void* cur = armAsm->GetCursorAddress<const void*>();
  const void* current_code_ptr_page =
    reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(cur) & ~static_cast<uintptr_t>(0xFFF));

@@ -259,8 +261,13 @@ u8* CPU::Recompiler::armGetJumpTrampoline(const void* target)

  u8* start = s_trampoline_start_ptr + offset;
  a64::Assembler armAsm(start, TRAMPOLINE_AREA_SIZE - offset);
#ifdef VIXL_DEBUG
  vixl::CodeBufferCheckScope armAsmCheck(&armAsm, TRAMPOLINE_AREA_SIZE - offset,
                                         vixl::CodeBufferCheckScope::kDontReserveBufferSpace);
#endif
  armMoveAddressToReg(&armAsm, RXSCRATCH, target);
  armAsm.br(RXSCRATCH);
  armAsm.FinalizeCode();

  const u32 size = static_cast<u32>(armAsm.GetSizeOfCodeGenerated());
  DebugAssert(size < 20);

@@ -123,7 +123,7 @@ constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128;

bool armIsCallerSavedRegister(u32 id);
s64 armGetPCDisplacement(const void* current, const void* target);
void armMoveAddressToReg(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::XRegister& reg, const void* addr);
void armMoveAddressToReg(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr);
void armEmitMov(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& rd, u64 imm);
void armEmitJmp(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline);
void armEmitCall(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline);