3rdparty/vixl: Import @ 8eca2b7

Stenzek 2024-05-29 21:37:33 +10:00 committed by Connor McLaughlin
parent 525a7c48e9
commit 18665b81c4
51 changed files with 94322 additions and 0 deletions

3rdparty/vixl/AUTHORS vendored Normal file

@@ -0,0 +1,8 @@
# Below is a list of people and organisations that have contributed to the VIXL
# project. Entries should be added to the list as:
#
# Name/Organization <email address>
ARM Ltd. <*@arm.com>
Google Inc. <*@google.com>
Linaro <*@linaro.org>

3rdparty/vixl/CMakeLists.txt vendored Normal file

@@ -0,0 +1,63 @@
add_library(vixl
include/vixl/aarch64/abi-aarch64.h
include/vixl/aarch64/assembler-aarch64.h
include/vixl/aarch64/constants-aarch64.h
include/vixl/aarch64/cpu-aarch64.h
include/vixl/aarch64/cpu-features-auditor-aarch64.h
include/vixl/aarch64/decoder-aarch64.h
include/vixl/aarch64/decoder-constants-aarch64.h
include/vixl/aarch64/decoder-visitor-map-aarch64.h
include/vixl/aarch64/disasm-aarch64.h
include/vixl/aarch64/instructions-aarch64.h
include/vixl/aarch64/macro-assembler-aarch64.h
include/vixl/aarch64/operands-aarch64.h
include/vixl/aarch64/registers-aarch64.h
include/vixl/aarch64/simulator-aarch64.h
include/vixl/aarch64/simulator-constants-aarch64.h
include/vixl/assembler-base-vixl.h
include/vixl/code-buffer-vixl.h
include/vixl/code-generation-scopes-vixl.h
include/vixl/compiler-intrinsics-vixl.h
include/vixl/cpu-features.h
include/vixl/globals-vixl.h
include/vixl/invalset-vixl.h
include/vixl/macro-assembler-interface.h
include/vixl/platform-vixl.h
include/vixl/pool-manager-impl.h
include/vixl/pool-manager.h
include/vixl/utils-vixl.h
src/aarch64/assembler-aarch64.cc
src/aarch64/assembler-sve-aarch64.cc
src/aarch64/cpu-aarch64.cc
src/aarch64/cpu-features-auditor-aarch64.cc
src/aarch64/decoder-aarch64.cc
src/aarch64/disasm-aarch64.cc
src/aarch64/instructions-aarch64.cc
src/aarch64/logic-aarch64.cc
src/aarch64/macro-assembler-aarch64.cc
src/aarch64/macro-assembler-sve-aarch64.cc
src/aarch64/operands-aarch64.cc
src/aarch64/pointer-auth-aarch64.cc
src/aarch64/registers-aarch64.cc
src/code-buffer-vixl.cc
src/compiler-intrinsics-vixl.cc
src/cpu-features.cc
src/utils-vixl.cc
)
target_include_directories(vixl PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}/include
)
target_include_directories(vixl PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/include/vixl
${CMAKE_CURRENT_SOURCE_DIR}/include/vixl/aarch64
)
target_compile_definitions(vixl PUBLIC
VIXL_INCLUDE_TARGET_A64
)
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
message("Enabling vixl debug assertions")
target_compile_definitions(vixl PUBLIC VIXL_DEBUG)
endif()

3rdparty/vixl/LICENCE vendored Normal file

@@ -0,0 +1,30 @@
LICENCE
=======
The software in this repository is covered by the following licence.
// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

3rdparty/vixl/README.md vendored Normal file

@@ -0,0 +1,186 @@
VIXL: Armv8 Runtime Code Generation Library, 3.0.0
==================================================
Contents:
* Overview
* Licence
* Requirements
* Known limitations
* Usage
Overview
========
VIXL contains three components.
1. Programmatic **assemblers** to generate A64, A32 or T32 code at runtime. The
assemblers abstract some of the constraints of each ISA; for example, most
instructions support any immediate.
2. **Disassemblers** that can print any instruction emitted by the assemblers.
3. A **simulator** that can simulate any instruction emitted by the A64
assembler. The simulator allows generated code to be run on another
architecture without the need for a full ISA model.
The VIXL git repository can be found [on 'https://git.linaro.org'][vixl].
Changes from previous versions of VIXL can be found in the
[Changelog](doc/changelog.md).
Licence
=======
This software is covered by the licence described in the [LICENCE](LICENCE)
file.
Requirements
============
To build VIXL the following software is required:
1. Python 2.7
2. SCons 2.0
3. GCC 4.8+ or Clang 3.4+
A 64-bit host machine is required, implementing an LP64 data model. VIXL has
been tested using GCC on AArch64 Debian, GCC and Clang on amd64 Ubuntu
systems.
To run the linter and code formatting stages of the tests, the following
software is also required:
1. Git
2. [Google's `cpplint.py`][cpplint]
3. clang-format-3.8
Refer to the 'Usage' section for details.
Known Limitations for AArch64 code generation
=============================================
VIXL was developed for JavaScript engines so a number of features from A64 were
deemed unnecessary:
* Limited rounding mode support for floating point.
* Limited support for synchronisation instructions.
* Limited support for system instructions.
* A few miscellaneous integer and floating point instructions are missing.
The VIXL simulator supports only those instructions that the VIXL assembler can
generate. The `doc` directory contains a
[list of supported A64 instructions](doc/aarch64/supported-instructions-aarch64.md).
The VIXL simulator was developed to run on 64-bit amd64 platforms. Whilst it
builds and mostly works for 32-bit x86 platforms, there are a number of
floating-point operations which do not work correctly, and a number of tests
fail as a result.
VIXL may not build using Clang 3.7, due to a compiler warning. A workaround is
to disable conversion of warnings to errors, or to delete the offending
`return` statement reported and rebuild. This problem will be fixed in the next
release.
Debug Builds
------------
Your project's build system must define `VIXL_DEBUG` (e.g. `-DVIXL_DEBUG`)
when using a VIXL library that has been built with debug enabled.
Some classes defined in VIXL header files contain fields that are only present
in debug builds, so if `VIXL_DEBUG` is defined when the library is built, but
not defined for the header files included in your project, you will see runtime
failures.
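One way to surface such a mismatch at build time instead of as puzzling runtime
failures is for the consuming project to assert its expectation in a header. A
minimal sketch; `PROJECT_USES_DEBUG_VIXL` is a hypothetical macro that your own
build system would define alongside the debug build of VIXL:

```cpp
// Hypothetical guard: PROJECT_USES_DEBUG_VIXL is assumed to be defined by the
// same build configuration that links the debug build of VIXL.
#if defined(PROJECT_USES_DEBUG_VIXL) && !defined(VIXL_DEBUG)
#error "A debug VIXL is in use but VIXL_DEBUG is not defined for this file."
#endif
```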
Exclusive-Access Instructions
-----------------------------
All exclusive-access instructions are supported, but the simulator cannot
accurately simulate their behaviour as described in the ARMv8 Architecture
Reference Manual.
* A local monitor is simulated, so simulated exclusive loads and stores execute
as expected in a single-threaded environment.
* The global monitor is simulated by occasionally causing exclusive-access
instructions to fail regardless of the local monitor state.
* Load-acquire, store-release semantics are approximated by issuing a host
memory barrier after loads or before stores. The built-in
`__sync_synchronize()` is used for this purpose.
The simulator tries to be strict, and implements the following restrictions that
the ARMv8 ARM allows:
* A pair of load-/store-exclusive instructions will only succeed if they have
the same address and access size.
* Most of the time, cache-maintenance operations or explicit memory accesses
will clear the exclusive monitor.
* To ensure that simulated code does not depend on this behaviour, the
exclusive monitor will sometimes be left intact after these instructions.
Instructions affected by these limitations:
`stxrb`, `stxrh`, `stxr`, `ldxrb`, `ldxrh`, `ldxr`, `stxp`, `ldxp`, `stlxrb`,
`stlxrh`, `stlxr`, `ldaxrb`, `ldaxrh`, `ldaxr`, `stlxp`, `ldaxp`, `stlrb`,
`stlrh`, `stlr`, `ldarb`, `ldarh`, `ldar`, `clrex`.
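Code generated for the simulator should therefore use the conventional retry
loop rather than assuming a store-exclusive succeeds. A minimal sketch using
the MacroAssembler (the function name is illustrative):

```cpp
#include "vixl/aarch64/macro-assembler-aarch64.h"

using namespace vixl::aarch64;

// Emit an atomic 32-bit increment of the counter whose address is in x0.
// w2 receives the store-exclusive status (0 on success), so retry on failure;
// under the simulator the stlxr may fail spuriously, which is exactly what
// the loop handles.
void EmitAtomicIncrement(MacroAssembler* masm) {
  Label retry;
  masm->Bind(&retry);
  masm->Ldaxr(w1, MemOperand(x0));
  masm->Add(w1, w1, 1);
  masm->Stlxr(w2, w1, MemOperand(x0));
  masm->Cbnz(w2, &retry);
}
```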
Usage
=====
Running all Tests
-----------------
The helper script `tools/test.py` will build and run every test that is provided
with VIXL, in both release and debug mode. It is a useful script for verifying
that all of VIXL's dependencies are in place and that VIXL is working as it
should.
By default, the `tools/test.py` script runs a linter to check that the source
code conforms with the code style guide, and to detect several common errors
that the compiler may not warn about. This is most useful for VIXL developers.
The linter has the following dependencies:
1. Git must be installed, and the VIXL project must be in a valid Git
repository, such as one produced using `git clone`.
2. `cpplint.py`, [as provided by Google][cpplint], must be available (and
executable) on the `PATH`.
It is possible to tell `tools/test.py` to skip the linter stage by passing
`--nolint`. This removes the dependency on `cpplint.py` and Git. The `--nolint`
option is implied if the VIXL project is a snapshot (with no `.git` directory).
Additionally, `tools/test.py` tests code formatting using `clang-format-3.8`.
If you don't have `clang-format-3.8`, disable the test using the
`--noclang-format` option.
Also note that the tests for the tracing features depend upon external `diff`
and `sed` tools. If these tools are not available in `PATH`, these tests will
fail.
Getting Started
---------------
We have separate guides for introducing VIXL, depending on what architecture you
are targeting. A guide for working with AArch32 can be found
[here][getting-started-aarch32], while the AArch64 guide is
[here][getting-started-aarch64]. Example source code is provided in the
[examples](examples) directory. You can build examples with either `scons
aarch32_examples` or `scons aarch64_examples` from the root directory, or use
`scons --help` to get a detailed list of available build targets.
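The flavour of the AArch64 API can be seen in a short sketch in the style of
the bundled getting-started example (simplified here, and assuming VIXL was
built with the A64 target and the simulator enabled):

```cpp
#include <cstdio>

#include "vixl/aarch64/macro-assembler-aarch64.h"
#include "vixl/aarch64/simulator-aarch64.h"

using namespace vixl::aarch64;

int main() {
  // Generate a tiny function: w0 = w0 + w1.
  MacroAssembler masm;
  Label add_fn;
  masm.Bind(&add_fn);
  masm.Add(w0, w0, w1);
  masm.Ret();
  masm.FinalizeCode();

  // Run it in the simulator rather than on real hardware.
  Decoder decoder;
  Simulator simulator(&decoder);
  simulator.WriteWRegister(0, 20);
  simulator.WriteWRegister(1, 22);
  simulator.RunFrom(masm.GetLabelAddress<Instruction*>(&add_fn));
  printf("20 + 22 = %d\n", simulator.ReadWRegister(0));
  return 0;
}
```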
[cpplint]: http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py
"Google's cpplint.py script."
[vixl]: https://git.linaro.org/arm/vixl.git
"The VIXL repository at 'https://git.linaro.org'."
[getting-started-aarch32]: doc/aarch32/getting-started-aarch32.md
"Introduction to VIXL for AArch32."
[getting-started-aarch64]: doc/aarch64/getting-started-aarch64.md
"Introduction to VIXL for AArch64."

3rdparty/vixl/VERSIONS.md vendored Normal file

@@ -0,0 +1,30 @@
Versioning
==========
Since version 3.0.0, VIXL uses [Semantic Versioning 2.0.0][semver].
Briefly:
- Backwards-incompatible changes update the _major_ version.
- New features update the _minor_ version.
- Bug fixes update the _patch_ version.
Why 3.0.0?
----------
VIXL was originally released as 1.x using snapshot releases. When we moved VIXL
into Linaro, we started working directly on `master` and stopped tagging
named releases. However, we informally called this "VIXL 2", so we are skipping
2.0.0 to avoid potential confusion.
Using `master`
--------------
Users who want to take the latest development version of VIXL can still take
commits from `master`. Our day-to-day development process hasn't changed and
these commits should still pass their own tests. However, note that commits not
explicitly tagged with a given version should be considered to be unversioned,
with no backwards-compatibility guarantees.
[semver]: https://semver.org/spec/v2.0.0.html
"Semantic Versioning 2.0.0 Specification"

3rdparty/vixl/include/vixl/aarch64/abi-aarch64.h vendored Normal file

@@ -0,0 +1,167 @@
// Copyright 2016, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// The ABI features are only supported with C++11 or later.
#if __cplusplus >= 201103L
// This should not be defined manually.
#define VIXL_HAS_ABI_SUPPORT
#elif defined(VIXL_HAS_ABI_SUPPORT)
#error "The ABI support requires C++11 or later."
#endif
#ifdef VIXL_HAS_ABI_SUPPORT
#ifndef VIXL_AARCH64_ABI_AARCH64_H_
#define VIXL_AARCH64_ABI_AARCH64_H_
#include <algorithm>
#include <type_traits>
#include "../globals-vixl.h"
#include "instructions-aarch64.h"
#include "operands-aarch64.h"
namespace vixl {
namespace aarch64 {
// Class describing the AArch64 procedure call standard, as defined in "ARM
// Procedure Call Standard for the ARM 64-bit Architecture (AArch64)",
// release 1.0 (AAPCS below).
//
// The stages in the comments match the description in that document.
//
// Stage B does not apply to arguments handled by this class.
class ABI {
public:
explicit ABI(Register stack_pointer = sp) : stack_pointer_(stack_pointer) {
// Stage A - Initialization
Reset();
}
void Reset() {
NGRN_ = 0;
NSRN_ = 0;
stack_offset_ = 0;
}
int GetStackSpaceRequired() { return stack_offset_; }
// The logic is described in section 5.5 of the AAPCS.
template <typename T>
GenericOperand GetReturnGenericOperand() const {
ABI abi(stack_pointer_);
GenericOperand result = abi.GetNextParameterGenericOperand<T>();
VIXL_ASSERT(result.IsCPURegister());
return result;
}
// The logic is described in section 5.4.2 of the AAPCS.
// The `GenericOperand` returned describes the location reserved for the
// argument from the point of view of the callee.
template <typename T>
GenericOperand GetNextParameterGenericOperand() {
const bool is_floating_point_type = std::is_floating_point<T>::value;
const bool is_integral_type =
std::is_integral<T>::value || std::is_enum<T>::value;
const bool is_pointer_type = std::is_pointer<T>::value;
int type_alignment = std::alignment_of<T>::value;
// We only support basic types.
VIXL_ASSERT(is_floating_point_type || is_integral_type || is_pointer_type);
// To ensure we get the correct type of operand when simulating on a 32-bit
// host, force the size of pointer types to the native AArch64 pointer size.
unsigned size = is_pointer_type ? 8 : sizeof(T);
// The size of the 'operand' reserved for the argument.
unsigned operand_size = AlignUp(size, kWRegSizeInBytes);
if (size > 8) {
VIXL_UNIMPLEMENTED();
return GenericOperand();
}
// Stage C.1
if (is_floating_point_type && (NSRN_ < 8)) {
return GenericOperand(VRegister(NSRN_++, size * kBitsPerByte));
}
// Stages C.2, C.3, and C.4: Unsupported. Caught by the assertions above.
// Stages C.5 and C.6
if (is_floating_point_type) {
VIXL_STATIC_ASSERT(
!is_floating_point_type ||
(std::is_same<T, float>::value || std::is_same<T, double>::value));
int offset = stack_offset_;
stack_offset_ += 8;
return GenericOperand(MemOperand(stack_pointer_, offset), operand_size);
}
// Stage C.7
if ((is_integral_type || is_pointer_type) && (size <= 8) && (NGRN_ < 8)) {
return GenericOperand(Register(NGRN_++, operand_size * kBitsPerByte));
}
// Stage C.8
if (type_alignment == 16) {
NGRN_ = AlignUp(NGRN_, 2);
}
// Stage C.9
if (is_integral_type && (size == 16) && (NGRN_ < 7)) {
VIXL_UNIMPLEMENTED();
return GenericOperand();
}
// Stage C.10: Unsupported. Caught by the assertions above.
// Stage C.11
NGRN_ = 8;
// Stage C.12
stack_offset_ = AlignUp(stack_offset_, std::max(type_alignment, 8));
// Stage C.13: Unsupported. Caught by the assertions above.
// Stage C.14
VIXL_ASSERT(size <= 8u);
size = std::max(size, 8u);
int offset = stack_offset_;
stack_offset_ += size;
return GenericOperand(MemOperand(stack_pointer_, offset), operand_size);
}
private:
Register stack_pointer_;
// Next General-purpose Register Number.
int NGRN_;
// Next SIMD and Floating-point Register Number.
int NSRN_;
// The acronym "NSAA" used in the standard refers to the "Next Stacked
// Argument Address". Here we deal with offsets from the stack pointer.
int stack_offset_;
};
template <>
inline GenericOperand ABI::GetReturnGenericOperand<void>() const {
return GenericOperand();
}
}
} // namespace vixl::aarch64
#endif // VIXL_AARCH64_ABI_AARCH64_H_
#endif // VIXL_HAS_ABI_SUPPORT
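A minimal usage sketch for this class (assuming the header is on the include
path set up by the CMakeLists above; names other than the VIXL API are
illustrative), describing where the arguments of a function such as
`int f(int, double, int*)` live under the AAPCS:

```cpp
#include "vixl/aarch64/abi-aarch64.h"

using namespace vixl::aarch64;

void DescribeArguments() {
  ABI abi;
  // Stage C.7: the first integral argument is passed in w0.
  GenericOperand a0 = abi.GetNextParameterGenericOperand<int>();
  // Stage C.1: the first floating-point argument is passed in d0.
  GenericOperand a1 = abi.GetNextParameterGenericOperand<double>();
  // Pointers are forced to 64 bits, so this argument lands in x1.
  GenericOperand a2 = abi.GetNextParameterGenericOperand<int*>();
  // The return value of an `int` function is described as w0.
  GenericOperand ret = abi.GetReturnGenericOperand<int>();
  // Arguments that overflow the registers come back as stack MemOperands, and
  // GetStackSpaceRequired() reports the space the caller must reserve.
  (void)a0; (void)a1; (void)a2; (void)ret;
}
```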

File diff suppressed because it is too large

File diff suppressed because it is too large

3rdparty/vixl/include/vixl/aarch64/cpu-aarch64.h vendored Normal file

@@ -0,0 +1,332 @@
// Copyright 2014, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef VIXL_CPU_AARCH64_H
#define VIXL_CPU_AARCH64_H
#include "../cpu-features.h"
#include "../globals-vixl.h"
#include "instructions-aarch64.h"
#include "simulator-aarch64.h"
#ifndef VIXL_INCLUDE_TARGET_AARCH64
// The supporting .cc file is only compiled when the A64 target is selected.
// Throw an explicit error now to avoid a harder-to-debug linker error later.
//
// These helpers _could_ work on any AArch64 host, even when generating AArch32
// code, but we don't support this because the available features may differ
// between AArch32 and AArch64 on the same platform, so basing AArch32 code
// generation on aarch64::CPU features is probably broken.
#error cpu-aarch64.h requires VIXL_INCLUDE_TARGET_AARCH64 (scons target=a64).
#endif
namespace vixl {
namespace aarch64 {
// A CPU ID register, for use with CPUFeatures::kIDRegisterEmulation. Fields
// specific to each register are described in relevant subclasses.
class IDRegister {
protected:
explicit IDRegister(uint64_t value = 0) : value_(value) {}
class Field {
public:
enum Type { kUnsigned, kSigned };
static const int kMaxWidthInBits = 4;
// This needs to be constexpr so that fields have "constant initialisation".
// This avoids initialisation order problems when these values are used to
// (dynamically) initialise static variables, etc.
explicit constexpr Field(int lsb,
int bitWidth = kMaxWidthInBits,
Type type = kUnsigned)
: lsb_(lsb), bitWidth_(bitWidth), type_(type) {}
int GetWidthInBits() const { return bitWidth_; }
int GetLsb() const { return lsb_; }
int GetMsb() const { return lsb_ + GetWidthInBits() - 1; }
Type GetType() const { return type_; }
private:
int lsb_;
int bitWidth_;
Type type_;
};
public:
// Extract the specified field, performing sign-extension for signed fields.
// This allows us to implement the 'value >= number' detection mechanism
// recommended by the Arm ARM, for both signed and unsigned fields.
int Get(Field field) const;
private:
uint64_t value_;
};
class AA64PFR0 : public IDRegister {
public:
explicit AA64PFR0(uint64_t value) : IDRegister(value) {}
CPUFeatures GetCPUFeatures() const;
private:
static const Field kFP;
static const Field kAdvSIMD;
static const Field kRAS;
static const Field kSVE;
static const Field kDIT;
static const Field kCSV2;
static const Field kCSV3;
};
class AA64PFR1 : public IDRegister {
public:
explicit AA64PFR1(uint64_t value) : IDRegister(value) {}
CPUFeatures GetCPUFeatures() const;
private:
static const Field kBT;
static const Field kSSBS;
static const Field kMTE;
static const Field kSME;
};
class AA64ISAR0 : public IDRegister {
public:
explicit AA64ISAR0(uint64_t value) : IDRegister(value) {}
CPUFeatures GetCPUFeatures() const;
private:
static const Field kAES;
static const Field kSHA1;
static const Field kSHA2;
static const Field kCRC32;
static const Field kAtomic;
static const Field kRDM;
static const Field kSHA3;
static const Field kSM3;
static const Field kSM4;
static const Field kDP;
static const Field kFHM;
static const Field kTS;
static const Field kRNDR;
};
class AA64ISAR1 : public IDRegister {
public:
explicit AA64ISAR1(uint64_t value) : IDRegister(value) {}
CPUFeatures GetCPUFeatures() const;
private:
static const Field kDPB;
static const Field kAPA;
static const Field kAPI;
static const Field kJSCVT;
static const Field kFCMA;
static const Field kLRCPC;
static const Field kGPA;
static const Field kGPI;
static const Field kFRINTTS;
static const Field kSB;
static const Field kSPECRES;
static const Field kBF16;
static const Field kDGH;
static const Field kI8MM;
};
class AA64ISAR2 : public IDRegister {
public:
explicit AA64ISAR2(uint64_t value) : IDRegister(value) {}
CPUFeatures GetCPUFeatures() const;
private:
static const Field kWFXT;
static const Field kRPRES;
static const Field kMOPS;
static const Field kCSSC;
};
class AA64MMFR0 : public IDRegister {
public:
explicit AA64MMFR0(uint64_t value) : IDRegister(value) {}
CPUFeatures GetCPUFeatures() const;
private:
static const Field kECV;
};
class AA64MMFR1 : public IDRegister {
public:
explicit AA64MMFR1(uint64_t value) : IDRegister(value) {}
CPUFeatures GetCPUFeatures() const;
private:
static const Field kLO;
static const Field kAFP;
};
class AA64MMFR2 : public IDRegister {
public:
explicit AA64MMFR2(uint64_t value) : IDRegister(value) {}
CPUFeatures GetCPUFeatures() const;
private:
static const Field kAT;
};
class AA64ZFR0 : public IDRegister {
public:
explicit AA64ZFR0(uint64_t value) : IDRegister(value) {}
CPUFeatures GetCPUFeatures() const;
private:
static const Field kSVEver;
static const Field kAES;
static const Field kBitPerm;
static const Field kBF16;
static const Field kSHA3;
static const Field kSM4;
static const Field kI8MM;
static const Field kF32MM;
static const Field kF64MM;
};
class AA64SMFR0 : public IDRegister {
public:
explicit AA64SMFR0(uint64_t value) : IDRegister(value) {}
CPUFeatures GetCPUFeatures() const;
private:
static const Field kSMEf32f32;
static const Field kSMEb16f32;
static const Field kSMEf16f32;
static const Field kSMEi8i32;
static const Field kSMEf64f64;
static const Field kSMEi16i64;
static const Field kSMEfa64;
};
class CPU {
public:
// Initialise CPU support.
static void SetUp();
// Ensures the data at a given address and with a given size is the same for
// the I and D caches. I and D caches are not automatically coherent on ARM
// so this operation is required before any dynamically generated code can
// safely run.
static void EnsureIAndDCacheCoherency(void *address, size_t length);
// Read and interpret the ID registers. This requires
// CPUFeatures::kIDRegisterEmulation, and therefore cannot be called on
// non-AArch64 platforms.
static CPUFeatures InferCPUFeaturesFromIDRegisters();
// Read and interpret CPUFeatures reported by the OS. Failed queries (or
// unsupported platforms) return an empty list. Note that this is
// indistinguishable from a successful query on a platform that advertises no
// features.
//
// Non-AArch64 hosts are considered to be unsupported platforms, and this
// function returns an empty list.
static CPUFeatures InferCPUFeaturesFromOS(
CPUFeatures::QueryIDRegistersOption option =
CPUFeatures::kQueryIDRegistersIfAvailable);
// Query the SVE vector length. This requires CPUFeatures::kSVE.
static int ReadSVEVectorLengthInBits();
// Handle tagged pointers.
template <typename T>
static T SetPointerTag(T pointer, uint64_t tag) {
VIXL_ASSERT(IsUintN(kAddressTagWidth, tag));
// Use C-style casts to get static_cast behaviour for integral types (T),
// and reinterpret_cast behaviour for other types.
uint64_t raw = (uint64_t)pointer;
VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(raw));
raw = (raw & ~kAddressTagMask) | (tag << kAddressTagOffset);
return (T)raw;
}
template <typename T>
static uint64_t GetPointerTag(T pointer) {
// Use C-style casts to get static_cast behaviour for integral types (T),
// and reinterpret_cast behaviour for other types.
uint64_t raw = (uint64_t)pointer;
VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(raw));
return (raw & kAddressTagMask) >> kAddressTagOffset;
}
private:
#define VIXL_AARCH64_ID_REG_LIST(V) \
V(AA64PFR0, "ID_AA64PFR0_EL1") \
V(AA64PFR1, "ID_AA64PFR1_EL1") \
V(AA64ISAR0, "ID_AA64ISAR0_EL1") \
V(AA64ISAR1, "ID_AA64ISAR1_EL1") \
V(AA64MMFR0, "ID_AA64MMFR0_EL1") \
V(AA64MMFR1, "ID_AA64MMFR1_EL1") \
/* These registers are RES0 in the baseline Arm8.0. We can always safely */ \
/* read them, but some compilers don't accept the symbolic names. */ \
V(AA64SMFR0, "S3_0_C0_C4_5") \
V(AA64ISAR2, "S3_0_C0_C6_2") \
V(AA64MMFR2, "S3_0_C0_C7_2") \
V(AA64ZFR0, "S3_0_C0_C4_4")
#define VIXL_READ_ID_REG(NAME, MRS_ARG) static NAME Read##NAME();
// On native AArch64 platforms, read the named CPU ID registers. These require
// CPUFeatures::kIDRegisterEmulation, and should not be called on non-AArch64
// platforms.
VIXL_AARCH64_ID_REG_LIST(VIXL_READ_ID_REG)
#undef VIXL_READ_ID_REG
// Return the content of the cache type register.
static uint32_t GetCacheType();
// I and D cache line size in bytes.
static unsigned icache_line_size_;
static unsigned dcache_line_size_;
};
} // namespace aarch64
} // namespace vixl
#endif // VIXL_CPU_AARCH64_H
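A rough sketch of the typical sequence around these helpers, assuming `code`
points to `size` bytes of freshly generated A64 instructions and the host is an
AArch64 machine:

```cpp
#include "vixl/aarch64/cpu-aarch64.h"

using namespace vixl::aarch64;

void PrepareGeneratedCode(void* code, size_t size) {
  // Make the generated code visible to the instruction stream before running
  // it; I- and D-caches are not automatically coherent on Arm.
  CPU::SetUp();
  CPU::EnsureIAndDCacheCoherency(code, size);

  // Ask the OS (and, where available, the ID registers) what the host supports.
  CPUFeatures features = CPU::InferCPUFeaturesFromOS();
  if (features.Has(CPUFeatures::kSVE)) {
    int vl_bits = CPU::ReadSVEVectorLengthInBits();
    (void)vl_bits;
  }
}
```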

3rdparty/vixl/include/vixl/aarch64/cpu-features-auditor-aarch64.h vendored Normal file

@@ -0,0 +1,134 @@
// Copyright 2018, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Arm Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef VIXL_AARCH64_CPU_FEATURES_AUDITOR_AARCH64_H_
#define VIXL_AARCH64_CPU_FEATURES_AUDITOR_AARCH64_H_
#include <functional>
#include <iostream>
#include <unordered_map>
#include "../cpu-features.h"
#include "decoder-aarch64.h"
#include "decoder-visitor-map-aarch64.h"
namespace vixl {
namespace aarch64 {
// This visitor records the CPU features that each decoded instruction requires.
// It provides:
// - the set of CPU features required by the most recently decoded instruction,
// - a cumulative set of encountered CPU features,
// - an optional list of 'available' CPU features.
//
// Primarily, this allows the Disassembler and Simulator to share the same CPU
// features logic. However, it can be used standalone to scan code blocks for
// CPU features.
class CPUFeaturesAuditor : public DecoderVisitor {
public:
// Construction arguments:
// - If a decoder is specified, the CPUFeaturesAuditor automatically
// registers itself as a visitor. Otherwise, this can be done manually.
//
// - If an `available` features list is provided, it is used as a hint in
// cases where instructions may be provided by multiple separate features.
// An example of this is FP&SIMD loads and stores: some of these are used
// in both FP and integer SIMD code. If exactly one of those features is
// in `available` when one of these instructions is encountered, then the
// auditor will record that feature. Otherwise, it will record _both_
// features.
explicit CPUFeaturesAuditor(
Decoder* decoder, const CPUFeatures& available = CPUFeatures::None())
: available_(available), decoder_(decoder) {
if (decoder_ != NULL) decoder_->AppendVisitor(this);
}
explicit CPUFeaturesAuditor(
const CPUFeatures& available = CPUFeatures::None())
: available_(available), decoder_(NULL) {}
virtual ~CPUFeaturesAuditor() {
if (decoder_ != NULL) decoder_->RemoveVisitor(this);
}
void ResetSeenFeatures() {
seen_ = CPUFeatures::None();
last_instruction_ = CPUFeatures::None();
}
// Query or set available CPUFeatures.
const CPUFeatures& GetAvailableFeatures() const { return available_; }
void SetAvailableFeatures(const CPUFeatures& available) {
available_ = available;
}
// Query CPUFeatures seen since construction (or the last call to `Reset()`).
const CPUFeatures& GetSeenFeatures() const { return seen_; }
// Query CPUFeatures from the last instruction visited by this auditor.
const CPUFeatures& GetInstructionFeatures() const {
return last_instruction_;
}
bool InstructionIsAvailable() const {
return available_.Has(last_instruction_);
}
// The common CPUFeatures interface operates on the available_ list.
CPUFeatures* GetCPUFeatures() { return &available_; }
void SetCPUFeatures(const CPUFeatures& available) {
SetAvailableFeatures(available);
}
virtual void Visit(Metadata* metadata,
const Instruction* instr) VIXL_OVERRIDE;
private:
class RecordInstructionFeaturesScope;
#define DECLARE(A) virtual void Visit##A(const Instruction* instr);
VISITOR_LIST(DECLARE)
#undef DECLARE
void LoadStoreHelper(const Instruction* instr);
void LoadStorePairHelper(const Instruction* instr);
CPUFeatures seen_;
CPUFeatures last_instruction_;
CPUFeatures available_;
Decoder* decoder_;
using FormToVisitorFnMap = std::unordered_map<
uint32_t,
std::function<void(CPUFeaturesAuditor*, const Instruction*)>>;
static const FormToVisitorFnMap* GetFormToVisitorFnMap();
};
} // namespace aarch64
} // namespace vixl
#endif // VIXL_AARCH64_CPU_FEATURES_AUDITOR_AARCH64_H_
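A brief sketch of the standalone use described above: scan a block of generated
code and check whether the current host can run it (assuming `start` and `end`
delimit valid A64 instructions and an AArch64-targeted build):

```cpp
#include "vixl/aarch64/cpu-aarch64.h"
#include "vixl/aarch64/cpu-features-auditor-aarch64.h"

using namespace vixl::aarch64;

bool CodeRunsOnThisHost(const Instruction* start, const Instruction* end) {
  Decoder decoder;
  // The auditor registers itself with the decoder and records, for every
  // decoded instruction, which CPU features it needs.
  CPUFeaturesAuditor auditor(&decoder, CPU::InferCPUFeaturesFromOS());
  decoder.Decode(start, end);
  return auditor.GetAvailableFeatures().Has(auditor.GetSeenFeatures());
}
```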

3rdparty/vixl/include/vixl/aarch64/decoder-aarch64.h vendored Normal file

@@ -0,0 +1,695 @@
// Copyright 2019, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef VIXL_AARCH64_DECODER_AARCH64_H_
#define VIXL_AARCH64_DECODER_AARCH64_H_
#include <list>
#include <map>
#include <string>
#include "../globals-vixl.h"
#include "instructions-aarch64.h"
// List macro containing all visitors needed by the decoder class.
#define VISITOR_LIST_THAT_RETURN(V) \
V(AddSubExtended) \
V(AddSubImmediate) \
V(AddSubShifted) \
V(AddSubWithCarry) \
V(AtomicMemory) \
V(Bitfield) \
V(CompareBranch) \
V(ConditionalBranch) \
V(ConditionalCompareImmediate) \
V(ConditionalCompareRegister) \
V(ConditionalSelect) \
V(Crypto2RegSHA) \
V(Crypto3RegSHA) \
V(CryptoAES) \
V(DataProcessing1Source) \
V(DataProcessing2Source) \
V(DataProcessing3Source) \
V(EvaluateIntoFlags) \
V(Exception) \
V(Extract) \
V(FPCompare) \
V(FPConditionalCompare) \
V(FPConditionalSelect) \
V(FPDataProcessing1Source) \
V(FPDataProcessing2Source) \
V(FPDataProcessing3Source) \
V(FPFixedPointConvert) \
V(FPImmediate) \
V(FPIntegerConvert) \
V(LoadLiteral) \
V(LoadStoreExclusive) \
V(LoadStorePAC) \
V(LoadStorePairNonTemporal) \
V(LoadStorePairOffset) \
V(LoadStorePairPostIndex) \
V(LoadStorePairPreIndex) \
V(LoadStorePostIndex) \
V(LoadStorePreIndex) \
V(LoadStoreRCpcUnscaledOffset) \
V(LoadStoreRegisterOffset) \
V(LoadStoreUnscaledOffset) \
V(LoadStoreUnsignedOffset) \
V(LogicalImmediate) \
V(LogicalShifted) \
V(MoveWideImmediate) \
V(NEON2RegMisc) \
V(NEON2RegMiscFP16) \
V(NEON3Different) \
V(NEON3Same) \
V(NEON3SameExtra) \
V(NEON3SameFP16) \
V(NEONAcrossLanes) \
V(NEONByIndexedElement) \
V(NEONCopy) \
V(NEONExtract) \
V(NEONLoadStoreMultiStruct) \
V(NEONLoadStoreMultiStructPostIndex) \
V(NEONLoadStoreSingleStruct) \
V(NEONLoadStoreSingleStructPostIndex) \
V(NEONModifiedImmediate) \
V(NEONPerm) \
V(NEONScalar2RegMisc) \
V(NEONScalar2RegMiscFP16) \
V(NEONScalar3Diff) \
V(NEONScalar3Same) \
V(NEONScalar3SameExtra) \
V(NEONScalar3SameFP16) \
V(NEONScalarByIndexedElement) \
V(NEONScalarCopy) \
V(NEONScalarPairwise) \
V(NEONScalarShiftImmediate) \
V(NEONShiftImmediate) \
V(NEONTable) \
V(PCRelAddressing) \
V(RotateRightIntoFlags) \
V(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets) \
V(SVE32BitGatherLoad_VectorPlusImm) \
V(SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets) \
V(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets) \
V(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets) \
V(SVE32BitGatherPrefetch_VectorPlusImm) \
V(SVE32BitScatterStore_ScalarPlus32BitScaledOffsets) \
V(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets) \
V(SVE32BitScatterStore_VectorPlusImm) \
V(SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets) \
V(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsets) \
V(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets) \
V(SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets) \
V(SVE64BitGatherLoad_VectorPlusImm) \
V(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets) \
V(SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets) \
V(SVE64BitGatherPrefetch_VectorPlusImm) \
V(SVE64BitScatterStore_ScalarPlus64BitScaledOffsets) \
V(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets) \
V(SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets) \
V(SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets) \
V(SVE64BitScatterStore_VectorPlusImm) \
V(SVEAddressGeneration) \
V(SVEBitwiseLogicalUnpredicated) \
V(SVEBitwiseShiftUnpredicated) \
V(SVEFFRInitialise) \
V(SVEFFRWriteFromPredicate) \
V(SVEFPAccumulatingReduction) \
V(SVEFPArithmeticUnpredicated) \
V(SVEFPCompareVectors) \
V(SVEFPCompareWithZero) \
V(SVEFPComplexAddition) \
V(SVEFPComplexMulAdd) \
V(SVEFPComplexMulAddIndex) \
V(SVEFPFastReduction) \
V(SVEFPMulIndex) \
V(SVEFPMulAdd) \
V(SVEFPMulAddIndex) \
V(SVEFPUnaryOpUnpredicated) \
V(SVEIncDecByPredicateCount) \
V(SVEIndexGeneration) \
V(SVEIntArithmeticUnpredicated) \
V(SVEIntCompareSignedImm) \
V(SVEIntCompareUnsignedImm) \
V(SVEIntCompareVectors) \
V(SVEIntMulAddPredicated) \
V(SVEIntMulAddUnpredicated) \
V(SVEIntReduction) \
V(SVEIntUnaryArithmeticPredicated) \
V(SVEMovprfx) \
V(SVEMulIndex) \
V(SVEPermuteVectorExtract) \
V(SVEPermuteVectorInterleaving) \
V(SVEPredicateCount) \
V(SVEPredicateLogical) \
V(SVEPropagateBreak) \
V(SVEStackFrameAdjustment) \
V(SVEStackFrameSize) \
V(SVEVectorSelect) \
V(SVEBitwiseLogical_Predicated) \
V(SVEBitwiseLogicalWithImm_Unpredicated) \
V(SVEBitwiseShiftByImm_Predicated) \
V(SVEBitwiseShiftByVector_Predicated) \
V(SVEBitwiseShiftByWideElements_Predicated) \
V(SVEBroadcastBitmaskImm) \
V(SVEBroadcastFPImm_Unpredicated) \
V(SVEBroadcastGeneralRegister) \
V(SVEBroadcastIndexElement) \
V(SVEBroadcastIntImm_Unpredicated) \
V(SVECompressActiveElements) \
V(SVEConditionallyBroadcastElementToVector) \
V(SVEConditionallyExtractElementToSIMDFPScalar) \
V(SVEConditionallyExtractElementToGeneralRegister) \
V(SVEConditionallyTerminateScalars) \
V(SVEConstructivePrefix_Unpredicated) \
V(SVEContiguousFirstFaultLoad_ScalarPlusScalar) \
V(SVEContiguousLoad_ScalarPlusImm) \
V(SVEContiguousLoad_ScalarPlusScalar) \
V(SVEContiguousNonFaultLoad_ScalarPlusImm) \
V(SVEContiguousNonTemporalLoad_ScalarPlusImm) \
V(SVEContiguousNonTemporalLoad_ScalarPlusScalar) \
V(SVEContiguousNonTemporalStore_ScalarPlusImm) \
V(SVEContiguousNonTemporalStore_ScalarPlusScalar) \
V(SVEContiguousPrefetch_ScalarPlusImm) \
V(SVEContiguousPrefetch_ScalarPlusScalar) \
V(SVEContiguousStore_ScalarPlusImm) \
V(SVEContiguousStore_ScalarPlusScalar) \
V(SVECopySIMDFPScalarRegisterToVector_Predicated) \
V(SVECopyFPImm_Predicated) \
V(SVECopyGeneralRegisterToVector_Predicated) \
V(SVECopyIntImm_Predicated) \
V(SVEElementCount) \
V(SVEExtractElementToSIMDFPScalarRegister) \
V(SVEExtractElementToGeneralRegister) \
V(SVEFPArithmetic_Predicated) \
V(SVEFPArithmeticWithImm_Predicated) \
V(SVEFPConvertPrecision) \
V(SVEFPConvertToInt) \
V(SVEFPExponentialAccelerator) \
V(SVEFPRoundToIntegralValue) \
V(SVEFPTrigMulAddCoefficient) \
V(SVEFPTrigSelectCoefficient) \
V(SVEFPUnaryOp) \
V(SVEIncDecRegisterByElementCount) \
V(SVEIncDecVectorByElementCount) \
V(SVEInsertSIMDFPScalarRegister) \
V(SVEInsertGeneralRegister) \
V(SVEIntAddSubtractImm_Unpredicated) \
V(SVEIntAddSubtractVectors_Predicated) \
V(SVEIntCompareScalarCountAndLimit) \
V(SVEIntConvertToFP) \
V(SVEIntDivideVectors_Predicated) \
V(SVEIntMinMaxImm_Unpredicated) \
V(SVEIntMinMaxDifference_Predicated) \
V(SVEIntMulImm_Unpredicated) \
V(SVEIntMulVectors_Predicated) \
V(SVELoadAndBroadcastElement) \
V(SVELoadAndBroadcastQOWord_ScalarPlusImm) \
V(SVELoadAndBroadcastQOWord_ScalarPlusScalar) \
V(SVELoadMultipleStructures_ScalarPlusImm) \
V(SVELoadMultipleStructures_ScalarPlusScalar) \
V(SVELoadPredicateRegister) \
V(SVELoadVectorRegister) \
V(SVEPartitionBreakCondition) \
V(SVEPermutePredicateElements) \
V(SVEPredicateFirstActive) \
V(SVEPredicateInitialize) \
V(SVEPredicateNextActive) \
V(SVEPredicateReadFromFFR_Predicated) \
V(SVEPredicateReadFromFFR_Unpredicated) \
V(SVEPredicateTest) \
V(SVEPredicateZero) \
V(SVEPropagateBreakToNextPartition) \
V(SVEReversePredicateElements) \
V(SVEReverseVectorElements) \
V(SVEReverseWithinElements) \
V(SVESaturatingIncDecRegisterByElementCount) \
V(SVESaturatingIncDecVectorByElementCount) \
V(SVEStoreMultipleStructures_ScalarPlusImm) \
V(SVEStoreMultipleStructures_ScalarPlusScalar) \
V(SVEStorePredicateRegister) \
V(SVEStoreVectorRegister) \
V(SVETableLookup) \
V(SVEUnpackPredicateElements) \
V(SVEUnpackVectorElements) \
V(SVEVectorSplice) \
V(System) \
V(TestBranch) \
V(Unallocated) \
V(UnconditionalBranch) \
V(UnconditionalBranchToRegister) \
V(Unimplemented)
#define VISITOR_LIST_THAT_DONT_RETURN(V) V(Reserved)
#define VISITOR_LIST(V) \
VISITOR_LIST_THAT_RETURN(V) \
VISITOR_LIST_THAT_DONT_RETURN(V)
namespace vixl {
namespace aarch64 {
using Metadata = std::map<std::string, std::string>;
// The Visitor interface consists only of the Visit() method. User classes
// that inherit from this one must provide an implementation of the method.
// Information about the instruction encountered by the Decoder is available
// via the metadata pointer.
class DecoderVisitor {
public:
enum VisitorConstness { kConstVisitor, kNonConstVisitor };
explicit DecoderVisitor(VisitorConstness constness = kConstVisitor)
: constness_(constness) {}
virtual ~DecoderVisitor() {}
virtual void Visit(Metadata* metadata, const Instruction* instr) = 0;
bool IsConstVisitor() const { return constness_ == kConstVisitor; }
Instruction* MutableInstruction(const Instruction* instr) {
VIXL_ASSERT(!IsConstVisitor());
return const_cast<Instruction*>(instr);
}
private:
const VisitorConstness constness_;
};
class DecodeNode;
class CompiledDecodeNode;
// The instruction decoder is constructed from a graph of decode nodes. At each
// node, a number of bits are sampled from the instruction being decoded. The
// resulting value is used to look up the next node in the graph, which then
// samples other bits, and moves to other decode nodes. Eventually, a visitor
// node is reached, and the corresponding visitor function is called, which
// handles the instruction.
class Decoder {
public:
Decoder() { ConstructDecodeGraph(); }
// Top-level wrappers around the actual decoding function.
void Decode(const Instruction* instr);
void Decode(Instruction* instr);
// Decode all instructions from start (inclusive) to end (exclusive).
template <typename T>
void Decode(T start, T end) {
for (T instr = start; instr < end; instr = instr->GetNextInstruction()) {
Decode(instr);
}
}
// Register a new visitor class with the decoder.
// Decode() will call the corresponding visitor method from all registered
// visitor classes when decoding reaches the leaf node of the instruction
// decode tree.
// Visitors are called in order.
// A visitor can be registered multiple times.
//
// d.AppendVisitor(V1);
// d.AppendVisitor(V2);
// d.PrependVisitor(V2);
// d.AppendVisitor(V3);
//
// d.Decode(i);
//
// will call in order visitor methods in V2, V1, V2, V3.
void AppendVisitor(DecoderVisitor* visitor);
void PrependVisitor(DecoderVisitor* visitor);
// These helpers register `new_visitor` before or after the first instance of
// `registered_visitor` in the list.
// So if
// V1, V2, V1, V2
// are registered in this order in the decoder, calls to
// d.InsertVisitorAfter(V3, V1);
// d.InsertVisitorBefore(V4, V2);
// will yield the order
// V1, V3, V4, V2, V1, V2
//
// For more complex modifications of the order of registered visitors, one can
// directly access and modify the list of visitors via the `visitors()'
// accessor.
void InsertVisitorBefore(DecoderVisitor* new_visitor,
DecoderVisitor* registered_visitor);
void InsertVisitorAfter(DecoderVisitor* new_visitor,
DecoderVisitor* registered_visitor);
// Remove all instances of a previously registered visitor class from the list
// of visitors stored by the decoder.
void RemoveVisitor(DecoderVisitor* visitor);
void VisitNamedInstruction(const Instruction* instr, const std::string& name);
std::list<DecoderVisitor*>* visitors() { return &visitors_; }
// Get a DecodeNode by name from the Decoder's map.
DecodeNode* GetDecodeNode(std::string name);
private:
// Decodes an instruction and calls the visitor functions registered with the
// Decoder class.
void DecodeInstruction(const Instruction* instr);
// Add an initialised DecodeNode to the decode_node_ map.
void AddDecodeNode(const DecodeNode& node);
// Visitors are registered in a list.
std::list<DecoderVisitor*> visitors_;
// Compile the dynamically generated decode graph based on the static
// information in kDecodeMapping and kVisitorNodes.
void ConstructDecodeGraph();
// Root node for the compiled decoder graph, stored here to avoid a map lookup
// for every instruction decoded.
CompiledDecodeNode* compiled_decoder_root_;
// Map of node names to DecodeNodes.
std::map<std::string, DecodeNode> decode_nodes_;
};
typedef void (Decoder::*DecodeFnPtr)(const Instruction*);
typedef uint32_t (Instruction::*BitExtractFn)(void) const;
// A Visitor node maps the name of a visitor to the function that handles it.
struct VisitorNode {
const char* name;
const DecodeFnPtr visitor_fn;
};
// DecodePattern and DecodeMapping represent the input data to the decoder
// compilation stage. After compilation, the decoder is embodied in the graph
// of CompiledDecodeNodes pointed to by compiled_decoder_root_.
// A DecodePattern maps a pattern of set/unset/don't care (1, 0, x) bits encoded
// as uint32_t to its handler.
// The encoding uses two bits per symbol: 0 => 0b00, 1 => 0b01, x => 0b10.
// 0b11 marks the edge of the most-significant bits of the pattern, which is
// required to determine the length. For example, the pattern "1x01"_b is
// encoded in a uint32_t as 0b11_01_10_00_01.
struct DecodePattern {
uint32_t pattern;
const char* handler;
};
// A DecodeMapping consists of the name of a handler, the bits sampled in the
// instruction by that handler, and a mapping from the pattern that those
// sampled bits match to the corresponding name of a node.
struct DecodeMapping {
const char* name;
const std::vector<uint8_t> sampled_bits;
const std::vector<DecodePattern> mapping;
};
// For speed, before nodes can be used for decoding instructions, they must
// be compiled. This converts the mapping "bit pattern strings to decoder name
// string" stored in DecodeNodes to an array look up for the pointer to the next
// node, stored in CompiledDecodeNodes. Compilation may also apply other
// optimisations for simple decode patterns.
class CompiledDecodeNode {
public:
// Constructor for decode node, containing a decode table and pointer to a
// function that extracts the bits to be sampled.
CompiledDecodeNode(BitExtractFn bit_extract_fn, size_t decode_table_size)
: bit_extract_fn_(bit_extract_fn),
instruction_name_("node"),
decode_table_size_(decode_table_size),
decoder_(NULL) {
decode_table_ = new CompiledDecodeNode*[decode_table_size_];
memset(decode_table_, 0, decode_table_size_ * sizeof(decode_table_[0]));
}
// Constructor for wrappers around visitor functions. These require no
// decoding, so no bit extraction function or decode table is assigned.
explicit CompiledDecodeNode(std::string iname, Decoder* decoder)
: bit_extract_fn_(NULL),
instruction_name_(iname),
decode_table_(NULL),
decode_table_size_(0),
decoder_(decoder) {}
~CompiledDecodeNode() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
// Free the decode table, if this is a compiled, non-leaf node.
if (decode_table_ != NULL) {
VIXL_ASSERT(!IsLeafNode());
delete[] decode_table_;
}
}
// Decode the instruction by either sampling the bits using the bit extract
// function to find the next node, or, if we're at a leaf, calling the visitor
// function.
void Decode(const Instruction* instr) const;
// A leaf node is a wrapper for a visitor function.
bool IsLeafNode() const {
VIXL_ASSERT(((instruction_name_ == "node") && (bit_extract_fn_ != NULL)) ||
((instruction_name_ != "node") && (bit_extract_fn_ == NULL)));
return instruction_name_ != "node";
}
// Get a pointer to the next node required in the decode process, based on the
// bits sampled by the current node.
CompiledDecodeNode* GetNodeForBits(uint32_t bits) const {
VIXL_ASSERT(bits < decode_table_size_);
return decode_table_[bits];
}
// Set the next node in the decode process for the pattern of sampled bits in
// the current node.
void SetNodeForBits(uint32_t bits, CompiledDecodeNode* n) {
VIXL_ASSERT(bits < decode_table_size_);
VIXL_ASSERT(n != NULL);
decode_table_[bits] = n;
}
private:
// Pointer to an instantiated template function for extracting the bits
// sampled by this node. Set to NULL for leaf nodes.
const BitExtractFn bit_extract_fn_;
// Visitor function that handles the instruction identified. Set only for
// leaf nodes, where no extra decoding is required, otherwise NULL.
std::string instruction_name_;
// Mapping table from instruction bits to next decode stage.
CompiledDecodeNode** decode_table_;
const size_t decode_table_size_;
// Pointer to the decoder containing this node, used to call its visitor
// function for leaf nodes. Set to NULL for non-leaf nodes.
Decoder* decoder_;
};
class DecodeNode {
public:
// Default constructor needed for map initialisation.
DecodeNode()
: sampled_bits_(DecodeNode::kEmptySampledBits),
pattern_table_(DecodeNode::kEmptyPatternTable),
compiled_node_(NULL) {}
// Constructor for DecodeNode wrappers around visitor functions. These are
// marked as "compiled", as there is no decoding left to do.
explicit DecodeNode(const std::string& iname, Decoder* decoder)
: name_(iname),
sampled_bits_(DecodeNode::kEmptySampledBits),
instruction_name_(iname),
pattern_table_(DecodeNode::kEmptyPatternTable),
decoder_(decoder),
compiled_node_(NULL) {}
// Constructor for DecodeNodes that map bit patterns to other DecodeNodes.
explicit DecodeNode(const DecodeMapping& map, Decoder* decoder = NULL)
: name_(map.name),
sampled_bits_(map.sampled_bits),
instruction_name_("node"),
pattern_table_(map.mapping),
decoder_(decoder),
compiled_node_(NULL) {
// With the current two bits per symbol encoding scheme, the maximum pattern
// length is (32 - 2) / 2 = 15 bits.
VIXL_CHECK(GetPatternLength(map.mapping[0].pattern) <= 15);
for (const DecodePattern& p : map.mapping) {
VIXL_CHECK(GetPatternLength(p.pattern) == map.sampled_bits.size());
}
}
~DecodeNode() {
// Delete the compiled version of this node, if one was created.
if (compiled_node_ != NULL) {
delete compiled_node_;
}
}
// Get the bits sampled from the instruction by this node.
const std::vector<uint8_t>& GetSampledBits() const { return sampled_bits_; }
// Get the number of bits sampled from the instruction by this node.
size_t GetSampledBitsCount() const { return sampled_bits_.size(); }
// A leaf node is a DecodeNode that wraps the visitor function for the
// identified instruction class.
bool IsLeafNode() const { return instruction_name_ != "node"; }
std::string GetName() const { return name_; }
// Create a CompiledDecodeNode of specified table size that uses
// bit_extract_fn to sample bits from the instruction.
void CreateCompiledNode(BitExtractFn bit_extract_fn, size_t table_size) {
VIXL_ASSERT(bit_extract_fn != NULL);
VIXL_ASSERT(table_size > 0);
compiled_node_ = new CompiledDecodeNode(bit_extract_fn, table_size);
}
// Create a CompiledDecodeNode wrapping a visitor function. No decoding is
// required for this node; the visitor function is called instead.
void CreateVisitorNode() {
compiled_node_ = new CompiledDecodeNode(instruction_name_, decoder_);
}
// Find and compile the DecodeNode named "name", and set it as the node for
// the pattern "bits".
void CompileNodeForBits(Decoder* decoder, std::string name, uint32_t bits);
// Get a pointer to an instruction method that extracts the instruction bits
// specified by the mask argument, and returns those sampled bits as a
// contiguous sequence, suitable for indexing an array.
// For example, a mask of 0b1010 returns a function that, given an instruction
// 0bXYZW, will return 0bXZ.
BitExtractFn GetBitExtractFunction(uint32_t mask) {
return GetBitExtractFunctionHelper(mask, 0);
}
// Get a pointer to an Instruction method that applies a mask to the
// instruction bits, and tests if the result is equal to value. The returned
// function gives a 1 result if (inst & mask == value), 0 otherwise.
BitExtractFn GetBitExtractFunction(uint32_t mask, uint32_t value) {
return GetBitExtractFunctionHelper(value, mask);
}
// Compile this DecodeNode into a new CompiledDecodeNode and returns a pointer
// to it. This pointer is also stored inside the DecodeNode itself. Destroying
// a DecodeNode frees its associated CompiledDecodeNode.
CompiledDecodeNode* Compile(Decoder* decoder);
// Get a pointer to the CompiledDecodeNode associated with this DecodeNode.
// Returns NULL if the node has not been compiled yet.
CompiledDecodeNode* GetCompiledNode() const { return compiled_node_; }
bool IsCompiled() const { return GetCompiledNode() != NULL; }
enum class PatternSymbol { kSymbol0 = 0, kSymbol1 = 1, kSymbolX = 2 };
static const uint32_t kEndOfPattern = 3;
static const uint32_t kPatternSymbolMask = 3;
size_t GetPatternLength(uint32_t pattern) const {
uint32_t hsb = HighestSetBitPosition(pattern);
// The pattern length is signified by two set bits in a two bit-aligned
// position. Ensure that the pattern has a highest set bit, it's at an odd
// bit position, and that the bit to the right of the hsb is also set.
VIXL_ASSERT(((hsb % 2) == 1) && (pattern >> (hsb - 1)) == kEndOfPattern);
return hsb / 2;
}
bool PatternContainsSymbol(uint32_t pattern, PatternSymbol symbol) const {
while ((pattern & kPatternSymbolMask) != kEndOfPattern) {
if (static_cast<PatternSymbol>(pattern & kPatternSymbolMask) == symbol)
return true;
pattern >>= 2;
}
return false;
}
PatternSymbol GetSymbolAt(uint32_t pattern, size_t pos) const {
size_t len = GetPatternLength(pattern);
VIXL_ASSERT((pos < 15) && (pos < len));
uint32_t shift = static_cast<uint32_t>(2 * (len - pos - 1));
uint32_t sym = (pattern >> shift) & kPatternSymbolMask;
return static_cast<PatternSymbol>(sym);
}
private:
// Generate a mask and value pair from a pattern constructed from 0, 1 and x
// (don't care) 2-bit symbols.
// For example "10x1"_b should return mask = 0b1101, value = 0b1001.
typedef std::pair<Instr, Instr> MaskValuePair;
MaskValuePair GenerateMaskValuePair(uint32_t pattern) const;
// Generate a pattern ordered by the bit positions sampled by this node.
// The symbol corresponding to the lowest sample position is placed in the
// least-significant bits of the result pattern.
// For example, a pattern of "1x0"_b expected when sampling bits 31, 1 and 30
// returns the pattern "x01"_b; bit 1 should be 'x', bit 30 '0' and bit 31
// '1'.
// This output makes comparisons easier between the pattern and bits sampled
// from an instruction using the fast "compress" algorithm. See
// Instruction::Compress().
uint32_t GenerateOrderedPattern(uint32_t pattern) const;
// Generate a mask with a bit set at each sample position.
uint32_t GenerateSampledBitsMask() const;
// Try to compile a more optimised decode operation for this node, returning
// true if successful.
bool TryCompileOptimisedDecodeTable(Decoder* decoder);
// Helper function that returns a bit extracting function. If y is zero,
// x is a bit extraction mask. Otherwise, y is the mask, and x is the value
// to match after masking.
BitExtractFn GetBitExtractFunctionHelper(uint32_t x, uint32_t y);
// Name of this decoder node, used to construct edges in the decode graph.
std::string name_;
// Vector of bits sampled from an instruction to determine which node to look
// up next in the decode process.
const std::vector<uint8_t>& sampled_bits_;
static const std::vector<uint8_t> kEmptySampledBits;
// For leaf nodes, this is the name of the instruction form that the node
// represents. For other nodes, this is always set to "node".
std::string instruction_name_;
// Source mapping from bit pattern to name of next decode stage.
const std::vector<DecodePattern>& pattern_table_;
static const std::vector<DecodePattern> kEmptyPatternTable;
// Pointer to the decoder containing this node, used to call its visitor
// function for leaf nodes.
Decoder* decoder_;
// Pointer to the compiled version of this node. If this node hasn't been
// compiled yet, this pointer is NULL.
CompiledDecodeNode* compiled_node_;
};
} // namespace aarch64
} // namespace vixl
#endif // VIXL_AARCH64_DECODER_AARCH64_H_

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,372 @@
// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef VIXL_AARCH64_DISASM_AARCH64_H
#define VIXL_AARCH64_DISASM_AARCH64_H
#include <functional>
#include <unordered_map>
#include <utility>
#include "../globals-vixl.h"
#include "../utils-vixl.h"
#include "cpu-features-auditor-aarch64.h"
#include "decoder-aarch64.h"
#include "decoder-visitor-map-aarch64.h"
#include "instructions-aarch64.h"
#include "operands-aarch64.h"
namespace vixl {
namespace aarch64 {
class Disassembler : public DecoderVisitor {
public:
Disassembler();
Disassembler(char* text_buffer, int buffer_size);
virtual ~Disassembler();
char* GetOutput();
// Declare all Visitor functions.
virtual void Visit(Metadata* metadata,
const Instruction* instr) VIXL_OVERRIDE;
protected:
virtual void ProcessOutput(const Instruction* instr);
// Default output functions. The functions below implement a default way of
// printing elements in the disassembly. A sub-class can override these to
// customize the disassembly output.
// Prints the name of a register.
// TODO: This currently doesn't allow renaming of V registers.
virtual void AppendRegisterNameToOutput(const Instruction* instr,
const CPURegister& reg);
// Prints a PC-relative offset. This is used for example when disassembling
// branches to immediate offsets.
virtual void AppendPCRelativeOffsetToOutput(const Instruction* instr,
int64_t offset);
// Prints an address, in the general case. It can be code or data. This is
// used for example to print the target address of an ADR instruction.
virtual void AppendCodeRelativeAddressToOutput(const Instruction* instr,
const void* addr);
// Prints the address of some code.
// This is used for example to print the target address of a branch to an
// immediate offset.
// A sub-class can for example override this method to lookup the address and
// print an appropriate name.
virtual void AppendCodeRelativeCodeAddressToOutput(const Instruction* instr,
const void* addr);
// Prints the address of some data.
// This is used for example to print the source address of a load literal
// instruction.
virtual void AppendCodeRelativeDataAddressToOutput(const Instruction* instr,
const void* addr);
// Same as the above, but for addresses that are not relative to the code
// buffer. They are currently not used by VIXL.
virtual void AppendAddressToOutput(const Instruction* instr,
const void* addr);
virtual void AppendCodeAddressToOutput(const Instruction* instr,
const void* addr);
virtual void AppendDataAddressToOutput(const Instruction* instr,
const void* addr);
public:
// Get/Set the offset that should be added to code addresses when printing
// code-relative addresses in the AppendCodeRelative<Type>AddressToOutput()
// helpers.
// Below is an example of how a branch immediate instruction in memory at
// address 0xb010200 would disassemble with different offsets.
// Base address | Disassembly
// 0x0 | 0xb010200: b #+0xcc (addr 0xb0102cc)
// 0x10000 | 0xb000200: b #+0xcc (addr 0xb0002cc)
// 0xb010200 | 0x0: b #+0xcc (addr 0xcc)
void MapCodeAddress(int64_t base_address, const Instruction* instr_address);
int64_t CodeRelativeAddress(const void* instr);
private:
#define DECLARE(A) virtual void Visit##A(const Instruction* instr);
VISITOR_LIST(DECLARE)
#undef DECLARE
using FormToVisitorFnMap = std::unordered_map<
uint32_t,
std::function<void(Disassembler*, const Instruction*)>>;
static const FormToVisitorFnMap* GetFormToVisitorFnMap();
std::string mnemonic_;
uint32_t form_hash_;
void SetMnemonicFromForm(const std::string& form) {
if (form != "unallocated") {
VIXL_ASSERT(form.find_first_of('_') != std::string::npos);
mnemonic_ = form.substr(0, form.find_first_of('_'));
}
}
void Disassemble_PdT_PgZ_ZnT_ZmT(const Instruction* instr);
void Disassemble_ZdB_Zn1B_Zn2B_imm(const Instruction* instr);
void Disassemble_ZdB_ZnB_ZmB(const Instruction* instr);
void Disassemble_ZdD_PgM_ZnS(const Instruction* instr);
void Disassemble_ZdD_ZnD_ZmD(const Instruction* instr);
void Disassemble_ZdD_ZnD_ZmD_imm(const Instruction* instr);
void Disassemble_ZdD_ZnS_ZmS_imm(const Instruction* instr);
void Disassemble_ZdH_PgM_ZnS(const Instruction* instr);
void Disassemble_ZdH_ZnH_ZmH_imm(const Instruction* instr);
void Disassemble_ZdS_PgM_ZnD(const Instruction* instr);
void Disassemble_ZdS_PgM_ZnH(const Instruction* instr);
void Disassemble_ZdS_PgM_ZnS(const Instruction* instr);
void Disassemble_ZdS_ZnH_ZmH_imm(const Instruction* instr);
void Disassemble_ZdS_ZnS_ZmS(const Instruction* instr);
void Disassemble_ZdS_ZnS_ZmS_imm(const Instruction* instr);
void Disassemble_ZdT_PgM_ZnT(const Instruction* instr);
void Disassemble_ZdT_PgZ_ZnT_ZmT(const Instruction* instr);
void Disassemble_ZdT_Pg_Zn1T_Zn2T(const Instruction* instr);
void Disassemble_ZdT_Zn1T_Zn2T_ZmT(const Instruction* instr);
void Disassemble_ZdT_ZnT_ZmT(const Instruction* instr);
void Disassemble_ZdT_ZnT_ZmTb(const Instruction* instr);
void Disassemble_ZdT_ZnTb(const Instruction* instr);
void Disassemble_ZdT_ZnTb_ZmTb(const Instruction* instr);
void Disassemble_ZdaD_ZnD_ZmD_imm(const Instruction* instr);
void Disassemble_ZdaD_ZnH_ZmH_imm_const(const Instruction* instr);
void Disassemble_ZdaD_ZnS_ZmS_imm(const Instruction* instr);
void Disassemble_ZdaH_ZnH_ZmH_imm(const Instruction* instr);
void Disassemble_ZdaH_ZnH_ZmH_imm_const(const Instruction* instr);
void Disassemble_ZdaS_ZnB_ZmB_imm_const(const Instruction* instr);
void Disassemble_ZdaS_ZnH_ZmH(const Instruction* instr);
void Disassemble_ZdaS_ZnH_ZmH_imm(const Instruction* instr);
void Disassemble_ZdaS_ZnS_ZmS_imm(const Instruction* instr);
void Disassemble_ZdaS_ZnS_ZmS_imm_const(const Instruction* instr);
void Disassemble_ZdaT_PgM_ZnTb(const Instruction* instr);
void Disassemble_ZdaT_ZnT_ZmT(const Instruction* instr);
void Disassemble_ZdaT_ZnT_ZmT_const(const Instruction* instr);
void Disassemble_ZdaT_ZnT_const(const Instruction* instr);
void Disassemble_ZdaT_ZnTb_ZmTb(const Instruction* instr);
void Disassemble_ZdaT_ZnTb_ZmTb_const(const Instruction* instr);
void Disassemble_ZdnB_ZdnB(const Instruction* instr);
void Disassemble_ZdnB_ZdnB_ZmB(const Instruction* instr);
void Disassemble_ZdnS_ZdnS_ZmS(const Instruction* instr);
void Disassemble_ZdnT_PgM_ZdnT_ZmT(const Instruction* instr);
void Disassemble_ZdnT_PgM_ZdnT_const(const Instruction* instr);
void Disassemble_ZdnT_ZdnT_ZmT_const(const Instruction* instr);
void Disassemble_ZtD_PgZ_ZnD_Xm(const Instruction* instr);
void Disassemble_ZtD_Pg_ZnD_Xm(const Instruction* instr);
void Disassemble_ZtS_PgZ_ZnS_Xm(const Instruction* instr);
void Disassemble_ZtS_Pg_ZnS_Xm(const Instruction* instr);
void Disassemble_ZdaS_ZnB_ZmB(const Instruction* instr);
void Disassemble_Vd4S_Vn16B_Vm16B(const Instruction* instr);
void DisassembleCpy(const Instruction* instr);
void DisassembleSet(const Instruction* instr);
void DisassembleMinMaxImm(const Instruction* instr);
void DisassembleSVEShiftLeftImm(const Instruction* instr);
void DisassembleSVEShiftRightImm(const Instruction* instr);
void DisassembleSVEAddSubCarry(const Instruction* instr);
void DisassembleSVEAddSubHigh(const Instruction* instr);
void DisassembleSVEComplexIntAddition(const Instruction* instr);
void DisassembleSVEBitwiseTernary(const Instruction* instr);
void DisassembleSVEFlogb(const Instruction* instr);
void DisassembleSVEFPPair(const Instruction* instr);
void DisassembleNoArgs(const Instruction* instr);
void DisassembleNEONMulByElementLong(const Instruction* instr);
void DisassembleNEONDotProdByElement(const Instruction* instr);
void DisassembleNEONFPMulByElement(const Instruction* instr);
void DisassembleNEONHalfFPMulByElement(const Instruction* instr);
void DisassembleNEONFPMulByElementLong(const Instruction* instr);
void DisassembleNEONComplexMulByElement(const Instruction* instr);
void DisassembleNEON2RegLogical(const Instruction* instr);
void DisassembleNEON2RegExtract(const Instruction* instr);
void DisassembleNEON2RegAddlp(const Instruction* instr);
void DisassembleNEON2RegCompare(const Instruction* instr);
void DisassembleNEON2RegFPCompare(const Instruction* instr);
void DisassembleNEON2RegFPConvert(const Instruction* instr);
void DisassembleNEON2RegFP(const Instruction* instr);
void DisassembleNEON3SameLogical(const Instruction* instr);
void DisassembleNEON3SameFHM(const Instruction* instr);
void DisassembleNEON3SameNoD(const Instruction* instr);
void DisassembleNEONShiftLeftLongImm(const Instruction* instr);
void DisassembleNEONShiftRightImm(const Instruction* instr);
void DisassembleNEONShiftRightNarrowImm(const Instruction* instr);
void DisassembleNEONScalarSatMulLongIndex(const Instruction* instr);
void DisassembleNEONFPScalarMulIndex(const Instruction* instr);
void DisassembleNEONFPScalar3Same(const Instruction* instr);
void DisassembleNEONScalar3SameOnlyD(const Instruction* instr);
void DisassembleNEONFPAcrossLanes(const Instruction* instr);
void DisassembleNEONFP16AcrossLanes(const Instruction* instr);
void DisassembleNEONScalarShiftImmOnlyD(const Instruction* instr);
void DisassembleNEONScalarShiftRightNarrowImm(const Instruction* instr);
void DisassembleNEONScalar2RegMiscOnlyD(const Instruction* instr);
void DisassembleNEONFPScalar2RegMisc(const Instruction* instr);
void DisassembleMTELoadTag(const Instruction* instr);
void DisassembleMTEStoreTag(const Instruction* instr);
void DisassembleMTEStoreTagPair(const Instruction* instr);
void Disassemble_XdSP_XnSP_Xm(const Instruction* instr);
void Disassemble_XdSP_XnSP_uimm6_uimm4(const Instruction* instr);
void Disassemble_Xd_XnSP_Xm(const Instruction* instr);
void Disassemble_Xd_XnSP_XmSP(const Instruction* instr);
void Format(const Instruction* instr,
const char* mnemonic,
const char* format0,
const char* format1 = NULL);
void FormatWithDecodedMnemonic(const Instruction* instr,
const char* format0,
const char* format1 = NULL);
void Substitute(const Instruction* instr, const char* string);
int SubstituteField(const Instruction* instr, const char* format);
int SubstituteRegisterField(const Instruction* instr, const char* format);
int SubstitutePredicateRegisterField(const Instruction* instr,
const char* format);
int SubstituteImmediateField(const Instruction* instr, const char* format);
int SubstituteLiteralField(const Instruction* instr, const char* format);
int SubstituteBitfieldImmediateField(const Instruction* instr,
const char* format);
int SubstituteShiftField(const Instruction* instr, const char* format);
int SubstituteExtendField(const Instruction* instr, const char* format);
int SubstituteConditionField(const Instruction* instr, const char* format);
int SubstitutePCRelAddressField(const Instruction* instr, const char* format);
int SubstituteBranchTargetField(const Instruction* instr, const char* format);
int SubstituteLSRegOffsetField(const Instruction* instr, const char* format);
int SubstitutePrefetchField(const Instruction* instr, const char* format);
int SubstituteBarrierField(const Instruction* instr, const char* format);
int SubstituteSysOpField(const Instruction* instr, const char* format);
int SubstituteCrField(const Instruction* instr, const char* format);
int SubstituteIntField(const Instruction* instr, const char* format);
int SubstituteSVESize(const Instruction* instr, const char* format);
int SubstituteTernary(const Instruction* instr, const char* format);
std::pair<unsigned, unsigned> GetRegNumForField(const Instruction* instr,
char reg_prefix,
const char* field);
bool RdIsZROrSP(const Instruction* instr) const {
return (instr->GetRd() == kZeroRegCode);
}
bool RnIsZROrSP(const Instruction* instr) const {
return (instr->GetRn() == kZeroRegCode);
}
bool RmIsZROrSP(const Instruction* instr) const {
return (instr->GetRm() == kZeroRegCode);
}
bool RaIsZROrSP(const Instruction* instr) const {
return (instr->GetRa() == kZeroRegCode);
}
bool IsMovzMovnImm(unsigned reg_size, uint64_t value);
int64_t code_address_offset() const { return code_address_offset_; }
protected:
void ResetOutput();
void AppendToOutput(const char* string, ...) PRINTF_CHECK(2, 3);
void set_code_address_offset(int64_t code_address_offset) {
code_address_offset_ = code_address_offset;
}
char* buffer_;
uint32_t buffer_pos_;
uint32_t buffer_size_;
bool own_buffer_;
int64_t code_address_offset_;
};
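// A sub-class of Disassembler can override ProcessOutput() and the Append*
// helpers above to control where and how the text is emitted; PrintDisassembler
// below is one such sub-class. A minimal sketch of a user-defined variant (the
// class name and printf-based output are illustrative only):
//
//   class MyDisassembler : public Disassembler {
//    protected:
//     virtual void ProcessOutput(const Instruction* instr) VIXL_OVERRIDE {
//       // GetOutput() holds the text for the instruction just decoded.
//       printf("%p\t%s\n", reinterpret_cast<const void*>(instr), GetOutput());
//     }
//   };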
class PrintDisassembler : public Disassembler {
public:
explicit PrintDisassembler(FILE* stream)
: cpu_features_auditor_(NULL),
cpu_features_prefix_("// Needs: "),
cpu_features_suffix_(""),
signed_addresses_(false),
stream_(stream) {}
// Convenience helpers for quick disassembly, without having to manually
// create a decoder.
void DisassembleBuffer(const Instruction* start, uint64_t size);
void DisassembleBuffer(const Instruction* start, const Instruction* end);
void Disassemble(const Instruction* instr);
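// For example (a sketch; `code_start` and `code_end` are placeholders for a
// buffer of valid instructions):
//
//   PrintDisassembler print_disasm(stdout);
//   print_disasm.DisassembleBuffer(code_start, code_end);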
// If a CPUFeaturesAuditor is specified, it will be used to annotate
// disassembly. The CPUFeaturesAuditor is expected to visit the instructions
// _before_ the disassembler, such that the CPUFeatures information is
// available when the disassembler is called.
void RegisterCPUFeaturesAuditor(CPUFeaturesAuditor* auditor) {
cpu_features_auditor_ = auditor;
}
// Set the prefix to appear before the CPU features annotations.
void SetCPUFeaturesPrefix(const char* prefix) {
VIXL_ASSERT(prefix != NULL);
cpu_features_prefix_ = prefix;
}
// Set the suffix to appear after the CPU features annotations.
void SetCPUFeaturesSuffix(const char* suffix) {
VIXL_ASSERT(suffix != NULL);
cpu_features_suffix_ = suffix;
}
// By default, addresses are printed as simple, unsigned 64-bit hex values.
//
// With `PrintSignedAddresses(true)`:
// - negative addresses are printed as "-0x1234...",
// - positive addresses have a leading space, like " 0x1234...", to maintain
// alignment.
//
// This is most useful in combination with Disassembler::MapCodeAddress(...).
void PrintSignedAddresses(bool s) { signed_addresses_ = s; }
protected:
virtual void ProcessOutput(const Instruction* instr) VIXL_OVERRIDE;
CPUFeaturesAuditor* cpu_features_auditor_;
const char* cpu_features_prefix_;
const char* cpu_features_suffix_;
bool signed_addresses_;
private:
FILE* stream_;
};
} // namespace aarch64
} // namespace vixl
#endif // VIXL_AARCH64_DISASM_AARCH64_H

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,999 @@
// Copyright 2016, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef VIXL_AARCH64_OPERANDS_AARCH64_H_
#define VIXL_AARCH64_OPERANDS_AARCH64_H_
#include <sstream>
#include <string>
#include "instructions-aarch64.h"
#include "registers-aarch64.h"
namespace vixl {
namespace aarch64 {
// Lists of registers.
class CPURegList {
public:
explicit CPURegList(CPURegister reg1,
CPURegister reg2 = NoCPUReg,
CPURegister reg3 = NoCPUReg,
CPURegister reg4 = NoCPUReg)
: list_(reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit()),
size_(reg1.GetSizeInBits()),
type_(reg1.GetType()) {
VIXL_ASSERT(AreSameSizeAndType(reg1, reg2, reg3, reg4));
VIXL_ASSERT(IsValid());
}
CPURegList(CPURegister::RegisterType type, unsigned size, RegList list)
: list_(list), size_(size), type_(type) {
VIXL_ASSERT(IsValid());
}
CPURegList(CPURegister::RegisterType type,
unsigned size,
unsigned first_reg,
unsigned last_reg)
: size_(size), type_(type) {
VIXL_ASSERT(
((type == CPURegister::kRegister) && (last_reg < kNumberOfRegisters)) ||
((type == CPURegister::kVRegister) &&
(last_reg < kNumberOfVRegisters)));
VIXL_ASSERT(last_reg >= first_reg);
list_ = (UINT64_C(1) << (last_reg + 1)) - 1;
list_ &= ~((UINT64_C(1) << first_reg) - 1);
VIXL_ASSERT(IsValid());
}
// Construct an empty CPURegList with the specified size and type. If `size`
// is CPURegister::kUnknownSize and the register type requires a size, a valid
// but unspecified default will be picked.
static CPURegList Empty(CPURegister::RegisterType type,
unsigned size = CPURegister::kUnknownSize) {
return CPURegList(type, GetDefaultSizeFor(type, size), 0);
}
// Construct a CPURegList with all possible registers with the specified size
// and type. If `size` is CPURegister::kUnknownSize and the register type
// requires a size, a valid but unspecified default will be picked.
static CPURegList All(CPURegister::RegisterType type,
unsigned size = CPURegister::kUnknownSize) {
unsigned number_of_registers = (CPURegister::GetMaxCodeFor(type) + 1);
RegList list = (static_cast<RegList>(1) << number_of_registers) - 1;
if (type == CPURegister::kRegister) {
// GetMaxCodeFor(kRegister) ignores SP, so explicitly include it.
list |= (static_cast<RegList>(1) << kSPRegInternalCode);
}
return CPURegList(type, GetDefaultSizeFor(type, size), list);
}
CPURegister::RegisterType GetType() const {
VIXL_ASSERT(IsValid());
return type_;
}
VIXL_DEPRECATED("GetType", CPURegister::RegisterType type() const) {
return GetType();
}
CPURegister::RegisterBank GetBank() const {
return CPURegister::GetBankFor(GetType());
}
// Combine another CPURegList into this one. Registers that already exist in
// this list are left unchanged. The type and size of the registers in the
// 'other' list must match those in this list.
void Combine(const CPURegList& other) {
VIXL_ASSERT(IsValid());
VIXL_ASSERT(other.GetType() == type_);
VIXL_ASSERT(other.GetRegisterSizeInBits() == size_);
list_ |= other.GetList();
}
// Remove every register in the other CPURegList from this one. Registers that
// do not exist in this list are ignored. The type and size of the registers
// in the 'other' list must match those in this list.
void Remove(const CPURegList& other) {
VIXL_ASSERT(IsValid());
VIXL_ASSERT(other.GetType() == type_);
VIXL_ASSERT(other.GetRegisterSizeInBits() == size_);
list_ &= ~other.GetList();
}
// Variants of Combine and Remove which take a single register.
void Combine(const CPURegister& other) {
VIXL_ASSERT(other.GetType() == type_);
VIXL_ASSERT(other.GetSizeInBits() == size_);
Combine(other.GetCode());
}
void Remove(const CPURegister& other) {
VIXL_ASSERT(other.GetType() == type_);
VIXL_ASSERT(other.GetSizeInBits() == size_);
Remove(other.GetCode());
}
// Variants of Combine and Remove which take a single register by its code;
// the type and size of the register is inferred from this list.
void Combine(int code) {
VIXL_ASSERT(IsValid());
VIXL_ASSERT(CPURegister(code, size_, type_).IsValid());
list_ |= (UINT64_C(1) << code);
}
void Remove(int code) {
VIXL_ASSERT(IsValid());
VIXL_ASSERT(CPURegister(code, size_, type_).IsValid());
list_ &= ~(UINT64_C(1) << code);
}
static CPURegList Union(const CPURegList& list_1, const CPURegList& list_2) {
VIXL_ASSERT(list_1.type_ == list_2.type_);
VIXL_ASSERT(list_1.size_ == list_2.size_);
return CPURegList(list_1.type_, list_1.size_, list_1.list_ | list_2.list_);
}
static CPURegList Union(const CPURegList& list_1,
const CPURegList& list_2,
const CPURegList& list_3);
static CPURegList Union(const CPURegList& list_1,
const CPURegList& list_2,
const CPURegList& list_3,
const CPURegList& list_4);
static CPURegList Intersection(const CPURegList& list_1,
const CPURegList& list_2) {
VIXL_ASSERT(list_1.type_ == list_2.type_);
VIXL_ASSERT(list_1.size_ == list_2.size_);
return CPURegList(list_1.type_, list_1.size_, list_1.list_ & list_2.list_);
}
static CPURegList Intersection(const CPURegList& list_1,
const CPURegList& list_2,
const CPURegList& list_3);
static CPURegList Intersection(const CPURegList& list_1,
const CPURegList& list_2,
const CPURegList& list_3,
const CPURegList& list_4);
bool Overlaps(const CPURegList& other) const {
return (type_ == other.type_) && ((list_ & other.list_) != 0);
}
RegList GetList() const {
VIXL_ASSERT(IsValid());
return list_;
}
VIXL_DEPRECATED("GetList", RegList list() const) { return GetList(); }
void SetList(RegList new_list) {
VIXL_ASSERT(IsValid());
list_ = new_list;
}
VIXL_DEPRECATED("SetList", void set_list(RegList new_list)) {
return SetList(new_list);
}
// Remove all callee-saved registers from the list. This can be useful when
// preparing registers for an AAPCS64 function call, for example.
void RemoveCalleeSaved();
// Find the register in this list that appears in `mask` with the lowest or
// highest code, remove it from the list and return it as a CPURegister. If
// the list is empty, leave it unchanged and return NoCPUReg.
CPURegister PopLowestIndex(RegList mask = ~static_cast<RegList>(0));
CPURegister PopHighestIndex(RegList mask = ~static_cast<RegList>(0));
// AAPCS64 callee-saved registers.
static CPURegList GetCalleeSaved(unsigned size = kXRegSize);
static CPURegList GetCalleeSavedV(unsigned size = kDRegSize);
// AAPCS64 caller-saved registers. Note that this includes lr.
// TODO(all): Determine how we handle d8-d15 being callee-saved, but the top
// 64-bits being caller-saved.
static CPURegList GetCallerSaved(unsigned size = kXRegSize);
static CPURegList GetCallerSavedV(unsigned size = kDRegSize);
bool IsEmpty() const {
VIXL_ASSERT(IsValid());
return list_ == 0;
}
bool IncludesAliasOf(const CPURegister& other) const {
VIXL_ASSERT(IsValid());
return (GetBank() == other.GetBank()) && IncludesAliasOf(other.GetCode());
}
bool IncludesAliasOf(int code) const {
VIXL_ASSERT(IsValid());
return (((static_cast<RegList>(1) << code) & list_) != 0);
}
int GetCount() const {
VIXL_ASSERT(IsValid());
return CountSetBits(list_);
}
VIXL_DEPRECATED("GetCount", int Count()) const { return GetCount(); }
int GetRegisterSizeInBits() const {
VIXL_ASSERT(IsValid());
return size_;
}
VIXL_DEPRECATED("GetRegisterSizeInBits", int RegisterSizeInBits() const) {
return GetRegisterSizeInBits();
}
int GetRegisterSizeInBytes() const {
int size_in_bits = GetRegisterSizeInBits();
VIXL_ASSERT((size_in_bits % 8) == 0);
return size_in_bits / 8;
}
VIXL_DEPRECATED("GetRegisterSizeInBytes", int RegisterSizeInBytes() const) {
return GetRegisterSizeInBytes();
}
unsigned GetTotalSizeInBytes() const {
VIXL_ASSERT(IsValid());
return GetRegisterSizeInBytes() * GetCount();
}
VIXL_DEPRECATED("GetTotalSizeInBytes", unsigned TotalSizeInBytes() const) {
return GetTotalSizeInBytes();
}
private:
// If `size` is CPURegister::kUnknownSize and the type requires a known size,
// then return an arbitrary-but-valid size.
//
// Otherwise, the size is checked for validity and returned unchanged.
static unsigned GetDefaultSizeFor(CPURegister::RegisterType type,
unsigned size) {
if (size == CPURegister::kUnknownSize) {
if (type == CPURegister::kRegister) size = kXRegSize;
if (type == CPURegister::kVRegister) size = kQRegSize;
// All other types require kUnknownSize.
}
VIXL_ASSERT(CPURegister(0, size, type).IsValid());
return size;
}
RegList list_;
int size_;
CPURegister::RegisterType type_;
bool IsValid() const;
};
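// For example, the X registers x0-x3 can be collected into a CPURegList and
// consumed one at a time (a sketch):
//
//   CPURegList regs(CPURegister::kRegister, kXRegSize, 0, 3);
//   while (!regs.IsEmpty()) {
//     CPURegister reg = regs.PopLowestIndex();
//     ...
//   }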
// AAPCS64 callee-saved registers.
extern const CPURegList kCalleeSaved;
extern const CPURegList kCalleeSavedV;
// AAPCS64 caller-saved registers. Note that this includes lr.
extern const CPURegList kCallerSaved;
extern const CPURegList kCallerSavedV;
class IntegerOperand;
// Operand.
class Operand {
public:
// #<immediate>
// where <immediate> is int64_t.
// This is allowed to be an implicit constructor because Operand is
// a wrapper class that doesn't normally perform any type conversion.
Operand(int64_t immediate); // NOLINT(runtime/explicit)
Operand(IntegerOperand immediate); // NOLINT(runtime/explicit)
// rm, {<shift> #<shift_amount>}
// where <shift> is one of {LSL, LSR, ASR, ROR}.
// <shift_amount> is uint6_t.
// This is allowed to be an implicit constructor because Operand is
// a wrapper class that doesn't normally perform any type conversion.
Operand(Register reg,
Shift shift = LSL,
unsigned shift_amount = 0); // NOLINT(runtime/explicit)
// rm, {<extend> {#<shift_amount>}}
// where <extend> is one of {UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX}.
// <shift_amount> is uint2_t.
explicit Operand(Register reg, Extend extend, unsigned shift_amount = 0);
bool IsImmediate() const;
bool IsPlainRegister() const;
bool IsShiftedRegister() const;
bool IsExtendedRegister() const;
bool IsZero() const;
// This returns an LSL shift (<= 4) operand as an equivalent extend operand,
// which helps in the encoding of instructions that use the stack pointer.
Operand ToExtendedRegister() const;
int64_t GetImmediate() const {
VIXL_ASSERT(IsImmediate());
return immediate_;
}
VIXL_DEPRECATED("GetImmediate", int64_t immediate() const) {
return GetImmediate();
}
int64_t GetEquivalentImmediate() const {
return IsZero() ? 0 : GetImmediate();
}
Register GetRegister() const {
VIXL_ASSERT(IsShiftedRegister() || IsExtendedRegister());
return reg_;
}
VIXL_DEPRECATED("GetRegister", Register reg() const) { return GetRegister(); }
Register GetBaseRegister() const { return GetRegister(); }
Shift GetShift() const {
VIXL_ASSERT(IsShiftedRegister());
return shift_;
}
VIXL_DEPRECATED("GetShift", Shift shift() const) { return GetShift(); }
Extend GetExtend() const {
VIXL_ASSERT(IsExtendedRegister());
return extend_;
}
VIXL_DEPRECATED("GetExtend", Extend extend() const) { return GetExtend(); }
unsigned GetShiftAmount() const {
VIXL_ASSERT(IsShiftedRegister() || IsExtendedRegister());
return shift_amount_;
}
VIXL_DEPRECATED("GetShiftAmount", unsigned shift_amount() const) {
return GetShiftAmount();
}
private:
int64_t immediate_;
Register reg_;
Shift shift_;
Extend extend_;
unsigned shift_amount_;
};
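// For example, each of the following constructs a valid Operand (register
// names such as x1 and w2 are defined in registers-aarch64.h):
//
//   Operand(42);           // #42
//   Operand(x1);           // x1
//   Operand(x1, LSL, 3);   // x1, LSL #3
//   Operand(w2, UXTB);     // w2, UXTB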
// MemOperand represents the addressing mode of a load or store instruction.
// In assembly syntax, MemOperands are normally denoted by one or more elements
// inside or around square brackets.
class MemOperand {
public:
// Creates an invalid `MemOperand`.
MemOperand();
explicit MemOperand(Register base,
int64_t offset = 0,
AddrMode addrmode = Offset);
MemOperand(Register base,
Register regoffset,
Shift shift = LSL,
unsigned shift_amount = 0);
MemOperand(Register base,
Register regoffset,
Extend extend,
unsigned shift_amount = 0);
MemOperand(Register base, const Operand& offset, AddrMode addrmode = Offset);
const Register& GetBaseRegister() const { return base_; }
// If the MemOperand has a register offset, return it. (This also applies to
// pre- and post-index modes.) Otherwise, return NoReg.
const Register& GetRegisterOffset() const { return regoffset_; }
// If the MemOperand has an immediate offset, return it. (This also applies to
// pre- and post-index modes.) Otherwise, return 0.
int64_t GetOffset() const { return offset_; }
AddrMode GetAddrMode() const { return addrmode_; }
Shift GetShift() const { return shift_; }
Extend GetExtend() const { return extend_; }
unsigned GetShiftAmount() const {
// Extend modes can also encode a shift for some instructions.
VIXL_ASSERT((GetShift() != NO_SHIFT) || (GetExtend() != NO_EXTEND));
return shift_amount_;
}
// True for MemOperands which represent something like [x0].
// Currently, this will also return true for [x0, #0], because MemOperand has
// no way to distinguish the two.
bool IsPlainRegister() const;
// True for MemOperands which represent something like [x0], or for compound
// MemOperands which are functionally equivalent, such as [x0, #0], [x0, xzr]
// or [x0, wzr, UXTW #3].
bool IsEquivalentToPlainRegister() const;
// True for immediate-offset (but not indexed) MemOperands.
bool IsImmediateOffset() const;
// True for register-offset (but not indexed) MemOperands.
bool IsRegisterOffset() const;
// True for immediate or register pre-indexed MemOperands.
bool IsPreIndex() const;
// True for immediate or register post-indexed MemOperands.
bool IsPostIndex() const;
// True for immediate pre-indexed MemOperands, [reg, #imm]!
bool IsImmediatePreIndex() const;
// True for immediate post-indexed MemOperands, [reg], #imm
bool IsImmediatePostIndex() const;
void AddOffset(int64_t offset);
bool IsValid() const {
return base_.IsValid() &&
((addrmode_ == Offset) || (addrmode_ == PreIndex) ||
(addrmode_ == PostIndex)) &&
((shift_ == NO_SHIFT) || (extend_ == NO_EXTEND)) &&
((offset_ == 0) || !regoffset_.IsValid());
}
bool Equals(const MemOperand& other) const {
return base_.Is(other.base_) && regoffset_.Is(other.regoffset_) &&
(offset_ == other.offset_) && (addrmode_ == other.addrmode_) &&
(shift_ == other.shift_) && (extend_ == other.extend_) &&
(shift_amount_ == other.shift_amount_);
}
private:
Register base_;
Register regoffset_;
int64_t offset_;
AddrMode addrmode_;
Shift shift_;
Extend extend_;
unsigned shift_amount_;
};
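// For example, each of the following constructs a valid MemOperand:
//
//   MemOperand(x0);                 // [x0]
//   MemOperand(x0, 8);              // [x0, #8]
//   MemOperand(x0, x1, LSL, 3);     // [x0, x1, LSL #3]
//   MemOperand(x0, 16, PreIndex);   // [x0, #16]!
//   MemOperand(x0, 16, PostIndex);  // [x0], #16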
// SVE supports memory operands which don't make sense to the core ISA, such as
// scatter-gather forms, in which either the base or offset registers are
// vectors. This class exists to avoid complicating core-ISA code with
// SVE-specific behaviour.
//
// Note that SVE does not support any pre- or post-index modes.
class SVEMemOperand {
public:
// "vector-plus-immediate", like [z0.s, #21]
explicit SVEMemOperand(ZRegister base, uint64_t offset = 0)
: base_(base),
regoffset_(NoReg),
offset_(RawbitsToInt64(offset)),
mod_(NO_SVE_OFFSET_MODIFIER),
shift_amount_(0) {
VIXL_ASSERT(IsVectorPlusImmediate());
VIXL_ASSERT(IsValid());
}
// "scalar-plus-immediate", like [x0], [x0, #42] or [x0, #42, MUL_VL]
// The only supported modifiers are NO_SVE_OFFSET_MODIFIER or SVE_MUL_VL.
//
// Note that VIXL cannot currently distinguish between `SVEMemOperand(x0)` and
// `SVEMemOperand(x0, 0)`. This is only significant in scalar-plus-scalar
// instructions where xm defaults to xzr. However, users should not rely on
// `SVEMemOperand(x0, 0)` being accepted in such cases.
explicit SVEMemOperand(Register base,
uint64_t offset = 0,
SVEOffsetModifier mod = NO_SVE_OFFSET_MODIFIER)
: base_(base),
regoffset_(NoReg),
offset_(RawbitsToInt64(offset)),
mod_(mod),
shift_amount_(0) {
VIXL_ASSERT(IsScalarPlusImmediate());
VIXL_ASSERT(IsValid());
}
// "scalar-plus-scalar", like [x0, x1]
// "scalar-plus-vector", like [x0, z1.d]
SVEMemOperand(Register base, CPURegister offset)
: base_(base),
regoffset_(offset),
offset_(0),
mod_(NO_SVE_OFFSET_MODIFIER),
shift_amount_(0) {
VIXL_ASSERT(IsScalarPlusScalar() || IsScalarPlusVector());
if (offset.IsZero()) VIXL_ASSERT(IsEquivalentToScalar());
VIXL_ASSERT(IsValid());
}
// "scalar-plus-vector", like [x0, z1.d, UXTW]
// The type of `mod` can be any `SVEOffsetModifier` (other than LSL), or a
// corresponding `Extend` value.
template <typename M>
SVEMemOperand(Register base, ZRegister offset, M mod)
: base_(base),
regoffset_(offset),
offset_(0),
mod_(GetSVEOffsetModifierFor(mod)),
shift_amount_(0) {
VIXL_ASSERT(mod_ != SVE_LSL); // LSL requires an explicit shift amount.
VIXL_ASSERT(IsScalarPlusVector());
VIXL_ASSERT(IsValid());
}
// "scalar-plus-scalar", like [x0, x1, LSL #1]
// "scalar-plus-vector", like [x0, z1.d, LSL #2]
// The type of `mod` can be any `SVEOffsetModifier`, or a corresponding
// `Shift` or `Extend` value.
template <typename M>
SVEMemOperand(Register base, CPURegister offset, M mod, unsigned shift_amount)
: base_(base),
regoffset_(offset),
offset_(0),
mod_(GetSVEOffsetModifierFor(mod)),
shift_amount_(shift_amount) {
VIXL_ASSERT(IsValid());
}
// "vector-plus-scalar", like [z0.d, x0]
SVEMemOperand(ZRegister base, Register offset)
: base_(base),
regoffset_(offset),
offset_(0),
mod_(NO_SVE_OFFSET_MODIFIER),
shift_amount_(0) {
VIXL_ASSERT(IsValid());
VIXL_ASSERT(IsVectorPlusScalar());
}
// "vector-plus-vector", like [z0.d, z1.d, UXTW]
template <typename M = SVEOffsetModifier>
SVEMemOperand(ZRegister base,
ZRegister offset,
M mod = NO_SVE_OFFSET_MODIFIER,
unsigned shift_amount = 0)
: base_(base),
regoffset_(offset),
offset_(0),
mod_(GetSVEOffsetModifierFor(mod)),
shift_amount_(shift_amount) {
VIXL_ASSERT(IsValid());
VIXL_ASSERT(IsVectorPlusVector());
}
// True for SVEMemOperands which represent something like [x0].
// This will also return true for [x0, #0], because there is no way
// to distinguish the two.
bool IsPlainScalar() const {
return IsScalarPlusImmediate() && (offset_ == 0);
}
// True for SVEMemOperands which represent something like [x0], or for
// compound SVEMemOperands which are functionally equivalent, such as
// [x0, #0], [x0, xzr] or [x0, wzr, UXTW #3].
bool IsEquivalentToScalar() const;
// True for SVEMemOperands like [x0], [x0, #0], false for [x0, xzr] and
// similar.
bool IsPlainRegister() const;
bool IsScalarPlusImmediate() const {
return base_.IsX() && regoffset_.IsNone() &&
((mod_ == NO_SVE_OFFSET_MODIFIER) || IsMulVl());
}
bool IsScalarPlusScalar() const {
// SVE offers no extend modes for scalar-plus-scalar, so both registers must
// be X registers.
return base_.IsX() && regoffset_.IsX() &&
((mod_ == NO_SVE_OFFSET_MODIFIER) || (mod_ == SVE_LSL));
}
bool IsScalarPlusVector() const {
// The modifier can be LSL or an extend mode (UXTW or SXTW) here. Unlike
// in the core ISA, these extend modes do not imply an S-sized lane, so the
// modifier is independent from the lane size. The architecture describes
// [US]XTW with a D-sized lane as an "unpacked" offset.
return base_.IsX() && regoffset_.IsZRegister() &&
(regoffset_.IsLaneSizeS() || regoffset_.IsLaneSizeD()) && !IsMulVl();
}
bool IsVectorPlusImmediate() const {
return base_.IsZRegister() &&
(base_.IsLaneSizeS() || base_.IsLaneSizeD()) &&
regoffset_.IsNone() && (mod_ == NO_SVE_OFFSET_MODIFIER);
}
bool IsVectorPlusScalar() const {
return base_.IsZRegister() && regoffset_.IsX() &&
(base_.IsLaneSizeS() || base_.IsLaneSizeD());
}
bool IsVectorPlusVector() const {
return base_.IsZRegister() && regoffset_.IsZRegister() && (offset_ == 0) &&
AreSameFormat(base_, regoffset_) &&
(base_.IsLaneSizeS() || base_.IsLaneSizeD());
}
bool IsContiguous() const { return !IsScatterGather(); }
bool IsScatterGather() const {
return base_.IsZRegister() || regoffset_.IsZRegister();
}
// TODO: If necessary, add helpers like `HasScalarBase()`.
Register GetScalarBase() const {
VIXL_ASSERT(base_.IsX());
return Register(base_);
}
ZRegister GetVectorBase() const {
VIXL_ASSERT(base_.IsZRegister());
VIXL_ASSERT(base_.HasLaneSize());
return ZRegister(base_);
}
Register GetScalarOffset() const {
VIXL_ASSERT(regoffset_.IsRegister());
return Register(regoffset_);
}
ZRegister GetVectorOffset() const {
VIXL_ASSERT(regoffset_.IsZRegister());
VIXL_ASSERT(regoffset_.HasLaneSize());
return ZRegister(regoffset_);
}
int64_t GetImmediateOffset() const {
VIXL_ASSERT(regoffset_.IsNone());
return offset_;
}
SVEOffsetModifier GetOffsetModifier() const { return mod_; }
unsigned GetShiftAmount() const { return shift_amount_; }
bool IsEquivalentToLSL(unsigned amount) const {
if (shift_amount_ != amount) return false;
if (amount == 0) {
// No-shift is equivalent to "LSL #0".
return ((mod_ == SVE_LSL) || (mod_ == NO_SVE_OFFSET_MODIFIER));
}
return mod_ == SVE_LSL;
}
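// For example, IsEquivalentToLSL(0) is true for both [x0, x1] and
// [x0, x1, LSL #0], whereas IsEquivalentToLSL(2) is true for
// [x0, x1, LSL #2] but not for [x0, x1].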
bool IsMulVl() const { return mod_ == SVE_MUL_VL; }
bool IsValid() const;
private:
// Allow standard `Shift` and `Extend` arguments to be used.
SVEOffsetModifier GetSVEOffsetModifierFor(Shift shift) {
if (shift == LSL) return SVE_LSL;
if (shift == NO_SHIFT) return NO_SVE_OFFSET_MODIFIER;
// SVE does not accept any other shift.
VIXL_UNIMPLEMENTED();
return NO_SVE_OFFSET_MODIFIER;
}
SVEOffsetModifier GetSVEOffsetModifierFor(Extend extend = NO_EXTEND) {
if (extend == UXTW) return SVE_UXTW;
if (extend == SXTW) return SVE_SXTW;
if (extend == NO_EXTEND) return NO_SVE_OFFSET_MODIFIER;
// SVE does not accept any other extend mode.
VIXL_UNIMPLEMENTED();
return NO_SVE_OFFSET_MODIFIER;
}
SVEOffsetModifier GetSVEOffsetModifierFor(SVEOffsetModifier mod) {
return mod;
}
CPURegister base_;
CPURegister regoffset_;
int64_t offset_;
SVEOffsetModifier mod_;
unsigned shift_amount_;
};
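// For example, each of the following constructs a valid SVEMemOperand (the
// register names and the .VnS()/.VnD() lane-size helpers come from
// registers-aarch64.h):
//
//   SVEMemOperand(x0, 42);               // [x0, #42]
//   SVEMemOperand(x0, 3, SVE_MUL_VL);    // [x0, #3, MUL VL]
//   SVEMemOperand(x0, x1, LSL, 2);       // [x0, x1, LSL #2]
//   SVEMemOperand(x0, z1.VnD(), SXTW);   // [x0, z1.d, SXTW]
//   SVEMemOperand(z2.VnS(), 4);          // [z2.s, #4]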
// Represent a signed or unsigned integer operand.
//
// This is designed to make instructions which naturally accept a _signed_
// immediate easier to implement and use, when we also want users to be able to
// specify raw-bits values (such as with hexadecimal constants). The advantage
// of this class over a simple uint64_t (with implicit C++ sign-extension) is
// that this class can strictly check the range of allowed values. With a simple
// uint64_t, it is impossible to distinguish -1 from UINT64_MAX.
//
// For example, these instructions are equivalent:
//
// __ Insr(z0.VnB(), -1);
// __ Insr(z0.VnB(), 0xff);
//
// ... as are these:
//
// __ Insr(z0.VnD(), -1);
// __ Insr(z0.VnD(), 0xffffffffffffffff);
//
// ... but this is invalid:
//
// __ Insr(z0.VnB(), 0xffffffffffffffff); // Too big for B-sized lanes.
class IntegerOperand {
public:
#define VIXL_INT_TYPES(V) \
V(char) V(short) V(int) V(long) V(long long) // NOLINT(runtime/int)
#define VIXL_DECL_INT_OVERLOADS(T) \
/* These are allowed to be implicit constructors because this is a */ \
/* wrapper class that doesn't normally perform any type conversion. */ \
IntegerOperand(signed T immediate) /* NOLINT(runtime/explicit) */ \
: raw_bits_(immediate), /* Allow implicit sign-extension. */ \
is_negative_(immediate < 0) {} \
IntegerOperand(unsigned T immediate) /* NOLINT(runtime/explicit) */ \
: raw_bits_(immediate), is_negative_(false) {}
VIXL_INT_TYPES(VIXL_DECL_INT_OVERLOADS)
#undef VIXL_DECL_INT_OVERLOADS
#undef VIXL_INT_TYPES
// TODO: `Operand` can currently only hold an int64_t, so some large, unsigned
// values will be misrepresented here.
explicit IntegerOperand(const Operand& operand)
: raw_bits_(operand.GetEquivalentImmediate()),
is_negative_(operand.GetEquivalentImmediate() < 0) {}
bool IsIntN(unsigned n) const {
return is_negative_ ? vixl::IsIntN(n, RawbitsToInt64(raw_bits_))
: vixl::IsIntN(n, raw_bits_);
}
bool IsUintN(unsigned n) const {
return !is_negative_ && vixl::IsUintN(n, raw_bits_);
}
bool IsUint8() const { return IsUintN(8); }
bool IsUint16() const { return IsUintN(16); }
bool IsUint32() const { return IsUintN(32); }
bool IsUint64() const { return IsUintN(64); }
bool IsInt8() const { return IsIntN(8); }
bool IsInt16() const { return IsIntN(16); }
bool IsInt32() const { return IsIntN(32); }
bool IsInt64() const { return IsIntN(64); }
bool FitsInBits(unsigned n) const {
return is_negative_ ? IsIntN(n) : IsUintN(n);
}
bool FitsInLane(const CPURegister& zd) const {
return FitsInBits(zd.GetLaneSizeInBits());
}
bool FitsInSignedLane(const CPURegister& zd) const {
return IsIntN(zd.GetLaneSizeInBits());
}
bool FitsInUnsignedLane(const CPURegister& zd) const {
return IsUintN(zd.GetLaneSizeInBits());
}
// Cast a value in the range [INT<n>_MIN, UINT<n>_MAX] to an unsigned integer
// in the range [0, UINT<n>_MAX] (using two's complement mapping).
uint64_t AsUintN(unsigned n) const {
VIXL_ASSERT(FitsInBits(n));
return raw_bits_ & GetUintMask(n);
}
uint8_t AsUint8() const { return static_cast<uint8_t>(AsUintN(8)); }
uint16_t AsUint16() const { return static_cast<uint16_t>(AsUintN(16)); }
uint32_t AsUint32() const { return static_cast<uint32_t>(AsUintN(32)); }
uint64_t AsUint64() const { return AsUintN(64); }
// Cast a value in the range [INT<n>_MIN, UINT<n>_MAX] to a signed integer in
// the range [INT<n>_MIN, INT<n>_MAX] (using two's complement mapping).
int64_t AsIntN(unsigned n) const {
VIXL_ASSERT(FitsInBits(n));
return ExtractSignedBitfield64(n - 1, 0, raw_bits_);
}
int8_t AsInt8() const { return static_cast<int8_t>(AsIntN(8)); }
int16_t AsInt16() const { return static_cast<int16_t>(AsIntN(16)); }
int32_t AsInt32() const { return static_cast<int32_t>(AsIntN(32)); }
int64_t AsInt64() const { return AsIntN(64); }
// Several instructions encode a signed int<N>_t, which is then (optionally)
// left-shifted and sign-extended to a Z register lane with a size which may
// be larger than N. This helper tries to find an int<N>_t such that the
// IntegerOperand's arithmetic value is reproduced in each lane.
//
// This is the mechanism that allows `Insr(z0.VnB(), 0xff)` to be treated as
// `Insr(z0.VnB(), -1)`.
template <unsigned N, unsigned kShift, typename T>
bool TryEncodeAsShiftedIntNForLane(const CPURegister& zd, T* imm) const {
VIXL_STATIC_ASSERT(std::numeric_limits<T>::digits > N);
VIXL_ASSERT(FitsInLane(zd));
if ((raw_bits_ & GetUintMask(kShift)) != 0) return false;
// Reverse the specified left-shift.
IntegerOperand unshifted(*this);
unshifted.ArithmeticShiftRight(kShift);
if (unshifted.IsIntN(N)) {
// This is trivial, since sign-extension produces the same arithmetic
// value irrespective of the destination size.
*imm = static_cast<T>(unshifted.AsIntN(N));
return true;
}
// Otherwise, we might be able to use the sign-extension to produce the
// desired bit pattern. We can only do this for values in the range
// [INT<N>_MAX + 1, UINT<N>_MAX], where the highest set bit is the sign bit.
//
// The lane size has to be adjusted to compensate for `kShift`, since the
// high bits will be dropped when the encoded value is left-shifted.
if (unshifted.IsUintN(zd.GetLaneSizeInBits() - kShift)) {
int64_t encoded = unshifted.AsIntN(zd.GetLaneSizeInBits() - kShift);
if (vixl::IsIntN(N, encoded)) {
*imm = static_cast<T>(encoded);
return true;
}
}
return false;
}
// As above, but `kShift` is written to the `*shift` parameter on success, so
// that it is easy to chain calls like this:
//
// if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) ||
// imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) {
// insn(zd, imm8, shift)
// }
template <unsigned N, unsigned kShift, typename T, typename S>
bool TryEncodeAsShiftedIntNForLane(const CPURegister& zd,
T* imm,
S* shift) const {
if (TryEncodeAsShiftedIntNForLane<N, kShift>(zd, imm)) {
*shift = kShift;
return true;
}
return false;
}
// As above, but assume that `kShift` is 0.
template <unsigned N, typename T>
bool TryEncodeAsIntNForLane(const CPURegister& zd, T* imm) const {
return TryEncodeAsShiftedIntNForLane<N, 0>(zd, imm);
}
// As above, but for unsigned fields. This is usually a simple operation, but
// is provided for symmetry.
template <unsigned N, unsigned kShift, typename T>
bool TryEncodeAsShiftedUintNForLane(const CPURegister& zd, T* imm) const {
VIXL_STATIC_ASSERT(std::numeric_limits<T>::digits > N);
VIXL_ASSERT(FitsInLane(zd));
// TODO: Should we convert -1 to 0xff here?
if (is_negative_) return false;
USE(zd);
if ((raw_bits_ & GetUintMask(kShift)) != 0) return false;
if (vixl::IsUintN(N, raw_bits_ >> kShift)) {
*imm = static_cast<T>(raw_bits_ >> kShift);
return true;
}
return false;
}
template <unsigned N, unsigned kShift, typename T, typename S>
bool TryEncodeAsShiftedUintNForLane(const CPURegister& zd,
T* imm,
S* shift) const {
if (TryEncodeAsShiftedUintNForLane<N, kShift>(zd, imm)) {
*shift = kShift;
return true;
}
return false;
}
bool IsZero() const { return raw_bits_ == 0; }
bool IsNegative() const { return is_negative_; }
bool IsPositiveOrZero() const { return !is_negative_; }
uint64_t GetMagnitude() const {
return is_negative_ ? UnsignedNegate(raw_bits_) : raw_bits_;
}
private:
// Shift the arithmetic value right, with sign extension if is_negative_.
void ArithmeticShiftRight(int shift) {
VIXL_ASSERT((shift >= 0) && (shift < 64));
if (shift == 0) return;
if (is_negative_) {
raw_bits_ = ExtractSignedBitfield64(63, shift, raw_bits_);
} else {
raw_bits_ >>= shift;
}
}
uint64_t raw_bits_;
bool is_negative_;
};
// This is an abstraction that can represent a register or memory location. The
// `MacroAssembler` provides helpers to move data between generic operands.
class GenericOperand {
public:
GenericOperand() { VIXL_ASSERT(!IsValid()); }
GenericOperand(const CPURegister& reg); // NOLINT(runtime/explicit)
GenericOperand(const MemOperand& mem_op,
size_t mem_op_size = 0); // NOLINT(runtime/explicit)
bool IsValid() const { return cpu_register_.IsValid() != mem_op_.IsValid(); }
bool Equals(const GenericOperand& other) const;
bool IsCPURegister() const {
VIXL_ASSERT(IsValid());
return cpu_register_.IsValid();
}
bool IsRegister() const {
return IsCPURegister() && cpu_register_.IsRegister();
}
bool IsVRegister() const {
return IsCPURegister() && cpu_register_.IsVRegister();
}
bool IsSameCPURegisterType(const GenericOperand& other) {
return IsCPURegister() && other.IsCPURegister() &&
GetCPURegister().IsSameType(other.GetCPURegister());
}
bool IsMemOperand() const {
VIXL_ASSERT(IsValid());
return mem_op_.IsValid();
}
CPURegister GetCPURegister() const {
VIXL_ASSERT(IsCPURegister());
return cpu_register_;
}
MemOperand GetMemOperand() const {
VIXL_ASSERT(IsMemOperand());
return mem_op_;
}
size_t GetMemOperandSizeInBytes() const {
VIXL_ASSERT(IsMemOperand());
return mem_op_size_;
}
size_t GetSizeInBytes() const {
return IsCPURegister() ? cpu_register_.GetSizeInBytes()
: GetMemOperandSizeInBytes();
}
size_t GetSizeInBits() const { return GetSizeInBytes() * kBitsPerByte; }
private:
CPURegister cpu_register_;
MemOperand mem_op_;
// The size of the memory region pointed to, in bytes.
// We only support sizes up to X/D register sizes.
size_t mem_op_size_;
};
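// For example, a register and a stack slot can be described uniformly (a
// sketch; sp and kXRegSizeInBytes come from the other aarch64 headers):
//
//   GenericOperand reg_op(x0);
//   GenericOperand mem_op(MemOperand(sp, 16), kXRegSizeInBytes);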
} // namespace aarch64
} // namespace vixl
#endif // VIXL_AARCH64_OPERANDS_AARCH64_H_

View File

@ -0,0 +1,901 @@
// Copyright 2019, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef VIXL_AARCH64_REGISTERS_AARCH64_H_
#define VIXL_AARCH64_REGISTERS_AARCH64_H_
#include <string>
#include "instructions-aarch64.h"
namespace vixl {
namespace aarch64 {
// An integer type capable of representing a homogeneous, non-overlapping set of
// registers as a bitmask of their codes.
typedef uint64_t RegList;
static const int kRegListSizeInBits = sizeof(RegList) * 8;
class Register;
class WRegister;
class XRegister;
class VRegister;
class BRegister;
class HRegister;
class SRegister;
class DRegister;
class QRegister;
class ZRegister;
class PRegister;
class PRegisterWithLaneSize;
class PRegisterM;
class PRegisterZ;
// A container for any single register supported by the processor. Selected
// qualifications are also supported. Basic registers can be constructed
// directly as CPURegister objects. Other variants should be constructed as one
// of the derived classes.
//
// CPURegister aims to support any getter that would also be available to more
// specialised register types. However, using the equivalent functions on the
// specialised register types can avoid run-time checks, and should therefore be
// preferred where run-time polymorphism isn't required.
//
// Type-specific modifiers are typically implemented only on the derived
// classes.
//
// The encoding is such that CPURegister objects are cheap to pass by value.
class CPURegister {
public:
enum RegisterBank : uint8_t {
kNoRegisterBank = 0,
kRRegisterBank,
kVRegisterBank,
kPRegisterBank
};
enum RegisterType {
kNoRegister,
kRegister,
kVRegister,
kZRegister,
kPRegister
};
static const unsigned kUnknownSize = 0;
VIXL_CONSTEXPR CPURegister()
: code_(0),
bank_(kNoRegisterBank),
size_(kEncodedUnknownSize),
qualifiers_(kNoQualifiers),
lane_size_(kEncodedUnknownSize) {}
CPURegister(int code, int size_in_bits, RegisterType type)
: code_(code),
bank_(GetBankFor(type)),
size_(EncodeSizeInBits(size_in_bits)),
qualifiers_(kNoQualifiers),
lane_size_(EncodeSizeInBits(size_in_bits)) {
VIXL_ASSERT(IsValid());
}
// Basic accessors.
// TODO: Make this return 'int'.
unsigned GetCode() const { return code_; }
RegisterBank GetBank() const { return bank_; }
// For scalar registers, the lane size matches the register size, and is
// always known.
bool HasSize() const { return size_ != kEncodedUnknownSize; }
bool HasLaneSize() const { return lane_size_ != kEncodedUnknownSize; }
RegList GetBit() const {
if (IsNone()) return 0;
VIXL_ASSERT(code_ < kRegListSizeInBits);
return static_cast<RegList>(1) << code_;
}
// Return the architectural name for this register.
// TODO: This is temporary. Ultimately, we should move the
// Simulator::*RegNameForCode helpers out of the simulator, and provide an
// independent way to obtain the name of a register.
std::string GetArchitecturalName() const;
// Return the highest valid register code for this type, to allow generic
// loops to be written. This excludes kSPRegInternalCode, since it is not
// contiguous, and sp usually requires special handling anyway.
unsigned GetMaxCode() const { return GetMaxCodeFor(GetBank()); }
// Registers without a known size report kUnknownSize.
int GetSizeInBits() const { return DecodeSizeInBits(size_); }
int GetSizeInBytes() const { return DecodeSizeInBytes(size_); }
// TODO: Make these return 'int'.
unsigned GetLaneSizeInBits() const { return DecodeSizeInBits(lane_size_); }
unsigned GetLaneSizeInBytes() const { return DecodeSizeInBytes(lane_size_); }
unsigned GetLaneSizeInBytesLog2() const {
VIXL_ASSERT(HasLaneSize());
return DecodeSizeInBytesLog2(lane_size_);
}
int GetLanes() const {
if (HasSize() && HasLaneSize()) {
// Take advantage of the size encoding to calculate this efficiently.
VIXL_STATIC_ASSERT(kEncodedHRegSize == (kEncodedBRegSize + 1));
VIXL_STATIC_ASSERT(kEncodedSRegSize == (kEncodedHRegSize + 1));
VIXL_STATIC_ASSERT(kEncodedDRegSize == (kEncodedSRegSize + 1));
VIXL_STATIC_ASSERT(kEncodedQRegSize == (kEncodedDRegSize + 1));
int log2_delta = static_cast<int>(size_) - static_cast<int>(lane_size_);
VIXL_ASSERT(log2_delta >= 0);
return 1 << log2_delta;
}
return kUnknownSize;
}
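// For example, GetLanes() for a Q-sized register with S-sized lanes computes
// log2_delta = kEncodedQRegSize - kEncodedSRegSize = 2 and returns
// 1 << 2 = 4 (the "4S" format).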
bool Is8Bits() const { return size_ == kEncodedBRegSize; }
bool Is16Bits() const { return size_ == kEncodedHRegSize; }
bool Is32Bits() const { return size_ == kEncodedSRegSize; }
bool Is64Bits() const { return size_ == kEncodedDRegSize; }
bool Is128Bits() const { return size_ == kEncodedQRegSize; }
bool IsLaneSizeB() const { return lane_size_ == kEncodedBRegSize; }
bool IsLaneSizeH() const { return lane_size_ == kEncodedHRegSize; }
bool IsLaneSizeS() const { return lane_size_ == kEncodedSRegSize; }
bool IsLaneSizeD() const { return lane_size_ == kEncodedDRegSize; }
bool IsLaneSizeQ() const { return lane_size_ == kEncodedQRegSize; }
// If Is<Foo>Register(), then it is valid to convert the CPURegister to some
// <Foo>Register<Bar> type.
//
// If... ... then it is safe to construct ...
// r.IsRegister() -> Register(r)
// r.IsVRegister() -> VRegister(r)
// r.IsZRegister() -> ZRegister(r)
// r.IsPRegister() -> PRegister(r)
//
// r.IsPRegister() && HasLaneSize() -> PRegisterWithLaneSize(r)
// r.IsPRegister() && IsMerging() -> PRegisterM(r)
// r.IsPRegister() && IsZeroing() -> PRegisterZ(r)
bool IsRegister() const { return GetType() == kRegister; }
bool IsVRegister() const { return GetType() == kVRegister; }
bool IsZRegister() const { return GetType() == kZRegister; }
bool IsPRegister() const { return GetType() == kPRegister; }
bool IsNone() const { return GetType() == kNoRegister; }
// `GetType() == kNoRegister` implies IsNone(), and vice-versa.
// `GetType() == k<Foo>Register` implies Is<Foo>Register(), and vice-versa.
RegisterType GetType() const {
switch (bank_) {
case kNoRegisterBank:
return kNoRegister;
case kRRegisterBank:
return kRegister;
case kVRegisterBank:
return HasSize() ? kVRegister : kZRegister;
case kPRegisterBank:
return kPRegister;
}
VIXL_UNREACHABLE();
return kNoRegister;
}
// IsFPRegister() is true for scalar FP types (and therefore implies
// IsVRegister()). There is no corresponding FPRegister type.
bool IsFPRegister() const { return Is1H() || Is1S() || Is1D(); }
// TODO: These are stricter forms of the helpers above. We should make the
// basic helpers strict, and remove these.
bool IsValidRegister() const;
bool IsValidVRegister() const;
bool IsValidFPRegister() const;
bool IsValidZRegister() const;
bool IsValidPRegister() const;
bool IsValid() const;
bool IsValidOrNone() const { return IsNone() || IsValid(); }
bool IsVector() const { return HasLaneSize() && (size_ != lane_size_); }
bool IsScalar() const { return HasLaneSize() && (size_ == lane_size_); }
bool IsSameType(const CPURegister& other) const {
return GetType() == other.GetType();
}
bool IsSameBank(const CPURegister& other) const {
return GetBank() == other.GetBank();
}
// Two registers with unknown size are considered to have the same size if
// they also have the same type. For example, all Z registers have the same
// size, even though we don't know what that is.
bool IsSameSizeAndType(const CPURegister& other) const {
return IsSameType(other) && (size_ == other.size_);
}
bool IsSameFormat(const CPURegister& other) const {
return IsSameSizeAndType(other) && (lane_size_ == other.lane_size_);
}
// Note that NoReg aliases itself, so that 'Is' implies 'Aliases'.
bool Aliases(const CPURegister& other) const {
return IsSameBank(other) && (code_ == other.code_);
}
bool Is(const CPURegister& other) const {
if (IsRegister() || IsVRegister()) {
// For core (W, X) and FP/NEON registers, we only consider the code, size
// and type. This is legacy behaviour.
// TODO: We should probably check every field for all registers.
return Aliases(other) && (size_ == other.size_);
} else {
// For Z and P registers, we require all fields to match exactly.
VIXL_ASSERT(IsNone() || IsZRegister() || IsPRegister());
return (code_ == other.code_) && (bank_ == other.bank_) &&
(size_ == other.size_) && (qualifiers_ == other.qualifiers_) &&
(lane_size_ == other.lane_size_);
}
}
// Conversions to specific register types. The result is a register that
// aliases the original CPURegister. That is, the original register bank
// (`GetBank()`) is checked and the code (`GetCode()`) preserved, but all
// other properties are ignored.
//
// Typical usage:
//
// if (reg.GetBank() == kVRegisterBank) {
// DRegister d = reg.D();
// ...
// }
//
// These could all return types with compile-time guarantees (like XRegister),
// but this breaks backwards-compatibility quite severely, particularly with
// code like `cond ? reg.W() : reg.X()`, which would have indeterminate type.
// Core registers, like "w0".
Register W() const;
Register X() const;
// FP/NEON registers, like "b0".
VRegister B() const;
VRegister H() const;
VRegister S() const;
VRegister D() const;
VRegister Q() const;
VRegister V() const;
// SVE registers, like "z0".
ZRegister Z() const;
PRegister P() const;
// Utilities for kRegister types.
bool IsZero() const { return IsRegister() && (code_ == kZeroRegCode); }
bool IsSP() const { return IsRegister() && (code_ == kSPRegInternalCode); }
bool IsW() const { return IsRegister() && Is32Bits(); }
bool IsX() const { return IsRegister() && Is64Bits(); }
// Utilities for FP/NEON kVRegister types.
// These helpers ensure that the size and type of the register are as
// described. They do not consider the number of lanes that make up a vector.
// So, for example, Is8B() implies IsD(), and Is1D() implies IsD(), but IsD()
// does not imply Is1D() or Is8B().
// Check the number of lanes, i.e. the format of the vector, using methods such
// as Is8B(), Is1D(), etc.
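//
// For example (illustrative): for `d0`, IsD(), Is1D() and IsScalar() all
// hold, whereas for `v0.V2S()` only IsD() holds among these, since it is a
// 64-bit vector with two S-sized lanes.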
bool IsB() const { return IsVRegister() && Is8Bits(); }
bool IsH() const { return IsVRegister() && Is16Bits(); }
bool IsS() const { return IsVRegister() && Is32Bits(); }
bool IsD() const { return IsVRegister() && Is64Bits(); }
bool IsQ() const { return IsVRegister() && Is128Bits(); }
// As above, but also check that the register has exactly one lane. For
// example, reg.Is1D() implies DRegister(reg).IsValid(), but reg.IsD() does
// not.
bool Is1B() const { return IsB() && IsScalar(); }
bool Is1H() const { return IsH() && IsScalar(); }
bool Is1S() const { return IsS() && IsScalar(); }
bool Is1D() const { return IsD() && IsScalar(); }
bool Is1Q() const { return IsQ() && IsScalar(); }
// Check the specific NEON format.
bool Is8B() const { return IsD() && IsLaneSizeB(); }
bool Is16B() const { return IsQ() && IsLaneSizeB(); }
bool Is2H() const { return IsS() && IsLaneSizeH(); }
bool Is4H() const { return IsD() && IsLaneSizeH(); }
bool Is8H() const { return IsQ() && IsLaneSizeH(); }
bool Is2S() const { return IsD() && IsLaneSizeS(); }
bool Is4S() const { return IsQ() && IsLaneSizeS(); }
bool Is2D() const { return IsQ() && IsLaneSizeD(); }
// A semantic alias for sdot and udot (indexed and by element) instructions.
// The current CPURegister implementation cannot tell this apart from Is1S(),
// but it might do so later.
// TODO: Do this with the qualifiers_ field.
bool Is1S4B() const { return Is1S(); }
// Utilities for SVE registers.
bool IsUnqualified() const { return qualifiers_ == kNoQualifiers; }
bool IsMerging() const { return IsPRegister() && (qualifiers_ == kMerging); }
bool IsZeroing() const { return IsPRegister() && (qualifiers_ == kZeroing); }
// SVE types have unknown sizes, but within known bounds.
int GetMaxSizeInBytes() const {
switch (GetType()) {
case kZRegister:
return kZRegMaxSizeInBytes;
case kPRegister:
return kPRegMaxSizeInBytes;
default:
VIXL_ASSERT(HasSize());
return GetSizeInBits();
}
}
int GetMinSizeInBytes() const {
switch (GetType()) {
case kZRegister:
return kZRegMinSizeInBytes;
case kPRegister:
return kPRegMinSizeInBytes;
default:
VIXL_ASSERT(HasSize());
return GetSizeInBits();
}
}
int GetMaxSizeInBits() const { return GetMaxSizeInBytes() * kBitsPerByte; }
int GetMinSizeInBits() const { return GetMinSizeInBytes() * kBitsPerByte; }
static RegisterBank GetBankFor(RegisterType type) {
switch (type) {
case kNoRegister:
return kNoRegisterBank;
case kRegister:
return kRRegisterBank;
case kVRegister:
case kZRegister:
return kVRegisterBank;
case kPRegister:
return kPRegisterBank;
}
VIXL_UNREACHABLE();
return kNoRegisterBank;
}
static unsigned GetMaxCodeFor(CPURegister::RegisterType type) {
return GetMaxCodeFor(GetBankFor(type));
}
protected:
enum EncodedSize : uint8_t {
// Ensure that kUnknownSize (and therefore kNoRegister) is encoded as zero.
kEncodedUnknownSize = 0,
// The implementation assumes that the remaining sizes are encoded as
// `log2(size) + c`, so the following names must remain in sequence.
kEncodedBRegSize,
kEncodedHRegSize,
kEncodedSRegSize,
kEncodedDRegSize,
kEncodedQRegSize,
kEncodedWRegSize = kEncodedSRegSize,
kEncodedXRegSize = kEncodedDRegSize
};
VIXL_STATIC_ASSERT(kSRegSize == kWRegSize);
VIXL_STATIC_ASSERT(kDRegSize == kXRegSize);
char GetLaneSizeSymbol() const {
switch (lane_size_) {
case kEncodedBRegSize:
return 'B';
case kEncodedHRegSize:
return 'H';
case kEncodedSRegSize:
return 'S';
case kEncodedDRegSize:
return 'D';
case kEncodedQRegSize:
return 'Q';
case kEncodedUnknownSize:
break;
}
VIXL_UNREACHABLE();
return '?';
}
static EncodedSize EncodeSizeInBits(int size_in_bits) {
switch (size_in_bits) {
case kUnknownSize:
return kEncodedUnknownSize;
case kBRegSize:
return kEncodedBRegSize;
case kHRegSize:
return kEncodedHRegSize;
case kSRegSize:
return kEncodedSRegSize;
case kDRegSize:
return kEncodedDRegSize;
case kQRegSize:
return kEncodedQRegSize;
}
VIXL_UNREACHABLE();
return kEncodedUnknownSize;
}
static int DecodeSizeInBytesLog2(EncodedSize encoded_size) {
switch (encoded_size) {
case kEncodedUnknownSize:
// Log2 of B-sized lane in bytes is 0, so we can't just return 0 here.
VIXL_UNREACHABLE();
return -1;
case kEncodedBRegSize:
return kBRegSizeInBytesLog2;
case kEncodedHRegSize:
return kHRegSizeInBytesLog2;
case kEncodedSRegSize:
return kSRegSizeInBytesLog2;
case kEncodedDRegSize:
return kDRegSizeInBytesLog2;
case kEncodedQRegSize:
return kQRegSizeInBytesLog2;
}
VIXL_UNREACHABLE();
return kUnknownSize;
}
static int DecodeSizeInBytes(EncodedSize encoded_size) {
if (encoded_size == kEncodedUnknownSize) {
return kUnknownSize;
}
return 1 << DecodeSizeInBytesLog2(encoded_size);
}
static int DecodeSizeInBits(EncodedSize encoded_size) {
VIXL_STATIC_ASSERT(kUnknownSize == 0);
return DecodeSizeInBytes(encoded_size) * kBitsPerByte;
}
static unsigned GetMaxCodeFor(CPURegister::RegisterBank bank);
enum Qualifiers : uint8_t {
kNoQualifiers = 0,
// Used by P registers.
kMerging,
kZeroing
};
// An unchecked constructor, for use by derived classes.
CPURegister(int code,
EncodedSize size,
RegisterBank bank,
EncodedSize lane_size,
Qualifiers qualifiers = kNoQualifiers)
: code_(code),
bank_(bank),
size_(size),
qualifiers_(qualifiers),
lane_size_(lane_size) {}
// TODO: Check that access to these fields is reasonably efficient.
uint8_t code_;
RegisterBank bank_;
EncodedSize size_;
Qualifiers qualifiers_;
EncodedSize lane_size_;
};
// Ensure that CPURegisters can fit in a single (64-bit) register. This is a
// proxy for being "cheap to pass by value", which is hard to check directly.
VIXL_STATIC_ASSERT(sizeof(CPURegister) <= sizeof(uint64_t));
// TODO: Add constexpr constructors.
#define VIXL_DECLARE_REGISTER_COMMON(NAME, REGISTER_TYPE, PARENT_TYPE) \
VIXL_CONSTEXPR NAME() : PARENT_TYPE() {} \
\
explicit NAME(CPURegister other) : PARENT_TYPE(other) { \
VIXL_ASSERT(IsValid()); \
} \
\
VIXL_CONSTEXPR static unsigned GetMaxCode() { \
return kNumberOf##REGISTER_TYPE##s - 1; \
}
// Any W or X register, including the zero register and the stack pointer.
class Register : public CPURegister {
public:
VIXL_DECLARE_REGISTER_COMMON(Register, Register, CPURegister)
Register(int code, int size_in_bits)
: CPURegister(code, size_in_bits, kRegister) {
VIXL_ASSERT(IsValidRegister());
}
bool IsValid() const { return IsValidRegister(); }
};
// Any FP or NEON V register, including vector (V.<T>) and scalar forms
// (B, H, S, D, Q).
class VRegister : public CPURegister {
public:
VIXL_DECLARE_REGISTER_COMMON(VRegister, VRegister, CPURegister)
// For historical reasons, VRegister(0) returns v0.1Q (or equivalently, q0).
explicit VRegister(int code, int size_in_bits = kQRegSize, int lanes = 1)
: CPURegister(code,
EncodeSizeInBits(size_in_bits),
kVRegisterBank,
EncodeLaneSizeInBits(size_in_bits, lanes)) {
VIXL_ASSERT(IsValidVRegister());
}
VRegister(int code, VectorFormat format)
: CPURegister(code,
EncodeSizeInBits(RegisterSizeInBitsFromFormat(format)),
kVRegisterBank,
EncodeSizeInBits(LaneSizeInBitsFromFormat(format)),
kNoQualifiers) {
VIXL_ASSERT(IsValid());
}
VRegister V8B() const;
VRegister V16B() const;
VRegister V2H() const;
VRegister V4H() const;
VRegister V8H() const;
VRegister V2S() const;
VRegister V4S() const;
VRegister V1D() const;
VRegister V2D() const;
VRegister S4B() const;
bool IsValid() const { return IsValidVRegister(); }
protected:
static EncodedSize EncodeLaneSizeInBits(int size_in_bits, int lanes) {
VIXL_ASSERT(lanes >= 1);
VIXL_ASSERT((size_in_bits % lanes) == 0);
return EncodeSizeInBits(size_in_bits / lanes);
}
};
// Any SVE Z register, with or without a lane size specifier.
class ZRegister : public CPURegister {
public:
VIXL_DECLARE_REGISTER_COMMON(ZRegister, ZRegister, CPURegister)
explicit ZRegister(int code, int lane_size_in_bits = kUnknownSize)
: CPURegister(code,
kEncodedUnknownSize,
kVRegisterBank,
EncodeSizeInBits(lane_size_in_bits)) {
VIXL_ASSERT(IsValid());
}
ZRegister(int code, VectorFormat format)
: CPURegister(code,
kEncodedUnknownSize,
kVRegisterBank,
EncodeSizeInBits(LaneSizeInBitsFromFormat(format)),
kNoQualifiers) {
VIXL_ASSERT(IsValid());
}
// Return a Z register with a known lane size (like "z0.B").
ZRegister VnB() const { return ZRegister(GetCode(), kBRegSize); }
ZRegister VnH() const { return ZRegister(GetCode(), kHRegSize); }
ZRegister VnS() const { return ZRegister(GetCode(), kSRegSize); }
ZRegister VnD() const { return ZRegister(GetCode(), kDRegSize); }
ZRegister VnQ() const { return ZRegister(GetCode(), kQRegSize); }
template <typename T>
ZRegister WithLaneSize(T format) const {
return ZRegister(GetCode(), format);
}
ZRegister WithSameLaneSizeAs(const CPURegister& other) const {
VIXL_ASSERT(other.HasLaneSize());
return this->WithLaneSize(other.GetLaneSizeInBits());
}
bool IsValid() const { return IsValidZRegister(); }
};
// Any SVE P register, with or without a qualifier or lane size specifier.
class PRegister : public CPURegister {
public:
VIXL_DECLARE_REGISTER_COMMON(PRegister, PRegister, CPURegister)
explicit PRegister(int code) : CPURegister(code, kUnknownSize, kPRegister) {
VIXL_ASSERT(IsValid());
}
bool IsValid() const {
return IsValidPRegister() && !HasLaneSize() && IsUnqualified();
}
// Return a P register with a known lane size (like "p0.B").
PRegisterWithLaneSize VnB() const;
PRegisterWithLaneSize VnH() const;
PRegisterWithLaneSize VnS() const;
PRegisterWithLaneSize VnD() const;
template <typename T>
PRegisterWithLaneSize WithLaneSize(T format) const;
PRegisterWithLaneSize WithSameLaneSizeAs(const CPURegister& other) const;
// SVE predicates are specified (in normal assembly) with a "/z" (zeroing) or
// "/m" (merging) suffix. These methods are VIXL's equivalents.
PRegisterZ Zeroing() const;
PRegisterM Merging() const;
protected:
// Unchecked constructors, for use by derived classes.
PRegister(int code, EncodedSize encoded_lane_size)
: CPURegister(code,
kEncodedUnknownSize,
kPRegisterBank,
encoded_lane_size,
kNoQualifiers) {}
PRegister(int code, Qualifiers qualifiers)
: CPURegister(code,
kEncodedUnknownSize,
kPRegisterBank,
kEncodedUnknownSize,
qualifiers) {}
};
// Any SVE P register with a known lane size (like "p0.B").
class PRegisterWithLaneSize : public PRegister {
public:
VIXL_DECLARE_REGISTER_COMMON(PRegisterWithLaneSize, PRegister, PRegister)
PRegisterWithLaneSize(int code, int lane_size_in_bits)
: PRegister(code, EncodeSizeInBits(lane_size_in_bits)) {
VIXL_ASSERT(IsValid());
}
PRegisterWithLaneSize(int code, VectorFormat format)
: PRegister(code, EncodeSizeInBits(LaneSizeInBitsFromFormat(format))) {
VIXL_ASSERT(IsValid());
}
bool IsValid() const {
return IsValidPRegister() && HasLaneSize() && IsUnqualified();
}
// Overload lane size accessors so we can assert `HasLaneSize()`. This allows
// tools such as clang-tidy to prove that the result of GetLaneSize* is
// non-zero.
// TODO: Make these return 'int'.
unsigned GetLaneSizeInBits() const {
VIXL_ASSERT(HasLaneSize());
return PRegister::GetLaneSizeInBits();
}
unsigned GetLaneSizeInBytes() const {
VIXL_ASSERT(HasLaneSize());
return PRegister::GetLaneSizeInBytes();
}
};
// Any SVE P register with the zeroing qualifier (like "p0/z").
class PRegisterZ : public PRegister {
public:
VIXL_DECLARE_REGISTER_COMMON(PRegisterZ, PRegister, PRegister)
explicit PRegisterZ(int code) : PRegister(code, kZeroing) {
VIXL_ASSERT(IsValid());
}
bool IsValid() const {
return IsValidPRegister() && !HasLaneSize() && IsZeroing();
}
};
// Any SVE P register with the merging qualifier (like "p0/m").
class PRegisterM : public PRegister {
public:
VIXL_DECLARE_REGISTER_COMMON(PRegisterM, PRegister, PRegister)
explicit PRegisterM(int code) : PRegister(code, kMerging) {
VIXL_ASSERT(IsValid());
}
bool IsValid() const {
return IsValidPRegister() && !HasLaneSize() && IsMerging();
}
};
inline PRegisterWithLaneSize PRegister::VnB() const {
return PRegisterWithLaneSize(GetCode(), kBRegSize);
}
inline PRegisterWithLaneSize PRegister::VnH() const {
return PRegisterWithLaneSize(GetCode(), kHRegSize);
}
inline PRegisterWithLaneSize PRegister::VnS() const {
return PRegisterWithLaneSize(GetCode(), kSRegSize);
}
inline PRegisterWithLaneSize PRegister::VnD() const {
return PRegisterWithLaneSize(GetCode(), kDRegSize);
}
template <typename T>
inline PRegisterWithLaneSize PRegister::WithLaneSize(T format) const {
return PRegisterWithLaneSize(GetCode(), format);
}
inline PRegisterWithLaneSize PRegister::WithSameLaneSizeAs(
const CPURegister& other) const {
VIXL_ASSERT(other.HasLaneSize());
return this->WithLaneSize(other.GetLaneSizeInBits());
}
inline PRegisterZ PRegister::Zeroing() const { return PRegisterZ(GetCode()); }
inline PRegisterM PRegister::Merging() const { return PRegisterM(GetCode()); }
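// For example (illustrative), with the `p0` register defined later in this
// header, `p0.VnB()` corresponds to "p0.B", `p0.Merging()` to "p0/m" and
// `p0.Zeroing()` to "p0/z", matching normal SVE assembly syntax.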
#define VIXL_REGISTER_WITH_SIZE_LIST(V) \
V(WRegister, kWRegSize, Register) \
V(XRegister, kXRegSize, Register) \
V(QRegister, kQRegSize, VRegister) \
V(DRegister, kDRegSize, VRegister) \
V(SRegister, kSRegSize, VRegister) \
V(HRegister, kHRegSize, VRegister) \
V(BRegister, kBRegSize, VRegister)
#define VIXL_DEFINE_REGISTER_WITH_SIZE(NAME, SIZE, PARENT) \
class NAME : public PARENT { \
public: \
VIXL_CONSTEXPR NAME() : PARENT() {} \
explicit NAME(int code) : PARENT(code, SIZE) {} \
\
explicit NAME(PARENT other) : PARENT(other) { \
VIXL_ASSERT(GetSizeInBits() == SIZE); \
} \
\
PARENT As##PARENT() const { return *this; } \
\
VIXL_CONSTEXPR int GetSizeInBits() const { return SIZE; } \
\
bool IsValid() const { \
return PARENT::IsValid() && (PARENT::GetSizeInBits() == SIZE); \
} \
};
VIXL_REGISTER_WITH_SIZE_LIST(VIXL_DEFINE_REGISTER_WITH_SIZE)
// No*Reg is used to provide default values for unused arguments, error cases
// and so on. Note that these (and the default constructors) all compare equal
// (using the Is() method).
const Register NoReg;
const VRegister NoVReg;
const CPURegister NoCPUReg;
const ZRegister NoZReg;
// TODO: Ideally, these would use specialised register types (like XRegister and
// so on). However, doing so throws up template overloading problems elsewhere.
#define VIXL_DEFINE_REGISTERS(N) \
const Register w##N = WRegister(N); \
const Register x##N = XRegister(N); \
const VRegister b##N = BRegister(N); \
const VRegister h##N = HRegister(N); \
const VRegister s##N = SRegister(N); \
const VRegister d##N = DRegister(N); \
const VRegister q##N = QRegister(N); \
const VRegister v##N(N); \
const ZRegister z##N(N);
AARCH64_REGISTER_CODE_LIST(VIXL_DEFINE_REGISTERS)
#undef VIXL_DEFINE_REGISTERS
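// For example, instantiating the list above with N == 0 defines w0, x0, b0,
// h0, s0, d0, q0, v0 and z0.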
#define VIXL_DEFINE_P_REGISTERS(N) const PRegister p##N(N);
AARCH64_P_REGISTER_CODE_LIST(VIXL_DEFINE_P_REGISTERS)
#undef VIXL_DEFINE_P_REGISTERS
// VIXL represents 'sp' with a unique code, to tell it apart from 'xzr'.
const Register wsp = WRegister(kSPRegInternalCode);
const Register sp = XRegister(kSPRegInternalCode);
// Standard aliases.
const Register ip0 = x16;
const Register ip1 = x17;
const Register lr = x30;
const Register xzr = x31;
const Register wzr = w31;
// AreAliased returns true if any of the named registers overlap. Arguments
// set to NoReg are ignored. The system stack pointer may be specified.
bool AreAliased(const CPURegister& reg1,
const CPURegister& reg2,
const CPURegister& reg3 = NoReg,
const CPURegister& reg4 = NoReg,
const CPURegister& reg5 = NoReg,
const CPURegister& reg6 = NoReg,
const CPURegister& reg7 = NoReg,
const CPURegister& reg8 = NoReg);
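// For example, AreAliased(w0, x0) is true because w0 and x0 share a register
// code, while AreAliased(x0, x1) is false.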
// AreSameSizeAndType returns true if all of the specified registers have the
// same size, and are of the same type. The system stack pointer may be
// specified. Arguments set to NoReg are ignored, as are any subsequent
// arguments. At least one argument (reg1) must be valid (not NoCPUReg).
bool AreSameSizeAndType(const CPURegister& reg1,
const CPURegister& reg2,
const CPURegister& reg3 = NoCPUReg,
const CPURegister& reg4 = NoCPUReg,
const CPURegister& reg5 = NoCPUReg,
const CPURegister& reg6 = NoCPUReg,
const CPURegister& reg7 = NoCPUReg,
const CPURegister& reg8 = NoCPUReg);
// AreEven returns true if all of the specified registers have even register
// indices. Arguments set to NoReg are ignored, as are any subsequent
// arguments. At least one argument (reg1) must be valid (not NoCPUReg).
bool AreEven(const CPURegister& reg1,
const CPURegister& reg2,
const CPURegister& reg3 = NoReg,
const CPURegister& reg4 = NoReg,
const CPURegister& reg5 = NoReg,
const CPURegister& reg6 = NoReg,
const CPURegister& reg7 = NoReg,
const CPURegister& reg8 = NoReg);
// AreConsecutive returns true if all of the specified registers are
// consecutive in the register file. Arguments set to NoReg are ignored, as are
// any subsequent arguments. At least one argument (reg1) must be valid
// (not NoCPUReg).
bool AreConsecutive(const CPURegister& reg1,
const CPURegister& reg2,
const CPURegister& reg3 = NoCPUReg,
const CPURegister& reg4 = NoCPUReg);
// AreSameFormat returns true if all of the specified registers have the same
// vector format. Arguments set to NoReg are ignored, as are any subsequent
// arguments. At least one argument (reg1) must be valid (not NoVReg).
bool AreSameFormat(const CPURegister& reg1,
const CPURegister& reg2,
const CPURegister& reg3 = NoCPUReg,
const CPURegister& reg4 = NoCPUReg);
// AreSameLaneSize returns true if all of the specified registers have the same
// element lane size, B, H, S or D. It doesn't compare the type of registers.
// Arguments set to NoReg are ignored, as are any subsequent arguments.
// At least one argument (reg1) must be valid (not NoVReg).
// TODO: Remove this, and replace its uses with AreSameFormat.
bool AreSameLaneSize(const CPURegister& reg1,
const CPURegister& reg2,
const CPURegister& reg3 = NoCPUReg,
const CPURegister& reg4 = NoCPUReg);
}
} // namespace vixl::aarch64
#endif // VIXL_AARCH64_REGISTERS_AARCH64_H_

File diff suppressed because it is too large

View File

@@ -0,0 +1,194 @@
// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef VIXL_AARCH64_SIMULATOR_CONSTANTS_AARCH64_H_
#define VIXL_AARCH64_SIMULATOR_CONSTANTS_AARCH64_H_
#include "instructions-aarch64.h"
namespace vixl {
namespace aarch64 {
// Debug instructions.
//
// VIXL's macro-assembler and simulator support a few pseudo instructions to
// make debugging easier. These pseudo instructions do not exist on real
// hardware.
//
// TODO: Also consider allowing these pseudo-instructions to be disabled in the
// simulator, so that users can check that the input is valid native code.
// (This isn't possible in all cases. Printf won't work, for example.)
//
// Each debug pseudo instruction is represented by a HLT instruction. The HLT
// immediate field is used to identify the type of debug pseudo instruction.
enum DebugHltOpcode {
kUnreachableOpcode = 0xdeb0,
kPrintfOpcode,
kTraceOpcode,
kLogOpcode,
kRuntimeCallOpcode,
kSetCPUFeaturesOpcode,
kEnableCPUFeaturesOpcode,
kDisableCPUFeaturesOpcode,
kSaveCPUFeaturesOpcode,
kRestoreCPUFeaturesOpcode,
kMTEActive,
kMTEInactive,
// Aliases.
kDebugHltFirstOpcode = kUnreachableOpcode,
kDebugHltLastOpcode = kLogOpcode
};
VIXL_DEPRECATED("DebugHltOpcode", typedef DebugHltOpcode DebugHltOpcodes);
// Each pseudo instruction uses a custom encoding for additional arguments, as
// described below.
// Unreachable - kUnreachableOpcode
//
// Instruction which should never be executed. This is used as a guard in parts
// of the code that should not be reachable, such as in data encoded inline in
// the instructions.
// Printf - kPrintfOpcode
// - arg_count: The number of arguments.
// - arg_pattern: A set of PrintfArgPattern values, packed into two-bit fields.
//
// Simulate a call to printf.
//
// Floating-point and integer arguments are passed in separate sets of registers
// in AAPCS64 (even for varargs functions), so it is not possible to determine
// the type of each argument without some information about the values that were
// passed in. This information could be retrieved from the printf format string,
// but the format string is not trivial to parse so we encode the relevant
// information with the HLT instruction.
//
// Also, the following registers are populated (as if for a native AArch64
// call):
// x0: The format string
// x1-x7: Optional arguments, if type == CPURegister::kRegister
// d0-d7: Optional arguments, if type == CPURegister::kVRegister
const unsigned kPrintfArgCountOffset = 1 * kInstructionSize;
const unsigned kPrintfArgPatternListOffset = 2 * kInstructionSize;
const unsigned kPrintfLength = 3 * kInstructionSize;
const unsigned kPrintfMaxArgCount = 4;
// The argument pattern is a set of two-bit fields, each with one of the
// following values:
enum PrintfArgPattern {
kPrintfArgW = 1,
kPrintfArgX = 2,
// There is no kPrintfArgS because floats are always converted to doubles in C
// varargs calls.
kPrintfArgD = 3
};
static const unsigned kPrintfArgPatternBits = 2;
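//
// For example (an illustrative sketch; the exact packing is performed by the
// macro assembler), a Printf call passing one X-sized value and one D-sized
// value would use an arg_count of 2 and an arg_pattern combining kPrintfArgX
// and kPrintfArgD in two-bit fields, e.g.
// kPrintfArgX | (kPrintfArgD << kPrintfArgPatternBits) == 0xe if the first
// argument occupies the low bits.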
// Trace - kTraceOpcode
// - parameter: TraceParameter stored as a uint32_t
// - command: TraceCommand stored as a uint32_t
//
// Allow for trace management in the generated code. This enables or disables
// automatic tracing of the specified information for every simulated
// instruction.
const unsigned kTraceParamsOffset = 1 * kInstructionSize;
const unsigned kTraceCommandOffset = 2 * kInstructionSize;
const unsigned kTraceLength = 3 * kInstructionSize;
// Trace parameters.
enum TraceParameters {
LOG_DISASM = 1 << 0, // Log disassembly.
LOG_REGS = 1 << 1, // Log general purpose registers.
LOG_VREGS = 1 << 2, // Log SVE, NEON and floating-point registers.
LOG_SYSREGS = 1 << 3, // Log the flags and system registers.
LOG_WRITE = 1 << 4, // Log writes to memory.
LOG_BRANCH = 1 << 5, // Log taken branches.
LOG_NONE = 0,
LOG_STATE = LOG_REGS | LOG_VREGS | LOG_SYSREGS,
LOG_ALL = LOG_DISASM | LOG_STATE | LOG_WRITE | LOG_BRANCH
};
// Trace commands.
enum TraceCommand { TRACE_ENABLE = 1, TRACE_DISABLE = 2 };
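// For example, to log register state and memory writes for every subsequent
// simulated instruction, generated code would use a Trace pseudo instruction
// with parameter = LOG_STATE | LOG_WRITE and command = TRACE_ENABLE, and a
// later one with command = TRACE_DISABLE to turn tracing off again.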
// Log - kLogOpcode
// - parameter: TraceParameter stored as a uint32_t
//
// Print the specified information once. This mechanism is separate from Trace.
// In particular, _all_ of the specified registers are printed, rather than just
// the registers that the instruction writes.
//
// Any combination of the TraceParameters values can be used, except that
// LOG_DISASM is not supported for Log.
const unsigned kLogParamsOffset = 1 * kInstructionSize;
const unsigned kLogLength = 2 * kInstructionSize;
// Runtime call simulation - kRuntimeCallOpcode
enum RuntimeCallType { kCallRuntime, kTailCallRuntime };
const unsigned kRuntimeCallWrapperOffset = 1 * kInstructionSize;
// The size of a pointer on host.
const unsigned kRuntimeCallAddressSize = sizeof(uintptr_t);
const unsigned kRuntimeCallFunctionOffset =
kRuntimeCallWrapperOffset + kRuntimeCallAddressSize;
const unsigned kRuntimeCallTypeOffset =
kRuntimeCallFunctionOffset + kRuntimeCallAddressSize;
const unsigned kRuntimeCallLength = kRuntimeCallTypeOffset + sizeof(uint32_t);
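// As an illustrative example, on a 64-bit host (8-byte pointers) with 4-byte
// AArch64 instructions, the wrapper address is read from offset 4, the
// function address from offset 12, the call type from offset 20, and
// kRuntimeCallLength is 24 bytes.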
// Enable or disable CPU features - kSetCPUFeaturesOpcode
// - kEnableCPUFeaturesOpcode
// - kDisableCPUFeaturesOpcode
// - parameter[...]: A list of `CPUFeatures::Feature`s, encoded as
// ConfigureCPUFeaturesElementType and terminated with CPUFeatures::kNone.
// - [Padding to align to kInstructionSize.]
//
// 'Set' completely overwrites the existing CPU features.
// 'Enable' and 'Disable' update the existing CPU features.
//
// These mechanisms allow users to strictly check the use of CPU features in
// different regions of code.
//
// These have no effect on the set of 'seen' features (as reported by
// CPUFeaturesAuditor::HasSeen(...)).
typedef uint8_t ConfigureCPUFeaturesElementType;
const unsigned kConfigureCPUFeaturesListOffset = 1 * kInstructionSize;
// Save or restore CPU features - kSaveCPUFeaturesOpcode
// - kRestoreCPUFeaturesOpcode
//
// These provide a stack-like mechanism for preserving the current CPU
// features and restoring the last-preserved features. These
// pseudo-instructions take no arguments.
//
// These have no effect on the set of 'seen' features (as reported by
// CPUFeaturesAuditor::HasSeen(...)).
} // namespace aarch64
} // namespace vixl
#endif // VIXL_AARCH64_SIMULATOR_CONSTANTS_AARCH64_H_

View File

@@ -0,0 +1,104 @@
// Copyright 2016, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef VIXL_ASSEMBLER_BASE_H
#define VIXL_ASSEMBLER_BASE_H
#include "code-buffer-vixl.h"
// Microsoft Visual C++ defines a `mvn` macro that conflicts with our own
// definition.
#if defined(_MSC_VER) && defined(mvn)
#undef mvn
#endif
namespace vixl {
class CodeBufferCheckScope;
namespace internal {
class AssemblerBase {
public:
AssemblerBase(byte* buffer, size_t capacity)
: buffer_(buffer, capacity), allow_assembler_(false) {}
virtual ~AssemblerBase() {}
// Finalize a code buffer of generated instructions. This function must be
// called before executing or copying code from the buffer.
void FinalizeCode() { GetBuffer()->SetClean(); }
ptrdiff_t GetCursorOffset() const { return GetBuffer().GetCursorOffset(); }
// Return the address of the cursor.
template <typename T>
T GetCursorAddress() const {
VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
return GetBuffer().GetOffsetAddress<T>(GetCursorOffset());
}
size_t GetSizeOfCodeGenerated() const { return GetCursorOffset(); }
// Accessors.
CodeBuffer* GetBuffer() { return &buffer_; }
const CodeBuffer& GetBuffer() const { return buffer_; }
bool AllowAssembler() const { return allow_assembler_; }
protected:
void SetAllowAssembler(bool allow) { allow_assembler_ = allow; }
// CodeBufferCheckScope must be able to temporarily allow the assembler.
friend class vixl::CodeBufferCheckScope;
// Buffer where the code is emitted.
CodeBuffer buffer_;
private:
bool allow_assembler_;
public:
// Deprecated public interface.
// Return the address of an offset in the buffer.
template <typename T>
VIXL_DEPRECATED("GetBuffer().GetOffsetAddress<T>(offset)",
T GetOffsetAddress(ptrdiff_t offset) const) {
return GetBuffer().GetOffsetAddress<T>(offset);
}
// Return the address of the start of the buffer.
template <typename T>
VIXL_DEPRECATED("GetBuffer().GetStartAddress<T>()",
T GetStartAddress() const) {
return GetBuffer().GetOffsetAddress<T>(0);
}
};
} // namespace internal
} // namespace vixl
#endif // VIXL_ASSEMBLER_BASE_H

View File

@@ -0,0 +1,160 @@
// Copyright 2017, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef VIXL_CODE_BUFFER_H
#define VIXL_CODE_BUFFER_H
#include <cstring>
#include "globals-vixl.h"
#include "utils-vixl.h"
namespace vixl {
class CodeBuffer {
public:
CodeBuffer(byte* buffer, size_t capacity);
~CodeBuffer() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION;
void Reset();
ptrdiff_t GetOffsetFrom(ptrdiff_t offset) const {
ptrdiff_t cursor_offset = cursor_ - buffer_;
VIXL_ASSERT((offset >= 0) && (offset <= cursor_offset));
return cursor_offset - offset;
}
VIXL_DEPRECATED("GetOffsetFrom",
ptrdiff_t OffsetFrom(ptrdiff_t offset) const) {
return GetOffsetFrom(offset);
}
ptrdiff_t GetCursorOffset() const { return GetOffsetFrom(0); }
VIXL_DEPRECATED("GetCursorOffset", ptrdiff_t CursorOffset() const) {
return GetCursorOffset();
}
void Rewind(ptrdiff_t offset) {
byte* rewound_cursor = buffer_ + offset;
VIXL_ASSERT((buffer_ <= rewound_cursor) && (rewound_cursor <= cursor_));
cursor_ = rewound_cursor;
}
template <typename T>
T GetOffsetAddress(ptrdiff_t offset) const {
VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
VIXL_ASSERT((offset >= 0) && (offset <= (cursor_ - buffer_)));
return reinterpret_cast<T>(buffer_ + offset);
}
// Return the address of the start or end of the emitted code.
template <typename T>
T GetStartAddress() const {
VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
return GetOffsetAddress<T>(0);
}
template <typename T>
T GetEndAddress() const {
VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
return GetOffsetAddress<T>(GetSizeInBytes());
}
size_t GetRemainingBytes() const {
VIXL_ASSERT((cursor_ >= buffer_) && (cursor_ <= (buffer_ + capacity_)));
return (buffer_ + capacity_) - cursor_;
}
VIXL_DEPRECATED("GetRemainingBytes", size_t RemainingBytes() const) {
return GetRemainingBytes();
}
size_t GetSizeInBytes() const {
VIXL_ASSERT((cursor_ >= buffer_) && (cursor_ <= (buffer_ + capacity_)));
return cursor_ - buffer_;
}
// A code buffer can emit:
// * 8, 16, 32 or 64-bit data: constant.
// * 16 or 32-bit data: instruction.
// * string: debug info.
void Emit8(uint8_t data) { Emit(data); }
void Emit16(uint16_t data) { Emit(data); }
void Emit32(uint32_t data) { Emit(data); }
void Emit64(uint64_t data) { Emit(data); }
void EmitString(const char* string);
void EmitData(const void* data, size_t size);
template <typename T>
void Emit(T value) {
VIXL_ASSERT(HasSpaceFor(sizeof(value)));
dirty_ = true;
byte* c = cursor_;
memcpy(c, &value, sizeof(value));
cursor_ = c + sizeof(value);
}
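// Example (an illustrative sketch):
//
//   byte scratch[16];
//   CodeBuffer buffer(scratch, sizeof(scratch));
//   buffer.Emit32(0xd503201f);  // AArch64 `nop` encoding.
//   VIXL_ASSERT(buffer.GetSizeInBytes() == 4);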
void UpdateData(size_t offset, const void* data, size_t size);
// Align to 32bit.
void Align();
// Ensure there is enough space for and emit 'n' zero bytes.
void EmitZeroedBytes(int n);
bool Is16bitAligned() const { return IsAligned<2>(cursor_); }
bool Is32bitAligned() const { return IsAligned<4>(cursor_); }
size_t GetCapacity() const { return capacity_; }
VIXL_DEPRECATED("GetCapacity", size_t capacity() const) {
return GetCapacity();
}
bool IsDirty() const { return dirty_; }
void SetClean() { dirty_ = false; }
bool HasSpaceFor(size_t amount) const {
return GetRemainingBytes() >= amount;
}
private:
// Backing store of the buffer.
byte* buffer_;
// Pointer to the next location to be written.
byte* cursor_;
// True if there has been any write since the buffer was created or cleaned.
bool dirty_;
// Capacity in bytes of the backing store.
size_t capacity_;
};
} // namespace vixl
#endif // VIXL_CODE_BUFFER_H

View File

@@ -0,0 +1,322 @@
// Copyright 2016, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef VIXL_CODE_GENERATION_SCOPES_H_
#define VIXL_CODE_GENERATION_SCOPES_H_
#include "assembler-base-vixl.h"
#include "macro-assembler-interface.h"
namespace vixl {
// This scope will:
// - Allow code emission from the specified `Assembler`.
// - Optionally reserve space in the `CodeBuffer` (if it is managed by VIXL).
// - Optionally, on destruction, check the size of the generated code.
// (The size can be either exact or a maximum size.)
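//
// Typical usage (an illustrative sketch; `assm` is assumed to be an assembler
// derived from internal::AssemblerBase):
//
//   {
//     CodeBufferCheckScope scope(&assm, 8,
//                                CodeBufferCheckScope::kReserveBufferSpace,
//                                CodeBufferCheckScope::kExactSize);
//     // Emit exactly 8 bytes of code here.
//   }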
class CodeBufferCheckScope {
public:
// Tell whether or not the scope needs to ensure the associated CodeBuffer
// has enough space for the requested size.
enum BufferSpacePolicy {
kReserveBufferSpace,
kDontReserveBufferSpace,
// Deprecated, but kept for backward compatibility.
kCheck = kReserveBufferSpace,
kNoCheck = kDontReserveBufferSpace
};
// Tell whether or not the scope should assert that the amount of code emitted
// within the scope is consistent with the requested amount.
enum SizePolicy {
kNoAssert, // Do not check the size of the code emitted.
kExactSize, // The code emitted must be exactly size bytes.
kMaximumSize // The code emitted must be at most size bytes.
};
// This constructor implicitly calls `Open` to initialise the scope
// (`assembler` must not be `NULL`), so it is ready to use immediately after
// it has been constructed.
CodeBufferCheckScope(internal::AssemblerBase* assembler,
size_t size,
BufferSpacePolicy check_policy = kReserveBufferSpace,
SizePolicy size_policy = kMaximumSize)
: assembler_(NULL), initialised_(false) {
Open(assembler, size, check_policy, size_policy);
}
// This constructor does not implicitly initialise the scope. Instead, the
// user is required to explicitly call the `Open` function before using the
// scope.
CodeBufferCheckScope() : assembler_(NULL), initialised_(false) {
// Nothing to do.
}
virtual ~CodeBufferCheckScope() { Close(); }
// This function performs the actual initialisation work.
void Open(internal::AssemblerBase* assembler,
size_t size,
BufferSpacePolicy check_policy = kReserveBufferSpace,
SizePolicy size_policy = kMaximumSize) {
VIXL_ASSERT(!initialised_);
VIXL_ASSERT(assembler != NULL);
assembler_ = assembler;
if (check_policy == kReserveBufferSpace) {
VIXL_ASSERT(assembler->GetBuffer()->HasSpaceFor(size));
}
#ifdef VIXL_DEBUG
limit_ = assembler_->GetSizeOfCodeGenerated() + size;
assert_policy_ = size_policy;
previous_allow_assembler_ = assembler_->AllowAssembler();
assembler_->SetAllowAssembler(true);
#else
USE(size_policy);
#endif
initialised_ = true;
}
// This function performs the cleaning-up work. It must succeed even if the
// scope has not been opened. It is safe to call multiple times.
void Close() {
#ifdef VIXL_DEBUG
if (!initialised_) {
return;
}
assembler_->SetAllowAssembler(previous_allow_assembler_);
switch (assert_policy_) {
case kNoAssert:
break;
case kExactSize:
VIXL_ASSERT(assembler_->GetSizeOfCodeGenerated() == limit_);
break;
case kMaximumSize:
VIXL_ASSERT(assembler_->GetSizeOfCodeGenerated() <= limit_);
break;
default:
VIXL_UNREACHABLE();
}
#endif
initialised_ = false;
}
protected:
internal::AssemblerBase* assembler_;
SizePolicy assert_policy_;
size_t limit_;
bool previous_allow_assembler_;
bool initialised_;
};
// This scope will:
// - Do the same as `CodeBufferCheckScope`, but:
// - If managed by VIXL, always reserve space in the `CodeBuffer`.
// - Always check the size (exact or maximum) of the generated code on
// destruction.
// - Emit pools if the specified size would push them out of range.
// - Block pools emission for the duration of the scope.
// This scope allows the `Assembler` and `MacroAssembler` to be freely and
// safely mixed for its duration.
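//
// Typical usage (an illustrative sketch; `masm` is assumed to implement
// MacroAssemblerInterface):
//
//   {
//     EmissionCheckScope scope(&masm, 16);
//     // Up to 16 bytes of code may be emitted here, by the macro assembler
//     // or its assembler, with pool emission blocked for the duration.
//   }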
class EmissionCheckScope : public CodeBufferCheckScope {
public:
// This constructor implicitly calls `Open` (when `masm` is not `NULL`) to
// initialise the scope, so it is ready to use immediately after it has been
// constructed.
EmissionCheckScope(MacroAssemblerInterface* masm,
size_t size,
SizePolicy size_policy = kMaximumSize) {
Open(masm, size, size_policy);
}
// This constructor does not implicitly initialise the scope. Instead, the
// user is required to explicitly call the `Open` function before using the
// scope.
EmissionCheckScope() {}
virtual ~EmissionCheckScope() { Close(); }
enum PoolPolicy {
// Do not forbid pool emission inside the scope. Pools will not be emitted
// on `Open` either.
kIgnorePools,
// Force pools to be generated on `Open` if necessary and block their
// emission inside the scope.
kBlockPools,
// Deprecated, but kept for backward compatibility.
kCheckPools = kBlockPools
};
void Open(MacroAssemblerInterface* masm,
size_t size,
SizePolicy size_policy = kMaximumSize) {
Open(masm, size, size_policy, kBlockPools);
}
void Close() {
if (!initialised_) {
return;
}
if (masm_ == NULL) {
// Nothing to do.
return;
}
// Perform the opposite of `Open`, which is:
// - Check the code generation limit was not exceeded.
// - Release the pools.
CodeBufferCheckScope::Close();
if (pool_policy_ == kBlockPools) {
masm_->ReleasePools();
}
VIXL_ASSERT(!initialised_);
}
protected:
void Open(MacroAssemblerInterface* masm,
size_t size,
SizePolicy size_policy,
PoolPolicy pool_policy) {
if (masm == NULL) {
// Nothing to do.
// We may reach this point in a context of conditional code generation.
// See `aarch64::MacroAssembler::MoveImmediateHelper()` for an example.
return;
}
masm_ = masm;
pool_policy_ = pool_policy;
if (pool_policy_ == kBlockPools) {
// To avoid duplicating the work to check that enough space is available
// in the buffer, do not use the more generic `EnsureEmitFor()`. It is
// done below when opening `CodeBufferCheckScope`.
masm->EnsureEmitPoolsFor(size);
masm->BlockPools();
}
// The buffer should be checked *after* we emit the pools.
CodeBufferCheckScope::Open(masm->AsAssemblerBase(),
size,
kReserveBufferSpace,
size_policy);
VIXL_ASSERT(initialised_);
}
// This constructor should only be used from code that is *currently
// generating* the pools, to avoid an infinite loop.
EmissionCheckScope(MacroAssemblerInterface* masm,
size_t size,
SizePolicy size_policy,
PoolPolicy pool_policy) {
Open(masm, size, size_policy, pool_policy);
}
MacroAssemblerInterface* masm_;
PoolPolicy pool_policy_;
};
// Use this scope when you need a one-to-one mapping between methods and
// instructions. This scope will:
// - Do the same as `EmissionCheckScope`.
// - Block access to the MacroAssemblerInterface (using run-time assertions).
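//
// Typical usage (an illustrative sketch; `masm` is assumed to implement
// MacroAssemblerInterface):
//
//   {
//     ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
//     // Exactly two 4-byte instructions must be emitted here, using the raw
//     // assembler only (macro instructions are blocked by the scope).
//   }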
class ExactAssemblyScope : public EmissionCheckScope {
public:
// This constructor implicitly calls `Open` (when `masm` is not `NULL`) to
// initialise the scope, so it is ready to use immediately after it has been
// constructed.
ExactAssemblyScope(MacroAssemblerInterface* masm,
size_t size,
SizePolicy size_policy = kExactSize) {
Open(masm, size, size_policy);
}
// This constructor does not implicitly initialise the scope. Instead, the
// user is required to explicitly call the `Open` function before using the
// scope.
ExactAssemblyScope() {}
virtual ~ExactAssemblyScope() { Close(); }
void Open(MacroAssemblerInterface* masm,
size_t size,
SizePolicy size_policy = kExactSize) {
Open(masm, size, size_policy, kBlockPools);
}
void Close() {
if (!initialised_) {
return;
}
if (masm_ == NULL) {
// Nothing to do.
return;
}
#ifdef VIXL_DEBUG
masm_->SetAllowMacroInstructions(previous_allow_macro_assembler_);
#else
USE(previous_allow_macro_assembler_);
#endif
EmissionCheckScope::Close();
}
protected:
// This protected constructor allows overriding the pool policy. It is
// available to allow this scope to be used in code that handles generation
// of pools.
ExactAssemblyScope(MacroAssemblerInterface* masm,
size_t size,
SizePolicy assert_policy,
PoolPolicy pool_policy) {
Open(masm, size, assert_policy, pool_policy);
}
void Open(MacroAssemblerInterface* masm,
size_t size,
SizePolicy size_policy,
PoolPolicy pool_policy) {
VIXL_ASSERT(size_policy != kNoAssert);
if (masm == NULL) {
// Nothing to do.
return;
}
// Rely on EmissionCheckScope::Open to initialise `masm_` and
// `pool_policy_`.
EmissionCheckScope::Open(masm, size, size_policy, pool_policy);
#ifdef VIXL_DEBUG
previous_allow_macro_assembler_ = masm->AllowMacroInstructions();
masm->SetAllowMacroInstructions(false);
#endif
}
private:
bool previous_allow_macro_assembler_;
};
} // namespace vixl
#endif // VIXL_CODE_GENERATION_SCOPES_H_

View File

@@ -0,0 +1,167 @@
// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef VIXL_COMPILER_INTRINSICS_H
#define VIXL_COMPILER_INTRINSICS_H
#include <limits.h>
#include "globals-vixl.h"
namespace vixl {
// Helper to check whether the version of GCC used is at least the specified
// version.
#define MAJOR 1000000
#define MINOR 1000
#if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__)
#define GCC_VERSION_OR_NEWER(major, minor, patchlevel) \
((__GNUC__ * (MAJOR) + __GNUC_MINOR__ * (MINOR) + __GNUC_PATCHLEVEL__) >= \
((major) * (MAJOR) + ((minor)) * (MINOR) + (patchlevel)))
#elif defined(__GNUC__) && defined(__GNUC_MINOR__)
#define GCC_VERSION_OR_NEWER(major, minor, patchlevel) \
((__GNUC__ * (MAJOR) + __GNUC_MINOR__ * (MINOR)) >= \
((major) * (MAJOR) + ((minor)) * (MINOR) + (patchlevel)))
#else
#define GCC_VERSION_OR_NEWER(major, minor, patchlevel) 0
#endif
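// For example, with GCC 9.4.0 the version expression evaluates to 9004000, so
// GCC_VERSION_OR_NEWER(4, 7, 0) compares it against 4007000 and is true.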
#if defined(__clang__) && !defined(VIXL_NO_COMPILER_BUILTINS)
// clang-format off
#define COMPILER_HAS_BUILTIN_CLRSB (__has_builtin(__builtin_clrsb))
#define COMPILER_HAS_BUILTIN_CLZ (__has_builtin(__builtin_clz))
#define COMPILER_HAS_BUILTIN_CTZ (__has_builtin(__builtin_ctz))
#define COMPILER_HAS_BUILTIN_FFS (__has_builtin(__builtin_ffs))
#define COMPILER_HAS_BUILTIN_POPCOUNT (__has_builtin(__builtin_popcount))
// clang-format on
#elif defined(__GNUC__) && !defined(VIXL_NO_COMPILER_BUILTINS)
// The documentation for these builtins is available at:
// https://gcc.gnu.org/onlinedocs/gcc-$MAJOR.$MINOR.$PATCHLEVEL/gcc//Other-Builtins.html
// clang-format off
# define COMPILER_HAS_BUILTIN_CLRSB (GCC_VERSION_OR_NEWER(4, 7, 0))
# define COMPILER_HAS_BUILTIN_CLZ (GCC_VERSION_OR_NEWER(3, 4, 0))
# define COMPILER_HAS_BUILTIN_CTZ (GCC_VERSION_OR_NEWER(3, 4, 0))
# define COMPILER_HAS_BUILTIN_FFS (GCC_VERSION_OR_NEWER(3, 4, 0))
# define COMPILER_HAS_BUILTIN_POPCOUNT (GCC_VERSION_OR_NEWER(3, 4, 0))
// clang-format on
#else
// One can define VIXL_NO_COMPILER_BUILTINS to force using the manually
// implemented C++ methods.
// clang-format off
#define COMPILER_HAS_BUILTIN_BSWAP false
#define COMPILER_HAS_BUILTIN_CLRSB false
#define COMPILER_HAS_BUILTIN_CLZ false
#define COMPILER_HAS_BUILTIN_CTZ false
#define COMPILER_HAS_BUILTIN_FFS false
#define COMPILER_HAS_BUILTIN_POPCOUNT false
// clang-format on
#endif
template <typename V>
inline bool IsPowerOf2(V value) {
return (value != 0) && ((value & (value - 1)) == 0);
}
// Declaration of fallback functions.
int CountLeadingSignBitsFallBack(int64_t value, int width);
int CountLeadingZerosFallBack(uint64_t value, int width);
int CountSetBitsFallBack(uint64_t value, int width);
int CountTrailingZerosFallBack(uint64_t value, int width);
// Implementation of intrinsics functions.
// TODO: The implementations could be improved for sizes different from 32-bit
// and 64-bit: we could mask the values and call the appropriate builtin.
// Return the number of leading bits that match the topmost (sign) bit,
// excluding the topmost bit itself.
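// For example, CountLeadingSignBits(INT32_C(-1)) is 31 and
// CountLeadingSignBits(INT32_C(1)) is 30.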
template <typename V>
inline int CountLeadingSignBits(V value, int width = (sizeof(V) * 8)) {
VIXL_ASSERT(IsPowerOf2(width) && (width <= 64));
#if COMPILER_HAS_BUILTIN_CLRSB
VIXL_ASSERT((LLONG_MIN <= value) && (value <= LLONG_MAX));
int ll_width = sizeof(long long) * kBitsPerByte; // NOLINT(runtime/int)
int result = __builtin_clrsbll(value) - (ll_width - width);
// Check that the value fits in the specified width.
VIXL_ASSERT(result >= 0);
return result;
#else
VIXL_ASSERT((INT64_MIN <= value) && (value <= INT64_MAX));
return CountLeadingSignBitsFallBack(value, width);
#endif
}
template <typename V>
inline int CountLeadingZeros(V value, int width = (sizeof(V) * 8)) {
#if COMPILER_HAS_BUILTIN_CLZ
if (width == 32) {
return (value == 0) ? 32 : __builtin_clz(static_cast<unsigned>(value));
} else if (width == 64) {
return (value == 0) ? 64 : __builtin_clzll(value);
}
#endif
return CountLeadingZerosFallBack(value, width);
}
template <typename V>
inline int CountSetBits(V value, int width = (sizeof(V) * 8)) {
#if COMPILER_HAS_BUILTIN_POPCOUNT
if (width == 32) {
return __builtin_popcount(static_cast<unsigned>(value));
} else if (width == 64) {
return __builtin_popcountll(value);
}
#endif
return CountSetBitsFallBack(value, width);
}
template <typename V>
inline int CountTrailingZeros(V value, int width = (sizeof(V) * 8)) {
#if COMPILER_HAS_BUILTIN_CTZ
if (width == 32) {
return (value == 0) ? 32 : __builtin_ctz(static_cast<unsigned>(value));
} else if (width == 64) {
return (value == 0) ? 64 : __builtin_ctzll(value);
}
#endif
return CountTrailingZerosFallBack(value, width);
}
} // namespace vixl
#endif // VIXL_COMPILER_INTRINSICS_H

View File

@@ -0,0 +1,508 @@
// Copyright 2018, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef VIXL_CPU_FEATURES_H
#define VIXL_CPU_FEATURES_H
#include <bitset>
#include <ostream>
#include "globals-vixl.h"
namespace vixl {
// VIXL aims to handle and detect all architectural features that are likely to
// influence code-generation decisions at EL0 (user-space).
//
// - There may be multiple VIXL feature flags for a given architectural
// extension. This occurs where the extension allows components to be
// implemented independently, or where kernel support is needed, and is likely
// to be fragmented.
//
// For example, Pointer Authentication (kPAuth*) has a separate feature flag
// for access to PACGA, and to indicate that the QARMA algorithm is
// implemented.
//
// - Conversely, some extensions have configuration options that do not affect
// EL0, so these are presented as a single VIXL feature.
//
// For example, the RAS extension (kRAS) has several variants, but the only
// feature relevant to VIXL is the addition of the ESB instruction so we only
// need a single flag.
//
// - VIXL offers separate flags for separate features even if they're
// architecturally linked.
//
// For example, the architecture requires kFPHalf and kNEONHalf to be equal,
// but they have separate hardware ID register fields so VIXL presents them as
// separate features.
//
// - VIXL can detect every feature for which it can generate code.
//
// - VIXL can detect some features for which it cannot generate code.
//
// The CPUFeatures::Feature enum — derived from the macro list below — is
// frequently extended. New features may be added to the list at any point, and
// no assumptions should be made about the numerical values assigned to each
// enum constant. The symbolic names can be considered to be stable.
//
// The debug descriptions are used only for debug output. The 'cpuinfo' strings
// are informative; VIXL does not use /proc/cpuinfo for feature detection.
// clang-format off
#define VIXL_CPU_FEATURE_LIST(V) \
/* If set, the OS traps and emulates MRS accesses to relevant (EL1) ID_* */ \
/* registers, so that the detailed feature registers can be read */ \
/* directly. */ \
\
/* Constant name Debug description Linux 'cpuinfo' string. */ \
V(kIDRegisterEmulation, "ID register emulation", "cpuid") \
\
V(kFP, "FP", "fp") \
V(kNEON, "NEON", "asimd") \
V(kCRC32, "CRC32", "crc32") \
V(kDGH, "DGH", "dgh") \
/* Speculation control features. */ \
V(kCSV2, "CSV2", NULL) \
V(kSCXTNUM, "SCXTNUM", NULL) \
V(kCSV3, "CSV3", NULL) \
V(kSB, "SB", "sb") \
V(kSPECRES, "SPECRES", NULL) \
V(kSSBS, "SSBS", NULL) \
V(kSSBSControl, "SSBS (PSTATE control)", "ssbs") \
/* Cryptographic support instructions. */ \
V(kAES, "AES", "aes") \
V(kSHA1, "SHA1", "sha1") \
V(kSHA2, "SHA2", "sha2") \
/* A form of PMULL{2} with a 128-bit (1Q) result. */ \
V(kPmull1Q, "Pmull1Q", "pmull") \
/* Atomic operations on memory: CAS, LDADD, STADD, SWP, etc. */ \
V(kAtomics, "Atomics", "atomics") \
/* Limited ordering regions: LDLAR, STLLR and their variants. */ \
V(kLORegions, "LORegions", NULL) \
/* Rounding doubling multiply add/subtract: SQRDMLAH and SQRDMLSH. */ \
V(kRDM, "RDM", "asimdrdm") \
/* Scalable Vector Extension. */ \
V(kSVE, "SVE", "sve") \
V(kSVEF64MM, "SVE F64MM", "svef64mm") \
V(kSVEF32MM, "SVE F32MM", "svef32mm") \
V(kSVEI8MM, "SVE I8MM", "svei8imm") \
V(kSVEBF16, "SVE BFloat16", "svebf16") \
/* SDOT and UDOT support (in NEON). */ \
V(kDotProduct, "DotProduct", "asimddp") \
/* Int8 matrix multiplication (in NEON). */ \
V(kI8MM, "NEON I8MM", "i8mm") \
/* Half-precision (FP16) support for FP and NEON, respectively. */ \
V(kFPHalf, "FPHalf", "fphp") \
V(kNEONHalf, "NEONHalf", "asimdhp") \
/* BFloat16 support (in both FP and NEON.) */ \
V(kBF16, "FP/NEON BFloat 16", "bf16") \
/* The RAS extension, including the ESB instruction. */ \
V(kRAS, "RAS", NULL) \
/* Data cache clean to the point of persistence: DC CVAP. */ \
V(kDCPoP, "DCPoP", "dcpop") \
/* Data cache clean to the point of deep persistence: DC CVADP. */ \
V(kDCCVADP, "DCCVADP", "dcpodp") \
/* Cryptographic support instructions. */ \
V(kSHA3, "SHA3", "sha3") \
V(kSHA512, "SHA512", "sha512") \
V(kSM3, "SM3", "sm3") \
V(kSM4, "SM4", "sm4") \
/* Pointer authentication for addresses. */ \
V(kPAuth, "PAuth", "paca") \
/* Pointer authentication for addresses uses QARMA. */ \
V(kPAuthQARMA, "PAuthQARMA", NULL) \
/* Generic authentication (using the PACGA instruction). */ \
V(kPAuthGeneric, "PAuthGeneric", "pacg") \
/* Generic authentication uses QARMA. */ \
V(kPAuthGenericQARMA, "PAuthGenericQARMA", NULL) \
/* JavaScript-style FP -> integer conversion instruction: FJCVTZS. */ \
V(kJSCVT, "JSCVT", "jscvt") \
/* Complex number support for NEON: FCMLA and FCADD. */ \
V(kFcma, "Fcma", "fcma") \
/* RCpc-based model (for weaker release consistency): LDAPR and variants. */ \
V(kRCpc, "RCpc", "lrcpc") \
V(kRCpcImm, "RCpc (imm)", "ilrcpc") \
/* Flag manipulation instructions: SETF{8,16}, CFINV, RMIF. */ \
V(kFlagM, "FlagM", "flagm") \
/* Unaligned single-copy atomicity. */ \
V(kUSCAT, "USCAT", "uscat") \
/* FP16 fused multiply-add or -subtract long: FMLAL{2}, FMLSL{2}. */ \
V(kFHM, "FHM", "asimdfhm") \
/* Data-independent timing (for selected instructions). */ \
V(kDIT, "DIT", "dit") \
/* Branch target identification. */ \
V(kBTI, "BTI", "bti") \
/* Flag manipulation instructions: {AX,XA}FLAG */ \
V(kAXFlag, "AXFlag", "flagm2") \
/* Random number generation extension. */ \
V(kRNG, "RNG", "rng") \
/* Floating-point round to {32,64}-bit integer. */ \
V(kFrintToFixedSizedInt,"Frint (bounded)", "frint") \
/* Memory Tagging Extension. */ \
V(kMTEInstructions, "MTE (EL0 instructions)", NULL) \
V(kMTE, "MTE", NULL) \
V(kMTE3, "MTE (asymmetric)", "mte3") \
/* PAuth extensions. */ \
V(kPAuthEnhancedPAC, "PAuth EnhancedPAC", NULL) \
V(kPAuthEnhancedPAC2, "PAuth EnhancedPAC2", NULL) \
V(kPAuthFPAC, "PAuth FPAC", NULL) \
V(kPAuthFPACCombined, "PAuth FPACCombined", NULL) \
/* Scalable Vector Extension 2. */ \
V(kSVE2, "SVE2", "sve2") \
V(kSVESM4, "SVE SM4", "svesm4") \
V(kSVESHA3, "SVE SHA3", "svesha3") \
V(kSVEBitPerm, "SVE BitPerm", "svebitperm") \
V(kSVEAES, "SVE AES", "sveaes") \
V(kSVEPmull128, "SVE Pmull128", "svepmull") \
/* Alternate floating-point behavior */ \
V(kAFP, "AFP", "afp") \
/* Enhanced Counter Virtualization */ \
V(kECV, "ECV", "ecv") \
/* Increased precision of Reciprocal Estimate and Square Root Estimate */ \
V(kRPRES, "RPRES", "rpres") \
/* Memory operation instructions, for memcpy, memset */ \
V(kMOPS, "Memory ops", NULL) \
/* Scalable Matrix Extension (SME) */ \
V(kSME, "SME", "sme") \
V(kSMEi16i64, "SME (i16i64)", "smei16i64") \
V(kSMEf64f64, "SME (f64f64)", "smef64f64") \
V(kSMEi8i32, "SME (i8i32)", "smei8i32") \
V(kSMEf16f32, "SME (f16f32)", "smef16f32") \
V(kSMEb16f32, "SME (b16f32)", "smeb16f32") \
V(kSMEf32f32, "SME (f32f32)", "smef32f32") \
V(kSMEfa64, "SME (fa64)", "smefa64") \
/* WFET and WFIT instruction support */ \
V(kWFXT, "WFXT", "wfxt") \
/* Extended BFloat16 instructions */ \
V(kEBF16, "EBF16", "ebf16") \
V(kSVE_EBF16, "EBF16 (SVE)", "sveebf16") \
V(kCSSC, "CSSC", "cssc")
// clang-format on
class CPUFeaturesConstIterator;
// A representation of the set of features known to be supported by the target
// device. Each feature is represented by a simple boolean flag.
//
// - When the Assembler is asked to assemble an instruction, it asserts (in
// debug mode) that the necessary features are available.
//
// - TODO: The MacroAssembler relies on the Assembler's assertions, but in
// some cases it may be useful for macros to generate a fall-back sequence
// in case features are not available.
//
// - The Simulator assumes by default that all features are available, but it
// is possible to configure it to fail if the simulated code uses features
// that are not enabled.
//
// The Simulator also offers pseudo-instructions to allow features to be
// enabled and disabled dynamically. This is useful when you want to ensure
// that some features are constrained to certain areas of code.
//
// - The base Disassembler knows nothing about CPU features, but the
// PrintDisassembler can be configured to annotate its output with warnings
// about unavailable features. The Simulator uses this feature when
// instruction trace is enabled.
//
// - The Decoder-based components -- the Simulator and PrintDisassembler --
// rely on a CPUFeaturesAuditor visitor. This visitor keeps a list of
// features actually encountered so that a large block of code can be
// examined (either directly or through simulation), and the required
// features analysed later.
//
// Expected usage:
//
// // By default, VIXL uses CPUFeatures::AArch64LegacyBaseline(), for
// // compatibility with older versions of VIXL.
// MacroAssembler masm;
//
// // Generate code only for the current CPU.
// masm.SetCPUFeatures(CPUFeatures::InferFromOS());
//
// // Turn off feature checking entirely.
// masm.SetCPUFeatures(CPUFeatures::All());
//
// Feature set manipulation:
//
// CPUFeatures f; // The default constructor gives an empty set.
// // Individual features can be added (or removed).
// f.Combine(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::kAES);
// f.Remove(CPUFeatures::kNEON);
//
// // Some helpers exist for extensions that provide several features.
// f.Remove(CPUFeatures::All());
// f.Combine(CPUFeatures::AArch64LegacyBaseline());
//
// // Chained construction is also possible.
// CPUFeatures g =
// f.With(CPUFeatures::kPmull1Q).Without(CPUFeatures::kCRC32);
//
// // Features can be queried. Where multiple features are given, they are
// // combined with logical AND.
// if (f.Has(CPUFeatures::kNEON)) { ... }
// if (f.Has(CPUFeatures::kFP, CPUFeatures::kNEON)) { ... }
// if (f.Has(g)) { ... }
// // If the empty set is requested, the result is always 'true'.
// VIXL_ASSERT(f.Has(CPUFeatures()));
//
// // For debug and reporting purposes, features can be enumerated (or
// // printed directly):
// std::cout << CPUFeatures::kNEON; // Prints something like "NEON".
// std::cout << f; // Prints something like "FP, NEON, CRC32".
class CPUFeatures {
public:
// clang-format off
// Individual features.
// These should be treated as opaque tokens. User code should not rely on
// specific numeric values or ordering.
enum Feature {
// Refer to VIXL_CPU_FEATURE_LIST (above) for the list of feature names that
// this class supports.
kNone = -1,
#define VIXL_DECLARE_FEATURE(SYMBOL, NAME, CPUINFO) SYMBOL,
VIXL_CPU_FEATURE_LIST(VIXL_DECLARE_FEATURE)
#undef VIXL_DECLARE_FEATURE
kNumberOfFeatures
};
// clang-format on
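// For reference, the macro expansion above produces an enum of roughly the
// following shape (a sketch; only the first few entries are shown):
//
//   enum Feature {
//     kNone = -1,
//     kIDRegisterEmulation,
//     kFP,
//     kNEON,
//     kCRC32,
//     ...
//     kNumberOfFeatures
//   };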
// By default, construct with no features enabled.
CPUFeatures() : features_{} {}
// Construct with some features already enabled.
template <typename T, typename... U>
CPUFeatures(T first, U... others) : features_{} {
Combine(first, others...);
}
// Construct with all features enabled. This can be used to disable feature
// checking: `Has(...)` returns true regardless of the argument.
static CPUFeatures All();
// Construct an empty CPUFeatures. This is equivalent to the default
// constructor, but is provided for symmetry and convenience.
static CPUFeatures None() { return CPUFeatures(); }
// The presence of these features was assumed by versions of VIXL before this
// API was added, so using this set by default ensures API compatibility.
static CPUFeatures AArch64LegacyBaseline() {
return CPUFeatures(kFP, kNEON, kCRC32);
}
// Construct a new CPUFeatures object using ID registers. This assumes that
// kIDRegisterEmulation is present.
static CPUFeatures InferFromIDRegisters();
enum QueryIDRegistersOption {
kDontQueryIDRegisters,
kQueryIDRegistersIfAvailable
};
// Construct a new CPUFeatures object based on what the OS reports.
static CPUFeatures InferFromOS(
QueryIDRegistersOption option = kQueryIDRegistersIfAvailable);
// Combine another CPUFeatures object into this one. Features that already
// exist in this set are left unchanged.
void Combine(const CPUFeatures& other);
// Combine a specific feature into this set. If it already exists in the set,
// the set is left unchanged.
void Combine(Feature feature);
// Combine multiple features (or feature sets) into this set.
template <typename T, typename... U>
void Combine(T first, U... others) {
Combine(first);
Combine(others...);
}
// Remove features in another CPUFeatures object from this one.
void Remove(const CPUFeatures& other);
// Remove a specific feature from this set. This has no effect if the feature
// doesn't exist in the set.
void Remove(Feature feature0);
// Remove multiple features (or feature sets) from this set.
template <typename T, typename... U>
void Remove(T first, U... others) {
Remove(first);
Remove(others...);
}
// Chaining helpers for convenient construction by combining other CPUFeatures
// or individual Features.
template <typename... T>
CPUFeatures With(T... others) const {
CPUFeatures f(*this);
f.Combine(others...);
return f;
}
template <typename... T>
CPUFeatures Without(T... others) const {
CPUFeatures f(*this);
f.Remove(others...);
return f;
}
// Test whether the `other` feature set is equal to or a subset of this one.
bool Has(const CPUFeatures& other) const;
// Test whether a single feature exists in this set.
// Note that `Has(kNone)` always returns true.
bool Has(Feature feature) const;
// Test whether all of the specified features exist in this set.
template <typename T, typename... U>
bool Has(T first, U... others) const {
return Has(first) && Has(others...);
}
// Return the number of enabled features.
size_t Count() const;
bool HasNoFeatures() const { return Count() == 0; }
// Check for equivalence.
bool operator==(const CPUFeatures& other) const {
return Has(other) && other.Has(*this);
}
bool operator!=(const CPUFeatures& other) const { return !(*this == other); }
typedef CPUFeaturesConstIterator const_iterator;
const_iterator begin() const;
const_iterator end() const;
private:
// Each bit represents a feature. This set will be extended as needed.
std::bitset<kNumberOfFeatures> features_;
friend std::ostream& operator<<(std::ostream& os,
const vixl::CPUFeatures& features);
};
std::ostream& operator<<(std::ostream& os, vixl::CPUFeatures::Feature feature);
std::ostream& operator<<(std::ostream& os, const vixl::CPUFeatures& features);
// This is not a proper C++ iterator type, but it simulates enough of
// ForwardIterator that simple loops can be written.
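// A minimal usage sketch (`PrintFeatures` is a hypothetical helper; the set
// itself is provided by the caller):
//
//   void PrintFeatures(const CPUFeatures& features) {
//     for (CPUFeatures::const_iterator it = features.begin();
//          it != features.end();
//          ++it) {
//       std::cout << *it << std::endl;
//     }
//   }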
class CPUFeaturesConstIterator {
public:
CPUFeaturesConstIterator(const CPUFeatures* cpu_features = NULL,
CPUFeatures::Feature start = CPUFeatures::kNone)
: cpu_features_(cpu_features), feature_(start) {
VIXL_ASSERT(IsValid());
}
bool operator==(const CPUFeaturesConstIterator& other) const;
bool operator!=(const CPUFeaturesConstIterator& other) const {
return !(*this == other);
}
CPUFeaturesConstIterator& operator++();
CPUFeaturesConstIterator operator++(int);
CPUFeatures::Feature operator*() const {
VIXL_ASSERT(IsValid());
return feature_;
}
// For proper support of C++'s simplest "Iterator" concept, this class would
// have to define member types (such as CPUFeaturesIterator::pointer) to make
// it appear as if it iterates over Feature objects in memory. That is, we'd
// need CPUFeatures::iterator to behave like std::vector<Feature>::iterator.
// This is at least partially possible -- the std::vector<bool> specialisation
// does something similar -- but it doesn't seem worthwhile for a
// special-purpose debug helper, so they are omitted here.
private:
const CPUFeatures* cpu_features_;
CPUFeatures::Feature feature_;
bool IsValid() const {
if (cpu_features_ == NULL) {
return feature_ == CPUFeatures::kNone;
}
return cpu_features_->Has(feature_);
}
};
// A convenience scope for temporarily modifying a CPU features object. This
// allows features to be enabled for short sequences.
//
// Expected usage:
//
// {
// CPUFeaturesScope cpu(&masm, CPUFeatures::kCRC32);
// // This scope can now use CRC32, as well as anything else that was enabled
// // before the scope.
//
// ...
//
// // At the end of the scope, the original CPU features are restored.
// }
class CPUFeaturesScope {
public:
// Start a CPUFeaturesScope on any object that implements
// `CPUFeatures* GetCPUFeatures()`.
template <typename T>
explicit CPUFeaturesScope(T* cpu_features_wrapper)
: cpu_features_(cpu_features_wrapper->GetCPUFeatures()),
old_features_(*cpu_features_) {}
// Start a CPUFeaturesScope on any object that implements
// `CPUFeatures* GetCPUFeatures()`, with the specified features enabled.
template <typename T, typename U, typename... V>
CPUFeaturesScope(T* cpu_features_wrapper, U first, V... features)
: cpu_features_(cpu_features_wrapper->GetCPUFeatures()),
old_features_(*cpu_features_) {
cpu_features_->Combine(first, features...);
}
~CPUFeaturesScope() { *cpu_features_ = old_features_; }
// For advanced usage, the CPUFeatures object can be accessed directly.
// The scope will restore the original state when it ends.
CPUFeatures* GetCPUFeatures() const { return cpu_features_; }
void SetCPUFeatures(const CPUFeatures& cpu_features) {
*cpu_features_ = cpu_features;
}
private:
CPUFeatures* const cpu_features_;
const CPUFeatures old_features_;
};
} // namespace vixl
#endif // VIXL_CPU_FEATURES_H

298 3rdparty/vixl/include/vixl/globals-vixl.h vendored Normal file
@ -0,0 +1,298 @@
// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef VIXL_GLOBALS_H
#define VIXL_GLOBALS_H
#if __cplusplus < 201402L
#error VIXL requires C++14
#endif
// Get standard C99 macros for integer types.
#ifndef __STDC_CONSTANT_MACROS
#define __STDC_CONSTANT_MACROS
#endif
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS
#endif
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
extern "C" {
#include <inttypes.h>
#include <stdint.h>
}
#include <cassert>
#include <cstdarg>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include "platform-vixl.h"
#ifdef VIXL_NEGATIVE_TESTING
#include <sstream>
#include <stdexcept>
#include <string>
#endif
namespace vixl {
typedef uint8_t byte;
const int KBytes = 1024;
const int MBytes = 1024 * KBytes;
const int kBitsPerByteLog2 = 3;
const int kBitsPerByte = 1 << kBitsPerByteLog2;
template <int SizeInBits>
struct Unsigned;
template <>
struct Unsigned<32> {
typedef uint32_t type;
};
template <>
struct Unsigned<64> {
typedef uint64_t type;
};
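// Usage sketch: Unsigned<N>::type selects the unsigned integer type of the
// requested width at compile time, for example:
//
//   Unsigned<kBitsPerByte * sizeof(uint32_t)>::type mask = 0;  // uint32_t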
} // namespace vixl
// Detect the host's pointer size.
#if (UINTPTR_MAX == UINT32_MAX)
#define VIXL_HOST_POINTER_32
#elif (UINTPTR_MAX == UINT64_MAX)
#define VIXL_HOST_POINTER_64
#else
#error "Unsupported host pointer size."
#endif
#ifdef VIXL_NEGATIVE_TESTING
#define VIXL_ABORT() \
do { \
std::ostringstream oss; \
oss << "Aborting in " << __FILE__ << ", line " << __LINE__ << std::endl; \
throw std::runtime_error(oss.str()); \
} while (false)
#define VIXL_ABORT_WITH_MSG(msg) \
do { \
std::ostringstream oss; \
oss << (msg) << "in " << __FILE__ << ", line " << __LINE__ << std::endl; \
throw std::runtime_error(oss.str()); \
} while (false)
#define VIXL_CHECK(condition) \
do { \
if (!(condition)) { \
std::ostringstream oss; \
oss << "Assertion failed (" #condition ")\nin "; \
oss << __FILE__ << ", line " << __LINE__ << std::endl; \
throw std::runtime_error(oss.str()); \
} \
} while (false)
#else
#define VIXL_ABORT() \
do { \
printf("Aborting in %s, line %i\n", __FILE__, __LINE__); \
abort(); \
} while (false)
#define VIXL_ABORT_WITH_MSG(msg) \
do { \
printf("%sin %s, line %i\n", (msg), __FILE__, __LINE__); \
abort(); \
} while (false)
#define VIXL_CHECK(condition) \
do { \
if (!(condition)) { \
printf("Assertion failed (%s)\nin %s, line %i\n", \
#condition, \
__FILE__, \
__LINE__); \
abort(); \
} \
} while (false)
#endif
#ifdef VIXL_DEBUG
#define VIXL_ASSERT(condition) VIXL_CHECK(condition)
#define VIXL_UNIMPLEMENTED() \
do { \
VIXL_ABORT_WITH_MSG("UNIMPLEMENTED "); \
} while (false)
#define VIXL_UNREACHABLE() \
do { \
VIXL_ABORT_WITH_MSG("UNREACHABLE "); \
} while (false)
#else
#define VIXL_ASSERT(condition) ((void)0)
#define VIXL_UNIMPLEMENTED() ((void)0)
#define VIXL_UNREACHABLE() ((void)0)
#endif
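// Usage sketch (the function below is hypothetical): VIXL_CHECK() is active
// in all build modes, while VIXL_ASSERT() only expands to a check when
// VIXL_DEBUG is defined:
//
//   void Resize(size_t n) {
//     VIXL_CHECK(n > 0);                     // Always checked.
//     VIXL_ASSERT((n % kBitsPerByte) == 0);  // Checked only in debug builds.
//     // ...
//   }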
// This is not as powerful as template-based assertions, but it is simple.
// It assumes that the descriptions are unique. If this starts being a problem,
// we can switch to a different implementation.
#define VIXL_CONCAT(a, b) a##b
#if __cplusplus >= 201103L
#define VIXL_STATIC_ASSERT_LINE(line_unused, condition, message) \
static_assert(condition, message)
#else
#define VIXL_STATIC_ASSERT_LINE(line, condition, message_unused) \
typedef char VIXL_CONCAT(STATIC_ASSERT_LINE_, line)[(condition) ? 1 : -1] \
__attribute__((unused))
#endif
#define VIXL_STATIC_ASSERT(condition) \
VIXL_STATIC_ASSERT_LINE(__LINE__, condition, "")
#define VIXL_STATIC_ASSERT_MESSAGE(condition, message) \
VIXL_STATIC_ASSERT_LINE(__LINE__, condition, message)
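// Usage sketch:
//
//   VIXL_STATIC_ASSERT(sizeof(uint64_t) == 8);
//   VIXL_STATIC_ASSERT_MESSAGE(kBitsPerByte == 8, "unexpected byte width");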
#define VIXL_WARNING(message) \
do { \
printf("WARNING in %s, line %i: %s", __FILE__, __LINE__, message); \
} while (false)
template <typename T1>
inline void USE(const T1&) {}
template <typename T1, typename T2>
inline void USE(const T1&, const T2&) {}
template <typename T1, typename T2, typename T3>
inline void USE(const T1&, const T2&, const T3&) {}
template <typename T1, typename T2, typename T3, typename T4>
inline void USE(const T1&, const T2&, const T3&, const T4&) {}
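// Usage sketch: USE() consumes values that are only referenced by debug-mode
// assertions, silencing unused-variable warnings in release builds
// (`DoSomething` is hypothetical):
//
//   int status = DoSomething();
//   VIXL_ASSERT(status == 0);
//   USE(status);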
#define VIXL_ALIGNMENT_EXCEPTION() \
do { \
VIXL_ABORT_WITH_MSG("ALIGNMENT EXCEPTION\t"); \
} while (0)
// The clang::fallthrough attribute is used along with the -Wimplicit-fallthrough
// compiler flag to annotate intentional fall-through between switch labels.
// For more information please refer to:
// http://clang.llvm.org/docs/AttributeReference.html#fallthrough-clang-fallthrough
#ifndef __has_warning
#define __has_warning(x) 0
#endif
// Fallthrough annotation for Clang and C++11(201103L).
#if __has_warning("-Wimplicit-fallthrough") && __cplusplus >= 201103L
#define VIXL_FALLTHROUGH() [[clang::fallthrough]]
// Fallthrough annotation for GCC >= 7.
#elif defined(__GNUC__) && __GNUC__ >= 7
#define VIXL_FALLTHROUGH() __attribute__((fallthrough))
#else
#define VIXL_FALLTHROUGH() \
do { \
} while (0)
#endif
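// Usage sketch (hypothetical switch statement):
//
//   switch (kind) {
//     case kA:
//       DoCommonSetup();
//       VIXL_FALLTHROUGH();
//     case kB:
//       DoWork();
//       break;
//   }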
#if __cplusplus >= 201103L
#define VIXL_NO_RETURN [[noreturn]]
#else
#define VIXL_NO_RETURN __attribute__((noreturn))
#endif
#ifdef VIXL_DEBUG
#define VIXL_NO_RETURN_IN_DEBUG_MODE VIXL_NO_RETURN
#else
#define VIXL_NO_RETURN_IN_DEBUG_MODE
#endif
#if __cplusplus >= 201103L
#define VIXL_OVERRIDE override
#define VIXL_CONSTEXPR constexpr
#define VIXL_HAS_CONSTEXPR 1
#else
#define VIXL_OVERRIDE
#define VIXL_CONSTEXPR
#endif
// With VIXL_NEGATIVE_TESTING on, VIXL_ASSERT and VIXL_CHECK will throw
// exceptions but C++11 marks destructors as noexcept(true) by default.
#if defined(VIXL_NEGATIVE_TESTING) && __cplusplus >= 201103L
#define VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION noexcept(false)
#else
#define VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION
#endif
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
#ifndef VIXL_AARCH64_GENERATE_SIMULATOR_CODE
#define VIXL_AARCH64_GENERATE_SIMULATOR_CODE 1
#endif
#else
#ifndef VIXL_AARCH64_GENERATE_SIMULATOR_CODE
#define VIXL_AARCH64_GENERATE_SIMULATOR_CODE 0
#endif
#if VIXL_AARCH64_GENERATE_SIMULATOR_CODE
#warning "Generating Simulator instructions without Simulator support."
#endif
#endif
// We do not have a simulator for AArch32, although we can pretend we do so that
// tests that require running natively can be skipped.
#ifndef __arm__
#define VIXL_INCLUDE_SIMULATOR_AARCH32
#ifndef VIXL_AARCH32_GENERATE_SIMULATOR_CODE
#define VIXL_AARCH32_GENERATE_SIMULATOR_CODE 1
#endif
#else
#ifndef VIXL_AARCH32_GENERATE_SIMULATOR_CODE
#define VIXL_AARCH32_GENERATE_SIMULATOR_CODE 0
#endif
#endif
#ifdef USE_SIMULATOR
#error "Please see the release notes for USE_SIMULATOR."
#endif
// Target Architecture/ISA
#ifdef VIXL_INCLUDE_TARGET_A64
#ifndef VIXL_INCLUDE_TARGET_AARCH64
#define VIXL_INCLUDE_TARGET_AARCH64
#endif
#endif
#if defined(VIXL_INCLUDE_TARGET_A32) && defined(VIXL_INCLUDE_TARGET_T32)
#ifndef VIXL_INCLUDE_TARGET_AARCH32
#define VIXL_INCLUDE_TARGET_AARCH32
#endif
#elif defined(VIXL_INCLUDE_TARGET_A32)
#ifndef VIXL_INCLUDE_TARGET_A32_ONLY
#define VIXL_INCLUDE_TARGET_A32_ONLY
#endif
#else
#ifndef VIXL_INCLUDE_TARGET_T32_ONLY
#define VIXL_INCLUDE_TARGET_T32_ONLY
#endif
#endif
#endif // VIXL_GLOBALS_H

920 3rdparty/vixl/include/vixl/invalset-vixl.h vendored Normal file
@ -0,0 +1,920 @@
// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef VIXL_INVALSET_H_
#define VIXL_INVALSET_H_
#include <cstring>
#include <algorithm>
#include <vector>
#include "globals-vixl.h"
namespace vixl {
// We define a custom data structure template and its iterator as `std`
// containers do not fit the performance requirements for some of our use cases.
//
// The structure behaves like an iterable unordered set with special properties
// and restrictions. "InvalSet" stands for "Invalidatable Set".
//
// Restrictions and requirements:
// - Adding an element already present in the set is illegal. In debug mode,
// this is checked at insertion time.
// - The templated class `ElementType` must provide comparison operators so that
// `std::sort()` can be used.
// - A key must be available to represent invalid elements.
// - Elements with an invalid key must compare higher or equal to any other
// element.
//
// Use cases and performance considerations:
// Our use cases present two specificities that allow us to design this
// structure to provide fast insertion *and* fast search and deletion
// operations:
// - Elements are (generally) inserted in order (sorted according to their key).
// - A key is available to mark elements as invalid (deleted).
// The backing `std::vector` allows for fast insertions. When
// searching for an element we ensure the elements are sorted (this is generally
// the case) and perform a binary search. When deleting an element we do not
// free the associated memory immediately. Instead, an element to be deleted is
// marked with the 'invalid' key. Other methods of the container take care of
// ignoring entries marked as invalid.
// To avoid the overhead of the `std::vector` container when only a few entries
// are used, a number of elements are preallocated.
// 'ElementType' and 'KeyType' are respectively the types of the elements and
// their key. The structure only reclaims memory when safe to do so, if the
// number of elements that can be reclaimed is greater than `RECLAIM_FROM` and
// greater than `<total number of elements> / RECLAIM_FACTOR`.
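//
// A minimal usage sketch (the element type and the template arguments below
// are hypothetical; GetKey() and SetKey() must be specialised for the chosen
// ElementType, and the comparison operators must order entries by key as
// described above):
//
//   struct Entry {
//     int32_t key;  // -1 marks an invalid (deleted) entry.
//     // Comparison operators (<, >, ==) ordering entries by key go here.
//   };
//   // Parameters: ElementType, N_PREALLOCATED_ELEMENTS, KeyType, INVALID_KEY,
//   // RECLAIM_FROM, RECLAIM_FACTOR.
//   typedef InvalSet<Entry, 8, int32_t, -1, 4, 2> EntrySet;
//
//   EntrySet set;
//   set.insert(Entry{42});
//   set.erase(Entry{42});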
// clang-format off
#define TEMPLATE_INVALSET_P_DECL \
class ElementType, \
unsigned N_PREALLOCATED_ELEMENTS, \
class KeyType, \
KeyType INVALID_KEY, \
size_t RECLAIM_FROM, \
unsigned RECLAIM_FACTOR
// clang-format on
#define TEMPLATE_INVALSET_P_DEF \
ElementType, N_PREALLOCATED_ELEMENTS, KeyType, INVALID_KEY, RECLAIM_FROM, \
RECLAIM_FACTOR
template <class S>
class InvalSetIterator; // Forward declaration.
template <TEMPLATE_INVALSET_P_DECL>
class InvalSet {
public:
InvalSet();
~InvalSet() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION;
static const size_t kNPreallocatedElements = N_PREALLOCATED_ELEMENTS;
static const KeyType kInvalidKey = INVALID_KEY;
// C++ STL iterator interface.
typedef InvalSetIterator<InvalSet<TEMPLATE_INVALSET_P_DEF> > iterator;
iterator begin();
iterator end();
// It is illegal to insert an element already present in the set.
void insert(const ElementType& element);
// Looks for the specified element in the set and - if found - deletes it.
// The return value is the number of elements erased: either 0 or 1.
size_t erase(const ElementType& element);
// This indicates the number of (valid) elements stored in this set.
size_t size() const;
// Returns true if no elements are stored in the set.
// Note that this does not mean the backing storage is empty: it can still
// contain invalid elements.
bool empty() const;
void clear();
const ElementType GetMinElement();
// This returns the key of the minimum element in the set.
KeyType GetMinElementKey();
static bool IsValid(const ElementType& element);
static KeyType GetKey(const ElementType& element);
static void SetKey(ElementType* element, KeyType key);
typedef ElementType _ElementType;
typedef KeyType _KeyType;
protected:
// Returns a pointer to the element in vector_ if it was found, or NULL
// otherwise.
ElementType* Search(const ElementType& element);
// The argument *must* point to an element stored in *this* set.
// This function is not allowed to move elements in the backing vector
// storage.
void EraseInternal(ElementType* element);
// The elements in the range searched must be sorted.
ElementType* BinarySearch(const ElementType& element,
ElementType* start,
ElementType* end) const;
// Sort the elements.
enum SortType {
// The 'hard' version guarantees that invalid elements are moved to the end
// of the container.
kHardSort,
// The 'soft' version only guarantees that the elements will be sorted.
// Invalid elements may still be present anywhere in the set.
kSoftSort
};
void Sort(SortType sort_type);
// Delete the elements that have an invalid key. The complexity is linear
// with the size of the vector.
void Clean();
const ElementType Front() const;
const ElementType Back() const;
// Delete invalid trailing elements and return the last valid element in the
// set.
const ElementType CleanBack();
// Returns a pointer to the start or end of the backing storage.
const ElementType* StorageBegin() const;
const ElementType* StorageEnd() const;
ElementType* StorageBegin();
ElementType* StorageEnd();
// Returns the index of the element within the backing storage. The element
// must belong to the backing storage.
size_t GetElementIndex(const ElementType* element) const;
// Returns the element at the specified index in the backing storage.
const ElementType* GetElementAt(size_t index) const;
ElementType* GetElementAt(size_t index);
static const ElementType* GetFirstValidElement(const ElementType* from,
const ElementType* end);
void CacheMinElement();
const ElementType GetCachedMinElement() const;
bool ShouldReclaimMemory() const;
void ReclaimMemory();
bool IsUsingVector() const { return vector_ != NULL; }
void SetSorted(bool sorted) { sorted_ = sorted; }
// We cache some data commonly required by users to improve performance.
// We cannot cache pointers to elements as we do not control the backing
// storage.
bool valid_cached_min_;
size_t cached_min_index_; // Valid iff `valid_cached_min_` is true.
KeyType cached_min_key_; // Valid iff `valid_cached_min_` is true.
// Indicates whether the elements are sorted.
bool sorted_;
// This represents the number of (valid) elements in this set.
size_t size_;
// The backing storage is either the array of preallocated elements or the
// vector. The structure starts by using the preallocated elements, and
// transitions (permanently) to using the vector once more than
// kNPreallocatedElements are used.
// Elements are only invalidated when using the vector. The preallocated
// storage always only contains valid elements.
ElementType preallocated_[kNPreallocatedElements];
std::vector<ElementType>* vector_;
// Iterators acquire and release this monitor. While a set is acquired,
// certain operations are illegal to ensure that the iterator will
// correctly iterate over the elements in the set.
int monitor_;
#ifdef VIXL_DEBUG
int monitor() const { return monitor_; }
void Acquire() { monitor_++; }
void Release() {
monitor_--;
VIXL_ASSERT(monitor_ >= 0);
}
#endif
private:
// The copy constructor and assignment operator are not used and the defaults
// are unsafe, so disable them (without an implementation).
#if __cplusplus >= 201103L
InvalSet(const InvalSet& other) = delete;
InvalSet operator=(const InvalSet& other) = delete;
#else
InvalSet(const InvalSet& other);
InvalSet operator=(const InvalSet& other);
#endif
friend class InvalSetIterator<InvalSet<TEMPLATE_INVALSET_P_DEF> >;
};
template <class S>
class InvalSetIterator {
public:
using iterator_category = std::forward_iterator_tag;
using value_type = typename S::_ElementType;
using difference_type = std::ptrdiff_t;
using pointer = typename S::_ElementType*;
using reference = typename S::_ElementType&;
private:
// Redefine types to mirror the associated set types.
typedef typename S::_ElementType ElementType;
typedef typename S::_KeyType KeyType;
public:
explicit InvalSetIterator(S* inval_set = NULL);
// This class implements the standard copy-swap idiom.
~InvalSetIterator();
InvalSetIterator(const InvalSetIterator<S>& other);
InvalSetIterator<S>& operator=(InvalSetIterator<S> other);
#if __cplusplus >= 201103L
InvalSetIterator(InvalSetIterator<S>&& other) noexcept;
#endif
friend void swap(InvalSetIterator<S>& a, InvalSetIterator<S>& b) {
using std::swap;
swap(a.using_vector_, b.using_vector_);
swap(a.index_, b.index_);
swap(a.inval_set_, b.inval_set_);
}
// Return true if the iterator is at the end of the set.
bool Done() const;
// Move this iterator to the end of the set.
void Finish();
// Delete the current element and advance the iterator to point to the next
// element.
void DeleteCurrentAndAdvance();
static bool IsValid(const ElementType& element);
static KeyType GetKey(const ElementType& element);
// Extra helpers to support the forward-iterator interface.
InvalSetIterator<S>& operator++(); // Pre-increment.
InvalSetIterator<S> operator++(int); // Post-increment.
bool operator==(const InvalSetIterator<S>& rhs) const;
bool operator!=(const InvalSetIterator<S>& rhs) const {
return !(*this == rhs);
}
ElementType& operator*() { return *Current(); }
const ElementType& operator*() const { return *Current(); }
ElementType* operator->() { return Current(); }
const ElementType* operator->() const { return Current(); }
protected:
void MoveToValidElement();
// Indicates if the iterator is looking at the vector or at the preallocated
// elements.
bool using_vector_;
// Used when looking at the preallocated elements, or in debug mode when using
// the vector to track how many times the iterator has advanced.
size_t index_;
typename std::vector<ElementType>::iterator iterator_;
S* inval_set_;
// TODO: These helpers are deprecated and will be removed in future versions
// of VIXL.
ElementType* Current() const;
void Advance();
};
template <TEMPLATE_INVALSET_P_DECL>
InvalSet<TEMPLATE_INVALSET_P_DEF>::InvalSet()
: valid_cached_min_(false), sorted_(true), size_(0), vector_(NULL) {
#ifdef VIXL_DEBUG
monitor_ = 0;
#endif
}
template <TEMPLATE_INVALSET_P_DECL>
InvalSet<TEMPLATE_INVALSET_P_DEF>::~InvalSet()
VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
VIXL_ASSERT(monitor_ == 0);
delete vector_;
}
template <TEMPLATE_INVALSET_P_DECL>
typename InvalSet<TEMPLATE_INVALSET_P_DEF>::iterator
InvalSet<TEMPLATE_INVALSET_P_DEF>::begin() {
return iterator(this);
}
template <TEMPLATE_INVALSET_P_DECL>
typename InvalSet<TEMPLATE_INVALSET_P_DEF>::iterator
InvalSet<TEMPLATE_INVALSET_P_DEF>::end() {
iterator end(this);
end.Finish();
return end;
}
template <TEMPLATE_INVALSET_P_DECL>
void InvalSet<TEMPLATE_INVALSET_P_DEF>::insert(const ElementType& element) {
VIXL_ASSERT(monitor() == 0);
VIXL_ASSERT(IsValid(element));
VIXL_ASSERT(Search(element) == NULL);
SetSorted(empty() || (sorted_ && (element > CleanBack())));
if (IsUsingVector()) {
vector_->push_back(element);
} else {
if (size_ < kNPreallocatedElements) {
preallocated_[size_] = element;
} else {
// Transition to using the vector.
vector_ =
new std::vector<ElementType>(preallocated_, preallocated_ + size_);
vector_->push_back(element);
}
}
size_++;
if (valid_cached_min_ && (element < GetMinElement())) {
cached_min_index_ = IsUsingVector() ? vector_->size() - 1 : size_ - 1;
cached_min_key_ = GetKey(element);
valid_cached_min_ = true;
}
if (ShouldReclaimMemory()) {
ReclaimMemory();
}
}
template <TEMPLATE_INVALSET_P_DECL>
size_t InvalSet<TEMPLATE_INVALSET_P_DEF>::erase(const ElementType& element) {
VIXL_ASSERT(monitor() == 0);
VIXL_ASSERT(IsValid(element));
ElementType* local_element = Search(element);
if (local_element != NULL) {
EraseInternal(local_element);
return 1;
}
return 0;
}
template <TEMPLATE_INVALSET_P_DECL>
ElementType* InvalSet<TEMPLATE_INVALSET_P_DEF>::Search(
const ElementType& element) {
VIXL_ASSERT(monitor() == 0);
if (empty()) {
return NULL;
}
if (ShouldReclaimMemory()) {
ReclaimMemory();
}
if (!sorted_) {
Sort(kHardSort);
}
if (!valid_cached_min_) {
CacheMinElement();
}
return BinarySearch(element, GetElementAt(cached_min_index_), StorageEnd());
}
template <TEMPLATE_INVALSET_P_DECL>
size_t InvalSet<TEMPLATE_INVALSET_P_DEF>::size() const {
return size_;
}
template <TEMPLATE_INVALSET_P_DECL>
bool InvalSet<TEMPLATE_INVALSET_P_DEF>::empty() const {
return size_ == 0;
}
template <TEMPLATE_INVALSET_P_DECL>
void InvalSet<TEMPLATE_INVALSET_P_DEF>::clear() {
VIXL_ASSERT(monitor() == 0);
size_ = 0;
if (IsUsingVector()) {
vector_->clear();
}
SetSorted(true);
valid_cached_min_ = false;
}
template <TEMPLATE_INVALSET_P_DECL>
const ElementType InvalSet<TEMPLATE_INVALSET_P_DEF>::GetMinElement() {
VIXL_ASSERT(monitor() == 0);
VIXL_ASSERT(!empty());
CacheMinElement();
return *GetElementAt(cached_min_index_);
}
template <TEMPLATE_INVALSET_P_DECL>
KeyType InvalSet<TEMPLATE_INVALSET_P_DEF>::GetMinElementKey() {
VIXL_ASSERT(monitor() == 0);
if (valid_cached_min_) {
return cached_min_key_;
} else {
return GetKey(GetMinElement());
}
}
template <TEMPLATE_INVALSET_P_DECL>
bool InvalSet<TEMPLATE_INVALSET_P_DEF>::IsValid(const ElementType& element) {
return GetKey(element) != kInvalidKey;
}
template <TEMPLATE_INVALSET_P_DECL>
void InvalSet<TEMPLATE_INVALSET_P_DEF>::EraseInternal(ElementType* element) {
// Note that this function must be safe even while an iterator has acquired
// this set.
VIXL_ASSERT(element != NULL);
size_t deleted_index = GetElementIndex(element);
if (IsUsingVector()) {
VIXL_ASSERT((&(vector_->front()) <= element) &&
(element <= &(vector_->back())));
SetKey(element, kInvalidKey);
} else {
VIXL_ASSERT((preallocated_ <= element) &&
(element < (preallocated_ + kNPreallocatedElements)));
ElementType* end = preallocated_ + kNPreallocatedElements;
size_t copy_size = sizeof(*element) * (end - element - 1);
memmove(element, element + 1, copy_size);
}
size_--;
if (valid_cached_min_ && (deleted_index == cached_min_index_)) {
if (sorted_ && !empty()) {
const ElementType* min = GetFirstValidElement(element, StorageEnd());
cached_min_index_ = GetElementIndex(min);
cached_min_key_ = GetKey(*min);
valid_cached_min_ = true;
} else {
valid_cached_min_ = false;
}
}
}
template <TEMPLATE_INVALSET_P_DECL>
ElementType* InvalSet<TEMPLATE_INVALSET_P_DEF>::BinarySearch(
const ElementType& element, ElementType* start, ElementType* end) const {
if (start == end) {
return NULL;
}
VIXL_ASSERT(sorted_);
VIXL_ASSERT(start < end);
VIXL_ASSERT(!empty());
// Perform a binary search through the elements while ignoring invalid
// elements.
ElementType* elements = start;
size_t low = 0;
size_t high = (end - start) - 1;
while (low < high) {
// Find valid bounds.
while (!IsValid(elements[low]) && (low < high)) ++low;
while (!IsValid(elements[high]) && (low < high)) --high;
VIXL_ASSERT(low <= high);
// Avoid overflow when computing the middle index.
size_t middle = low + (high - low) / 2;
if ((middle == low) || (middle == high)) {
break;
}
while ((middle < high - 1) && !IsValid(elements[middle])) ++middle;
while ((low + 1 < middle) && !IsValid(elements[middle])) --middle;
if (!IsValid(elements[middle])) {
break;
}
if (elements[middle] < element) {
low = middle;
} else {
high = middle;
}
}
if (elements[low] == element) return &elements[low];
if (elements[high] == element) return &elements[high];
return NULL;
}
template <TEMPLATE_INVALSET_P_DECL>
void InvalSet<TEMPLATE_INVALSET_P_DEF>::Sort(SortType sort_type) {
if (sort_type == kSoftSort) {
if (sorted_) {
return;
}
}
VIXL_ASSERT(monitor() == 0);
if (empty()) {
return;
}
Clean();
std::sort(StorageBegin(), StorageEnd());
SetSorted(true);
cached_min_index_ = 0;
cached_min_key_ = GetKey(Front());
valid_cached_min_ = true;
}
template <TEMPLATE_INVALSET_P_DECL>
void InvalSet<TEMPLATE_INVALSET_P_DEF>::Clean() {
VIXL_ASSERT(monitor() == 0);
if (empty() || !IsUsingVector()) {
return;
}
// Manually iterate through the vector storage to discard invalid elements.
ElementType* start = &(vector_->front());
ElementType* end = start + vector_->size();
ElementType* c = start;
ElementType* first_invalid;
ElementType* first_valid;
ElementType* next_invalid;
while ((c < end) && IsValid(*c)) c++;
first_invalid = c;
while (c < end) {
while ((c < end) && !IsValid(*c)) c++;
first_valid = c;
while ((c < end) && IsValid(*c)) c++;
next_invalid = c;
ptrdiff_t n_moved_elements = (next_invalid - first_valid);
memmove(first_invalid, first_valid, n_moved_elements * sizeof(*c));
first_invalid = first_invalid + n_moved_elements;
c = next_invalid;
}
// Delete the trailing invalid elements.
vector_->erase(vector_->begin() + (first_invalid - start), vector_->end());
VIXL_ASSERT(vector_->size() == size_);
if (sorted_) {
valid_cached_min_ = true;
cached_min_index_ = 0;
cached_min_key_ = GetKey(*GetElementAt(0));
} else {
valid_cached_min_ = false;
}
}
template <TEMPLATE_INVALSET_P_DECL>
const ElementType InvalSet<TEMPLATE_INVALSET_P_DEF>::Front() const {
VIXL_ASSERT(!empty());
return IsUsingVector() ? vector_->front() : preallocated_[0];
}
template <TEMPLATE_INVALSET_P_DECL>
const ElementType InvalSet<TEMPLATE_INVALSET_P_DEF>::Back() const {
VIXL_ASSERT(!empty());
return IsUsingVector() ? vector_->back() : preallocated_[size_ - 1];
}
template <TEMPLATE_INVALSET_P_DECL>
const ElementType InvalSet<TEMPLATE_INVALSET_P_DEF>::CleanBack() {
VIXL_ASSERT(monitor() == 0);
if (IsUsingVector()) {
// Delete the invalid trailing elements.
typename std::vector<ElementType>::reverse_iterator it = vector_->rbegin();
while (!IsValid(*it)) {
it++;
}
vector_->erase(it.base(), vector_->end());
}
return Back();
}
template <TEMPLATE_INVALSET_P_DECL>
const ElementType* InvalSet<TEMPLATE_INVALSET_P_DEF>::StorageBegin() const {
return IsUsingVector() ? &(vector_->front()) : preallocated_;
}
template <TEMPLATE_INVALSET_P_DECL>
const ElementType* InvalSet<TEMPLATE_INVALSET_P_DEF>::StorageEnd() const {
return IsUsingVector() ? &(vector_->back()) + 1 : preallocated_ + size_;
}
template <TEMPLATE_INVALSET_P_DECL>
ElementType* InvalSet<TEMPLATE_INVALSET_P_DEF>::StorageBegin() {
return IsUsingVector() ? &(vector_->front()) : preallocated_;
}
template <TEMPLATE_INVALSET_P_DECL>
ElementType* InvalSet<TEMPLATE_INVALSET_P_DEF>::StorageEnd() {
return IsUsingVector() ? &(vector_->back()) + 1 : preallocated_ + size_;
}
template <TEMPLATE_INVALSET_P_DECL>
size_t InvalSet<TEMPLATE_INVALSET_P_DEF>::GetElementIndex(
const ElementType* element) const {
VIXL_ASSERT((StorageBegin() <= element) && (element < StorageEnd()));
return element - StorageBegin();
}
template <TEMPLATE_INVALSET_P_DECL>
const ElementType* InvalSet<TEMPLATE_INVALSET_P_DEF>::GetElementAt(
size_t index) const {
VIXL_ASSERT((IsUsingVector() && (index < vector_->size())) ||
(index < size_));
return StorageBegin() + index;
}
template <TEMPLATE_INVALSET_P_DECL>
ElementType* InvalSet<TEMPLATE_INVALSET_P_DEF>::GetElementAt(size_t index) {
VIXL_ASSERT((IsUsingVector() && (index < vector_->size())) ||
(index < size_));
return StorageBegin() + index;
}
template <TEMPLATE_INVALSET_P_DECL>
const ElementType* InvalSet<TEMPLATE_INVALSET_P_DEF>::GetFirstValidElement(
const ElementType* from, const ElementType* end) {
while ((from < end) && !IsValid(*from)) {
from++;
}
return from;
}
template <TEMPLATE_INVALSET_P_DECL>
void InvalSet<TEMPLATE_INVALSET_P_DEF>::CacheMinElement() {
VIXL_ASSERT(monitor() == 0);
VIXL_ASSERT(!empty());
if (valid_cached_min_) {
return;
}
if (sorted_) {
const ElementType* min = GetFirstValidElement(StorageBegin(), StorageEnd());
cached_min_index_ = GetElementIndex(min);
cached_min_key_ = GetKey(*min);
valid_cached_min_ = true;
} else {
Sort(kHardSort);
}
VIXL_ASSERT(valid_cached_min_);
}
template <TEMPLATE_INVALSET_P_DECL>
bool InvalSet<TEMPLATE_INVALSET_P_DEF>::ShouldReclaimMemory() const {
if (!IsUsingVector()) {
return false;
}
size_t n_invalid_elements = vector_->size() - size_;
return (n_invalid_elements > RECLAIM_FROM) &&
(n_invalid_elements > vector_->size() / RECLAIM_FACTOR);
}
template <TEMPLATE_INVALSET_P_DECL>
void InvalSet<TEMPLATE_INVALSET_P_DEF>::ReclaimMemory() {
VIXL_ASSERT(monitor() == 0);
Clean();
}
template <class S>
InvalSetIterator<S>::InvalSetIterator(S* inval_set)
: using_vector_((inval_set != NULL) && inval_set->IsUsingVector()),
index_(0),
inval_set_(inval_set) {
if (inval_set != NULL) {
inval_set->Sort(S::kSoftSort);
#ifdef VIXL_DEBUG
inval_set->Acquire();
#endif
if (using_vector_) {
iterator_ = typename std::vector<ElementType>::iterator(
inval_set_->vector_->begin());
}
MoveToValidElement();
}
}
template <class S>
InvalSetIterator<S>::~InvalSetIterator() {
#ifdef VIXL_DEBUG
if (inval_set_ != NULL) inval_set_->Release();
#endif
}
template <class S>
typename S::_ElementType* InvalSetIterator<S>::Current() const {
VIXL_ASSERT(!Done());
if (using_vector_) {
return &(*iterator_);
} else {
return &(inval_set_->preallocated_[index_]);
}
}
template <class S>
void InvalSetIterator<S>::Advance() {
++(*this);
}
template <class S>
bool InvalSetIterator<S>::Done() const {
if (using_vector_) {
bool done = (iterator_ == inval_set_->vector_->end());
VIXL_ASSERT(done == (index_ == inval_set_->size()));
return done;
} else {
return index_ == inval_set_->size();
}
}
template <class S>
void InvalSetIterator<S>::Finish() {
VIXL_ASSERT(inval_set_->sorted_);
if (using_vector_) {
iterator_ = inval_set_->vector_->end();
}
index_ = inval_set_->size();
}
template <class S>
void InvalSetIterator<S>::DeleteCurrentAndAdvance() {
if (using_vector_) {
inval_set_->EraseInternal(&(*iterator_));
MoveToValidElement();
} else {
inval_set_->EraseInternal(inval_set_->preallocated_ + index_);
}
}
template <class S>
bool InvalSetIterator<S>::IsValid(const ElementType& element) {
return S::IsValid(element);
}
template <class S>
typename S::_KeyType InvalSetIterator<S>::GetKey(const ElementType& element) {
return S::GetKey(element);
}
template <class S>
void InvalSetIterator<S>::MoveToValidElement() {
if (using_vector_) {
while ((iterator_ != inval_set_->vector_->end()) && !IsValid(*iterator_)) {
iterator_++;
}
} else {
VIXL_ASSERT(inval_set_->empty() || IsValid(inval_set_->preallocated_[0]));
// Nothing to do.
}
}
template <class S>
InvalSetIterator<S>::InvalSetIterator(const InvalSetIterator<S>& other)
: using_vector_(other.using_vector_),
index_(other.index_),
inval_set_(other.inval_set_) {
#ifdef VIXL_DEBUG
if (inval_set_ != NULL) inval_set_->Acquire();
#endif
}
#if __cplusplus >= 201103L
template <class S>
InvalSetIterator<S>::InvalSetIterator(InvalSetIterator<S>&& other) noexcept
: using_vector_(false), index_(0), inval_set_(NULL) {
swap(*this, other);
}
#endif
template <class S>
InvalSetIterator<S>& InvalSetIterator<S>::operator=(InvalSetIterator<S> other) {
swap(*this, other);
return *this;
}
template <class S>
bool InvalSetIterator<S>::operator==(const InvalSetIterator<S>& rhs) const {
bool equal = (inval_set_ == rhs.inval_set_);
// If the inval_set_ matches, using_vector_ must also match.
VIXL_ASSERT(!equal || (using_vector_ == rhs.using_vector_));
if (using_vector_) {
equal = equal && (iterator_ == rhs.iterator_);
// In debug mode, index_ is maintained even with using_vector_.
VIXL_ASSERT(!equal || (index_ == rhs.index_));
} else {
equal = equal && (index_ == rhs.index_);
#ifdef DEBUG
// If not using_vector_, iterator_ should be default-initialised.
typename std::vector<ElementType>::iterator default_iterator;
VIXL_ASSERT(iterator_ == default_iterator);
VIXL_ASSERT(rhs.iterator_ == default_iterator);
#endif
}
return equal;
}
template <class S>
InvalSetIterator<S>& InvalSetIterator<S>::operator++() {
// Pre-increment.
VIXL_ASSERT(!Done());
if (using_vector_) {
iterator_++;
#ifdef VIXL_DEBUG
index_++;
#endif
MoveToValidElement();
} else {
index_++;
}
return *this;
}
template <class S>
InvalSetIterator<S> InvalSetIterator<S>::operator++(int /* unused */) {
// Post-increment.
VIXL_ASSERT(!Done());
InvalSetIterator<S> old(*this);
++(*this);
return old;
}
#undef TEMPLATE_INVALSET_P_DECL
#undef TEMPLATE_INVALSET_P_DEF
} // namespace vixl
#endif // VIXL_INVALSET_H_

75 3rdparty/vixl/include/vixl/macro-assembler-interface.h vendored Normal file
@ -0,0 +1,75 @@
// Copyright 2016, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef VIXL_MACRO_ASSEMBLER_INTERFACE_H
#define VIXL_MACRO_ASSEMBLER_INTERFACE_H
#include "assembler-base-vixl.h"
namespace vixl {
class MacroAssemblerInterface {
public:
virtual internal::AssemblerBase* AsAssemblerBase() = 0;
virtual ~MacroAssemblerInterface() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {}
virtual bool AllowMacroInstructions() const = 0;
virtual bool ArePoolsBlocked() const = 0;
protected:
virtual void SetAllowMacroInstructions(bool allow) = 0;
virtual void BlockPools() = 0;
virtual void ReleasePools() = 0;
virtual void EnsureEmitPoolsFor(size_t size) = 0;
// Emit the branch over a literal/veneer pool, and any necessary padding
// before it.
virtual void EmitPoolHeader() = 0;
// When this is called, the label used for branching over the pool is bound.
// This can also generate additional padding, which must correspond to the
// alignment_ value passed to the PoolManager (which needs to keep track of
// the exact size of the generated pool).
virtual void EmitPoolFooter() = 0;
// Emit n bytes of padding that does not have to be executable.
virtual void EmitPaddingBytes(int n) = 0;
// Emit n bytes of padding that has to be executable. Implementations must
// make sure this is a multiple of the instruction size.
virtual void EmitNopBytes(int n) = 0;
// The following scopes need access to the above methods in order to implement
// pool blocking and to temporarily disable the macro-assembler.
friend class ExactAssemblyScope;
friend class EmissionCheckScope;
template <typename T>
friend class PoolManager;
};
} // namespace vixl
#endif // VIXL_MACRO_ASSEMBLER_INTERFACE_H

39 3rdparty/vixl/include/vixl/platform-vixl.h vendored Normal file
@ -0,0 +1,39 @@
// Copyright 2014, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef PLATFORM_H
#define PLATFORM_H
// Define platform specific functionalities.
extern "C" {
#include <signal.h>
}
namespace vixl {
inline void HostBreakpoint() { raise(SIGINT); }
} // namespace vixl
#endif

522 3rdparty/vixl/include/vixl/pool-manager-impl.h vendored Normal file
@ -0,0 +1,522 @@
// Copyright 2017, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef VIXL_POOL_MANAGER_IMPL_H_
#define VIXL_POOL_MANAGER_IMPL_H_
#include "pool-manager.h"
#include <algorithm>
#include "assembler-base-vixl.h"
namespace vixl {
template <typename T>
T PoolManager<T>::Emit(MacroAssemblerInterface* masm,
T pc,
int num_bytes,
ForwardReference<T>* new_reference,
LocationBase<T>* new_object,
EmitOption option) {
// Make sure that the buffer still has the alignment we think it does.
VIXL_ASSERT(IsAligned(masm->AsAssemblerBase()
->GetBuffer()
->GetStartAddress<uintptr_t>(),
buffer_alignment_));
// We should not call this method when the pools are blocked.
VIXL_ASSERT(!IsBlocked());
if (objects_.empty()) return pc;
// Emit header.
if (option == kBranchRequired) {
masm->EmitPoolHeader();
// TODO: The pc at this point might not actually be aligned according to
// alignment_. This is to support the current AARCH32 MacroAssembler, which
// does not have a fixed instruction size. In practice, the pc will be
// aligned to the alignment that instructions of the current instruction set
// require, so we do not need to align it here. All other calculations do
// take the alignment into account, which only makes the checkpoint
// calculations more conservative when we use T32. Uncomment the following
// assertion if the AARCH32 MacroAssembler is modified to support only one
// ISA at a time.
pc += header_size_;
} else {
// If the header is optional, we might need to add some extra padding to
// meet the minimum location of the first object.
if (pc < objects_[0].min_location_) {
int32_t padding = objects_[0].min_location_ - pc;
masm->EmitNopBytes(padding);
pc += padding;
}
}
PoolObject<T>* existing_object = GetObjectIfTracked(new_object);
// Go through all objects and emit one by one.
for (objects_iter iter = objects_.begin(); iter != objects_.end();) {
PoolObject<T>& current = *iter;
if (ShouldSkipObject(&current,
pc,
num_bytes,
new_reference,
new_object,
existing_object)) {
++iter;
continue;
}
LocationBase<T>* label_base = current.label_base_;
T aligned_pc = AlignUp(pc, current.alignment_);
masm->EmitPaddingBytes(aligned_pc - pc);
pc = aligned_pc;
VIXL_ASSERT(pc >= current.min_location_);
VIXL_ASSERT(pc <= current.max_location_);
// First call SetLocation, which will also resolve the references, and then
// call EmitPoolObject, which might add a new reference.
label_base->SetLocation(masm->AsAssemblerBase(), pc);
label_base->EmitPoolObject(masm);
int object_size = label_base->GetPoolObjectSizeInBytes();
if (label_base->ShouldDeletePoolObjectOnPlacement()) {
label_base->MarkBound();
iter = RemoveAndDelete(iter);
} else {
VIXL_ASSERT(!current.label_base_->ShouldDeletePoolObjectOnPlacement());
current.label_base_->UpdatePoolObject(&current);
VIXL_ASSERT(current.alignment_ >= label_base->GetPoolObjectAlignment());
++iter;
}
pc += object_size;
}
// Recalculate the checkpoint before emitting the footer. The footer might
// call Bind() which will check if we need to emit.
RecalculateCheckpoint();
// Always emit footer - this might add some padding.
masm->EmitPoolFooter();
pc = AlignUp(pc, alignment_);
return pc;
}
template <typename T>
bool PoolManager<T>::ShouldSkipObject(PoolObject<T>* pool_object,
T pc,
int num_bytes,
ForwardReference<T>* new_reference,
LocationBase<T>* new_object,
PoolObject<T>* existing_object) const {
// We assume that all objects before this have been skipped and all objects
// after this will be emitted, therefore we will emit the whole pool. Add
// the header size and alignment, as well as the number of bytes we are
// planning to emit.
T max_actual_location = pc + num_bytes + max_pool_size_;
if (new_reference != NULL) {
// If we're adding a new object, also assume that it will have to be emitted
// before the object we are considering skipping.
VIXL_ASSERT(new_object != NULL);
T new_object_alignment = std::max(new_reference->object_alignment_,
new_object->GetPoolObjectAlignment());
if ((existing_object != NULL) &&
(existing_object->alignment_ > new_object_alignment)) {
new_object_alignment = existing_object->alignment_;
}
max_actual_location +=
(new_object->GetPoolObjectSizeInBytes() + new_object_alignment - 1);
}
// Hard limit.
if (max_actual_location >= pool_object->max_location_) return false;
// Use heuristic.
return (pc < pool_object->skip_until_location_hint_);
}
template <typename T>
T PoolManager<T>::UpdateCheckpointForObject(T checkpoint,
const PoolObject<T>* object) {
checkpoint -= object->label_base_->GetPoolObjectSizeInBytes();
if (checkpoint > object->max_location_) checkpoint = object->max_location_;
checkpoint = AlignDown(checkpoint, object->alignment_);
return checkpoint;
}
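// A worked example of UpdateCheckpointForObject (illustrative values only):
// with checkpoint = 1000 and an object whose size is 8 bytes, whose
// max_location_ is 900 and whose alignment_ is 4, the checkpoint first drops
// to 992, is then clamped to 900, and AlignDown(900, 4) leaves it at 900.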
template <typename T>
static T MaxCheckpoint() {
return std::numeric_limits<T>::max();
}
template <typename T>
static inline bool CheckCurrentPC(T pc, T checkpoint) {
VIXL_ASSERT(pc <= checkpoint);
// We must emit the pools if we are at the checkpoint now.
return pc == checkpoint;
}
template <typename T>
static inline bool CheckFuturePC(T pc, T checkpoint) {
// We do not need to emit the pools now if the projected future PC will be
// equal to the checkpoint (we will need to emit the pools then).
return pc > checkpoint;
}
template <typename T>
bool PoolManager<T>::MustEmit(T pc,
int num_bytes,
ForwardReference<T>* reference,
LocationBase<T>* label_base) const {
// Check if we are at or past the checkpoint.
if (CheckCurrentPC(pc, checkpoint_)) return true;
// Check if the future PC will be past the checkpoint.
pc += num_bytes;
if (CheckFuturePC(pc, checkpoint_)) return true;
// No new reference - nothing to do.
if (reference == NULL) {
VIXL_ASSERT(label_base == NULL);
return false;
}
if (objects_.empty()) {
// Basic assertions that restrictions on the new (and only) reference are
// possible to satisfy.
VIXL_ASSERT(AlignUp(pc + header_size_, alignment_) >=
reference->min_object_location_);
VIXL_ASSERT(pc <= reference->max_object_location_);
return false;
}
// Check if the object is already being tracked.
const PoolObject<T>* existing_object = GetObjectIfTracked(label_base);
if (existing_object != NULL) {
// If the existing_object is already in objects_ and its new
// alignment and new location restrictions are not stricter, skip the more
// expensive check.
if ((reference->min_object_location_ <= existing_object->min_location_) &&
(reference->max_object_location_ >= existing_object->max_location_) &&
(reference->object_alignment_ <= existing_object->alignment_)) {
return false;
}
}
// Create a temporary object.
PoolObject<T> temp(label_base);
temp.RestrictRange(reference->min_object_location_,
reference->max_object_location_);
temp.RestrictAlignment(reference->object_alignment_);
if (existing_object != NULL) {
temp.RestrictRange(existing_object->min_location_,
existing_object->max_location_);
temp.RestrictAlignment(existing_object->alignment_);
}
// Check if the new reference can be added after the end of the current pool.
// If yes, we don't need to emit.
T last_reachable = AlignDown(temp.max_location_, temp.alignment_);
const PoolObject<T>& last = objects_.back();
T after_pool = AlignDown(last.max_location_, last.alignment_) +
last.label_base_->GetPoolObjectSizeInBytes();
// The current object can be placed at the end of the pool, even if the last
// object is placed at the last possible location.
if (last_reachable >= after_pool) return false;
// The current object can be placed after the code we are about to emit and
// after the existing pool (with a pessimistic size estimate).
if (last_reachable >= pc + num_bytes + max_pool_size_) return false;
// We're not in a trivial case, so we need to recalculate the checkpoint.
// Check (conservatively) if we can fit it into the objects_ array, without
// breaking our assumptions. Here we want to recalculate the checkpoint as
// if the new reference was added to the PoolManager but without actually
// adding it (as removing it is non-trivial).
T checkpoint = MaxCheckpoint<T>();
// Will temp be the last object in objects_?
if (PoolObjectLessThan(last, temp)) {
checkpoint = UpdateCheckpointForObject(checkpoint, &temp);
if (checkpoint < temp.min_location_) return true;
}
bool temp_not_placed_yet = true;
for (int i = static_cast<int>(objects_.size()) - 1; i >= 0; --i) {
const PoolObject<T>& current = objects_[i];
if (temp_not_placed_yet && PoolObjectLessThan(current, temp)) {
checkpoint = UpdateCheckpointForObject(checkpoint, &temp);
if (checkpoint < temp.min_location_) return true;
if (CheckFuturePC(pc, checkpoint)) return true;
temp_not_placed_yet = false;
}
if (current.label_base_ == label_base) continue;
checkpoint = UpdateCheckpointForObject(checkpoint, &current);
if (checkpoint < current.min_location_) return true;
if (CheckFuturePC(pc, checkpoint)) return true;
}
// temp is the object with the smallest max_location_.
if (temp_not_placed_yet) {
checkpoint = UpdateCheckpointForObject(checkpoint, &temp);
if (checkpoint < temp.min_location_) return true;
}
// Take the header into account.
checkpoint -= header_size_;
checkpoint = AlignDown(checkpoint, alignment_);
return CheckFuturePC(pc, checkpoint);
}
template <typename T>
void PoolManager<T>::RecalculateCheckpoint(SortOption sort_option) {
// TODO: Improve the max_pool_size_ estimate by starting from the
// min_location_ of the first object, calculating the end of the pool as if
// all objects were placed starting from there, and in the end adding the
// maximum object alignment found minus one (which is the maximum extra
// padding we would need if we were to relocate the pool to a different
// address).
max_pool_size_ = 0;
if (objects_.empty()) {
checkpoint_ = MaxCheckpoint<T>();
return;
}
// Sort objects by their max_location_.
if (sort_option == kSortRequired) {
std::sort(objects_.begin(), objects_.end(), PoolObjectLessThan);
}
// Add the header size and header and footer max alignment to the maximum
// pool size.
max_pool_size_ += header_size_ + 2 * (alignment_ - 1);
T checkpoint = MaxCheckpoint<T>();
int last_object_index = static_cast<int>(objects_.size()) - 1;
for (int i = last_object_index; i >= 0; --i) {
// Bring back the checkpoint by the size of the current object, unless
// we need to bring it back more, then align.
PoolObject<T>& current = objects_[i];
checkpoint = UpdateCheckpointForObject(checkpoint, &current);
VIXL_ASSERT(checkpoint >= current.min_location_);
max_pool_size_ += (current.alignment_ - 1 +
current.label_base_->GetPoolObjectSizeInBytes());
}
// Take the header into account.
checkpoint -= header_size_;
checkpoint = AlignDown(checkpoint, alignment_);
// Update the checkpoint of the pool manager.
checkpoint_ = checkpoint;
// NOTE: To handle min_location_ in the generic case, we could make a second
// pass of the objects_ vector, increasing the checkpoint as needed, while
// maintaining the alignment requirements.
// It should not be possible to have any issues with min_location_ with actual
// code, since there should always be some kind of branch over the pool,
// whether introduced by the pool emission or by the user, which will make
// sure the min_location_ requirement is satisfied. It's possible that the
// user could emit code in the literal pool and intentionally load the first
// value and then fall-through into the pool, but that is not a supported use
// of VIXL and we will assert in that case.
}
template <typename T>
bool PoolManager<T>::PoolObjectLessThan(const PoolObject<T>& a,
const PoolObject<T>& b) {
if (a.max_location_ != b.max_location_)
return (a.max_location_ < b.max_location_);
int a_size = a.label_base_->GetPoolObjectSizeInBytes();
int b_size = b.label_base_->GetPoolObjectSizeInBytes();
if (a_size != b_size) return (a_size < b_size);
if (a.alignment_ != b.alignment_) return (a.alignment_ < b.alignment_);
if (a.min_location_ != b.min_location_)
return (a.min_location_ < b.min_location_);
return false;
}
template <typename T>
void PoolManager<T>::AddObjectReference(const ForwardReference<T>* reference,
LocationBase<T>* label_base) {
VIXL_ASSERT(reference->object_alignment_ <= buffer_alignment_);
VIXL_ASSERT(label_base->GetPoolObjectAlignment() <= buffer_alignment_);
PoolObject<T>* object = GetObjectIfTracked(label_base);
if (object == NULL) {
PoolObject<T> new_object(label_base);
new_object.RestrictRange(reference->min_object_location_,
reference->max_object_location_);
new_object.RestrictAlignment(reference->object_alignment_);
Insert(new_object);
} else {
object->RestrictRange(reference->min_object_location_,
reference->max_object_location_);
object->RestrictAlignment(reference->object_alignment_);
// Move the object, if needed.
if (objects_.size() != 1) {
PoolObject<T> new_object(*object);
ptrdiff_t distance = std::distance(objects_.data(), object);
objects_.erase(objects_.begin() + distance);
Insert(new_object);
}
}
// No need to sort, we inserted the object in an already sorted array.
RecalculateCheckpoint(kNoSortRequired);
}
template <typename T>
void PoolManager<T>::Insert(const PoolObject<T>& new_object) {
bool inserted = false;
// Place the object in the right position.
for (objects_iter iter = objects_.begin(); iter != objects_.end(); ++iter) {
PoolObject<T>& current = *iter;
if (!PoolObjectLessThan(current, new_object)) {
objects_.insert(iter, new_object);
inserted = true;
break;
}
}
if (!inserted) {
objects_.push_back(new_object);
}
}
template <typename T>
void PoolManager<T>::RemoveAndDelete(PoolObject<T>* object) {
for (objects_iter iter = objects_.begin(); iter != objects_.end(); ++iter) {
PoolObject<T>& current = *iter;
if (current.label_base_ == object->label_base_) {
(void)RemoveAndDelete(iter);
return;
}
}
VIXL_UNREACHABLE();
}
template <typename T>
typename PoolManager<T>::objects_iter PoolManager<T>::RemoveAndDelete(
objects_iter iter) {
PoolObject<T>& object = *iter;
LocationBase<T>* label_base = object.label_base_;
// Check if we also need to delete the LocationBase object.
if (label_base->ShouldBeDeletedOnPoolManagerDestruction()) {
delete_on_destruction_.push_back(label_base);
}
if (label_base->ShouldBeDeletedOnPlacementByPoolManager()) {
VIXL_ASSERT(!label_base->ShouldBeDeletedOnPoolManagerDestruction());
delete label_base;
}
return objects_.erase(iter);
}
template <typename T>
T PoolManager<T>::Bind(MacroAssemblerInterface* masm,
LocationBase<T>* object,
T location) {
PoolObject<T>* existing_object = GetObjectIfTracked(object);
int alignment;
T min_location;
if (existing_object == NULL) {
alignment = object->GetMaxAlignment();
min_location = object->GetMinLocation();
} else {
alignment = existing_object->alignment_;
min_location = existing_object->min_location_;
}
// Align if needed, and add necessary padding to reach the min_location_.
T aligned_location = AlignUp(location, alignment);
masm->EmitNopBytes(aligned_location - location);
location = aligned_location;
while (location < min_location) {
masm->EmitNopBytes(alignment);
location += alignment;
}
object->SetLocation(masm->AsAssemblerBase(), location);
object->MarkBound();
if (existing_object != NULL) {
RemoveAndDelete(existing_object);
// No need to sort, we removed the object from a sorted array.
RecalculateCheckpoint(kNoSortRequired);
}
// We assume that the maximum padding we can possibly add here is less
// than the header alignment - hence that we're not going to go past our
// checkpoint.
VIXL_ASSERT(!CheckFuturePC(location, checkpoint_));
return location;
}
template <typename T>
void PoolManager<T>::Release(T pc) {
USE(pc);
if (--monitor_ == 0) {
// Ensure the pool has not been blocked for too long.
VIXL_ASSERT(pc <= checkpoint_);
}
}
template <typename T>
PoolManager<T>::~PoolManager<T>() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
#ifdef VIXL_DEBUG
// Check for unbound objects.
for (objects_iter iter = objects_.begin(); iter != objects_.end(); ++iter) {
// There should not be any bound objects left in the pool. For unbound
// objects, we will check in the destructor of the object itself.
VIXL_ASSERT(!(*iter).label_base_->IsBound());
}
#endif
// Delete objects the pool manager owns.
for (typename std::vector<LocationBase<T>*>::iterator
iter = delete_on_destruction_.begin(),
end = delete_on_destruction_.end();
iter != end;
++iter) {
delete *iter;
}
}
template <typename T>
int PoolManager<T>::GetPoolSizeForTest() const {
// Iterate over objects and return their cumulative size. This does not take
// any padding into account, just the size of the objects themselves.
int size = 0;
for (const_objects_iter iter = objects_.begin(); iter != objects_.end();
++iter) {
size += (*iter).label_base_->GetPoolObjectSizeInBytes();
}
return size;
}
}
#endif // VIXL_POOL_MANAGER_IMPL_H_

@ -0,0 +1,555 @@
// Copyright 2017, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef VIXL_POOL_MANAGER_H_
#define VIXL_POOL_MANAGER_H_
#include <stdint.h>
#include <cstddef>
#include <limits>
#include <map>
#include <vector>
#include "globals-vixl.h"
#include "macro-assembler-interface.h"
#include "utils-vixl.h"
namespace vixl {
class TestPoolManager;
// There are four classes declared in this header file:
// PoolManager, PoolObject, ForwardReference and LocationBase.
// The PoolManager manages both literal and veneer pools, and is designed to be
// shared between AArch32 and AArch64. A pool is represented as an abstract
// collection of references to objects. The manager does not need to know
// architecture-specific details about literals and veneers; the actual
// emission of the pool objects is delegated.
//
// Literal and Label will derive from LocationBase. The MacroAssembler will
// create these objects as instructions that reference pool objects are
// encountered, and ask the PoolManager to track them. The PoolManager will
// create an internal PoolObject object for each object derived from
// LocationBase. Some of these PoolObject objects will be deleted when placed
// (e.g. the ones corresponding to Literals), whereas others will be updated
// with a new range when placed (e.g. Veneers) and deleted when Bind() is
// called on the PoolManager with their corresponding object as a parameter.
//
// A ForwardReference represents a reference to a PoolObject that will be
// placed later in the instruction stream. Each ForwardReference may only refer
// to one PoolObject, but many ForwardReferences may refer to the same
// object.
//
// A PoolObject represents an object that has not yet been placed. The final
// location of a PoolObject (and hence the LocationBase object to which it
// corresponds) is constrained mostly by the instructions that refer to it, but
// PoolObjects can also have inherent constraints, such as alignment.
//
// LocationBase objects, unlike PoolObject objects, can be used outside of the
// pool manager (e.g. as manually placed literals, which may still have
// forward references that need to be resolved).
//
// At the moment, each LocationBase will have at most one PoolObject that keeps
// the relevant information for placing this object in the pool. When that
// object is placed, all forward references of the object are resolved. For
// that reason, we do not need to keep track of the ForwardReference objects in
// the PoolObject.
// T is an integral type used for representing locations. For a 32-bit
// architecture it will typically be int32_t, whereas for a 64-bit
// architecture it will be int64_t.
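//
// A minimal usage sketch (illustrative only; it is not part of VIXL, and the
// kHeaderSize/kAlignment/kBufferAlignment/kInstrSize values, the literal
// object and the load-emitting helper are hypothetical placeholders supplied
// by the embedding macro-assembler):
//
//   PoolManager<int32_t> pools(kHeaderSize, kAlignment, kBufferAlignment);
//   ...
//   // Before emitting an instruction that references a pool object:
//   if (pools.MustEmit(pc, kInstrSize, &ref, &literal)) {
//     pc = pools.Emit(masm, pc, kInstrSize, &ref, &literal);
//   }
//   EmitLoadLiteral(&literal);                 // hypothetical helper
//   pools.AddObjectReference(&ref, &literal);
//
// When a Label-like object is bound directly, Bind() is used instead, and
// Block()/Release() bracket regions where pool emission must not happen.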
template <typename T>
class ForwardReference;
template <typename T>
class PoolObject;
template <typename T>
class PoolManager;
// Represents an object that has a size and alignment, and either has a known
// location or has not been placed yet. An object of a subclass of LocationBase
// will typically keep track of a number of ForwardReferences when it has not
// yet been placed, but LocationBase does not assume or implement that
// functionality. LocationBase provides virtual methods for emitting the
// object, updating all the forward references, and giving the PoolManager
// information on the lifetime of this object and the corresponding PoolObject.
template <typename T>
class LocationBase {
public:
// The size of a LocationBase object is restricted to 4KB, in order to avoid
// situations where the size of the pool becomes larger than the range of
// an unconditional branch. This cannot happen without having large objects,
// as typically the range of an unconditional branch is the largest range
// an instruction supports.
// TODO: This would ideally be an architecture-specific value, perhaps
// another template parameter.
static const int kMaxObjectSize = 4 * KBytes;
// By default, LocationBase objects are aligned naturally to their size.
LocationBase(uint32_t type, int size)
: pool_object_size_(size),
pool_object_alignment_(size),
pool_object_type_(type),
is_bound_(false),
location_(0) {
VIXL_ASSERT(size > 0);
VIXL_ASSERT(size <= kMaxObjectSize);
VIXL_ASSERT(IsPowerOf2(size));
}
// Allow alignment to be specified, as long as it is smaller than the size.
LocationBase(uint32_t type, int size, int alignment)
: pool_object_size_(size),
pool_object_alignment_(alignment),
pool_object_type_(type),
is_bound_(false),
location_(0) {
VIXL_ASSERT(size > 0);
VIXL_ASSERT(size <= kMaxObjectSize);
VIXL_ASSERT(IsPowerOf2(alignment));
VIXL_ASSERT(alignment <= size);
}
// Constructor for locations that are already bound.
explicit LocationBase(T location)
: pool_object_size_(-1),
pool_object_alignment_(-1),
pool_object_type_(0),
is_bound_(true),
location_(location) {}
virtual ~LocationBase() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {}
// The PoolManager should assume ownership of some objects, and delete them
// after they have been placed. This can happen for example for literals that
// are created internally to the MacroAssembler and the user doesn't get a
// handle to. By default, the PoolManager will not do this.
virtual bool ShouldBeDeletedOnPlacementByPoolManager() const { return false; }
// The PoolManager should assume ownership of some objects, and delete them
// when it is destroyed. By default, the PoolManager will not do this.
virtual bool ShouldBeDeletedOnPoolManagerDestruction() const { return false; }
// Emit the PoolObject. Derived classes will implement this method to emit
// the necessary data and/or code (for example, to emit a literal or a
// veneer). This should not add padding, as it is added explicitly by the pool
// manager.
virtual void EmitPoolObject(MacroAssemblerInterface* masm) = 0;
// Resolve the references to this object. Will encode the necessary offset
// in the instruction corresponding to each reference and then delete it.
// TODO: An alternative here would be to provide a ResolveReference()
// method that only asks the LocationBase to resolve a specific reference
// (thus allowing the pool manager to resolve some of the references only).
// This would mean we need to have some kind of API to get all the references
// to a LabelObject.
virtual void ResolveReferences(internal::AssemblerBase* assembler) = 0;
// Returns true when the PoolObject corresponding to this LocationBase object
// needs to be removed from the pool once placed, and false if it needs to
// be updated instead (in which case UpdatePoolObject will be called).
virtual bool ShouldDeletePoolObjectOnPlacement() const { return true; }
// Update the PoolObject after placing it, if necessary. This will happen for
// example in the case of a placed veneer, where we need to use a new updated
// range and a new reference (from the newly added branch instruction).
// By default, this does nothing, to avoid forcing objects that will not need
// this to have an empty implementation.
virtual void UpdatePoolObject(PoolObject<T>*) {}
// Implement heuristics for emitting this object. If a margin is to be used
// as a hint during pool emission, we will try not to emit the object if we
// are further away from the maximum reachable location by more than the
// margin.
virtual bool UsePoolObjectEmissionMargin() const { return false; }
virtual T GetPoolObjectEmissionMargin() const {
VIXL_ASSERT(UsePoolObjectEmissionMargin() == false);
return 0;
}
int GetPoolObjectSizeInBytes() const { return pool_object_size_; }
int GetPoolObjectAlignment() const { return pool_object_alignment_; }
uint32_t GetPoolObjectType() const { return pool_object_type_; }
bool IsBound() const { return is_bound_; }
T GetLocation() const { return location_; }
// This function can be called multiple times before the object is marked as
// bound with MarkBound() below. This is because some objects (e.g. the ones
// used to represent labels) can have veneers; every time we place a veneer
// we need to keep track of the location in order to resolve the references
// to the object. Reusing the location_ field for this is convenient.
void SetLocation(internal::AssemblerBase* assembler, T location) {
VIXL_ASSERT(!is_bound_);
location_ = location;
ResolveReferences(assembler);
}
void MarkBound() {
VIXL_ASSERT(!is_bound_);
is_bound_ = true;
}
// The following two functions are used when an object is bound by a call to
// PoolManager<T>::Bind().
virtual int GetMaxAlignment() const {
VIXL_ASSERT(!ShouldDeletePoolObjectOnPlacement());
return 1;
}
virtual T GetMinLocation() const {
VIXL_ASSERT(!ShouldDeletePoolObjectOnPlacement());
return 0;
}
private:
// The size of the corresponding PoolObject, in bytes.
int pool_object_size_;
// The alignment of the corresponding PoolObject; this must be a power of two.
int pool_object_alignment_;
// Different derived classes should have different type values. This can be
// used internally by the PoolManager for grouping of objects.
uint32_t pool_object_type_;
// Has the object been bound to a location yet?
bool is_bound_;
protected:
// See comment on SetLocation() for the use of this field.
T location_;
};
template <typename T>
class PoolObject {
public:
// By default, PoolObjects have no inherent position constraints.
explicit PoolObject(LocationBase<T>* parent)
: label_base_(parent),
min_location_(0),
max_location_(std::numeric_limits<T>::max()),
alignment_(parent->GetPoolObjectAlignment()),
skip_until_location_hint_(0),
type_(parent->GetPoolObjectType()) {
VIXL_ASSERT(IsPowerOf2(alignment_));
UpdateLocationHint();
}
// Reset the minimum and maximum location and the alignment of the object.
// This function is public in order to allow the LocationBase corresponding to
// this PoolObject to update the PoolObject when placed, e.g. in the case of
// veneers. The size and type of the object cannot be modified.
void Update(T min, T max, int alignment) {
// We don't use RestrictRange here as the new range is independent of the
// old range (and the maximum location is typically larger).
min_location_ = min;
max_location_ = max;
RestrictAlignment(alignment);
UpdateLocationHint();
}
private:
void RestrictRange(T min, T max) {
VIXL_ASSERT(min <= max_location_);
VIXL_ASSERT(max >= min_location_);
min_location_ = std::max(min_location_, min);
max_location_ = std::min(max_location_, max);
UpdateLocationHint();
}
void RestrictAlignment(int alignment) {
VIXL_ASSERT(IsPowerOf2(alignment));
VIXL_ASSERT(IsPowerOf2(alignment_));
alignment_ = std::max(alignment_, alignment);
}
void UpdateLocationHint() {
if (label_base_->UsePoolObjectEmissionMargin()) {
skip_until_location_hint_ =
max_location_ - label_base_->GetPoolObjectEmissionMargin();
}
}
// The LocationBase that this pool object represents.
LocationBase<T>* label_base_;
// Hard, precise location constraints for the start location of the object.
// They are both inclusive, that is the start location of the object can be
// at any location between min_location_ and max_location_, themselves
// included.
T min_location_;
T max_location_;
// The alignment must be a power of two.
int alignment_;
// Avoid generating this object until skip_until_location_hint_. This
// supports cases where placing the object in the pool has an inherent cost
// that could be avoided in some other way. Veneers are a typical example; we
// would prefer to branch directly (over a pool) rather than use veneers, so
// this value can be set using some heuristic to leave them in the pool.
// This value is only a hint, which will be ignored if necessary in order to
// meet the hard constraints we have.
T skip_until_location_hint_;
// Used only to group objects of similar type together. The PoolManager does
// not know what the types represent.
uint32_t type_;
friend class PoolManager<T>;
};
// Class that represents a forward reference. It is the responsibility of
// LocationBase objects to keep track of forward references and patch them when
// an object is placed - this class is only used by the PoolManager in order to
// restrict the requirements on PoolObjects it is tracking.
template <typename T>
class ForwardReference {
public:
ForwardReference(T location,
int size,
T min_object_location,
T max_object_location,
int object_alignment = 1)
: location_(location),
size_(size),
object_alignment_(object_alignment),
min_object_location_(min_object_location),
max_object_location_(max_object_location) {
VIXL_ASSERT(AlignDown(max_object_location, object_alignment) >=
min_object_location);
}
bool LocationIsEncodable(T location) const {
return location >= min_object_location_ &&
location <= max_object_location_ &&
IsAligned(location, object_alignment_);
}
T GetLocation() const { return location_; }
T GetMinLocation() const { return min_object_location_; }
T GetMaxLocation() const { return max_object_location_; }
int GetAlignment() const { return object_alignment_; }
// Needed for InvalSet.
void SetLocationToInvalidateOnly(T location) { location_ = location; }
private:
// The location of the thing that contains the reference. For example, this
// can be the location of the branch or load instruction.
T location_;
// The size of the instruction that makes the reference, in bytes.
int size_;
// The alignment that the object must satisfy for this reference - must be a
// power of two.
int object_alignment_;
// Specify the possible locations where the object could be stored. AArch32's
// PC offset, and T32's PC alignment calculations should be applied by the
// Assembler, not here. The PoolManager deals only with simple locations.
// Including min_object_location_ is necessary to handle some AArch32
// instructions which have a minimum offset of 0, but also have the implicit
// PC offset.
// Note that this structure cannot handle sparse ranges, such as A32's ADR,
// but doing so is costly and probably not useful in practice. The min and
// max object location both refer to the beginning of the object, are
// inclusive and are not affected by the object size. E.g. if
// max_object_location_ is equal to X, we can place the object at location X
// regardless of its size.
T min_object_location_;
T max_object_location_;
friend class PoolManager<T>;
};
template <typename T>
class PoolManager {
public:
PoolManager(int header_size, int alignment, int buffer_alignment)
: header_size_(header_size),
alignment_(alignment),
buffer_alignment_(buffer_alignment),
checkpoint_(std::numeric_limits<T>::max()),
max_pool_size_(0),
monitor_(0) {}
~PoolManager() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION;
// Check if we will need to emit the pool at location 'pc', when planning to
// generate a certain number of bytes. This optionally takes a
// ForwardReference we are about to generate, in which case the size of the
// reference must be included in 'num_bytes'.
bool MustEmit(T pc,
int num_bytes = 0,
ForwardReference<T>* reference = NULL,
LocationBase<T>* object = NULL) const;
enum EmitOption { kBranchRequired, kNoBranchRequired };
// Emit the pool at location 'pc', using 'masm' as the macroassembler.
// The branch over the header can be optionally omitted using 'option'.
// Returns the new PC after pool emission.
// This expects a number of bytes that are about to be emitted, to be taken
// into account in heuristics for pool object emission.
// This also optionally takes a forward reference and an object as
// parameters, to be used in the case where emission of the pool is triggered
// by adding a new reference to the pool that does not fit. The pool manager
// will need this information in order to apply its heuristics correctly.
T Emit(MacroAssemblerInterface* masm,
T pc,
int num_bytes = 0,
ForwardReference<T>* new_reference = NULL,
LocationBase<T>* new_object = NULL,
EmitOption option = kBranchRequired);
// Add 'reference' to 'object'. Should not be preceded by a call to MustEmit()
// that returned true, unless Emit() has successfully been called afterwards.
void AddObjectReference(const ForwardReference<T>* reference,
LocationBase<T>* object);
// This is to notify the pool that a LocationBase has been bound to a location
// and does not need to be tracked anymore.
// This will happen, for example, for Labels, which are manually bound by the
// user.
// This can potentially add some padding bytes in order to meet the object
// requirements, and will return the new location.
T Bind(MacroAssemblerInterface* masm, LocationBase<T>* object, T location);
// Functions for blocking and releasing the pools.
void Block() { monitor_++; }
void Release(T pc);
bool IsBlocked() const { return monitor_ != 0; }
private:
typedef typename std::vector<PoolObject<T> >::iterator objects_iter;
typedef
typename std::vector<PoolObject<T> >::const_iterator const_objects_iter;
PoolObject<T>* GetObjectIfTracked(LocationBase<T>* label) {
return const_cast<PoolObject<T>*>(
static_cast<const PoolManager<T>*>(this)->GetObjectIfTracked(label));
}
const PoolObject<T>* GetObjectIfTracked(LocationBase<T>* label) const {
for (const_objects_iter iter = objects_.begin(); iter != objects_.end();
++iter) {
const PoolObject<T>& current = *iter;
if (current.label_base_ == label) return &current;
}
return NULL;
}
// Helper function for calculating the checkpoint.
enum SortOption { kSortRequired, kNoSortRequired };
void RecalculateCheckpoint(SortOption sort_option = kSortRequired);
// Comparison function for using std::sort() on objects_. PoolObject A is
// ordered before PoolObject B when A should be emitted before B. The
// comparison depends on the max_location_, size_, alignment_ and
// min_location_.
static bool PoolObjectLessThan(const PoolObject<T>& a,
const PoolObject<T>& b);
// Helper function used in the checkpoint calculation. 'checkpoint' is the
// current checkpoint, which is modified to take 'object' into account. The
// new checkpoint is returned.
static T UpdateCheckpointForObject(T checkpoint, const PoolObject<T>* object);
// Helper function to add a new object into a sorted objects_ array.
void Insert(const PoolObject<T>& new_object);
// Helper functions to remove an object from objects_ and delete the
// corresponding LocationBase object, if necessary. This will be called
// either after placing the object, or when Bind() is called.
void RemoveAndDelete(PoolObject<T>* object);
objects_iter RemoveAndDelete(objects_iter iter);
// Helper function to check if we should skip emitting an object.
bool ShouldSkipObject(PoolObject<T>* pool_object,
T pc,
int num_bytes,
ForwardReference<T>* new_reference,
LocationBase<T>* new_object,
PoolObject<T>* existing_object) const;
// Used only for debugging.
void DumpCurrentState(T pc) const;
// Methods used for testing only, via the test friend classes.
bool PoolIsEmptyForTest() const { return objects_.empty(); }
T GetCheckpointForTest() const { return checkpoint_; }
int GetPoolSizeForTest() const;
// The objects we are tracking references to. The objects_ vector is sorted
// at all times between calls to the public members of the PoolManager. It
// is sorted every time we add, delete or update a PoolObject.
// TODO: Consider a more efficient data structure here, to allow us to delete
// elements as we emit them.
std::vector<PoolObject<T> > objects_;
// Objects to be deleted on pool destruction.
std::vector<LocationBase<T>*> delete_on_destruction_;
// The header_size_ and alignment_ values are hardcoded for each instance of
// PoolManager. The PoolManager does not know how to emit the header, and
// relies on the EmitPoolHeader and EmitPoolFooter methods of the
// MacroAssemblerInterface for that. It will also emit padding if necessary,
// both for the header and at the end of the pool, according to alignment_,
// and using the EmitNopBytes and EmitPaddingBytes method of the
// MacroAssemblerInterface.
// The size of the header, in bytes.
int header_size_;
// The alignment of the header - must be a power of two.
int alignment_;
// The alignment of the buffer - we cannot guarantee any object alignment
// larger than this alignment. When a buffer is grown, this alignment has
// to be guaranteed.
// TODO: Consider extending this to describe the guaranteed alignment as the
// modulo of a known number.
int buffer_alignment_;
// The current checkpoint. This is the latest location at which the pool
// *must* be emitted. This should not be visible outside the pool manager
// and should only be updated in RecalculateCheckpoint.
T checkpoint_;
// Maximum size of the pool, assuming we need the maximum possible padding
// for each object and for the header. It is only updated in
// RecalculateCheckpoint.
T max_pool_size_;
// Indicates whether the emission of this pool is blocked.
int monitor_;
friend class vixl::TestPoolManager;
};
} // namespace vixl
#endif // VIXL_POOL_MANAGER_H_

1442
3rdparty/vixl/include/vixl/utils-vixl.h vendored Normal file

File diff suppressed because it is too large


581
3rdparty/vixl/src/aarch64/cpu-aarch64.cc vendored Normal file

@ -0,0 +1,581 @@
// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(__aarch64__) && (defined(__ANDROID__) || defined(__linux__))
#include <sys/auxv.h>
#define VIXL_USE_LINUX_HWCAP 1
#endif
#include "../utils-vixl.h"
#include "cpu-aarch64.h"
namespace vixl {
namespace aarch64 {
const IDRegister::Field AA64PFR0::kFP(16, Field::kSigned);
const IDRegister::Field AA64PFR0::kAdvSIMD(20, Field::kSigned);
const IDRegister::Field AA64PFR0::kRAS(28);
const IDRegister::Field AA64PFR0::kSVE(32);
const IDRegister::Field AA64PFR0::kDIT(48);
const IDRegister::Field AA64PFR0::kCSV2(56);
const IDRegister::Field AA64PFR0::kCSV3(60);
const IDRegister::Field AA64PFR1::kBT(0);
const IDRegister::Field AA64PFR1::kSSBS(4);
const IDRegister::Field AA64PFR1::kMTE(8);
const IDRegister::Field AA64PFR1::kSME(24);
const IDRegister::Field AA64ISAR0::kAES(4);
const IDRegister::Field AA64ISAR0::kSHA1(8);
const IDRegister::Field AA64ISAR0::kSHA2(12);
const IDRegister::Field AA64ISAR0::kCRC32(16);
const IDRegister::Field AA64ISAR0::kAtomic(20);
const IDRegister::Field AA64ISAR0::kRDM(28);
const IDRegister::Field AA64ISAR0::kSHA3(32);
const IDRegister::Field AA64ISAR0::kSM3(36);
const IDRegister::Field AA64ISAR0::kSM4(40);
const IDRegister::Field AA64ISAR0::kDP(44);
const IDRegister::Field AA64ISAR0::kFHM(48);
const IDRegister::Field AA64ISAR0::kTS(52);
const IDRegister::Field AA64ISAR0::kRNDR(60);
const IDRegister::Field AA64ISAR1::kDPB(0);
const IDRegister::Field AA64ISAR1::kAPA(4);
const IDRegister::Field AA64ISAR1::kAPI(8);
const IDRegister::Field AA64ISAR1::kJSCVT(12);
const IDRegister::Field AA64ISAR1::kFCMA(16);
const IDRegister::Field AA64ISAR1::kLRCPC(20);
const IDRegister::Field AA64ISAR1::kGPA(24);
const IDRegister::Field AA64ISAR1::kGPI(28);
const IDRegister::Field AA64ISAR1::kFRINTTS(32);
const IDRegister::Field AA64ISAR1::kSB(36);
const IDRegister::Field AA64ISAR1::kSPECRES(40);
const IDRegister::Field AA64ISAR1::kBF16(44);
const IDRegister::Field AA64ISAR1::kDGH(48);
const IDRegister::Field AA64ISAR1::kI8MM(52);
const IDRegister::Field AA64ISAR2::kWFXT(0);
const IDRegister::Field AA64ISAR2::kRPRES(4);
const IDRegister::Field AA64ISAR2::kMOPS(16);
const IDRegister::Field AA64ISAR2::kCSSC(52);
const IDRegister::Field AA64MMFR0::kECV(60);
const IDRegister::Field AA64MMFR1::kLO(16);
const IDRegister::Field AA64MMFR1::kAFP(44);
const IDRegister::Field AA64MMFR2::kAT(32);
const IDRegister::Field AA64ZFR0::kSVEver(0);
const IDRegister::Field AA64ZFR0::kAES(4);
const IDRegister::Field AA64ZFR0::kBitPerm(16);
const IDRegister::Field AA64ZFR0::kBF16(20);
const IDRegister::Field AA64ZFR0::kSHA3(32);
const IDRegister::Field AA64ZFR0::kSM4(40);
const IDRegister::Field AA64ZFR0::kI8MM(44);
const IDRegister::Field AA64ZFR0::kF32MM(52);
const IDRegister::Field AA64ZFR0::kF64MM(56);
const IDRegister::Field AA64SMFR0::kSMEf32f32(32, 1);
const IDRegister::Field AA64SMFR0::kSMEb16f32(34, 1);
const IDRegister::Field AA64SMFR0::kSMEf16f32(35, 1);
const IDRegister::Field AA64SMFR0::kSMEi8i32(36);
const IDRegister::Field AA64SMFR0::kSMEf64f64(48, 1);
const IDRegister::Field AA64SMFR0::kSMEi16i64(52);
const IDRegister::Field AA64SMFR0::kSMEfa64(63, 1);
CPUFeatures AA64PFR0::GetCPUFeatures() const {
CPUFeatures f;
if (Get(kFP) >= 0) f.Combine(CPUFeatures::kFP);
if (Get(kFP) >= 1) f.Combine(CPUFeatures::kFPHalf);
if (Get(kAdvSIMD) >= 0) f.Combine(CPUFeatures::kNEON);
if (Get(kAdvSIMD) >= 1) f.Combine(CPUFeatures::kNEONHalf);
if (Get(kRAS) >= 1) f.Combine(CPUFeatures::kRAS);
if (Get(kSVE) >= 1) f.Combine(CPUFeatures::kSVE);
if (Get(kDIT) >= 1) f.Combine(CPUFeatures::kDIT);
if (Get(kCSV2) >= 1) f.Combine(CPUFeatures::kCSV2);
if (Get(kCSV2) >= 2) f.Combine(CPUFeatures::kSCXTNUM);
if (Get(kCSV3) >= 1) f.Combine(CPUFeatures::kCSV3);
return f;
}
CPUFeatures AA64PFR1::GetCPUFeatures() const {
CPUFeatures f;
if (Get(kBT) >= 1) f.Combine(CPUFeatures::kBTI);
if (Get(kSSBS) >= 1) f.Combine(CPUFeatures::kSSBS);
if (Get(kSSBS) >= 2) f.Combine(CPUFeatures::kSSBSControl);
if (Get(kMTE) >= 1) f.Combine(CPUFeatures::kMTEInstructions);
if (Get(kMTE) >= 2) f.Combine(CPUFeatures::kMTE);
if (Get(kMTE) >= 3) f.Combine(CPUFeatures::kMTE3);
if (Get(kSME) >= 1) f.Combine(CPUFeatures::kSME);
return f;
}
CPUFeatures AA64ISAR0::GetCPUFeatures() const {
CPUFeatures f;
if (Get(kAES) >= 1) f.Combine(CPUFeatures::kAES);
if (Get(kAES) >= 2) f.Combine(CPUFeatures::kPmull1Q);
if (Get(kSHA1) >= 1) f.Combine(CPUFeatures::kSHA1);
if (Get(kSHA2) >= 1) f.Combine(CPUFeatures::kSHA2);
if (Get(kSHA2) >= 2) f.Combine(CPUFeatures::kSHA512);
if (Get(kCRC32) >= 1) f.Combine(CPUFeatures::kCRC32);
if (Get(kAtomic) >= 1) f.Combine(CPUFeatures::kAtomics);
if (Get(kRDM) >= 1) f.Combine(CPUFeatures::kRDM);
if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSHA3);
if (Get(kSM3) >= 1) f.Combine(CPUFeatures::kSM3);
if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSM4);
if (Get(kDP) >= 1) f.Combine(CPUFeatures::kDotProduct);
if (Get(kFHM) >= 1) f.Combine(CPUFeatures::kFHM);
if (Get(kTS) >= 1) f.Combine(CPUFeatures::kFlagM);
if (Get(kTS) >= 2) f.Combine(CPUFeatures::kAXFlag);
if (Get(kRNDR) >= 1) f.Combine(CPUFeatures::kRNG);
return f;
}
CPUFeatures AA64ISAR1::GetCPUFeatures() const {
CPUFeatures f;
if (Get(kDPB) >= 1) f.Combine(CPUFeatures::kDCPoP);
if (Get(kDPB) >= 2) f.Combine(CPUFeatures::kDCCVADP);
if (Get(kJSCVT) >= 1) f.Combine(CPUFeatures::kJSCVT);
if (Get(kFCMA) >= 1) f.Combine(CPUFeatures::kFcma);
if (Get(kLRCPC) >= 1) f.Combine(CPUFeatures::kRCpc);
if (Get(kLRCPC) >= 2) f.Combine(CPUFeatures::kRCpcImm);
if (Get(kFRINTTS) >= 1) f.Combine(CPUFeatures::kFrintToFixedSizedInt);
if (Get(kSB) >= 1) f.Combine(CPUFeatures::kSB);
if (Get(kSPECRES) >= 1) f.Combine(CPUFeatures::kSPECRES);
if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kBF16);
if (Get(kBF16) >= 2) f.Combine(CPUFeatures::kEBF16);
if (Get(kDGH) >= 1) f.Combine(CPUFeatures::kDGH);
if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kI8MM);
// Only one of these fields should be non-zero, but they have the same
// encodings, so merge the logic.
int apx = std::max(Get(kAPI), Get(kAPA));
if (apx >= 1) {
f.Combine(CPUFeatures::kPAuth);
// APA (rather than API) indicates QARMA.
if (Get(kAPA) >= 1) f.Combine(CPUFeatures::kPAuthQARMA);
if (apx == 0b0010) f.Combine(CPUFeatures::kPAuthEnhancedPAC);
if (apx >= 0b0011) f.Combine(CPUFeatures::kPAuthEnhancedPAC2);
if (apx >= 0b0100) f.Combine(CPUFeatures::kPAuthFPAC);
if (apx >= 0b0101) f.Combine(CPUFeatures::kPAuthFPACCombined);
}
if (Get(kGPI) >= 1) f.Combine(CPUFeatures::kPAuthGeneric);
if (Get(kGPA) >= 1) {
f.Combine(CPUFeatures::kPAuthGeneric, CPUFeatures::kPAuthGenericQARMA);
}
return f;
}
CPUFeatures AA64ISAR2::GetCPUFeatures() const {
CPUFeatures f;
if (Get(kWFXT) >= 2) f.Combine(CPUFeatures::kWFXT);
if (Get(kRPRES) >= 1) f.Combine(CPUFeatures::kRPRES);
if (Get(kMOPS) >= 1) f.Combine(CPUFeatures::kMOPS);
if (Get(kCSSC) >= 1) f.Combine(CPUFeatures::kCSSC);
return f;
}
CPUFeatures AA64MMFR0::GetCPUFeatures() const {
CPUFeatures f;
if (Get(kECV) >= 1) f.Combine(CPUFeatures::kECV);
return f;
}
CPUFeatures AA64MMFR1::GetCPUFeatures() const {
CPUFeatures f;
if (Get(kLO) >= 1) f.Combine(CPUFeatures::kLORegions);
if (Get(kAFP) >= 1) f.Combine(CPUFeatures::kAFP);
return f;
}
CPUFeatures AA64MMFR2::GetCPUFeatures() const {
CPUFeatures f;
if (Get(kAT) >= 1) f.Combine(CPUFeatures::kUSCAT);
return f;
}
CPUFeatures AA64ZFR0::GetCPUFeatures() const {
// This register is only available with SVE, but reads-as-zero in its absence,
// so it's always safe to read it.
CPUFeatures f;
if (Get(kF64MM) >= 1) f.Combine(CPUFeatures::kSVEF64MM);
if (Get(kF32MM) >= 1) f.Combine(CPUFeatures::kSVEF32MM);
if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kSVEI8MM);
if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSVESM4);
if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSVESHA3);
if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kSVEBF16);
if (Get(kBF16) >= 2) f.Combine(CPUFeatures::kSVE_EBF16);
if (Get(kBitPerm) >= 1) f.Combine(CPUFeatures::kSVEBitPerm);
if (Get(kAES) >= 1) f.Combine(CPUFeatures::kSVEAES);
if (Get(kAES) >= 2) f.Combine(CPUFeatures::kSVEPmull128);
if (Get(kSVEver) >= 1) f.Combine(CPUFeatures::kSVE2);
return f;
}
CPUFeatures AA64SMFR0::GetCPUFeatures() const {
CPUFeatures f;
if (Get(kSMEf32f32) >= 1) f.Combine(CPUFeatures::kSMEf32f32);
if (Get(kSMEb16f32) >= 1) f.Combine(CPUFeatures::kSMEb16f32);
if (Get(kSMEf16f32) >= 1) f.Combine(CPUFeatures::kSMEf16f32);
if (Get(kSMEi8i32) >= 15) f.Combine(CPUFeatures::kSMEi8i32);
if (Get(kSMEf64f64) >= 1) f.Combine(CPUFeatures::kSMEf64f64);
if (Get(kSMEi16i64) >= 15) f.Combine(CPUFeatures::kSMEi16i64);
if (Get(kSMEfa64) >= 1) f.Combine(CPUFeatures::kSMEfa64);
return f;
}
int IDRegister::Get(IDRegister::Field field) const {
int msb = field.GetMsb();
int lsb = field.GetLsb();
VIXL_STATIC_ASSERT(static_cast<size_t>(Field::kMaxWidthInBits) <
(sizeof(int) * kBitsPerByte));
switch (field.GetType()) {
case Field::kSigned:
return static_cast<int>(ExtractSignedBitfield64(msb, lsb, value_));
case Field::kUnsigned:
return static_cast<int>(ExtractUnsignedBitfield64(msb, lsb, value_));
}
VIXL_UNREACHABLE();
return 0;
}
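// For example, AA64ISAR0.AES is declared above with an lsb of 4 (ID register
// fields are four bits wide by default), so Get(kAES) extracts bits [7:4]; a
// value of 2 there is what AA64ISAR0::GetCPUFeatures maps to kPmull1Q.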
CPUFeatures CPU::InferCPUFeaturesFromIDRegisters() {
CPUFeatures f;
#define VIXL_COMBINE_ID_REG(NAME, MRS_ARG) \
f.Combine(Read##NAME().GetCPUFeatures());
VIXL_AARCH64_ID_REG_LIST(VIXL_COMBINE_ID_REG)
#undef VIXL_COMBINE_ID_REG
return f;
}
CPUFeatures CPU::InferCPUFeaturesFromOS(
CPUFeatures::QueryIDRegistersOption option) {
CPUFeatures features;
#ifdef VIXL_USE_LINUX_HWCAP
// Map each set bit onto a feature. Ideally, we'd use HWCAP_* macros rather
// than explicit bits, but explicit bits allow us to identify features that
// the toolchain doesn't know about.
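// For example, bit 1 of AT_HWCAP (HWCAP_ASIMD in the kernel headers) is
// entry 1 of kFeatureBitsLow below, i.e. CPUFeatures::kNEON.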
static const CPUFeatures::Feature kFeatureBitsLow[] =
{// Bits 0-7
CPUFeatures::kFP,
CPUFeatures::kNEON,
CPUFeatures::kNone, // "EVTSTRM", which VIXL doesn't track.
CPUFeatures::kAES,
CPUFeatures::kPmull1Q,
CPUFeatures::kSHA1,
CPUFeatures::kSHA2,
CPUFeatures::kCRC32,
// Bits 8-15
CPUFeatures::kAtomics,
CPUFeatures::kFPHalf,
CPUFeatures::kNEONHalf,
CPUFeatures::kIDRegisterEmulation,
CPUFeatures::kRDM,
CPUFeatures::kJSCVT,
CPUFeatures::kFcma,
CPUFeatures::kRCpc,
// Bits 16-23
CPUFeatures::kDCPoP,
CPUFeatures::kSHA3,
CPUFeatures::kSM3,
CPUFeatures::kSM4,
CPUFeatures::kDotProduct,
CPUFeatures::kSHA512,
CPUFeatures::kSVE,
CPUFeatures::kFHM,
// Bits 24-31
CPUFeatures::kDIT,
CPUFeatures::kUSCAT,
CPUFeatures::kRCpcImm,
CPUFeatures::kFlagM,
CPUFeatures::kSSBSControl,
CPUFeatures::kSB,
CPUFeatures::kPAuth,
CPUFeatures::kPAuthGeneric};
VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsLow) < 64);
static const CPUFeatures::Feature kFeatureBitsHigh[] =
{// Bits 0-7
CPUFeatures::kDCCVADP,
CPUFeatures::kSVE2,
CPUFeatures::kSVEAES,
CPUFeatures::kSVEPmull128,
CPUFeatures::kSVEBitPerm,
CPUFeatures::kSVESHA3,
CPUFeatures::kSVESM4,
CPUFeatures::kAXFlag,
// Bits 8-15
CPUFeatures::kFrintToFixedSizedInt,
CPUFeatures::kSVEI8MM,
CPUFeatures::kSVEF32MM,
CPUFeatures::kSVEF64MM,
CPUFeatures::kSVEBF16,
CPUFeatures::kI8MM,
CPUFeatures::kBF16,
CPUFeatures::kDGH,
// Bits 16-23
CPUFeatures::kRNG,
CPUFeatures::kBTI,
CPUFeatures::kMTE,
CPUFeatures::kECV,
CPUFeatures::kAFP,
CPUFeatures::kRPRES,
CPUFeatures::kMTE3,
CPUFeatures::kSME,
// Bits 24-31
CPUFeatures::kSMEi16i64,
CPUFeatures::kSMEf64f64,
CPUFeatures::kSMEi8i32,
CPUFeatures::kSMEf16f32,
CPUFeatures::kSMEb16f32,
CPUFeatures::kSMEf32f32,
CPUFeatures::kSMEfa64,
CPUFeatures::kWFXT,
// Bits 32-39
CPUFeatures::kEBF16,
CPUFeatures::kSVE_EBF16};
VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsHigh) < 64);
auto combine_features = [&features](uint64_t hwcap,
const CPUFeatures::Feature* feature_array,
size_t features_size) {
for (size_t i = 0; i < features_size; i++) {
if (hwcap & (UINT64_C(1) << i)) features.Combine(feature_array[i]);
}
};
uint64_t hwcap_low = getauxval(AT_HWCAP);
uint64_t hwcap_high = getauxval(AT_HWCAP2);
combine_features(hwcap_low, kFeatureBitsLow, ArrayLength(kFeatureBitsLow));
combine_features(hwcap_high, kFeatureBitsHigh, ArrayLength(kFeatureBitsHigh));
// MTE support from HWCAP2 signifies FEAT_MTE1 and FEAT_MTE2 support
if (features.Has(CPUFeatures::kMTE)) {
features.Combine(CPUFeatures::kMTEInstructions);
}
#endif // VIXL_USE_LINUX_HWCAP
if ((option == CPUFeatures::kQueryIDRegistersIfAvailable) &&
(features.Has(CPUFeatures::kIDRegisterEmulation))) {
features.Combine(InferCPUFeaturesFromIDRegisters());
}
return features;
}
#ifdef __aarch64__
#define VIXL_READ_ID_REG(NAME, MRS_ARG) \
NAME CPU::Read##NAME() { \
uint64_t value = 0; \
__asm__("mrs %0, " MRS_ARG : "=r"(value)); \
return NAME(value); \
}
#else // __aarch64__
#define VIXL_READ_ID_REG(NAME, MRS_ARG) \
NAME CPU::Read##NAME() { \
VIXL_UNREACHABLE(); \
return NAME(0); \
}
#endif // __aarch64__
VIXL_AARCH64_ID_REG_LIST(VIXL_READ_ID_REG)
#undef VIXL_READ_ID_REG
// Initialise to smallest possible cache size.
unsigned CPU::dcache_line_size_ = 1;
unsigned CPU::icache_line_size_ = 1;
// Currently computes I and D cache line size.
void CPU::SetUp() {
uint32_t cache_type_register = GetCacheType();
// The cache type register holds information about the caches, including the
// I and D cache line sizes.
static const int kDCacheLineSizeShift = 16;
static const int kICacheLineSizeShift = 0;
static const uint32_t kDCacheLineSizeMask = 0xf << kDCacheLineSizeShift;
static const uint32_t kICacheLineSizeMask = 0xf << kICacheLineSizeShift;
// The cache type register holds the size of the I and D caches in words as
// a power of two.
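// For example, a DminLine field value of 4 denotes lines of 2^4 = 16 words,
// which the shift below turns into 4 << 4 = 64 bytes.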
uint32_t dcache_line_size_power_of_two =
(cache_type_register & kDCacheLineSizeMask) >> kDCacheLineSizeShift;
uint32_t icache_line_size_power_of_two =
(cache_type_register & kICacheLineSizeMask) >> kICacheLineSizeShift;
dcache_line_size_ = 4 << dcache_line_size_power_of_two;
icache_line_size_ = 4 << icache_line_size_power_of_two;
}
uint32_t CPU::GetCacheType() {
#ifdef __aarch64__
uint64_t cache_type_register;
// Copy the content of the cache type register to a core register.
__asm__ __volatile__("mrs %[ctr], ctr_el0" // NOLINT(runtime/references)
: [ctr] "=r"(cache_type_register));
VIXL_ASSERT(IsUint32(cache_type_register));
return static_cast<uint32_t>(cache_type_register);
#else
// This will lead to a cache with 1 byte long lines, which is fine since
// neither EnsureIAndDCacheCoherency nor the simulator will need this
// information.
return 0;
#endif
}
// Query the SVE vector length. This requires CPUFeatures::kSVE.
int CPU::ReadSVEVectorLengthInBits() {
#ifdef __aarch64__
uint64_t vl;
// To support compilers that don't understand `rdvl`, encode the value
// directly and move it manually.
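// RDVL Xd, #imm returns imm times the vector length in bytes, so #8 yields
// the length in bits (e.g. a 256-bit implementation has VL = 32 bytes and
// returns 8 * 32 = 256).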
__asm__(
" .word 0x04bf5100\n" // rdvl x0, #8
" mov %[vl], x0\n"
: [vl] "=r"(vl)
:
: "x0");
VIXL_ASSERT(vl <= INT_MAX);
return static_cast<int>(vl);
#else
VIXL_UNREACHABLE();
return 0;
#endif
}
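// Typical use (a sketch, not a VIXL requirement): after an assembler has
// finished writing instructions into an executable buffer, call
// EnsureIAndDCacheCoherency(buffer_start, buffer_size_in_bytes) before
// branching to the new code.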
void CPU::EnsureIAndDCacheCoherency(void* address, size_t length) {
#ifdef __aarch64__
// Implement the cache synchronisation for all targets where AArch64 is the
// host, even if we're building the simulator for an AArch64 host. This
// allows for cases where the user wants to simulate code as well as run it
// natively.
if (length == 0) {
return;
}
// The code below assumes user space cache operations are allowed.
// Work out the line sizes for each cache, and use them to determine the
// start addresses.
uintptr_t start = reinterpret_cast<uintptr_t>(address);
uintptr_t dsize = static_cast<uintptr_t>(dcache_line_size_);
uintptr_t isize = static_cast<uintptr_t>(icache_line_size_);
uintptr_t dline = start & ~(dsize - 1);
uintptr_t iline = start & ~(isize - 1);
// Cache line sizes are always a power of 2.
VIXL_ASSERT(IsPowerOf2(dsize));
VIXL_ASSERT(IsPowerOf2(isize));
uintptr_t end = start + length;
do {
__asm__ __volatile__(
// Clean each line of the D cache containing the target data.
//
// dc : Data Cache maintenance
// c : Clean
// va : by (Virtual) Address
// u : to the point of Unification
// The point of unification for a processor is the point by which the
// instruction and data caches are guaranteed to see the same copy of a
// memory location. See ARM DDI 0406B page B2-12 for more information.
" dc cvau, %[dline]\n"
:
: [dline] "r"(dline)
// This code does not write to memory, but the "memory" dependency
// prevents GCC from reordering the code.
: "memory");
dline += dsize;
} while (dline < end);
__asm__ __volatile__(
// Make sure that the data cache operations (above) complete before the
// instruction cache operations (below).
//
// dsb : Data Synchronisation Barrier
// ish : Inner SHareable domain
//
// The point of unification for an Inner Shareable shareability domain is
// the point by which the instruction and data caches of all the
// processors
// in that Inner Shareable shareability domain are guaranteed to see the
// same copy of a memory location. See ARM DDI 0406B page B2-12 for more
// information.
" dsb ish\n"
:
:
: "memory");
do {
__asm__ __volatile__(
// Invalidate each line of the I cache containing the target data.
//
// ic : Instruction Cache maintenance
// i : Invalidate
// va : by Address
// u : to the point of Unification
" ic ivau, %[iline]\n"
:
: [iline] "r"(iline)
: "memory");
iline += isize;
} while (iline < end);
__asm__ __volatile__(
// Make sure that the instruction cache operations (above) take effect
// before the isb (below).
" dsb ish\n"
// Ensure that any instructions already in the pipeline are discarded and
// reloaded from the new data.
// isb : Instruction Synchronisation Barrier
" isb\n"
:
:
: "memory");
#else
// If the host isn't AArch64, we must be using the simulator, so this function
// doesn't have to do anything.
USE(address, length);
#endif
}
} // namespace aarch64
} // namespace vixl

File diff suppressed because it is too large


@ -0,0 +1,575 @@
// Copyright 2019, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <string>
#include "../globals-vixl.h"
#include "../utils-vixl.h"
#include "decoder-aarch64.h"
#include "decoder-constants-aarch64.h"
namespace vixl {
namespace aarch64 {
void Decoder::Decode(const Instruction* instr) {
std::list<DecoderVisitor*>::iterator it;
for (it = visitors_.begin(); it != visitors_.end(); it++) {
VIXL_ASSERT((*it)->IsConstVisitor());
}
VIXL_ASSERT(compiled_decoder_root_ != NULL);
compiled_decoder_root_->Decode(instr);
}
void Decoder::Decode(Instruction* instr) {
compiled_decoder_root_->Decode(const_cast<const Instruction*>(instr));
}
void Decoder::AddDecodeNode(const DecodeNode& node) {
if (decode_nodes_.count(node.GetName()) == 0) {
decode_nodes_.insert(std::make_pair(node.GetName(), node));
}
}
DecodeNode* Decoder::GetDecodeNode(std::string name) {
if (decode_nodes_.count(name) != 1) {
std::string msg = "Can't find decode node " + name + ".\n";
VIXL_ABORT_WITH_MSG(msg.c_str());
}
return &decode_nodes_[name];
}
void Decoder::ConstructDecodeGraph() {
// Add all of the decoding nodes to the Decoder.
for (unsigned i = 0; i < ArrayLength(kDecodeMapping); i++) {
AddDecodeNode(DecodeNode(kDecodeMapping[i], this));
// Add a node for each instruction form named, identified by having no '_'
// prefix on the node name.
const DecodeMapping& map = kDecodeMapping[i];
for (unsigned j = 0; j < map.mapping.size(); j++) {
if ((map.mapping[j].handler != NULL) &&
(map.mapping[j].handler[0] != '_')) {
AddDecodeNode(DecodeNode(map.mapping[j].handler, this));
}
}
}
// Add an "unallocated" node, used when an instruction encoding is not
// recognised by the decoding graph.
AddDecodeNode(DecodeNode("unallocated", this));
// Compile the graph from the root.
compiled_decoder_root_ = GetDecodeNode("Root")->Compile(this);
}
void Decoder::AppendVisitor(DecoderVisitor* new_visitor) {
visitors_.push_back(new_visitor);
}
void Decoder::PrependVisitor(DecoderVisitor* new_visitor) {
visitors_.push_front(new_visitor);
}
void Decoder::InsertVisitorBefore(DecoderVisitor* new_visitor,
DecoderVisitor* registered_visitor) {
std::list<DecoderVisitor*>::iterator it;
for (it = visitors_.begin(); it != visitors_.end(); it++) {
if (*it == registered_visitor) {
visitors_.insert(it, new_visitor);
return;
}
}
// We reached the end of the list. The last element must be
// registered_visitor.
VIXL_ASSERT(*it == registered_visitor);
visitors_.insert(it, new_visitor);
}
void Decoder::InsertVisitorAfter(DecoderVisitor* new_visitor,
DecoderVisitor* registered_visitor) {
std::list<DecoderVisitor*>::iterator it;
for (it = visitors_.begin(); it != visitors_.end(); it++) {
if (*it == registered_visitor) {
it++;
visitors_.insert(it, new_visitor);
return;
}
}
// We reached the end of the list. The last element must be
// registered_visitor.
VIXL_ASSERT(*it == registered_visitor);
visitors_.push_back(new_visitor);
}
void Decoder::RemoveVisitor(DecoderVisitor* visitor) {
visitors_.remove(visitor);
}
void Decoder::VisitNamedInstruction(const Instruction* instr,
const std::string& name) {
std::list<DecoderVisitor*>::iterator it;
Metadata m = {{"form", name}};
for (it = visitors_.begin(); it != visitors_.end(); it++) {
(*it)->Visit(&m, instr);
}
}
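// Editorial sketch (not part of the imported file): a minimal visitor that
// plugs into the registration and dispatch machinery above. It assumes
// DecoderVisitor declares a virtual Visit(Metadata*, const Instruction*)
// matching the call in VisitNamedInstruction(), and that Metadata behaves
// like a std::map keyed by "form", as the Metadata initialiser above implies.
class FormTracingVisitor : public DecoderVisitor {
 public:
  FormTracingVisitor() : count_(0) {}
  void Visit(Metadata* metadata, const Instruction* instr) override {
    USE(instr);
    count_++;
    last_form_ = (*metadata)["form"];  // The name passed by the decoder.
  }
  uint64_t GetCount() const { return count_; }
  const std::string& GetLastForm() const { return last_form_; }

 private:
  uint64_t count_;
  std::string last_form_;
};
// Usage sketch:
//   Decoder decoder;
//   FormTracingVisitor tracer;
//   decoder.AppendVisitor(&tracer);
//   decoder.Decode(instr);  // tracer.GetLastForm() names the decoded form.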
// Initialise empty vectors for sampled bits and pattern table.
const std::vector<uint8_t> DecodeNode::kEmptySampledBits;
const std::vector<DecodePattern> DecodeNode::kEmptyPatternTable;
void DecodeNode::CompileNodeForBits(Decoder* decoder,
std::string name,
uint32_t bits) {
DecodeNode* n = decoder->GetDecodeNode(name);
VIXL_ASSERT(n != NULL);
if (!n->IsCompiled()) {
n->Compile(decoder);
}
VIXL_ASSERT(n->IsCompiled());
compiled_node_->SetNodeForBits(bits, n->GetCompiledNode());
}
#define INSTANTIATE_TEMPLATE_M(M) \
case 0x##M: \
bit_extract_fn = &Instruction::ExtractBits<0x##M>; \
break;
#define INSTANTIATE_TEMPLATE_MV(M, V) \
case 0x##M##V: \
bit_extract_fn = &Instruction::IsMaskedValue<0x##M, 0x##V>; \
break;
BitExtractFn DecodeNode::GetBitExtractFunctionHelper(uint32_t x, uint32_t y) {
// Instantiate a templated bit extraction function for every pattern we
// might encounter. If the assertion in the default clause is reached, add a
// new instantiation below using the information in the failure message.
BitExtractFn bit_extract_fn = NULL;
// The arguments x and y represent the mask and value. If y is 0, x is the
// mask. Otherwise, y is the mask, and x is the value to compare against a
// masked result.
uint64_t signature = (static_cast<uint64_t>(y) << 32) | x;
switch (signature) {
INSTANTIATE_TEMPLATE_M(00000002);
INSTANTIATE_TEMPLATE_M(00000010);
INSTANTIATE_TEMPLATE_M(00000060);
INSTANTIATE_TEMPLATE_M(000000df);
INSTANTIATE_TEMPLATE_M(00000100);
INSTANTIATE_TEMPLATE_M(00000200);
INSTANTIATE_TEMPLATE_M(00000400);
INSTANTIATE_TEMPLATE_M(00000800);
INSTANTIATE_TEMPLATE_M(00000c00);
INSTANTIATE_TEMPLATE_M(00000c10);
INSTANTIATE_TEMPLATE_M(00000fc0);
INSTANTIATE_TEMPLATE_M(00001000);
INSTANTIATE_TEMPLATE_M(00001400);
INSTANTIATE_TEMPLATE_M(00001800);
INSTANTIATE_TEMPLATE_M(00001c00);
INSTANTIATE_TEMPLATE_M(00002000);
INSTANTIATE_TEMPLATE_M(00002010);
INSTANTIATE_TEMPLATE_M(00002400);
INSTANTIATE_TEMPLATE_M(00003000);
INSTANTIATE_TEMPLATE_M(00003020);
INSTANTIATE_TEMPLATE_M(00003400);
INSTANTIATE_TEMPLATE_M(00003800);
INSTANTIATE_TEMPLATE_M(00003c00);
INSTANTIATE_TEMPLATE_M(00013000);
INSTANTIATE_TEMPLATE_M(000203e0);
INSTANTIATE_TEMPLATE_M(000303e0);
INSTANTIATE_TEMPLATE_M(00040000);
INSTANTIATE_TEMPLATE_M(00040010);
INSTANTIATE_TEMPLATE_M(00060000);
INSTANTIATE_TEMPLATE_M(00061000);
INSTANTIATE_TEMPLATE_M(00070000);
INSTANTIATE_TEMPLATE_M(000703c0);
INSTANTIATE_TEMPLATE_M(00080000);
INSTANTIATE_TEMPLATE_M(00090000);
INSTANTIATE_TEMPLATE_M(000f0000);
INSTANTIATE_TEMPLATE_M(000f0010);
INSTANTIATE_TEMPLATE_M(00100000);
INSTANTIATE_TEMPLATE_M(00180000);
INSTANTIATE_TEMPLATE_M(001b1c00);
INSTANTIATE_TEMPLATE_M(001f0000);
INSTANTIATE_TEMPLATE_M(001f0018);
INSTANTIATE_TEMPLATE_M(001f2000);
INSTANTIATE_TEMPLATE_M(001f3000);
INSTANTIATE_TEMPLATE_M(00400000);
INSTANTIATE_TEMPLATE_M(00400018);
INSTANTIATE_TEMPLATE_M(00400800);
INSTANTIATE_TEMPLATE_M(00403000);
INSTANTIATE_TEMPLATE_M(00500000);
INSTANTIATE_TEMPLATE_M(00500800);
INSTANTIATE_TEMPLATE_M(00583000);
INSTANTIATE_TEMPLATE_M(005f0000);
INSTANTIATE_TEMPLATE_M(00800000);
INSTANTIATE_TEMPLATE_M(00800400);
INSTANTIATE_TEMPLATE_M(00800c1d);
INSTANTIATE_TEMPLATE_M(0080101f);
INSTANTIATE_TEMPLATE_M(00801c00);
INSTANTIATE_TEMPLATE_M(00803000);
INSTANTIATE_TEMPLATE_M(00803c00);
INSTANTIATE_TEMPLATE_M(009f0000);
INSTANTIATE_TEMPLATE_M(009f2000);
INSTANTIATE_TEMPLATE_M(00c00000);
INSTANTIATE_TEMPLATE_M(00c00010);
INSTANTIATE_TEMPLATE_M(00c0001f);
INSTANTIATE_TEMPLATE_M(00c00200);
INSTANTIATE_TEMPLATE_M(00c00400);
INSTANTIATE_TEMPLATE_M(00c00c00);
INSTANTIATE_TEMPLATE_M(00c00c19);
INSTANTIATE_TEMPLATE_M(00c01000);
INSTANTIATE_TEMPLATE_M(00c01400);
INSTANTIATE_TEMPLATE_M(00c01c00);
INSTANTIATE_TEMPLATE_M(00c02000);
INSTANTIATE_TEMPLATE_M(00c03000);
INSTANTIATE_TEMPLATE_M(00c03c00);
INSTANTIATE_TEMPLATE_M(00c70000);
INSTANTIATE_TEMPLATE_M(00c83000);
INSTANTIATE_TEMPLATE_M(00d00200);
INSTANTIATE_TEMPLATE_M(00d80800);
INSTANTIATE_TEMPLATE_M(00d81800);
INSTANTIATE_TEMPLATE_M(00d81c00);
INSTANTIATE_TEMPLATE_M(00d82800);
INSTANTIATE_TEMPLATE_M(00d82c00);
INSTANTIATE_TEMPLATE_M(00d92400);
INSTANTIATE_TEMPLATE_M(00d93000);
INSTANTIATE_TEMPLATE_M(00db0000);
INSTANTIATE_TEMPLATE_M(00db2000);
INSTANTIATE_TEMPLATE_M(00dc0000);
INSTANTIATE_TEMPLATE_M(00dc2000);
INSTANTIATE_TEMPLATE_M(00df0000);
INSTANTIATE_TEMPLATE_M(40000000);
INSTANTIATE_TEMPLATE_M(40000010);
INSTANTIATE_TEMPLATE_M(40000c00);
INSTANTIATE_TEMPLATE_M(40002000);
INSTANTIATE_TEMPLATE_M(40002010);
INSTANTIATE_TEMPLATE_M(40003000);
INSTANTIATE_TEMPLATE_M(40003c00);
INSTANTIATE_TEMPLATE_M(401f2000);
INSTANTIATE_TEMPLATE_M(40400800);
INSTANTIATE_TEMPLATE_M(40400c00);
INSTANTIATE_TEMPLATE_M(40403c00);
INSTANTIATE_TEMPLATE_M(405f0000);
INSTANTIATE_TEMPLATE_M(40800000);
INSTANTIATE_TEMPLATE_M(40800c00);
INSTANTIATE_TEMPLATE_M(40802000);
INSTANTIATE_TEMPLATE_M(40802010);
INSTANTIATE_TEMPLATE_M(40803400);
INSTANTIATE_TEMPLATE_M(40803c00);
INSTANTIATE_TEMPLATE_M(40c00000);
INSTANTIATE_TEMPLATE_M(40c00400);
INSTANTIATE_TEMPLATE_M(40c00800);
INSTANTIATE_TEMPLATE_M(40c00c00);
INSTANTIATE_TEMPLATE_M(40c00c10);
INSTANTIATE_TEMPLATE_M(40c02000);
INSTANTIATE_TEMPLATE_M(40c02010);
INSTANTIATE_TEMPLATE_M(40c02c00);
INSTANTIATE_TEMPLATE_M(40c03c00);
INSTANTIATE_TEMPLATE_M(40c80000);
INSTANTIATE_TEMPLATE_M(40c90000);
INSTANTIATE_TEMPLATE_M(40cf0000);
INSTANTIATE_TEMPLATE_M(40d02000);
INSTANTIATE_TEMPLATE_M(40d02010);
INSTANTIATE_TEMPLATE_M(40d80000);
INSTANTIATE_TEMPLATE_M(40d81800);
INSTANTIATE_TEMPLATE_M(40dc0000);
INSTANTIATE_TEMPLATE_M(bf20c000);
INSTANTIATE_TEMPLATE_MV(00000006, 00000000);
INSTANTIATE_TEMPLATE_MV(00000006, 00000006);
INSTANTIATE_TEMPLATE_MV(00000007, 00000000);
INSTANTIATE_TEMPLATE_MV(0000001f, 0000001f);
INSTANTIATE_TEMPLATE_MV(00000210, 00000000);
INSTANTIATE_TEMPLATE_MV(000003e0, 00000000);
INSTANTIATE_TEMPLATE_MV(000003e0, 000003e0);
INSTANTIATE_TEMPLATE_MV(000003e2, 000003e0);
INSTANTIATE_TEMPLATE_MV(000003e6, 000003e0);
INSTANTIATE_TEMPLATE_MV(000003e6, 000003e6);
INSTANTIATE_TEMPLATE_MV(00000c00, 00000000);
INSTANTIATE_TEMPLATE_MV(00000fc0, 00000000);
INSTANTIATE_TEMPLATE_MV(000013e0, 00001000);
INSTANTIATE_TEMPLATE_MV(00001c00, 00000000);
INSTANTIATE_TEMPLATE_MV(00002400, 00000000);
INSTANTIATE_TEMPLATE_MV(00003000, 00000000);
INSTANTIATE_TEMPLATE_MV(00003000, 00001000);
INSTANTIATE_TEMPLATE_MV(00003000, 00002000);
INSTANTIATE_TEMPLATE_MV(00003000, 00003000);
INSTANTIATE_TEMPLATE_MV(00003010, 00000000);
INSTANTIATE_TEMPLATE_MV(00003c00, 00003c00);
INSTANTIATE_TEMPLATE_MV(00040010, 00000000);
INSTANTIATE_TEMPLATE_MV(00060000, 00000000);
INSTANTIATE_TEMPLATE_MV(00061000, 00000000);
INSTANTIATE_TEMPLATE_MV(00070000, 00030000);
INSTANTIATE_TEMPLATE_MV(00073ee0, 00033060);
INSTANTIATE_TEMPLATE_MV(00073f9f, 0000001f);
INSTANTIATE_TEMPLATE_MV(000f0000, 00000000);
INSTANTIATE_TEMPLATE_MV(000f0010, 00000000);
INSTANTIATE_TEMPLATE_MV(00100200, 00000000);
INSTANTIATE_TEMPLATE_MV(00100210, 00000000);
INSTANTIATE_TEMPLATE_MV(00160000, 00000000);
INSTANTIATE_TEMPLATE_MV(00170000, 00000000);
INSTANTIATE_TEMPLATE_MV(001c0000, 00000000);
INSTANTIATE_TEMPLATE_MV(001d0000, 00000000);
INSTANTIATE_TEMPLATE_MV(001e0000, 00000000);
INSTANTIATE_TEMPLATE_MV(001f0000, 00000000);
INSTANTIATE_TEMPLATE_MV(001f0000, 00010000);
INSTANTIATE_TEMPLATE_MV(001f0000, 00100000);
INSTANTIATE_TEMPLATE_MV(001f0000, 001f0000);
INSTANTIATE_TEMPLATE_MV(001f3000, 00000000);
INSTANTIATE_TEMPLATE_MV(001f3000, 00001000);
INSTANTIATE_TEMPLATE_MV(001f3000, 001f0000);
INSTANTIATE_TEMPLATE_MV(001f300f, 0000000d);
INSTANTIATE_TEMPLATE_MV(001f301f, 0000000d);
INSTANTIATE_TEMPLATE_MV(001f33e0, 000103e0);
INSTANTIATE_TEMPLATE_MV(001f3800, 00000000);
INSTANTIATE_TEMPLATE_MV(00401000, 00400000);
INSTANTIATE_TEMPLATE_MV(005f3000, 001f0000);
INSTANTIATE_TEMPLATE_MV(005f3000, 001f1000);
INSTANTIATE_TEMPLATE_MV(00800010, 00000000);
INSTANTIATE_TEMPLATE_MV(00800400, 00000000);
INSTANTIATE_TEMPLATE_MV(00800410, 00000000);
INSTANTIATE_TEMPLATE_MV(00803000, 00002000);
INSTANTIATE_TEMPLATE_MV(00870000, 00000000);
INSTANTIATE_TEMPLATE_MV(009f0000, 00010000);
INSTANTIATE_TEMPLATE_MV(00c00000, 00000000);
INSTANTIATE_TEMPLATE_MV(00c00000, 00400000);
INSTANTIATE_TEMPLATE_MV(00c0001f, 00000000);
INSTANTIATE_TEMPLATE_MV(00c001ff, 00000000);
INSTANTIATE_TEMPLATE_MV(00c00200, 00400000);
INSTANTIATE_TEMPLATE_MV(00c0020f, 00400000);
INSTANTIATE_TEMPLATE_MV(00c003e0, 00000000);
INSTANTIATE_TEMPLATE_MV(00c00800, 00000000);
INSTANTIATE_TEMPLATE_MV(00d80800, 00000000);
INSTANTIATE_TEMPLATE_MV(00df0000, 00000000);
INSTANTIATE_TEMPLATE_MV(00df3800, 001f0800);
INSTANTIATE_TEMPLATE_MV(40002000, 40000000);
INSTANTIATE_TEMPLATE_MV(40003c00, 00000000);
INSTANTIATE_TEMPLATE_MV(40040000, 00000000);
INSTANTIATE_TEMPLATE_MV(401f2000, 401f0000);
INSTANTIATE_TEMPLATE_MV(40800c00, 40000400);
INSTANTIATE_TEMPLATE_MV(40c00000, 00000000);
INSTANTIATE_TEMPLATE_MV(40c00000, 00400000);
INSTANTIATE_TEMPLATE_MV(40c00000, 40000000);
INSTANTIATE_TEMPLATE_MV(40c00000, 40800000);
INSTANTIATE_TEMPLATE_MV(40df0000, 00000000);
default: {
static bool printed_preamble = false;
if (!printed_preamble) {
printf("One or more missing template instantiations.\n");
printf(
"Add the following to either GetBitExtractFunction() "
"implementation\n");
printf("in %s near line %d:\n", __FILE__, __LINE__);
printed_preamble = true;
}
if (y == 0) {
printf(" INSTANTIATE_TEMPLATE_M(%08x);\n", x);
bit_extract_fn = &Instruction::ExtractBitsAbsent;
} else {
printf(" INSTANTIATE_TEMPLATE_MV(%08x, %08x);\n", y, x);
bit_extract_fn = &Instruction::IsMaskedValueAbsent;
}
}
}
return bit_extract_fn;
}
#undef INSTANTIATE_TEMPLATE_M
#undef INSTANTIATE_TEMPLATE_MV
bool DecodeNode::TryCompileOptimisedDecodeTable(Decoder* decoder) {
// EitherOr optimisation: if there are only one or two patterns in the table,
// try to optimise the node to exploit that.
size_t table_size = pattern_table_.size();
if ((table_size <= 2) && (GetSampledBitsCount() > 1)) {
// TODO: support 'x' in this optimisation by dropping the sampled bit
// positions before making the mask/value.
if (!PatternContainsSymbol(pattern_table_[0].pattern,
PatternSymbol::kSymbolX) &&
(table_size == 1)) {
// A pattern table consisting of a fixed pattern with no x's, and an
// "otherwise" or absent case. Optimise this into an instruction mask and
// value test.
uint32_t single_decode_mask = 0;
uint32_t single_decode_value = 0;
const std::vector<uint8_t>& bits = GetSampledBits();
// Construct the instruction mask and value from the pattern.
VIXL_ASSERT(bits.size() == GetPatternLength(pattern_table_[0].pattern));
for (size_t i = 0; i < bits.size(); i++) {
single_decode_mask |= 1U << bits[i];
if (GetSymbolAt(pattern_table_[0].pattern, i) ==
PatternSymbol::kSymbol1) {
single_decode_value |= 1U << bits[i];
}
}
BitExtractFn bit_extract_fn =
GetBitExtractFunction(single_decode_mask, single_decode_value);
// Create a compiled node that contains a two entry table for the
// either/or cases.
CreateCompiledNode(bit_extract_fn, 2);
// Set DecodeNode for when the instruction after masking doesn't match the
// value.
CompileNodeForBits(decoder, "unallocated", 0);
// Set DecodeNode for when it does match.
CompileNodeForBits(decoder, pattern_table_[0].handler, 1);
return true;
}
}
return false;
}
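// Worked example for the optimisation above (values are illustrative): with a
// single fixed pattern "101" sampled at bits {2, 5, 9}, the loop builds
// single_decode_mask = 0x224 (bits 2, 5 and 9) and single_decode_value = 0x204
// (bits 2 and 9 set). The compiled node then needs only a two-entry table:
// index 0 ("masked value does not match") maps to "unallocated", and index 1
// maps to the pattern's handler, selected by IsMaskedValue<0x224, 0x204>.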
CompiledDecodeNode* DecodeNode::Compile(Decoder* decoder) {
if (IsLeafNode()) {
// A leaf node is a simple wrapper around a visitor function, with no
// instruction decoding to do.
CreateVisitorNode();
} else if (!TryCompileOptimisedDecodeTable(decoder)) {
// The "otherwise" node is the default next node if no pattern matches.
std::string otherwise = "unallocated";
// For each pattern in pattern_table_, create an entry in matches that
// has a corresponding mask and value for the pattern.
std::vector<MaskValuePair> matches;
for (size_t i = 0; i < pattern_table_.size(); i++) {
matches.push_back(GenerateMaskValuePair(
GenerateOrderedPattern(pattern_table_[i].pattern)));
}
BitExtractFn bit_extract_fn =
GetBitExtractFunction(GenerateSampledBitsMask());
// Create a compiled node that contains a table with an entry for every bit
// pattern.
CreateCompiledNode(bit_extract_fn,
static_cast<size_t>(1) << GetSampledBitsCount());
VIXL_ASSERT(compiled_node_ != NULL);
// When we find a pattern that matches the representation, set the node's
// decode function for that representation to the corresponding function.
for (uint32_t bits = 0; bits < (1U << GetSampledBitsCount()); bits++) {
for (size_t i = 0; i < matches.size(); i++) {
if ((bits & matches[i].first) == matches[i].second) {
// Only one instruction class should match for each value of bits, so
// if we get here, the node pointed to should still be unallocated.
VIXL_ASSERT(compiled_node_->GetNodeForBits(bits) == NULL);
CompileNodeForBits(decoder, pattern_table_[i].handler, bits);
break;
}
}
// If the decode_table_ entry for these bits is still NULL, the
// instruction must be handled by the "otherwise" case, which by default
// is the Unallocated visitor.
if (compiled_node_->GetNodeForBits(bits) == NULL) {
CompileNodeForBits(decoder, otherwise, bits);
}
}
}
VIXL_ASSERT(compiled_node_ != NULL);
return compiled_node_;
}
void CompiledDecodeNode::Decode(const Instruction* instr) const {
if (IsLeafNode()) {
// If this node is a leaf, call the registered visitor function.
VIXL_ASSERT(decoder_ != NULL);
decoder_->VisitNamedInstruction(instr, instruction_name_);
} else {
// Otherwise, using the sampled bit extractor for this node, look up the
// next node in the decode tree, and call its Decode method.
VIXL_ASSERT(bit_extract_fn_ != NULL);
VIXL_ASSERT((instr->*bit_extract_fn_)() < decode_table_size_);
VIXL_ASSERT(decode_table_[(instr->*bit_extract_fn_)()] != NULL);
decode_table_[(instr->*bit_extract_fn_)()]->Decode(instr);
}
}
DecodeNode::MaskValuePair DecodeNode::GenerateMaskValuePair(
uint32_t pattern) const {
uint32_t mask = 0, value = 0;
for (size_t i = 0; i < GetPatternLength(pattern); i++) {
PatternSymbol sym = GetSymbolAt(pattern, i);
mask = (mask << 1) | ((sym == PatternSymbol::kSymbolX) ? 0 : 1);
value = (value << 1) | (static_cast<uint32_t>(sym) & 1);
}
return std::make_pair(mask, value);
}
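// Worked example (illustrative): for a three-symbol pattern whose symbols, in
// GetSymbolAt() order, are 1, x, 0, the loop above yields mask = 0b101 (the
// 'x' position becomes a don't-care) and value = 0b100.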
uint32_t DecodeNode::GenerateOrderedPattern(uint32_t pattern) const {
const std::vector<uint8_t>& sampled_bits = GetSampledBits();
uint64_t temp = 0xffffffffffffffff;
// Place symbols into the field of set bits. Symbols are two bits wide and
// take values 0, 1 or 2, so 3 will represent "no symbol".
for (size_t i = 0; i < sampled_bits.size(); i++) {
int shift = sampled_bits[i] * 2;
temp ^= static_cast<uint64_t>(kEndOfPattern) << shift;
temp |= static_cast<uint64_t>(GetSymbolAt(pattern, i)) << shift;
}
// Iterate over temp and extract new pattern ordered by sample position.
uint32_t result = kEndOfPattern; // End of pattern marker.
// Iterate over the pattern one symbol (two bits) at a time.
for (int i = 62; i >= 0; i -= 2) {
uint32_t sym = (temp >> i) & kPatternSymbolMask;
// If this is a valid symbol, shift into the result.
if (sym != kEndOfPattern) {
result = (result << 2) | sym;
}
}
// The length of the ordered pattern must be the same as the input pattern,
// and the number of sampled bits.
VIXL_ASSERT(GetPatternLength(result) == GetPatternLength(pattern));
VIXL_ASSERT(GetPatternLength(result) == sampled_bits.size());
return result;
}
uint32_t DecodeNode::GenerateSampledBitsMask() const {
uint32_t mask = 0;
for (int bit : GetSampledBits()) {
mask |= 1 << bit;
}
return mask;
}
} // namespace aarch64
} // namespace vixl

File diff suppressed because it is too large.

File diff suppressed because it is too large.

7854
3rdparty/vixl/src/aarch64/logic-aarch64.cc vendored Normal file

File diff suppressed because it is too large.

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@ -0,0 +1,469 @@
// Copyright 2016, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "operands-aarch64.h"
namespace vixl {
namespace aarch64 {
// CPURegList utilities.
CPURegister CPURegList::PopLowestIndex(RegList mask) {
RegList list = list_ & mask;
if (list == 0) return NoCPUReg;
int index = CountTrailingZeros(list);
VIXL_ASSERT(((static_cast<RegList>(1) << index) & list) != 0);
Remove(index);
return CPURegister(index, size_, type_);
}
CPURegister CPURegList::PopHighestIndex(RegList mask) {
RegList list = list_ & mask;
if (list == 0) return NoCPUReg;
int index = CountLeadingZeros(list);
index = kRegListSizeInBits - 1 - index;
VIXL_ASSERT(((static_cast<RegList>(1) << index) & list) != 0);
Remove(index);
return CPURegister(index, size_, type_);
}
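// Editorial sketch (not part of the imported file): popping registers from
// both ends of a list. It assumes the register-list constructor and the
// default all-ones mask arguments declared in operands-aarch64.h.
static void ExampleRegListWalk() {
  CPURegList temps(x10, x11, x12, x13);
  CPURegister lowest = temps.PopLowestIndex();    // x10
  CPURegister highest = temps.PopHighestIndex();  // x13
  USE(lowest, highest);                           // temps now holds x11, x12.
}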
bool CPURegList::IsValid() const {
if (type_ == CPURegister::kNoRegister) {
// We can't use IsEmpty here because that asserts IsValid().
return list_ == 0;
} else {
bool is_valid = true;
// Try to create a CPURegister for each element in the list.
for (int i = 0; i < kRegListSizeInBits; i++) {
if (((list_ >> i) & 1) != 0) {
is_valid &= CPURegister(i, size_, type_).IsValid();
}
}
return is_valid;
}
}
void CPURegList::RemoveCalleeSaved() {
if (GetType() == CPURegister::kRegister) {
Remove(GetCalleeSaved(GetRegisterSizeInBits()));
} else if (GetType() == CPURegister::kVRegister) {
Remove(GetCalleeSavedV(GetRegisterSizeInBits()));
} else {
VIXL_ASSERT(GetType() == CPURegister::kNoRegister);
VIXL_ASSERT(IsEmpty());
// The list must already be empty, so do nothing.
}
}
CPURegList CPURegList::Union(const CPURegList& list_1,
const CPURegList& list_2,
const CPURegList& list_3) {
return Union(list_1, Union(list_2, list_3));
}
CPURegList CPURegList::Union(const CPURegList& list_1,
const CPURegList& list_2,
const CPURegList& list_3,
const CPURegList& list_4) {
return Union(Union(list_1, list_2), Union(list_3, list_4));
}
CPURegList CPURegList::Intersection(const CPURegList& list_1,
const CPURegList& list_2,
const CPURegList& list_3) {
return Intersection(list_1, Intersection(list_2, list_3));
}
CPURegList CPURegList::Intersection(const CPURegList& list_1,
const CPURegList& list_2,
const CPURegList& list_3,
const CPURegList& list_4) {
return Intersection(Intersection(list_1, list_2),
Intersection(list_3, list_4));
}
CPURegList CPURegList::GetCalleeSaved(unsigned size) {
return CPURegList(CPURegister::kRegister, size, 19, 29);
}
CPURegList CPURegList::GetCalleeSavedV(unsigned size) {
return CPURegList(CPURegister::kVRegister, size, 8, 15);
}
CPURegList CPURegList::GetCallerSaved(unsigned size) {
// Registers x0-x18 and lr (x30) are caller-saved.
CPURegList list = CPURegList(CPURegister::kRegister, size, 0, 18);
// Do not use lr directly to avoid initialisation order fiasco bugs for users.
list.Combine(Register(30, kXRegSize));
return list;
}
CPURegList CPURegList::GetCallerSavedV(unsigned size) {
// Registers d0-d7 and d16-d31 are caller-saved.
CPURegList list = CPURegList(CPURegister::kVRegister, size, 0, 7);
list.Combine(CPURegList(CPURegister::kVRegister, size, 16, 31));
return list;
}
const CPURegList kCalleeSaved = CPURegList::GetCalleeSaved();
const CPURegList kCalleeSavedV = CPURegList::GetCalleeSavedV();
const CPURegList kCallerSaved = CPURegList::GetCallerSaved();
const CPURegList kCallerSavedV = CPURegList::GetCallerSavedV();
// Operand.
Operand::Operand(int64_t immediate)
: immediate_(immediate),
reg_(NoReg),
shift_(NO_SHIFT),
extend_(NO_EXTEND),
shift_amount_(0) {}
Operand::Operand(IntegerOperand immediate)
: immediate_(immediate.AsIntN(64)),
reg_(NoReg),
shift_(NO_SHIFT),
extend_(NO_EXTEND),
shift_amount_(0) {}
Operand::Operand(Register reg, Shift shift, unsigned shift_amount)
: reg_(reg),
shift_(shift),
extend_(NO_EXTEND),
shift_amount_(shift_amount) {
VIXL_ASSERT(shift != MSL);
VIXL_ASSERT(reg.Is64Bits() || (shift_amount < kWRegSize));
VIXL_ASSERT(reg.Is32Bits() || (shift_amount < kXRegSize));
VIXL_ASSERT(!reg.IsSP());
}
Operand::Operand(Register reg, Extend extend, unsigned shift_amount)
: reg_(reg),
shift_(NO_SHIFT),
extend_(extend),
shift_amount_(shift_amount) {
VIXL_ASSERT(reg.IsValid());
VIXL_ASSERT(shift_amount <= 4);
VIXL_ASSERT(!reg.IsSP());
// Extend modes SXTX and UXTX require a 64-bit register.
VIXL_ASSERT(reg.Is64Bits() || ((extend != SXTX) && (extend != UXTX)));
}
bool Operand::IsImmediate() const { return reg_.Is(NoReg); }
bool Operand::IsPlainRegister() const {
return reg_.IsValid() &&
(((shift_ == NO_SHIFT) && (extend_ == NO_EXTEND)) ||
// No-op shifts.
((shift_ != NO_SHIFT) && (shift_amount_ == 0)) ||
// No-op extend operations.
// We can't include [US]XTW here without knowing more about the
// context; they are only no-ops for 32-bit operations.
//
// For example, this operand could be replaced with w1:
// __ Add(w0, w0, Operand(w1, UXTW));
// However, no plain register can replace it in this context:
// __ Add(x0, x0, Operand(w1, UXTW));
(((extend_ == UXTX) || (extend_ == SXTX)) && (shift_amount_ == 0)));
}
bool Operand::IsShiftedRegister() const {
return reg_.IsValid() && (shift_ != NO_SHIFT);
}
bool Operand::IsExtendedRegister() const {
return reg_.IsValid() && (extend_ != NO_EXTEND);
}
bool Operand::IsZero() const {
if (IsImmediate()) {
return GetImmediate() == 0;
} else {
return GetRegister().IsZero();
}
}
Operand Operand::ToExtendedRegister() const {
VIXL_ASSERT(IsShiftedRegister());
VIXL_ASSERT((shift_ == LSL) && (shift_amount_ <= 4));
return Operand(reg_, reg_.Is64Bits() ? UXTX : UXTW, shift_amount_);
}
// MemOperand
MemOperand::MemOperand()
: base_(NoReg),
regoffset_(NoReg),
offset_(0),
addrmode_(Offset),
shift_(NO_SHIFT),
extend_(NO_EXTEND) {}
MemOperand::MemOperand(Register base, int64_t offset, AddrMode addrmode)
: base_(base),
regoffset_(NoReg),
offset_(offset),
addrmode_(addrmode),
shift_(NO_SHIFT),
extend_(NO_EXTEND),
shift_amount_(0) {
VIXL_ASSERT(base.Is64Bits() && !base.IsZero());
}
MemOperand::MemOperand(Register base,
Register regoffset,
Extend extend,
unsigned shift_amount)
: base_(base),
regoffset_(regoffset),
offset_(0),
addrmode_(Offset),
shift_(NO_SHIFT),
extend_(extend),
shift_amount_(shift_amount) {
VIXL_ASSERT(base.Is64Bits() && !base.IsZero());
VIXL_ASSERT(!regoffset.IsSP());
VIXL_ASSERT((extend == UXTW) || (extend == SXTW) || (extend == SXTX));
// SXTX extend mode requires a 64-bit offset register.
VIXL_ASSERT(regoffset.Is64Bits() || (extend != SXTX));
}
MemOperand::MemOperand(Register base,
Register regoffset,
Shift shift,
unsigned shift_amount)
: base_(base),
regoffset_(regoffset),
offset_(0),
addrmode_(Offset),
shift_(shift),
extend_(NO_EXTEND),
shift_amount_(shift_amount) {
VIXL_ASSERT(base.Is64Bits() && !base.IsZero());
VIXL_ASSERT(regoffset.Is64Bits() && !regoffset.IsSP());
VIXL_ASSERT(shift == LSL);
}
MemOperand::MemOperand(Register base, const Operand& offset, AddrMode addrmode)
: base_(base),
regoffset_(NoReg),
addrmode_(addrmode),
shift_(NO_SHIFT),
extend_(NO_EXTEND),
shift_amount_(0) {
VIXL_ASSERT(base.Is64Bits() && !base.IsZero());
if (offset.IsImmediate()) {
offset_ = offset.GetImmediate();
} else if (offset.IsShiftedRegister()) {
VIXL_ASSERT((addrmode == Offset) || (addrmode == PostIndex));
regoffset_ = offset.GetRegister();
shift_ = offset.GetShift();
shift_amount_ = offset.GetShiftAmount();
extend_ = NO_EXTEND;
offset_ = 0;
// These assertions match those in the shifted-register constructor.
VIXL_ASSERT(regoffset_.Is64Bits() && !regoffset_.IsSP());
VIXL_ASSERT(shift_ == LSL);
} else {
VIXL_ASSERT(offset.IsExtendedRegister());
VIXL_ASSERT(addrmode == Offset);
regoffset_ = offset.GetRegister();
extend_ = offset.GetExtend();
shift_amount_ = offset.GetShiftAmount();
shift_ = NO_SHIFT;
offset_ = 0;
// These assertions match those in the extended-register constructor.
VIXL_ASSERT(!regoffset_.IsSP());
VIXL_ASSERT((extend_ == UXTW) || (extend_ == SXTW) || (extend_ == SXTX));
VIXL_ASSERT((regoffset_.Is64Bits() || (extend_ != SXTX)));
}
}
bool MemOperand::IsPlainRegister() const {
return IsImmediateOffset() && (GetOffset() == 0);
}
bool MemOperand::IsEquivalentToPlainRegister() const {
if (regoffset_.Is(NoReg)) {
// Immediate offset, pre-index or post-index.
return GetOffset() == 0;
} else if (GetRegisterOffset().IsZero()) {
// Zero register offset, pre-index or post-index.
// We can ignore shift and extend options because they all result in zero.
return true;
}
return false;
}
bool MemOperand::IsImmediateOffset() const {
return (addrmode_ == Offset) && regoffset_.Is(NoReg);
}
bool MemOperand::IsRegisterOffset() const {
return (addrmode_ == Offset) && !regoffset_.Is(NoReg);
}
bool MemOperand::IsPreIndex() const { return addrmode_ == PreIndex; }
bool MemOperand::IsPostIndex() const { return addrmode_ == PostIndex; }
bool MemOperand::IsImmediatePreIndex() const {
return IsPreIndex() && regoffset_.Is(NoReg);
}
bool MemOperand::IsImmediatePostIndex() const {
return IsPostIndex() && regoffset_.Is(NoReg);
}
void MemOperand::AddOffset(int64_t offset) {
VIXL_ASSERT(IsImmediateOffset());
offset_ += offset;
}
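// Editorial sketch (not part of the imported file): the addressing forms that
// the predicates above distinguish, built with the constructors defined
// earlier in this file.
static void ExampleMemOperandForms() {
  MemOperand imm_offset(x0, 16);             // [x0, #16]        IsImmediateOffset()
  MemOperand reg_offset(x0, x1, LSL, 3);     // [x0, x1, LSL #3] IsRegisterOffset()
  MemOperand pre_index(x0, 16, PreIndex);    // [x0, #16]!       IsImmediatePreIndex()
  MemOperand post_index(x0, 16, PostIndex);  // [x0], #16        IsImmediatePostIndex()
  USE(imm_offset, reg_offset, pre_index, post_index);
}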
bool SVEMemOperand::IsValid() const {
#ifdef VIXL_DEBUG
{
// It should not be possible for an SVEMemOperand to match multiple types.
int count = 0;
if (IsScalarPlusImmediate()) count++;
if (IsScalarPlusScalar()) count++;
if (IsScalarPlusVector()) count++;
if (IsVectorPlusImmediate()) count++;
if (IsVectorPlusScalar()) count++;
if (IsVectorPlusVector()) count++;
VIXL_ASSERT(count <= 1);
}
#endif
// We can't have a register _and_ an immediate offset.
if ((offset_ != 0) && (!regoffset_.IsNone())) return false;
if (shift_amount_ != 0) {
// Only shift and extend modifiers can take a shift amount.
switch (mod_) {
case NO_SVE_OFFSET_MODIFIER:
case SVE_MUL_VL:
return false;
case SVE_LSL:
case SVE_UXTW:
case SVE_SXTW:
// Fall through.
break;
}
}
return IsScalarPlusImmediate() || IsScalarPlusScalar() ||
IsScalarPlusVector() || IsVectorPlusImmediate() ||
IsVectorPlusScalar() || IsVectorPlusVector();
}
bool SVEMemOperand::IsEquivalentToScalar() const {
if (IsScalarPlusImmediate()) {
return GetImmediateOffset() == 0;
}
if (IsScalarPlusScalar()) {
// We can ignore the shift because it will still result in zero.
return GetScalarOffset().IsZero();
}
// Forms involving vectors are never equivalent to a single scalar.
return false;
}
bool SVEMemOperand::IsPlainRegister() const {
if (IsScalarPlusImmediate()) {
return GetImmediateOffset() == 0;
}
return false;
}
GenericOperand::GenericOperand(const CPURegister& reg)
: cpu_register_(reg), mem_op_size_(0) {
if (reg.IsQ()) {
VIXL_ASSERT(reg.GetSizeInBits() > static_cast<int>(kXRegSize));
// Support for Q registers is not implemented yet.
VIXL_UNIMPLEMENTED();
}
}
GenericOperand::GenericOperand(const MemOperand& mem_op, size_t mem_op_size)
: cpu_register_(NoReg), mem_op_(mem_op), mem_op_size_(mem_op_size) {
if (mem_op_size_ > kXRegSizeInBytes) {
// We only support generic operands up to the size of X registers.
VIXL_UNIMPLEMENTED();
}
}
bool GenericOperand::Equals(const GenericOperand& other) const {
if (!IsValid() || !other.IsValid()) {
// Two invalid generic operands are considered equal.
return !IsValid() && !other.IsValid();
}
if (IsCPURegister() && other.IsCPURegister()) {
return GetCPURegister().Is(other.GetCPURegister());
} else if (IsMemOperand() && other.IsMemOperand()) {
return GetMemOperand().Equals(other.GetMemOperand()) &&
(GetMemOperandSizeInBytes() == other.GetMemOperandSizeInBytes());
}
return false;
}
} // namespace aarch64
} // namespace vixl


@ -0,0 +1,197 @@
// Copyright 2018, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
#include "simulator-aarch64.h"
#include "utils-vixl.h"
namespace vixl {
namespace aarch64 {
// Randomly generated example keys for simulating only.
const Simulator::PACKey Simulator::kPACKeyIA = {0xc31718727de20f71,
0xab9fd4e14b2fec51,
0};
const Simulator::PACKey Simulator::kPACKeyIB = {0xeebb163b474e04c8,
0x5267ac6fc280fb7c,
1};
const Simulator::PACKey Simulator::kPACKeyDA = {0x5caef808deb8b1e2,
0xd347cbc06b7b0f77,
0};
const Simulator::PACKey Simulator::kPACKeyDB = {0xe06aa1a949ba8cc7,
0xcfde69e3db6d0432,
1};
// The general PAC key isn't intended to be used with AuthPAC, so we give it
// an invalid key number; this triggers an assertion if it is used incorrectly.
const Simulator::PACKey Simulator::kPACKeyGA = {0xfcd98a44d564b3d5,
0x6c56df1904bf0ddc,
-1};
static uint64_t GetNibble(uint64_t in_data, int position) {
return (in_data >> position) & 0xf;
}
static uint64_t ShuffleNibbles(uint64_t in_data) {
static int in_positions[16] =
{4, 36, 52, 40, 44, 0, 24, 12, 56, 60, 8, 32, 16, 28, 20, 48};
uint64_t out_data = 0;
for (int i = 0; i < 16; i++) {
out_data |= GetNibble(in_data, in_positions[i]) << (4 * i);
}
return out_data;
}
static uint64_t SubstituteNibbles(uint64_t in_data) {
// Randomly chosen substitutes.
static uint64_t subs[16] =
{4, 7, 3, 9, 10, 14, 0, 1, 15, 2, 8, 6, 12, 5, 11, 13};
uint64_t out_data = 0;
for (int i = 0; i < 16; i++) {
int index = (in_data >> (4 * i)) & 0xf;
out_data |= subs[index] << (4 * i);
}
return out_data;
}
// Rotate nibble to the left by the amount specified.
static uint64_t RotNibble(uint64_t in_cell, int amount) {
VIXL_ASSERT((amount >= 0) && (amount <= 3));
in_cell &= 0xf;
uint64_t temp = (in_cell << 4) | in_cell;
return (temp >> (4 - amount)) & 0xf;
}
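// Worked example: RotNibble(0x9, 1) duplicates the nibble into 0x99, shifts
// right by (4 - 1) and masks, giving 0x3 (0b1001 rotated left by one bit).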
static uint64_t BigShuffle(uint64_t in_data) {
uint64_t out_data = 0;
for (int i = 0; i < 4; i++) {
uint64_t n12 = GetNibble(in_data, 4 * (i + 12));
uint64_t n8 = GetNibble(in_data, 4 * (i + 8));
uint64_t n4 = GetNibble(in_data, 4 * (i + 4));
uint64_t n0 = GetNibble(in_data, 4 * (i + 0));
uint64_t t0 = RotNibble(n8, 2) ^ RotNibble(n4, 1) ^ RotNibble(n0, 1);
uint64_t t1 = RotNibble(n12, 1) ^ RotNibble(n4, 2) ^ RotNibble(n0, 1);
uint64_t t2 = RotNibble(n12, 2) ^ RotNibble(n8, 1) ^ RotNibble(n0, 1);
uint64_t t3 = RotNibble(n12, 1) ^ RotNibble(n8, 1) ^ RotNibble(n4, 2);
out_data |= t3 << (4 * (i + 0));
out_data |= t2 << (4 * (i + 4));
out_data |= t1 << (4 * (i + 8));
out_data |= t0 << (4 * (i + 12));
}
return out_data;
}
// A simple, non-standard hash function invented for simulating. It mixes
// reasonably well; however, it is unlikely to be cryptographically secure and
// may have a higher collision chance than other hashing algorithms.
uint64_t Simulator::ComputePAC(uint64_t data, uint64_t context, PACKey key) {
uint64_t working_value = data ^ key.high;
working_value = BigShuffle(working_value);
working_value = ShuffleNibbles(working_value);
working_value ^= key.low;
working_value = ShuffleNibbles(working_value);
working_value = BigShuffle(working_value);
working_value ^= context;
working_value = SubstituteNibbles(working_value);
working_value = BigShuffle(working_value);
working_value = SubstituteNibbles(working_value);
return working_value;
}
// The TTBR is selected by bit 63 or 55 depending on TBI for pointers without
// codes, but is always 55 once a PAC code is added to a pointer. For this
// reason, it must be calculated at the call site.
uint64_t Simulator::CalculatePACMask(uint64_t ptr, PointerType type, int ttbr) {
int bottom_pac_bit = GetBottomPACBit(ptr, ttbr);
int top_pac_bit = GetTopPACBit(ptr, type);
return ExtractUnsignedBitfield64(top_pac_bit,
bottom_pac_bit,
0xffffffffffffffff & ~kTTBRMask)
<< bottom_pac_bit;
}
uint64_t Simulator::AuthPAC(uint64_t ptr,
uint64_t context,
PACKey key,
PointerType type) {
VIXL_ASSERT((key.number == 0) || (key.number == 1));
uint64_t pac_mask = CalculatePACMask(ptr, type, (ptr >> 55) & 1);
uint64_t original_ptr =
((ptr & kTTBRMask) == 0) ? (ptr & ~pac_mask) : (ptr | pac_mask);
uint64_t pac = ComputePAC(original_ptr, context, key);
uint64_t error_code = 1 << key.number;
if ((pac & pac_mask) == (ptr & pac_mask)) {
return original_ptr;
} else {
int error_lsb = GetTopPACBit(ptr, type) - 2;
uint64_t error_mask = UINT64_C(0x3) << error_lsb;
return (original_ptr & ~error_mask) | (error_code << error_lsb);
}
}
uint64_t Simulator::AddPAC(uint64_t ptr,
uint64_t context,
PACKey key,
PointerType type) {
int top_pac_bit = GetTopPACBit(ptr, type);
// TODO: Properly handle the case where extension bits are bad and TBI is
// turned off, and also test me.
VIXL_ASSERT(HasTBI(ptr, type));
int ttbr = (ptr >> 55) & 1;
uint64_t pac_mask = CalculatePACMask(ptr, type, ttbr);
uint64_t ext_ptr = (ttbr == 0) ? (ptr & ~pac_mask) : (ptr | pac_mask);
uint64_t pac = ComputePAC(ext_ptr, context, key);
// If the pointer isn't all zeroes or all ones in the PAC bitfield, corrupt
// the resulting code.
if (((ptr & (pac_mask | kTTBRMask)) != 0x0) &&
((~ptr & (pac_mask | kTTBRMask)) != 0x0)) {
pac ^= UINT64_C(1) << (top_pac_bit - 1);
}
uint64_t ttbr_shifted = static_cast<uint64_t>(ttbr) << 55;
return (pac & pac_mask) | ttbr_shifted | (ptr & ~pac_mask);
}
uint64_t Simulator::StripPAC(uint64_t ptr, PointerType type) {
uint64_t pac_mask = CalculatePACMask(ptr, type, (ptr >> 55) & 1);
return ((ptr & kTTBRMask) == 0) ? (ptr & ~pac_mask) : (ptr | pac_mask);
}
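// Usage sketch (illustrative; `sim` stands for an existing Simulator, and the
// keys are the simulation-only examples defined above):
//   uint64_t signed_ptr = sim.AddPAC(ptr, context, kPACKeyIA, type);
//   uint64_t checked = sim.AuthPAC(signed_ptr, context, kPACKeyIA, type);
//   // checked == ptr when the code authenticates; otherwise AuthPAC plants a
//   // key-specific error code just below the top PAC bit.
//   uint64_t stripped = sim.StripPAC(signed_ptr, type);  // Drop the PAC only.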
} // namespace aarch64
} // namespace vixl
#endif // VIXL_INCLUDE_SIMULATOR_AARCH64


@ -0,0 +1,321 @@
// Copyright 2019, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <sstream>
#include <string>
#include "registers-aarch64.h"
namespace vixl {
namespace aarch64 {
std::string CPURegister::GetArchitecturalName() const {
std::ostringstream name;
if (IsZRegister()) {
name << 'z' << GetCode();
if (HasLaneSize()) {
name << '.' << GetLaneSizeSymbol();
}
} else if (IsPRegister()) {
name << 'p' << GetCode();
if (HasLaneSize()) {
name << '.' << GetLaneSizeSymbol();
}
switch (qualifiers_) {
case kNoQualifiers:
break;
case kMerging:
name << "/m";
break;
case kZeroing:
name << "/z";
break;
}
} else {
VIXL_UNIMPLEMENTED();
}
return name.str();
}
unsigned CPURegister::GetMaxCodeFor(CPURegister::RegisterBank bank) {
switch (bank) {
case kNoRegisterBank:
return 0;
case kRRegisterBank:
return Register::GetMaxCode();
case kVRegisterBank:
#ifdef VIXL_HAS_CONSTEXPR
VIXL_STATIC_ASSERT(VRegister::GetMaxCode() == ZRegister::GetMaxCode());
#else
VIXL_ASSERT(VRegister::GetMaxCode() == ZRegister::GetMaxCode());
#endif
return VRegister::GetMaxCode();
case kPRegisterBank:
return PRegister::GetMaxCode();
}
VIXL_UNREACHABLE();
return 0;
}
bool CPURegister::IsValidRegister() const {
return ((code_ < kNumberOfRegisters) || (code_ == kSPRegInternalCode)) &&
(bank_ == kRRegisterBank) &&
((size_ == kEncodedWRegSize) || (size_ == kEncodedXRegSize)) &&
(qualifiers_ == kNoQualifiers) && (lane_size_ == size_);
}
bool CPURegister::IsValidVRegister() const {
VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize);
return (code_ < kNumberOfVRegisters) && (bank_ == kVRegisterBank) &&
((size_ >= kEncodedBRegSize) && (size_ <= kEncodedQRegSize)) &&
(qualifiers_ == kNoQualifiers) &&
(lane_size_ != kEncodedUnknownSize) && (lane_size_ <= size_);
}
bool CPURegister::IsValidFPRegister() const {
return IsValidVRegister() && IsFPRegister();
}
bool CPURegister::IsValidZRegister() const {
VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize);
// Z registers are valid with or without a lane size, so we don't need to
// check lane_size_.
return (code_ < kNumberOfZRegisters) && (bank_ == kVRegisterBank) &&
(size_ == kEncodedUnknownSize) && (qualifiers_ == kNoQualifiers);
}
bool CPURegister::IsValidPRegister() const {
VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize);
// P registers are valid with or without a lane size, so we don't need to
// check lane_size_.
return (code_ < kNumberOfPRegisters) && (bank_ == kPRegisterBank) &&
(size_ == kEncodedUnknownSize) &&
((qualifiers_ == kNoQualifiers) || (qualifiers_ == kMerging) ||
(qualifiers_ == kZeroing));
}
bool CPURegister::IsValid() const {
return IsValidRegister() || IsValidVRegister() || IsValidZRegister() ||
IsValidPRegister();
}
// Most coercions simply invoke the necessary constructor.
#define VIXL_CPUREG_COERCION_LIST(U) \
U(Register, W, R) \
U(Register, X, R) \
U(VRegister, B, V) \
U(VRegister, H, V) \
U(VRegister, S, V) \
U(VRegister, D, V) \
U(VRegister, Q, V) \
U(VRegister, V, V) \
U(ZRegister, Z, V) \
U(PRegister, P, P)
#define VIXL_DEFINE_CPUREG_COERCION(RET_TYPE, CTOR_TYPE, BANK) \
RET_TYPE CPURegister::CTOR_TYPE() const { \
VIXL_ASSERT(GetBank() == k##BANK##RegisterBank); \
return CTOR_TYPE##Register(GetCode()); \
}
VIXL_CPUREG_COERCION_LIST(VIXL_DEFINE_CPUREG_COERCION)
#undef VIXL_CPUREG_COERCION_LIST
#undef VIXL_DEFINE_CPUREG_COERCION
// NEON lane-format coercions always return VRegisters.
#define VIXL_CPUREG_NEON_COERCION_LIST(V) \
V(8, B) \
V(16, B) \
V(2, H) \
V(4, H) \
V(8, H) \
V(2, S) \
V(4, S) \
V(1, D) \
V(2, D)
#define VIXL_DEFINE_CPUREG_NEON_COERCION(LANES, LANE_TYPE) \
VRegister VRegister::V##LANES##LANE_TYPE() const { \
VIXL_ASSERT(IsVRegister()); \
return VRegister(GetCode(), LANES * k##LANE_TYPE##RegSize, LANES); \
}
VIXL_CPUREG_NEON_COERCION_LIST(VIXL_DEFINE_CPUREG_NEON_COERCION)
#undef VIXL_CPUREG_NEON_COERCION_LIST
#undef VIXL_DEFINE_CPUREG_NEON_COERCION
// Semantic type coercion for sdot and udot.
// TODO: Use the qualifiers_ field to distinguish this from ::S().
VRegister VRegister::S4B() const {
VIXL_ASSERT(IsVRegister());
return SRegister(GetCode());
}
bool AreAliased(const CPURegister& reg1,
const CPURegister& reg2,
const CPURegister& reg3,
const CPURegister& reg4,
const CPURegister& reg5,
const CPURegister& reg6,
const CPURegister& reg7,
const CPURegister& reg8) {
int number_of_valid_regs = 0;
int number_of_valid_vregs = 0;
int number_of_valid_pregs = 0;
RegList unique_regs = 0;
RegList unique_vregs = 0;
RegList unique_pregs = 0;
const CPURegister regs[] = {reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8};
for (size_t i = 0; i < ArrayLength(regs); i++) {
switch (regs[i].GetBank()) {
case CPURegister::kRRegisterBank:
number_of_valid_regs++;
unique_regs |= regs[i].GetBit();
break;
case CPURegister::kVRegisterBank:
number_of_valid_vregs++;
unique_vregs |= regs[i].GetBit();
break;
case CPURegister::kPRegisterBank:
number_of_valid_pregs++;
unique_pregs |= regs[i].GetBit();
break;
case CPURegister::kNoRegisterBank:
VIXL_ASSERT(regs[i].IsNone());
break;
}
}
int number_of_unique_regs = CountSetBits(unique_regs);
int number_of_unique_vregs = CountSetBits(unique_vregs);
int number_of_unique_pregs = CountSetBits(unique_pregs);
VIXL_ASSERT(number_of_valid_regs >= number_of_unique_regs);
VIXL_ASSERT(number_of_valid_vregs >= number_of_unique_vregs);
VIXL_ASSERT(number_of_valid_pregs >= number_of_unique_pregs);
return (number_of_valid_regs != number_of_unique_regs) ||
(number_of_valid_vregs != number_of_unique_vregs) ||
(number_of_valid_pregs != number_of_unique_pregs);
}
bool AreSameSizeAndType(const CPURegister& reg1,
const CPURegister& reg2,
const CPURegister& reg3,
const CPURegister& reg4,
const CPURegister& reg5,
const CPURegister& reg6,
const CPURegister& reg7,
const CPURegister& reg8) {
VIXL_ASSERT(reg1.IsValid());
bool match = true;
match &= !reg2.IsValid() || reg2.IsSameSizeAndType(reg1);
match &= !reg3.IsValid() || reg3.IsSameSizeAndType(reg1);
match &= !reg4.IsValid() || reg4.IsSameSizeAndType(reg1);
match &= !reg5.IsValid() || reg5.IsSameSizeAndType(reg1);
match &= !reg6.IsValid() || reg6.IsSameSizeAndType(reg1);
match &= !reg7.IsValid() || reg7.IsSameSizeAndType(reg1);
match &= !reg8.IsValid() || reg8.IsSameSizeAndType(reg1);
return match;
}
bool AreEven(const CPURegister& reg1,
const CPURegister& reg2,
const CPURegister& reg3,
const CPURegister& reg4,
const CPURegister& reg5,
const CPURegister& reg6,
const CPURegister& reg7,
const CPURegister& reg8) {
VIXL_ASSERT(reg1.IsValid());
bool even = (reg1.GetCode() % 2) == 0;
even &= !reg2.IsValid() || ((reg2.GetCode() % 2) == 0);
even &= !reg3.IsValid() || ((reg3.GetCode() % 2) == 0);
even &= !reg4.IsValid() || ((reg4.GetCode() % 2) == 0);
even &= !reg5.IsValid() || ((reg5.GetCode() % 2) == 0);
even &= !reg6.IsValid() || ((reg6.GetCode() % 2) == 0);
even &= !reg7.IsValid() || ((reg7.GetCode() % 2) == 0);
even &= !reg8.IsValid() || ((reg8.GetCode() % 2) == 0);
return even;
}
bool AreConsecutive(const CPURegister& reg1,
const CPURegister& reg2,
const CPURegister& reg3,
const CPURegister& reg4) {
VIXL_ASSERT(reg1.IsValid());
if (!reg2.IsValid()) {
return true;
} else if (reg2.GetCode() !=
((reg1.GetCode() + 1) % (reg1.GetMaxCode() + 1))) {
return false;
}
if (!reg3.IsValid()) {
return true;
} else if (reg3.GetCode() !=
((reg2.GetCode() + 1) % (reg1.GetMaxCode() + 1))) {
return false;
}
if (!reg4.IsValid()) {
return true;
} else if (reg4.GetCode() !=
((reg3.GetCode() + 1) % (reg1.GetMaxCode() + 1))) {
return false;
}
return true;
}
bool AreSameFormat(const CPURegister& reg1,
const CPURegister& reg2,
const CPURegister& reg3,
const CPURegister& reg4) {
VIXL_ASSERT(reg1.IsValid());
bool match = true;
match &= !reg2.IsValid() || reg2.IsSameFormat(reg1);
match &= !reg3.IsValid() || reg3.IsSameFormat(reg1);
match &= !reg4.IsValid() || reg4.IsSameFormat(reg1);
return match;
}
bool AreSameLaneSize(const CPURegister& reg1,
const CPURegister& reg2,
const CPURegister& reg3,
const CPURegister& reg4) {
VIXL_ASSERT(reg1.IsValid());
bool match = true;
match &=
!reg2.IsValid() || (reg2.GetLaneSizeInBits() == reg1.GetLaneSizeInBits());
match &=
!reg3.IsValid() || (reg3.GetLaneSizeInBits() == reg1.GetLaneSizeInBits());
match &=
!reg4.IsValid() || (reg4.GetLaneSizeInBits() == reg1.GetLaneSizeInBits());
return match;
}
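// Illustrative expectations for the helpers above (editorial note, not part
// of the imported file):
//   AreAliased(x0, x1, x2)       -> false (three distinct register codes)
//   AreAliased(x0, w0)           -> true  (same code in the same bank)
//   AreSameSizeAndType(w0, w1)   -> true
//   AreEven(x0, x2, x4)          -> true
//   AreConsecutive(q8, q9, q10)  -> true  (codes 8, 9 and 10)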
} // namespace aarch64
} // namespace vixl

93
3rdparty/vixl/src/code-buffer-vixl.cc vendored Normal file

@ -0,0 +1,93 @@
// Copyright 2017, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "code-buffer-vixl.h"
#include "utils-vixl.h"
namespace vixl {
CodeBuffer::CodeBuffer(byte* buffer, size_t capacity)
: buffer_(reinterpret_cast<byte*>(buffer)),
cursor_(reinterpret_cast<byte*>(buffer)),
dirty_(false),
capacity_(capacity) {
VIXL_ASSERT(buffer_ != NULL);
}
CodeBuffer::~CodeBuffer() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
VIXL_ASSERT(!IsDirty());
}
void CodeBuffer::EmitString(const char* string) {
const auto len = strlen(string) + 1;
VIXL_ASSERT(HasSpaceFor(len));
char* dst = reinterpret_cast<char*>(cursor_);
dirty_ = true;
memcpy(dst, string, len);
cursor_ = reinterpret_cast<byte*>(dst + len);
}
void CodeBuffer::EmitData(const void* data, size_t size) {
VIXL_ASSERT(HasSpaceFor(size));
dirty_ = true;
memcpy(cursor_, data, size);
cursor_ = cursor_ + size;
}
void CodeBuffer::UpdateData(size_t offset, const void* data, size_t size) {
dirty_ = true;
byte* dst = buffer_ + offset;
VIXL_ASSERT(dst + size <= cursor_);
memcpy(dst, data, size);
}
void CodeBuffer::Align() {
byte* end = AlignUp(cursor_, 4);
const size_t padding_size = end - cursor_;
VIXL_ASSERT(padding_size <= 4);
EmitZeroedBytes(static_cast<int>(padding_size));
}
void CodeBuffer::EmitZeroedBytes(int n) {
VIXL_ASSERT(HasSpaceFor(n));
dirty_ = true;
memset(cursor_, 0, n);
cursor_ += n;
}
void CodeBuffer::Reset() {
cursor_ = buffer_;
SetClean();
}
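// Editorial sketch (not part of the imported file): driving a fixed,
// externally owned buffer with the methods above. The instruction word is the
// AArch64 NOP encoding; the byte type and constructor are as defined above.
static void ExampleCodeBufferUsage() {
  byte storage[64];
  CodeBuffer buffer(storage, sizeof(storage));
  uint32_t nop = 0xd503201f;
  buffer.EmitData(&nop, sizeof(nop));  // Copies four bytes, advances cursor_.
  buffer.Align();                      // Pads with zeroes to a 4-byte boundary.
  buffer.Reset();                      // Rewinds and clears the dirty flag, so
                                       // the destructor's !IsDirty() holds.
}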
} // namespace vixl


@ -0,0 +1,146 @@
// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "compiler-intrinsics-vixl.h"
#include "utils-vixl.h"
namespace vixl {
int CountLeadingSignBitsFallBack(int64_t value, int width) {
VIXL_ASSERT(IsPowerOf2(width) && (width <= 64));
if (width < 64) VIXL_ASSERT(IsIntN(width, value));
if (value >= 0) {
return CountLeadingZeros(value, width) - 1;
} else {
return CountLeadingZeros(~value, width) - 1;
}
}
int CountLeadingZerosFallBack(uint64_t value, int width) {
VIXL_ASSERT(IsPowerOf2(width) && (width <= 64));
if (value == 0) {
return width;
}
int count = 0;
value = value << (64 - width);
if ((value & UINT64_C(0xffffffff00000000)) == 0) {
count += 32;
value = value << 32;
}
if ((value & UINT64_C(0xffff000000000000)) == 0) {
count += 16;
value = value << 16;
}
if ((value & UINT64_C(0xff00000000000000)) == 0) {
count += 8;
value = value << 8;
}
if ((value & UINT64_C(0xf000000000000000)) == 0) {
count += 4;
value = value << 4;
}
if ((value & UINT64_C(0xc000000000000000)) == 0) {
count += 2;
value = value << 2;
}
if ((value & UINT64_C(0x8000000000000000)) == 0) {
count += 1;
}
count += (value == 0);
return count;
}
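// Worked example: CountLeadingZerosFallBack(1, 32) left-aligns the value into
// the top 32 bits of a 64-bit word, then the halving checks accumulate
// 16 + 8 + 4 + 2 + 1, returning 31.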
int CountSetBitsFallBack(uint64_t value, int width) {
VIXL_ASSERT(IsPowerOf2(width) && (width <= 64));
// Mask out unused bits to ensure that they are not counted.
value &= (UINT64_C(0xffffffffffffffff) >> (64 - width));
// Add up the set bits.
// The algorithm works by adding pairs of bit fields together iteratively,
// where the size of each bit field doubles each time.
// An example for an 8-bit value:
// Bits: h g f e d c b a
// \ | \ | \ | \ |
// value = h+g f+e d+c b+a
// \ | \ |
// value = h+g+f+e d+c+b+a
// \ |
// value = h+g+f+e+d+c+b+a
const uint64_t kMasks[] = {
UINT64_C(0x5555555555555555),
UINT64_C(0x3333333333333333),
UINT64_C(0x0f0f0f0f0f0f0f0f),
UINT64_C(0x00ff00ff00ff00ff),
UINT64_C(0x0000ffff0000ffff),
UINT64_C(0x00000000ffffffff),
};
for (unsigned i = 0; i < (sizeof(kMasks) / sizeof(kMasks[0])); i++) {
int shift = 1 << i;
value = ((value >> shift) & kMasks[i]) + (value & kMasks[i]);
}
return static_cast<int>(value);
}
int CountTrailingZerosFallBack(uint64_t value, int width) {
VIXL_ASSERT(IsPowerOf2(width) && (width <= 64));
int count = 0;
value = value << (64 - width);
if ((value & UINT64_C(0xffffffff)) == 0) {
count += 32;
value = value >> 32;
}
if ((value & 0xffff) == 0) {
count += 16;
value = value >> 16;
}
if ((value & 0xff) == 0) {
count += 8;
value = value >> 8;
}
if ((value & 0xf) == 0) {
count += 4;
value = value >> 4;
}
if ((value & 0x3) == 0) {
count += 2;
value = value >> 2;
}
if ((value & 0x1) == 0) {
count += 1;
}
count += (value == 0);
return count - (64 - width);
}
} // namespace vixl

159
3rdparty/vixl/src/cpu-features.cc vendored Normal file

@ -0,0 +1,159 @@
// Copyright 2018, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <ostream>
#include "cpu-features.h"
#include "globals-vixl.h"
#include "utils-vixl.h"
#if defined(__aarch64__) && defined(VIXL_INCLUDE_TARGET_AARCH64)
#include "aarch64/cpu-aarch64.h"
#define VIXL_USE_AARCH64_CPU_HELPERS
#endif
namespace vixl {
CPUFeatures CPUFeatures::All() {
CPUFeatures all;
all.features_.set();
return all;
}
CPUFeatures CPUFeatures::InferFromIDRegisters() {
// This function assumes that kIDRegisterEmulation is available.
CPUFeatures features(CPUFeatures::kIDRegisterEmulation);
#ifdef VIXL_USE_AARCH64_CPU_HELPERS
// Note that the Linux kernel filters these values during emulation, so the
// results may not exactly match the expected hardware support.
features.Combine(aarch64::CPU::InferCPUFeaturesFromIDRegisters());
#endif
return features;
}
CPUFeatures CPUFeatures::InferFromOS(QueryIDRegistersOption option) {
#ifdef VIXL_USE_AARCH64_CPU_HELPERS
return aarch64::CPU::InferCPUFeaturesFromOS(option);
#else
USE(option);
return CPUFeatures();
#endif
}
void CPUFeatures::Combine(const CPUFeatures& other) {
features_ |= other.features_;
}
void CPUFeatures::Combine(Feature feature) {
if (feature != CPUFeatures::kNone) features_.set(feature);
}
void CPUFeatures::Remove(const CPUFeatures& other) {
features_ &= ~other.features_;
}
void CPUFeatures::Remove(Feature feature) {
if (feature != CPUFeatures::kNone) features_.reset(feature);
}
bool CPUFeatures::Has(const CPUFeatures& other) const {
return (features_ & other.features_) == other.features_;
}
bool CPUFeatures::Has(Feature feature) const {
return (feature == CPUFeatures::kNone) || features_[feature];
}
size_t CPUFeatures::Count() const { return features_.count(); }
std::ostream& operator<<(std::ostream& os, CPUFeatures::Feature feature) {
// clang-format off
switch (feature) {
#define VIXL_FORMAT_FEATURE(SYMBOL, NAME, CPUINFO) \
case CPUFeatures::SYMBOL: \
return os << NAME;
VIXL_CPU_FEATURE_LIST(VIXL_FORMAT_FEATURE)
#undef VIXL_FORMAT_FEATURE
case CPUFeatures::kNone:
return os << "none";
case CPUFeatures::kNumberOfFeatures:
VIXL_UNREACHABLE();
}
// clang-format on
VIXL_UNREACHABLE();
return os;
}
CPUFeatures::const_iterator CPUFeatures::begin() const {
// For iterators in general, it's undefined to increment `end()`, but here we
// control the implementation and it is safe to do this.
return ++end();
}
CPUFeatures::const_iterator CPUFeatures::end() const {
return const_iterator(this, kNone);
}
std::ostream& operator<<(std::ostream& os, const CPUFeatures& features) {
bool need_separator = false;
for (CPUFeatures::Feature feature : features) {
if (need_separator) os << ", ";
need_separator = true;
os << feature;
}
return os;
}
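// Illustrative usage of the iteration and stream support above (a sketch, not
// from the upstream VIXL sources; it assumes the multi-feature constructor and
// the kFP, kNEON and kAES symbols from VIXL_CPU_FEATURE_LIST in
// cpu-features.h, and a stream such as std::cout from <iostream>):
//   CPUFeatures features(CPUFeatures::kFP, CPUFeatures::kNEON);
//   features.Combine(CPUFeatures::kAES);
//   std::cout << features;  // Prints a comma-separated list, e.g. "FP, NEON, AES".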
bool CPUFeaturesConstIterator::operator==(
const CPUFeaturesConstIterator& other) const {
VIXL_ASSERT(IsValid());
return (cpu_features_ == other.cpu_features_) && (feature_ == other.feature_);
}
CPUFeaturesConstIterator& CPUFeaturesConstIterator::operator++() { // Prefix
VIXL_ASSERT(IsValid());
do {
// Find the next feature. The order is unspecified.
feature_ = static_cast<CPUFeatures::Feature>(feature_ + 1);
if (feature_ == CPUFeatures::kNumberOfFeatures) {
feature_ = CPUFeatures::kNone;
VIXL_STATIC_ASSERT(CPUFeatures::kNone == -1);
}
VIXL_ASSERT(CPUFeatures::kNone <= feature_);
VIXL_ASSERT(feature_ < CPUFeatures::kNumberOfFeatures);
// cpu_features_->Has(kNone) is always true, so this will terminate even if
// the features list is empty.
} while (!cpu_features_->Has(feature_));
return *this;
}
CPUFeaturesConstIterator CPUFeaturesConstIterator::operator++(int) { // Postfix
CPUFeaturesConstIterator result = *this;
++(*this);
return result;
}
} // namespace vixl

555
3rdparty/vixl/src/utils-vixl.cc vendored Normal file
View File

@ -0,0 +1,555 @@
// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "utils-vixl.h"
#include <cstdio>
namespace vixl {
// The default NaN values (for FPCR.DN=1).
const double kFP64DefaultNaN = RawbitsToDouble(UINT64_C(0x7ff8000000000000));
const float kFP32DefaultNaN = RawbitsToFloat(0x7fc00000);
const Float16 kFP16DefaultNaN = RawbitsToFloat16(0x7e00);
// Floating-point zero values.
const Float16 kFP16PositiveZero = RawbitsToFloat16(0x0);
const Float16 kFP16NegativeZero = RawbitsToFloat16(0x8000);
// Floating-point infinity values.
const Float16 kFP16PositiveInfinity = RawbitsToFloat16(0x7c00);
const Float16 kFP16NegativeInfinity = RawbitsToFloat16(0xfc00);
const float kFP32PositiveInfinity = RawbitsToFloat(0x7f800000);
const float kFP32NegativeInfinity = RawbitsToFloat(0xff800000);
const double kFP64PositiveInfinity =
RawbitsToDouble(UINT64_C(0x7ff0000000000000));
const double kFP64NegativeInfinity =
RawbitsToDouble(UINT64_C(0xfff0000000000000));
bool IsZero(Float16 value) {
uint16_t bits = Float16ToRawbits(value);
return (bits == Float16ToRawbits(kFP16PositiveZero) ||
bits == Float16ToRawbits(kFP16NegativeZero));
}
uint16_t Float16ToRawbits(Float16 value) { return value.rawbits_; }
uint32_t FloatToRawbits(float value) {
uint32_t bits = 0;
memcpy(&bits, &value, 4);
return bits;
}
uint64_t DoubleToRawbits(double value) {
uint64_t bits = 0;
memcpy(&bits, &value, 8);
return bits;
}
Float16 RawbitsToFloat16(uint16_t bits) {
Float16 f;
f.rawbits_ = bits;
return f;
}
float RawbitsToFloat(uint32_t bits) {
float value = 0.0;
memcpy(&value, &bits, 4);
return value;
}
double RawbitsToDouble(uint64_t bits) {
double value = 0.0;
memcpy(&value, &bits, 8);
return value;
}
uint32_t Float16Sign(internal::SimFloat16 val) {
uint16_t rawbits = Float16ToRawbits(val);
return ExtractUnsignedBitfield32(15, 15, rawbits);
}
uint32_t Float16Exp(internal::SimFloat16 val) {
uint16_t rawbits = Float16ToRawbits(val);
return ExtractUnsignedBitfield32(14, 10, rawbits);
}
uint32_t Float16Mantissa(internal::SimFloat16 val) {
uint16_t rawbits = Float16ToRawbits(val);
return ExtractUnsignedBitfield32(9, 0, rawbits);
}
uint32_t FloatSign(float val) {
uint32_t rawbits = FloatToRawbits(val);
return ExtractUnsignedBitfield32(31, 31, rawbits);
}
uint32_t FloatExp(float val) {
uint32_t rawbits = FloatToRawbits(val);
return ExtractUnsignedBitfield32(30, 23, rawbits);
}
uint32_t FloatMantissa(float val) {
uint32_t rawbits = FloatToRawbits(val);
return ExtractUnsignedBitfield32(22, 0, rawbits);
}
uint32_t DoubleSign(double val) {
uint64_t rawbits = DoubleToRawbits(val);
return static_cast<uint32_t>(ExtractUnsignedBitfield64(63, 63, rawbits));
}
uint32_t DoubleExp(double val) {
uint64_t rawbits = DoubleToRawbits(val);
return static_cast<uint32_t>(ExtractUnsignedBitfield64(62, 52, rawbits));
}
uint64_t DoubleMantissa(double val) {
uint64_t rawbits = DoubleToRawbits(val);
return ExtractUnsignedBitfield64(51, 0, rawbits);
}
internal::SimFloat16 Float16Pack(uint16_t sign,
uint16_t exp,
uint16_t mantissa) {
uint16_t bits = (sign << 15) | (exp << 10) | mantissa;
return RawbitsToFloat16(bits);
}
float FloatPack(uint32_t sign, uint32_t exp, uint32_t mantissa) {
uint32_t bits = (sign << 31) | (exp << 23) | mantissa;
return RawbitsToFloat(bits);
}
double DoublePack(uint64_t sign, uint64_t exp, uint64_t mantissa) {
uint64_t bits = (sign << 63) | (exp << 52) | mantissa;
return RawbitsToDouble(bits);
}
int Float16Classify(Float16 value) {
uint16_t bits = Float16ToRawbits(value);
uint16_t exponent_max = (1 << 5) - 1;
uint16_t exponent_mask = exponent_max << 10;
uint16_t mantissa_mask = (1 << 10) - 1;
uint16_t exponent = (bits & exponent_mask) >> 10;
uint16_t mantissa = bits & mantissa_mask;
if (exponent == 0) {
if (mantissa == 0) {
return FP_ZERO;
}
return FP_SUBNORMAL;
} else if (exponent == exponent_max) {
if (mantissa == 0) {
return FP_INFINITE;
}
return FP_NAN;
}
return FP_NORMAL;
}
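// Illustrative classifications following from the encoding checks above
// (examples, not from the upstream VIXL sources):
//   Float16Classify(RawbitsToFloat16(0x3c00)) == FP_NORMAL     (1.0)
//   Float16Classify(RawbitsToFloat16(0x0001)) == FP_SUBNORMAL  (2^-24)
//   Float16Classify(RawbitsToFloat16(0x7c00)) == FP_INFINITE   (+inf)
//   Float16Classify(RawbitsToFloat16(0x7e00)) == FP_NAN        (default NaN)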
unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size) {
VIXL_ASSERT((reg_size % 8) == 0);
int count = 0;
for (unsigned i = 0; i < (reg_size / 16); i++) {
if ((imm & 0xffff) == 0) {
count++;
}
imm >>= 16;
}
return count;
}
int BitCount(uint64_t value) { return CountSetBits(value); }
// Float16 definitions.
Float16::Float16(double dvalue) {
rawbits_ =
Float16ToRawbits(FPToFloat16(dvalue, FPTieEven, kIgnoreDefaultNaN));
}
namespace internal {
SimFloat16 SimFloat16::operator-() const {
return RawbitsToFloat16(rawbits_ ^ 0x8000);
}
// SimFloat16 definitions.
SimFloat16 SimFloat16::operator+(SimFloat16 rhs) const {
return static_cast<double>(*this) + static_cast<double>(rhs);
}
SimFloat16 SimFloat16::operator-(SimFloat16 rhs) const {
return static_cast<double>(*this) - static_cast<double>(rhs);
}
SimFloat16 SimFloat16::operator*(SimFloat16 rhs) const {
return static_cast<double>(*this) * static_cast<double>(rhs);
}
SimFloat16 SimFloat16::operator/(SimFloat16 rhs) const {
return static_cast<double>(*this) / static_cast<double>(rhs);
}
bool SimFloat16::operator<(SimFloat16 rhs) const {
return static_cast<double>(*this) < static_cast<double>(rhs);
}
bool SimFloat16::operator>(SimFloat16 rhs) const {
return static_cast<double>(*this) > static_cast<double>(rhs);
}
bool SimFloat16::operator==(SimFloat16 rhs) const {
if (IsNaN(*this) || IsNaN(rhs)) {
return false;
} else if (IsZero(rhs) && IsZero(*this)) {
// +0 and -0 should be treated as equal.
return true;
}
return this->rawbits_ == rhs.rawbits_;
}
bool SimFloat16::operator!=(SimFloat16 rhs) const { return !(*this == rhs); }
bool SimFloat16::operator==(double rhs) const {
return static_cast<double>(*this) == static_cast<double>(rhs);
}
SimFloat16::operator double() const {
return FPToDouble(*this, kIgnoreDefaultNaN);
}
Int64 BitCount(Uint32 value) { return CountSetBits(value.Get()); }
} // namespace internal
float FPToFloat(Float16 value, UseDefaultNaN DN, bool* exception) {
uint16_t bits = Float16ToRawbits(value);
uint32_t sign = bits >> 15;
uint32_t exponent =
ExtractUnsignedBitfield32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
kFloat16MantissaBits,
bits);
uint32_t mantissa =
ExtractUnsignedBitfield32(kFloat16MantissaBits - 1, 0, bits);
switch (Float16Classify(value)) {
case FP_ZERO:
return (sign == 0) ? 0.0f : -0.0f;
case FP_INFINITE:
return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
case FP_SUBNORMAL: {
// Calculate shift required to put mantissa into the most-significant bits
// of the destination mantissa.
int shift = CountLeadingZeros(mantissa << (32 - 10));
// Shift mantissa and discard implicit '1'.
mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
mantissa &= (1 << kFloatMantissaBits) - 1;
// Adjust the exponent for the shift applied, and rebias.
exponent = exponent - shift + (-15 + 127);
break;
}
case FP_NAN:
if (IsSignallingNaN(value)) {
if (exception != NULL) {
*exception = true;
}
}
if (DN == kUseDefaultNaN) return kFP32DefaultNaN;
// Convert NaNs as the processor would:
// - The sign is propagated.
// - The payload (mantissa) is transferred entirely, except that the top
// bit is forced to '1', making the result a quiet NaN. The unused
// (low-order) payload bits are set to 0.
exponent = (1 << kFloatExponentBits) - 1;
// Increase bits in mantissa, making low-order bits 0.
mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
mantissa |= 1 << 22; // Force a quiet NaN.
break;
case FP_NORMAL:
// Increase bits in mantissa, making low-order bits 0.
mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
// Change exponent bias.
exponent += (-15 + 127);
break;
default:
VIXL_UNREACHABLE();
}
return RawbitsToFloat((sign << 31) | (exponent << kFloatMantissaBits) |
mantissa);
}
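// Illustrative NaN-conversion trace for the function above (an example, not
// from the upstream VIXL sources): the half-precision signalling NaN with
// rawbits 0x7c01 carries payload 1. With DN == kUseDefaultNaN the result is
// simply kFP32DefaultNaN (rawbits 0x7fc00000); otherwise the payload is
// shifted up by 13 bits and the top mantissa bit is forced, producing a quiet
// NaN with rawbits 0x7fc02000 (and *exception is set to true for the
// signalling input, if a pointer was provided).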
float FPToFloat(double value,
FPRounding round_mode,
UseDefaultNaN DN,
bool* exception) {
// Only the FPTieEven and FPRoundOdd rounding modes are implemented.
VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
USE(round_mode);
switch (std::fpclassify(value)) {
case FP_NAN: {
if (IsSignallingNaN(value)) {
if (exception != NULL) {
*exception = true;
}
}
if (DN == kUseDefaultNaN) return kFP32DefaultNaN;
// Convert NaNs as the processor would:
// - The sign is propagated.
// - The payload (mantissa) is transferred as much as possible, except
// that the top bit is forced to '1', making the result a quiet NaN.
uint64_t raw = DoubleToRawbits(value);
uint32_t sign = raw >> 63;
uint32_t exponent = (1 << 8) - 1;
uint32_t payload =
static_cast<uint32_t>(ExtractUnsignedBitfield64(50, 52 - 23, raw));
payload |= (1 << 22); // Force a quiet NaN.
return RawbitsToFloat((sign << 31) | (exponent << 23) | payload);
}
case FP_ZERO:
case FP_INFINITE: {
// In a C++ cast, any value representable in the target type will be
// unchanged. This is always the case for +/-0.0 and infinities.
return static_cast<float>(value);
}
case FP_NORMAL:
case FP_SUBNORMAL: {
// Convert double-to-float as the processor would, assuming that FPCR.FZ
// (flush-to-zero) is not set.
uint64_t raw = DoubleToRawbits(value);
// Extract the IEEE-754 double components.
uint32_t sign = raw >> 63;
// Extract the exponent and remove the IEEE-754 encoding bias.
int32_t exponent =
static_cast<int32_t>(ExtractUnsignedBitfield64(62, 52, raw)) - 1023;
// Extract the mantissa and add the implicit '1' bit.
uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
if (std::fpclassify(value) == FP_NORMAL) {
mantissa |= (UINT64_C(1) << 52);
}
return FPRoundToFloat(sign, exponent, mantissa, round_mode);
}
}
VIXL_UNREACHABLE();
return static_cast<float>(value);
}
// TODO: We should consider implementing a full FPToDouble(Float16)
// conversion function (for performance reasons).
double FPToDouble(Float16 value, UseDefaultNaN DN, bool* exception) {
// We can rely on implicit float to double conversion here.
return FPToFloat(value, DN, exception);
}
double FPToDouble(float value, UseDefaultNaN DN, bool* exception) {
switch (std::fpclassify(value)) {
case FP_NAN: {
if (IsSignallingNaN(value)) {
if (exception != NULL) {
*exception = true;
}
}
if (DN == kUseDefaultNaN) return kFP64DefaultNaN;
// Convert NaNs as the processor would:
// - The sign is propagated.
// - The payload (mantissa) is transferred entirely, except that the top
// bit is forced to '1', making the result a quiet NaN. The unused
// (low-order) payload bits are set to 0.
uint32_t raw = FloatToRawbits(value);
uint64_t sign = raw >> 31;
uint64_t exponent = (1 << 11) - 1;
uint64_t payload = ExtractUnsignedBitfield64(21, 0, raw);
payload <<= (52 - 23); // The unused low-order bits should be 0.
payload |= (UINT64_C(1) << 51); // Force a quiet NaN.
return RawbitsToDouble((sign << 63) | (exponent << 52) | payload);
}
case FP_ZERO:
case FP_NORMAL:
case FP_SUBNORMAL:
case FP_INFINITE: {
// All other inputs are preserved in a standard cast, because every value
// representable using an IEEE-754 float is also representable using an
// IEEE-754 double.
return static_cast<double>(value);
}
}
VIXL_UNREACHABLE();
return static_cast<double>(value);
}
Float16 FPToFloat16(float value,
FPRounding round_mode,
UseDefaultNaN DN,
bool* exception) {
// Only the FPTieEven rounding mode is implemented.
VIXL_ASSERT(round_mode == FPTieEven);
USE(round_mode);
uint32_t raw = FloatToRawbits(value);
int32_t sign = raw >> 31;
int32_t exponent = ExtractUnsignedBitfield32(30, 23, raw) - 127;
uint32_t mantissa = ExtractUnsignedBitfield32(22, 0, raw);
switch (std::fpclassify(value)) {
case FP_NAN: {
if (IsSignallingNaN(value)) {
if (exception != NULL) {
*exception = true;
}
}
if (DN == kUseDefaultNaN) return kFP16DefaultNaN;
// Convert NaNs as the processor would:
// - The sign is propagated.
// - The payload (mantissa) is transferred as much as possible, except
// that the top bit is forced to '1', making the result a quiet NaN.
uint16_t result = (sign == 0) ? Float16ToRawbits(kFP16PositiveInfinity)
: Float16ToRawbits(kFP16NegativeInfinity);
result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
result |= (1 << 9); // Force a quiet NaN.
return RawbitsToFloat16(result);
}
case FP_ZERO:
return (sign == 0) ? kFP16PositiveZero : kFP16NegativeZero;
case FP_INFINITE:
return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
case FP_NORMAL:
case FP_SUBNORMAL: {
// Convert float-to-half as the processor would, assuming that FPCR.FZ
// (flush-to-zero) is not set.
// Add the implicit '1' bit to the mantissa.
mantissa += (1 << 23);
return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
}
}
VIXL_UNREACHABLE();
return kFP16PositiveZero;
}
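// Illustrative conversions for the function above (examples, not from the
// upstream VIXL sources; exact values, so no rounding is involved):
//   FPToFloat16(1.0f, FPTieEven, kIgnoreDefaultNaN, NULL)      // rawbits 0x3c00
//   FPToFloat16(65504.0f, FPTieEven, kIgnoreDefaultNaN, NULL)  // rawbits 0x7bff,
//                                                              // the largest finite half
//   A float signalling NaN with DN == kUseDefaultNaN yields kFP16DefaultNaN
//   (rawbits 0x7e00).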
Float16 FPToFloat16(double value,
FPRounding round_mode,
UseDefaultNaN DN,
bool* exception) {
// Only the FPTieEven rounding mode is implemented.
VIXL_ASSERT(round_mode == FPTieEven);
USE(round_mode);
uint64_t raw = DoubleToRawbits(value);
int32_t sign = raw >> 63;
int64_t exponent = ExtractUnsignedBitfield64(62, 52, raw) - 1023;
uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
switch (std::fpclassify(value)) {
case FP_NAN: {
if (IsSignallingNaN(value)) {
if (exception != NULL) {
*exception = true;
}
}
if (DN == kUseDefaultNaN) return kFP16DefaultNaN;
// Convert NaNs as the processor would:
// - The sign is propagated.
// - The payload (mantissa) is transferred as much as possible, except
// that the top bit is forced to '1', making the result a quiet NaN.
uint16_t result = (sign == 0) ? Float16ToRawbits(kFP16PositiveInfinity)
: Float16ToRawbits(kFP16NegativeInfinity);
result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
result |= (1 << 9); // Force a quiet NaN.
return RawbitsToFloat16(result);
}
case FP_ZERO:
return (sign == 0) ? kFP16PositiveZero : kFP16NegativeZero;
case FP_INFINITE:
return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
case FP_NORMAL:
case FP_SUBNORMAL: {
// Convert double-to-half as the processor would, assuming that FPCR.FZ
// (flush-to-zero) is not set.
// Add the implicit '1' bit to the mantissa.
mantissa += (UINT64_C(1) << 52);
return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
}
}
VIXL_UNREACHABLE();
return kFP16PositiveZero;
}
} // namespace vixl

93
3rdparty/vixl/vixl.vcxproj vendored Normal file
View File

@ -0,0 +1,93 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(SolutionDir)common\vsprops\BaseProjectConfig.props" />
<Import Project="$(SolutionDir)common\vsprops\WinSDK.props" />
<PropertyGroup Label="Globals">
<ProjectGuid>{8906836E-F06E-46E8-B11A-74E5E8C7B8FB}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<PlatformToolset Condition="!$(Configuration.Contains(Clang))">$(DefaultPlatformToolset)</PlatformToolset>
<PlatformToolset Condition="$(Configuration.Contains(Clang))">ClangCL</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
<WholeProgramOptimization Condition="$(Configuration.Contains(Release))">true</WholeProgramOptimization>
<UseDebugLibraries Condition="$(Configuration.Contains(Debug))">true</UseDebugLibraries>
<UseDebugLibraries Condition="!$(Configuration.Contains(Debug))">false</UseDebugLibraries>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings" />
<ImportGroup Label="PropertySheets">
<Import Project="..\DefaultProjectRootDir.props" />
<Import Project="..\3rdparty.props" />
<Import Condition="$(Configuration.Contains(Debug))" Project="..\..\common\vsprops\CodeGen_Debug.props" />
<Import Condition="$(Configuration.Contains(Devel))" Project="..\..\common\vsprops\CodeGen_Devel.props" />
<Import Condition="$(Configuration.Contains(Release))" Project="..\..\common\vsprops\CodeGen_Release.props" />
<Import Condition="!$(Configuration.Contains(Release))" Project="..\..\common\vsprops\IncrementalLinking.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<PreprocessorDefinitions>%(PreprocessorDefinitions)</PreprocessorDefinitions>
<WarningLevel>TurnOffAllWarnings</WarningLevel>
<PreprocessorDefinitions>VIXL_INCLUDE_TARGET_AARCH64;VIXL_CODE_BUFFER_MALLOC;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="$(Configuration.Contains(Debug))">_SECURE_SCL_=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="$(Configuration.Contains(Devel))">NDEBUG;_SECURE_SCL_=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="$(Configuration.Contains(Release))">NDEBUG;_SECURE_SCL_=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(ProjectDir)include;$(ProjectDir)include\vixl;$(ProjectDir)include\vixl\aarch64;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalOptions>/Zc:__cplusplus /Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="include\vixl\aarch64\abi-aarch64.h" />
<ClInclude Include="include\vixl\aarch64\assembler-aarch64.h" />
<ClInclude Include="include\vixl\aarch64\constants-aarch64.h" />
<ClInclude Include="include\vixl\aarch64\cpu-aarch64.h" />
<ClInclude Include="include\vixl\aarch64\cpu-features-auditor-aarch64.h" />
<ClInclude Include="include\vixl\aarch64\decoder-aarch64.h" />
<ClInclude Include="include\vixl\aarch64\decoder-constants-aarch64.h" />
<ClInclude Include="include\vixl\aarch64\decoder-visitor-map-aarch64.h" />
<ClInclude Include="include\vixl\aarch64\disasm-aarch64.h" />
<ClInclude Include="include\vixl\aarch64\instructions-aarch64.h" />
<ClInclude Include="include\vixl\aarch64\macro-assembler-aarch64.h" />
<ClInclude Include="include\vixl\aarch64\operands-aarch64.h" />
<ClInclude Include="include\vixl\aarch64\registers-aarch64.h" />
<ClInclude Include="include\vixl\assembler-base-vixl.h" />
<ClInclude Include="include\vixl\code-buffer-vixl.h" />
<ClInclude Include="include\vixl\code-generation-scopes-vixl.h" />
<ClInclude Include="include\vixl\compiler-intrinsics-vixl.h" />
<ClInclude Include="include\vixl\cpu-features.h" />
<ClInclude Include="include\vixl\globals-vixl.h" />
<ClInclude Include="include\vixl\invalset-vixl.h" />
<ClInclude Include="include\vixl\macro-assembler-interface.h" />
<ClInclude Include="include\vixl\platform-vixl.h" />
<ClInclude Include="include\vixl\pool-manager-impl.h" />
<ClInclude Include="include\vixl\pool-manager.h" />
<ClInclude Include="include\vixl\utils-vixl.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="src\aarch64\assembler-aarch64.cc" />
<ClCompile Include="src\aarch64\assembler-sve-aarch64.cc" />
<ClCompile Include="src\aarch64\cpu-aarch64.cc" />
<ClCompile Include="src\aarch64\cpu-features-auditor-aarch64.cc" />
<ClCompile Include="src\aarch64\decoder-aarch64.cc" />
<ClCompile Include="src\aarch64\disasm-aarch64.cc" />
<ClCompile Include="src\aarch64\instructions-aarch64.cc" />
<ClCompile Include="src\aarch64\logic-aarch64.cc" />
<ClCompile Include="src\aarch64\macro-assembler-aarch64.cc" />
<ClCompile Include="src\aarch64\macro-assembler-sve-aarch64.cc" />
<ClCompile Include="src\aarch64\operands-aarch64.cc" />
<ClCompile Include="src\aarch64\pointer-auth-aarch64.cc" />
<ClCompile Include="src\aarch64\registers-aarch64.cc" />
<ClCompile Include="src\code-buffer-vixl.cc" />
<ClCompile Include="src\compiler-intrinsics-vixl.cc" />
<ClCompile Include="src\cpu-features.cc" />
<ClCompile Include="src\utils-vixl.cc" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets" />
</Project>

106
3rdparty/vixl/vixl.vcxproj.filters vendored Normal file
View File

@ -0,0 +1,106 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="aarch64">
<UniqueIdentifier>{bad5c611-84e1-42b6-b20b-828618673b31}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="include\vixl\aarch64\decoder-constants-aarch64.h">
<Filter>aarch64</Filter>
</ClInclude>
<ClInclude Include="include\vixl\aarch64\decoder-visitor-map-aarch64.h">
<Filter>aarch64</Filter>
</ClInclude>
<ClInclude Include="include\vixl\aarch64\disasm-aarch64.h">
<Filter>aarch64</Filter>
</ClInclude>
<ClInclude Include="include\vixl\aarch64\instructions-aarch64.h">
<Filter>aarch64</Filter>
</ClInclude>
<ClInclude Include="include\vixl\aarch64\macro-assembler-aarch64.h">
<Filter>aarch64</Filter>
</ClInclude>
<ClInclude Include="include\vixl\aarch64\operands-aarch64.h">
<Filter>aarch64</Filter>
</ClInclude>
<ClInclude Include="include\vixl\aarch64\registers-aarch64.h">
<Filter>aarch64</Filter>
</ClInclude>
<ClInclude Include="include\vixl\aarch64\abi-aarch64.h">
<Filter>aarch64</Filter>
</ClInclude>
<ClInclude Include="include\vixl\aarch64\assembler-aarch64.h">
<Filter>aarch64</Filter>
</ClInclude>
<ClInclude Include="include\vixl\aarch64\constants-aarch64.h">
<Filter>aarch64</Filter>
</ClInclude>
<ClInclude Include="include\vixl\aarch64\cpu-aarch64.h">
<Filter>aarch64</Filter>
</ClInclude>
<ClInclude Include="include\vixl\aarch64\cpu-features-auditor-aarch64.h">
<Filter>aarch64</Filter>
</ClInclude>
<ClInclude Include="include\vixl\aarch64\decoder-aarch64.h">
<Filter>aarch64</Filter>
</ClInclude>
<ClInclude Include="include\vixl\assembler-base-vixl.h" />
<ClInclude Include="include\vixl\code-buffer-vixl.h" />
<ClInclude Include="include\vixl\code-generation-scopes-vixl.h" />
<ClInclude Include="include\vixl\compiler-intrinsics-vixl.h" />
<ClInclude Include="include\vixl\cpu-features.h" />
<ClInclude Include="include\vixl\globals-vixl.h" />
<ClInclude Include="include\vixl\invalset-vixl.h" />
<ClInclude Include="include\vixl\macro-assembler-interface.h" />
<ClInclude Include="include\vixl\platform-vixl.h" />
<ClInclude Include="include\vixl\pool-manager.h" />
<ClInclude Include="include\vixl\pool-manager-impl.h" />
<ClInclude Include="include\vixl\utils-vixl.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="src\aarch64\decoder-aarch64.cc">
<Filter>aarch64</Filter>
</ClCompile>
<ClCompile Include="src\aarch64\disasm-aarch64.cc">
<Filter>aarch64</Filter>
</ClCompile>
<ClCompile Include="src\aarch64\instructions-aarch64.cc">
<Filter>aarch64</Filter>
</ClCompile>
<ClCompile Include="src\aarch64\logic-aarch64.cc">
<Filter>aarch64</Filter>
</ClCompile>
<ClCompile Include="src\aarch64\macro-assembler-aarch64.cc">
<Filter>aarch64</Filter>
</ClCompile>
<ClCompile Include="src\aarch64\macro-assembler-sve-aarch64.cc">
<Filter>aarch64</Filter>
</ClCompile>
<ClCompile Include="src\aarch64\operands-aarch64.cc">
<Filter>aarch64</Filter>
</ClCompile>
<ClCompile Include="src\aarch64\pointer-auth-aarch64.cc">
<Filter>aarch64</Filter>
</ClCompile>
<ClCompile Include="src\aarch64\registers-aarch64.cc">
<Filter>aarch64</Filter>
</ClCompile>
<ClCompile Include="src\aarch64\assembler-aarch64.cc">
<Filter>aarch64</Filter>
</ClCompile>
<ClCompile Include="src\aarch64\assembler-sve-aarch64.cc">
<Filter>aarch64</Filter>
</ClCompile>
<ClCompile Include="src\aarch64\cpu-aarch64.cc">
<Filter>aarch64</Filter>
</ClCompile>
<ClCompile Include="src\aarch64\cpu-features-auditor-aarch64.cc">
<Filter>aarch64</Filter>
</ClCompile>
<ClCompile Include="src\code-buffer-vixl.cc" />
<ClCompile Include="src\compiler-intrinsics-vixl.cc" />
<ClCompile Include="src\cpu-features.cc" />
<ClCompile Include="src\utils-vixl.cc" />
</ItemGroup>
</Project>